Spaces:

justinkay
/

coda

Running

App Files Community

justinkay committed on Sep 30

Commit

e7063f6

1 Parent(s): 3c56581

Cleanup script

Browse files

Files changed (1) hide show

cleanup_deleted_images.py +89 -0

cleanup_deleted_images.py ADDED Viewed

	@@ -0,0 +1,89 @@

+#!/usr/bin/env python3
+"""
+Script to clean up references to deleted images from:
+- iwildcam_demo_annotations.json
+- iwildcam_demo.pt
+- iwildcam_demo_labels.pt
+- images.txt
+"""
+import json
+import os
+import torch
+# Get list of existing images
+image_dir = "iwildcam_demo_images"
+existing_images = set(os.listdir(image_dir))
+print(f"Found {len(existing_images)} existing images")
+# Read images.txt to get current order
+with open("images.txt", "r") as f:
+    current_images = [line.strip() for line in f]
+print(f"Found {len(current_images)} images in images.txt")
+# Identify which images still exist and their new indices
+valid_images = []
+valid_indices = []
+for idx, img in enumerate(current_images):
+    if img in existing_images:
+        valid_images.append(img)
+        valid_indices.append(idx)
+print(f"Keeping {len(valid_images)} images")
+print(f"Removing {len(current_images) - len(valid_images)} images")
+# Update images.txt
+with open("images.txt", "w") as f:
+    for img in valid_images:
+        f.write(f"{img}\n")
+print("Updated images.txt")
+# Load and filter .pt files
+demo_tensors = torch.load("iwildcam_demo.pt")
+demo_labels = torch.load("iwildcam_demo_labels.pt")
+print(f"Original iwildcam_demo.pt shape: {demo_tensors.shape}")
+print(f"Original iwildcam_demo_labels.pt shape: {demo_labels.shape}")
+# Filter tensors to only keep valid indices
+# demo_tensors has shape [3, N, 5] where N is number of images
+# We need to filter along dimension 1
+filtered_demo = demo_tensors[:, valid_indices, :]
+filtered_labels = demo_labels[valid_indices]
+# Save filtered tensors
+torch.save(filtered_demo, "iwildcam_demo.pt")
+torch.save(filtered_labels, "iwildcam_demo_labels.pt")
+print(f"Updated iwildcam_demo.pt: {demo_tensors.shape} -> {filtered_demo.shape}")
+print(f"Updated iwildcam_demo_labels.pt: {demo_labels.shape} -> {filtered_labels.shape}")
+# Load and filter JSON annotations
+with open("iwildcam_demo_annotations.json", "r") as f:
+    annotations = json.load(f)
+# Filter images in JSON
+if "images" in annotations:
+    original_count = len(annotations["images"])
+    annotations["images"] = [
+        img for img in annotations["images"]
+        if img["file_name"] in existing_images
+    ]
+    print(f"Updated JSON images: {original_count} -> {len(annotations['images'])}")
+# Filter annotations in JSON (if they reference image_id)
+if "annotations" in annotations:
+    # Build mapping of file_name to image_id for existing images
+    valid_image_ids = {img["id"] for img in annotations["images"]}
+    original_count = len(annotations["annotations"])
+    annotations["annotations"] = [
+        ann for ann in annotations["annotations"]
+        if ann["image_id"] in valid_image_ids
+    ]
+    print(f"Updated JSON annotations: {original_count} -> {len(annotations['annotations'])}")
+# Save updated JSON
+with open("iwildcam_demo_annotations.json", "w") as f:
+    json.dump(annotations, f, indent=2)
+print("Updated iwildcam_demo_annotations.json")
+print("\nCleanup complete!")