justinkay committed on
Commit
e7063f6
·
1 Parent(s): 3c56581

Cleanup script

Browse files
Files changed (1) hide show
  1. cleanup_deleted_images.py +89 -0
cleanup_deleted_images.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python3
"""
Script to clean up references to deleted images from:
- iwildcam_demo_annotations.json
- iwildcam_demo.pt
- iwildcam_demo_labels.pt
- images.txt

An image counts as "deleted" when it is listed in images.txt but no file of
that name exists in the image directory.

All inputs are loaded, validated and filtered in memory *before* any file is
rewritten.  images.txt is the only record of which tensor row belongs to which
image, so it must never be overwritten while a later step can still fail.
"""

import json
import os
import torch

IMAGE_DIR = "iwildcam_demo_images"
IMAGES_TXT = "images.txt"
DEMO_PT = "iwildcam_demo.pt"
LABELS_PT = "iwildcam_demo_labels.pt"
ANNOTATIONS_JSON = "iwildcam_demo_annotations.json"


def read_image_list(path):
    """Return the non-empty, whitespace-stripped lines of *path*, in order.

    Blank lines are skipped so that a stray empty line (e.g. at the end of the
    file) is not counted as an image and does not shift the indices used to
    select tensor rows.
    """
    with open(path, "r", encoding="utf-8") as f:
        stripped = (line.strip() for line in f)
        return [name for name in stripped if name]


def select_existing(current_images, existing_images):
    """Split the ordered image list into survivors and their original indices.

    Args:
        current_images: image names in the order they appear in images.txt.
        existing_images: container of file names that still exist on disk.

    Returns:
        A ``(valid_images, valid_indices)`` tuple of two parallel lists:
        the names that are still present, and the position each one had in
        *current_images*.
    """
    valid_images = []
    valid_indices = []
    for idx, img in enumerate(current_images):
        if img in existing_images:
            valid_images.append(img)
            valid_indices.append(idx)
    return valid_images, valid_indices


def check_alignment(num_images, tensors_shape, labels_shape):
    """Raise ValueError unless both tensors have one entry per listed image.

    The demo tensor is expected to be laid out as [3, N, 5] (per the original
    author's note), so N is read from dimension 1; the labels are indexed
    along dimension 0.  A mismatch means images.txt and the .pt files are out
    of sync (for example after an earlier, partially completed run), and
    filtering by index would silently keep the wrong rows.
    """
    if len(tensors_shape) < 2 or tensors_shape[1] != num_images:
        raise ValueError(
            f"{DEMO_PT} has shape {tuple(tensors_shape)} but {IMAGES_TXT} "
            f"lists {num_images} images; refusing to filter misaligned data"
        )
    if len(labels_shape) < 1 or labels_shape[0] != num_images:
        raise ValueError(
            f"{LABELS_PT} has shape {tuple(labels_shape)} but {IMAGES_TXT} "
            f"lists {num_images} images; refusing to filter misaligned data"
        )


def filter_annotations(annotations, existing_images):
    """Drop JSON entries that refer to images no longer on disk.

    Modifies *annotations* in place and also returns it.

    - ``annotations["images"]`` (if present) keeps only the entries whose
      ``file_name`` is in *existing_images*.
    - ``annotations["annotations"]`` (if present) keeps only the entries whose
      ``image_id`` is the ``id`` of a surviving image.  When there is no
      ``"images"`` list the valid ids cannot be determined, so the annotation
      entries are left untouched instead of raising ``KeyError``.
    """
    if "images" in annotations:
        original_count = len(annotations["images"])
        annotations["images"] = [
            img for img in annotations["images"]
            if img["file_name"] in existing_images
        ]
        print(f"Updated JSON images: {original_count} -> {len(annotations['images'])}")

    if "annotations" in annotations and "images" in annotations:
        # Set of ids belonging to the images that survived the filter above.
        valid_image_ids = {img["id"] for img in annotations["images"]}
        original_count = len(annotations["annotations"])
        annotations["annotations"] = [
            ann for ann in annotations["annotations"]
            if ann["image_id"] in valid_image_ids
        ]
        print(f"Updated JSON annotations: {original_count} -> {len(annotations['annotations'])}")

    return annotations


def main():
    """Run the cleanup against the files in the current working directory."""
    # ---- Phase 1: read everything and compute the filtered results ----
    existing_images = set(os.listdir(IMAGE_DIR))
    print(f"Found {len(existing_images)} existing images")

    current_images = read_image_list(IMAGES_TXT)
    print(f"Found {len(current_images)} images in images.txt")

    valid_images, valid_indices = select_existing(current_images, existing_images)
    print(f"Keeping {len(valid_images)} images")
    print(f"Removing {len(current_images) - len(valid_images)} images")

    demo_tensors = torch.load(DEMO_PT)
    demo_labels = torch.load(LABELS_PT)
    print(f"Original iwildcam_demo.pt shape: {demo_tensors.shape}")
    print(f"Original iwildcam_demo_labels.pt shape: {demo_labels.shape}")

    check_alignment(len(current_images), demo_tensors.shape, demo_labels.shape)

    # Keep only the rows of the surviving images (image axis is dimension 1
    # for the demo tensor and dimension 0 for the labels).
    filtered_demo = demo_tensors[:, valid_indices, :]
    filtered_labels = demo_labels[valid_indices]

    with open(ANNOTATIONS_JSON, "r", encoding="utf-8") as f:
        annotations = json.load(f)
    filter_annotations(annotations, existing_images)

    # ---- Phase 2: nothing above failed, so it is now safe to write ----
    torch.save(filtered_demo, DEMO_PT)
    torch.save(filtered_labels, LABELS_PT)
    print(f"Updated iwildcam_demo.pt: {demo_tensors.shape} -> {filtered_demo.shape}")
    print(f"Updated iwildcam_demo_labels.pt: {demo_labels.shape} -> {filtered_labels.shape}")

    with open(ANNOTATIONS_JSON, "w", encoding="utf-8") as f:
        json.dump(annotations, f, indent=2)
    print("Updated iwildcam_demo_annotations.json")

    with open(IMAGES_TXT, "w", encoding="utf-8") as f:
        f.writelines(f"{img}\n" for img in valid_images)
    print("Updated images.txt")

    print("\nCleanup complete!")


if __name__ == "__main__":
    main()