Spaces:

justinkay
/

coda

Sleeping

coda / process_iwildcam_data.py

justinkay

Correct bioclipv2 results

8a229fe about 2 months ago

4.76 kB

	import json
	import torch
	import numpy as np

	def load_annotations(annotation_file):
	    """Load image file names and their category ids from a JSON annotation file.

	    The document must contain an ``annotations`` list whose entries carry
	    ``image_id`` and ``category_id``, and an ``images`` list whose entries
	    carry ``id`` and ``file_name``.

	    Args:
	        annotation_file: Path to the JSON annotation file.

	    Returns:
	        A ``(images, labels)`` tuple of parallel lists: the file names in the
	        order they appear under ``images`` and the matching category ids.
	        Images without any annotation are skipped. When an image has
	        several annotations, the last one listed wins.

	    Raises:
	        KeyError: If one of the expected keys is missing from the document.
	    """
	    # JSON is UTF-8 by specification; do not depend on the platform default
	    # encoding, which can mis-decode non-ASCII file names.
	    with open(annotation_file, 'r', encoding='utf-8') as f:
	        data = json.load(f)

	    # image_id -> category_id (later annotations overwrite earlier ones)
	    image_to_category = {
	        annotation['image_id']: annotation['category_id']
	        for annotation in data['annotations']
	    }

	    # Walk the images in file order so both lists stay aligned.
	    images = []
	    labels = []
	    for image in data['images']:
	        image_id = image['id']
	        file_name = image['file_name']
	        if image_id in image_to_category:
	            images.append(file_name)
	            labels.append(image_to_category[image_id])

	    return images, labels

	def load_model_predictions(model_files):
	    """Load per-image predictions from one or more model result files.

	    Each file is a JSON document with a ``model`` name, a ``class_names``
	    list and a ``results`` mapping of image name to that model's predictions.

	    Args:
	        model_files: Iterable of paths to JSON result files.

	    Returns:
	        A ``(models, all_predictions, class_names)`` tuple where ``models``
	        lists the model names in file order, ``all_predictions`` maps
	        ``image_name -> {model_name: predictions}``, and ``class_names`` is
	        the class list of the first file (``None`` if ``model_files`` is
	        empty).

	    Raises:
	        KeyError: If a result file lacks one of the expected keys.
	    """
	    models = []
	    all_predictions = {}
	    class_names = None

	    for model_file in model_files:
	        # JSON is UTF-8 by specification; do not depend on the platform
	        # default encoding.
	        with open(model_file, 'r', encoding='utf-8') as f:
	            data = json.load(f)

	        model_name = data['model']
	        models.append(model_name)

	        # The first file's class list is used as-is; the lists of later
	        # files are not cross-checked against it.
	        if class_names is None:
	            class_names = data['class_names']

	        # Group predictions by image, then by model.
	        for image_name, predictions in data['results'].items():
	            all_predictions.setdefault(image_name, {})[model_name] = predictions

	    return models, all_predictions, class_names

	def create_tensors(annotation_file='iwildcam_demo_annotations.json', model_files=None):
	    """Build and save the prediction and label tensors for the iWildCam demo.

	    Loads the ground-truth annotations and every model's zero-shot results,
	    keeps the images that have predictions from all models, and assembles:

	    * a prediction tensor of shape ``(H, N, C)`` (models x images x classes)
	      holding each model's score per class, and
	    * a label tensor of shape ``(N,)`` holding, for each kept image, the
	      index of its true class within ``class_names``.

	    Side effects: writes ``iwildcam_demo.pt``, ``iwildcam_demo_labels.pt``,
	    ``models.txt``, ``images.txt`` and ``classes.txt`` to the current working
	    directory and prints a short summary.

	    Args:
	        annotation_file: Path to the JSON annotation file. Defaults to the
	            demo annotation file.
	        model_files: Paths to the per-model JSON result files. ``None``
	            (the default) selects the five demo result files.

	    Returns:
	        ``(prediction_tensor, label_tensor, models, valid_images, class_names)``.

	    Raises:
	        KeyError: If a predicted class is not in ``class_names``, a class
	            name is missing from the species table below, or an image's
	            category id does not correspond to any class.
	    """
	    if model_files is None:
	        model_files = [
	            'zeroshot_results_facebook_PE_Core_L14_336.json',
	            'zeroshot_results_google_siglip2_so400m_patch16_naflex.json',
	            'zeroshot_results_openai_clip_vit_large_patch14.json',
	            'zeroshot_results_imageomics_bioclip_v2.json',
	            'zeroshot_results_laion_CLIP_ViT_L_14_laion2B_s32B_b82K.json',
	        ]

	    images, labels = load_annotations(annotation_file)
	    models, all_predictions, class_names = load_model_predictions(model_files)

	    # Class name -> column index in the prediction tensor (0-indexed).
	    class_to_idx = {class_name: idx for idx, class_name in enumerate(class_names)}

	    # Keep only the images that every model produced predictions for.
	    valid_images = []
	    valid_labels = []
	    for image_name, label in zip(images, labels):
	        if image_name in all_predictions and len(all_predictions[image_name]) == len(models):
	            valid_images.append(image_name)
	            valid_labels.append(label)

	    print(f"Found {len(valid_images)} images with predictions from all {len(models)} models")

	    # Prediction tensor: H x N x C
	    H = len(models)        # number of models
	    N = len(valid_images)  # number of images
	    C = len(class_names)   # number of classes

	    # Classes a model did not score keep the initial value 0.
	    prediction_tensor = torch.zeros(H, N, C)
	    for h, model_name in enumerate(models):
	        for n, image_name in enumerate(valid_images):
	            predictions = all_predictions[image_name][model_name]
	            for class_name, score in predictions.items():
	                prediction_tensor[h, n, class_to_idx[class_name]] = score

	    # Annotations store category ids while predictions use species names;
	    # this table links the two.
	    species_to_category = {
	        "Jaguar": 24,
	        "Ocelot": 10,
	        "Mountain Lion": 6,
	        "Common Eland": 101,
	        "Waterbuck": 102,
	    }

	    # Labels follow the order of class_names, not the sorted category ids.
	    category_to_label = {
	        species_to_category[name]: idx for idx, name in enumerate(class_names)
	    }
	    # Pin the dtype: without it an empty label list would yield a float tensor.
	    label_tensor = torch.tensor(
	        [category_to_label[cat] for cat in valid_labels], dtype=torch.long
	    )

	    # Save tensors
	    torch.save(prediction_tensor, 'iwildcam_demo.pt')
	    torch.save(label_tensor, 'iwildcam_demo_labels.pt')

	    # Save the axis descriptions as text files, one entry per line.
	    with open('models.txt', 'w', encoding='utf-8') as f:
	        f.writelines(f"{model}\n" for model in models)

	    with open('images.txt', 'w', encoding='utf-8') as f:
	        f.writelines(f"{image}\n" for image in valid_images)

	    with open('classes.txt', 'w', encoding='utf-8') as f:
	        f.writelines(f"{class_name}\n" for class_name in class_names)

	    print(f"Saved prediction tensor of shape {prediction_tensor.shape} to iwildcam_demo.pt")
	    print(f"Saved label tensor of shape {label_tensor.shape} to iwildcam_demo_labels.pt")
	    print(f"Saved {len(models)} models to models.txt")
	    print(f"Saved {len(valid_images)} images to images.txt")
	    print(f"Saved {len(class_names)} classes to classes.txt")

	    return prediction_tensor, label_tensor, models, valid_images, class_names

	if __name__ == "__main__":
	    # Script entry point: build the tensors and write every output file
	    # using the default demo inputs.
	    create_tensors()