#!/usr/bin/env python3
"""
Compute top-1 accuracy for each model by comparing predictions with ground truth.
"""

import json
import os
from collections import OrderedDict

# Species mapping from demo/app.py
SPECIES_MAP = OrderedDict([
    (24, "Jaguar"),         # panthera onca
    (10, "Ocelot"),         # leopardus pardalis
    (6, "Mountain Lion"),   # puma concolor
    (101, "Common Eland"),  # tragelaphus oryx
    (102, "Waterbuck"),     # kobus ellipsiprymnus
])


def load_ground_truth():
    """Load ground truth labels from annotations."""
    with open('iwildcam_demo_annotations.json', 'r') as f:
        data = json.load(f)

    # Create mapping from filename to true label
    ground_truth = {}
    for annotation in data['annotations']:
        image_id = annotation['image_id']
        category_id = annotation['category_id']

        image_info = next((img for img in data['images'] if img['id'] == image_id), None)
        if image_info:
            filename = image_info['file_name']
            true_label = SPECIES_MAP.get(category_id, "Unknown")
            if true_label != "Unknown":
                ground_truth[filename] = true_label

    return ground_truth


def compute_accuracy(results_file, ground_truth):
    """Compute top-1 accuracy for a model's results."""
    with open(results_file, 'r') as f:
        data = json.load(f)

    model_name = data['model']
    results = data['results']

    correct = 0
    total = 0
    for filename, scores in results.items():
        if filename in ground_truth:
            # Get predicted class (highest score)
            predicted_class = max(scores, key=scores.get)
            true_class = ground_truth[filename]

            if predicted_class == true_class:
                correct += 1
            total += 1

    accuracy = correct / total if total > 0 else 0.0
    return accuracy, correct, total


def main():
    """Compute accuracy for all models."""
    print("Computing top-1 accuracy for each model...\n")

    # Load ground truth
    ground_truth = load_ground_truth()
    print(f"Loaded ground truth for {len(ground_truth)} images")

    # Find all results files
    results_files = [f for f in os.listdir('.')
                     if f.startswith('zeroshot_results_') and f.endswith('.json')]

    if not results_files:
        print("No results files found!")
        return

    print(f"Found {len(results_files)} results files\n")

    # Compute accuracy for each model
    accuracies = {}
    for results_file in sorted(results_files):
        try:
            accuracy, correct, total = compute_accuracy(results_file, ground_truth)

            # Extract model name from filename
            model_name = results_file.replace('zeroshot_results_', '').replace('.json', '').replace('_', '/')

            accuracies[model_name] = {
                'accuracy': accuracy,
                'correct': correct,
                'total': total
            }

            print(f"{model_name}:")
            print(f"  Accuracy: {accuracy:.4f} ({correct}/{total})")
            print()
        except Exception as e:
            print(f"Error processing {results_file}: {e}")

    # Summary
    print("="*60)
    print("SUMMARY")
    print("="*60)

    # Sort by accuracy
    sorted_models = sorted(accuracies.items(), key=lambda x: x[1]['accuracy'], reverse=True)
    for i, (model_name, stats) in enumerate(sorted_models, 1):
        print(f"{i}. {model_name}: {stats['accuracy']:.4f}")

    # Show some example predictions vs ground truth
    print("\n" + "="*60)
    print("SAMPLE PREDICTIONS (first 10 images)")
    print("="*60)

    if results_files:
        # Use the first model's results to show examples
        with open(results_files[0], 'r') as f:
            data = json.load(f)

        results = data['results']
        count = 0
        for filename, scores in results.items():
            if filename in ground_truth and count < 10:
                predicted_class = max(scores, key=scores.get)
                true_class = ground_truth[filename]
                confidence = scores[predicted_class]

                status = "✓" if predicted_class == true_class else "✗"
                print(f"{filename}:")
                print(f"  True: {true_class}")
                print(f"  Pred: {predicted_class} ({confidence:.4f}) {status}")
                print()
                count += 1


if __name__ == "__main__":
    main()
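
# ---------------------------------------------------------------------------
# Expected inputs (a sketch inferred from how this script reads the files, not
# an authoritative spec; the example values below are hypothetical):
#
#   iwildcam_demo_annotations.json -- COCO-style annotations, roughly:
#       {
#         "images":      [{"id": "img_001", "file_name": "img_001.jpg"}, ...],
#         "annotations": [{"image_id": "img_001", "category_id": 24}, ...]
#       }
#
#   zeroshot_results_<model>.json -- one file per model, roughly:
#       {
#         "model": "some/model-name",
#         "results": {"img_001.jpg": {"Jaguar": 0.91, "Ocelot": 0.04, ...}, ...}
#       }
#
# Note the model name printed in the summary is derived from the filename by
# stripping the prefix/suffix and turning "_" into "/", so a file named
# zeroshot_results_org_model-name.json is reported as "org/model-name".
# Run the script from the directory containing these JSON files.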