#!/usr/bin/env python
r"""Create a list of image names and class indices for ImageNet validation data.

The directory structure of the original ImageNet validation data set is
expected to be:

    data_dir/ILSVRC2012_val_00000001.JPEG
    data_dir/ILSVRC2012_val_00000002.JPEG
    ...
    data_dir/ILSVRC2012_val_00050000.JPEG

This script generates a list like:

    ILSVRC2012_val_00000001.JPEG 65
    ILSVRC2012_val_00000002.JPEG 970
    ...
    ILSVRC2012_val_00050000.JPEG 355

Usage:
    Download
    https://github.com/tensorflow/models/blob/master/research/slim/datasets/imagenet_2012_validation_synset_labels.txt
    and then,

    ./create_image_labels.py imagenet_2012_validation_synset_labels.txt

The list is written to ``val.txt`` in the current working directory.
"""

import sys

# Name of the generated list file, created in the current working directory.
IMAGE_LIST_FILE = 'val.txt'


def read_labels(labels_file):
    """Return the labels stored in *labels_file*, one per line, in file order.

    Surrounding whitespace is stripped from every line. Blank lines are
    skipped: an empty label would otherwise sort first, take class index 0
    and shift every real class index by one.

    Args:
        labels_file: Path of the text file holding one label per line; the
            N-th label belongs to the N-th validation image.

    Returns:
        A list of label strings.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the file even if reading fails.
    with open(labels_file, encoding='utf-8') as f:
        stripped = (line.strip() for line in f)
        return [label for label in stripped if label]


def build_label_to_class_idx(labels):
    """Map every distinct label to its rank in sorted order.

    Args:
        labels: Iterable of label strings; duplicates are allowed.

    Returns:
        A dict mapping each distinct label to a class index, where indices
        are assigned 0, 1, 2, ... following the sorted order of the labels.
    """
    return {label: idx for idx, label in enumerate(sorted(set(labels)))}


def format_image_list(labels):
    """Yield one ``'<image name> <class index>\\n'`` line per label.

    Image names are numbered from 1 in the order of *labels*, with the number
    zero-padded to eight digits (``ILSVRC2012_val_00000001.JPEG``).

    Args:
        labels: Sequence of label strings, one per validation image.

    Yields:
        The lines of the image list, each terminated by a newline.
    """
    label_to_class_idx = build_label_to_class_idx(labels)
    for image_number, label in enumerate(labels, start=1):
        # Fixed '000' prefix plus a five-digit counter gives the eight-digit
        # number used by the validation image file names.
        image_name = f'ILSVRC2012_val_000{image_number:05d}.JPEG'
        yield f'{image_name} {label_to_class_idx[label]}\n'


def main(argv=None):
    """Generate the image list file from the labels file named in *argv*.

    Args:
        argv: Command-line arguments including the program name; defaults to
            ``sys.argv``.

    Returns:
        The process exit status: 0 on success, -1 when the labels file
        argument is missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        print('Usage: ./create_image_labels.py <labels_file>', file=sys.stderr)
        return -1

    labels = read_labels(argv[1])
    with open(IMAGE_LIST_FILE, 'w', encoding='utf-8') as f:
        f.writelines(format_image_list(labels))
    print(f'Output image list file: {IMAGE_LIST_FILE}')
    return 0


if __name__ == '__main__':
    sys.exit(main())