Spaces:
Runtime error
Runtime error
import os | |
import argparse | |
import json | |
import numpy as np | |
from collections import defaultdict | |
# images exist in annotations but not in image folder. | |
objv2_ignore_list = [ | |
os.path.join('patch16', 'objects365_v2_00908726.jpg'), | |
os.path.join('patch6', 'objects365_v1_00320532.jpg'), | |
os.path.join('patch6', 'objects365_v1_00320534.jpg'), | |
] | |
def process_coco(data_dir): | |
things_instances = json.load(open(os.path.join(data_dir, 'coco2017', 'annotations', 'instances_train2017.json'))) | |
stuff_instances = json.load(open(os.path.join(data_dir, 'cocostuff', 'annotations', 'stuff_train2017.json'))) | |
image_info = dict() | |
for image in things_instances['images'] + stuff_instances['images']: | |
image_id = image['id'] | |
if image_id not in image_info: | |
image_info[image_id] = image | |
else: | |
assert image_info[image_id]['file_name'] == image['file_name'] | |
category_info = dict() | |
for category in things_instances['categories'] + stuff_instances['categories']: | |
category_info[category['id']] = category['name'] | |
image2annotations = defaultdict(list) | |
for annotation in things_instances['annotations'] + stuff_instances['annotations']: | |
image_id = annotation['image_id'] | |
image_file_name = image_info[image_id]['file_name'] | |
image2annotations[image_file_name].append({'category_id':annotation['category_id'], 'bbox':annotation['bbox']}) | |
with open(os.path.join(data_dir, 'cocostuff', 'annotations', 'image2bboxes.json'), 'w') as f: | |
json.dump(image2annotations, f) | |
def process_objects365(data_dir): | |
instances = json.load(open(os.path.join(data_dir, 'object365', 'zhiyuan_objv2_train.json'))) | |
image_info = dict() | |
for image in instances['images']: | |
image_id = image['id'] | |
if image_id not in image_info: | |
image_info[image_id] = image | |
else: | |
assert image_info[image_id]['file_name'] == image['file_name'] | |
category_info = dict() | |
for category in instances['categories']: | |
category_info[category['id']] = category['name'] | |
image2annotations = defaultdict(list) | |
for annotation in instances['annotations']: | |
image_id = annotation['image_id'] | |
image_file_name = image_info[image_id]['file_name'] | |
image_file_name = (os.sep).join(image_file_name.split(os.sep)[2:]) | |
if image_file_name in objv2_ignore_list: | |
continue | |
image2annotations[image_file_name].append({'category_id':annotation['category_id'], 'bbox':annotation['bbox']}) | |
with open(os.path.join(data_dir, 'object365', 'image2bboxes.json'), 'w') as f: | |
json.dump(image2annotations, f) | |
def process_goldG(data_dir): | |
instances = json.load(open(os.path.join(data_dir, 'MixedGrounding', 'final_mixed_train.json'))) | |
flickr_instances = json.load(open(os.path.join(data_dir, 'MixedGrounding', 'final_flickr_separateGT_train.json'))) | |
image_info = [] | |
for image in instances['images']: | |
image_info.append({'file_name':image['file_name'], 'caption':image['caption'], 'data_source':image['data_source'], 'instances':[]}) | |
for annotation in instances['annotations']: | |
image_id = annotation['image_id'] | |
image_info[image_id]['instances'].append(annotation) | |
for image in flickr_instances['images']: | |
image_info.append({'file_name':image['file_name'], 'caption':image['caption'], 'data_source':'flickr', 'instances':[]}) | |
for annotation in flickr_instances['annotations']: | |
image_id = annotation['image_id'] | |
image_info[image_id]['instances'].append(annotation) | |
with open(os.path.join(data_dir, 'MixedGrounding', 'goldG_train.json'), 'w') as f: | |
json.dump(image_info, f) | |
if __name__ == "__main__": | |
parser = argparse.ArgumentParser() | |
parser.add_argument("--data_dir", type=str, default="../data") | |
args = parser.parse_args() | |
process_coco(args.data_dir) | |
process_objects365(args.data_dir) | |
process_goldG(args.data_dir) |