|
|
|
import argparse
|
|
from collections import defaultdict
|
|
|
|
from mmengine import dump, load
|
|
|
|
|
|
def parse_args():
    """Parse the command-line arguments of this script.

    Two positional arguments are registered, in this order:
    ``stad_anno`` and ``det_path``.

    Returns:
        argparse.Namespace: The parsed arguments, exposing ``stad_anno``
            and ``det_path`` as attributes.
    """
    # (name, help text) for every positional, in command-line order.
    positionals = (
        ('stad_anno', 'spatiotemporal action detection anno path'),
        ('det_path', 'output detection anno path'),
    )
    arg_parser = argparse.ArgumentParser()
    for arg_name, arg_help in positionals:
        arg_parser.add_argument(arg_name, help=arg_help)
    return arg_parser.parse_args()
|
|
|
|
|
|
def _build_coco_anno(ori_anno, vids):
    """Assemble one COCO-style detection annotation dict.

    Args:
        ori_anno (dict): The loaded annotation. Only its ``'gttubes'`` and
            ``'resolution'`` entries are read, both indexed by video name.
        vids (Iterable): Names of the videos to convert.

    Returns:
        dict: A dict with the keys ``'images'``, ``'annotations'`` and
            ``'categories'``. Image and annotation ids both start at 0 and
            increase in emission order.
    """
    # Every box is written with this single category, whatever label its
    # tube was stored under.
    person_category_id = 0

    img_list = []
    anno_list = []
    for vid in vids:
        vid_tubes = ori_anno['gttubes'][vid]
        height, width = ori_anno['resolution'][vid]

        # Regroup the boxes of all tubes by frame, so that each annotated
        # frame becomes exactly one image entry. Frames keep the order in
        # which they are first encountered.
        frm2bbox = defaultdict(list)
        for tube_list in vid_tubes.values():
            for tube in tube_list:
                for frm_anno in tube:
                    # Each record is laid out as (frame index, x1, y1, x2, y2).
                    frm2bbox[frm_anno[0]].append(frm_anno[1:])

        for frm_idx, frm_bboxes in frm2bbox.items():
            # ids equal the number of entries emitted so far.
            img_id = len(img_list)
            img_list.append({
                'file_name': f'{vid}/{int(frm_idx):05d}.jpg',
                'height': height,
                'width': width,
                'id': img_id,
            })

            for x1, y1, x2, y2 in frm_bboxes:
                # Corner format -> [x, y, w, h].
                box_w, box_h = x2 - x1, y2 - y1
                anno_list.append({
                    'area': box_w * box_h,
                    'image_id': img_id,
                    'bbox': [x1, y1, box_w, box_h],
                    'category_id': person_category_id,
                    'iscrowd': 0,
                    'id': len(anno_list),
                })

    return {
        'images': img_list,
        'annotations': anno_list,
        'categories': [{
            'id': person_category_id,
            'name': 'person',
        }],
    }


def generate_mmdet_coco_anno(args):
    """Convert an action detection annotation into COCO-style files.

    The annotation at ``args.stad_anno`` is loaded and one detection
    annotation file is written per split: ``'train'`` is built from
    ``ori_anno['train_videos']`` and ``'val'`` from
    ``ori_anno['test_videos']``. Image and box ids restart at 0 for each
    split.

    The split name is inserted in front of the file extension of
    ``args.det_path``, e.g. ``det.json`` -> ``det_train.json`` and
    ``det_val.json``.

    Args:
        args (argparse.Namespace): Must provide ``stad_anno`` (input
            annotation path) and ``det_path`` (output path template).
    """
    import os.path as osp

    ori_anno = load(args.stad_anno)
    videos = {
        'train': ori_anno['train_videos'],
        'val': ori_anno['test_videos'],
    }

    # Split on the real extension instead of assuming it is five characters
    # long, so paths that do not end in ``.json`` are not cut mid-name.
    root, ext = osp.splitext(args.det_path)

    for split, split_videos in videos.items():
        # Only the first entry of each video list is converted.
        # NOTE(review): presumably index 0 selects the first data split of
        # the dataset -- confirm against the annotation format.
        total_anno = _build_coco_anno(ori_anno, split_videos[0])
        dump(total_anno, f'{root}_{split}{ext}')
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: parse the command line and run the conversion.
    generate_mmdet_coco_anno(parse_args())
|
|
|