Add files using upload-large-folder tool
This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
- .history/datasets/__init___20250113130205.py +40 -0
- .history/datasets/ytvos_20241227174300.py +246 -0
- .history/datasets/ytvos_20250113131154.py +246 -0
- .history/datasets/ytvos_20250113131303.py +246 -0
- .history/datasets/ytvos_ref_20250113130047.py +237 -0
- .history/datasets/ytvos_ref_20250113131359.py +239 -0
- .history/datasets/ytvos_ref_20250113162825.py +244 -0
- .history/datasets/ytvos_ref_20250113163406.py +250 -0
- .history/datasets/ytvos_ref_20250113163605.py +250 -0
- .history/datasets/ytvos_ref_20250113180729.py +250 -0
- .history/datasets/ytvos_ref_20250114201918.py +253 -0
- .history/datasets/ytvos_ref_20250114202502.py +250 -0
- .history/datasets/ytvos_ref_20250114205233.py +252 -0
- .history/datasets/ytvos_ref_20250114210537.py +250 -0
- .history/make_ref-ytvos/annotate_ref_ytvos_20241227174304.py +288 -0
- .history/make_ref-ytvos/annotate_ref_ytvos_20250113111315.py +288 -0
- davis2017/utils.py +174 -0
- inference_davis.py +330 -0
- main.py +243 -0
- main_joint.py +198 -0
- main_pretrain.py +304 -0
- make_refcoco/refcocog_google/motion_split_generation_grefg_val.ipynb +0 -0
- make_refcoco/refcocog_google/part4_ref_id.txt +130 -0
- make_refcoco/refcocog_google/revised_refid_part4.json +506 -0
- make_refcoco/refcocog_umd/motion_split_generation.ipynb +0 -0
- make_refcoco/refcocog_umd/part4_ref_id.txt +126 -0
- make_refcoco/refcocog_umd/revised_refid_part4.json +498 -0
- mbench/__init__.py +0 -0
- mbench/__pycache__/transforms_video.cpython-39.pyc +0 -0
- mbench/__pycache__/ytvos_ref.cpython-39.pyc +0 -0
- mbench/check_image.ipynb +0 -0
- mbench/check_image_numbered.ipynb +0 -0
- mbench/check_image_revised.ipynb +164 -0
- mbench/gpt_ref-ytvos-revised.py +428 -0
- mbench/gpt_ref-ytvos.ipynb +0 -0
- mbench/gpt_ref-ytvos.py +302 -0
- mbench/gpt_ref-ytvos_numbered_cy.py +460 -0
- mbench/gpt_ref-ytvos_numbered_cy_sanity.py +643 -0
- mbench/gpt_ref-ytvos_numbered_cy_sanity_2.py +676 -0
- mbench/gpt_test.ipynb +0 -0
- mbench/make_ref-ytvos_json.py +108 -0
- mbench/numbered_captions_gpt-4o_final.json +0 -0
- mbench/numbered_captions_gpt-4o_no_mask_color.json +0 -0
- mbench/numbered_captions_gpt-4o_nomask_randcap.json +0 -0
- mbench/numbered_captions_gpt-4o_randcap.json +0 -0
- mbench/numbered_valid_obj_ids.json +2153 -0
- mbench/numbered_valid_obj_ids_gpt-4o.json +2153 -0
- mbench/numbered_valid_obj_ids_gpt-4o_no_mask_color.json +2153 -0
- mbench/numbered_valid_obj_ids_gpt-4o_nomask_randcap.json +2153 -0
- mbench/numbered_valid_obj_ids_gpt-4o_randcap.json +2153 -0
.history/datasets/__init___20250113130205.py
ADDED
@@ -0,0 +1,40 @@
import torch.utils.data
import torchvision

from .ytvos import build as build_ytvos
from .ytvos_ref import build as build_ytvos_ref
from .davis import build as build_davis
from .a2d import build as build_a2d
from .jhmdb import build as build_jhmdb
from .refexp import build as build_refexp
from .concat_dataset import build as build_joint


def get_coco_api_from_dataset(dataset):
    for _ in range(10):
        # if isinstance(dataset, torchvision.datasets.CocoDetection):
        #     break
        if isinstance(dataset, torch.utils.data.Subset):
            dataset = dataset.dataset
    if isinstance(dataset, torchvision.datasets.CocoDetection):
        return dataset.coco


def build_dataset(dataset_file: str, image_set: str, args):
    if dataset_file == 'ytvos':
        return build_ytvos(image_set, args)
    if dataset_file == 'ytvos_ref':
        return build_ytvos_ref(image_set, args)
    if dataset_file == 'davis':
        return build_davis(image_set, args)
    if dataset_file == 'a2d':
        return build_a2d(image_set, args)
    if dataset_file == 'jhmdb':
        return build_jhmdb(image_set, args)
    # for pretraining
    if dataset_file == "refcoco" or dataset_file == "refcoco+" or dataset_file == "refcocog":
        return build_refexp(dataset_file, image_set, args)
    # for joint training of refcoco and ytvos
    if dataset_file == 'joint':
        return build_joint(image_set, args)
    raise ValueError(f'dataset {dataset_file} not supported')
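
For context, a minimal usage sketch of the `build_dataset` dispatcher above, assuming the repo's `datasets` package is importable and that `args` carries the fields the individual builders read (the field names and path below are illustrative, mirroring the YTVOS builder later in this diff):

import argparse

from datasets import build_dataset  # the __init__ shown above

# Hypothetical args: ytvos_path is a placeholder; masks/num_frames/max_skip
# are the attributes build_ytvos reads.
args = argparse.Namespace(ytvos_path='/data/ref-youtube-vos', masks=True,
                          num_frames=5, max_skip=3)
train_set = build_dataset('ytvos', image_set='train', args=args)
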
.history/datasets/ytvos_20241227174300.py
ADDED
@@ -0,0 +1,246 @@
"""
Ref-YoutubeVOS data loader
"""
from pathlib import Path

import torch
from torch.autograd.grad_mode import F
from torch.utils.data import Dataset
import datasets.transforms_video as T

import os
from PIL import Image
import json
import numpy as np
import random

from datasets.categories import ytvos_category_dict as category_dict


class YTVOSDataset(Dataset):
    """
    A dataset class for the Refer-Youtube-VOS dataset which was first introduced in the paper:
    "URVOS: Unified Referring Video Object Segmentation Network with a Large-Scale Benchmark"
    (see https://link.springer.com/content/pdf/10.1007/978-3-030-58555-6_13.pdf).
    The original release of the dataset contained both 'first-frame' and 'full-video' expressions. However, the first
    dataset is not publicly available anymore as now only the harder 'full-video' subset is available to download
    through the Youtube-VOS referring video object segmentation competition page at:
    https://competitions.codalab.org/competitions/29139
    Furthermore, for the competition the subset's original validation set, which consists of 507 videos, was split into
    two competition 'validation' & 'test' subsets, consisting of 202 and 305 videos respectively. Evaluation can
    currently only be done on the competition 'validation' subset using the competition's server, as
    annotations were publicly released only for the 'train' subset of the competition.

    """
    def __init__(self, img_folder: Path, ann_file: Path, transforms, return_masks: bool,
                 num_frames: int, max_skip: int):
        self.img_folder = img_folder
        self.ann_file = ann_file
        self._transforms = transforms
        self.return_masks = return_masks  # not used
        self.num_frames = num_frames
        self.max_skip = max_skip
        # create video meta data
        self.prepare_metas()

        print('\n video num: ', len(self.videos), ' clip num: ', len(self.metas))
        print('\n')

    def prepare_metas(self):
        # read object information
        with open(os.path.join(str(self.img_folder), 'meta.json'), 'r') as f:
            subset_metas_by_video = json.load(f)['videos']

        # read expression data
        with open(str(self.ann_file), 'r') as f:
            subset_expressions_by_video = json.load(f)['videos']
        self.videos = list(subset_expressions_by_video.keys())

        self.metas = []
        for vid in self.videos:
            vid_meta = subset_metas_by_video[vid]
            vid_data = subset_expressions_by_video[vid]
            vid_frames = sorted(vid_data['frames'])
            vid_len = len(vid_frames)
            for exp_id, exp_dict in vid_data['expressions'].items():
                for frame_id in range(0, vid_len, self.num_frames):
                    meta = {}
                    meta['video'] = vid
                    meta['exp'] = exp_dict['exp']
                    meta['obj_id'] = int(exp_dict['obj_id'])
                    meta['frames'] = vid_frames
                    meta['frame_id'] = frame_id
                    # get object category
                    obj_id = exp_dict['obj_id']
                    meta['category'] = vid_meta['objects'][obj_id]['category']
                    self.metas.append(meta)

    @staticmethod
    def bounding_box(img):
        rows = np.any(img, axis=1)
        cols = np.any(img, axis=0)
        rmin, rmax = np.where(rows)[0][[0, -1]]
        cmin, cmax = np.where(cols)[0][[0, -1]]
        return rmin, rmax, cmin, cmax  # y1, y2, x1, x2

    def __len__(self):
        return len(self.metas)

    def __getitem__(self, idx):
        instance_check = False
        while not instance_check:
            meta = self.metas[idx]  # dict

            video, exp, obj_id, category, frames, frame_id = \
                meta['video'], meta['exp'], meta['obj_id'], meta['category'], meta['frames'], meta['frame_id']
            # clean up the caption
            exp = " ".join(exp.lower().split())
            category_id = category_dict[category]
            vid_len = len(frames)

            num_frames = self.num_frames
            # random sparse sample
            sample_indx = [frame_id]
            if self.num_frames != 1:
                # local sample
                sample_id_before = random.randint(1, 3)
                sample_id_after = random.randint(1, 3)
                local_indx = [max(0, frame_id - sample_id_before), min(vid_len - 1, frame_id + sample_id_after)]
                sample_indx.extend(local_indx)
                sample_indx = list(set(sample_indx))

                # global sampling
                if num_frames > 3:
                    all_inds = list(range(vid_len))
                    global_inds = all_inds[:min(sample_indx)] + all_inds[max(sample_indx):]
                    global_n = num_frames - len(sample_indx)
                    if len(global_inds) > global_n:
                        select_id = random.sample(range(len(global_inds)), global_n)
                        for s_id in select_id:
                            sample_indx.append(global_inds[s_id])
                    elif vid_len >= global_n:  # sample long range global frames
                        select_id = random.sample(range(vid_len), global_n)
                        for s_id in select_id:
                            sample_indx.append(all_inds[s_id])
                    else:
                        select_id = random.sample(range(vid_len), global_n - vid_len) + list(range(vid_len))
                        for s_id in select_id:
                            sample_indx.append(all_inds[s_id])
            sample_indx.sort()

            # read frames and masks
            imgs, labels, boxes, masks, valid = [], [], [], [], []
            for j in range(self.num_frames):
                frame_indx = sample_indx[j]
                frame_name = frames[frame_indx]
                img_path = os.path.join(str(self.img_folder), 'JPEGImages', video, frame_name + '.jpg')
                mask_path = os.path.join(str(self.img_folder), 'Annotations', video, frame_name + '.png')
                img = Image.open(img_path).convert('RGB')
                mask = Image.open(mask_path).convert('P')

                # create the target
                label = torch.tensor(category_id)
                mask = np.array(mask)
                mask = (mask == obj_id).astype(np.float32)  # 0,1 binary
                if (mask > 0).any():
                    y1, y2, x1, x2 = self.bounding_box(mask)
                    box = torch.tensor([x1, y1, x2, y2]).to(torch.float)
                    valid.append(1)
                else:  # some frame didn't contain the instance
                    box = torch.tensor([0, 0, 0, 0]).to(torch.float)
                    valid.append(0)
                mask = torch.from_numpy(mask)

                # append
                imgs.append(img)
                labels.append(label)
                masks.append(mask)
                boxes.append(box)

            # transform
            w, h = img.size
            labels = torch.stack(labels, dim=0)
            boxes = torch.stack(boxes, dim=0)
            boxes[:, 0::2].clamp_(min=0, max=w)
            boxes[:, 1::2].clamp_(min=0, max=h)
            masks = torch.stack(masks, dim=0)
            target = {
                'frames_idx': torch.tensor(sample_indx),  # [T,]
                'labels': labels,  # [T,]
                'boxes': boxes,  # [T, 4], xyxy
                'masks': masks,  # [T, H, W]
                'valid': torch.tensor(valid),  # [T,]
                'caption': exp,
                'orig_size': torch.as_tensor([int(h), int(w)]),
                'size': torch.as_tensor([int(h), int(w)])
            }

            # "boxes" normalize to [0, 1] and transform from xyxy to cxcywh in self._transform
            if self._transforms:
                imgs, target = self._transforms(imgs, target)
                imgs = torch.stack(imgs, dim=0)  # [T, 3, H, W]
            else:
                imgs = np.array(imgs)
                imgs = torch.tensor(imgs.transpose(0, 3, 1, 2))

            # FIXME: handle "valid", since some box may be removed due to random crop
            if torch.any(target['valid'] == 1):  # at least one instance
                instance_check = True
            else:
                idx = random.randint(0, self.__len__() - 1)

        return imgs, target


def make_coco_transforms(image_set, max_size=640):
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    scales = [288, 320, 352, 392, 416, 448, 480, 512]

    if image_set == 'train':
        return T.Compose([
            T.RandomHorizontalFlip(),
            T.PhotometricDistort(),
            T.RandomSelect(
                T.Compose([
                    T.RandomResize(scales, max_size=max_size),
                    T.Check(),
                ]),
                T.Compose([
                    T.RandomResize([400, 500, 600]),
                    T.RandomSizeCrop(384, 600),
                    T.RandomResize(scales, max_size=max_size),
                    T.Check(),
                ])
            ),
            normalize,
        ])

    # we do not use the 'val' set since the annotations are inaccessible
    if image_set == 'val':
        return T.Compose([
            T.RandomResize([360], max_size=640),
            normalize,
        ])

    raise ValueError(f'unknown {image_set}')


def build(image_set, args):
    root = Path(args.ytvos_path)
    assert root.exists(), f'provided YTVOS path {root} does not exist'
    PATHS = {
        "train": (root / "train", root / "meta_expressions" / "train" / "meta_expressions.json"),
        "val": (root / "valid", root / "meta_expressions" / "valid" / "meta_expressions.json"),  # not used actually
    }
    img_folder, ann_file = PATHS[image_set]
    # dataset = YTVOSDataset(img_folder, ann_file, transforms=make_coco_transforms(image_set, max_size=args.max_size), return_masks=args.masks,
    #                        num_frames=args.num_frames, max_skip=args.max_skip)
    dataset = YTVOSDataset(img_folder, ann_file, transforms=None, return_masks=args.masks,
                           num_frames=args.num_frames, max_skip=args.max_skip)
    return dataset
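
As a sanity check of the `bounding_box` helper above, a self-contained NumPy snippet (the toy mask is made up; no repo imports needed) reproducing its row/column reduction:

import numpy as np

# toy 6x8 binary mask with a 3x3 block of foreground
mask = np.zeros((6, 8), dtype=np.float32)
mask[2:5, 3:6] = 1.0

rows = np.any(mask, axis=1)             # which rows contain foreground
cols = np.any(mask, axis=0)             # which columns contain foreground
rmin, rmax = np.where(rows)[0][[0, -1]]
cmin, cmax = np.where(cols)[0][[0, -1]]
print(rmin, rmax, cmin, cmax)           # 2 4 3 5 -> y1, y2, x1, x2
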
.history/datasets/ytvos_20250113131154.py
ADDED
@@ -0,0 +1,246 @@
"""
Ref-YoutubeVOS data loader
"""
from pathlib import Path

import torch
from torch.autograd.grad_mode import F
from torch.utils.data import Dataset
import datasets.transforms_video as T

import os
from PIL import Image
import json
import numpy as np
import random

from datasets.categories import ytvos_category_dict as category_dict


class YTVOSDataset(Dataset):
    """
    A dataset class for the Refer-Youtube-VOS dataset which was first introduced in the paper:
    "URVOS: Unified Referring Video Object Segmentation Network with a Large-Scale Benchmark"
    (see https://link.springer.com/content/pdf/10.1007/978-3-030-58555-6_13.pdf).
    The original release of the dataset contained both 'first-frame' and 'full-video' expressions. However, the first
    dataset is not publicly available anymore as now only the harder 'full-video' subset is available to download
    through the Youtube-VOS referring video object segmentation competition page at:
    https://competitions.codalab.org/competitions/29139
    Furthermore, for the competition the subset's original validation set, which consists of 507 videos, was split into
    two competition 'validation' & 'test' subsets, consisting of 202 and 305 videos respectively. Evaluation can
    currently only be done on the competition 'validation' subset using the competition's server, as
    annotations were publicly released only for the 'train' subset of the competition.

    """
    def __init__(self, img_folder: Path, ann_file: Path, transforms, return_masks: bool,
                 num_frames: int, max_skip: int):
        self.img_folder = img_folder
        self.ann_file = ann_file
        self._transforms = transforms
        self.return_masks = return_masks  # not used
        self.num_frames = num_frames
        self.max_skip = max_skip
        # create video meta data
        self.prepare_metas()

        print('\n video num: ', len(self.videos), ' clip num: ', len(self.metas))
        print('\n')

    def prepare_metas(self):
        # read object information
        with open(os.path.join(str(self.img_folder), 'meta.json'), 'r') as f:
            subset_metas_by_video = json.load(f)['videos']

        # read expression data
        with open(str(self.ann_file), 'r') as f:
            subset_expressions_by_video = json.load(f)['videos']
        self.videos = list(subset_expressions_by_video.keys())

        self.metas = []
        for vid in self.videos:
            vid_meta = subset_metas_by_video[vid]
            vid_data = subset_expressions_by_video[vid]
            vid_frames = sorted(vid_data['frames'])
            vid_len = len(vid_frames)
            for exp_id, exp_dict in vid_data['expressions'].items():
                for frame_id in range(0, vid_len, self.num_frames):
                    meta = {}
                    meta['video'] = vid
                    meta['exp'] = exp_dict['exp']
                    meta['obj_id'] = int(exp_dict['obj_id'])
                    meta['frames'] = vid_frames
                    meta['frame_id'] = frame_id
                    # get object category
                    obj_id = exp_dict['obj_id']
                    meta['category'] = vid_meta['objects'][obj_id]['category']
                    self.metas.append(meta)

    @staticmethod
    def bounding_box(img):
        rows = np.any(img, axis=1)
        cols = np.any(img, axis=0)
        rmin, rmax = np.where(rows)[0][[0, -1]]
        cmin, cmax = np.where(cols)[0][[0, -1]]
        return rmin, rmax, cmin, cmax  # y1, y2, x1, x2

    def __len__(self):
        return len(self.metas)

    def __getitem__(self, idx):
        instance_check = False
        while not instance_check:
            meta = self.metas[idx]  # dict

            video, exp, obj_id, category, frames, frame_id = \
                meta['video'], meta['exp'], meta['obj_id'], meta['category'], meta['frames'], meta['frame_id']
            # clean up the caption
            exp = " ".join(exp.lower().split())
            category_id = category_dict[category]
            vid_len = len(frames)

            num_frames = self.num_frames
            # random sparse sample
            sample_indx = [frame_id]
            if self.num_frames != 1:
                # local sample
                sample_id_before = random.randint(1, 3)
                sample_id_after = random.randint(1, 3)
                local_indx = [max(0, frame_id - sample_id_before), min(vid_len - 1, frame_id + sample_id_after)]
                sample_indx.extend(local_indx)
                sample_indx = list(set(sample_indx))

                # global sampling
                if num_frames > 3:
                    all_inds = list(range(vid_len))
                    global_inds = all_inds[:min(sample_indx)] + all_inds[max(sample_indx):]
                    global_n = num_frames - len(sample_indx)
                    if len(global_inds) > global_n:
                        select_id = random.sample(range(len(global_inds)), global_n)
                        for s_id in select_id:
                            sample_indx.append(global_inds[s_id])
                    elif vid_len >= global_n:  # sample long range global frames
                        select_id = random.sample(range(vid_len), global_n)
                        for s_id in select_id:
                            sample_indx.append(all_inds[s_id])
                    else:
                        select_id = random.sample(range(vid_len), global_n - vid_len) + list(range(vid_len))
                        for s_id in select_id:
                            sample_indx.append(all_inds[s_id])
            sample_indx.sort()

            # read frames and masks
            imgs, labels, boxes, masks, valid = [], [], [], [], []
            for j in range(self.num_frames):
                frame_indx = sample_indx[j]
                frame_name = frames[frame_indx]
                img_path = os.path.join(str(self.img_folder), 'JPEGImages', video, frame_name + '.jpg')
                mask_path = os.path.join(str(self.img_folder), 'Annotations', video, frame_name + '.png')
                img = Image.open(img_path).convert('RGB')
                mask = Image.open(mask_path).convert('P')

                # create the target
                label = torch.tensor(category_id)
                mask = np.array(mask)
                mask = (mask == obj_id).astype(np.float32)  # 0,1 binary
                if (mask > 0).any():
                    y1, y2, x1, x2 = self.bounding_box(mask)
                    box = torch.tensor([x1, y1, x2, y2]).to(torch.float)
                    valid.append(1)
                else:  # some frame didn't contain the instance
                    box = torch.tensor([0, 0, 0, 0]).to(torch.float)
                    valid.append(0)
                mask = torch.from_numpy(mask)

                # append
                imgs.append(img)
                labels.append(label)
                masks.append(mask)
                boxes.append(box)

            # transform
            w, h = img.size
            labels = torch.stack(labels, dim=0)
            boxes = torch.stack(boxes, dim=0)
            boxes[:, 0::2].clamp_(min=0, max=w)
            boxes[:, 1::2].clamp_(min=0, max=h)
            masks = torch.stack(masks, dim=0)
            target = {
                'frames_idx': torch.tensor(sample_indx),  # [T,]
                'labels': labels,  # [T,]
                'boxes': boxes,  # [T, 4], xyxy
                'masks': masks,  # [T, H, W]
                'valid': torch.tensor(valid),  # [T,]
                'caption': exp,
                'orig_size': torch.as_tensor([int(h), int(w)]),
                'size': torch.as_tensor([int(h), int(w)])
            }

            # "boxes" normalize to [0, 1] and transform from xyxy to cxcywh in self._transform
            if self._transforms:
                imgs, target = self._transforms(imgs, target)
                imgs = torch.stack(imgs, dim=0)  # [T, 3, H, W]
            else:
                imgs = np.array(imgs)
                imgs = torch.tensor(imgs.transpose(0, 3, 1, 2))

            # FIXME: handle "valid", since some box may be removed due to random crop
            if torch.any(target['valid'] == 1):  # at least one instance
                instance_check = True
            else:
                idx = random.randint(0, self.__len__() - 1)

        return imgs, target


def make_coco_transforms(image_set, max_size=640):
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    scales = [288, 320, 352, 392, 416, 448, 480, 512]

    if image_set == 'train':
        return T.Compose([
            T.RandomHorizontalFlip(),
            T.PhotometricDistort(),
            T.RandomSelect(
                T.Compose([
                    T.RandomResize(scales, max_size=max_size),
                    T.Check(),
                ]),
                T.Compose([
                    T.RandomResize([400, 500, 600]),
                    T.RandomSizeCrop(384, 600),
                    T.RandomResize(scales, max_size=max_size),
                    T.Check(),
                ])
            ),
            normalize,
        ])

    # we do not use the 'val' set since the annotations are inaccessible
    if image_set == 'val':
        return T.Compose([
            T.RandomResize([360], max_size=640),
            normalize,
        ])

    raise ValueError(f'unknown {image_set}')


def build(image_set, args):
    root = Path(args.ytvos_path)
    assert root.exists(), f'provided YTVOS path {root} does not exist'
    PATHS = {
        "train": (root / "train", root / "meta_expressions" / "train" / "meta_expressions.json"),
        "val": (root / "valid", root / "meta_expressions" / "valid" / "meta_expressions.json"),  # not used actually
    }
    img_folder, ann_file = PATHS[image_set]
    # dataset = YTVOSDataset(img_folder, ann_file, transforms=make_coco_transforms(image_set, max_size=args.max_size), return_masks=args.masks,
    #                        num_frames=args.num_frames, max_skip=args.max_skip)
    dataset = YTVOSDataset(img_folder, ann_file, transforms=None, return_masks=args.masks,
                           num_frames=args.num_frames, max_skip=args.max_skip)
    return dataset
.history/datasets/ytvos_20250113131303.py
ADDED
@@ -0,0 +1,246 @@
"""
Ref-YoutubeVOS data loader
"""
from pathlib import Path

import torch
from torch.autograd.grad_mode import F
from torch.utils.data import Dataset
import datasets.transforms_video as T

import os
from PIL import Image
import json
import numpy as np
import random

from datasets.categories import ytvos_category_dict as category_dict


class YTVOSDataset(Dataset):
    """
    A dataset class for the Refer-Youtube-VOS dataset which was first introduced in the paper:
    "URVOS: Unified Referring Video Object Segmentation Network with a Large-Scale Benchmark"
    (see https://link.springer.com/content/pdf/10.1007/978-3-030-58555-6_13.pdf).
    The original release of the dataset contained both 'first-frame' and 'full-video' expressions. However, the first
    dataset is not publicly available anymore as now only the harder 'full-video' subset is available to download
    through the Youtube-VOS referring video object segmentation competition page at:
    https://competitions.codalab.org/competitions/29139
    Furthermore, for the competition the subset's original validation set, which consists of 507 videos, was split into
    two competition 'validation' & 'test' subsets, consisting of 202 and 305 videos respectively. Evaluation can
    currently only be done on the competition 'validation' subset using the competition's server, as
    annotations were publicly released only for the 'train' subset of the competition.

    """
    def __init__(self, img_folder: Path, ann_file: Path, transforms, return_masks: bool,
                 num_frames: int, max_skip: int):
        self.img_folder = img_folder
        self.ann_file = ann_file
        self._transforms = transforms
        self.return_masks = return_masks  # not used
        self.num_frames = num_frames
        self.max_skip = max_skip
        # create video meta data
        self.prepare_metas()

        print('\n video num: ', len(self.videos), ' clip num: ', len(self.metas))
        print('\n')

    def prepare_metas(self):
        # read object information
        with open(os.path.join(str(self.img_folder), 'meta.json'), 'r') as f:
            subset_metas_by_video = json.load(f)['videos']

        # read expression data
        with open(str(self.ann_file), 'r') as f:
            subset_expressions_by_video = json.load(f)['videos']
        self.videos = list(subset_expressions_by_video.keys())

        self.metas = []
        for vid in self.videos:
            vid_meta = subset_metas_by_video[vid]
            vid_data = subset_expressions_by_video[vid]
            vid_frames = sorted(vid_data['frames'])
            vid_len = len(vid_frames)
            for exp_id, exp_dict in vid_data['expressions'].items():
                for frame_id in range(0, vid_len, self.num_frames):
                    meta = {}
                    meta['video'] = vid
                    meta['exp'] = exp_dict['exp']
                    meta['obj_id'] = int(exp_dict['obj_id'])
                    meta['frames'] = vid_frames
                    meta['frame_id'] = frame_id
                    # get object category
                    obj_id = exp_dict['obj_id']
                    meta['category'] = vid_meta['objects'][obj_id]['category']
                    self.metas.append(meta)

    @staticmethod
    def bounding_box(img):
        rows = np.any(img, axis=1)
        cols = np.any(img, axis=0)
        rmin, rmax = np.where(rows)[0][[0, -1]]
        cmin, cmax = np.where(cols)[0][[0, -1]]
        return rmin, rmax, cmin, cmax  # y1, y2, x1, x2

    def __len__(self):
        return len(self.metas)

    def __getitem__(self, idx):
        instance_check = False
        while not instance_check:
            meta = self.metas[idx]  # dict

            video, exp, obj_id, category, frames, frame_id = \
                meta['video'], meta['exp'], meta['obj_id'], meta['category'], meta['frames'], meta['frame_id']
            # clean up the caption
            exp = " ".join(exp.lower().split())
            category_id = category_dict[category]
            vid_len = len(frames)

            num_frames = self.num_frames
            # random sparse sample
            sample_indx = [frame_id]
            if self.num_frames != 1:
                # local sample
                sample_id_before = random.randint(1, 3)
                sample_id_after = random.randint(1, 3)
                local_indx = [max(0, frame_id - sample_id_before), min(vid_len - 1, frame_id + sample_id_after)]
                sample_indx.extend(local_indx)
                sample_indx = list(set(sample_indx))

                # global sampling
                if num_frames > 3:
                    all_inds = list(range(vid_len))
                    global_inds = all_inds[:min(sample_indx)] + all_inds[max(sample_indx):]
                    global_n = num_frames - len(sample_indx)
                    if len(global_inds) > global_n:
                        select_id = random.sample(range(len(global_inds)), global_n)
                        for s_id in select_id:
                            sample_indx.append(global_inds[s_id])
                    elif vid_len >= global_n:  # sample long range global frames
                        select_id = random.sample(range(vid_len), global_n)
                        for s_id in select_id:
                            sample_indx.append(all_inds[s_id])
                    else:
                        select_id = random.sample(range(vid_len), global_n - vid_len) + list(range(vid_len))
                        for s_id in select_id:
                            sample_indx.append(all_inds[s_id])
            sample_indx.sort()

            # read frames and masks
            imgs, labels, boxes, masks, valid = [], [], [], [], []
            for j in range(self.num_frames):
                frame_indx = sample_indx[j]
                frame_name = frames[frame_indx]
                img_path = os.path.join(str(self.img_folder), 'JPEGImages', video, frame_name + '.jpg')
                mask_path = os.path.join(str(self.img_folder), 'Annotations', video, frame_name + '.png')
                img = Image.open(img_path).convert('RGB')
                mask = Image.open(mask_path).convert('P')

                # create the target
                label = torch.tensor(category_id)
                mask = np.array(mask)
                mask = (mask == obj_id).astype(np.float32)  # 0,1 binary
                if (mask > 0).any():
                    y1, y2, x1, x2 = self.bounding_box(mask)
                    box = torch.tensor([x1, y1, x2, y2]).to(torch.float)
                    valid.append(1)
                else:  # some frame didn't contain the instance
                    box = torch.tensor([0, 0, 0, 0]).to(torch.float)
                    valid.append(0)
                mask = torch.from_numpy(mask)

                # append
                imgs.append(img)
                labels.append(label)
                masks.append(mask)
                boxes.append(box)

            # transform
            w, h = img.size
            labels = torch.stack(labels, dim=0)
            boxes = torch.stack(boxes, dim=0)
            boxes[:, 0::2].clamp_(min=0, max=w)
            boxes[:, 1::2].clamp_(min=0, max=h)
            masks = torch.stack(masks, dim=0)
            target = {
                'frames_idx': torch.tensor(sample_indx),  # [T,]
                'labels': labels,  # [T,]
                'boxes': boxes,  # [T, 4], xyxy
                'masks': masks,  # [T, H, W]
                'valid': torch.tensor(valid),  # [T,]
                'caption': exp,
                'orig_size': torch.as_tensor([int(h), int(w)]),
                'size': torch.as_tensor([int(h), int(w)])
            }

            # "boxes" normalize to [0, 1] and transform from xyxy to cxcywh in self._transform
            if self._transforms:
                imgs, target = self._transforms(imgs, target)
                imgs = torch.stack(imgs, dim=0)  # [T, 3, H, W]
            else:
                imgs = np.array(imgs)
                imgs = torch.tensor(imgs.transpose(0, 3, 1, 2))

            # FIXME: handle "valid", since some box may be removed due to random crop
            if torch.any(target['valid'] == 1):  # at least one instance
                instance_check = True
            else:
                idx = random.randint(0, self.__len__() - 1)

        return imgs, target


def make_coco_transforms(image_set, max_size=640):
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    scales = [288, 320, 352, 392, 416, 448, 480, 512]

    if image_set == 'train':
        return T.Compose([
            T.RandomHorizontalFlip(),
            T.PhotometricDistort(),
            T.RandomSelect(
                T.Compose([
                    T.RandomResize(scales, max_size=max_size),
                    T.Check(),
                ]),
                T.Compose([
                    T.RandomResize([400, 500, 600]),
                    T.RandomSizeCrop(384, 600),
                    T.RandomResize(scales, max_size=max_size),
                    T.Check(),
                ])
            ),
            normalize,
        ])

    # we do not use the 'val' set since the annotations are inaccessible
    if image_set == 'val':
        return T.Compose([
            T.RandomResize([360], max_size=640),
            normalize,
        ])

    raise ValueError(f'unknown {image_set}')


def build(image_set, args):
    root = Path(args.ytvos_path)
    assert root.exists(), f'provided YTVOS path {root} does not exist'
    PATHS = {
        "train": (root / "train", root / "meta_expressions" / "train" / "meta_expressions.json"),
        "val": (root / "valid", root / "meta_expressions" / "valid" / "meta_expressions.json"),  # not used actually
    }
    img_folder, ann_file = PATHS[image_set]
    # dataset = YTVOSDataset(img_folder, ann_file, transforms=make_coco_transforms(image_set, max_size=args.max_size), return_masks=args.masks,
    #                        num_frames=args.num_frames, max_skip=args.max_skip)
    dataset = YTVOSDataset(img_folder, ann_file, transforms=None, return_masks=args.masks,
                           num_frames=args.num_frames, max_skip=args.max_skip)
    return dataset
.history/datasets/ytvos_ref_20250113130047.py
ADDED
@@ -0,0 +1,237 @@
"""
Ref-YoutubeVOS data loader
"""
from pathlib import Path

import torch
from torch.autograd.grad_mode import F
from torch.utils.data import Dataset
import datasets.transforms_video as T

import os
from PIL import Image
import json
import numpy as np
import random

from datasets.categories import ytvos_category_dict as category_dict


class YTVOSDataset(Dataset):
    """
    A dataset class for the Refer-Youtube-VOS dataset which was first introduced in the paper:
    "URVOS: Unified Referring Video Object Segmentation Network with a Large-Scale Benchmark"
    (see https://link.springer.com/content/pdf/10.1007/978-3-030-58555-6_13.pdf).
    The original release of the dataset contained both 'first-frame' and 'full-video' expressions. However, the first
    dataset is not publicly available anymore as now only the harder 'full-video' subset is available to download
    through the Youtube-VOS referring video object segmentation competition page at:
    https://competitions.codalab.org/competitions/29139
    Furthermore, for the competition the subset's original validation set, which consists of 507 videos, was split into
    two competition 'validation' & 'test' subsets, consisting of 202 and 305 videos respectively. Evaluation can
    currently only be done on the competition 'validation' subset using the competition's server, as
    annotations were publicly released only for the 'train' subset of the competition.

    """
    def __init__(self, img_folder: Path, ann_file: Path, transforms, return_masks: bool,
                 num_frames: int, max_skip: int):
        self.img_folder = img_folder
        self.ann_file = ann_file
        self._transforms = transforms
        self.return_masks = return_masks  # not used
        self.num_frames = num_frames
        self.max_skip = max_skip
        # create video meta data
        self.prepare_metas()

        print('\n video num: ', len(self.videos), ' clip num: ', len(self.metas))
        print('\n')

    def prepare_metas(self):
        # read object information
        with open(os.path.join(str(self.img_folder), 'meta.json'), 'r') as f:
            subset_metas_by_video = json.load(f)['videos']

        # read expression data
        with open(str(self.ann_file), 'r') as f:
            subset_expressions_by_video = json.load(f)['videos']
        self.videos = list(subset_expressions_by_video.keys())

        self.metas = []
        for vid in self.videos:
            vid_meta = subset_metas_by_video[vid]
            vid_data = subset_expressions_by_video[vid]
            vid_frames = sorted(vid_data['frames'])
            vid_len = len(vid_frames)

            for exp_id, exp_dict in vid_data['expressions'].items():
                # Exclude start_idx (0, 1) and end_idx (vid_len-1, vid_len-2)
                start_idx, end_idx = 2, vid_len - 2
                bin_size = (end_idx - start_idx) // 4

                bins = []
                for i in range(4):
                    bin_start = start_idx + i * bin_size
                    bin_end = bin_start + bin_size if i < 3 else end_idx
                    bins.append((bin_start, bin_end))

                meta = {
                    'video': vid,
                    'exp': exp_dict['exp'],
                    'obj_id': int(exp_dict['obj_id']),
                    'frames': vid_frames,
                    'bins': bins,
                    'category': vid_meta['objects'][int(exp_dict['obj_id'])]['category']
                }
                self.metas.append(meta)

    @staticmethod
    def bounding_box(img):
        rows = np.any(img, axis=1)
        cols = np.any(img, axis=0)
        rmin, rmax = np.where(rows)[0][[0, -1]]
        cmin, cmax = np.where(cols)[0][[0, -1]]
        return rmin, rmax, cmin, cmax  # y1, y2, x1, x2

    def __len__(self):
        return len(self.metas)

    def __getitem__(self, idx):
        instance_check = False
        while not instance_check:
            meta = self.metas[idx]  # dict

            video, exp, obj_id, category, frames, bins = \
                meta['video'], meta['exp'], meta['obj_id'], meta['category'], meta['frames'], meta['bins']

            # clean up the caption
            exp = " ".join(exp.lower().split())
            category_id = category_dict[category]
            vid_len = len(frames)

            # num_frames = self.num_frames

            # Random sample one frame from each bin
            sample_indx = []
            for start_idx, end_idx in bins:
                sample_indx.append(random.randint(start_idx, end_idx - 1))
            sample_indx.sort()  # Ensure indices are in order

            # read frames and masks
            imgs, labels, boxes, masks, valid = [], [], [], [], []
            for frame_indx in sample_indx:
                frame_name = frames[frame_indx]
                img_path = os.path.join(str(self.img_folder), 'JPEGImages', video, frame_name + '.jpg')
                mask_path = os.path.join(str(self.img_folder), 'Annotations', video, frame_name + '.png')
                img = Image.open(img_path).convert('RGB')
                mask = Image.open(mask_path).convert('P')

                # create the target
                label = torch.tensor(category_id)
                mask = np.array(mask)
                mask = (mask == obj_id).astype(np.float32)  # 0,1 binary
                if (mask > 0).any():
                    y1, y2, x1, x2 = self.bounding_box(mask)
                    box = torch.tensor([x1, y1, x2, y2]).to(torch.float)
                    valid.append(1)
                else:  # some frame didn't contain the instance
                    box = torch.tensor([0, 0, 0, 0]).to(torch.float)
                    valid.append(0)
                mask = torch.from_numpy(mask)

                # append
                imgs.append(img)
                labels.append(label)
                masks.append(mask)
                boxes.append(box)

            # transform
            w, h = img.size
            labels = torch.stack(labels, dim=0)
            boxes = torch.stack(boxes, dim=0)
            boxes[:, 0::2].clamp_(min=0, max=w)
            boxes[:, 1::2].clamp_(min=0, max=h)
            masks = torch.stack(masks, dim=0)
            target = {
                'frames_idx': torch.tensor(sample_indx),  # [T,]
                'labels': labels,  # [T,]
                'boxes': boxes,  # [T, 4], xyxy
                'masks': masks,  # [T, H, W]
                'valid': torch.tensor(valid),  # [T,]
                'caption': exp,
                'orig_size': torch.as_tensor([int(h), int(w)]),
                'size': torch.as_tensor([int(h), int(w)])
            }

            # "boxes" normalize to [0, 1] and transform from xyxy to cxcywh in self._transform
            if self._transforms:
                imgs, target = self._transforms(imgs, target)
                imgs = torch.stack(imgs, dim=0)  # [T, 3, H, W]
            else:
                imgs = np.array(imgs)
                imgs = torch.tensor(imgs.transpose(0, 3, 1, 2))

            # FIXME: handle "valid", since some box may be removed due to random crop
            if torch.any(target['valid'] == 1):  # at least one instance
                instance_check = True
            else:
                idx = random.randint(0, self.__len__() - 1)

        return imgs, target


def make_coco_transforms(image_set, max_size=640):
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    scales = [288, 320, 352, 392, 416, 448, 480, 512]

    if image_set == 'train':
        return T.Compose([
            T.RandomHorizontalFlip(),
            T.PhotometricDistort(),
            T.RandomSelect(
                T.Compose([
                    T.RandomResize(scales, max_size=max_size),
                    T.Check(),
                ]),
                T.Compose([
                    T.RandomResize([400, 500, 600]),
                    T.RandomSizeCrop(384, 600),
                    T.RandomResize(scales, max_size=max_size),
                    T.Check(),
                ])
            ),
            normalize,
        ])

    # we do not use the 'val' set since the annotations are inaccessible
    if image_set == 'val':
        return T.Compose([
            T.RandomResize([360], max_size=640),
            normalize,
        ])

    raise ValueError(f'unknown {image_set}')


def build(image_set, args):
    root = Path(args.ytvos_path)
    assert root.exists(), f'provided YTVOS path {root} does not exist'
    PATHS = {
        "train": (root / "train", root / "meta_expressions" / "train" / "meta_expressions.json"),
        "val": (root / "valid", root / "meta_expressions" / "valid" / "meta_expressions.json"),  # not used actually
    }
    img_folder, ann_file = PATHS[image_set]
    # dataset = YTVOSDataset(img_folder, ann_file, transforms=make_coco_transforms(image_set, max_size=args.max_size), return_masks=args.masks,
    #                        num_frames=args.num_frames, max_skip=args.max_skip)
    dataset = YTVOSDataset(img_folder, ann_file, transforms=None, return_masks=args.masks,
                           num_frames=args.num_frames, max_skip=args.max_skip)
    return dataset
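
The per-bin sampling above is the main change of `ytvos_ref.py` relative to the plain `ytvos.py` loader: instead of a clip anchor plus local/global sampling, the frame range [2, vid_len-2) is split into four bins and one frame is drawn from each. A standalone sketch of just that logic (vid_len = 30 is an arbitrary example; it assumes the video is long enough that every bin is non-empty):

import random

vid_len = 30
start_idx, end_idx = 2, vid_len - 2          # drop the first and last two frames
bin_size = (end_idx - start_idx) // 4

bins = []
for i in range(4):
    bin_start = start_idx + i * bin_size
    bin_end = bin_start + bin_size if i < 3 else end_idx  # last bin absorbs the remainder
    bins.append((bin_start, bin_end))

# one random frame index per bin, kept in temporal order
sample_indx = sorted(random.randint(s, e - 1) for s, e in bins)
print(bins, sample_indx)
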
.history/datasets/ytvos_ref_20250113131359.py
ADDED
@@ -0,0 +1,239 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Ref-YoutubeVOS data loader
|
3 |
+
"""
|
4 |
+
from pathlib import Path
|
5 |
+
|
6 |
+
import torch
|
7 |
+
from torch.autograd.grad_mode import F
|
8 |
+
from torch.utils.data import Dataset
|
9 |
+
import datasets.transforms_video as T
|
10 |
+
|
11 |
+
import os
|
12 |
+
from PIL import Image
|
13 |
+
import json
|
14 |
+
import numpy as np
|
15 |
+
import random
|
16 |
+
|
17 |
+
from datasets.categories import ytvos_category_dict as category_dict
|
18 |
+
|
19 |
+
|
20 |
+
class YTVOSDataset(Dataset):
|
21 |
+
"""
|
22 |
+
A dataset class for the Refer-Youtube-VOS dataset which was first introduced in the paper:
|
23 |
+
"URVOS: Unified Referring Video Object Segmentation Network with a Large-Scale Benchmark"
|
24 |
+
(see https://link.springer.com/content/pdf/10.1007/978-3-030-58555-6_13.pdf).
|
25 |
+
The original release of the dataset contained both 'first-frame' and 'full-video' expressions. However, the first
|
26 |
+
dataset is not publicly available anymore as now only the harder 'full-video' subset is available to download
|
27 |
+
through the Youtube-VOS referring video object segmentation competition page at:
|
28 |
+
https://competitions.codalab.org/competitions/29139
|
29 |
+
Furthermore, for the competition the subset's original validation set, which consists of 507 videos, was split into
|
30 |
+
two competition 'validation' & 'test' subsets, consisting of 202 and 305 videos respectively. Evaluation can
|
31 |
+
currently only be done on the competition 'validation' subset using the competition's server, as
|
32 |
+
annotations were publicly released only for the 'train' subset of the competition.
|
33 |
+
|
34 |
+
"""
|
35 |
+
def __init__(self, img_folder: Path, ann_file: Path, transforms, return_masks: bool,
|
36 |
+
num_frames: int, max_skip: int):
|
37 |
+
self.img_folder = img_folder
|
38 |
+
self.ann_file = ann_file
|
39 |
+
self._transforms = transforms
|
40 |
+
self.return_masks = return_masks # not used
|
41 |
+
self.num_frames = num_frames
|
42 |
+
self.max_skip = max_skip
|
43 |
+
# create video meta data
|
44 |
+
self.prepare_metas()
|
45 |
+
|
46 |
+
print('\n video num: ', len(self.videos), ' clip num: ', len(self.metas))
|
47 |
+
print('\n')
|
48 |
+
|
49 |
+
def prepare_metas(self):
|
50 |
+
# read object information
|
51 |
+
with open(os.path.join(str(self.img_folder), 'meta.json'), 'r') as f:
|
52 |
+
subset_metas_by_video = json.load(f)['videos']
|
53 |
+
|
54 |
+
# read expression data
|
55 |
+
with open(str(self.ann_file), 'r') as f:
|
56 |
+
subset_expressions_by_video = json.load(f)['videos']
|
57 |
+
self.videos = list(subset_expressions_by_video.keys())
|
58 |
+
|
59 |
+
self.metas = []
|
60 |
+
for vid in self.videos:
|
61 |
+
vid_meta = subset_metas_by_video[vid]
|
62 |
+
vid_data = subset_expressions_by_video[vid]
|
63 |
+
vid_frames = sorted(vid_data['frames'])
|
64 |
+
vid_len = len(vid_frames)
|
65 |
+
|
66 |
+
|
67 |
+
for exp_id, exp_dict in vid_data['expressions'].items():
|
68 |
+
# Exclude start_idx (0, 1) and end_idx (vid_len-1, vid_len-2)
|
69 |
+
start_idx , end_idx = 2, vid_len-2
|
70 |
+
bin_size = (end_idx - start_idx) // 4
|
71 |
+
|
72 |
+
bins = []
|
73 |
+
for i in range(4):
|
74 |
+
bin_start = start_idx + i * bin_size
|
75 |
+
bin_end = bin_start + bin_size if i < 3 else end_idx
|
76 |
+
|
77 |
+
bins.append((bin_start, bin_end))
|
78 |
+
|
79 |
+
|
80 |
+
meta = {
|
81 |
+
'video': vid,
|
82 |
+
'exp': exp_dict['exp'],
|
83 |
+
'obj_id': int(exp_dict['obj_id']),
|
84 |
+
'frames': vid_frames,
|
85 |
+
'bins': bins,
|
86 |
+
'category': vid_meta['objects'][exp_dict['obj_id']]['category']
|
87 |
+
}
|
88 |
+
self.metas.append(meta)
|
89 |
+
|
90 |
+
|
91 |
+
@staticmethod
|
92 |
+
def bounding_box(img):
|
93 |
+
rows = np.any(img, axis=1)
        cols = np.any(img, axis=0)
        rmin, rmax = np.where(rows)[0][[0, -1]]
        cmin, cmax = np.where(cols)[0][[0, -1]]
        return rmin, rmax, cmin, cmax  # y1, y2, x1, x2

    def __len__(self):
        return len(self.metas)

    def __getitem__(self, idx):
        instance_check = False
        while not instance_check:
            meta = self.metas[idx]  # dict

            video, exp, obj_id, category, frames, bins = \
                meta['video'], meta['exp'], meta['obj_id'], meta['category'], meta['frames'], meta['bins']

            # clean up the caption
            exp = " ".join(exp.lower().split())
            category_id = category_dict[category]
            vid_len = len(frames)

            # num_frames = self.num_frames

            # randomly sample one frame from each temporal bin
            sample_indx = []
            for start_idx, end_idx in bins:
                sample_indx.append(random.randint(start_idx, end_idx - 1))
            sample_indx.sort()  # ensure indices are in order

            # read frames and masks
            imgs, labels, boxes, masks, valid = [], [], [], [], []
            for frame_indx in sample_indx:
                frame_name = frames[frame_indx]
                img_path = os.path.join(str(self.img_folder), 'JPEGImages', video, frame_name + '.jpg')
                mask_path = os.path.join(str(self.img_folder), 'Annotations', video, frame_name + '.png')
                img = Image.open(img_path).convert('RGB')
                mask = Image.open(mask_path).convert('P')

                # create the target
                label = torch.tensor(category_id)
                mask = np.array(mask)
                mask = (mask == obj_id).astype(np.float32)  # 0/1 binary
                if (mask > 0).any():
                    y1, y2, x1, x2 = self.bounding_box(mask)
                    box = torch.tensor([x1, y1, x2, y2]).to(torch.float)
                    valid.append(1)
                else:  # this frame does not contain the instance
                    box = torch.tensor([0, 0, 0, 0]).to(torch.float)
                    valid.append(0)
                mask = torch.from_numpy(mask)

                # append
                imgs.append(img)
                labels.append(label)
                masks.append(mask)
                boxes.append(box)

            # transform
            w, h = img.size
            labels = torch.stack(labels, dim=0)
            boxes = torch.stack(boxes, dim=0)
            boxes[:, 0::2].clamp_(min=0, max=w)
            boxes[:, 1::2].clamp_(min=0, max=h)
            masks = torch.stack(masks, dim=0)
            target = {
                'frames_idx': torch.tensor(sample_indx),  # [T,]
                'labels': labels,                         # [T,]
                'boxes': boxes,                           # [T, 4], xyxy
                'masks': masks,                           # [T, H, W]
                'valid': torch.tensor(valid),             # [T,]
                'caption': exp,
                'orig_size': torch.as_tensor([int(h), int(w)]),
                'size': torch.as_tensor([int(h), int(w)])
            }

            # "boxes" are normalized to [0, 1] and converted from xyxy to cxcywh in self._transforms
            if self._transforms:
                imgs, target = self._transforms(imgs, target)
                imgs = torch.stack(imgs, dim=0)  # [T, 3, H, W]
            else:
                imgs = np.array(imgs)
                imgs = torch.tensor(imgs.transpose(0, 3, 1, 2))

            # FIXME: handle "valid", since some boxes may be removed due to random crop
            if torch.any(target['valid'] == 1):  # at least one visible instance
                instance_check = True
            else:
                idx = random.randint(0, self.__len__() - 1)

        return imgs, target


def make_coco_transforms(image_set, max_size=640):
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    scales = [288, 320, 352, 392, 416, 448, 480, 512]

    if image_set == 'train':
        return T.Compose([
            T.RandomHorizontalFlip(),
            T.PhotometricDistort(),
            T.RandomSelect(
                T.Compose([
                    T.RandomResize(scales, max_size=max_size),
                    T.Check(),
                ]),
                T.Compose([
                    T.RandomResize([400, 500, 600]),
                    T.RandomSizeCrop(384, 600),
                    T.RandomResize(scales, max_size=max_size),
                    T.Check(),
                ])
            ),
            normalize,
        ])

    # we do not use the 'val' set since the annotations are inaccessible
    if image_set == 'val':
        return T.Compose([
            T.RandomResize([360], max_size=640),
            normalize,
        ])

    raise ValueError(f'unknown {image_set}')


def build(image_set, args):
    root = Path(args.ytvos_path)
    assert root.exists(), f'provided YTVOS path {root} does not exist'
    PATHS = {
        "train": (root / "train", root / "meta_expressions" / "train" / "meta_expressions.json"),
        "val": (root / "valid", root / "meta_expressions" / "valid" / "meta_expressions.json"),  # not actually used
    }
    img_folder, ann_file = PATHS[image_set]
    # dataset = YTVOSDataset(img_folder, ann_file, transforms=make_coco_transforms(image_set, max_size=args.max_size), return_masks=args.masks,
    #                        num_frames=args.num_frames, max_skip=args.max_skip)
    dataset = YTVOSDataset(img_folder, ann_file, transforms=None, return_masks=args.masks,
                           num_frames=args.num_frames, max_skip=args.max_skip)
    return dataset
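For orientation, here is a minimal usage sketch of the builder above, assuming the snapshot is installed as datasets/ytvos_ref.py. The Namespace fields mirror exactly what build() reads; the dataset path and flag values are placeholders, not taken from the repository configs.

    # a minimal sketch, assuming the snapshot is importable as datasets.ytvos_ref
    from argparse import Namespace

    from datasets.ytvos_ref import build

    # hypothetical argument object; only the fields build() actually reads are set
    args = Namespace(ytvos_path='/data/ref-youtube-vos', masks=True,
                     num_frames=4, max_skip=3)
    dataset = build('train', args)
    imgs, target = dataset[0]  # imgs: [T, 3, H, W] tensor, target: dict of per-frame tensors
    print(imgs.shape, target['valid'])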
.history/datasets/ytvos_ref_20250113162825.py ADDED @@ -0,0 +1,244 @@
"""
Ref-YoutubeVOS data loader
"""
from pathlib import Path

import torch
from torch.autograd.grad_mode import F  # unused import; safe to remove
from torch.utils.data import Dataset
import datasets.transforms_video as T

import os
from PIL import Image
import json
import numpy as np
import random

from datasets.categories import ytvos_category_dict as category_dict


class YTVOSDataset(Dataset):
    """
    A dataset class for the Refer-Youtube-VOS dataset, first introduced in the paper:
    "URVOS: Unified Referring Video Object Segmentation Network with a Large-Scale Benchmark"
    (see https://link.springer.com/content/pdf/10.1007/978-3-030-58555-6_13.pdf).
    The original release of the dataset contained both 'first-frame' and 'full-video' expressions. However, the
    first-frame subset is no longer publicly available: only the harder 'full-video' subset can be downloaded
    through the Youtube-VOS referring video object segmentation competition page at:
    https://competitions.codalab.org/competitions/29139
    Furthermore, for the competition the subset's original validation set, which consists of 507 videos, was split
    into two competition 'validation' & 'test' subsets, consisting of 202 and 305 videos respectively. Evaluation
    can currently only be done on the competition 'validation' subset using the competition's server, as
    annotations were publicly released only for the 'train' subset of the competition.
    """
    def __init__(self, img_folder: Path, ann_file: Path, transforms, return_masks: bool,
                 num_frames: int, max_skip: int):
        self.img_folder = img_folder
        self.ann_file = ann_file
        self._transforms = transforms
        self.return_masks = return_masks  # not used
        self.num_frames = num_frames
        self.max_skip = max_skip
        # create video meta data
        self.prepare_metas()

        print('\n video num: ', len(self.videos), ' clip num: ', len(self.metas))
        print('\n')

    def prepare_metas(self):
        # read object information
        with open(os.path.join(str(self.img_folder), 'meta.json'), 'r') as f:
            subset_metas_by_video = json.load(f)['videos']

        # read expression data
        with open(str(self.ann_file), 'r') as f:
            subset_expressions_by_video = json.load(f)['videos']
        self.videos = list(subset_expressions_by_video.keys())

        self.metas = []
        for vid in self.videos:
            vid_meta = subset_metas_by_video[vid]
            vid_data = subset_expressions_by_video[vid]
            vid_frames = sorted(vid_data['frames'])
            vid_len = len(vid_frames)

            for exp_id, exp_dict in vid_data['expressions'].items():
                # exclude the first two frame indices (0, 1) and the last two (vid_len-2, vid_len-1)
                start_idx, end_idx = 2, vid_len - 2
                bin_size = (end_idx - start_idx) // 4

                bins = []
                for i in range(4):
                    bin_start = start_idx + i * bin_size
                    bin_end = bin_start + bin_size if i < 3 else end_idx
                    bins.append((bin_start, bin_end))

                # randomly sample one frame from each bin
                sample_indx = []
                for start_idx, end_idx in bins:
                    try:
                        sample_indx.append(random.randint(start_idx, end_idx - 1))
                    except ValueError:  # bin_size is 0 for very short videos
                        print(bins)
                sample_indx.sort()  # ensure indices are in order

                for frame_id in sample_indx:
                    meta = {
                        'video': vid,
                        'exp': exp_dict['exp'],
                        'obj_id': int(exp_dict['obj_id']),
                        'frames': vid_frames,
                        'frame_id': frame_id,
                        'sample_frames_id': sample_indx,
                        'bins': bins,
                        'category': vid_meta['objects'][exp_dict['obj_id']]['category']
                    }
                    self.metas.append(meta)

    @staticmethod
    def bounding_box(img):
        rows = np.any(img, axis=1)
        cols = np.any(img, axis=0)
        rmin, rmax = np.where(rows)[0][[0, -1]]
        cmin, cmax = np.where(cols)[0][[0, -1]]
        return rmin, rmax, cmin, cmax  # y1, y2, x1, x2

    def __len__(self):
        return len(self.metas)

    def __getitem__(self, idx):
        instance_check = False
        while not instance_check:
            meta = self.metas[idx]  # dict

            video, exp, obj_id, category, frames, frame_id, sample_frames_id, bins = \
                meta['video'], meta['exp'], meta['obj_id'], meta['category'], meta['frames'], \
                metas['frame_id'], metas['sample_frames_id'], meta['bins']
            # NOTE: 'metas' in the line above is a NameError ('meta' is intended);
            # it is fixed in the 20250113163605 snapshot below.

            # clean up the caption
            exp = " ".join(exp.lower().split())
            category_id = category_dict[category]
            vid_len = len(frames)

            # The remainder of __getitem__ matches the preceding snapshot, except that the
            # frame loop iterates over the precomputed sample_frames_id and the target
            # stores 'frames_idx': torch.tensor(sample_frames_id).

make_coco_transforms() and build() are unchanged from the preceding snapshot.
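The four-bin temporal sampling introduced in prepare_metas() above is easiest to see in isolation. A standalone sketch with a made-up vid_len follows; it also shows why this snapshot needs the try/except guard: for videos shorter than 8 frames, bin_size is 0 and randint(start, start - 1) raises ValueError (the next snapshot sidesteps this by skipping videos with fewer than 11 frames).

    # standalone sketch of the four-bin sampling; vid_len is an example value
    import random

    vid_len = 40
    start_idx, end_idx = 2, vid_len - 2          # drop the first/last two frames
    bin_size = (end_idx - start_idx) // 4

    bins = []
    for i in range(4):
        bin_start = start_idx + i * bin_size
        bin_end = bin_start + bin_size if i < 3 else end_idx
        bins.append((bin_start, bin_end))
    # bins == [(2, 11), (11, 20), (20, 29), (29, 38)]

    sample_indx = sorted(random.randint(s, e - 1) for s, e in bins)
    print(bins, sample_indx)                     # one frame index per temporal quarter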
.history/datasets/ytvos_ref_20250113163406.py ADDED @@ -0,0 +1,250 @@
This snapshot differs from ytvos_ref_20250113162825.py above only inside prepare_metas(): videos shorter than 11 frames are now skipped up front, which lets the bin sampling drop its try/except guard, and a summary count is printed at the end.

    def prepare_metas(self):
        # read object information
        with open(os.path.join(str(self.img_folder), 'meta.json'), 'r') as f:
            subset_metas_by_video = json.load(f)['videos']

        # read expression data
        with open(str(self.ann_file), 'r') as f:
            subset_expressions_by_video = json.load(f)['videos']
        self.videos = list(subset_expressions_by_video.keys())

        self.metas = []
        skip_vid_count = 0

        for vid in self.videos:
            vid_meta = subset_metas_by_video[vid]
            vid_data = subset_expressions_by_video[vid]
            vid_frames = sorted(vid_data['frames'])
            vid_len = len(vid_frames)

            if vid_len < 11:
                # print(f"Too short video: {vid} with frame length {vid_len}")
                skip_vid_count += 1
                continue

            for exp_id, exp_dict in vid_data['expressions'].items():
                # exclude the first two frame indices (0, 1) and the last two (vid_len-2, vid_len-1)
                start_idx, end_idx = 2, vid_len - 2
                bin_size = (end_idx - start_idx) // 4

                bins = []
                for i in range(4):
                    bin_start = start_idx + i * bin_size
                    bin_end = bin_start + bin_size if i < 3 else end_idx
                    bins.append((bin_start, bin_end))

                # randomly sample one frame from each bin
                sample_indx = []
                for start_idx, end_idx in bins:
                    sample_indx.append(random.randint(start_idx, end_idx - 1))
                sample_indx.sort()  # ensure indices are in order

                for frame_id in sample_indx:
                    meta = {
                        'video': vid,
                        'exp': exp_dict['exp'],
                        'obj_id': int(exp_dict['obj_id']),
                        'frames': vid_frames,
                        'frame_id': frame_id,
                        'sample_frames_id': sample_indx,
                        'bins': bins,
                        'category': vid_meta['objects'][exp_dict['obj_id']]['category']
                    }
                    self.metas.append(meta)

        print(f"skipped {skip_vid_count} short videos")

All other code (imports, docstring, bounding_box, __len__, __getitem__ with the 'metas' NameError still present, make_coco_transforms, build) is unchanged from the 20250113162825 snapshot above.
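The bounding_box() helper above derives a box from a binary mask by projecting it onto each axis. A tiny check on a toy mask makes the inclusive-bound convention explicit (which is why the caller can build [x1, y1, x2, y2] directly):

    # toy check of the mask -> box projection used by bounding_box() above
    import numpy as np

    mask = np.zeros((6, 8), dtype=np.float32)
    mask[2:4, 3:6] = 1.0                  # object occupies rows 2-3, cols 3-5

    rows = np.any(mask, axis=1)           # which rows contain any foreground
    cols = np.any(mask, axis=0)           # which columns contain any foreground
    y1, y2 = np.where(rows)[0][[0, -1]]
    x1, x2 = np.where(cols)[0][[0, -1]]
    print(y1, y2, x1, x2)                 # -> 2 3 3 5 (bounds are inclusive)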
.history/datasets/ytvos_ref_20250113163605.py ADDED @@ -0,0 +1,250 @@
This snapshot is identical to ytvos_ref_20250113163406.py above except for one fix in __getitem__: the NameError noted earlier is resolved by destructuring everything from 'meta':

            video, exp, obj_id, category, frames, frame_id, sample_frames_id, bins = \
                meta['video'], meta['exp'], meta['obj_id'], meta['category'], meta['frames'], \
                meta['frame_id'], meta['sample_frames_id'], meta['bins']
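The '# "boxes" normalize to [0, 1] and transform from xyxy to cxcywh' comment in __getitem__ refers to a conversion performed inside the transforms. A hedged sketch of that conversion follows; it mirrors the standard DETR-style helper rather than quoting transforms_video directly, and the tensor values are made up.

    # sketch of the xyxy -> normalized cxcywh conversion the comment refers to
    import torch

    def box_xyxy_to_cxcywh(boxes: torch.Tensor) -> torch.Tensor:
        x1, y1, x2, y2 = boxes.unbind(-1)
        return torch.stack([(x1 + x2) / 2, (y1 + y2) / 2, x2 - x1, y2 - y1], dim=-1)

    w, h = 640, 360                                   # illustrative frame size
    boxes = torch.tensor([[32.0, 18.0, 96.0, 90.0]])  # xyxy, in pixels
    cxcywh = box_xyxy_to_cxcywh(boxes) / torch.tensor([w, h, w, h])
    print(cxcywh)  # tensor([[0.1000, 0.1500, 0.1000, 0.2000]])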
.history/datasets/ytvos_ref_20250113180729.py ADDED @@ -0,0 +1,250 @@
This snapshot is identical to ytvos_ref_20250113163605.py above except that the per-sample key is renamed from 'frame_id' to 'sample_id'. In prepare_metas():

                for sample_id in sample_indx:
                    meta = {
                        'video': vid,
                        'exp': exp_dict['exp'],
                        'obj_id': int(exp_dict['obj_id']),
                        'frames': vid_frames,
                        'sample_id': sample_id,
                        'sample_frames_id': sample_indx,
                        'bins': bins,
                        'category': vid_meta['objects'][exp_dict['obj_id']]['category']
                    }
                    self.metas.append(meta)

and in __getitem__():

            video, exp, obj_id, category, frames, sample_id, sample_frames_id, bins = \
                meta['video'], meta['exp'], meta['obj_id'], meta['category'], meta['frames'], \
                meta['sample_id'], meta['sample_frames_id'], meta['bins']
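Across all these snapshots, __getitem__ keeps redrawing a random index until it lands on a sample whose clip shows the referred object in at least one frame. The same rejection loop in isolation, with toy data standing in for target['valid']:

    # isolated sketch of the instance_check rejection loop in __getitem__
    import random

    valid_flags = [0, 0, 1, 0, 1]          # per-sample: any visible instance?

    def get_valid_sample(idx: int) -> int:
        while not valid_flags[idx]:        # mirrors 'while not instance_check'
            idx = random.randint(0, len(valid_flags) - 1)
        return idx

    print(get_valid_sample(0))             # always ends on index 2 or 4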
.history/datasets/ytvos_ref_20250114201918.py ADDED @@ -0,0 +1,253 @@
This snapshot is identical to ytvos_ref_20250113180729.py above except for a debugging probe inserted in prepare_metas() right after the short-video check. Note that the early return below aborts meta preparation at the first sufficiently long video, leaving self.metas empty:

            print(f"vid_data: {vid_data}")
            print(f"vid_meta: {vid_meta}")
            return  # debugging early exit; nothing past this line runs
            for exp_id, exp_dict in vid_data['expressions'].items():

The rest of the file (the expression loop above becomes unreachable, plus bounding_box, __len__, __getitem__, make_coco_transforms, and build) is unchanged.
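Once the debugging early return flagged above is removed, each item is a fixed-length four-frame clip, so the dataset drops straight into a DataLoader. A hedged sketch, assuming the snapshot is installed as datasets/ytvos_ref.py; with transforms=None the raw frame sizes vary between videos, so batch_size=1 with a trivial collate is used to stay safe:

    from argparse import Namespace

    from torch.utils.data import DataLoader
    from datasets.ytvos_ref import build

    # hypothetical argument object, as in the earlier sketch
    args = Namespace(ytvos_path='/data/ref-youtube-vos', masks=True,
                     num_frames=4, max_skip=3)
    loader = DataLoader(build('train', args), batch_size=1, shuffle=True,
                        collate_fn=lambda batch: batch[0])  # unwrap the single sample
    imgs, target = next(iter(loader))
    print(imgs.shape, target['masks'].shape)  # [4, 3, H, W], [4, H, W]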
.history/datasets/ytvos_ref_20250114202502.py ADDED @@ -0,0 +1,250 @@
"""
Ref-YoutubeVOS data loader
"""
from pathlib import Path

import torch
from torch.utils.data import Dataset
import datasets.transforms_video as T

import os
from PIL import Image
import json
import numpy as np
import random

from datasets.categories import ytvos_category_dict as category_dict


class YTVOSDataset(Dataset):
    """
    A dataset class for the Refer-Youtube-VOS dataset, which was first introduced in the paper:
    "URVOS: Unified Referring Video Object Segmentation Network with a Large-Scale Benchmark"
    (see https://link.springer.com/content/pdf/10.1007/978-3-030-58555-6_13.pdf).
    The original release of the dataset contained both 'first-frame' and 'full-video' expressions. However, the
    'first-frame' subset is no longer publicly available; only the harder 'full-video' subset can be downloaded
    through the Youtube-VOS referring video object segmentation competition page at:
    https://competitions.codalab.org/competitions/29139
    Furthermore, for the competition the subset's original validation set, which consists of 507 videos, was split into
    two competition 'validation' & 'test' subsets, consisting of 202 and 305 videos respectively. Evaluation can
    currently only be done on the competition 'validation' subset using the competition's server, as
    annotations were publicly released only for the 'train' subset of the competition.
    """
    def __init__(self, img_folder: Path, ann_file: Path, transforms, return_masks: bool,
                 num_frames: int, max_skip: int):
        self.img_folder = img_folder
        self.ann_file = ann_file
        self._transforms = transforms
        self.return_masks = return_masks  # not used
        self.num_frames = num_frames
        self.max_skip = max_skip
        # create video meta data
        self.prepare_metas()

        print('\n video num: ', len(self.videos), ' clip num: ', len(self.metas))
        print('\n')

    def prepare_metas(self):
        # read object information
        with open(os.path.join(str(self.img_folder), 'meta.json'), 'r') as f:
            subset_metas_by_video = json.load(f)['videos']

        # read expression data
        with open(str(self.ann_file), 'r') as f:
            subset_expressions_by_video = json.load(f)['videos']
        self.videos = list(subset_expressions_by_video.keys())

        self.metas = []
        skip_vid_count = 0

        for vid in self.videos:
            vid_meta = subset_metas_by_video[vid]
            vid_data = subset_expressions_by_video[vid]
            vid_frames = sorted(vid_data['frames'])
            vid_len = len(vid_frames)

            if vid_len < 11:
                # print(f"Too short video: {vid} with frame length {vid_len}")
                skip_vid_count += 1
                continue

            for exp_id, exp_dict in vid_data['expressions'].items():
                # Exclude start_idx (0, 1) and end_idx (vid_len-1, vid_len-2)
                start_idx, end_idx = 2, vid_len - 2
                bin_size = (end_idx - start_idx) // 4

                bins = []
                for i in range(4):
                    bin_start = start_idx + i * bin_size
                    bin_end = bin_start + bin_size if i < 3 else end_idx
                    bins.append((bin_start, bin_end))

                # Randomly sample one frame from each bin
                sample_indx = []
                for start_idx, end_idx in bins:
                    sample_indx.append(random.randint(start_idx, end_idx - 1))
                sample_indx.sort()  # Ensure indices are in order

                for sample_id in sample_indx:
                    meta = {
                        'video': vid,
                        'exp': exp_dict['exp'],
                        'obj_id': int(exp_dict['obj_id']),
                        'frames': vid_frames,
                        'sample_id': sample_id,
                        'sample_frames_id': sample_indx,
                        'bins': bins,
                        'category': vid_meta['objects'][exp_dict['obj_id']]['category']
                    }
                    self.metas.append(meta)

        print(f"skipped {skip_vid_count} short videos")

    @staticmethod
    def bounding_box(img):
        rows = np.any(img, axis=1)
        cols = np.any(img, axis=0)
        rmin, rmax = np.where(rows)[0][[0, -1]]
        cmin, cmax = np.where(cols)[0][[0, -1]]
        return rmin, rmax, cmin, cmax  # y1, y2, x1, x2

    def __len__(self):
        return len(self.metas)

    def __getitem__(self, idx):
        instance_check = False
        while not instance_check:
            meta = self.metas[idx]  # dict

            video, exp, obj_id, category, frames, sample_id, sample_frames_id, bins = \
                meta['video'], meta['exp'], meta['obj_id'], meta['category'], meta['frames'], meta['sample_id'], meta['sample_frames_id'], meta['bins']

            # clean up the caption
            exp = " ".join(exp.lower().split())
            category_id = category_dict[category]
            vid_len = len(frames)

            # num_frames = self.num_frames

            # read frames and masks
            imgs, labels, boxes, masks, valid = [], [], [], [], []
            for frame_indx in sample_frames_id:
                frame_name = frames[frame_indx]
                img_path = os.path.join(str(self.img_folder), 'JPEGImages', video, frame_name + '.jpg')
                mask_path = os.path.join(str(self.img_folder), 'Annotations', video, frame_name + '.png')
                img = Image.open(img_path).convert('RGB')
                mask = Image.open(mask_path).convert('P')

                # create the target
                label = torch.tensor(category_id)
                mask = np.array(mask)
                mask = (mask == obj_id).astype(np.float32)  # 0/1 binary
                if (mask > 0).any():
                    y1, y2, x1, x2 = self.bounding_box(mask)
                    box = torch.tensor([x1, y1, x2, y2]).to(torch.float)
                    valid.append(1)
                else:  # some frames don't contain the instance
                    box = torch.tensor([0, 0, 0, 0]).to(torch.float)
                    valid.append(0)
                mask = torch.from_numpy(mask)

                # append
                imgs.append(img)
                labels.append(label)
                masks.append(mask)
                boxes.append(box)

            # transform
            w, h = img.size
            labels = torch.stack(labels, dim=0)
            boxes = torch.stack(boxes, dim=0)
            boxes[:, 0::2].clamp_(min=0, max=w)
            boxes[:, 1::2].clamp_(min=0, max=h)
            masks = torch.stack(masks, dim=0)
            target = {
                'frames_idx': torch.tensor(sample_frames_id),  # [T,]
                'labels': labels,                              # [T,]
                'boxes': boxes,                                # [T, 4], xyxy
                'masks': masks,                                # [T, H, W]
                'valid': torch.tensor(valid),                  # [T,]
                'caption': exp,
                'orig_size': torch.as_tensor([int(h), int(w)]),
                'size': torch.as_tensor([int(h), int(w)])
            }

            # "boxes" are normalized to [0, 1] and converted from xyxy to cxcywh inside self._transforms
            if self._transforms:
                imgs, target = self._transforms(imgs, target)
                imgs = torch.stack(imgs, dim=0)  # [T, 3, H, W]
            else:
                imgs = np.array(imgs)
                imgs = torch.tensor(imgs.transpose(0, 3, 1, 2))

            # FIXME: handle "valid", since some boxes may be removed due to random crop
            if torch.any(target['valid'] == 1):  # at least one instance
                instance_check = True
            else:
                idx = random.randint(0, self.__len__() - 1)

        return imgs, target


def make_coco_transforms(image_set, max_size=640):
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    scales = [288, 320, 352, 392, 416, 448, 480, 512]

    if image_set == 'train':
        return T.Compose([
            T.RandomHorizontalFlip(),
            T.PhotometricDistort(),
            T.RandomSelect(
                T.Compose([
                    T.RandomResize(scales, max_size=max_size),
                    T.Check(),
                ]),
                T.Compose([
                    T.RandomResize([400, 500, 600]),
                    T.RandomSizeCrop(384, 600),
                    T.RandomResize(scales, max_size=max_size),
                    T.Check(),
                ])
            ),
            normalize,
        ])

    # we do not use the 'val' set since the annotations are inaccessible
    if image_set == 'val':
        return T.Compose([
            T.RandomResize([360], max_size=640),
            normalize,
        ])

    raise ValueError(f'unknown {image_set}')


def build(image_set, args):
    root = Path(args.ytvos_path)
    assert root.exists(), f'provided YTVOS path {root} does not exist'
    PATHS = {
        "train": (root / "train", root / "meta_expressions" / "train" / "meta_expressions.json"),
        "val": (root / "valid", root / "meta_expressions" / "valid" / "meta_expressions.json"),  # not actually used
    }
    img_folder, ann_file = PATHS[image_set]
    # dataset = YTVOSDataset(img_folder, ann_file, transforms=make_coco_transforms(image_set, max_size=args.max_size), return_masks=args.masks,
    #                        num_frames=args.num_frames, max_skip=args.max_skip)
    dataset = YTVOSDataset(img_folder, ann_file, transforms=None, return_masks=args.masks,
                           num_frames=args.num_frames, max_skip=args.max_skip)
    return dataset
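A standalone sketch of the bin-based sampling used in prepare_metas() above: the frame range [2, vid_len - 2) is split into 4 bins and one frame index is drawn per bin. The function name and variables here are illustrative, not from the repo.

import random

def sample_four_frames(vid_len: int) -> list:
    start_idx, end_idx = 2, vid_len - 2
    bin_size = (end_idx - start_idx) // 4
    bins = []
    for i in range(4):
        bin_start = start_idx + i * bin_size
        bin_end = bin_start + bin_size if i < 3 else end_idx
        bins.append((bin_start, bin_end))
    # one random index per bin, kept in temporal order
    return sorted(random.randint(s, e - 1) for s, e in bins)

print(sample_four_frames(20))  # e.g. [3, 7, 12, 16]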
.history/datasets/ytvos_ref_20250114205233.py
ADDED
@@ -0,0 +1,252 @@
"""
Ref-YoutubeVOS data loader
"""
from pathlib import Path

import torch
from torch.utils.data import Dataset
import datasets.transforms_video as T

import os
from PIL import Image
import json
import numpy as np
import random

from datasets.categories import ytvos_category_dict as category_dict


class YTVOSDataset(Dataset):
    """
    A dataset class for the Refer-Youtube-VOS dataset, which was first introduced in the paper:
    "URVOS: Unified Referring Video Object Segmentation Network with a Large-Scale Benchmark"
    (see https://link.springer.com/content/pdf/10.1007/978-3-030-58555-6_13.pdf).
    The original release of the dataset contained both 'first-frame' and 'full-video' expressions. However, the
    'first-frame' subset is no longer publicly available; only the harder 'full-video' subset can be downloaded
    through the Youtube-VOS referring video object segmentation competition page at:
    https://competitions.codalab.org/competitions/29139
    Furthermore, for the competition the subset's original validation set, which consists of 507 videos, was split into
    two competition 'validation' & 'test' subsets, consisting of 202 and 305 videos respectively. Evaluation can
    currently only be done on the competition 'validation' subset using the competition's server, as
    annotations were publicly released only for the 'train' subset of the competition.
    """
    def __init__(self, img_folder: Path, ann_file: Path, transforms, return_masks: bool,
                 num_frames: int, max_skip: int):
        self.img_folder = img_folder
        self.ann_file = ann_file
        self._transforms = transforms
        self.return_masks = return_masks  # not used
        self.num_frames = num_frames
        self.max_skip = max_skip
        # create video meta data
        self.vid_meta, self.vid_data = self.prepare_metas()

        print('\n video num: ', len(self.videos), ' clip num: ', len(self.metas))
        print('\n')

    def prepare_metas(self):
        # read object information
        with open(os.path.join(str(self.img_folder), 'meta.json'), 'r') as f:
            subset_metas_by_video = json.load(f)['videos']

        # read expression data
        with open(str(self.ann_file), 'r') as f:
            subset_expressions_by_video = json.load(f)['videos']
        self.videos = list(subset_expressions_by_video.keys())

        self.metas = []
        skip_vid_count = 0

        for vid in self.videos:
            vid_meta = subset_metas_by_video[vid]
            vid_data = subset_expressions_by_video[vid]
            vid_frames = sorted(vid_data['frames'])
            vid_len = len(vid_frames)

            # NOTE: this early return fires on the first video, so the code below
            # never runs and self.metas stays empty in this snapshot
            return vid_meta, vid_data

            if vid_len < 11:
                # print(f"Too short video: {vid} with frame length {vid_len}")
                skip_vid_count += 1
                continue

            for exp_id, exp_dict in vid_data['expressions'].items():
                # Exclude start_idx (0, 1) and end_idx (vid_len-1, vid_len-2)
                start_idx, end_idx = 2, vid_len - 2
                bin_size = (end_idx - start_idx) // 4

                bins = []
                for i in range(4):
                    bin_start = start_idx + i * bin_size
                    bin_end = bin_start + bin_size if i < 3 else end_idx
                    bins.append((bin_start, bin_end))

                # Randomly sample one frame from each bin
                sample_indx = []
                for start_idx, end_idx in bins:
                    sample_indx.append(random.randint(start_idx, end_idx - 1))
                sample_indx.sort()  # Ensure indices are in order

                for sample_id in sample_indx:
                    meta = {
                        'video': vid,
                        'exp': exp_dict['exp'],
                        'obj_id': int(exp_dict['obj_id']),
                        'frames': vid_frames,
                        'sample_id': sample_id,
                        'sample_frames_id': sample_indx,
                        'bins': bins,
                        'category': vid_meta['objects'][exp_dict['obj_id']]['category']
                    }
                    self.metas.append(meta)

        print(f"skipped {skip_vid_count} short videos")

    @staticmethod
    def bounding_box(img):
        rows = np.any(img, axis=1)
        cols = np.any(img, axis=0)
        rmin, rmax = np.where(rows)[0][[0, -1]]
        cmin, cmax = np.where(cols)[0][[0, -1]]
        return rmin, rmax, cmin, cmax  # y1, y2, x1, x2

    def __len__(self):
        return len(self.metas)

    def __getitem__(self, idx):
        instance_check = False
        while not instance_check:
            meta = self.metas[idx]  # dict

            video, exp, obj_id, category, frames, sample_id, sample_frames_id, bins = \
                meta['video'], meta['exp'], meta['obj_id'], meta['category'], meta['frames'], meta['sample_id'], meta['sample_frames_id'], meta['bins']

            # clean up the caption
            exp = " ".join(exp.lower().split())
            category_id = category_dict[category]
            vid_len = len(frames)

            # num_frames = self.num_frames

            # read frames and masks
            imgs, labels, boxes, masks, valid = [], [], [], [], []
            for frame_indx in sample_frames_id:
                frame_name = frames[frame_indx]
                img_path = os.path.join(str(self.img_folder), 'JPEGImages', video, frame_name + '.jpg')
                mask_path = os.path.join(str(self.img_folder), 'Annotations', video, frame_name + '.png')
                img = Image.open(img_path).convert('RGB')
                mask = Image.open(mask_path).convert('P')

                # create the target
                label = torch.tensor(category_id)
                mask = np.array(mask)
                mask = (mask == obj_id).astype(np.float32)  # 0/1 binary
                if (mask > 0).any():
                    y1, y2, x1, x2 = self.bounding_box(mask)
                    box = torch.tensor([x1, y1, x2, y2]).to(torch.float)
                    valid.append(1)
                else:  # some frames don't contain the instance
                    box = torch.tensor([0, 0, 0, 0]).to(torch.float)
                    valid.append(0)
                mask = torch.from_numpy(mask)

                # append
                imgs.append(img)
                labels.append(label)
                masks.append(mask)
                boxes.append(box)

            # transform
            w, h = img.size
            labels = torch.stack(labels, dim=0)
            boxes = torch.stack(boxes, dim=0)
            boxes[:, 0::2].clamp_(min=0, max=w)
            boxes[:, 1::2].clamp_(min=0, max=h)
            masks = torch.stack(masks, dim=0)
            target = {
                'frames_idx': torch.tensor(sample_frames_id),  # [T,]
                'labels': labels,                              # [T,]
                'boxes': boxes,                                # [T, 4], xyxy
                'masks': masks,                                # [T, H, W]
                'valid': torch.tensor(valid),                  # [T,]
                'caption': exp,
                'orig_size': torch.as_tensor([int(h), int(w)]),
                'size': torch.as_tensor([int(h), int(w)])
            }

            # "boxes" are normalized to [0, 1] and converted from xyxy to cxcywh inside self._transforms
            if self._transforms:
                imgs, target = self._transforms(imgs, target)
                imgs = torch.stack(imgs, dim=0)  # [T, 3, H, W]
            else:
                imgs = np.array(imgs)
                imgs = torch.tensor(imgs.transpose(0, 3, 1, 2))

            # FIXME: handle "valid", since some boxes may be removed due to random crop
            if torch.any(target['valid'] == 1):  # at least one instance
                instance_check = True
            else:
                idx = random.randint(0, self.__len__() - 1)

        return imgs, target


def make_coco_transforms(image_set, max_size=640):
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    scales = [288, 320, 352, 392, 416, 448, 480, 512]

    if image_set == 'train':
        return T.Compose([
            T.RandomHorizontalFlip(),
            T.PhotometricDistort(),
            T.RandomSelect(
                T.Compose([
                    T.RandomResize(scales, max_size=max_size),
                    T.Check(),
                ]),
                T.Compose([
                    T.RandomResize([400, 500, 600]),
                    T.RandomSizeCrop(384, 600),
                    T.RandomResize(scales, max_size=max_size),
                    T.Check(),
                ])
            ),
            normalize,
        ])

    # we do not use the 'val' set since the annotations are inaccessible
    if image_set == 'val':
        return T.Compose([
            T.RandomResize([360], max_size=640),
            normalize,
        ])

    raise ValueError(f'unknown {image_set}')


def build(image_set, args):
    root = Path(args.ytvos_path)
    assert root.exists(), f'provided YTVOS path {root} does not exist'
    PATHS = {
        "train": (root / "train", root / "meta_expressions" / "train" / "meta_expressions.json"),
        "val": (root / "valid", root / "meta_expressions" / "valid" / "meta_expressions.json"),  # not actually used
    }
    img_folder, ann_file = PATHS[image_set]
    # dataset = YTVOSDataset(img_folder, ann_file, transforms=make_coco_transforms(image_set, max_size=args.max_size), return_masks=args.masks,
    #                        num_frames=args.num_frames, max_skip=args.max_skip)
    dataset = YTVOSDataset(img_folder, ann_file, transforms=None, return_masks=args.masks,
                           num_frames=args.num_frames, max_skip=args.max_skip)
    return dataset
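A quick check of the bounding_box() helper on a toy binary mask; this exercises the same np.any / np.where logic as the static method above, outside the class.

import numpy as np

mask = np.zeros((6, 8), dtype=np.float32)
mask[2:5, 3:7] = 1.0  # object occupies rows 2..4, cols 3..6

rows = np.any(mask, axis=1)
cols = np.any(mask, axis=0)
rmin, rmax = np.where(rows)[0][[0, -1]]
cmin, cmax = np.where(cols)[0][[0, -1]]
print(rmin, rmax, cmin, cmax)  # 2 4 3 6 -> y1, y2, x1, x2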
.history/datasets/ytvos_ref_20250114210537.py
ADDED
@@ -0,0 +1,250 @@
"""
Ref-YoutubeVOS data loader
"""
from pathlib import Path

import torch
from torch.utils.data import Dataset
import datasets.transforms_video as T

import os
from PIL import Image
import json
import numpy as np
import random

from datasets.categories import ytvos_category_dict as category_dict


class YTVOSDataset(Dataset):
    """
    A dataset class for the Refer-Youtube-VOS dataset, which was first introduced in the paper:
    "URVOS: Unified Referring Video Object Segmentation Network with a Large-Scale Benchmark"
    (see https://link.springer.com/content/pdf/10.1007/978-3-030-58555-6_13.pdf).
    The original release of the dataset contained both 'first-frame' and 'full-video' expressions. However, the
    'first-frame' subset is no longer publicly available; only the harder 'full-video' subset can be downloaded
    through the Youtube-VOS referring video object segmentation competition page at:
    https://competitions.codalab.org/competitions/29139
    Furthermore, for the competition the subset's original validation set, which consists of 507 videos, was split into
    two competition 'validation' & 'test' subsets, consisting of 202 and 305 videos respectively. Evaluation can
    currently only be done on the competition 'validation' subset using the competition's server, as
    annotations were publicly released only for the 'train' subset of the competition.
    """
    def __init__(self, img_folder: Path, ann_file: Path, transforms, return_masks: bool,
                 num_frames: int, max_skip: int):
        self.img_folder = img_folder
        self.ann_file = ann_file
        self._transforms = transforms
        self.return_masks = return_masks  # not used
        self.num_frames = num_frames
        self.max_skip = max_skip
        # create video meta data
        self.prepare_metas()

        print('\n video num: ', len(self.videos), ' clip num: ', len(self.metas))
        print('\n')

    def prepare_metas(self):
        # read object information
        with open(os.path.join(str(self.img_folder), 'meta.json'), 'r') as f:
            subset_metas_by_video = json.load(f)['videos']

        # read expression data
        with open(str(self.ann_file), 'r') as f:
            subset_expressions_by_video = json.load(f)['videos']
        self.videos = list(subset_expressions_by_video.keys())

        self.metas = []
        skip_vid_count = 0

        for vid in self.videos:
            vid_meta = subset_metas_by_video[vid]
            vid_data = subset_expressions_by_video[vid]
            vid_frames = sorted(vid_data['frames'])
            vid_len = len(vid_frames)

            if vid_len < 11:
                # print(f"Too short video: {vid} with frame length {vid_len}")
                skip_vid_count += 1
                continue

            # Exclude start_idx (0, 1) and end_idx (vid_len-1, vid_len-2)
            start_idx, end_idx = 2, vid_len - 2
            bin_size = (end_idx - start_idx) // 4

            bins = []
            for i in range(4):
                bin_start = start_idx + i * bin_size
                bin_end = bin_start + bin_size if i < 3 else end_idx
                bins.append((bin_start, bin_end))

            # Randomly sample one frame from each bin
            sample_indx = []
            for start_idx, end_idx in bins:
                sample_indx.append(random.randint(start_idx, end_idx - 1))
            sample_indx.sort()  # Ensure indices are in order

            meta = {
                'video': vid,
                'sample_indx': sample_indx,
                'bins': bins
            }
            obj_id_cat = {}
            for exp_id, exp_dict in vid_data['expressions'].items():
                obj_id = exp_dict['obj_id']
                if obj_id not in obj_id_cat:
                    obj_id_cat[obj_id] = vid_meta['objects'][obj_id]['category']
            meta['obj_id_cat'] = obj_id_cat
            self.metas.append(meta)

        print(f"skipped {skip_vid_count} short videos")

    @staticmethod
    def bounding_box(img):
        rows = np.any(img, axis=1)
        cols = np.any(img, axis=0)
        rmin, rmax = np.where(rows)[0][[0, -1]]
        cmin, cmax = np.where(cols)[0][[0, -1]]
        return rmin, rmax, cmin, cmax  # y1, y2, x1, x2

    def __len__(self):
        return len(self.metas)

    def __getitem__(self, idx):
        instance_check = False
        while not instance_check:
            meta = self.metas[idx]  # dict

            # FIXME: these per-expression keys are no longer stored by prepare_metas()
            # above, which in this snapshot builds one meta per video
            video, exp, obj_id, category, frames, sample_id, sample_frames_id, bins = \
                meta['video'], meta['exp'], meta['obj_id'], meta['category'], meta['frames'], meta['sample_id'], meta['sample_frames_id'], meta['bins']

            # clean up the caption
            exp = " ".join(exp.lower().split())
            category_id = category_dict[category]
            vid_len = len(frames)

            # num_frames = self.num_frames

            # read frames and masks
            imgs, labels, boxes, masks, valid = [], [], [], [], []
            for frame_indx in sample_frames_id:
                frame_name = frames[frame_indx]
                img_path = os.path.join(str(self.img_folder), 'JPEGImages', video, frame_name + '.jpg')
                mask_path = os.path.join(str(self.img_folder), 'Annotations', video, frame_name + '.png')
                img = Image.open(img_path).convert('RGB')
                mask = Image.open(mask_path).convert('P')

                # create the target
                label = torch.tensor(category_id)
                mask = np.array(mask)
                mask = (mask == obj_id).astype(np.float32)  # 0/1 binary
                if (mask > 0).any():
                    y1, y2, x1, x2 = self.bounding_box(mask)
                    box = torch.tensor([x1, y1, x2, y2]).to(torch.float)
                    valid.append(1)
                else:  # some frames don't contain the instance
                    box = torch.tensor([0, 0, 0, 0]).to(torch.float)
                    valid.append(0)
                mask = torch.from_numpy(mask)

                # append
                imgs.append(img)
                labels.append(label)
                masks.append(mask)
                boxes.append(box)

            # transform
            w, h = img.size
            labels = torch.stack(labels, dim=0)
            boxes = torch.stack(boxes, dim=0)
            boxes[:, 0::2].clamp_(min=0, max=w)
            boxes[:, 1::2].clamp_(min=0, max=h)
            masks = torch.stack(masks, dim=0)
            target = {
                'frames_idx': torch.tensor(sample_frames_id),  # [T,]
                'labels': labels,                              # [T,]
                'boxes': boxes,                                # [T, 4], xyxy
                'masks': masks,                                # [T, H, W]
                'valid': torch.tensor(valid),                  # [T,]
                'caption': exp,
                'orig_size': torch.as_tensor([int(h), int(w)]),
                'size': torch.as_tensor([int(h), int(w)])
            }

            # "boxes" are normalized to [0, 1] and converted from xyxy to cxcywh inside self._transforms
            if self._transforms:
                imgs, target = self._transforms(imgs, target)
                imgs = torch.stack(imgs, dim=0)  # [T, 3, H, W]
            else:
                imgs = np.array(imgs)
                imgs = torch.tensor(imgs.transpose(0, 3, 1, 2))

            # FIXME: handle "valid", since some boxes may be removed due to random crop
            if torch.any(target['valid'] == 1):  # at least one instance
                instance_check = True
            else:
                idx = random.randint(0, self.__len__() - 1)

        return imgs, target


def make_coco_transforms(image_set, max_size=640):
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    scales = [288, 320, 352, 392, 416, 448, 480, 512]

    if image_set == 'train':
        return T.Compose([
            T.RandomHorizontalFlip(),
            T.PhotometricDistort(),
            T.RandomSelect(
                T.Compose([
                    T.RandomResize(scales, max_size=max_size),
                    T.Check(),
                ]),
                T.Compose([
                    T.RandomResize([400, 500, 600]),
                    T.RandomSizeCrop(384, 600),
                    T.RandomResize(scales, max_size=max_size),
                    T.Check(),
                ])
            ),
            normalize,
        ])

    # we do not use the 'val' set since the annotations are inaccessible
    if image_set == 'val':
        return T.Compose([
            T.RandomResize([360], max_size=640),
            normalize,
        ])

    raise ValueError(f'unknown {image_set}')


def build(image_set, args):
    root = Path(args.ytvos_path)
    assert root.exists(), f'provided YTVOS path {root} does not exist'
    PATHS = {
        "train": (root / "train", root / "meta_expressions" / "train" / "meta_expressions.json"),
        "val": (root / "valid", root / "meta_expressions" / "valid" / "meta_expressions.json"),  # not actually used
    }
    img_folder, ann_file = PATHS[image_set]
    # dataset = YTVOSDataset(img_folder, ann_file, transforms=make_coco_transforms(image_set, max_size=args.max_size), return_masks=args.masks,
    #                        num_frames=args.num_frames, max_skip=args.max_skip)
    dataset = YTVOSDataset(img_folder, ann_file, transforms=None, return_masks=args.masks,
                           num_frames=args.num_frames, max_skip=args.max_skip)
    return dataset
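A hedged usage sketch for build(): the attribute names below (ytvos_path, masks, num_frames, max_skip) come from the calls in the file above, but the values are placeholders, not the project's real config.

from types import SimpleNamespace

args = SimpleNamespace(
    ytvos_path='/path/to/ref-youtube-vos',  # hypothetical dataset root
    masks=True,
    num_frames=4,
    max_skip=3,
)
# dataset = build('train', args)
# imgs, target = dataset[0]  # imgs: [T, 3, H, W], target: dict as above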
.history/make_ref-ytvos/annotate_ref_ytvos_20241227174304.py
ADDED
@@ -0,0 +1,288 @@
from datasets import build_dataset
import argparse
import opts

import sys
from pathlib import Path
from os import path as osp
import skimage.io as io

import numpy as np
import pandas as pd
import regex as re
import json

import cv2
from PIL import Image
import torch
from torchvision.transforms import functional as F

from skimage import measure                         # (pip install scikit-image)
from shapely.geometry import Polygon, MultiPolygon  # (pip install Shapely)

import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
from matplotlib.patches import Polygon as PlotPolygon, Rectangle

import ipywidgets as widgets
from IPython.display import display, clear_output

parser = argparse.ArgumentParser('ReferFormer training and evaluation script', parents=[opts.get_args_parser()])
args = parser.parse_args()

# ================== Load the data ===================
# the full dataset
train_dataset = build_dataset('ytvos', image_set='train', args=args)

# metadata for the full dataset
metas = train_dataset.metas

# the filtered frames
selected_frames_df = pd.read_json("selected_frames4.jsonl", lines=True)

# ================== Mask creation helpers ===================
def prepare_mask_for_pil(mask_tensor):
    mask_array = mask_tensor.squeeze(0).cpu().numpy()
    mask_array = (mask_array * 255).astype(np.uint8)
    mask_image = Image.fromarray(mask_array)
    return mask_image

def create_sub_masks(mask_image):
    width, height = mask_image.size

    sub_masks = {}
    for x in range(width):
        for y in range(height):
            # Get the RGB values of the pixel
            pixel = mask_image.getpixel((x, y))

            # If the pixel is not black...
            if pixel != 0:
                # Check to see if we've created a sub-mask...
                pixel_str = str(pixel)
                sub_mask = sub_masks.get(pixel_str)
                if sub_mask is None:
                    # Create a sub-mask (one bit per pixel) and add it to the dictionary
                    # Note: we add 1 pixel of padding in each direction
                    # because the contours module doesn't handle cases
                    # where pixels bleed to the edge of the image
                    sub_masks[pixel_str] = Image.new('1', (width + 2, height + 2))

                # Set the pixel value to 1 (default is 0), accounting for padding
                sub_masks[pixel_str].putpixel((x + 1, y + 1), 1)
    return sub_masks

# ================== Build the mask annotation ===================
def create_sub_mask_annotation(sub_mask, image_id, annotation_id, is_crowd):
    # Find contours (boundary lines) around each sub-mask
    # Note: there could be multiple contours if the object
    # is partially occluded. (E.g. an elephant behind a tree)
    contours = measure.find_contours(sub_mask, 0.5, positive_orientation='low')

    segmentations = []
    polygons = []
    for contour in contours:
        # Flip from (row, col) representation to (x, y)
        # and subtract the padding pixel
        for i in range(len(contour)):
            row, col = contour[i]
            contour[i] = (col - 1, row - 1)

        # Make a polygon and simplify it
        poly = Polygon(contour)
        poly = poly.simplify(1.0, preserve_topology=False)
        polygons.append(poly)
        segmentation = np.array(poly.exterior.coords).ravel().tolist()
        segmentations.append(segmentation)

    # Combine the polygons to calculate the bounding box and area
    multi_poly = MultiPolygon(polygons)
    x, y, max_x, max_y = multi_poly.bounds
    width = max_x - x
    height = max_y - y
    bbox = (x, y, width, height)
    area = multi_poly.area

    annotation = {
        'segmentation': segmentations,
        'iscrowd': is_crowd,
        'image_id': image_id,
        'id': annotation_id,
        'bbox': bbox,
        'area': area
    }
    return annotation

# ================== Visualization ===================
# takes an annotation dictionary as input
def showRef(annotation, image_dir, seg_box='seg'):
    ax = plt.gca()
    I = io.imread(osp.join(image_dir, annotation['file_name']))
    ax.imshow(I)

    for sid, sent in enumerate(annotation['sentences']):
        print('%s. %s' % (sid + 1, sent))

    if seg_box == 'seg':
        polygons = []
        color = []
        c = (np.random.random((1, 3)) * 0.6 + 0.4).tolist()[0]

        if type(annotation['segmentation'][0]) == list:
            # polygons are used for refcoco*
            for seg in annotation['segmentation']:
                poly = np.array(seg).reshape((int(len(seg) / 2), 2))
                polygons.append(PlotPolygon(poly))
                color.append(c)

            p = PatchCollection(polygons,
                                facecolors=(221/255, 160/255, 221/255),  # light purple
                                linewidths=0,
                                alpha=0.4)
            ax.add_collection(p)

            p = PatchCollection(polygons,
                                facecolors='none',
                                edgecolors=color,
                                linewidths=2)
            ax.add_collection(p)
        # else:
        #     # mask used for refclef
        #     rle = annotation['segmentation']
        #     m = mask.decode(rle)
        #     img = np.ones((m.shape[0], m.shape[1], 3))
        #     color_mask = np.array([2.0, 166.0, 101.0]) / 255
        #     for i in range(3):
        #         img[:, :, i] = color_mask[i]
        #     ax.imshow(np.dstack((img, m * 0.5)))

    # bounding box
    elif seg_box == 'box':
        bbox = annotation['bbox']
        box_plot = Rectangle((bbox[0], bbox[1]),
                             bbox[2],
                             bbox[3],
                             fill=False,
                             edgecolor='green',
                             linewidth=3)
        ax.add_patch(box_plot)

# ================== Putting it all together ===================
def create_dict_from_selected_images(selected_frames_df):

    image_id = 0
    anno_id = 0
    train_idx = 0

    with open("/home/yejin/data/data/dataset/VRIS/mbench/ytvos/selected_instances2.jsonl", "w") as f:

        for selected_idx in range(len(selected_frames_df)):
            selected = selected_frames_df.loc[selected_idx]
            selected_vid_id = selected['video']
            selected_frame_id = selected['frame_id']

            for obj_id in selected['objects'].keys():

                selected_exp = selected['objects'][obj_id][0]   # caption
                selected_verb = selected['objects'][obj_id][1]  # verb

                train_idx = next(
                    idx for idx, meta in enumerate(metas)
                    if meta['video'] == selected_vid_id
                    and meta['frame_id'] == selected_frame_id
                    and meta['obj_id'] == int(obj_id)
                    and meta['exp'] == selected_exp
                )

                train_frames, train_info = train_dataset[train_idx]

                try:
                    valid_frame_loc = train_info['frames_idx'].tolist().index(selected_frame_id)  # index of the valid frame
                except ValueError:
                    print(f"selected vid id: {selected_vid_id}, metas['frame_id']: {metas[train_idx]['frame_id']}, selected frame id: {selected_frame_id}, train_info['frames_idx']: {train_info['frames_idx'].tolist()}")

                frame = train_frames[valid_frame_loc]  # the selected frame
                frame = F.to_pil_image(frame)

                image_file_name = f"{selected_vid_id}_{str(selected_frame_id).rjust(5, '0')}"

                # save the original frame
                save_dir = Path("/home/yejin/data/data/dataset/VRIS/mbench/ytvos/selected_frames")
                # save_dir.mkdir(exist_ok=True)
                save_path = save_dir / f"{image_file_name}.png"
                # frame.save(save_path)

                # category
                label = train_info['labels'][valid_frame_loc].item()  # category id
                category_name = metas[train_idx]['category']          # category name

                # box information
                box = train_info['boxes'][valid_frame_loc]

                # Annotation tools ########################################################################
                mask = train_info['masks'][valid_frame_loc]
                # print(mask.shape)

                # just to verify that frame and mask line up
                # plt.imshow(frame.permute(1, 2, 0))
                # mask_color = np.zeros((*mask.shape, 3), dtype=np.uint8)
                # mask_color[mask == 1] = [255, 0, 0]
                # plt.imshow(mask_color, alpha=0.5)
                # plt.show()

                mask_image = prepare_mask_for_pil(mask)
                sub_masks = create_sub_masks(mask_image)

                for color, sub_mask in sub_masks.items():
                    # print(f"Color: {color}, Sub-mask size: {sub_mask.size}")
                    sub_mask_array = np.array(sub_mask, dtype=np.uint8)
                    annotation = create_sub_mask_annotation(sub_mask_array, image_id, anno_id, is_crowd=0)
                    anno_id += 1
                image_id += 1

                # add the file path
                annotation['file_name'] = f"{image_file_name}.png"

                # drop unnecessary fields
                annotation.pop('iscrowd', None)
                annotation.pop('image_id', None)
                annotation.pop('id', None)

                valid = train_info['valid'][valid_frame_loc]
                orig_size = train_info['orig_size']
                size = train_info['size']
                caption = metas[train_idx]['exp']

                # add filename, height and width
                # annotation['file_name'] = save_path
                annotation['height'] = orig_size[0].item()
                annotation['width'] = orig_size[1].item()

                # add category id, category name and the sentence dictionary
                annotation['label'] = label
                annotation['category_name'] = category_name
                sentence_dict = {
                    "tokens": caption.split(' '),
                    "raw": caption,
                    "sent": re.sub(r'[^A-Za-z0-9\s]+', '', caption.lower())
                }
                annotation['sentences'] = sentence_dict
                ############################################################################################
                # double check the segmentation annotation
                # orig_img_np = draw_polygon_on_image(frame, annotation['segmentation'])
                # plt.imshow(orig_img_np)
                # plt.axis('off')
                # plt.show()

                # showRef(annotation, save_dir)
                ############################################################################################

                # final write
                f.write(json.dumps(annotation) + "\n")
                f.flush()

if __name__ == '__main__':
    create_dict_from_selected_images(selected_frames_df)
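A self-contained sketch of the contour -> polygon -> bbox/area pipeline used by create_sub_mask_annotation above, run on a toy mask (same libraries, simplified flow; the mask values are made up for illustration):

import numpy as np
from skimage import measure
from shapely.geometry import Polygon, MultiPolygon

mask = np.zeros((10, 10), dtype=np.uint8)
mask[2:8, 3:9] = 1

polygons = []
for contour in measure.find_contours(mask, 0.5, positive_orientation='low'):
    contour = np.flip(contour, axis=1)  # (row, col) -> (x, y)
    poly = Polygon(contour).simplify(1.0, preserve_topology=False)
    polygons.append(poly)

multi_poly = MultiPolygon(polygons)
x, y, max_x, max_y = multi_poly.bounds
print('bbox:', (x, y, max_x - x, max_y - y), 'area:', multi_poly.area)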
.history/make_ref-ytvos/annotate_ref_ytvos_20250113111315.py
ADDED
@@ -0,0 +1,288 @@
from datasets import build_dataset
import argparse
import opts

import sys
from pathlib import Path
from os import path as osp
import skimage.io as io

import numpy as np
import pandas as pd
import regex as re
import json

import cv2
from PIL import Image
import torch
from torchvision.transforms import functional as F

from skimage import measure                         # (pip install scikit-image)
from shapely.geometry import Polygon, MultiPolygon  # (pip install Shapely)

import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
from matplotlib.patches import Polygon as PlotPolygon, Rectangle

import ipywidgets as widgets
from IPython.display import display, clear_output

parser = argparse.ArgumentParser('ReferFormer training and evaluation script', parents=[opts.get_args_parser()])
args = parser.parse_args()

# ================== Load the data ===================
# the full dataset
train_dataset = build_dataset('ytvos', image_set='train', args=args)

# metadata for the full dataset
metas = train_dataset.metas

# the filtered frames
selected_frames_df = pd.read_json("selected_frames4.jsonl", lines=True)

# ================== Mask creation helpers ===================
def prepare_mask_for_pil(mask_tensor):
    mask_array = mask_tensor.squeeze(0).cpu().numpy()
    mask_array = (mask_array * 255).astype(np.uint8)
    mask_image = Image.fromarray(mask_array)
    return mask_image

def create_sub_masks(mask_image):
    width, height = mask_image.size

    sub_masks = {}
    for x in range(width):
        for y in range(height):
            # Get the RGB values of the pixel
            pixel = mask_image.getpixel((x, y))

            # If the pixel is not black...
            if pixel != 0:
                # Check to see if we've created a sub-mask...
                pixel_str = str(pixel)
                sub_mask = sub_masks.get(pixel_str)
                if sub_mask is None:
                    # Create a sub-mask (one bit per pixel) and add it to the dictionary
                    # Note: we add 1 pixel of padding in each direction
                    # because the contours module doesn't handle cases
                    # where pixels bleed to the edge of the image
                    sub_masks[pixel_str] = Image.new('1', (width + 2, height + 2))

                # Set the pixel value to 1 (default is 0), accounting for padding
                sub_masks[pixel_str].putpixel((x + 1, y + 1), 1)
    return sub_masks

# ================== Build the mask annotation ===================
def create_sub_mask_annotation(sub_mask, image_id, annotation_id, is_crowd):
    # Find contours (boundary lines) around each sub-mask
    # Note: there could be multiple contours if the object
    # is partially occluded. (E.g. an elephant behind a tree)
    contours = measure.find_contours(sub_mask, 0.5, positive_orientation='low')

    segmentations = []
    polygons = []
    for contour in contours:
        # Flip from (row, col) representation to (x, y)
        # and subtract the padding pixel
        for i in range(len(contour)):
            row, col = contour[i]
            contour[i] = (col - 1, row - 1)

        # Make a polygon and simplify it
        poly = Polygon(contour)
        poly = poly.simplify(1.0, preserve_topology=False)
        polygons.append(poly)
        segmentation = np.array(poly.exterior.coords).ravel().tolist()
        segmentations.append(segmentation)

    # Combine the polygons to calculate the bounding box and area
    multi_poly = MultiPolygon(polygons)
    x, y, max_x, max_y = multi_poly.bounds
    width = max_x - x
    height = max_y - y
    bbox = (x, y, width, height)
    area = multi_poly.area

    annotation = {
        'segmentation': segmentations,
        'iscrowd': is_crowd,
        'image_id': image_id,
        'id': annotation_id,
        'bbox': bbox,
        'area': area
    }
    return annotation

# ================== Visualization ===================
# takes an annotation dictionary as input
def showRef(annotation, image_dir, seg_box='seg'):
    ax = plt.gca()
    I = io.imread(osp.join(image_dir, annotation['file_name']))
    ax.imshow(I)

    for sid, sent in enumerate(annotation['sentences']):
        print('%s. %s' % (sid + 1, sent))

    if seg_box == 'seg':
        polygons = []
        color = []
        c = (np.random.random((1, 3)) * 0.6 + 0.4).tolist()[0]

        if type(annotation['segmentation'][0]) == list:
            # polygons are used for refcoco*
            for seg in annotation['segmentation']:
                poly = np.array(seg).reshape((int(len(seg) / 2), 2))
                polygons.append(PlotPolygon(poly))
                color.append(c)

            p = PatchCollection(polygons,
                                facecolors=(221/255, 160/255, 221/255),  # light purple
                                linewidths=0,
                                alpha=0.4)
            ax.add_collection(p)

            p = PatchCollection(polygons,
                                facecolors='none',
                                edgecolors=color,
                                linewidths=2)
            ax.add_collection(p)
        # else:
        #     # mask used for refclef
        #     rle = annotation['segmentation']
        #     m = mask.decode(rle)
        #     img = np.ones((m.shape[0], m.shape[1], 3))
        #     color_mask = np.array([2.0, 166.0, 101.0]) / 255
        #     for i in range(3):
        #         img[:, :, i] = color_mask[i]
        #     ax.imshow(np.dstack((img, m * 0.5)))

    # bounding box
    elif seg_box == 'box':
        bbox = annotation['bbox']
        box_plot = Rectangle((bbox[0], bbox[1]),
                             bbox[2],
                             bbox[3],
                             fill=False,
                             edgecolor='green',
                             linewidth=3)
        ax.add_patch(box_plot)

# ================== Putting it all together ===================
def create_dict_from_selected_images(selected_frames_df):

    image_id = 0
    anno_id = 0
    train_idx = 0

    with open("/home/yejin/data/data/dataset/VRIS/mbench/ytvos/selected_instances2.jsonl", "w") as f:

        for selected_idx in range(len(selected_frames_df)):
            selected = selected_frames_df.loc[selected_idx]
            selected_vid_id = selected['video']
            selected_frame_id = selected['frame_id']

            for obj_id in selected['objects'].keys():

                selected_exp = selected['objects'][obj_id][0]   # caption
                selected_verb = selected['objects'][obj_id][1]  # verb

                train_idx = next(
                    idx for idx, meta in enumerate(metas)
                    if meta['video'] == selected_vid_id
                    and meta['frame_id'] == selected_frame_id
                    and meta['obj_id'] == int(obj_id)
                    and meta['exp'] == selected_exp
                )

                train_frames, train_info = train_dataset[train_idx]

                try:
                    valid_frame_loc = train_info['frames_idx'].tolist().index(selected_frame_id)  # index of the valid frame
                except ValueError:
                    print(f"selected vid id: {selected_vid_id}, metas['frame_id']: {metas[train_idx]['frame_id']}, selected frame id: {selected_frame_id}, train_info['frames_idx']: {train_info['frames_idx'].tolist()}")

                frame = train_frames[valid_frame_loc]  # the selected frame
                frame = F.to_pil_image(frame)

                image_file_name = f"{selected_vid_id}_{str(selected_frame_id).rjust(5, '0')}"

                # save the original frame
                save_dir = Path("/home/yejin/data/data/dataset/VRIS/mbench/ytvos/selected_frames")
                # save_dir.mkdir(exist_ok=True)
                save_path = save_dir / f"{image_file_name}.png"
                # frame.save(save_path)

                # category
                label = train_info['labels'][valid_frame_loc].item()  # category id
                category_name = metas[train_idx]['category']          # category name

                # box information
                box = train_info['boxes'][valid_frame_loc]

                # Annotation tools ########################################################################
                mask = train_info['masks'][valid_frame_loc]
                # print(mask.shape)

                # just to verify that frame and mask line up
                # plt.imshow(frame.permute(1, 2, 0))
                # mask_color = np.zeros((*mask.shape, 3), dtype=np.uint8)
                # mask_color[mask == 1] = [255, 0, 0]
                # plt.imshow(mask_color, alpha=0.5)
                # plt.show()

                mask_image = prepare_mask_for_pil(mask)
                sub_masks = create_sub_masks(mask_image)

                for color, sub_mask in sub_masks.items():
                    # print(f"Color: {color}, Sub-mask size: {sub_mask.size}")
                    sub_mask_array = np.array(sub_mask, dtype=np.uint8)
                    annotation = create_sub_mask_annotation(sub_mask_array, image_id, anno_id, is_crowd=0)
                    anno_id += 1
                image_id += 1

                # add the file path
                annotation['file_name'] = f"{image_file_name}.png"

                # drop unnecessary fields
                annotation.pop('iscrowd', None)
                annotation.pop('image_id', None)
                annotation.pop('id', None)

                valid = train_info['valid'][valid_frame_loc]
                orig_size = train_info['orig_size']
                size = train_info['size']
                caption = metas[train_idx]['exp']

                # add filename, height and width
                # annotation['file_name'] = save_path
                annotation['height'] = orig_size[0].item()
                annotation['width'] = orig_size[1].item()

                # add category id, category name and the sentence dictionary
                annotation['label'] = label
                annotation['category_name'] = category_name
                sentence_dict = {
                    "tokens": caption.split(' '),
                    "raw": caption,
                    "sent": re.sub(r'[^A-Za-z0-9\s]+', '', caption.lower())
                }
                annotation['sentences'] = sentence_dict
                ############################################################################################
                # double check the segmentation annotation
                # orig_img_np = draw_polygon_on_image(frame, annotation['segmentation'])
                # plt.imshow(orig_img_np)
                # plt.axis('off')
                # plt.show()

                # showRef(annotation, save_dir)
                ############################################################################################

                # final write
                f.write(json.dumps(annotation) + "\n")
                f.flush()

# if __name__ == '__main__':
#     create_dict_from_selected_images(selected_frames_df)
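A tiny illustration of the sentence_dict cleanup above, using the same raw-string regex (the caption value is made up):

import regex as re

caption = "A man, riding a bike!"
sent = re.sub(r'[^A-Za-z0-9\s]+', '', caption.lower())
print(sent)  # 'a man riding a bike'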
davis2017/utils.py
ADDED
@@ -0,0 +1,174 @@
import os
import errno
import numpy as np
from PIL import Image
import warnings
from davis2017.davis import DAVIS


def _pascal_color_map(N=256, normalized=False):
    """
    Python implementation of the color map function for the PASCAL VOC data set.
    Official Matlab version can be found in the PASCAL VOC devkit
    http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html#devkit
    """

    def bitget(byteval, idx):
        return (byteval & (1 << idx)) != 0

    dtype = 'float32' if normalized else 'uint8'
    cmap = np.zeros((N, 3), dtype=dtype)
    for i in range(N):
        r = g = b = 0
        c = i
        for j in range(8):
            r = r | (bitget(c, 0) << 7 - j)
            g = g | (bitget(c, 1) << 7 - j)
            b = b | (bitget(c, 2) << 7 - j)
            c = c >> 3

        cmap[i] = np.array([r, g, b])

    cmap = cmap / 255 if normalized else cmap
    return cmap


def overlay_semantic_mask(im, ann, alpha=0.5, colors=None, contour_thickness=None):
    # dtype=int instead of the deprecated np.int (removed in NumPy >= 1.24)
    im, ann = np.asarray(im, dtype=np.uint8), np.asarray(ann, dtype=int)
    if im.shape[:-1] != ann.shape:
        raise ValueError('First two dimensions of `im` and `ann` must match')
    if im.shape[-1] != 3:
        raise ValueError('`im` must have three channels in the last dimension')

    colors = colors or _pascal_color_map()
    colors = np.asarray(colors, dtype=np.uint8)

    mask = colors[ann]
    fg = im * alpha + (1 - alpha) * mask

    img = im.copy()
    img[ann > 0] = fg[ann > 0]

    if contour_thickness:  # pragma: no cover
        import cv2
        for obj_id in np.unique(ann[ann > 0]):
            contours = cv2.findContours((ann == obj_id).astype(
                np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]
            cv2.drawContours(img, contours[0], -1, colors[obj_id].tolist(),
                             contour_thickness)
    return img


def generate_obj_proposals(davis_root, subset, num_proposals, save_path):
    dataset = DAVIS(davis_root, subset=subset, codalab=True)
    for seq in dataset.get_sequences():
        save_dir = os.path.join(save_path, seq)
        if os.path.exists(save_dir):
            continue
        all_gt_masks, all_masks_id = dataset.get_all_masks(seq, True)
        img_size = all_gt_masks.shape[2:]
        num_rows = int(np.ceil(np.sqrt(num_proposals)))
        proposals = np.zeros((num_proposals, len(all_masks_id), *img_size))
        height_slices = np.floor(np.arange(0, img_size[0] + 1, img_size[0] / num_rows)).astype(np.uint).tolist()
        width_slices = np.floor(np.arange(0, img_size[1] + 1, img_size[1] / num_rows)).astype(np.uint).tolist()
        ii = 0
        prev_h, prev_w = 0, 0
        for h in height_slices[1:]:
            for w in width_slices[1:]:
                proposals[ii, :, prev_h:h, prev_w:w] = 1
                prev_w = w
                ii += 1
                if ii == num_proposals:
                    break
            prev_h, prev_w = h, 0
            if ii == num_proposals:
                break

        os.makedirs(save_dir, exist_ok=True)
        for i, mask_id in enumerate(all_masks_id):
            mask = np.sum(proposals[:, i, ...] * np.arange(1, proposals.shape[0] + 1)[:, None, None], axis=0)
            save_mask(mask, os.path.join(save_dir, f'{mask_id}.png'))


def generate_random_permutation_gt_obj_proposals(davis_root, subset, save_path):
    dataset = DAVIS(davis_root, subset=subset, codalab=True)
    for seq in dataset.get_sequences():
        gt_masks, all_masks_id = dataset.get_all_masks(seq, True)
        obj_swap = np.random.permutation(np.arange(gt_masks.shape[0]))
        gt_masks = gt_masks[obj_swap, ...]
        save_dir = os.path.join(save_path, seq)
        os.makedirs(save_dir, exist_ok=True)
        for i, mask_id in enumerate(all_masks_id):
            mask = np.sum(gt_masks[:, i, ...] * np.arange(1, gt_masks.shape[0] + 1)[:, None, None], axis=0)
            save_mask(mask, os.path.join(save_dir, f'{mask_id}.png'))


def color_map(N=256, normalized=False):
    def bitget(byteval, idx):
        return ((byteval & (1 << idx)) != 0)

    dtype = 'float32' if normalized else 'uint8'
    cmap = np.zeros((N, 3), dtype=dtype)
    for i in range(N):
        r = g = b = 0
        c = i
        for j in range(8):
            r = r | (bitget(c, 0) << 7 - j)
            g = g | (bitget(c, 1) << 7 - j)
            b = b | (bitget(c, 2) << 7 - j)
            c = c >> 3

        cmap[i] = np.array([r, g, b])

    cmap = cmap / 255 if normalized else cmap
    return cmap


def save_mask(mask, img_path):
    if np.max(mask) > 255:
        raise ValueError('Maximum id pixel value is 255')
    mask_img = Image.fromarray(mask.astype(np.uint8))
    mask_img.putpalette(color_map().flatten().tolist())
    mask_img.save(img_path)


def db_statistics(per_frame_values):
    """Compute mean, recall and decay from a per-frame evaluation.

    Arguments:
        per_frame_values (ndarray): per-frame evaluation

    Returns:
        M, O, D (float, float, float): evaluation statistics: mean, recall, decay.
    """

    # strip off nan values
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        M = np.nanmean(per_frame_values)
        O = np.nanmean(per_frame_values > 0.5)

    N_bins = 4
    ids = np.round(np.linspace(1, len(per_frame_values), N_bins + 1) + 1e-10) - 1
    ids = ids.astype(np.uint8)

    D_bins = [per_frame_values[ids[i]:ids[i + 1] + 1] for i in range(0, 4)]

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        D = np.nanmean(D_bins[0]) - np.nanmean(D_bins[3])

    return M, O, D


def list_files(dir, extension=".png"):
    return [os.path.splitext(file_)[0] for file_ in os.listdir(dir) if file_.endswith(extension)]


def force_symlink(file1, file2):
    try:
        os.symlink(file1, file2)
    except OSError as e:
        if e.errno == errno.EEXIST:
            os.remove(file2)
            os.symlink(file1, file2)
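db_statistics above turns a per-frame J or F curve into the familiar DAVIS mean/recall/decay triple: mean over frames, fraction of frames above 0.5, and first-quarter mean minus last-quarter mean. A minimal sketch on synthetic scores (the input array is made up purely for illustration):

import numpy as np
from davis2017.utils import db_statistics

# Synthetic per-frame IoU (J) values for one sequence: quality degrades over time.
j_per_frame = np.linspace(0.9, 0.4, num=40)

M, O, D = db_statistics(j_per_frame)
# M: mean J over frames; O: fraction of frames with J > 0.5 (recall);
# D: mean of the first quarter minus mean of the last quarter (decay).
print(f"mean={M:.3f} recall={O:.3f} decay={D:.3f}")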
inference_davis.py
ADDED
@@ -0,0 +1,330 @@
'''
Inference code for ReferFormer, on Ref-DAVIS17
Modified from DETR (https://github.com/facebookresearch/detr)
Ref-DAVIS17 does not support visualization
'''
import argparse
import json
import random
import time
from pathlib import Path

import numpy as np
import torch

import util.misc as utils
from models import build_model
import torchvision.transforms as T
import matplotlib.pyplot as plt
import os
import cv2
from PIL import Image, ImageDraw
import math
import torch.nn.functional as F

import opts
from tqdm import tqdm

import multiprocessing as mp
import threading

from tools.colormap import colormap


# colormap
color_list = colormap()
color_list = color_list.astype('uint8').tolist()

# build transform
transform = T.Compose([
    T.Resize(360),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


def main(args):
    args.dataset_file = "davis"
    args.masks = True
    args.batch_size = 1  # was `args.batch_size == 1`, a no-op comparison; inference requires batch size 1
    print("Inference only supports batch size = 1")
    print(args)

    # fix the seed for reproducibility
    seed = args.seed + utils.get_rank()
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    split = args.split
    # save path
    output_dir = args.output_dir
    save_path_prefix = os.path.join(output_dir, split)
    if not os.path.exists(save_path_prefix):
        os.makedirs(save_path_prefix)

    save_visualize_path_prefix = os.path.join(output_dir, split + '_images')
    if args.visualize:
        if not os.path.exists(save_visualize_path_prefix):
            os.makedirs(save_visualize_path_prefix)

    # load data
    root = Path(args.davis_path)  # data/ref-davis
    img_folder = os.path.join(root, split, "JPEGImages")
    meta_file = os.path.join(root, "meta_expressions", split, "meta_expressions.json")
    with open(meta_file, "r") as f:
        data = json.load(f)["videos"]
    video_list = list(data.keys())

    # create subprocesses, one per GPU
    thread_num = args.ngpu
    global result_dict
    result_dict = mp.Manager().dict()

    processes = []
    lock = threading.Lock()

    video_num = len(video_list)
    per_thread_video_num = math.ceil(float(video_num) / float(thread_num))

    start_time = time.time()
    print('Start inference')
    for i in range(thread_num):
        if i == thread_num - 1:
            sub_video_list = video_list[i * per_thread_video_num:]
        else:
            sub_video_list = video_list[i * per_thread_video_num: (i + 1) * per_thread_video_num]
        p = mp.Process(target=sub_processor, args=(lock, i, args, data,
                                                   save_path_prefix, save_visualize_path_prefix,
                                                   img_folder, sub_video_list))
        p.start()
        processes.append(p)

    for p in processes:
        p.join()

    end_time = time.time()
    total_time = end_time - start_time

    result_dict = dict(result_dict)
    num_all_frames_gpus = 0
    for pid, num_all_frames in result_dict.items():
        num_all_frames_gpus += num_all_frames

    print("Total inference time: %.4f s" % (total_time))


def sub_processor(lock, pid, args, data, save_path_prefix, save_visualize_path_prefix, img_folder, video_list):
    text = 'processor %d' % pid
    with lock:
        progress = tqdm(
            total=len(video_list),
            position=pid,
            desc=text,
            ncols=0
        )
    torch.cuda.set_device(pid)

    # model
    model, criterion, _ = build_model(args)
    device = args.device
    model.to(device)

    model_without_ddp = model
    n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)

    if pid == 0:
        print('number of params:', n_parameters)

    if args.resume:
        checkpoint = torch.load(args.resume, map_location='cpu')
        missing_keys, unexpected_keys = model_without_ddp.load_state_dict(checkpoint['model'], strict=False)
        unexpected_keys = [k for k in unexpected_keys if not (k.endswith('total_params') or k.endswith('total_ops'))]
        if len(missing_keys) > 0:
            print('Missing Keys: {}'.format(missing_keys))
        if len(unexpected_keys) > 0:
            print('Unexpected Keys: {}'.format(unexpected_keys))
    else:
        raise ValueError('Please specify the checkpoint for inference.')

    # get palette
    palette_img = os.path.join(args.davis_path, "valid/Annotations/blackswan/00000.png")
    palette = Image.open(palette_img).getpalette()

    # start inference
    num_all_frames = 0
    model.eval()

    # 1. for each video
    for video in video_list:
        metas = []

        expressions = data[video]["expressions"]
        expression_list = list(expressions.keys())
        num_expressions = len(expression_list)
        video_len = len(data[video]["frames"])

        # read all the anno meta
        for i in range(num_expressions):
            meta = {}
            meta["video"] = video
            meta["exp"] = expressions[expression_list[i]]["exp"]
            meta["exp_id"] = expression_list[i]  # start from 0
            meta["frames"] = data[video]["frames"]
            metas.append(meta)
        meta = metas

        # since there are 4 annotators
        num_obj = num_expressions // 4

        # 2. for each annotator
        for anno_id in range(4):  # 4 annotators
            anno_logits = []
            anno_masks = []  # [num_obj+1, video_len, h, w], +1 for background

            for obj_id in range(num_obj):
                i = obj_id * 4 + anno_id
                video_name = meta[i]["video"]
                exp = meta[i]["exp"]
                exp_id = meta[i]["exp_id"]
                frames = meta[i]["frames"]

                video_len = len(frames)
                # NOTE: the im2col_step for MSDeformAttention is set as 64,
                # so the max length for a clip is 64
                # store the video pred results
                all_pred_logits = []
                all_pred_masks = []

                # 3. for each clip
                for clip_id in range(0, video_len, 36):
                    frames_ids = [x for x in range(video_len)]
                    clip_frames_ids = frames_ids[clip_id: clip_id + 36]
                    clip_len = len(clip_frames_ids)

                    # load the clip images
                    imgs = []
                    for t in clip_frames_ids:
                        frame = frames[t]
                        img_path = os.path.join(img_folder, video_name, frame + ".jpg")
                        img = Image.open(img_path).convert('RGB')
                        origin_w, origin_h = img.size
                        imgs.append(transform(img))  # list[Img]

                    imgs = torch.stack(imgs, dim=0).to(args.device)  # [video_len, 3, H, W]
                    img_h, img_w = imgs.shape[-2:]
                    size = torch.as_tensor([int(img_h), int(img_w)]).to(args.device)
                    target = {"size": size}

                    with torch.no_grad():
                        outputs = model([imgs], [exp], [target])

                    pred_logits = outputs["pred_logits"][0]  # [t, q, k]
                    pred_masks = outputs["pred_masks"][0]    # [t, q, h, w]

                    # according to pred_logits, select the query index
                    pred_scores = pred_logits.sigmoid()  # [t, q, k]
                    pred_scores = pred_scores.mean(0)    # [q, k]
                    max_scores, _ = pred_scores.max(-1)  # [q,]
                    _, max_ind = max_scores.max(-1)      # [1,]
                    max_inds = max_ind.repeat(clip_len)
                    pred_masks = pred_masks[range(clip_len), max_inds, ...]  # [t, h, w]
                    pred_masks = pred_masks.unsqueeze(0)

                    pred_masks = F.interpolate(pred_masks, size=(origin_h, origin_w), mode='bilinear', align_corners=False)
                    pred_masks = pred_masks.sigmoid()[0]  # [t, h, w], NOTE: here the mask is a score

                    # store the clip results
                    pred_logits = pred_logits[range(clip_len), max_inds]  # [t, k]
                    all_pred_logits.append(pred_logits)
                    all_pred_masks.append(pred_masks)

                all_pred_logits = torch.cat(all_pred_logits, dim=0)  # (video_len, K)
                all_pred_masks = torch.cat(all_pred_masks, dim=0)    # (video_len, h, w)
                anno_logits.append(all_pred_logits)
                anno_masks.append(all_pred_masks)

            # handle a complete image (all objects of an annotator)
            anno_logits = torch.stack(anno_logits)  # [num_obj, video_len, k]
            anno_masks = torch.stack(anno_masks)    # [num_obj, video_len, h, w]
            t, h, w = anno_masks.shape[-3:]
            anno_masks[anno_masks < 0.5] = 0.0
            background = 0.1 * torch.ones(1, t, h, w).to(args.device)
            anno_masks = torch.cat([background, anno_masks], dim=0)  # [num_obj+1, video_len, h, w]
            out_masks = torch.argmax(anno_masks, dim=0)  # int, the value indicates which object, [video_len, h, w]

            out_masks = out_masks.detach().cpu().numpy().astype(np.uint8)  # [video_len, h, w]

            # save results
            anno_save_path = os.path.join(save_path_prefix, f"anno_{anno_id}", video)
            if not os.path.exists(anno_save_path):
                os.makedirs(anno_save_path)
            for f in range(out_masks.shape[0]):
                img_E = Image.fromarray(out_masks[f])
                img_E.putpalette(palette)
                img_E.save(os.path.join(anno_save_path, '{:05d}.png'.format(f)))

        with lock:
            progress.update(1)
    result_dict[str(pid)] = num_all_frames
    with lock:
        progress.close()


# Post-process functions
def box_cxcywh_to_xyxy(x):
    x_c, y_c, w, h = x.unbind(1)
    b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
         (x_c + 0.5 * w), (y_c + 0.5 * h)]
    return torch.stack(b, dim=1)


def rescale_bboxes(out_bbox, size):
    img_w, img_h = size
    b = box_cxcywh_to_xyxy(out_bbox)
    b = b.cpu() * torch.tensor([img_w, img_h, img_w, img_h], dtype=torch.float32)
    return b


# Visualization functions
def draw_reference_points(draw, reference_points, img_size, color):
    W, H = img_size
    for i, ref_point in enumerate(reference_points):
        init_x, init_y = ref_point
        x, y = W * init_x, H * init_y
        cur_color = color
        draw.line((x - 10, y, x + 10, y), tuple(cur_color), width=4)
        draw.line((x, y - 10, x, y + 10), tuple(cur_color), width=4)


def draw_sample_points(draw, sample_points, img_size, color_list):
    alpha = 255
    for i, samples in enumerate(sample_points):
        for sample in samples:
            x, y = sample
            cur_color = color_list[i % len(color_list)][::-1]
            cur_color += [alpha]
            draw.ellipse((x - 2, y - 2, x + 2, y + 2),
                         fill=tuple(cur_color), outline=tuple(cur_color), width=1)


def vis_add_mask(img, mask, color):
    origin_img = np.asarray(img.convert('RGB')).copy()
    color = np.array(color)

    mask = mask.reshape(mask.shape[0], mask.shape[1]).astype('uint8')  # np
    mask = mask > 0.5

    origin_img[mask] = origin_img[mask] * 0.5 + color * 0.5
    origin_img = Image.fromarray(origin_img)
    return origin_img


if __name__ == '__main__':
    parser = argparse.ArgumentParser('ReferFormer inference script', parents=[opts.get_args_parser()])
    args = parser.parse_args()
    main(args)
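The query-selection step above (sigmoid the class logits, average over the clip, then use the single best query for every frame) is easy to check in isolation. A minimal sketch on random tensors; the shapes t (frames), q (queries), k (classes) are assumptions taken from the comments above, not the model's real dimensions:

import torch

t, q, k = 8, 5, 1  # frames, queries, classes (assumed shapes)
pred_logits = torch.randn(t, q, k)
pred_masks = torch.randn(t, q, 32, 32)

scores = pred_logits.sigmoid().mean(0)       # [q, k]: per-query score averaged over the clip
max_scores, _ = scores.max(-1)               # [q]: best class score per query
_, best_query = max_scores.max(-1)           # scalar index of the winning query
clip_masks = pred_masks[torch.arange(t), best_query.repeat(t)]  # [t, 32, 32]: same query for every frame
print(best_query.item(), clip_masks.shape)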
main.py
ADDED
@@ -0,0 +1,243 @@
"""
Training script of ReferFormer
Modified from DETR (https://github.com/facebookresearch/detr)
"""
import argparse
import datetime
import json
import random
import time
from pathlib import Path

import numpy as np
import torch
from torch.utils.data import DataLoader, DistributedSampler

import util.misc as utils
import datasets.samplers as samplers
from datasets import build_dataset, get_coco_api_from_dataset
from engine import train_one_epoch, evaluate, evaluate_a2d
from models import build_model

from tools.load_pretrained_weights import pre_trained_model_to_finetune

import opts


def main(args):
    args.masks = True

    utils.init_distributed_mode(args)
    print("git:\n {}\n".format(utils.get_sha()))
    print(args)

    print(f'\n Run on {args.dataset_file} dataset.')
    print('\n')

    device = torch.device(args.device)

    # fix the seed for reproducibility
    seed = args.seed + utils.get_rank()
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    model, criterion, postprocessor = build_model(args)
    model.to(device)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    # for n, p in model_without_ddp.named_parameters():
    #     print(n)

    n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('number of params:', n_parameters)

    def match_name_keywords(n, name_keywords):
        out = False
        for b in name_keywords:
            if b in n:
                out = True
                break
        return out

    param_dicts = [
        {
            "params":
                [p for n, p in model_without_ddp.named_parameters()
                 if not match_name_keywords(n, args.lr_backbone_names) and not match_name_keywords(n, args.lr_text_encoder_names)
                 and not match_name_keywords(n, args.lr_linear_proj_names) and p.requires_grad],
            "lr": args.lr,
        },
        {
            "params": [p for n, p in model_without_ddp.named_parameters() if match_name_keywords(n, args.lr_backbone_names) and p.requires_grad],
            "lr": args.lr_backbone,
        },
        {
            "params": [p for n, p in model_without_ddp.named_parameters() if match_name_keywords(n, args.lr_text_encoder_names) and p.requires_grad],
            "lr": args.lr_text_encoder,
        },
        {
            "params": [p for n, p in model_without_ddp.named_parameters() if match_name_keywords(n, args.lr_linear_proj_names) and p.requires_grad],
            "lr": args.lr * args.lr_linear_proj_mult,
        }
    ]
    optimizer = torch.optim.AdamW(param_dicts, lr=args.lr,
                                  weight_decay=args.weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, args.lr_drop)

    # no validation ground truth for the ytvos dataset
    dataset_train = build_dataset(args.dataset_file, image_set='train', args=args)

    if args.distributed:
        if args.cache_mode:
            sampler_train = samplers.NodeDistributedSampler(dataset_train)
        else:
            sampler_train = samplers.DistributedSampler(dataset_train)
    else:
        sampler_train = torch.utils.data.RandomSampler(dataset_train)

    batch_sampler_train = torch.utils.data.BatchSampler(
        sampler_train, args.batch_size, drop_last=True)

    data_loader_train = DataLoader(dataset_train, batch_sampler=batch_sampler_train,
                                   collate_fn=utils.collate_fn, num_workers=args.num_workers)

    # A2D-Sentences
    if args.dataset_file == 'a2d' or args.dataset_file == 'jhmdb':
        dataset_val = build_dataset(args.dataset_file, image_set='val', args=args)
        if args.distributed:
            if args.cache_mode:
                sampler_val = samplers.NodeDistributedSampler(dataset_val, shuffle=False)
            else:
                sampler_val = samplers.DistributedSampler(dataset_val, shuffle=False)
        else:
            sampler_val = torch.utils.data.SequentialSampler(dataset_val)
        data_loader_val = DataLoader(dataset_val, args.batch_size, sampler=sampler_val,
                                     drop_last=False, collate_fn=utils.collate_fn, num_workers=args.num_workers,
                                     pin_memory=True)

    if args.dataset_file == "davis":
        assert args.pretrained_weights is not None, "Please provide the pretrained weight to finetune for Ref-DAVIS17"
        print("============================================>")
        print("Ref-DAVIS17 is finetuned using the checkpoint trained on Ref-Youtube-VOS")
        print("Load checkpoint weights from {} ...".format(args.pretrained_weights))
        checkpoint = torch.load(args.pretrained_weights, map_location="cpu")
        checkpoint_dict = pre_trained_model_to_finetune(checkpoint, args)
        model_without_ddp.load_state_dict(checkpoint_dict, strict=False)
        print("============================================>")

    if args.dataset_file == "jhmdb":
        assert args.resume is not None, "Please provide the checkpoint to resume for JHMDB-Sentences"
        print("============================================>")
        print("JHMDB-Sentences is directly evaluated using the checkpoint trained on A2D-Sentences")
        print("Load checkpoint weights from {} ...".format(args.pretrained_weights))
        # the checkpoint is loaded below via args.resume
        print("============================================>")

    # for Ref-Youtube-VOS and A2D-Sentences:
    # finetune using the pretrained weights on Ref-COCO
    if args.dataset_file != "davis" and args.dataset_file != "jhmdb" and args.pretrained_weights is not None:
        print("============================================>")
        print("Load pretrained weights from {} ...".format(args.pretrained_weights))
        checkpoint = torch.load(args.pretrained_weights, map_location="cpu")
        checkpoint_dict = pre_trained_model_to_finetune(checkpoint, args)
        model_without_ddp.load_state_dict(checkpoint_dict, strict=False)
        print("============================================>")

    output_dir = Path(args.output_dir)
    if args.resume:
        if args.resume.startswith('https'):
            checkpoint = torch.hub.load_state_dict_from_url(
                args.resume, map_location='cpu', check_hash=True)
        else:
            checkpoint = torch.load(args.resume, map_location='cpu')
        missing_keys, unexpected_keys = model_without_ddp.load_state_dict(checkpoint['model'], strict=False)
        unexpected_keys = [k for k in unexpected_keys if not (k.endswith('total_params') or k.endswith('total_ops'))]
        if len(missing_keys) > 0:
            print('Missing Keys: {}'.format(missing_keys))
        if len(unexpected_keys) > 0:
            print('Unexpected Keys: {}'.format(unexpected_keys))
        if not args.eval and 'optimizer' in checkpoint and 'lr_scheduler' in checkpoint and 'epoch' in checkpoint:
            import copy
            p_groups = copy.deepcopy(optimizer.param_groups)
            optimizer.load_state_dict(checkpoint['optimizer'])
            for pg, pg_old in zip(optimizer.param_groups, p_groups):
                pg['lr'] = pg_old['lr']
                pg['initial_lr'] = pg_old['initial_lr']
            print(optimizer.param_groups)
            lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
            # todo: this is a hack for an experiment that resumes from a checkpoint and also modifies the lr scheduler (e.g., decrease lr in advance).
            args.override_resumed_lr_drop = True
            if args.override_resumed_lr_drop:
                print('Warning: (hack) args.override_resumed_lr_drop is set to True, so args.lr_drop would override lr_drop in resumed lr_scheduler.')
                lr_scheduler.step_size = args.lr_drop
                lr_scheduler.base_lrs = list(map(lambda group: group['initial_lr'], optimizer.param_groups))
            lr_scheduler.step(lr_scheduler.last_epoch)
            args.start_epoch = checkpoint['epoch'] + 1

    if args.eval:
        assert args.dataset_file == 'a2d' or args.dataset_file == 'jhmdb', \
            'Only A2D-Sentences and JHMDB-Sentences datasets support evaluation'
        test_stats = evaluate_a2d(model, data_loader_val, postprocessor, device, args)
        return

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            sampler_train.set_epoch(epoch)
        train_stats = train_one_epoch(
            model, criterion, data_loader_train, optimizer, device, epoch,
            args.clip_max_norm)
        lr_scheduler.step()
        if args.output_dir:
            checkpoint_paths = [output_dir / 'checkpoint.pth']
            # extra checkpoint before LR drop and every epoch
            # if (epoch + 1) % args.lr_drop == 0 or (epoch + 1) % 1 == 0:
            if (epoch + 1) % 1 == 0:
                checkpoint_paths.append(output_dir / f'checkpoint{epoch:04}.pth')
            for checkpoint_path in checkpoint_paths:
                utils.save_on_master({
                    'model': model_without_ddp.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'lr_scheduler': lr_scheduler.state_dict(),
                    'epoch': epoch,
                    'args': args,
                }, checkpoint_path)

        log_stats = {**{f'train_{k}': v for k, v in train_stats.items()},
                     'epoch': epoch,
                     'n_parameters': n_parameters}

        if args.dataset_file == 'a2d':
            test_stats = evaluate_a2d(model, data_loader_val, postprocessor, device, args)
            log_stats.update({**{f'{k}': v for k, v in test_stats.items()}})

        if args.output_dir and utils.is_main_process():
            with (output_dir / "log.txt").open("a") as f:
                f.write(json.dumps(log_stats) + "\n")

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))


if __name__ == '__main__':
    parser = argparse.ArgumentParser('ReferFormer training and evaluation script', parents=[opts.get_args_parser()])
    args = parser.parse_args()
    if args.output_dir:
        Path(args.output_dir).mkdir(parents=True, exist_ok=True)
    main(args)
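The param_dicts construction above routes every parameter to a learning rate by substring-matching its name against keyword lists. A minimal sketch of the same grouping on a toy model (the literal keyword list stands in for args.lr_backbone_names etc., which come from opts):

import torch
import torch.nn as nn

def match_name_keywords(n, name_keywords):
    # True if any keyword is a substring of the parameter name
    return any(b in n for b in name_keywords)

model = nn.Sequential()
model.add_module("backbone", nn.Linear(4, 4))  # params named "backbone.weight" / "backbone.bias"
model.add_module("head", nn.Linear(4, 2))

backbone_names = ["backbone"]  # stands in for args.lr_backbone_names
param_dicts = [
    {"params": [p for n, p in model.named_parameters()
                if not match_name_keywords(n, backbone_names)], "lr": 1e-4},
    {"params": [p for n, p in model.named_parameters()
                if match_name_keywords(n, backbone_names)], "lr": 1e-5},
]
optimizer = torch.optim.AdamW(param_dicts, lr=1e-4, weight_decay=5e-4)
print([g["lr"] for g in optimizer.param_groups])  # [0.0001, 1e-05]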
main_joint.py
ADDED
@@ -0,0 +1,198 @@
"""
Training script of ReferFormer
Modified from DETR (https://github.com/facebookresearch/detr)
"""
import argparse
import datetime
import json
import random
import time
from pathlib import Path

import numpy as np
import torch
from torch.utils.data import DataLoader, DistributedSampler

import util.misc as utils
import datasets.samplers as samplers
from datasets import build_dataset, get_coco_api_from_dataset
from engine import train_one_epoch, evaluate, evaluate_a2d
from models import build_model

from tools.load_pretrained_weights import pre_trained_model_to_finetune

import opts


def main(args):
    args.masks = True
    args.dataset_file = 'joint'  # joint training of ytvos and refcoco
    args.binary = 1  # only run on binary referred expressions

    utils.init_distributed_mode(args)
    print("git:\n {}\n".format(utils.get_sha()))
    print(args)

    print(f'\n Run on {args.dataset_file} dataset.')
    print('\n')

    device = torch.device(args.device)

    # fix the seed for reproducibility
    seed = args.seed + utils.get_rank()
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    model, criterion, postprocessor = build_model(args)
    model.to(device)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    # for n, p in model_without_ddp.named_parameters():
    #     print(n)

    n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('number of params:', n_parameters)

    def match_name_keywords(n, name_keywords):
        out = False
        for b in name_keywords:
            if b in n:
                out = True
                break
        return out

    param_dicts = [
        {
            "params":
                [p for n, p in model_without_ddp.named_parameters()
                 if not match_name_keywords(n, args.lr_backbone_names) and not match_name_keywords(n, args.lr_text_encoder_names)
                 and not match_name_keywords(n, args.lr_linear_proj_names) and p.requires_grad],
            "lr": args.lr,
        },
        {
            "params": [p for n, p in model_without_ddp.named_parameters() if match_name_keywords(n, args.lr_backbone_names) and p.requires_grad],
            "lr": args.lr_backbone,
        },
        {
            "params": [p for n, p in model_without_ddp.named_parameters() if match_name_keywords(n, args.lr_text_encoder_names) and p.requires_grad],
            "lr": args.lr_text_encoder,
        },
        {
            "params": [p for n, p in model_without_ddp.named_parameters() if match_name_keywords(n, args.lr_linear_proj_names) and p.requires_grad],
            "lr": args.lr * args.lr_linear_proj_mult,
        }
    ]
    optimizer = torch.optim.AdamW(param_dicts, lr=args.lr,
                                  weight_decay=args.weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, args.lr_drop)

    # no validation ground truth for the ytvos dataset
    dataset_train = build_dataset(args.dataset_file, image_set='train', args=args)

    if args.distributed:
        if args.cache_mode:
            sampler_train = samplers.NodeDistributedSampler(dataset_train)
        else:
            sampler_train = samplers.DistributedSampler(dataset_train)
    else:
        sampler_train = torch.utils.data.RandomSampler(dataset_train)

    batch_sampler_train = torch.utils.data.BatchSampler(
        sampler_train, args.batch_size, drop_last=True)

    data_loader_train = DataLoader(dataset_train, batch_sampler=batch_sampler_train,
                                   collate_fn=utils.collate_fn, num_workers=args.num_workers)

    output_dir = Path(args.output_dir)
    if args.resume:
        if args.resume.startswith('https'):
            checkpoint = torch.hub.load_state_dict_from_url(
                args.resume, map_location='cpu', check_hash=True)
        else:
            checkpoint = torch.load(args.resume, map_location='cpu')
        missing_keys, unexpected_keys = model_without_ddp.load_state_dict(checkpoint['model'], strict=False)
        unexpected_keys = [k for k in unexpected_keys if not (k.endswith('total_params') or k.endswith('total_ops'))]
        if len(missing_keys) > 0:
            print('Missing Keys: {}'.format(missing_keys))
        if len(unexpected_keys) > 0:
            print('Unexpected Keys: {}'.format(unexpected_keys))
        if not args.eval and 'optimizer' in checkpoint and 'lr_scheduler' in checkpoint and 'epoch' in checkpoint:
            import copy
            p_groups = copy.deepcopy(optimizer.param_groups)
            optimizer.load_state_dict(checkpoint['optimizer'])
            for pg, pg_old in zip(optimizer.param_groups, p_groups):
                pg['lr'] = pg_old['lr']
                pg['initial_lr'] = pg_old['initial_lr']
            print(optimizer.param_groups)
            lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
            # todo: this is a hack for an experiment that resumes from a checkpoint and also modifies the lr scheduler (e.g., decrease lr in advance).
            args.override_resumed_lr_drop = True
            if args.override_resumed_lr_drop:
                print('Warning: (hack) args.override_resumed_lr_drop is set to True, so args.lr_drop would override lr_drop in resumed lr_scheduler.')
                lr_scheduler.step_size = args.lr_drop
                lr_scheduler.base_lrs = list(map(lambda group: group['initial_lr'], optimizer.param_groups))
            lr_scheduler.step(lr_scheduler.last_epoch)
            args.start_epoch = checkpoint['epoch'] + 1

    if args.eval:
        # note: this script never builds data_loader_val, and args.dataset_file is forced
        # to 'joint' above, so this branch is effectively unreachable in main_joint.py
        assert args.dataset_file == 'a2d' or args.dataset_file == 'jhmdb', \
            'Only A2D-Sentences and JHMDB-Sentences datasets support evaluation'
        test_stats = evaluate_a2d(model, data_loader_val, postprocessor, device, args)
        return

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            sampler_train.set_epoch(epoch)
        train_stats = train_one_epoch(
            model, criterion, data_loader_train, optimizer, device, epoch,
            args.clip_max_norm)
        lr_scheduler.step()
        if args.output_dir:
            checkpoint_paths = [output_dir / 'checkpoint.pth']
            # extra checkpoint before LR drop and every epoch
            # if (epoch + 1) % args.lr_drop == 0 or (epoch + 1) % 1 == 0:
            if (epoch + 1) % 1 == 0:
                checkpoint_paths.append(output_dir / f'checkpoint{epoch:04}.pth')
            for checkpoint_path in checkpoint_paths:
                utils.save_on_master({
                    'model': model_without_ddp.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'lr_scheduler': lr_scheduler.state_dict(),
                    'epoch': epoch,
                    'args': args,
                }, checkpoint_path)

        log_stats = {**{f'train_{k}': v for k, v in train_stats.items()},
                     'epoch': epoch,
                     'n_parameters': n_parameters}

        if args.output_dir and utils.is_main_process():
            with (output_dir / "log.txt").open("a") as f:
                f.write(json.dumps(log_stats) + "\n")

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))


if __name__ == '__main__':
    parser = argparse.ArgumentParser('ReferFormer training and evaluation script', parents=[opts.get_args_parser()])
    args = parser.parse_args()
    if args.output_dir:
        Path(args.output_dir).mkdir(parents=True, exist_ok=True)
    main(args)
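main_joint.py builds a single 'joint' dataset through the repo's concat_dataset builder; main_pretrain.py below does the analogous thing for the RefCOCO variants with torch.utils.data.ConcatDataset. A minimal sketch of that concatenation on toy datasets (the TensorDatasets stand in for the real refcoco/refcocog builds):

import torch
from torch.utils.data import ConcatDataset, TensorDataset

# Two toy datasets standing in for e.g. the refcoco and refcocog train splits
ds_a = TensorDataset(torch.zeros(3, 2))
ds_b = TensorDataset(torch.ones(5, 2))

joint = ConcatDataset([ds_a, ds_b])
print(len(joint))   # 8: indices run across both datasets in order
print(joint[4][0])  # index 4 falls into ds_b -> tensor([1., 1.])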
main_pretrain.py
ADDED
@@ -0,0 +1,304 @@
import argparse
import datetime
import json
import random
import time
from pathlib import Path
from collections import namedtuple
from functools import partial

import os
import numpy as np
import torch
from torch.utils.data import DataLoader, DistributedSampler

import util.misc as utils
import datasets.samplers as samplers
from datasets.coco_eval import CocoEvaluator
from datasets import build_dataset, get_coco_api_from_dataset
from engine import evaluate, train_one_epoch
from models import build_model
from models.postprocessors import build_postprocessors

import opts


def main(args):
    # set environ
    os.environ["MDETR_CPU_REDUCE"] = "1"

    args.masks = True
    assert args.dataset_file in ["refcoco", "refcoco+", "refcocog", "all"]

    utils.init_distributed_mode(args)
    print("git:\n {}\n".format(utils.get_sha()))
    print(args)

    device = torch.device(args.device)

    # fix the seed for reproducibility
    seed = args.seed + utils.get_rank()
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    model, criterion, postprocessors = build_model(args)
    model.to(device)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module
    n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('number of params:', n_parameters)

    # lr_backbone_names = ["backbone.0", "text_encoder"]
    def match_name_keywords(n, name_keywords):
        out = False
        for b in name_keywords:
            if b in n:
                out = True
                break
        return out

    # for n, p in model_without_ddp.named_parameters():
    #     print(n)

    param_dicts = [
        {
            "params":
                [p for n, p in model_without_ddp.named_parameters()
                 if not match_name_keywords(n, args.lr_backbone_names) and not match_name_keywords(n, args.lr_text_encoder_names)
                 and not match_name_keywords(n, args.lr_linear_proj_names) and p.requires_grad],
            "lr": args.lr,
        },
        {
            "params": [p for n, p in model_without_ddp.named_parameters() if match_name_keywords(n, args.lr_backbone_names) and p.requires_grad],
            "lr": args.lr_backbone,
        },
        {
            "params": [p for n, p in model_without_ddp.named_parameters() if match_name_keywords(n, args.lr_text_encoder_names) and p.requires_grad],
            "lr": args.lr_text_encoder,
        },
        {
            "params": [p for n, p in model_without_ddp.named_parameters() if match_name_keywords(n, args.lr_linear_proj_names) and p.requires_grad],
            "lr": args.lr * args.lr_linear_proj_mult,
        }
    ]
    optimizer = torch.optim.AdamW(param_dicts, lr=args.lr,
                                  weight_decay=args.weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, args.lr_drop)

    # build train dataset
    if args.dataset_file != "all":
        dataset_train = build_dataset(args.dataset_file, image_set='train', args=args)
    else:
        dataset_names = ["refcoco", "refcoco+", "refcocog"]
        dataset_train = torch.utils.data.ConcatDataset(
            [build_dataset(name, image_set="train", args=args) for name in dataset_names]
        )

    print("\nTrain dataset sample number: ", len(dataset_train))
    print("\n")

    if args.distributed:
        if args.cache_mode:
            sampler_train = samplers.NodeDistributedSampler(dataset_train)
        else:
            sampler_train = samplers.DistributedSampler(dataset_train)
    else:
        sampler_train = torch.utils.data.RandomSampler(dataset_train)

    batch_sampler_train = torch.utils.data.BatchSampler(
        sampler_train, args.batch_size, drop_last=True)

    data_loader_train = DataLoader(dataset_train, batch_sampler=batch_sampler_train,
                                   collate_fn=utils.collate_fn, num_workers=args.num_workers,
                                   pin_memory=True)

    # build val datasets
    Val_all = namedtuple(typename="val_data", field_names=["dataset_name", "dataloader", "base_ds", "evaluator_list"])
    if args.dataset_file != "all":
        dataset_names = [args.dataset_file]
    else:
        dataset_names = ["refcoco", "refcoco+", "refcocog"]

    val_tuples = []
    for name in dataset_names:
        dataset_val = build_dataset(name, image_set="val", args=args)
        sampler_val = (
            samplers.DistributedSampler(dataset_val, shuffle=False) if args.distributed else torch.utils.data.SequentialSampler(dataset_val)
        )
        data_loader_val = DataLoader(
            dataset_val,
            args.batch_size,
            sampler=sampler_val,
            drop_last=False,
            collate_fn=utils.collate_fn,
            num_workers=args.num_workers,
        )
        base_ds = get_coco_api_from_dataset(dataset_val)
        val_tuples.append(Val_all(dataset_name=name, dataloader=data_loader_val, base_ds=base_ds, evaluator_list=None))

    # build evaluator list for dataset_val
    def build_evaluator_list(base_ds, dataset_name):
        """Helper function to build the list of evaluators for a given dataset"""
        evaluator_list = []
        iou_types = ["bbox"]
        if args.masks:
            iou_types.append("segm")

        evaluator_list.append(CocoEvaluator(base_ds, tuple(iou_types), useCats=False))
        # TODO: currently does not support RefExpEvaluator (memory error)
        return evaluator_list

    output_dir = Path(args.output_dir)
    if args.resume:
        print("Resume from {}".format(args.resume))
        if args.resume.startswith('https'):
            checkpoint = torch.hub.load_state_dict_from_url(
                args.resume, map_location='cpu', check_hash=True)
        else:
            checkpoint = torch.load(args.resume, map_location='cpu')
        missing_keys, unexpected_keys = model_without_ddp.load_state_dict(checkpoint['model'], strict=False)
        unexpected_keys = [k for k in unexpected_keys if not (k.endswith('total_params') or k.endswith('total_ops'))]
        if len(missing_keys) > 0:
            print('Missing Keys: {}'.format(missing_keys))
        if len(unexpected_keys) > 0:
            print('Unexpected Keys: {}'.format(unexpected_keys))
        if not args.eval and 'optimizer' in checkpoint and 'lr_scheduler' in checkpoint and 'epoch' in checkpoint:
            import copy
            p_groups = copy.deepcopy(optimizer.param_groups)
            optimizer.load_state_dict(checkpoint['optimizer'])
            for pg, pg_old in zip(optimizer.param_groups, p_groups):
                pg['lr'] = pg_old['lr']
                pg['initial_lr'] = pg_old['initial_lr']
            print(optimizer.param_groups)
            lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
            # todo: this is a hack for an experiment that resumes from a checkpoint and also modifies the lr scheduler (e.g., decrease lr in advance).
            args.override_resumed_lr_drop = True
            if args.override_resumed_lr_drop:
                print('Warning: (hack) args.override_resumed_lr_drop is set to True, so args.lr_drop would override lr_drop in resumed lr_scheduler.')
                lr_scheduler.step_size = args.lr_drop
                lr_scheduler.base_lrs = list(map(lambda group: group['initial_lr'], optimizer.param_groups))
            lr_scheduler.step(lr_scheduler.last_epoch)
            args.start_epoch = checkpoint['epoch'] + 1
        if not args.eval:
            test_stats = {}
            for i, item in enumerate(val_tuples):
                evaluator_list = build_evaluator_list(item.base_ds, item.dataset_name)
                postprocessors = build_postprocessors(args, item.dataset_name)
                item = item._replace(evaluator_list=evaluator_list)
                print(f"Evaluating {item.dataset_name}")
                curr_test_stats = evaluate(
                    model=model,
                    criterion=criterion,
                    postprocessors=postprocessors,
                    data_loader=item.dataloader,
                    evaluator_list=item.evaluator_list,
                    device=device,
                    args=args,
                )
                test_stats.update({item.dataset_name + "_" + k: v for k, v in curr_test_stats.items()})

            log_stats = {
                **{f"test_{k}": v for k, v in test_stats.items()},
                "n_parameters": n_parameters,
            }
            print(log_stats)

    if args.eval:
        print("Evaluating......")
        test_stats = {}
        for i, item in enumerate(val_tuples):
            evaluator_list = build_evaluator_list(item.base_ds, item.dataset_name)
            postprocessors = build_postprocessors(args, item.dataset_name)
            item = item._replace(evaluator_list=evaluator_list)
            print(f"Evaluating {item.dataset_name}")
            curr_test_stats = evaluate(
                model=model,
                criterion=criterion,
                postprocessors=postprocessors,
                data_loader=item.dataloader,
                evaluator_list=item.evaluator_list,
                device=device,
                args=args,
            )
            test_stats.update({item.dataset_name + "_" + k: v for k, v in curr_test_stats.items()})

        log_stats = {
            **{f"test_{k}": v for k, v in test_stats.items()},
            "n_parameters": n_parameters,
        }
        print(log_stats)

        return

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            sampler_train.set_epoch(epoch)
        train_stats = train_one_epoch(
            model, criterion, data_loader_train, optimizer, device, epoch,
            args.clip_max_norm)
        lr_scheduler.step()
        if args.output_dir:
            checkpoint_paths = [output_dir / 'checkpoint.pth']
            # extra checkpoint before LR drop and every epoch
            # if (epoch + 1) % args.lr_drop == 0 or (epoch + 1) % 1 == 0:
            if (epoch + 1) % 1 == 0:
                checkpoint_paths.append(output_dir / f'checkpoint{epoch:04}.pth')
            for checkpoint_path in checkpoint_paths:
                utils.save_on_master({
                    'model': model_without_ddp.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'lr_scheduler': lr_scheduler.state_dict(),
                    'epoch': epoch,
                    'args': args,
                }, checkpoint_path)

        test_stats = {}
        for i, item in enumerate(val_tuples):
            evaluator_list = build_evaluator_list(item.base_ds, item.dataset_name)
            postprocessors = build_postprocessors(args, item.dataset_name)
            item = item._replace(evaluator_list=evaluator_list)
            print(f"Evaluating {item.dataset_name}")
            curr_test_stats = evaluate(
                model=model,
                criterion=criterion,
                postprocessors=postprocessors,
                data_loader=item.dataloader,
                evaluator_list=item.evaluator_list,
                device=device,
                args=args,
            )
            test_stats.update({item.dataset_name + "_" + k: v for k, v in curr_test_stats.items()})

        log_stats = {**{f'train_{k}': v for k, v in train_stats.items()},
                     **{f'test_{k}': v for k, v in test_stats.items()},
                     'epoch': epoch,
                     'n_parameters': n_parameters}

        if args.output_dir and utils.is_main_process():
            with (output_dir / "log.txt").open("a") as f:
                f.write(json.dumps(log_stats) + "\n")

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))


if __name__ == '__main__':
    parser = argparse.ArgumentParser('ReferFormer pretrain training and evaluation script', parents=[opts.get_args_parser()])
    args = parser.parse_args()
    if args.output_dir:
        Path(args.output_dir).mkdir(parents=True, exist_ok=True)
    main(args)
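The evaluation loops above keep per-dataset state in a Val_all namedtuple and swap in a fresh evaluator list each pass with _replace. A minimal sketch of that pattern (the field values here are dummies, not real dataloaders or evaluators):

from collections import namedtuple

# Same shape as the Val_all tuple used in main_pretrain.py
Val_all = namedtuple("val_data", ["dataset_name", "dataloader", "base_ds", "evaluator_list"])

item = Val_all(dataset_name="refcoco", dataloader=None, base_ds=None, evaluator_list=None)
# namedtuples are immutable; _replace returns a copy with one field swapped
item = item._replace(evaluator_list=["bbox_evaluator", "segm_evaluator"])
print(item.dataset_name, item.evaluator_list)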
make_refcoco/refcocog_google/motion_split_generation_grefg_val.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
make_refcoco/refcocog_google/part4_ref_id.txt
ADDED
@@ -0,0 +1,130 @@
1 |
+
4859
|
2 |
+
678
|
3 |
+
162
|
4 |
+
3052
|
5 |
+
2355
|
6 |
+
3408
|
7 |
+
834
|
8 |
+
328
|
9 |
+
1646
|
10 |
+
4400
|
11 |
+
3683
|
12 |
+
3788
|
13 |
+
4701
|
14 |
+
1211
|
15 |
+
2138
|
16 |
+
3510
|
17 |
+
899
|
18 |
+
293
|
19 |
+
3196
|
20 |
+
1939
|
21 |
+
2659
|
22 |
+
2849
|
23 |
+
756
|
24 |
+
4573
|
25 |
+
4514
|
26 |
+
304
|
27 |
+
3465
|
28 |
+
1092
|
29 |
+
2025
|
30 |
+
1701
|
31 |
+
2958
|
32 |
+
4793
|
33 |
+
1247
|
34 |
+
1841
|
35 |
+
4404
|
36 |
+
4536
|
37 |
+
2787
|
38 |
+
3377
|
39 |
+
3889
|
40 |
+
2194
|
41 |
+
2969
|
42 |
+
1951
|
43 |
+
508
|
44 |
+
2312
|
45 |
+
3948
|
46 |
+
1388
|
47 |
+
2690
|
48 |
+
1109
|
49 |
+
1374
|
50 |
+
3475
|
51 |
+
1333
|
52 |
+
2068
|
53 |
+
2824
|
54 |
+
2294
|
55 |
+
2446
|
56 |
+
4771
|
57 |
+
2686
|
58 |
+
4558
|
59 |
+
1499
|
60 |
+
4303
|
61 |
+
1376
|
62 |
+
3544
|
63 |
+
1858
|
64 |
+
434
|
65 |
+
3024
|
66 |
+
513
|
67 |
+
693
|
68 |
+
2523
|
69 |
+
4987
|
70 |
+
3133
|
71 |
+
4041
|
72 |
+
2105
|
73 |
+
135
|
74 |
+
3613
|
75 |
+
1722
|
76 |
+
1607
|
77 |
+
2761
|
78 |
+
2454
|
79 |
+
1603
|
80 |
+
4794
|
81 |
+
2485
|
82 |
+
3280
|
83 |
+
3336
|
84 |
+
3118
|
85 |
+
4494
|
86 |
+
3004
|
87 |
+
127
|
88 |
+
3389
|
89 |
+
2568
|
90 |
+
2283
|
91 |
+
1530
|
92 |
+
4251
|
93 |
+
2540
|
94 |
+
2870
|
95 |
+
4946
|
96 |
+
113
|
97 |
+
711
|
98 |
+
3209
|
99 |
+
3620
|
100 |
+
4382
|
101 |
+
2861
|
102 |
+
3954
|
103 |
+
1984
|
104 |
+
2069
|
105 |
+
2016
|
106 |
+
1153
|
107 |
+
3614
|
108 |
+
198
|
109 |
+
3012
|
110 |
+
4247
|
111 |
+
2205
|
112 |
+
4831
|
113 |
+
4534
|
114 |
+
638
|
115 |
+
1419
|
116 |
+
1992
|
117 |
+
542
|
118 |
+
2223
|
119 |
+
4865
|
120 |
+
751
|
121 |
+
3540
|
122 |
+
3765
|
123 |
+
2879
|
124 |
+
4529
|
125 |
+
2131
|
126 |
+
1306
|
127 |
+
3508
|
128 |
+
4165
|
129 |
+
4126
|
130 |
+
388
|
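A short sketch (not a file in this repo) of how this id list and the companion revised_refid_part4.json below fit together: the txt file holds one RefCOCOg ref id per line, and the JSON maps ids to revised motion/static caption pairs. Note that not every listed id necessarily has a revised entry, hence the .get; the refcocog_umd part4 files further down use the same layout.

import json

with open('make_refcoco/refcocog_google/part4_ref_id.txt') as f:
    ref_ids = [int(line) for line in f if line.strip()]

with open('make_refcoco/refcocog_google/revised_refid_part4.json') as f:
    revised = json.load(f)

print(len(ref_ids))                   # 130 ids in this part
print(revised.get(str(ref_ids[0])))   # motion/static captions for ref id 4859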
make_refcoco/refcocog_google/revised_refid_part4.json
ADDED
@@ -0,0 +1,506 @@
{
    "4859": {
        "(motion, 101105)": "man sitting on the ground playing wii",
        "(static, 101106)": "man in white and light blue t - shirt"
    },
    "678": {
        "(motion, 14720)": "the man crouching inside the plane",
        "(static, 14721)": "the man wearing white hat"
    },
    "162": {
        "(motion, 2908)": "the man resting his face on his hands",
        "(static, 2909)": "the man with a plastic bag between his feet"
    },
    "3052": {
        "(motion, 63901)": "person looking at a book",
        "(static, 63902)": "person wearing a hat and backpack"
    },
    "2355": {
        "(motion, 49522)": "the cat sitting in the chair",
        "(static, 49523)": "cat on left side"
    },
    "3408": {
        "(motion, 71397)": "a man bending and judging a tennis match",
        "(static, 71398)": "a man wearing a red shirt and black pants"
    },
    "834": {
        "(motion, 17983)": "a giraffe who is eating hay out of a feeder",
        "(static, 17984)": "the giraffe on the right side of the pole"
    },
    "328": {
        "(motion, 6730)": "person bending over",
        "(static, 6731)": "big person in blue cap"
    },
    "1646": {
        "(motion, 35169)": "person about to hit a ball",
        "(static, 35170)": "person wearing shirt and pants"
    },
    "4400": {
        "(motion, 91825)": "boy sitting on his skateboard and looking at another boy",
        "(static, 91826)": "boy wearing dark t - shirt and jeans"
    },
    "3683": {
        "(motion, 77184)": "a man dishing up food",
        "(static, 77185)": "a man in military camo and a black hat on the right"
    },
    "3788": {
        "(motion, 79367)": "a black cat sitting and starring",
        "(static, 79368)": "a cat with a heart shaped tag"
    },
    "4701": {
        "(motion, 97795)": "person whose tie is being pulled by another person",
        "(static, 97796)": "person in blue shirt with a red undone tie"
    },
    "1211": {
        "(motion, 26003)": "person putting arm around another person",
        "(static, 26004)": "person with backpack"
    },
    "2138": {
        "(motion, 45446)": "a person sleeping on the top bunk",
        "(static, 45447)": "a person in a green shirt and brown shorts"
    },
    "3510": {
        "(motion, 73478)": "personn sitting in a train compartment and reading book",
        "(static, 73479)": "person in striped shirt"
    },
    "899": {
        "(motion, 19308)": "a man serving soup",
        "(static, 19309)": "a man with tattoo on his arm"
    },
    "293": {
        "(motion, 5939)": "a lady laughing and looking at another lady",
        "(static, 5940)": "a lady with dark hair and a dark shirt"
    },
    "3196": {
        "(motion, 67017)": "person holding a pen",
        "(static, 67018)": "person in a brown suit"
    },
    "1939": {
        "(motion, 41076)": "a person sitting cross legged on the beach",
        "(static, 41077)": "person in khakis and a white shirt with yellow flowers"
    },
    "2659": {
        "(motion, 56121)": "person helping another cross a stream",
        "(static, 56122)": "person in white dress"
    },
    "2849": {
        "(motion, 59798)": "person looking down drinking a glass of wine",
        "(static, 59799)": "person on the right side not wearing glasses"
    },
    "756": {
        "(motion, 16375)": "the woman about to pick up a slice of pizza",
        "(static, 16376)": "a woman with a flower shirt"
    },
    "4573": {
        "(motion, 95258)": "person reaching for another person with the frisbee",
        "(static, 95259)": "person with blue and white striped shirt on"
    },
    "4514": {
        "(motion, 94061)": "person running behind",
        "(static, 94062)": "person in dark brown top and jeans"
    },
    "304": {
        "(motion, 6165)": "person resting her head in hand and crossing one's legs",
        "(static, 6166)": "the person in pink jacket"
    },
    "3465": {
        "(motion, 72753)": "person sitting on a love seat and watching others play wii",
        "(static, 72754)": "person in a black shirt and white shorts"
    },
    "1092": {
        "(motion, 23796)": "a bear standing up with its mouth open",
        "(static, 23797)": "a bear on the right"
    },
    "2025": {
        "(motion, 42838)": "the person leading the horse",
        "(static, 42839)": "the person in gray top and jeans"
    },
    "1701": {
        "(motion, 36094)": "giraffe biting off of a tree",
        "(static, 36095)": "tall giraffe on the right"
    },
    "2958": {
        "(motion, 62137)": "person playing with dog",
        "(static, 62138)": "balding person wearing brown hoodie"
    },
    "4793": {
        "(motion, 99824)": "the girl eating and looking at her plate",
        "(static, 99825)": "the girl wearing a pink shirt"
    },
    "1247": {
        "(motion, 26624)": "the person holding the bat",
        "(static, 26625)": "the person in white t - shirt and grey pants"
    },
    "1841": {
        "(motion, 38888)": "person resting hands on other people's shoulders",
        "(static, 38889)": "tallest person wearing bright suit"
    },
    "4404": {
        "(motion, 91907)": "a elephant whose trunk pointing to the floor , may be touching",
        "(static, 91908)": "elephant more on the right side of the picture"
    },
    "4536": {
        "(motion, 94448)": "a person reaching for the microwave looking at the camera",
        "(static, 94449)": "person in black t shirt"
    },
    "2787": {
        "(motion, 58740)": "a giraffe snacking on the tree",
        "(static, 58741)": "a giraffe on the right"
    },
    "3377": {
        "(motion, 70765)": "a zebra resting its head on another zebra ' s back",
        "(static, 70766)": "a zebra on the left"
    },
    "3889": {
        "(motion, 81051)": "a man holding a basket of pastries",
        "(static, 81052)": "a man wearing grey hoodie"
    },
    "2194": {
        "(motion, 46507)": "standing dog",
        "(static, 46508)": "a black and white dog with a blue collar tag"
    },
    "508": {
        "(motion, 11146)": "person being held by another person",
        "(static, 11147)": "person dressed in a red suit and blue cap"
    },
    "2312": {
        "(motion, 48847)": "a bird standing on a table",
        "(static, 48848)": "a bird on the left"
    },
    "3948": {
        "(motion, 82190)": "the woman who is squinting in one eye",
        "(static, 82191)": "a blue eyed brown haired woman not wearing glasses"
    },
    "1388": {
        "(motion, 29353)": "person holding another person while watching giraffe drink water",
        "(static, 29354)": "person in brown shirt with bag"
    },
    "2690": {
        "(motion, 56849)": "a man about to kick a ball",
        "(static, 56850)": "a man in all white with number 23 on his chest"
    },
    "1109": {
        "(motion, 24043)": "man holding the ktie",
        "(static, 24044)": "man on the right"
    },
    "1374": {
        "(motion, 29120)": "person arranging pansts of another person",
        "(static, 29121)": "the person with in the black tuxedo and glasses in his head"
    },
    "3475": {
        "(motion, 72951)": "woman holding the horse",
        "(static, 72952)": "a woman wearing spectacles with violet shirt and flourecent colour waist vest"
    },
    "1333": {
        "(motion, 28225)": "a person holding another person",
        "(static, 28226)": "a person in a pink and orange flannel shirt"
    },
    "2068": {
        "(motion, 43909)": "person standing and playing wii",
        "(static, 43910)": "person wearing black t - shirt"
    },
    "2824": {
        "(motion, 59394)": "person standing besides a table crossing arms",
        "(static, 59395)": "person with glasses and long hair"
    },
    "2294": {
        "(motion, 48483)": "a person sitting on bike holding another person",
        "(static, 48484)": "a person with a helmet on the head"
    },
    "2446": {
        "(motion, 51355)": "an elephant that has it ' s trunk pointing towards the water",
        "(static, 51356)": "elephant on the left"
    },
    "2686": {
        "(motion, 56783)": "a man staring at another man",
        "(static, 56784)": "a man in an orange tie"
    },
    "4558": {
        "(motion, 94950)": "a zebra facing the camera",
        "(static, 94951)": "a small zebra beside a larger zebra"
    },
    "1499": {
        "(motion, 32051)": "a man resting on a metal fence",
        "(static, 32052)": "a man in white shirt and polka dot tie"
    },
    "4303": {
        "(motion, 89833)": "a man throwing a banana",
        "(static, 89834)": "a man in bike gear on the right of the picture"
    },
    "1376": {
        "(motion, 29146)": "a man sitting down with his hands together",
        "(static, 29147)": "a man with a purple shirt and khaki pants "
    },
    "3544": {
        "(motion, 74100)": "the man holding a riding crop",
        "(static, 74101)": "man in black shirt and slacks on the left"
    },
    "1858": {
        "(motion, 39103)": "a bull standing",
        "(static, 39104)": "a white and brown bull on the left of the picture"
    },
    "434": {
        "(motion, 9561)": "the man looking down",
        "(static, 9562)": "the man on the left"
    },
    "3024": {
        "(motion, 63345)": "a baseball player sliding into a base",
        "(static, 63346)": "baseball player wearing the number 12"
    },
    "513": {
        "(motion, 11239)": "a man riding on a skateboard as his picture is being taken",
        "(static, 11240)": "a man in a purple t - shirt and ripped jeans"
    },
    "693": {
        "(motion, 14989)": "a person standing",
        "(static, 14990)": "a small person"
    },
    "2523": {
        "(motion, 53103)": "a baseball player sliding into home plate and getting tagged by the catcher",
        "(static, 53104)": "a la dodgers player on the right of the picture"
    },
    "4987": {
        "(motion, 104145)": "a girl punching out her arm while playing an interactive video game",
        "(static, 104146)": "girl wearing grey and white stripes and sweatpants"
    },
    "4041": {
        "(motion, 84159)": "soccer player about to kick soccer ball",
        "(static, 84160)": "soccer player wearing black t - shirt and black gloves"
    },
    "2105": {
        "(motion, 44674)": "a baseball player holding his arm up to catch a ball",
        "(static, 44675)": "a baseball player wearing helmet and vest"
    },
    "135": {
        "(motion, 2353)": "dog resting it ' s head on a table",
        "(static, 2354)": "golden dog"
    },
    "3613": {
        "(motion, 75580)": "person talking to another person while crossing legs",
        "(static, 75581)": "person with long sleeve shirt, jeans and cap"
    },
    "1722": {
        "(motion, 36451)": "person pulling another person's tie",
        "(static, 36452)": "blonde person in black dress"
    },
    "1607": {
        "(motion, 34281)": "a person reading a book to another person he ' s holding",
        "(static, 34282)": "a bald person wearing a beige t - shirt and gray jeans"
    },
    "2761": {
        "(motion, 58225)": "girl propping her chin on her hand",
        "(static, 58226)": "girl in a pink shirt near window"
    },
    "2454": {
        "(motion, 51492)": "a man looking at laptop",
        "(static, 51493)": "the man with glasses and painted fingernails"
    },
    "1603": {
        "(motion, 34234)": "person eating a donut",
        "(static, 34235)": "person with the black beanie"
    },
    "4794": {
        "(motion, 99868)": "a duck that is looking straight ahead",
        "(static, 99869)": "the duck on the right side"
    },
    "2485": {
        "(motion, 52246)": "a person reaching across the net",
        "(static, 52247)": "tallest person in a grey shirt and shorts"
    },
    "3280": {
        "(motion, 68799)": "a boy walking towards his skate board",
        "(static, 68800)": "a boy in a striped shirt"
    },
    "3336": {
        "(motion, 69882)": "person holding a piece of chocolate cake",
        "(static, 69883)": "person wearing a purple dress"
    },
    "3118": {
        "(motion, 65349)": "giraffe stretching its neck straight up",
        "(static, 65350)": "taller giraffe"
    },
    "4494": {
        "(motion, 93729)": "man touching the frisbee",
        "(static, 93730)": "a man in a white shirt"
    },
    "3004": {
        "(motion, 62940)": "person crouching to catch a ball",
        "(static, 62941)": "person in a red uniform and helmet"
    },
    "127": {
        "(motion, 2256)": "a person holding a plate",
        "(static, 2257)": "the person in the purple coat"
    },
    "3389": {
        "(motion, 70905)": "person waving",
        "(static, 70906)": "person in black sneakers"
    },
    "2568": {
        "(motion, 54256)": "person looking at phone",
        "(static, 54257)": "blonde person on the right"
    },
    "2283": {
        "(motion, 48251)": "the cook holding a plate",
        "(static, 48252)": "middle cook of three cooks"
    },
    "1530": {
        "(motion, 32639)": "person petting the cat",
        "(static, 32640)": "person with sleeves rolled up"
    },
    "4251": {
        "(motion, 88833)": "a person reading a book",
        "(static, 88834)": "person in a striped jacket "
    },
    "2540": {
        "(motion, 53539)": "a man reaching out his right arm holding a controller",
        "(static, 53540)": "a man in red shirt and black jeans"
    },
    "2870": {
        "(motion, 60169)": "a person watching horse riding",
        "(static, 60170)": "a person in a white jacket and beige pants"
    },
    "4946": {
        "(motion, 103092)": "a man about to hit a ball",
        "(static, 103093)": "a man in red shirt and blue vest"
    },
    "113": {
        "(motion, 1973)": "person holding phone",
        "(static, 1974)": "person with a black shirt and brown coat"
    },
    "711": {
        "(motion, 15398)": "girl crouching and holding an umbrella",
        "(static, 15399)": "girl wearing light green socks on the left"
    },
    "3209": {
        "(motion, 67236)": "the person that is sliding into home , getting tagged out by the catcher",
        "(static, 67237)": "the person in the white vest over the blue shirt"
    },
    "3620": {
        "(motion, 75711)": "person petting a horse",
        "(static, 75712)": "a person in white t - shirt"
    },
    "4382": {
        "(motion, 91559)": "horse being hugged by a person",
        "(static, 91560)": "white and brown horse"
    },
    "2861": {
        "(motion, 60004)": "a man playing tennis",
        "(static, 60005)": "a man wearing a blue shirt and white shorts"
    },
    "3954": {
        "(motion, 82306)": "a person putting gloves on",
        "(static, 82307)": "person with dark blue jumper"
    },
    "1984": {
        "(motion, 42076)": "a person being held by another person",
        "(static, 42077)": "little person on pink skiis with yellow parka on"
    },
    "2069": {
        "(motion, 43945)": "a person helping another person ski",
        "(static, 43946)": "a big person in white jumper and backpack"
    },
    "2016": {
        "(motion, 42686)": "person putting food in the oven",
        "(static, 42687)": "person in green t - shirt"
    },
    "1153": {
        "(motion, 25076)": "a giraffe , with head lowered , crosses in front of another giraffe",
        "(static, 25077)": "giraffe in the middle"
    },
    "3614": {
        "(motion, 75583)": "a man in explaining something on a tablet",
        "(static, 75584)": "a man with a blue cap and striped shirt"
    },
    "198": {
        "(motion, 3830)": "a giraffe bending down to eat grass",
        "(static, 3831)": "giraffe in front"
    },
    "3012": {
        "(motion, 63097)": "person standing with hands on hips",
        "(static, 63098)": "person in a white collared shirt and jeans"
    },
    "4247": {
        "(motion, 88808)": "man pointing toward another man",
        "(static, 88809)": "man in plaid shirt"
    },
    "2205": {
        "(motion, 46674)": "person bending over",
        "(static, 46675)": "person in red shirt and cap"
    },
    "4831": {
        "(motion, 100694)": "person holding bat in hands",
        "(static, 100695)": "person wearing light blue shirt and glass"
    },
    "4534": {
        "(motion, 94419)": "the bird not drinking",
        "(static, 94420)": "the bird on the left"
    },
    "638": {
        "(motion, 13717)": "person sitting on another person's lap and holding the remote controller",
        "(static, 13718)": "small person in red shirt"
    },
    "1419": {
        "(motion, 30082)": "person squatting on the ground to catch a ball",
        "(static, 30083)": "person in red and white wearing glove"
    },
    "1992": {
        "(motion, 42197)": "a person reaching for a cupcake",
        "(static, 42198)": "a person in a blue vest"
    },
    "542": {
        "(motion, 11877)": "man receiving food",
        "(static, 11878)": "a black man in a black shirt"
    },
    "2223": {
        "(motion, 47051)": "person sitting a chair holding a protest sign",
        "(static, 47052)": "old person in grey t - shirt and blue jeans"
    },
    "4865": {
        "(motion, 101219)": "person being held by another person",
        "(static, 101220)": "a young person wearing a yellow shirt"
    },
    "751": {
        "(motion, 16247)": "person holding a painting brush",
        "(static, 16248)": "person wearing white top and cap"
    },
    "3540": {
        "(motion, 74039)": "a man swinging a bat",
        "(static, 74040)": "a man in a blue baseball shirt and white pants"
    },
    "3765": {
        "(motion, 78908)": "person sitting",
        "(static, 78909)": "person wearing white shirt and red shoes"
    },
    "2879": {
        "(motion, 60471)": "bear standing against the fence",
        "(static, 60472)": "a small bear on the right"
    },
    "4529": {
        "(motion, 94312)": "kid holding out left arm playing wii",
        "(static, 94313)": "kid in a green and red sweatshirt"
    },
    "2131": {
        "(motion, 45308)": "man putting both hands behind his head",
        "(static, 45309)": "a man with the pool noodle"
    },
    "1306": {
        "(motion, 27841)": "a cow eating grass",
        "(static, 27842)": "the cow on the right"
    },
    "3508": {
        "(motion, 73469)": "a person standing and playing a video game",
        "(static, 73470)": "a little person dressed in brown"
    },
    "4165": {
        "(motion, 87036)": "a child holding feathers",
        "(static, 87037)": "a child wearing green t - shirt"
    },
    "4126": {
        "(motion, 86073)": "a person standing and reading a book",
        "(static, 86074)": "a person in a suit"
    },
    "388": {
        "(motion, 8339)": "a man holding up an umbrella in the rain for a man who is fixing a tire",
        "(static, 8340)": "a man wearing glasses in a red jacket"
    }
}
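Each entry keeps the key format "(<caption_type>, <sent_id>)". A sketch of a small, hypothetical parser (not in the repo) that splits those keys back into structured fields:

import json
import re

with open('make_refcoco/refcocog_google/revised_refid_part4.json') as f:
    revised = json.load(f)

key_pat = re.compile(r'\((\w+), (\d+)\)')   # matches e.g. "(motion, 101105)"
for ref_id, caps in list(revised.items())[:2]:
    for key, caption in caps.items():
        cap_type, sent_id = key_pat.match(key).groups()
        print(ref_id, cap_type, sent_id, '->', caption)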
make_refcoco/refcocog_umd/motion_split_generation.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
make_refcoco/refcocog_umd/part4_ref_id.txt
ADDED
@@ -0,0 +1,126 @@
1679
4048
2530
4385
5018
2290
2347
3143
4745
1688
944
3477
2497
4110
2011
2884
1076
4803
3508
169
258
3661
4831
2214
2266
2477
5005
2919
1850
3757
524
4363
2976
838
3044
2426
2113
2327
4727
859
935
1105
395
771
2942
41
885
4862
1246
3346
3657
540
3364
1880
1949
1620
2902
397
732
1173
2920
1643
1454
1725
2338
4249
3917
1156
1998
3571
292
3367
2069
4050
2953
4280
1743
4598
3380
3439
3355
3409
711
3764
113
518
3158
3223
914
3568
592
2856
4879
157
1774
2354
174
2369
4247
1014
1080
2272
2495
3511
3955
2409
2775
996
4789
1028
244
3538
557
1810
4982
4570
1698
3182
846
671
3254
3318
1424
3926
862
2932
make_refcoco/refcocog_umd/revised_refid_part4.json
ADDED
@@ -0,0 +1,498 @@
{
    "1679": {
        "(motion, 37582)": "player holding a baseball glove",
        "(static, 37583)": "a blurred player"
    },
    "4048": {
        "(motion, 92810)": "player hitting a ball with a baseball bat",
        "(static, 92811)": "player with number 18 on his back"
    },
    "2530": {
        "(motion, 57782)": "man crouching ready to catch a ball",
        "(static, 57783)": "man with 55 on his back"
    },
    "4385": {
        "(motion, 101410)": "man leaning on one leg watching the players",
        "(static, 101411)": "man in gray pants"
    },
    "5018": {
        "(motion, 102413)": "man standing ready to swing his bat",
        "(static, 102414)": "man in front of the other two men"
    },
    "2290": {
        "(motion, 52302)": "sheep standing in the pasture next to a sitting sheep",
        "(static, 52303)": "the front most sheep"
    },
    "2347": {
        "(motion, 53861)": "a sheep sitting down in the grass",
        "(static, 53862)": "a sheep in the background"
    },
    "3143": {
        "(motion, 71854)": "a horse being led by it ' s trainer",
        "(static, 71855)": "a horse in front of the picture"
    },
    "1688": {
        "(motion, 37818)": "zebra eating grass",
        "(static, 37819)": "the zebra in the middle with its face near the ground"
    },
    "944": {
        "(motion, 21007)": "a bird touching its neck with its right feet",
        "(static, 21008)": "a bird in the back"
    },
    "3477": {
        "(motion, 79163)": "the bird standing and looking to the left",
        "(static, 79164)": "bird with both feet in the water"
    },
    "2497": {
        "(motion, 56845)": "person holding a baseball bat",
        "(static, 56846)": "person in blue baseball cap"
    },
    "4110": {
        "(motion, 94298)": "person sitting and watching children play a ballgame",
        "(static, 94299)": "person wearing a white shirt and black leggings"
    },
    "2011": {
        "(motion, 45909)": "a woman talking on her cell phone",
        "(static, 45910)": "a blonde woman wearing a blue shirt and white shorts"
    },
    "2884": {
        "(motion, 65819)": "a woman looking at her phone",
        "(static, 65820)": "a woman with black hair wearing jeans, a striped gray shirt and flip flops"
    },
    "1076": {
        "(motion, 24000)": "person crossing a stream of water",
        "(static, 24001)": "person wearing jeans and a green vest"
    },
    "4803": {
        "(motion, 56121)": "person helping the other cross a stream",
        "(static, 56122)": "person in white dress"
    },
    "3508": {
        "(motion, 80112)": "baseball player placing his hands on his hips",
        "(static, 80113)": "a baseball player named datz"
    },
    "169": {
        "(motion, 4002)": "person feeding a giraffe",
        "(static, 4003)": "a small person in light blue shirt"
    },
    "258": {
        "(motion, 5988)": "person holding a child",
        "(static, 5989)": "person wearing glasses and navy shirt"
    },
    "3661": {
        "(motion, 83542)": "person sitting on the floor",
        "(static, 83543)": "person in a grey shirt and dark pants"
    },
    "4831": {
        "(motion, 62137)": "person sitting on couch and playing with a dog",
        "(static, 62138)": "bald person wearing jeans and brown hoodie"
    },
    "2214": {
        "(motion, 50208)": "a woman eating a donut",
        "(static, 50209)": "a brown hair woman in gray sweater"
    },
    "2266": {
        "(motion, 51661)": "a woman holding a purse",
        "(static, 51662)": "a woman with blonde hair and a black shirt"
    },
    "2477": {
        "(motion, 56429)": "girl talking and looking at another girl",
        "(static, 56430)": "girl in black"
    },
    "5005": {
        "(motion, 99824)": "girl eating and looking at her plate",
        "(static, 99825)": "girl wearing a pink shirt"
    },
    "2919": {
        "(motion, 66832)": "person riding a bike",
        "(static, 66833)": "asian person wearing black jacket"
    },
    "1850": {
        "(motion, 42078)": "man placing his hand on another man's shoulder",
        "(static, 42079)": "a man who is wearing a red color tie"
    },
    "3757": {
        "(motion, 85761)": "boy holding a cell phone",
        "(static, 85762)": "boy in a blue hoodie"
    },
    "524": {
        "(motion, 12089)": "a zebra that is not eating grass",
        "(static, 12090)": "a zebra on the far right"
    },
    "4363": {
        "(motion, 100914)": "elephant holding up its trunk",
        "(static, 100915)": "an elephant in front of another"
    },
    "2976": {
        "(motion, 68306)": "girl eating food from her right hand",
        "(static, 68307)": "a girl in a black flowered top"
    },
    "838": {
        "(motion, 18887)": "man leaning on bike on boat",
        "(static, 18888)": "a man not wearing a hat"
    },
    "3044": {
        "(motion, 69755)": "man rowing boat",
        "(static, 69756)": "a man on the left side of the picture"
    },
    "2426": {
        "(motion, 55424)": "the baseball player facing towards the right not doing a high five",
        "(static, 55425)": "baseball player in catcher ' s uniform"
    },
    "2113": {
        "(motion, 47984)": "person that is dancing",
        "(static, 47985)": "person with the thick beard, glasses and a hat"
    },
    "2327": {
        "(motion, 53376)": "person bathing another person",
        "(static, 53377)": "person in a floral print dress and hat"
    },
    "4727": {
        "(motion, 39103)": "a bull laying down",
        "(static, 39104)": "a white and brown bull on the right"
    },
    "859": {
        "(motion, 19350)": "cat sitting on a luggage and staring at the camera",
        "(static, 19351)": "cat infront of another cat"
    },
    "935": {
        "(motion, 20809)": "cat laying down on a bag",
        "(static, 20810)": "cat behind another cat"
    },
    "1105": {
        "(motion, 24654)": "an elephant stepping on a large log",
        "(static, 24655)": "elephant on far right"
    },
    "395": {
        "(motion, 8819)": "person placing her hands on one's hips",
        "(static, 8820)": "person on the far left"
    },
    "771": {
        "(motion, 17614)": "person holding a child on one's shoulders",
        "(static, 17615)": "tall person on the right"
    },
    "2942": {
        "(motion, 67334)": "person sitting on another person's shoulders",
        "(static, 67335)": "small person on the right"
    },
    "41": {
        "(motion, 961)": "a lady pouring wine in a glass",
        "(static, 962)": "a lady in black tank top"
    },
    "885": {
        "(motion, 19926)": "person feeding another person with a bottle",
        "(static, 19927)": "person in black blouse"
    },
    "4862": {
        "(motion, 69276)": "person drinking from a bottle",
        "(static, 69277)": "small person in white pajamas"
    },
    "1246": {
        "(motion, 27831)": "person holding a laptop",
        "(static, 27832)": "person with curly brown hair wearing jeans"
    },
    "3346": {
        "(motion, 76051)": "person filing her nails",
        "(static, 76052)": "person wearing a red robe and has a towel on her head"
    },
    "3657": {
        "(motion, 83493)": "person holding a bottle and listening to music",
        "(static, 83494)": "person wearing black in headphones"
    },
    "540": {
        "(motion, 12381)": "the woman is swinging the controller",
        "(static, 12382)": "woman in brown top on the right"
    },
    "3364": {
        "(motion, 76757)": "the woman looking at the camera and opening her mouth",
        "(static, 76758)": "a woman wearing a brown hooded sweatshirt on the left"
    },
    "1880": {
        "(motion, 42973)": "man looking ahead at the tv",
        "(static, 42974)": "a man in a white shirt"
    },
    "1949": {
        "(motion, 44400)": "a man looking at his phone",
        "(static, 44401)": "man in black t - shirt and cap"
    },
    "1620": {
        "(motion, 36248)": "person playing tennis",
        "(static, 36249)": "person in red tank top and black shorts"
    },
    "2902": {
        "(motion, 66297)": "person sitting and watching a tennis game",
        "(static, 66298)": "person in blue top"
    },
    "397": {
        "(motion, 8843)": "giraffe bending its head down",
        "(static, 8844)": "giraffe on the far right"
    },
    "732": {
        "(motion, 16725)": "baseball player squatting and watching closely to judge a play",
        "(static, 16726)": "baseball player in black top and gray pants"
    },
    "1173": {
        "(motion, 26074)": "a man swinging a bat",
        "(static, 26075)": "a man in blue and grey"
    },
    "2920": {
        "(motion, 66854)": "a man reaching out his left arm to catch a ball",
        "(static, 66855)": "a man in red uniform and helmet"
    },
    "1643": {
        "(motion, 36762)": "a man smiling looking down at other people",
        "(static, 36763)": "a man in a grey suite wearing a pink tie"
    },
    "1454": {
        "(motion, 32177)": "person in putting hands in one's pockets",
        "(static, 32178)": "person in gray shirt and jeans"
    },
    "1725": {
        "(motion, 38835)": "person crossing her arms walking with another person",
        "(static, 38836)": "person in a black shirt and jeans"
    },
    "2338": {
        "(motion, 53733)": "the person crouching and placing his hands on his knees",
        "(static, 53734)": "person with a black shirt and dark grey pants"
    },
    "4249": {
        "(motion, 97957)": "a baseball player reaching out his arm to catch a ball",
        "(static, 97958)": "a baseball player in green top"
    },
    "3917": {
        "(motion, 89675)": "cow looking at camera",
        "(static, 89676)": "a cow with an ear tag with the number 949 on it"
    },
    "1156": {
        "(motion, 25761)": "man sitting on the couch using a laptop",
        "(static, 25762)": "a man with a hat"
    },
    "1998": {
        "(motion, 45619)": "a person watching his phone",
        "(static, 45620)": "person wearing glasses"
    },
    "3571": {
        "(motion, 81719)": "person looking at one's phone",
        "(static, 81720)": "mature person with blonde hair and glasses"
    },
    "292": {
        "(motion, 6707)": "a zebra lying down in dirt",
        "(static, 6708)": "the zebra in the foreground"
    },
    "3367": {
        "(motion, 76808)": "a zebra standing in the zoo",
        "(static, 76809)": "a zebra in the background"
    },
    "2069": {
        "(motion, 47212)": "person leaning forward on skis",
        "(static, 47213)": "person in blue hat and jacket, black pants"
    },
    "4050": {
        "(motion, 92834)": "person standing straight looking at another person",
        "(static, 92835)": "a small person wearing purple pants"
    },
    "2953": {
        "(motion, 67711)": "person who is looking away",
        "(static, 67712)": "person in a suit"
    },
    "4280": {
        "(motion, 98813)": "person pulling another person's tie",
        "(static, 98814)": "a person in a white shirt"
    },
    "1743": {
        "(motion, 39371)": "a person holding and looking at another person",
        "(static, 39372)": "person with bald head and glasses"
    },
    "4598": {
        "(motion, 13717)": "person playing with the remote controller",
        "(static, 13718)": "small person in red shirt"
    },
    "3380": {
        "(motion, 77052)": "a person cutting a cake",
        "(static, 77053)": "a person in gray shirt that is not striped"
    },
    "3439": {
        "(motion, 78305)": "a person holding a spatula getting readyy to have a cake",
        "(static, 78306)": "a person in striped shirt"
    },
    "3355": {
        "(motion, 76309)": "a man swining his bat",
        "(static, 76310)": "a man in a baseball uniform with a brace on his left ankle"
    },
    "3409": {
        "(motion, 77608)": "a man holding out his arm to catch a ball",
        "(static, 77609)": "a man wearing a red vest with red shin guards"
    },
    "711": {
        "(motion, 16184)": "the man holding a cat in his arms",
        "(static, 16185)": "this is a man with thin rimmed glasses and a black scarf"
    },
    "3764": {
        "(motion, 85913)": "person holding a remote and smilling",
        "(static, 85914)": "person in a black t - shirt and not wearing glasses"
    },
    "113": {
        "(motion, 2741)": "a sheep being fed by a little girl",
        "(static, 2742)": "a sheep on the right"
    },
    "518": {
        "(motion, 12021)": "a sheep eating grass with its head down",
        "(static, 12022)": "a sheep on the left"
    },
    "3158": {
        "(motion, 72128)": "a boy crouching and placing both hands on his knees",
        "(static, 72129)": "boy wearing white baseball helmet , white baseball uniform with orange writing"
    },
    "3223": {
        "(motion, 73555)": "a boy pitching the ball to a player",
        "(static, 73556)": "a boy with the number 4 on his blue jersey"
    },
    "914": {
        "(motion, 20478)": "a person standing on a surf board , riding a wave",
        "(static, 20479)": "a person on the right"
    },
    "3568": {
        "(motion, 81669)": "surfer laying down",
        "(static, 81670)": "surfer on the left"
    },
    "592": {
        "(motion, 13643)": "person sits on the floor watching tv",
        "(static, 13644)": "person with a black hat and a beige shirt"
    },
    "2856": {
        "(motion, 65208)": "person sitting on a chair watching another person play video games",
        "(static, 65209)": "person in black shirt and jeans"
    },
    "4879": {
        "(motion, 73469)": "person playing a video game",
        "(static, 73470)": "blonde person dressed in brown"
    },
    "157": {
        "(motion, 3682)": "a woman holding a plate and reaching for condiments",
        "(static, 3683)": "woman wearing grey button up sweater"
    },
    "1774": {
        "(motion, 40317)": "person being held by another person",
        "(static, 40318)": "person with red hair, wearing a pink shirt"
    },
    "2354": {
        "(motion, 53948)": "person with child , catching a frisby",
        "(static, 53949)": "bigger person in white t - shirt"
    },
    "174": {
        "(motion, 4179)": "a lamb eating grass",
        "(static, 4180)": "a lamb to the left of another lamb"
    },
    "2369": {
        "(motion, 54196)": "the sheep that is looking into the camera",
        "(static, 54197)": "a white sheep with a black head on the right"
    },
    "4247": {
        "(motion, 97897)": "a woman holding an umbrella on a bench",
        "(static, 97898)": "woman on the right"
    },
    "1014": {
        "(motion, 22621)": "man receiving an award",
        "(static, 22622)": "a man in an orange and white uniform with a black cap"
    },
    "1080": {
        "(motion, 24100)": "a man offers a trophy to anothe man",
        "(static, 24101)": "a man in a suit"
    },
    "2272": {
        "(motion, 51815)": "the baseball player catching a ball",
        "(static, 51816)": "the baseball player in dark top and helmet"
    },
    "2495": {
        "(motion, 56804)": "a baseball player swinging at a ball",
        "(static, 56805)": "the baseball player in white uniform"
    },
    "3511": {
        "(motion, 80309)": "person holding a cup",
        "(static, 80310)": "person wearing pink shirt"
    },
    "3955": {
        "(motion, 90542)": "person holding a remote control",
        "(static, 90543)": "person in orange shirt"
    },
    "2409": {
        "(motion, 55054)": "a man adjusting his head band",
        "(static, 55055)": "man in orange and gray shirt"
    },
    "2775": {
        "(motion, 63273)": "a person holding a remote control",
        "(static, 63274)": "a tall person in white striped shirt and black pants"
    },
    "996": {
        "(motion, 22281)": "a woman holding a baby",
        "(static, 22282)": "woman wearing a black shirt and green apron"
    },
    "4789": {
        "(motion, 52629)": "a person holding skies in one's hands",
        "(static, 52630)": "a person with orange mirrored goggles"
    },
    "1028": {
        "(motion, 22786)": "the cow standing up",
        "(static, 22787)": "a cow in the middle"
    },
    "244": {
        "(motion, 5666)": "a man holding wine glass",
        "(static, 5668)": "a blonde man in a white shirt"
    },
    "3538": {
        "(motion, 80923)": "the man throwing the ball from the picther ' s mound",
        "(static, 80924)": "the man in front"
    },
    "557": {
        "(motion, 12739)": "a baseball player getting ready to swing the bat",
        "(static, 12740)": "a baseball player , wearing a white and blue uniform"
    },
    "4982": {
        "(motion, 95870)": "cat sitting in front of television on a stand",
        "(static, 95871)": "orange cat on the right side of the picture"
    },
    "4570": {
        "(motion, 6638)": "a woman cutting a cake",
        "(static, 6639)": "a woman wearing a long sleeve pink sweater"
    },
    "1698": {
        "(motion, 38093)": "a baseball player swinging his bat",
        "(static, 38094)": "a baseball player weaing a white uniform and blue helmet"
    },
    "3182": {
        "(motion, 72616)": "the baseball player playing the catcher position",
        "(static, 72617)": "the baseball player wearing a red and white uniform"
    },
    "846": {
        "(motion, 19100)": "a man holding a toothbrush in his mouth",
        "(static, 19101)": "a man wearing striped shirt"
    },
    "671": {
        "(motion, 15227)": "person petting a horse",
        "(static, 15228)": "person wearing a red jacket"
    },
    "3254": {
        "(motion, 74216)": "person sitting in the chair",
        "(static, 74217)": "person in the tan shirt wearing glasses"
    },
    "3318": {
        "(motion, 75539)": "the person who is smashing cake in his own face",
        "(static, 75540)": "person with a fake tie on its onesie"
    },
    "1424": {
        "(motion, 31548)": "person watching another person eat",
        "(static, 31549)": "person in the green shirt"
    },
    "3926": {
        "(motion, 89831)": "person eating a sandwich",
        "(static, 89832)": "person in orange top with sunglasses in one's head"
    },
    "862": {
        "(motion, 19444)": "a man driving a bicycle and pulling a cart behind",
        "(static, 19445)": "the man is wearing a pair of khaki shorts"
    },
    "2932": {
        "(motion, 67140)": "man standing on bike",
        "(static, 67141)": "man in blue jean shorts"
    }
}
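The refcocog_google and refcocog_umd part4 JSONs share the same schema, so they can be merged into a single lookup keyed by (split, ref_id). A sketch under that assumption (not code that ships in this repo; the counts follow from the two diffs above):

import json

merged = {}
for split in ('refcocog_google', 'refcocog_umd'):
    with open(f'make_refcoco/{split}/revised_refid_part4.json') as f:
        for ref_id, caps in json.load(f).items():
            merged[(split, ref_id)] = caps

print(len(merged))   # 126 google + 124 umd entries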
mbench/__init__.py
ADDED
File without changes
mbench/__pycache__/transforms_video.cpython-39.pyc
ADDED
Binary file (20 kB). View file
mbench/__pycache__/ytvos_ref.cpython-39.pyc
ADDED
Binary file (7.4 kB). View file
mbench/check_image.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
mbench/check_image_numbered.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
mbench/check_image_revised.ipynb
ADDED
@@ -0,0 +1,164 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import argparse\n",
    "import sys\n",
    "import opts\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib.patches as patches\n",
    "import textwrap\n",
    "\n",
    "from PIL import Image, ImageDraw\n",
    "import json\n",
    "import numpy as np\n",
    "from mbench.ytvos_ref import build as build_ytvos_ref"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "img_folder = 'data/ref-youtube-vos/train'\n",
    "text_colors = ['red', 'blue']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('mbench/result_revised50.json') as file:\n",
    "    data = json.load(file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "def bounding_box(img):\n",
    "    rows = np.any(img, axis=1)\n",
    "    cols = np.any(img, axis=0)\n",
    "    rmin, rmax = np.where(rows)[0][[0, -1]]\n",
    "    cmin, cmax = np.where(cols)[0][[0, -1]]\n",
    "    return rmin, rmax, cmin, cmax  # y1, y2, x1, x2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [],
   "source": [
    "def showImageRef(vid_id):\n",
    "    vid_data = data[vid_id]\n",
    "    cats = list(vid_data.keys())\n",
    "\n",
    "    for cat in cats:\n",
    "        cat_data = vid_data[cat]\n",
    "        frames = list(cat_data.keys())\n",
    "\n",
    "        for frame in frames:\n",
    "            frame_data = cat_data[frame]\n",
    "\n",
    "            img_path = os.path.join(img_folder, 'JPEGImages', vid_id, frame + '.jpg')\n",
    "            mask_path = os.path.join(img_folder, 'Annotations', vid_id, frame + '.png')\n",
    "            img = Image.open(img_path).convert('RGB')\n",
    "            mask = Image.open(mask_path).convert('P')\n",
    "            mask = np.array(mask)\n",
    "\n",
    "            if frame_data:\n",
    "                obj_ids = list(frame_data.keys())\n",
    "                obj_nums = len(obj_ids)\n",
    "\n",
    "                fig, axes = plt.subplots(1, obj_nums, figsize=(16, obj_nums))\n",
    "\n",
    "                for i in range(len(obj_ids)):\n",
    "                    obj_id = obj_ids[i]\n",
    "                    obj_data = frame_data[obj_id]\n",
    "                    if obj_data:\n",
    "                        ref_exp = obj_data['ref_exp']\n",
    "                        isValid = obj_data['isValid']\n",
    "\n",
    "                        obj_mask = (mask == int(obj_id)).astype(np.float32)\n",
    "                        if (obj_mask > 0).any():\n",
    "                            y1, y2, x1, x2 = bounding_box(obj_mask)\n",
    "                            box = np.array([x1, y1, x2, y2])\n",
    "                        else:\n",
    "                            box = np.array([0, 0, 0, 0])\n",
    "\n",
    "                        if obj_nums == 1:\n",
    "                            ax = axes\n",
    "                        else:\n",
    "                            ax = axes[i]\n",
    "                        ax.imshow(img)\n",
    "                        width, height = box[2] - box[0], box[3] - box[1]\n",
    "                        # use box[0], box[1] (not x1, y1, which are stale or undefined for empty masks)\n",
    "                        rect = patches.Rectangle((box[0], box[1]), width, height, linewidth=2, edgecolor='red', facecolor='none')\n",
    "                        ax.add_patch(rect)\n",
    "\n",
    "                        wrapped_text = \"\\n\".join(textwrap.wrap(ref_exp, width=30))\n",
    "                        ax.annotate(wrapped_text, xy=(0.5, -1.5), xycoords=\"axes fraction\", ha=\"center\", color=text_colors[isValid])\n",
    "\n",
    "                plt.suptitle(f\"video: {vid_id} - cat: {cat} - frame: {frame}\")\n",
    "                plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "04667fabaa\n"
     ]
    }
   ],
   "source": [
    "vid_id = list(data.keys())[49]\n",
    "print(vid_id)\n",
    "showImageRef(vid_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "referformer",
   "language": "python",
   "name": "referformer"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
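The bounding_box helper in this notebook derives an axis-aligned box from a binary mask by taking the first and last nonzero row and column. A self-contained toy run (not a cell from the diff) showing the inclusive y1, y2, x1, x2 convention:

import numpy as np

def bounding_box(img):
    rows = np.any(img, axis=1)                    # which rows contain object pixels
    cols = np.any(img, axis=0)                    # which columns contain object pixels
    rmin, rmax = np.where(rows)[0][[0, -1]]
    cmin, cmax = np.where(cols)[0][[0, -1]]
    return rmin, rmax, cmin, cmax                 # y1, y2, x1, x2

mask = np.zeros((8, 8), dtype=np.float32)
mask[2:5, 3:7] = 1.0                              # object occupies rows 2-4, cols 3-6
print(bounding_box(mask))                         # (2, 4, 3, 6): inclusive pixel bounds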
mbench/gpt_ref-ytvos-revised.py
ADDED
@@ -0,0 +1,428 @@
import sys
from os import path as osp
sys.path.append(osp.abspath(osp.join(osp.dirname(__file__), '..')))

from mbench.ytvos_ref import build as build_ytvos_ref
import argparse
import opts

from pathlib import Path
import os
import skimage
from io import BytesIO

import numpy as np
import pandas as pd
import regex as re
import json

import cv2
from PIL import Image, ImageDraw
import torch
from torchvision.transforms import functional as F

from skimage import measure  # (pip install scikit-image)
from shapely.geometry import Polygon, MultiPolygon  # (pip install Shapely)

import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.collections import PatchCollection
from matplotlib.patches import Rectangle

import ipywidgets as widgets
from IPython.display import display, clear_output

from openai import OpenAI
import base64

# Function to encode the image
def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")

# Captioner
ytvos_category_valid_list = [
    'airplane', 'ape', 'bear', 'bike', 'bird', 'boat', 'bus', 'camel', 'cat', 'cow', 'crocodile',
    'deer', 'dog', 'dolphin', 'duck', 'eagle', 'earless_seal', 'elephant', 'fish', 'fox', 'frog',
    'giant_panda', 'giraffe', 'hedgehog', 'horse', 'leopard', 'lion', 'lizard',
    'monkey', 'motorbike', 'mouse', 'owl', 'parrot', 'penguin', 'person',
    'rabbit', 'raccoon', 'sedan', 'shark', 'sheep', 'snail', 'snake',
    'squirrel', 'tiger', 'train', 'truck', 'turtle', 'whale', 'zebra'
]

def getCaption(video_id, json_data):
    # Fetch this video's data
    video_data = json_data[video_id]
    frame_names = video_data['frame_names']
    video_path = video_data['video_path']

    cat_names = set()
    all_captions = dict()
    for obj_id in list(video_data['annotations'][0].keys()):
        cat_names.add(video_data['annotations'][0][obj_id]['category_name'])

    # cat_names : person, snowboard
    # Option 1: ask GPT directly whether the category can be the subject of an action
    # Option 2: keep only the category names we want to handle from the category list provided by Ref-YouTube-VOS

    for cat_name in list(cat_names):
        image_paths = [os.path.join(video_path, frame_name + '.jpg') for frame_name in frame_names]
        image_captions = {}

        captioner = OpenAI()

        # Step 0: can this category be the subject of an action?
        is_movable = False
        if cat_name in ytvos_category_valid_list:
            is_movable = True

        # response_check = captioner.chat.completions.create(
        #     model="gpt-4o",
        #     messages=[
        #         {
        #             "role": "user",
        #             "content": f"""
        #             Can a {cat_name} be a subject of distinct actions or movements?
        #             For example, if {cat_name} is a person, animal, or vehicle, it is likely an action-capable subject.
        #             However, if it is an inanimate object like a snowboard, tree, or book, it cannot independently perform actions.
        #             Respond with YES if {cat_name} can perform distinct actions or movements; otherwise, respond with NONE.
        #             Answer only YES or NONE.
        #             """
        #         }
        #     ],
        # )
        # response_check_content = response_check.choices[0].message.content.strip().lower()
        # print(f"Movable Check for {cat_name}: {response_check_content}")

        # if response_check_content == "yes": is_movable = True

        if not is_movable:
            print(f"Skipping {cat_name}: Determined to be non-movable.")
            continue

        for i in range(len(image_paths)):
            image_path = image_paths[i]
            frame_name = frame_names[i]
            base64_image = encode_image(image_path)

            # Step 1: filtering
            #print(f"-----------category name: {cat_name}, frame name: {frame_name}")
            response1 = captioner.chat.completions.create(
                model="chatgpt-4o-latest",
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": f"""Are there multiple {cat_name}s in the image, each performing distinct and recognizable actions?
                                Focus only on clear and prominent actions, avoiding minor or ambiguous ones.
                                Each action should be unique and clearly associated with a specific object.

                                Respond with YES if:
                                - The {cat_name}s are people, animals or vehicles, and their actions are distinct and recognizable.
                                - The {cat_name}s involve clear, distinguishable actions performed independently.

                                Respond with NONE if:
                                - The {cat_name}s are objects (e.g., snowboard, tree, books) and do not involve direct interaction with a person.
                                - Actions are ambiguous, minor, or not clearly visible.

                                If the {cat_name} is 'snowboard' and it is not actively being used or interacted with by a person, output NONE.
                                If the {cat_name} is 'person' and their actions are distinct and clear, output YES.

                                Answer only YES or NONE."""
                            },
                            {
                                "type": "image_url",
                                "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                            },
                        ],
                    }
                ],
            )
            response_content = response1.choices[0].message.content
            should_caption = True if "yes" in response_content.lower() else False
            #print(f"are {cat_name}s distinguished by action: {response_content}")

            # Step 2: build the dense caption
            if should_caption:
                response2 = captioner.chat.completions.create(
                    model="chatgpt-4o-latest",
                    messages=[
                        {
                            "role": "user",
                            "content": [
                                {
                                    "type": "text",
                                    "text": f"""
                                    Generate a detailed action-centric caption describing the actions of the {cat_name}s in the image.
                                    1. Focus only on clear, unique, and prominent actions that distinguish each object.
                                    2. Avoid describing actions that are too minor, ambiguous, or not visible from the image.
                                    3. Avoid subjective terms such as 'skilled', 'controlled', or 'focused'. Only describe observable actions.
                                    4. Do not include common-sense or overly general descriptions like 'the elephant walks'.
                                    5. Use dynamic action verbs (e.g., holding, throwing, jumping, inspecting) to describe interactions, poses, or movements.
                                    6. Avoid overly detailed or speculative descriptions such as 'slightly moving its mouth' or 'appears to be anticipating'.
                                    7. Pretend you are observing the scene directly, avoiding phrases like 'it seems' or 'based on the description'.
                                    8. Include interactions with objects or other entities when they are prominent and observable.
                                    9. If the image contains multiple {cat_name}s, describe the actions of each individually and ensure the descriptions are non-overlapping and specific.
                                    Output only the caption.""",
                                },
                                {
                                    "type": "image_url",
                                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                                },
                            ],
                        }
                    ],
                )

                caption = response2.choices[0].message.content
                #print(f"{image_path} - {frame_name}: {caption}")
            else:
                caption = None

            image_captions[frame_name] = caption
        all_captions[cat_name] = image_captions

    # final : also prepare valid object ids
    valid_obj_ids = []
    valid_cat_names = list(all_captions.keys())
    for obj_id in list(video_data['annotations'][0].keys()):
        cat = video_data['annotations'][0][obj_id]['category_name']
        if cat in valid_cat_names:
            valid_obj_ids.append(obj_id)

    return all_captions, valid_obj_ids

# Referring expression generator and QA filter
def getRefExp(video_id, frame_name, caption, obj_id, json_data):

    # Draw this object's bounding box on the image
    video_data = json_data[video_id]
    frame_names = video_data['frame_names']
    video_path = video_data['video_path']
    I = skimage.io.imread(osp.join(video_path, frame_name + '.jpg'))
    frame_indx = frame_names.index(frame_name)
    obj_data = video_data['annotations'][frame_indx][obj_id]

    bbox = obj_data['bbox']
    cat_name = obj_data['category_name']
    valid = obj_data['valid']

    if valid == 0:
        print("Object not in this frame!")
        return {}

    x_min, y_min, x_max, y_max = bbox
    x_min, y_min, x_max, y_max = int(x_min), int(y_min), int(x_max), int(y_max)
    cv2.rectangle(I, (x_min, y_min), (x_max, y_max), (225, 0, 0), 2)
    plt.figure()
    plt.imshow(I)
    plt.axis('off')
    plt.show()

    # cropped object for visibility check
    cropped_I = I[y_min:y_max, x_min:x_max]
    pil_cropped_I = Image.fromarray(cropped_I)
    buff_crop = BytesIO()
    pil_cropped_I.save(buff_crop, format='JPEG')
    base64_cropped_I = base64.b64encode(buff_crop.getvalue()).decode("utf-8")

    # entire image for referring expression generation
    pil_I = Image.fromarray(I)
    buff = BytesIO()
    pil_I.save(buff, format='JPEG')
    base64_I = base64.b64encode(buff.getvalue()).decode("utf-8")

    # Check whether the object is identifiable at all
    generator = OpenAI()
    response_check = generator.chat.completions.create(
        model="chatgpt-4o-latest",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"""Can the {cat_name} in the provided cropped image be clearly identified as belonging to the category {cat_name}?
                        Focus on whether the cropped image provides enough visible features (e.g., ears, head shape, fur texture) to confirm that it is a {cat_name}, even if the full body is not visible.

                        Guidelines:
                        - If the visible features (like ears, fur texture or head shape) are sufficient to identify the {cat_name}, respond with YES.
                        - If multiple {cat_name}s are entangled or overlapping, making it difficult to distinguish one from another, respond with NONE.
                        - If the object is clearly visible and identifiable as a {cat_name}, respond with YES.

                        Output only either YES or NONE.
                        """
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{base64_cropped_I}"},
                    }
                ]
            },
        ]
    )

    response_check_content = response_check.choices[0].message.content.strip().lower()
    #print(f"is object {obj_id} visible: {response_check_content}")

    if "yes" not in response_check_content:
        print(f"Referring expression not generated: {cat_name} is ambiguous in this frame.")
        return {"ref_exp": "NONE", "caption": caption, "cat_name": cat_name, "file_name": frame_name, "isValid": False}

    # Build the referring expression
    # generator = OpenAI()
    response = generator.chat.completions.create(
        model="chatgpt-4o-latest",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"""Based on the dense caption, create a referring expression for the {cat_name} highlighted with the red box, corresponding to Object ID {obj_id}.
                        Guidelines for creating the referring expression:
                        1. The referring expression should describe the prominent actions or poses of the highlighted {cat_name} (Object ID {obj_id}).
                        2. Focus on the behavior or pose described in the caption that is specifically associated with this {cat_name}. Do not include actions or poses of other {cat_name}s.
                        3. If multiple {cat_name}s are present, ensure that the referring expression exclusively describes the {cat_name} corresponding to Object ID {obj_id}.
                        4. Avoid ambiguous or subjective terms. Use specific and clear action verbs to describe the highlighted {cat_name}.
                        5. The referring expression should only describe Object ID {obj_id} and not any other objects or entities.
                        6. Use '{cat_name}' as the noun for the referring expressions.
                        Output only the referring expression for the highlighted {cat_name} (Object ID {obj_id}).

                        {caption}
                        """
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{base64_I}"},
                    },
                    # {
                    #     "type": "image_url",
                    #     "image_url": {"url": f"data:image/jpeg;base64,{base64_cropped_I}"},
                    # }
                ],
            }
        ],
    )

    ref_exp = response.choices[0].message.content.strip()

    # QA filtering
    # QA1: does the expression describe the intended object?
    filter = OpenAI()
    response1 = filter.chat.completions.create(
        model="chatgpt-4o-latest",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"""Does the given expression describe the {cat_name} highlighted with the red box? If so, only return YES and if not, NO.
                        {ref_exp}""",
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{base64_I}"},
                    },
                ],
            }
        ],
    )

    response1_content = response1.choices[0].message.content
    describesHighlighted = True if "yes" in response1_content.lower() else False

    # QA2: does the expression avoid describing any unintended object?
    response2 = filter.chat.completions.create(
        model="chatgpt-4o-latest",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"""Does the given expression describe the person not highlighted with the red box? If so, only return YES and if not, NO.
                        {ref_exp}""",
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{base64_I}"},
                    },
                ],
            }
        ],
    )

    response2_content = response2.choices[0].message.content
    notDescribesNotHighlighted = False if "yes" in response2_content.lower() else True

    isValid = True if describesHighlighted and notDescribesNotHighlighted else False

    #print(f"describesHighlighted: {describesHighlighted}, notDescribesNotHighlighted: {notDescribesNotHighlighted}")
    #print(f"ref exp: {ref_exp}")
    #print("")

    return {"ref_exp": ref_exp, "caption": caption, "cat_name": cat_name, "file_name": frame_name, "isValid": isValid}


if __name__ == '__main__':
    with open('mbench/sampled_frame3.json', 'r') as file:
        data = json.load(file)

    vid_ids = list(data.keys())
    all_ref_exps = {}

    # NOTE: do not hardcode a real API key; set OPENAI_API_KEY in the environment instead.
    os.environ['OPENAI_API_KEY'] = 'sk-proj-...'

    # For every vid_id in the dataset
    for i in range(50):
        vid_id = vid_ids[i]

        # ==== build the captions ====
        # print("=====================captioner========================")
        captions, valid_obj_ids = getCaption(vid_id, data)
        cats_in_vid = list(captions.keys())
        # print()

        # ==== build referring expressions and run QA filtering ====
        # print("=====================referring expression generator & QA filter========================")
        ref_expressions = {}

        # For each category
        for cat_name in cats_in_vid:
            if cat_name not in ref_expressions:
                ref_expressions[cat_name] = {}
            # For each video frame
            for frame_name in data[vid_id]['frame_names']:
                # print(f'--------category: {cat_name}, frame_name: {frame_name}')

                if frame_name not in ref_expressions[cat_name]:
                    ref_expressions[cat_name][frame_name] = {}  # Create frame-level dictionary
                caption = captions[cat_name][frame_name]
                if not caption:
                    continue
                else:
                    # For each object id
                    for obj_id in valid_obj_ids:
                        ref_exp = getRefExp(vid_id, frame_name, caption, obj_id, data)
                        ref_expressions[cat_name][frame_name][obj_id] = ref_exp  # Store ref_exp

        all_ref_exps[vid_id] = ref_expressions

    with open('mbench/result_revised50.json', 'w') as file:
        json.dump(all_ref_exps, file, indent=4)
mbench/gpt_ref-ytvos.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
mbench/gpt_ref-ytvos.py
ADDED
@@ -0,0 +1,302 @@
import sys
from os import path as osp
sys.path.append(osp.abspath(osp.join(osp.dirname(__file__), '..')))

from datasets import build_dataset
import argparse
import opts

from pathlib import Path
import os
import skimage
from io import BytesIO

import numpy as np
import pandas as pd
import regex as re
import json

import cv2
from PIL import Image, ImageDraw
import torch
from torchvision.transforms import functional as F

from skimage import measure  # (pip install scikit-image)
from shapely.geometry import Polygon, MultiPolygon  # (pip install Shapely)

import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.collections import PatchCollection
from matplotlib.patches import Rectangle

import ipywidgets as widgets
from IPython.display import display, clear_output

from openai import OpenAI
import base64

# NOTE: do not hardcode a real API key; set OPENAI_API_KEY in the environment instead.
os.environ['OPENAI_API_KEY'] = 'sk-proj-...'


ytvos_category_valid_list = [
    'airplane', 'ape', 'bear', 'bike', 'bird', 'boat', 'bus', 'camel', 'cat', 'cow', 'crocodile',
    'deer', 'dog', 'dolphin', 'duck', 'eagle', 'earless_seal', 'elephant', 'fish', 'fox', 'frog',
    'giant_panda', 'giraffe', 'hedgehog', 'horse', 'leopard', 'lion', 'lizard',
    'monkey', 'motorbike', 'mouse', 'owl', 'parrot', 'penguin', 'person',
    'rabbit', 'raccoon', 'sedan', 'shark', 'sheep', 'snail', 'snake',
    'squirrel', 'tiger', 'train', 'truck', 'turtle', 'whale', 'zebra'
]

# Function to encode the image
def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")

def getCaption(video_id, json_data):
    # Fetch this video's data
    video_data = json_data[video_id]
    frame_names = video_data['frame_names']
    video_path = video_data['video_path']

    cat_names = set()
    for obj_id in list(video_data['annotations'][0].keys()):
        cat_names.add(video_data['annotations'][0][obj_id]['category_name'])

    if len(cat_names) == 1:
        cat_name = next(iter(cat_names))
    else:
        print("more than one category")
        return -1

    image_paths = [os.path.join(video_path, frame_name + '.jpg') for frame_name in frame_names]
    image_captions = {}

    captioner = OpenAI()
    for i in range(len(image_paths)):
        image_path = image_paths[i]
        frame_name = frame_names[i]
        base64_image = encode_image(image_path)

        # Step 1: filtering
        response1 = captioner.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": f"Are there multiple {cat_name}s that can be distinguished by action? Each action should be prominent and describe the corresponding object only. If so, only output YES. If not, only output None",
                        },
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                        },
                    ],
                }
            ],
        )
        response_content = response1.choices[0].message.content
        should_caption = True if "yes" in response_content.lower() else False

        # Step 2: build the dense caption
        if should_caption:
            response2 = captioner.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": f"""
                                Describe the image in detail focusing on the {cat_name}s' actions.
                                1. Each action should be prominent, clear and unique, describing the corresponding object only.
                                2. Avoid overly detailed or indeterminate details such as ‘in anticipation’.
                                3. Avoid subjective descriptions such as ‘soft’, ‘controlled’, ‘attentive’, ‘skilled’, ‘casual atmosphere’ and descriptions of the setting.
                                4. Do not include actions that need to be guessed or suggested.""",
                            },
                            {
                                "type": "image_url",
                                "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                            },
                        ],
                    }
                ],
            )

            caption = response2.choices[0].message.content
        else:
            caption = None

        image_captions[frame_name] = caption
    return image_captions

def getRefExp(video_id, frame_name, caption, obj_id, json_data):
    # Draw this object's bounding box on the image
    video_data = json_data[video_id]
    frame_names = video_data['frame_names']
    video_path = video_data['video_path']
    I = skimage.io.imread(osp.join(video_path, frame_name + '.jpg'))
    frame_indx = frame_names.index(frame_name)
    obj_data = video_data['annotations'][frame_indx][obj_id]

    bbox = obj_data['bbox']
    cat_name = obj_data['category_name']
    valid = obj_data['valid']

    if valid == 0:
        print("Object not in this frame!")
        return {}

    x_min, y_min, x_max, y_max = bbox
    x_min, y_min, x_max, y_max = int(x_min), int(y_min), int(x_max), int(y_max)
    cv2.rectangle(I, (x_min, y_min), (x_max, y_max), (225, 0, 0), 2)
    # plt.figure()
    # plt.imshow(I)
    # plt.axis('off')
    # plt.show()
    pil_I = Image.fromarray(I)
    buff = BytesIO()
    pil_I.save(buff, format='JPEG')
    base64_I = base64.b64encode(buff.getvalue()).decode("utf-8")

    # Build the referring expression
    generator = OpenAI()
    response = generator.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"""Based on the dense caption, create a referring expression for the {cat_name} highlighted with the red box.
                        1. The referring expression describes the action and does not contain information about appearance or location in the picture.
                        2. Focus only on prominent actions and avoid overly detailed or indeterminate details.
                        3. Avoid subjective terms describing emotion such as ‘in anticipation’, ‘attentively’ or ‘relaxed’ and professional, difficult words.
                        4. The referring expression should only describe the highlighted {cat_name} and not any other.
                        5. Use '{cat_name}' as the noun for the referring expressions.
                        Output only the referring expression.
                        {caption}""",
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{base64_I}"},
                    },
                ],
            }
        ],
    )

    ref_exp = response.choices[0].message.content

    # QA filtering
    # QA1: does the expression describe the intended object?
    filter = OpenAI()
    response1 = filter.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"""Does the given expression describe the {cat_name} highlighted with the red box? If so, only return YES and if not, NO.
                        {ref_exp}""",
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{base64_I}"},
                    },
                ],
            }
        ],
    )

    response1_content = response1.choices[0].message.content
    describesHighlighted = True if "yes" in response1_content.lower() else False

    # QA2: does the expression avoid describing any unintended object?
    response2 = filter.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"""Does the given expression describe the person not highlighted with the red box? If so, only return YES and if not, NO.
                        {ref_exp}""",
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{base64_I}"},
                    },
                ],
            }
        ],
    )

    response2_content = response2.choices[0].message.content
    describesNotHighlighted = True if "yes" in response2_content.lower() else False

    isValid = True if describesHighlighted and not describesNotHighlighted else False

    print(f"describesHighlighted: {describesHighlighted}, describesNotHighlighted: {describesNotHighlighted}")

    return {"ref_exp": ref_exp, "caption": caption, "cat_name": cat_name, "file_name": frame_name, "isValid": isValid}

def createRefExp(video_id, json_data):
    video_data = json_data[video_id]
    obj_ids = list(video_data['annotations'][0].keys())
    frame_names = video_data['frame_names']

    captions_per_frame = getCaption(video_id, json_data)

    if captions_per_frame == -1:
        print("There is more than one category")
        return None

    video_ref_exps = {}

    for frame_name in frame_names:
        frame_caption = captions_per_frame[frame_name]

        if frame_caption is None:
            video_ref_exps[frame_name] = None
        else:
            frame_ref_exps = {}
            for obj_id in obj_ids:
                exp_per_obj = getRefExp(video_id, frame_name, frame_caption, obj_id, json_data)
                frame_ref_exps[obj_id] = exp_per_obj
            video_ref_exps[frame_name] = frame_ref_exps

    return video_ref_exps

if __name__ == '__main__':
    with open('mbench/sampled_frame3.json', 'r') as file:
        data = json.load(file)

    videos = set()
    with open('make_ref-ytvos/selected_frames.jsonl', 'r') as file:
        manual_select = list(file)
        for frame in manual_select:
            result = json.loads(frame)
            videos.add(result['video'])
    videos = list(videos)

    all_video_refs = {}
    for i in range(10):
        video_id = videos[i]
        video_ref = createRefExp(video_id, data)
        all_video_refs[video_id] = video_ref

    json_obj = json.dumps(all_video_refs, indent=4)
    with open('mbench/result.json', 'w') as file:
        file.write(json_obj)
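Note that every YES/NO check above accepts any reply containing the substring "yes", so an answer like "No, although yes for the other person" would be miscounted as a pass. A stricter parse, shown here as a sketch rather than what the script currently does, keys off the first token only:

def is_yes(answer: str) -> bool:
    # Accept only replies that actually begin with YES (e.g. "YES", "Yes.").
    return answer.strip().lower().startswith("yes")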
mbench/gpt_ref-ytvos_numbered_cy.py
ADDED
@@ -0,0 +1,460 @@
import os
import sys
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
import time

from os import path as osp
from io import BytesIO

from mbench.ytvos_ref import build as build_ytvos_ref
import argparse
import opts

from pathlib import Path
import skimage

import numpy as np
import pandas as pd
import regex as re
import json

import cv2
from PIL import Image, ImageDraw
import torch
from torchvision.transforms import functional as F

from skimage import measure  # (pip install scikit-image)
from shapely.geometry import Polygon, MultiPolygon  # (pip install Shapely)

import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.collections import PatchCollection
from matplotlib.patches import Rectangle
import textwrap

import ipywidgets as widgets
from IPython.display import display, clear_output

from openai import OpenAI
import base64

def number_objects_and_encode(idx, color_mask=False):
    encoded_frames = {}
    contoured_frames = {}  # New dictionary for original images
    vid_cat_cnts = {}

    vid_meta = metas[idx]
    vid_data = train_dataset[idx]
    vid_id = vid_meta['video']
    frame_indx = vid_meta['sample_indx']
    cat_names = set(vid_meta['obj_id_cat'].values())
    imgs = vid_data[0]

    for cat in cat_names:
        cat_frames = []
        contour_frames = []
        frame_cat_cnts = {}

        for i in range(imgs.size(0)):
            frame_name = frame_indx[i]
            frame = np.copy(imgs[i].permute(1, 2, 0).numpy())
            frame_for_contour = np.copy(imgs[i].permute(1, 2, 0).numpy())

            frame_data = vid_data[2][frame_name]
            obj_ids = list(frame_data.keys())

            cat_cnt = 0

            for j in range(len(obj_ids)):
                obj_id = obj_ids[j]
                obj_data = frame_data[obj_id]
                obj_bbox = obj_data['bbox']
                obj_valid = obj_data['valid']
                obj_mask = obj_data['mask'].numpy().astype(np.uint8)
                obj_cat = obj_data['category_name']

                if obj_cat == cat and obj_valid:
                    cat_cnt += 1

                    if not color_mask:
                        contours, _ = cv2.findContours(obj_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                        cv2.drawContours(frame, contours, -1, colors[j], 3)
                        for k, contour in enumerate(contours):  # `k` instead of `i`, which would shadow the frame index
                            # Compute the contour centroid
                            moments = cv2.moments(contour)
                            if moments["m00"] != 0:  # the centroid only exists for non-degenerate contours
                                cx = int(moments["m10"] / moments["m00"])
                                cy = int(moments["m01"] / moments["m00"])
                            else:
                                cx, cy = contour[0][0]  # fall back to a contour point

                            # Black backdrop behind the numeric ID
                            font = cv2.FONT_HERSHEY_SIMPLEX
                            text = obj_id
                            text_size = cv2.getTextSize(text, font, 1, 2)[0]
                            text_w, text_h = text_size

                            # Draw the text background (black)
                            cv2.rectangle(frame, (cx - text_w // 2 - 5, cy - text_h // 2 - 5),
                                          (cx + text_w // 2 + 5, cy + text_h // 2 + 5), (0, 0, 0), -1)

                            # Draw the text itself (white)
                            cv2.putText(frame, text, (cx - text_w // 2, cy + text_h // 2),
                                        font, 1, (255, 255, 255), 2)

                    else:
                        alpha = 0.08

                        colored_obj_mask = np.zeros_like(frame)
                        colored_obj_mask[obj_mask == 1] = colors[j]
                        frame[obj_mask == 1] = (
                            (1 - alpha) * frame[obj_mask == 1]
                            + alpha * colored_obj_mask[obj_mask == 1]
                        )

                        contours, _ = cv2.findContours(obj_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                        cv2.drawContours(frame, contours, -1, colors[j], 2)
                        cv2.drawContours(frame_for_contour, contours, -1, colors[j], 2)

                        if len(contours) > 0:
                            largest_contour = max(contours, key=cv2.contourArea)
                            M = cv2.moments(largest_contour)
                            if M["m00"] != 0:
                                center_x = int(M["m10"] / M["m00"])
                                center_y = int(M["m01"] / M["m00"])
                            else:
                                center_x, center_y = 0, 0

                            font = cv2.FONT_HERSHEY_SIMPLEX
                            text = obj_id

                            font_scale = 0.9
                            text_size = cv2.getTextSize(text, font, font_scale, 2)[0]
                            text_x = center_x - text_size[0] // 1  # horizontal anchor of the text
                            text_y = center_y
                            # text_y = center_y + text_size[1] // 2  # vertical center of the text

                            # Coordinates of the text background rectangle
                            rect_start = (text_x - 5, text_y - text_size[1] - 5)  # top-left of the backdrop
                            # rect_end = (text_x + text_size[0] + 5, text_y + 5)
                            rect_end = (text_x + text_size[0] + 5, text_y)

                            cv2.rectangle(frame, rect_start, rect_end, (0, 0, 0), -1)
                            cv2.putText(frame, text, (text_x, text_y), font, 1, (255, 255, 255), 2)

            # plt.figure(figsize=(12, 8))
            # plt.imshow(frame)
            # plt.title(f"frame {frame_name}")
            # plt.tight_layout()
            # plt.axis('off')
            # plt.show()

            buffer = BytesIO()
            frame = Image.fromarray(frame)
            frame.save(buffer, format='jpeg')
            buffer.seek(0)
            cat_frames.append(base64.b64encode(buffer.read()).decode("utf-8"))
            frame_cat_cnts[frame_name] = cat_cnt

            buffer.seek(0)  # Reuse buffer instead of creating a new one
            buffer.truncate()
            frame_for_contour = Image.fromarray(frame_for_contour)
            frame_for_contour.save(buffer, format='jpeg')
            buffer.seek(0)
            contour_frames.append(base64.b64encode(buffer.read()).decode("utf-8"))

        encoded_frames[cat] = cat_frames
        contoured_frames[cat] = contour_frames
        vid_cat_cnts[cat] = frame_cat_cnts

    return encoded_frames, vid_cat_cnts, contoured_frames


def getCaption(idx, model='gpt-4o', color_mask=True):
    vid_meta = metas[idx]
    vid_data = train_dataset[idx]
    vid_id = vid_meta['video']
    print(f"vid id: {vid_id}\n")

    frame_indx = vid_meta['sample_indx']              # e.g. [4, 7, 9, 16]
    cat_names = set(vid_meta['obj_id_cat'].values())  # e.g. {"person", "elephant", ...}
    all_captions = dict()

    base64_frames, vid_cat_cnts, contoured_frames = number_objects_and_encode(idx, color_mask)
    #marked = "mask with boundary" if color_mask else "boundary"

    for cat_name in list(cat_names):

        is_movable = False
        if cat_name in ytvos_category_valid_list:
            is_movable = True

        if not is_movable:
            print(f"Skipping {cat_name}: Determined to be non-movable.", end='\n\n')
            # note: no `continue` here; non-movable categories fall through with should_caption=False

        image_captions = {}
        captioner = OpenAI()
        cat_base64_frames = base64_frames[cat_name]
        cont_base64_frames = contoured_frames[cat_name]

        for i in range(len(cat_base64_frames)):
            frame_name = frame_indx[i]
            cont_base64_image = cont_base64_frames[i]
            base64_image = cat_base64_frames[i]
            should_filter = False
            frame_cat_cnts = vid_cat_cnts[cat_name][frame_name]

            if frame_cat_cnts >= 2:
                should_filter = True
            else:
                print(f"Skipping {cat_name}: There is a single object or no object.", end='\n\n')

            if is_movable and should_filter:
                # Step 1: filtering
                print(f"-----------category name: {cat_name}, frame name: {frame_name}")
                caption_filter_text = f"""
                You are a visual assistant analyzing a single frame from a video.
                In this frame, I have labeled {frame_cat_cnts} {cat_name}(s), each with a bright numeric ID at its center and a visible marker.

                Are {cat_name}s in the image performing all different and recognizable actions or postures?
                Consider differences in body pose (standing, sitting, holding hands up, grabbing object, facing the camera, stretching, walking...), motion cues (inferred from the momentary stance or position),
                facial expressions, and any notable interactions with objects or other {cat_name}s or people.

                Only focus on obvious, prominent actions that can be reliably identified from this single frame.

                - Respond with "YES" if:
                1) Most of {cat_name}s exhibit clearly different, unique actions or poses.
                (e.g. standing, sitting, bending, stretching, showing its back, or turning toward the camera.)
                2) You can see visible significant differences in action and posture, that an observer can identify at a glance.
                3) Interaction Variability: Each {cat_name} is engaged in a different type of action, such as one grasping an object while another is observing.

                - Respond with "NONE" if:
                1) The actions or pose are not clearly differentiable or too similar.
                2) Minimal or Ambiguous Motion: The frame does not provide clear evidence of distinct movement beyond subtle shifts in stance.
                3) Passive or Neutral Poses: If multiple {cat_name}(s) are simply standing or sitting without an obvious difference in orientation or motion

                Answer strictly with either "YES" or "NONE".
                """

                response1 = captioner.chat.completions.create(
                    model=model,
                    messages=[
                        {
                            "role": "user",
                            "content": [
                                {
                                    "type": "text",
                                    "text": caption_filter_text,
                                },
                                {
                                    "type": "image_url",
                                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                                }
                            ],
                        }
                    ],
                )
                response_content = response1.choices[0].message.content
                should_caption = True if "yes" in response_content.lower() else False
                print(f"are {cat_name}s distinguished by action: {response_content}", end='\n\n')

            else:
                should_caption = False

            # Step 2: build the dense caption
            # unused earlier draft of the dense-caption prompt, kept for reference
            dense_caption_prompt_1 = f"""You are a visual assistant that can analyze a single frame of a video and create referring expressions for each object.
            In the given frame, I labeled {frame_cat_cnts} {cat_name}s by marking each with a bright numeric ID at the center and its boundary.
            I want to use your expressions to create a action-centric referring expression dataset.
            Therefore, your expressions for these {cat_name}s should describe unique action of each object.

            1. Focus only on clear, unique, and prominent actions that distinguish each object.
            2. Avoid describing actions that are too minor, ambiguous, or not visible from the image.
            3. Avoid subjective terms such as 'skilled', 'controlled', or 'focused'. Only describe observable actions.
            4. Do not include common-sense or overly general descriptions like 'the elephant walks'.
            5. Use dynamic action verbs (e.g., holding, throwing, jumping, inspecting) to describe interactions, poses, or movements.
            6. Avoid overly detailed or speculative descriptions such as 'slightly moving its mouth' or 'appears to be anticipating'.
            7. Pretend you are observing the scene directly, avoiding phrases like 'it seems' or 'based on the description'.
            8. Include interactions with objects or other entities when they are prominent and observable.
            9. If the image contains multiple {cat_name}s, describe the actions of each individually and ensure the descriptions are non-overlapping and specific.
            10. Do not include descriptions of appearance such as clothes, color, size, shape etc.
            11. Do not include relative position between objects such as 'the left elephant' because left/right can be ambiguous.
            12. Do not mention object IDs.
            13. Use '{cat_name}' as the noun for the referring expressions.

            Keep in mind that you should not group the objects, e.g., 2-5. people: xxx, be sure to describe each object separately (one by one).
            Output referring expressions for each object id.
            """

            dense_caption_prompt = f"""
            You are a visual assistant analyzing a single frame of a video.
            In the given frame, I labeled {frame_cat_cnts} {cat_name}s by marking each with a bright numeric ID at the center and its boundary.

            I want to use your expressions to create an **action-centric referring expression** dataset.
            Please describe each {cat_name} using **clearly observable** and **specific** actions.

            ---
            ## Guidelines:
            1. **Focus on visible, prominent actions** only (e.g., running, pushing, grasping an object).
            2. **Avoid describing minor or ambiguous actions** (e.g., "slightly moving a paw", "slightly tilting head").
            3. **Do not include subjective or speculative descriptions** (e.g., “it seems excited” or “it might be preparing to jump”).
            4. **Avoid vague expressions** like "interacting with something" or "engaging with another object." Instead, specify the action (e.g., "grabbing a stick," "pressing a button").
            5. **Use dynamic action verbs** (holding, throwing, inspecting, leaning, pressing) to highlight body movement or object/animal interaction.
            6. If multiple {cat_name}s appear, ensure each description **differentiates** their actions.
            7. Base your description on these action definitions:
            - Avoid using term 'minimal' or 'slightly'.
            - General body movement, body position, or pattern which is prominent. (e.g. "lifting head up", "facing towards", "showing its back")
            - details such as motion and intention, facial with object manipulation
            - movements with objects or other entities when they are prominent and observable. expression should be specific.
            (e.g., "pushing another person" (O), "engaging with someone" (X) "interacting with another person" (X))
            ---

            ## Output Format:
            - For each labeled {cat_name}, output **exactly one line**. Your answer should contain details and follow the following format :
            object id. using {cat_name} as subject noun, action-oriented description
            (e.g. 1. the person is holding ski poles and skiing on a snow mountain, with his two legs bent forward.)
            - **Only include the currently labeled category** in each line (e.g., if it’s a person, do not suddenly label it as other object/animal).

            ### Example
            If the frame has 2 labeled bears, your output should look like:
            1. the bear reaching his right arm while leaning forward to capture the prey
            2. a bear standing upright facing right, touching the bike aside

            ---
            **Do not include** appearance details (e.g., color, size, texture) or relative positioning (e.g., “on the left/right”).
            **Do not include object IDs** or reference them (e.g., "Person 1" or "object 2" is not allowed).
            **Do not include markdown** in the output.
            Keep in mind that you should not group the objects, e.g., 2-5. people: xxx, be sure to describe each object separately (one by one).
            For each labeled {cat_name}, output referring expressions for each object id.
            """
            MAX_RETRIES = 2
            retry_count = 0

            if should_caption:
                while retry_count < MAX_RETRIES:

                    response2 = captioner.chat.completions.create(
                        model=model,
                        messages=[
                            {
                                "role": "user",
                                "content": [
                                    {
                                        "type": "text",
                                        "text": dense_caption_prompt,
                                    },
                                    {
                                        "type": "image_url",
                                        "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                                    },
                                ],
                            }
                        ],
                    )

                    # caption = response2.choices[0].message.content
                    #print(f"{image_path} - {frame_name}: {caption}")

                    caption = response2.choices[0].message.content.strip()
                    caption_lower = caption.lower().lstrip()

                    if caption_lower.startswith("1.") and not any(
                        phrase in caption_lower for phrase in ["i'm sorry", "please", "can't help"]
                    ):
                        break

                    print(f"Retrying caption generation... ({retry_count + 1}/{MAX_RETRIES})")
                    retry_count += 1
                    time.sleep(2)

                if retry_count == MAX_RETRIES:
                    caption = None
                    print("Max retries reached. Caption generation failed.")

            else:
                caption = None

            image_captions[frame_name] = caption
        all_captions[cat_name] = image_captions

    # final : also prepare valid object ids
    valid_obj_ids = dict()

    for cat in cat_names:
        if cat in ytvos_category_valid_list:
            obj_id_cat = vid_meta['obj_id_cat']
            valid_cat_ids = []
            for obj_id in list(obj_id_cat.keys()):
                if obj_id_cat[obj_id] == cat:
                    valid_cat_ids.append(obj_id)
            valid_obj_ids[cat] = valid_cat_ids

    return vid_id, all_captions, valid_obj_ids


if __name__ == '__main__':
    parser = argparse.ArgumentParser('ReferFormer training and evaluation script', parents=[opts.get_args_parser()])
    parser.add_argument('--save_caption_path', type=str, default="mbench/numbered_captions.json")
    parser.add_argument('--save_valid_obj_ids_path', type=str, default="mbench/numbered_valid_obj_ids.json")

    args = parser.parse_args()

    # ================== load the data ==================
    # full dataset
    train_dataset = build_ytvos_ref(image_set='train', args=args)

    # metadata for the full dataset
    metas = train_dataset.metas

    # 8 candidate colors (RGB)
    colors = [
        (255, 0, 0),    # Red
        (0, 255, 0),    # Green
        (0, 0, 255),    # Blue
        (255, 255, 0),  # Yellow
        (255, 0, 255),  # Magenta
        (0, 255, 255),  # Cyan
        (128, 0, 128),  # Purple
        (255, 165, 0)   # Orange
    ]

    ytvos_category_valid_list = [
        'airplane', 'ape', 'bear', 'bird', 'boat', 'bus', 'camel', 'cat', 'cow', 'crocodile',
        'deer', 'dog', 'dolphin', 'duck', 'eagle', 'earless_seal', 'elephant', 'fish', 'fox', 'frog',
        'giant_panda', 'giraffe', 'hedgehog', 'horse', 'leopard', 'lion', 'lizard',
        'monkey', 'motorbike', 'mouse', 'owl', 'parrot', 'penguin', 'person',
        'rabbit', 'raccoon', 'sedan', 'shark', 'sheep', 'snail', 'snake',
        'squirrel', 'tiger', 'train', 'truck', 'turtle', 'whale', 'zebra'
    ]

    # ================== run GPT ==================
    # NOTE: do not hardcode a real API key; set OPENAI_API_KEY in the environment instead.
    os.environ['OPENAI_API_KEY'] = 'sk-proj-...'

    result_captions = {}
    result_valid_obj_ids = {}

    for i in range(370):
        vid_id, all_captions, valid_obj_ids = getCaption(i, color_mask=False)

        if vid_id not in result_captions:
            result_captions[vid_id] = all_captions
        if vid_id not in result_valid_obj_ids:
            result_valid_obj_ids[vid_id] = valid_obj_ids

    print("Finished!", flush=True)

    with open(args.save_caption_path, "w") as file:
        json.dump(result_captions, file, indent=4)

    with open(args.save_valid_obj_ids_path, "w") as file:
        json.dump(result_valid_obj_ids, file, indent=4)
mbench/gpt_ref-ytvos_numbered_cy_sanity.py
ADDED
@@ -0,0 +1,643 @@
import os
import sys
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

import time
import random
import argparse
import base64
import json
from io import BytesIO
from os import path as osp
from pathlib import Path

import numpy as np
import pandas as pd
import regex as re

import cv2
from PIL import Image, ImageDraw
import torch
from torchvision.transforms import functional as F

import skimage
from skimage import measure                          # (pip install scikit-image)
from shapely.geometry import Polygon, MultiPolygon   # (pip install Shapely)

import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.collections import PatchCollection
from matplotlib.patches import Rectangle
import textwrap

import ipywidgets as widgets
from IPython.display import display, clear_output

from openai import OpenAI

from mbench.ytvos_ref import build as build_ytvos_ref
import opts


def number_objects_and_encode(idx, color_mask=False):
    """Label every valid object of each category in video `idx` with a numeric ID
    (contour outline by default, translucent color mask when `color_mask` is set)
    and return per-category lists of base64-encoded JPEG frames, the contour-only
    frames, and per-frame object counts."""
    encoded_frames = {}
    contoured_frames = {}  # New dictionary for original images
    vid_cat_cnts = {}

    vid_meta = metas[idx]
    vid_data = train_dataset[idx]
    vid_id = vid_meta['video']
    frame_indx = vid_meta['sample_indx']
    cat_names = set(vid_meta['obj_id_cat'].values())
    imgs = vid_data[0]

    for cat in cat_names:
        cat_frames = []
        contour_frames = []
        frame_cat_cnts = {}

        for i in range(imgs.size(0)):
            frame_name = frame_indx[i]
            frame = np.copy(imgs[i].permute(1, 2, 0).numpy())
            frame_for_contour = np.copy(imgs[i].permute(1, 2, 0).numpy())

            frame_data = vid_data[2][frame_name]
            obj_ids = list(frame_data.keys())

            cat_cnt = 0

            for j in range(len(obj_ids)):
                obj_id = obj_ids[j]
                obj_data = frame_data[obj_id]
                obj_bbox = obj_data['bbox']
                obj_valid = obj_data['valid']
                obj_mask = obj_data['mask'].numpy().astype(np.uint8)
                obj_cat = obj_data['category_name']

                if obj_cat == cat and obj_valid:
                    cat_cnt += 1

                    if not color_mask:
                        contours, _ = cv2.findContours(obj_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                        cv2.drawContours(frame, contours, -1, colors[j], 3)
                        # dedicated loop variable so the frame index `i` is not shadowed
                        for c_idx, contour in enumerate(contours):
                            moments = cv2.moments(contour)
                            if moments["m00"] != 0:
                                cx = int(moments["m10"] / moments["m00"])
                                cy = int(moments["m01"] / moments["m00"])
                            else:
                                cx, cy = contour[0][0]

                            font = cv2.FONT_HERSHEY_SIMPLEX
                            text = obj_id
                            text_size = cv2.getTextSize(text, font, 1, 2)[0]
                            text_w, text_h = text_size

                            cv2.rectangle(frame, (cx - text_w // 2 - 5, cy - text_h // 2 - 5),
                                          (cx + text_w // 2 + 5, cy + text_h // 2 + 5), (0, 0, 0), -1)

                            cv2.putText(frame, text, (cx - text_w // 2, cy + text_h // 2),
                                        font, 1, (255, 255, 255), 2)

                    else:
                        alpha = 0.08

                        colored_obj_mask = np.zeros_like(frame)
                        colored_obj_mask[obj_mask == 1] = colors[j]
                        frame[obj_mask == 1] = (
                            (1 - alpha) * frame[obj_mask == 1]
                            + alpha * colored_obj_mask[obj_mask == 1]
                        )

                        contours, _ = cv2.findContours(obj_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                        cv2.drawContours(frame, contours, -1, colors[j], 2)
                        cv2.drawContours(frame_for_contour, contours, -1, colors[j], 2)

                        if len(contours) > 0:
                            largest_contour = max(contours, key=cv2.contourArea)
                            M = cv2.moments(largest_contour)
                            if M["m00"] != 0:
                                center_x = int(M["m10"] / M["m00"])
                                center_y = int(M["m01"] / M["m00"])
                            else:
                                center_x, center_y = 0, 0

                            font = cv2.FONT_HERSHEY_SIMPLEX
                            text = obj_id

                            font_scale = 0.9
                            text_size = cv2.getTextSize(text, font, font_scale, 2)[0]
                            text_x = center_x - text_size[0] // 1
                            text_y = center_y

                            rect_start = (text_x - 5, text_y - text_size[1] - 5)
                            rect_end = (text_x + text_size[0] + 5, text_y)

                            cv2.rectangle(frame, rect_start, rect_end, (0, 0, 0), -1)
                            cv2.putText(frame, text, (text_x, text_y), font, 1, (255, 255, 255), 2)

            # (matplotlib preview left commented out in the source)
            # plt.figure(figsize=(12, 8)); plt.imshow(frame); plt.title(f"frame {frame_name}")
            # plt.tight_layout(); plt.axis('off'); plt.show()

            buffer = BytesIO()
            frame = Image.fromarray(frame)
            frame.save(buffer, format='jpeg')
            buffer.seek(0)
            cat_frames.append(base64.b64encode(buffer.read()).decode("utf-8"))
            frame_cat_cnts[frame_name] = cat_cnt

            buffer.seek(0)  # Reuse buffer instead of creating a new one
            buffer.truncate()
            frame_for_contour = Image.fromarray(frame_for_contour)
            frame_for_contour.save(buffer, format='jpeg')
            buffer.seek(0)
            contour_frames.append(base64.b64encode(buffer.read()).decode("utf-8"))

        encoded_frames[cat] = cat_frames
        contoured_frames[cat] = contour_frames
        vid_cat_cnts[cat] = frame_cat_cnts

    return encoded_frames, contoured_frames, vid_cat_cnts

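Because number_objects_and_encode returns base64-encoded JPEGs rather than arrays, the quickest way to eyeball what the model actually receives is to round-trip one frame. A minimal sketch, assuming the module-level globals (train_dataset, metas, colors) have already been set up as in the __main__ block below; the output path is illustrative:

# Sketch: decode the first encoded frame of one category for visual inspection.
import base64
from io import BytesIO
from PIL import Image

encoded, contoured, counts = number_objects_and_encode(0, color_mask=False)
first_cat = next(iter(encoded))
img = Image.open(BytesIO(base64.b64decode(encoded[first_cat][0])))
img.save("debug_frame.jpg")  # hypothetical output file
print(first_cat, counts[first_cat])  # per-frame object counts for that category
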
# Earlier version, kept commented out in the source: the contour and the ID label
# are always drawn, and the translucent color mask is blended on top afterwards.
# def number_objects_and_encode(idx, color_mask=False):
#     encoded_frames = {}
#     contoured_frames = {}  # New dictionary for original images
#     vid_cat_cnts = {}
#
#     vid_meta = metas[idx]
#     vid_data = train_dataset[idx]
#     vid_id = vid_meta['video']
#     frame_indx = vid_meta['sample_indx']
#     cat_names = set(vid_meta['obj_id_cat'].values())
#     imgs = vid_data[0]
#
#     for cat in cat_names:
#         cat_frames = []
#         contour_frames = []
#         frame_cat_cnts = {}
#
#         for i in range(imgs.size(0)):
#             frame_name = frame_indx[i]
#             frame = np.copy(imgs[i].permute(1, 2, 0).numpy())
#             frame_for_contour = np.copy(imgs[i].permute(1, 2, 0).numpy())
#
#             frame_data = vid_data[2][frame_name]
#             obj_ids = list(frame_data.keys())
#
#             cat_cnt = 0
#
#             for j in range(len(obj_ids)):
#                 obj_id = obj_ids[j]
#                 obj_data = frame_data[obj_id]
#                 obj_bbox = obj_data['bbox']
#                 obj_valid = obj_data['valid']
#                 obj_mask = obj_data['mask'].numpy().astype(np.uint8)
#                 obj_cat = obj_data['category_name']
#
#                 if obj_cat == cat and obj_valid:
#                     cat_cnt += 1
#
#                     contours, _ = cv2.findContours(obj_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
#                     cv2.drawContours(frame, contours, -1, colors[j], 3)
#                     cv2.drawContours(frame_for_contour, contours, -1, colors[j], 2)
#
#                     if len(contours) > 0:
#                         largest_contour = max(contours, key=cv2.contourArea)
#                         M = cv2.moments(largest_contour)
#                         if M["m00"] != 0:
#                             center_x = int(M["m10"] / M["m00"])
#                             center_y = int(M["m01"] / M["m00"])
#                         else:
#                             center_x, center_y = 0, 0
#
#                         font = cv2.FONT_HERSHEY_SIMPLEX
#                         text = obj_id
#                         font_scale = 1.2
#                         text_size = cv2.getTextSize(text, font, font_scale, 2)[0]
#                         text_x = center_x - text_size[0] // 1
#                         text_y = center_y
#
#                         rect_start = (text_x - 5, text_y - text_size[1] - 5)
#                         rect_end = (text_x + text_size[0] + 5, text_y + 3)
#
#                         contour_thickness = 1
#                         rect_start_contour = (rect_start[0] - contour_thickness, rect_start[1] - contour_thickness)
#                         rect_end_contour = (rect_end[0] + contour_thickness, rect_end[1] + contour_thickness)
#
#                         cv2.rectangle(frame, rect_start_contour, rect_end_contour, colors[j], contour_thickness)
#                         cv2.rectangle(frame, rect_start, rect_end, (0, 0, 0), -1)
#                         cv2.putText(frame, text, (text_x, text_y), font, 1, (255, 255, 255), 2)
#
#                     if color_mask:
#                         alpha = 0.08
#                         colored_obj_mask = np.zeros_like(frame)
#                         colored_obj_mask[obj_mask == 1] = colors[j]
#                         frame[obj_mask == 1] = (
#                             (1 - alpha) * frame[obj_mask == 1]
#                             + alpha * colored_obj_mask[obj_mask == 1]
#                         )
#
#             buffer = BytesIO()
#             frame = Image.fromarray(frame)
#             frame.save(buffer, format='jpeg')
#             buffer.seek(0)
#             cat_frames.append(base64.b64encode(buffer.read()).decode("utf-8"))
#             frame_cat_cnts[frame_name] = cat_cnt
#
#             buffer.seek(0)  # Reuse buffer instead of creating a new one
#             buffer.truncate()
#             frame_for_contour = Image.fromarray(frame_for_contour)
#             frame_for_contour.save(buffer, format='jpeg')
#             buffer.seek(0)
#             contour_frames.append(base64.b64encode(buffer.read()).decode("utf-8"))
#
#         encoded_frames[cat] = cat_frames
#         contoured_frames[cat] = contour_frames
#         vid_cat_cnts[cat] = frame_cat_cnts
#
#     return encoded_frames, contoured_frames, vid_cat_cnts

def getCaption(idx, model='gpt-4o', color_mask=False):
    """For one video: for every category with at least two valid, movable objects,
    first ask the model whether the objects are distinguishable by action, and if
    so request one action-oriented referring expression per labeled object.
    Returns (vid_id, {category: {frame: caption}}, {category: [valid obj ids]})."""
    vid_meta = metas[idx]
    vid_data = train_dataset[idx]
    vid_id = vid_meta['video']
    print(f"vid id: {vid_id}\n")

    frame_indx = vid_meta['sample_indx']              # e.g. [4, 7, 9, 16]
    cat_names = set(vid_meta['obj_id_cat'].values())  # e.g. {"person", "elephant", ...}
    all_captions = dict()

    base64_frames, _, vid_cat_cnts = number_objects_and_encode(idx, color_mask)
    # marked = "mask with boundary" if color_mask else "boundary"

    for cat_name in list(cat_names):

        is_movable = False
        if cat_name in ytvos_category_valid_list:
            is_movable = True

        if not is_movable:
            print(f"Skipping {cat_name}: Determined to be non-movable.", end='\n\n')

        image_captions = {}
        captioner = OpenAI()
        cat_base64_frames = base64_frames[cat_name]
        # cont_base64_frames = contoured_frames[cat_name]

        for i in range(len(cat_base64_frames)):
            frame_name = frame_indx[i]
            # cont_base64_image = cont_base64_frames[i]
            base64_image = cat_base64_frames[i]
            should_filter = False
            frame_cat_cnts = vid_cat_cnts[cat_name][frame_name]

            if frame_cat_cnts >= 2:
                should_filter = True
            else:
                print(f"Skipping {cat_name}: There is a single object or none.", end='\n\n')

            if is_movable and should_filter:
                # Step 1: filtering -- is this frame worth captioning at all?
                print(f"-----------category name: {cat_name}, frame name: {frame_name}")
                caption_filter_text = f"""
You are a visual assistant analyzing a single frame from a video.
In this frame, I have labeled {frame_cat_cnts} {cat_name}(s), each with a bright numeric ID at its center and a visible marker.

Are the {cat_name}s in the image all performing different, recognizable actions or postures?
Consider differences in body pose (standing, sitting, holding hands up, grabbing an object, facing the camera, stretching, walking...), motion cues (inferred from the momentary stance or position),
facial expressions, and any notable interactions with objects or other {cat_name}s or people.

Only focus on obvious, prominent actions that can be reliably identified from this single frame.

- Respond with "YES" if:
    1) Most of the {cat_name}s exhibit clearly different, unique actions or poses.
       (e.g. standing, sitting, bending, stretching, showing its back, or turning toward the camera.)
    2) You can see visible, significant differences in action and posture that an observer can identify at a glance.
    3) Interaction Variability: Each {cat_name} is engaged in a different type of action, such as one grasping an object while another is observing.

- Respond with "NONE" if:
    1) The actions or poses are not clearly differentiable, or are too similar.
    2) Minimal or Ambiguous Motion: The frame does not provide clear evidence of distinct movement beyond subtle shifts in stance.
    3) Passive or Neutral Poses: multiple {cat_name}(s) are simply standing or sitting without an obvious difference in orientation or motion.

Answer strictly with either "YES" or "NONE".
"""

                response1 = captioner.chat.completions.create(
                    model=model,
                    messages=[
                        {
                            "role": "user",
                            "content": [
                                {"type": "text", "text": caption_filter_text},
                                {"type": "image_url",
                                 "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
                            ],
                        }
                    ],
                )
                response_content = response1.choices[0].message.content
                should_caption = "yes" in response_content.lower()
                print(f"are {cat_name}s distinguished by action: {response_content}", end='\n\n')

            else:
                should_caption = False

            # Step 2: generate the dense captions
            dense_caption_prompt_1 = f"""
In the given frame, I labeled {frame_cat_cnts} {cat_name}s by marking each with a bright numeric ID at the center and its boundary. The category name of these objects is: {cat_name}.

Please describe the image focusing on the labeled {cat_name}s in detail, focusing on their actions and interactions.

1. Focus only on clear, unique, and prominent actions that distinguish each object.
2. Avoid describing actions that are too minor, ambiguous, or not visible from the image.
3. Avoid subjective terms such as 'skilled', 'controlled', or 'focused'. Only describe observable actions.
4. Do not include common-sense or overly general descriptions like 'the elephant walks'.
5. Use dynamic action verbs (e.g., holding, throwing, jumping, inspecting) to describe interactions, poses, or movements.
6. **Avoid overly detailed or speculative descriptions** such as 'slightly moving its mouth' or 'appears to be anticipating'.
    - expressions like 'seems to be', 'appears to be' are BANNED!
7. Pretend you are observing the scene directly, avoiding phrases like 'it seems' or 'based on the description'.
8. Include interactions with objects or other entities when they are prominent and observable.
9. **Do not include descriptions of appearance** such as clothes, color, size, shape etc.
10. **Do not include relative position** between objects such as 'the left elephant' because left/right can be ambiguous.
11. Do not mention object IDs.
12. Use '{cat_name}' as the noun for the referring expressions.

Note that I want to use your description to create a grounding dataset; therefore, your descriptions for different objects should be unique, i.e., if the image contains multiple {cat_name}s, describe the actions of each individually and ensure the descriptions are non-overlapping and specific.

- Your answer should contain details, and follow the following format:
    object id. action-oriented description
    (e.g. 1. the person is holding bananas on two hands and opening his mouth, turning the head right.
          2. a person bending over and touching his boots to tie the shoelace.)
- for the action-oriented description, use {cat_name} as the subject noun

**Only include the currently labeled category** in each line (e.g., if it's a person, do not suddenly label it as another object/animal).
Please pay attention to the categories of these objects and don't change them.
Keep in mind that you should not group the objects, e.g., 2-5. people: xxx; be sure to describe each object separately (one by one).
Output referring expressions for each object id. Please start your answer:"""

            dense_caption_prompt_2 = f"""
You are an advanced visual language model analyzing a video frame.
In this frame, {frame_cat_cnts} objects belonging to the category **{cat_name}** have been distinctly labeled with bright numerical IDs at their center and boundary.

Your task is to generate **action-oriented descriptions** for each labeled {cat_name}.
Your descriptions should capture their **observable actions and interactions**, making sure to highlight movement, gestures, and dynamic behaviors.

---
## Key Guidelines:
1. **Describe only clear and visible actions** that uniquely define what the {cat_name} is doing.
   - Example: "grabbing a branch and pulling it down" (**(O) Specific**)
   - Avoid: "moving slightly to the side" (**(X) Too vague**)

2. **Do not describe appearance, color, or position**; focus purely on the action.
   - (X) "A large brown bear standing on the left"
   - (O) "The bear is lifting its front paws and swiping forward."

3. **Use dynamic, action-specific verbs** rather than passive descriptions.
   - (O) "The giraffe is tilting its head and sniffing the ground."
   - (X) "The giraffe is near a tree and looking around."

4. **Avoid assumptions, emotions, or speculative phrasing.**
   - (X) "The person seems excited" / "The person might be preparing to jump."
   - (O) "The person is pushing its front legs against the rock and leaping forward."

5. **Avoid overly detailed or speculative descriptions** such as 'slightly moving its mouth' or 'appears to be anticipating'.
   - expressions like 'seems to be', 'appears to be' are BANNED!
6. Pretend you are observing the scene directly, avoiding phrases like 'it seems' or 'based on the description'.

7. If multiple {cat_name}s are present, make sure their descriptions are **distinct and non-overlapping**.
   - **Each object should have a unique, descriptive action.**
   - (X) "Two dogs are running."
   - (O) "1. One dog is chasing another, its legs stretched mid-air.
          2. The other dog is looking back while speeding up."

---
## Output Format:
- Each labeled **{cat_name}** should have exactly **one line of description**.
- Format: `ID. {cat_name} + action-based description`
- (O) Example:
```
1. The person is leaning forward while opening a bag with both hands.
2. The person is holding onto a rope and pulling themselves up.
```
- **Ensure that each object is described individually.**
- **Do not group objects into a single sentence** (e.g., "2-5. people: xxx" is NOT allowed).

---
## Additional Instructions:
- **Do NOT** use expressions like "it appears that..." or "it seems like...".
- **Do NOT** mention object IDs in the description (only use the provided format).
- **Do NOT** include markdown formatting (no bullet points, no asterisks).
- **Only describe actions of the labeled {cat_name} objects**; do not introduce unrelated categories.

Please generate the action-oriented descriptions for each labeled {cat_name} and start your answer:
"""

            dense_caption_prompt = f"""
You are a visual assistant analyzing a single frame of a video.
In this frame, {frame_cat_cnts} objects belonging to the category **{cat_name}** have been labeled with bright numeric IDs at their center and boundary.

I am building an **action-centric referring expression** dataset.
Your task is to describe each labeled {cat_name} based on **clearly observable and specific actions**.

---
## Guidelines:
1. **Focus only on visible and prominent actions** (e.g., running, pushing, grasping an object).
2. **Avoid describing minor or ambiguous movements** (e.g., "slightly moving a paw," "tilting head a bit").
3. **Do not include subjective or speculative descriptions** (e.g., "it seems excited" or "it might be preparing to jump").
4. **Avoid vague expressions** like "engaging with something." Instead, specify the action (e.g., "grabbing a stick," "pressing a button").
5. **Use dynamic action verbs** (e.g., holding, throwing, inspecting, leaning, pressing) to highlight motion and interaction.
6. If multiple {cat_name}s appear, ensure each description is **distinct and non-overlapping**.
7. Base your descriptions on these principles:
   - **Avoid words like 'minimal' or 'slightly'.**
   - Emphasize **body movement, posture, and motion patterns** (e.g., "lifting its head," "facing forward," "showing its back").
   - Describe **facial expressions and interactions with objects** (e.g., "opening its mouth wide," "smiling while holding an item").
   - **Specify actions with other objects or entities** only when they are clear and observable.
     - (O) "pushing another person"
     - (X) "interacting with another object"

---
## Output Format:
- Each labeled **{cat_name}** must have **exactly one line**.
- Format: `ID. {cat_name} + action-based description`
- (O) Example:
```
1. The person is holding ski poles and skiing down a snowy mountain with bent knees.
2. The person is pulling a baby carriage while smiling.
```
- **Ensure each object is described individually.**
- **Do not group multiple objects into a single sentence** (e.g., "2-5. people: xxx" is NOT allowed).

---
## Example:
If the frame has two labeled **bears**, your output should be:
```
1. The bear is reaching out its right paw while leaning forward to catch prey.
2. A bear is standing upright, facing right, and touching the bike beside it.
```

---
## Additional Instructions:
- **Do NOT** describe appearance (e.g., color, size, texture) or relative positioning (e.g., "on the left/right").
- **Do NOT** reference object IDs explicitly (e.g., "Person 1" or "Object 2" is NOT allowed).
- **Do NOT** include markdown formatting (no bullet points, asterisks, or extra symbols).
- **Only describe actions of the labeled {cat_name} objects**; do not introduce unrelated categories.

Please generate the action-oriented descriptions for each labeled {cat_name} and start your answer:"""

            MAX_RETRIES = 3
            retry_count = 0

            if should_caption:
                while retry_count < MAX_RETRIES:
                    selected_prompt = random.choice([dense_caption_prompt, dense_caption_prompt_2, dense_caption_prompt_1])

                    response2 = captioner.chat.completions.create(
                        model=model,
                        messages=[
                            {
                                "role": "user",
                                "content": [
                                    {"type": "text", "text": selected_prompt},
                                    {"type": "image_url",
                                     "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
                                ],
                            }
                        ],
                    )

                    caption = response2.choices[0].message.content.strip()
                    caption_lower = caption.lower().lstrip()

                    # accept only answers that start with "1." and are not refusals
                    if caption_lower.startswith("1.") and not any(
                        phrase in caption_lower for phrase in ["i'm sorry", "please", "can't help"]
                    ):
                        break

                    print(f"Retrying caption generation... ({retry_count + 1}/{MAX_RETRIES})")
                    retry_count += 1
                    time.sleep(2)

                if retry_count == MAX_RETRIES:
                    caption = None
                    print("Max retries reached. Caption generation failed.")

            else:
                caption = None

            image_captions[frame_name] = caption
        all_captions[cat_name] = image_captions

    # finally, also prepare the valid object ids per category
    valid_obj_ids = dict()

    for cat in cat_names:
        if cat in ytvos_category_valid_list:
            obj_id_cat = vid_meta['obj_id_cat']
            valid_cat_ids = []
            for obj_id in list(obj_id_cat.keys()):
                if obj_id_cat[obj_id] == cat:
                    valid_cat_ids.append(obj_id)
            valid_obj_ids[cat] = valid_cat_ids

    return vid_id, all_captions, valid_obj_ids

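getCaption issues two chat.completions.create calls per frame with no error handling, so a transient API failure aborts the whole video. A minimal backoff wrapper one could put around those calls; the helper name and retry parameters are illustrative, not from the original:

# Sketch: retry an OpenAI chat call with exponential backoff on transient errors.
import time
from openai import OpenAI, APIConnectionError, RateLimitError

def chat_with_backoff(client, max_tries=5, **kwargs):
    delay = 1.0
    for attempt in range(max_tries):
        try:
            return client.chat.completions.create(**kwargs)
        except (APIConnectionError, RateLimitError):
            if attempt == max_tries - 1:
                raise  # give up after the final attempt
            time.sleep(delay)
            delay *= 2  # back off: 1s, 2s, 4s, ...
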
if __name__ == '__main__':
    parser = argparse.ArgumentParser('ReferFormer training and evaluation script', parents=[opts.get_args_parser()])
    parser.add_argument('--save_caption_path', type=str, default="mbench/numbered_captions_gpt-4o_randcap.json")
    parser.add_argument('--save_valid_obj_ids_path', type=str, default="mbench/numbered_valid_obj_ids_gpt-4o_randcap.json")

    args = parser.parse_args()

    #================== Load the data ==================
    # full dataset
    train_dataset = build_ytvos_ref(image_set='train', args=args)

    # metadata for the full dataset
    metas = train_dataset.metas

    # eight candidate colors (RGB)
    colors = [
        (255, 0, 0),    # Red
        (0, 255, 0),    # Green
        (0, 0, 255),    # Blue
        (255, 255, 0),  # Yellow
        (255, 0, 255),  # Magenta
        (0, 255, 255),  # Cyan
        (128, 0, 128),  # Purple
        (255, 165, 0)   # Orange
    ]

    ytvos_category_valid_list = [
        'airplane', 'ape', 'bear', 'bird', 'boat', 'bus', 'camel', 'cat', 'cow', 'crocodile',
        'deer', 'dog', 'dolphin', 'duck', 'eagle', 'earless_seal', 'elephant', 'fish', 'fox', 'frog',
        'giant_panda', 'giraffe', 'hedgehog', 'horse', 'leopard', 'lion', 'lizard',
        'monkey', 'motorbike', 'mouse', 'owl', 'parrot', 'penguin', 'person',
        'rabbit', 'raccoon', 'sedan', 'shark', 'sheep', 'snail', 'snake',
        'squirrel', 'tiger', 'train', 'truck', 'turtle', 'whale', 'zebra'
    ]

    #================== Run GPT ==================
    # NOTE: the source file hard-coded an OpenAI API key here; it has been
    # redacted. Supply the key via the environment instead of committing it.
    os.environ['OPENAI_API_KEY'] = os.environ.get('OPENAI_API_KEY', '')

    result_captions = {}
    result_valid_obj_ids = {}

    for i in range(370):
        vid_id, all_captions, valid_obj_ids = getCaption(i, color_mask=False)

        if vid_id not in result_captions:
            result_captions[vid_id] = all_captions
        if vid_id not in result_valid_obj_ids:
            result_valid_obj_ids[vid_id] = valid_obj_ids

    print("Finished!", flush=True)

    with open(args.save_caption_path, "w") as file:
        json.dump(result_captions, file, indent=4)

    with open(args.save_valid_obj_ids_path, "w") as file:
        json.dump(result_valid_obj_ids, file, indent=4)
mbench/gpt_ref-ytvos_numbered_cy_sanity_2.py
ADDED
@@ -0,0 +1,676 @@
import os
import sys
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

import time
import random
import argparse
import base64
import json
from io import BytesIO
from os import path as osp
from pathlib import Path

import numpy as np
import pandas as pd
import regex as re
import requests

import cv2
from PIL import Image, ImageDraw
import torch
from torchvision.transforms import functional as F

import skimage
from skimage import measure                          # (pip install scikit-image)
from shapely.geometry import Polygon, MultiPolygon   # (pip install Shapely)

import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.collections import PatchCollection
from matplotlib.patches import Rectangle
import textwrap

import ipywidgets as widgets
from IPython.display import display, clear_output

from openai import OpenAI, APIConnectionError, OpenAIError

from mbench.ytvos_ref import build as build_ytvos_ref
import opts


def number_objects_and_encode_old(idx, color_mask=False):
    """Previous labeling scheme: contour outlines with per-contour ID labels by
    default, or a translucent color mask with a single label on the largest
    contour when `color_mask` is set."""
    encoded_frames = {}
    contoured_frames = {}  # New dictionary for original images
    vid_cat_cnts = {}

    vid_meta = metas[idx]
    vid_data = train_dataset[idx]
    vid_id = vid_meta['video']
    frame_indx = vid_meta['sample_indx']
    cat_names = set(vid_meta['obj_id_cat'].values())
    imgs = vid_data[0]

    for cat in cat_names:
        cat_frames = []
        contour_frames = []
        frame_cat_cnts = {}

        for i in range(imgs.size(0)):
            frame_name = frame_indx[i]
            frame = np.copy(imgs[i].permute(1, 2, 0).numpy())
            frame_for_contour = np.copy(imgs[i].permute(1, 2, 0).numpy())

            frame_data = vid_data[2][frame_name]
            obj_ids = list(frame_data.keys())

            cat_cnt = 0

            for j in range(len(obj_ids)):
                obj_id = obj_ids[j]
                obj_data = frame_data[obj_id]
                obj_bbox = obj_data['bbox']
                obj_valid = obj_data['valid']
                obj_mask = obj_data['mask'].numpy().astype(np.uint8)
                obj_cat = obj_data['category_name']

                if obj_cat == cat and obj_valid:
                    cat_cnt += 1

                    if not color_mask:
                        contours, _ = cv2.findContours(obj_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                        cv2.drawContours(frame, contours, -1, colors[j], 3)
                        # dedicated loop variable so the frame index `i` is not shadowed
                        for c_idx, contour in enumerate(contours):
                            moments = cv2.moments(contour)
                            if moments["m00"] != 0:
                                cx = int(moments["m10"] / moments["m00"])
                                cy = int(moments["m01"] / moments["m00"])
                            else:
                                cx, cy = contour[0][0]

                            font = cv2.FONT_HERSHEY_SIMPLEX
                            text = obj_id
                            text_size = cv2.getTextSize(text, font, 1, 2)[0]
                            text_w, text_h = text_size

                            cv2.rectangle(frame, (cx - text_w // 2 - 5, cy - text_h // 2 - 5),
                                          (cx + text_w // 2 + 5, cy + text_h // 2 + 5), (0, 0, 0), -1)

                            cv2.putText(frame, text, (cx - text_w // 2, cy + text_h // 2),
                                        font, 1, (255, 255, 255), 2)

                    else:
                        alpha = 0.08

                        colored_obj_mask = np.zeros_like(frame)
                        colored_obj_mask[obj_mask == 1] = colors[j]
                        frame[obj_mask == 1] = (
                            (1 - alpha) * frame[obj_mask == 1]
                            + alpha * colored_obj_mask[obj_mask == 1]
                        )

                        contours, _ = cv2.findContours(obj_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                        cv2.drawContours(frame, contours, -1, colors[j], 2)
                        cv2.drawContours(frame_for_contour, contours, -1, colors[j], 2)

                        if len(contours) > 0:
                            largest_contour = max(contours, key=cv2.contourArea)
                            M = cv2.moments(largest_contour)
                            if M["m00"] != 0:
                                center_x = int(M["m10"] / M["m00"])
                                center_y = int(M["m01"] / M["m00"])
                            else:
                                center_x, center_y = 0, 0

                            font = cv2.FONT_HERSHEY_SIMPLEX
                            text = obj_id

                            font_scale = 0.9
                            text_size = cv2.getTextSize(text, font, font_scale, 2)[0]
                            text_x = center_x - text_size[0] // 1
                            text_y = center_y

                            rect_start = (text_x - 5, text_y - text_size[1] - 5)
                            rect_end = (text_x + text_size[0] + 5, text_y)

                            cv2.rectangle(frame, rect_start, rect_end, (0, 0, 0), -1)
                            cv2.putText(frame, text, (text_x, text_y), font, 1, (255, 255, 255), 2)

            # (matplotlib preview left commented out in the source)
            # plt.figure(figsize=(12, 8)); plt.imshow(frame); plt.title(f"frame {frame_name}")
            # plt.tight_layout(); plt.axis('off'); plt.show()

            buffer = BytesIO()
            frame = Image.fromarray(frame)
            frame.save(buffer, format='jpeg')
            buffer.seek(0)
            cat_frames.append(base64.b64encode(buffer.read()).decode("utf-8"))
            frame_cat_cnts[frame_name] = cat_cnt

            buffer.seek(0)  # Reuse buffer instead of creating a new one
            buffer.truncate()
            frame_for_contour = Image.fromarray(frame_for_contour)
            frame_for_contour.save(buffer, format='jpeg')
            buffer.seek(0)
            contour_frames.append(base64.b64encode(buffer.read()).decode("utf-8"))

        encoded_frames[cat] = cat_frames
        contoured_frames[cat] = contour_frames
        vid_cat_cnts[cat] = frame_cat_cnts

    return encoded_frames, contoured_frames, vid_cat_cnts

def number_objects_and_encode(idx, color_mask=False):
    """Revised labeling scheme: always draw the contour and a single ID label
    (with a colored outline around the label box) on the largest contour, and
    optionally blend the translucent color mask on top."""
    encoded_frames = {}
    contoured_frames = {}  # New dictionary for original images
    vid_cat_cnts = {}

    vid_meta = metas[idx]
    vid_data = train_dataset[idx]
    vid_id = vid_meta['video']
    frame_indx = vid_meta['sample_indx']
    cat_names = set(vid_meta['obj_id_cat'].values())
    imgs = vid_data[0]

    for cat in cat_names:
        cat_frames = []
        contour_frames = []
        frame_cat_cnts = {}

        for i in range(imgs.size(0)):
            frame_name = frame_indx[i]
            frame = np.copy(imgs[i].permute(1, 2, 0).numpy())
            frame_for_contour = np.copy(imgs[i].permute(1, 2, 0).numpy())

            frame_data = vid_data[2][frame_name]
            obj_ids = list(frame_data.keys())

            cat_cnt = 0

            for j in range(len(obj_ids)):
                obj_id = obj_ids[j]
                obj_data = frame_data[obj_id]
                obj_bbox = obj_data['bbox']
                obj_valid = obj_data['valid']
                obj_mask = obj_data['mask'].numpy().astype(np.uint8)
                obj_cat = obj_data['category_name']

                if obj_cat == cat and obj_valid:
                    cat_cnt += 1

                    contours, _ = cv2.findContours(obj_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                    cv2.drawContours(frame, contours, -1, colors[j], 3)
                    cv2.drawContours(frame_for_contour, contours, -1, colors[j], 2)

                    if len(contours) > 0:
                        largest_contour = max(contours, key=cv2.contourArea)
                        M = cv2.moments(largest_contour)
                        if M["m00"] != 0:
                            center_x = int(M["m10"] / M["m00"])
                            center_y = int(M["m01"] / M["m00"])
                        else:
                            center_x, center_y = 0, 0

                        font = cv2.FONT_HERSHEY_SIMPLEX
                        text = obj_id
                        font_scale = 1.2
                        text_size = cv2.getTextSize(text, font, font_scale, 2)[0]
                        text_x = center_x - text_size[0] // 1
                        text_y = center_y

                        rect_start = (text_x - 5, text_y - text_size[1] - 5)
                        rect_end = (text_x + text_size[0] + 5, text_y + 3)

                        contour_thickness = 1
                        rect_start_contour = (rect_start[0] - contour_thickness, rect_start[1] - contour_thickness)
                        rect_end_contour = (rect_end[0] + contour_thickness, rect_end[1] + contour_thickness)

                        cv2.rectangle(frame, rect_start_contour, rect_end_contour, colors[j], contour_thickness)
                        cv2.rectangle(frame, rect_start, rect_end, (0, 0, 0), -1)
                        cv2.putText(frame, text, (text_x, text_y), font, 1, (255, 255, 255), 2)

                    if color_mask:
                        alpha = 0.08
                        colored_obj_mask = np.zeros_like(frame)
                        colored_obj_mask[obj_mask == 1] = colors[j]
                        frame[obj_mask == 1] = (
                            (1 - alpha) * frame[obj_mask == 1]
                            + alpha * colored_obj_mask[obj_mask == 1]
                        )

            # (matplotlib preview left commented out in the source)
            # plt.figure(figsize=(12, 8)); plt.imshow(frame); plt.title(f"frame {frame_name}")
            # plt.tight_layout(); plt.axis('off'); plt.show()

            buffer = BytesIO()
            frame = Image.fromarray(frame)
            frame.save(buffer, format='jpeg')
            buffer.seek(0)
            cat_frames.append(base64.b64encode(buffer.read()).decode("utf-8"))
            frame_cat_cnts[frame_name] = cat_cnt

            buffer.seek(0)  # Reuse buffer instead of creating a new one
            buffer.truncate()
            frame_for_contour = Image.fromarray(frame_for_contour)
            frame_for_contour.save(buffer, format='jpeg')
            buffer.seek(0)
            contour_frames.append(base64.b64encode(buffer.read()).decode("utf-8"))

        encoded_frames[cat] = cat_frames
        contoured_frames[cat] = contour_frames
        vid_cat_cnts[cat] = frame_cat_cnts

    return encoded_frames, contoured_frames, vid_cat_cnts

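Both labeling paths place the numeric ID at the centroid of the object's largest contour via cv2.moments. A tiny self-contained check of that centroid logic on a synthetic mask:

# Sketch: centroid of the largest contour of a synthetic binary mask.
import cv2
import numpy as np

mask = np.zeros((100, 100), dtype=np.uint8)
mask[20:60, 30:80] = 1  # a filled rectangle
contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
largest = max(contours, key=cv2.contourArea)
M = cv2.moments(largest)
cx, cy = int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"])
print(cx, cy)  # approximately (54, 39), the rectangle's center
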
def getCaption(idx, model='gpt-4o'):
    """Same two-stage pipeline as the sanity variant, except that the labeling
    style (contour only vs. contour plus color mask) is sampled at random per
    video, and the third dense-caption prompt is no longer used."""
    vid_meta = metas[idx]
    vid_data = train_dataset[idx]
    vid_id = vid_meta['video']
    print(f"vid id: {vid_id}\n")

    frame_indx = vid_meta['sample_indx']              # e.g. [4, 7, 9, 16]
    cat_names = set(vid_meta['obj_id_cat'].values())  # e.g. {"person", "elephant", ...}
    all_captions = dict()

    # color_mask = random.choice([True, False])
    color_mask = random.choices([False, True], weights=[60, 40])[0]

    base64_frames, _, vid_cat_cnts = number_objects_and_encode(idx, color_mask)
    # marked = "mask with boundary" if color_mask else "boundary"

    for cat_name in list(cat_names):

        is_movable = False
        if cat_name in ytvos_category_valid_list:
            is_movable = True

        if not is_movable:
            print(f"Skipping {cat_name}: Determined to be non-movable.", end='\n\n')

        image_captions = {}
        captioner = OpenAI()
        cat_base64_frames = base64_frames[cat_name]
        # cont_base64_frames = contoured_frames[cat_name]

        for i in range(len(cat_base64_frames)):
            frame_name = frame_indx[i]
            # cont_base64_image = cont_base64_frames[i]
            base64_image = cat_base64_frames[i]
            should_filter = False
            frame_cat_cnts = vid_cat_cnts[cat_name][frame_name]

            if frame_cat_cnts >= 2:
                should_filter = True
            else:
                print(f"Skipping {cat_name}: There is a single object or none.", end='\n\n')

            if is_movable and should_filter:
                # Step 1: filtering -- is this frame worth captioning at all?
                print(f"-----------category name: {cat_name}, frame name: {frame_name}")
                caption_filter_text = f"""
You are a visual assistant analyzing a single frame from a video.
In this frame, I have labeled {frame_cat_cnts} {cat_name}(s), each with a bright numeric ID at its center and a visible marker.

Are the {cat_name}s in the image all performing different, recognizable actions or postures?
Consider differences in body pose (standing, sitting, holding hands up, grabbing an object, facing the camera, stretching, walking...), motion cues (inferred from the momentary stance or position),
facial expressions, and any notable interactions with objects or other {cat_name}s or people.

Only focus on obvious, prominent actions that can be reliably identified from this single frame.

- Respond with "YES" if:
    1) Most of the {cat_name}s exhibit clearly different, unique actions or poses.
       (e.g. standing, sitting, bending, stretching, showing its back, or turning toward the camera.)
    2) You can see visible, significant differences in action and posture that an observer can identify at a glance.
    3) Interaction Variability: Each {cat_name} is engaged in a different type of action, such as one grasping an object while another is observing.

- Respond with "NONE" if:
    1) The actions or poses are not clearly differentiable, or are too similar.
    2) Minimal or Ambiguous Motion: The frame does not provide clear evidence of distinct movement beyond subtle shifts in stance.
    3) Passive or Neutral Poses: multiple {cat_name}(s) are simply standing or sitting without an obvious difference in orientation or motion.

Answer strictly with either "YES" or "NONE".
"""

                response1 = captioner.chat.completions.create(
                    model=model,
                    messages=[
                        {
                            "role": "user",
                            "content": [
                                {"type": "text", "text": caption_filter_text},
                                {"type": "image_url",
                                 "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
                            ],
                        }
                    ],
                )
                response_content = response1.choices[0].message.content
                should_caption = "yes" in response_content.lower()
                print(f"are {cat_name}s distinguished by action: {response_content}", end='\n\n')

            else:
                should_caption = False

            # Step 2: generate the dense captions
            dense_caption_prompt_1 = f"""
In the given frame, I labeled {frame_cat_cnts} {cat_name}s by marking each with a bright numeric ID at the center and its boundary. The category name of these objects is: {cat_name}.

Please describe the image focusing on the labeled {cat_name}s in detail, focusing on their actions and interactions.

1. Focus only on clear, unique, and prominent actions that distinguish each object.
2. Avoid describing actions that are too minor, ambiguous, or not visible from the image.
3. Avoid subjective terms such as 'skilled', 'controlled', or 'focused'. Only describe observable actions.
4. Do not include common-sense or overly general descriptions like 'the elephant walks'.
5. Use dynamic action verbs (e.g., holding, throwing, jumping, inspecting) to describe interactions, poses, or movements.
6. **Avoid overly detailed or speculative descriptions** such as 'slightly moving its mouth' or 'appears to be anticipating'.
    - expressions like 'seems to be', 'appears to be' are BANNED!
7. Pretend you are observing the scene directly, avoiding phrases like 'it seems' or 'based on the description'.
8. Include interactions with objects or other entities when they are prominent and observable.
9. **Do not include descriptions of appearance** such as clothes, color, size, shape etc.
10. **Do not include relative position** between objects such as 'the left elephant' because left/right can be ambiguous.
11. Do not mention object IDs.
12. Use '{cat_name}' as the noun for the referring expressions.

Note that I want to use your description to create a grounding dataset; therefore, your descriptions for different objects should be unique, i.e., if the image contains multiple {cat_name}s, describe the actions of each individually and ensure the descriptions are non-overlapping and specific.

- Your answer should contain details, and follow the following format:
    object id. action-oriented description
    (e.g. 1. the person is holding bananas on two hands and opening his mouth, turning the head right.
          2. a person bending over and touching his boots to tie the shoelace.)
- for the action-oriented description, use {cat_name} as the subject noun

**Only include the currently labeled category** in each line (e.g., if it's a person, do not suddenly label it as another object/animal).
Please pay attention to the categories of these objects and don't change them.
Keep in mind that you should not group the objects, e.g., 2-5. people: xxx; be sure to describe each object separately (one by one).
Output referring expressions for each object id. Please start your answer:"""

            dense_caption_prompt_2 = f"""
You are an advanced visual language model analyzing a video frame.
In this frame, {frame_cat_cnts} objects belonging to the category **{cat_name}** have been distinctly labeled with bright numerical IDs at their center and boundary.

Your task is to generate **action-oriented descriptions** for each labeled {cat_name}.
Your descriptions should capture their **observable actions and interactions**, making sure to highlight movement, gestures, and dynamic behaviors.

---
## Key Guidelines:
1. **Describe only clear and visible actions** that uniquely define what the {cat_name} is doing.
   - Example: "grabbing a branch and pulling it down" (**(O) Specific**)
   - Avoid: "moving slightly to the side" (**(X) Too vague**)

2. **Do not describe appearance, color, or position**; focus purely on the action.
   - (X) "A large brown bear standing on the left"
   - (O) "The bear is lifting its front paws and swiping forward."

3. **Use dynamic, action-specific verbs** rather than passive descriptions.
   - (O) "The giraffe is tilting its head and sniffing the ground."
   - (X) "The giraffe is near a tree and looking around."

4. **Avoid assumptions, emotions, or speculative phrasing.**
   - (X) "The person seems excited" / "The person might be preparing to jump."
   - (O) "The person is pushing its front legs against the rock and leaping forward."

5. **Avoid overly detailed or speculative descriptions** such as 'slightly moving its mouth' or 'appears to be anticipating'.
   - expressions like 'seems to be', 'appears to be' are BANNED!
6. Pretend you are observing the scene directly, avoiding phrases like 'it seems' or 'based on the description'.

7. If multiple {cat_name}s are present, make sure their descriptions are **distinct and non-overlapping**.
   - **Each object should have a unique, descriptive action.**
   - (X) "Two dogs are running."
   - (O) "1. One dog is chasing another, its legs stretched mid-air.
          2. The other dog is looking back while speeding up."

---
## Output Format:
- Each labeled **{cat_name}** should have exactly **one line of description**.
- Format: `ID. {cat_name} + action-based description`
- (O) Example:
```
1. The person is leaning forward while opening a bag with both hands.
2. The person is holding onto a rope and pulling themselves up.
```
- **Ensure that each object is described individually.**
- **Do not group objects into a single sentence** (e.g., "2-5. people: xxx" is NOT allowed).

---
## Additional Instructions:
- **Do NOT** use expressions like "it appears that..." or "it seems like...".
- **Do NOT** mention object IDs in the description (only use the provided format).
- **Do NOT** include markdown formatting (no bullet points, no asterisks).
- **Only describe actions of the labeled {cat_name} objects**; do not introduce unrelated categories.

Please generate the action-oriented descriptions for each labeled {cat_name} and start your answer:
"""

            dense_caption_prompt = f"""
You are a visual assistant analyzing a single frame of a video.
In this frame, {frame_cat_cnts} objects belonging to the category **{cat_name}** have been labeled with bright numeric IDs at their center and boundary.

I am building an **action-centric referring expression** dataset.
Your task is to describe each labeled {cat_name} based on **clearly observable and specific actions**.

---
## Guidelines:
1. **Focus only on visible and prominent actions** (e.g., running, pushing, grasping an object).
2. **Avoid describing minor or ambiguous movements** (e.g., "slightly moving a paw," "tilting head a bit").
3. **Do not include subjective or speculative descriptions** (e.g., "it seems excited" or "it might be preparing to jump").
4. **Avoid vague expressions** like "engaging with something." Instead, specify the action (e.g., "grabbing a stick," "pressing a button").
5. **Use dynamic action verbs** (e.g., holding, throwing, inspecting, leaning, pressing) to highlight motion and interaction.
6. If multiple {cat_name}s appear, ensure each description is **distinct and non-overlapping**.
7. Base your descriptions on these principles:
   - **Avoid words like 'minimal' or 'slightly'.**
   - Emphasize **body movement, posture, and motion patterns** (e.g., "lifting its head," "facing forward," "showing its back").
   - Describe **facial expressions and interactions with objects** (e.g., "opening its mouth wide," "smiling while holding an item").
   - **Specify actions with other objects or entities** only when they are clear and observable.
     - (O) "pushing another person"
     - (X) "interacting with another object"

---
## Output Format:
- Each labeled **{cat_name}** must have **exactly one line**.
- Format: `ID. {cat_name} + action-based description`
- (O) Example:
```
1. The person is holding ski poles and skiing down a snowy mountain with bent knees.
2. The person is pulling a baby carriage while smiling.
```
- **Ensure each object is described individually.**
- **Do not group multiple objects into a single sentence** (e.g., "2-5. people: xxx" is NOT allowed).

---
## Example:
If the frame has two labeled **bears**, your output should be:
```
1. The bear is reaching out its right paw while leaning forward to catch prey.
2. A bear is standing upright, facing right, and touching the bike beside it.
```

---
## Additional Instructions:
- **Do NOT** describe appearance (e.g., color, size, texture) or relative positioning (e.g., "on the left/right").
- **Do NOT** reference object IDs explicitly (e.g., "Person 1" or "Object 2" is NOT allowed).
- **Do NOT** include markdown formatting (no bullet points, asterisks, or extra symbols).
- **Only describe actions of the labeled {cat_name} objects**; do not introduce unrelated categories.

Please generate the action-oriented descriptions for each labeled {cat_name} and start your answer:"""

            MAX_RETRIES = 3
            retry_count = 0

            if should_caption:
                while retry_count < MAX_RETRIES:
                    selected_prompt = random.choice([dense_caption_prompt, dense_caption_prompt_2])

                    response2 = captioner.chat.completions.create(
                        model=model,
                        messages=[
                            {
                                "role": "user",
                                "content": [
                                    {"type": "text", "text": selected_prompt},
                                    {"type": "image_url",
                                     "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
                                ],
                            }
                        ],
                    )

                    caption = response2.choices[0].message.content.strip()
                    caption_lower = caption.lower().lstrip()

                    # accept only answers that start with "1." and are not refusals
                    if caption_lower.startswith("1.") and not any(
                        phrase in caption_lower for phrase in ["i'm sorry", "please", "can't help"]
                    ):
                        break

                    print(f"Retrying caption generation... ({retry_count + 1}/{MAX_RETRIES})")
                    retry_count += 1
                    time.sleep(2)

                if retry_count == MAX_RETRIES:
                    caption = None
                    print("Max retries reached. Caption generation failed.")

            else:
                caption = None

            image_captions[frame_name] = caption
        all_captions[cat_name] = image_captions

    # finally, also prepare the valid object ids per category
    valid_obj_ids = dict()

    for cat in cat_names:
        if cat in ytvos_category_valid_list:
            obj_id_cat = vid_meta['obj_id_cat']
            valid_cat_ids = []
            for obj_id in list(obj_id_cat.keys()):
                if obj_id_cat[obj_id] == cat:
                    valid_cat_ids.append(obj_id)
            valid_obj_ids[cat] = valid_cat_ids

    return vid_id, all_captions, valid_obj_ids

590 |
+
if __name__ == '__main__':
|
591 |
+
parser = argparse.ArgumentParser('ReferFormer training and evaluation script', parents=[opts.get_args_parser()])
|
592 |
+
parser.add_argument('--save_caption_path', type=str, default="mbench/numbered_captions_gpt-4o_randcap.json")
|
593 |
+
parser.add_argument('--save_valid_obj_ids_path', type=str, default="mbench/numbered_valid_obj_ids_gpt-4o_randcap.json")
|
594 |
+
|
595 |
+
args = parser.parse_args()
|
596 |
+
|
597 |
+
#==================데이터 불러오기===================
|
598 |
+
# 전체 데이터셋
|
599 |
+
train_dataset = build_ytvos_ref(image_set = 'train', args = args)
|
600 |
+
|
601 |
+
# 전체 데이터셋 메타데이터
|
602 |
+
metas = train_dataset.metas
|
603 |
+
|
604 |
+
# 색상 후보 8개 (RGB 형식)
|
605 |
+
colors = [
|
606 |
+
(255, 0, 0), # Red
|
607 |
+
(0, 255, 0), # Green
|
608 |
+
(0, 0, 255), # Blue
|
609 |
+
(255, 255, 0), # Yellow
|
610 |
+
(255, 0, 255), # Magenta
|
611 |
+
(0, 255, 255), # Cyan
|
612 |
+
(128, 0, 128), # Purple
|
613 |
+
(255, 165, 0) # Orange
|
614 |
+
]
|
615 |
+
|
616 |
+
ytvos_category_valid_list = [
|
617 |
+
'airplane', 'ape', 'bear', 'bird', 'boat', 'bus', 'camel', 'cat', 'cow', 'crocodile',
|
618 |
+
'deer', 'dog', 'dolphin', 'duck', 'eagle', 'earless_seal', 'elephant', 'fish', 'fox', 'frog',
|
619 |
+
'giant_panda', 'giraffe', 'hedgehog', 'horse', 'leopard', 'lion', 'lizard',
|
620 |
+
'monkey', 'motorbike', 'mouse', 'owl', 'parrot', 'penguin', 'person',
|
621 |
+
'rabbit', 'raccoon', 'sedan', 'shark', 'sheep', 'snail', 'snake',
|
622 |
+
'squirrel', 'tiger', 'train', 'truck', 'turtle', 'whale', 'zebra'
|
623 |
+
]
|
624 |
+
|
625 |
+
#==================gpt 돌리기===================
|
626 |
+
os.environ['OPENAI_API_KEY'] = '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'
|
627 |
+
|
628 |
+
result_captions = {}
|
629 |
+
result_valid_obj_ids = {}
|
630 |
+
|
631 |
+
for i in range(len(metas)):
|
632 |
+
try:
|
633 |
+
vid_id, all_captions, valid_obj_ids = getCaption(i)
|
634 |
+
|
635 |
+
if vid_id not in result_captions:
|
636 |
+
result_captions[vid_id] = all_captions
|
637 |
+
if vid_id not in result_valid_obj_ids:
|
638 |
+
result_valid_obj_ids[vid_id] = valid_obj_ids
|
639 |
+
|
640 |
+
except (requests.exceptions.ConnectionError, APIConnectionError) as e:
|
641 |
+
print(f"created caption until {i-1}", flush=True)
|
642 |
+
print("인터넷 연결 문제로 요청을 처리할 수 없습니다:", e, flush=True)
|
643 |
+
|
644 |
+
with open(args.save_caption_path, "w") as file:
|
645 |
+
json.dump(result_captions, file, indent=4)
|
646 |
+
|
647 |
+
with open(args.save_valid_obj_ids_path, "w") as file:
|
648 |
+
json.dump(result_valid_obj_ids, file, indent=4)
|
649 |
+
|
650 |
+
except OpenAIError as e:
|
651 |
+
print(f"created caption until {i-1}", flush=True)
|
652 |
+
print("OpenAI API 관련 오류가 발생했습니다:", e, flush=True)
|
653 |
+
|
654 |
+
with open(args.save_caption_path, "w") as file:
|
655 |
+
json.dump(result_captions, file, indent=4)
|
656 |
+
|
657 |
+
with open(args.save_valid_obj_ids_path, "w") as file:
|
658 |
+
json.dump(result_valid_obj_ids, file, indent=4)
|
659 |
+
|
660 |
+
except Exception as e:
|
661 |
+
print(f"created caption until {i-1}", flush=True)
|
662 |
+
print("알 수 없는 오류 발생:", e, flush=True)
|
663 |
+
|
664 |
+
with open(args.save_caption_path, "w") as file:
|
665 |
+
json.dump(result_captions, file, indent=4)
|
666 |
+
|
667 |
+
with open(args.save_valid_obj_ids_path, "w") as file:
|
668 |
+
json.dump(result_valid_obj_ids, file, indent=4)
|
669 |
+
|
670 |
+
print("Finished!", flush=True)
|
671 |
+
|
672 |
+
with open(args.save_caption_path, "w") as file:
|
673 |
+
json.dump(result_captions, file, indent=4)
|
674 |
+
|
675 |
+
with open(args.save_valid_obj_ids_path, "w") as file:
|
676 |
+
json.dump(result_valid_obj_ids, file, indent=4)
|
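Note: the retry loop above only checks that the model's answer starts with "1." and is not a refusal; downstream consumers still have to split the numbered block into per-object sentences. A minimal sketch of such a parser, assuming the `ID. description` format the prompt enforces (the helper name and regex are illustrative, not part of the uploaded script):

import re

def parse_numbered_captions(caption: str) -> dict:
    # Split "1. ...\n2. ..." style output into {"1": "...", "2": "..."}.
    result = {}
    for line in caption.strip().splitlines():
        match = re.match(r"^\s*(\d+)\.\s*(.+)$", line)
        if match:
            result[match.group(1)] = match.group(2).strip()
    return result

# parse_numbered_captions("1. The bear leans forward.\n2. A bear stands upright.")
# -> {"1": "The bear leans forward.", "2": "A bear stands upright."}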
mbench/gpt_test.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
mbench/make_ref-ytvos_json.py
ADDED
@@ -0,0 +1,108 @@
import sys
import os
from os import path as osp
sys.path.append(osp.abspath(osp.join(osp.dirname(__file__), '..')))

from datasets import build_dataset
import argparse
import opts


from pathlib import Path
import io

import numpy as np
import pandas as pd
import regex as re
import json

import cv2
from PIL import Image, ImageDraw
import torch
from torchvision.transforms import functional as F

from skimage import measure  # (pip install scikit-image)
from shapely.geometry import Polygon, MultiPolygon  # (pip install Shapely)

import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.collections import PatchCollection
from matplotlib.patches import Rectangle


import ipywidgets as widgets
from IPython.display import display, clear_output

#================== Build JSON ===================
def createJson(train_dataset, metas):
    entire_json = {}

    # initialization
    vid_idx = 0

    while vid_idx < len(train_dataset):

        # for a single video
        video_data = {}
        video_train_frames, video_train_info = train_dataset[vid_idx]
        video_meta = metas[vid_idx]

        video_id = video_meta['video']
        video_data['bins'] = video_meta['bins']
        bin_nums = len(video_meta['bins'])
        obj_nums = max([int(k) for k in list(video_meta['obj_id_cat'].keys())])

        annotation_data = []
        frame_names = []

        for i in range(bin_nums):
            bin_data = {}
            for j in range(obj_nums):
                obj_id = str(j+1)
                try:
                    obj_data = {
                        "category_name": video_meta['obj_id_cat'][obj_id],
                        "bbox": video_train_info['boxes'][i*obj_nums+j, :].tolist(),
                        "valid": video_train_info['valid'][i*obj_nums+j].item()
                    }
                except:  # object with this id is absent in the video; store an empty record
                    obj_data = {}
                bin_data[obj_id] = obj_data
            annotation_data.append(bin_data)

        video_data['annotations'] = annotation_data


        sample_indx = metas[vid_idx]['sample_indx']
        frames = metas[vid_idx]['frames']
        for i in sample_indx:
            frame_name = frames[i]
            frame_names.append(frame_name)

        video_data['frame_names'] = frame_names
        video_data['video_path'] = os.path.join(str(train_dataset.img_folder), 'JPEGImages', video_id)
        entire_json[video_id] = video_data

        vid_idx += 1

    return entire_json


if __name__ == '__main__':
    parser = argparse.ArgumentParser('ReferFormer training and evaluation script', parents=[opts.get_args_parser()])
    args = parser.parse_args()

    #================== Load data ===================
    # full dataset
    train_dataset = build_dataset('ytvos_ref', image_set = 'train', args = args)

    # metadata for the full dataset
    metas = train_dataset.metas

    #================== Build JSON ===================
    entire_json_dict = createJson(train_dataset, metas)
    print(type(entire_json_dict))
    entire_json = json.dumps(entire_json_dict, indent=4)

    with open('mbench/sampled_frame3.json', mode='w') as file:
        file.write(entire_json)
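Note: a minimal sketch of reading the sampled_frame3.json written above (assumed usage, not part of the repo). Each entry pairs the sampled frame names with per-bin annotations keyed by object id:

import json
import os

with open('mbench/sampled_frame3.json') as f:
    entire_json = json.load(f)

for video_id, video_data in entire_json.items():
    # annotations[i] maps obj_id -> {category_name, bbox, valid} for frame_names[i]
    for frame_name, bin_data in zip(video_data['frame_names'], video_data['annotations']):
        frame_path = os.path.join(video_data['video_path'], frame_name + '.jpg')
        valid_ids = [oid for oid, obj in bin_data.items() if obj and obj.get('valid')]
        print(frame_path, valid_ids)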
mbench/numbered_captions_gpt-4o_final.json
ADDED
The diff for this file is too large to render.
See raw diff
mbench/numbered_captions_gpt-4o_no_mask_color.json
ADDED
The diff for this file is too large to render.
See raw diff
mbench/numbered_captions_gpt-4o_nomask_randcap.json
ADDED
The diff for this file is too large to render.
See raw diff
mbench/numbered_captions_gpt-4o_randcap.json
ADDED
The diff for this file is too large to render.
See raw diff
mbench/numbered_valid_obj_ids.json
ADDED
@@ -0,0 +1,2153 @@
{
  "003234408d": {"penguin": ["1", "2", "3", "4", "5"]},
  "0043f083b5": {"sedan": ["2", "3"], "bus": ["1"]},
  "0044fa5fba": {"giant_panda": ["1"]},
  "005a527edd": {"ape": ["1", "2"]},
  "0065b171f9": {"giant_panda": ["1"]},
  "00917dcfc4": {"zebra": ["1", "2", "3"]},
  "00a23ccf53": {"shark": ["1"]},
  "00ad5016a4": {"airplane": ["1"]},
  "01082ae388": {"leopard": ["1"]},
  "011ac0a06f": {"ape": ["1", "2", "3", "4", "5"]},
  "013099c098": {"giant_panda": ["1", "2"]},
  "0155498c85": {"motorbike": ["2"], "person": ["1"]},
  "01694ad9c8": {"bird": ["1"]},
  "017ac35701": {"giant_panda": ["1"]},
  "01b80e8e1a": {"zebra": ["1", "2"]},
  "01baa5a4e1": {},
  "01c3111683": {"whale": ["1"]},
  "01c4cb5ffe": {"person": ["1", "3"]},
  "01c76f0a82": {"sedan": ["1", "4"]},
  "01c783268c": {"ape": ["1"], "person": ["2"]},
  "01e64dd36a": {"cow": ["1", "2", "3"]},
  "01ed275c6e": {"giraffe": ["1", "2"]},
  "01ff60d1fa": {"lizard": ["1"]},
  "020cd28cd2": {"person": ["1"]},
  "02264db755": {"fox": ["1"]},
  "0248626d9a": {"train": ["1"]},
  "02668dbffa": {"frog": ["1"]},
  "0274193026": {"person": ["2"]},
  "02d28375aa": {"fox": ["1"]},
  "031ccc99b1": {"person": ["1", "2", "3"]},
  "0321b18c10": {"person": ["1", "2"], "elephant": ["3"]},
  "0348a45bca": {"fish": ["1", "2", "3", "4", "5"]},
  "0355e92655": {"person": ["2"], "boat": ["3"]},
  "0358b938c1": {"elephant": ["1", "2", "3", "4"]},
  "0368107cf1": {"person": ["1", "2"]},
  "0379ddf557": {"person": ["1"]},
  "038b2cc71d": {"lizard": ["1"]},
  "038c15a5dd": {"hedgehog": ["1"]},
  "03a06cc98a": {"giraffe": ["1", "2", "3"]},
  "03a63e187f": {"lizard": ["1"]},
  "03c95b4dae": {"elephant": ["1", "2", "3"]},
  "03e2b57b0e": {"lizard": ["1"]},
  "04194e1248": {"lizard": ["1"]},
  "04259896e2": {"lizard": ["1"]},
  "0444918a5f": {"truck": ["1", "2", "3", "4"]},
  "04460a7a52": {"lizard": ["1"]},
  "04474174a4": {"ape": ["1", "2"]},
  "0450095513": {"snail": ["1"]},
  "045f00aed2": {"tiger": ["1"], "person": ["3"]},
  "04667fabaa": {"parrot": ["1"]},
  "04735c5030": {"cat": ["1", "2"]},
  "04990d1915": {"truck": ["3"], "sedan": ["1"], "bus": ["2"]},
  "04d62d9d98": {"person": ["1"]},
  "04f21da964": {"monkey": ["1"]},
  "04fbad476e": {"parrot": ["1"]},
  "04fe256562": {"truck": ["2"], "motorbike": ["1"]},
  "0503bf89c9": {"hedgehog": ["1"]},
  "0536c9eed0": {"cat": ["1"]},
  "054acb238f": {"owl": ["1"]},
  "05579ca250": {"person": ["1"], "sedan": ["3"]},
  "056c200404": {},
  "05774f3a2c": {"ape": ["1", "2", "3"]},
  "058a7592c8": {"train": ["1"]},
  "05a0a513df": {"person": ["1", "2"]},
  "05a569d8aa": {"cat": ["1"], "mouse": ["2"]},
  "05aa652648": {"ape": ["1"]},
  "05d7715782": {},
  "05e0b0f28f": {"mouse": ["1"], "person": ["2"]},
  "05fdbbdd7a": {},
  "05ffcfed85": {"monkey": ["1", "2"]},
  "0630391881": {"person": ["1"]},
  "06840b2bbe": {"snake": ["1"]},
  "068f7dce6f": {"shark": ["1"]},
  "0693719753": {"turtle": ["1", "2"]},
  "06ce2b51fb": {"person": ["1", "2"]},
  "06e224798e": {"tiger": ["1"]},
  "06ee361788": {"duck": ["1", "2", "3"]},
  "06fbb3fa2c": {"eagle": ["1"]},
  "0700264286": {"cow": ["1", "2"]},
  "070c918ca7": {"parrot": ["1"]},
  "07129e14a4": {"parrot": ["1", "2"], "person": ["3"]},
  "07177017e9": {"motorbike": ["1", "2"]},
  "07238ffc58": {"monkey": ["1", "2", "3"]},
  "07353b2a89": {"sheep": ["1", "2", "3", "4"]},
  "0738493cbf": {"airplane": ["1"]},
  "075926c651": {"person": ["1", "2"]},
  "075c701292": {"duck": ["1", "2", "3", "4"]},
  "0762ea9a30": {"person": ["1"]},
  "07652ee4af": {"person": ["1"]},
  "076f206928": {"person": ["3"], "zebra": ["1", "2"]},
  "077d32af19": {"train": ["4"], "person": ["1", "2", "3"]},
  "079049275c": {"mouse": ["1"]},
  "07913cdda7": {"train": ["1"], "person": ["2", "3"]},
  "07a11a35e8": {"ape": ["1", "2"]},
  "07ac33b6df": {"ape": ["1"]},
  "07c62c3d11": {"parrot": ["1", "2", "3"]},
  "07cc1c7d74": {"snake": ["1"]},
  "080196ef01": {"lizard": ["1"]},
  "081207976e": {},
  "081ae4fa44": {"shark": ["1", "2"]},
  "081d8250cb": {"person": ["1"], "sedan": ["3"]},
  "082900c5d4": {"duck": ["1", "2", "3"]},
  "0860df21e2": {},
  "0866d4c5e3": {"bird": ["1", "2", "3"]},
  "0891ac2eb6": {"person": ["1", "2", "3"]},
  "08931bc458": {"person": ["1"]},
  "08aa2705d5": {"snake": ["1"]},
  "08c8450db7": {},
  "08d50b926c": {"turtle": ["1", "2"]},
  "08e1e4de15": {"monkey": ["1", "2", "3", "4"]},
  "08e48c1a48": {"cow": ["1"]},
  "08f561c65e": {"giant_panda": ["1"], "person": ["2"]},
  "08feb87790": {"sheep": ["1"]},
  "09049f6fe3": {"mouse": ["1", "2"]},
  "092e4ff450": {"snake": ["1"]},
  "09338adea8": {"whale": ["1", "2"]},
  "093c335ccc": {"person": ["2"]},
  "0970d28339": {"ape": ["1", "2"]},
  "0974a213dc": {"giraffe": ["1", "2", "3"]},
  "097b471ed8": {"cat": ["1", "2"]},
  "0990941758": {"giant_panda": ["1"]},
  "09a348f4fa": {"lizard": ["1"]},
  "09a6841288": {"duck": ["1", "2"]},
  "09c5bad17b": {"airplane": ["1"]},
  "09c9ce80c7": {"giant_panda": ["1"]},
  "09ff54fef4": {"fox": ["1", "2"]},
  "0a23765d15": {"person": ["1", "2"]},
  "0a275e7f12": {"elephant": ["1"]},
  "0a2f2bd294": {"motorbike": ["1"]},
  "0a7a2514aa": {"cat": ["1"], "lizard": ["2"]},
  "0a7b27fde9": {"parrot": ["1", "2"]},
  "0a8c467cc3": {"fish": ["1", "2", "3"]},
  "0ac8c560ae": {"person": ["2", "3"]},
  "0b1627e896": {"boat": ["1"]},
  "0b285c47f6": {"mouse": ["1"]},
  "0b34ec1d55": {"ape": ["1"]},
  "0b5b5e8e5a": {"person": ["1"], "sedan": ["2"]},
  "0b68535614": {"rabbit": ["1"]},
  "0b6f9105fc": {"rabbit": ["1"]},
  "0b7dbfa3cb": {"cow": ["1"]},
  "0b9cea51ca": {"whale": ["1"]},
  "0b9d012be8": {"camel": ["1"]},
  "0bcfc4177d": {"truck": ["1"]},
  "0bd37b23c1": {"motorbike": ["1"]},
  "0bd864064c": {"eagle": ["1"]},
  "0c11c6bf7b": {"deer": ["1"]},
  "0c26bc77ac": {"crocodile": ["1"]},
  "0c3a04798c": {"fish": ["2"], "duck": ["1"]},
  "0c44a9d545": {"tiger": ["1"]},
  "0c817cc390": {"dog": ["2"], "hedgehog": ["1"]},
  "0ca839ee9a": {"ape": ["1", "2"]},
  "0cd7ac0ac0": {"rabbit": ["1"]},
  "0ce06e0121": {"parrot": ["1", "2"]},
  "0cfe974a89": {"turtle": ["1", "2"]},
  "0d2fcc0dcd": {"zebra": ["1", "2", "3", "4"]},
  "0d3aad05d2": {"person": ["1"]},
  "0d40b015f4": {"person": ["1"]},
  "0d97fba242": {"dog": ["1"], "person": ["2"]},
  "0d9cc80d7e": {"person": ["1", "2", "3"]},
  "0dab85b6d3": {"lizard": ["1", "2"]},
  "0db5c427a5": {"train": ["1"]},
  "0dbaf284f1": {"cat": ["1", "2"]},
  "0de4923598": {},
  "0df28a9101": {"turtle": ["1", "2", "3"]},
  "0e04f636c4": {"frog": ["1"]},
  "0e05f0e232": {"lizard": ["1", "2"]},
  "0e0930474b": {"sedan": ["1"], "person": ["2", "3"]},
  "0e27472bea": {"turtle": ["1"]},
  "0e30020549": {"parrot": ["1"]},
  "0e621feb6c": {"lizard": ["1", "2"]},
  "0e803c7d73": {},
  "0e9ebe4e3c": {"truck": ["1"]},
  "0e9f2785ec": {"person": ["2"]},
  "0ea68d418b": {"airplane": ["1"]},
  "0eb403a222": {},
  "0ee92053d6": {"person": ["1"]},
  "0eefca067f": {"giant_panda": ["1", "2"]},
  "0f17fa6fcb": {"duck": ["1", "2", "3"]},
  "0f1ac8e9a3": {"frog": ["1"]},
  "0f202e9852": {"parrot": ["1"]},
  "0f2ab8b1ff": {"dolphin": ["1", "2", "3"]},
  "0f51a78756": {"sheep": ["1"]},
  "0f5fbe16b0": {"raccoon": ["1", "2"]},
  "0f6072077b": {"person": ["1", "2", "3"]},
  "0f6b69b2f4": {"rabbit": ["1"]},
  "0f6c2163de": {"snail": ["1"]},
  "0f74ec5599": {"giant_panda": ["1"]},
  "0f9683715b": {"elephant": ["1"]},
  "0fa7b59356": {"duck": ["1"]},
  "0fb173695b": {"person": ["3"]},
  "0fc958cde2": {"owl": ["1"]},
  "0fe7b1a621": {"parrot": ["1"]},
  "0ffcdb491c": {"person": ["1", "2", "3"]},
  "101caff7d4": {"giant_panda": ["1", "2"]},
  "1022fe8417": {"person": ["1", "2", "3"]},
  "1032e80b37": {"giraffe": ["1"]},
  "103f501680": {"fish": ["1"]},
  "104e64565f": {"elephant": ["1"]},
  "104f1ab997": {"person": ["1", "2", "3"]},
  "106242403f": {"person": ["1", "2"]},
  "10b31f5431": {"person": ["1", "3", "4"]},
  "10eced835e": {"giant_panda": ["1", "2"]},
  "110d26fa3a": {"shark": ["1"]},
  "1122c1d16a": {"parrot": ["1", "2", "3", "4", "5"], "person": ["6"]},
  "1145b49a5f": {"rabbit": ["1"]},
  "11485838c2": {"giraffe": ["1", "2", "3"]},
  "114e7676ec": {"person": ["1"]},
  "1157472b95": {"parrot": ["1", "2"]},
  "115ee1072c": {"cow": ["1"]},
  "1171141012": {"turtle": ["1"], "person": ["2"]},
  "117757b4b8": {"snail": ["1"]},
  "1178932d2f": {"motorbike": ["3"], "person": ["1", "2"]},
  "117cc76bda": {"whale": ["1"]},
  "1180cbf814": {"fish": ["1", "2"]},
  "1187bbd0e3": {"cat": ["1"]},
  "1197e44b26": {"giant_panda": ["1"]},
  "119cf20728": {"lizard": ["1"]},
  "119dd54871": {"lion": ["1", "2"]},
  "11a0c3b724": {"mouse": ["1", "2"]},
  "11a6ba8c94": {"person": ["1", "2"]},
  "11c722a456": {"turtle": ["1", "2"]},
  "11cbcb0b4d": {"zebra": ["1"]},
  "11ccf5e99d": {"person": ["2"]},
  "11ce6f452e": {"person": ["1", "2", "3"]},
  "11feabe596": {"rabbit": ["1"]},
  "120cb9514d": {"person": ["1", "2", "3"]},
  "12156b25b3": {"person": ["1"]},
  "122896672d": {"person": ["1", "3"]},
  "1233ac8596": {"dog": ["1"]},
  "1239c87234": {"lizard": ["1"]},
  "1250423f7c": {"person": ["2"], "elephant": ["3", "4"]},
  "1257a1bc67": {"snake": ["1"]},
  "125d1b19dd": {"giant_panda": ["1", "2"]},
  "126d203967": {"person": ["2"]},
  "1295e19071": {"airplane": ["1"]},
  "12ad198c54": {"person": ["1"]},
  "12bddb2bcb": {"person": ["2"]},
  "12ec9b93ee": {"giant_panda": ["1"]},
  "12eebedc35": {"bird": ["1"]},
  "132852e094": {"fox": ["1"]},
  "1329409f2a": {"fish": ["1"]},
  "13325cfa14": {"person": ["2"]},
  "1336440745": {"mouse": ["1", "2"]},
  "134d06dbf9": {"cat": ["1"]},
  "135625b53d": {"parrot": ["1"]},
  "13870016f9": {"cow": ["2", "3"], "person": ["1"]},
  "13960b3c84": {"giraffe": ["1", "2", "3"]},
  "13adaad9d9": {"giant_panda": ["1"]},
  "13ae097e20": {"giant_panda": ["1"]},
  "13e3070469": {"zebra": ["1", "2", "3"]},
  "13f6a8c20d": {"fish": ["1"]},
  "1416925cf2": {"truck": ["1", "2"]},
  "142d2621f5": {"motorbike": ["3"], "person": ["1", "2"]},
  "145d5d7c03": {"giant_panda": ["1"]},
  "145fdc3ac5": {"lizard": ["1"]},
  "1471274fa7": {"person": ["1"]},
  "14a6b5a139": {"fish": ["1"]},
  "14c21cea0d": {"monkey": ["1", "2"]},
  "14dae0dc93": {"person": ["2"]},
  "14f9bd22b5": {"tiger": ["1"]},
  "14fd28ae99": {"parrot": ["1"]},
  "15097d5d4e": {"parrot": ["1"]},
  "150ea711f2": {"whale": ["1"]},
  "1514e3563f": {"earless_seal": ["1", "2"]},
  "152aaa3a9e": {"raccoon": ["1"]},
  "152b7d3bd7": {"giant_panda": ["1"]},
  "15617297cc": {"person": ["1"]},
  "15abbe0c52": {"person": ["1"]},
  "15d1fb3de5": {"cat": ["2"], "owl": ["1"]},
  "15f67b0fab": {"person": ["1"]},
  "161eb59aad": {"cow": ["2", "3"], "giraffe": ["1"]},
  "16288ea47f": {"duck": ["1", "2"]},
  "164410ce62": {"person": ["1"]},
  "165c3c8cd4": {"person": ["1", "2", "3"]},
  "165c42b41b": {"motorbike": ["2", "3"], "person": ["1", "4"]},
  "165ec9e22b": {"person": ["1", "2"]},
  "1669502269": {"person": ["1"]},
  "16763cccbb": {"ape": ["1"]},
  "16adde065e": {"cat": ["2"], "person": ["3"]},
  "16af445362": {"airplane": ["1"]},
  "16afd538ad": {"parrot": ["1", "2"]},
  "16c3fa4d5d": {"sedan": ["1"]},
  "16d1d65c27": {"monkey": ["1"]},
  "16e8599e94": {"giant_panda": ["1"]},
  "16fe9fb444": {"motorbike": ["1"], "person": ["2"]},
  "1705796b02": {"train": ["1"]},
  "1724db7671": {"giant_panda": ["1"]},
  "17418e81ea": {"shark": ["1"]},
  "175169edbb": {"ape": ["1", "2"]},
  "17622326fd": {"lizard": ["1"]},
  "17656bae77": {"elephant": ["1"]},
  "17b0d94172": {"airplane": ["1"]},
  "17c220e4f6": {"giant_panda": ["1"]},
  "17c7bcd146": {"train": ["1"]},
  "17cb4afe89": {"tiger": ["1"]},
  "17cd79a434": {"squirrel": ["1"]},
  "17d18604c3": {"person": ["1", "2"]},
  "17d8ca1a37": {"person": ["2"], "owl": ["1"]},
  "17e33f4330": {"monkey": ["1"]},
  "17f7a6d805": {"snail": ["1"]},
  "180abc8378": {"person": ["2"], "owl": ["1"]},
  "183ba3d652": {"motorbike": ["3"], "person": ["2"]},
  "185bf64702": {"zebra": ["1", "2"]},
  "18913cc690": {"train": ["1"]},
  "1892651815": {"camel": ["1"]},
  "189ac8208a": {"giraffe": ["1", "2"]},
  "189b44e92c": {"zebra": ["1"]},
  "18ac264b76": {"person": ["2"]},
  "18b245ab49": {"penguin": ["1", "2", "3", "4"]},
  "18b5cebc34": {"mouse": ["1"]},
  "18bad52083": {"parrot": ["1", "2"]},
  "18bb5144d5": {"lizard": ["1"]},
  "18c6f205c5": {"person": ["1", "2", "3"]},
  "1903f9ea15": {"bird": ["1", "2", "3"]},
  "1917b209f2": {"horse": ["2"], "cow": ["3", "4"], "person": ["1"]},
  "191e74c01d": {"deer": ["1"]},
  "19367bb94e": {"fish": ["1", "2", "3"]},
  "193ffaa217": {"person": ["1", "2", "3"]},
  "19696b67d3": {"cow": ["1"]},
  "197f3ab6f3": {"giant_panda": ["1"]},
  "1981e763cc": {"sheep": ["1", "2"]},
  "198afe39ae": {"person": ["1"]},
  "19a6e62b9b": {"monkey": ["1", "2"]},
  "19b60d5335": {"hedgehog": ["1"]},
  "19c00c11f9": {"person": ["1"]},
  "19e061eb88": {"boat": ["1", "2"]},
  "19e8bc6178": {"dog": ["1"]},
  "19ee80dac6": {"person": ["1", "3", "4"]},
  "1a25a9170a": {"cow": ["1"], "person": ["2", "3"]},
  "1a359a6c1a": {"sheep": ["1"]},
  "1a3e87c566": {"frog": ["1"]},
  "1a5fe06b00": {"bus": ["1"]},
  "1a6c0fbd1e": {"person": ["1"]},
  "1a6f3b5a4b": {"sedan": ["3"]},
  "1a8afbad92": {"zebra": ["1", "2", "3"]},
  "1a8bdc5842": {"parrot": ["1", "2"]},
  "1a95752aca": {"duck": ["1", "2"]},
  "1a9c131cb7": {"ape": ["1", "2", "3"]},
  "1aa3da3ee3": {"sheep": ["1", "2", "3", "4"]},
  "1ab27ec7ea": {"deer": ["1"]},
  "1abf16d21d": {"turtle": ["1"]},
  "1acd0f993b": {"dog": ["1"], "person": ["3"]},
  "1ad202e499": {"lizard": ["1", "2"]},
  "1af8d2395d": {"airplane": ["4"], "person": ["1", "2"]},
  "1afd39a1fa": {"motorbike": ["2"]},
  "1b2d31306f": {"lizard": ["1"]},
  "1b3fa67f0e": {"airplane": ["1"]},
  "1b43fa74b4": {"owl": ["1", "2"]},
  "1b73ea9fc2": {"parrot": ["1"]},
  "1b7e8bb255": {"person": ["2"]},
  "1b8680f8cd": {"person": ["2", "3"]},
  "1b883843c0": {"person": ["1", "2"]},
  "1b8898785b": {"monkey": ["1", "2"]},
  "1b88ba1aa4": {"giant_panda": ["1"]},
  "1b96a498e5": {"ape": ["1"]},
  "1bbc4c274f": {"fish": ["2"]},
  "1bd87fe9ab": {"train": ["1"]},
  "1c4090c75b": {"whale": ["1"]},
  "1c41934f84": {"elephant": ["1", "2"]},
  "1c72b04b56": {"lion": ["1"]},
  "1c87955a3a": {"turtle": ["2"], "crocodile": ["1"]},
  "1c9f9eb792": {"person": ["2"]},
  "1ca240fede": {"train": ["1"]},
  "1ca5673803": {"person": ["1", "3"]},
  "1cada35274": {"duck": ["1"]},
  "1cb44b920d": {"eagle": ["1", "2"]},
  "1cd10e62be": {"leopard": ["1"]},
  "1d3087d5e5": {"fish": ["1", "2", "3", "4", "5"]},
  "1d3685150a": {"person": ["1", "3"]},
  "1d6ff083aa": {"person": ["1", "2"]}
}
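Note: a minimal sketch of consuming this mapping (assumed usage, not part of the repo). For each video it lists, per category, the object ids belonging to that category, so generated captions can be matched back to annotated objects:

import json

with open('mbench/numbered_valid_obj_ids.json') as f:
    valid_obj_ids = json.load(f)

# e.g. valid_obj_ids["0043f083b5"] == {"sedan": ["2", "3"], "bus": ["1"]}
for vid_id, per_cat in valid_obj_ids.items():
    for cat_name, obj_ids in per_cat.items():
        if obj_ids:
            print(vid_id, cat_name, obj_ids)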
mbench/numbered_valid_obj_ids_gpt-4o.json
ADDED
@@ -0,0 +1,2153 @@
{
  "003234408d": {"penguin": ["1", "2", "3", "4", "5"]},
  "0043f083b5": {"sedan": ["2", "3"], "bus": ["1"]},
  "0044fa5fba": {"giant_panda": ["1"]},
  "005a527edd": {"ape": ["1", "2"]},
  "0065b171f9": {"giant_panda": ["1"]},
  "00917dcfc4": {"zebra": ["1", "2", "3"]},
  "00a23ccf53": {"shark": ["1"]},
  "00ad5016a4": {"airplane": ["1"]},
  "01082ae388": {"leopard": ["1"]},
  "011ac0a06f": {"ape": ["1", "2", "3", "4", "5"]},
  "013099c098": {"giant_panda": ["1", "2"]},
  "0155498c85": {"person": ["1"], "motorbike": ["2"]},
  "01694ad9c8": {"bird": ["1"]},
  "017ac35701": {"giant_panda": ["1"]},
  "01b80e8e1a": {"zebra": ["1", "2"]},
  "01baa5a4e1": {},
  "01c3111683": {"whale": ["1"]},
  "01c4cb5ffe": {"person": ["1", "3"]},
  "01c76f0a82": {"sedan": ["1", "4"]},
  "01c783268c": {"person": ["2"], "ape": ["1"]},
  "01e64dd36a": {"cow": ["1", "2", "3"]},
  "01ed275c6e": {"giraffe": ["1", "2"]},
  "01ff60d1fa": {"lizard": ["1"]},
  "020cd28cd2": {"person": ["1"]},
  "02264db755": {"fox": ["1"]},
  "0248626d9a": {"train": ["1"]},
  "02668dbffa": {"frog": ["1"]},
  "0274193026": {"person": ["2"]},
  "02d28375aa": {"fox": ["1"]},
  "031ccc99b1": {"person": ["1", "2", "3"]},
  "0321b18c10": {"elephant": ["3"], "person": ["1", "2"]},
  "0348a45bca": {"fish": ["1", "2", "3", "4", "5"]},
  "0355e92655": {"person": ["2"], "boat": ["3"]},
  "0358b938c1": {"elephant": ["1", "2", "3", "4"]},
  "0368107cf1": {"person": ["1", "2"]},
  "0379ddf557": {"person": ["1"]},
  "038b2cc71d": {"lizard": ["1"]},
  "038c15a5dd": {"hedgehog": ["1"]},
  "03a06cc98a": {"giraffe": ["1", "2", "3"]},
  "03a63e187f": {"lizard": ["1"]},
  "03c95b4dae": {"elephant": ["1", "2", "3"]},
  "03e2b57b0e": {"lizard": ["1"]},
  "04194e1248": {"lizard": ["1"]},
  "04259896e2": {"lizard": ["1"]},
  "0444918a5f": {"truck": ["1", "2", "3", "4"]},
  "04460a7a52": {"lizard": ["1"]},
  "04474174a4": {"ape": ["1", "2"]},
  "0450095513": {"snail": ["1"]},
  "045f00aed2": {"tiger": ["1"], "person": ["3"]},
  "04667fabaa": {"parrot": ["1"]},
  "04735c5030": {"cat": ["1", "2"]},
  "04990d1915": {"bus": ["2"], "truck": ["3"], "sedan": ["1"]},
  "04d62d9d98": {"person": ["1"]},
  "04f21da964": {"monkey": ["1"]},
  "04fbad476e": {"parrot": ["1"]},
  "04fe256562": {"truck": ["2"], "motorbike": ["1"]},
  "0503bf89c9": {"hedgehog": ["1"]},
  "0536c9eed0": {"cat": ["1"]},
  "054acb238f": {"owl": ["1"]},
  "05579ca250": {"person": ["1"], "sedan": ["3"]},
  "056c200404": {},
  "05774f3a2c": {"ape": ["1", "2", "3"]},
  "058a7592c8": {"train": ["1"]},
  "05a0a513df": {"person": ["1", "2"]},
  "05a569d8aa": {"mouse": ["2"], "cat": ["1"]},
  "05aa652648": {"ape": ["1"]},
  "05d7715782": {},
  "05e0b0f28f": {"mouse": ["1"], "person": ["2"]},
  "05fdbbdd7a": {},
  "05ffcfed85": {"monkey": ["1", "2"]},
  "0630391881": {"person": ["1"]},
  "06840b2bbe": {"snake": ["1"]},
  "068f7dce6f": {"shark": ["1"]},
  "0693719753": {"turtle": ["1", "2"]},
  "06ce2b51fb": {"person": ["1", "2"]},
  "06e224798e": {"tiger": ["1"]},
  "06ee361788": {"duck": ["1", "2", "3"]},
  "06fbb3fa2c": {"eagle": ["1"]},
  "0700264286": {"cow": ["1", "2"]},
  "070c918ca7": {"parrot": ["1"]},
  "07129e14a4": {"parrot": ["1", "2"], "person": ["3"]},
  "07177017e9": {"motorbike": ["1", "2"]},
  "07238ffc58": {"monkey": ["1", "2", "3"]},
  "07353b2a89": {"sheep": ["1", "2", "3", "4"]},
  "0738493cbf": {"airplane": ["1"]},
  "075926c651": {"person": ["1", "2"]},
  "075c701292": {"duck": ["1", "2", "3", "4"]},
  "0762ea9a30": {"person": ["1"]},
  "07652ee4af": {"person": ["1"]},
  "076f206928": {"zebra": ["1", "2"], "person": ["3"]},
  "077d32af19": {"train": ["4"], "person": ["1", "2", "3"]},
  "079049275c": {"mouse": ["1"]},
  "07913cdda7": {"train": ["1"], "person": ["2", "3"]},
  "07a11a35e8": {"ape": ["1", "2"]},
  "07ac33b6df": {"ape": ["1"]},
  "07c62c3d11": {"parrot": ["1", "2", "3"]},
  "07cc1c7d74": {"snake": ["1"]},
  "080196ef01": {"lizard": ["1"]},
  "081207976e": {},
  "081ae4fa44": {"shark": ["1", "2"]},
  "081d8250cb": {"person": ["1"], "sedan": ["3"]},
  "082900c5d4": {"duck": ["1", "2", "3"]},
  "0860df21e2": {},
  "0866d4c5e3": {"bird": ["1", "2", "3"]},
  "0891ac2eb6": {"person": ["1", "2", "3"]},
  "08931bc458": {"person": ["1"]},
  "08aa2705d5": {"snake": ["1"]},
  "08c8450db7": {},
  "08d50b926c": {"turtle": ["1", "2"]},
  "08e1e4de15": {"monkey": ["1", "2", "3", "4"]},
  "08e48c1a48": {"cow": ["1"]},
  "08f561c65e": {"giant_panda": ["1"], "person": ["2"]},
  "08feb87790": {"sheep": ["1"]},
  "09049f6fe3": {"mouse": ["1", "2"]},
  "092e4ff450": {"snake": ["1"]},
  "09338adea8": {"whale": ["1", "2"]},
  "093c335ccc": {"person": ["2"]},
  "0970d28339": {"ape": ["1", "2"]},
  "0974a213dc": {"giraffe": ["1", "2", "3"]},
  "097b471ed8": {"cat": ["1", "2"]},
  "0990941758": {"giant_panda": ["1"]},
  "09a348f4fa": {"lizard": ["1"]},
  "09a6841288": {"duck": ["1", "2"]},
  "09c5bad17b": {"airplane": ["1"]},
  "09c9ce80c7": {"giant_panda": ["1"]},
  "09ff54fef4": {"fox": ["1", "2"]},
  "0a23765d15": {"person": ["1", "2"]},
  "0a275e7f12": {"elephant": ["1"]},
  "0a2f2bd294": {"motorbike": ["1"]},
  "0a7a2514aa": {"lizard": ["2"], "cat": ["1"]},
  "0a7b27fde9": {"parrot": ["1", "2"]},
  "0a8c467cc3": {"fish": ["1", "2", "3"]},
  "0ac8c560ae": {"person": ["2", "3"]},
  "0b1627e896": {"boat": ["1"]},
  "0b285c47f6": {"mouse": ["1"]},
  "0b34ec1d55": {"ape": ["1"]},
  "0b5b5e8e5a": {"person": ["1"], "sedan": ["2"]},
  "0b68535614": {"rabbit": ["1"]},
  "0b6f9105fc": {"rabbit": ["1"]},
  "0b7dbfa3cb": {"cow": ["1"]},
  "0b9cea51ca": {"whale": ["1"]},
  "0b9d012be8": {"camel": ["1"]},
  "0bcfc4177d": {"truck": ["1"]},
  "0bd37b23c1": {"motorbike": ["1"]},
  "0bd864064c": {"eagle": ["1"]},
  "0c11c6bf7b": {"deer": ["1"]},
  "0c26bc77ac": {"crocodile": ["1"]},
  "0c3a04798c": {"duck": ["1"], "fish": ["2"]},
  "0c44a9d545": {"tiger": ["1"]},
  "0c817cc390": {"hedgehog": ["1"], "dog": ["2"]},
  "0ca839ee9a": {"ape": ["1", "2"]},
  "0cd7ac0ac0": {"rabbit": ["1"]},
  "0ce06e0121": {"parrot": ["1", "2"]},
  "0cfe974a89": {"turtle": ["1", "2"]},
  "0d2fcc0dcd": {"zebra": ["1", "2", "3", "4"]},
  "0d3aad05d2": {"person": ["1"]},
  "0d40b015f4": {"person": ["1"]},
  "0d97fba242": {"person": ["2"], "dog": ["1"]},
  "0d9cc80d7e": {"person": ["1", "2", "3"]},
  "0dab85b6d3": {"lizard": ["1", "2"]},
  "0db5c427a5": {"train": ["1"]},
  "0dbaf284f1": {"cat": ["1", "2"]},
  "0de4923598": {},
  "0df28a9101": {"turtle": ["1", "2", "3"]},
  "0e04f636c4": {"frog": ["1"]},
  "0e05f0e232": {"lizard": ["1", "2"]},
  "0e0930474b": {"person": ["2", "3"], "sedan": ["1"]},
  "0e27472bea": {"turtle": ["1"]},
  "0e30020549": {"parrot": ["1"]},
  "0e621feb6c": {"lizard": ["1", "2"]},
  "0e803c7d73": {},
  "0e9ebe4e3c": {"truck": ["1"]},
  "0e9f2785ec": {"person": ["2"]},
  "0ea68d418b": {"airplane": ["1"]},
  "0eb403a222": {},
  "0ee92053d6": {"person": ["1"]},
  "0eefca067f": {"giant_panda": ["1", "2"]},
  "0f17fa6fcb": {"duck": ["1", "2", "3"]},
  "0f1ac8e9a3": {"frog": ["1"]},
  "0f202e9852": {"parrot": ["1"]},
  "0f2ab8b1ff": {"dolphin": ["1", "2", "3"]},
  "0f51a78756": {"sheep": ["1"]},
  "0f5fbe16b0": {"raccoon": ["1", "2"]},
  "0f6072077b": {"person": ["1", "2", "3"]},
  "0f6b69b2f4": {"rabbit": ["1"]},
  "0f6c2163de": {"snail": ["1"]},
  "0f74ec5599": {"giant_panda": ["1"]},
  "0f9683715b": {"elephant": ["1"]},
  "0fa7b59356": {"duck": ["1"]},
  "0fb173695b": {"person": ["3"]},
  "0fc958cde2": {"owl": ["1"]},
  "0fe7b1a621": {"parrot": ["1"]},
  "0ffcdb491c": {"person": ["1", "2", "3"]},
  "101caff7d4": {"giant_panda": ["1", "2"]},
  "1022fe8417": {"person": ["1", "2", "3"]},
  "1032e80b37": {"giraffe": ["1"]},
  "103f501680": {"fish": ["1"]},
  "104e64565f": {"elephant": ["1"]},
  "104f1ab997": {"person": ["1", "2", "3"]},
  "106242403f": {"person": ["1", "2"]},
  "10b31f5431": {"person": ["1", "3", "4"]},
  "10eced835e": {"giant_panda": ["1", "2"]},
  "110d26fa3a": {"shark": ["1"]},
  "1122c1d16a": {"parrot": ["1", "2", "3", "4", "5"], "person": ["6"]},
  "1145b49a5f": {"rabbit": ["1"]},
  "11485838c2": {"giraffe": ["1", "2", "3"]},
  "114e7676ec": {"person": ["1"]},
  "1157472b95": {"parrot": ["1", "2"]},
  "115ee1072c": {"cow": ["1"]},
  "1171141012": {"person": ["2"], "turtle": ["1"]},
  "117757b4b8": {"snail": ["1"]},
  "1178932d2f": {"person": ["1", "2"], "motorbike": ["3"]},
  "117cc76bda": {"whale": ["1"]},
  "1180cbf814": {"fish": ["1", "2"]},
  "1187bbd0e3": {"cat": ["1"]},
  "1197e44b26": {"giant_panda": ["1"]},
  "119cf20728": {"lizard": ["1"]},
  "119dd54871": {"lion": ["1", "2"]},
  "11a0c3b724": {"mouse": ["1", "2"]},
  "11a6ba8c94": {"person": ["1", "2"]},
  "11c722a456": {"turtle": ["1", "2"]},
  "11cbcb0b4d": {"zebra": ["1"]},
  "11ccf5e99d": {"person": ["2"]},
  "11ce6f452e": {"person": ["1", "2", "3"]},
  "11feabe596": {"rabbit": ["1"]},
  "120cb9514d": {"person": ["1", "2", "3"]},
  "12156b25b3": {"person": ["1"]},
  "122896672d": {"person": ["1", "3"]},
  "1233ac8596": {"dog": ["1"]},
  "1239c87234": {"lizard": ["1"]},
  "1250423f7c": {"elephant": ["3", "4"], "person": ["2"]},
  "1257a1bc67": {"snake": ["1"]},
  "125d1b19dd": {"giant_panda": ["1", "2"]},
  "126d203967": {"person": ["2"]},
  "1295e19071": {"airplane": ["1"]},
  "12ad198c54": {"person": ["1"]},
  "12bddb2bcb": {"person": ["2"]},
  "12ec9b93ee": {"giant_panda": ["1"]},
  "12eebedc35": {"bird": ["1"]},
  "132852e094": {"fox": ["1"]},
  "1329409f2a": {"fish": ["1"]},
  "13325cfa14": {"person": ["2"]},
  "1336440745": {"mouse": ["1", "2"]},
  "134d06dbf9": {"cat": ["1"]},
  "135625b53d": {"parrot": ["1"]},
  "13870016f9": {"person": ["1"], "cow": ["2", "3"]},
  "13960b3c84": {"giraffe": ["1", "2", "3"]},
  "13adaad9d9": {"giant_panda": ["1"]},
  "13ae097e20": {"giant_panda": ["1"]},
  "13e3070469": {"zebra": ["1", "2", "3"]},
  "13f6a8c20d": {"fish": ["1"]},
  "1416925cf2": {"truck": ["1", "2"]},
  "142d2621f5": {"person": ["1", "2"], "motorbike": ["3"]},
  "145d5d7c03": {"giant_panda": ["1"]},
  "145fdc3ac5": {"lizard": ["1"]},
  "1471274fa7": {"person": ["1"]},
  "14a6b5a139": {"fish": ["1"]},
  "14c21cea0d": {"monkey": ["1", "2"]},
  "14dae0dc93": {"person": ["2"]},
  "14f9bd22b5": {"tiger": ["1"]},
  "14fd28ae99": {"parrot": ["1"]},
  "15097d5d4e": {"parrot": ["1"]},
  "150ea711f2": {"whale": ["1"]},
  "1514e3563f": {"earless_seal": ["1", "2"]},
  "152aaa3a9e": {"raccoon": ["1"]},
  "152b7d3bd7": {"giant_panda": ["1"]},
  "15617297cc": {"person": ["1"]},
  "15abbe0c52": {"person": ["1"]},
  "15d1fb3de5": {"owl": ["1"], "cat": ["2"]},
  "15f67b0fab": {"person": ["1"]},
  "161eb59aad": {"cow": ["2", "3"], "giraffe": ["1"]},
  "16288ea47f": {"duck": ["1", "2"]},
  "164410ce62": {"person": ["1"]},
  "165c3c8cd4": {"person": ["1", "2", "3"]},
  "165c42b41b": {"person": ["1", "4"], "motorbike": ["2", "3"]},
  "165ec9e22b": {"person": ["1", "2"]},
  "1669502269": {"person": ["1"]},
  "16763cccbb": {"ape": ["1"]},
  "16adde065e": {"person": ["3"], "cat": ["2"]},
  "16af445362": {"airplane": ["1"]},
  "16afd538ad": {"parrot": ["1", "2"]},
  "16c3fa4d5d": {"sedan": ["1"]},
  "16d1d65c27": {"monkey": ["1"]},
  "16e8599e94": {"giant_panda": ["1"]},
  "16fe9fb444": {"person": ["2"], "motorbike": ["1"]},
  "1705796b02": {"train": ["1"]},
  "1724db7671": {"giant_panda": ["1"]},
  "17418e81ea": {"shark": ["1"]},
  "175169edbb": {"ape": ["1", "2"]},
  "17622326fd": {"lizard": ["1"]},
  "17656bae77": {"elephant": ["1"]},
  "17b0d94172": {"airplane": ["1"]},
  "17c220e4f6": {"giant_panda": ["1"]},
  "17c7bcd146": {"train": ["1"]},
  "17cb4afe89": {"tiger": ["1"]},
  "17cd79a434": {"squirrel": ["1"]},
  "17d18604c3": {"person": ["1", "2"]},
  "17d8ca1a37": {"person": ["2"], "owl": ["1"]},
  "17e33f4330": {"monkey": ["1"]},
  "17f7a6d805": {"snail": ["1"]},
  "180abc8378": {"person": ["2"], "owl": ["1"]},
  "183ba3d652": {"person": ["2"], "motorbike": ["3"]},
  "185bf64702": {"zebra": ["1", "2"]},
  "18913cc690": {"train": ["1"]},
  "1892651815": {"camel": ["1"]},
  "189ac8208a": {"giraffe": ["1", "2"]},
  "189b44e92c": {"zebra": ["1"]},
  "18ac264b76": {"person": ["2"]},
  "18b245ab49": {"penguin": ["1", "2", "3", "4"]},
  "18b5cebc34": {"mouse": ["1"]},
  "18bad52083": {"parrot": ["1", "2"]},
  "18bb5144d5": {"lizard": ["1"]},
  "18c6f205c5": {"person": ["1", "2", "3"]},
  "1903f9ea15": {"bird": ["1", "2", "3"]},
  "1917b209f2": {"horse": ["2"], "person": ["1"], "cow": ["3", "4"]},
  "191e74c01d": {"deer": ["1"]},
  "19367bb94e": {"fish": ["1", "2", "3"]},
  "193ffaa217": {"person": ["1", "2", "3"]},
  "19696b67d3": {"cow": ["1"]},
  "197f3ab6f3": {"giant_panda": ["1"]},
  "1981e763cc": {"sheep": ["1", "2"]},
  "198afe39ae": {"person": ["1"]},
  "19a6e62b9b": {"monkey": ["1", "2"]},
  "19b60d5335": {"hedgehog": ["1"]},
  "19c00c11f9": {"person": ["1"]},
  "19e061eb88": {"boat": ["1", "2"]},
  "19e8bc6178": {"dog": ["1"]},
  "19ee80dac6": {"person": ["1", "3", "4"]},
  "1a25a9170a": {"person": ["2", "3"], "cow": ["1"]},
  "1a359a6c1a": {"sheep": ["1"]},
  "1a3e87c566": {"frog": ["1"]},
  "1a5fe06b00": {"bus": ["1"]},
  "1a6c0fbd1e": {"person": ["1"]},
  "1a6f3b5a4b": {"sedan": ["3"]},
  "1a8afbad92": {"zebra": ["1", "2", "3"]},
  "1a8bdc5842": {"parrot": ["1", "2"]},
  "1a95752aca": {"duck": ["1", "2"]},
  "1a9c131cb7": {"ape": ["1", "2", "3"]},
  "1aa3da3ee3": {"sheep": ["1", "2", "3", "4"]},
  "1ab27ec7ea": {"deer": ["1"]},
  "1abf16d21d": {"turtle": ["1"]},
  "1acd0f993b": {"person": ["3"], "dog": ["1"]},
  "1ad202e499": {"lizard": ["1", "2"]},
  "1af8d2395d": {"person": ["1", "2"], "airplane": ["4"]},
  "1afd39a1fa": {"motorbike": ["2"]},
  "1b2d31306f": {"lizard": ["1"]},
  "1b3fa67f0e": {"airplane": ["1"]},
  "1b43fa74b4": {"owl": ["1", "2"]},
  "1b73ea9fc2": {"parrot": ["1"]},
  "1b7e8bb255": {"person": ["2"]},
  "1b8680f8cd": {"person": ["2", "3"]},
  "1b883843c0": {"person": ["1", "2"]},
  "1b8898785b": {"monkey": ["1", "2"]},
  "1b88ba1aa4": {"giant_panda": ["1"]},
  "1b96a498e5": {"ape": ["1"]},
  "1bbc4c274f": {"fish": ["2"]},
  "1bd87fe9ab": {"train": ["1"]},
  "1c4090c75b": {"whale": ["1"]},
  "1c41934f84": {"elephant": ["1", "2"]},
  "1c72b04b56": {"lion": ["1"]},
  "1c87955a3a": {"crocodile": ["1"], "turtle": ["2"]},
  "1c9f9eb792": {"person": ["2"]},
  "1ca240fede": {"train": ["1"]},
  "1ca5673803": {"person": ["1", "3"]},
  "1cada35274": {"duck": ["1"]},
  "1cb44b920d": {"eagle": ["1", "2"]},
  "1cd10e62be": {"leopard": ["1"]},
  "1d3087d5e5": {"fish": ["1", "2", "3", "4", "5"]},
  "1d3685150a": {"person": ["1", "3"]},
  "1d6ff083aa": {"person": ["1", "2"]}
}
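All of the mbench/numbered_valid_obj_ids*.json files above share one schema: video ID -> {category: [valid object-ID strings]}, with an empty object for videos that have no valid objects. A minimal sketch of reading one of these files (the helper name and iteration are illustrative, not code from this repo):

import json

def load_valid_obj_ids(path):
    # Maps video ID -> {category: ["1", "2", ...]}; some videos map to {}.
    with open(path) as f:
        return json.load(f)

data = load_valid_obj_ids("mbench/numbered_valid_obj_ids_gpt-4o.json")
for video_id, categories in data.items():
    for category, obj_ids in categories.items():
        print(video_id, category, obj_ids)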
mbench/numbered_valid_obj_ids_gpt-4o_no_mask_color.json
ADDED
@@ -0,0 +1,2153 @@
{
    "003234408d": {"penguin": ["1", "2", "3", "4", "5"]},
    "0043f083b5": {"bus": ["1"], "sedan": ["2", "3"]},
    "0044fa5fba": {"giant_panda": ["1"]},
    "005a527edd": {"ape": ["1", "2"]},
    "0065b171f9": {"giant_panda": ["1"]},
    "00917dcfc4": {"zebra": ["1", "2", "3"]},
    "00a23ccf53": {"shark": ["1"]},
    "00ad5016a4": {"airplane": ["1"]},
    "01082ae388": {"leopard": ["1"]},
    "011ac0a06f": {"ape": ["1", "2", "3", "4", "5"]},
    "013099c098": {"giant_panda": ["1", "2"]},
    "0155498c85": {"person": ["1"], "motorbike": ["2"]},
    "01694ad9c8": {"bird": ["1"]},
    "017ac35701": {"giant_panda": ["1"]},
    "01b80e8e1a": {"zebra": ["1", "2"]},
    "01baa5a4e1": {},
    "01c3111683": {"whale": ["1"]},
    "01c4cb5ffe": {"person": ["1", "3"]},
    "01c76f0a82": {"sedan": ["1", "4"]},
    "01c783268c": {"person": ["2"], "ape": ["1"]},
    "01e64dd36a": {"cow": ["1", "2", "3"]},
    "01ed275c6e": {"giraffe": ["1", "2"]},
    "01ff60d1fa": {"lizard": ["1"]},
    "020cd28cd2": {"person": ["1"]},
    "02264db755": {"fox": ["1"]},
    "0248626d9a": {"train": ["1"]},
    "02668dbffa": {"frog": ["1"]},
    "0274193026": {"person": ["2"]},
    "02d28375aa": {"fox": ["1"]},
    "031ccc99b1": {"person": ["1", "2", "3"]},
    "0321b18c10": {"person": ["1", "2"], "elephant": ["3"]},
    "0348a45bca": {"fish": ["1", "2", "3", "4", "5"]},
    "0355e92655": {"boat": ["3"], "person": ["2"]},
    "0358b938c1": {"elephant": ["1", "2", "3", "4"]},
    "0368107cf1": {"person": ["1", "2"]},
    "0379ddf557": {"person": ["1"]},
    "038b2cc71d": {"lizard": ["1"]},
    "038c15a5dd": {"hedgehog": ["1"]},
    "03a06cc98a": {"giraffe": ["1", "2", "3"]},
    "03a63e187f": {"lizard": ["1"]},
    "03c95b4dae": {"elephant": ["1", "2", "3"]},
    "03e2b57b0e": {"lizard": ["1"]},
    "04194e1248": {"lizard": ["1"]},
    "04259896e2": {"lizard": ["1"]},
    "0444918a5f": {"truck": ["1", "2", "3", "4"]},
    "04460a7a52": {"lizard": ["1"]},
    "04474174a4": {"ape": ["1", "2"]},
    "0450095513": {"snail": ["1"]},
    "045f00aed2": {"tiger": ["1"], "person": ["3"]},
    "04667fabaa": {"parrot": ["1"]},
    "04735c5030": {"cat": ["1", "2"]},
    "04990d1915": {"bus": ["2"], "truck": ["3"], "sedan": ["1"]},
    "04d62d9d98": {"person": ["1"]},
    "04f21da964": {"monkey": ["1"]},
    "04fbad476e": {"parrot": ["1"]},
    "04fe256562": {"motorbike": ["1"], "truck": ["2"]},
    "0503bf89c9": {"hedgehog": ["1"]},
    "0536c9eed0": {"cat": ["1"]},
    "054acb238f": {"owl": ["1"]},
    "05579ca250": {"person": ["1"], "sedan": ["3"]},
    "056c200404": {},
    "05774f3a2c": {"ape": ["1", "2", "3"]},
    "058a7592c8": {"train": ["1"]},
    "05a0a513df": {"person": ["1", "2"]},
    "05a569d8aa": {"cat": ["1"], "mouse": ["2"]},
    "05aa652648": {"ape": ["1"]},
    "05d7715782": {},
    "05e0b0f28f": {"person": ["2"], "mouse": ["1"]},
    "05fdbbdd7a": {},
    "05ffcfed85": {"monkey": ["1", "2"]},
    "0630391881": {"person": ["1"]},
    "06840b2bbe": {"snake": ["1"]},
    "068f7dce6f": {"shark": ["1"]},
    "0693719753": {"turtle": ["1", "2"]},
    "06ce2b51fb": {"person": ["1", "2"]},
    "06e224798e": {"tiger": ["1"]},
    "06ee361788": {"duck": ["1", "2", "3"]},
    "06fbb3fa2c": {"eagle": ["1"]},
    "0700264286": {"cow": ["1", "2"]},
    "070c918ca7": {"parrot": ["1"]},
    "07129e14a4": {"parrot": ["1", "2"], "person": ["3"]},
    "07177017e9": {"motorbike": ["1", "2"]},
    "07238ffc58": {"monkey": ["1", "2", "3"]},
    "07353b2a89": {"sheep": ["1", "2", "3", "4"]},
    "0738493cbf": {"airplane": ["1"]},
    "075926c651": {"person": ["1", "2"]},
    "075c701292": {"duck": ["1", "2", "3", "4"]},
    "0762ea9a30": {"person": ["1"]},
    "07652ee4af": {"person": ["1"]},
    "076f206928": {"zebra": ["1", "2"], "person": ["3"]},
    "077d32af19": {"train": ["4"], "person": ["1", "2", "3"]},
    "079049275c": {"mouse": ["1"]},
    "07913cdda7": {"train": ["1"], "person": ["2", "3"]},
    "07a11a35e8": {"ape": ["1", "2"]},
    "07ac33b6df": {"ape": ["1"]},
    "07c62c3d11": {"parrot": ["1", "2", "3"]},
    "07cc1c7d74": {"snake": ["1"]},
    "080196ef01": {"lizard": ["1"]},
    "081207976e": {},
    "081ae4fa44": {"shark": ["1", "2"]},
    "081d8250cb": {"person": ["1"], "sedan": ["3"]},
    "082900c5d4": {"duck": ["1", "2", "3"]},
    "0860df21e2": {},
    "0866d4c5e3": {"bird": ["1", "2", "3"]},
    "0891ac2eb6": {"person": ["1", "2", "3"]},
    "08931bc458": {"person": ["1"]},
    "08aa2705d5": {"snake": ["1"]},
    "08c8450db7": {},
    "08d50b926c": {"turtle": ["1", "2"]},
    "08e1e4de15": {"monkey": ["1", "2", "3", "4"]},
    "08e48c1a48": {"cow": ["1"]},
    "08f561c65e": {"person": ["2"], "giant_panda": ["1"]},
    "08feb87790": {"sheep": ["1"]},
    "09049f6fe3": {"mouse": ["1", "2"]},
    "092e4ff450": {"snake": ["1"]},
    "09338adea8": {"whale": ["1", "2"]},
    "093c335ccc": {"person": ["2"]},
    "0970d28339": {"ape": ["1", "2"]},
    "0974a213dc": {"giraffe": ["1", "2", "3"]},
    "097b471ed8": {"cat": ["1", "2"]},
    "0990941758": {"giant_panda": ["1"]},
    "09a348f4fa": {"lizard": ["1"]},
    "09a6841288": {"duck": ["1", "2"]},
    "09c5bad17b": {"airplane": ["1"]},
    "09c9ce80c7": {"giant_panda": ["1"]},
    "09ff54fef4": {"fox": ["1", "2"]},
    "0a23765d15": {"person": ["1", "2"]},
    "0a275e7f12": {"elephant": ["1"]},
    "0a2f2bd294": {"motorbike": ["1"]},
    "0a7a2514aa": {"cat": ["1"], "lizard": ["2"]},
    "0a7b27fde9": {"parrot": ["1", "2"]},
    "0a8c467cc3": {"fish": ["1", "2", "3"]},
    "0ac8c560ae": {"person": ["2", "3"]},
    "0b1627e896": {"boat": ["1"]},
    "0b285c47f6": {"mouse": ["1"]},
    "0b34ec1d55": {"ape": ["1"]},
    "0b5b5e8e5a": {"person": ["1"], "sedan": ["2"]},
    "0b68535614": {"rabbit": ["1"]},
    "0b6f9105fc": {"rabbit": ["1"]},
    "0b7dbfa3cb": {"cow": ["1"]},
    "0b9cea51ca": {"whale": ["1"]},
    "0b9d012be8": {"camel": ["1"]},
    "0bcfc4177d": {"truck": ["1"]},
    "0bd37b23c1": {"motorbike": ["1"]},
    "0bd864064c": {"eagle": ["1"]},
    "0c11c6bf7b": {"deer": ["1"]},
    "0c26bc77ac": {"crocodile": ["1"]},
    "0c3a04798c": {"duck": ["1"], "fish": ["2"]},
    "0c44a9d545": {"tiger": ["1"]},
    "0c817cc390": {"hedgehog": ["1"], "dog": ["2"]},
    "0ca839ee9a": {"ape": ["1", "2"]},
    "0cd7ac0ac0": {"rabbit": ["1"]},
    "0ce06e0121": {"parrot": ["1", "2"]},
    "0cfe974a89": {"turtle": ["1", "2"]},
    "0d2fcc0dcd": {"zebra": ["1", "2", "3", "4"]},
    "0d3aad05d2": {"person": ["1"]},
    "0d40b015f4": {"person": ["1"]},
    "0d97fba242": {"person": ["2"], "dog": ["1"]},
    "0d9cc80d7e": {"person": ["1", "2", "3"]},
    "0dab85b6d3": {"lizard": ["1", "2"]},
    "0db5c427a5": {"train": ["1"]},
    "0dbaf284f1": {"cat": ["1", "2"]},
    "0de4923598": {},
    "0df28a9101": {"turtle": ["1", "2", "3"]},
    "0e04f636c4": {"frog": ["1"]},
    "0e05f0e232": {"lizard": ["1", "2"]},
    "0e0930474b": {"person": ["2", "3"], "sedan": ["1"]},
    "0e27472bea": {"turtle": ["1"]},
    "0e30020549": {"parrot": ["1"]},
    "0e621feb6c": {"lizard": ["1", "2"]},
    "0e803c7d73": {},
    "0e9ebe4e3c": {"truck": ["1"]},
    "0e9f2785ec": {"person": ["2"]},
    "0ea68d418b": {"airplane": ["1"]},
    "0eb403a222": {},
    "0ee92053d6": {"person": ["1"]},
    "0eefca067f": {"giant_panda": ["1", "2"]},
    "0f17fa6fcb": {"duck": ["1", "2", "3"]},
    "0f1ac8e9a3": {"frog": ["1"]},
    "0f202e9852": {"parrot": ["1"]},
    "0f2ab8b1ff": {"dolphin": ["1", "2", "3"]},
    "0f51a78756": {"sheep": ["1"]},
    "0f5fbe16b0": {"raccoon": ["1", "2"]},
    "0f6072077b": {"person": ["1", "2", "3"]},
    "0f6b69b2f4": {"rabbit": ["1"]},
    "0f6c2163de": {"snail": ["1"]},
    "0f74ec5599": {"giant_panda": ["1"]},
    "0f9683715b": {"elephant": ["1"]},
    "0fa7b59356": {"duck": ["1"]},
    "0fb173695b": {"person": ["3"]},
    "0fc958cde2": {"owl": ["1"]},
    "0fe7b1a621": {"parrot": ["1"]},
    "0ffcdb491c": {"person": ["1", "2", "3"]},
    "101caff7d4": {"giant_panda": ["1", "2"]},
    "1022fe8417": {"person": ["1", "2", "3"]},
    "1032e80b37": {"giraffe": ["1"]},
    "103f501680": {"fish": ["1"]},
    "104e64565f": {"elephant": ["1"]},
    "104f1ab997": {"person": ["1", "2", "3"]},
    "106242403f": {"person": ["1", "2"]},
    "10b31f5431": {"person": ["1", "3", "4"]},
    "10eced835e": {"giant_panda": ["1", "2"]},
    "110d26fa3a": {"shark": ["1"]},
    "1122c1d16a": {"parrot": ["1", "2", "3", "4", "5"], "person": ["6"]},
    "1145b49a5f": {"rabbit": ["1"]},
    "11485838c2": {"giraffe": ["1", "2", "3"]},
    "114e7676ec": {"person": ["1"]},
    "1157472b95": {"parrot": ["1", "2"]},
    "115ee1072c": {"cow": ["1"]},
    "1171141012": {"person": ["2"], "turtle": ["1"]},
    "117757b4b8": {"snail": ["1"]},
    "1178932d2f": {"person": ["1", "2"], "motorbike": ["3"]},
    "117cc76bda": {"whale": ["1"]},
    "1180cbf814": {"fish": ["1", "2"]},
    "1187bbd0e3": {"cat": ["1"]},
    "1197e44b26": {"giant_panda": ["1"]},
    "119cf20728": {"lizard": ["1"]},
    "119dd54871": {"lion": ["1", "2"]},
    "11a0c3b724": {"mouse": ["1", "2"]},
    "11a6ba8c94": {"person": ["1", "2"]},
    "11c722a456": {"turtle": ["1", "2"]},
    "11cbcb0b4d": {"zebra": ["1"]},
    "11ccf5e99d": {"person": ["2"]},
    "11ce6f452e": {"person": ["1", "2", "3"]},
    "11feabe596": {"rabbit": ["1"]},
    "120cb9514d": {"person": ["1", "2", "3"]},
    "12156b25b3": {"person": ["1"]},
    "122896672d": {"person": ["1", "3"]},
    "1233ac8596": {"dog": ["1"]},
    "1239c87234": {"lizard": ["1"]},
    "1250423f7c": {"person": ["2"], "elephant": ["3", "4"]},
    "1257a1bc67": {"snake": ["1"]},
    "125d1b19dd": {"giant_panda": ["1", "2"]},
    "126d203967": {"person": ["2"]},
    "1295e19071": {"airplane": ["1"]},
    "12ad198c54": {"person": ["1"]},
    "12bddb2bcb": {"person": ["2"]},
    "12ec9b93ee": {"giant_panda": ["1"]},
    "12eebedc35": {"bird": ["1"]},
    "132852e094": {"fox": ["1"]},
    "1329409f2a": {"fish": ["1"]},
    "13325cfa14": {"person": ["2"]},
    "1336440745": {"mouse": ["1", "2"]},
    "134d06dbf9": {"cat": ["1"]},
    "135625b53d": {"parrot": ["1"]},
    "13870016f9": {"person": ["1"], "cow": ["2", "3"]},
    "13960b3c84": {"giraffe": ["1", "2", "3"]},
    "13adaad9d9": {"giant_panda": ["1"]},
    "13ae097e20": {"giant_panda": ["1"]},
    "13e3070469": {"zebra": ["1", "2", "3"]},
    "13f6a8c20d": {"fish": ["1"]},
    "1416925cf2": {"truck": ["1", "2"]},
    "142d2621f5": {"person": ["1", "2"], "motorbike": ["3"]},
    "145d5d7c03": {"giant_panda": ["1"]},
    "145fdc3ac5": {"lizard": ["1"]},
    "1471274fa7": {"person": ["1"]},
    "14a6b5a139": {"fish": ["1"]},
    "14c21cea0d": {"monkey": ["1", "2"]},
    "14dae0dc93": {"person": ["2"]},
    "14f9bd22b5": {"tiger": ["1"]},
    "14fd28ae99": {"parrot": ["1"]},
    "15097d5d4e": {"parrot": ["1"]},
    "150ea711f2": {"whale": ["1"]},
    "1514e3563f": {"earless_seal": ["1", "2"]},
    "152aaa3a9e": {"raccoon": ["1"]},
    "152b7d3bd7": {"giant_panda": ["1"]},
    "15617297cc": {"person": ["1"]},
    "15abbe0c52": {"person": ["1"]},
    "15d1fb3de5": {"cat": ["2"], "owl": ["1"]},
    "15f67b0fab": {"person": ["1"]},
    "161eb59aad": {"cow": ["2", "3"], "giraffe": ["1"]},
    "16288ea47f": {"duck": ["1", "2"]},
    "164410ce62": {"person": ["1"]},
    "165c3c8cd4": {"person": ["1", "2", "3"]},
    "165c42b41b": {"person": ["1", "4"], "motorbike": ["2", "3"]},
    "165ec9e22b": {"person": ["1", "2"]},
    "1669502269": {"person": ["1"]},
    "16763cccbb": {"ape": ["1"]},
    "16adde065e": {"cat": ["2"], "person": ["3"]},
    "16af445362": {"airplane": ["1"]},
    "16afd538ad": {"parrot": ["1", "2"]},
    "16c3fa4d5d": {"sedan": ["1"]},
    "16d1d65c27": {"monkey": ["1"]},
    "16e8599e94": {"giant_panda": ["1"]},
    "16fe9fb444": {"motorbike": ["1"], "person": ["2"]},
    "1705796b02": {"train": ["1"]},
    "1724db7671": {"giant_panda": ["1"]},
    "17418e81ea": {"shark": ["1"]},
    "175169edbb": {"ape": ["1", "2"]},
    "17622326fd": {"lizard": ["1"]},
    "17656bae77": {"elephant": ["1"]},
    "17b0d94172": {"airplane": ["1"]},
    "17c220e4f6": {"giant_panda": ["1"]},
    "17c7bcd146": {"train": ["1"]},
    "17cb4afe89": {"tiger": ["1"]},
    "17cd79a434": {"squirrel": ["1"]},
    "17d18604c3": {"person": ["1", "2"]},
    "17d8ca1a37": {"person": ["2"], "owl": ["1"]},
    "17e33f4330": {"monkey": ["1"]},
    "17f7a6d805": {"snail": ["1"]},
    "180abc8378": {"person": ["2"], "owl": ["1"]},
    "183ba3d652": {"person": ["2"], "motorbike": ["3"]},
    "185bf64702": {"zebra": ["1", "2"]},
    "18913cc690": {"train": ["1"]},
    "1892651815": {"camel": ["1"]},
    "189ac8208a": {"giraffe": ["1", "2"]},
    "189b44e92c": {"zebra": ["1"]},
    "18ac264b76": {"person": ["2"]},
    "18b245ab49": {"penguin": ["1", "2", "3", "4"]},
    "18b5cebc34": {"mouse": ["1"]},
    "18bad52083": {"parrot": ["1", "2"]},
    "18bb5144d5": {"lizard": ["1"]},
    "18c6f205c5": {"person": ["1", "2", "3"]},
    "1903f9ea15": {"bird": ["1", "2", "3"]},
    "1917b209f2": {"person": ["1"], "cow": ["3", "4"], "horse": ["2"]},
    "191e74c01d": {"deer": ["1"]},
    "19367bb94e": {"fish": ["1", "2", "3"]},
    "193ffaa217": {"person": ["1", "2", "3"]},
    "19696b67d3": {"cow": ["1"]},
    "197f3ab6f3": {"giant_panda": ["1"]},
    "1981e763cc": {"sheep": ["1", "2"]},
    "198afe39ae": {"person": ["1"]},
    "19a6e62b9b": {"monkey": ["1", "2"]},
    "19b60d5335": {"hedgehog": ["1"]},
    "19c00c11f9": {"person": ["1"]},
    "19e061eb88": {"boat": ["1", "2"]},
    "19e8bc6178": {"dog": ["1"]},
    "19ee80dac6": {"person": ["1", "3", "4"]},
    "1a25a9170a": {"person": ["2", "3"], "cow": ["1"]},
    "1a359a6c1a": {"sheep": ["1"]},
    "1a3e87c566": {"frog": ["1"]},
    "1a5fe06b00": {"bus": ["1"]},
    "1a6c0fbd1e": {"person": ["1"]},
    "1a6f3b5a4b": {"sedan": ["3"]},
    "1a8afbad92": {"zebra": ["1", "2", "3"]},
    "1a8bdc5842": {"parrot": ["1", "2"]},
    "1a95752aca": {"duck": ["1", "2"]},
    "1a9c131cb7": {"ape": ["1", "2", "3"]},
    "1aa3da3ee3": {"sheep": ["1", "2", "3", "4"]},
    "1ab27ec7ea": {"deer": ["1"]},
    "1abf16d21d": {"turtle": ["1"]},
    "1acd0f993b": {"person": ["3"], "dog": ["1"]},
    "1ad202e499": {"lizard": ["1", "2"]},
    "1af8d2395d": {"person": ["1", "2"], "airplane": ["4"]},
    "1afd39a1fa": {"motorbike": ["2"]},
    "1b2d31306f": {"lizard": ["1"]},
    "1b3fa67f0e": {"airplane": ["1"]},
    "1b43fa74b4": {"owl": ["1", "2"]},
    "1b73ea9fc2": {"parrot": ["1"]},
    "1b7e8bb255": {"person": ["2"]},
    "1b8680f8cd": {"person": ["2", "3"]},
    "1b883843c0": {"person": ["1", "2"]},
    "1b8898785b": {"monkey": ["1", "2"]},
    "1b88ba1aa4": {"giant_panda": ["1"]},
    "1b96a498e5": {"ape": ["1"]},
    "1bbc4c274f": {"fish": ["2"]},
    "1bd87fe9ab": {"train": ["1"]},
    "1c4090c75b": {"whale": ["1"]},
    "1c41934f84": {"elephant": ["1", "2"]},
    "1c72b04b56": {"lion": ["1"]},
    "1c87955a3a": {"crocodile": ["1"], "turtle": ["2"]},
    "1c9f9eb792": {"person": ["2"]},
    "1ca240fede": {"train": ["1"]},
    "1ca5673803": {"person": ["1", "3"]},
    "1cada35274": {"duck": ["1"]},
    "1cb44b920d": {"eagle": ["1", "2"]},
    "1cd10e62be": {"leopard": ["1"]},
    "1d3087d5e5": {"fish": ["1", "2", "3", "4", "5"]},
    "1d3685150a": {"person": ["1", "3"]},
    "1d6ff083aa": {"person": ["1", "2"]}
}
mbench/numbered_valid_obj_ids_gpt-4o_nomask_randcap.json
ADDED
@@ -0,0 +1,2153 @@
{
    "003234408d": {"penguin": ["1", "2", "3", "4", "5"]},
    "0043f083b5": {"sedan": ["2", "3"], "bus": ["1"]},
    "0044fa5fba": {"giant_panda": ["1"]},
    "005a527edd": {"ape": ["1", "2"]},
    "0065b171f9": {"giant_panda": ["1"]},
    "00917dcfc4": {"zebra": ["1", "2", "3"]},
    "00a23ccf53": {"shark": ["1"]},
    "00ad5016a4": {"airplane": ["1"]},
    "01082ae388": {"leopard": ["1"]},
    "011ac0a06f": {"ape": ["1", "2", "3", "4", "5"]},
    "013099c098": {"giant_panda": ["1", "2"]},
    "0155498c85": {"motorbike": ["2"], "person": ["1"]},
    "01694ad9c8": {"bird": ["1"]},
    "017ac35701": {"giant_panda": ["1"]},
    "01b80e8e1a": {"zebra": ["1", "2"]},
    "01baa5a4e1": {},
    "01c3111683": {"whale": ["1"]},
    "01c4cb5ffe": {"person": ["1", "3"]},
    "01c76f0a82": {"sedan": ["1", "4"]},
    "01c783268c": {"ape": ["1"], "person": ["2"]},
    "01e64dd36a": {"cow": ["1", "2", "3"]},
    "01ed275c6e": {"giraffe": ["1", "2"]},
    "01ff60d1fa": {"lizard": ["1"]},
    "020cd28cd2": {"person": ["1"]},
    "02264db755": {"fox": ["1"]},
    "0248626d9a": {"train": ["1"]},
    "02668dbffa": {"frog": ["1"]},
    "0274193026": {"person": ["2"]},
    "02d28375aa": {"fox": ["1"]},
    "031ccc99b1": {"person": ["1", "2", "3"]},
    "0321b18c10": {"elephant": ["3"], "person": ["1", "2"]},
    "0348a45bca": {"fish": ["1", "2", "3", "4", "5"]},
    "0355e92655": {"boat": ["3"], "person": ["2"]},
    "0358b938c1": {"elephant": ["1", "2", "3", "4"]},
    "0368107cf1": {"person": ["1", "2"]},
    "0379ddf557": {"person": ["1"]},
    "038b2cc71d": {"lizard": ["1"]},
    "038c15a5dd": {"hedgehog": ["1"]},
    "03a06cc98a": {"giraffe": ["1", "2", "3"]},
    "03a63e187f": {"lizard": ["1"]},
    "03c95b4dae": {"elephant": ["1", "2", "3"]},
    "03e2b57b0e": {"lizard": ["1"]},
    "04194e1248": {"lizard": ["1"]},
    "04259896e2": {"lizard": ["1"]},
    "0444918a5f": {"truck": ["1", "2", "3", "4"]},
    "04460a7a52": {"lizard": ["1"]},
    "04474174a4": {"ape": ["1", "2"]},
    "0450095513": {"snail": ["1"]},
    "045f00aed2": {"person": ["3"], "tiger": ["1"]},
    "04667fabaa": {"parrot": ["1"]},
    "04735c5030": {"cat": ["1", "2"]},
    "04990d1915": {"sedan": ["1"], "bus": ["2"], "truck": ["3"]},
    "04d62d9d98": {"person": ["1"]},
    "04f21da964": {"monkey": ["1"]},
    "04fbad476e": {"parrot": ["1"]},
    "04fe256562": {"motorbike": ["1"], "truck": ["2"]},
    "0503bf89c9": {"hedgehog": ["1"]},
    "0536c9eed0": {"cat": ["1"]},
    "054acb238f": {"owl": ["1"]},
    "05579ca250": {"sedan": ["3"], "person": ["1"]},
    "056c200404": {},
    "05774f3a2c": {"ape": ["1", "2", "3"]},
    "058a7592c8": {"train": ["1"]},
    "05a0a513df": {"person": ["1", "2"]},
    "05a569d8aa": {"mouse": ["2"], "cat": ["1"]},
    "05aa652648": {"ape": ["1"]},
    "05d7715782": {},
    "05e0b0f28f": {"mouse": ["1"], "person": ["2"]},
    "05fdbbdd7a": {},
    "05ffcfed85": {"monkey": ["1", "2"]},
    "0630391881": {"person": ["1"]},
    "06840b2bbe": {"snake": ["1"]},
    "068f7dce6f": {"shark": ["1"]},
    "0693719753": {"turtle": ["1", "2"]},
    "06ce2b51fb": {"person": ["1", "2"]},
    "06e224798e": {"tiger": ["1"]},
    "06ee361788": {"duck": ["1", "2", "3"]},
    "06fbb3fa2c": {"eagle": ["1"]},
    "0700264286": {"cow": ["1", "2"]},
    "070c918ca7": {"parrot": ["1"]},
    "07129e14a4": {"parrot": ["1", "2"], "person": ["3"]},
    "07177017e9": {"motorbike": ["1", "2"]},
    "07238ffc58": {"monkey": ["1", "2", "3"]},
    "07353b2a89": {"sheep": ["1", "2", "3", "4"]},
    "0738493cbf": {"airplane": ["1"]},
    "075926c651": {"person": ["1", "2"]},
    "075c701292": {"duck": ["1", "2", "3", "4"]},
    "0762ea9a30": {"person": ["1"]},
    "07652ee4af": {"person": ["1"]},
    "076f206928": {"zebra": ["1", "2"], "person": ["3"]},
    "077d32af19": {"train": ["4"], "person": ["1", "2", "3"]},
    "079049275c": {"mouse": ["1"]},
    "07913cdda7": {"train": ["1"], "person": ["2", "3"]},
    "07a11a35e8": {"ape": ["1", "2"]},
    "07ac33b6df": {"ape": ["1"]},
    "07c62c3d11": {"parrot": ["1", "2", "3"]},
    "07cc1c7d74": {"snake": ["1"]},
    "080196ef01": {"lizard": ["1"]},
    "081207976e": {},
    "081ae4fa44": {"shark": ["1", "2"]},
    "081d8250cb": {"sedan": ["3"], "person": ["1"]},
    "082900c5d4": {"duck": ["1", "2", "3"]},
    "0860df21e2": {},
    "0866d4c5e3": {"bird": ["1", "2", "3"]},
    "0891ac2eb6": {"person": ["1", "2", "3"]},
    "08931bc458": {"person": ["1"]},
    "08aa2705d5": {"snake": ["1"]},
    "08c8450db7": {},
    "08d50b926c": {"turtle": ["1", "2"]},
    "08e1e4de15": {"monkey": ["1", "2", "3", "4"]},
    "08e48c1a48": {"cow": ["1"]},
    "08f561c65e": {"person": ["2"], "giant_panda": ["1"]},
    "08feb87790": {"sheep": ["1"]},
    "09049f6fe3": {"mouse": ["1", "2"]},
    "092e4ff450": {"snake": ["1"]},
    "09338adea8": {"whale": ["1", "2"]},
    "093c335ccc": {"person": ["2"]},
    "0970d28339": {"ape": ["1", "2"]},
    "0974a213dc": {"giraffe": ["1", "2", "3"]},
    "097b471ed8": {"cat": ["1", "2"]},
    "0990941758": {"giant_panda": ["1"]},
    "09a348f4fa": {"lizard": ["1"]},
    "09a6841288": {"duck": ["1", "2"]},
    "09c5bad17b": {"airplane": ["1"]},
    "09c9ce80c7": {"giant_panda": ["1"]},
    "09ff54fef4": {"fox": ["1", "2"]},
    "0a23765d15": {"person": ["1", "2"]},
    "0a275e7f12": {"elephant": ["1"]},
    "0a2f2bd294": {"motorbike": ["1"]},
    "0a7a2514aa": {"cat": ["1"], "lizard": ["2"]},
    "0a7b27fde9": {"parrot": ["1", "2"]},
    "0a8c467cc3": {"fish": ["1", "2", "3"]},
    "0ac8c560ae": {"person": ["2", "3"]},
    "0b1627e896": {"boat": ["1"]},
    "0b285c47f6": {"mouse": ["1"]},
    "0b34ec1d55": {"ape": ["1"]},
    "0b5b5e8e5a": {"sedan": ["2"], "person": ["1"]},
    "0b68535614": {"rabbit": ["1"]},
    "0b6f9105fc": {"rabbit": ["1"]},
    "0b7dbfa3cb": {"cow": ["1"]},
    "0b9cea51ca": {"whale": ["1"]},
    "0b9d012be8": {"camel": ["1"]},
    "0bcfc4177d": {"truck": ["1"]},
    "0bd37b23c1": {"motorbike": ["1"]},
    "0bd864064c": {"eagle": ["1"]},
    "0c11c6bf7b": {"deer": ["1"]},
    "0c26bc77ac": {"crocodile": ["1"]},
    "0c3a04798c": {"fish": ["2"], "duck": ["1"]},
    "0c44a9d545": {"tiger": ["1"]},
    "0c817cc390": {"hedgehog": ["1"], "dog": ["2"]},
    "0ca839ee9a": {"ape": ["1", "2"]},
    "0cd7ac0ac0": {"rabbit": ["1"]},
    "0ce06e0121": {"parrot": ["1", "2"]},
    "0cfe974a89": {"turtle": ["1", "2"]},
    "0d2fcc0dcd": {"zebra": ["1", "2", "3", "4"]},
    "0d3aad05d2": {"person": ["1"]},
    "0d40b015f4": {"person": ["1"]},
    "0d97fba242": {"dog": ["1"], "person": ["2"]},
    "0d9cc80d7e": {"person": ["1", "2", "3"]},
    "0dab85b6d3": {"lizard": ["1", "2"
+
]
|
941 |
+
},
|
942 |
+
"0db5c427a5": {
|
943 |
+
"train": [
|
944 |
+
"1"
|
945 |
+
]
|
946 |
+
},
|
947 |
+
"0dbaf284f1": {
|
948 |
+
"cat": [
|
949 |
+
"1",
|
950 |
+
"2"
|
951 |
+
]
|
952 |
+
},
|
953 |
+
"0de4923598": {},
|
954 |
+
"0df28a9101": {
|
955 |
+
"turtle": [
|
956 |
+
"1",
|
957 |
+
"2",
|
958 |
+
"3"
|
959 |
+
]
|
960 |
+
},
|
961 |
+
"0e04f636c4": {
|
962 |
+
"frog": [
|
963 |
+
"1"
|
964 |
+
]
|
965 |
+
},
|
966 |
+
"0e05f0e232": {
|
967 |
+
"lizard": [
|
968 |
+
"1",
|
969 |
+
"2"
|
970 |
+
]
|
971 |
+
},
|
972 |
+
"0e0930474b": {
|
973 |
+
"sedan": [
|
974 |
+
"1"
|
975 |
+
],
|
976 |
+
"person": [
|
977 |
+
"2",
|
978 |
+
"3"
|
979 |
+
]
|
980 |
+
},
|
981 |
+
"0e27472bea": {
|
982 |
+
"turtle": [
|
983 |
+
"1"
|
984 |
+
]
|
985 |
+
},
|
986 |
+
"0e30020549": {
|
987 |
+
"parrot": [
|
988 |
+
"1"
|
989 |
+
]
|
990 |
+
},
|
991 |
+
"0e621feb6c": {
|
992 |
+
"lizard": [
|
993 |
+
"1",
|
994 |
+
"2"
|
995 |
+
]
|
996 |
+
},
|
997 |
+
"0e803c7d73": {},
|
998 |
+
"0e9ebe4e3c": {
|
999 |
+
"truck": [
|
1000 |
+
"1"
|
1001 |
+
]
|
1002 |
+
},
|
1003 |
+
"0e9f2785ec": {
|
1004 |
+
"person": [
|
1005 |
+
"2"
|
1006 |
+
]
|
1007 |
+
},
|
1008 |
+
"0ea68d418b": {
|
1009 |
+
"airplane": [
|
1010 |
+
"1"
|
1011 |
+
]
|
1012 |
+
},
|
1013 |
+
"0eb403a222": {},
|
1014 |
+
"0ee92053d6": {
|
1015 |
+
"person": [
|
1016 |
+
"1"
|
1017 |
+
]
|
1018 |
+
},
|
1019 |
+
"0eefca067f": {
|
1020 |
+
"giant_panda": [
|
1021 |
+
"1",
|
1022 |
+
"2"
|
1023 |
+
]
|
1024 |
+
},
|
1025 |
+
"0f17fa6fcb": {
|
1026 |
+
"duck": [
|
1027 |
+
"1",
|
1028 |
+
"2",
|
1029 |
+
"3"
|
1030 |
+
]
|
1031 |
+
},
|
1032 |
+
"0f1ac8e9a3": {
|
1033 |
+
"frog": [
|
1034 |
+
"1"
|
1035 |
+
]
|
1036 |
+
},
|
1037 |
+
"0f202e9852": {
|
1038 |
+
"parrot": [
|
1039 |
+
"1"
|
1040 |
+
]
|
1041 |
+
},
|
1042 |
+
"0f2ab8b1ff": {
|
1043 |
+
"dolphin": [
|
1044 |
+
"1",
|
1045 |
+
"2",
|
1046 |
+
"3"
|
1047 |
+
]
|
1048 |
+
},
|
1049 |
+
"0f51a78756": {
|
1050 |
+
"sheep": [
|
1051 |
+
"1"
|
1052 |
+
]
|
1053 |
+
},
|
1054 |
+
"0f5fbe16b0": {
|
1055 |
+
"raccoon": [
|
1056 |
+
"1",
|
1057 |
+
"2"
|
1058 |
+
]
|
1059 |
+
},
|
1060 |
+
"0f6072077b": {
|
1061 |
+
"person": [
|
1062 |
+
"1",
|
1063 |
+
"2",
|
1064 |
+
"3"
|
1065 |
+
]
|
1066 |
+
},
|
1067 |
+
"0f6b69b2f4": {
|
1068 |
+
"rabbit": [
|
1069 |
+
"1"
|
1070 |
+
]
|
1071 |
+
},
|
1072 |
+
"0f6c2163de": {
|
1073 |
+
"snail": [
|
1074 |
+
"1"
|
1075 |
+
]
|
1076 |
+
},
|
1077 |
+
"0f74ec5599": {
|
1078 |
+
"giant_panda": [
|
1079 |
+
"1"
|
1080 |
+
]
|
1081 |
+
},
|
1082 |
+
"0f9683715b": {
|
1083 |
+
"elephant": [
|
1084 |
+
"1"
|
1085 |
+
]
|
1086 |
+
},
|
1087 |
+
"0fa7b59356": {
|
1088 |
+
"duck": [
|
1089 |
+
"1"
|
1090 |
+
]
|
1091 |
+
},
|
1092 |
+
"0fb173695b": {
|
1093 |
+
"person": [
|
1094 |
+
"3"
|
1095 |
+
]
|
1096 |
+
},
|
1097 |
+
"0fc958cde2": {
|
1098 |
+
"owl": [
|
1099 |
+
"1"
|
1100 |
+
]
|
1101 |
+
},
|
1102 |
+
"0fe7b1a621": {
|
1103 |
+
"parrot": [
|
1104 |
+
"1"
|
1105 |
+
]
|
1106 |
+
},
|
1107 |
+
"0ffcdb491c": {
|
1108 |
+
"person": [
|
1109 |
+
"1",
|
1110 |
+
"2",
|
1111 |
+
"3"
|
1112 |
+
]
|
1113 |
+
},
|
1114 |
+
"101caff7d4": {
|
1115 |
+
"giant_panda": [
|
1116 |
+
"1",
|
1117 |
+
"2"
|
1118 |
+
]
|
1119 |
+
},
|
1120 |
+
"1022fe8417": {
|
1121 |
+
"person": [
|
1122 |
+
"1",
|
1123 |
+
"2",
|
1124 |
+
"3"
|
1125 |
+
]
|
1126 |
+
},
|
1127 |
+
"1032e80b37": {
|
1128 |
+
"giraffe": [
|
1129 |
+
"1"
|
1130 |
+
]
|
1131 |
+
},
|
1132 |
+
"103f501680": {
|
1133 |
+
"fish": [
|
1134 |
+
"1"
|
1135 |
+
]
|
1136 |
+
},
|
1137 |
+
"104e64565f": {
|
1138 |
+
"elephant": [
|
1139 |
+
"1"
|
1140 |
+
]
|
1141 |
+
},
|
1142 |
+
"104f1ab997": {
|
1143 |
+
"person": [
|
1144 |
+
"1",
|
1145 |
+
"2",
|
1146 |
+
"3"
|
1147 |
+
]
|
1148 |
+
},
|
1149 |
+
"106242403f": {
|
1150 |
+
"person": [
|
1151 |
+
"1",
|
1152 |
+
"2"
|
1153 |
+
]
|
1154 |
+
},
|
1155 |
+
"10b31f5431": {
|
1156 |
+
"person": [
|
1157 |
+
"1",
|
1158 |
+
"3",
|
1159 |
+
"4"
|
1160 |
+
]
|
1161 |
+
},
|
1162 |
+
"10eced835e": {
|
1163 |
+
"giant_panda": [
|
1164 |
+
"1",
|
1165 |
+
"2"
|
1166 |
+
]
|
1167 |
+
},
|
1168 |
+
"110d26fa3a": {
|
1169 |
+
"shark": [
|
1170 |
+
"1"
|
1171 |
+
]
|
1172 |
+
},
|
1173 |
+
"1122c1d16a": {
|
1174 |
+
"parrot": [
|
1175 |
+
"1",
|
1176 |
+
"2",
|
1177 |
+
"3",
|
1178 |
+
"4",
|
1179 |
+
"5"
|
1180 |
+
],
|
1181 |
+
"person": [
|
1182 |
+
"6"
|
1183 |
+
]
|
1184 |
+
},
|
1185 |
+
"1145b49a5f": {
|
1186 |
+
"rabbit": [
|
1187 |
+
"1"
|
1188 |
+
]
|
1189 |
+
},
|
1190 |
+
"11485838c2": {
|
1191 |
+
"giraffe": [
|
1192 |
+
"1",
|
1193 |
+
"2",
|
1194 |
+
"3"
|
1195 |
+
]
|
1196 |
+
},
|
1197 |
+
"114e7676ec": {
|
1198 |
+
"person": [
|
1199 |
+
"1"
|
1200 |
+
]
|
1201 |
+
},
|
1202 |
+
"1157472b95": {
|
1203 |
+
"parrot": [
|
1204 |
+
"1",
|
1205 |
+
"2"
|
1206 |
+
]
|
1207 |
+
},
|
1208 |
+
"115ee1072c": {
|
1209 |
+
"cow": [
|
1210 |
+
"1"
|
1211 |
+
]
|
1212 |
+
},
|
1213 |
+
"1171141012": {
|
1214 |
+
"turtle": [
|
1215 |
+
"1"
|
1216 |
+
],
|
1217 |
+
"person": [
|
1218 |
+
"2"
|
1219 |
+
]
|
1220 |
+
},
|
1221 |
+
"117757b4b8": {
|
1222 |
+
"snail": [
|
1223 |
+
"1"
|
1224 |
+
]
|
1225 |
+
},
|
1226 |
+
"1178932d2f": {
|
1227 |
+
"motorbike": [
|
1228 |
+
"3"
|
1229 |
+
],
|
1230 |
+
"person": [
|
1231 |
+
"1",
|
1232 |
+
"2"
|
1233 |
+
]
|
1234 |
+
},
|
1235 |
+
"117cc76bda": {
|
1236 |
+
"whale": [
|
1237 |
+
"1"
|
1238 |
+
]
|
1239 |
+
},
|
1240 |
+
"1180cbf814": {
|
1241 |
+
"fish": [
|
1242 |
+
"1",
|
1243 |
+
"2"
|
1244 |
+
]
|
1245 |
+
},
|
1246 |
+
"1187bbd0e3": {
|
1247 |
+
"cat": [
|
1248 |
+
"1"
|
1249 |
+
]
|
1250 |
+
},
|
1251 |
+
"1197e44b26": {
|
1252 |
+
"giant_panda": [
|
1253 |
+
"1"
|
1254 |
+
]
|
1255 |
+
},
|
1256 |
+
"119cf20728": {
|
1257 |
+
"lizard": [
|
1258 |
+
"1"
|
1259 |
+
]
|
1260 |
+
},
|
1261 |
+
"119dd54871": {
|
1262 |
+
"lion": [
|
1263 |
+
"1",
|
1264 |
+
"2"
|
1265 |
+
]
|
1266 |
+
},
|
1267 |
+
"11a0c3b724": {
|
1268 |
+
"mouse": [
|
1269 |
+
"1",
|
1270 |
+
"2"
|
1271 |
+
]
|
1272 |
+
},
|
1273 |
+
"11a6ba8c94": {
|
1274 |
+
"person": [
|
1275 |
+
"1",
|
1276 |
+
"2"
|
1277 |
+
]
|
1278 |
+
},
|
1279 |
+
"11c722a456": {
|
1280 |
+
"turtle": [
|
1281 |
+
"1",
|
1282 |
+
"2"
|
1283 |
+
]
|
1284 |
+
},
|
1285 |
+
"11cbcb0b4d": {
|
1286 |
+
"zebra": [
|
1287 |
+
"1"
|
1288 |
+
]
|
1289 |
+
},
|
1290 |
+
"11ccf5e99d": {
|
1291 |
+
"person": [
|
1292 |
+
"2"
|
1293 |
+
]
|
1294 |
+
},
|
1295 |
+
"11ce6f452e": {
|
1296 |
+
"person": [
|
1297 |
+
"1",
|
1298 |
+
"2",
|
1299 |
+
"3"
|
1300 |
+
]
|
1301 |
+
},
|
1302 |
+
"11feabe596": {
|
1303 |
+
"rabbit": [
|
1304 |
+
"1"
|
1305 |
+
]
|
1306 |
+
},
|
1307 |
+
"120cb9514d": {
|
1308 |
+
"person": [
|
1309 |
+
"1",
|
1310 |
+
"2",
|
1311 |
+
"3"
|
1312 |
+
]
|
1313 |
+
},
|
1314 |
+
"12156b25b3": {
|
1315 |
+
"person": [
|
1316 |
+
"1"
|
1317 |
+
]
|
1318 |
+
},
|
1319 |
+
"122896672d": {
|
1320 |
+
"person": [
|
1321 |
+
"1",
|
1322 |
+
"3"
|
1323 |
+
]
|
1324 |
+
},
|
1325 |
+
"1233ac8596": {
|
1326 |
+
"dog": [
|
1327 |
+
"1"
|
1328 |
+
]
|
1329 |
+
},
|
1330 |
+
"1239c87234": {
|
1331 |
+
"lizard": [
|
1332 |
+
"1"
|
1333 |
+
]
|
1334 |
+
},
|
1335 |
+
"1250423f7c": {
|
1336 |
+
"elephant": [
|
1337 |
+
"3",
|
1338 |
+
"4"
|
1339 |
+
],
|
1340 |
+
"person": [
|
1341 |
+
"2"
|
1342 |
+
]
|
1343 |
+
},
|
1344 |
+
"1257a1bc67": {
|
1345 |
+
"snake": [
|
1346 |
+
"1"
|
1347 |
+
]
|
1348 |
+
},
|
1349 |
+
"125d1b19dd": {
|
1350 |
+
"giant_panda": [
|
1351 |
+
"1",
|
1352 |
+
"2"
|
1353 |
+
]
|
1354 |
+
},
|
1355 |
+
"126d203967": {
|
1356 |
+
"person": [
|
1357 |
+
"2"
|
1358 |
+
]
|
1359 |
+
},
|
1360 |
+
"1295e19071": {
|
1361 |
+
"airplane": [
|
1362 |
+
"1"
|
1363 |
+
]
|
1364 |
+
},
|
1365 |
+
"12ad198c54": {
|
1366 |
+
"person": [
|
1367 |
+
"1"
|
1368 |
+
]
|
1369 |
+
},
|
1370 |
+
"12bddb2bcb": {
|
1371 |
+
"person": [
|
1372 |
+
"2"
|
1373 |
+
]
|
1374 |
+
},
|
1375 |
+
"12ec9b93ee": {
|
1376 |
+
"giant_panda": [
|
1377 |
+
"1"
|
1378 |
+
]
|
1379 |
+
},
|
1380 |
+
"12eebedc35": {
|
1381 |
+
"bird": [
|
1382 |
+
"1"
|
1383 |
+
]
|
1384 |
+
},
|
1385 |
+
"132852e094": {
|
1386 |
+
"fox": [
|
1387 |
+
"1"
|
1388 |
+
]
|
1389 |
+
},
|
1390 |
+
"1329409f2a": {
|
1391 |
+
"fish": [
|
1392 |
+
"1"
|
1393 |
+
]
|
1394 |
+
},
|
1395 |
+
"13325cfa14": {
|
1396 |
+
"person": [
|
1397 |
+
"2"
|
1398 |
+
]
|
1399 |
+
},
|
1400 |
+
"1336440745": {
|
1401 |
+
"mouse": [
|
1402 |
+
"1",
|
1403 |
+
"2"
|
1404 |
+
]
|
1405 |
+
},
|
1406 |
+
"134d06dbf9": {
|
1407 |
+
"cat": [
|
1408 |
+
"1"
|
1409 |
+
]
|
1410 |
+
},
|
1411 |
+
"135625b53d": {
|
1412 |
+
"parrot": [
|
1413 |
+
"1"
|
1414 |
+
]
|
1415 |
+
},
|
1416 |
+
"13870016f9": {
|
1417 |
+
"cow": [
|
1418 |
+
"2",
|
1419 |
+
"3"
|
1420 |
+
],
|
1421 |
+
"person": [
|
1422 |
+
"1"
|
1423 |
+
]
|
1424 |
+
},
|
1425 |
+
"13960b3c84": {
|
1426 |
+
"giraffe": [
|
1427 |
+
"1",
|
1428 |
+
"2",
|
1429 |
+
"3"
|
1430 |
+
]
|
1431 |
+
},
|
1432 |
+
"13adaad9d9": {
|
1433 |
+
"giant_panda": [
|
1434 |
+
"1"
|
1435 |
+
]
|
1436 |
+
},
|
1437 |
+
"13ae097e20": {
|
1438 |
+
"giant_panda": [
|
1439 |
+
"1"
|
1440 |
+
]
|
1441 |
+
},
|
1442 |
+
"13e3070469": {
|
1443 |
+
"zebra": [
|
1444 |
+
"1",
|
1445 |
+
"2",
|
1446 |
+
"3"
|
1447 |
+
]
|
1448 |
+
},
|
1449 |
+
"13f6a8c20d": {
|
1450 |
+
"fish": [
|
1451 |
+
"1"
|
1452 |
+
]
|
1453 |
+
},
|
1454 |
+
"1416925cf2": {
|
1455 |
+
"truck": [
|
1456 |
+
"1",
|
1457 |
+
"2"
|
1458 |
+
]
|
1459 |
+
},
|
1460 |
+
"142d2621f5": {
|
1461 |
+
"motorbike": [
|
1462 |
+
"3"
|
1463 |
+
],
|
1464 |
+
"person": [
|
1465 |
+
"1",
|
1466 |
+
"2"
|
1467 |
+
]
|
1468 |
+
},
|
1469 |
+
"145d5d7c03": {
|
1470 |
+
"giant_panda": [
|
1471 |
+
"1"
|
1472 |
+
]
|
1473 |
+
},
|
1474 |
+
"145fdc3ac5": {
|
1475 |
+
"lizard": [
|
1476 |
+
"1"
|
1477 |
+
]
|
1478 |
+
},
|
1479 |
+
"1471274fa7": {
|
1480 |
+
"person": [
|
1481 |
+
"1"
|
1482 |
+
]
|
1483 |
+
},
|
1484 |
+
"14a6b5a139": {
|
1485 |
+
"fish": [
|
1486 |
+
"1"
|
1487 |
+
]
|
1488 |
+
},
|
1489 |
+
"14c21cea0d": {
|
1490 |
+
"monkey": [
|
1491 |
+
"1",
|
1492 |
+
"2"
|
1493 |
+
]
|
1494 |
+
},
|
1495 |
+
"14dae0dc93": {
|
1496 |
+
"person": [
|
1497 |
+
"2"
|
1498 |
+
]
|
1499 |
+
},
|
1500 |
+
"14f9bd22b5": {
|
1501 |
+
"tiger": [
|
1502 |
+
"1"
|
1503 |
+
]
|
1504 |
+
},
|
1505 |
+
"14fd28ae99": {
|
1506 |
+
"parrot": [
|
1507 |
+
"1"
|
1508 |
+
]
|
1509 |
+
},
|
1510 |
+
"15097d5d4e": {
|
1511 |
+
"parrot": [
|
1512 |
+
"1"
|
1513 |
+
]
|
1514 |
+
},
|
1515 |
+
"150ea711f2": {
|
1516 |
+
"whale": [
|
1517 |
+
"1"
|
1518 |
+
]
|
1519 |
+
},
|
1520 |
+
"1514e3563f": {
|
1521 |
+
"earless_seal": [
|
1522 |
+
"1",
|
1523 |
+
"2"
|
1524 |
+
]
|
1525 |
+
},
|
1526 |
+
"152aaa3a9e": {
|
1527 |
+
"raccoon": [
|
1528 |
+
"1"
|
1529 |
+
]
|
1530 |
+
},
|
1531 |
+
"152b7d3bd7": {
|
1532 |
+
"giant_panda": [
|
1533 |
+
"1"
|
1534 |
+
]
|
1535 |
+
},
|
1536 |
+
"15617297cc": {
|
1537 |
+
"person": [
|
1538 |
+
"1"
|
1539 |
+
]
|
1540 |
+
},
|
1541 |
+
"15abbe0c52": {
|
1542 |
+
"person": [
|
1543 |
+
"1"
|
1544 |
+
]
|
1545 |
+
},
|
1546 |
+
"15d1fb3de5": {
|
1547 |
+
"owl": [
|
1548 |
+
"1"
|
1549 |
+
],
|
1550 |
+
"cat": [
|
1551 |
+
"2"
|
1552 |
+
]
|
1553 |
+
},
|
1554 |
+
"15f67b0fab": {
|
1555 |
+
"person": [
|
1556 |
+
"1"
|
1557 |
+
]
|
1558 |
+
},
|
1559 |
+
"161eb59aad": {
|
1560 |
+
"giraffe": [
|
1561 |
+
"1"
|
1562 |
+
],
|
1563 |
+
"cow": [
|
1564 |
+
"2",
|
1565 |
+
"3"
|
1566 |
+
]
|
1567 |
+
},
|
1568 |
+
"16288ea47f": {
|
1569 |
+
"duck": [
|
1570 |
+
"1",
|
1571 |
+
"2"
|
1572 |
+
]
|
1573 |
+
},
|
1574 |
+
"164410ce62": {
|
1575 |
+
"person": [
|
1576 |
+
"1"
|
1577 |
+
]
|
1578 |
+
},
|
1579 |
+
"165c3c8cd4": {
|
1580 |
+
"person": [
|
1581 |
+
"1",
|
1582 |
+
"2",
|
1583 |
+
"3"
|
1584 |
+
]
|
1585 |
+
},
|
1586 |
+
"165c42b41b": {
|
1587 |
+
"motorbike": [
|
1588 |
+
"2",
|
1589 |
+
"3"
|
1590 |
+
],
|
1591 |
+
"person": [
|
1592 |
+
"1",
|
1593 |
+
"4"
|
1594 |
+
]
|
1595 |
+
},
|
1596 |
+
"165ec9e22b": {
|
1597 |
+
"person": [
|
1598 |
+
"1",
|
1599 |
+
"2"
|
1600 |
+
]
|
1601 |
+
},
|
1602 |
+
"1669502269": {
|
1603 |
+
"person": [
|
1604 |
+
"1"
|
1605 |
+
]
|
1606 |
+
},
|
1607 |
+
"16763cccbb": {
|
1608 |
+
"ape": [
|
1609 |
+
"1"
|
1610 |
+
]
|
1611 |
+
},
|
1612 |
+
"16adde065e": {
|
1613 |
+
"cat": [
|
1614 |
+
"2"
|
1615 |
+
],
|
1616 |
+
"person": [
|
1617 |
+
"3"
|
1618 |
+
]
|
1619 |
+
},
|
1620 |
+
"16af445362": {
|
1621 |
+
"airplane": [
|
1622 |
+
"1"
|
1623 |
+
]
|
1624 |
+
},
|
1625 |
+
"16afd538ad": {
|
1626 |
+
"parrot": [
|
1627 |
+
"1",
|
1628 |
+
"2"
|
1629 |
+
]
|
1630 |
+
},
|
1631 |
+
"16c3fa4d5d": {
|
1632 |
+
"sedan": [
|
1633 |
+
"1"
|
1634 |
+
]
|
1635 |
+
},
|
1636 |
+
"16d1d65c27": {
|
1637 |
+
"monkey": [
|
1638 |
+
"1"
|
1639 |
+
]
|
1640 |
+
},
|
1641 |
+
"16e8599e94": {
|
1642 |
+
"giant_panda": [
|
1643 |
+
"1"
|
1644 |
+
]
|
1645 |
+
},
|
1646 |
+
"16fe9fb444": {
|
1647 |
+
"motorbike": [
|
1648 |
+
"1"
|
1649 |
+
],
|
1650 |
+
"person": [
|
1651 |
+
"2"
|
1652 |
+
]
|
1653 |
+
},
|
1654 |
+
"1705796b02": {
|
1655 |
+
"train": [
|
1656 |
+
"1"
|
1657 |
+
]
|
1658 |
+
},
|
1659 |
+
"1724db7671": {
|
1660 |
+
"giant_panda": [
|
1661 |
+
"1"
|
1662 |
+
]
|
1663 |
+
},
|
1664 |
+
"17418e81ea": {
|
1665 |
+
"shark": [
|
1666 |
+
"1"
|
1667 |
+
]
|
1668 |
+
},
|
1669 |
+
"175169edbb": {
|
1670 |
+
"ape": [
|
1671 |
+
"1",
|
1672 |
+
"2"
|
1673 |
+
]
|
1674 |
+
},
|
1675 |
+
"17622326fd": {
|
1676 |
+
"lizard": [
|
1677 |
+
"1"
|
1678 |
+
]
|
1679 |
+
},
|
1680 |
+
"17656bae77": {
|
1681 |
+
"elephant": [
|
1682 |
+
"1"
|
1683 |
+
]
|
1684 |
+
},
|
1685 |
+
"17b0d94172": {
|
1686 |
+
"airplane": [
|
1687 |
+
"1"
|
1688 |
+
]
|
1689 |
+
},
|
1690 |
+
"17c220e4f6": {
|
1691 |
+
"giant_panda": [
|
1692 |
+
"1"
|
1693 |
+
]
|
1694 |
+
},
|
1695 |
+
"17c7bcd146": {
|
1696 |
+
"train": [
|
1697 |
+
"1"
|
1698 |
+
]
|
1699 |
+
},
|
1700 |
+
"17cb4afe89": {
|
1701 |
+
"tiger": [
|
1702 |
+
"1"
|
1703 |
+
]
|
1704 |
+
},
|
1705 |
+
"17cd79a434": {
|
1706 |
+
"squirrel": [
|
1707 |
+
"1"
|
1708 |
+
]
|
1709 |
+
},
|
1710 |
+
"17d18604c3": {
|
1711 |
+
"person": [
|
1712 |
+
"1",
|
1713 |
+
"2"
|
1714 |
+
]
|
1715 |
+
},
|
1716 |
+
"17d8ca1a37": {
|
1717 |
+
"owl": [
|
1718 |
+
"1"
|
1719 |
+
],
|
1720 |
+
"person": [
|
1721 |
+
"2"
|
1722 |
+
]
|
1723 |
+
},
|
1724 |
+
"17e33f4330": {
|
1725 |
+
"monkey": [
|
1726 |
+
"1"
|
1727 |
+
]
|
1728 |
+
},
|
1729 |
+
"17f7a6d805": {
|
1730 |
+
"snail": [
|
1731 |
+
"1"
|
1732 |
+
]
|
1733 |
+
},
|
1734 |
+
"180abc8378": {
|
1735 |
+
"owl": [
|
1736 |
+
"1"
|
1737 |
+
],
|
1738 |
+
"person": [
|
1739 |
+
"2"
|
1740 |
+
]
|
1741 |
+
},
|
1742 |
+
"183ba3d652": {
|
1743 |
+
"motorbike": [
|
1744 |
+
"3"
|
1745 |
+
],
|
1746 |
+
"person": [
|
1747 |
+
"2"
|
1748 |
+
]
|
1749 |
+
},
|
1750 |
+
"185bf64702": {
|
1751 |
+
"zebra": [
|
1752 |
+
"1",
|
1753 |
+
"2"
|
1754 |
+
]
|
1755 |
+
},
|
1756 |
+
"18913cc690": {
|
1757 |
+
"train": [
|
1758 |
+
"1"
|
1759 |
+
]
|
1760 |
+
},
|
1761 |
+
"1892651815": {
|
1762 |
+
"camel": [
|
1763 |
+
"1"
|
1764 |
+
]
|
1765 |
+
},
|
1766 |
+
"189ac8208a": {
|
1767 |
+
"giraffe": [
|
1768 |
+
"1",
|
1769 |
+
"2"
|
1770 |
+
]
|
1771 |
+
},
|
1772 |
+
"189b44e92c": {
|
1773 |
+
"zebra": [
|
1774 |
+
"1"
|
1775 |
+
]
|
1776 |
+
},
|
1777 |
+
"18ac264b76": {
|
1778 |
+
"person": [
|
1779 |
+
"2"
|
1780 |
+
]
|
1781 |
+
},
|
1782 |
+
"18b245ab49": {
|
1783 |
+
"penguin": [
|
1784 |
+
"1",
|
1785 |
+
"2",
|
1786 |
+
"3",
|
1787 |
+
"4"
|
1788 |
+
]
|
1789 |
+
},
|
1790 |
+
"18b5cebc34": {
|
1791 |
+
"mouse": [
|
1792 |
+
"1"
|
1793 |
+
]
|
1794 |
+
},
|
1795 |
+
"18bad52083": {
|
1796 |
+
"parrot": [
|
1797 |
+
"1",
|
1798 |
+
"2"
|
1799 |
+
]
|
1800 |
+
},
|
1801 |
+
"18bb5144d5": {
|
1802 |
+
"lizard": [
|
1803 |
+
"1"
|
1804 |
+
]
|
1805 |
+
},
|
1806 |
+
"18c6f205c5": {
|
1807 |
+
"person": [
|
1808 |
+
"1",
|
1809 |
+
"2",
|
1810 |
+
"3"
|
1811 |
+
]
|
1812 |
+
},
|
1813 |
+
"1903f9ea15": {
|
1814 |
+
"bird": [
|
1815 |
+
"1",
|
1816 |
+
"2",
|
1817 |
+
"3"
|
1818 |
+
]
|
1819 |
+
},
|
1820 |
+
"1917b209f2": {
|
1821 |
+
"cow": [
|
1822 |
+
"3",
|
1823 |
+
"4"
|
1824 |
+
],
|
1825 |
+
"horse": [
|
1826 |
+
"2"
|
1827 |
+
],
|
1828 |
+
"person": [
|
1829 |
+
"1"
|
1830 |
+
]
|
1831 |
+
},
|
1832 |
+
"191e74c01d": {
|
1833 |
+
"deer": [
|
1834 |
+
"1"
|
1835 |
+
]
|
1836 |
+
},
|
1837 |
+
"19367bb94e": {
|
1838 |
+
"fish": [
|
1839 |
+
"1",
|
1840 |
+
"2",
|
1841 |
+
"3"
|
1842 |
+
]
|
1843 |
+
},
|
1844 |
+
"193ffaa217": {
|
1845 |
+
"person": [
|
1846 |
+
"1",
|
1847 |
+
"2",
|
1848 |
+
"3"
|
1849 |
+
]
|
1850 |
+
},
|
1851 |
+
"19696b67d3": {
|
1852 |
+
"cow": [
|
1853 |
+
"1"
|
1854 |
+
]
|
1855 |
+
},
|
1856 |
+
"197f3ab6f3": {
|
1857 |
+
"giant_panda": [
|
1858 |
+
"1"
|
1859 |
+
]
|
1860 |
+
},
|
1861 |
+
"1981e763cc": {
|
1862 |
+
"sheep": [
|
1863 |
+
"1",
|
1864 |
+
"2"
|
1865 |
+
]
|
1866 |
+
},
|
1867 |
+
"198afe39ae": {
|
1868 |
+
"person": [
|
1869 |
+
"1"
|
1870 |
+
]
|
1871 |
+
},
|
1872 |
+
"19a6e62b9b": {
|
1873 |
+
"monkey": [
|
1874 |
+
"1",
|
1875 |
+
"2"
|
1876 |
+
]
|
1877 |
+
},
|
1878 |
+
"19b60d5335": {
|
1879 |
+
"hedgehog": [
|
1880 |
+
"1"
|
1881 |
+
]
|
1882 |
+
},
|
1883 |
+
"19c00c11f9": {
|
1884 |
+
"person": [
|
1885 |
+
"1"
|
1886 |
+
]
|
1887 |
+
},
|
1888 |
+
"19e061eb88": {
|
1889 |
+
"boat": [
|
1890 |
+
"1",
|
1891 |
+
"2"
|
1892 |
+
]
|
1893 |
+
},
|
1894 |
+
"19e8bc6178": {
|
1895 |
+
"dog": [
|
1896 |
+
"1"
|
1897 |
+
]
|
1898 |
+
},
|
1899 |
+
"19ee80dac6": {
|
1900 |
+
"person": [
|
1901 |
+
"1",
|
1902 |
+
"3",
|
1903 |
+
"4"
|
1904 |
+
]
|
1905 |
+
},
|
1906 |
+
"1a25a9170a": {
|
1907 |
+
"person": [
|
1908 |
+
"2",
|
1909 |
+
"3"
|
1910 |
+
],
|
1911 |
+
"cow": [
|
1912 |
+
"1"
|
1913 |
+
]
|
1914 |
+
},
|
1915 |
+
"1a359a6c1a": {
|
1916 |
+
"sheep": [
|
1917 |
+
"1"
|
1918 |
+
]
|
1919 |
+
},
|
1920 |
+
"1a3e87c566": {
|
1921 |
+
"frog": [
|
1922 |
+
"1"
|
1923 |
+
]
|
1924 |
+
},
|
1925 |
+
"1a5fe06b00": {
|
1926 |
+
"bus": [
|
1927 |
+
"1"
|
1928 |
+
]
|
1929 |
+
},
|
1930 |
+
"1a6c0fbd1e": {
|
1931 |
+
"person": [
|
1932 |
+
"1"
|
1933 |
+
]
|
1934 |
+
},
|
1935 |
+
"1a6f3b5a4b": {
|
1936 |
+
"sedan": [
|
1937 |
+
"3"
|
1938 |
+
]
|
1939 |
+
},
|
1940 |
+
"1a8afbad92": {
|
1941 |
+
"zebra": [
|
1942 |
+
"1",
|
1943 |
+
"2",
|
1944 |
+
"3"
|
1945 |
+
]
|
1946 |
+
},
|
1947 |
+
"1a8bdc5842": {
|
1948 |
+
"parrot": [
|
1949 |
+
"1",
|
1950 |
+
"2"
|
1951 |
+
]
|
1952 |
+
},
|
1953 |
+
"1a95752aca": {
|
1954 |
+
"duck": [
|
1955 |
+
"1",
|
1956 |
+
"2"
|
1957 |
+
]
|
1958 |
+
},
|
1959 |
+
"1a9c131cb7": {
|
1960 |
+
"ape": [
|
1961 |
+
"1",
|
1962 |
+
"2",
|
1963 |
+
"3"
|
1964 |
+
]
|
1965 |
+
},
|
1966 |
+
"1aa3da3ee3": {
|
1967 |
+
"sheep": [
|
1968 |
+
"1",
|
1969 |
+
"2",
|
1970 |
+
"3",
|
1971 |
+
"4"
|
1972 |
+
]
|
1973 |
+
},
|
1974 |
+
"1ab27ec7ea": {
|
1975 |
+
"deer": [
|
1976 |
+
"1"
|
1977 |
+
]
|
1978 |
+
},
|
1979 |
+
"1abf16d21d": {
|
1980 |
+
"turtle": [
|
1981 |
+
"1"
|
1982 |
+
]
|
1983 |
+
},
|
1984 |
+
"1acd0f993b": {
|
1985 |
+
"dog": [
|
1986 |
+
"1"
|
1987 |
+
],
|
1988 |
+
"person": [
|
1989 |
+
"3"
|
1990 |
+
]
|
1991 |
+
},
|
1992 |
+
"1ad202e499": {
|
1993 |
+
"lizard": [
|
1994 |
+
"1",
|
1995 |
+
"2"
|
1996 |
+
]
|
1997 |
+
},
|
1998 |
+
"1af8d2395d": {
|
1999 |
+
"airplane": [
|
2000 |
+
"4"
|
2001 |
+
],
|
2002 |
+
"person": [
|
2003 |
+
"1",
|
2004 |
+
"2"
|
2005 |
+
]
|
2006 |
+
},
|
2007 |
+
"1afd39a1fa": {
|
2008 |
+
"motorbike": [
|
2009 |
+
"2"
|
2010 |
+
]
|
2011 |
+
},
|
2012 |
+
"1b2d31306f": {
|
2013 |
+
"lizard": [
|
2014 |
+
"1"
|
2015 |
+
]
|
2016 |
+
},
|
2017 |
+
"1b3fa67f0e": {
|
2018 |
+
"airplane": [
|
2019 |
+
"1"
|
2020 |
+
]
|
2021 |
+
},
|
2022 |
+
"1b43fa74b4": {
|
2023 |
+
"owl": [
|
2024 |
+
"1",
|
2025 |
+
"2"
|
2026 |
+
]
|
2027 |
+
},
|
2028 |
+
"1b73ea9fc2": {
|
2029 |
+
"parrot": [
|
2030 |
+
"1"
|
2031 |
+
]
|
2032 |
+
},
|
2033 |
+
"1b7e8bb255": {
|
2034 |
+
"person": [
|
2035 |
+
"2"
|
2036 |
+
]
|
2037 |
+
},
|
2038 |
+
"1b8680f8cd": {
|
2039 |
+
"person": [
|
2040 |
+
"2",
|
2041 |
+
"3"
|
2042 |
+
]
|
2043 |
+
},
|
2044 |
+
"1b883843c0": {
|
2045 |
+
"person": [
|
2046 |
+
"1",
|
2047 |
+
"2"
|
2048 |
+
]
|
2049 |
+
},
|
2050 |
+
"1b8898785b": {
|
2051 |
+
"monkey": [
|
2052 |
+
"1",
|
2053 |
+
"2"
|
2054 |
+
]
|
2055 |
+
},
|
2056 |
+
"1b88ba1aa4": {
|
2057 |
+
"giant_panda": [
|
2058 |
+
"1"
|
2059 |
+
]
|
2060 |
+
},
|
2061 |
+
"1b96a498e5": {
|
2062 |
+
"ape": [
|
2063 |
+
"1"
|
2064 |
+
]
|
2065 |
+
},
|
2066 |
+
"1bbc4c274f": {
|
2067 |
+
"fish": [
|
2068 |
+
"2"
|
2069 |
+
]
|
2070 |
+
},
|
2071 |
+
"1bd87fe9ab": {
|
2072 |
+
"train": [
|
2073 |
+
"1"
|
2074 |
+
]
|
2075 |
+
},
|
2076 |
+
"1c4090c75b": {
|
2077 |
+
"whale": [
|
2078 |
+
"1"
|
2079 |
+
]
|
2080 |
+
},
|
2081 |
+
"1c41934f84": {
|
2082 |
+
"elephant": [
|
2083 |
+
"1",
|
2084 |
+
"2"
|
2085 |
+
]
|
2086 |
+
},
|
2087 |
+
"1c72b04b56": {
|
2088 |
+
"lion": [
|
2089 |
+
"1"
|
2090 |
+
]
|
2091 |
+
},
|
2092 |
+
"1c87955a3a": {
|
2093 |
+
"crocodile": [
|
2094 |
+
"1"
|
2095 |
+
],
|
2096 |
+
"turtle": [
|
2097 |
+
"2"
|
2098 |
+
]
|
2099 |
+
},
|
2100 |
+
"1c9f9eb792": {
|
2101 |
+
"person": [
|
2102 |
+
"2"
|
2103 |
+
]
|
2104 |
+
},
|
2105 |
+
"1ca240fede": {
|
2106 |
+
"train": [
|
2107 |
+
"1"
|
2108 |
+
]
|
2109 |
+
},
|
2110 |
+
"1ca5673803": {
|
2111 |
+
"person": [
|
2112 |
+
"1",
|
2113 |
+
"3"
|
2114 |
+
]
|
2115 |
+
},
|
2116 |
+
"1cada35274": {
|
2117 |
+
"duck": [
|
2118 |
+
"1"
|
2119 |
+
]
|
2120 |
+
},
|
2121 |
+
"1cb44b920d": {
|
2122 |
+
"eagle": [
|
2123 |
+
"1",
|
2124 |
+
"2"
|
2125 |
+
]
|
2126 |
+
},
|
2127 |
+
"1cd10e62be": {
|
2128 |
+
"leopard": [
|
2129 |
+
"1"
|
2130 |
+
]
|
2131 |
+
},
|
2132 |
+
"1d3087d5e5": {
|
2133 |
+
"fish": [
|
2134 |
+
"1",
|
2135 |
+
"2",
|
2136 |
+
"3",
|
2137 |
+
"4",
|
2138 |
+
"5"
|
2139 |
+
]
|
2140 |
+
},
|
2141 |
+
"1d3685150a": {
|
2142 |
+
"person": [
|
2143 |
+
"1",
|
2144 |
+
"3"
|
2145 |
+
]
|
2146 |
+
},
|
2147 |
+
"1d6ff083aa": {
|
2148 |
+
"person": [
|
2149 |
+
"1",
|
2150 |
+
"2"
|
2151 |
+
]
|
2152 |
+
}
|
2153 |
+
}
|
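Each of these numbered_valid_obj_ids_*.json files has the same shape: top-level keys are YTVOS video IDs, each mapping category names to the list of valid numbered object IDs in that clip (as strings, matching the numbered annotation masks; some videos are empty). A minimal sketch of how such a file might be consumed; the path below is one of the files added in this commit, and the loop body is illustrative only:

    import json

    # Load the video -> {category: [object IDs]} mapping (path from this commit).
    with open("mbench/numbered_valid_obj_ids_gpt-4o_randcap.json") as f:
        valid_obj_ids = json.load(f)

    for video_id, categories in valid_obj_ids.items():
        if not categories:  # some videos have no valid objects: {}
            continue
        for category, obj_ids in categories.items():
            # Object IDs are strings like "1", "2", ...
            print(video_id, category, obj_ids)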
mbench/numbered_valid_obj_ids_gpt-4o_randcap.json
ADDED
@@ -0,0 +1,2153 @@
{
  "003234408d": { "penguin": ["1", "2", "3", "4", "5"] },
  "0043f083b5": { "bus": ["1"], "sedan": ["2", "3"] },
  "0044fa5fba": { "giant_panda": ["1"] },
  "005a527edd": { "ape": ["1", "2"] },
  "0065b171f9": { "giant_panda": ["1"] },
  "00917dcfc4": { "zebra": ["1", "2", "3"] },
  "00a23ccf53": { "shark": ["1"] },
  "00ad5016a4": { "airplane": ["1"] },
  "01082ae388": { "leopard": ["1"] },
  "011ac0a06f": { "ape": ["1", "2", "3", "4", "5"] },
  "013099c098": { "giant_panda": ["1", "2"] },
  "0155498c85": { "motorbike": ["2"], "person": ["1"] },
  "01694ad9c8": { "bird": ["1"] },
  "017ac35701": { "giant_panda": ["1"] },
  "01b80e8e1a": { "zebra": ["1", "2"] },
  "01baa5a4e1": {},
  "01c3111683": { "whale": ["1"] },
  "01c4cb5ffe": { "person": ["1", "3"] },
  "01c76f0a82": { "sedan": ["1", "4"] },
  "01c783268c": { "person": ["2"], "ape": ["1"] },
  "01e64dd36a": { "cow": ["1", "2", "3"] },
  "01ed275c6e": { "giraffe": ["1", "2"] },
  "01ff60d1fa": { "lizard": ["1"] },
  "020cd28cd2": { "person": ["1"] },
  "02264db755": { "fox": ["1"] },
  "0248626d9a": { "train": ["1"] },
  "02668dbffa": { "frog": ["1"] },
  "0274193026": { "person": ["2"] },
  "02d28375aa": { "fox": ["1"] },
  "031ccc99b1": { "person": ["1", "2", "3"] },
  "0321b18c10": { "elephant": ["3"], "person": ["1", "2"] },
  "0348a45bca": { "fish": ["1", "2", "3", "4", "5"] },
  "0355e92655": { "boat": ["3"], "person": ["2"] },
  "0358b938c1": { "elephant": ["1", "2", "3", "4"] },
  "0368107cf1": { "person": ["1", "2"] },
  "0379ddf557": { "person": ["1"] },
  "038b2cc71d": { "lizard": ["1"] },
  "038c15a5dd": { "hedgehog": ["1"] },
  "03a06cc98a": { "giraffe": ["1", "2", "3"] },
  "03a63e187f": { "lizard": ["1"] },
  "03c95b4dae": { "elephant": ["1", "2", "3"] },
  "03e2b57b0e": { "lizard": ["1"] },
  "04194e1248": { "lizard": ["1"] },
  "04259896e2": { "lizard": ["1"] },
  "0444918a5f": { "truck": ["1", "2", "3", "4"] },
  "04460a7a52": { "lizard": ["1"] },
  "04474174a4": { "ape": ["1", "2"] },
  "0450095513": { "snail": ["1"] },
  "045f00aed2": { "person": ["3"], "tiger": ["1"] },
  "04667fabaa": { "parrot": ["1"] },
  "04735c5030": { "cat": ["1", "2"] },
  "04990d1915": { "truck": ["3"], "bus": ["2"], "sedan": ["1"] },
  "04d62d9d98": { "person": ["1"] },
  "04f21da964": { "monkey": ["1"] },
  "04fbad476e": { "parrot": ["1"] },
  "04fe256562": { "motorbike": ["1"], "truck": ["2"] },
  "0503bf89c9": { "hedgehog": ["1"] },
  "0536c9eed0": { "cat": ["1"] },
  "054acb238f": { "owl": ["1"] },
  "05579ca250": { "person": ["1"], "sedan": ["3"] },
  "056c200404": {},
  "05774f3a2c": { "ape": ["1", "2", "3"] },
  "058a7592c8": { "train": ["1"] },
  "05a0a513df": { "person": ["1", "2"] },
  "05a569d8aa": { "cat": ["1"], "mouse": ["2"] },
  "05aa652648": { "ape": ["1"] },
  "05d7715782": {},
  "05e0b0f28f": { "mouse": ["1"], "person": ["2"] },
  "05fdbbdd7a": {},
  "05ffcfed85": { "monkey": ["1", "2"] },
  "0630391881": { "person": ["1"] },
  "06840b2bbe": { "snake": ["1"] },
  "068f7dce6f": { "shark": ["1"] },
  "0693719753": { "turtle": ["1", "2"] },
  "06ce2b51fb": { "person": ["1", "2"] },
  "06e224798e": { "tiger": ["1"] },
  "06ee361788": { "duck": ["1", "2", "3"] },
  "06fbb3fa2c": { "eagle": ["1"] },
  "0700264286": { "cow": ["1", "2"] },
  "070c918ca7": { "parrot": ["1"] },
  "07129e14a4": { "person": ["3"], "parrot": ["1", "2"] },
  "07177017e9": { "motorbike": ["1", "2"] },
  "07238ffc58": { "monkey": ["1", "2", "3"] },
  "07353b2a89": { "sheep": ["1", "2", "3", "4"] },
  "0738493cbf": { "airplane": ["1"] },
  "075926c651": { "person": ["1", "2"] },
  "075c701292": { "duck": ["1", "2", "3", "4"] },
  "0762ea9a30": { "person": ["1"] },
  "07652ee4af": { "person": ["1"] },
  "076f206928": { "person": ["3"], "zebra": ["1", "2"] },
  "077d32af19": { "train": ["4"], "person": ["1", "2", "3"] },
  "079049275c": { "mouse": ["1"] },
  "07913cdda7": { "train": ["1"], "person": ["2", "3"] },
  "07a11a35e8": { "ape": ["1", "2"] },
  "07ac33b6df": { "ape": ["1"] },
  "07c62c3d11": { "parrot": ["1", "2", "3"] },
  "07cc1c7d74": { "snake": ["1"] },
  "080196ef01": { "lizard": ["1"] },
  "081207976e": {},
  "081ae4fa44": { "shark": ["1", "2"] },
  "081d8250cb": { "person": ["1"], "sedan": ["3"] },
  "082900c5d4": { "duck": ["1", "2", "3"] },
  "0860df21e2": {},
  "0866d4c5e3": { "bird": ["1", "2", "3"] },
  "0891ac2eb6": { "person": ["1", "2", "3"] },
  "08931bc458": { "person": ["1"] },
  "08aa2705d5": { "snake": ["1"] },
  "08c8450db7": {},
  "08d50b926c": { "turtle": ["1", "2"] },
  "08e1e4de15": { "monkey": ["1", "2", "3", "4"] },
  "08e48c1a48": { "cow": ["1"] },
  "08f561c65e": { "person": ["2"], "giant_panda": ["1"] },
  "08feb87790": { "sheep": ["1"] },
  "09049f6fe3": { "mouse": ["1", "2"] },
  "092e4ff450": { "snake": ["1"] },
  "09338adea8": { "whale": ["1", "2"] },
  "093c335ccc": { "person": ["2"] },
  "0970d28339": { "ape": ["1", "2"] },
  "0974a213dc": { "giraffe": ["1", "2", "3"] },
  "097b471ed8": { "cat": ["1", "2"] },
  "0990941758": { "giant_panda": ["1"] },
  "09a348f4fa": { "lizard": ["1"] },
  "09a6841288": { "duck": ["1", "2"] },
  "09c5bad17b": { "airplane": ["1"] },
  "09c9ce80c7": { "giant_panda": ["1"] },
  "09ff54fef4": { "fox": ["1", "2"] },
  "0a23765d15": { "person": ["1", "2"] },
  "0a275e7f12": { "elephant": ["1"] },
  "0a2f2bd294": { "motorbike": ["1"] },
  "0a7a2514aa": { "cat": ["1"], "lizard": ["2"] },
  "0a7b27fde9": { "parrot": ["1", "2"] },
  "0a8c467cc3": { "fish": ["1", "2", "3"] },
  "0ac8c560ae": { "person": ["2", "3"] },
  "0b1627e896": { "boat": ["1"] },
  "0b285c47f6": { "mouse": ["1"] },
  "0b34ec1d55": { "ape": ["1"] },
  "0b5b5e8e5a": { "person": ["1"], "sedan": ["2"] },
  "0b68535614": { "rabbit": ["1"] },
  "0b6f9105fc": { "rabbit": ["1"] },
  "0b7dbfa3cb": { "cow": ["1"] },
  "0b9cea51ca": { "whale": ["1"] },
  "0b9d012be8": { "camel": ["1"] },
  "0bcfc4177d": { "truck": ["1"] },
  "0bd37b23c1": { "motorbike": ["1"] },
  "0bd864064c": { "eagle": ["1"] },
  "0c11c6bf7b": { "deer": ["1"] },
  "0c26bc77ac": { "crocodile": ["1"] },
  "0c3a04798c": { "fish": ["2"], "duck": ["1"] },
  "0c44a9d545": { "tiger": ["1"] },
  "0c817cc390": { "hedgehog": ["1"], "dog": ["2"] },
  "0ca839ee9a": { "ape": ["1", "2"] },
  "0cd7ac0ac0": { "rabbit": ["1"] },
  "0ce06e0121": { "parrot": ["1", "2"] },
  "0cfe974a89": { "turtle": ["1", "2"] },
  "0d2fcc0dcd": { "zebra": ["1", "2", "3", "4"] },
  "0d3aad05d2": { "person": ["1"] },
  "0d40b015f4": { "person": ["1"] },
  "0d97fba242": { "dog": ["1"], "person": ["2"] },
  "0d9cc80d7e": { "person": ["1", "2", "3"] },
  "0dab85b6d3": { "lizard": ["1", "2"] },
  "0db5c427a5": { "train": ["1"] },
  "0dbaf284f1": { "cat": ["1", "2"] },
  "0de4923598": {},
  "0df28a9101": { "turtle": ["1", "2", "3"] },
  "0e04f636c4": { "frog": ["1"] },
  "0e05f0e232": { "lizard": ["1", "2"] },
  "0e0930474b": { "person": ["2", "3"], "sedan": ["1"] },
  "0e27472bea": { "turtle": ["1"] },
  "0e30020549": { "parrot": ["1"] },
  "0e621feb6c": { "lizard": ["1", "2"] },
  "0e803c7d73": {},
  "0e9ebe4e3c": { "truck": ["1"] },
  "0e9f2785ec": { "person": ["2"] },
  "0ea68d418b": { "airplane": ["1"] },
  "0eb403a222": {},
  "0ee92053d6": { "person": ["1"] },
  "0eefca067f": { "giant_panda": ["1", "2"] },
  "0f17fa6fcb": { "duck": ["1", "2", "3"] },
  "0f1ac8e9a3": { "frog": ["1"] },
  "0f202e9852": { "parrot": ["1"] },
  "0f2ab8b1ff": { "dolphin": ["1", "2", "3"] },
  "0f51a78756": { "sheep": ["1"] },
  "0f5fbe16b0": { "raccoon": ["1", "2"] },
  "0f6072077b": { "person": ["1", "2", "3"] },
  "0f6b69b2f4": { "rabbit": ["1"] },
  "0f6c2163de": { "snail": ["1"] },
  "0f74ec5599": { "giant_panda": ["1"] },
  "0f9683715b": { "elephant": ["1"] },
  "0fa7b59356": { "duck": ["1"] },
  "0fb173695b": { "person": ["3"] },
  "0fc958cde2": { "owl": ["1"] },
  "0fe7b1a621": { "parrot": ["1"] },
  "0ffcdb491c": { "person": ["1", "2", "3"] },
  "101caff7d4": { "giant_panda": ["1", "2"] },
  "1022fe8417": { "person": ["1", "2", "3"] },
  "1032e80b37": { "giraffe": ["1"] },
  "103f501680": { "fish": ["1"] },
  "104e64565f": { "elephant": ["1"] },
  "104f1ab997": { "person": ["1", "2", "3"] },
  "106242403f": { "person": ["1", "2"] },
  "10b31f5431": { "person": ["1", "3", "4"] },
  "10eced835e": { "giant_panda": ["1", "2"] },
  "110d26fa3a": { "shark": ["1"] },
  "1122c1d16a": { "person": ["6"], "parrot": ["1", "2", "3", "4", "5"] },
  "1145b49a5f": { "rabbit": ["1"] },
  "11485838c2": { "giraffe": ["1", "2", "3"] },
  "114e7676ec": { "person": ["1"] },
  "1157472b95": { "parrot": ["1", "2"] },
  "115ee1072c": { "cow": ["1"] },
  "1171141012": { "turtle": ["1"], "person": ["2"] },
  "117757b4b8": { "snail": ["1"] },
  "1178932d2f": { "motorbike": ["3"], "person": ["1", "2"] },
  "117cc76bda": { "whale": ["1"] },
  "1180cbf814": { "fish": ["1", "2"] },
  "1187bbd0e3": { "cat": ["1"] },
  "1197e44b26": { "giant_panda": ["1"] },
  "119cf20728": { "lizard": ["1"] },
  "119dd54871": { "lion": ["1", "2"] },
  "11a0c3b724": { "mouse": ["1", "2"] },
  "11a6ba8c94": { "person": ["1", "2"] },
  "11c722a456": { "turtle": ["1", "2"] },
  "11cbcb0b4d": { "zebra": ["1"] },
  "11ccf5e99d": { "person": ["2"] },
  "11ce6f452e": { "person": ["1", "2", "3"] },
  "11feabe596": { "rabbit": ["1"] },
  "120cb9514d": { "person": ["1", "2", "3"] },
  "12156b25b3": { "person": ["1"] },
  "122896672d": { "person": ["1", "3"] },
  "1233ac8596": { "dog": ["1"] },
  "1239c87234": { "lizard": ["1"] },
  "1250423f7c": { "elephant": ["3", "4"], "person": ["2"] },
  "1257a1bc67": { "snake": ["1"] },
  "125d1b19dd": { "giant_panda": ["1", "2"] },
  "126d203967": { "person": ["2"] },
  "1295e19071": { "airplane": ["1"] },
  "12ad198c54": { "person": ["1"] },
  "12bddb2bcb": { "person": ["2"] },
  "12ec9b93ee": { "giant_panda": ["1"] },
  "12eebedc35": { "bird": ["1"] },
  "132852e094": { "fox": ["1"] },
  "1329409f2a": { "fish": ["1"] },
  "13325cfa14": { "person": ["2"] },
  "1336440745": { "mouse": ["1", "2"] },
  "134d06dbf9": { "cat": ["1"] },
  "135625b53d": { "parrot": ["1"] },
  "13870016f9": { "cow": ["2", "3"], "person": ["1"] },
  "13960b3c84": { "giraffe": ["1", "2", "3"] },
  "13adaad9d9": { "giant_panda": ["1"] },
  "13ae097e20": { "giant_panda": ["1"] },
  "13e3070469": { "zebra": ["1", "2", "3"] },
  "13f6a8c20d": { "fish": ["1"] },
  "1416925cf2": { "truck": ["1", "2"] },
  "142d2621f5": { "motorbike": ["3"], "person": ["1", "2"] },
  "145d5d7c03": { "giant_panda": ["1"] },
  "145fdc3ac5": { "lizard": ["1"] },
  "1471274fa7": { "person": ["1"] },
  "14a6b5a139": { "fish": ["1"] },
  "14c21cea0d": { "monkey": ["1", "2"] },
  "14dae0dc93": { "person": ["2"] },
  "14f9bd22b5": { "tiger": ["1"] },
  "14fd28ae99": { "parrot": ["1"] },
  "15097d5d4e": {
1511 |
+
"parrot": [
|
1512 |
+
"1"
|
1513 |
+
]
|
1514 |
+
},
|
1515 |
+
"150ea711f2": {
|
1516 |
+
"whale": [
|
1517 |
+
"1"
|
1518 |
+
]
|
1519 |
+
},
|
1520 |
+
"1514e3563f": {
|
1521 |
+
"earless_seal": [
|
1522 |
+
"1",
|
1523 |
+
"2"
|
1524 |
+
]
|
1525 |
+
},
|
1526 |
+
"152aaa3a9e": {
|
1527 |
+
"raccoon": [
|
1528 |
+
"1"
|
1529 |
+
]
|
1530 |
+
},
|
1531 |
+
"152b7d3bd7": {
|
1532 |
+
"giant_panda": [
|
1533 |
+
"1"
|
1534 |
+
]
|
1535 |
+
},
|
1536 |
+
"15617297cc": {
|
1537 |
+
"person": [
|
1538 |
+
"1"
|
1539 |
+
]
|
1540 |
+
},
|
1541 |
+
"15abbe0c52": {
|
1542 |
+
"person": [
|
1543 |
+
"1"
|
1544 |
+
]
|
1545 |
+
},
|
1546 |
+
"15d1fb3de5": {
|
1547 |
+
"owl": [
|
1548 |
+
"1"
|
1549 |
+
],
|
1550 |
+
"cat": [
|
1551 |
+
"2"
|
1552 |
+
]
|
1553 |
+
},
|
1554 |
+
"15f67b0fab": {
|
1555 |
+
"person": [
|
1556 |
+
"1"
|
1557 |
+
]
|
1558 |
+
},
|
1559 |
+
"161eb59aad": {
|
1560 |
+
"cow": [
|
1561 |
+
"2",
|
1562 |
+
"3"
|
1563 |
+
],
|
1564 |
+
"giraffe": [
|
1565 |
+
"1"
|
1566 |
+
]
|
1567 |
+
},
|
1568 |
+
"16288ea47f": {
|
1569 |
+
"duck": [
|
1570 |
+
"1",
|
1571 |
+
"2"
|
1572 |
+
]
|
1573 |
+
},
|
1574 |
+
"164410ce62": {
|
1575 |
+
"person": [
|
1576 |
+
"1"
|
1577 |
+
]
|
1578 |
+
},
|
1579 |
+
"165c3c8cd4": {
|
1580 |
+
"person": [
|
1581 |
+
"1",
|
1582 |
+
"2",
|
1583 |
+
"3"
|
1584 |
+
]
|
1585 |
+
},
|
1586 |
+
"165c42b41b": {
|
1587 |
+
"motorbike": [
|
1588 |
+
"2",
|
1589 |
+
"3"
|
1590 |
+
],
|
1591 |
+
"person": [
|
1592 |
+
"1",
|
1593 |
+
"4"
|
1594 |
+
]
|
1595 |
+
},
|
1596 |
+
"165ec9e22b": {
|
1597 |
+
"person": [
|
1598 |
+
"1",
|
1599 |
+
"2"
|
1600 |
+
]
|
1601 |
+
},
|
1602 |
+
"1669502269": {
|
1603 |
+
"person": [
|
1604 |
+
"1"
|
1605 |
+
]
|
1606 |
+
},
|
1607 |
+
"16763cccbb": {
|
1608 |
+
"ape": [
|
1609 |
+
"1"
|
1610 |
+
]
|
1611 |
+
},
|
1612 |
+
"16adde065e": {
|
1613 |
+
"cat": [
|
1614 |
+
"2"
|
1615 |
+
],
|
1616 |
+
"person": [
|
1617 |
+
"3"
|
1618 |
+
]
|
1619 |
+
},
|
1620 |
+
"16af445362": {
|
1621 |
+
"airplane": [
|
1622 |
+
"1"
|
1623 |
+
]
|
1624 |
+
},
|
1625 |
+
"16afd538ad": {
|
1626 |
+
"parrot": [
|
1627 |
+
"1",
|
1628 |
+
"2"
|
1629 |
+
]
|
1630 |
+
},
|
1631 |
+
"16c3fa4d5d": {
|
1632 |
+
"sedan": [
|
1633 |
+
"1"
|
1634 |
+
]
|
1635 |
+
},
|
1636 |
+
"16d1d65c27": {
|
1637 |
+
"monkey": [
|
1638 |
+
"1"
|
1639 |
+
]
|
1640 |
+
},
|
1641 |
+
"16e8599e94": {
|
1642 |
+
"giant_panda": [
|
1643 |
+
"1"
|
1644 |
+
]
|
1645 |
+
},
|
1646 |
+
"16fe9fb444": {
|
1647 |
+
"motorbike": [
|
1648 |
+
"1"
|
1649 |
+
],
|
1650 |
+
"person": [
|
1651 |
+
"2"
|
1652 |
+
]
|
1653 |
+
},
|
1654 |
+
"1705796b02": {
|
1655 |
+
"train": [
|
1656 |
+
"1"
|
1657 |
+
]
|
1658 |
+
},
|
1659 |
+
"1724db7671": {
|
1660 |
+
"giant_panda": [
|
1661 |
+
"1"
|
1662 |
+
]
|
1663 |
+
},
|
1664 |
+
"17418e81ea": {
|
1665 |
+
"shark": [
|
1666 |
+
"1"
|
1667 |
+
]
|
1668 |
+
},
|
1669 |
+
"175169edbb": {
|
1670 |
+
"ape": [
|
1671 |
+
"1",
|
1672 |
+
"2"
|
1673 |
+
]
|
1674 |
+
},
|
1675 |
+
"17622326fd": {
|
1676 |
+
"lizard": [
|
1677 |
+
"1"
|
1678 |
+
]
|
1679 |
+
},
|
1680 |
+
"17656bae77": {
|
1681 |
+
"elephant": [
|
1682 |
+
"1"
|
1683 |
+
]
|
1684 |
+
},
|
1685 |
+
"17b0d94172": {
|
1686 |
+
"airplane": [
|
1687 |
+
"1"
|
1688 |
+
]
|
1689 |
+
},
|
1690 |
+
"17c220e4f6": {
|
1691 |
+
"giant_panda": [
|
1692 |
+
"1"
|
1693 |
+
]
|
1694 |
+
},
|
1695 |
+
"17c7bcd146": {
|
1696 |
+
"train": [
|
1697 |
+
"1"
|
1698 |
+
]
|
1699 |
+
},
|
1700 |
+
"17cb4afe89": {
|
1701 |
+
"tiger": [
|
1702 |
+
"1"
|
1703 |
+
]
|
1704 |
+
},
|
1705 |
+
"17cd79a434": {
|
1706 |
+
"squirrel": [
|
1707 |
+
"1"
|
1708 |
+
]
|
1709 |
+
},
|
1710 |
+
"17d18604c3": {
|
1711 |
+
"person": [
|
1712 |
+
"1",
|
1713 |
+
"2"
|
1714 |
+
]
|
1715 |
+
},
|
1716 |
+
"17d8ca1a37": {
|
1717 |
+
"owl": [
|
1718 |
+
"1"
|
1719 |
+
],
|
1720 |
+
"person": [
|
1721 |
+
"2"
|
1722 |
+
]
|
1723 |
+
},
|
1724 |
+
"17e33f4330": {
|
1725 |
+
"monkey": [
|
1726 |
+
"1"
|
1727 |
+
]
|
1728 |
+
},
|
1729 |
+
"17f7a6d805": {
|
1730 |
+
"snail": [
|
1731 |
+
"1"
|
1732 |
+
]
|
1733 |
+
},
|
1734 |
+
"180abc8378": {
|
1735 |
+
"owl": [
|
1736 |
+
"1"
|
1737 |
+
],
|
1738 |
+
"person": [
|
1739 |
+
"2"
|
1740 |
+
]
|
1741 |
+
},
|
1742 |
+
"183ba3d652": {
|
1743 |
+
"person": [
|
1744 |
+
"2"
|
1745 |
+
],
|
1746 |
+
"motorbike": [
|
1747 |
+
"3"
|
1748 |
+
]
|
1749 |
+
},
|
1750 |
+
"185bf64702": {
|
1751 |
+
"zebra": [
|
1752 |
+
"1",
|
1753 |
+
"2"
|
1754 |
+
]
|
1755 |
+
},
|
1756 |
+
"18913cc690": {
|
1757 |
+
"train": [
|
1758 |
+
"1"
|
1759 |
+
]
|
1760 |
+
},
|
1761 |
+
"1892651815": {
|
1762 |
+
"camel": [
|
1763 |
+
"1"
|
1764 |
+
]
|
1765 |
+
},
|
1766 |
+
"189ac8208a": {
|
1767 |
+
"giraffe": [
|
1768 |
+
"1",
|
1769 |
+
"2"
|
1770 |
+
]
|
1771 |
+
},
|
1772 |
+
"189b44e92c": {
|
1773 |
+
"zebra": [
|
1774 |
+
"1"
|
1775 |
+
]
|
1776 |
+
},
|
1777 |
+
"18ac264b76": {
|
1778 |
+
"person": [
|
1779 |
+
"2"
|
1780 |
+
]
|
1781 |
+
},
|
1782 |
+
"18b245ab49": {
|
1783 |
+
"penguin": [
|
1784 |
+
"1",
|
1785 |
+
"2",
|
1786 |
+
"3",
|
1787 |
+
"4"
|
1788 |
+
]
|
1789 |
+
},
|
1790 |
+
"18b5cebc34": {
|
1791 |
+
"mouse": [
|
1792 |
+
"1"
|
1793 |
+
]
|
1794 |
+
},
|
1795 |
+
"18bad52083": {
|
1796 |
+
"parrot": [
|
1797 |
+
"1",
|
1798 |
+
"2"
|
1799 |
+
]
|
1800 |
+
},
|
1801 |
+
"18bb5144d5": {
|
1802 |
+
"lizard": [
|
1803 |
+
"1"
|
1804 |
+
]
|
1805 |
+
},
|
1806 |
+
"18c6f205c5": {
|
1807 |
+
"person": [
|
1808 |
+
"1",
|
1809 |
+
"2",
|
1810 |
+
"3"
|
1811 |
+
]
|
1812 |
+
},
|
1813 |
+
"1903f9ea15": {
|
1814 |
+
"bird": [
|
1815 |
+
"1",
|
1816 |
+
"2",
|
1817 |
+
"3"
|
1818 |
+
]
|
1819 |
+
},
|
1820 |
+
"1917b209f2": {
|
1821 |
+
"cow": [
|
1822 |
+
"3",
|
1823 |
+
"4"
|
1824 |
+
],
|
1825 |
+
"person": [
|
1826 |
+
"1"
|
1827 |
+
],
|
1828 |
+
"horse": [
|
1829 |
+
"2"
|
1830 |
+
]
|
1831 |
+
},
|
1832 |
+
"191e74c01d": {
|
1833 |
+
"deer": [
|
1834 |
+
"1"
|
1835 |
+
]
|
1836 |
+
},
|
1837 |
+
"19367bb94e": {
|
1838 |
+
"fish": [
|
1839 |
+
"1",
|
1840 |
+
"2",
|
1841 |
+
"3"
|
1842 |
+
]
|
1843 |
+
},
|
1844 |
+
"193ffaa217": {
|
1845 |
+
"person": [
|
1846 |
+
"1",
|
1847 |
+
"2",
|
1848 |
+
"3"
|
1849 |
+
]
|
1850 |
+
},
|
1851 |
+
"19696b67d3": {
|
1852 |
+
"cow": [
|
1853 |
+
"1"
|
1854 |
+
]
|
1855 |
+
},
|
1856 |
+
"197f3ab6f3": {
|
1857 |
+
"giant_panda": [
|
1858 |
+
"1"
|
1859 |
+
]
|
1860 |
+
},
|
1861 |
+
"1981e763cc": {
|
1862 |
+
"sheep": [
|
1863 |
+
"1",
|
1864 |
+
"2"
|
1865 |
+
]
|
1866 |
+
},
|
1867 |
+
"198afe39ae": {
|
1868 |
+
"person": [
|
1869 |
+
"1"
|
1870 |
+
]
|
1871 |
+
},
|
1872 |
+
"19a6e62b9b": {
|
1873 |
+
"monkey": [
|
1874 |
+
"1",
|
1875 |
+
"2"
|
1876 |
+
]
|
1877 |
+
},
|
1878 |
+
"19b60d5335": {
|
1879 |
+
"hedgehog": [
|
1880 |
+
"1"
|
1881 |
+
]
|
1882 |
+
},
|
1883 |
+
"19c00c11f9": {
|
1884 |
+
"person": [
|
1885 |
+
"1"
|
1886 |
+
]
|
1887 |
+
},
|
1888 |
+
"19e061eb88": {
|
1889 |
+
"boat": [
|
1890 |
+
"1",
|
1891 |
+
"2"
|
1892 |
+
]
|
1893 |
+
},
|
1894 |
+
"19e8bc6178": {
|
1895 |
+
"dog": [
|
1896 |
+
"1"
|
1897 |
+
]
|
1898 |
+
},
|
1899 |
+
"19ee80dac6": {
|
1900 |
+
"person": [
|
1901 |
+
"1",
|
1902 |
+
"3",
|
1903 |
+
"4"
|
1904 |
+
]
|
1905 |
+
},
|
1906 |
+
"1a25a9170a": {
|
1907 |
+
"cow": [
|
1908 |
+
"1"
|
1909 |
+
],
|
1910 |
+
"person": [
|
1911 |
+
"2",
|
1912 |
+
"3"
|
1913 |
+
]
|
1914 |
+
},
|
1915 |
+
"1a359a6c1a": {
|
1916 |
+
"sheep": [
|
1917 |
+
"1"
|
1918 |
+
]
|
1919 |
+
},
|
1920 |
+
"1a3e87c566": {
|
1921 |
+
"frog": [
|
1922 |
+
"1"
|
1923 |
+
]
|
1924 |
+
},
|
1925 |
+
"1a5fe06b00": {
|
1926 |
+
"bus": [
|
1927 |
+
"1"
|
1928 |
+
]
|
1929 |
+
},
|
1930 |
+
"1a6c0fbd1e": {
|
1931 |
+
"person": [
|
1932 |
+
"1"
|
1933 |
+
]
|
1934 |
+
},
|
1935 |
+
"1a6f3b5a4b": {
|
1936 |
+
"sedan": [
|
1937 |
+
"3"
|
1938 |
+
]
|
1939 |
+
},
|
1940 |
+
"1a8afbad92": {
|
1941 |
+
"zebra": [
|
1942 |
+
"1",
|
1943 |
+
"2",
|
1944 |
+
"3"
|
1945 |
+
]
|
1946 |
+
},
|
1947 |
+
"1a8bdc5842": {
|
1948 |
+
"parrot": [
|
1949 |
+
"1",
|
1950 |
+
"2"
|
1951 |
+
]
|
1952 |
+
},
|
1953 |
+
"1a95752aca": {
|
1954 |
+
"duck": [
|
1955 |
+
"1",
|
1956 |
+
"2"
|
1957 |
+
]
|
1958 |
+
},
|
1959 |
+
"1a9c131cb7": {
|
1960 |
+
"ape": [
|
1961 |
+
"1",
|
1962 |
+
"2",
|
1963 |
+
"3"
|
1964 |
+
]
|
1965 |
+
},
|
1966 |
+
"1aa3da3ee3": {
|
1967 |
+
"sheep": [
|
1968 |
+
"1",
|
1969 |
+
"2",
|
1970 |
+
"3",
|
1971 |
+
"4"
|
1972 |
+
]
|
1973 |
+
},
|
1974 |
+
"1ab27ec7ea": {
|
1975 |
+
"deer": [
|
1976 |
+
"1"
|
1977 |
+
]
|
1978 |
+
},
|
1979 |
+
"1abf16d21d": {
|
1980 |
+
"turtle": [
|
1981 |
+
"1"
|
1982 |
+
]
|
1983 |
+
},
|
1984 |
+
"1acd0f993b": {
|
1985 |
+
"dog": [
|
1986 |
+
"1"
|
1987 |
+
],
|
1988 |
+
"person": [
|
1989 |
+
"3"
|
1990 |
+
]
|
1991 |
+
},
|
1992 |
+
"1ad202e499": {
|
1993 |
+
"lizard": [
|
1994 |
+
"1",
|
1995 |
+
"2"
|
1996 |
+
]
|
1997 |
+
},
|
1998 |
+
"1af8d2395d": {
|
1999 |
+
"person": [
|
2000 |
+
"1",
|
2001 |
+
"2"
|
2002 |
+
],
|
2003 |
+
"airplane": [
|
2004 |
+
"4"
|
2005 |
+
]
|
2006 |
+
},
|
2007 |
+
"1afd39a1fa": {
|
2008 |
+
"motorbike": [
|
2009 |
+
"2"
|
2010 |
+
]
|
2011 |
+
},
|
2012 |
+
"1b2d31306f": {
|
2013 |
+
"lizard": [
|
2014 |
+
"1"
|
2015 |
+
]
|
2016 |
+
},
|
2017 |
+
"1b3fa67f0e": {
|
2018 |
+
"airplane": [
|
2019 |
+
"1"
|
2020 |
+
]
|
2021 |
+
},
|
2022 |
+
"1b43fa74b4": {
|
2023 |
+
"owl": [
|
2024 |
+
"1",
|
2025 |
+
"2"
|
2026 |
+
]
|
2027 |
+
},
|
2028 |
+
"1b73ea9fc2": {
|
2029 |
+
"parrot": [
|
2030 |
+
"1"
|
2031 |
+
]
|
2032 |
+
},
|
2033 |
+
"1b7e8bb255": {
|
2034 |
+
"person": [
|
2035 |
+
"2"
|
2036 |
+
]
|
2037 |
+
},
|
2038 |
+
"1b8680f8cd": {
|
2039 |
+
"person": [
|
2040 |
+
"2",
|
2041 |
+
"3"
|
2042 |
+
]
|
2043 |
+
},
|
2044 |
+
"1b883843c0": {
|
2045 |
+
"person": [
|
2046 |
+
"1",
|
2047 |
+
"2"
|
2048 |
+
]
|
2049 |
+
},
|
2050 |
+
"1b8898785b": {
|
2051 |
+
"monkey": [
|
2052 |
+
"1",
|
2053 |
+
"2"
|
2054 |
+
]
|
2055 |
+
},
|
2056 |
+
"1b88ba1aa4": {
|
2057 |
+
"giant_panda": [
|
2058 |
+
"1"
|
2059 |
+
]
|
2060 |
+
},
|
2061 |
+
"1b96a498e5": {
|
2062 |
+
"ape": [
|
2063 |
+
"1"
|
2064 |
+
]
|
2065 |
+
},
|
2066 |
+
"1bbc4c274f": {
|
2067 |
+
"fish": [
|
2068 |
+
"2"
|
2069 |
+
]
|
2070 |
+
},
|
2071 |
+
"1bd87fe9ab": {
|
2072 |
+
"train": [
|
2073 |
+
"1"
|
2074 |
+
]
|
2075 |
+
},
|
2076 |
+
"1c4090c75b": {
|
2077 |
+
"whale": [
|
2078 |
+
"1"
|
2079 |
+
]
|
2080 |
+
},
|
2081 |
+
"1c41934f84": {
|
2082 |
+
"elephant": [
|
2083 |
+
"1",
|
2084 |
+
"2"
|
2085 |
+
]
|
2086 |
+
},
|
2087 |
+
"1c72b04b56": {
|
2088 |
+
"lion": [
|
2089 |
+
"1"
|
2090 |
+
]
|
2091 |
+
},
|
2092 |
+
"1c87955a3a": {
|
2093 |
+
"crocodile": [
|
2094 |
+
"1"
|
2095 |
+
],
|
2096 |
+
"turtle": [
|
2097 |
+
"2"
|
2098 |
+
]
|
2099 |
+
},
|
2100 |
+
"1c9f9eb792": {
|
2101 |
+
"person": [
|
2102 |
+
"2"
|
2103 |
+
]
|
2104 |
+
},
|
2105 |
+
"1ca240fede": {
|
2106 |
+
"train": [
|
2107 |
+
"1"
|
2108 |
+
]
|
2109 |
+
},
|
2110 |
+
"1ca5673803": {
|
2111 |
+
"person": [
|
2112 |
+
"1",
|
2113 |
+
"3"
|
2114 |
+
]
|
2115 |
+
},
|
2116 |
+
"1cada35274": {
|
2117 |
+
"duck": [
|
2118 |
+
"1"
|
2119 |
+
]
|
2120 |
+
},
|
2121 |
+
"1cb44b920d": {
|
2122 |
+
"eagle": [
|
2123 |
+
"1",
|
2124 |
+
"2"
|
2125 |
+
]
|
2126 |
+
},
|
2127 |
+
"1cd10e62be": {
|
2128 |
+
"leopard": [
|
2129 |
+
"1"
|
2130 |
+
]
|
2131 |
+
},
|
2132 |
+
"1d3087d5e5": {
|
2133 |
+
"fish": [
|
2134 |
+
"1",
|
2135 |
+
"2",
|
2136 |
+
"3",
|
2137 |
+
"4",
|
2138 |
+
"5"
|
2139 |
+
]
|
2140 |
+
},
|
2141 |
+
"1d3685150a": {
|
2142 |
+
"person": [
|
2143 |
+
"1",
|
2144 |
+
"3"
|
2145 |
+
]
|
2146 |
+
},
|
2147 |
+
"1d6ff083aa": {
|
2148 |
+
"person": [
|
2149 |
+
"1",
|
2150 |
+
"2"
|
2151 |
+
]
|
2152 |
+
}
|
2153 |
+
}
|
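
The added JSON above maps each video hash to the object IDs kept per category. A minimal sketch of how such a mapping might be consumed, assuming the file has been saved locally; the path "valid_obj_ids.json" is a placeholder, not a filename taken from this diff:

import json

# Load the mapping shown above: video hash -> {category: [object IDs]}.
# "valid_obj_ids.json" is a placeholder path for wherever the file is stored.
with open("valid_obj_ids.json") as f:
    valid_obj_ids = json.load(f)

# Example query: which object IDs are annotated as "person" in video "1022fe8417"?
video = valid_obj_ids["1022fe8417"]
print(video.get("person", []))  # -> ["1", "2", "3"]

# Object IDs are stored as strings; cast to int if an annotation reader expects ints.
person_ids = [int(i) for i in video.get("person", [])]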