nikigoli committed on
Commit
a277bb8
1 Parent(s): 088e542

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .gitattributes +1 -0
  2. .github/workflows/update_space.yml +28 -0
  3. README.md +3 -8
  4. app.log +15 -0
  5. app.py +437 -0
  6. apple.jpg +0 -0
  7. back-icon.jpg +0 -0
  8. balloon.jpg +0 -0
  9. bird-1.JPG +0 -0
  10. bird-2.JPG +0 -0
  11. button-legend.jpg +0 -0
  12. cfg_app.py +118 -0
  13. checkpoint_best_regular.pth +3 -0
  14. checkpoints/bert-base-uncased/config.json +26 -0
  15. checkpoints/bert-base-uncased/model.safetensors +3 -0
  16. checkpoints/bert-base-uncased/special_tokens_map.json +7 -0
  17. checkpoints/bert-base-uncased/tokenizer.json +0 -0
  18. checkpoints/bert-base-uncased/tokenizer_config.json +55 -0
  19. checkpoints/bert-base-uncased/vocab.txt +0 -0
  20. checkpoints/groundingdino_swinb_cogcoor.pth +3 -0
  21. datasets/.odvg.py.swp +0 -0
  22. datasets/__init__.py +23 -0
  23. datasets/__pycache__/__init__.cpython-39.pyc +0 -0
  24. datasets/__pycache__/coco.cpython-39.pyc +0 -0
  25. datasets/__pycache__/data_util.cpython-39.pyc +0 -0
  26. datasets/__pycache__/transforms.cpython-39.pyc +0 -0
  27. datasets/coco.py +827 -0
  28. datasets/coco_eval.py +266 -0
  29. datasets/coco_panoptic.py +99 -0
  30. datasets/cocogrounding_eval.py +271 -0
  31. datasets/data_util.py +170 -0
  32. datasets/dataset.py +44 -0
  33. datasets/odvg.py +288 -0
  34. datasets/panoptic_eval.py +44 -0
  35. datasets/random_crop.py +135 -0
  36. datasets/sltransform.py +247 -0
  37. datasets/transforms.py +338 -0
  38. debug/config_args_all.json +308 -0
  39. debug/config_args_raw.json +30 -0
  40. debug/config_cfg.py +140 -0
  41. debug/info.txt +0 -0
  42. deer.jpg +0 -0
  43. egg.jpg +0 -0
  44. fish.jpg +0 -0
  45. flagged/Input Image/88e8b1b238c064836037/HVITa2016a_000011.JPG +0 -0
  46. flagged/log.csv +215 -0
  47. flagged/output/a686c9813d1a6c436907/image.webp +0 -0
  48. gradio-demo-img.png +3 -0
  49. green-pea.jpg +0 -0
  50. groundingdino/util/__init__.py +1 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+gradio-demo-img.png filter=lfs diff=lfs merge=lfs -text
.github/workflows/update_space.yml ADDED
@@ -0,0 +1,28 @@
+name: Run Python script
+
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.9'
+
+      - name: Install Gradio
+        run: python -m pip install gradio
+
+      - name: Log in to Hugging Face
+        run: python -c 'import huggingface_hub; huggingface_hub.login(token="${{ secrets.hf_token }}")'
+
+      - name: Deploy to Spaces
+        run: gradio deploy
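For local testing, the login step of this workflow can be reproduced in Python before running `gradio deploy` (the workflow's final step) from the repository root. This is a minimal sketch, not part of the repo; reading the token from an `HF_TOKEN` environment variable is an illustrative assumption in place of the Actions secret.

```python
import os
import huggingface_hub

# Same call the workflow issues via `python -c ...`, but the token comes from
# a local environment variable (illustrative name) instead of secrets.hf_token.
huggingface_hub.login(token=os.environ["HF_TOKEN"])
# After this, `gradio deploy` can be invoked in the same shell, as in the workflow.
```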
README.md CHANGED
@@ -1,12 +1,7 @@
 ---
-title: CountGD Multi-Modal Open-World Counting
-emoji: 🏃
-colorFrom: blue
-colorTo: purple
+title: CountGD_Multi-Modal_Open-World_Counting
+app_file: app.py
 sdk: gradio
 sdk_version: 4.37.2
-app_file: app.py
-pinned: false
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+nohup python -u app.py &
app.log ADDED
@@ -0,0 +1,15 @@
+nohup: ignoring input
+/home/niki/anaconda3/envs/gradio/lib/python3.9/site-packages/torch/functional.py:507: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at ../aten/src/ATen/native/TensorShape.cpp:3549.)
+return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
+Some weights of BertModel were not initialized from the model checkpoint at checkpoints/bert-base-uncased and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
+You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+- Avoid using `tokenizers` before the fork if possible
+- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+/home/niki/anaconda3/envs/gradio/lib/python3.9/site-packages/transformers/modeling_utils.py:977: FutureWarning: The `device` argument is deprecated and will be removed in v5 of Transformers.
+warnings.warn(
+/home/niki/anaconda3/envs/gradio/lib/python3.9/site-packages/torch/utils/checkpoint.py:460: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
+warnings.warn(
+/home/niki/anaconda3/envs/gradio/lib/python3.9/site-packages/torch/utils/checkpoint.py:90: UserWarning: None of the inputs have requires_grad=True. Gradients will be None
+warnings.warn(
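The tokenizers fork warning captured in this log can be silenced by setting the environment variable the warning itself names, before the Hugging Face libraries are imported. A minimal sketch; the only assumption is where the assignment is placed:

```python
import os

# Must run before transformers/tokenizers are imported so the setting takes effect,
# mirroring the option suggested in the log: TOKENIZERS_PARALLELISM=(true | false).
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import transformers  # imported afterwards; no fork/parallelism warning is emitted
```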
app.py ADDED
@@ -0,0 +1,437 @@
+import gradio as gr
+import copy
+import random
+import torch
+import PIL
+from PIL import Image, ImageDraw, ImageFont
+import torchvision.transforms.functional as F
+import numpy as np
+import argparse
+import json
+import plotly.express as px
+import pandas as pd
+from util.slconfig import SLConfig, DictAction
+from util.misc import nested_tensor_from_tensor_list
+import datasets.transforms as T
+import scipy.ndimage as ndimage
+import matplotlib.pyplot as plt
+from gradio_image_prompter import ImagePrompter
+# https://github.com/PhyscalX/gradio-image-prompter/tree/main/backend/gradio_image_prompter/templates/component
+import io
+from enum import Enum
+import os
+os.environ["GRADIO_TEMP_DIR"] = os.path.join(os.getcwd(), "tmp")
+
+class AppSteps(Enum):
+    JUST_TEXT = 1
+    TEXT_AND_EXEMPLARS = 2
+    JUST_EXEMPLARS = 3
+    FULL_APP = 4
+
+CONF_THRESH = 0.23
+
+# MODEL:
+def get_args_parser():
+    """
+    Example eval command:
+
+    >> python main.py --output_dir ./gdino_test -c config/cfg_fsc147_vit_b_test.py --eval --datasets config/datasets_fsc147.json --pretrain_model_path ../checkpoints_and_logs/gdino_train/checkpoint_best_regular.pth --options text_encoder_type=checkpoints/bert-base-uncased --sam_tt_norm --crop
+    """
+    parser = argparse.ArgumentParser("Set transformer detector", add_help=False)
+    parser.add_argument(
+        "--options",
+        nargs="+",
+        action=DictAction,
+        help="override some settings in the used config, the key-value pair "
+        "in xxx=yyy format will be merged into config file.",
+    )
+
+    # dataset parameters
+    parser.add_argument("--remove_difficult", action="store_true")
+    parser.add_argument("--fix_size", action="store_true")
+
+    # training parameters
+    parser.add_argument("--note", default="", help="add some notes to the experiment")
+    parser.add_argument(
+        "--device", default="cuda", help="device to use for training / testing"
+    )
+    parser.add_argument("--resume", default="", help="resume from checkpoint")
+    parser.add_argument(
+        "--pretrain_model_path",
+        help="load from other checkpoint",
+        default="checkpoint_best_regular.pth",
+    )
+    parser.add_argument("--finetune_ignore", type=str, nargs="+")
+    parser.add_argument(
+        "--start_epoch", default=0, type=int, metavar="N", help="start epoch"
+    )
+    parser.add_argument("--eval", action="store_false")
+    parser.add_argument("--num_workers", default=8, type=int)
+    parser.add_argument("--test", action="store_true")
+    parser.add_argument("--debug", action="store_true")
+    parser.add_argument("--find_unused_params", action="store_true")
+    parser.add_argument("--save_results", action="store_true")
+    parser.add_argument("--save_log", action="store_true")
+
+    # distributed training parameters
+    parser.add_argument(
+        "--world_size", default=1, type=int, help="number of distributed processes"
+    )
+    parser.add_argument(
+        "--dist_url", default="env://", help="url used to set up distributed training"
+    )
+    parser.add_argument(
+        "--rank", default=0, type=int, help="number of distributed processes"
+    )
+    parser.add_argument(
+        "--local_rank", type=int, help="local rank for DistributedDataParallel"
+    )
+    parser.add_argument(
+        "--local-rank", type=int, help="local rank for DistributedDataParallel"
+    )
+    parser.add_argument("--amp", action="store_true", help="Train with mixed precision")
+    return parser
+
+# Get counting model.
+def build_model_and_transforms(args):
+    normalize = T.Compose(
+        [T.ToTensor(), T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]
+    )
+    data_transform = T.Compose(
+        [
+            T.RandomResize([800], max_size=1333),
+            normalize,
+        ]
+    )
+    cfg = SLConfig.fromfile("cfg_app.py")
+    cfg.merge_from_dict({"text_encoder_type": "checkpoints/bert-base-uncased"})
+    cfg_dict = cfg._cfg_dict.to_dict()
+    args_vars = vars(args)
+    for k, v in cfg_dict.items():
+        if k not in args_vars:
+            setattr(args, k, v)
+        else:
+            raise ValueError("Key {} can used by args only".format(k))
+
+    device = torch.device(args.device)
+    # fix the seed for reproducibility
+    seed = 42
+    torch.manual_seed(seed)
+    np.random.seed(seed)
+    random.seed(seed)
+
+    # we use register to maintain models from catdet6 on.
+    from models.registry import MODULE_BUILD_FUNCS
+
+    assert args.modelname in MODULE_BUILD_FUNCS._module_dict
+
+    build_func = MODULE_BUILD_FUNCS.get(args.modelname)
+    model, _, _ = build_func(args)
+
+    model.to(device)
+
+    checkpoint = torch.load(args.pretrain_model_path, map_location="cpu")["model"]
+    model.load_state_dict(checkpoint, strict=False)
+
+    model.eval()
+
+    return model, data_transform
+
+
+parser = argparse.ArgumentParser("Counting Application", parents=[get_args_parser()])
+args = parser.parse_args()
+model, transform = build_model_and_transforms(args)
+
+examples = [
+    ["strawberry.jpg", "strawberry", {"image": "strawberry.jpg"}],
+    ["strawberry.jpg", "blueberry", {"image": "strawberry.jpg"}],
+    ["bird-1.JPG", "bird", {"image": "bird-2.JPG"}],
+    ["fish.jpg", "fish", {"image": "fish.jpg"}],
+    ["women.jpg", "girl", {"image": "women.jpg"}],
+    ["women.jpg", "boy", {"image": "women.jpg"}],
+    ["balloon.jpg", "hot air balloon", {"image": "balloon.jpg"}],
+    ["deer.jpg", "deer", {"image": "deer.jpg"}],
+    ["apple.jpg", "apple", {"image": "apple.jpg"}],
+    ["egg.jpg", "egg", {"image": "egg.jpg"}],
+    ["stamp.jpg", "stamp", {"image": "stamp.jpg"}],
+    ["green-pea.jpg", "green pea", {"image": "green-pea.jpg"}],
+    ["lego.jpg", "lego", {"image": "lego.jpg"}]
+]
+
+# APP:
+def get_box_inputs(prompts):
+    box_inputs = []
+    for prompt in prompts:
+        if prompt[2] == 2.0 and prompt[5] == 3.0:
+            box_inputs.append([prompt[0], prompt[1], prompt[3], prompt[4]])
+
+    return box_inputs
+
+def get_ind_to_filter(text, word_ids, keywords):
+    if len(keywords) <= 0:
+        return list(range(len(word_ids)))
+    input_words = text.split()
+    keywords = keywords.split(",")
+    keywords = [keyword.strip() for keyword in keywords]
+
+    word_inds = []
+    for keyword in keywords:
+        if keyword in input_words:
+            if len(word_inds) <= 0:
+                ind = input_words.index(keyword)
+                word_inds.append(ind)
+            else:
+                ind = input_words.index(keyword, word_inds[-1])
+                word_inds.append(ind)
+        else:
+            raise Exception("Only specify keywords in the input text!")
+
+    inds_to_filter = []
+    for ind in range(len(word_ids)):
+        word_id = word_ids[ind]
+        if word_id in word_inds:
+            inds_to_filter.append(ind)
+
+    return inds_to_filter
+
+def count(image, text, prompts, state):
+    print("state: " + str(state))
+    keywords = "" # do not handle this for now
+    # Handle no prompt case.
+    if prompts is None:
+        prompts = {"image": image, "points": []}
+    input_image, _ = transform(image, {"exemplars": torch.tensor([])})
+    input_image = input_image.unsqueeze(0).cuda()
+    exemplars = get_box_inputs(prompts["points"])
+    print(exemplars)
+    input_image_exemplars, exemplars = transform(prompts["image"], {"exemplars": torch.tensor(exemplars)})
+    input_image_exemplars = input_image_exemplars.unsqueeze(0).cuda()
+    exemplars = [exemplars["exemplars"].cuda()]
+
+    with torch.no_grad():
+        model_output = model(
+            nested_tensor_from_tensor_list(input_image),
+            nested_tensor_from_tensor_list(input_image_exemplars),
+            exemplars,
+            [torch.tensor([0]).cuda() for _ in range(len(input_image))],
+            captions=[text + " ."] * len(input_image),
+        )
+
+    ind_to_filter = get_ind_to_filter(text, model_output["token"][0].word_ids, keywords)
+    print(model_output["token"][0].tokens)
+    print(ind_to_filter)
+    print(model_output["pred_logits"].sigmoid()[0].shape)
+    logits = model_output["pred_logits"].sigmoid()[0][:, ind_to_filter]
+    print(logits.shape)
+    boxes = model_output["pred_boxes"][0]
+    if len(keywords.strip()) > 0:
+        box_mask = (logits > CONF_THRESH).sum(dim=-1) == len(ind_to_filter)
+    else:
+        box_mask = logits.max(dim=-1).values > CONF_THRESH
+    logits = logits[box_mask, :].cpu().numpy()
+    boxes = boxes[box_mask, :].cpu().numpy()
+
+    # Plot results.
+    (w, h) = image.size
+    det_map = np.zeros((h, w))
+    det_map[(h * boxes[:, 1]).astype(int), (w * boxes[:, 0]).astype(int)] = 1
+    det_map = ndimage.gaussian_filter(
+        det_map, sigma=(w // 200, w // 200), order=0
+    )
+    plt.imshow(image)
+    plt.imshow(det_map[None, :].transpose(1, 2, 0), 'jet', interpolation='none', alpha=0.7)
+    plt.axis('off')
+    img_buf = io.BytesIO()
+    plt.savefig(img_buf, format='png', bbox_inches='tight')
+
+    output_img = Image.open(img_buf)
+
+    if AppSteps.TEXT_AND_EXEMPLARS not in state:
+        exemplar_image = ImagePrompter(type='pil', label='Visual Exemplar Image', value=prompts, interactive=True, visible=True)
+        new_submit_btn = gr.Button("Count", variant="primary", interactive=False)
+        state = [AppSteps.JUST_TEXT, AppSteps.TEXT_AND_EXEMPLARS]
+        main_instructions_comp = gr.Markdown(visible=False)
+        step_3 = gr.Tab(visible=False)
+    elif AppSteps.FULL_APP not in state:
+        exemplar_image = ImagePrompter(type='pil', label='Visual Exemplar Image', value=prompts, interactive=True, visible=True)
+        new_submit_btn = submit_btn
+        state = [AppSteps.JUST_TEXT, AppSteps.TEXT_AND_EXEMPLARS, AppSteps.FULL_APP]
+        main_instructions_comp = gr.Markdown(visible=True)
+        step_3 = gr.Tab(visible=True)
+    else:
+        exemplar_image = ImagePrompter(type='pil', label='Visual Exemplar Image', value=prompts, interactive=True, visible=True)
+        new_submit_btn = submit_btn
+        main_instructions_comp = gr.Markdown(visible=True)
+        step_3 = gr.Tab(visible=True)
+
+    out_label = "Detected instances predicted with"
+    if len(text.strip()) > 0:
+        out_label += " text"
+        if exemplars[0].size()[0] == 1:
+            out_label += " and " + str(exemplars[0].size()[0]) + " visual exemplar."
+        elif exemplars[0].size()[0] > 1:
+            out_label += " and " + str(exemplars[0].size()[0]) + " visual exemplars."
+        else:
+            out_label += "."
+    elif exemplars[0].size()[0] > 0:
+        if exemplars[0].size()[0] == 1:
+            out_label += " " + str(exemplars[0].size()[0]) + " visual exemplar."
+        else:
+            out_label += " " + str(exemplars[0].size()[0]) + " visual exemplars."
+    else:
+        out_label = "Nothing specified to detect."
+    return (gr.Image(output_img, visible=True, label=out_label, show_label=True), gr.Number(label="Predicted Count", visible=True, value=boxes.shape[0]), new_submit_btn, gr.Tab(visible=True), step_3, state)
+
+def count_main(image, text, prompts):
+    keywords = "" # do not handle this for now
+    # Handle no prompt case.
+    if prompts is None:
+        prompts = {"image": image, "points": []}
+    input_image, _ = transform(image, {"exemplars": torch.tensor([])})
+    input_image = input_image.unsqueeze(0).cuda()
+    exemplars = get_box_inputs(prompts["points"])
+    print(exemplars)
+    input_image_exemplars, exemplars = transform(prompts["image"], {"exemplars": torch.tensor(exemplars)})
+    input_image_exemplars = input_image_exemplars.unsqueeze(0).cuda()
+    exemplars = [exemplars["exemplars"].cuda()]
+
+    with torch.no_grad():
+        model_output = model(
+            nested_tensor_from_tensor_list(input_image),
+            nested_tensor_from_tensor_list(input_image_exemplars),
+            exemplars,
+            [torch.tensor([0]).cuda() for _ in range(len(input_image))],
+            captions=[text + " ."] * len(input_image),
+        )
+
+    ind_to_filter = get_ind_to_filter(text, model_output["token"][0].word_ids, keywords)
+    print(model_output["token"][0].tokens)
+    print(ind_to_filter)
+    print(model_output["pred_logits"].sigmoid()[0].shape)
+    logits = model_output["pred_logits"].sigmoid()[0][:, ind_to_filter]
+    print(logits.shape)
+    boxes = model_output["pred_boxes"][0]
+    if len(keywords.strip()) > 0:
+        box_mask = (logits > CONF_THRESH).sum(dim=-1) == len(ind_to_filter)
+    else:
+        box_mask = logits.max(dim=-1).values > CONF_THRESH
+    logits = logits[box_mask, :].cpu().numpy()
+    boxes = boxes[box_mask, :].cpu().numpy()
+
+    # Plot results.
+    (w, h) = image.size
+    det_map = np.zeros((h, w))
+    det_map[(h * boxes[:, 1]).astype(int), (w * boxes[:, 0]).astype(int)] = 1
+    det_map = ndimage.gaussian_filter(
+        det_map, sigma=(w // 200, w // 200), order=0
+    )
+    plt.imshow(image)
+    plt.imshow(det_map[None, :].transpose(1, 2, 0), 'jet', interpolation='none', alpha=0.7)
+    plt.axis('off')
+    img_buf = io.BytesIO()
+    plt.savefig(img_buf, format='png', bbox_inches='tight')
+
+    output_img = Image.open(img_buf)
+
+    out_label = "Detected instances predicted with"
+    if len(text.strip()) > 0:
+        out_label += " text"
+        if exemplars[0].size()[0] == 1:
+            out_label += " and " + str(exemplars[0].size()[0]) + " visual exemplar."
+        elif exemplars[0].size()[0] > 1:
+            out_label += " and " + str(exemplars[0].size()[0]) + " visual exemplars."
+        else:
+            out_label += "."
+    elif exemplars[0].size()[0] > 0:
+        if exemplars[0].size()[0] == 1:
+            out_label += " " + str(exemplars[0].size()[0]) + " visual exemplar."
+        else:
+            out_label += " " + str(exemplars[0].size()[0]) + " visual exemplars."
+    else:
+        out_label = "Nothing specified to detect."
+    return (gr.Image(output_img, visible=True, label=out_label, show_label=True), gr.Number(label="Predicted Count", visible=True, value=boxes.shape[0]))
+
+def remove_label(image):
+    return gr.Image(show_label=False)
+
+def check_submit_btn(exemplar_image_prompts, state):
+    if AppSteps.TEXT_AND_EXEMPLARS not in state or len(state) == 3:
+        return gr.Button("Count", variant="primary", interactive=True)
+    elif exemplar_image_prompts is None:
+        return gr.Button("Count", variant="primary", interactive=False)
+    elif len(get_box_inputs(exemplar_image_prompts["points"])) > 0:
+        return gr.Button("Count", variant="primary", interactive=True)
+    else:
+        return gr.Button("Count", variant="primary", interactive=False)
+
+exemplar_img_drawing_instructions_part_1 = '<p><strong>Congrats, you have counted the strawberries!</strong> You can also draw a box around the object you want to count. <strong>Click and drag the mouse on the image below to draw a box around one of the strawberries.</strong> You can click the back button in the top right of the image to delete the box and try again.<img src="file/button-legend.jpg" width="750"></p>'
+exemplar_img_drawing_instructions_part_2 = '<p>The boxes you draw are called \"visual exemplars,\" image examples of what you want the model to count. You can add more boxes around more examples of strawberries in the image above to increase the accuracy of the predicted count. You can also use strawberries from a different image to specify the object to count by uploading or pasting a new image above and drawing boxes around strawberries in it.</p>'
+instructions_main = """
+# How to Use the App
+As shown earlier, there are 3 ways to specify the object to count: (1) with text only, (2) with text and any number of boxes (i.e., "visual exemplars") around example objects, and (3) with visual exemplars only. What is being used is indicated in the top left of the output image. How to try each case is detailed below.
+
+<ol>
+<li><strong>Text Only: </strong> Only provide text describing the object to count in the textbox titled "What would you like to count?" Delete all boxes drawn on the visual exemplar image.</li>
+<li><strong>Text + Visual Exemplars: </strong> Provide text describing the object to count in the textbox titled "What would you like to count?" and draw at least one box around an example object in the visual exemplar image.</li>
+<li><strong>Visual Exemplars Only: </strong> Remove all text in the textbox titled "What would you like to count?" and draw at least one box around an example object in the visual exemplar image.</li>
+</ol>
+
+## Click on the "App" tab at the top of the screen to exit the tutorial and start using the main app!
+"""
+
+with gr.Blocks(title="CountGD: Multi-Modal Open-World Counting", theme="soft", head="""<meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=1">""") as demo:
+    state = gr.State(value=[AppSteps.JUST_TEXT])
+    with gr.Tab("Tutorial"):
+        with gr.Row():
+            with gr.Column():
+                with gr.Tab("Step 3", visible=False) as step_3:
+                    main_instructions = gr.Markdown(instructions_main)
+                with gr.Tab("Step 2", visible=False) as step_2:
+                    gr.Markdown(exemplar_img_drawing_instructions_part_1)
+                    exemplar_image = ImagePrompter(type='pil', label='Visual Exemplar Image', show_label=True, value={"image": "strawberry.jpg", "points": []}, interactive=True)
+                    with gr.Accordion("Open for Further Information", open=False):
+                        gr.Markdown(exemplar_img_drawing_instructions_part_2)
+                with gr.Tab("Step 1", visible=True) as step_1:
+                    input_image = gr.Image(type='pil', label='Input Image', show_label='True', value="strawberry.jpg", interactive=False, width="30vw")
+                    gr.Markdown('# Click "Count" to count the strawberries.')
+
+            with gr.Column():
+                with gr.Tab("Output Image"):
+                    detected_instances = gr.Image(label="Detected Instances", show_label='True', interactive=False, visible=True, width="40vw")
+
+        with gr.Row():
+            input_text = gr.Textbox(label="What would you like to count?", value="strawberry", interactive=True)
+            pred_count = gr.Number(label="Predicted Count", visible=False)
+        submit_btn = gr.Button("Count", variant="primary", interactive=True)
+
+        submit_btn.click(fn=remove_label, inputs=[detected_instances], outputs=[detected_instances]).then(fn=count, inputs=[input_image, input_text, exemplar_image, state], outputs=[detected_instances, pred_count, submit_btn, step_2, step_3, state])
+        exemplar_image.change(check_submit_btn, inputs=[exemplar_image, state], outputs=[submit_btn])
+    with gr.Tab("App", visible=True) as main_app:
+
+        gr.Markdown(
+            """
+            # <center>CountGD: Multi-Modal Open-World Counting
+            <center><h3>Count objects with text, visual exemplars, or both together.</h3>
+            <h3>Scroll down to try more examples</h3>
+            <h3><a href='https://github.com/niki-amini-naieni/CountGD/' target='_blank' rel='noopener'>[paper]</a>
+            <a href='https://github.com/niki-amini-naieni/CountGD/' target='_blank' rel='noopener'>[code]</a></h3>
+            Limitation: this app does not support fine-grained counting based on attributes or visual grounding inputs yet.</center>
+            """
+        )
+
+        with gr.Row():
+            with gr.Column():
+                input_image_main = gr.Image(type='pil', label='Input Image', show_label='True', value="strawberry.jpg", interactive=True)
+                input_text_main = gr.Textbox(label="What would you like to count?", placeholder="", value="strawberry")
+                exemplar_image_main = ImagePrompter(type='pil', label='Visual Exemplar Image', show_label=True, value={"image": "strawberry.jpg", "points": []}, interactive=True)
+            with gr.Column():
+                detected_instances_main = gr.Image(label="Detected Instances", show_label='True', interactive=False)
+                pred_count_main = gr.Number(label="Predicted Count")
+        submit_btn_main = gr.Button("Count", variant="primary")
+        clear_btn_main = gr.ClearButton(variant="secondary")
+        gr.Examples(label="Examples: click on a row to load the example. Add visual exemplars by drawing boxes on the loaded \"Visual Exemplar Image.\"", examples=examples, inputs=[input_image_main, input_text_main, exemplar_image_main])
+        submit_btn_main.click(fn=remove_label, inputs=[detected_instances_main], outputs=[detected_instances_main]).then(fn=count_main, inputs=[input_image_main, input_text_main, exemplar_image_main], outputs=[detected_instances_main, pred_count_main])
+        clear_btn_main.add([input_image_main, input_text_main, exemplar_image_main, detected_instances_main, pred_count_main])
+
+
+demo.launch(share=True, allowed_paths=['back-icon.jpg', 'paste-icon.jpg', 'upload-icon.jpg', 'button-legend.jpg'])
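The ImagePrompter component returns each drawn shape as a six-element record, and `get_box_inputs` in app.py keeps only records whose third and sixth fields are 2.0 and 3.0, which the component appears to use to mark the two corners of a drawn box. A minimal, self-contained sketch of that filtering on made-up prompt records (coordinate values and the second, click-style record are illustrative only):

```python
# Hypothetical prompt records: one drawn box and one non-box record.
points = [
    [10.0, 20.0, 2.0, 110.0, 220.0, 3.0],  # box record: (x0, y0, flag, x1, y1, flag)
    [50.0, 60.0, 1.0, 0.0, 0.0, 4.0],      # non-box record, filtered out
]

def get_box_inputs(prompts):
    # Same logic as app.py: keep only records flagged as boxes (2.0 ... 3.0)
    # and return them as [x0, y0, x1, y1] exemplar boxes.
    return [[p[0], p[1], p[3], p[4]] for p in prompts if p[2] == 2.0 and p[5] == 3.0]

print(get_box_inputs(points))  # [[10.0, 20.0, 110.0, 220.0]]
```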
apple.jpg ADDED
back-icon.jpg ADDED
balloon.jpg ADDED
bird-1.JPG ADDED
bird-2.JPG ADDED
button-legend.jpg ADDED
cfg_app.py ADDED
@@ -0,0 +1,118 @@
+data_aug_scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
+data_aug_max_size = 1333
+data_aug_scales2_resize = [400, 500, 600]
+data_aug_scales2_crop = [384, 600]
+data_aug_scale_overlap = None
+batch_size = 4
+modelname = 'groundingdino'
+backbone = "swin_B_384_22k"
+position_embedding = 'sine'
+pe_temperatureH = 20
+pe_temperatureW = 20
+return_interm_indices = [1, 2, 3]
+enc_layers = 6
+dec_layers = 6
+pre_norm = False
+dim_feedforward = 2048
+hidden_dim = 256
+dropout = 0.0
+nheads = 8
+num_queries = 900
+query_dim = 4
+num_patterns = 0
+num_feature_levels = 4
+enc_n_points = 4
+dec_n_points = 4
+two_stage_type = 'standard'
+two_stage_bbox_embed_share = False
+two_stage_class_embed_share = False
+transformer_activation = 'relu'
+dec_pred_bbox_embed_share = True
+dn_box_noise_scale = 1.0
+dn_label_noise_ratio = 0.5
+dn_label_coef = 1.0
+dn_bbox_coef = 1.0
+embed_init_tgt = True
+dn_labelbook_size = 91
+max_text_len = 256
+text_encoder_type = "bert-base-uncased"
+use_text_enhancer = True
+use_fusion_layer = True
+use_checkpoint = True
+use_transformer_ckpt = True
+use_text_cross_attention = True
+text_dropout = 0.0
+fusion_dropout = 0.0
+fusion_droppath = 0.1
+sub_sentence_present = True
+max_labels = 90 # pos + neg
+lr = 0.0001 # base learning rate
+backbone_freeze_keywords = None # only for gdino backbone
+freeze_keywords = ['backbone.0', 'bert'] # for whole model, e.g. ['backbone.0', 'bert'] for freeze visual encoder and text encoder
+lr_backbone = 1e-05 # specific learning rate
+lr_backbone_names = ['backbone.0', 'bert']
+lr_linear_proj_mult = 1e-05
+lr_linear_proj_names = ['ref_point_head', 'sampling_offsets']
+weight_decay = 0.0001
+param_dict_type = 'ddetr_in_mmdet'
+ddetr_lr_param = False
+epochs = 30
+lr_drop = 10
+save_checkpoint_interval = 10
+clip_max_norm = 0.1
+onecyclelr = False
+multi_step_lr = False
+lr_drop_list = [10, 20]
+frozen_weights = None
+dilation = False
+pdetr3_bbox_embed_diff_each_layer = False
+pdetr3_refHW = -1
+random_refpoints_xy = False
+fix_refpoints_hw = -1
+dabdetr_yolo_like_anchor_update = False
+dabdetr_deformable_encoder = False
+dabdetr_deformable_decoder = False
+use_deformable_box_attn = False
+box_attn_type = 'roi_align'
+dec_layer_number = None
+decoder_layer_noise = False
+dln_xy_noise = 0.2
+dln_hw_noise = 0.2
+add_channel_attention = False
+add_pos_value = False
+two_stage_pat_embed = 0
+two_stage_add_query_num = 0
+two_stage_learn_wh = False
+two_stage_default_hw = 0.05
+two_stage_keep_all_tokens = False
+num_select = 900
+batch_norm_type = 'FrozenBatchNorm2d'
+masks = False
+aux_loss = True
+set_cost_class = 5.0
+set_cost_bbox = 1.0
+set_cost_giou = 0.0
+cls_loss_coef = 5.0
+bbox_loss_coef = 1.0
+giou_loss_coef = 0.0
+enc_loss_coef = 1.0
+interm_loss_coef = 1.0
+no_interm_box_loss = False
+mask_loss_coef = 1.0
+dice_loss_coef = 1.0
+focal_alpha = 0.25
+focal_gamma = 2.0
+decoder_sa_type = 'sa'
+matcher_type = 'HungarianMatcher'
+decoder_module_seq = ['sa', 'ca', 'ffn']
+nms_iou_threshold = -1
+dec_pred_class_embed_share = True
+match_unstable_error = True
+use_detached_boxes_dec_out = False
+dn_scalar = 100
+
+box_threshold = 0.23
+text_threshold = 0
+use_coco_eval = False
+label_list = ['alcohol bottle', 'baguette roll', 'ball', 'banana', 'bead', 'bee', 'birthday candle', 'biscuit', 'boat', 'bottle', 'bowl', 'box', 'bread roll', 'brick', 'buffalo', 'bun', 'calamari ring', 'can', 'candle', 'cap', 'car', 'cartridge', 'cassette', 'cement bag', 'cereal', 'chewing gum piece', 'chopstick', 'clam', 'coffee bean', 'coin', 'cotton ball', 'cow', 'crane', 'crayon', 'croissant', 'crow', 'cup', 'cupcake', 'cupcake holder', 'fish', 'gemstone', 'go game piece', 'goat', 'goldfish snack', 'goose', 'ice cream', 'ice cream cone', 'instant noodle', 'jade stone', 'jeans', 'kidney bean', 'kitchen towel', 'lighter', 'lipstick', 'm&m piece', 'macaron', 'match', 'meat skewer', 'mini blind', 'mosaic tile', 'naan bread', 'nail', 'nut', 'onion ring', 'orange', 'pearl', 'pen', 'pencil', 'penguin', 'pepper', 'person', 'pigeon', 'plate', 'polka dot tile', 'potato', 'rice bag', 'roof tile', 'screw', 'shoe', 'spoon', 'spring roll', 'stair', 'stapler pin', 'straw', 'supermarket shelf', 'swan', 'tomato', 'watermelon', 'window', 'zebra']
+val_label_list = ["apple", "candy piece", "carrom board piece", "cashew nut", "comic book", "crab cake", "deer", "egg", "elephant", "finger food", "green pea", "hot air balloon", "keyboard key", "lego", "marble", "marker", "nail polish", "potato chip", "red bean", "round dessert", "sauce bottle", "sea shell", "sheep", "ski", "stamp", "sticky note", "strawberry", "sunglasses", "tree log", "watch"]
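app.py consumes this file through `SLConfig.fromfile` and then copies every key onto the parsed argparse namespace, so each flat assignment above becomes a model or evaluation setting. A minimal sketch of that flow, reusing only the calls already shown in `build_model_and_transforms`:

```python
import argparse
from util.slconfig import SLConfig

cfg = SLConfig.fromfile("cfg_app.py")
# Point the text encoder at the local BERT checkpoint, exactly as app.py does.
cfg.merge_from_dict({"text_encoder_type": "checkpoints/bert-base-uncased"})

args = argparse.Namespace()
for k, v in cfg._cfg_dict.to_dict().items():
    setattr(args, k, v)  # e.g. args.modelname == 'groundingdino', args.box_threshold == 0.23
```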
checkpoint_best_regular.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1bab864b17db345b4c6e3aaabb5765bc2c0a90d0bc8defb5e664a74a50aa126
+size 1250122522
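This is a Git LFS pointer, so the roughly 1.25 GB weights file must be fetched before the app can use it. Once present, the checkpoint is read the same way `build_model_and_transforms` does; a minimal sketch that only inspects the file (the key layout comes from app.py):

```python
import torch

# Load the LFS-backed checkpoint on CPU; the weights live under the "model" key,
# which app.py passes to model.load_state_dict(..., strict=False).
checkpoint = torch.load("checkpoint_best_regular.pth", map_location="cpu")
state_dict = checkpoint["model"]
print(len(state_dict), "parameter tensors")
```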
checkpoints/bert-base-uncased/config.json ADDED
@@ -0,0 +1,26 @@
+{
+  "_name_or_path": "bert-base-uncased",
+  "architectures": [
+    "BertModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.39.1",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}
checkpoints/bert-base-uncased/model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5875f83030335d194f35b15a32e7f4e654aa302aa83af032a3f36d035dcaf8af
+size 435588776
checkpoints/bert-base-uncased/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
checkpoints/bert-base-uncased/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/bert-base-uncased/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}
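Together with `vocab.txt` and `model.safetensors`, these files form a complete local `bert-base-uncased` checkpoint, which app.py selects via `text_encoder_type=checkpoints/bert-base-uncased`. As a quick offline sanity check it can be loaded with the standard `transformers` API; this is a minimal sketch, not part of the repo (and, as app.log notes, the pooler weights are freshly initialized):

```python
from transformers import BertModel, BertTokenizer

local_dir = "checkpoints/bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(local_dir)  # reads vocab.txt / tokenizer_config.json
model = BertModel.from_pretrained(local_dir)          # reads config.json / model.safetensors

enc = tokenizer("strawberry .", return_tensors="pt")
out = model(**enc)
print(out.last_hidden_state.shape)  # (1, num_tokens, 768), matching hidden_size in config.json
```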
checkpoints/bert-base-uncased/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/groundingdino_swinb_cogcoor.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46270f7a822e6906b655b729c90613e48929d0f2bb8b9b76fd10a856f3ac6ab7
+size 938057991
datasets/.odvg.py.swp ADDED
Binary file (24.6 kB). View file
 
datasets/__init__.py ADDED
@@ -0,0 +1,23 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import torch.utils.data
+import torchvision
+from .coco import build as build_coco
+
+
+def get_coco_api_from_dataset(dataset):
+    for _ in range(10):
+        # if isinstance(dataset, torchvision.datasets.CocoDetection):
+        #     break
+        if isinstance(dataset, torch.utils.data.Subset):
+            dataset = dataset.dataset
+    if isinstance(dataset, torchvision.datasets.CocoDetection):
+        return dataset.coco
+
+
+def build_dataset(image_set, args, datasetinfo):
+    if datasetinfo["dataset_mode"] == 'coco':
+        return build_coco(image_set, args, datasetinfo)
+    if datasetinfo["dataset_mode"] == 'odvg':
+        from .odvg import build_odvg
+        return build_odvg(image_set, args, datasetinfo)
+    raise ValueError(f'dataset {args.dataset_file} not supported')
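`get_coco_api_from_dataset` repeatedly unwraps `torch.utils.data.Subset` wrappers until it reaches the underlying `CocoDetection` object, whose `.coco` attribute it returns. A minimal, runnable sketch of the same unwrapping loop, demonstrated on a plain `TensorDataset` purely for illustration:

```python
import torch
from torch.utils.data import Subset, TensorDataset

base = TensorDataset(torch.zeros(4, 2))
wrapped = Subset(Subset(base, [0, 1, 2]), [0, 1])  # nested subsets, as produced by split helpers

dataset = wrapped
for _ in range(10):  # same bounded unwrapping as datasets/__init__.py
    if isinstance(dataset, Subset):
        dataset = dataset.dataset
print(type(dataset).__name__)  # TensorDataset; in the real code this is where .coco is read
```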
datasets/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (862 Bytes). View file
 
datasets/__pycache__/coco.cpython-39.pyc ADDED
Binary file (20.7 kB). View file
 
datasets/__pycache__/data_util.cpython-39.pyc ADDED
Binary file (4.52 kB). View file
 
datasets/__pycache__/transforms.cpython-39.pyc ADDED
Binary file (10.2 kB). View file
 
datasets/coco.py ADDED
@@ -0,0 +1,827 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2
+ """
3
+ COCO dataset which returns image_id for evaluation.
4
+
5
+ Mostly copy-paste from https://github.com/pytorch/vision/blob/13b35ff/references/detection/coco_utils.py
6
+ """
7
+ if __name__ == "__main__":
8
+ # for debug only
9
+ import os, sys
10
+
11
+ sys.path.append(os.path.dirname(sys.path[0]))
12
+ from torchvision.datasets.vision import VisionDataset
13
+
14
+ import json
15
+ from pathlib import Path
16
+ import random
17
+ import os
18
+ from typing import Any, Callable, List, Optional, Tuple
19
+
20
+ from PIL import Image
21
+
22
+ import torch
23
+ import torch.utils.data
24
+ import torchvision
25
+ from pycocotools import mask as coco_mask
26
+
27
+ from datasets.data_util import preparing_dataset
28
+ import datasets.transforms as T
29
+ from util.box_ops import box_cxcywh_to_xyxy, box_iou
30
+
31
+ __all__ = ["build"]
32
+
33
+
34
+ class label2compat:
35
+ def __init__(self) -> None:
36
+ self.category_map_str = {
37
+ "1": 1,
38
+ "2": 2,
39
+ "3": 3,
40
+ "4": 4,
41
+ "5": 5,
42
+ "6": 6,
43
+ "7": 7,
44
+ "8": 8,
45
+ "9": 9,
46
+ "10": 10,
47
+ "11": 11,
48
+ "13": 12,
49
+ "14": 13,
50
+ "15": 14,
51
+ "16": 15,
52
+ "17": 16,
53
+ "18": 17,
54
+ "19": 18,
55
+ "20": 19,
56
+ "21": 20,
57
+ "22": 21,
58
+ "23": 22,
59
+ "24": 23,
60
+ "25": 24,
61
+ "27": 25,
62
+ "28": 26,
63
+ "31": 27,
64
+ "32": 28,
65
+ "33": 29,
66
+ "34": 30,
67
+ "35": 31,
68
+ "36": 32,
69
+ "37": 33,
70
+ "38": 34,
71
+ "39": 35,
72
+ "40": 36,
73
+ "41": 37,
74
+ "42": 38,
75
+ "43": 39,
76
+ "44": 40,
77
+ "46": 41,
78
+ "47": 42,
79
+ "48": 43,
80
+ "49": 44,
81
+ "50": 45,
82
+ "51": 46,
83
+ "52": 47,
84
+ "53": 48,
85
+ "54": 49,
86
+ "55": 50,
87
+ "56": 51,
88
+ "57": 52,
89
+ "58": 53,
90
+ "59": 54,
91
+ "60": 55,
92
+ "61": 56,
93
+ "62": 57,
94
+ "63": 58,
95
+ "64": 59,
96
+ "65": 60,
97
+ "67": 61,
98
+ "70": 62,
99
+ "72": 63,
100
+ "73": 64,
101
+ "74": 65,
102
+ "75": 66,
103
+ "76": 67,
104
+ "77": 68,
105
+ "78": 69,
106
+ "79": 70,
107
+ "80": 71,
108
+ "81": 72,
109
+ "82": 73,
110
+ "84": 74,
111
+ "85": 75,
112
+ "86": 76,
113
+ "87": 77,
114
+ "88": 78,
115
+ "89": 79,
116
+ "90": 80,
117
+ }
118
+ self.category_map = {int(k): v for k, v in self.category_map_str.items()}
119
+
120
+ def __call__(self, target, img=None):
121
+ labels = target["labels"]
122
+ res = torch.zeros(labels.shape, dtype=labels.dtype)
123
+ for idx, item in enumerate(labels):
124
+ res[idx] = self.category_map[item.item()] - 1
125
+ target["label_compat"] = res
126
+ if img is not None:
127
+ return target, img
128
+ else:
129
+ return target
130
+
131
+
132
+ class label_compat2onehot:
133
+ def __init__(self, num_class=80, num_output_objs=1):
134
+ self.num_class = num_class
135
+ self.num_output_objs = num_output_objs
136
+ if num_output_objs != 1:
137
+ raise DeprecationWarning(
138
+ "num_output_objs!=1, which is only used for comparison"
139
+ )
140
+
141
+ def __call__(self, target, img=None):
142
+ labels = target["label_compat"]
143
+ place_dict = {k: 0 for k in range(self.num_class)}
144
+ if self.num_output_objs == 1:
145
+ res = torch.zeros(self.num_class)
146
+ for i in labels:
147
+ itm = i.item()
148
+ res[itm] = 1.0
149
+ else:
150
+ # compat with baseline
151
+ res = torch.zeros(self.num_class, self.num_output_objs)
152
+ for i in labels:
153
+ itm = i.item()
154
+ res[itm][place_dict[itm]] = 1.0
155
+ place_dict[itm] += 1
156
+ target["label_compat_onehot"] = res
157
+ if img is not None:
158
+ return target, img
159
+ else:
160
+ return target
161
+
162
+
163
+ class box_label_catter:
164
+ def __init__(self):
165
+ pass
166
+
167
+ def __call__(self, target, img=None):
168
+ labels = target["label_compat"]
169
+ boxes = target["boxes"]
170
+ box_label = torch.cat((boxes, labels.unsqueeze(-1)), 1)
171
+ target["box_label"] = box_label
172
+ if img is not None:
173
+ return target, img
174
+ else:
175
+ return target
176
+
177
+
178
+ class RandomSelectBoxlabels:
179
+ def __init__(
180
+ self,
181
+ num_classes,
182
+ leave_one_out=False,
183
+ blank_prob=0.8,
184
+ prob_first_item=0.0,
185
+ prob_random_item=0.0,
186
+ prob_last_item=0.8,
187
+ prob_stop_sign=0.2,
188
+ ) -> None:
189
+ self.num_classes = num_classes
190
+ self.leave_one_out = leave_one_out
191
+ self.blank_prob = blank_prob
192
+
193
+ self.set_state(
194
+ prob_first_item, prob_random_item, prob_last_item, prob_stop_sign
195
+ )
196
+
197
+ def get_state(self):
198
+ return [
199
+ self.prob_first_item,
200
+ self.prob_random_item,
201
+ self.prob_last_item,
202
+ self.prob_stop_sign,
203
+ ]
204
+
205
+ def set_state(
206
+ self, prob_first_item, prob_random_item, prob_last_item, prob_stop_sign
207
+ ):
208
+ sum_prob = prob_first_item + prob_random_item + prob_last_item + prob_stop_sign
209
+ assert sum_prob - 1 < 1e-6, (
210
+ f"Sum up all prob = {sum_prob}. prob_first_item:{prob_first_item}"
211
+ + f"prob_random_item:{prob_random_item}, prob_last_item:{prob_last_item}"
212
+ + f"prob_stop_sign:{prob_stop_sign}"
213
+ )
214
+
215
+ self.prob_first_item = prob_first_item
216
+ self.prob_random_item = prob_random_item
217
+ self.prob_last_item = prob_last_item
218
+ self.prob_stop_sign = prob_stop_sign
219
+
220
+ def sample_for_pred_first_item(self, box_label: torch.FloatTensor):
221
+ box_label_known = torch.Tensor(0, 5)
222
+ box_label_unknown = box_label
223
+ return box_label_known, box_label_unknown
224
+
225
+ def sample_for_pred_random_item(self, box_label: torch.FloatTensor):
226
+ n_select = int(random.random() * box_label.shape[0])
227
+ box_label = box_label[torch.randperm(box_label.shape[0])]
228
+ box_label_known = box_label[:n_select]
229
+ box_label_unknown = box_label[n_select:]
230
+ return box_label_known, box_label_unknown
231
+
232
+ def sample_for_pred_last_item(self, box_label: torch.FloatTensor):
233
+ box_label_perm = box_label[torch.randperm(box_label.shape[0])]
234
+ known_label_list = []
235
+ box_label_known = []
236
+ box_label_unknown = []
237
+ for item in box_label_perm:
238
+ label_i = item[4].item()
239
+ if label_i in known_label_list:
240
+ box_label_known.append(item)
241
+ else:
242
+ # first item
243
+ box_label_unknown.append(item)
244
+ known_label_list.append(label_i)
245
+ box_label_known = (
246
+ torch.stack(box_label_known)
247
+ if len(box_label_known) > 0
248
+ else torch.Tensor(0, 5)
249
+ )
250
+ box_label_unknown = (
251
+ torch.stack(box_label_unknown)
252
+ if len(box_label_unknown) > 0
253
+ else torch.Tensor(0, 5)
254
+ )
255
+ return box_label_known, box_label_unknown
256
+
257
+ def sample_for_pred_stop_sign(self, box_label: torch.FloatTensor):
258
+ box_label_unknown = torch.Tensor(0, 5)
259
+ box_label_known = box_label
260
+ return box_label_known, box_label_unknown
261
+
262
+ def __call__(self, target, img=None):
263
+ box_label = target["box_label"] # K, 5
264
+
265
+ dice_number = random.random()
266
+
267
+ if dice_number < self.prob_first_item:
268
+ box_label_known, box_label_unknown = self.sample_for_pred_first_item(
269
+ box_label
270
+ )
271
+ elif dice_number < self.prob_first_item + self.prob_random_item:
272
+ box_label_known, box_label_unknown = self.sample_for_pred_random_item(
273
+ box_label
274
+ )
275
+ elif (
276
+ dice_number
277
+ < self.prob_first_item + self.prob_random_item + self.prob_last_item
278
+ ):
279
+ box_label_known, box_label_unknown = self.sample_for_pred_last_item(
280
+ box_label
281
+ )
282
+ else:
283
+ box_label_known, box_label_unknown = self.sample_for_pred_stop_sign(
284
+ box_label
285
+ )
286
+
287
+ target["label_onehot_known"] = label2onehot(
288
+ box_label_known[:, -1], self.num_classes
289
+ )
290
+ target["label_onehot_unknown"] = label2onehot(
291
+ box_label_unknown[:, -1], self.num_classes
292
+ )
293
+ target["box_label_known"] = box_label_known
294
+ target["box_label_unknown"] = box_label_unknown
295
+
296
+ return target, img
297
+
298
+
299
+ class RandomDrop:
300
+ def __init__(self, p=0.2) -> None:
301
+ self.p = p
302
+
303
+ def __call__(self, target, img=None):
304
+ known_box = target["box_label_known"]
305
+ num_known_box = known_box.size(0)
306
+ idxs = torch.rand(num_known_box)
307
+ # indices = torch.randperm(num_known_box)[:int((1-self).p*num_known_box + 0.5 + random.random())]
308
+ target["box_label_known"] = known_box[idxs > self.p]
309
+ return target, img
310
+
311
+
312
+ class BboxPertuber:
313
+ def __init__(self, max_ratio=0.02, generate_samples=1000) -> None:
314
+ self.max_ratio = max_ratio
315
+ self.generate_samples = generate_samples
316
+ self.samples = self.generate_pertube_samples()
317
+ self.idx = 0
318
+
319
+ def generate_pertube_samples(self):
320
+ import torch
321
+
322
+ samples = (torch.rand(self.generate_samples, 5) - 0.5) * 2 * self.max_ratio
323
+ return samples
324
+
325
+ def __call__(self, target, img):
326
+ known_box = target["box_label_known"] # Tensor(K,5), K known bbox
327
+ K = known_box.shape[0]
328
+ known_box_pertube = torch.zeros(K, 6) # 4:bbox, 1:prob, 1:label
329
+ if K == 0:
330
+ pass
331
+ else:
332
+ if self.idx + K > self.generate_samples:
333
+ self.idx = 0
334
+ delta = self.samples[self.idx : self.idx + K, :]
335
+ known_box_pertube[:, :4] = known_box[:, :4] + delta[:, :4]
336
+ iou = (
337
+ torch.diag(
338
+ box_iou(
339
+ box_cxcywh_to_xyxy(known_box[:, :4]),
340
+ box_cxcywh_to_xyxy(known_box_pertube[:, :4]),
341
+ )[0]
342
+ )
343
+ ) * (1 + delta[:, -1])
344
+ known_box_pertube[:, 4].copy_(iou)
345
+ known_box_pertube[:, -1].copy_(known_box[:, -1])
346
+
347
+ target["box_label_known_pertube"] = known_box_pertube
348
+ return target, img
349
+
350
+
351
+ class RandomCutout:
352
+ def __init__(self, factor=0.5) -> None:
353
+ self.factor = factor
354
+
355
+ def __call__(self, target, img=None):
356
+ unknown_box = target["box_label_unknown"] # Ku, 5
357
+ known_box = target["box_label_known_pertube"] # Kk, 6
358
+ Ku = unknown_box.size(0)
359
+
360
+ known_box_add = torch.zeros(Ku, 6) # Ku, 6
361
+ known_box_add[:, :5] = unknown_box
362
+ known_box_add[:, 5].uniform_(0.5, 1)
363
+
364
+ known_box_add[:, :2] += known_box_add[:, 2:4] * (torch.rand(Ku, 2) - 0.5) / 2
365
+ known_box_add[:, 2:4] /= 2
366
+
367
+ target["box_label_known_pertube"] = torch.cat((known_box, known_box_add))
368
+ return target, img
369
+
370
+
371
+ class RandomSelectBoxes:
372
+ def __init__(self, num_class=80) -> None:
373
+ Warning("This is such a slow function and will be deprecated soon!!!")
374
+ self.num_class = num_class
375
+
376
+ def __call__(self, target, img=None):
377
+ boxes = target["boxes"]
378
+ labels = target["label_compat"]
379
+
380
+ # transform to list of tensors
381
+ boxs_list = [[] for i in range(self.num_class)]
382
+ for idx, item in enumerate(boxes):
383
+ label = labels[idx].item()
384
+ boxs_list[label].append(item)
385
+ boxs_list_tensor = [
386
+ torch.stack(i) if len(i) > 0 else torch.Tensor(0, 4) for i in boxs_list
387
+ ]
388
+
389
+ # random selection
390
+ box_known = []
391
+ box_unknown = []
392
+ for idx, item in enumerate(boxs_list_tensor):
393
+ ncnt = item.shape[0]
394
+ nselect = int(
395
+ random.random() * ncnt
396
+ ) # close in both sides, much faster than random.randint
397
+
398
+ item = item[torch.randperm(ncnt)]
399
+ # random.shuffle(item)
400
+ box_known.append(item[:nselect])
401
+ box_unknown.append(item[nselect:])
402
+
403
+ # box_known_tensor = [torch.stack(i) if len(i) > 0 else torch.Tensor(0,4) for i in box_known]
404
+ # box_unknown_tensor = [torch.stack(i) if len(i) > 0 else torch.Tensor(0,4) for i in box_unknown]
405
+ # print('box_unknown_tensor:', box_unknown_tensor)
406
+ target["known_box"] = box_known
407
+ target["unknown_box"] = box_unknown
408
+ return target, img
409
+
410
+
411
+ def label2onehot(label, num_classes):
412
+ """
413
+ label: Tensor(K)
414
+ """
415
+ res = torch.zeros(num_classes)
416
+ for i in label:
417
+ itm = int(i.item())
418
+ res[itm] = 1.0
419
+ return res
420
+
421
+
422
+ class MaskCrop:
423
+ def __init__(self) -> None:
424
+ pass
425
+
426
+ def __call__(self, target, img):
427
+ known_box = target["known_box"]
428
+ h, w = img.shape[1:] # h,w
429
+ # imgsize = target['orig_size'] # h,w
430
+
431
+ scale = torch.Tensor([w, h, w, h])
432
+
433
+ # _cnt = 0
434
+ for boxes in known_box:
435
+ if boxes.shape[0] == 0:
436
+ continue
437
+ box_xyxy = box_cxcywh_to_xyxy(boxes) * scale
438
+ for box in box_xyxy:
439
+ x1, y1, x2, y2 = [int(i) for i in box.tolist()]
440
+ img[:, y1:y2, x1:x2] = 0
441
+ # _cnt += 1
442
+ # print("_cnt:", _cnt)
443
+ return target, img
444
+
445
+
446
+ dataset_hook_register = {
447
+ "label2compat": label2compat,
448
+ "label_compat2onehot": label_compat2onehot,
449
+ "box_label_catter": box_label_catter,
450
+ "RandomSelectBoxlabels": RandomSelectBoxlabels,
451
+ "RandomSelectBoxes": RandomSelectBoxes,
452
+ "MaskCrop": MaskCrop,
453
+ "BboxPertuber": BboxPertuber,
454
+ }
455
+
456
+
457
+ class CocoDetection(torchvision.datasets.CocoDetection):
458
+ def __init__(
459
+ self, img_folder, ann_file, transforms, return_masks, aux_target_hacks=None
460
+ ):
461
+ super(CocoDetection, self).__init__(img_folder, ann_file)
462
+ self._transforms = transforms
463
+ self.prepare = ConvertCocoPolysToMask(return_masks)
464
+ self.aux_target_hacks = aux_target_hacks
465
+
466
+ def change_hack_attr(self, hackclassname, attrkv_dict):
467
+ target_class = dataset_hook_register[hackclassname]
468
+ for item in self.aux_target_hacks:
469
+ if isinstance(item, target_class):
470
+ for k, v in attrkv_dict.items():
471
+ setattr(item, k, v)
472
+
473
+ def get_hack(self, hackclassname):
474
+ target_class = dataset_hook_register[hackclassname]
475
+ for item in self.aux_target_hacks:
476
+ if isinstance(item, target_class):
477
+ return item
478
+
479
+ def _load_image(self, id: int) -> Image.Image:
480
+ path = self.coco.loadImgs(id)[0]["file_name"]
481
+ abs_path = os.path.join(self.root, path)
482
+ return Image.open(abs_path).convert("RGB")
483
+
484
+ def __getitem__(self, idx):
485
+ """
486
+ Output:
487
+ - target: dict of multiple items
488
+ - boxes: Tensor[num_box, 4]. \
489
+ Init type: x0,y0,x1,y1. unnormalized data.
490
+ Final type: cx,cy,w,h. normalized data.
491
+ """
492
+ try:
493
+ img, target = super(CocoDetection, self).__getitem__(idx)
494
+
495
+ except:
496
+ print("Error idx: {}".format(idx))
497
+ idx += 1
498
+ img, target = super(CocoDetection, self).__getitem__(idx)
499
+
500
+ image_id = self.ids[idx]
501
+ target = {"image_id": image_id, "annotations": target}
502
+ exemp_count = 0
503
+ for instance in target["annotations"]:
504
+ if instance["area"] != 4:
505
+ exemp_count += 1
506
+ # Only provide at most 3 visual exemplars during inference.
507
+ assert exemp_count == 3
508
+ img, target = self.prepare(img, target)
509
+ target["exemplars"] = target["boxes"][-3:]
510
+ # Remove inaccurate exemplars.
511
+ if image_id == 6003:
512
+ target["exemplars"] = torch.tensor([])
513
+ target["boxes"] = target["boxes"][:-3]
514
+ target["labels"] = target["labels"][:-3]
515
+ target["labels_uncropped"] = torch.clone(target["labels"])
516
+
517
+ if self._transforms is not None:
518
+ img, target = self._transforms(img, target)
519
+
520
+ # convert to needed format
521
+ if self.aux_target_hacks is not None:
522
+ for hack_runner in self.aux_target_hacks:
523
+ target, img = hack_runner(target, img=img)
524
+
525
+ return img, target
526
+
527
+
528
+ def convert_coco_poly_to_mask(segmentations, height, width):
529
+ masks = []
530
+ for polygons in segmentations:
531
+ rles = coco_mask.frPyObjects(polygons, height, width)
532
+ mask = coco_mask.decode(rles)
533
+ if len(mask.shape) < 3:
534
+ mask = mask[..., None]
535
+ mask = torch.as_tensor(mask, dtype=torch.uint8)
536
+ mask = mask.any(dim=2)
537
+ masks.append(mask)
538
+ if masks:
539
+ masks = torch.stack(masks, dim=0)
540
+ else:
541
+ masks = torch.zeros((0, height, width), dtype=torch.uint8)
542
+ return masks
543
+
544
+
545
+ class ConvertCocoPolysToMask(object):
546
+ def __init__(self, return_masks=False):
547
+ self.return_masks = return_masks
548
+
549
+ def __call__(self, image, target):
550
+ w, h = image.size
551
+
552
+ image_id = target["image_id"]
553
+ image_id = torch.tensor([image_id])
554
+
555
+ anno = target["annotations"]
556
+
557
+ anno = [obj for obj in anno if "iscrowd" not in obj or obj["iscrowd"] == 0]
558
+
559
+ boxes = [obj["bbox"] for obj in anno]
560
+ # guard against no boxes via resizing
561
+ boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
562
+ boxes[:, 2:] += boxes[:, :2]
563
+ boxes[:, 0::2].clamp_(min=0, max=w)
564
+ boxes[:, 1::2].clamp_(min=0, max=h)
565
+
566
+ classes = [obj["category_id"] for obj in anno]
567
+ classes = torch.tensor(classes, dtype=torch.int64)
568
+
569
+ if self.return_masks:
570
+ segmentations = [obj["segmentation"] for obj in anno]
571
+ masks = convert_coco_poly_to_mask(segmentations, h, w)
572
+
573
+ keypoints = None
574
+ if anno and "keypoints" in anno[0]:
575
+ keypoints = [obj["keypoints"] for obj in anno]
576
+ keypoints = torch.as_tensor(keypoints, dtype=torch.float32)
577
+ num_keypoints = keypoints.shape[0]
578
+ if num_keypoints:
579
+ keypoints = keypoints.view(num_keypoints, -1, 3)
580
+
581
+ keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
582
+ boxes = boxes[keep]
583
+ classes = classes[keep]
584
+ if self.return_masks:
585
+ masks = masks[keep]
586
+ if keypoints is not None:
587
+ keypoints = keypoints[keep]
588
+
589
+ target = {}
590
+ target["boxes"] = boxes
591
+ target["labels"] = classes
592
+ if self.return_masks:
593
+ target["masks"] = masks
594
+ target["image_id"] = image_id
595
+ if keypoints is not None:
596
+ target["keypoints"] = keypoints
597
+
598
+ # for conversion to coco api
599
+ area = torch.tensor([obj["area"] for obj in anno])
600
+ iscrowd = torch.tensor(
601
+ [obj["iscrowd"] if "iscrowd" in obj else 0 for obj in anno]
602
+ )
603
+ target["area"] = area[keep]
604
+ target["iscrowd"] = iscrowd[keep]
605
+
606
+ target["orig_size"] = torch.as_tensor([int(h), int(w)])
607
+ target["size"] = torch.as_tensor([int(h), int(w)])
608
+
609
+ return image, target
610
+
611
+
612
+ def make_coco_transforms(image_set, fix_size=False, strong_aug=False, args=None):
613
+ normalize = T.Compose(
614
+ [T.ToTensor(), T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]
615
+ )
616
+
617
+ # config the params for data aug
618
+ scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
619
+ max_size = 1333
620
+ scales2_resize = [400, 500, 600]
621
+ scales2_crop = [384, 600]
622
+
623
+ # update args from config files
624
+ scales = getattr(args, "data_aug_scales", scales)
625
+ max_size = getattr(args, "data_aug_max_size", max_size)
626
+ scales2_resize = getattr(args, "data_aug_scales2_resize", scales2_resize)
627
+ scales2_crop = getattr(args, "data_aug_scales2_crop", scales2_crop)
628
+
629
+ # resize them
630
+ data_aug_scale_overlap = getattr(args, "data_aug_scale_overlap", None)
631
+ if data_aug_scale_overlap is not None and data_aug_scale_overlap > 0:
632
+ data_aug_scale_overlap = float(data_aug_scale_overlap)
633
+ scales = [int(i * data_aug_scale_overlap) for i in scales]
634
+ max_size = int(max_size * data_aug_scale_overlap)
635
+ scales2_resize = [int(i * data_aug_scale_overlap) for i in scales2_resize]
636
+ scales2_crop = [int(i * data_aug_scale_overlap) for i in scales2_crop]
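+ # For example, data_aug_scale_overlap = 0.5 halves every value: the smallest scale
+ # goes from 480 -> 240 and max_size from 1333 -> 666 (int() truncates).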
637
+
638
+ datadict_for_print = {
639
+ "scales": scales,
640
+ "max_size": max_size,
641
+ "scales2_resize": scales2_resize,
642
+ "scales2_crop": scales2_crop,
643
+ }
644
+ # print("data_aug_params:", json.dumps(datadict_for_print, indent=2))
645
+
646
+ if image_set == "train":
647
+ if fix_size:
648
+ return T.Compose(
649
+ [
650
+ T.RandomHorizontalFlip(),
651
+ T.RandomResize([(max_size, max(scales))]),
652
+ # T.RandomResize([(512, 512)]),
653
+ normalize,
654
+ ]
655
+ )
656
+
657
+ if strong_aug:
658
+ import datasets.sltransform as SLT
659
+
660
+ return T.Compose(
661
+ [
662
+ T.RandomHorizontalFlip(),
663
+ T.RandomSelect(
664
+ T.RandomResize(scales, max_size=max_size),
665
+ T.Compose(
666
+ [
667
+ T.RandomResize(scales2_resize),
668
+ T.RandomSizeCrop(*scales2_crop),
669
+ T.RandomResize(scales, max_size=max_size),
670
+ ]
671
+ ),
672
+ ),
673
+ SLT.RandomSelectMulti(
674
+ [
675
+ SLT.RandomCrop(),
676
+ SLT.LightingNoise(),
677
+ SLT.AdjustBrightness(2),
678
+ SLT.AdjustContrast(2),
679
+ ]
680
+ ),
681
+ normalize,
682
+ ]
683
+ )
684
+
685
+ return T.Compose(
686
+ [
687
+ T.RandomHorizontalFlip(),
688
+ T.RandomSelect(
689
+ T.RandomResize(scales, max_size=max_size),
690
+ T.Compose(
691
+ [
692
+ T.RandomResize(scales2_resize),
693
+ T.RandomSizeCrop(*scales2_crop),
694
+ T.RandomResize(scales, max_size=max_size),
695
+ ]
696
+ ),
697
+ ),
698
+ normalize,
699
+ ]
700
+ )
701
+
702
+ if image_set in ["val", "eval_debug", "train_reg", "test"]:
703
+ if os.environ.get("GFLOPS_DEBUG_SHILONG", False) == "INFO":
704
+ print("Under debug mode for flops calculation only!!!!!!!!!!!!!!!!")
705
+ return T.Compose(
706
+ [
707
+ T.ResizeDebug((1280, 800)),
708
+ normalize,
709
+ ]
710
+ )
711
+
712
+ print("max(scales): " + str(max(scales)))
713
+
714
+ return T.Compose(
715
+ [
716
+ T.RandomResize([max(scales)], max_size=max_size),
717
+ normalize,
718
+ ]
719
+ )
720
+
721
+ raise ValueError(f"unknown {image_set}")
722
+
723
+
724
+ def get_aux_target_hacks_list(image_set, args):
725
+ if args.modelname in ["q2bs_mask", "q2bs"]:
726
+ aux_target_hacks_list = [
727
+ label2compat(),
728
+ label_compat2onehot(),
729
+ RandomSelectBoxes(num_class=args.num_classes),
730
+ ]
731
+ if args.masked_data and image_set == "train":
732
+ # aux_target_hacks_list.append()
733
+ aux_target_hacks_list.append(MaskCrop())
734
+ elif args.modelname in [
735
+ "q2bm_v2",
736
+ "q2bs_ce",
737
+ "q2op",
738
+ "q2ofocal",
739
+ "q2opclip",
740
+ "q2ocqonly",
741
+ ]:
742
+ aux_target_hacks_list = [
743
+ label2compat(),
744
+ label_compat2onehot(),
745
+ box_label_catter(),
746
+ RandomSelectBoxlabels(
747
+ num_classes=args.num_classes,
748
+ prob_first_item=args.prob_first_item,
749
+ prob_random_item=args.prob_random_item,
750
+ prob_last_item=args.prob_last_item,
751
+ prob_stop_sign=args.prob_stop_sign,
752
+ ),
753
+ BboxPertuber(max_ratio=0.02, generate_samples=1000),
754
+ ]
755
+ elif args.modelname in ["q2omask", "q2osa"]:
756
+ if args.coco_aug:
757
+ aux_target_hacks_list = [
758
+ label2compat(),
759
+ label_compat2onehot(),
760
+ box_label_catter(),
761
+ RandomSelectBoxlabels(
762
+ num_classes=args.num_classes,
763
+ prob_first_item=args.prob_first_item,
764
+ prob_random_item=args.prob_random_item,
765
+ prob_last_item=args.prob_last_item,
766
+ prob_stop_sign=args.prob_stop_sign,
767
+ ),
768
+ RandomDrop(p=0.2),
769
+ BboxPertuber(max_ratio=0.02, generate_samples=1000),
770
+ RandomCutout(factor=0.5),
771
+ ]
772
+ else:
773
+ aux_target_hacks_list = [
774
+ label2compat(),
775
+ label_compat2onehot(),
776
+ box_label_catter(),
777
+ RandomSelectBoxlabels(
778
+ num_classes=args.num_classes,
779
+ prob_first_item=args.prob_first_item,
780
+ prob_random_item=args.prob_random_item,
781
+ prob_last_item=args.prob_last_item,
782
+ prob_stop_sign=args.prob_stop_sign,
783
+ ),
784
+ BboxPertuber(max_ratio=0.02, generate_samples=1000),
785
+ ]
786
+ else:
787
+ aux_target_hacks_list = None
788
+
789
+ return aux_target_hacks_list
790
+
791
+
792
+ def build(image_set, args, datasetinfo):
793
+ img_folder = datasetinfo["root"]
794
+ ann_file = datasetinfo["anno"]
795
+
796
+ # copy to local path
797
+ if os.environ.get("DATA_COPY_SHILONG") == "INFO":
798
+ preparing_dataset(
799
+ dict(img_folder=img_folder, ann_file=ann_file), image_set, args
800
+ )
801
+
802
+ try:
803
+ strong_aug = args.strong_aug
804
+ except AttributeError:
805
+ strong_aug = False
806
+ print(img_folder, ann_file)
807
+ dataset = CocoDetection(
808
+ img_folder,
809
+ ann_file,
810
+ transforms=make_coco_transforms(
811
+ image_set, fix_size=args.fix_size, strong_aug=strong_aug, args=args
812
+ ),
813
+ return_masks=args.masks,
814
+ aux_target_hacks=None,
815
+ )
816
+ return dataset
817
+
818
+
819
+ if __name__ == "__main__":
820
+ # Objects365 Val example
821
+ dataset_o365 = CocoDetection(
822
+ "/path/Objects365/train/",
823
+ "/path/Objects365/slannos/anno_preprocess_train_v2.json",
824
+ transforms=None,
825
+ return_masks=False,
826
+ )
827
+ print("len(dataset_o365):", len(dataset_o365))
datasets/coco_eval.py ADDED
@@ -0,0 +1,266 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2
+ """
3
+ COCO evaluator that works in distributed mode.
4
+
5
+ Mostly copy-paste from https://github.com/pytorch/vision/blob/edfd5a7/references/detection/coco_eval.py
6
+ The difference is that there is less copy-pasting from pycocotools
7
+ at the end of the file, as python3 can suppress prints with contextlib
8
+ """
9
+ import os
10
+ import contextlib
11
+ import copy
12
+ import numpy as np
13
+ import torch
14
+
15
+ from pycocotools.cocoeval import COCOeval
16
+ from pycocotools.coco import COCO
17
+ import pycocotools.mask as mask_util
18
+
19
+ from util.misc import all_gather
20
+
21
+
22
+ class CocoEvaluator(object):
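+ # Typical usage (sketch): evaluator = CocoEvaluator(coco_gt, ["bbox"]);
+ # evaluator.update(predictions); evaluator.synchronize_between_processes();
+ # evaluator.accumulate(); evaluator.summarize()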
23
+ def __init__(self, coco_gt, iou_types, useCats=True):
24
+ assert isinstance(iou_types, (list, tuple))
25
+ coco_gt = copy.deepcopy(coco_gt)
26
+ self.coco_gt = coco_gt
27
+
28
+ self.iou_types = iou_types
29
+ self.coco_eval = {}
30
+ for iou_type in iou_types:
31
+ self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type)
32
+ self.coco_eval[iou_type].useCats = useCats
33
+
34
+ self.img_ids = []
35
+ self.eval_imgs = {k: [] for k in iou_types}
36
+ self.useCats = useCats
37
+
38
+ def update(self, predictions):
39
+ img_ids = list(np.unique(list(predictions.keys())))
40
+ self.img_ids.extend(img_ids)
41
+
42
+ for iou_type in self.iou_types:
43
+ results = self.prepare(predictions, iou_type)
44
+
45
+ # suppress pycocotools prints
46
+ with open(os.devnull, 'w') as devnull:
47
+ with contextlib.redirect_stdout(devnull):
48
+ coco_dt = COCO.loadRes(self.coco_gt, results) if results else COCO()
49
+ coco_eval = self.coco_eval[iou_type]
50
+
51
+ coco_eval.cocoDt = coco_dt
52
+ coco_eval.params.imgIds = list(img_ids)
53
+ coco_eval.params.useCats = self.useCats
54
+ img_ids, eval_imgs = evaluate(coco_eval)
55
+
56
+ self.eval_imgs[iou_type].append(eval_imgs)
57
+
58
+ def synchronize_between_processes(self):
59
+ for iou_type in self.iou_types:
60
+ self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2)
61
+ create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type])
62
+
63
+ def accumulate(self):
64
+ for coco_eval in self.coco_eval.values():
65
+ coco_eval.accumulate()
66
+
67
+ def summarize(self):
68
+ for iou_type, coco_eval in self.coco_eval.items():
69
+ print("IoU metric: {}".format(iou_type))
70
+ coco_eval.summarize()
71
+
72
+ def prepare(self, predictions, iou_type):
73
+ if iou_type == "bbox":
74
+ return self.prepare_for_coco_detection(predictions)
75
+ elif iou_type == "segm":
76
+ return self.prepare_for_coco_segmentation(predictions)
77
+ elif iou_type == "keypoints":
78
+ return self.prepare_for_coco_keypoint(predictions)
79
+ else:
80
+ raise ValueError("Unknown iou type {}".format(iou_type))
81
+
82
+ def prepare_for_coco_detection(self, predictions):
83
+ coco_results = []
84
+ for original_id, prediction in predictions.items():
85
+ if len(prediction) == 0:
86
+ continue
87
+
88
+ boxes = prediction["boxes"]
89
+ boxes = convert_to_xywh(boxes).tolist()
90
+ if not isinstance(prediction["scores"], list):
91
+ scores = prediction["scores"].tolist()
92
+ else:
93
+ scores = prediction["scores"]
94
+ if not isinstance(prediction["labels"], list):
95
+ labels = prediction["labels"].tolist()
96
+ else:
97
+ labels = prediction["labels"]
98
+
99
+
100
+ try:
101
+ coco_results.extend(
102
+ [
103
+ {
104
+ "image_id": original_id,
105
+ "category_id": labels[k],
106
+ "bbox": box,
107
+ "score": scores[k],
108
+ }
109
+ for k, box in enumerate(boxes)
110
+ ]
111
+ )
112
+ except Exception:
113
+ raise
114
+ return coco_results
115
+
116
+ def prepare_for_coco_segmentation(self, predictions):
117
+ coco_results = []
118
+ for original_id, prediction in predictions.items():
119
+ if len(prediction) == 0:
120
+ continue
121
+
122
+ scores = prediction["scores"]
123
+ labels = prediction["labels"]
124
+ masks = prediction["masks"]
125
+
126
+ masks = masks > 0.5
127
+
128
+ scores = prediction["scores"].tolist()
129
+ labels = prediction["labels"].tolist()
130
+
131
+ rles = [
132
+ mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0]
133
+ for mask in masks
134
+ ]
135
+ for rle in rles:
136
+ rle["counts"] = rle["counts"].decode("utf-8")
137
+
138
+ coco_results.extend(
139
+ [
140
+ {
141
+ "image_id": original_id,
142
+ "category_id": labels[k],
143
+ "segmentation": rle,
144
+ "score": scores[k],
145
+ }
146
+ for k, rle in enumerate(rles)
147
+ ]
148
+ )
149
+ return coco_results
150
+
151
+ def prepare_for_coco_keypoint(self, predictions):
152
+ coco_results = []
153
+ for original_id, prediction in predictions.items():
154
+ if len(prediction) == 0:
155
+ continue
156
+
157
+ boxes = prediction["boxes"]
158
+ boxes = convert_to_xywh(boxes).tolist()
159
+ scores = prediction["scores"].tolist()
160
+ labels = prediction["labels"].tolist()
161
+ keypoints = prediction["keypoints"]
162
+ keypoints = keypoints.flatten(start_dim=1).tolist()
163
+
164
+ coco_results.extend(
165
+ [
166
+ {
167
+ "image_id": original_id,
168
+ "category_id": labels[k],
169
+ 'keypoints': keypoint,
170
+ "score": scores[k],
171
+ }
172
+ for k, keypoint in enumerate(keypoints)
173
+ ]
174
+ )
175
+ return coco_results
176
+
177
+
178
+ def convert_to_xywh(boxes):
179
+ xmin, ymin, xmax, ymax = boxes.unbind(1)
180
+ return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1)
181
+
182
+
183
+ def merge(img_ids, eval_imgs):
184
+ all_img_ids = all_gather(img_ids)
185
+ all_eval_imgs = all_gather(eval_imgs)
186
+
187
+ merged_img_ids = []
188
+ for p in all_img_ids:
189
+ merged_img_ids.extend(p)
190
+
191
+ merged_eval_imgs = []
192
+ for p in all_eval_imgs:
193
+ merged_eval_imgs.append(p)
194
+
195
+ merged_img_ids = np.array(merged_img_ids)
196
+ merged_eval_imgs = np.concatenate(merged_eval_imgs, 2)
197
+
198
+ # keep only unique (and in sorted order) images
199
+ merged_img_ids, idx = np.unique(merged_img_ids, return_index=True)
200
+ merged_eval_imgs = merged_eval_imgs[..., idx]
201
+
202
+ return merged_img_ids, merged_eval_imgs
203
+
204
+
205
+ def create_common_coco_eval(coco_eval, img_ids, eval_imgs):
206
+ img_ids, eval_imgs = merge(img_ids, eval_imgs)
207
+ img_ids = list(img_ids)
208
+ eval_imgs = list(eval_imgs.flatten())
209
+
210
+ coco_eval.evalImgs = eval_imgs
211
+ coco_eval.params.imgIds = img_ids
212
+ coco_eval._paramsEval = copy.deepcopy(coco_eval.params)
213
+
214
+
215
+ #################################################################
216
+ # From pycocotools, just removed the prints and fixed
217
+ # a Python3 bug about unicode not defined
218
+ #################################################################
219
+
220
+
221
+ def evaluate(self):
222
+ '''
223
+ Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
224
+ :return: None
225
+ '''
226
+ p = self.params
227
+ # add backward compatibility if useSegm is specified in params
228
+ if p.useSegm is not None:
229
+ p.iouType = 'segm' if p.useSegm == 1 else 'bbox'
230
+ print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType))
231
+ p.imgIds = list(np.unique(p.imgIds))
232
+ if p.useCats:
233
+ p.catIds = list(np.unique(p.catIds))
234
+ p.maxDets = sorted(p.maxDets)
235
+ self.params = p
236
+
237
+ self._prepare()
238
+ # loop through images, area range, max detection number
239
+ catIds = p.catIds if p.useCats else [-1]
240
+
241
+ if p.iouType == 'segm' or p.iouType == 'bbox':
242
+ computeIoU = self.computeIoU
243
+ elif p.iouType == 'keypoints':
244
+ computeIoU = self.computeOks
245
+ self.ious = {
246
+ (imgId, catId): computeIoU(imgId, catId)
247
+ for imgId in p.imgIds
248
+ for catId in catIds}
249
+
250
+ evaluateImg = self.evaluateImg
251
+ maxDet = p.maxDets[-1]
252
+ evalImgs = [
253
+ evaluateImg(imgId, catId, areaRng, maxDet)
254
+ for catId in catIds
255
+ for areaRng in p.areaRng
256
+ for imgId in p.imgIds
257
+ ]
258
+ # this is NOT in the pycocotools code, but could be done outside
259
+ evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds))
260
+ self._paramsEval = copy.deepcopy(self.params)
261
+
262
+ return p.imgIds, evalImgs
263
+
264
+ #################################################################
265
+ # end of straight copy from pycocotools, just removing the prints
266
+ #################################################################
datasets/coco_panoptic.py ADDED
@@ -0,0 +1,99 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2
+ import json
3
+ from pathlib import Path
4
+
5
+ import numpy as np
6
+ import torch
7
+ from PIL import Image
8
+
9
+ from panopticapi.utils import rgb2id
10
+ from util.box_ops import masks_to_boxes
11
+
12
+ from .coco import make_coco_transforms
13
+
14
+
15
+ class CocoPanoptic:
16
+ def __init__(self, img_folder, ann_folder, ann_file, transforms=None, return_masks=True):
17
+ with open(ann_file, 'r') as f:
18
+ self.coco = json.load(f)
19
+
20
+ # sort 'images' field so that they are aligned with 'annotations'
21
+ # i.e., in alphabetical order
22
+ self.coco['images'] = sorted(self.coco['images'], key=lambda x: x['id'])
23
+ # sanity check
24
+ if "annotations" in self.coco:
25
+ for img, ann in zip(self.coco['images'], self.coco['annotations']):
26
+ assert img['file_name'][:-4] == ann['file_name'][:-4]
27
+
28
+ self.img_folder = img_folder
29
+ self.ann_folder = ann_folder
30
+ self.ann_file = ann_file
31
+ self.transforms = transforms
32
+ self.return_masks = return_masks
33
+
34
+ def __getitem__(self, idx):
35
+ ann_info = self.coco['annotations'][idx] if "annotations" in self.coco else self.coco['images'][idx]
36
+ img_path = Path(self.img_folder) / ann_info['file_name'].replace('.png', '.jpg')
37
+ ann_path = Path(self.ann_folder) / ann_info['file_name']
38
+
39
+ img = Image.open(img_path).convert('RGB')
40
+ w, h = img.size
41
+ if "segments_info" in ann_info:
42
+ masks = np.asarray(Image.open(ann_path), dtype=np.uint32)
43
+ masks = rgb2id(masks)
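+ # The panoptic PNG encodes each segment id across its RGB channels; rgb2id
+ # collapses them to a single integer id per pixel, so the comparison below
+ # yields one binary mask per segment.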
44
+
45
+ ids = np.array([ann['id'] for ann in ann_info['segments_info']])
46
+ masks = masks == ids[:, None, None]
47
+
48
+ masks = torch.as_tensor(masks, dtype=torch.uint8)
49
+ labels = torch.tensor([ann['category_id'] for ann in ann_info['segments_info']], dtype=torch.int64)
50
+
51
+ target = {}
52
+ target['image_id'] = torch.tensor([ann_info['image_id'] if "image_id" in ann_info else ann_info["id"]])
53
+ if self.return_masks:
54
+ target['masks'] = masks
55
+ target['labels'] = labels
56
+
57
+ target["boxes"] = masks_to_boxes(masks)
58
+
59
+ target['size'] = torch.as_tensor([int(h), int(w)])
60
+ target['orig_size'] = torch.as_tensor([int(h), int(w)])
61
+ if "segments_info" in ann_info:
62
+ for name in ['iscrowd', 'area']:
63
+ target[name] = torch.tensor([ann[name] for ann in ann_info['segments_info']])
64
+
65
+ if self.transforms is not None:
66
+ img, target = self.transforms(img, target)
67
+
68
+ return img, target
69
+
70
+ def __len__(self):
71
+ return len(self.coco['images'])
72
+
73
+ def get_height_and_width(self, idx):
74
+ img_info = self.coco['images'][idx]
75
+ height = img_info['height']
76
+ width = img_info['width']
77
+ return height, width
78
+
79
+
80
+ def build(image_set, args):
81
+ img_folder_root = Path(args.coco_path)
82
+ ann_folder_root = Path(args.coco_panoptic_path)
83
+ assert img_folder_root.exists(), f'provided COCO path {img_folder_root} does not exist'
84
+ assert ann_folder_root.exists(), f'provided COCO path {ann_folder_root} does not exist'
85
+ mode = 'panoptic'
86
+ PATHS = {
87
+ "train": ("train2017", Path("annotations") / f'{mode}_train2017.json'),
88
+ "val": ("val2017", Path("annotations") / f'{mode}_val2017.json'),
89
+ }
90
+
91
+ img_folder, ann_file = PATHS[image_set]
92
+ img_folder_path = img_folder_root / img_folder
93
+ ann_folder = ann_folder_root / f'{mode}_{img_folder}'
94
+ ann_file = ann_folder_root / ann_file
95
+
96
+ dataset = CocoPanoptic(img_folder_path, ann_folder, ann_file,
97
+ transforms=make_coco_transforms(image_set), return_masks=args.masks)
98
+
99
+ return dataset
datasets/cocogrounding_eval.py ADDED
@@ -0,0 +1,271 @@
1
+ # ------------------------------------------------------------------------
2
+ # Grounding DINO. Modified by Shilong Liu.
3
+ # url: https://github.com/IDEA-Research/GroundingDINO
4
+ # Copyright (c) 2023 IDEA. All Rights Reserved.
5
+ # Licensed under the Apache License, Version 2.0 [see LICENSE for details]
6
+ # ------------------------------------------------------------------------
7
+ # Copyright (c) Aishwarya Kamath & Nicolas Carion. Licensed under the Apache License 2.0. All Rights Reserved
8
+ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
9
+ """
10
+ COCO evaluator that works in distributed mode.
11
+
12
+ Mostly copy-paste from https://github.com/pytorch/vision/blob/edfd5a7/references/detection/coco_eval.py
13
+ The difference is that there is less copy-pasting from pycocotools
14
+ at the end of the file, as python3 can suppress prints with contextlib
15
+ """
16
+ import contextlib
17
+ import copy
18
+ import os
19
+
20
+ import numpy as np
21
+ import pycocotools.mask as mask_util
22
+ import torch
23
+ from pycocotools.coco import COCO
24
+ from pycocotools.cocoeval import COCOeval
25
+
26
+ from groundingdino.util.misc import all_gather
27
+
28
+
29
+ class CocoGroundingEvaluator(object):
30
+ def __init__(self, coco_gt, iou_types, useCats=True):
31
+ assert isinstance(iou_types, (list, tuple))
32
+ coco_gt = copy.deepcopy(coco_gt)
33
+ self.coco_gt = coco_gt
34
+
35
+ self.iou_types = iou_types
36
+ self.coco_eval = {}
37
+ for iou_type in iou_types:
38
+ self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type)
39
+ self.coco_eval[iou_type].useCats = useCats
40
+
41
+ self.img_ids = []
42
+ self.eval_imgs = {k: [] for k in iou_types}
43
+ self.useCats = useCats
44
+
45
+ def update(self, predictions):
46
+ img_ids = list(np.unique(list(predictions.keys())))
47
+ self.img_ids.extend(img_ids)
48
+ # import pdb;pdb.set_trace()
49
+ for iou_type in self.iou_types:
50
+ results = self.prepare(predictions, iou_type)
51
+
52
+ # suppress pycocotools prints
53
+ with open(os.devnull, "w") as devnull:
54
+ with contextlib.redirect_stdout(devnull):
55
+ coco_dt = COCO.loadRes(self.coco_gt, results) if results else COCO()
56
+
57
+ coco_eval = self.coco_eval[iou_type]
58
+
59
+ coco_eval.cocoDt = coco_dt
60
+ coco_eval.params.imgIds = list(img_ids)
61
+ coco_eval.params.useCats = self.useCats
62
+ img_ids, eval_imgs = evaluate(coco_eval)
63
+
64
+ self.eval_imgs[iou_type].append(eval_imgs)
65
+
66
+ def synchronize_between_processes(self):
67
+ for iou_type in self.iou_types:
68
+ self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2)
69
+ create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type])
70
+
71
+ def accumulate(self):
72
+ for coco_eval in self.coco_eval.values():
73
+ coco_eval.accumulate()
74
+
75
+ def summarize(self):
76
+ for iou_type, coco_eval in self.coco_eval.items():
77
+ print("IoU metric: {}".format(iou_type))
78
+ coco_eval.summarize()
79
+
80
+ def prepare(self, predictions, iou_type):
81
+ if iou_type == "bbox":
82
+ return self.prepare_for_coco_detection(predictions)
83
+ elif iou_type == "segm":
84
+ return self.prepare_for_coco_segmentation(predictions)
85
+ elif iou_type == "keypoints":
86
+ return self.prepare_for_coco_keypoint(predictions)
87
+ else:
88
+ raise ValueError("Unknown iou type {}".format(iou_type))
89
+
90
+ def prepare_for_coco_detection(self, predictions):
91
+ coco_results = []
92
+ for original_id, prediction in predictions.items():
93
+ if len(prediction) == 0:
94
+ continue
95
+
96
+ boxes = prediction["boxes"]
97
+ boxes = convert_to_xywh(boxes).tolist()
98
+ scores = prediction["scores"].tolist()
99
+ labels = prediction["labels"].tolist()
100
+
101
+ coco_results.extend(
102
+ [
103
+ {
104
+ "image_id": original_id,
105
+ "category_id": labels[k],
106
+ "bbox": box,
107
+ "score": scores[k],
108
+ }
109
+ for k, box in enumerate(boxes)
110
+ ]
111
+ )
112
+ return coco_results
113
+
114
+ def prepare_for_coco_segmentation(self, predictions):
115
+ coco_results = []
116
+ for original_id, prediction in predictions.items():
117
+ if len(prediction) == 0:
118
+ continue
119
+
120
+ scores = prediction["scores"]
121
+ labels = prediction["labels"]
122
+ masks = prediction["masks"]
123
+
124
+ masks = masks > 0.5
125
+
126
+ scores = prediction["scores"].tolist()
127
+ labels = prediction["labels"].tolist()
128
+
129
+ rles = [
130
+ mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0]
131
+ for mask in masks
132
+ ]
133
+ for rle in rles:
134
+ rle["counts"] = rle["counts"].decode("utf-8")
135
+
136
+ coco_results.extend(
137
+ [
138
+ {
139
+ "image_id": original_id,
140
+ "category_id": labels[k],
141
+ "segmentation": rle,
142
+ "score": scores[k],
143
+ }
144
+ for k, rle in enumerate(rles)
145
+ ]
146
+ )
147
+ return coco_results
148
+
149
+ def prepare_for_coco_keypoint(self, predictions):
150
+ coco_results = []
151
+ for original_id, prediction in predictions.items():
152
+ if len(prediction) == 0:
153
+ continue
154
+
155
+ boxes = prediction["boxes"]
156
+ boxes = convert_to_xywh(boxes).tolist()
157
+ scores = prediction["scores"].tolist()
158
+ labels = prediction["labels"].tolist()
159
+ keypoints = prediction["keypoints"]
160
+ keypoints = keypoints.flatten(start_dim=1).tolist()
161
+
162
+ coco_results.extend(
163
+ [
164
+ {
165
+ "image_id": original_id,
166
+ "category_id": labels[k],
167
+ "keypoints": keypoint,
168
+ "score": scores[k],
169
+ }
170
+ for k, keypoint in enumerate(keypoints)
171
+ ]
172
+ )
173
+ return coco_results
174
+
175
+
176
+ def convert_to_xywh(boxes):
177
+ xmin, ymin, xmax, ymax = boxes.unbind(1)
178
+ return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1)
179
+
180
+
181
+ def merge(img_ids, eval_imgs):
182
+ all_img_ids = all_gather(img_ids)
183
+ all_eval_imgs = all_gather(eval_imgs)
184
+
185
+ merged_img_ids = []
186
+ for p in all_img_ids:
187
+ merged_img_ids.extend(p)
188
+
189
+ merged_eval_imgs = []
190
+ for p in all_eval_imgs:
191
+ merged_eval_imgs.append(p)
192
+
193
+ merged_img_ids = np.array(merged_img_ids)
194
+ merged_eval_imgs = np.concatenate(merged_eval_imgs, 2)
195
+
196
+ # keep only unique (and in sorted order) images
197
+ merged_img_ids, idx = np.unique(merged_img_ids, return_index=True)
198
+ merged_eval_imgs = merged_eval_imgs[..., idx]
199
+
200
+ return merged_img_ids, merged_eval_imgs
201
+
202
+
203
+ def create_common_coco_eval(coco_eval, img_ids, eval_imgs):
204
+ img_ids, eval_imgs = merge(img_ids, eval_imgs)
205
+ img_ids = list(img_ids)
206
+ eval_imgs = list(eval_imgs.flatten())
207
+
208
+ coco_eval.evalImgs = eval_imgs
209
+ coco_eval.params.imgIds = img_ids
210
+ coco_eval._paramsEval = copy.deepcopy(coco_eval.params)
211
+
212
+
213
+ #################################################################
214
+ # From pycocotools, just removed the prints and fixed
215
+ # a Python3 bug about unicode not defined
216
+ #################################################################
217
+
218
+
219
+ def evaluate(self):
220
+ """
221
+ Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
222
+ :return: None
223
+ """
224
+ # tic = time.time()
225
+ # print('Running per image evaluation...')
226
+
227
+ # import pdb;pdb.set_trace()
228
+ p = self.params
229
+ # add backward compatibility if useSegm is specified in params
230
+ if p.useSegm is not None:
231
+ p.iouType = "segm" if p.useSegm == 1 else "bbox"
232
+ print("useSegm (deprecated) is not None. Running {} evaluation".format(p.iouType))
233
+ # print('Evaluate annotation type *{}*'.format(p.iouType))
234
+ p.imgIds = list(np.unique(p.imgIds))
235
+ if p.useCats:
236
+ p.catIds = list(np.unique(p.catIds))
237
+ p.maxDets = sorted(p.maxDets)
238
+ self.params = p
239
+
240
+ self._prepare()
241
+ # loop through images, area range, max detection number
242
+ catIds = p.catIds if p.useCats else [-1]
243
+
244
+ if p.iouType == "segm" or p.iouType == "bbox":
245
+ computeIoU = self.computeIoU
246
+ elif p.iouType == "keypoints":
247
+ computeIoU = self.computeOks
248
+ self.ious = {
249
+ (imgId, catId): computeIoU(imgId, catId)
250
+ for imgId in p.imgIds
251
+ for catId in catIds}
252
+
253
+ evaluateImg = self.evaluateImg
254
+ maxDet = p.maxDets[-1]
255
+ evalImgs = [
256
+ evaluateImg(imgId, catId, areaRng, maxDet)
257
+ for catId in catIds
258
+ for areaRng in p.areaRng
259
+ for imgId in p.imgIds
260
+ ]
261
+ # this is NOT in the pycocotools code, but could be done outside
262
+ evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds))
263
+ self._paramsEval = copy.deepcopy(self.params)
264
+ # toc = time.time()
265
+ # print('DONE (t={:0.2f}s).'.format(toc-tic))
266
+ return p.imgIds, evalImgs
267
+
268
+
269
+ #################################################################
270
+ # end of straight copy from pycocotools, just removing the prints
271
+ #################################################################
datasets/data_util.py ADDED
@@ -0,0 +1,170 @@
1
+ import os
2
+ import os.path as osp
3
+ import shutil
4
+ import time
5
+ import datetime
6
+
7
+ import torch
8
+
9
+ from util.slconfig import SLConfig
10
+
11
+ class Error(OSError):
12
+ pass
13
+
14
+ def slcopytree(src, dst, symlinks=False, ignore=None, copy_function=shutil.copyfile,
15
+ ignore_dangling_symlinks=False):
16
+ """
17
+ modified from shutil.copytree without copystat.
18
+
19
+ Recursively copy a directory tree.
20
+
21
+ The destination directory must not already exist.
22
+ If exception(s) occur, an Error is raised with a list of reasons.
23
+
24
+ If the optional symlinks flag is true, symbolic links in the
25
+ source tree result in symbolic links in the destination tree; if
26
+ it is false, the contents of the files pointed to by symbolic
27
+ links are copied. If the file pointed by the symlink doesn't
28
+ exist, an exception will be added in the list of errors raised in
29
+ an Error exception at the end of the copy process.
30
+
31
+ You can set the optional ignore_dangling_symlinks flag to true if you
32
+ want to silence this exception. Notice that this has no effect on
33
+ platforms that don't support os.symlink.
34
+
35
+ The optional ignore argument is a callable. If given, it
36
+ is called with the `src` parameter, which is the directory
37
+ being visited by copytree(), and `names` which is the list of
38
+ `src` contents, as returned by os.listdir():
39
+
40
+ callable(src, names) -> ignored_names
41
+
42
+ Since copytree() is called recursively, the callable will be
43
+ called once for each directory that is copied. It returns a
44
+ list of names relative to the `src` directory that should
45
+ not be copied.
46
+
47
+ The optional copy_function argument is a callable that will be used
48
+ to copy each file. It will be called with the source path and the
49
+ destination path as arguments. By default, copy2() is used, but any
50
+ function that supports the same signature (like copy()) can be used.
51
+
52
+ """
53
+ errors = []
54
+ if os.path.isdir(src):
55
+ names = os.listdir(src)
56
+ if ignore is not None:
57
+ ignored_names = ignore(src, names)
58
+ else:
59
+ ignored_names = set()
60
+
61
+ os.makedirs(dst)
62
+ for name in names:
63
+ if name in ignored_names:
64
+ continue
65
+ srcname = os.path.join(src, name)
66
+ dstname = os.path.join(dst, name)
67
+ try:
68
+ if os.path.islink(srcname):
69
+ linkto = os.readlink(srcname)
70
+ if symlinks:
71
+ # We can't just leave it to `copy_function` because legacy
72
+ # code with a custom `copy_function` may rely on copytree
73
+ # doing the right thing.
74
+ os.symlink(linkto, dstname)
75
+ else:
76
+ # ignore dangling symlink if the flag is on
77
+ if not os.path.exists(linkto) and ignore_dangling_symlinks:
78
+ continue
79
+ # otherwise let the copy occurs. copy2 will raise an error
80
+ if os.path.isdir(srcname):
81
+ slcopytree(srcname, dstname, symlinks, ignore,
82
+ copy_function)
83
+ else:
84
+ copy_function(srcname, dstname)
85
+ elif os.path.isdir(srcname):
86
+ slcopytree(srcname, dstname, symlinks, ignore, copy_function)
87
+ else:
88
+ # Will raise a SpecialFileError for unsupported file types
89
+ copy_function(srcname, dstname)
90
+ # catch the Error from the recursive copytree so that we can
91
+ # continue with other files
92
+ except Error as err:
93
+ errors.extend(err.args[0])
94
+ except OSError as why:
95
+ errors.append((srcname, dstname, str(why)))
96
+ else:
97
+ copy_function(src, dst)
98
+
99
+ if errors:
100
+ raise Error(errors)
101
+ return dst
102
+
103
+ def check_and_copy(src_path, tgt_path):
104
+ if os.path.exists(tgt_path):
105
+ return None
106
+
107
+ return slcopytree(src_path, tgt_path)
108
+
109
+
110
+ def remove(srcpath):
111
+ if os.path.isdir(srcpath):
112
+ return shutil.rmtree(srcpath)
113
+ else:
114
+ return os.remove(srcpath)
115
+
116
+
117
+ def preparing_dataset(pathdict, image_set, args):
118
+ start_time = time.time()
119
+ dataset_file = args.dataset_file
120
+ data_static_info = SLConfig.fromfile('util/static_data_path.py')
121
+ static_dict = data_static_info[dataset_file][image_set]
122
+
123
+ copyfilelist = []
124
+ for k,tgt_v in pathdict.items():
125
+ if os.path.exists(tgt_v):
126
+ if args.local_rank == 0:
127
+ print("path <{}> exist. remove it!".format(tgt_v))
128
+ remove(tgt_v)
129
+ # continue
130
+
131
+ if args.local_rank == 0:
132
+ src_v = static_dict[k]
133
+ assert isinstance(src_v, str)
134
+ if src_v.endswith('.zip'):
135
+ # copy
136
+ cp_tgt_dir = os.path.dirname(tgt_v)
137
+ filename = os.path.basename(src_v)
138
+ cp_tgt_path = os.path.join(cp_tgt_dir, filename)
139
+ print('Copy from <{}> to <{}>.'.format(src_v, cp_tgt_path))
140
+ os.makedirs(cp_tgt_dir, exist_ok=True)
141
+ check_and_copy(src_v, cp_tgt_path)
142
+
143
+ # unzip
144
+ import zipfile
145
+ print("Starting unzip <{}>".format(cp_tgt_path))
146
+ with zipfile.ZipFile(cp_tgt_path, 'r') as zip_ref:
147
+ zip_ref.extractall(os.path.dirname(cp_tgt_path))
148
+
149
+ copyfilelist.append(cp_tgt_path)
150
+ copyfilelist.append(tgt_v)
151
+ else:
152
+ print('Copy from <{}> to <{}>.'.format(src_v, tgt_v))
153
+ os.makedirs(os.path.dirname(tgt_v), exist_ok=True)
154
+ check_and_copy(src_v, tgt_v)
155
+ copyfilelist.append(tgt_v)
156
+
157
+ if len(copyfilelist) == 0:
158
+ copyfilelist = None
159
+ args.copyfilelist = copyfilelist
160
+
161
+ if args.distributed:
162
+ torch.distributed.barrier()
163
+ total_time = time.time() - start_time
164
+ if copyfilelist:
165
+ total_time_str = str(datetime.timedelta(seconds=int(total_time)))
166
+ print('Data copy time {}'.format(total_time_str))
167
+ return copyfilelist
168
+
169
+
170
+
datasets/dataset.py ADDED
@@ -0,0 +1,44 @@
1
+ from __future__ import print_function
2
+
3
+ import torch
4
+ import torchvision.datasets as datasets
5
+ from torch.utils.data import Dataset
6
+ from PIL import Image
7
+ from .tsv_io import TSVFile
8
+ import numpy as np
9
+ import base64
10
+ import io
11
+
12
+
13
+ class TSVDataset(Dataset):
14
+ """ TSV dataset for ImageNet 1K training
15
+ """
16
+ def __init__(self, tsv_file, transform=None, target_transform=None):
17
+ self.tsv = TSVFile(tsv_file)
18
+ self.transform = transform
19
+ self.target_transform = target_transform
20
+
21
+ def __getitem__(self, index):
22
+ """
23
+ Args:
24
+ index (int): Index
25
+ Returns:
26
+ tuple: (image, target) where target is class_index of the target class.
27
+ """
28
+ row = self.tsv.seek(index)
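+ # Each TSV row is expected to hold the class index at row[1] and the
+ # base64-encoded image bytes in its last column (row[-1]).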
29
+ image_data = base64.b64decode(row[-1])
30
+ image = Image.open(io.BytesIO(image_data))
31
+ image = image.convert('RGB')
32
+ target = int(row[1])
33
+
34
+ if self.transform is not None:
35
+ img = self.transform(image)
36
+ else:
37
+ img = image
38
+ if self.target_transform is not None:
39
+ target = self.target_transform(target)
40
+
41
+ return img, target
42
+
43
+ def __len__(self):
44
+ return self.tsv.num_rows()
datasets/odvg.py ADDED
@@ -0,0 +1,288 @@
1
+ from torchvision.datasets.vision import VisionDataset
2
+ import os.path
3
+ from typing import Callable, Optional
4
+ import json
5
+ from PIL import Image
6
+ import torch
7
+ import random
8
+ import os, sys
9
+
10
+ sys.path.append(os.path.dirname(sys.path[0]))
11
+
12
+ import datasets.transforms as T
13
+
14
+
15
+ class ODVGDataset(VisionDataset):
16
+ """
17
+ Args:
18
+ root (string): Root directory where images are downloaded to.
19
+ anno (string): Path to json annotation file.
20
+ label_map_anno (string): Path to json label mapping file. Only for Object Detection
21
+ transform (callable, optional): A function/transform that takes in an PIL image
22
+ and returns a transformed version. E.g, ``transforms.PILToTensor``
23
+ target_transform (callable, optional): A function/transform that takes in the
24
+ target and transforms it.
25
+ transforms (callable, optional): A function/transform that takes input sample and its target as entry
26
+ and returns a transformed version.
27
+ """
28
+
29
+ def __init__(
30
+ self,
31
+ root: str,
32
+ anno: str,
33
+ label_map_anno: str = None,
34
+ max_labels: int = 80,
35
+ transform: Optional[Callable] = None,
36
+ target_transform: Optional[Callable] = None,
37
+ transforms: Optional[Callable] = None,
38
+ ) -> None:
39
+ super().__init__(root, transforms, transform, target_transform)
40
+ self.root = root
41
+ self.dataset_mode = "OD" if label_map_anno else "VG"
42
+ self.max_labels = max_labels
43
+ if self.dataset_mode == "OD":
44
+ self.load_label_map(label_map_anno)
45
+ self._load_metas(anno)
46
+ self.get_dataset_info()
47
+
48
+ def load_label_map(self, label_map_anno):
49
+ with open(label_map_anno, "r") as file:
50
+ self.label_map = json.load(file)
51
+ self.label_index = set(self.label_map.keys())
52
+
53
+ def _load_metas(self, anno):
54
+ with open(anno, "r") as f:
55
+ self.metas = [json.loads(line) for line in f]
56
+
57
+ def get_dataset_info(self):
58
+ print(f" == total images: {len(self)}")
59
+ if self.dataset_mode == "OD":
60
+ print(f" == total labels: {len(self.label_map)}")
61
+
62
+ def __getitem__(self, index: int):
63
+ meta = self.metas[index]
64
+ rel_path = meta["filename"]
65
+ abs_path = os.path.join(self.root, rel_path)
66
+ if not os.path.exists(abs_path):
67
+ raise FileNotFoundError(f"{abs_path} not found.")
68
+ image = Image.open(abs_path).convert("RGB")
69
+ exemplars = torch.tensor(meta["exemplars"], dtype=torch.int64)
70
+ w, h = image.size
71
+ if self.dataset_mode == "OD":
72
+ anno = meta["detection"]
73
+ instances = [obj for obj in anno["instances"]]
74
+ boxes = [obj["bbox"] for obj in instances]
75
+ # generate vg_labels
76
+ # pos bbox labels
77
+ ori_classes = [str(obj["label"]) for obj in instances]
78
+ pos_labels = set(ori_classes)
79
+ # neg bbox labels
80
+ neg_labels = self.label_index.difference(pos_labels)
81
+
82
+ vg_labels = list(pos_labels)
83
+ num_to_add = min(len(neg_labels), self.max_labels - len(pos_labels))
84
+ if num_to_add > 0:
85
+ vg_labels.extend(random.sample(neg_labels, num_to_add))
86
+
87
+ # shuffle
88
+ for i in range(len(vg_labels) - 1, 0, -1):
89
+ j = random.randint(0, i)
90
+ vg_labels[i], vg_labels[j] = vg_labels[j], vg_labels[i]
91
+
92
+ caption_list = [self.label_map[lb] for lb in vg_labels]
93
+ caption_dict = {item: index for index, item in enumerate(caption_list)}
94
+
95
+ caption = " . ".join(caption_list) + " ."
96
+ classes = [
97
+ caption_dict[self.label_map[str(obj["label"])]] for obj in instances
98
+ ]
99
+ boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
100
+ classes = torch.tensor(classes, dtype=torch.int64)
101
+ elif self.dataset_mode == "VG":
102
+ anno = meta["grounding"]
103
+ instances = [obj for obj in anno["regions"]]
104
+ boxes = [obj["bbox"] for obj in instances]
105
+ caption_list = [obj["phrase"] for obj in instances]
106
+ c = list(zip(boxes, caption_list))
107
+ random.shuffle(c)
108
+ boxes[:], caption_list[:] = zip(*c)
109
+ uni_caption_list = list(set(caption_list))
110
+ label_map = {}
111
+ for idx in range(len(uni_caption_list)):
112
+ label_map[uni_caption_list[idx]] = idx
113
+ classes = [label_map[cap] for cap in caption_list]
114
+ caption = " . ".join(uni_caption_list) + " ."
115
+ boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
116
+ classes = torch.tensor(classes, dtype=torch.int64)
117
+ caption_list = uni_caption_list
118
+ target = {}
119
+ target["size"] = torch.as_tensor([int(h), int(w)])
120
+ target["cap_list"] = caption_list
121
+ target["caption"] = caption
122
+ target["boxes"] = boxes
123
+ target["labels"] = classes
124
+ target["exemplars"] = exemplars
125
+ target["labels_uncropped"] = torch.clone(classes)
126
+ # size, cap_list, caption, bboxes, labels
127
+
128
+ if self.transforms is not None:
129
+ image, target = self.transforms(image, target)
130
+ # Check that transforms does not change the identity of target['labels'].
131
+ if len(target["labels"]) > 0:
132
+ assert target["labels"][0] == target["labels_uncropped"][0]
133
+ print(
134
+ "Asserted that transforms does not change the identity of target['labels']."
135
+ )
136
+
137
+ return image, target
138
+
139
+ def __len__(self) -> int:
140
+ return len(self.metas)
141
+
142
+
143
+ def make_coco_transforms(image_set, fix_size=False, strong_aug=False, args=None):
144
+ normalize = T.Compose(
145
+ [T.ToTensor(), T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]
146
+ )
147
+
148
+ # config the params for data aug
149
+ scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
150
+ max_size = 1333
151
+ scales2_resize = [400, 500, 600]
152
+ scales2_crop = [384, 600]
153
+
154
+ # update args from config files
155
+ scales = getattr(args, "data_aug_scales", scales)
156
+ max_size = getattr(args, "data_aug_max_size", max_size)
157
+ scales2_resize = getattr(args, "data_aug_scales2_resize", scales2_resize)
158
+ scales2_crop = getattr(args, "data_aug_scales2_crop", scales2_crop)
159
+
160
+ # resize them
161
+ data_aug_scale_overlap = getattr(args, "data_aug_scale_overlap", None)
162
+ if data_aug_scale_overlap is not None and data_aug_scale_overlap > 0:
163
+ data_aug_scale_overlap = float(data_aug_scale_overlap)
164
+ scales = [int(i * data_aug_scale_overlap) for i in scales]
165
+ max_size = int(max_size * data_aug_scale_overlap)
166
+ scales2_resize = [int(i * data_aug_scale_overlap) for i in scales2_resize]
167
+ scales2_crop = [int(i * data_aug_scale_overlap) for i in scales2_crop]
168
+
169
+ # datadict_for_print = {
170
+ # 'scales': scales,
171
+ # 'max_size': max_size,
172
+ # 'scales2_resize': scales2_resize,
173
+ # 'scales2_crop': scales2_crop
174
+ # }
175
+ # print("data_aug_params:", json.dumps(datadict_for_print, indent=2))
176
+
177
+ if image_set == "train":
178
+ if fix_size:
179
+ return T.Compose(
180
+ [
181
+ T.RandomHorizontalFlip(),
182
+ T.RandomResize([(max_size, max(scales))]),
183
+ normalize,
184
+ ]
185
+ )
186
+
187
+ if strong_aug:
188
+ import datasets.sltransform as SLT
189
+
190
+ return T.Compose(
191
+ [
192
+ T.RandomHorizontalFlip(),
193
+ T.RandomSelect(
194
+ T.RandomResize(scales, max_size=max_size),
195
+ T.Compose(
196
+ [
197
+ T.RandomResize(scales2_resize),
198
+ T.RandomSizeCrop(*scales2_crop),
199
+ T.RandomResize(scales, max_size=max_size),
200
+ ]
201
+ ),
202
+ ),
203
+ SLT.RandomSelectMulti(
204
+ [
205
+ SLT.RandomCrop(),
206
+ SLT.LightingNoise(),
207
+ SLT.AdjustBrightness(2),
208
+ SLT.AdjustContrast(2),
209
+ ]
210
+ ),
211
+ normalize,
212
+ ]
213
+ )
214
+
215
+ return T.Compose(
216
+ [
217
+ T.RandomHorizontalFlip(),
218
+ T.RandomSelect(
219
+ T.RandomResize(scales, max_size=max_size),
220
+ T.Compose(
221
+ [
222
+ T.RandomResize(scales2_resize),
223
+ T.RandomSizeCrop(*scales2_crop),
224
+ T.RandomResize(scales, max_size=max_size),
225
+ ]
226
+ ),
227
+ ),
228
+ normalize,
229
+ ]
230
+ )
231
+
232
+ if image_set in ["val", "eval_debug", "train_reg", "test"]:
233
+ if os.environ.get("GFLOPS_DEBUG_SHILONG", False) == "INFO":
234
+ print("Under debug mode for flops calculation only!!!!!!!!!!!!!!!!")
235
+ return T.Compose(
236
+ [
237
+ T.ResizeDebug((1280, 800)),
238
+ normalize,
239
+ ]
240
+ )
241
+
242
+ return T.Compose(
243
+ [
244
+ T.RandomResize([max(scales)], max_size=max_size),
245
+ normalize,
246
+ ]
247
+ )
248
+
249
+ raise ValueError(f"unknown {image_set}")
250
+
251
+
252
+ def build_odvg(image_set, args, datasetinfo):
253
+ img_folder = datasetinfo["root"]
254
+ ann_file = datasetinfo["anno"]
255
+ label_map = datasetinfo["label_map"] if "label_map" in datasetinfo else None
256
+ try:
257
+ strong_aug = args.strong_aug
258
+ except:
259
+ strong_aug = False
260
+ print(img_folder, ann_file, label_map)
261
+ dataset = ODVGDataset(
262
+ img_folder,
263
+ ann_file,
264
+ label_map,
265
+ max_labels=args.max_labels,
266
+ transforms=make_coco_transforms(
267
+ image_set, fix_size=args.fix_size, strong_aug=strong_aug, args=args
268
+ ),
269
+ )
270
+ return dataset
271
+
272
+
273
+ if __name__ == "__main__":
274
+ dataset_vg = ODVGDataset(
275
+ "path/GRIT-20M/data/",
276
+ "path/GRIT-20M/anno/grit_odvg_10k.jsonl",
277
+ )
278
+ print(len(dataset_vg))
279
+ data = dataset_vg[random.randint(0, 100)]
280
+ print(data)
281
+ dataset_od = ODVGDataset(
282
+ "pathl/V3Det/",
283
+ "path/V3Det/annotations/v3det_2023_v1_all_odvg.jsonl",
284
+ "path/V3Det/annotations/v3det_label_map.json",
285
+ )
286
+ print(len(dataset_od))
287
+ data = dataset_od[random.randint(0, 100)]
288
+ print(data)
datasets/panoptic_eval.py ADDED
@@ -0,0 +1,44 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2
+ import json
3
+ import os
4
+
5
+ import util.misc as utils
6
+
7
+ try:
8
+ from panopticapi.evaluation import pq_compute
9
+ except ImportError:
10
+ pass
11
+
12
+
13
+ class PanopticEvaluator(object):
14
+ def __init__(self, ann_file, ann_folder, output_dir="panoptic_eval"):
15
+ self.gt_json = ann_file
16
+ self.gt_folder = ann_folder
17
+ if utils.is_main_process():
18
+ if not os.path.exists(output_dir):
19
+ os.mkdir(output_dir)
20
+ self.output_dir = output_dir
21
+ self.predictions = []
22
+
23
+ def update(self, predictions):
24
+ for p in predictions:
25
+ with open(os.path.join(self.output_dir, p["file_name"]), "wb") as f:
26
+ f.write(p.pop("png_string"))
27
+
28
+ self.predictions += predictions
29
+
30
+ def synchronize_between_processes(self):
31
+ all_predictions = utils.all_gather(self.predictions)
32
+ merged_predictions = []
33
+ for p in all_predictions:
34
+ merged_predictions += p
35
+ self.predictions = merged_predictions
36
+
37
+ def summarize(self):
38
+ if utils.is_main_process():
39
+ json_data = {"annotations": self.predictions}
40
+ predictions_json = os.path.join(self.output_dir, "predictions.json")
41
+ with open(predictions_json, "w") as f:
42
+ f.write(json.dumps(json_data))
43
+ return pq_compute(self.gt_json, predictions_json, gt_folder=self.gt_folder, pred_folder=self.output_dir)
44
+ return None
datasets/random_crop.py ADDED
@@ -0,0 +1,135 @@
1
+ import PIL #version 1.2.0
2
+ import torch
3
+ import os
4
+ import torchvision.transforms.functional as F
5
+ import numpy as np
6
+ import random
7
+
8
+
9
+ def intersect(boxes1, boxes2):
10
+ '''
11
+ Find the intersection of every box combination between two sets of boxes
12
+ boxes1: bounding boxes 1, a tensor of dimensions (n1, 4)
13
+ boxes2: bounding boxes 2, a tensor of dimensions (n2, 4)
14
+
15
+ Out: Intersection each of boxes1 with respect to each of boxes2,
16
+ a tensor of dimensions (n1, n2)
17
+ '''
18
+ n1 = boxes1.size(0)
19
+ n2 = boxes2.size(0)
20
+ max_xy = torch.min(boxes1[:, 2:].unsqueeze(1).expand(n1, n2, 2),
21
+ boxes2[:, 2:].unsqueeze(0).expand(n1, n2, 2))
22
+
23
+ min_xy = torch.max(boxes1[:, :2].unsqueeze(1).expand(n1, n2, 2),
24
+ boxes2[:, :2].unsqueeze(0).expand(n1, n2, 2))
25
+ inter = torch.clamp(max_xy - min_xy , min=0) # (n1, n2, 2)
26
+ return inter[:, :, 0] * inter[:, :, 1] #(n1, n2)
27
+ def find_IoU(boxes1, boxes2):
28
+ '''
29
+ Find the IoU between every box in one set and every box in the other set
30
+ boxes1: a tensor of dimensions (n1, 4) (left, top, right , bottom)
31
+ boxes2: a tensor of dimensions (n2, 4)
32
+
33
+ Out: IoU each of boxes1 with respect to each of boxes2, a tensor of
34
+ dimensions (n1, n2)
35
+
36
+ Formula:
37
+ (box1 ∩ box2) / (box1 ∪ box2) = (box1 ∩ box2) / (area(box1) + area(box2) - (box1 ∩ box2))
38
+ '''
39
+ inter = intersect(boxes1, boxes2)
40
+ area_boxes1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
41
+ area_boxes2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
42
+
43
+ area_boxes1 = area_boxes1.unsqueeze(1).expand_as(inter) #(n1, n2)
44
+ area_boxes2 = area_boxes2.unsqueeze(0).expand_as(inter) #(n1, n2)
45
+ union = (area_boxes1 + area_boxes2 - inter)
46
+ return inter / union
47
+
48
+
49
+ def random_crop(image, boxes, labels, difficulties=None):
50
+ '''
51
+ image: A PIL image
52
+ boxes: Bounding boxes, a tensor of dimensions (#objects, 4)
53
+ labels: labels of object, a tensor of dimensions (#objects)
54
+ difficulties: difficulties of detect object, a tensor of dimensions (#objects)
55
+
56
+ Out: cropped image , new boxes, new labels, new difficulties
57
+ '''
58
+ if type(image) == PIL.Image.Image:
59
+ image = F.to_tensor(image)
60
+ original_h = image.size(1)
61
+ original_w = image.size(2)
62
+
63
+ while True:
64
+ mode = random.choice([0.1, 0.3, 0.5, 0.9, None])
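+ # `mode` is the minimum IoU required between the sampled crop and at least one
+ # box; None means the image is returned uncropped.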
65
+
66
+ if mode is None:
67
+ return F.to_pil_image(image), boxes, labels, difficulties
68
+
69
+ new_image = image
70
+ new_boxes = boxes
71
+ new_difficulties = difficulties
72
+ new_labels = labels
73
+ for _ in range(50):
74
+ # Crop dimensions: [0.3, 1] of original dimensions
75
+ new_h = random.uniform(0.3*original_h, original_h)
76
+ new_w = random.uniform(0.3*original_w, original_w)
77
+
78
+ # Aspect ratio constraint b/t .5 & 2
79
+ if new_h/new_w < 0.5 or new_h/new_w > 2:
80
+ continue
81
+
82
+ #Crop coordinate
83
+ left = random.uniform(0, original_w - new_w)
84
+ right = left + new_w
85
+ top = random.uniform(0, original_h - new_h)
86
+ bottom = top + new_h
87
+ crop = torch.FloatTensor([int(left), int(top), int(right), int(bottom)])
88
+
89
+ # Calculate IoU between the crop and the bounding boxes
90
+ overlap = find_IoU(crop.unsqueeze(0), boxes) #(1, #objects)
91
+ overlap = overlap.squeeze(0)
92
+
93
+ # If no bounding box has an IoU greater than the minimum, try again
94
+ if overlap.shape[0] == 0:
95
+ continue
96
+ if overlap.max().item() < mode:
97
+ continue
98
+
99
+ #Crop
100
+ new_image = image[:, int(top):int(bottom), int(left):int(right)] #(3, new_h, new_w)
101
+
102
+ #Center of bounding boxes
103
+ center_bb = (boxes[:, :2] + boxes[:, 2:])/2.0
104
+
105
+ # Find the bounding boxes whose center lies inside the crop
106
+ center_in_crop = (center_bb[:, 0] >left) * (center_bb[:, 0] < right
107
+ ) *(center_bb[:, 1] > top) * (center_bb[:, 1] < bottom) #( #objects)
108
+
109
+ if not center_in_crop.any():
110
+ continue
111
+
112
+ #take matching bounding box
113
+ new_boxes = boxes[center_in_crop, :]
114
+
115
+ #take matching labels
116
+ new_labels = labels[center_in_crop]
117
+
118
+ # take matching difficulties
119
+ if difficulties is not None:
120
+ new_difficulties = difficulties[center_in_crop]
121
+ else:
122
+ new_difficulties = None
123
+
124
+ # Use the box's left/top corner or the crop's, whichever lies further inside the crop
125
+ new_boxes[:, :2] = torch.max(new_boxes[:, :2], crop[:2])
126
+
127
+ #adjust to crop
128
+ new_boxes[:, :2] -= crop[:2]
129
+
130
+ new_boxes[:, 2:] = torch.min(new_boxes[:, 2:],crop[2:])
131
+
132
+ #adjust to crop
133
+ new_boxes[:, 2:] -= crop[:2]
134
+
135
+ return F.to_pil_image(new_image), new_boxes, new_labels, new_difficulties
datasets/sltransform.py ADDED
@@ -0,0 +1,247 @@
1
+ # modified from https://github.com/anhtuan85/Data-Augmentation-for-Object-Detection/blob/master/augmentation.ipynb
2
+
3
+ import PIL #version 1.2.0
4
+ from PIL import Image #version 6.1.0
5
+ import torch
6
+ import os
7
+ import torchvision.transforms.functional as F
8
+ import numpy as np
9
+ import random
10
+
11
+ from .random_crop import random_crop
12
+ from util.box_ops import box_cxcywh_to_xyxy, box_xyxy_to_cxcywh
13
+
14
+ class AdjustContrast:
15
+ def __init__(self, contrast_factor):
16
+ self.contrast_factor = contrast_factor
17
+
18
+ def __call__(self, img, target):
19
+ """
20
+ img (PIL Image or Tensor): Image to be adjusted.
21
+ """
22
+ _contrast_factor = ((random.random() + 1.0) / 2.0) * self.contrast_factor
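+ # random.random() + 1 lies in [1, 2), so the sampled factor is in
+ # [0.5, 1.0) * contrast_factor.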
23
+ img = F.adjust_contrast(img, _contrast_factor)
24
+ return img, target
25
+
26
+ class AdjustBrightness:
27
+ def __init__(self, brightness_factor):
28
+ self.brightness_factor = brightness_factor
29
+
30
+ def __call__(self, img, target):
31
+ """
32
+ img (PIL Image or Tensor): Image to be adjusted.
33
+ """
34
+ _brightness_factor = ((random.random() + 1.0) / 2.0) * self.brightness_factor
35
+ img = F.adjust_brightness(img, _brightness_factor)
36
+ return img, target
37
+
38
+ def lighting_noise(image):
39
+ '''
40
+ color channel swap in image
41
+ image: A PIL image
42
+ '''
43
+ new_image = image
44
+ perms = ((0, 1, 2), (0, 2, 1), (1, 0, 2),
45
+ (1, 2, 0), (2, 0, 1), (2, 1, 0))
46
+ swap = perms[random.randint(0, len(perms)- 1)]
47
+ new_image = F.to_tensor(new_image)
48
+ new_image = new_image[swap, :, :]
49
+ new_image = F.to_pil_image(new_image)
50
+ return new_image
51
+
52
+ class LightingNoise:
53
+ def __init__(self) -> None:
54
+ pass
55
+
56
+ def __call__(self, img, target):
57
+ return lighting_noise(img), target
58
+
59
+
60
+ def rotate(image, boxes, angle):
61
+ '''
62
+ Rotate image and bounding box
63
+ image: A PIL image (w, h)
64
+ boxes: A tensors of dimensions (#objects, 4)
65
+
66
+ Out: rotated image (w, h), rotated boxes
67
+ '''
68
+ new_image = image.copy()
69
+ new_boxes = boxes.clone()
70
+
71
+ #Rotate image, expand = True
72
+ w = image.width
73
+ h = image.height
74
+ cx = w/2
75
+ cy = h/2
76
+ new_image = new_image.rotate(angle, expand=True)
77
+ angle = np.radians(angle)
78
+ alpha = np.cos(angle)
79
+ beta = np.sin(angle)
80
+ #Get affine matrix
81
+ AffineMatrix = torch.tensor([[alpha, beta, (1-alpha)*cx - beta*cy],
82
+ [-beta, alpha, beta*cx + (1-alpha)*cy]])
83
+
84
+ #Rotation boxes
85
+ box_width = (boxes[:,2] - boxes[:,0]).reshape(-1,1)
86
+ box_height = (boxes[:,3] - boxes[:,1]).reshape(-1,1)
87
+
88
+ #Get corners for boxes
89
+ x1 = boxes[:,0].reshape(-1,1)
90
+ y1 = boxes[:,1].reshape(-1,1)
91
+
92
+ x2 = x1 + box_width
93
+ y2 = y1
94
+
95
+ x3 = x1
96
+ y3 = y1 + box_height
97
+
98
+ x4 = boxes[:,2].reshape(-1,1)
99
+ y4 = boxes[:,3].reshape(-1,1)
100
+
101
+ corners = torch.stack((x1,y1,x2,y2,x3,y3,x4,y4), dim= 1)
102
+ # corners.reshape(-1, 8) #Tensors of dimensions (#objects, 8)
103
+ corners = corners.reshape(-1,2) #Tensors of dimension (4* #objects, 2)
104
+ corners = torch.cat((corners, torch.ones(corners.shape[0], 1)), dim= 1) #(Tensors of dimension (4* #objects, 3))
105
+
106
+ cos = np.abs(AffineMatrix[0, 0])
107
+ sin = np.abs(AffineMatrix[0, 1])
108
+
109
+ nW = int((h * sin) + (w * cos))
110
+ nH = int((h * cos) + (w * sin))
111
+ AffineMatrix[0, 2] += (nW / 2) - cx
112
+ AffineMatrix[1, 2] += (nH / 2) - cy
113
+
114
+
115
+ #Apply affine transform
116
+ rotate_corners = torch.mm(AffineMatrix, corners.t().to(torch.float64)).t()
117
+ rotate_corners = rotate_corners.reshape(-1,8)
118
+
119
+ x_corners = rotate_corners[:,[0,2,4,6]]
120
+ y_corners = rotate_corners[:,[1,3,5,7]]
121
+
122
+ #Get (x_min, y_min, x_max, y_max)
123
+ x_min, _ = torch.min(x_corners, dim= 1)
124
+ x_min = x_min.reshape(-1, 1)
125
+ y_min, _ = torch.min(y_corners, dim= 1)
126
+ y_min = y_min.reshape(-1, 1)
127
+ x_max, _ = torch.max(x_corners, dim= 1)
128
+ x_max = x_max.reshape(-1, 1)
129
+ y_max, _ = torch.max(y_corners, dim= 1)
130
+ y_max = y_max.reshape(-1, 1)
131
+
132
+ new_boxes = torch.cat((x_min, y_min, x_max, y_max), dim= 1)
133
+
134
+ scale_x = new_image.width / w
135
+ scale_y = new_image.height / h
136
+
137
+ #Resize new image to (w, h)
138
+
139
+ new_image = new_image.resize((w, h))
140
+
141
+ #Resize boxes
142
+ new_boxes /= torch.Tensor([scale_x, scale_y, scale_x, scale_y])
143
+ new_boxes[:, 0] = torch.clamp(new_boxes[:, 0], 0, w)
144
+ new_boxes[:, 1] = torch.clamp(new_boxes[:, 1], 0, h)
145
+ new_boxes[:, 2] = torch.clamp(new_boxes[:, 2], 0, w)
146
+ new_boxes[:, 3] = torch.clamp(new_boxes[:, 3], 0, h)
147
+ return new_image, new_boxes
148
+
149
+ # def convert_xywh_to_xyxy(boxes: torch.Tensor):
150
+ # _boxes = boxes.clone()
151
+ # box_xy = _boxes[:, :2]
152
+ # box_wh = _boxes[:, 2:]
153
+ # box_x1y1 = box_xy - box_wh/2
154
+ # box_x2y2 = box_xy + box_wh/2
155
+ # box_xyxy = torch.cat((box_x1y1, box_x2y2), dim=-1)
156
+ # return box_xyxy
157
+
158
+ class Rotate:
159
+ def __init__(self, angle=10) -> None:
160
+ self.angle = angle
161
+
162
+ def __call__(self, img, target):
163
+ w,h = img.size
164
+ whwh = torch.Tensor([w, h, w, h])
165
+ boxes_xyxy = box_cxcywh_to_xyxy(target['boxes']) * whwh
166
+ img, boxes_new = rotate(img, boxes_xyxy, self.angle)
167
+ target['boxes'] = box_xyxy_to_cxcywh(boxes_new).to(boxes_xyxy.dtype) / (whwh + 1e-3)
168
+ return img, target
169
+
170
+
171
+ class RandomCrop:
172
+ def __init__(self) -> None:
173
+ pass
174
+
175
+ def __call__(self, img, target):
176
+ w,h = img.size
177
+ try:
178
+ boxes_xyxy = target['boxes']
179
+ labels = target['labels']
180
+ img, new_boxes, new_labels, _ = random_crop(img, boxes_xyxy, labels)
181
+ target['boxes'] = new_boxes
182
+ target['labels'] = new_labels
183
+ except Exception as e:
184
+ pass
185
+ return img, target
186
+
187
+
188
+ class RandomCropDebug:
189
+ def __init__(self) -> None:
190
+ pass
191
+
192
+ def __call__(self, img, target):
193
+ boxes_xyxy = target['boxes'].clone()
194
+ labels = target['labels'].clone()
195
+ img, new_boxes, new_labels, _ = random_crop(img, boxes_xyxy, labels)
196
+ target['boxes'] = new_boxes
197
+ target['labels'] = new_labels
198
+
199
+
200
+ return img, target
201
+
202
+ class RandomSelectMulti(object):
203
+ """
204
+ Randomly selects one transform from transformslist and applies it.
205
+ """
206
+ def __init__(self, transformslist, p=-1):
207
+ self.transformslist = transformslist
208
+ self.p = p
209
+ assert p == -1
210
+
211
+ def __call__(self, img, target):
212
+ if self.p == -1:
213
+ return random.choice(self.transformslist)(img, target)
214
+
215
+
216
+ class Albumentations:
217
+ def __init__(self):
218
+ import albumentations as A
219
+ self.transform = A.Compose([
220
+ A.Blur(p=0.01),
221
+ A.MedianBlur(p=0.01),
222
+ A.ToGray(p=0.01),
223
+ A.CLAHE(p=0.01),
224
+ A.RandomBrightnessContrast(p=0.005),
225
+ A.RandomGamma(p=0.005),
226
+ A.ImageCompression(quality_lower=75, p=0.005)],
227
+ bbox_params=A.BboxParams(format='pascal_voc', label_fields=['class_labels']))
228
+
229
+ def __call__(self, img, target, p=1.0):
230
+ """
231
+ Input:
232
+ target['boxes']: xyxy, unnormalized data.
233
+
234
+ """
235
+ boxes_raw = target['boxes']
236
+ labels_raw = target['labels']
237
+ img_new = img  # fall back to the original image when the Albumentations transform is skipped
+ img_np = np.array(img)
238
+ if self.transform and random.random() < p:
239
+ new_res = self.transform(image=img_np, bboxes=boxes_raw, class_labels=labels_raw) # transformed
240
+ boxes_new = torch.Tensor(new_res['bboxes']).to(boxes_raw.dtype).reshape_as(boxes_raw)
241
+ img_np = new_res['image']
242
+ labels_new = torch.Tensor(new_res['class_labels']).to(labels_raw.dtype)
243
+ img_new = Image.fromarray(img_np)
244
+ target['boxes'] = boxes_new
245
+ target['labels'] = labels_new
246
+
247
+ return img_new, target
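The classes in datasets/sltransform.py all share the same (img, target) calling convention, so they can be chained with the Compose wrapper defined in datasets/transforms.py below. A minimal usage sketch, assuming the repository root is on PYTHONPATH; the placeholder image, boxes, and labels here are illustrative and not taken from the upload:

import torch
from PIL import Image

from datasets.sltransform import AdjustBrightness, AdjustContrast, LightingNoise, Rotate
from datasets.transforms import Compose

# Each transform takes and returns an (img, target) pair, so they compose directly.
aug = Compose([
    AdjustContrast(0.3),
    AdjustBrightness(0.2),
    LightingNoise(),
    Rotate(angle=10),
])

img = Image.new("RGB", (640, 480))  # placeholder image
target = {
    # Rotate expects normalized (cx, cy, w, h) boxes, which it converts via box_cxcywh_to_xyxy.
    "boxes": torch.tensor([[0.5, 0.5, 0.2, 0.2]]),
    "labels": torch.tensor([1]),
}
img, target = aug(img, target)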
datasets/transforms.py ADDED
@@ -0,0 +1,338 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2
+ """
3
+ Transforms and data augmentation for both image + bbox.
4
+ """
5
+ import random
6
+
7
+ import PIL
8
+ import torch
9
+ import torchvision.transforms as T
10
+ import torchvision.transforms.functional as F
11
+
12
+ from util.box_ops import box_xyxy_to_cxcywh
13
+ from util.misc import interpolate
14
+
15
+
16
+ def crop(image, target, region):
17
+ cropped_image = F.crop(image, *region)
18
+
19
+ target = target.copy()
20
+ i, j, h, w = region
21
+
22
+ # should we do something wrt the original size?
23
+ target["size"] = torch.tensor([h, w])
24
+
25
+ fields = ["labels", "area"]
26
+
27
+ # Crop exemplars.
28
+ exemplars = target["exemplars"]
29
+ max_size = torch.as_tensor([w, h], dtype=torch.float32)
30
+ # Shift exemplars to cropped region.
31
+ cropped_exemplars = exemplars - torch.as_tensor([j, i, j, i])
32
+ # Correct exemplar regions that go past new image boundary (too far right).
33
+ cropped_exemplars = torch.min(cropped_exemplars.reshape(-1, 2, 2), max_size)
34
+ # Correct exemplar regions that go past new image boundary (too far left).
35
+ cropped_exemplars = cropped_exemplars.clamp(min=0)
36
+ # Get new exemplar areas.
37
+ area_exemplars = (cropped_exemplars[:, 1, :] - cropped_exemplars[:, 0, :]).prod(
38
+ dim=1
39
+ )
40
+ # Update [target] with cropped exemplars.
41
+ target["exemplars"] = cropped_exemplars.reshape(-1, 4)
42
+ if "boxes" in target:
43
+ boxes = target["boxes"]
44
+ max_size = torch.as_tensor([w, h], dtype=torch.float32)
45
+ cropped_boxes = boxes - torch.as_tensor([j, i, j, i])
46
+ cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size)
47
+ cropped_boxes = cropped_boxes.clamp(min=0)
48
+ area = (cropped_boxes[:, 1, :] - cropped_boxes[:, 0, :]).prod(dim=1)
49
+ target["boxes"] = cropped_boxes.reshape(-1, 4)
50
+ target["area"] = area
51
+ fields.append("boxes")
52
+
53
+ if "masks" in target:
54
+ # FIXME should we update the area here if there are no boxes?
55
+ target["masks"] = target["masks"][:, i : i + h, j : j + w]
56
+ fields.append("masks")
57
+
58
+ # Remove exemplars that have zero area (due to cropping).
59
+ keep = area_exemplars > 0
60
+ target["exemplars"] = target["exemplars"][keep, :]
61
+
62
+ # remove elements for which the boxes or masks that have zero area
63
+ if "boxes" in target or "masks" in target:
64
+ # favor boxes selection when defining which elements to keep
65
+ # this is compatible with previous implementation
66
+ if "boxes" in target:
67
+ cropped_boxes = target["boxes"].reshape(-1, 2, 2)
68
+ keep = torch.all(cropped_boxes[:, 1, :] > cropped_boxes[:, 0, :], dim=1)
69
+ else:
70
+ keep = target["masks"].flatten(1).any(1)
71
+
72
+ for field in fields:
73
+ target[field] = target[field][keep]
74
+
75
+ return cropped_image, target
76
+
77
+
78
+ def hflip(image, target):
79
+ flipped_image = F.hflip(image)
80
+
81
+ w, h = image.size
82
+
83
+ target = target.copy()
84
+
85
+ exemplars = target["exemplars"]
86
+ # Mirror exemplar x-coordinates (horizontal flip).
87
+ exemplars = exemplars[:, [2, 1, 0, 3]] * torch.as_tensor([-1, 1, -1, 1])
88
+ # Shift flipped image to (0, 0).
89
+ exemplars = exemplars + torch.as_tensor([w, 0, w, 0])
90
+ # Update [target] with horizontally flipped exemplars.
91
+ target["exemplars"] = exemplars
92
+
93
+ if "boxes" in target:
94
+ boxes = target["boxes"]
95
+ boxes = boxes[:, [2, 1, 0, 3]] * torch.as_tensor(
96
+ [-1, 1, -1, 1]
97
+ ) + torch.as_tensor([w, 0, w, 0])
98
+ target["boxes"] = boxes
99
+
100
+ if "masks" in target:
101
+ target["masks"] = target["masks"].flip(-1)
102
+
103
+ return flipped_image, target
104
+
105
+
106
+ def resize(image, target, size, max_size=None):
107
+ # size can be min_size (scalar) or (w, h) tuple
108
+
109
+ def get_size_with_aspect_ratio(image_size, size, max_size=None):
110
+ w, h = image_size
111
+ if max_size is not None:
112
+ min_original_size = float(min((w, h)))
113
+ max_original_size = float(max((w, h)))
114
+ if max_original_size / min_original_size * size > max_size:
115
+ size = int(round(max_size * min_original_size / max_original_size))
116
+
117
+ if (w <= h and w == size) or (h <= w and h == size):
118
+ return (h, w)
119
+
120
+ if w < h:
121
+ ow = size
122
+ oh = int(size * h / w)
123
+ else:
124
+ oh = size
125
+ ow = int(size * w / h)
126
+
127
+ return (oh, ow)
128
+
129
+ def get_size(image_size, size, max_size=None):
130
+ if isinstance(size, (list, tuple)):
131
+ return size[::-1]
132
+ else:
133
+ return get_size_with_aspect_ratio(image_size, size, max_size)
134
+
135
+ try:
136
+ size = get_size(image.size, size, max_size)
137
+ except:
138
+ size = get_size((image.shape[-1], image.shape[-2]), size, max_size)
139
+ rescaled_image = F.resize(image, size)
140
+
141
+ if target is None:
142
+ return rescaled_image, None
143
+
144
+ ratios = tuple(
145
+ float(s) / float(s_orig) for s, s_orig in zip(rescaled_image.size, image.size)
146
+ )
147
+ ratio_width, ratio_height = ratios
148
+
149
+ target = target.copy()
150
+
151
+ # Rescale exemplars.
152
+ exemplars = target["exemplars"]
153
+ if exemplars.shape[-1] == 4:
154
+ scaled_exemplars = exemplars * torch.as_tensor(
155
+ [ratio_width, ratio_height, ratio_width, ratio_height]
156
+ )
157
+ else:
158
+ scaled_exemplars = exemplars
159
+
160
+ target["exemplars"] = scaled_exemplars
161
+
162
+ if "boxes" in target:
163
+ boxes = target["boxes"]
164
+ scaled_boxes = boxes * torch.as_tensor(
165
+ [ratio_width, ratio_height, ratio_width, ratio_height]
166
+ )
167
+ target["boxes"] = scaled_boxes
168
+
169
+ if "area" in target:
170
+ area = target["area"]
171
+ scaled_area = area * (ratio_width * ratio_height)
172
+ target["area"] = scaled_area
173
+
174
+ h, w = size
175
+ target["size"] = torch.tensor([h, w])
176
+
177
+ if "masks" in target:
178
+ target["masks"] = (
179
+ interpolate(target["masks"][:, None].float(), size, mode="nearest")[:, 0]
180
+ > 0.5
181
+ )
182
+
183
+ return rescaled_image, target
184
+
185
+
186
+ def pad(image, target, padding):
187
+ # assumes that we only pad on the bottom right corners
188
+ padded_image = F.pad(image, (0, 0, padding[0], padding[1]))
189
+ if target is None:
190
+ return padded_image, None
191
+ target = target.copy()
192
+ # should we do something wrt the original size?
193
+ target["size"] = torch.tensor(padded_image.size[::-1])
194
+ if "masks" in target:
195
+ target["masks"] = torch.nn.functional.pad(
196
+ target["masks"], (0, padding[0], 0, padding[1])
197
+ )
198
+ return padded_image, target
199
+
200
+
201
+ class ResizeDebug(object):
202
+ def __init__(self, size):
203
+ self.size = size
204
+
205
+ def __call__(self, img, target):
206
+ return resize(img, target, self.size)
207
+
208
+
209
+ class RandomCrop(object):
210
+ def __init__(self, size):
211
+ self.size = size
212
+
213
+ def __call__(self, img, target):
214
+ region = T.RandomCrop.get_params(img, self.size)
215
+ return crop(img, target, region)
216
+
217
+
218
+ class RandomSizeCrop(object):
219
+ def __init__(self, min_size: int, max_size: int):
220
+ self.min_size = min_size
221
+ self.max_size = max_size
222
+
223
+ def __call__(self, img: PIL.Image.Image, target: dict):
224
+ w = random.randint(self.min_size, min(img.width, self.max_size))
225
+ h = random.randint(self.min_size, min(img.height, self.max_size))
226
+ region = T.RandomCrop.get_params(img, [h, w])
227
+ return crop(img, target, region)
228
+
229
+
230
+ class CenterCrop(object):
231
+ def __init__(self, size):
232
+ self.size = size
233
+
234
+ def __call__(self, img, target):
235
+ image_width, image_height = img.size
236
+ crop_height, crop_width = self.size
237
+ crop_top = int(round((image_height - crop_height) / 2.0))
238
+ crop_left = int(round((image_width - crop_width) / 2.0))
239
+ return crop(img, target, (crop_top, crop_left, crop_height, crop_width))
240
+
241
+
242
+ class RandomHorizontalFlip(object):
243
+ def __init__(self, p=0.5):
244
+ self.p = p
245
+
246
+ def __call__(self, img, target):
247
+ if random.random() < self.p:
248
+ return hflip(img, target)
249
+ return img, target
250
+
251
+
252
+ class RandomResize(object):
253
+ def __init__(self, sizes, max_size=None):
254
+ assert isinstance(sizes, (list, tuple))
255
+ self.sizes = sizes
256
+ self.max_size = max_size
257
+
258
+ def __call__(self, img, target=None):
259
+ size = random.choice(self.sizes)
260
+ return resize(img, target, size, self.max_size)
261
+
262
+
263
+ class RandomPad(object):
264
+ def __init__(self, max_pad):
265
+ self.max_pad = max_pad
266
+
267
+ def __call__(self, img, target):
268
+ pad_x = random.randint(0, self.max_pad)
269
+ pad_y = random.randint(0, self.max_pad)
270
+ return pad(img, target, (pad_x, pad_y))
271
+
272
+
273
+ class RandomSelect(object):
274
+ """
275
+ Randomly selects between transforms1 and transforms2,
276
+ with probability p for transforms1 and (1 - p) for transforms2
277
+ """
278
+
279
+ def __init__(self, transforms1, transforms2, p=0.5):
280
+ self.transforms1 = transforms1
281
+ self.transforms2 = transforms2
282
+ self.p = p
283
+
284
+ def __call__(self, img, target):
285
+ if random.random() < self.p:
286
+ return self.transforms1(img, target)
287
+ return self.transforms2(img, target)
288
+
289
+
290
+ class ToTensor(object):
291
+ def __call__(self, img, target):
292
+ return F.to_tensor(img), target
293
+
294
+
295
+ class RandomErasing(object):
296
+ def __init__(self, *args, **kwargs):
297
+ self.eraser = T.RandomErasing(*args, **kwargs)
298
+
299
+ def __call__(self, img, target):
300
+ return self.eraser(img), target
301
+
302
+
303
+ class Normalize(object):
304
+ def __init__(self, mean, std):
305
+ self.mean = mean
306
+ self.std = std
307
+
308
+ def __call__(self, image, target=None):
309
+ image = F.normalize(image, mean=self.mean, std=self.std)
310
+ if target is None:
311
+ return image, None
312
+ target = target.copy()
313
+ h, w = image.shape[-2:]
314
+ # No normalization of exemplars needed, since they are used directly for cropping.
315
+ if "boxes" in target:
316
+ boxes = target["boxes"]
317
+ boxes = box_xyxy_to_cxcywh(boxes)
318
+ boxes = boxes / torch.tensor([w, h, w, h], dtype=torch.float32)
319
+ target["boxes"] = boxes
320
+ return image, target
321
+
322
+
323
+ class Compose(object):
324
+ def __init__(self, transforms):
325
+ self.transforms = transforms
326
+
327
+ def __call__(self, image, target):
328
+ for t in self.transforms:
329
+ image, target = t(image, target)
330
+ return image, target
331
+
332
+ def __repr__(self):
333
+ format_string = self.__class__.__name__ + "("
334
+ for t in self.transforms:
335
+ format_string += "\n"
336
+ format_string += " {0}".format(t)
337
+ format_string += "\n)"
338
+ return format_string
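Because every transform above shares the (image, target) interface, an inference-time pipeline can be assembled directly from these classes. A minimal sketch, not taken from the upload: the 800/1333 sizes mirror data_aug_scales / data_aug_max_size in debug/config_cfg.py further down, and the mean/std are the standard ImageNet statistics, which may differ from what the training scripts actually use. Note that crop, hflip, and resize above index target["exemplars"] unconditionally, so a non-None target must carry that key.

import datasets.transforms as T

# Resize the shorter side to 800 (long side capped at 1333), then convert to a
# tensor and normalize; with target=None every stage simply passes None through.
eval_transform = T.Compose([
    T.RandomResize([800], max_size=1333),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
# image_tensor, _ = eval_transform(pil_image, None)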
debug/config_args_all.json ADDED
@@ -0,0 +1,308 @@
1
+ {
2
+ "config_file": "config/cfg_fsc147_vit_b.py",
3
+ "options": {
4
+ "text_encoder_type": "checkpoints/bert-base-uncased"
5
+ },
6
+ "datasets": "config/datasets_fsc147.json",
7
+ "remove_difficult": false,
8
+ "fix_size": false,
9
+ "output_dir": "./debug",
10
+ "note": "",
11
+ "device": "cuda",
12
+ "seed": 42,
13
+ "resume": "",
14
+ "pretrain_model_path": "checkpoints/groundingdino_swinb_cogcoor.pth",
15
+ "finetune_ignore": null,
16
+ "start_epoch": 0,
17
+ "eval": false,
18
+ "num_workers": 8,
19
+ "test": false,
20
+ "debug": false,
21
+ "find_unused_params": false,
22
+ "save_results": false,
23
+ "save_log": false,
24
+ "world_size": 1,
25
+ "dist_url": "env://",
26
+ "rank": 0,
27
+ "local_rank": 0,
28
+ "amp": false,
29
+ "distributed": false,
30
+ "data_aug_scales": [
31
+ 480,
32
+ 512,
33
+ 544,
34
+ 576,
35
+ 608,
36
+ 640,
37
+ 672,
38
+ 704,
39
+ 736,
40
+ 768,
41
+ 800
42
+ ],
43
+ "data_aug_max_size": 1333,
44
+ "data_aug_scales2_resize": [
45
+ 400,
46
+ 500,
47
+ 600
48
+ ],
49
+ "data_aug_scales2_crop": [
50
+ 384,
51
+ 600
52
+ ],
53
+ "data_aug_scale_overlap": null,
54
+ "batch_size": 4,
55
+ "modelname": "groundingdino",
56
+ "backbone": "swin_B_384_22k",
57
+ "position_embedding": "sine",
58
+ "pe_temperatureH": 20,
59
+ "pe_temperatureW": 20,
60
+ "return_interm_indices": [
61
+ 1,
62
+ 2,
63
+ 3
64
+ ],
65
+ "enc_layers": 6,
66
+ "dec_layers": 6,
67
+ "pre_norm": false,
68
+ "dim_feedforward": 2048,
69
+ "hidden_dim": 256,
70
+ "dropout": 0.0,
71
+ "nheads": 8,
72
+ "num_queries": 900,
73
+ "query_dim": 4,
74
+ "num_patterns": 0,
75
+ "num_feature_levels": 4,
76
+ "enc_n_points": 4,
77
+ "dec_n_points": 4,
78
+ "two_stage_type": "standard",
79
+ "two_stage_bbox_embed_share": false,
80
+ "two_stage_class_embed_share": false,
81
+ "transformer_activation": "relu",
82
+ "dec_pred_bbox_embed_share": true,
83
+ "dn_box_noise_scale": 1.0,
84
+ "dn_label_noise_ratio": 0.5,
85
+ "dn_label_coef": 1.0,
86
+ "dn_bbox_coef": 1.0,
87
+ "embed_init_tgt": true,
88
+ "dn_labelbook_size": 91,
89
+ "max_text_len": 256,
90
+ "text_encoder_type": "checkpoints/bert-base-uncased",
91
+ "use_text_enhancer": true,
92
+ "use_fusion_layer": true,
93
+ "use_checkpoint": true,
94
+ "use_transformer_ckpt": true,
95
+ "use_text_cross_attention": true,
96
+ "text_dropout": 0.0,
97
+ "fusion_dropout": 0.0,
98
+ "fusion_droppath": 0.1,
99
+ "sub_sentence_present": true,
100
+ "max_labels": 90,
101
+ "lr": 0.0001,
102
+ "backbone_freeze_keywords": null,
103
+ "freeze_keywords": [
104
+ "backbone.0",
105
+ "bert"
106
+ ],
107
+ "lr_backbone": 1e-05,
108
+ "lr_backbone_names": [
109
+ "backbone.0",
110
+ "bert"
111
+ ],
112
+ "lr_linear_proj_mult": 1e-05,
113
+ "lr_linear_proj_names": [
114
+ "ref_point_head",
115
+ "sampling_offsets"
116
+ ],
117
+ "weight_decay": 0.0001,
118
+ "param_dict_type": "ddetr_in_mmdet",
119
+ "ddetr_lr_param": false,
120
+ "epochs": 30,
121
+ "lr_drop": 10,
122
+ "save_checkpoint_interval": 10,
123
+ "clip_max_norm": 0.1,
124
+ "onecyclelr": false,
125
+ "multi_step_lr": false,
126
+ "lr_drop_list": [
127
+ 10,
128
+ 20
129
+ ],
130
+ "frozen_weights": null,
131
+ "dilation": false,
132
+ "pdetr3_bbox_embed_diff_each_layer": false,
133
+ "pdetr3_refHW": -1,
134
+ "random_refpoints_xy": false,
135
+ "fix_refpoints_hw": -1,
136
+ "dabdetr_yolo_like_anchor_update": false,
137
+ "dabdetr_deformable_encoder": false,
138
+ "dabdetr_deformable_decoder": false,
139
+ "use_deformable_box_attn": false,
140
+ "box_attn_type": "roi_align",
141
+ "dec_layer_number": null,
142
+ "decoder_layer_noise": false,
143
+ "dln_xy_noise": 0.2,
144
+ "dln_hw_noise": 0.2,
145
+ "add_channel_attention": false,
146
+ "add_pos_value": false,
147
+ "two_stage_pat_embed": 0,
148
+ "two_stage_add_query_num": 0,
149
+ "two_stage_learn_wh": false,
150
+ "two_stage_default_hw": 0.05,
151
+ "two_stage_keep_all_tokens": false,
152
+ "num_select": 900,
153
+ "batch_norm_type": "FrozenBatchNorm2d",
154
+ "masks": false,
155
+ "aux_loss": true,
156
+ "set_cost_class": 5.0,
157
+ "set_cost_bbox": 1.0,
158
+ "set_cost_giou": 0.0,
159
+ "cls_loss_coef": 5.0,
160
+ "bbox_loss_coef": 1.0,
161
+ "giou_loss_coef": 0.0,
162
+ "enc_loss_coef": 1.0,
163
+ "interm_loss_coef": 1.0,
164
+ "no_interm_box_loss": false,
165
+ "mask_loss_coef": 1.0,
166
+ "dice_loss_coef": 1.0,
167
+ "focal_alpha": 0.25,
168
+ "focal_gamma": 2.0,
169
+ "decoder_sa_type": "sa",
170
+ "matcher_type": "HungarianMatcher",
171
+ "decoder_module_seq": [
172
+ "sa",
173
+ "ca",
174
+ "ffn"
175
+ ],
176
+ "nms_iou_threshold": -1,
177
+ "dec_pred_class_embed_share": true,
178
+ "match_unstable_error": true,
179
+ "use_detached_boxes_dec_out": false,
180
+ "dn_scalar": 100,
181
+ "box_threshold": 0.23,
182
+ "text_threshold": 0,
183
+ "use_coco_eval": false,
184
+ "label_list": [
185
+ "alcohol bottle",
186
+ "baguette roll",
187
+ "ball",
188
+ "banana",
189
+ "bead",
190
+ "bee",
191
+ "birthday candle",
192
+ "biscuit",
193
+ "boat",
194
+ "bottle",
195
+ "bowl",
196
+ "box",
197
+ "bread roll",
198
+ "brick",
199
+ "buffalo",
200
+ "bun",
201
+ "calamari ring",
202
+ "can",
203
+ "candle",
204
+ "cap",
205
+ "car",
206
+ "cartridge",
207
+ "cassette",
208
+ "cement bag",
209
+ "cereal",
210
+ "chewing gum piece",
211
+ "chopstick",
212
+ "clam",
213
+ "coffee bean",
214
+ "coin",
215
+ "cotton ball",
216
+ "cow",
217
+ "crane",
218
+ "crayon",
219
+ "croissant",
220
+ "crow",
221
+ "cup",
222
+ "cupcake",
223
+ "cupcake holder",
224
+ "fish",
225
+ "gemstone",
226
+ "go game piece",
227
+ "goat",
228
+ "goldfish snack",
229
+ "goose",
230
+ "ice cream",
231
+ "ice cream cone",
232
+ "instant noodle",
233
+ "jade stone",
234
+ "jeans",
235
+ "kidney bean",
236
+ "kitchen towel",
237
+ "lighter",
238
+ "lipstick",
239
+ "m&m piece",
240
+ "macaron",
241
+ "match",
242
+ "meat skewer",
243
+ "mini blind",
244
+ "mosaic tile",
245
+ "naan bread",
246
+ "nail",
247
+ "nut",
248
+ "onion ring",
249
+ "orange",
250
+ "pearl",
251
+ "pen",
252
+ "pencil",
253
+ "penguin",
254
+ "pepper",
255
+ "person",
256
+ "pigeon",
257
+ "plate",
258
+ "polka dot tile",
259
+ "potato",
260
+ "rice bag",
261
+ "roof tile",
262
+ "screw",
263
+ "shoe",
264
+ "spoon",
265
+ "spring roll",
266
+ "stair",
267
+ "stapler pin",
268
+ "straw",
269
+ "supermarket shelf",
270
+ "swan",
271
+ "tomato",
272
+ "watermelon",
273
+ "window",
274
+ "zebra"
275
+ ],
276
+ "val_label_list": [
277
+ "ant",
278
+ "bird",
279
+ "book",
280
+ "bottle cap",
281
+ "bullet",
282
+ "camel",
283
+ "chair",
284
+ "chicken wing",
285
+ "donut",
286
+ "donut holder",
287
+ "flamingo",
288
+ "flower",
289
+ "flower pot",
290
+ "grape",
291
+ "horse",
292
+ "kiwi",
293
+ "milk carton",
294
+ "oyster",
295
+ "oyster shell",
296
+ "package of fresh cut fruit",
297
+ "peach",
298
+ "pill",
299
+ "polka dot",
300
+ "prawn cracker",
301
+ "sausage",
302
+ "seagull",
303
+ "shallot",
304
+ "shirt",
305
+ "skateboard",
306
+ "toilet paper roll"
307
+ ]
308
+ }
debug/config_args_raw.json ADDED
@@ -0,0 +1,30 @@
1
+ {
2
+ "config_file": "config/cfg_fsc147_vit_b.py",
3
+ "options": {
4
+ "text_encoder_type": "checkpoints/bert-base-uncased"
5
+ },
6
+ "datasets": "config/datasets_fsc147.json",
7
+ "remove_difficult": false,
8
+ "fix_size": false,
9
+ "output_dir": "./debug",
10
+ "note": "",
11
+ "device": "cuda",
12
+ "seed": 42,
13
+ "resume": "",
14
+ "pretrain_model_path": "checkpoints/groundingdino_swinb_cogcoor.pth",
15
+ "finetune_ignore": null,
16
+ "start_epoch": 0,
17
+ "eval": false,
18
+ "num_workers": 8,
19
+ "test": false,
20
+ "debug": false,
21
+ "find_unused_params": false,
22
+ "save_results": false,
23
+ "save_log": false,
24
+ "world_size": 1,
25
+ "dist_url": "env://",
26
+ "rank": 0,
27
+ "local_rank": 0,
28
+ "amp": false,
29
+ "distributed": false
30
+ }
debug/config_cfg.py ADDED
@@ -0,0 +1,140 @@
1
+ data_aug_scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
2
+ data_aug_max_size = 1333
3
+ data_aug_scales2_resize = [400, 500, 600]
4
+ data_aug_scales2_crop = [384, 600]
5
+ data_aug_scale_overlap = None
6
+ batch_size = 4
7
+ modelname = 'groundingdino'
8
+ backbone = 'swin_B_384_22k'
9
+ position_embedding = 'sine'
10
+ pe_temperatureH = 20
11
+ pe_temperatureW = 20
12
+ return_interm_indices = [1, 2, 3]
13
+ enc_layers = 6
14
+ dec_layers = 6
15
+ pre_norm = False
16
+ dim_feedforward = 2048
17
+ hidden_dim = 256
18
+ dropout = 0.0
19
+ nheads = 8
20
+ num_queries = 900
21
+ query_dim = 4
22
+ num_patterns = 0
23
+ num_feature_levels = 4
24
+ enc_n_points = 4
25
+ dec_n_points = 4
26
+ two_stage_type = 'standard'
27
+ two_stage_bbox_embed_share = False
28
+ two_stage_class_embed_share = False
29
+ transformer_activation = 'relu'
30
+ dec_pred_bbox_embed_share = True
31
+ dn_box_noise_scale = 1.0
32
+ dn_label_noise_ratio = 0.5
33
+ dn_label_coef = 1.0
34
+ dn_bbox_coef = 1.0
35
+ embed_init_tgt = True
36
+ dn_labelbook_size = 91
37
+ max_text_len = 256
38
+ text_encoder_type = 'checkpoints/bert-base-uncased'
39
+ use_text_enhancer = True
40
+ use_fusion_layer = True
41
+ use_checkpoint = True
42
+ use_transformer_ckpt = True
43
+ use_text_cross_attention = True
44
+ text_dropout = 0.0
45
+ fusion_dropout = 0.0
46
+ fusion_droppath = 0.1
47
+ sub_sentence_present = True
48
+ max_labels = 90
49
+ lr = 0.0001
50
+ backbone_freeze_keywords = None
51
+ freeze_keywords = ['backbone.0', 'bert']
52
+ lr_backbone = 1e-05
53
+ lr_backbone_names = ['backbone.0', 'bert']
54
+ lr_linear_proj_mult = 1e-05
55
+ lr_linear_proj_names = ['ref_point_head', 'sampling_offsets']
56
+ weight_decay = 0.0001
57
+ param_dict_type = 'ddetr_in_mmdet'
58
+ ddetr_lr_param = False
59
+ epochs = 30
60
+ lr_drop = 10
61
+ save_checkpoint_interval = 10
62
+ clip_max_norm = 0.1
63
+ onecyclelr = False
64
+ multi_step_lr = False
65
+ lr_drop_list = [10, 20]
66
+ frozen_weights = None
67
+ dilation = False
68
+ pdetr3_bbox_embed_diff_each_layer = False
69
+ pdetr3_refHW = -1
70
+ random_refpoints_xy = False
71
+ fix_refpoints_hw = -1
72
+ dabdetr_yolo_like_anchor_update = False
73
+ dabdetr_deformable_encoder = False
74
+ dabdetr_deformable_decoder = False
75
+ use_deformable_box_attn = False
76
+ box_attn_type = 'roi_align'
77
+ dec_layer_number = None
78
+ decoder_layer_noise = False
79
+ dln_xy_noise = 0.2
80
+ dln_hw_noise = 0.2
81
+ add_channel_attention = False
82
+ add_pos_value = False
83
+ two_stage_pat_embed = 0
84
+ two_stage_add_query_num = 0
85
+ two_stage_learn_wh = False
86
+ two_stage_default_hw = 0.05
87
+ two_stage_keep_all_tokens = False
88
+ num_select = 900
89
+ batch_norm_type = 'FrozenBatchNorm2d'
90
+ masks = False
91
+ aux_loss = True
92
+ set_cost_class = 5.0
93
+ set_cost_bbox = 1.0
94
+ set_cost_giou = 0.0
95
+ cls_loss_coef = 5.0
96
+ bbox_loss_coef = 1.0
97
+ giou_loss_coef = 0.0
98
+ enc_loss_coef = 1.0
99
+ interm_loss_coef = 1.0
100
+ no_interm_box_loss = False
101
+ mask_loss_coef = 1.0
102
+ dice_loss_coef = 1.0
103
+ focal_alpha = 0.25
104
+ focal_gamma = 2.0
105
+ decoder_sa_type = 'sa'
106
+ matcher_type = 'HungarianMatcher'
107
+ decoder_module_seq = ['sa', 'ca', 'ffn']
108
+ nms_iou_threshold = -1
109
+ dec_pred_class_embed_share = True
110
+ match_unstable_error = True
111
+ use_detached_boxes_dec_out = False
112
+ dn_scalar = 100
113
+ box_threshold = 0.23
114
+ text_threshold = 0
115
+ use_coco_eval = False
116
+ label_list = [
117
+ 'alcohol bottle', 'baguette roll', 'ball', 'banana', 'bead', 'bee',
118
+ 'birthday candle', 'biscuit', 'boat', 'bottle', 'bowl', 'box',
119
+ 'bread roll', 'brick', 'buffalo', 'bun', 'calamari ring', 'can', 'candle',
120
+ 'cap', 'car', 'cartridge', 'cassette', 'cement bag', 'cereal',
121
+ 'chewing gum piece', 'chopstick', 'clam', 'coffee bean', 'coin',
122
+ 'cotton ball', 'cow', 'crane', 'crayon', 'croissant', 'crow', 'cup',
123
+ 'cupcake', 'cupcake holder', 'fish', 'gemstone', 'go game piece', 'goat',
124
+ 'goldfish snack', 'goose', 'ice cream', 'ice cream cone', 'instant noodle',
125
+ 'jade stone', 'jeans', 'kidney bean', 'kitchen towel', 'lighter',
126
+ 'lipstick', 'm&m piece', 'macaron', 'match', 'meat skewer', 'mini blind',
127
+ 'mosaic tile', 'naan bread', 'nail', 'nut', 'onion ring', 'orange',
128
+ 'pearl', 'pen', 'pencil', 'penguin', 'pepper', 'person', 'pigeon', 'plate',
129
+ 'polka dot tile', 'potato', 'rice bag', 'roof tile', 'screw', 'shoe',
130
+ 'spoon', 'spring roll', 'stair', 'stapler pin', 'straw',
131
+ 'supermarket shelf', 'swan', 'tomato', 'watermelon', 'window', 'zebra'
132
+ ]
133
+ val_label_list = [
134
+ 'ant', 'bird', 'book', 'bottle cap', 'bullet', 'camel', 'chair',
135
+ 'chicken wing', 'donut', 'donut holder', 'flamingo', 'flower',
136
+ 'flower pot', 'grape', 'horse', 'kiwi', 'milk carton', 'oyster',
137
+ 'oyster shell', 'package of fresh cut fruit', 'peach', 'pill', 'polka dot',
138
+ 'prawn cracker', 'sausage', 'seagull', 'shallot', 'shirt', 'skateboard',
139
+ 'toilet paper roll'
140
+ ]
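debug/config_cfg.py is a flat module of plain Python variables rather than a structured config object. One generic way to read it back, shown only as an illustration and not necessarily the loader this repository uses, is to execute the file and collect its globals:

import runpy

# Run the config module and keep only the plain variables (drop __name__, __builtins__, etc.).
cfg = {k: v for k, v in runpy.run_path("debug/config_cfg.py").items()
       if not k.startswith("__")}
print(cfg["backbone"], cfg["num_queries"], len(cfg["label_list"]))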
debug/info.txt ADDED
The diff for this file is too large to render. See raw diff
 
deer.jpg ADDED
egg.jpg ADDED
fish.jpg ADDED
flagged/Input Image/88e8b1b238c064836037/HVITa2016a_000011.JPG ADDED
flagged/log.csv ADDED
@@ -0,0 +1,215 @@
1
+ Input Image,Specify object to count here:,output,flag,username,timestamp
2
+ flagged/Input Image/88e8b1b238c064836037/HVITa2016a_000011.JPG,bird,flagged/output/a686c9813d1a6c436907/image.webp,,,2024-06-09 09:33:54.235722
3
+ flagged/Input Image/875b25d454777d421c42/221113_ECCC_MRADUL_ELEPHANT_MIA_RHEA_EVENING_WALK_KEEPER-8-1920x900.jpg,people and elephants,,,flagged/Detected Instances/76fafa9d17afaaf9eb38/image.webp,4,,,2024-06-18 16:54:16.644520
4
+ flagged/Input Image/037d1ea9a55366ad2940/2.jpg,shell,,"{""image"": ""flagged/Specify object to count with visual exemplars here/fceed5d5292244ecdd73/2.jpg"", ""points"": [[380.0, 211.0, 2.0, 483.0, 317.0, 3.0]]}",flagged/Detected Instances/27c94b87a5ced00b9dd1/image.webp,8,,,2024-06-22 08:03:07.969202
5
+ flagged/Input Image/b8faa04105f66c5a71d1/7.jpg,woman,,,flagged/Detected Instances/cd67c449c0a10a91f35b/image.webp,1,,,2024-06-22 08:08:57.657953
6
+ flagged/Input Image/350e00785dada901c520/7.jpg,woman,,,flagged/Detected Instances/b16f6a58d39f8b72ca49/image.webp,1,,,2024-06-22 08:18:43.429051
7
+ flagged/Input Image/12d92058a4ac08b10dea/7.jpg,woman,,"{""image"": ""flagged/Specify object to count with visual exemplars here/38245c2c066faff3cae8/7.jpg"", ""points"": [[113.0, 132.0, 2.0, 260.0, 315.0, 3.0]]}",flagged/Detected Instances/b941dacc2ffe391d2e16/image.webp,10,,,2024-06-22 08:19:03.379704
8
+ flagged/Input Image/9b95bcefbe76b48d43ea/39.jpg,,,"{""image"": ""flagged/Specify object to count with visual exemplars here/f7cb8955646797bfe577/39.jpg"", ""points"": [[218.0, 225.0, 2.0, 295.0, 282.0, 3.0]]}",flagged/Detected Instances/f1930c50e7cf23e1bab8/image.webp,20,,,2024-06-22 08:19:37.258705
9
+ flagged/Input Image/a29aa3c68b71af80ceac/39.jpg,tomato,,"{""image"": ""flagged/Specify object to count with visual exemplars here/80430ff258140b26bd0e/39.jpg"", ""points"": [[218.0, 225.0, 2.0, 295.0, 282.0, 3.0]]}",flagged/Detected Instances/fc4d452719e25259f01d/image.webp,19,,,2024-06-22 08:19:58.499357
10
+ flagged/Input Image/bdf516de7a637158f274/HVITa2016a_000011.JPG,bird,,,flagged/Detected Instances/d0d2e5d0a4a0ac61ac4d/image.webp,30,,,2024-06-22 09:39:09.374430
11
+ flagged/Input Image/9d6b2710e64f8889dd64/HVITa2016a_000011.JPG,,,"{""image"": ""flagged/Specify object to count with visual exemplars here/453f32945cd9e681b483/KAPWa2019a_000132.JPG"", ""points"": [[510.0, 150.0, 2.0, 534.0, 176.0, 3.0], [684.0, 205.0, 2.0, 712.0, 224.0, 3.0]]}",flagged/Detected Instances/5c1af4b6118751ff9b9d/image.webp,32,,,2024-06-22 09:40:38.259957
12
+ flagged/Input Image/5ca9673ad1540cff6509/HVITa2016a_000011.JPG,bird,,"{""image"": ""flagged/Specify object to count with visual exemplars here/5df88f87ed6641277db1/KAPWa2019a_000132.JPG"", ""points"": [[744.0, 429.0, 2.0, 763.0, 461.0, 3.0]]}",flagged/Detected Instances/f44fe32f6932dd6574b3/image.webp,31,,,2024-06-22 09:41:55.748604
13
+ flagged/Input Image/b6a179bab6c6189ddad7/55.jpg,tomato,,,flagged/Detected Instances/6d315017249048f38b52/image.webp,50,,,2024-06-22 10:41:12.653937
14
+ flagged/Input Image/9bf0e79d190639880832/55.jpg,tomato,,"{""image"": ""flagged/Specify object to count with visual exemplars here/fe802eb93b73f4dae6b7/42.jpg"", ""points"": [[180.0, 128.0, 2.0, 224.0, 162.0, 3.0]]}",flagged/Detected Instances/f1b139f485f3c6049bb5/image.webp,50,,,2024-06-22 10:41:37.183988
15
+ flagged/Input Image/80d1ea6bfc5bc8002c62/28.jpg,cinnamon roll,,,flagged/Detected Instances/caac9eeb8dcadfd732cb/image.webp,8,,,2024-06-22 11:13:59.554789
16
+ flagged/Input Image/3f37e6da6e272866e8de/3.jpg,hot air balloon,,,flagged/Detected Instances/24844df4b3df513a0c36/image.webp,11,,,2024-06-22 14:11:32.948564
17
+ flagged/Input Image/5e7dc59cf0a41b9bc5fb/fish images.jpeg,fish,,,flagged/Detected Instances/a96993ece7e66065ba7d/image.webp,77,,,2024-06-22 14:16:37.842263
18
+ flagged/Input Image/57a992296c0ec254b864/Fish 2.jpeg,fish,,,flagged/Detected Instances/2c9d451701856fe3371d/image.webp,900,,,2024-06-22 14:20:50.590216
19
+ flagged/Input Image/6c3c11094015679fa790/Fish 3.jpeg,fish,,,flagged/Detected Instances/4e75007f5ef68e4ca3d6/image.webp,839,,,2024-06-22 14:22:09.322830
20
+ flagged/Input Image/31698b5d2863b6756bfa/Fish 4.jpeg,fish,,,flagged/Detected Instances/9d32c5fb797b4985cee0/image.webp,278,,,2024-06-22 14:25:47.760426
21
+ flagged/Input Image/5ce8e5a0fa4008aec1f2/Fish 4.jpeg,fish,,"{""image"": ""flagged/Specify object to count with visual exemplars here/6780b6a4f82e7d10d5cf/fish images.jpeg"", ""points"": [[63.0, 65.0, 2.0, 478.0, 217.0, 3.0]]}",flagged/Detected Instances/c6b81dedf92ab0af4153/image.webp,286,,,2024-06-22 14:28:36.854956
22
+ flagged/Input Image/00be0bde65646930f991/Fish 4.jpeg,fish,,"{""image"": ""flagged/Specify object to count with visual exemplars here/25a3e4831f706db4a521/Fish 3.jpeg"", ""points"": [[319.0, 221.0, 2.0, 650.0, 516.0, 3.0]]}",flagged/Detected Instances/25d123e67ec8644c5f02/image.webp,277,,,2024-06-22 14:29:09.194220
23
+ flagged/Input Image/542e2646fed421a4d5e5/6f91f056-e447-4316-aae1-b1ca57b55ec6.jpeg,Man,,,flagged/Detected Instances/4e890d293a4cc098b2b5/image.webp,1,,,2024-06-22 14:29:55.868319
24
+ flagged/Input Image/5571766c4ae584f798f6/Fish 3.jpeg,fish and fish with weird and large mouth,,,flagged/Detected Instances/9f53a1f8ee034afcb29f/image.webp,347,,,2024-06-22 14:32:29.009512
25
+ flagged/Input Image/df8f6e4f3c2658165b32/Fish 3.jpeg,fish and fish with weird and large mouth,,,flagged/Detected Instances/9c453d75fc3f0b7ba779/image.webp,347,,,2024-06-22 14:33:38.795075
26
+ flagged/Input Image/52226488b0c44d964af9/Fish 3.jpeg,All fishes big and small,,,flagged/Detected Instances/bf38e737886a0ac61746/image.webp,609,,,2024-06-22 14:34:34.754255
27
+ flagged/Input Image/caae84890c10f63e5c53/Fish 3.jpeg,All fish big and small,,,flagged/Detected Instances/d209f5a62318a3cc9a83/image.webp,653,,,2024-06-22 14:34:49.612537
28
+ flagged/Input Image/b1986e95b8a8df57d81b/Fish 3.jpeg,A big fish and all small fish,,,flagged/Detected Instances/10fdbef3f9c29b00d343/image.webp,655,,,2024-06-22 14:36:39.308451
29
+ flagged/Input Image/b320c79d24f53c48cb36/Fish 3.jpeg,A big fish and all small fishes,,,flagged/Detected Instances/83948fd3cdfc64acba91/image.webp,663,,,2024-06-22 14:37:47.440375
30
+ flagged/Input Image/abf54b47c8c97ae29e2d/Fish 3.jpeg,,,"{""image"": ""flagged/Specify object to count with visual exemplars here/0b10284a52f55d2b997c/Fish 3.jpeg"", ""points"": [[738.0, 295.0, 2.0, 917.0, 469.0, 3.0]]}",flagged/Detected Instances/2bb2c04de9c3c15284a2/image.webp,595,,,2024-06-22 14:42:47.880099
31
+ flagged/Input Image/485ab0f7f21cef342c33/Fish 3.jpeg,,,"{""image"": ""flagged/Specify object to count with visual exemplars here/a2645b60d75a35644c30/Fish 3.jpeg"", ""points"": [[717.0, 65.0, 2.0, 934.0, 232.0, 3.0], [812.0, 55.0, 2.0, 812.0, 155.0, 3.0]]}",flagged/Detected Instances/bb94b03bbae3744cb687/image.webp,710,,,2024-06-22 14:43:57.785652
32
+ flagged/Input Image/eaf2d446d413a9b288cf/Fish 3.jpeg,A big fish and all small fishes,,"{""image"": ""flagged/Specify object to count with visual exemplars here/8848ccbc7b6f9793d87a/Fish 3.jpeg"", ""points"": [[717.0, 65.0, 2.0, 934.0, 232.0, 3.0], [812.0, 55.0, 2.0, 812.0, 155.0, 3.0]]}",flagged/Detected Instances/31643c3688da9b4fa531/image.webp,716,,,2024-06-22 14:44:56.566214
33
+ flagged/Input Image/432fc52db57386081a7f/Fish 3.jpeg,A big fish and all small fishes,,"{""image"": ""flagged/Specify object to count with visual exemplars here/b3b4cf1ebceac265a27a/Fish 3.jpeg"", ""points"": [[202.0, 10.0, 2.0, 678.0, 280.0, 3.0]]}",flagged/Detected Instances/596768cb96359baae656/image.webp,736,,,2024-06-22 14:46:10.694967
34
+ flagged/Input Image/a12f368ae7c4c5fd3da6/Fish 3.jpeg,,,"{""image"": ""flagged/Specify object to count with visual exemplars here/a59d14a310592e735186/Fish 3.jpeg"", ""points"": [[44.0, 96.0, 2.0, 83.0, 148.0, 3.0]]}",flagged/Detected Instances/663747d5d817f3b6f17b/image.webp,603,,,2024-06-22 14:50:04.561496
35
+ flagged/Input Image/e081cd4b3f8c0a233e93/HVITa2016a_000011.JPG,bird,,,flagged/Detected Instances/ef9c94eaef8c71a59be7/image.webp,30,,,2024-06-22 14:59:44.195345
36
+ flagged/Input Image/037b6400419594ee083f/2024-06-20 21.21.07.jpg,apples,,,flagged/Detected Instances/69304bef5b817c19a40d/image.webp,0,,,2024-06-22 21:16:37.304942
37
+ flagged/Input Image/7fb25053e99ea97dec5d/IMG_4345.jpeg,bottle of water,,,flagged/Detected Instances/cd9ef90fd76f3c5bc28d/image.webp,5,,,2024-06-22 21:18:02.687111
38
+ flagged/Input Image/ac22a60546af5b9e27d5/Julie_Chandelier_South-Georgia_Gold-Harbour_King-Penguins-colony-2-scaled.jpg,,,"{""image"": ""flagged/Specify object to count with visual exemplars here/9df284bf10e2e549b58b/900.jpeg"", ""points"": [[254.0, 43.0, 2.0, 615.0, 646.0, 3.0]]}",flagged/Detected Instances/6afde69e615172163cd9/image.webp,19,,,2024-06-22 21:24:24.843762
39
+ flagged/Input Image/1e3fbaf70a0e480fcc7e/photo-1536842717890-4804617eb992.jpeg,balloon,,,flagged/Detected Instances/9e35897bb2b104feaf01/image.webp,21,,,2024-06-23 18:19:01.914350
40
+ flagged/Input Image/8c8253a9745752f7ac5c/photo-1533551131698-c87deddfa2d6.jpeg,person,,,flagged/Detected Instances/f73654df476be2e2025a/image.webp,392,,,2024-06-23 18:20:38.923697
41
+ flagged/Input Image/ba84aaaf18399d9d7285/photo-1533551131698-c87deddfa2d6.jpeg,person,,"{""image"": ""flagged/Specify object to count with visual exemplars here/2887c60fea7795aacecb/photo-1533551131698-c87deddfa2d6.jpeg"", ""points"": [[471.0, 235.0, 2.0, 529.0, 335.0, 3.0]]}",flagged/Detected Instances/5128711a7e3b3b296705/image.webp,379,,,2024-06-23 18:21:23.083042
42
+ flagged/Input Image/288c3bba8e73832aa8a2/photo-1533551131698-c87deddfa2d6.jpeg,,,"{""image"": ""flagged/Specify object to count with visual exemplars here/8af1f367a37b6758f689/photo-1533551131698-c87deddfa2d6.jpeg"", ""points"": [[471.0, 235.0, 2.0, 529.0, 335.0, 3.0]]}",flagged/Detected Instances/26a1963fd9197c1e4c1d/image.webp,312,,,2024-06-23 18:21:38.559234
43
+ flagged/Input Image/254816dc70cdd88ffa6c/photo-1533551131698-c87deddfa2d6.jpeg,person,,,flagged/Detected Instances/6bb3d26c43dc6b20eeba/image.webp,392,,,2024-06-23 18:21:50.311989
44
+ flagged/Input Image/25d39fcb0445819504c4/photo-1533551131698-c87deddfa2d6.jpeg,person,,"{""image"": ""flagged/Specify object to count with visual exemplars here/005d56113a6597d456d6/photo-1533551131698-c87deddfa2d6.jpeg"", ""points"": [[473.0, 242.0, 2.0, 526.0, 339.0, 3.0]]}",flagged/Detected Instances/ef8e0debab490fee3637/image.webp,381,,,2024-06-23 18:22:20.986792
45
+ flagged/Input Image/2bc6a37e212dcc397a14/JUTeHjN.jpg,,,"{""image"": ""flagged/Specify object to count with visual exemplars here/1dfc2902049a063b4d3b/JUTeHjN.jpg"", ""points"": [[274.0, 559.0, 2.0, 301.0, 639.0, 3.0]]}",flagged/Detected Instances/d0472660028ee835a909/image.webp,591,,,2024-06-23 18:26:11.912055
46
+ flagged/Input Image/cc6b05c49c410ce714a4/JUTeHjN.jpg,waldo,,"{""image"": ""flagged/Specify object to count with visual exemplars here/0354019aa33aa8cc1b4c/JUTeHjN.jpg"", ""points"": [[274.0, 559.0, 2.0, 301.0, 639.0, 3.0]]}",flagged/Detected Instances/e7d80138cd030c1a4de7/image.webp,707,,,2024-06-23 18:26:56.983776
47
+ flagged/Input Image/c1f5e295668daa58b78b/La_horde_-_Surfers_riding_a_wave_in_Paea_Tahiti.jpg,people,,,flagged/Detected Instances/2b8161f3e7ac96bc81a6/image.webp,6,,,2024-06-24 08:54:20.028204
48
+ flagged/Input Image/3077b7ba647265981dac/Screenshot from 2024-06-18 07-24-03.png,letter t,,,flagged/Detected Instances/d0f7006b69b870c7c227/image.webp,60,,,2024-06-24 13:39:18.019532
49
+ flagged/Input Image/9a11e4fe8738ffdc2176/Screenshot from 2024-06-18 07-24-03.png,letter t,,"{""image"": ""flagged/Specify object to count with visual exemplars here/441e53d9531d94725262/hello.png"", ""points"": []}",flagged/Detected Instances/0465e358fdb89e04a940/image.webp,60,,,2024-06-24 13:40:57.783042
50
+ flagged/Input Image/afa9d75359a32a6d4b41/Screenshot from 2024-06-18 07-24-03.png,letter t,,"{""image"": ""flagged/Specify object to count with visual exemplars here/037cdd9bcd4683726fc5/hello.png"", ""points"": []}",flagged/Detected Instances/2f592103e85d7e6e527a/image.webp,60,,,2024-06-24 13:41:29.438433
51
+ flagged/Input Image/57809c4a9bafbbd30918/Screenshot from 2024-06-18 07-24-03.png,Ts,,"{""image"": ""flagged/Specify object to count with visual exemplars here/e22fa571644aa160d86c/hello.png"", ""points"": []}",flagged/Detected Instances/c318b58b8c88b7710e0c/image.webp,6,,,2024-06-24 13:41:45.916615
52
+ flagged/Input Image/c9412ee3b51eacd33f52/06.jpg_instances.jpg,fish,,,flagged/Detected Instances/a6c2ad6ab9379e0211fa/image.webp,9,,,2024-06-24 15:07:10.854678
53
+ flagged/Input Image/e2976e4738abc316313f/word-image-233.jpeg,leaves,,,flagged/Detected Instances/e8eab3eeef0d444bbb1a/image.webp,10,,,2024-06-24 15:53:21.817485
54
+ flagged/Input Image/be3ab6fa387c0a0194f4/word-image-233.jpeg,leaf,,,flagged/Detected Instances/90dc5680d2ec80c0eac8/image.webp,10,,,2024-06-24 15:55:43.676702
55
+ flagged/Input Image/8ee3a4ce6a33884d03ae/2.jpg,sea shell,,,flagged/Detected Instances/f0d8fd3020ac899200f2/image.webp,8,,,2024-06-24 15:57:17.391770
56
+ flagged/Input Image/74980bc345053ad14f89/215.jpg,grape,,,flagged/Detected Instances/e73eae638535e8770764/image.webp,243,,,2024-06-24 15:58:37.852766
57
+ flagged/Input Image/d206f8d836baf220ac41/HVITa2016a_000011.JPG,bird,,,flagged/Detected Instances/9949e21c2f5bacf777a1/image.webp,30,,,2024-06-24 16:27:43.463142
58
+ flagged/Input Image/86a081dc917ef9d837c7/HVITa2016a_000011.JPG,,,"{""image"": ""flagged/Specify object to count with visual exemplars here/64abdac51a65819724a6/HVITa2016a_000011.JPG"", ""points"": [[1230.0, 118.0, 2.0, 1292.0, 201.0, 3.0], [830.0, 550.0, 2.0, 906.0, 623.0, 3.0]]}",flagged/Detected Instances/d92c3d3991cfcefde001/image.webp,33,,,2024-06-24 16:28:22.627606
59
+ flagged/Input Image/35151af517a911bcc223/HVITa2016a_000011.JPG,bird,,"{""image"": ""flagged/Specify object to count with visual exemplars here/4f611be7fe99047414ae/HVITa2016a_000011.JPG"", ""points"": [[1230.0, 118.0, 2.0, 1292.0, 201.0, 3.0], [830.0, 550.0, 2.0, 906.0, 623.0, 3.0]]}",flagged/Detected Instances/10125734c7afa38ed087/image.webp,31,,,2024-06-24 16:28:36.193649
60
+ flagged/Input Image/bba1422fdfed279a73af/HVITa2016a_000011.JPG,bird,,flagged/Detected Instances/bec28a276df78afe343d/image.webp,30,,,2024-06-24 16:41:23.500662
61
+ flagged/Input Image/d7762005e6674cf12e5c/HVITa2016a_000011.JPG,bird,,flagged/Detected Instances/cca9e646a051b17f95f5/image.webp,30,,,2024-06-24 17:55:54.474905
62
+ flagged/Input Image/81aa794f290cedf734c3/HVITa2016a_000011.JPG,,"{""image"": ""flagged/Specify object to count with visual exemplars here/6da159a3958dedc078b2/HVITa2016a_000011.JPG"", ""points"": []}",flagged/Detected Instances/02faafb810646f2ea9b4/image.webp,0,,,2024-06-24 17:56:45.209436
63
+ flagged/Input Image/7b8f501cc28c308ff8b6/HVITa2016a_000011.JPG,,"{""image"": ""flagged/Specify object to count with visual exemplars here/4a30c21797e2739e4abc/HVITa2016a_000011.JPG"", ""points"": [[943.0, 374.0, 2.0, 1057.0, 485.0, 3.0]]}",flagged/Detected Instances/e27a207c035db1ab701e/image.webp,32,,,2024-06-24 18:03:25.307742
64
+ flagged/Input Image/7af486902d9e28508494/HVITa2016a_000011.JPG,,"{""image"": ""flagged/Specify object to count with visual exemplars here/4a1a53289daf6c4ab771/HVITa2016a_000011.JPG"", ""points"": []}",flagged/Detected Instances/2d8b8e39fc9512762815/image.webp,0,,,2024-06-24 18:43:48.203081
65
+ flagged/Input Image/55afacf49af9b719cdda/HVITa2016a_000011.JPG,bird,"{""image"": ""flagged/Specify object to count with visual exemplars here/30839117a5e1c2687952/HVITa2016a_000011.JPG"", ""points"": [[1230.0, 118.0, 2.0, 1292.0, 201.0, 3.0], [830.0, 550.0, 2.0, 906.0, 623.0, 3.0]]}",flagged/Detected Instances/c9d49f2d15a7de44e80d/image.webp,31,,,2024-06-24 18:52:25.941469
66
+ flagged/Input Image/266f273a429e2a835b60/HVITa2016a_000011.JPG,,"{""image"": ""flagged/Specify object to count with visual exemplars here/ea17095cd854670fd8db/HVITa2016a_000011.JPG"", ""points"": []}",flagged/Detected Instances/7f02a76d21f7e077559e/image.webp,0,,,2024-06-25 08:53:21.768814
67
+ flagged/Input Image/e93ffc35e1c7c113beec/HVITa2016a_000011.JPG,,"{""image"": ""flagged/Specify object to count with visual exemplars here/af5231d7685032d2c2fe/HVITa2016a_000011.JPG"", ""points"": []}",flagged/Detected Instances/5cd4eb530194572ba703/image.webp,0,,,2024-06-25 09:16:55.226689
68
+ flagged/Input Image/733b82dbcccd8aae5c53/HVITa2016a_000011.JPG,,"{""image"": ""flagged/Specify object to count with visual exemplars here/be9a0871ad384d450dbc/HVITa2016a_000011.JPG"", ""points"": []}",flagged/Detected Instances/d9e693ae5adb9314a442/image.webp,0,,,2024-06-25 09:22:48.487486
69
+ flagged/Input Image/180e288224fa47f7e40e/HVITa2016a_000011.JPG,,"{""image"": ""flagged/Specify object to count with visual exemplars here/3b47f1cbc62a52b89a35/HVITa2016a_000011.JPG"", ""points"": []}",flagged/Detected Instances/c06ad461ce25c9e93519/image.webp,0,,,2024-06-25 09:37:10.918912
70
+ flagged/Input Image/f6c46243c9e41736a91b/HVITa2016a_000011.JPG,bird,,flagged/Detected Instances/f834fe8aa737c6602250/image.webp,30,,,2024-06-25 09:40:18.398469
71
+ flagged/Input Image/02e6dd294378714d2c60/HVITa2016a_000011.JPG,,"{""image"": ""flagged/Specify object to count with visual exemplars here/7a28c64f2d7f386ba90f/HVITa2016a_000011.JPG"", ""points"": []}",flagged/Detected Instances/58939acce9372835d94f/image.webp,0,,,2024-06-25 09:40:26.315283
72
+ flagged/Input Image/ef125c4204764412c3f7/HVITa2016a_000011.JPG,,"{""image"": ""flagged/Specify object to count with visual exemplars here/5130a669c02f296bb278/HVITa2016a_000011.JPG"", ""points"": []}",flagged/Detected Instances/befab1125336c5e50181/image.webp,0,,,2024-06-25 10:05:42.679549
73
+ flagged/Input Image/feae1a999efc1b161e4a/HVITa2016a_000011.JPG,,"{""image"": ""flagged/Specify object to count with visual exemplars here/d8f9a71db6b70a468a0f/HVITa2016a_000011.JPG"", ""points"": []}",flagged/Detected Instances/ec35f973d14b7fc08792/image.webp,0,,,2024-06-25 10:14:45.281246
74
+ flagged/Input Image/b721bed7a52d56eabff8/HVITa2016a_000011.JPG,,"{""image"": ""flagged/Specify object to count with visual exemplars here/8df3b7b4105c8bbed352/HVITa2016a_000011.JPG"", ""points"": [[1224.0, 99.0, 2.0, 1308.0, 215.0, 3.0]]}",flagged/Detected Instances/1ab68fe2b72b062794c3/image.webp,32,,,2024-06-25 10:15:02.226475
75
+ flagged/Input Image/30c0dd916e13e8cdf40a/HVITa2016a_000011.JPG,,"{""image"": ""flagged/Specify object to count with visual exemplars here/3183ccd48e9876f48116/HVITa2016a_000011.JPG"", ""points"": []}",flagged/Detected Instances/486ddeee601a5658d3c1/image.webp,0,,,2024-06-25 10:17:04.340172
76
+ flagged/Input Image/59fcdf686244d693b758/HVITa2016a_000011.JPG,bird,,flagged/Detected Instances/1c4afcc87d7b113064e0/image.webp,30,,,2024-06-25 10:25:55.780204
77
+ flagged/Input Image/0faa5dfd6e852c50d8ff/HVITa2016a_000011.JPG,,"{""image"": ""flagged/Specify object to count with visual exemplars here/5e3aabee37bf681d51a4/HVITa2016a_000011.JPG"", ""points"": []}",flagged/Detected Instances/2cd238061edc54130cdf/image.webp,0,,,2024-06-25 10:26:05.084390
78
+ flagged/Input Image/3e839a4e8ebe95002a20/HVITa2016a_000011.JPG,bird,"{""image"": ""flagged/Specify object to count with visual exemplars here/f0cd0be5ea8ccbb26d0f/HVITa2016a_000011.JPG"", ""points"": []}",flagged/Detected Instances/b8c1235f9f45df306ef9/image.webp,30,,,2024-06-25 10:26:13.747497
79
+ flagged/Input Image/aea44fcfb41612489dad/HVITa2016a_000011.JPG,,"{""image"": ""flagged/Specify object to count with visual exemplars here/da461ef7d215c55a5e7c/HVITa2016a_000011.JPG"", ""points"": []}",flagged/Detected Instances/cf0299a8a5c332b5c02f/image.webp,0,,,2024-06-25 10:27:17.764060
80
+ flagged/Input Image/defc48612eca2926a7b7/bird-1.JPG,bird,"{""image"": ""flagged/Specify object to count with visual exemplars here/bf6d9e9b6fd1b4962765/bird-2.JPG"", ""points"": []}",flagged/Detected Instances/8a857874d6e33125d752/image.webp,30,,,2024-06-25 10:56:25.591931
81
+ flagged/Input Image/02215a04cfb823dd7796/bird-1.JPG,bird,"{""image"": ""flagged/Specify object to count with visual exemplars here/54a44757b38db813d618/bird-2.JPG"", ""points"": [[608.0, 232.0, 2.0, 640.0, 263.0, 3.0]]}",flagged/Detected Instances/c0ed66b9cad56b199aa2/image.webp,31,,,2024-06-25 10:59:54.183533
82
+ flagged/Input Image/df6cea1edf80683a3767/bird-1.JPG,bird,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/0dce00cba984dd532757/bird-2.JPG"", ""points"": [[503.0, 142.0, 2.0, 542.0, 184.0, 3.0]]}",flagged/Detected Instances/c1f230dd1e883d9e32e7/image.webp,31,,,2024-06-25 11:15:22.282716
83
+ flagged/Input Image/8d0fb36ef83e86fd525a/strawberry.jpg,strawberry,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/e646d85a66c06474d22d/strawberry.jpg"", ""points"": [[160.0, 68.0, 2.0, 197.0, 138.0, 3.0]]}",flagged/Detected Instances/ed9419605b016fba2dc5/image.webp,16,,,2024-06-25 11:30:30.236817
84
+ flagged/Input Image/b49396917cf5ecc697fa/strawberry.jpg,blueberry,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/f3fd414caafcebcbda4a/strawberry.jpg"", ""points"": []}",flagged/Detected Instances/8632f9c1bb05eee15dcc/image.webp,137,,,2024-06-25 11:30:43.620161
85
+ flagged/Input Image/3dd1700ef3bcbe5657f7/strawberry.jpg,blueberry,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/5dfc0e9e8efa0af0c6a8/strawberry.jpg"", ""points"": [[253.0, 130.0, 2.0, 273.0, 152.0, 3.0]]}",flagged/Detected Instances/94ce91cd91c43e18974d/image.webp,133,,,2024-06-25 11:30:53.267942
86
+ flagged/Input Image/cec4554c9b13aa4d346d/clipboard.png,fish,,flagged/Detected Instances/1830def027aa0d3f3d91/image.webp,434,,,2024-06-25 11:38:27.652346
87
+ flagged/Input Image/032c2e1714788159e262/clipboard.png,fish,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/04a1a12a302c9c1db814/clipboard.png"", ""points"": [[1049.0, 213.0, 2.0, 1125.0, 238.0, 3.0]]}",flagged/Detected Instances/122ff6e9f6ff79c3e5a7/image.webp,411,,,2024-06-25 11:39:25.658141
88
+ flagged/Input Image/5422ce8f8f4364819aaa/clipboard.png,fish,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/c32361eb60578a2bd4ac/clipboard.png"", ""points"": []}",flagged/Detected Instances/5d5df763986fec98e93f/image.webp,900,,,2024-06-25 11:41:22.135329
89
+ flagged/Input Image/5072a86417c33eb593f3/clipboard.png,fish,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/028b7ae649c4dd8b7178/clipboard.png"", ""points"": [[1832.0, 158.0, 2.0, 1878.0, 223.0, 3.0]]}",flagged/Detected Instances/c65818f6571e4d168e43/image.webp,900,,,2024-06-25 11:41:46.118310
90
+ flagged/Input Image/df7f9cdf52e79013f8cb/clipboard.png,fish,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/09eb9b77eff2e836b6ff/clipboard.png"", ""points"": []}",flagged/Detected Instances/b4a6138c92f1c746ccdb/image.webp,248,,,2024-06-25 11:42:43.425077
91
+ flagged/Input Image/a9ccd350937abcb63291/clipboard.png,fish,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/19ce574f4470218dc7e0/clipboard.png"", ""points"": [[367.0, 183.0, 2.0, 451.0, 219.0, 3.0]]}",flagged/Detected Instances/d48cc5c8133c9068a0dd/image.webp,247,,,2024-06-25 11:43:33.822691
92
+ flagged/Input Image/a3d2036f9c1ac12e961c/fish.jpg,fish,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/0c6da89b80bb001aa49e/fish.jpg"", ""points"": []}",flagged/Detected Instances/f27dfe196a1c41a69503/image.webp,248,,,2024-06-25 11:45:59.667866
93
+ flagged/Input Image/46f4410a1aa98f5ee2af/strawberry.jpg,blueberry,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/2fd7f3ce369404120141/strawberry.jpg"", ""points"": []}",flagged/Detected Instances/0197e9eeac0554007c3f/image.webp,137,,,2024-06-25 11:46:32.895500
94
+ flagged/Input Image/bb268ffa2de20ebe1c26/strawberry.jpg,blueberry,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/c3b2d652d852a7eab171/strawberry.jpg"", ""points"": [[342.0, 126.0, 2.0, 359.0, 144.0, 3.0]]}",flagged/Detected Instances/44f0249ecddfb4155756/image.webp,137,,,2024-06-25 11:46:42.177472
95
+ flagged/Input Image/6c66434da1d48433ecb3/strawberry.jpg,blueberry,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/4d4cffebe4515feff0ec/strawberry.jpg"", ""points"": []}",flagged/Detected Instances/ab963c869b4f8d820e49/image.webp,137,,,2024-06-25 11:46:57.009420
96
+ flagged/Input Image/a6875dccb70b93acd740/strawberry.jpg,blueberry,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/f481446bb34bcb55993f/strawberry.jpg"", ""points"": [[333.0, 167.0, 2.0, 353.0, 187.0, 3.0], [301.0, 100.0, 2.0, 321.0, 119.0, 3.0]]}",flagged/Detected Instances/bdf5762c87a5fb86e0b2/image.webp,137,,,2024-06-25 11:47:18.226311
97
+ flagged/Input Image/44cef4325bfc0b2aa091/strawberry.jpg,strawberry,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/bcaa94ade9f2b6274f1c/strawberry.jpg"", ""points"": []}",flagged/Detected Instances/613e0b5dca0804e918c7/image.webp,16,,,2024-06-25 11:47:34.461816
98
+ flagged/Input Image/6d93ce821c043cc1fa50/strawberry.jpg,blueberry,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/ae226a70b29c89cc4eb8/strawberry.jpg"", ""points"": []}",flagged/Detected Instances/00176ba3de1e4fbd2c79/image.webp,137,,,2024-06-25 11:47:45.327559
99
+ flagged/Input Image/214729be550d569c1c68/bird-1.JPG,bird,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/f6eed5c264264d847ff6/bird-2.JPG"", ""points"": []}",flagged/Detected Instances/c12f210c5902dfd03e70/image.webp,30,,,2024-06-25 11:48:34.856890
100
+ flagged/Input Image/8bf133a0564fd92cc890/balloon.jpg,hot air balloon,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/978150756fa4f1ef5027/balloon.jpg"", ""points"": []}",flagged/Detected Instances/5b47d64211343b63152a/image.webp,108,,,2024-06-25 12:25:57.589269
101
+ flagged/Input Image/6429656af3e526d1c231/women.jpg,women,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/25350112732936ba546b/women.jpg"", ""points"": []}",flagged/Detected Instances/be57da4ba536ae483835/image.webp,5,,,2024-06-25 12:26:41.171433
102
+ flagged/Input Image/c58a77f0b28ca5d81b43/women.jpg,men,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/130e25a655d338fdfe3e/women.jpg"", ""points"": []}",flagged/Detected Instances/e7e68bb358caa2a02fad/image.webp,6,,,2024-06-25 12:26:49.904389
103
+ flagged/Input Image/a7f9b2e70fc2dd48ea61/women.jpg,man,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/cd6645e130252cb0ce54/women.jpg"", ""points"": []}",flagged/Detected Instances/7ffc5ec4d2638a7d5993/image.webp,4,,,2024-06-25 12:26:59.917970
104
+ flagged/Input Image/42851096a5465744aff8/women.jpg,boy,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/fdaea5a42ce12dafba02/women.jpg"", ""points"": []}",flagged/Detected Instances/47cfc5281c09ccad9f3c/image.webp,1,,,2024-06-25 12:27:25.852703
105
+ flagged/Input Image/67132ca41a6c48bc4ae8/women.jpg,girl,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/2410b57b1dae0d986036/women.jpg"", ""points"": []}",flagged/Detected Instances/b8ef07f2a0d78188051d/image.webp,5,,,2024-06-25 12:27:35.919728
106
+ flagged/Input Image/d66d3772b1409e1852d8/strawberry.jpg,strawberry,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/2f7a42ec53940a767ab1/strawberry.jpg"", ""points"": []}",flagged/Detected Instances/db7ecc53cc8f29566958/image.webp,16,,,2024-06-25 12:29:35.376726
107
+ flagged/Input Image/7493d467be9cfe1dff7c/strawberry.jpg,strawberry,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/8cab0399346d70c00fc0/strawberry.jpg"", ""points"": [[338.0, 123.0, 2.0, 361.0, 146.0, 3.0]]}",flagged/Detected Instances/f01f6a02720e21ce0150/image.webp,50,,,2024-06-25 12:29:45.148685
108
+ flagged/Input Image/440c504736080f8142ad/strawberry.jpg,blueberry,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/92fd535788251da6ba09/strawberry.jpg"", ""points"": [[338.0, 123.0, 2.0, 361.0, 146.0, 3.0]]}",flagged/Detected Instances/990386fa1a9553c882db/image.webp,136,,,2024-06-25 12:29:57.059622
109
+ flagged/Input Image/fed431554a261ab266a4/strawberry.jpg,blueberry,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/60afc4b94764dc018370/strawberry.jpg"", ""points"": []}",flagged/Detected Instances/5f048bdb5003eb64e17b/image.webp,137,,,2024-06-25 12:30:08.506742
110
+ flagged/Input Image/5e49dbc792a65c4dbbb5/strawberry.jpg,blueberry,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/8415524ab37ceaf7469b/strawberry.jpg"", ""points"": [[338.0, 123.0, 2.0, 361.0, 146.0, 3.0], [257.0, 112.0, 2.0, 287.0, 136.0, 3.0]]}",flagged/Detected Instances/e56fa7694cc83977e48e/image.webp,137,,,2024-06-25 12:30:18.906628
111
+ flagged/Input Image/daa4ac7838796afe20fd/bird-1.JPG,bird,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/69dcd045d1a47c1a45cc/bird-2.JPG"", ""points"": []}",flagged/Detected Instances/7c51fda26eae4f4660a6/image.webp,30,,,2024-06-25 12:30:29.347865
112
+ flagged/Input Image/7ba4ec104b5a3a146474/bird-1.JPG,bird,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/164da9e8be790ea4ef79/bird-2.JPG"", ""points"": [[505.0, 147.0, 2.0, 536.0, 183.0, 3.0]]}",flagged/Detected Instances/f16c10ef2bc36b5dc69d/image.webp,31,,,2024-06-25 12:30:39.178460
113
+ flagged/Input Image/420750e47befc70801e2/fish.jpg,fish,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/9e22ac918784790aa880/fish.jpg"", ""points"": []}",flagged/Detected Instances/27ff780f6a23afc048b2/image.webp,248,,,2024-06-25 12:31:01.375887
114
+ flagged/Input Image/1feeba99c7d2efe9096e/women.jpg,girl,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/4f7337051dee8ff77653/women.jpg"", ""points"": []}",flagged/Detected Instances/ce0dd6ff795456176eab/image.webp,5,,,2024-06-25 12:31:17.643196
115
+ flagged/Input Image/9f1c5da2733fa8c58ae8/women.jpg,boy,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/d41a6c081af41e9a2f62/women.jpg"", ""points"": []}",flagged/Detected Instances/620e679c66d0f9574fad/image.webp,1,,,2024-06-25 12:31:28.806538
116
+ flagged/Input Image/30598862ab850cd6db31/balloon.jpg,hot air balloon,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/ff94594958fb2da48621/balloon.jpg"", ""points"": []}",flagged/Detected Instances/16993819652dc9bce0b8/image.webp,108,,,2024-06-25 12:31:44.635515
117
+ flagged/Input Image/f9dbff9baf36cf6c8a91/balloon.jpg,hot air balloon,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/39ac7da130be6fb0cba1/balloon.jpg"", ""points"": [[255.0, 38.0, 2.0, 274.0, 60.0, 3.0]]}",flagged/Detected Instances/fbaa66b107151c86b5f3/image.webp,108,,,2024-06-25 12:31:53.008190
118
+ flagged/Input Image/86ef5e44d86d09969fa3/deer.jpg,deer,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/fbc70e37fee443170ff4/deer.jpg"", ""points"": []}",flagged/Detected Instances/d1ab59ab9d6d21e0c4b3/image.webp,21,,,2024-06-25 12:32:16.020013
119
+ flagged/Input Image/279428723858139b5a4c/apple.jpg,apple,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/ea6e4f53ba1edf38def6/apple.jpg"", ""points"": []}",flagged/Detected Instances/f45dc48dfdc3f7df6249/image.webp,568,,,2024-06-25 12:32:45.752592
120
+ flagged/Input Image/8213864ae0272f59994f/apple.jpg,apple,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/d5d645dce3a0bedcf3c7/apple.jpg"", ""points"": [[316.0, 230.0, 2.0, 365.0, 273.0, 3.0]]}",flagged/Detected Instances/8f1288ccce0a4abba02c/image.webp,550,,,2024-06-25 12:33:07.817865
121
+ flagged/Input Image/1482d3171ccab0738d15/egg.jpg,egg,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/937d3cd3614befcd3692/egg.jpg"", ""points"": []}",flagged/Detected Instances/577b9c0ccd91d4d879ed/image.webp,18,,,2024-06-25 12:33:27.798499
122
+ flagged/Input Image/25f50a2c8a3aa7d190bd/egg.jpg,egg,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/d345ceb5293859fabf1c/egg.jpg"", ""points"": [[418.0, 85.0, 2.0, 472.0, 148.0, 3.0]]}",flagged/Detected Instances/87eb5f1e0f18dc6739f6/image.webp,18,,,2024-06-25 12:33:35.566274
123
+ flagged/Input Image/9f0db0ff082f6b31f825/stamp.jpg,stamp,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/701c11e28665c59565cc/stamp.jpg"", ""points"": []}",flagged/Detected Instances/82cabb88e68e8ecc63ba/image.webp,45,,,2024-06-25 12:33:47.051392
124
+ flagged/Input Image/fcb41365c9ac544c8898/green-pea.jpg,green pea,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/2b2c58e68049e4dade9d/green-pea.jpg"", ""points"": []}",flagged/Detected Instances/3f41f7d66283c8f516ec/image.webp,213,,,2024-06-25 12:33:59.684009
125
+ flagged/Input Image/8a701be26fb9fb2d40e8/lego.jpg,lego,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/87e17ecb546906d99293/lego.jpg"", ""points"": []}",flagged/Detected Instances/e1adec40dc3e0880d984/image.webp,23,,,2024-06-25 12:34:29.213218
126
+ flagged/Input Image/4673ed3f053269ba11d4/lego.jpg,lego,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/aae11dd32545f4f6e48a/lego.jpg"", ""points"": [[275.0, 166.0, 2.0, 294.0, 186.0, 3.0]]}",flagged/Detected Instances/0ed9da189663a3352717/image.webp,23,,,2024-06-25 12:34:41.379472
127
+ flagged/Input Image/d956a7af5032db84ebf4/lego.jpg,lego,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/65d7ec1fb0e4f2a6b378/lego.jpg"", ""points"": [[275.0, 166.0, 2.0, 294.0, 186.0, 3.0], [269.0, 256.0, 2.0, 284.0, 273.0, 3.0]]}",flagged/Detected Instances/f0675e3f8e5fbae79512/image.webp,23,,,2024-06-25 12:34:50.644433
128
+ flagged/Input Image/4a629b96d8745c48c739/lego.jpg,lego,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/a71c7ef0952943bac3d3/lego.jpg"", ""points"": [[275.0, 166.0, 2.0, 294.0, 186.0, 3.0], [269.0, 256.0, 2.0, 284.0, 273.0, 3.0], [124.0, 213.0, 2.0, 143.0, 232.0, 3.0]]}",flagged/Detected Instances/28d4f0037a145a77e53f/image.webp,23,,,2024-06-25 12:35:00.163591
129
+ flagged/Input Image/be96be6a6471be11367b/fish.jpg,fish,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/e94ed1719ff8f3280d11/fish.jpg"", ""points"": []}",flagged/Detected Instances/4ad9cd81c3a99b08a811/image.webp,248,,,2024-06-25 12:37:35.824495
130
+ flagged/Input Image/5c18d75a43fc788202de/egg.jpg,egg,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/1158914112f891b5efd0/egg.jpg"", ""points"": []}",flagged/Detected Instances/182a0d4d6453ae657719/image.webp,18,,,2024-06-25 12:38:00.758125
131
+ flagged/Input Image/e0811fca1751db63b380/egg.jpg,,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/382cdaec8615c7e2fecb/egg.jpg"", ""points"": [[199.0, 86.0, 2.0, 248.0, 142.0, 3.0]]}",flagged/Detected Instances/2079378c09fdd2a9c7ce/image.webp,18,,,2024-06-25 12:38:18.291809
132
+ flagged/Input Image/d2e387b8a61c19868cba/apple.jpg,apple,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/3ec031a562ec058e7731/apple.jpg"", ""points"": []}",flagged/Detected Instances/adf208e2ecead3e6a1f4/image.webp,568,,,2024-06-25 12:38:34.859201
133
+ flagged/Input Image/5f63a040416264d96fc7/green-pea.jpg,green pea,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/517ad35ce8c0e44b6782/green-pea.jpg"", ""points"": []}",flagged/Detected Instances/66a8dfe7bfefebd3121d/image.webp,213,,,2024-06-25 12:39:07.587414
134
+ flagged/Input Image/aae210f02dbbc2bdd515/stamp.jpg,stamp,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/422d122996b90782f0a3/stamp.jpg"", ""points"": []}",flagged/Detected Instances/6cbaa1c841d2eb3ec14d/image.webp,45,,,2024-06-25 12:42:10.960744
135
+ flagged/Input Image/81719deb5b7c5097ebb0/stamp.jpg,square,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/959e12c05af1e92d9355/stamp.jpg"", ""points"": []}",flagged/Detected Instances/21ac1219abab8e1a1e3a/image.webp,45,,,2024-06-25 12:42:30.712204
136
+ flagged/Input Image/41061285b258efc14011/stamp.jpg,25,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/b183dbda22012f6a2e91/stamp.jpg"", ""points"": []}",flagged/Detected Instances/e4fae1385b5fe0b62af2/image.webp,45,,,2024-06-25 12:42:48.778457
137
+ flagged/Input Image/4c02c2917f07a0a47195/stamp.jpg,airplane,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/f0ed657840030b178009/stamp.jpg"", ""points"": []}",flagged/Detected Instances/16c30614d826227da8a2/image.webp,0,,,2024-06-25 12:43:02.980277
138
+ flagged/Input Image/965aa552db7559607920/stamp.jpg,horse,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/bab35fa8d8c596d72c83/stamp.jpg"", ""points"": []}",flagged/Detected Instances/eb1276c9005a0e28ff80/image.webp,1,,,2024-06-25 12:43:22.910300
139
+ flagged/Input Image/1f47a5922213f28e3d47/stamp.jpg,green,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/cabeb714ed1e1cddefa1/stamp.jpg"", ""points"": []}",flagged/Detected Instances/fe42b78529616b432d55/image.webp,45,,,2024-06-25 12:43:45.012330
140
+ flagged/Input Image/37cf51da176e45f176c8/lego.jpg,yellow,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/beeb08664504d784a62e/lego.jpg"", ""points"": []}",flagged/Detected Instances/c420149b5762709cd085/image.webp,23,,,2024-06-25 12:44:27.177000
141
+ flagged/Input Image/96219bcbf20113a0aab8/lego.jpg,studs,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/699acba2f203aa14fc16/lego.jpg"", ""points"": []}",flagged/Detected Instances/18d8ce74eab9759ffa86/image.webp,23,,,2024-06-25 12:44:37.787345
142
+ flagged/Input Image/3fa513923f4a4cbc1650/strawberry.jpg,strawberry,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/b26302259971622261f7/strawberry.jpg"", ""points"": []}",flagged/Detected Instances/8d551fe44abbcf336b71/image.webp,16,,,2024-06-25 12:45:02.003915
143
+ flagged/Input Image/d9e2d9688dc9a82fa656/strawberry.jpg,elderberry,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/f86eed288abddd19b0d2/strawberry.jpg"", ""points"": []}",flagged/Detected Instances/cc78b0d8c3d24a3b12e7/image.webp,0,,,2024-06-25 12:45:16.103419
144
+ flagged/Input Image/d74aa8ff2ec0dbba88b7/strawberry.jpg,blueberry,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/89b0adc1664a53a48fea/strawberry.jpg"", ""points"": []}",flagged/Detected Instances/b97b8f4849493ce67de8/image.webp,137,,,2024-06-25 12:45:29.869576
145
+ flagged/Input Image/84eb8736bc8eb48f507f/strawberry.jpg,blueberry,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/0978ec84da8050ca0792/strawberry.jpg"", ""points"": [[330.0, 158.0, 2.0, 354.0, 195.0, 3.0], [289.0, 248.0, 2.0, 307.0, 284.0, 3.0]]}",flagged/Detected Instances/923571e5c8db86d8a8ed/image.webp,132,,,2024-06-25 12:46:16.470770
146
+ flagged/Input Image/104d1b72504a1965f84d/fish.jpg,fish,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/1e96cf74eb0dea109d9f/fish.jpg"", ""points"": []}",flagged/Detected Instances/3df418860ba1a58017cc/image.webp,248,,,2024-06-25 12:46:35.001964
147
+ flagged/Input Image/6ad4c8683b86783f3ec2/stamp.jpg,square,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/1eefba3eed3b8e2a271b/stamp.jpg"", ""points"": [[300.0, 285.0, 2.0, 351.0, 327.0, 3.0], [240.0, 337.0, 2.0, 290.0, 379.0, 3.0]]}",flagged/Detected Instances/eaab5820db88aac49462/image.webp,45,,,2024-06-25 12:47:11.901813
148
+ flagged/Input Image/9eb585667d874ba79393/stamp.jpg,red rectangular stamp,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/88c962fe4751ec3839cd/stamp.jpg"", ""points"": [[300.0, 285.0, 2.0, 351.0, 327.0, 3.0], [240.0, 337.0, 2.0, 290.0, 379.0, 3.0]]}",flagged/Detected Instances/cd25d7af77d301a1d5a4/image.webp,45,,,2024-06-25 12:49:38.403498
149
+ flagged/Input Image/9724ddc2f18fd68bdee9/stamp.jpg,red ,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/13e9c5b34604bb92d7fc/stamp.jpg"", ""points"": [[300.0, 285.0, 2.0, 351.0, 327.0, 3.0], [240.0, 337.0, 2.0, 290.0, 379.0, 3.0]]}",flagged/Detected Instances/57bf0944ab05bd0099ea/image.webp,45,,,2024-06-25 12:49:51.522005
150
+ flagged/Input Image/1238680c118136bb0496/lego.jpg,red lego,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/05be72fc352ad2ba08ba/lego.jpg"", ""points"": []}",flagged/Detected Instances/11c893cbcf45bcf1ab51/image.webp,23,,,2024-06-25 12:50:27.240856
151
+ flagged/Input Image/a305b9fbd30361d8d3bf/lego.jpg,red ,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/a7ebb8ee6c2ad9540939/lego.jpg"", ""points"": []}",flagged/Detected Instances/e5780fbd02076a52113b/image.webp,23,,,2024-06-25 12:50:37.774851
152
+ flagged/Input Image/34d0773dc8f14c58f797/lego.jpg,white,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/23ead2166dba8533b130/lego.jpg"", ""points"": []}",flagged/Detected Instances/91146d3e4f8f27b15856/image.webp,23,,,2024-06-25 12:50:50.401975
153
+ flagged/Input Image/c76b4273734935f64ecf/strawberry.jpg,strawberry,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/ee8ba6f40aa26158d870/strawberry.jpg"", ""points"": []}",flagged/Detected Instances/d1e59cae00d004db8b42/image.webp,16,,,2024-06-25 12:51:05.753104
154
+ flagged/Input Image/a6c4912aa51cc54ed3d7/strawberry.jpg,blueberry,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/566190bf535ad3281f6c/strawberry.jpg"", ""points"": []}",flagged/Detected Instances/0a2ffbe024c77e7da50d/image.webp,137,,,2024-06-25 12:51:31.117635
155
+ flagged/Input Image/ea56a8e9ad4e135ff663/strawberry.jpg,blueberry,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/c92e960c7b3df79ff7b5/strawberry.jpg"", ""points"": [[341.0, 123.0, 2.0, 361.0, 149.0, 3.0]]}",flagged/Detected Instances/428433149285aee052cf/image.webp,134,,,2024-06-25 12:51:42.720447
156
+ flagged/Input Image/6fc4ed98f8495057126d/women.jpg,girl,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/ec38f075c8bc9249bfb6/women.jpg"", ""points"": []}",flagged/Detected Instances/bc7f57f26e088938c86a/image.webp,5,,,2024-06-25 12:52:16.523201
157
+ flagged/Input Image/8d06bc9272737637658c/women.jpg,boy,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/94a9eef129e73dec7ed8/women.jpg"", ""points"": []}",flagged/Detected Instances/c419565161b7941b9368/image.webp,1,,,2024-06-25 12:52:34.380793
158
+ flagged/Input Image/18639d94ba6ee7673454/fish.jpg,fish,"{""image"": ""flagged/CLICK DRAG to specify object to count with visual exemplars here/4d917d8cff994b9757bd/fish.jpg"", ""points"": []}",flagged/Detected Instances/43bbba43eb4a991b6256/image.webp,248,,,2024-06-25 12:53:25.668942
159
+ flagged/Input Image/acbbf9f607925c34aec9/strawberry.jpg,strawberry,"{""image"": ""flagged/Specify object to count with visual exemplars here/a66486911b04c6d3dd5f/strawberry.jpg"", ""points"": []}",flagged/Detected Instances/71aa71914a32c98dbf5e/image.webp,16,,,2024-06-25 13:21:35.534920
160
+ flagged/Input Image/5a125c995d0990aaf69b/bird-1.JPG,bird,"{""image"": ""flagged/Specify object to count with visual exemplars here/c8b24b1c09de0bbcfa35/bird-2.JPG"", ""points"": []}",flagged/Detected Instances/a24ce3527db178c02415/image.webp,30,,,2024-06-25 13:24:10.446815
161
+ flagged/Input Image/be297ebfd1bb43cd1438/strawberry.jpg,strawberry,"{""image"": ""flagged/Specify object to count with visual exemplars here/d6e9603b839cb704d1e1/strawberry.jpg"", ""points"": []}",flagged/Detected Instances/ab49e40bc9aefd394529/image.webp,16,,,2024-06-25 13:27:08.069932
162
+ flagged/Input Image/667eae2d1cf1f7cb10bf/apple.jpg,apple,"{""image"": ""flagged/Specify object to count with visual exemplars here/d4e8125f9602a8823edd/apple.jpg"", ""points"": []}",flagged/Detected Instances/9568219f942093ef51cf/image.webp,568,,,2024-06-25 13:47:38.683788
163
+ flagged/Input Image/ff98881ffa5a95d08f37/women.jpg,girl,"{""image"": ""flagged/Specify object to count with visual exemplars here/8afee605e88ec6ddaa37/women.jpg"", ""points"": []}",flagged/Detected Instances/9c09433356e51d42d738/image.webp,5,,,2024-06-25 13:47:53.014232
164
+ flagged/Input Image/0c059862756e0017eb5b/deer.jpg,deer,"{""image"": ""flagged/Specify object to count with visual exemplars here/454665f365b4cd0826e9/deer.jpg"", ""points"": []}",flagged/Detected Instances/95bf56b70e2f2e6f3fe6/image.webp,21,,,2024-06-25 13:48:07.565103
165
+ flagged/Input Image/59a8de455bfa9769d8f0/egg.jpg,egg,"{""image"": ""flagged/Specify object to count with visual exemplars here/6626b7927b70e282f908/egg.jpg"", ""points"": []}",flagged/Detected Instances/043ac97ae83b87ef6d7d/image.webp,18,,,2024-06-25 13:48:24.214285
166
+ flagged/Input Image/f916803fd0379cb3b3b6/women.jpg,foot,"{""image"": ""flagged/Specify object to count with visual exemplars here/e743db45e1b6c6dbce71/women.jpg"", ""points"": []}",flagged/Detected Instances/9906067993749438f371/image.webp,0,,,2024-06-25 13:52:46.735106
167
+ flagged/Input Image/b014d0dd065e2cc85351/women.jpg,shoe,"{""image"": ""flagged/Specify object to count with visual exemplars here/15cc3b4b2ee3fa63144b/women.jpg"", ""points"": []}",flagged/Detected Instances/2a54e56bffa5f3c38df3/image.webp,13,,,2024-06-25 13:52:55.287490
168
+ flagged/Input Image/e58990d9da6323e900ec/strawberry.jpg,blueberry,"{""image"": ""flagged/Specify object to count with visual exemplars here/f9d6362f7f07cc8455ba/strawberry.jpg"", ""points"": []}",flagged/Detected Instances/962b404f01817d4b4240/image.webp,137,,,2024-06-25 13:53:24.831821
169
+ flagged/Input Image/36b16cc3616ed8f56ff1/strawberry.jpg,blueberry,"{""image"": ""flagged/Specify object to count with visual exemplars here/c70c72e5fad645bc14aa/strawberry.jpg"", ""points"": [[300.0, 97.0, 2.0, 321.0, 122.0, 3.0]]}",flagged/Detected Instances/c8d02635413c34abe79e/image.webp,134,,,2024-06-25 13:53:33.382644
170
+ flagged/Input Image/abf2935032503027b2a6/strawberry.jpg,,"{""image"": ""flagged/Specify object to count with visual exemplars here/9d3f4b6f03f0dca04bc9/strawberry.jpg"", ""points"": [[300.0, 97.0, 2.0, 321.0, 122.0, 3.0], [165.0, 74.0, 2.0, 204.0, 133.0, 3.0], [104.0, 182.0, 2.0, 151.0, 230.0, 3.0]]}",flagged/Detected Instances/0e52e648edb9fd8db2d5/image.webp,143,,,2024-06-25 13:54:00.465000
171
+ flagged/Input Image/af6c4808a13098046947/strawberry.jpg,,"{""image"": ""flagged/Specify object to count with visual exemplars here/75ed6a32a8a1e6340110/strawberry.jpg"", ""points"": [[99.0, 139.0, 2.0, 146.0, 185.0, 3.0], [161.0, 71.0, 2.0, 202.0, 132.0, 3.0]]}",flagged/Detected Instances/db51c4253db3807889f0/image.webp,16,,,2024-06-25 13:54:22.839814
172
+ flagged/Input Image/c716b9c2502379b4f53d/strawberry.jpg,strawberry,"{""image"": ""flagged/Specify object to count with visual exemplars here/1ab41b769030db4ba746/strawberry.jpg"", ""points"": []}",flagged/Detected Instances/1d060462444efcff7c1b/image.webp,16,,,2024-06-25 14:24:43.543530
173
+ flagged/Input Image/aca09cc83af42dc6a5ed/women.jpg,girl,"{""image"": ""flagged/Specify object to count with visual exemplars here/3399db8186d98f479b4f/women.jpg"", ""points"": []}",flagged/Detected Instances/a878c25daaecd95cd2e3/image.webp,5,,,2024-06-25 14:26:04.205110
174
+ flagged/Input Image/f0e3038613138f5653fe/fish.jpg,fish,"{""image"": ""flagged/Specify object to count with visual exemplars here/5a2fbfebe22860e70887/fish.jpg"", ""points"": []}",flagged/Detected Instances/b00af2ba550cbce038ed/image.webp,248,,,2024-06-25 14:41:36.984087
175
+ flagged/Input Image/f22235f8e59c137765af/deer.jpg,deer,"{""image"": ""flagged/Specify object to count with visual exemplars here/79acfe92d19d88df7924/deer.jpg"", ""points"": []}",flagged/Detected Instances/43e430b66d190a4be7b6/image.webp,21,,,2024-06-25 14:42:57.479610
176
+ flagged/Input Image/43828e990ec1f6c5dc14/fish.jpg,fish,"{""image"": ""flagged/Specify object to count with visual exemplars here/ee76194fb63611ab5b90/fish.jpg"", ""points"": []}",flagged/Detected Instances/04969bb2604f9248742b/image.webp,248,,,2024-06-25 14:45:20.424378
177
+ flagged/Input Image/1e2bb41e969d56102dc2/bird-1.JPG,bird,"{""image"": ""flagged/Specify object to count with visual exemplars here/01989c9431d1e6c77ad1/bird-2.JPG"", ""points"": []}",flagged/Detected Instances/4957f1a733161da1ddcd/image.webp,30,,,2024-06-25 14:48:39.348196
178
+ flagged/Input Image/e1f55639b7c03f2c3bd1/deer.jpg,deer,"{""image"": ""flagged/Specify object to count with visual exemplars here/7207f6ffa7c700e4339b/deer.jpg"", ""points"": []}",flagged/Detected Instances/c64f5cf630c80bfc90b5/image.webp,21,,,2024-06-25 14:48:56.962720
179
+ flagged/Input Image/adef57f83578033397ef/strawberry.jpg,strawberry,"{""image"": ""flagged/Specify object to count with visual exemplars here/a77ac336261b1cb69b31/strawberry.jpg"", ""points"": []}",flagged/Detected Instances/99b7039bc665946889cf/image.webp,16,,,2024-06-25 14:49:23.944300
180
+ flagged/Input Image/fad7be92b6af8fc44818/strawberry.jpg,strawberry,"{""image"": ""flagged/Specify object to count with visual exemplars here/fbd49488b1c780e4d5d1/strawberry.jpg"", ""points"": []}",flagged/Detected Instances/26fa7fa2475e60ea3221/image.webp,16,,,2024-06-25 14:51:48.832356
181
+ flagged/Input Image/ab0e307953f58465f254/strawberry.jpg,strawberry,"{""image"": ""flagged/Specify object to count with visual exemplars here/162fa100594505627551/strawberry.jpg"", ""points"": []}",flagged/Detected Instances/4351f6b9dc586a7a192f/image.webp,16,,,2024-06-25 14:53:19.317939
182
+ flagged/Input Image/64cf922799f8f994f127/strawberry.jpg,strawberry,"{""image"": ""flagged/Specify object to count with visual exemplars here/be38b70786240819da5e/strawberry.jpg"", ""points"": [[183.0, 131.0, 1.0, 0.0, 0.0, 4.0], [272.0, 124.0, 1.0, 0.0, 0.0, 4.0], [272.0, 124.0, 2.0, 325.0, 186.0, 3.0]]}",flagged/Detected Instances/7e5c71b26561217570d0/image.webp,16,,,2024-06-25 14:53:37.751975
183
+ flagged/Input Image/7a7dacbf8c123986c050/strawberry.jpg,strawberry,"{""image"": ""flagged/Specify object to count with visual exemplars here/50f292a91824f56a78ec/strawberry.jpg"", ""points"": [[183.0, 131.0, 1.0, 0.0, 0.0, 4.0], [272.0, 124.0, 1.0, 0.0, 0.0, 4.0], [272.0, 124.0, 2.0, 325.0, 186.0, 3.0], [335.0, 170.0, 1.0, 0.0, 0.0, 4.0], [286.0, 172.0, 1.0, 0.0, 0.0, 4.0], [275.0, 193.0, 1.0, 0.0, 0.0, 4.0], [267.0, 207.0, 2.0, 285.0, 254.0, 3.0], [317.0, 223.0, 2.0, 345.0, 277.0, 3.0]]}",flagged/Detected Instances/cdeab185b16c9564290b/image.webp,150,,,2024-06-25 14:53:51.580488
184
+ flagged/Input Image/4a1ed2bed23e714c2e98/fish.jpg,fish,"{""image"": ""flagged/Specify object to count with visual exemplars here/7b43211f1ed9441e8e60/fish.jpg"", ""points"": [[526.0, 469.0, 2.0, 577.0, 520.0, 3.0]]}",flagged/Detected Instances/b6581780403fac1fe05a/image.webp,247,,,2024-06-25 14:54:23.273247
185
+ flagged/Input Image/c82e8b7773224a87ab1a/strawberry.jpg,blueberry,"{""image"": ""flagged/Specify object to count with visual exemplars here/3f1c6c4b8fa8b0faec98/strawberry.jpg"", ""points"": []}",flagged/Detected Instances/f9ffff38f04c59e61004/image.webp,137,,,2024-06-25 14:56:52.255423
186
+ flagged/Input Image/5e7b302c38bef4e986e9/strawberry.jpg,blueberry,"{""image"": ""flagged/Specify object to count with visual exemplars here/296b7b929afa36c48677/strawberry.jpg"", ""points"": [[329.0, 168.0, 2.0, 353.0, 190.0, 3.0]]}",flagged/Detected Instances/1a9940b04abede2a55a9/image.webp,133,,,2024-06-25 14:57:03.171030
187
+ flagged/Input Image/288e68a5d5996e695512/strawberry.jpg,strawberry,"{""image"": ""flagged/Specify object to count with visual exemplars here/e7aa3e19a9c873c247b5/strawberry.jpg"", ""points"": [[208.0, 92.0, 2.0, 236.0, 139.0, 3.0]]}",flagged/Detected Instances/062ea676672533bd63af/image.webp,16,,,2024-06-25 15:01:55.834836
188
+ flagged/Input Image/65771625dd3209ab4b5e/women.jpg,girl,"{""image"": ""flagged/Specify object to count with visual exemplars here/9cc7376257b49f9a02ef/women.jpg"", ""points"": [[267.0, 320.0, 2.0, 300.0, 362.0, 3.0]]}",flagged/Detected Instances/3e86fdbcad5cb3dbab9a/image.webp,6,,,2024-06-25 15:02:27.623802
189
+ flagged/Input Image/6d2d8805808abba5c681/strawberry.jpg,strawberry,"{""image"": ""flagged/Specify object to count with visual exemplars here/9f49a41bd458e9c37e95/strawberry.jpg"", ""points"": [[305.0, 187.0, 1.0, 0.0, 0.0, 4.0], [166.0, 56.0, 2.0, 203.0, 136.0, 3.0]]}",flagged/Detected Instances/9618d50e52b98e8ecfd1/image.webp,16,,,2024-06-25 15:08:35.121868
190
+ flagged/Input Image/12e08ca8d302970a2008/strawberry.jpg,strawberry,"{""image"": ""flagged/Specify object to count with visual exemplars here/ee9d14118e586de08ead/strawberry.jpg"", ""points"": [[305.0, 187.0, 1.0, 0.0, 0.0, 4.0], [166.0, 56.0, 2.0, 203.0, 136.0, 3.0], [301.0, 96.0, 2.0, 319.0, 118.0, 3.0], [313.0, 165.0, 2.0, 333.0, 184.0, 3.0], [271.0, 282.0, 2.0, 285.0, 309.0, 3.0], [301.0, 242.0, 2.0, 320.0, 262.0, 3.0], [329.0, 162.0, 2.0, 358.0, 185.0, 3.0]]}",flagged/Detected Instances/05fd3125ccda9546e2b6/image.webp,146,,,2024-06-25 15:08:50.604743
191
+ flagged/Input Image/4e1dd60a00267ba7ac80/women.jpg,boy,"{""image"": ""flagged/Specify object to count with visual exemplars here/4a178f28ec8e9901388e/women.jpg"", ""points"": []}",flagged/Detected Instances/b7d4a5530dfdf01f426a/image.webp,1,,,2024-06-25 15:09:42.749784
192
+ flagged/Input Image/813b6b4e4a70fea8e13c/egg.jpg,egg,"{""image"": ""flagged/Specify object to count with visual exemplars here/9fc0bb0e2e5ed2a7c023/egg.jpg"", ""points"": []}",flagged/Detected Instances/6a7dbfbbbf65a567057e/image.webp,18,,,2024-06-25 15:09:55.901192
193
+ flagged/Input Image/c09c36810b55389abddc/lego.jpg,yellow lego stud,"{""image"": ""flagged/Specify object to count with visual exemplars here/696b3fd37ad6c635baba/lego.jpg"", ""points"": []}",flagged/Detected Instances/60c9fcde0bb55251c8f1/image.webp,23,,,2024-06-25 15:10:20.114086
194
+ flagged/Input Image/bd286866d8e9a3e48bad/lego.jpg,red,"{""image"": ""flagged/Specify object to count with visual exemplars here/719beaa78bcd4eb92c8b/lego.jpg"", ""points"": []}",flagged/Detected Instances/87cfe221a685444fa193/image.webp,23,,,2024-06-25 15:10:33.233397
195
+ flagged/Input Image/89c65135bd1fcf0062af/green-pea.jpg,green pea,"{""image"": ""flagged/Specify object to count with visual exemplars here/b2de1a09bc58b6b4d3ae/green-pea.jpg"", ""points"": []}",flagged/Detected Instances/7272d7039ec368ccb1ac/image.webp,213,,,2024-06-25 15:10:42.163670
196
+ flagged/Input Image/a6c33d85c85f75ef8866/stamp.jpg,stamp,"{""image"": ""flagged/Specify object to count with visual exemplars here/679009966f23256eed77/stamp.jpg"", ""points"": []}",flagged/Detected Instances/946f4a626a568a22de21/image.webp,45,,,2024-06-25 15:10:57.228588
197
+ flagged/Input Image/5954d7cfd21da1b1d083/webcam.png,girl,"{""image"": ""flagged/Specify object to count with visual exemplars here/406bdbe8b94e97f309e1/stamp.jpg"", ""points"": []}",flagged/Detected Instances/671208d6944225443700/image.webp,1,,,2024-06-25 15:13:06.226068
198
+ flagged/Input Image/0d653baaf7b60d4fa9a9/webcam.png,,"{""image"": ""flagged/Specify object to count with visual exemplars here/4961820b4b19fa1543ab/webcam.png"", ""points"": [[348.0, 198.0, 2.0, 825.0, 594.0, 3.0]]}",flagged/Detected Instances/11d3ce706b7c55e4b29d/image.webp,6,,,2024-06-25 15:13:46.575188
199
+ flagged/Input Image/2fc47796e17477b72a7b/webcam.png,niki,"{""image"": ""flagged/Specify object to count with visual exemplars here/ed991be65bcc28198008/webcam.png"", ""points"": [[348.0, 198.0, 2.0, 825.0, 594.0, 3.0]]}",flagged/Detected Instances/4b4c120051c76b356ad9/image.webp,11,,,2024-06-25 15:14:11.677104
200
+ flagged/Input Image/c97af0e3fe6b528a4787/webcam.png,girl,"{""image"": ""flagged/Specify object to count with visual exemplars here/b2e5b5ad196b52f66b65/webcam.png"", ""points"": [[348.0, 198.0, 2.0, 825.0, 594.0, 3.0]]}",flagged/Detected Instances/99c1019f28a1ea86e661/image.webp,5,,,2024-06-25 15:14:22.291803
201
+ flagged/Input Image/6d42f7157ab42c91e2d7/webcam.png,girl,"{""image"": ""flagged/Specify object to count with visual exemplars here/7fb61e9fc487a3698878/webcam.png"", ""points"": []}",flagged/Detected Instances/bbe44c1a92d592ae7d96/image.webp,1,,,2024-06-25 15:14:34.641650
202
+ flagged/Input Image/cd2fd26787e15861e3d8/apple.jpg,apple,"{""image"": ""flagged/Specify object to count with visual exemplars here/67cf19d7f6ecdd3495a0/apple.jpg"", ""points"": []}",flagged/Detected Instances/c828265963027a846279/image.webp,568,,,2024-06-25 15:39:38.657610
203
+ flagged/Input Image/676bdd4d886064a651ff/apple.jpg,apple,"{""image"": ""flagged/Specify object to count with visual exemplars here/26e0d8f5fa299f54b2dd/apple.jpg"", ""points"": [[316.0, 184.0, 2.0, 371.0, 220.0, 3.0]]}",flagged/Detected Instances/f281b9015813f6849ef0/image.webp,541,,,2024-06-25 15:40:22.068821
204
+ flagged/Input Image/157f9999f8285255f079/apple.jpg,apple,"{""image"": ""flagged/Specify object to count with visual exemplars here/9f08512a7bc78db40a16/apple.jpg"", ""points"": [[316.0, 184.0, 2.0, 371.0, 220.0, 3.0], [118.0, 245.0, 2.0, 172.0, 276.0, 3.0]]}",flagged/Detected Instances/20546e887359f9ab6311/image.webp,552,,,2024-06-25 15:40:31.337257
205
+ flagged/Input Image/dfefe4e8ad7004a21057/apple.jpg,apple,"{""image"": ""flagged/Specify object to count with visual exemplars here/0e01c3339d835fc181f9/apple.jpg"", ""points"": [[316.0, 184.0, 2.0, 371.0, 220.0, 3.0], [118.0, 245.0, 2.0, 172.0, 276.0, 3.0], [314.0, 230.0, 2.0, 357.0, 265.0, 3.0]]}",flagged/Detected Instances/00f422daee514dc14f95/image.webp,540,,,2024-06-25 15:40:39.970940
206
+ flagged/Input Image/740c54384733c6909520/deer.jpg,deer,"{""image"": ""flagged/Specify object to count with visual exemplars here/20146d57e02b87830aeb/deer.jpg"", ""points"": []}",flagged/Detected Instances/5f4bb2b3b91635f81795/image.webp,21,,,2024-06-25 15:40:53.851874
207
+ flagged/Input Image/5a9d46d5f1fb47ee307b/orchard.jpg,apple,"{""image"": ""flagged/Specify object to count with visual exemplars here/6d352f6e7b5a6c866fdb/deer.jpg"", ""points"": []}",flagged/Detected Instances/6c2b985de60b38896d84/image.webp,543,,,2024-06-25 15:42:01.647977
208
+ flagged/Input Image/de66ddbd6870a9789205/webcam.png,bunny,"{""image"": ""flagged/Specify object to count with visual exemplars here/cd0638cb70d92342ecda/deer.jpg"", ""points"": []}",flagged/Detected Instances/dd795b53a5c38aa3207c/image.webp,0,,,2024-06-25 15:43:54.093401
209
+ flagged/Input Image/4193d822e105ac4695ff/webcam.png,man,"{""image"": ""flagged/Specify object to count with visual exemplars here/13affadc040616f325fb/deer.jpg"", ""points"": []}",flagged/Detected Instances/deb096b5bb87593f8894/image.webp,1,,,2024-06-25 15:44:03.789939
210
+ flagged/Input Image/5dca2e19924d68ff12c5/webcam.png,yoga mat,"{""image"": ""flagged/Specify object to count with visual exemplars here/3c08834b2231ee8173b6/deer.jpg"", ""points"": []}",flagged/Detected Instances/4ccd7a4d047f6cc4f438/image.webp,0,,,2024-06-25 15:44:18.374825
211
+ flagged/Input Image/7dc8361a1e198ce87161/webcam.png,microwave,"{""image"": ""flagged/Specify object to count with visual exemplars here/2ab8fa7854e2e8f488b7/deer.jpg"", ""points"": []}",flagged/Detected Instances/5a1c457670ba55bd7f22/image.webp,1,,,2024-06-25 15:44:29.556338
212
+ flagged/Input Image/e110ca4a4a40edaa6c2f/webcam.png,eyebrow,"{""image"": ""flagged/Specify object to count with visual exemplars here/2b5d4f4150c5af42be5e/deer.jpg"", ""points"": []}",flagged/Detected Instances/e6e0f246c363ee76f378/image.webp,0,,,2024-06-25 15:44:54.438375
213
+ flagged/Input Image/b9323d538671be8fb66c/webcam.png,ear,"{""image"": ""flagged/Specify object to count with visual exemplars here/d218867fab61d8837d1b/deer.jpg"", ""points"": []}",flagged/Detected Instances/8d6a4f375884a2862603/image.webp,0,,,2024-06-25 15:45:04.003679
214
+ Submit,Clear,flagged/Input Image/207cd608b7c1a6c526bd/strawberry.jpg,strawberry,"{""image"": ""flagged/Specify object to count with visual exemplars here/4dfe49b3bb9109a5112c/strawberry.jpg"", ""points"": []}",flagged/Detected Instances/69dbd8ce1b54204aa311/image.webp,16,,,2024-06-25 17:17:28.470393
215
+ Submit,Clear,flagged/Input Image/ccb9fedaf141f3cb6f69/strawberry.jpg,strawberry,"{""image"": ""flagged/Specify object to count with visual exemplars here/9f5e5147ae36d6c07538/strawberry.jpg"", ""points"": []}",flagged/Detected Instances/cd6f4c50bdf9f10604db/image.webp,16,,,2024-06-25 17:48:13.969532
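The rows above are the remainder of Gradio's flagging log (flagged/log.csv). Each record stores the flagged input image path, the text prompt, a JSON blob holding the exemplar image and any boxes drawn on it, the rendered "Detected Instances" output, the predicted count, two columns that are empty in every row here, and a timestamp; a few later rows also carry leading "Submit"/"Clear" button columns. The snippet below is a minimal, unofficial sketch of reading these records back in Python; the helper name read_flagged_rows, the column order, and the meaning of the per-point codes (2.0/3.0 appear to mark the two corners of an exemplar box, 1.0/4.0 stray clicks) are all inferred from the rows themselves rather than from a documented schema.

import csv
import json

def read_flagged_rows(path="flagged/log.csv"):
    """Yield (image_path, prompt, exemplar_boxes, count) from the flag log (hypothetical helper)."""
    with open(path, newline="") as f:
        for row in csv.reader(f):
            # Some rows carry extra leading button columns ("Submit", "Clear"); skip past them.
            offset = 2 if row and row[0] in ("Submit", "Clear") else 0
            # Skip the header line and anything that does not match the inferred layout.
            if len(row) < offset + 5 or not row[offset + 2].startswith("{"):
                continue
            exemplar = json.loads(row[offset + 2])  # {"image": ..., "points": [...]}
            # Each 6-number entry seems to encode one exemplar box as
            # [x1, y1, 2.0, x2, y2, 3.0]; entries ending in 4.0 look like stray clicks.
            boxes = [
                (p[0], p[1], p[3], p[4])
                for p in exemplar.get("points", [])
                if len(p) == 6 and p[2] == 2.0
            ]
            yield row[offset], row[offset + 1], boxes, int(row[offset + 4])

For example, iterating "for image, prompt, boxes, count in read_flagged_rows():" walks the flagged samples without loading any of the image files.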
flagged/output/a686c9813d1a6c436907/image.webp ADDED
gradio-demo-img.png ADDED

Git LFS Details

  • SHA256: 09bd972ca4a129ed5cf011251e8d63ec4c9e97a2b9641d4fa871b550b41aece6
  • Pointer size: 132 Bytes
  • Size of remote file: 1.54 MB
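Since gradio-demo-img.png is tracked with Git LFS, the commit itself only adds a small pointer stub (that is what the "Pointer size: 132 Bytes" entry above refers to), while the 1.54 MB image lives in LFS storage. Per the Git LFS pointer-file spec, that stub holds roughly three lines: a "version https://git-lfs.github.com/spec/v1" header, an "oid sha256:09bd972ca4a129ed5cf011251e8d63ec4c9e97a2b9641d4fa871b550b41aece6" line repeating the hash shown above, and a "size" line giving the exact byte count, which this summary does not show.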
green-pea.jpg ADDED
groundingdino/util/__init__.py ADDED
@@ -0,0 +1 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved