pain committed
Commit
08545c6
1 Parent(s): ac78178

Upload 115 files

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. cashed_pickles/image_features_XTD_1000_images_XLM_Roberta_Large_Vit_B_16Plus_ar.pickle +3 -0
  2. cashed_pickles/image_features_XTD_1000_images_arabert_siglib_best_model.pickle +3 -0
  3. cashed_pickles/text_features_XTD_1000_images_XLM_Roberta_Large_Vit_B_16Plus_ar.pickle +3 -0
  4. cashed_pickles/text_features_XTD_1000_images_arabert_siglib_best_model.pickle +3 -0
  5. gradio_application/__pycache__/app.cpython-38.pyc +0 -0
  6. gradio_application/__pycache__/app.cpython-39.pyc +0 -0
  7. gradio_application/__pycache__/model_loading.cpython-38.pyc +0 -0
  8. gradio_application/__pycache__/model_loading.cpython-39.pyc +0 -0
  9. gradio_application/__pycache__/utils.cpython-38.pyc +0 -0
  10. gradio_application/__pycache__/utils.cpython-39.pyc +0 -0
  11. gradio_application/app.py +110 -0
  12. gradio_application/model_loading.py +51 -0
  13. gradio_application/utils.py +200 -0
  14. head_weights/arabertv2-vit-B-16-siglibheads_of_the_model_arabertv2-ViT-B-16-SigLIP-512-155_.pickle +3 -0
  15. photos/XTD10_dataset/COCO_train2014_000000061854.jpg +0 -0
  16. photos/XTD10_dataset/COCO_train2014_000000061877.jpg +0 -0
  17. photos/XTD10_dataset/COCO_train2014_000000061911.jpg +0 -0
  18. photos/XTD10_dataset/COCO_train2014_000000061945.jpg +0 -0
  19. photos/XTD10_dataset/COCO_train2014_000000062160.jpg +0 -0
  20. photos/XTD10_dataset/COCO_train2014_000000062209.jpg +0 -0
  21. photos/XTD10_dataset/COCO_train2014_000000062226.jpg +0 -0
  22. photos/XTD10_dataset/COCO_train2014_000000062257.jpg +0 -0
  23. photos/XTD10_dataset/COCO_train2014_000000062293.jpg +0 -0
  24. photos/XTD10_dataset/COCO_train2014_000000062301.jpg +0 -0
  25. photos/XTD10_dataset/COCO_train2014_000000062387.jpg +0 -0
  26. photos/XTD10_dataset/COCO_train2014_000000062557.jpg +0 -0
  27. photos/XTD10_dataset/COCO_train2014_000000062591.jpg +0 -0
  28. photos/XTD10_dataset/COCO_train2014_000000062740.jpg +0 -0
  29. photos/XTD10_dataset/COCO_train2014_000000062745.jpg +0 -0
  30. photos/XTD10_dataset/COCO_train2014_000000062756.jpg +0 -0
  31. photos/XTD10_dataset/COCO_train2014_000000062778.jpg +0 -0
  32. photos/XTD10_dataset/COCO_train2014_000000063035.jpg +0 -0
  33. photos/XTD10_dataset/COCO_train2014_000000063050.jpg +0 -0
  34. photos/XTD10_dataset/COCO_train2014_000000063109.jpg +0 -0
  35. photos/XTD10_dataset/COCO_train2014_000000063121.jpg +0 -0
  36. photos/XTD10_dataset/COCO_train2014_000000063230.jpg +0 -0
  37. photos/XTD10_dataset/COCO_train2014_000000064627.jpg +0 -0
  38. photos/XTD10_dataset/COCO_train2014_000000064697.jpg +0 -0
  39. photos/XTD10_dataset/COCO_train2014_000000064744.jpg +0 -0
  40. photos/XTD10_dataset/COCO_train2014_000000064765.jpg +0 -0
  41. photos/XTD10_dataset/COCO_train2014_000000064823.jpg +0 -0
  42. photos/XTD10_dataset/COCO_train2014_000000064836.jpg +0 -0
  43. photos/XTD10_dataset/COCO_train2014_000000064890.jpg +0 -0
  44. photos/XTD10_dataset/COCO_train2014_000000064962.jpg +0 -0
  45. photos/XTD10_dataset/COCO_train2014_000000065162.jpg +0 -0
  46. photos/XTD10_dataset/COCO_train2014_000000065213.jpg +0 -0
  47. photos/XTD10_dataset/COCO_train2014_000000065220.jpg +0 -0
  48. photos/XTD10_dataset/COCO_train2014_000000065420.jpg +0 -0
  49. photos/XTD10_dataset/COCO_train2014_000000065523.jpg +0 -0
  50. photos/XTD10_dataset/COCO_train2014_000000065836.jpg +0 -0
cashed_pickles/image_features_XTD_1000_images_XLM_Roberta_Large_Vit_B_16Plus_ar.pickle ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3986c84721b9f63ecbe86c4d9903327185865f560de8a5105f1909a5ffd54be
+size 2560139
cashed_pickles/image_features_XTD_1000_images_arabert_siglib_best_model.pickle ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:22ee1693fd39a670fbefba330b2ae171bd523429fccd4f6c0b5c9427a837f66a
+size 3072139
cashed_pickles/text_features_XTD_1000_images_XLM_Roberta_Large_Vit_B_16Plus_ar.pickle ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d1b0bfa2ea2c8bff9da1768012c80fda84465605d56d0dcdd7ac9f63fb23f503
+size 2560139
cashed_pickles/text_features_XTD_1000_images_arabert_siglib_best_model.pickle ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd91f8e3579360094373b2874630da96d1ad29b8f7a3f81c616e3ad1ee09b969
+size 3072139
gradio_application/__pycache__/app.cpython-38.pyc ADDED
Binary file (2.46 kB)
gradio_application/__pycache__/app.cpython-39.pyc ADDED
Binary file (2.74 kB)
gradio_application/__pycache__/model_loading.cpython-38.pyc ADDED
Binary file (2.17 kB)
gradio_application/__pycache__/model_loading.cpython-39.pyc ADDED
Binary file (2.51 kB)
gradio_application/__pycache__/utils.cpython-38.pyc ADDED
Binary file (7.21 kB)
gradio_application/__pycache__/utils.cpython-39.pyc ADDED
Binary file (7.34 kB)
gradio_application/app.py ADDED
@@ -0,0 +1,110 @@
+import gradio as gr
+import utils
+
+
+# AraClip demo
+with gr.Blocks() as demo_araclip:
+
+    gr.Markdown("## Input parameters")
+
+    txt = gr.Textbox(label="Text Query (Caption)")
+    num = gr.Slider(label="Number of retrieved images", value=1, minimum=1, maximum=1000)
+
+    with gr.Row():
+        btn = gr.Button("Retrieve images", scale=1)
+
+    gr.Markdown("## Retrieved Images")
+
+    gallery = gr.Gallery(
+        label="Generated images", show_label=True, elem_id="gallery",
+        columns=[5], rows=[1], object_fit="contain", height="auto")
+
+    with gr.Row():
+        labels = gr.Label(label="Text image similarity")
+
+    with gr.Row():
+
+        with gr.Column(scale=1):
+            gr.Markdown("<div style='text-align: center; font-size: 24px; font-weight: bold;'>Data Retrieved based on Image Similarity</div>")
+            json_output = gr.JSON()
+
+        with gr.Column(scale=1):
+            gr.Markdown("<div style='text-align: center; font-size: 24px; font-weight: bold;'>Data Retrieved based on Text Similarity</div>")
+            json_text = gr.JSON()
+
+    btn.click(utils.predict, inputs=[txt, num], outputs=[gallery, labels, json_output, json_text])
+
+    gr.Examples(
+        # Example captions (Arabic): "A Pittsburgh Pirates player crossing home plate
+        # in a baseball game", "A cat standing with its paws on a computer mouse on
+        # the desk", "A plate of Chinese vegetable soup, with fries and a bottle of water".
+        examples=[["تخطي لاعب فريق بيتسبرج بايرتس منطقة اللوحة الرئيسية في مباراة بدوري البيسبول", 5],
+                  ["وقوف قطة بمخالبها على فأرة حاسوب على المكتب", 10],
+                  ["صحن به شوربة صينية بالخضار، وإلى جانبه بطاطس مقلية وزجاجة ماء", 7]],
+        inputs=[txt, num],
+        outputs=[gallery, labels, json_output, json_text],
+        fn=utils.predict,
+        cache_examples=False,
+    )
+
+
+# mCLIP demo
+with gr.Blocks() as demo_mclip:
+
+    gr.Markdown("## Input parameters")
+
+    txt = gr.Textbox(label="Text Query (Caption)")
+    num = gr.Slider(label="Number of retrieved images", value=1, minimum=1, maximum=1000)
+
+    with gr.Row():
+        btn = gr.Button("Retrieve images", scale=1)
+
+    gr.Markdown("## Retrieved Images")
+
+    gallery = gr.Gallery(
+        label="Generated images", show_label=True, elem_id="gallery_mclip",
+        columns=[5], rows=[1], object_fit="contain", height="auto")
+
+    labels = gr.Label()
+
+    with gr.Row():
+
+        with gr.Column(scale=1):
+            gr.Markdown("## Images Retrieved")
+            json_output = gr.JSON()
+
+        with gr.Column(scale=1):
+            gr.Markdown("## Text Retrieved")
+            json_text = gr.JSON()
+
+    btn.click(utils.predict_mclip, inputs=[txt, num], outputs=[gallery, labels, json_output, json_text])
+
+    gr.Examples(
+        examples=[["تخطي لاعب فريق بيتسبرج بايرتس منطقة اللوحة الرئيسية في مباراة بدوري البيسبول", 5],
+                  ["وقوف قطة بمخالبها على فأرة حاسوب على المكتب", 10],
+                  ["صحن به شوربة صينية بالخضار، وإلى جانبه بطاطس مقلية وزجاجة ماء", 7]],
+        inputs=[txt, num],
+        outputs=[gallery, labels, json_output, json_text],
+        fn=utils.predict_mclip,
+        cache_examples=False,
+    )
+
+
+# Group the demos in a TabbedInterface
+with gr.Blocks() as demo:
+
+    gr.Markdown("<font color=red size=10><center>AraClip: Arabic Image Retrieval Application</center></font>")
+    gr.TabbedInterface([demo_araclip, demo_mclip], ["Our Model", "mCLIP model"])
+
+
+if __name__ == "__main__":
+    demo.launch()
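For context, the click handlers above follow Gradio's usual contract: the callback must return one value per output component, in order (gallery, labels, json_output, json_text). A minimal, self-contained sketch of that wiring, with a hypothetical fake_predict standing in for utils.predict:

import gradio as gr

# Hypothetical stand-in for utils.predict: returns one value per output component.
def fake_predict(text, num):
    images = []                      # gr.Gallery accepts a list of (path, caption) tuples
    labels = {"Image # 1": 0.9}      # gr.Label accepts a dict of label -> confidence
    return images, labels, {"query": text}, {"n": int(num)}

with gr.Blocks() as tiny_demo:
    txt = gr.Textbox()
    num = gr.Slider(minimum=1, maximum=10, value=1)
    btn = gr.Button("Go")
    outputs = [gr.Gallery(), gr.Label(), gr.JSON(), gr.JSON()]
    btn.click(fake_predict, inputs=[txt, num], outputs=outputs)

if __name__ == "__main__":
    tiny_demo.launch()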
gradio_application/model_loading.py ADDED
@@ -0,0 +1,51 @@
+import pickle
+
+import torch
+import transformers
+
+
+# Our model definition: a multilingual text encoder with a linear "CLIP head"
+# that projects its pooled output into the image encoder's embedding space.
+
+class MultilingualClipEdited(torch.nn.Module):
+    def __init__(self, model_name, tokenizer_name, head_name, weights_dir='head_weights/',
+                 cache_dir=None, in_features=None, out_features=None):
+        super().__init__()
+        self.model_name = model_name
+        self.tokenizer_name = tokenizer_name
+        self.head_path = weights_dir + head_name
+
+        self.tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer_name, cache_dir=cache_dir)
+        self.transformer = transformers.AutoModel.from_pretrained(model_name, cache_dir=cache_dir)
+        self.clip_head = torch.nn.Linear(in_features=in_features, out_features=out_features)
+        self._load_head()
+
+    def forward(self, txt):
+        txt_tok = self.tokenizer(txt, padding=True, return_tensors='pt')
+        embs = self.transformer(**txt_tok)[0]
+        att = txt_tok['attention_mask']
+        # Mean-pool the token embeddings, masking out padding positions
+        embs = (embs * att.unsqueeze(2)).sum(dim=1) / att.sum(dim=1)[:, None]
+        return self.clip_head(embs)
+
+    def _load_head(self):
+        # The head pickle stores (weight, bias); the weight is saved transposed
+        with open(self.head_path, 'rb') as f:
+            lin_weights = pickle.loads(f.read())
+        self.clip_head.weight = torch.nn.Parameter(torch.tensor(lin_weights[0]).float().t())
+        self.clip_head.bias = torch.nn.Parameter(torch.tensor(lin_weights[1]).float())
+
+
+AVAILABLE_MODELS = {
+    'bert-base-arabertv2-ViT-B-16-SigLIP-512-epoch-155-trained-2M': {
+        'model_name': 'Arabic-Clip/bert-base-arabertv2-ViT-B-16-SigLIP-512-epoch-155-trained-2M',
+        'tokenizer_name': 'Arabic-Clip/bert-base-arabertv2-ViT-B-16-SigLIP-512-epoch-155-trained-2M',
+        'head_name': 'arabertv2-vit-B-16-siglibheads_of_the_model_arabertv2-ViT-B-16-SigLIP-512-155_.pickle'
+    },
+}
+
+
+def load_model(name, cache_dir=None, in_features=None, out_features=None):
+    config = AVAILABLE_MODELS[name]
+    return MultilingualClipEdited(**config, cache_dir=cache_dir, in_features=in_features, out_features=out_features)
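As a quick illustration (not part of the commit), the text tower defined above can be exercised on its own. This sketch assumes the dependencies are installed and the head pickle from head_weights/ in this commit is present locally; the 768 -> 768 head dimensions mirror the call made in utils.py:

from model_loading import load_model

# Assumes head_weights/arabertv2-vit-B-16-siglib...pickle is available locally.
text_model = load_model(
    'bert-base-arabertv2-ViT-B-16-SigLIP-512-epoch-155-trained-2M',
    in_features=768, out_features=768)

# Embed an Arabic query ("A cat sitting on the desk"); the result is a (1, 768) tensor.
emb = text_model(["قطة تجلس على المكتب"])
print(emb.shape)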
gradio_application/utils.py ADDED
@@ -0,0 +1,200 @@
+import os
+import json
+import pickle
+
+import numpy as np
+import torch
+import transformers
+from PIL import Image
+from open_clip import create_model_from_pretrained, create_model_and_transforms
+
+# XLM model functions
+from multilingual_clip import pt_multilingual_clip
+
+from model_loading import load_model
+
+
+class CustomDataSet(torch.utils.data.Dataset):
+    def __init__(self, main_dir, compose, image_name_list):
+        self.main_dir = main_dir
+        self.transform = compose
+        self.total_imgs = image_name_list
+
+    def __len__(self):
+        return len(self.total_imgs)
+
+    def get_image_name(self, idx):
+        return self.total_imgs[idx]
+
+    def __getitem__(self, idx):
+        img_loc = os.path.join(self.main_dir, self.total_imgs[idx])
+        image = Image.open(img_loc)
+        return self.transform(image)
+
+
+def features_pickle(file_path=None):
+    # Load pre-computed (cached) feature arrays from a pickle file
+    with open(file_path, 'rb') as handle:
+        return pickle.load(handle)
+
+
+def dataset_loading():
+    # Load the XTD10 annotations (one JSON object per line), sorted by id
+    with open("photos/en_ar_XTD10_edited_v2.jsonl") as filino:
+        data = [json.loads(line) for line in filino]
+
+    sorted_data = sorted(data, key=lambda x: x['id'])
+    image_name_list = [item["image_name"] for item in sorted_data]
+
+    return sorted_data, image_name_list
+
+
+def text_encoder(language_model, text):
+    """Embed the text query and also return an L2-normalized copy."""
+    embedding = language_model(text)
+    norm_embedding = embedding / np.linalg.norm(embedding)
+    return embedding, norm_embedding
+
+
+def compare_embeddings(logit_scale, img_embs, txt_embs):
+    # Temperature-scaled cosine similarity between the query and each image
+    image_features = img_embs / img_embs.norm(dim=-1, keepdim=True)
+    text_features = txt_embs / txt_embs.norm(dim=-1, keepdim=True)
+    logits_per_text = logit_scale * text_features @ image_features.t()
+    return logits_per_text
+
+
+def compare_embeddings_text(full_text_embds, txt_embs):
+    # Cosine similarity between the query and each cached caption embedding
+    full_text_embds_features = full_text_embds / full_text_embds.norm(dim=-1, keepdim=True)
+    text_features = txt_embs / txt_embs.norm(dim=-1, keepdim=True)
+    logits_per_text_full = text_features @ full_text_embds_features.t()
+    return logits_per_text_full
+
+
+def find_image(language_model, clip_model, text_query, dataset, image_features, text_features_new, sorted_data, num=1):
+    # Embed the query, then rank the cached image and caption embeddings against it
+    embedding, _ = text_encoder(language_model, text_query)
+
+    logit_scale = clip_model.logit_scale.exp().float().to('cpu')
+
+    language_logits, text_logits = {}, {}
+    language_logits["Arabic"] = compare_embeddings(logit_scale, torch.from_numpy(image_features), torch.from_numpy(embedding))
+    text_logits["Arabic_text"] = compare_embeddings_text(torch.from_numpy(text_features_new), torch.from_numpy(embedding))
+
+    for _, txt_logits in language_logits.items():
+        probs = txt_logits.softmax(dim=-1).cpu().detach().numpy().T
+
+        file_paths = []
+        labels, json_data = {}, {}
+
+        # Walk the ranking from best match downwards
+        for i in range(1, num + 1):
+            idx = np.argsort(probs, axis=0)[-i, 0]
+            path = 'photos/XTD10_dataset/' + dataset.get_image_name(idx)
+            path_l = (path, f"{sorted_data[idx]['caption_ar']}")
+
+            labels[f"Image # {i}"] = float(probs[idx])
+            json_data[f"Image # {i}"] = sorted_data[idx]
+            file_paths.append(path_l)
+
+    json_text = {}
+    for _, txt_logits_full in text_logits.items():
+        probs_text = txt_logits_full.softmax(dim=-1).cpu().detach().numpy().T
+
+        for j in range(1, num + 1):
+            idx = np.argsort(probs_text, axis=0)[-j, 0]
+            json_text[f"Text # {j}"] = sorted_data[idx]
+
+    return file_paths, labels, json_data, json_text
+
+
+class AraClip():
+    def __init__(self):
+        self.text_model = load_model('bert-base-arabertv2-ViT-B-16-SigLIP-512-epoch-155-trained-2M', in_features=768, out_features=768)
+        self.language_model = lambda queries: np.asarray(self.text_model(queries).detach().to('cpu'))
+        self.clip_model, self.compose = create_model_from_pretrained('hf-hub:timm/ViT-B-16-SigLIP-512')
+        self.sorted_data, self.image_name_list = dataset_loading()
+
+    def load_images(self):
+        # Cached image features for the 1000 XTD images
+        return features_pickle('cashed_pickles/image_features_XTD_1000_images_arabert_siglib_best_model.pickle')
+
+    def load_text(self):
+        # Cached caption features for the 1000 XTD images
+        return features_pickle('cashed_pickles/text_features_XTD_1000_images_arabert_siglib_best_model.pickle')
+
+    def load_dataset(self):
+        return CustomDataSet("photos/XTD10_dataset", self.compose, self.image_name_list)
+
+
+araclip = AraClip()
+
+
+def predict(text, num):
+    image_paths, labels, json_data, json_text = find_image(
+        araclip.language_model, araclip.clip_model, text,
+        araclip.load_dataset(), araclip.load_images(), araclip.load_text(),
+        araclip.sorted_data, num=int(num))
+    return image_paths, labels, json_data, json_text
+
+
+class Mclip():
+    def __init__(self) -> None:
+        self.tokenizer_mclip = transformers.AutoTokenizer.from_pretrained('M-CLIP/XLM-Roberta-Large-Vit-B-16Plus')
+        self.text_model_mclip = pt_multilingual_clip.MultilingualCLIP.from_pretrained('M-CLIP/XLM-Roberta-Large-Vit-B-16Plus')
+        self.language_model_mclip = lambda queries: np.asarray(self.text_model_mclip.forward(queries, self.tokenizer_mclip).detach().to('cpu'))
+        self.clip_model_mclip, _, self.compose_mclip = create_model_and_transforms('ViT-B-16-plus-240', pretrained="laion400m_e32")
+        self.sorted_data, self.image_name_list = dataset_loading()
+
+    def load_images(self):
+        # Cached image features for the 1000 XTD images
+        return features_pickle('cashed_pickles/image_features_XTD_1000_images_XLM_Roberta_Large_Vit_B_16Plus_ar.pickle')
+
+    def load_text(self):
+        # Cached caption features for the 1000 XTD images
+        return features_pickle('cashed_pickles/text_features_XTD_1000_images_XLM_Roberta_Large_Vit_B_16Plus_ar.pickle')
+
+    def load_dataset(self):
+        return CustomDataSet("photos/XTD10_dataset", self.compose_mclip, self.image_name_list)
+
+
+mclip = Mclip()
+
+
+def predict_mclip(text, num):
+    # Rank against the cached mCLIP image features and caption features
+    image_paths, labels, json_data, json_text = find_image(
+        mclip.language_model_mclip, mclip.clip_model_mclip, text,
+        mclip.load_dataset(), mclip.load_images(), mclip.load_text(),
+        mclip.sorted_data, num=int(num))
+    return image_paths, labels, json_data, json_text
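A hedged usage sketch (not part of the commit): calling the retrieval pipeline directly, bypassing the Gradio UI. Note that importing utils eagerly constructs both AraClip and Mclip, so the models and cached pickles referenced above must be available:

import utils

# Query (Arabic): "A cat standing with its paws on a computer mouse on the desk"
paths, labels, img_json, txt_json = utils.predict("وقوف قطة بمخالبها على فأرة حاسوب على المكتب", 3)
for path, caption in paths:
    print(path, caption)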
head_weights/arabertv2-vit-B-16-siglibheads_of_the_model_arabertv2-ViT-B-16-SigLIP-512-155_.pickle ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:360d4cf756dfc96ebbe9ccaf90f943e1d25a9ca7ca2e225cb7715893c5f62fbb
+size 2362569
photos/XTD10_dataset/COCO_train2014_000000061854.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000061877.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000061911.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000061945.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000062160.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000062209.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000062226.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000062257.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000062293.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000062301.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000062387.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000062557.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000062591.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000062740.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000062745.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000062756.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000062778.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000063035.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000063050.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000063109.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000063121.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000063230.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000064627.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000064697.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000064744.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000064765.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000064823.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000064836.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000064890.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000064962.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000065162.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000065213.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000065220.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000065420.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000065523.jpg ADDED
photos/XTD10_dataset/COCO_train2014_000000065836.jpg ADDED