adirik committed on
Commit
49bc02a
1 Parent(s): 28c00f4
.DS_Store ADDED
Binary file (6.15 kB). View file
.gitattributes CHANGED
@@ -29,3 +29,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
29
  *.zip filter=lfs diff=lfs merge=lfs -text
30
  *.zst filter=lfs diff=lfs merge=lfs -text
31
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
29
  *.zip filter=lfs diff=lfs merge=lfs -text
30
  *.zst filter=lfs diff=lfs merge=lfs -text
31
  *tfevents* filter=lfs diff=lfs merge=lfs -text
32
+ *.index filter=lfs diff=lfs merge=lfs -text
33
+ *.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import json
3
+ import gradio as gr
4
+ import numpy as np
5
+ import tensorflow as tf
6
+
7
+ from backbone import create_name_vit
8
+ from backbone import ClassificationModel
9
+
10
+
11
+
12
# Configuration for the ViT-L/16 backbone at 384x384 input resolution.
vit_l16_384 = {
    "backbone_name": "vit-l/16",
    "backbone_params": {
        "image_size": 384,
        "representation_size": 0,
        "attention_dropout_rate": 0.,
        "dropout_rate": 0.,
        "channels": 3
    },
    "dropout_rate": 0.,
    "pretrained": "./weights/vit_l16_384/model-weights"
}

# Init backbone
backbone = create_name_vit(vit_l16_384["backbone_name"], **vit_l16_384["backbone_params"])

# Init classification model (1000 outputs, matching the label file loaded below)
model = ClassificationModel(
    backbone=backbone,
    dropout_rate=vit_l16_384["dropout_rate"],
    num_classes=1000
)

# Load weights and freeze the model; this app only runs inference
model.load_weights(vit_l16_384["pretrained"])
model.trainable = False

# Load ImageNet idx to label mapping.
# The encoding is explicit so decoding does not depend on the platform locale.
with open("assets/imagenet_1000_idx2labels.json", encoding="utf-8") as f:
    idx_to_label = json.load(f)
42
+
43
+
44
def resize_with_normalization(image, size=(384, 384)):
    """Resize an image and rescale its pixel values for the model.

    Args:
        image: Image tensor or array. Presumably (height, width, 3) with
            values in [0, 255] -- TODO confirm against the Gradio input.
        size: Target (height, width) forwarded to ``tf.image.resize``.
            The default is a tuple so it cannot be mutated across calls.

    Returns:
        A float32 tensor with a leading batch axis of size 1 whose values
        are mapped through ``(x - 127.5) / 127.5``.
    """
    image = tf.cast(image, tf.float32)
    image = tf.image.resize(image, size)
    # Centre and scale: 0 maps to -1 and 255 maps to 1.
    image = (image - 127.5) / 127.5
    return tf.expand_dims(image, axis=0)
51
+
52
def softmax_stable(x):
    """Return the softmax of *x*, normalised over all of its elements.

    The maximum of *x* is subtracted before exponentiation so that large
    logits do not overflow; this does not change the result.

    Args:
        x: Array-like of logits.

    Returns:
        A NumPy array with the same shape as *x* whose entries sum to 1.
    """
    x = np.asarray(x)
    # Compute the shifted exponentials once and reuse them for the sum.
    shifted_exp = np.exp(x - np.max(x))
    return shifted_exp / shifted_exp.sum()
54
+
55
def classify_image(img, top_k):
    """Classify an image and return its most probable ImageNet labels.

    Args:
        img: Input image as delivered by the Gradio image component.
        top_k: Number of predictions to return. It is coerced to ``int``
            and clamped to ``[1, number of classes]``.

    Returns:
        A dict mapping label text to probability (rounded to 4 decimals),
        ordered from most to least probable.
    """
    img = tf.convert_to_tensor(img)
    img = resize_with_normalization(img)

    pred_logits = model.predict(img, batch_size=1, workers=8)[0]
    pred_probs = softmax_stable(pred_logits)

    # The slider may deliver a float, which is not a valid slice bound.
    # A value of 0 would make the original `[-0:]` slice return every class.
    k = max(1, min(int(top_k), len(pred_probs)))
    top_k_labels = pred_probs.argsort()[::-1][:k]

    return {idx_to_label[str(idx)]: round(float(pred_probs[idx]), 4) for idx in top_k_labels}
64
+
65
+
66
# Text shown under the title. The previous text described an OWL-ViT
# object-detection demo and did not match this image classifier.
description = """
Gradio demo for the ViT-L/16 image classification model released by Kakao Brain,
evaluated at 384x384 input resolution.
\n\nTo use it, simply upload an image and use the slider to choose how many of the
most probable ImageNet-1k classes to display along with their probabilities.
"""
demo = gr.Interface(
    classify_image,
    # The slider starts at 1 and moves in whole steps so top_k is always a
    # usable positive integer.
    inputs=[gr.Image(), gr.Slider(1, 1000, value=5, step=1, label="Number of top predictions")],
    # Use the same component API as the inputs instead of the deprecated
    # `gr.outputs` namespace.
    outputs=gr.Label(),
    title="Image Classification with Kakao Brain ViT",
    description=description,
)
demo.launch()
assets/.DS_Store ADDED
Binary file (6.15 kB). View file
assets/imagenet_1000_idx2labels.json ADDED
@@ -0,0 +1,1002 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "0":"tench, Tinca tinca",
3
+ "1":"goldfish, Carassius auratus",
4
+ "2":"great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias",
5
+ "3":"tiger shark, Galeocerdo cuvieri",
6
+ "4":"hammerhead, hammerhead shark",
7
+ "5":"electric ray, crampfish, numbfish, torpedo",
8
+ "6":"stingray",
9
+ "7":"cock",
10
+ "8":"hen",
11
+ "9":"ostrich, Struthio camelus",
12
+ "10":"brambling, Fringilla montifringilla",
13
+ "11":"goldfinch, Carduelis carduelis",
14
+ "12":"house finch, linnet, Carpodacus mexicanus",
15
+ "13":"junco, snowbird",
16
+ "14":"indigo bunting, indigo finch, indigo bird, Passerina cyanea",
17
+ "15":"robin, American robin, Turdus migratorius",
18
+ "16":"bulbul",
19
+ "17":"jay",
20
+ "18":"magpie",
21
+ "19":"chickadee",
22
+ "20":"water ouzel, dipper",
23
+ "21":"kite",
24
+ "22":"bald eagle, American eagle, Haliaeetus leucocephalus",
25
+ "23":"vulture",
26
+ "24":"great grey owl, great gray owl, Strix nebulosa",
27
+ "25":"European fire salamander, Salamandra salamandra",
28
+ "26":"common newt, Triturus vulgaris",
29
+ "27":"eft",
30
+ "28":"spotted salamander, Ambystoma maculatum",
31
+ "29":"axolotl, mud puppy, Ambystoma mexicanum",
32
+ "30":"bullfrog, Rana catesbeiana",
33
+ "31":"tree frog, tree-frog",
34
+ "32":"tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui",
35
+ "33":"loggerhead, loggerhead turtle, Caretta caretta",
36
+ "34":"leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea",
37
+ "35":"mud turtle",
38
+ "36":"terrapin",
39
+ "37":"box turtle, box tortoise",
40
+ "38":"banded gecko",
41
+ "39":"common iguana, iguana, Iguana iguana",
42
+ "40":"American chameleon, anole, Anolis carolinensis",
43
+ "41":"whiptail, whiptail lizard",
44
+ "42":"agama",
45
+ "43":"frilled lizard, Chlamydosaurus kingi",
46
+ "44":"alligator lizard",
47
+ "45":"Gila monster, Heloderma suspectum",
48
+ "46":"green lizard, Lacerta viridis",
49
+ "47":"African chameleon, Chamaeleo chamaeleon",
50
+ "48":"Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis",
51
+ "49":"African crocodile, Nile crocodile, Crocodylus niloticus",
52
+ "50":"American alligator, Alligator mississipiensis",
53
+ "51":"triceratops",
54
+ "52":"thunder snake, worm snake, Carphophis amoenus",
55
+ "53":"ringneck snake, ring-necked snake, ring snake",
56
+ "54":"hognose snake, puff adder, sand viper",
57
+ "55":"green snake, grass snake",
58
+ "56":"king snake, kingsnake",
59
+ "57":"garter snake, grass snake",
60
+ "58":"water snake",
61
+ "59":"vine snake",
62
+ "60":"night snake, Hypsiglena torquata",
63
+ "61":"boa constrictor, Constrictor constrictor",
64
+ "62":"rock python, rock snake, Python sebae",
65
+ "63":"Indian cobra, Naja naja",
66
+ "64":"green mamba",
67
+ "65":"sea snake",
68
+ "66":"horned viper, cerastes, sand viper, horned asp, Cerastes cornutus",
69
+ "67":"diamondback, diamondback rattlesnake, Crotalus adamanteus",
70
+ "68":"sidewinder, horned rattlesnake, Crotalus cerastes",
71
+ "69":"trilobite",
72
+ "70":"harvestman, daddy longlegs, Phalangium opilio",
73
+ "71":"scorpion",
74
+ "72":"black and gold garden spider, Argiope aurantia",
75
+ "73":"barn spider, Araneus cavaticus",
76
+ "74":"garden spider, Aranea diademata",
77
+ "75":"black widow, Latrodectus mactans",
78
+ "76":"tarantula",
79
+ "77":"wolf spider, hunting spider",
80
+ "78":"tick",
81
+ "79":"centipede",
82
+ "80":"black grouse",
83
+ "81":"ptarmigan",
84
+ "82":"ruffed grouse, partridge, Bonasa umbellus",
85
+ "83":"prairie chicken, prairie grouse, prairie fowl",
86
+ "84":"peacock",
87
+ "85":"quail",
88
+ "86":"partridge",
89
+ "87":"African grey, African gray, Psittacus erithacus",
90
+ "88":"macaw",
91
+ "89":"sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita",
92
+ "90":"lorikeet",
93
+ "91":"coucal",
94
+ "92":"bee eater",
95
+ "93":"hornbill",
96
+ "94":"hummingbird",
97
+ "95":"jacamar",
98
+ "96":"toucan",
99
+ "97":"drake",
100
+ "98":"red-breasted merganser, Mergus serrator",
101
+ "99":"goose",
102
+ "100":"black swan, Cygnus atratus",
103
+ "101":"tusker",
104
+ "102":"echidna, spiny anteater, anteater",
105
+ "103":"platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus",
106
+ "104":"wallaby, brush kangaroo",
107
+ "105":"koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus",
108
+ "106":"wombat",
109
+ "107":"jellyfish",
110
+ "108":"sea anemone, anemone",
111
+ "109":"brain coral",
112
+ "110":"flatworm, platyhelminth",
113
+ "111":"nematode, nematode worm, roundworm",
114
+ "112":"conch",
115
+ "113":"snail",
116
+ "114":"slug",
117
+ "115":"sea slug, nudibranch",
118
+ "116":"chiton, coat-of-mail shell, sea cradle, polyplacophore",
119
+ "117":"chambered nautilus, pearly nautilus, nautilus",
120
+ "118":"Dungeness crab, Cancer magister",
121
+ "119":"rock crab, Cancer irroratus",
122
+ "120":"fiddler crab",
123
+ "121":"king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica",
124
+ "122":"American lobster, Northern lobster, Maine lobster, Homarus americanus",
125
+ "123":"spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish",
126
+ "124":"crayfish, crawfish, crawdad, crawdaddy",
127
+ "125":"hermit crab",
128
+ "126":"isopod",
129
+ "127":"white stork, Ciconia ciconia",
130
+ "128":"black stork, Ciconia nigra",
131
+ "129":"spoonbill",
132
+ "130":"flamingo",
133
+ "131":"little blue heron, Egretta caerulea",
134
+ "132":"American egret, great white heron, Egretta albus",
135
+ "133":"bittern",
136
+ "134":"crane",
137
+ "135":"limpkin, Aramus pictus",
138
+ "136":"European gallinule, Porphyrio porphyrio",
139
+ "137":"American coot, marsh hen, mud hen, water hen, Fulica americana",
140
+ "138":"bustard",
141
+ "139":"ruddy turnstone, Arenaria interpres",
142
+ "140":"red-backed sandpiper, dunlin, Erolia alpina",
143
+ "141":"redshank, Tringa totanus",
144
+ "142":"dowitcher",
145
+ "143":"oystercatcher, oyster catcher",
146
+ "144":"pelican",
147
+ "145":"king penguin, Aptenodytes patagonica",
148
+ "146":"albatross, mollymawk",
149
+ "147":"grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus",
150
+ "148":"killer whale, killer, orca, grampus, sea wolf, Orcinus orca",
151
+ "149":"dugong, Dugong dugon",
152
+ "150":"sea lion",
153
+ "151":"Chihuahua",
154
+ "152":"Japanese spaniel",
155
+ "153":"Maltese dog, Maltese terrier, Maltese",
156
+ "154":"Pekinese, Pekingese, Peke",
157
+ "155":"Shih-Tzu",
158
+ "156":"Blenheim spaniel",
159
+ "157":"papillon",
160
+ "158":"toy terrier",
161
+ "159":"Rhodesian ridgeback",
162
+ "160":"Afghan hound, Afghan",
163
+ "161":"basset, basset hound",
164
+ "162":"beagle",
165
+ "163":"bloodhound, sleuthhound",
166
+ "164":"bluetick",
167
+ "165":"black-and-tan coonhound",
168
+ "166":"Walker hound, Walker foxhound",
169
+ "167":"English foxhound",
170
+ "168":"redbone",
171
+ "169":"borzoi, Russian wolfhound",
172
+ "170":"Irish wolfhound",
173
+ "171":"Italian greyhound",
174
+ "172":"whippet",
175
+ "173":"Ibizan hound, Ibizan Podenco",
176
+ "174":"Norwegian elkhound, elkhound",
177
+ "175":"otterhound, otter hound",
178
+ "176":"Saluki, gazelle hound",
179
+ "177":"Scottish deerhound, deerhound",
180
+ "178":"Weimaraner",
181
+ "179":"Staffordshire bullterrier, Staffordshire bull terrier",
182
+ "180":"American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier",
183
+ "181":"Bedlington terrier",
184
+ "182":"Border terrier",
185
+ "183":"Kerry blue terrier",
186
+ "184":"Irish terrier",
187
+ "185":"Norfolk terrier",
188
+ "186":"Norwich terrier",
189
+ "187":"Yorkshire terrier",
190
+ "188":"wire-haired fox terrier",
191
+ "189":"Lakeland terrier",
192
+ "190":"Sealyham terrier, Sealyham",
193
+ "191":"Airedale, Airedale terrier",
194
+ "192":"cairn, cairn terrier",
195
+ "193":"Australian terrier",
196
+ "194":"Dandie Dinmont, Dandie Dinmont terrier",
197
+ "195":"Boston bull, Boston terrier",
198
+ "196":"miniature schnauzer",
199
+ "197":"giant schnauzer",
200
+ "198":"standard schnauzer",
201
+ "199":"Scotch terrier, Scottish terrier, Scottie",
202
+ "200":"Tibetan terrier, chrysanthemum dog",
203
+ "201":"silky terrier, Sydney silky",
204
+ "202":"soft-coated wheaten terrier",
205
+ "203":"West Highland white terrier",
206
+ "204":"Lhasa, Lhasa apso",
207
+ "205":"flat-coated retriever",
208
+ "206":"curly-coated retriever",
209
+ "207":"golden retriever",
210
+ "208":"Labrador retriever",
211
+ "209":"Chesapeake Bay retriever",
212
+ "210":"German short-haired pointer",
213
+ "211":"vizsla, Hungarian pointer",
214
+ "212":"English setter",
215
+ "213":"Irish setter, red setter",
216
+ "214":"Gordon setter",
217
+ "215":"Brittany spaniel",
218
+ "216":"clumber, clumber spaniel",
219
+ "217":"English springer, English springer spaniel",
220
+ "218":"Welsh springer spaniel",
221
+ "219":"cocker spaniel, English cocker spaniel, cocker",
222
+ "220":"Sussex spaniel",
223
+ "221":"Irish water spaniel",
224
+ "222":"kuvasz",
225
+ "223":"schipperke",
226
+ "224":"groenendael",
227
+ "225":"malinois",
228
+ "226":"briard",
229
+ "227":"kelpie",
230
+ "228":"komondor",
231
+ "229":"Old English sheepdog, bobtail",
232
+ "230":"Shetland sheepdog, Shetland sheep dog, Shetland",
233
+ "231":"collie",
234
+ "232":"Border collie",
235
+ "233":"Bouvier des Flandres, Bouviers des Flandres",
236
+ "234":"Rottweiler",
237
+ "235":"German shepherd, German shepherd dog, German police dog, alsatian",
238
+ "236":"Doberman, Doberman pinscher",
239
+ "237":"miniature pinscher",
240
+ "238":"Greater Swiss Mountain dog",
241
+ "239":"Bernese mountain dog",
242
+ "240":"Appenzeller",
243
+ "241":"EntleBucher",
244
+ "242":"boxer",
245
+ "243":"bull mastiff",
246
+ "244":"Tibetan mastiff",
247
+ "245":"French bulldog",
248
+ "246":"Great Dane",
249
+ "247":"Saint Bernard, St Bernard",
250
+ "248":"Eskimo dog, husky",
251
+ "249":"malamute, malemute, Alaskan malamute",
252
+ "250":"Siberian husky",
253
+ "251":"dalmatian, coach dog, carriage dog",
254
+ "252":"affenpinscher, monkey pinscher, monkey dog",
255
+ "253":"basenji",
256
+ "254":"pug, pug-dog",
257
+ "255":"Leonberg",
258
+ "256":"Newfoundland, Newfoundland dog",
259
+ "257":"Great Pyrenees",
260
+ "258":"Samoyed, Samoyede",
261
+ "259":"Pomeranian",
262
+ "260":"chow, chow chow",
263
+ "261":"keeshond",
264
+ "262":"Brabancon griffon",
265
+ "263":"Pembroke, Pembroke Welsh corgi",
266
+ "264":"Cardigan, Cardigan Welsh corgi",
267
+ "265":"toy poodle",
268
+ "266":"miniature poodle",
269
+ "267":"standard poodle",
270
+ "268":"Mexican hairless",
271
+ "269":"timber wolf, grey wolf, gray wolf, Canis lupus",
272
+ "270":"white wolf, Arctic wolf, Canis lupus tundrarum",
273
+ "271":"red wolf, maned wolf, Canis rufus, Canis niger",
274
+ "272":"coyote, prairie wolf, brush wolf, Canis latrans",
275
+ "273":"dingo, warrigal, warragal, Canis dingo",
276
+ "274":"dhole, Cuon alpinus",
277
+ "275":"African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus",
278
+ "276":"hyena, hyaena",
279
+ "277":"red fox, Vulpes vulpes",
280
+ "278":"kit fox, Vulpes macrotis",
281
+ "279":"Arctic fox, white fox, Alopex lagopus",
282
+ "280":"grey fox, gray fox, Urocyon cinereoargenteus",
283
+ "281":"tabby, tabby cat",
284
+ "282":"tiger cat",
285
+ "283":"Persian cat",
286
+ "284":"Siamese cat, Siamese",
287
+ "285":"Egyptian cat",
288
+ "286":"cougar, puma, catamount, mountain lion, painter, panther, Felis concolor",
289
+ "287":"lynx, catamount",
290
+ "288":"leopard, Panthera pardus",
291
+ "289":"snow leopard, ounce, Panthera uncia",
292
+ "290":"jaguar, panther, Panthera onca, Felis onca",
293
+ "291":"lion, king of beasts, Panthera leo",
294
+ "292":"tiger, Panthera tigris",
295
+ "293":"cheetah, chetah, Acinonyx jubatus",
296
+ "294":"brown bear, bruin, Ursus arctos",
297
+ "295":"American black bear, black bear, Ursus americanus, Euarctos americanus",
298
+ "296":"ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus",
299
+ "297":"sloth bear, Melursus ursinus, Ursus ursinus",
300
+ "298":"mongoose",
301
+ "299":"meerkat, mierkat",
302
+ "300":"tiger beetle",
303
+ "301":"ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle",
304
+ "302":"ground beetle, carabid beetle",
305
+ "303":"long-horned beetle, longicorn, longicorn beetle",
306
+ "304":"leaf beetle, chrysomelid",
307
+ "305":"dung beetle",
308
+ "306":"rhinoceros beetle",
309
+ "307":"weevil",
310
+ "308":"fly",
311
+ "309":"bee",
312
+ "310":"ant, emmet, pismire",
313
+ "311":"grasshopper, hopper",
314
+ "312":"cricket",
315
+ "313":"walking stick, walkingstick, stick insect",
316
+ "314":"cockroach, roach",
317
+ "315":"mantis, mantid",
318
+ "316":"cicada, cicala",
319
+ "317":"leafhopper",
320
+ "318":"lacewing, lacewing fly",
321
+ "319":"dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk",
322
+ "320":"damselfly",
323
+ "321":"admiral",
324
+ "322":"ringlet, ringlet butterfly",
325
+ "323":"monarch, monarch butterfly, milkweed butterfly, Danaus plexippus",
326
+ "324":"cabbage butterfly",
327
+ "325":"sulphur butterfly, sulfur butterfly",
328
+ "326":"lycaenid, lycaenid butterfly",
329
+ "327":"starfish, sea star",
330
+ "328":"sea urchin",
331
+ "329":"sea cucumber, holothurian",
332
+ "330":"wood rabbit, cottontail, cottontail rabbit",
333
+ "331":"hare",
334
+ "332":"Angora, Angora rabbit",
335
+ "333":"hamster",
336
+ "334":"porcupine, hedgehog",
337
+ "335":"fox squirrel, eastern fox squirrel, Sciurus niger",
338
+ "336":"marmot",
339
+ "337":"beaver",
340
+ "338":"guinea pig, Cavia cobaya",
341
+ "339":"sorrel",
342
+ "340":"zebra",
343
+ "341":"hog, pig, grunter, squealer, Sus scrofa",
344
+ "342":"wild boar, boar, Sus scrofa",
345
+ "343":"warthog",
346
+ "344":"hippopotamus, hippo, river horse, Hippopotamus amphibius",
347
+ "345":"ox",
348
+ "346":"water buffalo, water ox, Asiatic buffalo, Bubalus bubalis",
349
+ "347":"bison",
350
+ "348":"ram, tup",
351
+ "349":"bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis",
352
+ "350":"ibex, Capra ibex",
353
+ "351":"hartebeest",
354
+ "352":"impala, Aepyceros melampus",
355
+ "353":"gazelle",
356
+ "354":"Arabian camel, dromedary, Camelus dromedarius",
357
+ "355":"llama",
358
+ "356":"weasel",
359
+ "357":"mink",
360
+ "358":"polecat, fitch, foulmart, foumart, Mustela putorius",
361
+ "359":"black-footed ferret, ferret, Mustela nigripes",
362
+ "360":"otter",
363
+ "361":"skunk, polecat, wood pussy",
364
+ "362":"badger",
365
+ "363":"armadillo",
366
+ "364":"three-toed sloth, ai, Bradypus tridactylus",
367
+ "365":"orangutan, orang, orangutang, Pongo pygmaeus",
368
+ "366":"gorilla, Gorilla gorilla",
369
+ "367":"chimpanzee, chimp, Pan troglodytes",
370
+ "368":"gibbon, Hylobates lar",
371
+ "369":"siamang, Hylobates syndactylus, Symphalangus syndactylus",
372
+ "370":"guenon, guenon monkey",
373
+ "371":"patas, hussar monkey, Erythrocebus patas",
374
+ "372":"baboon",
375
+ "373":"macaque",
376
+ "374":"langur",
377
+ "375":"colobus, colobus monkey",
378
+ "376":"proboscis monkey, Nasalis larvatus",
379
+ "377":"marmoset",
380
+ "378":"capuchin, ringtail, Cebus capucinus",
381
+ "379":"howler monkey, howler",
382
+ "380":"titi, titi monkey",
383
+ "381":"spider monkey, Ateles geoffroyi",
384
+ "382":"squirrel monkey, Saimiri sciureus",
385
+ "383":"Madagascar cat, ring-tailed lemur, Lemur catta",
386
+ "384":"indri, indris, Indri indri, Indri brevicaudatus",
387
+ "385":"Indian elephant, Elephas maximus",
388
+ "386":"African elephant, Loxodonta africana",
389
+ "387":"lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens",
390
+ "388":"giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca",
391
+ "389":"barracouta, snoek",
392
+ "390":"eel",
393
+ "391":"coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch",
394
+ "392":"rock beauty, Holocanthus tricolor",
395
+ "393":"anemone fish",
396
+ "394":"sturgeon",
397
+ "395":"gar, garfish, garpike, billfish, Lepisosteus osseus",
398
+ "396":"lionfish",
399
+ "397":"puffer, pufferfish, blowfish, globefish",
400
+ "398":"abacus",
401
+ "399":"abaya",
402
+ "400":"academic gown, academic robe, judge's robe",
403
+ "401":"accordion, piano accordion, squeeze box",
404
+ "402":"acoustic guitar",
405
+ "403":"aircraft carrier, carrier, flattop, attack aircraft carrier",
406
+ "404":"airliner",
407
+ "405":"airship, dirigible",
408
+ "406":"altar",
409
+ "407":"ambulance",
410
+ "408":"amphibian, amphibious vehicle",
411
+ "409":"analog clock",
412
+ "410":"apiary, bee house",
413
+ "411":"apron",
414
+ "412":"ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin",
415
+ "413":"assault rifle, assault gun",
416
+ "414":"backpack, back pack, knapsack, packsack, rucksack, haversack",
417
+ "415":"bakery, bakeshop, bakehouse",
418
+ "416":"balance beam, beam",
419
+ "417":"balloon",
420
+ "418":"ballpoint, ballpoint pen, ballpen, Biro",
421
+ "419":"Band Aid",
422
+ "420":"banjo",
423
+ "421":"bannister, banister, balustrade, balusters, handrail",
424
+ "422":"barbell",
425
+ "423":"barber chair",
426
+ "424":"barbershop",
427
+ "425":"barn",
428
+ "426":"barometer",
429
+ "427":"barrel, cask",
430
+ "428":"barrow, garden cart, lawn cart, wheelbarrow",
431
+ "429":"baseball",
432
+ "430":"basketball",
433
+ "431":"bassinet",
434
+ "432":"bassoon",
435
+ "433":"bathing cap, swimming cap",
436
+ "434":"bath towel",
437
+ "435":"bathtub, bathing tub, bath, tub",
438
+ "436":"beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon",
439
+ "437":"beacon, lighthouse, beacon light, pharos",
440
+ "438":"beaker",
441
+ "439":"bearskin, busby, shako",
442
+ "440":"beer bottle",
443
+ "441":"beer glass",
444
+ "442":"bell cote, bell cot",
445
+ "443":"bib",
446
+ "444":"bicycle-built-for-two, tandem bicycle, tandem",
447
+ "445":"bikini, two-piece",
448
+ "446":"binder, ring-binder",
449
+ "447":"binoculars, field glasses, opera glasses",
450
+ "448":"birdhouse",
451
+ "449":"boathouse",
452
+ "450":"bobsled, bobsleigh, bob",
453
+ "451":"bolo tie, bolo, bola tie, bola",
454
+ "452":"bonnet, poke bonnet",
455
+ "453":"bookcase",
456
+ "454":"bookshop, bookstore, bookstall",
457
+ "455":"bottlecap",
458
+ "456":"bow",
459
+ "457":"bow tie, bow-tie, bowtie",
460
+ "458":"brass, memorial tablet, plaque",
461
+ "459":"brassiere, bra, bandeau",
462
+ "460":"breakwater, groin, groyne, mole, bulwark, seawall, jetty",
463
+ "461":"breastplate, aegis, egis",
464
+ "462":"broom",
465
+ "463":"bucket, pail",
466
+ "464":"buckle",
467
+ "465":"bulletproof vest",
468
+ "466":"bullet train, bullet",
469
+ "467":"butcher shop, meat market",
470
+ "468":"cab, hack, taxi, taxicab",
471
+ "469":"caldron, cauldron",
472
+ "470":"candle, taper, wax light",
473
+ "471":"cannon",
474
+ "472":"canoe",
475
+ "473":"can opener, tin opener",
476
+ "474":"cardigan",
477
+ "475":"car mirror",
478
+ "476":"carousel, carrousel, merry-go-round, roundabout, whirligig",
479
+ "477":"carpenter's kit, tool kit",
480
+ "478":"carton",
481
+ "479":"car wheel",
482
+ "480":"cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM",
483
+ "481":"cassette",
484
+ "482":"cassette player",
485
+ "483":"castle",
486
+ "484":"catamaran",
487
+ "485":"CD player",
488
+ "486":"cello, violoncello",
489
+ "487":"cellular telephone, cellular phone, cellphone, cell, mobile phone",
490
+ "488":"chain",
491
+ "489":"chainlink fence",
492
+ "490":"chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour",
493
+ "491":"chain saw, chainsaw",
494
+ "492":"chest",
495
+ "493":"chiffonier, commode",
496
+ "494":"chime, bell, gong",
497
+ "495":"china cabinet, china closet",
498
+ "496":"Christmas stocking",
499
+ "497":"church, church building",
500
+ "498":"cinema, movie theater, movie theatre, movie house, picture palace",
501
+ "499":"cleaver, meat cleaver, chopper",
502
+ "500":"cliff dwelling",
503
+ "501":"cloak",
504
+ "502":"clog, geta, patten, sabot",
505
+ "503":"cocktail shaker",
506
+ "504":"coffee mug",
507
+ "505":"coffeepot",
508
+ "506":"coil, spiral, volute, whorl, helix",
509
+ "507":"combination lock",
510
+ "508":"computer keyboard, keypad",
511
+ "509":"confectionery, confectionary, candy store",
512
+ "510":"container ship, containership, container vessel",
513
+ "511":"convertible",
514
+ "512":"corkscrew, bottle screw",
515
+ "513":"cornet, horn, trumpet, trump",
516
+ "514":"cowboy boot",
517
+ "515":"cowboy hat, ten-gallon hat",
518
+ "516":"cradle",
519
+ "517":"crane",
520
+ "518":"crash helmet",
521
+ "519":"crate",
522
+ "520":"crib, cot",
523
+ "521":"Crock Pot",
524
+ "522":"croquet ball",
525
+ "523":"crutch",
526
+ "524":"cuirass",
527
+ "525":"dam, dike, dyke",
528
+ "526":"desk",
529
+ "527":"desktop computer",
530
+ "528":"dial telephone, dial phone",
531
+ "529":"diaper, nappy, napkin",
532
+ "530":"digital clock",
533
+ "531":"digital watch",
534
+ "532":"dining table, board",
535
+ "533":"dishrag, dishcloth",
536
+ "534":"dishwasher, dish washer, dishwashing machine",
537
+ "535":"disk brake, disc brake",
538
+ "536":"dock, dockage, docking facility",
539
+ "537":"dogsled, dog sled, dog sleigh",
540
+ "538":"dome",
541
+ "539":"doormat, welcome mat",
542
+ "540":"drilling platform, offshore rig",
543
+ "541":"drum, membranophone, tympan",
544
+ "542":"drumstick",
545
+ "543":"dumbbell",
546
+ "544":"Dutch oven",
547
+ "545":"electric fan, blower",
548
+ "546":"electric guitar",
549
+ "547":"electric locomotive",
550
+ "548":"entertainment center",
551
+ "549":"envelope",
552
+ "550":"espresso maker",
553
+ "551":"face powder",
554
+ "552":"feather boa, boa",
555
+ "553":"file, file cabinet, filing cabinet",
556
+ "554":"fireboat",
557
+ "555":"fire engine, fire truck",
558
+ "556":"fire screen, fireguard",
559
+ "557":"flagpole, flagstaff",
560
+ "558":"flute, transverse flute",
561
+ "559":"folding chair",
562
+ "560":"football helmet",
563
+ "561":"forklift",
564
+ "562":"fountain",
565
+ "563":"fountain pen",
566
+ "564":"four-poster",
567
+ "565":"freight car",
568
+ "566":"French horn, horn",
569
+ "567":"frying pan, frypan, skillet",
570
+ "568":"fur coat",
571
+ "569":"garbage truck, dustcart",
572
+ "570":"gasmask, respirator, gas helmet",
573
+ "571":"gas pump, gasoline pump, petrol pump, island dispenser",
574
+ "572":"goblet",
575
+ "573":"go-kart",
576
+ "574":"golf ball",
577
+ "575":"golfcart, golf cart",
578
+ "576":"gondola",
579
+ "577":"gong, tam-tam",
580
+ "578":"gown",
581
+ "579":"grand piano, grand",
582
+ "580":"greenhouse, nursery, glasshouse",
583
+ "581":"grille, radiator grille",
584
+ "582":"grocery store, grocery, food market, market",
585
+ "583":"guillotine",
586
+ "584":"hair slide",
587
+ "585":"hair spray",
588
+ "586":"half track",
589
+ "587":"hammer",
590
+ "588":"hamper",
591
+ "589":"hand blower, blow dryer, blow drier, hair dryer, hair drier",
592
+ "590":"hand-held computer, hand-held microcomputer",
593
+ "591":"handkerchief, hankie, hanky, hankey",
594
+ "592":"hard disc, hard disk, fixed disk",
595
+ "593":"harmonica, mouth organ, harp, mouth harp",
596
+ "594":"harp",
597
+ "595":"harvester, reaper",
598
+ "596":"hatchet",
599
+ "597":"holster",
600
+ "598":"home theater, home theatre",
601
+ "599":"honeycomb",
602
+ "600":"hook, claw",
603
+ "601":"hoopskirt, crinoline",
604
+ "602":"horizontal bar, high bar",
605
+ "603":"horse cart, horse-cart",
606
+ "604":"hourglass",
607
+ "605":"iPod",
608
+ "606":"iron, smoothing iron",
609
+ "607":"jack-o'-lantern",
610
+ "608":"jean, blue jean, denim",
611
+ "609":"jeep, landrover",
612
+ "610":"jersey, T-shirt, tee shirt",
613
+ "611":"jigsaw puzzle",
614
+ "612":"jinrikisha, ricksha, rickshaw",
615
+ "613":"joystick",
616
+ "614":"kimono",
617
+ "615":"knee pad",
618
+ "616":"knot",
619
+ "617":"lab coat, laboratory coat",
620
+ "618":"ladle",
621
+ "619":"lampshade, lamp shade",
622
+ "620":"laptop, laptop computer",
623
+ "621":"lawn mower, mower",
624
+ "622":"lens cap, lens cover",
625
+ "623":"letter opener, paper knife, paperknife",
626
+ "624":"library",
627
+ "625":"lifeboat",
628
+ "626":"lighter, light, igniter, ignitor",
629
+ "627":"limousine, limo",
630
+ "628":"liner, ocean liner",
631
+ "629":"lipstick, lip rouge",
632
+ "630":"Loafer",
633
+ "631":"lotion",
634
+ "632":"loudspeaker, speaker, speaker unit, loudspeaker system, speaker system",
635
+ "633":"loupe, jeweler's loupe",
636
+ "634":"lumbermill, sawmill",
637
+ "635":"magnetic compass",
638
+ "636":"mailbag, postbag",
639
+ "637":"mailbox, letter box",
640
+ "638":"maillot",
641
+ "639":"maillot, tank suit",
642
+ "640":"manhole cover",
643
+ "641":"maraca",
644
+ "642":"marimba, xylophone",
645
+ "643":"mask",
646
+ "644":"matchstick",
647
+ "645":"maypole",
648
+ "646":"maze, labyrinth",
649
+ "647":"measuring cup",
650
+ "648":"medicine chest, medicine cabinet",
651
+ "649":"megalith, megalithic structure",
652
+ "650":"microphone, mike",
653
+ "651":"microwave, microwave oven",
654
+ "652":"military uniform",
655
+ "653":"milk can",
656
+ "654":"minibus",
657
+ "655":"miniskirt, mini",
658
+ "656":"minivan",
659
+ "657":"missile",
660
+ "658":"mitten",
661
+ "659":"mixing bowl",
662
+ "660":"mobile home, manufactured home",
663
+ "661":"Model T",
664
+ "662":"modem",
665
+ "663":"monastery",
666
+ "664":"monitor",
667
+ "665":"moped",
668
+ "666":"mortar",
669
+ "667":"mortarboard",
670
+ "668":"mosque",
671
+ "669":"mosquito net",
672
+ "670":"motor scooter, scooter",
673
+ "671":"mountain bike, all-terrain bike, off-roader",
674
+ "672":"mountain tent",
675
+ "673":"mouse, computer mouse",
676
+ "674":"mousetrap",
677
+ "675":"moving van",
678
+ "676":"muzzle",
679
+ "677":"nail",
680
+ "678":"neck brace",
681
+ "679":"necklace",
682
+ "680":"nipple",
683
+ "681":"notebook, notebook computer",
684
+ "682":"obelisk",
685
+ "683":"oboe, hautboy, hautbois",
686
+ "684":"ocarina, sweet potato",
687
+ "685":"odometer, hodometer, mileometer, milometer",
688
+ "686":"oil filter",
689
+ "687":"organ, pipe organ",
690
+ "688":"oscilloscope, scope, cathode-ray oscilloscope, CRO",
691
+ "689":"overskirt",
692
+ "690":"oxcart",
693
+ "691":"oxygen mask",
694
+ "692":"packet",
695
+ "693":"paddle, boat paddle",
696
+ "694":"paddlewheel, paddle wheel",
697
+ "695":"padlock",
698
+ "696":"paintbrush",
699
+ "697":"pajama, pyjama, pj's, jammies",
700
+ "698":"palace",
701
+ "699":"panpipe, pandean pipe, syrinx",
702
+ "700":"paper towel",
703
+ "701":"parachute, chute",
704
+ "702":"parallel bars, bars",
705
+ "703":"park bench",
706
+ "704":"parking meter",
707
+ "705":"passenger car, coach, carriage",
708
+ "706":"patio, terrace",
709
+ "707":"pay-phone, pay-station",
710
+ "708":"pedestal, plinth, footstall",
711
+ "709":"pencil box, pencil case",
712
+ "710":"pencil sharpener",
713
+ "711":"perfume, essence",
714
+ "712":"Petri dish",
715
+ "713":"photocopier",
716
+ "714":"pick, plectrum, plectron",
717
+ "715":"pickelhaube",
718
+ "716":"picket fence, paling",
719
+ "717":"pickup, pickup truck",
720
+ "718":"pier",
721
+ "719":"piggy bank, penny bank",
722
+ "720":"pill bottle",
723
+ "721":"pillow",
724
+ "722":"ping-pong ball",
725
+ "723":"pinwheel",
726
+ "724":"pirate, pirate ship",
727
+ "725":"pitcher, ewer",
728
+ "726":"plane, carpenter's plane, woodworking plane",
729
+ "727":"planetarium",
730
+ "728":"plastic bag",
731
+ "729":"plate rack",
732
+ "730":"plow, plough",
733
+ "731":"plunger, plumber's helper",
734
+ "732":"Polaroid camera, Polaroid Land camera",
735
+ "733":"pole",
736
+ "734":"police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria",
737
+ "735":"poncho",
738
+ "736":"pool table, billiard table, snooker table",
739
+ "737":"pop bottle, soda bottle",
740
+ "738":"pot, flowerpot",
741
+ "739":"potter's wheel",
742
+ "740":"power drill",
743
+ "741":"prayer rug, prayer mat",
744
+ "742":"printer",
745
+ "743":"prison, prison house",
746
+ "744":"projectile, missile",
747
+ "745":"projector",
748
+ "746":"puck, hockey puck",
749
+ "747":"punching bag, punch bag, punching ball, punchball",
750
+ "748":"purse",
751
+ "749":"quill, quill pen",
752
+ "750":"quilt, comforter, comfort, puff",
753
+ "751":"racer, race car, racing car",
754
+ "752":"racket, racquet",
755
+ "753":"radiator",
756
+ "754":"radio, wireless",
757
+ "755":"radio telescope, radio reflector",
758
+ "756":"rain barrel",
759
+ "757":"recreational vehicle, RV, R.V.",
760
+ "758":"reel",
761
+ "759":"reflex camera",
762
+ "760":"refrigerator, icebox",
763
+ "761":"remote control, remote",
764
+ "762":"restaurant, eating house, eating place, eatery",
765
+ "763":"revolver, six-gun, six-shooter",
766
+ "764":"rifle",
767
+ "765":"rocking chair, rocker",
768
+ "766":"rotisserie",
769
+ "767":"rubber eraser, rubber, pencil eraser",
770
+ "768":"rugby ball",
771
+ "769":"rule, ruler",
772
+ "770":"running shoe",
773
+ "771":"safe",
774
+ "772":"safety pin",
775
+ "773":"saltshaker, salt shaker",
776
+ "774":"sandal",
777
+ "775":"sarong",
778
+ "776":"sax, saxophone",
779
+ "777":"scabbard",
780
+ "778":"scale, weighing machine",
781
+ "779":"school bus",
782
+ "780":"schooner",
783
+ "781":"scoreboard",
784
+ "782":"screen, CRT screen",
785
+ "783":"screw",
786
+ "784":"screwdriver",
787
+ "785":"seat belt, seatbelt",
788
+ "786":"sewing machine",
789
+ "787":"shield, buckler",
790
+ "788":"shoe shop, shoe-shop, shoe store",
791
+ "789":"shoji",
792
+ "790":"shopping basket",
793
+ "791":"shopping cart",
794
+ "792":"shovel",
795
+ "793":"shower cap",
796
+ "794":"shower curtain",
797
+ "795":"ski",
798
+ "796":"ski mask",
799
+ "797":"sleeping bag",
800
+ "798":"slide rule, slipstick",
801
+ "799":"sliding door",
802
+ "800":"slot, one-armed bandit",
803
+ "801":"snorkel",
804
+ "802":"snowmobile",
805
+ "803":"snowplow, snowplough",
806
+ "804":"soap dispenser",
807
+ "805":"soccer ball",
808
+ "806":"sock",
809
+ "807":"solar dish, solar collector, solar furnace",
810
+ "808":"sombrero",
811
+ "809":"soup bowl",
812
+ "810":"space bar",
813
+ "811":"space heater",
814
+ "812":"space shuttle",
815
+ "813":"spatula",
816
+ "814":"speedboat",
817
+ "815":"spider web, spider's web",
818
+ "816":"spindle",
819
+ "817":"sports car, sport car",
820
+ "818":"spotlight, spot",
821
+ "819":"stage",
822
+ "820":"steam locomotive",
823
+ "821":"steel arch bridge",
824
+ "822":"steel drum",
825
+ "823":"stethoscope",
826
+ "824":"stole",
827
+ "825":"stone wall",
828
+ "826":"stopwatch, stop watch",
829
+ "827":"stove",
830
+ "828":"strainer",
831
+ "829":"streetcar, tram, tramcar, trolley, trolley car",
832
+ "830":"stretcher",
833
+ "831":"studio couch, day bed",
834
+ "832":"stupa, tope",
835
+ "833":"submarine, pigboat, sub, U-boat",
836
+ "834":"suit, suit of clothes",
837
+ "835":"sundial",
838
+ "836":"sunglass",
839
+ "837":"sunglasses, dark glasses, shades",
840
+ "838":"sunscreen, sunblock, sun blocker",
841
+ "839":"suspension bridge",
842
+ "840":"swab, swob, mop",
843
+ "841":"sweatshirt",
844
+ "842":"swimming trunks, bathing trunks",
845
+ "843":"swing",
846
+ "844":"switch, electric switch, electrical switch",
847
+ "845":"syringe",
848
+ "846":"table lamp",
849
+ "847":"tank, army tank, armored combat vehicle, armoured combat vehicle",
850
+ "848":"tape player",
851
+ "849":"teapot",
852
+ "850":"teddy, teddy bear",
853
+ "851":"television, television system",
854
+ "852":"tennis ball",
855
+ "853":"thatch, thatched roof",
856
+ "854":"theater curtain, theatre curtain",
857
+ "855":"thimble",
858
+ "856":"thresher, thrasher, threshing machine",
859
+ "857":"throne",
860
+ "858":"tile roof",
861
+ "859":"toaster",
862
+ "860":"tobacco shop, tobacconist shop, tobacconist",
863
+ "861":"toilet seat",
864
+ "862":"torch",
865
+ "863":"totem pole",
866
+ "864":"tow truck, tow car, wrecker",
867
+ "865":"toyshop",
868
+ "866":"tractor",
869
+ "867":"trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi",
870
+ "868":"tray",
871
+ "869":"trench coat",
872
+ "870":"tricycle, trike, velocipede",
873
+ "871":"trimaran",
874
+ "872":"tripod",
875
+ "873":"triumphal arch",
876
+ "874":"trolleybus, trolley coach, trackless trolley",
877
+ "875":"trombone",
878
+ "876":"tub, vat",
879
+ "877":"turnstile",
880
+ "878":"typewriter keyboard",
881
+ "879":"umbrella",
882
+ "880":"unicycle, monocycle",
883
+ "881":"upright, upright piano",
884
+ "882":"vacuum, vacuum cleaner",
885
+ "883":"vase",
886
+ "884":"vault",
887
+ "885":"velvet",
888
+ "886":"vending machine",
889
+ "887":"vestment",
890
+ "888":"viaduct",
891
+ "889":"violin, fiddle",
892
+ "890":"volleyball",
893
+ "891":"waffle iron",
894
+ "892":"wall clock",
895
+ "893":"wallet, billfold, notecase, pocketbook",
896
+ "894":"wardrobe, closet, press",
897
+ "895":"warplane, military plane",
898
+ "896":"washbasin, handbasin, washbowl, lavabo, wash-hand basin",
899
+ "897":"washer, automatic washer, washing machine",
900
+ "898":"water bottle",
901
+ "899":"water jug",
902
+ "900":"water tower",
903
+ "901":"whiskey jug",
904
+ "902":"whistle",
905
+ "903":"wig",
906
+ "904":"window screen",
907
+ "905":"window shade",
908
+ "906":"Windsor tie",
909
+ "907":"wine bottle",
910
+ "908":"wing",
911
+ "909":"wok",
912
+ "910":"wooden spoon",
913
+ "911":"wool, woolen, woollen",
914
+ "912":"worm fence, snake fence, snake-rail fence, Virginia fence",
915
+ "913":"wreck",
916
+ "914":"yawl",
917
+ "915":"yurt",
918
+ "916":"web site, website, internet site, site",
919
+ "917":"comic book",
920
+ "918":"crossword puzzle, crossword",
921
+ "919":"street sign",
922
+ "920":"traffic light, traffic signal, stoplight",
923
+ "921":"book jacket, dust cover, dust jacket, dust wrapper",
924
+ "922":"menu",
925
+ "923":"plate",
926
+ "924":"guacamole",
927
+ "925":"consomme",
928
+ "926":"hot pot, hotpot",
929
+ "927":"trifle",
930
+ "928":"ice cream, icecream",
931
+ "929":"ice lolly, lolly, lollipop, popsicle",
932
+ "930":"French loaf",
933
+ "931":"bagel, beigel",
934
+ "932":"pretzel",
935
+ "933":"cheeseburger",
936
+ "934":"hotdog, hot dog, red hot",
937
+ "935":"mashed potato",
938
+ "936":"head cabbage",
939
+ "937":"broccoli",
940
+ "938":"cauliflower",
941
+ "939":"zucchini, courgette",
942
+ "940":"spaghetti squash",
943
+ "941":"acorn squash",
944
+ "942":"butternut squash",
945
+ "943":"cucumber, cuke",
946
+ "944":"artichoke, globe artichoke",
947
+ "945":"bell pepper",
948
+ "946":"cardoon",
949
+ "947":"mushroom",
950
+ "948":"Granny Smith",
951
+ "949":"strawberry",
952
+ "950":"orange",
953
+ "951":"lemon",
954
+ "952":"fig",
955
+ "953":"pineapple, ananas",
956
+ "954":"banana",
957
+ "955":"jackfruit, jak, jack",
958
+ "956":"custard apple",
959
+ "957":"pomegranate",
960
+ "958":"hay",
961
+ "959":"carbonara",
962
+ "960":"chocolate sauce, chocolate syrup",
963
+ "961":"dough",
964
+ "962":"meat loaf, meatloaf",
965
+ "963":"pizza, pizza pie",
966
+ "964":"potpie",
967
+ "965":"burrito",
968
+ "966":"red wine",
969
+ "967":"espresso",
970
+ "968":"cup",
971
+ "969":"eggnog",
972
+ "970":"alp",
973
+ "971":"bubble",
974
+ "972":"cliff, drop, drop-off",
975
+ "973":"coral reef",
976
+ "974":"geyser",
977
+ "975":"lakeside, lakeshore",
978
+ "976":"promontory, headland, head, foreland",
979
+ "977":"sandbar, sand bar",
980
+ "978":"seashore, coast, seacoast, sea-coast",
981
+ "979":"valley, vale",
982
+ "980":"volcano",
983
+ "981":"ballplayer, baseball player",
984
+ "982":"groom, bridegroom",
985
+ "983":"scuba diver",
986
+ "984":"rapeseed",
987
+ "985":"daisy",
988
+ "986":"yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum",
989
+ "987":"corn",
990
+ "988":"acorn",
991
+ "989":"hip, rose hip, rosehip",
992
+ "990":"buckeye, horse chestnut, conker",
993
+ "991":"coral fungus",
994
+ "992":"agaric",
995
+ "993":"gyromitra",
996
+ "994":"stinkhorn, carrion fungus",
997
+ "995":"earthstar",
998
+ "996":"hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa",
999
+ "997":"bolete",
1000
+ "998":"ear, spike, capitulum",
1001
+ "999":"toilet tissue, toilet paper, bathroom tissue"
1002
+ }
backbone/.ipynb_checkpoints/__init__-checkpoint.py ADDED
@@ -0,0 +1,2 @@
 
 
1
+ from .vit_model import create_name_vit
2
+ from .classification import ClassificationModel
backbone/.ipynb_checkpoints/classification-checkpoint.py ADDED
File without changes
backbone/.ipynb_checkpoints/layers-checkpoint.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+
3
class Identity(tf.keras.layers.Layer):
    """Pass-through Keras layer: ``call`` returns its input unchanged."""

    def __init__(self, name):
        # Only the layer name is forwarded to the Keras base class.
        super().__init__(name=name)

    def call(self, x):
        """Return ``x`` as-is."""
        return x
backbone/.ipynb_checkpoints/vit_model-checkpoint.py ADDED
File without changes
backbone/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
1
+ from .vit_model import create_name_vit
2
+ from .classification import ClassificationModel
backbone/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (254 Bytes). View file
backbone/__pycache__/classification.cpython-38.pyc ADDED
Binary file (4.93 kB). View file
backbone/__pycache__/layers.cpython-38.pyc ADDED
Binary file (663 Bytes). View file
backbone/__pycache__/vit_model.cpython-38.pyc ADDED
Binary file (7.43 kB). View file
backbone/classification.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://github.com/tensorflow/models/blob/master/official/vision/beta/modeling/classification_model.py
2
+
3
+ """Build classification models."""
4
+
5
+ from typing import Any, Mapping, Optional, Union
6
+ # Import libraries
7
+ import tensorflow as tf
8
+ import re
9
+
10
+
11
class ClassificationModel(tf.keras.Model):
    """Functional classifier: backbone -> global average pooling -> optional dropout -> dense logits.

    The model also overrides ``train_step``/``test_step`` so that batches of the
    form ``(features, labels)`` with ``features['image']`` and ``labels['label']``
    can be fed to ``fit``/``evaluate``, and adds an L2 penalty on kernel/weight
    variables scaled by ``weight_decay``.
    """

    def __init__(
            self,
            backbone: tf.keras.Model,
            num_classes: int,
            input_specs: tf.keras.layers.InputSpec = tf.keras.layers.InputSpec(shape=[None, None, None, 3]),
            dropout_rate: float = 0.0,
            kernel_initializer: Union[str, tf.keras.initializers.Initializer] = 'random_uniform',
            skip_logits_layer: bool = False,
            weight_decay: float = 0.0,
            clip_grad_norm: float = 0.0,
            **kwargs):
        """Classification initialization function.

        Args:
          backbone: a backbone network; it is called on the input tensor and its
            output is fed to `GlobalAveragePooling2D`.
          num_classes: `int` number of classes in classification task.
          input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
          dropout_rate: `float` rate for dropout regularization; dropout is only
            inserted when the rate is greater than zero.
          kernel_initializer: kernel initializer for the dense layer.
          skip_logits_layer: `bool`, whether to skip the prediction layer (and
            the dropout in front of it).
          weight_decay: `float` factor applied to the L2 regularization loss
            computed in `_reg_l2_loss`.
          clip_grad_norm: `float`, stored on the model and in its config.
            NOTE(review): `train_step` below does not apply any clipping.
          **kwargs: keyword arguments to be passed.
        """
        inputs = tf.keras.Input(shape=input_specs.shape[1:], name=input_specs.name)
        outputs = backbone(inputs)

        outputs = tf.keras.layers.GlobalAveragePooling2D()(outputs)
        if not skip_logits_layer:
            if dropout_rate is not None and dropout_rate > 0:
                outputs = tf.keras.layers.Dropout(dropout_rate)(outputs)
            outputs = tf.keras.layers.Dense(
                num_classes,
                kernel_initializer=kernel_initializer,
                bias_initializer=tf.keras.initializers.Constant(value=-10.0))(outputs)

        super(ClassificationModel, self).__init__(inputs=inputs, outputs=outputs, **kwargs)

        # Everything needed to rebuild the model through `from_config`.
        # `skip_logits_layer` is included so a config round trip does not
        # silently re-add the prediction layer.
        self._config_dict = {
            'backbone': backbone,
            'num_classes': num_classes,
            'input_specs': input_specs,
            'dropout_rate': dropout_rate,
            'kernel_initializer': kernel_initializer,
            'skip_logits_layer': skip_logits_layer,
            'weight_decay': weight_decay,
            'clip_grad_norm': clip_grad_norm,
        }
        self._input_specs = input_specs
        self._backbone = backbone
        self._weight_decay = weight_decay
        self._clip_grad_norm = clip_grad_norm

    def _reg_l2_loss(self, weight_decay, regex=r'.*(kernel|weight):0$'):
        """Return `weight_decay` times the summed L2 loss of matching trainable variables.

        Args:
          weight_decay: multiplier applied to the summed L2 loss.
          regex: pattern matched against each trainable variable's name; only
            matching variables contribute.

        Returns:
          A scalar tensor. Zero when no trainable variable matches (for example
          when the model has been frozen).
        """
        var_match = re.compile(regex)
        l2_terms = [
            tf.nn.l2_loss(v)
            for v in self.trainable_variables
            if var_match.match(v.name)
        ]
        if not l2_terms:
            # tf.add_n rejects an empty list, so short-circuit to a zero loss.
            return tf.constant(0.0, dtype=tf.float32)
        return weight_decay * tf.add_n(l2_terms)

    def train_step(self, data):
        """Run one optimization step on a `(features, labels)` batch.

        Reads `features['image']` and `labels['label']`, computes the compiled
        loss plus the L2 regularization term, lets the optimizer minimize it and
        returns the current metric results keyed by metric name.
        """
        features, labels = data
        images, labels = features['image'], labels['label']

        with tf.GradientTape() as tape:
            pred = self(images, training=True)
            # Loss and metrics are computed on float32 predictions.
            pred = tf.cast(pred, tf.float32)
            loss = self.compiled_loss(
                labels,
                pred,
                regularization_losses=[self._reg_l2_loss(self._weight_decay)])

        self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
        self.compiled_metrics.update_state(labels, pred)
        return {m.name: m.result() for m in self.metrics}

    def test_step(self, data):
        """Evaluate one `(features, labels)` batch and return the metric results."""
        features, labels = data
        images, labels = features['image'], labels['label']
        pred = self(images, training=False)
        pred = tf.cast(pred, tf.float32)

        # The return value is not needed here; the call is made for its effect
        # on the compiled loss tracking.
        self.compiled_loss(
            labels,
            pred,
            regularization_losses=[self._reg_l2_loss(self._weight_decay)])

        self.compiled_metrics.update_state(labels, pred)
        return {m.name: m.result() for m in self.metrics}

    @property
    def checkpoint_items(self) -> Mapping[str, tf.keras.Model]:
        """Returns a dictionary of items to be additionally checkpointed."""
        return dict(backbone=self.backbone)

    @property
    def backbone(self) -> tf.keras.Model:
        """The backbone network passed to the constructor."""
        return self._backbone

    def get_config(self) -> Mapping[str, Any]:
        """Return a shallow copy of the constructor arguments."""
        # A copy keeps callers from mutating the model's own record.
        return dict(self._config_dict)

    @classmethod
    def from_config(cls, config, custom_objects=None):
        """Rebuild a model from a mapping produced by `get_config`."""
        return cls(**config)
backbone/layers.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+
3
class Identity(tf.keras.layers.Layer):
    """No-op Keras layer whose output is exactly its input."""

    def __init__(self, name):
        # The layer keeps no state; it only registers its name with Keras.
        super().__init__(name=name)

    def call(self, x):
        """Return the input tensor untouched."""
        return x
backbone/vit_model.py ADDED
@@ -0,0 +1,296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+ import tensorflow_addons as tfa
3
+ import math
4
+ from tensorflow.keras.layers import (
5
+ LayerNormalization,
6
+ )
7
+ from .layers import Identity
8
+
9
+ import numpy as np
10
+ import logging
11
+
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
class MultiHeadSelfAttention(tf.keras.layers.Layer):
    """Multi-head scaled dot-product self-attention.

    Queries, keys and values are all projected from the same input, split into
    ``num_heads`` heads of size ``embed_dim // num_heads``, attended, merged
    back to ``embed_dim`` and passed through an output projection.
    """

    def __init__(self, embed_dim, num_heads=8, attn_drop_rate=0.0, proj_drop=0.0, **kwargs):
        """Create the projection and dropout sub-layers.

        Args:
            embed_dim: size of the input/output feature dimension.
            num_heads: number of attention heads; must divide ``embed_dim``.
            attn_drop_rate: dropout rate applied to the attention weights.
            proj_drop: dropout rate applied after the output projection.
            **kwargs: standard Keras layer arguments (``name``, ``dtype``, ...);
                accepted so that ``from_config(get_config())`` works.

        Raises:
            ValueError: if ``embed_dim`` is not divisible by ``num_heads``.
        """
        super(MultiHeadSelfAttention, self).__init__(**kwargs)
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        # Kept as plain numbers so get_config can report them.
        self.attn_drop_rate = attn_drop_rate
        self.proj_drop_rate = proj_drop
        if embed_dim % num_heads != 0:
            raise ValueError(
                f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}"
            )
        self.projection_dim = embed_dim // num_heads

        # NOTE: attribute names and creation order of the sub-layers are left
        # untouched on purpose; saved weights may depend on them.
        self.query_dense = tf.keras.layers.Dense(
            units=embed_dim,
            kernel_initializer=tf.keras.initializers.GlorotNormal(),
            bias_initializer="zeros",
        )
        self.key_dense = tf.keras.layers.Dense(
            units=embed_dim,
            kernel_initializer=tf.keras.initializers.GlorotNormal(),
            bias_initializer="zeros",
        )
        self.attn_drop = tf.keras.layers.Dropout(rate=attn_drop_rate)
        self.value_dense = tf.keras.layers.Dense(
            units=embed_dim,
            kernel_initializer=tf.keras.initializers.GlorotNormal(),
            bias_initializer="zeros",
        )
        self.combine_heads = tf.keras.layers.Dense(
            units=embed_dim,
            kernel_initializer=tf.keras.initializers.GlorotNormal(),
            bias_initializer="zeros",
        )
        self.proj_drop = tf.keras.layers.Dropout(rate=proj_drop)

    def attention(self, query, key, value, training=None):
        """Scaled dot-product attention.

        Returns:
            ``(output, weights)``: the attended values and the (post-dropout)
            softmax attention weights.
        """
        score = tf.matmul(query, key, transpose_b=True)
        # Scale by sqrt of the last dimension of `key`.
        dim_key = tf.cast(tf.shape(key)[-1], score.dtype)
        scaled_score = score / tf.math.sqrt(dim_key)
        weights = tf.nn.softmax(scaled_score, axis=-1)
        weights = self.attn_drop(weights, training=training)
        output = tf.matmul(weights, value)
        return output, weights

    def separate_heads(self, x, batch_size):
        """Reshape to (batch, tokens, heads, head_dim) and move heads in front of tokens."""
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs, training=None):
        """Apply self-attention to ``inputs`` and return a tensor with last dimension ``embed_dim``."""
        batch_size = tf.shape(inputs)[0]
        query = self.query_dense(inputs)
        key = self.key_dense(inputs)
        value = self.value_dense(inputs)

        query = self.separate_heads(query, batch_size)
        key = self.separate_heads(key, batch_size)
        value = self.separate_heads(value, batch_size)

        attention, _ = self.attention(query, key, value, training=training)
        # Undo separate_heads: heads back behind tokens, then merge them.
        attention = tf.transpose(attention, perm=[0, 2, 1, 3])
        concat_attention = tf.reshape(attention, (batch_size, -1, self.embed_dim))
        output = self.combine_heads(concat_attention)
        output = self.proj_drop(output, training=training)
        return output

    def get_config(self):
        """Return the base layer config plus every constructor argument."""
        config = super().get_config()
        config.update(
            {
                "num_heads": self.num_heads,
                "embed_dim": self.embed_dim,
                "attn_drop_rate": self.attn_drop_rate,
                "proj_drop": self.proj_drop_rate,
            }
        )
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild the layer from a ``get_config`` dictionary."""
        return cls(**config)
92
+
93
+
94
class TransformerBlock(tf.keras.layers.Layer):
    """Pre-norm encoder block: LayerNorm -> self-attention -> residual, then LayerNorm -> MLP -> residual."""

    def __init__(
        self,
        embed_dim,
        num_heads,
        mlp_dim,
        drop_rate,
        attn_drop_rate,
        name="encoderblock",
        **kwargs,
    ):
        """Build the attention, MLP and normalization sub-layers.

        Args:
            embed_dim: feature size of the block's input and output.
            num_heads: number of attention heads.
            mlp_dim: width of the hidden dense layer in the MLP.
            drop_rate: dropout rate used after the attention projection and
                inside the MLP.
            attn_drop_rate: dropout rate applied to the attention weights.
            name: Keras layer name.
            **kwargs: further standard Keras layer arguments; accepted so that
                ``from_config(get_config())`` works.
        """
        super(TransformerBlock, self).__init__(name=name, **kwargs)

        # Plain hyper-parameters, kept so get_config can report them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.mlp_dim = mlp_dim
        self.drop_rate = drop_rate
        self.attn_drop_rate = attn_drop_rate

        # NOTE: sub-layer attribute names and order are unchanged on purpose;
        # saved weights may depend on them.
        self.att = MultiHeadSelfAttention(
            embed_dim=embed_dim,
            num_heads=num_heads,
            attn_drop_rate=attn_drop_rate,
            proj_drop=drop_rate,
        )
        self.mlp = tf.keras.Sequential(
            [
                tf.keras.layers.Dense(
                    units=mlp_dim,
                    activation="linear",
                    kernel_initializer=tf.keras.initializers.GlorotNormal(),
                    bias_initializer=tf.keras.initializers.RandomNormal(
                        mean=0.0, stddev=1e-6
                    ),
                ),
                # GELU is applied as a separate layer (tanh approximation).
                tf.keras.layers.Lambda(
                    lambda x: tf.keras.activations.gelu(x, approximate=True)
                ),
                tf.keras.layers.Dropout(rate=drop_rate),
                tf.keras.layers.Dense(
                    units=embed_dim,
                    kernel_initializer=tf.keras.initializers.GlorotUniform(),
                    bias_initializer=tf.keras.initializers.RandomNormal(
                        mean=0.0, stddev=1e-6
                    ),
                ),
                tf.keras.layers.Dropout(rate=drop_rate),
            ]
        )
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)

    def call(self, inputs, training=None):
        """Run the block; ``training`` is forwarded to the attention and MLP sub-layers."""
        inputs_norm = self.layernorm1(inputs)
        attn_output = self.att(inputs_norm, training=training)

        # First residual connection, around the attention.
        out1 = attn_output + inputs

        out1_norm = self.layernorm2(out1)
        mlp_output = self.mlp(out1_norm, training=training)
        # Second residual connection, around the MLP.
        return out1 + mlp_output

    def get_config(self):
        """Return the base layer config plus every constructor argument."""
        config = super().get_config()
        config.update(
            {
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "mlp_dim": self.mlp_dim,
                "drop_rate": self.drop_rate,
                "attn_drop_rate": self.attn_drop_rate,
            }
        )
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild the block from a ``get_config`` dictionary."""
        return cls(**config)
157
+
158
+
159
class VisionTransformer(tf.keras.Model):
    """Vision Transformer encoder.

    The image is cut into non-overlapping patches by a strided convolution, a
    learned class token is prepended, learned position embeddings are added and
    the sequence runs through ``num_layers`` ``TransformerBlock`` layers. The
    normalized class token (optionally projected by a tanh ``pre_logits`` dense
    layer) is returned with two singleton spatial axes.
    """

    def __init__(
        self,
        image_size,
        patch_size,
        num_layers,
        hidden_size,
        num_heads,
        mlp_dim,
        representation_size=None,
        channels=3,
        dropout_rate=0.1,
        attention_dropout_rate=0.0,
        num_classes=None,
    ):
        """Create the embedding, encoder and head sub-layers.

        Args:
            image_size: side length of the (square) input image, in pixels.
            patch_size: side length of one square patch, in pixels.
            num_layers: number of encoder blocks.
            hidden_size: token embedding size.
            num_heads: attention heads per encoder block.
            mlp_dim: hidden width of each block's MLP.
            representation_size: units of the tanh ``pre_logits`` projection;
                ``None`` or ``0`` disables the projection (identity).
            channels: number of input image channels.
            dropout_rate: dropout rate for the embeddings and encoder blocks.
            attention_dropout_rate: dropout rate for the attention weights.
            num_classes: accepted for interface compatibility; not used here.
        """
        super(VisionTransformer, self).__init__()
        num_patches = (image_size // patch_size) ** 2
        self.patch_dim = channels * patch_size ** 2

        self.patch_size = patch_size
        self.d_model = hidden_size
        self.num_layers = num_layers

        # `name=` is passed by keyword so the call does not depend on the
        # positional order of add_weight's parameters.
        self.class_emb = self.add_weight(
            name="class_emb",
            shape=(1, 1, hidden_size),
            initializer=tf.keras.initializers.Zeros(),
            trainable=True,
        )

        # One position per patch plus one for the class token.
        self.pos_emb = self.add_weight(
            name="pos_emb",
            shape=(1, num_patches + 1, hidden_size),
            initializer=tf.keras.initializers.RandomNormal(
                mean=0.0, stddev=0.02, seed=None
            ),
            trainable=True,
        )
        self.pos_drop = tf.keras.layers.Dropout(rate=dropout_rate, name="pos_drop")

        # Kernel size == stride == patch size: each patch is embedded independently.
        self.embedding = tf.keras.layers.Conv2D(
            filters=hidden_size,
            kernel_size=self.patch_size,
            strides=self.patch_size,
            padding="valid",
            name="embedding",
        )

        self.enc_layers = [
            TransformerBlock(
                embed_dim=hidden_size,
                num_heads=num_heads,
                mlp_dim=mlp_dim,
                drop_rate=dropout_rate,
                attn_drop_rate=attention_dropout_rate,
                name=f"encoderblock_{i}",
            )
            for i in range(num_layers)
        ]

        # NOTE(review): the layer name spelling ("encoder_nrom") is kept as-is;
        # renaming may affect variable names in existing saved weights - verify
        # before changing.
        self.norm = LayerNormalization(epsilon=1e-6, name="encoder_nrom")

        self.extract_token = tf.keras.layers.Lambda(
            lambda x: x[:, 0], name="extract_token"
        )

        # None and 0 both mean "no pre-logits projection"; previously None fell
        # through to Dense(units=None), which cannot be built.
        self.representation = (
            tf.keras.layers.Dense(
                units=representation_size,
                activation="tanh",
                name="pre_logits",
            )
            if representation_size
            else Identity(name="pre_logits")
        )

    def call(self, x, training=None):
        """Encode a batch of images.

        Args:
            x: image batch fed to the patch-embedding convolution.
            training: Keras training flag, forwarded to dropout and the blocks.

        Returns:
            Tensor of shape ``[batch, 1, 1, features]`` holding the class-token
            representation.
        """
        batch_size = tf.shape(x)[0]
        x = self.embedding(x)
        # Flatten the patch grid into a token sequence.
        x = tf.reshape(x, [batch_size, -1, self.d_model])

        class_emb = tf.broadcast_to(self.class_emb, [batch_size, 1, self.d_model])
        # B x (N + 1) x d_model
        x = tf.concat([tf.cast(class_emb, x.dtype), x], axis=1)
        x = x + tf.cast(self.pos_emb, x.dtype)
        # https://github.com/google-research/vision_transformer/blob/39c905d2caf96a4306c9d78f05df36ddb3eb8ecb/vit_jax/models.py#L192
        x = self.pos_drop(x, training=training)

        for layer in self.enc_layers:
            x = layer(x, training=training)

        x = self.norm(x)

        # First (class token) is used for classification
        x = self.extract_token(x)

        x = self.representation(x)

        # Add two singleton axes so the output is 4-D.
        return x[:, tf.newaxis, tf.newaxis, :]
260
+
261
+
262
# Encoder hyper-parameters per size key; create_name_vit looks the key up
# and forwards the entry as keyword arguments.
KNOWN_MODELS = {
    "ti": dict(num_layers=12, hidden_size=192, num_heads=3, mlp_dim=768),
    "s": dict(num_layers=12, hidden_size=384, num_heads=6, mlp_dim=1536),
    "b": dict(num_layers=12, hidden_size=768, num_heads=12, mlp_dim=3072),
    "l": dict(num_layers=24, hidden_size=1024, num_heads=16, mlp_dim=4096),
}
288
+
289
+
290
def create_name_vit(architecture_name, **kwargs):
    """Build a ``VisionTransformer`` from a name such as ``"vit-l/16"``.

    The text after the last ``-`` is split on ``/`` into a size key (looked up
    in ``KNOWN_MODELS``, case-insensitively) and a patch size.

    Args:
        architecture_name: string of the form ``"<prefix>-<size>/<patch>"``.
        **kwargs: extra keyword arguments forwarded to ``VisionTransformer``.

    Returns:
        The constructed ``VisionTransformer``.

    Raises:
        ValueError: if the name does not contain exactly one ``/`` after the
            last ``-``, or the patch size is not an integer.
        KeyError: if the size key is not in ``KNOWN_MODELS``.
    """
    spec = architecture_name.split("-")[-1]
    parts = [part.lower() for part in spec.split("/")]
    if len(parts) != 2:
        raise ValueError(
            f"cannot parse architecture name {architecture_name!r}: "
            "expected '<prefix>-<size>/<patch_size>', e.g. 'vit-l/16'"
        )
    base, patch_text = parts

    # The patch size is parsed before the size lookup, matching the order in
    # which the two failures used to surface.
    try:
        patch_size = int(patch_text)
    except ValueError:
        raise ValueError(
            f"patch size {patch_text!r} in architecture name "
            f"{architecture_name!r} is not an integer"
        ) from None

    if base not in KNOWN_MODELS:
        raise KeyError(
            f"unknown model size {base!r} in architecture name "
            f"{architecture_name!r}; known sizes: {sorted(KNOWN_MODELS)}"
        )

    return VisionTransformer(
        patch_size=patch_size,
        **KNOWN_MODELS[base],
        **kwargs,
    )
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
1
+ # pip install -r requirements.txt
2
+
3
+ numpy>=1.18.5
4
+ tensorflow>=2.05
5
+ opencv-python
utils.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+ import numpy as np
3
+ import scipy.ndimage
4
+ import logging
5
+
6
def set_mixed_precision_policy(strategy: tf.distribute.Strategy, use_mixed_precision: bool = True):
    """Set the global Keras mixed-precision policy.

    Args:
        strategy: distribution strategy in use; a ``TPUStrategy`` selects
            ``'mixed_bfloat16'``, anything else ``'mixed_float16'``.
        use_mixed_precision: when false, the policy is set to ``'float32'``
            regardless of the strategy.
    """
    if not use_mixed_precision:
        policy_name = 'float32'
    elif isinstance(strategy, tf.distribute.TPUStrategy):
        policy_name = 'mixed_bfloat16'
    else:
        # TODO; tf.keras.mixed_precision.LossScaleOptimizer
        policy_name = 'mixed_float16'
    tf.keras.mixed_precision.set_global_policy(policy_name)
15
+
16
+
17
def set_pretrained_pos_embed_for_vit(backbone, ckpt_path):
    """Load the position embedding from a checkpoint into ``backbone.pos_emb``.

    The checkpoint variable whose name starts with ``backbone/pos_emb`` (and
    does not contain ``optimizer``) is read. If its token count differs from
    the backbone's, the per-patch part is reshaped to a square grid and
    resampled with linear interpolation to the new grid size; the first token's
    embedding is carried over unchanged.

    Args:
        backbone: object exposing a ``pos_emb`` variable (``numpy``/``assign``).
        ckpt_path: checkpoint path passed to ``tf.train.load_checkpoint``.
    """
    reader = tf.train.load_checkpoint(ckpt_path)
    shape_map = reader.get_variable_to_shape_map()
    matches = [
        var_name
        for var_name in shape_map
        if var_name.startswith('backbone/pos_emb') and 'optimizer' not in var_name
    ]
    assert len(matches) == 1, "cannot find positional embedding layer ('pos_emb')"
    pretrained = reader.get_tensor(matches[0])
    target = backbone.pos_emb.numpy()
    logging.info(f"load pretrained: resized variant: {pretrained.shape} to {target.shape}")

    if pretrained.shape[1] == target.shape[1]:
        # Same number of tokens: use the checkpoint values directly.
        embedding_weights = pretrained
    else:
        ntok_new = target.shape[1] - 1
        # Position 0 is split off from the remaining (grid) positions.
        first_tok, grid = pretrained[:, :1], pretrained[0, 1:]

        # Both grids are treated as square.
        gs_old = int(np.sqrt(len(grid)))
        gs_new = int(np.sqrt(ntok_new))
        logging.info(f"load pretrained: grid-size from {gs_old} to {gs_new}")
        grid = grid.reshape(gs_old, gs_old, -1)

        # Resample the two spatial axes only; the feature axis keeps its size.
        scale = gs_new / gs_old
        grid = scipy.ndimage.zoom(grid, (scale, scale, 1), order=1)
        grid = grid.reshape(1, gs_new * gs_new, -1)
        embedding_weights = tf.convert_to_tensor(
            np.concatenate([first_tok, grid], axis=1)
        )
    backbone.pos_emb.assign(embedding_weights)
weights/.DS_Store ADDED
Binary file (6.15 kB). View file
weights/vit_l16_384/.DS_Store ADDED
Binary file (6.15 kB). View file
weights/vit_l16_384/model-weights.data-00000-of-00001 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3be15c56904f8b443bbd8eb07e0a4723227b88fde194b6a4ec0831506046c145
3
+ size 2437847991
weights/vit_l16_384/model-weights.index ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4948ae4dc542254d66fe6d6e71bbb2b504dc253057cb92f9f097a8f78b3013db
3
+ size 13703