SakuraD commited on
Commit
62e7390
1 Parent(s): 63a5f97
Files changed (9) hide show
  1. README.md +38 -5
  2. app.py +65 -0
  3. cat.png +0 -0
  4. dog.png +0 -0
  5. imagenet_class_index.py +1002 -0
  6. library.jpeg +0 -0
  7. panda.png +0 -0
  8. requirements.txt +6 -0
  9. uniformer.py +366 -0
README.md CHANGED
@@ -1,13 +1,46 @@
1
  ---
2
  title: Uniformer_image_demo
3
- emoji: 🏃
4
- colorFrom: pink
5
- colorTo: green
6
  sdk: gradio
7
- sdk_version: 3.0.3
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Uniformer_image_demo
3
+ emoji: 🏢
4
+ colorFrom: blue
5
+ colorTo: indigo
6
  sdk: gradio
 
7
  app_file: app.py
8
  pinned: false
9
  license: mit
10
  ---
11
 
12
+ # Configuration
13
+
14
+ `title`: _string_
15
+ Display title for the Space
16
+
17
+ `emoji`: _string_
18
+ Space emoji (a single emoji character is allowed)
19
+
20
+ `colorFrom`: _string_
21
+ Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
22
+
23
+ `colorTo`: _string_
24
+ Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
25
+
26
+ `sdk`: _string_
27
+ Can be either `gradio`, `streamlit`, or `static`
28
+
29
+ `sdk_version`: _string_
30
+ Only applicable for `streamlit` SDK.
31
+ See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.
32
+
33
+ `app_file`: _string_
34
+ Path to your main application file (which contains either `gradio` or `streamlit` Python code, or `static` html code).
35
+ Path is relative to the root of the repository.
36
+
37
+ `models`: _List[string]_
38
+ HF model IDs (like "gpt2" or "deepset/roberta-base-squad2") used in the Space.
39
+ Will be parsed automatically from your code if not specified here.
40
+
41
+ `datasets`: _List[string]_
42
+ HF dataset IDs (like "common_voice" or "oscar-corpus/OSCAR-2109") used in the Space.
43
+ Will be parsed automatically from your code if not specified here.
44
+
45
+ `pinned`: _boolean_
46
+ Whether the Space stays on top of your list.
app.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import torch
4
+ import torch.nn.functional as F
5
+ import torchvision.transforms as T
6
+ from uniformer import uniformer_small
7
+ from imagenet_class_index import imagenet_classnames
8
+
9
+ import gradio as gr
10
+ from huggingface_hub import hf_hub_download
11
+
12
+ # Device on which to run the model
13
+ # Set to cuda to load on GPU
14
+ device = "cpu"
15
+ # os.system("wget https://cdn-lfs.huggingface.co/Andy1621/uniformer/fd192c31f8bd77670de8f171111bd51f56fd87e6aea45043ab2edc181e1fa775")
16
+ model_path = hf_hub_download(repo_id="Andy1621/uniformer", filename="uniformer_small_in1k.pth")
17
+ # Pick a pretrained model
18
+ model = uniformer_small()
19
+ # state_dict = torch.load('fd192c31f8bd77670de8f171111bd51f56fd87e6aea45043ab2edc181e1fa775', map_location='cpu')
20
+ state_dict = torch.load(model_path, map_location='cpu')
21
+ model.load_state_dict(state_dict['model'])
22
+
23
+ # Set to eval mode and move to desired device
24
+ model = model.to(device)
25
+ model = model.eval()
26
+
27
+ # Create an id to label name mapping
28
+ imagenet_id_to_classname = {}
29
+ for k, v in imagenet_classnames.items():
30
+ imagenet_id_to_classname[k] = v[1]
31
+
32
+
33
+ def inference(img):
34
+ image = img
35
+ image_transform = T.Compose(
36
+ [
37
+ T.Resize(224),
38
+ T.CenterCrop(224),
39
+ T.ToTensor(),
40
+ T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
41
+ ]
42
+ )
43
+ image = image_transform(image)
44
+
45
+ # The model expects inputs of shape: B x C x H x W
46
+ image = image.unsqueeze(0)
47
+
48
+ prediction = model(image)
49
+ prediction = F.softmax(prediction, dim=1).flatten()
50
+
51
+ return {imagenet_id_to_classname[str(i)]: float(prediction[i]) for i in range(1000)}
52
+
53
+
54
+ inputs = gr.inputs.Image(type='pil')
55
+ label = gr.outputs.Label(num_top_classes=5)
56
+
57
+ title = "UniFormer-S"
58
+ description = "Gradio demo for UniFormer: To use it, simply upload your image, or click one of the examples to load them. Read more at the links below."
59
+ article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2201.09450' target='_blank'>UniFormer: Unifying Convolution and Self-attention for Visual Recognition</a> | <a href='https://github.com/Sense-X/UniFormer' target='_blank'>Github Repo</a></p>"
60
+
61
+ gr.Interface(
62
+ inference, inputs, outputs=label,
63
+ title=title, description=description, article=article,
64
+ examples=[['library.jpeg'], ['cat.png'], ['dog.png'], ['panda.png']]
65
+ ).launch(enable_queue=True, cache_examples=True)
cat.png ADDED
dog.png ADDED
imagenet_class_index.py ADDED
@@ -0,0 +1,1002 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ imagenet_classnames = {
2
+ "0": ["n01440764", "tench"],
3
+ "1": ["n01443537", "goldfish"],
4
+ "2": ["n01484850", "great_white_shark"],
5
+ "3": ["n01491361", "tiger_shark"],
6
+ "4": ["n01494475", "hammerhead"],
7
+ "5": ["n01496331", "electric_ray"],
8
+ "6": ["n01498041", "stingray"],
9
+ "7": ["n01514668", "cock"],
10
+ "8": ["n01514859", "hen"],
11
+ "9": ["n01518878", "ostrich"],
12
+ "10": ["n01530575", "brambling"],
13
+ "11": ["n01531178", "goldfinch"],
14
+ "12": ["n01532829", "house_finch"],
15
+ "13": ["n01534433", "junco"],
16
+ "14": ["n01537544", "indigo_bunting"],
17
+ "15": ["n01558993", "robin"],
18
+ "16": ["n01560419", "bulbul"],
19
+ "17": ["n01580077", "jay"],
20
+ "18": ["n01582220", "magpie"],
21
+ "19": ["n01592084", "chickadee"],
22
+ "20": ["n01601694", "water_ouzel"],
23
+ "21": ["n01608432", "kite"],
24
+ "22": ["n01614925", "bald_eagle"],
25
+ "23": ["n01616318", "vulture"],
26
+ "24": ["n01622779", "great_grey_owl"],
27
+ "25": ["n01629819", "European_fire_salamander"],
28
+ "26": ["n01630670", "common_newt"],
29
+ "27": ["n01631663", "eft"],
30
+ "28": ["n01632458", "spotted_salamander"],
31
+ "29": ["n01632777", "axolotl"],
32
+ "30": ["n01641577", "bullfrog"],
33
+ "31": ["n01644373", "tree_frog"],
34
+ "32": ["n01644900", "tailed_frog"],
35
+ "33": ["n01664065", "loggerhead"],
36
+ "34": ["n01665541", "leatherback_turtle"],
37
+ "35": ["n01667114", "mud_turtle"],
38
+ "36": ["n01667778", "terrapin"],
39
+ "37": ["n01669191", "box_turtle"],
40
+ "38": ["n01675722", "banded_gecko"],
41
+ "39": ["n01677366", "common_iguana"],
42
+ "40": ["n01682714", "American_chameleon"],
43
+ "41": ["n01685808", "whiptail"],
44
+ "42": ["n01687978", "agama"],
45
+ "43": ["n01688243", "frilled_lizard"],
46
+ "44": ["n01689811", "alligator_lizard"],
47
+ "45": ["n01692333", "Gila_monster"],
48
+ "46": ["n01693334", "green_lizard"],
49
+ "47": ["n01694178", "African_chameleon"],
50
+ "48": ["n01695060", "Komodo_dragon"],
51
+ "49": ["n01697457", "African_crocodile"],
52
+ "50": ["n01698640", "American_alligator"],
53
+ "51": ["n01704323", "triceratops"],
54
+ "52": ["n01728572", "thunder_snake"],
55
+ "53": ["n01728920", "ringneck_snake"],
56
+ "54": ["n01729322", "hognose_snake"],
57
+ "55": ["n01729977", "green_snake"],
58
+ "56": ["n01734418", "king_snake"],
59
+ "57": ["n01735189", "garter_snake"],
60
+ "58": ["n01737021", "water_snake"],
61
+ "59": ["n01739381", "vine_snake"],
62
+ "60": ["n01740131", "night_snake"],
63
+ "61": ["n01742172", "boa_constrictor"],
64
+ "62": ["n01744401", "rock_python"],
65
+ "63": ["n01748264", "Indian_cobra"],
66
+ "64": ["n01749939", "green_mamba"],
67
+ "65": ["n01751748", "sea_snake"],
68
+ "66": ["n01753488", "horned_viper"],
69
+ "67": ["n01755581", "diamondback"],
70
+ "68": ["n01756291", "sidewinder"],
71
+ "69": ["n01768244", "trilobite"],
72
+ "70": ["n01770081", "harvestman"],
73
+ "71": ["n01770393", "scorpion"],
74
+ "72": ["n01773157", "black_and_gold_garden_spider"],
75
+ "73": ["n01773549", "barn_spider"],
76
+ "74": ["n01773797", "garden_spider"],
77
+ "75": ["n01774384", "black_widow"],
78
+ "76": ["n01774750", "tarantula"],
79
+ "77": ["n01775062", "wolf_spider"],
80
+ "78": ["n01776313", "tick"],
81
+ "79": ["n01784675", "centipede"],
82
+ "80": ["n01795545", "black_grouse"],
83
+ "81": ["n01796340", "ptarmigan"],
84
+ "82": ["n01797886", "ruffed_grouse"],
85
+ "83": ["n01798484", "prairie_chicken"],
86
+ "84": ["n01806143", "peacock"],
87
+ "85": ["n01806567", "quail"],
88
+ "86": ["n01807496", "partridge"],
89
+ "87": ["n01817953", "African_grey"],
90
+ "88": ["n01818515", "macaw"],
91
+ "89": ["n01819313", "sulphur-crested_cockatoo"],
92
+ "90": ["n01820546", "lorikeet"],
93
+ "91": ["n01824575", "coucal"],
94
+ "92": ["n01828970", "bee_eater"],
95
+ "93": ["n01829413", "hornbill"],
96
+ "94": ["n01833805", "hummingbird"],
97
+ "95": ["n01843065", "jacamar"],
98
+ "96": ["n01843383", "toucan"],
99
+ "97": ["n01847000", "drake"],
100
+ "98": ["n01855032", "red-breasted_merganser"],
101
+ "99": ["n01855672", "goose"],
102
+ "100": ["n01860187", "black_swan"],
103
+ "101": ["n01871265", "tusker"],
104
+ "102": ["n01872401", "echidna"],
105
+ "103": ["n01873310", "platypus"],
106
+ "104": ["n01877812", "wallaby"],
107
+ "105": ["n01882714", "koala"],
108
+ "106": ["n01883070", "wombat"],
109
+ "107": ["n01910747", "jellyfish"],
110
+ "108": ["n01914609", "sea_anemone"],
111
+ "109": ["n01917289", "brain_coral"],
112
+ "110": ["n01924916", "flatworm"],
113
+ "111": ["n01930112", "nematode"],
114
+ "112": ["n01943899", "conch"],
115
+ "113": ["n01944390", "snail"],
116
+ "114": ["n01945685", "slug"],
117
+ "115": ["n01950731", "sea_slug"],
118
+ "116": ["n01955084", "chiton"],
119
+ "117": ["n01968897", "chambered_nautilus"],
120
+ "118": ["n01978287", "Dungeness_crab"],
121
+ "119": ["n01978455", "rock_crab"],
122
+ "120": ["n01980166", "fiddler_crab"],
123
+ "121": ["n01981276", "king_crab"],
124
+ "122": ["n01983481", "American_lobster"],
125
+ "123": ["n01984695", "spiny_lobster"],
126
+ "124": ["n01985128", "crayfish"],
127
+ "125": ["n01986214", "hermit_crab"],
128
+ "126": ["n01990800", "isopod"],
129
+ "127": ["n02002556", "white_stork"],
130
+ "128": ["n02002724", "black_stork"],
131
+ "129": ["n02006656", "spoonbill"],
132
+ "130": ["n02007558", "flamingo"],
133
+ "131": ["n02009229", "little_blue_heron"],
134
+ "132": ["n02009912", "American_egret"],
135
+ "133": ["n02011460", "bittern"],
136
+ "134": ["n02012849", "crane"],
137
+ "135": ["n02013706", "limpkin"],
138
+ "136": ["n02017213", "European_gallinule"],
139
+ "137": ["n02018207", "American_coot"],
140
+ "138": ["n02018795", "bustard"],
141
+ "139": ["n02025239", "ruddy_turnstone"],
142
+ "140": ["n02027492", "red-backed_sandpiper"],
143
+ "141": ["n02028035", "redshank"],
144
+ "142": ["n02033041", "dowitcher"],
145
+ "143": ["n02037110", "oystercatcher"],
146
+ "144": ["n02051845", "pelican"],
147
+ "145": ["n02056570", "king_penguin"],
148
+ "146": ["n02058221", "albatross"],
149
+ "147": ["n02066245", "grey_whale"],
150
+ "148": ["n02071294", "killer_whale"],
151
+ "149": ["n02074367", "dugong"],
152
+ "150": ["n02077923", "sea_lion"],
153
+ "151": ["n02085620", "Chihuahua"],
154
+ "152": ["n02085782", "Japanese_spaniel"],
155
+ "153": ["n02085936", "Maltese_dog"],
156
+ "154": ["n02086079", "Pekinese"],
157
+ "155": ["n02086240", "Shih-Tzu"],
158
+ "156": ["n02086646", "Blenheim_spaniel"],
159
+ "157": ["n02086910", "papillon"],
160
+ "158": ["n02087046", "toy_terrier"],
161
+ "159": ["n02087394", "Rhodesian_ridgeback"],
162
+ "160": ["n02088094", "Afghan_hound"],
163
+ "161": ["n02088238", "basset"],
164
+ "162": ["n02088364", "beagle"],
165
+ "163": ["n02088466", "bloodhound"],
166
+ "164": ["n02088632", "bluetick"],
167
+ "165": ["n02089078", "black-and-tan_coonhound"],
168
+ "166": ["n02089867", "Walker_hound"],
169
+ "167": ["n02089973", "English_foxhound"],
170
+ "168": ["n02090379", "redbone"],
171
+ "169": ["n02090622", "borzoi"],
172
+ "170": ["n02090721", "Irish_wolfhound"],
173
+ "171": ["n02091032", "Italian_greyhound"],
174
+ "172": ["n02091134", "whippet"],
175
+ "173": ["n02091244", "Ibizan_hound"],
176
+ "174": ["n02091467", "Norwegian_elkhound"],
177
+ "175": ["n02091635", "otterhound"],
178
+ "176": ["n02091831", "Saluki"],
179
+ "177": ["n02092002", "Scottish_deerhound"],
180
+ "178": ["n02092339", "Weimaraner"],
181
+ "179": ["n02093256", "Staffordshire_bullterrier"],
182
+ "180": ["n02093428", "American_Staffordshire_terrier"],
183
+ "181": ["n02093647", "Bedlington_terrier"],
184
+ "182": ["n02093754", "Border_terrier"],
185
+ "183": ["n02093859", "Kerry_blue_terrier"],
186
+ "184": ["n02093991", "Irish_terrier"],
187
+ "185": ["n02094114", "Norfolk_terrier"],
188
+ "186": ["n02094258", "Norwich_terrier"],
189
+ "187": ["n02094433", "Yorkshire_terrier"],
190
+ "188": ["n02095314", "wire-haired_fox_terrier"],
191
+ "189": ["n02095570", "Lakeland_terrier"],
192
+ "190": ["n02095889", "Sealyham_terrier"],
193
+ "191": ["n02096051", "Airedale"],
194
+ "192": ["n02096177", "cairn"],
195
+ "193": ["n02096294", "Australian_terrier"],
196
+ "194": ["n02096437", "Dandie_Dinmont"],
197
+ "195": ["n02096585", "Boston_bull"],
198
+ "196": ["n02097047", "miniature_schnauzer"],
199
+ "197": ["n02097130", "giant_schnauzer"],
200
+ "198": ["n02097209", "standard_schnauzer"],
201
+ "199": ["n02097298", "Scotch_terrier"],
202
+ "200": ["n02097474", "Tibetan_terrier"],
203
+ "201": ["n02097658", "silky_terrier"],
204
+ "202": ["n02098105", "soft-coated_wheaten_terrier"],
205
+ "203": ["n02098286", "West_Highland_white_terrier"],
206
+ "204": ["n02098413", "Lhasa"],
207
+ "205": ["n02099267", "flat-coated_retriever"],
208
+ "206": ["n02099429", "curly-coated_retriever"],
209
+ "207": ["n02099601", "golden_retriever"],
210
+ "208": ["n02099712", "Labrador_retriever"],
211
+ "209": ["n02099849", "Chesapeake_Bay_retriever"],
212
+ "210": ["n02100236", "German_short-haired_pointer"],
213
+ "211": ["n02100583", "vizsla"],
214
+ "212": ["n02100735", "English_setter"],
215
+ "213": ["n02100877", "Irish_setter"],
216
+ "214": ["n02101006", "Gordon_setter"],
217
+ "215": ["n02101388", "Brittany_spaniel"],
218
+ "216": ["n02101556", "clumber"],
219
+ "217": ["n02102040", "English_springer"],
220
+ "218": ["n02102177", "Welsh_springer_spaniel"],
221
+ "219": ["n02102318", "cocker_spaniel"],
222
+ "220": ["n02102480", "Sussex_spaniel"],
223
+ "221": ["n02102973", "Irish_water_spaniel"],
224
+ "222": ["n02104029", "kuvasz"],
225
+ "223": ["n02104365", "schipperke"],
226
+ "224": ["n02105056", "groenendael"],
227
+ "225": ["n02105162", "malinois"],
228
+ "226": ["n02105251", "briard"],
229
+ "227": ["n02105412", "kelpie"],
230
+ "228": ["n02105505", "komondor"],
231
+ "229": ["n02105641", "Old_English_sheepdog"],
232
+ "230": ["n02105855", "Shetland_sheepdog"],
233
+ "231": ["n02106030", "collie"],
234
+ "232": ["n02106166", "Border_collie"],
235
+ "233": ["n02106382", "Bouvier_des_Flandres"],
236
+ "234": ["n02106550", "Rottweiler"],
237
+ "235": ["n02106662", "German_shepherd"],
238
+ "236": ["n02107142", "Doberman"],
239
+ "237": ["n02107312", "miniature_pinscher"],
240
+ "238": ["n02107574", "Greater_Swiss_Mountain_dog"],
241
+ "239": ["n02107683", "Bernese_mountain_dog"],
242
+ "240": ["n02107908", "Appenzeller"],
243
+ "241": ["n02108000", "EntleBucher"],
244
+ "242": ["n02108089", "boxer"],
245
+ "243": ["n02108422", "bull_mastiff"],
246
+ "244": ["n02108551", "Tibetan_mastiff"],
247
+ "245": ["n02108915", "French_bulldog"],
248
+ "246": ["n02109047", "Great_Dane"],
249
+ "247": ["n02109525", "Saint_Bernard"],
250
+ "248": ["n02109961", "Eskimo_dog"],
251
+ "249": ["n02110063", "malamute"],
252
+ "250": ["n02110185", "Siberian_husky"],
253
+ "251": ["n02110341", "dalmatian"],
254
+ "252": ["n02110627", "affenpinscher"],
255
+ "253": ["n02110806", "basenji"],
256
+ "254": ["n02110958", "pug"],
257
+ "255": ["n02111129", "Leonberg"],
258
+ "256": ["n02111277", "Newfoundland"],
259
+ "257": ["n02111500", "Great_Pyrenees"],
260
+ "258": ["n02111889", "Samoyed"],
261
+ "259": ["n02112018", "Pomeranian"],
262
+ "260": ["n02112137", "chow"],
263
+ "261": ["n02112350", "keeshond"],
264
+ "262": ["n02112706", "Brabancon_griffon"],
265
+ "263": ["n02113023", "Pembroke"],
266
+ "264": ["n02113186", "Cardigan"],
267
+ "265": ["n02113624", "toy_poodle"],
268
+ "266": ["n02113712", "miniature_poodle"],
269
+ "267": ["n02113799", "standard_poodle"],
270
+ "268": ["n02113978", "Mexican_hairless"],
271
+ "269": ["n02114367", "timber_wolf"],
272
+ "270": ["n02114548", "white_wolf"],
273
+ "271": ["n02114712", "red_wolf"],
274
+ "272": ["n02114855", "coyote"],
275
+ "273": ["n02115641", "dingo"],
276
+ "274": ["n02115913", "dhole"],
277
+ "275": ["n02116738", "African_hunting_dog"],
278
+ "276": ["n02117135", "hyena"],
279
+ "277": ["n02119022", "red_fox"],
280
+ "278": ["n02119789", "kit_fox"],
281
+ "279": ["n02120079", "Arctic_fox"],
282
+ "280": ["n02120505", "grey_fox"],
283
+ "281": ["n02123045", "tabby"],
284
+ "282": ["n02123159", "tiger_cat"],
285
+ "283": ["n02123394", "Persian_cat"],
286
+ "284": ["n02123597", "Siamese_cat"],
287
+ "285": ["n02124075", "Egyptian_cat"],
288
+ "286": ["n02125311", "cougar"],
289
+ "287": ["n02127052", "lynx"],
290
+ "288": ["n02128385", "leopard"],
291
+ "289": ["n02128757", "snow_leopard"],
292
+ "290": ["n02128925", "jaguar"],
293
+ "291": ["n02129165", "lion"],
294
+ "292": ["n02129604", "tiger"],
295
+ "293": ["n02130308", "cheetah"],
296
+ "294": ["n02132136", "brown_bear"],
297
+ "295": ["n02133161", "American_black_bear"],
298
+ "296": ["n02134084", "ice_bear"],
299
+ "297": ["n02134418", "sloth_bear"],
300
+ "298": ["n02137549", "mongoose"],
301
+ "299": ["n02138441", "meerkat"],
302
+ "300": ["n02165105", "tiger_beetle"],
303
+ "301": ["n02165456", "ladybug"],
304
+ "302": ["n02167151", "ground_beetle"],
305
+ "303": ["n02168699", "long-horned_beetle"],
306
+ "304": ["n02169497", "leaf_beetle"],
307
+ "305": ["n02172182", "dung_beetle"],
308
+ "306": ["n02174001", "rhinoceros_beetle"],
309
+ "307": ["n02177972", "weevil"],
310
+ "308": ["n02190166", "fly"],
311
+ "309": ["n02206856", "bee"],
312
+ "310": ["n02219486", "ant"],
313
+ "311": ["n02226429", "grasshopper"],
314
+ "312": ["n02229544", "cricket"],
315
+ "313": ["n02231487", "walking_stick"],
316
+ "314": ["n02233338", "cockroach"],
317
+ "315": ["n02236044", "mantis"],
318
+ "316": ["n02256656", "cicada"],
319
+ "317": ["n02259212", "leafhopper"],
320
+ "318": ["n02264363", "lacewing"],
321
+ "319": ["n02268443", "dragonfly"],
322
+ "320": ["n02268853", "damselfly"],
323
+ "321": ["n02276258", "admiral"],
324
+ "322": ["n02277742", "ringlet"],
325
+ "323": ["n02279972", "monarch"],
326
+ "324": ["n02280649", "cabbage_butterfly"],
327
+ "325": ["n02281406", "sulphur_butterfly"],
328
+ "326": ["n02281787", "lycaenid"],
329
+ "327": ["n02317335", "starfish"],
330
+ "328": ["n02319095", "sea_urchin"],
331
+ "329": ["n02321529", "sea_cucumber"],
332
+ "330": ["n02325366", "wood_rabbit"],
333
+ "331": ["n02326432", "hare"],
334
+ "332": ["n02328150", "Angora"],
335
+ "333": ["n02342885", "hamster"],
336
+ "334": ["n02346627", "porcupine"],
337
+ "335": ["n02356798", "fox_squirrel"],
338
+ "336": ["n02361337", "marmot"],
339
+ "337": ["n02363005", "beaver"],
340
+ "338": ["n02364673", "guinea_pig"],
341
+ "339": ["n02389026", "sorrel"],
342
+ "340": ["n02391049", "zebra"],
343
+ "341": ["n02395406", "hog"],
344
+ "342": ["n02396427", "wild_boar"],
345
+ "343": ["n02397096", "warthog"],
346
+ "344": ["n02398521", "hippopotamus"],
347
+ "345": ["n02403003", "ox"],
348
+ "346": ["n02408429", "water_buffalo"],
349
+ "347": ["n02410509", "bison"],
350
+ "348": ["n02412080", "ram"],
351
+ "349": ["n02415577", "bighorn"],
352
+ "350": ["n02417914", "ibex"],
353
+ "351": ["n02422106", "hartebeest"],
354
+ "352": ["n02422699", "impala"],
355
+ "353": ["n02423022", "gazelle"],
356
+ "354": ["n02437312", "Arabian_camel"],
357
+ "355": ["n02437616", "llama"],
358
+ "356": ["n02441942", "weasel"],
359
+ "357": ["n02442845", "mink"],
360
+ "358": ["n02443114", "polecat"],
361
+ "359": ["n02443484", "black-footed_ferret"],
362
+ "360": ["n02444819", "otter"],
363
+ "361": ["n02445715", "skunk"],
364
+ "362": ["n02447366", "badger"],
365
+ "363": ["n02454379", "armadillo"],
366
+ "364": ["n02457408", "three-toed_sloth"],
367
+ "365": ["n02480495", "orangutan"],
368
+ "366": ["n02480855", "gorilla"],
369
+ "367": ["n02481823", "chimpanzee"],
370
+ "368": ["n02483362", "gibbon"],
371
+ "369": ["n02483708", "siamang"],
372
+ "370": ["n02484975", "guenon"],
373
+ "371": ["n02486261", "patas"],
374
+ "372": ["n02486410", "baboon"],
375
+ "373": ["n02487347", "macaque"],
376
+ "374": ["n02488291", "langur"],
377
+ "375": ["n02488702", "colobus"],
378
+ "376": ["n02489166", "proboscis_monkey"],
379
+ "377": ["n02490219", "marmoset"],
380
+ "378": ["n02492035", "capuchin"],
381
+ "379": ["n02492660", "howler_monkey"],
382
+ "380": ["n02493509", "titi"],
383
+ "381": ["n02493793", "spider_monkey"],
384
+ "382": ["n02494079", "squirrel_monkey"],
385
+ "383": ["n02497673", "Madagascar_cat"],
386
+ "384": ["n02500267", "indri"],
387
+ "385": ["n02504013", "Indian_elephant"],
388
+ "386": ["n02504458", "African_elephant"],
389
+ "387": ["n02509815", "lesser_panda"],
390
+ "388": ["n02510455", "giant_panda"],
391
+ "389": ["n02514041", "barracouta"],
392
+ "390": ["n02526121", "eel"],
393
+ "391": ["n02536864", "coho"],
394
+ "392": ["n02606052", "rock_beauty"],
395
+ "393": ["n02607072", "anemone_fish"],
396
+ "394": ["n02640242", "sturgeon"],
397
+ "395": ["n02641379", "gar"],
398
+ "396": ["n02643566", "lionfish"],
399
+ "397": ["n02655020", "puffer"],
400
+ "398": ["n02666196", "abacus"],
401
+ "399": ["n02667093", "abaya"],
402
+ "400": ["n02669723", "academic_gown"],
403
+ "401": ["n02672831", "accordion"],
404
+ "402": ["n02676566", "acoustic_guitar"],
405
+ "403": ["n02687172", "aircraft_carrier"],
406
+ "404": ["n02690373", "airliner"],
407
+ "405": ["n02692877", "airship"],
408
+ "406": ["n02699494", "altar"],
409
+ "407": ["n02701002", "ambulance"],
410
+ "408": ["n02704792", "amphibian"],
411
+ "409": ["n02708093", "analog_clock"],
412
+ "410": ["n02727426", "apiary"],
413
+ "411": ["n02730930", "apron"],
414
+ "412": ["n02747177", "ashcan"],
415
+ "413": ["n02749479", "assault_rifle"],
416
+ "414": ["n02769748", "backpack"],
417
+ "415": ["n02776631", "bakery"],
418
+ "416": ["n02777292", "balance_beam"],
419
+ "417": ["n02782093", "balloon"],
420
+ "418": ["n02783161", "ballpoint"],
421
+ "419": ["n02786058", "Band_Aid"],
422
+ "420": ["n02787622", "banjo"],
423
+ "421": ["n02788148", "bannister"],
424
+ "422": ["n02790996", "barbell"],
425
+ "423": ["n02791124", "barber_chair"],
426
+ "424": ["n02791270", "barbershop"],
427
+ "425": ["n02793495", "barn"],
428
+ "426": ["n02794156", "barometer"],
429
+ "427": ["n02795169", "barrel"],
430
+ "428": ["n02797295", "barrow"],
431
+ "429": ["n02799071", "baseball"],
432
+ "430": ["n02802426", "basketball"],
433
+ "431": ["n02804414", "bassinet"],
434
+ "432": ["n02804610", "bassoon"],
435
+ "433": ["n02807133", "bathing_cap"],
436
+ "434": ["n02808304", "bath_towel"],
437
+ "435": ["n02808440", "bathtub"],
438
+ "436": ["n02814533", "beach_wagon"],
439
+ "437": ["n02814860", "beacon"],
440
+ "438": ["n02815834", "beaker"],
441
+ "439": ["n02817516", "bearskin"],
442
+ "440": ["n02823428", "beer_bottle"],
443
+ "441": ["n02823750", "beer_glass"],
444
+ "442": ["n02825657", "bell_cote"],
445
+ "443": ["n02834397", "bib"],
446
+ "444": ["n02835271", "bicycle-built-for-two"],
447
+ "445": ["n02837789", "bikini"],
448
+ "446": ["n02840245", "binder"],
449
+ "447": ["n02841315", "binoculars"],
450
+ "448": ["n02843684", "birdhouse"],
451
+ "449": ["n02859443", "boathouse"],
452
+ "450": ["n02860847", "bobsled"],
453
+ "451": ["n02865351", "bolo_tie"],
454
+ "452": ["n02869837", "bonnet"],
455
+ "453": ["n02870880", "bookcase"],
456
+ "454": ["n02871525", "bookshop"],
457
+ "455": ["n02877765", "bottlecap"],
458
+ "456": ["n02879718", "bow"],
459
+ "457": ["n02883205", "bow_tie"],
460
+ "458": ["n02892201", "brass"],
461
+ "459": ["n02892767", "brassiere"],
462
+ "460": ["n02894605", "breakwater"],
463
+ "461": ["n02895154", "breastplate"],
464
+ "462": ["n02906734", "broom"],
465
+ "463": ["n02909870", "bucket"],
466
+ "464": ["n02910353", "buckle"],
467
+ "465": ["n02916936", "bulletproof_vest"],
468
+ "466": ["n02917067", "bullet_train"],
469
+ "467": ["n02927161", "butcher_shop"],
470
+ "468": ["n02930766", "cab"],
471
+ "469": ["n02939185", "caldron"],
472
+ "470": ["n02948072", "candle"],
473
+ "471": ["n02950826", "cannon"],
474
+ "472": ["n02951358", "canoe"],
475
+ "473": ["n02951585", "can_opener"],
476
+ "474": ["n02963159", "cardigan"],
477
+ "475": ["n02965783", "car_mirror"],
478
+ "476": ["n02966193", "carousel"],
479
+ "477": ["n02966687", "carpenter's_kit"],
480
+ "478": ["n02971356", "carton"],
481
+ "479": ["n02974003", "car_wheel"],
482
+ "480": ["n02977058", "cash_machine"],
483
+ "481": ["n02978881", "cassette"],
484
+ "482": ["n02979186", "cassette_player"],
485
+ "483": ["n02980441", "castle"],
486
+ "484": ["n02981792", "catamaran"],
487
+ "485": ["n02988304", "CD_player"],
488
+ "486": ["n02992211", "cello"],
489
+ "487": ["n02992529", "cellular_telephone"],
490
+ "488": ["n02999410", "chain"],
491
+ "489": ["n03000134", "chainlink_fence"],
492
+ "490": ["n03000247", "chain_mail"],
493
+ "491": ["n03000684", "chain_saw"],
494
+ "492": ["n03014705", "chest"],
495
+ "493": ["n03016953", "chiffonier"],
496
+ "494": ["n03017168", "chime"],
497
+ "495": ["n03018349", "china_cabinet"],
498
+ "496": ["n03026506", "Christmas_stocking"],
499
+ "497": ["n03028079", "church"],
500
+ "498": ["n03032252", "cinema"],
501
+ "499": ["n03041632", "cleaver"],
502
+ "500": ["n03042490", "cliff_dwelling"],
503
+ "501": ["n03045698", "cloak"],
504
+ "502": ["n03047690", "clog"],
505
+ "503": ["n03062245", "cocktail_shaker"],
506
+ "504": ["n03063599", "coffee_mug"],
507
+ "505": ["n03063689", "coffeepot"],
508
+ "506": ["n03065424", "coil"],
509
+ "507": ["n03075370", "combination_lock"],
510
+ "508": ["n03085013", "computer_keyboard"],
511
+ "509": ["n03089624", "confectionery"],
512
+ "510": ["n03095699", "container_ship"],
513
+ "511": ["n03100240", "convertible"],
514
+ "512": ["n03109150", "corkscrew"],
515
+ "513": ["n03110669", "cornet"],
516
+ "514": ["n03124043", "cowboy_boot"],
517
+ "515": ["n03124170", "cowboy_hat"],
518
+ "516": ["n03125729", "cradle"],
519
+ "517": ["n03126707", "crane"],
520
+ "518": ["n03127747", "crash_helmet"],
521
+ "519": ["n03127925", "crate"],
522
+ "520": ["n03131574", "crib"],
523
+ "521": ["n03133878", "Crock_Pot"],
524
+ "522": ["n03134739", "croquet_ball"],
525
+ "523": ["n03141823", "crutch"],
526
+ "524": ["n03146219", "cuirass"],
527
+ "525": ["n03160309", "dam"],
528
+ "526": ["n03179701", "desk"],
529
+ "527": ["n03180011", "desktop_computer"],
530
+ "528": ["n03187595", "dial_telephone"],
531
+ "529": ["n03188531", "diaper"],
532
+ "530": ["n03196217", "digital_clock"],
533
+ "531": ["n03197337", "digital_watch"],
534
+ "532": ["n03201208", "dining_table"],
535
+ "533": ["n03207743", "dishrag"],
536
+ "534": ["n03207941", "dishwasher"],
537
+ "535": ["n03208938", "disk_brake"],
538
+ "536": ["n03216828", "dock"],
539
+ "537": ["n03218198", "dogsled"],
540
+ "538": ["n03220513", "dome"],
541
+ "539": ["n03223299", "doormat"],
542
+ "540": ["n03240683", "drilling_platform"],
543
+ "541": ["n03249569", "drum"],
544
+ "542": ["n03250847", "drumstick"],
545
+ "543": ["n03255030", "dumbbell"],
546
+ "544": ["n03259280", "Dutch_oven"],
547
+ "545": ["n03271574", "electric_fan"],
548
+ "546": ["n03272010", "electric_guitar"],
549
+ "547": ["n03272562", "electric_locomotive"],
550
+ "548": ["n03290653", "entertainment_center"],
551
+ "549": ["n03291819", "envelope"],
552
+ "550": ["n03297495", "espresso_maker"],
553
+ "551": ["n03314780", "face_powder"],
554
+ "552": ["n03325584", "feather_boa"],
555
+ "553": ["n03337140", "file"],
556
+ "554": ["n03344393", "fireboat"],
557
+ "555": ["n03345487", "fire_engine"],
558
+ "556": ["n03347037", "fire_screen"],
559
+ "557": ["n03355925", "flagpole"],
560
+ "558": ["n03372029", "flute"],
561
+ "559": ["n03376595", "folding_chair"],
562
+ "560": ["n03379051", "football_helmet"],
563
+ "561": ["n03384352", "forklift"],
564
+ "562": ["n03388043", "fountain"],
565
+ "563": ["n03388183", "fountain_pen"],
566
+ "564": ["n03388549", "four-poster"],
567
+ "565": ["n03393912", "freight_car"],
568
+ "566": ["n03394916", "French_horn"],
569
+ "567": ["n03400231", "frying_pan"],
570
+ "568": ["n03404251", "fur_coat"],
571
+ "569": ["n03417042", "garbage_truck"],
572
+ "570": ["n03424325", "gasmask"],
573
+ "571": ["n03425413", "gas_pump"],
574
+ "572": ["n03443371", "goblet"],
575
+ "573": ["n03444034", "go-kart"],
576
+ "574": ["n03445777", "golf_ball"],
577
+ "575": ["n03445924", "golfcart"],
578
+ "576": ["n03447447", "gondola"],
579
+ "577": ["n03447721", "gong"],
580
+ "578": ["n03450230", "gown"],
581
+ "579": ["n03452741", "grand_piano"],
582
+ "580": ["n03457902", "greenhouse"],
583
+ "581": ["n03459775", "grille"],
584
+ "582": ["n03461385", "grocery_store"],
585
+ "583": ["n03467068", "guillotine"],
586
+ "584": ["n03476684", "hair_slide"],
587
+ "585": ["n03476991", "hair_spray"],
588
+ "586": ["n03478589", "half_track"],
589
+ "587": ["n03481172", "hammer"],
590
+ "588": ["n03482405", "hamper"],
591
+ "589": ["n03483316", "hand_blower"],
592
+ "590": ["n03485407", "hand-held_computer"],
593
+ "591": ["n03485794", "handkerchief"],
594
+ "592": ["n03492542", "hard_disc"],
595
+ "593": ["n03494278", "harmonica"],
596
+ "594": ["n03495258", "harp"],
597
+ "595": ["n03496892", "harvester"],
598
+ "596": ["n03498962", "hatchet"],
599
+ "597": ["n03527444", "holster"],
600
+ "598": ["n03529860", "home_theater"],
601
+ "599": ["n03530642", "honeycomb"],
602
+ "600": ["n03532672", "hook"],
603
+ "601": ["n03534580", "hoopskirt"],
604
+ "602": ["n03535780", "horizontal_bar"],
605
+ "603": ["n03538406", "horse_cart"],
606
+ "604": ["n03544143", "hourglass"],
607
+ "605": ["n03584254", "iPod"],
608
+ "606": ["n03584829", "iron"],
609
+ "607": ["n03590841", "jack-o'-lantern"],
610
+ "608": ["n03594734", "jean"],
611
+ "609": ["n03594945", "jeep"],
612
+ "610": ["n03595614", "jersey"],
613
+ "611": ["n03598930", "jigsaw_puzzle"],
614
+ "612": ["n03599486", "jinrikisha"],
615
+ "613": ["n03602883", "joystick"],
616
+ "614": ["n03617480", "kimono"],
617
+ "615": ["n03623198", "knee_pad"],
618
+ "616": ["n03627232", "knot"],
619
+ "617": ["n03630383", "lab_coat"],
620
+ "618": ["n03633091", "ladle"],
621
+ "619": ["n03637318", "lampshade"],
622
+ "620": ["n03642806", "laptop"],
623
+ "621": ["n03649909", "lawn_mower"],
624
+ "622": ["n03657121", "lens_cap"],
625
+ "623": ["n03658185", "letter_opener"],
626
+ "624": ["n03661043", "library"],
627
+ "625": ["n03662601", "lifeboat"],
628
+ "626": ["n03666591", "lighter"],
629
+ "627": ["n03670208", "limousine"],
630
+ "628": ["n03673027", "liner"],
631
+ "629": ["n03676483", "lipstick"],
632
+ "630": ["n03680355", "Loafer"],
633
+ "631": ["n03690938", "lotion"],
634
+ "632": ["n03691459", "loudspeaker"],
635
+ "633": ["n03692522", "loupe"],
636
+ "634": ["n03697007", "lumbermill"],
637
+ "635": ["n03706229", "magnetic_compass"],
638
+ "636": ["n03709823", "mailbag"],
639
+ "637": ["n03710193", "mailbox"],
640
+ "638": ["n03710637", "maillot"],
641
+ "639": ["n03710721", "maillot"],
642
+ "640": ["n03717622", "manhole_cover"],
643
+ "641": ["n03720891", "maraca"],
644
+ "642": ["n03721384", "marimba"],
645
+ "643": ["n03724870", "mask"],
646
+ "644": ["n03729826", "matchstick"],
647
+ "645": ["n03733131", "maypole"],
648
+ "646": ["n03733281", "maze"],
649
+ "647": ["n03733805", "measuring_cup"],
650
+ "648": ["n03742115", "medicine_chest"],
651
+ "649": ["n03743016", "megalith"],
652
+ "650": ["n03759954", "microphone"],
653
+ "651": ["n03761084", "microwave"],
654
+ "652": ["n03763968", "military_uniform"],
655
+ "653": ["n03764736", "milk_can"],
656
+ "654": ["n03769881", "minibus"],
657
+ "655": ["n03770439", "miniskirt"],
658
+ "656": ["n03770679", "minivan"],
659
+ "657": ["n03773504", "missile"],
660
+ "658": ["n03775071", "mitten"],
661
+ "659": ["n03775546", "mixing_bowl"],
662
+ "660": ["n03776460", "mobile_home"],
663
+ "661": ["n03777568", "Model_T"],
664
+ "662": ["n03777754", "modem"],
665
+ "663": ["n03781244", "monastery"],
666
+ "664": ["n03782006", "monitor"],
667
+ "665": ["n03785016", "moped"],
668
+ "666": ["n03786901", "mortar"],
669
+ "667": ["n03787032", "mortarboard"],
670
+ "668": ["n03788195", "mosque"],
671
+ "669": ["n03788365", "mosquito_net"],
672
+ "670": ["n03791053", "motor_scooter"],
673
+ "671": ["n03792782", "mountain_bike"],
674
+ "672": ["n03792972", "mountain_tent"],
675
+ "673": ["n03793489", "mouse"],
676
+ "674": ["n03794056", "mousetrap"],
677
+ "675": ["n03796401", "moving_van"],
678
+ "676": ["n03803284", "muzzle"],
679
+ "677": ["n03804744", "nail"],
680
+ "678": ["n03814639", "neck_brace"],
681
+ "679": ["n03814906", "necklace"],
682
+ "680": ["n03825788", "nipple"],
683
+ "681": ["n03832673", "notebook"],
684
+ "682": ["n03837869", "obelisk"],
685
+ "683": ["n03838899", "oboe"],
686
+ "684": ["n03840681", "ocarina"],
687
+ "685": ["n03841143", "odometer"],
688
+ "686": ["n03843555", "oil_filter"],
689
+ "687": ["n03854065", "organ"],
690
+ "688": ["n03857828", "oscilloscope"],
691
+ "689": ["n03866082", "overskirt"],
692
+ "690": ["n03868242", "oxcart"],
693
+ "691": ["n03868863", "oxygen_mask"],
694
+ "692": ["n03871628", "packet"],
695
+ "693": ["n03873416", "paddle"],
696
+ "694": ["n03874293", "paddlewheel"],
697
+ "695": ["n03874599", "padlock"],
698
+ "696": ["n03876231", "paintbrush"],
699
+ "697": ["n03877472", "pajama"],
700
+ "698": ["n03877845", "palace"],
701
+ "699": ["n03884397", "panpipe"],
702
+ "700": ["n03887697", "paper_towel"],
703
+ "701": ["n03888257", "parachute"],
704
+ "702": ["n03888605", "parallel_bars"],
705
+ "703": ["n03891251", "park_bench"],
706
+ "704": ["n03891332", "parking_meter"],
707
+ "705": ["n03895866", "passenger_car"],
708
+ "706": ["n03899768", "patio"],
709
+ "707": ["n03902125", "pay-phone"],
710
+ "708": ["n03903868", "pedestal"],
711
+ "709": ["n03908618", "pencil_box"],
712
+ "710": ["n03908714", "pencil_sharpener"],
713
+ "711": ["n03916031", "perfume"],
714
+ "712": ["n03920288", "Petri_dish"],
715
+ "713": ["n03924679", "photocopier"],
716
+ "714": ["n03929660", "pick"],
717
+ "715": ["n03929855", "pickelhaube"],
718
+ "716": ["n03930313", "picket_fence"],
719
+ "717": ["n03930630", "pickup"],
720
+ "718": ["n03933933", "pier"],
721
+ "719": ["n03935335", "piggy_bank"],
722
+ "720": ["n03937543", "pill_bottle"],
723
+ "721": ["n03938244", "pillow"],
724
+ "722": ["n03942813", "ping-pong_ball"],
725
+ "723": ["n03944341", "pinwheel"],
726
+ "724": ["n03947888", "pirate"],
727
+ "725": ["n03950228", "pitcher"],
728
+ "726": ["n03954731", "plane"],
729
+ "727": ["n03956157", "planetarium"],
730
+ "728": ["n03958227", "plastic_bag"],
731
+ "729": ["n03961711", "plate_rack"],
732
+ "730": ["n03967562", "plow"],
733
+ "731": ["n03970156", "plunger"],
734
+ "732": ["n03976467", "Polaroid_camera"],
735
+ "733": ["n03976657", "pole"],
736
+ "734": ["n03977966", "police_van"],
737
+ "735": ["n03980874", "poncho"],
738
+ "736": ["n03982430", "pool_table"],
739
+ "737": ["n03983396", "pop_bottle"],
740
+ "738": ["n03991062", "pot"],
741
+ "739": ["n03992509", "potter's_wheel"],
742
+ "740": ["n03995372", "power_drill"],
743
+ "741": ["n03998194", "prayer_rug"],
744
+ "742": ["n04004767", "printer"],
745
+ "743": ["n04005630", "prison"],
746
+ "744": ["n04008634", "projectile"],
747
+ "745": ["n04009552", "projector"],
748
+ "746": ["n04019541", "puck"],
749
+ "747": ["n04023962", "punching_bag"],
750
+ "748": ["n04026417", "purse"],
751
+ "749": ["n04033901", "quill"],
752
+ "750": ["n04033995", "quilt"],
753
+ "751": ["n04037443", "racer"],
754
+ "752": ["n04039381", "racket"],
755
+ "753": ["n04040759", "radiator"],
756
+ "754": ["n04041544", "radio"],
757
+ "755": ["n04044716", "radio_telescope"],
758
+ "756": ["n04049303", "rain_barrel"],
759
+ "757": ["n04065272", "recreational_vehicle"],
760
+ "758": ["n04067472", "reel"],
761
+ "759": ["n04069434", "reflex_camera"],
762
+ "760": ["n04070727", "refrigerator"],
763
+ "761": ["n04074963", "remote_control"],
764
+ "762": ["n04081281", "restaurant"],
765
+ "763": ["n04086273", "revolver"],
766
+ "764": ["n04090263", "rifle"],
767
+ "765": ["n04099969", "rocking_chair"],
768
+ "766": ["n04111531", "rotisserie"],
769
+ "767": ["n04116512", "rubber_eraser"],
770
+ "768": ["n04118538", "rugby_ball"],
771
+ "769": ["n04118776", "rule"],
772
+ "770": ["n04120489", "running_shoe"],
773
+ "771": ["n04125021", "safe"],
774
+ "772": ["n04127249", "safety_pin"],
775
+ "773": ["n04131690", "saltshaker"],
776
+ "774": ["n04133789", "sandal"],
777
+ "775": ["n04136333", "sarong"],
778
+ "776": ["n04141076", "sax"],
779
+ "777": ["n04141327", "scabbard"],
780
+ "778": ["n04141975", "scale"],
781
+ "779": ["n04146614", "school_bus"],
782
+ "780": ["n04147183", "schooner"],
783
+ "781": ["n04149813", "scoreboard"],
784
+ "782": ["n04152593", "screen"],
785
+ "783": ["n04153751", "screw"],
786
+ "784": ["n04154565", "screwdriver"],
787
+ "785": ["n04162706", "seat_belt"],
788
+ "786": ["n04179913", "sewing_machine"],
789
+ "787": ["n04192698", "shield"],
790
+ "788": ["n04200800", "shoe_shop"],
791
+ "789": ["n04201297", "shoji"],
792
+ "790": ["n04204238", "shopping_basket"],
793
+ "791": ["n04204347", "shopping_cart"],
794
+ "792": ["n04208210", "shovel"],
795
+ "793": ["n04209133", "shower_cap"],
796
+ "794": ["n04209239", "shower_curtain"],
797
+ "795": ["n04228054", "ski"],
798
+ "796": ["n04229816", "ski_mask"],
799
+ "797": ["n04235860", "sleeping_bag"],
800
+ "798": ["n04238763", "slide_rule"],
801
+ "799": ["n04239074", "sliding_door"],
802
+ "800": ["n04243546", "slot"],
803
+ "801": ["n04251144", "snorkel"],
804
+ "802": ["n04252077", "snowmobile"],
805
+ "803": ["n04252225", "snowplow"],
806
+ "804": ["n04254120", "soap_dispenser"],
807
+ "805": ["n04254680", "soccer_ball"],
808
+ "806": ["n04254777", "sock"],
809
+ "807": ["n04258138", "solar_dish"],
810
+ "808": ["n04259630", "sombrero"],
811
+ "809": ["n04263257", "soup_bowl"],
812
+ "810": ["n04264628", "space_bar"],
813
+ "811": ["n04265275", "space_heater"],
814
+ "812": ["n04266014", "space_shuttle"],
815
+ "813": ["n04270147", "spatula"],
816
+ "814": ["n04273569", "speedboat"],
817
+ "815": ["n04275548", "spider_web"],
818
+ "816": ["n04277352", "spindle"],
819
+ "817": ["n04285008", "sports_car"],
820
+ "818": ["n04286575", "spotlight"],
821
+ "819": ["n04296562", "stage"],
822
+ "820": ["n04310018", "steam_locomotive"],
823
+ "821": ["n04311004", "steel_arch_bridge"],
824
+ "822": ["n04311174", "steel_drum"],
825
+ "823": ["n04317175", "stethoscope"],
826
+ "824": ["n04325704", "stole"],
827
+ "825": ["n04326547", "stone_wall"],
828
+ "826": ["n04328186", "stopwatch"],
829
+ "827": ["n04330267", "stove"],
830
+ "828": ["n04332243", "strainer"],
831
+ "829": ["n04335435", "streetcar"],
832
+ "830": ["n04336792", "stretcher"],
833
+ "831": ["n04344873", "studio_couch"],
834
+ "832": ["n04346328", "stupa"],
835
+ "833": ["n04347754", "submarine"],
836
+ "834": ["n04350905", "suit"],
837
+ "835": ["n04355338", "sundial"],
838
+ "836": ["n04355933", "sunglass"],
839
+ "837": ["n04356056", "sunglasses"],
840
+ "838": ["n04357314", "sunscreen"],
841
+ "839": ["n04366367", "suspension_bridge"],
842
+ "840": ["n04367480", "swab"],
843
+ "841": ["n04370456", "sweatshirt"],
844
+ "842": ["n04371430", "swimming_trunks"],
845
+ "843": ["n04371774", "swing"],
846
+ "844": ["n04372370", "switch"],
847
+ "845": ["n04376876", "syringe"],
848
+ "846": ["n04380533", "table_lamp"],
849
+ "847": ["n04389033", "tank"],
850
+ "848": ["n04392985", "tape_player"],
851
+ "849": ["n04398044", "teapot"],
852
+ "850": ["n04399382", "teddy"],
853
+ "851": ["n04404412", "television"],
854
+ "852": ["n04409515", "tennis_ball"],
855
+ "853": ["n04417672", "thatch"],
856
+ "854": ["n04418357", "theater_curtain"],
857
+ "855": ["n04423845", "thimble"],
858
+ "856": ["n04428191", "thresher"],
859
+ "857": ["n04429376", "throne"],
860
+ "858": ["n04435653", "tile_roof"],
861
+ "859": ["n04442312", "toaster"],
862
+ "860": ["n04443257", "tobacco_shop"],
863
+ "861": ["n04447861", "toilet_seat"],
864
+ "862": ["n04456115", "torch"],
865
+ "863": ["n04458633", "totem_pole"],
866
+ "864": ["n04461696", "tow_truck"],
867
+ "865": ["n04462240", "toyshop"],
868
+ "866": ["n04465501", "tractor"],
869
+ "867": ["n04467665", "trailer_truck"],
870
+ "868": ["n04476259", "tray"],
871
+ "869": ["n04479046", "trench_coat"],
872
+ "870": ["n04482393", "tricycle"],
873
+ "871": ["n04483307", "trimaran"],
874
+ "872": ["n04485082", "tripod"],
875
+ "873": ["n04486054", "triumphal_arch"],
876
+ "874": ["n04487081", "trolleybus"],
877
+ "875": ["n04487394", "trombone"],
878
+ "876": ["n04493381", "tub"],
879
+ "877": ["n04501370", "turnstile"],
880
+ "878": ["n04505470", "typewriter_keyboard"],
881
+ "879": ["n04507155", "umbrella"],
882
+ "880": ["n04509417", "unicycle"],
883
+ "881": ["n04515003", "upright"],
884
+ "882": ["n04517823", "vacuum"],
885
+ "883": ["n04522168", "vase"],
886
+ "884": ["n04523525", "vault"],
887
+ "885": ["n04525038", "velvet"],
888
+ "886": ["n04525305", "vending_machine"],
889
+ "887": ["n04532106", "vestment"],
890
+ "888": ["n04532670", "viaduct"],
891
+ "889": ["n04536866", "violin"],
892
+ "890": ["n04540053", "volleyball"],
893
+ "891": ["n04542943", "waffle_iron"],
894
+ "892": ["n04548280", "wall_clock"],
895
+ "893": ["n04548362", "wallet"],
896
+ "894": ["n04550184", "wardrobe"],
897
+ "895": ["n04552348", "warplane"],
898
+ "896": ["n04553703", "washbasin"],
899
+ "897": ["n04554684", "washer"],
900
+ "898": ["n04557648", "water_bottle"],
901
+ "899": ["n04560804", "water_jug"],
902
+ "900": ["n04562935", "water_tower"],
903
+ "901": ["n04579145", "whiskey_jug"],
904
+ "902": ["n04579432", "whistle"],
905
+ "903": ["n04584207", "wig"],
906
+ "904": ["n04589890", "window_screen"],
907
+ "905": ["n04590129", "window_shade"],
908
+ "906": ["n04591157", "Windsor_tie"],
909
+ "907": ["n04591713", "wine_bottle"],
910
+ "908": ["n04592741", "wing"],
911
+ "909": ["n04596742", "wok"],
912
+ "910": ["n04597913", "wooden_spoon"],
913
+ "911": ["n04599235", "wool"],
914
+ "912": ["n04604644", "worm_fence"],
915
+ "913": ["n04606251", "wreck"],
916
+ "914": ["n04612504", "yawl"],
917
+ "915": ["n04613696", "yurt"],
918
+ "916": ["n06359193", "web_site"],
919
+ "917": ["n06596364", "comic_book"],
920
+ "918": ["n06785654", "crossword_puzzle"],
921
+ "919": ["n06794110", "street_sign"],
922
+ "920": ["n06874185", "traffic_light"],
923
+ "921": ["n07248320", "book_jacket"],
924
+ "922": ["n07565083", "menu"],
925
+ "923": ["n07579787", "plate"],
926
+ "924": ["n07583066", "guacamole"],
927
+ "925": ["n07584110", "consomme"],
928
+ "926": ["n07590611", "hot_pot"],
929
+ "927": ["n07613480", "trifle"],
930
+ "928": ["n07614500", "ice_cream"],
931
+ "929": ["n07615774", "ice_lolly"],
932
+ "930": ["n07684084", "French_loaf"],
933
+ "931": ["n07693725", "bagel"],
934
+ "932": ["n07695742", "pretzel"],
935
+ "933": ["n07697313", "cheeseburger"],
936
+ "934": ["n07697537", "hotdog"],
937
+ "935": ["n07711569", "mashed_potato"],
938
+ "936": ["n07714571", "head_cabbage"],
939
+ "937": ["n07714990", "broccoli"],
940
+ "938": ["n07715103", "cauliflower"],
941
+ "939": ["n07716358", "zucchini"],
942
+ "940": ["n07716906", "spaghetti_squash"],
943
+ "941": ["n07717410", "acorn_squash"],
944
+ "942": ["n07717556", "butternut_squash"],
945
+ "943": ["n07718472", "cucumber"],
946
+ "944": ["n07718747", "artichoke"],
947
+ "945": ["n07720875", "bell_pepper"],
948
+ "946": ["n07730033", "cardoon"],
949
+ "947": ["n07734744", "mushroom"],
950
+ "948": ["n07742313", "Granny_Smith"],
951
+ "949": ["n07745940", "strawberry"],
952
+ "950": ["n07747607", "orange"],
953
+ "951": ["n07749582", "lemon"],
954
+ "952": ["n07753113", "fig"],
955
+ "953": ["n07753275", "pineapple"],
956
+ "954": ["n07753592", "banana"],
957
+ "955": ["n07754684", "jackfruit"],
958
+ "956": ["n07760859", "custard_apple"],
959
+ "957": ["n07768694", "pomegranate"],
960
+ "958": ["n07802026", "hay"],
961
+ "959": ["n07831146", "carbonara"],
962
+ "960": ["n07836838", "chocolate_sauce"],
963
+ "961": ["n07860988", "dough"],
964
+ "962": ["n07871810", "meat_loaf"],
965
+ "963": ["n07873807", "pizza"],
966
+ "964": ["n07875152", "potpie"],
967
+ "965": ["n07880968", "burrito"],
968
+ "966": ["n07892512", "red_wine"],
969
+ "967": ["n07920052", "espresso"],
970
+ "968": ["n07930864", "cup"],
971
+ "969": ["n07932039", "eggnog"],
972
+ "970": ["n09193705", "alp"],
973
+ "971": ["n09229709", "bubble"],
974
+ "972": ["n09246464", "cliff"],
975
+ "973": ["n09256479", "coral_reef"],
976
+ "974": ["n09288635", "geyser"],
977
+ "975": ["n09332890", "lakeside"],
978
+ "976": ["n09399592", "promontory"],
979
+ "977": ["n09421951", "sandbar"],
980
+ "978": ["n09428293", "seashore"],
981
+ "979": ["n09468604", "valley"],
982
+ "980": ["n09472597", "volcano"],
983
+ "981": ["n09835506", "ballplayer"],
984
+ "982": ["n10148035", "groom"],
985
+ "983": ["n10565667", "scuba_diver"],
986
+ "984": ["n11879895", "rapeseed"],
987
+ "985": ["n11939491", "daisy"],
988
+ "986": ["n12057211", "yellow_lady's_slipper"],
989
+ "987": ["n12144580", "corn"],
990
+ "988": ["n12267677", "acorn"],
991
+ "989": ["n12620546", "hip"],
992
+ "990": ["n12768682", "buckeye"],
993
+ "991": ["n12985857", "coral_fungus"],
994
+ "992": ["n12998815", "agaric"],
995
+ "993": ["n13037406", "gyromitra"],
996
+ "994": ["n13040303", "stinkhorn"],
997
+ "995": ["n13044778", "earthstar"],
998
+ "996": ["n13052670", "hen-of-the-woods"],
999
+ "997": ["n13054560", "bolete"],
1000
+ "998": ["n13133613", "ear"],
1001
+ "999": ["n15075141", "toilet_tissue"]
1002
+ }
library.jpeg ADDED
panda.png ADDED
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ torch
2
+ torchvision
3
+ einops
4
+ timm
5
+ Pillow
6
+ decord
uniformer.py ADDED
@@ -0,0 +1,366 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections import OrderedDict
2
+ import torch
3
+ import torch.nn as nn
4
+ from functools import partial
5
+ from timm.models.vision_transformer import _cfg
6
+ from timm.models.registry import register_model
7
+ from timm.models.layers import trunc_normal_, DropPath, to_2tuple
8
+
9
+ layer_scale = False
10
+ init_value = 1e-6
11
+
12
+
13
class Mlp(nn.Module):
    """Token-wise feed-forward network: Linear -> act -> drop -> Linear -> drop."""

    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        hidden_features = hidden_features or in_features
        out_features = out_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        # Same dropout module is applied after the activation and after fc2.
        for stage in (self.fc1, self.act, self.drop, self.fc2, self.drop):
            x = stage(x)
        return x
30
+
31
+
32
class CMlp(nn.Module):
    """Convolutional MLP: 1x1 Conv -> act -> drop -> 1x1 Conv -> drop on (B, C, H, W) maps."""

    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        hidden_features = hidden_features or in_features
        out_features = out_features or in_features
        self.fc1 = nn.Conv2d(in_features, hidden_features, 1)
        self.act = act_layer()
        self.fc2 = nn.Conv2d(hidden_features, out_features, 1)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        # Mirrors Mlp but with pointwise convolutions so spatial layout is kept.
        for stage in (self.fc1, self.act, self.drop, self.fc2, self.drop):
            x = stage(x)
        return x
49
+
50
+
51
class Attention(nn.Module):
    """Multi-head self-attention over a token sequence shaped (B, N, C)."""

    def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
        self.scale = qk_scale or head_dim ** -0.5

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x):
        B, N, C = x.shape
        head_dim = C // self.num_heads
        # Project to q/k/v in one pass, then split along the leading axis.
        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, head_dim).permute(2, 0, 3, 1, 4)
        q, k, v = qkv.unbind(0)  # each (B, heads, N, head_dim)

        scores = torch.matmul(q, k.transpose(-2, -1)) * self.scale
        weights = self.attn_drop(scores.softmax(dim=-1))

        out = torch.matmul(weights, v).transpose(1, 2).reshape(B, N, C)
        return self.proj_drop(self.proj(out))
77
+
78
+
79
class CBlock(nn.Module):
    """Convolution-based UniFormer block.

    Depthwise 3x3 conv as a conditional positional encoding, a cheap local
    "attention" (1x1 -> depthwise 5x5 -> 1x1 with BatchNorm), and a
    convolutional MLP — each wrapped in a residual connection with DropPath.
    `num_heads` / `qkv_bias` / `qk_scale` / `attn_drop` are accepted only to
    mirror SABlock's signature; they are unused here.
    """

    def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
                 drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm):
        super().__init__()
        self.pos_embed = nn.Conv2d(dim, dim, 3, padding=1, groups=dim)
        self.norm1 = nn.BatchNorm2d(dim)
        self.conv1 = nn.Conv2d(dim, dim, 1)
        self.conv2 = nn.Conv2d(dim, dim, 1)
        # Local relation aggregation via a depthwise 5x5 conv instead of self-attention.
        self.attn = nn.Conv2d(dim, dim, 5, padding=2, groups=dim)
        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.norm2 = nn.BatchNorm2d(dim)
        self.mlp = CMlp(in_features=dim, hidden_features=int(dim * mlp_ratio), act_layer=act_layer, drop=drop)

    def forward(self, x):
        x = x + self.pos_embed(x)
        mixed = self.conv2(self.attn(self.conv1(self.norm1(x))))
        x = x + self.drop_path(mixed)
        x = x + self.drop_path(self.mlp(self.norm2(x)))
        return x
99
+
100
+
101
class SABlock(nn.Module):
    """Self-attention UniFormer block operating on (B, C, H, W) feature maps.

    Spatial positions are flattened into a token sequence, processed by a
    pre-norm attention + MLP pair (optionally with LayerScale), then reshaped
    back to a feature map.
    """

    def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
                 drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm):
        super().__init__()
        self.pos_embed = nn.Conv2d(dim, dim, 3, padding=1, groups=dim)
        self.norm1 = norm_layer(dim)
        self.attn = Attention(
            dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
            attn_drop=attn_drop, proj_drop=drop)
        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.norm2 = norm_layer(dim)
        self.mlp = Mlp(in_features=dim, hidden_features=int(dim * mlp_ratio), act_layer=act_layer, drop=drop)
        # LayerScale is toggled by the module-level `layer_scale` flag at construction time.
        global layer_scale
        self.ls = layer_scale
        if self.ls:
            global init_value
            print(f"Use layer_scale: {layer_scale}, init_values: {init_value}")
            self.gamma_1 = nn.Parameter(init_value * torch.ones((dim)), requires_grad=True)
            self.gamma_2 = nn.Parameter(init_value * torch.ones((dim)), requires_grad=True)

    def forward(self, x):
        x = x + self.pos_embed(x)
        # Channels become the embedding dim of each spatial token.
        B, C, H, W = x.shape
        tokens = x.flatten(2).transpose(1, 2)  # (B, H*W, C)
        if self.ls:
            tokens = tokens + self.drop_path(self.gamma_1 * self.attn(self.norm1(tokens)))
            tokens = tokens + self.drop_path(self.gamma_2 * self.mlp(self.norm2(tokens)))
        else:
            tokens = tokens + self.drop_path(self.attn(self.norm1(tokens)))
            tokens = tokens + self.drop_path(self.mlp(self.norm2(tokens)))
        return tokens.transpose(1, 2).reshape(B, C, H, W)
136
+
137
+
138
class head_embedding(nn.Module):
    """Overlapped convolutional stem: two stride-2 3x3 convs (total stride 4),
    each followed by BatchNorm, with a GELU in between."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        mid_channels = out_channels // 2
        self.proj = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)),
            nn.BatchNorm2d(mid_channels),
            nn.GELU(),
            nn.Conv2d(mid_channels, out_channels, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)),
            nn.BatchNorm2d(out_channels),
        )

    def forward(self, x):
        return self.proj(x)
153
+
154
+
155
class middle_embedding(nn.Module):
    """Between-stage downsampler: one stride-2 3x3 conv plus BatchNorm (halves H and W)."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.proj = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)),
            nn.BatchNorm2d(out_channels),
        )

    def forward(self, x):
        return self.proj(x)
167
+
168
+
169
class PatchEmbed(nn.Module):
    """Image to Patch Embedding.

    Non-overlapping conv projection with stride == patch size, followed by
    LayerNorm over the channel dim; returns a (B, embed_dim, H', W') map.
    """

    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
        super().__init__()
        img_size = to_2tuple(img_size)
        patch_size = to_2tuple(patch_size)
        self.img_size = img_size
        self.patch_size = patch_size
        self.num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0])
        self.norm = nn.LayerNorm(embed_dim)
        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)

    def forward(self, x):
        # FIXME look at relaxing size constraints (input size is not asserted here).
        x = self.proj(x)
        B, C, H, W = x.shape
        # LayerNorm normalizes the channel dim, so flatten to (B, H*W, C) and back.
        x = self.norm(x.flatten(2).transpose(1, 2))
        return x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
194
+
195
+
196
class UniFormer(nn.Module):
    """UniFormer image backbone.

    Four sequential stages, each preceded by a downsampling patch embedding.
    The first two stages use convolutional ``CBlock``s (local token relations);
    the last two use ``SABlock``s (global self-attention). A BatchNorm and
    global average pooling feed an optional pre-logits layer and the
    classification head.
    """
    def __init__(self, depth=[3, 4, 8, 3], img_size=224, in_chans=3, num_classes=1000, embed_dim=[64, 128, 320, 512],
                 head_dim=64, mlp_ratio=4., qkv_bias=True, qk_scale=None, representation_size=None,
                 drop_rate=0., attn_drop_rate=0., drop_path_rate=0., norm_layer=None, conv_stem=False):
        """
        Args:
            depth (list): depth of each stage
            img_size (int, tuple): input image size
            in_chans (int): number of input channels
            num_classes (int): number of classes for classification head
            embed_dim (list): embedding dimension of each stage
            head_dim (int): head dimension
            mlp_ratio (int): ratio of mlp hidden dim to embedding dim
            qkv_bias (bool): enable bias for qkv if True
            qk_scale (float): override default qk scale of head_dim ** -0.5 if set
            representation_size (Optional[int]): enable and set representation layer (pre-logits) to this value if set
            drop_rate (float): dropout rate
            attn_drop_rate (float): attention dropout rate
            drop_path_rate (float): stochastic depth rate
            norm_layer: (nn.Module): normalization layer
            conv_stem: (bool): whether use overlapped patch stem
        """
        super().__init__()
        self.num_classes = num_classes
        self.embed_dim = embed_dim
        # BUGFIX: num_features must be the final stage width (an int), not the
        # whole embed_dim list — it feeds nn.Linear sizes below.
        self.num_features = embed_dim[-1]
        norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)
        if conv_stem:
            self.patch_embed1 = head_embedding(in_channels=in_chans, out_channels=embed_dim[0])
            self.patch_embed2 = middle_embedding(in_channels=embed_dim[0], out_channels=embed_dim[1])
            self.patch_embed3 = middle_embedding(in_channels=embed_dim[1], out_channels=embed_dim[2])
            self.patch_embed4 = middle_embedding(in_channels=embed_dim[2], out_channels=embed_dim[3])
        else:
            self.patch_embed1 = PatchEmbed(
                img_size=img_size, patch_size=4, in_chans=in_chans, embed_dim=embed_dim[0])
            self.patch_embed2 = PatchEmbed(
                img_size=img_size // 4, patch_size=2, in_chans=embed_dim[0], embed_dim=embed_dim[1])
            self.patch_embed3 = PatchEmbed(
                img_size=img_size // 8, patch_size=2, in_chans=embed_dim[1], embed_dim=embed_dim[2])
            self.patch_embed4 = PatchEmbed(
                img_size=img_size // 16, patch_size=2, in_chans=embed_dim[2], embed_dim=embed_dim[3])

        self.pos_drop = nn.Dropout(p=drop_rate)
        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depth))]  # stochastic depth decay rule
        num_heads = [dim // head_dim for dim in embed_dim]
        self.blocks1 = nn.ModuleList([
            CBlock(
                dim=embed_dim[0], num_heads=num_heads[0], mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer)
            for i in range(depth[0])])
        self.blocks2 = nn.ModuleList([
            CBlock(
                dim=embed_dim[1], num_heads=num_heads[1], mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+depth[0]], norm_layer=norm_layer)
            for i in range(depth[1])])
        self.blocks3 = nn.ModuleList([
            SABlock(
                dim=embed_dim[2], num_heads=num_heads[2], mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+depth[0]+depth[1]], norm_layer=norm_layer)
            for i in range(depth[2])])
        self.blocks4 = nn.ModuleList([
            SABlock(
                dim=embed_dim[3], num_heads=num_heads[3], mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+depth[0]+depth[1]+depth[2]], norm_layer=norm_layer)
            for i in range(depth[3])])
        self.norm = nn.BatchNorm2d(embed_dim[-1])

        # Representation layer (pre-logits), applied after global pooling in forward().
        if representation_size:
            self.num_features = representation_size
            # BUGFIX: the fc layer previously received the embed_dim *list*,
            # which raised a TypeError; use the final stage width.
            self.pre_logits = nn.Sequential(OrderedDict([
                ('fc', nn.Linear(embed_dim[-1], representation_size)),
                ('act', nn.Tanh())
            ]))
        else:
            self.pre_logits = nn.Identity()

        # Classifier head — sized from num_features so it matches the
        # pre-logits output (identical to embed_dim[-1] when no pre-logits).
        self.head = nn.Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity()

        self.apply(self._init_weights)

    def _init_weights(self, m):
        """Truncated-normal init for Linear weights; unit/zero init for LayerNorm."""
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    @torch.jit.ignore
    def no_weight_decay(self):
        # NOTE(review): these names follow the ViT convention; this model's
        # pos_embed modules are convs inside blocks, so the set may be vestigial.
        return {'pos_embed', 'cls_token'}

    def get_classifier(self):
        """Return the classification head module."""
        return self.head

    def reset_classifier(self, num_classes, global_pool=''):
        """Replace the head for a new number of classes (0 -> Identity).

        BUGFIX: previously sized the Linear with the embed_dim list, which
        raised a TypeError; now uses num_features.
        """
        self.num_classes = num_classes
        self.head = nn.Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity()

    def forward_features(self, x):
        """Run the four stages and final BatchNorm; returns a (B, C, H, W) map."""
        x = self.patch_embed1(x)
        x = self.pos_drop(x)
        for blk in self.blocks1:
            x = blk(x)
        x = self.patch_embed2(x)
        for blk in self.blocks2:
            x = blk(x)
        x = self.patch_embed3(x)
        for blk in self.blocks3:
            x = blk(x)
        x = self.patch_embed4(x)
        for blk in self.blocks4:
            x = blk(x)
        return self.norm(x)

    def forward(self, x):
        x = self.forward_features(x)
        x = x.flatten(2).mean(-1)  # global average pool over spatial positions
        # BUGFIX: pre_logits (a Linear when representation_size is set) must see
        # the pooled (B, C) vector, not the 4D feature map as before.
        x = self.pre_logits(x)
        return self.head(x)
325
+
326
+
327
@register_model
def uniformer_small(pretrained=True, **kwargs):
    """UniFormer-Small: stage depths [3, 4, 8, 3], non-overlapping patch stem.

    NOTE(review): `pretrained` is accepted for registry compatibility but is
    ignored here — no checkpoint is downloaded or loaded.
    """
    model = UniFormer(
        depth=[3, 4, 8, 3],
        embed_dim=[64, 128, 320, 512],
        head_dim=64,
        mlp_ratio=4,
        qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, eps=1e-6),
        **kwargs)
    model.default_cfg = _cfg()
    return model
335
+
336
+
337
@register_model
def uniformer_small_plus(pretrained=True, **kwargs):
    """UniFormer-Small+: stage depths [3, 5, 9, 3] with the overlapped conv stem.

    NOTE(review): `pretrained` is accepted for registry compatibility but is
    ignored here — no checkpoint is downloaded or loaded.
    """
    model = UniFormer(
        depth=[3, 5, 9, 3],
        conv_stem=True,
        embed_dim=[64, 128, 320, 512],
        head_dim=64,
        mlp_ratio=4,
        qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, eps=1e-6),
        **kwargs)
    model.default_cfg = _cfg()
    return model
345
+
346
+
347
@register_model
def uniformer_base(pretrained=True, **kwargs):
    """UniFormer-Base: stage depths [5, 8, 20, 7].

    NOTE(review): `pretrained` is accepted for registry compatibility but is
    ignored here — no checkpoint is downloaded or loaded.
    """
    model = UniFormer(
        depth=[5, 8, 20, 7],
        embed_dim=[64, 128, 320, 512],
        head_dim=64,
        mlp_ratio=4,
        qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, eps=1e-6),
        **kwargs)
    model.default_cfg = _cfg()
    return model
355
+
356
+
357
@register_model
def uniformer_base_ls(pretrained=True, **kwargs):
    """UniFormer-Base with LayerScale enabled in the self-attention blocks.

    Flips the module-level `layer_scale` flag before construction so SABlock
    picks it up. NOTE(review): `pretrained` is accepted for registry
    compatibility but is ignored here — no checkpoint is downloaded or loaded.
    """
    global layer_scale
    layer_scale = True
    model = UniFormer(
        depth=[5, 8, 20, 7],
        embed_dim=[64, 128, 320, 512],
        head_dim=64,
        mlp_ratio=4,
        qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, eps=1e-6),
        **kwargs)
    model.default_cfg = _cfg()
    return model