SakuraD committed on
Commit
e8a9c3a
1 Parent(s): bc059ff

init image

Files changed (3)
  1. app.py +64 -16
  2. imagenet_class_index.py +1002 -0
  3. uniformer_light_image.py +535 -0
app.py CHANGED
@@ -9,7 +9,9 @@ from PIL import Image
 from decord import VideoReader
 from decord import cpu
 from uniformer_light_video import uniformer_xxs_video
+from uniformer_light_image import uniformer_xxs_image
 from kinetics_class_index import kinetics_classnames
+from imagenet_class_index import imagenet_classnames
 from transforms import (
     GroupNormalize, GroupScale, GroupCenterCrop,
     Stack, ToTorchFormatTensor
@@ -22,20 +24,24 @@ from huggingface_hub import hf_hub_download
 # Device on which to run the model
 # Set to cuda to load on GPU
 device = "cpu"
-model_path = hf_hub_download(repo_id="Andy1621/uniformer_light", filename="uniformer_xxs16_160_k400.pth")
+model_video_path = hf_hub_download(repo_id="Andy1621/uniformer_light", filename="uniformer_xxs16_160_k400.pth")
+model_image_path = hf_hub_download(repo_id="Andy1621/uniformer_light", filename="uniformer_xxs_160_in1k.pth")
 # Pick a pretrained model
-model = uniformer_xxs_video()
-state_dict = torch.load(model_path, map_location='cpu')
-model.load_state_dict(state_dict)
-
+model_video = uniformer_xxs_video()
+model_video.load_state_dict(torch.load(model_video_path, map_location='cpu'))
+model_image = uniformer_xxs_image()
+model_image.load_state_dict(torch.load(model_image_path, map_location='cpu'))
 # Set to eval mode and move to desired device
-model = model.to(device)
-model = model.eval()
+model_video = model_video.to(device).eval()
+model_image = model_image.to(device).eval()
 
 # Create an id to label name mapping
 kinetics_id_to_classname = {}
 for k, v in kinetics_classnames.items():
     kinetics_id_to_classname[k] = v
+imagenet_id_to_classname = {}
+for k, v in imagenet_classnames.items():
+    imagenet_id_to_classname[k] = v[1]
 
 
 def get_index(num_frames, num_segments=8):
@@ -74,7 +80,7 @@ def load_video(video_path):
     return torch_imgs
 
 
-def inference(video):
+def inference_video(video):
     vid = load_video(video)
 
     # The model expects inputs of shape: B x C x H x W
@@ -82,7 +88,7 @@ def inference(video):
     inputs = vid.reshape(1, TC//3, 3, H, W).permute(0, 2, 1, 3, 4)
 
     with torch.no_grad():
-        prediction = model(inputs)
+        prediction = model_video(inputs)
         prediction = F.softmax(prediction, dim=1).flatten()
 
     return {kinetics_id_to_classname[str(i)]: float(prediction[i]) for i in range(400)}
@@ -92,6 +98,32 @@ def set_example_video(example: list) -> dict:
     return gr.Video.update(value=example[0])
 
 
+def inference_image(img):
+    image = img
+    image_transform = T.Compose(
+        [
+            T.Resize(224),
+            T.CenterCrop(224),
+            T.ToTensor(),
+            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+        ]
+    )
+    image = image_transform(image)
+
+    # The model expects inputs of shape: B x C x H x W
+    image = image.unsqueeze(0)
+
+    with torch.no_grad():
+        prediction = model_image(image)
+        prediction = F.softmax(prediction, dim=1).flatten()
+
+    return {imagenet_id_to_classname[str(i)]: float(prediction[i]) for i in range(1000)}
+
+
+def set_example_image(example: list) -> dict:
+    return gr.Image.update(value=example[0])
+
+
 demo = gr.Blocks()
 with demo:
     gr.Markdown(
@@ -101,17 +133,31 @@ with demo:
         """
     )
 
-    with gr.Box():
-        with gr.Row():
+    with gr.Tab("Video"):
+        with gr.Box():
+            with gr.Row():
+                with gr.Column():
+                    with gr.Row():
+                        input_video = gr.Video(label='Input Video').style(height=360)
+                    with gr.Row():
+                        submit_video_button = gr.Button('Submit')
+                with gr.Column():
+                    label = gr.Label(num_top_classes=5)
+            with gr.Row():
+                example_videos = gr.Dataset(components=[input_video], samples=[['./videos/hitting_baseball.mp4'], ['./videos/hoverboarding.mp4'], ['./videos/yoga.mp4']])
+
+    with gr.Tab("Image"):
+        with gr.Box():
+            with gr.Row():
                 with gr.Column():
                     with gr.Row():
-                        input_video = gr.Video(label='Input Video')
+                        input_image = gr.Image(label='Input Image', type='pil').style(height=360)
                     with gr.Row():
-                        submit_button = gr.Button('Submit')
+                        submit_image_button = gr.Button('Submit')
                 with gr.Column():
                     label = gr.Label(num_top_classes=5)
-        with gr.Row():
-            example_videos = gr.Dataset(components=[input_video], samples=[['./videos/hitting_baseball.mp4'], ['./videos/hoverboarding.mp4'], ['./videos/yoga.mp4']])
+            with gr.Row():
+                example_images = gr.Dataset(components=[input_image], samples=[['./images/cat.png'], ['./images/dog.png'], ['./images/panda.png']])
 
     gr.Markdown(
         """
@@ -119,7 +165,9 @@ with demo:
         """
     )
 
-    submit_button.click(fn=inference, inputs=input_video, outputs=label)
+    submit_video_button.click(fn=inference_video, inputs=input_video, outputs=label)
     example_videos.click(fn=set_example_video, inputs=example_videos, outputs=example_videos.components)
+    submit_image_button.click(fn=inference_image, inputs=input_image, outputs=label)
+    example_images.click(fn=set_example_image, inputs=example_images, outputs=example_images.components)
 
 demo.launch(enable_queue=True)
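
For a quick local sanity check of the new image path, the snippet below replays what `inference_image` does outside of Gradio. It is not part of the commit; it assumes the downloaded checkpoint and the example image referenced above are available in the working directory.

```python
# Standalone sketch (not from the commit): mirrors the inference_image() steps.
import torch
import torch.nn.functional as F
import torchvision.transforms as T
from PIL import Image

from uniformer_light_image import uniformer_xxs_image
from imagenet_class_index import imagenet_classnames

model = uniformer_xxs_image()
model.load_state_dict(torch.load("uniformer_xxs_160_in1k.pth", map_location="cpu"))
model = model.eval()

transform = T.Compose([
    T.Resize(224),
    T.CenterCrop(224),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# B x C x H x W, as the model expects
img = transform(Image.open("./images/cat.png").convert("RGB")).unsqueeze(0)
with torch.no_grad():
    probs = F.softmax(model(img), dim=1).flatten()

top5 = probs.topk(5)
for p, i in zip(top5.values.tolist(), top5.indices.tolist()):
    print(imagenet_classnames[str(i)][1], round(p, 4))
```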
imagenet_class_index.py ADDED
@@ -0,0 +1,1002 @@
1
+ imagenet_classnames = {
2
+ "0": ["n01440764", "tench"],
3
+ "1": ["n01443537", "goldfish"],
4
+ "2": ["n01484850", "great_white_shark"],
5
+ "3": ["n01491361", "tiger_shark"],
6
+ "4": ["n01494475", "hammerhead"],
7
+ "5": ["n01496331", "electric_ray"],
8
+ "6": ["n01498041", "stingray"],
9
+ "7": ["n01514668", "cock"],
10
+ "8": ["n01514859", "hen"],
11
+ "9": ["n01518878", "ostrich"],
12
+ "10": ["n01530575", "brambling"],
13
+ "11": ["n01531178", "goldfinch"],
14
+ "12": ["n01532829", "house_finch"],
15
+ "13": ["n01534433", "junco"],
16
+ "14": ["n01537544", "indigo_bunting"],
17
+ "15": ["n01558993", "robin"],
18
+ "16": ["n01560419", "bulbul"],
19
+ "17": ["n01580077", "jay"],
20
+ "18": ["n01582220", "magpie"],
21
+ "19": ["n01592084", "chickadee"],
22
+ "20": ["n01601694", "water_ouzel"],
23
+ "21": ["n01608432", "kite"],
24
+ "22": ["n01614925", "bald_eagle"],
25
+ "23": ["n01616318", "vulture"],
26
+ "24": ["n01622779", "great_grey_owl"],
27
+ "25": ["n01629819", "European_fire_salamander"],
28
+ "26": ["n01630670", "common_newt"],
29
+ "27": ["n01631663", "eft"],
30
+ "28": ["n01632458", "spotted_salamander"],
31
+ "29": ["n01632777", "axolotl"],
32
+ "30": ["n01641577", "bullfrog"],
33
+ "31": ["n01644373", "tree_frog"],
34
+ "32": ["n01644900", "tailed_frog"],
35
+ "33": ["n01664065", "loggerhead"],
36
+ "34": ["n01665541", "leatherback_turtle"],
37
+ "35": ["n01667114", "mud_turtle"],
38
+ "36": ["n01667778", "terrapin"],
39
+ "37": ["n01669191", "box_turtle"],
40
+ "38": ["n01675722", "banded_gecko"],
41
+ "39": ["n01677366", "common_iguana"],
42
+ "40": ["n01682714", "American_chameleon"],
43
+ "41": ["n01685808", "whiptail"],
44
+ "42": ["n01687978", "agama"],
45
+ "43": ["n01688243", "frilled_lizard"],
46
+ "44": ["n01689811", "alligator_lizard"],
47
+ "45": ["n01692333", "Gila_monster"],
48
+ "46": ["n01693334", "green_lizard"],
49
+ "47": ["n01694178", "African_chameleon"],
50
+ "48": ["n01695060", "Komodo_dragon"],
51
+ "49": ["n01697457", "African_crocodile"],
52
+ "50": ["n01698640", "American_alligator"],
53
+ "51": ["n01704323", "triceratops"],
54
+ "52": ["n01728572", "thunder_snake"],
55
+ "53": ["n01728920", "ringneck_snake"],
56
+ "54": ["n01729322", "hognose_snake"],
57
+ "55": ["n01729977", "green_snake"],
58
+ "56": ["n01734418", "king_snake"],
59
+ "57": ["n01735189", "garter_snake"],
60
+ "58": ["n01737021", "water_snake"],
61
+ "59": ["n01739381", "vine_snake"],
62
+ "60": ["n01740131", "night_snake"],
63
+ "61": ["n01742172", "boa_constrictor"],
64
+ "62": ["n01744401", "rock_python"],
65
+ "63": ["n01748264", "Indian_cobra"],
66
+ "64": ["n01749939", "green_mamba"],
67
+ "65": ["n01751748", "sea_snake"],
68
+ "66": ["n01753488", "horned_viper"],
69
+ "67": ["n01755581", "diamondback"],
70
+ "68": ["n01756291", "sidewinder"],
71
+ "69": ["n01768244", "trilobite"],
72
+ "70": ["n01770081", "harvestman"],
73
+ "71": ["n01770393", "scorpion"],
74
+ "72": ["n01773157", "black_and_gold_garden_spider"],
75
+ "73": ["n01773549", "barn_spider"],
76
+ "74": ["n01773797", "garden_spider"],
77
+ "75": ["n01774384", "black_widow"],
78
+ "76": ["n01774750", "tarantula"],
79
+ "77": ["n01775062", "wolf_spider"],
80
+ "78": ["n01776313", "tick"],
81
+ "79": ["n01784675", "centipede"],
82
+ "80": ["n01795545", "black_grouse"],
83
+ "81": ["n01796340", "ptarmigan"],
84
+ "82": ["n01797886", "ruffed_grouse"],
85
+ "83": ["n01798484", "prairie_chicken"],
86
+ "84": ["n01806143", "peacock"],
87
+ "85": ["n01806567", "quail"],
88
+ "86": ["n01807496", "partridge"],
89
+ "87": ["n01817953", "African_grey"],
90
+ "88": ["n01818515", "macaw"],
91
+ "89": ["n01819313", "sulphur-crested_cockatoo"],
92
+ "90": ["n01820546", "lorikeet"],
93
+ "91": ["n01824575", "coucal"],
94
+ "92": ["n01828970", "bee_eater"],
95
+ "93": ["n01829413", "hornbill"],
96
+ "94": ["n01833805", "hummingbird"],
97
+ "95": ["n01843065", "jacamar"],
98
+ "96": ["n01843383", "toucan"],
99
+ "97": ["n01847000", "drake"],
100
+ "98": ["n01855032", "red-breasted_merganser"],
101
+ "99": ["n01855672", "goose"],
102
+ "100": ["n01860187", "black_swan"],
103
+ "101": ["n01871265", "tusker"],
104
+ "102": ["n01872401", "echidna"],
105
+ "103": ["n01873310", "platypus"],
106
+ "104": ["n01877812", "wallaby"],
107
+ "105": ["n01882714", "koala"],
108
+ "106": ["n01883070", "wombat"],
109
+ "107": ["n01910747", "jellyfish"],
110
+ "108": ["n01914609", "sea_anemone"],
111
+ "109": ["n01917289", "brain_coral"],
112
+ "110": ["n01924916", "flatworm"],
113
+ "111": ["n01930112", "nematode"],
114
+ "112": ["n01943899", "conch"],
115
+ "113": ["n01944390", "snail"],
116
+ "114": ["n01945685", "slug"],
117
+ "115": ["n01950731", "sea_slug"],
118
+ "116": ["n01955084", "chiton"],
119
+ "117": ["n01968897", "chambered_nautilus"],
120
+ "118": ["n01978287", "Dungeness_crab"],
121
+ "119": ["n01978455", "rock_crab"],
122
+ "120": ["n01980166", "fiddler_crab"],
123
+ "121": ["n01981276", "king_crab"],
124
+ "122": ["n01983481", "American_lobster"],
125
+ "123": ["n01984695", "spiny_lobster"],
126
+ "124": ["n01985128", "crayfish"],
127
+ "125": ["n01986214", "hermit_crab"],
128
+ "126": ["n01990800", "isopod"],
129
+ "127": ["n02002556", "white_stork"],
130
+ "128": ["n02002724", "black_stork"],
131
+ "129": ["n02006656", "spoonbill"],
132
+ "130": ["n02007558", "flamingo"],
133
+ "131": ["n02009229", "little_blue_heron"],
134
+ "132": ["n02009912", "American_egret"],
135
+ "133": ["n02011460", "bittern"],
136
+ "134": ["n02012849", "crane"],
137
+ "135": ["n02013706", "limpkin"],
138
+ "136": ["n02017213", "European_gallinule"],
139
+ "137": ["n02018207", "American_coot"],
140
+ "138": ["n02018795", "bustard"],
141
+ "139": ["n02025239", "ruddy_turnstone"],
142
+ "140": ["n02027492", "red-backed_sandpiper"],
143
+ "141": ["n02028035", "redshank"],
144
+ "142": ["n02033041", "dowitcher"],
145
+ "143": ["n02037110", "oystercatcher"],
146
+ "144": ["n02051845", "pelican"],
147
+ "145": ["n02056570", "king_penguin"],
148
+ "146": ["n02058221", "albatross"],
149
+ "147": ["n02066245", "grey_whale"],
150
+ "148": ["n02071294", "killer_whale"],
151
+ "149": ["n02074367", "dugong"],
152
+ "150": ["n02077923", "sea_lion"],
153
+ "151": ["n02085620", "Chihuahua"],
154
+ "152": ["n02085782", "Japanese_spaniel"],
155
+ "153": ["n02085936", "Maltese_dog"],
156
+ "154": ["n02086079", "Pekinese"],
157
+ "155": ["n02086240", "Shih-Tzu"],
158
+ "156": ["n02086646", "Blenheim_spaniel"],
159
+ "157": ["n02086910", "papillon"],
160
+ "158": ["n02087046", "toy_terrier"],
161
+ "159": ["n02087394", "Rhodesian_ridgeback"],
162
+ "160": ["n02088094", "Afghan_hound"],
163
+ "161": ["n02088238", "basset"],
164
+ "162": ["n02088364", "beagle"],
165
+ "163": ["n02088466", "bloodhound"],
166
+ "164": ["n02088632", "bluetick"],
167
+ "165": ["n02089078", "black-and-tan_coonhound"],
168
+ "166": ["n02089867", "Walker_hound"],
169
+ "167": ["n02089973", "English_foxhound"],
170
+ "168": ["n02090379", "redbone"],
171
+ "169": ["n02090622", "borzoi"],
172
+ "170": ["n02090721", "Irish_wolfhound"],
173
+ "171": ["n02091032", "Italian_greyhound"],
174
+ "172": ["n02091134", "whippet"],
175
+ "173": ["n02091244", "Ibizan_hound"],
176
+ "174": ["n02091467", "Norwegian_elkhound"],
177
+ "175": ["n02091635", "otterhound"],
178
+ "176": ["n02091831", "Saluki"],
179
+ "177": ["n02092002", "Scottish_deerhound"],
180
+ "178": ["n02092339", "Weimaraner"],
181
+ "179": ["n02093256", "Staffordshire_bullterrier"],
182
+ "180": ["n02093428", "American_Staffordshire_terrier"],
183
+ "181": ["n02093647", "Bedlington_terrier"],
184
+ "182": ["n02093754", "Border_terrier"],
185
+ "183": ["n02093859", "Kerry_blue_terrier"],
186
+ "184": ["n02093991", "Irish_terrier"],
187
+ "185": ["n02094114", "Norfolk_terrier"],
188
+ "186": ["n02094258", "Norwich_terrier"],
189
+ "187": ["n02094433", "Yorkshire_terrier"],
190
+ "188": ["n02095314", "wire-haired_fox_terrier"],
191
+ "189": ["n02095570", "Lakeland_terrier"],
192
+ "190": ["n02095889", "Sealyham_terrier"],
193
+ "191": ["n02096051", "Airedale"],
194
+ "192": ["n02096177", "cairn"],
195
+ "193": ["n02096294", "Australian_terrier"],
196
+ "194": ["n02096437", "Dandie_Dinmont"],
197
+ "195": ["n02096585", "Boston_bull"],
198
+ "196": ["n02097047", "miniature_schnauzer"],
199
+ "197": ["n02097130", "giant_schnauzer"],
200
+ "198": ["n02097209", "standard_schnauzer"],
201
+ "199": ["n02097298", "Scotch_terrier"],
202
+ "200": ["n02097474", "Tibetan_terrier"],
203
+ "201": ["n02097658", "silky_terrier"],
204
+ "202": ["n02098105", "soft-coated_wheaten_terrier"],
205
+ "203": ["n02098286", "West_Highland_white_terrier"],
206
+ "204": ["n02098413", "Lhasa"],
207
+ "205": ["n02099267", "flat-coated_retriever"],
208
+ "206": ["n02099429", "curly-coated_retriever"],
209
+ "207": ["n02099601", "golden_retriever"],
210
+ "208": ["n02099712", "Labrador_retriever"],
211
+ "209": ["n02099849", "Chesapeake_Bay_retriever"],
212
+ "210": ["n02100236", "German_short-haired_pointer"],
213
+ "211": ["n02100583", "vizsla"],
214
+ "212": ["n02100735", "English_setter"],
215
+ "213": ["n02100877", "Irish_setter"],
216
+ "214": ["n02101006", "Gordon_setter"],
217
+ "215": ["n02101388", "Brittany_spaniel"],
218
+ "216": ["n02101556", "clumber"],
219
+ "217": ["n02102040", "English_springer"],
220
+ "218": ["n02102177", "Welsh_springer_spaniel"],
221
+ "219": ["n02102318", "cocker_spaniel"],
222
+ "220": ["n02102480", "Sussex_spaniel"],
223
+ "221": ["n02102973", "Irish_water_spaniel"],
224
+ "222": ["n02104029", "kuvasz"],
225
+ "223": ["n02104365", "schipperke"],
226
+ "224": ["n02105056", "groenendael"],
227
+ "225": ["n02105162", "malinois"],
228
+ "226": ["n02105251", "briard"],
229
+ "227": ["n02105412", "kelpie"],
230
+ "228": ["n02105505", "komondor"],
231
+ "229": ["n02105641", "Old_English_sheepdog"],
232
+ "230": ["n02105855", "Shetland_sheepdog"],
233
+ "231": ["n02106030", "collie"],
234
+ "232": ["n02106166", "Border_collie"],
235
+ "233": ["n02106382", "Bouvier_des_Flandres"],
236
+ "234": ["n02106550", "Rottweiler"],
237
+ "235": ["n02106662", "German_shepherd"],
238
+ "236": ["n02107142", "Doberman"],
239
+ "237": ["n02107312", "miniature_pinscher"],
240
+ "238": ["n02107574", "Greater_Swiss_Mountain_dog"],
241
+ "239": ["n02107683", "Bernese_mountain_dog"],
242
+ "240": ["n02107908", "Appenzeller"],
243
+ "241": ["n02108000", "EntleBucher"],
244
+ "242": ["n02108089", "boxer"],
245
+ "243": ["n02108422", "bull_mastiff"],
246
+ "244": ["n02108551", "Tibetan_mastiff"],
247
+ "245": ["n02108915", "French_bulldog"],
248
+ "246": ["n02109047", "Great_Dane"],
249
+ "247": ["n02109525", "Saint_Bernard"],
250
+ "248": ["n02109961", "Eskimo_dog"],
251
+ "249": ["n02110063", "malamute"],
252
+ "250": ["n02110185", "Siberian_husky"],
253
+ "251": ["n02110341", "dalmatian"],
254
+ "252": ["n02110627", "affenpinscher"],
255
+ "253": ["n02110806", "basenji"],
256
+ "254": ["n02110958", "pug"],
257
+ "255": ["n02111129", "Leonberg"],
258
+ "256": ["n02111277", "Newfoundland"],
259
+ "257": ["n02111500", "Great_Pyrenees"],
260
+ "258": ["n02111889", "Samoyed"],
261
+ "259": ["n02112018", "Pomeranian"],
262
+ "260": ["n02112137", "chow"],
263
+ "261": ["n02112350", "keeshond"],
264
+ "262": ["n02112706", "Brabancon_griffon"],
265
+ "263": ["n02113023", "Pembroke"],
266
+ "264": ["n02113186", "Cardigan"],
267
+ "265": ["n02113624", "toy_poodle"],
268
+ "266": ["n02113712", "miniature_poodle"],
269
+ "267": ["n02113799", "standard_poodle"],
270
+ "268": ["n02113978", "Mexican_hairless"],
271
+ "269": ["n02114367", "timber_wolf"],
272
+ "270": ["n02114548", "white_wolf"],
273
+ "271": ["n02114712", "red_wolf"],
274
+ "272": ["n02114855", "coyote"],
275
+ "273": ["n02115641", "dingo"],
276
+ "274": ["n02115913", "dhole"],
277
+ "275": ["n02116738", "African_hunting_dog"],
278
+ "276": ["n02117135", "hyena"],
279
+ "277": ["n02119022", "red_fox"],
280
+ "278": ["n02119789", "kit_fox"],
281
+ "279": ["n02120079", "Arctic_fox"],
282
+ "280": ["n02120505", "grey_fox"],
283
+ "281": ["n02123045", "tabby"],
284
+ "282": ["n02123159", "tiger_cat"],
285
+ "283": ["n02123394", "Persian_cat"],
286
+ "284": ["n02123597", "Siamese_cat"],
287
+ "285": ["n02124075", "Egyptian_cat"],
288
+ "286": ["n02125311", "cougar"],
289
+ "287": ["n02127052", "lynx"],
290
+ "288": ["n02128385", "leopard"],
291
+ "289": ["n02128757", "snow_leopard"],
292
+ "290": ["n02128925", "jaguar"],
293
+ "291": ["n02129165", "lion"],
294
+ "292": ["n02129604", "tiger"],
295
+ "293": ["n02130308", "cheetah"],
296
+ "294": ["n02132136", "brown_bear"],
297
+ "295": ["n02133161", "American_black_bear"],
298
+ "296": ["n02134084", "ice_bear"],
299
+ "297": ["n02134418", "sloth_bear"],
300
+ "298": ["n02137549", "mongoose"],
301
+ "299": ["n02138441", "meerkat"],
302
+ "300": ["n02165105", "tiger_beetle"],
303
+ "301": ["n02165456", "ladybug"],
304
+ "302": ["n02167151", "ground_beetle"],
305
+ "303": ["n02168699", "long-horned_beetle"],
306
+ "304": ["n02169497", "leaf_beetle"],
307
+ "305": ["n02172182", "dung_beetle"],
308
+ "306": ["n02174001", "rhinoceros_beetle"],
309
+ "307": ["n02177972", "weevil"],
310
+ "308": ["n02190166", "fly"],
311
+ "309": ["n02206856", "bee"],
312
+ "310": ["n02219486", "ant"],
313
+ "311": ["n02226429", "grasshopper"],
314
+ "312": ["n02229544", "cricket"],
315
+ "313": ["n02231487", "walking_stick"],
316
+ "314": ["n02233338", "cockroach"],
317
+ "315": ["n02236044", "mantis"],
318
+ "316": ["n02256656", "cicada"],
319
+ "317": ["n02259212", "leafhopper"],
320
+ "318": ["n02264363", "lacewing"],
321
+ "319": ["n02268443", "dragonfly"],
322
+ "320": ["n02268853", "damselfly"],
323
+ "321": ["n02276258", "admiral"],
324
+ "322": ["n02277742", "ringlet"],
325
+ "323": ["n02279972", "monarch"],
326
+ "324": ["n02280649", "cabbage_butterfly"],
327
+ "325": ["n02281406", "sulphur_butterfly"],
328
+ "326": ["n02281787", "lycaenid"],
329
+ "327": ["n02317335", "starfish"],
330
+ "328": ["n02319095", "sea_urchin"],
331
+ "329": ["n02321529", "sea_cucumber"],
332
+ "330": ["n02325366", "wood_rabbit"],
333
+ "331": ["n02326432", "hare"],
334
+ "332": ["n02328150", "Angora"],
335
+ "333": ["n02342885", "hamster"],
336
+ "334": ["n02346627", "porcupine"],
337
+ "335": ["n02356798", "fox_squirrel"],
338
+ "336": ["n02361337", "marmot"],
339
+ "337": ["n02363005", "beaver"],
340
+ "338": ["n02364673", "guinea_pig"],
341
+ "339": ["n02389026", "sorrel"],
342
+ "340": ["n02391049", "zebra"],
343
+ "341": ["n02395406", "hog"],
344
+ "342": ["n02396427", "wild_boar"],
345
+ "343": ["n02397096", "warthog"],
346
+ "344": ["n02398521", "hippopotamus"],
347
+ "345": ["n02403003", "ox"],
348
+ "346": ["n02408429", "water_buffalo"],
349
+ "347": ["n02410509", "bison"],
350
+ "348": ["n02412080", "ram"],
351
+ "349": ["n02415577", "bighorn"],
352
+ "350": ["n02417914", "ibex"],
353
+ "351": ["n02422106", "hartebeest"],
354
+ "352": ["n02422699", "impala"],
355
+ "353": ["n02423022", "gazelle"],
356
+ "354": ["n02437312", "Arabian_camel"],
357
+ "355": ["n02437616", "llama"],
358
+ "356": ["n02441942", "weasel"],
359
+ "357": ["n02442845", "mink"],
360
+ "358": ["n02443114", "polecat"],
361
+ "359": ["n02443484", "black-footed_ferret"],
362
+ "360": ["n02444819", "otter"],
363
+ "361": ["n02445715", "skunk"],
364
+ "362": ["n02447366", "badger"],
365
+ "363": ["n02454379", "armadillo"],
366
+ "364": ["n02457408", "three-toed_sloth"],
367
+ "365": ["n02480495", "orangutan"],
368
+ "366": ["n02480855", "gorilla"],
369
+ "367": ["n02481823", "chimpanzee"],
370
+ "368": ["n02483362", "gibbon"],
371
+ "369": ["n02483708", "siamang"],
372
+ "370": ["n02484975", "guenon"],
373
+ "371": ["n02486261", "patas"],
374
+ "372": ["n02486410", "baboon"],
375
+ "373": ["n02487347", "macaque"],
376
+ "374": ["n02488291", "langur"],
377
+ "375": ["n02488702", "colobus"],
378
+ "376": ["n02489166", "proboscis_monkey"],
379
+ "377": ["n02490219", "marmoset"],
380
+ "378": ["n02492035", "capuchin"],
381
+ "379": ["n02492660", "howler_monkey"],
382
+ "380": ["n02493509", "titi"],
383
+ "381": ["n02493793", "spider_monkey"],
384
+ "382": ["n02494079", "squirrel_monkey"],
385
+ "383": ["n02497673", "Madagascar_cat"],
386
+ "384": ["n02500267", "indri"],
387
+ "385": ["n02504013", "Indian_elephant"],
388
+ "386": ["n02504458", "African_elephant"],
389
+ "387": ["n02509815", "lesser_panda"],
390
+ "388": ["n02510455", "giant_panda"],
391
+ "389": ["n02514041", "barracouta"],
392
+ "390": ["n02526121", "eel"],
393
+ "391": ["n02536864", "coho"],
394
+ "392": ["n02606052", "rock_beauty"],
395
+ "393": ["n02607072", "anemone_fish"],
396
+ "394": ["n02640242", "sturgeon"],
397
+ "395": ["n02641379", "gar"],
398
+ "396": ["n02643566", "lionfish"],
399
+ "397": ["n02655020", "puffer"],
400
+ "398": ["n02666196", "abacus"],
401
+ "399": ["n02667093", "abaya"],
402
+ "400": ["n02669723", "academic_gown"],
403
+ "401": ["n02672831", "accordion"],
404
+ "402": ["n02676566", "acoustic_guitar"],
405
+ "403": ["n02687172", "aircraft_carrier"],
406
+ "404": ["n02690373", "airliner"],
407
+ "405": ["n02692877", "airship"],
408
+ "406": ["n02699494", "altar"],
409
+ "407": ["n02701002", "ambulance"],
410
+ "408": ["n02704792", "amphibian"],
411
+ "409": ["n02708093", "analog_clock"],
412
+ "410": ["n02727426", "apiary"],
413
+ "411": ["n02730930", "apron"],
414
+ "412": ["n02747177", "ashcan"],
415
+ "413": ["n02749479", "assault_rifle"],
416
+ "414": ["n02769748", "backpack"],
417
+ "415": ["n02776631", "bakery"],
418
+ "416": ["n02777292", "balance_beam"],
419
+ "417": ["n02782093", "balloon"],
420
+ "418": ["n02783161", "ballpoint"],
421
+ "419": ["n02786058", "Band_Aid"],
422
+ "420": ["n02787622", "banjo"],
423
+ "421": ["n02788148", "bannister"],
424
+ "422": ["n02790996", "barbell"],
425
+ "423": ["n02791124", "barber_chair"],
426
+ "424": ["n02791270", "barbershop"],
427
+ "425": ["n02793495", "barn"],
428
+ "426": ["n02794156", "barometer"],
429
+ "427": ["n02795169", "barrel"],
430
+ "428": ["n02797295", "barrow"],
431
+ "429": ["n02799071", "baseball"],
432
+ "430": ["n02802426", "basketball"],
433
+ "431": ["n02804414", "bassinet"],
434
+ "432": ["n02804610", "bassoon"],
435
+ "433": ["n02807133", "bathing_cap"],
436
+ "434": ["n02808304", "bath_towel"],
437
+ "435": ["n02808440", "bathtub"],
438
+ "436": ["n02814533", "beach_wagon"],
439
+ "437": ["n02814860", "beacon"],
440
+ "438": ["n02815834", "beaker"],
441
+ "439": ["n02817516", "bearskin"],
442
+ "440": ["n02823428", "beer_bottle"],
443
+ "441": ["n02823750", "beer_glass"],
444
+ "442": ["n02825657", "bell_cote"],
445
+ "443": ["n02834397", "bib"],
446
+ "444": ["n02835271", "bicycle-built-for-two"],
447
+ "445": ["n02837789", "bikini"],
448
+ "446": ["n02840245", "binder"],
449
+ "447": ["n02841315", "binoculars"],
450
+ "448": ["n02843684", "birdhouse"],
451
+ "449": ["n02859443", "boathouse"],
452
+ "450": ["n02860847", "bobsled"],
453
+ "451": ["n02865351", "bolo_tie"],
454
+ "452": ["n02869837", "bonnet"],
455
+ "453": ["n02870880", "bookcase"],
456
+ "454": ["n02871525", "bookshop"],
457
+ "455": ["n02877765", "bottlecap"],
458
+ "456": ["n02879718", "bow"],
459
+ "457": ["n02883205", "bow_tie"],
460
+ "458": ["n02892201", "brass"],
461
+ "459": ["n02892767", "brassiere"],
462
+ "460": ["n02894605", "breakwater"],
463
+ "461": ["n02895154", "breastplate"],
464
+ "462": ["n02906734", "broom"],
465
+ "463": ["n02909870", "bucket"],
466
+ "464": ["n02910353", "buckle"],
467
+ "465": ["n02916936", "bulletproof_vest"],
468
+ "466": ["n02917067", "bullet_train"],
469
+ "467": ["n02927161", "butcher_shop"],
470
+ "468": ["n02930766", "cab"],
471
+ "469": ["n02939185", "caldron"],
472
+ "470": ["n02948072", "candle"],
473
+ "471": ["n02950826", "cannon"],
474
+ "472": ["n02951358", "canoe"],
475
+ "473": ["n02951585", "can_opener"],
476
+ "474": ["n02963159", "cardigan"],
477
+ "475": ["n02965783", "car_mirror"],
478
+ "476": ["n02966193", "carousel"],
479
+ "477": ["n02966687", "carpenter's_kit"],
480
+ "478": ["n02971356", "carton"],
481
+ "479": ["n02974003", "car_wheel"],
482
+ "480": ["n02977058", "cash_machine"],
483
+ "481": ["n02978881", "cassette"],
484
+ "482": ["n02979186", "cassette_player"],
485
+ "483": ["n02980441", "castle"],
486
+ "484": ["n02981792", "catamaran"],
487
+ "485": ["n02988304", "CD_player"],
488
+ "486": ["n02992211", "cello"],
489
+ "487": ["n02992529", "cellular_telephone"],
490
+ "488": ["n02999410", "chain"],
491
+ "489": ["n03000134", "chainlink_fence"],
492
+ "490": ["n03000247", "chain_mail"],
493
+ "491": ["n03000684", "chain_saw"],
494
+ "492": ["n03014705", "chest"],
495
+ "493": ["n03016953", "chiffonier"],
496
+ "494": ["n03017168", "chime"],
497
+ "495": ["n03018349", "china_cabinet"],
498
+ "496": ["n03026506", "Christmas_stocking"],
499
+ "497": ["n03028079", "church"],
500
+ "498": ["n03032252", "cinema"],
501
+ "499": ["n03041632", "cleaver"],
502
+ "500": ["n03042490", "cliff_dwelling"],
503
+ "501": ["n03045698", "cloak"],
504
+ "502": ["n03047690", "clog"],
505
+ "503": ["n03062245", "cocktail_shaker"],
506
+ "504": ["n03063599", "coffee_mug"],
507
+ "505": ["n03063689", "coffeepot"],
508
+ "506": ["n03065424", "coil"],
509
+ "507": ["n03075370", "combination_lock"],
510
+ "508": ["n03085013", "computer_keyboard"],
511
+ "509": ["n03089624", "confectionery"],
512
+ "510": ["n03095699", "container_ship"],
513
+ "511": ["n03100240", "convertible"],
514
+ "512": ["n03109150", "corkscrew"],
515
+ "513": ["n03110669", "cornet"],
516
+ "514": ["n03124043", "cowboy_boot"],
517
+ "515": ["n03124170", "cowboy_hat"],
518
+ "516": ["n03125729", "cradle"],
519
+ "517": ["n03126707", "crane"],
520
+ "518": ["n03127747", "crash_helmet"],
521
+ "519": ["n03127925", "crate"],
522
+ "520": ["n03131574", "crib"],
523
+ "521": ["n03133878", "Crock_Pot"],
524
+ "522": ["n03134739", "croquet_ball"],
525
+ "523": ["n03141823", "crutch"],
526
+ "524": ["n03146219", "cuirass"],
527
+ "525": ["n03160309", "dam"],
528
+ "526": ["n03179701", "desk"],
529
+ "527": ["n03180011", "desktop_computer"],
530
+ "528": ["n03187595", "dial_telephone"],
531
+ "529": ["n03188531", "diaper"],
532
+ "530": ["n03196217", "digital_clock"],
533
+ "531": ["n03197337", "digital_watch"],
534
+ "532": ["n03201208", "dining_table"],
535
+ "533": ["n03207743", "dishrag"],
536
+ "534": ["n03207941", "dishwasher"],
537
+ "535": ["n03208938", "disk_brake"],
538
+ "536": ["n03216828", "dock"],
539
+ "537": ["n03218198", "dogsled"],
540
+ "538": ["n03220513", "dome"],
541
+ "539": ["n03223299", "doormat"],
542
+ "540": ["n03240683", "drilling_platform"],
543
+ "541": ["n03249569", "drum"],
544
+ "542": ["n03250847", "drumstick"],
545
+ "543": ["n03255030", "dumbbell"],
546
+ "544": ["n03259280", "Dutch_oven"],
547
+ "545": ["n03271574", "electric_fan"],
548
+ "546": ["n03272010", "electric_guitar"],
549
+ "547": ["n03272562", "electric_locomotive"],
550
+ "548": ["n03290653", "entertainment_center"],
551
+ "549": ["n03291819", "envelope"],
552
+ "550": ["n03297495", "espresso_maker"],
553
+ "551": ["n03314780", "face_powder"],
554
+ "552": ["n03325584", "feather_boa"],
555
+ "553": ["n03337140", "file"],
556
+ "554": ["n03344393", "fireboat"],
557
+ "555": ["n03345487", "fire_engine"],
558
+ "556": ["n03347037", "fire_screen"],
559
+ "557": ["n03355925", "flagpole"],
560
+ "558": ["n03372029", "flute"],
561
+ "559": ["n03376595", "folding_chair"],
562
+ "560": ["n03379051", "football_helmet"],
563
+ "561": ["n03384352", "forklift"],
564
+ "562": ["n03388043", "fountain"],
565
+ "563": ["n03388183", "fountain_pen"],
566
+ "564": ["n03388549", "four-poster"],
567
+ "565": ["n03393912", "freight_car"],
568
+ "566": ["n03394916", "French_horn"],
569
+ "567": ["n03400231", "frying_pan"],
570
+ "568": ["n03404251", "fur_coat"],
571
+ "569": ["n03417042", "garbage_truck"],
572
+ "570": ["n03424325", "gasmask"],
573
+ "571": ["n03425413", "gas_pump"],
574
+ "572": ["n03443371", "goblet"],
575
+ "573": ["n03444034", "go-kart"],
576
+ "574": ["n03445777", "golf_ball"],
577
+ "575": ["n03445924", "golfcart"],
578
+ "576": ["n03447447", "gondola"],
579
+ "577": ["n03447721", "gong"],
580
+ "578": ["n03450230", "gown"],
581
+ "579": ["n03452741", "grand_piano"],
582
+ "580": ["n03457902", "greenhouse"],
583
+ "581": ["n03459775", "grille"],
584
+ "582": ["n03461385", "grocery_store"],
585
+ "583": ["n03467068", "guillotine"],
586
+ "584": ["n03476684", "hair_slide"],
587
+ "585": ["n03476991", "hair_spray"],
588
+ "586": ["n03478589", "half_track"],
589
+ "587": ["n03481172", "hammer"],
590
+ "588": ["n03482405", "hamper"],
591
+ "589": ["n03483316", "hand_blower"],
592
+ "590": ["n03485407", "hand-held_computer"],
593
+ "591": ["n03485794", "handkerchief"],
594
+ "592": ["n03492542", "hard_disc"],
595
+ "593": ["n03494278", "harmonica"],
596
+ "594": ["n03495258", "harp"],
597
+ "595": ["n03496892", "harvester"],
598
+ "596": ["n03498962", "hatchet"],
599
+ "597": ["n03527444", "holster"],
600
+ "598": ["n03529860", "home_theater"],
601
+ "599": ["n03530642", "honeycomb"],
602
+ "600": ["n03532672", "hook"],
603
+ "601": ["n03534580", "hoopskirt"],
604
+ "602": ["n03535780", "horizontal_bar"],
605
+ "603": ["n03538406", "horse_cart"],
606
+ "604": ["n03544143", "hourglass"],
607
+ "605": ["n03584254", "iPod"],
608
+ "606": ["n03584829", "iron"],
609
+ "607": ["n03590841", "jack-o'-lantern"],
610
+ "608": ["n03594734", "jean"],
611
+ "609": ["n03594945", "jeep"],
612
+ "610": ["n03595614", "jersey"],
613
+ "611": ["n03598930", "jigsaw_puzzle"],
614
+ "612": ["n03599486", "jinrikisha"],
615
+ "613": ["n03602883", "joystick"],
616
+ "614": ["n03617480", "kimono"],
617
+ "615": ["n03623198", "knee_pad"],
618
+ "616": ["n03627232", "knot"],
619
+ "617": ["n03630383", "lab_coat"],
620
+ "618": ["n03633091", "ladle"],
621
+ "619": ["n03637318", "lampshade"],
622
+ "620": ["n03642806", "laptop"],
623
+ "621": ["n03649909", "lawn_mower"],
624
+ "622": ["n03657121", "lens_cap"],
625
+ "623": ["n03658185", "letter_opener"],
626
+ "624": ["n03661043", "library"],
627
+ "625": ["n03662601", "lifeboat"],
628
+ "626": ["n03666591", "lighter"],
629
+ "627": ["n03670208", "limousine"],
630
+ "628": ["n03673027", "liner"],
631
+ "629": ["n03676483", "lipstick"],
632
+ "630": ["n03680355", "Loafer"],
633
+ "631": ["n03690938", "lotion"],
634
+ "632": ["n03691459", "loudspeaker"],
635
+ "633": ["n03692522", "loupe"],
636
+ "634": ["n03697007", "lumbermill"],
637
+ "635": ["n03706229", "magnetic_compass"],
638
+ "636": ["n03709823", "mailbag"],
639
+ "637": ["n03710193", "mailbox"],
640
+ "638": ["n03710637", "maillot"],
641
+ "639": ["n03710721", "maillot"],
642
+ "640": ["n03717622", "manhole_cover"],
643
+ "641": ["n03720891", "maraca"],
644
+ "642": ["n03721384", "marimba"],
645
+ "643": ["n03724870", "mask"],
646
+ "644": ["n03729826", "matchstick"],
647
+ "645": ["n03733131", "maypole"],
648
+ "646": ["n03733281", "maze"],
649
+ "647": ["n03733805", "measuring_cup"],
650
+ "648": ["n03742115", "medicine_chest"],
651
+ "649": ["n03743016", "megalith"],
652
+ "650": ["n03759954", "microphone"],
653
+ "651": ["n03761084", "microwave"],
654
+ "652": ["n03763968", "military_uniform"],
655
+ "653": ["n03764736", "milk_can"],
656
+ "654": ["n03769881", "minibus"],
657
+ "655": ["n03770439", "miniskirt"],
658
+ "656": ["n03770679", "minivan"],
659
+ "657": ["n03773504", "missile"],
660
+ "658": ["n03775071", "mitten"],
661
+ "659": ["n03775546", "mixing_bowl"],
662
+ "660": ["n03776460", "mobile_home"],
663
+ "661": ["n03777568", "Model_T"],
664
+ "662": ["n03777754", "modem"],
665
+ "663": ["n03781244", "monastery"],
666
+ "664": ["n03782006", "monitor"],
667
+ "665": ["n03785016", "moped"],
668
+ "666": ["n03786901", "mortar"],
669
+ "667": ["n03787032", "mortarboard"],
670
+ "668": ["n03788195", "mosque"],
671
+ "669": ["n03788365", "mosquito_net"],
672
+ "670": ["n03791053", "motor_scooter"],
673
+ "671": ["n03792782", "mountain_bike"],
674
+ "672": ["n03792972", "mountain_tent"],
675
+ "673": ["n03793489", "mouse"],
676
+ "674": ["n03794056", "mousetrap"],
677
+ "675": ["n03796401", "moving_van"],
678
+ "676": ["n03803284", "muzzle"],
679
+ "677": ["n03804744", "nail"],
680
+ "678": ["n03814639", "neck_brace"],
681
+ "679": ["n03814906", "necklace"],
682
+ "680": ["n03825788", "nipple"],
683
+ "681": ["n03832673", "notebook"],
684
+ "682": ["n03837869", "obelisk"],
685
+ "683": ["n03838899", "oboe"],
686
+ "684": ["n03840681", "ocarina"],
687
+ "685": ["n03841143", "odometer"],
688
+ "686": ["n03843555", "oil_filter"],
689
+ "687": ["n03854065", "organ"],
690
+ "688": ["n03857828", "oscilloscope"],
691
+ "689": ["n03866082", "overskirt"],
692
+ "690": ["n03868242", "oxcart"],
693
+ "691": ["n03868863", "oxygen_mask"],
694
+ "692": ["n03871628", "packet"],
695
+ "693": ["n03873416", "paddle"],
696
+ "694": ["n03874293", "paddlewheel"],
697
+ "695": ["n03874599", "padlock"],
698
+ "696": ["n03876231", "paintbrush"],
699
+ "697": ["n03877472", "pajama"],
700
+ "698": ["n03877845", "palace"],
701
+ "699": ["n03884397", "panpipe"],
702
+ "700": ["n03887697", "paper_towel"],
703
+ "701": ["n03888257", "parachute"],
704
+ "702": ["n03888605", "parallel_bars"],
705
+ "703": ["n03891251", "park_bench"],
706
+ "704": ["n03891332", "parking_meter"],
707
+ "705": ["n03895866", "passenger_car"],
708
+ "706": ["n03899768", "patio"],
709
+ "707": ["n03902125", "pay-phone"],
710
+ "708": ["n03903868", "pedestal"],
711
+ "709": ["n03908618", "pencil_box"],
712
+ "710": ["n03908714", "pencil_sharpener"],
713
+ "711": ["n03916031", "perfume"],
714
+ "712": ["n03920288", "Petri_dish"],
715
+ "713": ["n03924679", "photocopier"],
716
+ "714": ["n03929660", "pick"],
717
+ "715": ["n03929855", "pickelhaube"],
718
+ "716": ["n03930313", "picket_fence"],
719
+ "717": ["n03930630", "pickup"],
720
+ "718": ["n03933933", "pier"],
721
+ "719": ["n03935335", "piggy_bank"],
722
+ "720": ["n03937543", "pill_bottle"],
723
+ "721": ["n03938244", "pillow"],
724
+ "722": ["n03942813", "ping-pong_ball"],
725
+ "723": ["n03944341", "pinwheel"],
726
+ "724": ["n03947888", "pirate"],
727
+ "725": ["n03950228", "pitcher"],
728
+ "726": ["n03954731", "plane"],
729
+ "727": ["n03956157", "planetarium"],
730
+ "728": ["n03958227", "plastic_bag"],
731
+ "729": ["n03961711", "plate_rack"],
732
+ "730": ["n03967562", "plow"],
733
+ "731": ["n03970156", "plunger"],
734
+ "732": ["n03976467", "Polaroid_camera"],
735
+ "733": ["n03976657", "pole"],
736
+ "734": ["n03977966", "police_van"],
737
+ "735": ["n03980874", "poncho"],
738
+ "736": ["n03982430", "pool_table"],
739
+ "737": ["n03983396", "pop_bottle"],
740
+ "738": ["n03991062", "pot"],
741
+ "739": ["n03992509", "potter's_wheel"],
742
+ "740": ["n03995372", "power_drill"],
743
+ "741": ["n03998194", "prayer_rug"],
744
+ "742": ["n04004767", "printer"],
745
+ "743": ["n04005630", "prison"],
746
+ "744": ["n04008634", "projectile"],
747
+ "745": ["n04009552", "projector"],
748
+ "746": ["n04019541", "puck"],
749
+ "747": ["n04023962", "punching_bag"],
750
+ "748": ["n04026417", "purse"],
751
+ "749": ["n04033901", "quill"],
752
+ "750": ["n04033995", "quilt"],
753
+ "751": ["n04037443", "racer"],
754
+ "752": ["n04039381", "racket"],
755
+ "753": ["n04040759", "radiator"],
756
+ "754": ["n04041544", "radio"],
757
+ "755": ["n04044716", "radio_telescope"],
758
+ "756": ["n04049303", "rain_barrel"],
759
+ "757": ["n04065272", "recreational_vehicle"],
760
+ "758": ["n04067472", "reel"],
761
+ "759": ["n04069434", "reflex_camera"],
762
+ "760": ["n04070727", "refrigerator"],
763
+ "761": ["n04074963", "remote_control"],
764
+ "762": ["n04081281", "restaurant"],
765
+ "763": ["n04086273", "revolver"],
766
+ "764": ["n04090263", "rifle"],
767
+ "765": ["n04099969", "rocking_chair"],
768
+ "766": ["n04111531", "rotisserie"],
769
+ "767": ["n04116512", "rubber_eraser"],
770
+ "768": ["n04118538", "rugby_ball"],
771
+ "769": ["n04118776", "rule"],
772
+ "770": ["n04120489", "running_shoe"],
773
+ "771": ["n04125021", "safe"],
774
+ "772": ["n04127249", "safety_pin"],
775
+ "773": ["n04131690", "saltshaker"],
776
+ "774": ["n04133789", "sandal"],
777
+ "775": ["n04136333", "sarong"],
778
+ "776": ["n04141076", "sax"],
779
+ "777": ["n04141327", "scabbard"],
780
+ "778": ["n04141975", "scale"],
781
+ "779": ["n04146614", "school_bus"],
782
+ "780": ["n04147183", "schooner"],
783
+ "781": ["n04149813", "scoreboard"],
784
+ "782": ["n04152593", "screen"],
785
+ "783": ["n04153751", "screw"],
786
+ "784": ["n04154565", "screwdriver"],
787
+ "785": ["n04162706", "seat_belt"],
788
+ "786": ["n04179913", "sewing_machine"],
789
+ "787": ["n04192698", "shield"],
790
+ "788": ["n04200800", "shoe_shop"],
791
+ "789": ["n04201297", "shoji"],
792
+ "790": ["n04204238", "shopping_basket"],
793
+ "791": ["n04204347", "shopping_cart"],
794
+ "792": ["n04208210", "shovel"],
795
+ "793": ["n04209133", "shower_cap"],
796
+ "794": ["n04209239", "shower_curtain"],
797
+ "795": ["n04228054", "ski"],
798
+ "796": ["n04229816", "ski_mask"],
799
+ "797": ["n04235860", "sleeping_bag"],
800
+ "798": ["n04238763", "slide_rule"],
801
+ "799": ["n04239074", "sliding_door"],
802
+ "800": ["n04243546", "slot"],
803
+ "801": ["n04251144", "snorkel"],
804
+ "802": ["n04252077", "snowmobile"],
805
+ "803": ["n04252225", "snowplow"],
806
+ "804": ["n04254120", "soap_dispenser"],
807
+ "805": ["n04254680", "soccer_ball"],
808
+ "806": ["n04254777", "sock"],
809
+ "807": ["n04258138", "solar_dish"],
810
+ "808": ["n04259630", "sombrero"],
811
+ "809": ["n04263257", "soup_bowl"],
812
+ "810": ["n04264628", "space_bar"],
813
+ "811": ["n04265275", "space_heater"],
814
+ "812": ["n04266014", "space_shuttle"],
815
+ "813": ["n04270147", "spatula"],
816
+ "814": ["n04273569", "speedboat"],
817
+ "815": ["n04275548", "spider_web"],
818
+ "816": ["n04277352", "spindle"],
819
+ "817": ["n04285008", "sports_car"],
820
+ "818": ["n04286575", "spotlight"],
821
+ "819": ["n04296562", "stage"],
822
+ "820": ["n04310018", "steam_locomotive"],
823
+ "821": ["n04311004", "steel_arch_bridge"],
824
+ "822": ["n04311174", "steel_drum"],
825
+ "823": ["n04317175", "stethoscope"],
826
+ "824": ["n04325704", "stole"],
827
+ "825": ["n04326547", "stone_wall"],
828
+ "826": ["n04328186", "stopwatch"],
829
+ "827": ["n04330267", "stove"],
830
+ "828": ["n04332243", "strainer"],
831
+ "829": ["n04335435", "streetcar"],
832
+ "830": ["n04336792", "stretcher"],
833
+ "831": ["n04344873", "studio_couch"],
834
+ "832": ["n04346328", "stupa"],
835
+ "833": ["n04347754", "submarine"],
836
+ "834": ["n04350905", "suit"],
837
+ "835": ["n04355338", "sundial"],
838
+ "836": ["n04355933", "sunglass"],
839
+ "837": ["n04356056", "sunglasses"],
840
+ "838": ["n04357314", "sunscreen"],
841
+ "839": ["n04366367", "suspension_bridge"],
842
+ "840": ["n04367480", "swab"],
843
+ "841": ["n04370456", "sweatshirt"],
844
+ "842": ["n04371430", "swimming_trunks"],
845
+ "843": ["n04371774", "swing"],
846
+ "844": ["n04372370", "switch"],
847
+ "845": ["n04376876", "syringe"],
848
+ "846": ["n04380533", "table_lamp"],
849
+ "847": ["n04389033", "tank"],
850
+ "848": ["n04392985", "tape_player"],
851
+ "849": ["n04398044", "teapot"],
852
+ "850": ["n04399382", "teddy"],
853
+ "851": ["n04404412", "television"],
854
+ "852": ["n04409515", "tennis_ball"],
855
+ "853": ["n04417672", "thatch"],
856
+ "854": ["n04418357", "theater_curtain"],
857
+ "855": ["n04423845", "thimble"],
858
+ "856": ["n04428191", "thresher"],
859
+ "857": ["n04429376", "throne"],
860
+ "858": ["n04435653", "tile_roof"],
861
+ "859": ["n04442312", "toaster"],
862
+ "860": ["n04443257", "tobacco_shop"],
863
+ "861": ["n04447861", "toilet_seat"],
864
+ "862": ["n04456115", "torch"],
865
+ "863": ["n04458633", "totem_pole"],
866
+ "864": ["n04461696", "tow_truck"],
867
+ "865": ["n04462240", "toyshop"],
868
+ "866": ["n04465501", "tractor"],
869
+ "867": ["n04467665", "trailer_truck"],
870
+ "868": ["n04476259", "tray"],
871
+ "869": ["n04479046", "trench_coat"],
872
+ "870": ["n04482393", "tricycle"],
873
+ "871": ["n04483307", "trimaran"],
874
+ "872": ["n04485082", "tripod"],
875
+ "873": ["n04486054", "triumphal_arch"],
876
+ "874": ["n04487081", "trolleybus"],
877
+ "875": ["n04487394", "trombone"],
878
+ "876": ["n04493381", "tub"],
879
+ "877": ["n04501370", "turnstile"],
880
+ "878": ["n04505470", "typewriter_keyboard"],
881
+ "879": ["n04507155", "umbrella"],
882
+ "880": ["n04509417", "unicycle"],
883
+ "881": ["n04515003", "upright"],
884
+ "882": ["n04517823", "vacuum"],
885
+ "883": ["n04522168", "vase"],
886
+ "884": ["n04523525", "vault"],
887
+ "885": ["n04525038", "velvet"],
888
+ "886": ["n04525305", "vending_machine"],
889
+ "887": ["n04532106", "vestment"],
890
+ "888": ["n04532670", "viaduct"],
891
+ "889": ["n04536866", "violin"],
892
+ "890": ["n04540053", "volleyball"],
893
+ "891": ["n04542943", "waffle_iron"],
894
+ "892": ["n04548280", "wall_clock"],
895
+ "893": ["n04548362", "wallet"],
896
+ "894": ["n04550184", "wardrobe"],
897
+ "895": ["n04552348", "warplane"],
898
+ "896": ["n04553703", "washbasin"],
899
+ "897": ["n04554684", "washer"],
900
+ "898": ["n04557648", "water_bottle"],
901
+ "899": ["n04560804", "water_jug"],
902
+ "900": ["n04562935", "water_tower"],
903
+ "901": ["n04579145", "whiskey_jug"],
904
+ "902": ["n04579432", "whistle"],
905
+ "903": ["n04584207", "wig"],
906
+ "904": ["n04589890", "window_screen"],
907
+ "905": ["n04590129", "window_shade"],
908
+ "906": ["n04591157", "Windsor_tie"],
909
+ "907": ["n04591713", "wine_bottle"],
910
+ "908": ["n04592741", "wing"],
911
+ "909": ["n04596742", "wok"],
912
+ "910": ["n04597913", "wooden_spoon"],
913
+ "911": ["n04599235", "wool"],
914
+ "912": ["n04604644", "worm_fence"],
915
+ "913": ["n04606251", "wreck"],
916
+ "914": ["n04612504", "yawl"],
917
+ "915": ["n04613696", "yurt"],
918
+ "916": ["n06359193", "web_site"],
919
+ "917": ["n06596364", "comic_book"],
920
+ "918": ["n06785654", "crossword_puzzle"],
921
+ "919": ["n06794110", "street_sign"],
922
+ "920": ["n06874185", "traffic_light"],
923
+ "921": ["n07248320", "book_jacket"],
924
+ "922": ["n07565083", "menu"],
925
+ "923": ["n07579787", "plate"],
926
+ "924": ["n07583066", "guacamole"],
927
+ "925": ["n07584110", "consomme"],
928
+ "926": ["n07590611", "hot_pot"],
929
+ "927": ["n07613480", "trifle"],
930
+ "928": ["n07614500", "ice_cream"],
931
+ "929": ["n07615774", "ice_lolly"],
932
+ "930": ["n07684084", "French_loaf"],
933
+ "931": ["n07693725", "bagel"],
934
+ "932": ["n07695742", "pretzel"],
935
+ "933": ["n07697313", "cheeseburger"],
936
+ "934": ["n07697537", "hotdog"],
937
+ "935": ["n07711569", "mashed_potato"],
938
+ "936": ["n07714571", "head_cabbage"],
939
+ "937": ["n07714990", "broccoli"],
940
+ "938": ["n07715103", "cauliflower"],
941
+ "939": ["n07716358", "zucchini"],
942
+ "940": ["n07716906", "spaghetti_squash"],
943
+ "941": ["n07717410", "acorn_squash"],
944
+ "942": ["n07717556", "butternut_squash"],
945
+ "943": ["n07718472", "cucumber"],
946
+ "944": ["n07718747", "artichoke"],
947
+ "945": ["n07720875", "bell_pepper"],
948
+ "946": ["n07730033", "cardoon"],
949
+ "947": ["n07734744", "mushroom"],
950
+ "948": ["n07742313", "Granny_Smith"],
951
+ "949": ["n07745940", "strawberry"],
952
+ "950": ["n07747607", "orange"],
953
+ "951": ["n07749582", "lemon"],
954
+ "952": ["n07753113", "fig"],
955
+ "953": ["n07753275", "pineapple"],
956
+ "954": ["n07753592", "banana"],
957
+ "955": ["n07754684", "jackfruit"],
958
+ "956": ["n07760859", "custard_apple"],
959
+ "957": ["n07768694", "pomegranate"],
960
+ "958": ["n07802026", "hay"],
961
+ "959": ["n07831146", "carbonara"],
962
+ "960": ["n07836838", "chocolate_sauce"],
963
+ "961": ["n07860988", "dough"],
964
+ "962": ["n07871810", "meat_loaf"],
965
+ "963": ["n07873807", "pizza"],
966
+ "964": ["n07875152", "potpie"],
967
+ "965": ["n07880968", "burrito"],
968
+ "966": ["n07892512", "red_wine"],
969
+ "967": ["n07920052", "espresso"],
970
+ "968": ["n07930864", "cup"],
971
+ "969": ["n07932039", "eggnog"],
972
+ "970": ["n09193705", "alp"],
973
+ "971": ["n09229709", "bubble"],
974
+ "972": ["n09246464", "cliff"],
975
+ "973": ["n09256479", "coral_reef"],
976
+ "974": ["n09288635", "geyser"],
977
+ "975": ["n09332890", "lakeside"],
978
+ "976": ["n09399592", "promontory"],
979
+ "977": ["n09421951", "sandbar"],
980
+ "978": ["n09428293", "seashore"],
981
+ "979": ["n09468604", "valley"],
982
+ "980": ["n09472597", "volcano"],
983
+ "981": ["n09835506", "ballplayer"],
984
+ "982": ["n10148035", "groom"],
985
+ "983": ["n10565667", "scuba_diver"],
986
+ "984": ["n11879895", "rapeseed"],
987
+ "985": ["n11939491", "daisy"],
988
+ "986": ["n12057211", "yellow_lady's_slipper"],
989
+ "987": ["n12144580", "corn"],
990
+ "988": ["n12267677", "acorn"],
991
+ "989": ["n12620546", "hip"],
992
+ "990": ["n12768682", "buckeye"],
993
+ "991": ["n12985857", "coral_fungus"],
994
+ "992": ["n12998815", "agaric"],
995
+ "993": ["n13037406", "gyromitra"],
996
+ "994": ["n13040303", "stinkhorn"],
997
+ "995": ["n13044778", "earthstar"],
998
+ "996": ["n13052670", "hen-of-the-woods"],
999
+ "997": ["n13054560", "bolete"],
1000
+ "998": ["n13133613", "ear"],
1001
+ "999": ["n15075141", "toilet_tissue"]
1002
+ }
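
The mapping above follows the standard ImageNet-1k class-index layout: each key is the class id as a string, and each value is a `[WordNet synset, human-readable label]` pair. A minimal sketch (not part of the commit) of how app.py consumes it:

```python
# Index -> [synset, label]; app.py keeps only the label (v[1]).
from imagenet_class_index import imagenet_classnames

print(imagenet_classnames["207"])        # ['n02099601', 'golden_retriever']

imagenet_id_to_classname = {k: v[1] for k, v in imagenet_classnames.items()}
print(imagenet_id_to_classname["207"])   # golden_retriever
```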
uniformer_light_image.py ADDED
@@ -0,0 +1,535 @@
1
+ # All rights reserved.
2
+ from collections import OrderedDict
3
+ import torch
4
+ import torch.nn as nn
5
+ from functools import partial
6
+ import torch.nn.functional as F
7
+ import math
8
+ from timm.models.vision_transformer import _cfg
9
+ from timm.models.registry import register_model
10
+ from timm.models.layers import trunc_normal_, DropPath, to_2tuple
11
+
12
+
13
+ layer_scale = False
14
+ init_value = 1e-6
15
+ global_attn = None
16
+ token_indices = None
17
+
18
+
19
+ # code is from https://github.com/YifanXu74/Evo-ViT
20
+ def easy_gather(x, indices):
21
+ # x => B x N x C
22
+ # indices => B x N
23
+ B, N, C = x.shape
24
+ N_new = indices.shape[1]
25
+ offset = torch.arange(B, dtype=torch.long, device=x.device).view(B, 1) * N
26
+ indices = indices + offset
27
+ # only select the informative tokens
28
+ out = x.reshape(B * N, C)[indices.view(-1)].reshape(B, N_new, C)
29
+ return out
30
+
31
+
32
+ # code is from https://github.com/YifanXu74/Evo-ViT
33
+ def merge_tokens(x_drop, score):
34
+ # x_drop => B x N_drop
35
+ # score => B x N_drop
36
+ weight = score / torch.sum(score, dim=1, keepdim=True)
37
+ x_drop = weight.unsqueeze(-1) * x_drop
38
+ return torch.sum(x_drop, dim=1, keepdim=True)
39
+
40
+
41
+ class Mlp(nn.Module):
42
+ def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
43
+ super().__init__()
44
+ out_features = out_features or in_features
45
+ hidden_features = hidden_features or in_features
46
+ self.fc1 = nn.Linear(in_features, hidden_features)
47
+ self.act = act_layer()
48
+ self.fc2 = nn.Linear(hidden_features, out_features)
49
+ self.drop = nn.Dropout(drop)
50
+
51
+ def forward(self, x):
52
+ x = self.fc1(x)
53
+ x = self.act(x)
54
+ x = self.drop(x)
55
+ x = self.fc2(x)
56
+ x = self.drop(x)
57
+ return x
58
+
59
+
60
+ class CMlp(nn.Module):
61
+ def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
62
+ super().__init__()
63
+ out_features = out_features or in_features
64
+ hidden_features = hidden_features or in_features
65
+ self.fc1 = nn.Conv2d(in_features, hidden_features, 1)
66
+ self.act = act_layer()
67
+ self.fc2 = nn.Conv2d(hidden_features, out_features, 1)
68
+ self.drop = nn.Dropout(drop)
69
+
70
+ def forward(self, x):
71
+ x = self.fc1(x)
72
+ x = self.act(x)
73
+ x = self.drop(x)
74
+ x = self.fc2(x)
75
+ x = self.drop(x)
76
+ return x
77
+
78
+
79
+ class Attention(nn.Module):
80
+ def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0., trade_off=1):
81
+ super().__init__()
82
+ self.num_heads = num_heads
83
+ head_dim = dim // num_heads
84
+ # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
85
+ self.scale = qk_scale or head_dim ** -0.5
86
+
87
+ self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
88
+ self.attn_drop = nn.Dropout(attn_drop)
89
+ self.proj = nn.Linear(dim, dim)
90
+ self.proj_drop = nn.Dropout(proj_drop)
91
+ # updating weight for global score
92
+ self.trade_off = trade_off
93
+
94
+ def forward(self, x):
95
+ B, N, C = x.shape
96
+ qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
97
+ q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple)
98
+
99
+ attn = (q @ k.transpose(-2, -1)) * self.scale
100
+ attn = attn.softmax(dim=-1)
101
+
102
+ # update global score
103
+ global global_attn
104
+ tradeoff = self.trade_off
105
+ if isinstance(global_attn, int):
106
+ global_attn = torch.mean(attn[:, :, 0, 1:], dim=1)
107
+ elif global_attn.shape[1] == N - 1:
108
+ # no additional token and no pruning, update all global scores
109
+ cls_attn = torch.mean(attn[:, :, 0, 1:], dim=1)
110
+ global_attn = (1 - tradeoff) * global_attn + tradeoff * cls_attn
111
+ else:
112
+ # only update the informative tokens
113
+ # the first one is class token
114
+ # the last one is the representative token
115
+ cls_attn = torch.mean(attn[:, :, 0, 1:-1], dim=1)
116
+ if self.training:
117
+ temp_attn = (1 - tradeoff) * global_attn[:, :(N - 2)] + tradeoff * cls_attn
118
+ global_attn = torch.cat((temp_attn, global_attn[:, (N - 2):]), dim=1)
119
+ else:
120
+ # avoid torch.cat() for faster inference
121
+ global_attn[:, :(N - 2)] = (1 - tradeoff) * global_attn[:, :(N - 2)] + tradeoff * cls_attn
122
+
123
+ attn = self.attn_drop(attn)
124
+
125
+ x = (attn @ v).transpose(1, 2).reshape(B, N, C)
126
+ x = self.proj(x)
127
+ x = self.proj_drop(x)
128
+ return x
129
+
130
+
+ class CBlock(nn.Module):
+     def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
+                  drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm):
+         super().__init__()
+         self.pos_embed = nn.Conv2d(dim, dim, 3, padding=1, groups=dim)
+         self.norm1 = nn.BatchNorm2d(dim)
+         self.conv1 = nn.Conv2d(dim, dim, 1)
+         self.conv2 = nn.Conv2d(dim, dim, 1)
+         self.attn = nn.Conv2d(dim, dim, 5, padding=2, groups=dim)
+         # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
+         self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
+         self.norm2 = nn.BatchNorm2d(dim)
+         mlp_hidden_dim = int(dim * mlp_ratio)
+         self.mlp = CMlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
+         global layer_scale
+         self.ls = layer_scale
+         if self.ls:
+             global init_value
+             print(f"Use layer_scale: {layer_scale}, init_values: {init_value}")
+             self.gamma_1 = nn.Parameter(init_value * torch.ones((1, dim, 1, 1)), requires_grad=True)
+             self.gamma_2 = nn.Parameter(init_value * torch.ones((1, dim, 1, 1)), requires_grad=True)
+
+     def forward(self, x):
+         x = x + self.pos_embed(x)
+         if self.ls:
+             x = x + self.drop_path(self.gamma_1 * self.conv2(self.attn(self.conv1(self.norm1(x)))))
+             x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x)))
+         else:
+             x = x + self.drop_path(self.conv2(self.attn(self.conv1(self.norm1(x)))))
+             x = x + self.drop_path(self.mlp(self.norm2(x)))
+         return x
+
+
+ class EvoSABlock(nn.Module):
+     def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
+                  drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm, prune_ratio=1,
+                  trade_off=0, downsample=False):
+         super().__init__()
+         self.pos_embed = nn.Conv2d(dim, dim, 3, padding=1, groups=dim)
+         self.norm1 = norm_layer(dim)
+         self.attn = Attention(
+             dim,
+             num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
+             attn_drop=attn_drop, proj_drop=drop, trade_off=trade_off)
+         # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
+         self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
+         self.norm2 = norm_layer(dim)
+         mlp_hidden_dim = int(dim * mlp_ratio)
+         self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
+         self.prune_ratio = prune_ratio
+         self.downsample = downsample
+         if downsample:
+             self.avgpool = nn.AvgPool2d(kernel_size=2, stride=2)
+         global layer_scale
+         self.ls = layer_scale
+         if self.ls:
+             global init_value
+             print(f"Use layer_scale: {layer_scale}, init_values: {init_value}")
+             self.gamma_1 = nn.Parameter(init_value * torch.ones(dim), requires_grad=True)
+             self.gamma_2 = nn.Parameter(init_value * torch.ones(dim), requires_grad=True)
+             if self.prune_ratio != 1:
+                 self.gamma_3 = nn.Parameter(init_value * torch.ones(dim), requires_grad=True)
+
+     def forward(self, cls_token, x):
+         x = x + self.pos_embed(x)
+         B, C, H, W = x.shape
+         x = x.flatten(2).transpose(1, 2)
+
+         if self.prune_ratio == 1:
+             x = torch.cat([cls_token, x], dim=1)
+             if self.ls:
+                 x = x + self.drop_path(self.gamma_1 * self.attn(self.norm1(x)))
+                 x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x)))
+             else:
+                 x = x + self.drop_path(self.attn(self.norm1(x)))
+                 x = x + self.drop_path(self.mlp(self.norm2(x)))
+             cls_token, x = x[:, :1], x[:, 1:]
+             x = x.transpose(1, 2).reshape(B, C, H, W)
+             return cls_token, x
+         else:
+             global global_attn, token_indices
+             # calculate the number of informative tokens
+             N = x.shape[1]
+             N_ = int(N * self.prune_ratio)
+             # sort global attention
+             indices = torch.argsort(global_attn, dim=1, descending=True)
+
+             # concatenate x, global attention and token indices => x_ga_ti
+             # rearrange the tensor according to new indices
+             x_ga_ti = torch.cat((x, global_attn.unsqueeze(-1), token_indices.unsqueeze(-1)), dim=-1)
+             x_ga_ti = easy_gather(x_ga_ti, indices)
+             x_sorted, global_attn, token_indices = x_ga_ti[:, :, :-2], x_ga_ti[:, :, -2], x_ga_ti[:, :, -1]
+
+             # informative tokens
+             x_info = x_sorted[:, :N_]
+             # merge dropped tokens
+             x_drop = x_sorted[:, N_:]
+             score = global_attn[:, N_:]
+             # B x N_drop x C => B x 1 x C
+             rep_token = merge_tokens(x_drop, score)
+             # concatenate new tokens
+             x = torch.cat((cls_token, x_info, rep_token), dim=1)
+
+             if self.ls:
+                 # slow update
+                 fast_update = 0
+                 tmp_x = self.attn(self.norm1(x))
+                 fast_update = fast_update + tmp_x[:, -1:]
+                 x = x + self.drop_path(self.gamma_1 * tmp_x)
+                 tmp_x = self.mlp(self.norm2(x))
+                 fast_update = fast_update + tmp_x[:, -1:]
+                 x = x + self.drop_path(self.gamma_2 * tmp_x)
+                 # fast update
+                 x_drop = x_drop + self.gamma_3 * fast_update.expand(-1, N - N_, -1)
+             else:
+                 # slow update
+                 fast_update = 0
+                 tmp_x = self.attn(self.norm1(x))
+                 fast_update = fast_update + tmp_x[:, -1:]
+                 x = x + self.drop_path(tmp_x)
+                 tmp_x = self.mlp(self.norm2(x))
+                 fast_update = fast_update + tmp_x[:, -1:]
+                 x = x + self.drop_path(tmp_x)
+                 # fast update
+                 x_drop = x_drop + fast_update.expand(-1, N - N_, -1)
+
+             cls_token, x = x[:, :1, :], x[:, 1:-1, :]
+             if self.training:
+                 x_sorted = torch.cat((x, x_drop), dim=1)
+             else:
+                 x_sorted[:, N_:] = x_drop
+                 x_sorted[:, :N_] = x
+
+             # recover token
+             # scale for normalization
+             old_global_scale = torch.sum(global_attn, dim=1, keepdim=True)
+             # recover order
+             indices = torch.argsort(token_indices, dim=1)
+             x_ga_ti = torch.cat((x_sorted, global_attn.unsqueeze(-1), token_indices.unsqueeze(-1)), dim=-1)
+             x_ga_ti = easy_gather(x_ga_ti, indices)
+             x_patch, global_attn, token_indices = x_ga_ti[:, :, :-2], x_ga_ti[:, :, -2], x_ga_ti[:, :, -1]
+             x_patch = x_patch.transpose(1, 2).reshape(B, C, H, W)
+
+             if self.downsample:
+                 # downsample global attention
+                 global_attn = global_attn.reshape(B, 1, H, W)
+                 global_attn = self.avgpool(global_attn).view(B, -1)
+                 # normalize global attention
+                 new_global_scale = torch.sum(global_attn, dim=1, keepdim=True)
+                 scale = old_global_scale / new_global_scale
+                 global_attn = global_attn * scale
+
+             return cls_token, x_patch
+
+
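+ # A standalone sketch (not called by the model) of the pruning step in EvoSABlock above: tokens
+ # are ranked by their global score, the top `prune_ratio` fraction is kept, and the remaining
+ # tokens are collapsed into one representative token by a score-weighted average. The real block
+ # uses the `easy_gather` / `merge_tokens` helpers defined earlier in this file; the weighted
+ # average below only illustrates the assumed merging behaviour.
+ def _prune_and_merge_sketch(x, global_score, prune_ratio=0.5):
+     # x: B x N x C patch tokens, global_score: B x N importance scores
+     B, N, C = x.shape
+     N_keep = int(N * prune_ratio)
+     order = torch.argsort(global_score, dim=1, descending=True)  # most informative tokens first
+     x_sorted = torch.gather(x, 1, order.unsqueeze(-1).expand(-1, -1, C))
+     score_sorted = torch.gather(global_score, 1, order)
+     x_info, x_drop = x_sorted[:, :N_keep], x_sorted[:, N_keep:]
+     weight = score_sorted[:, N_keep:].softmax(dim=1).unsqueeze(-1)  # B x N_drop x 1
+     rep_token = (weight * x_drop).sum(dim=1, keepdim=True)  # B x 1 x C
+     return x_info, rep_token
+
+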
+ class PatchEmbed(nn.Module):
+     """ Image to Patch Embedding
+     """
+     def __init__(self, patch_size=16, in_chans=3, embed_dim=768):
+         super().__init__()
+         self.norm = nn.LayerNorm(embed_dim)
+         self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
+
+     def forward(self, x):
+         x = self.proj(x)
+         B, C, H, W = x.shape
+         x = x.flatten(2).transpose(1, 2)
+         x = self.norm(x)
+         x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
+         return x
+
+
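+ # Shape sketch for PatchEmbed above (illustration only): a stride-`patch_size` convolution
+ # downsamples the feature map, LayerNorm is applied over channels in the flattened B x N x C
+ # layout, and the result is reshaped back to B x C x H' x W'.
+ def _patch_embed_shape_sketch():
+     embed = PatchEmbed(patch_size=2, in_chans=64, embed_dim=128)
+     out = embed(torch.rand(1, 64, 40, 40))
+     return out.shape  # torch.Size([1, 128, 20, 20])
+
+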
+ class head_embedding(nn.Module):
+     def __init__(self, in_channels, out_channels):
+         super(head_embedding, self).__init__()
+         self.proj = nn.Sequential(
+             nn.Conv2d(in_channels, out_channels // 2, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)),
+             nn.BatchNorm2d(out_channels // 2),
+             nn.GELU(),
+             nn.Conv2d(out_channels // 2, out_channels, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)),
+             nn.BatchNorm2d(out_channels),
+         )
+
+     def forward(self, x):
+         x = self.proj(x)
+         return x
+
+
+ class middle_embedding(nn.Module):
+     def __init__(self, in_channels, out_channels):
+         super(middle_embedding, self).__init__()
+
+         self.proj = nn.Sequential(
+             nn.Conv2d(in_channels, out_channels, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)),
+             nn.BatchNorm2d(out_channels),
+         )
+
+     def forward(self, x):
+         x = self.proj(x)
+         return x
+
+
+ class UniFormer_Light(nn.Module):
+     """ UniFormer-Light for image classification.
+     Adapted from the Vision Transformer implementation of
+     `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` -
+         https://arxiv.org/abs/2010.11929
+     """
+     def __init__(self, depth=[3, 4, 8, 3], in_chans=3, num_classes=1000, embed_dim=[64, 128, 320, 512],
+                  head_dim=64, mlp_ratio=[4., 4., 4., 4.], qkv_bias=True, qk_scale=None, representation_size=None,
+                  drop_rate=0., attn_drop_rate=0., drop_path_rate=0., norm_layer=None, conv_stem=False,
+                  prune_ratio=[[], [], [1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 0.5]],
+                  trade_off=[[], [], [1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 0.5]]):
+         """
+         Args:
+             img_size (int, tuple): input image size
+             patch_size (int, tuple): patch size
+             in_chans (int): number of input channels
+             num_classes (int): number of classes for classification head
+             embed_dim (list): embedding dimension of each stage
+             depth (list): depth of each stage
+             head_dim (int): head dimension
+             mlp_ratio (list): ratio of mlp hidden dim to embedding dim
+             qkv_bias (bool): enable bias for qkv if True
+             qk_scale (float): override default qk scale of head_dim ** -0.5 if set
+             representation_size (Optional[int]): enable and set representation layer (pre-logits) to this value if set
+             drop_rate (float): dropout rate
+             attn_drop_rate (float): attention dropout rate
+             drop_path_rate (float): stochastic depth rate
+             norm_layer: (nn.Module): normalization layer
+         """
+         super().__init__()
+         self.num_classes = num_classes
+         self.num_features = self.embed_dim = embed_dim  # num_features for consistency with other models
+         norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)
+         if conv_stem:
+             self.patch_embed1 = head_embedding(in_channels=in_chans, out_channels=embed_dim[0])
+             self.patch_embed2 = PatchEmbed(
+                 patch_size=2, in_chans=embed_dim[0], embed_dim=embed_dim[1])
+             self.patch_embed3 = PatchEmbed(
+                 patch_size=2, in_chans=embed_dim[1], embed_dim=embed_dim[2])
+             self.patch_embed4 = PatchEmbed(
+                 patch_size=2, in_chans=embed_dim[2], embed_dim=embed_dim[3])
+         else:
+             self.patch_embed1 = PatchEmbed(
+                 patch_size=4, in_chans=in_chans, embed_dim=embed_dim[0])
+             self.patch_embed2 = PatchEmbed(
+                 patch_size=2, in_chans=embed_dim[0], embed_dim=embed_dim[1])
+             self.patch_embed3 = PatchEmbed(
+                 patch_size=2, in_chans=embed_dim[1], embed_dim=embed_dim[2])
+             self.patch_embed4 = PatchEmbed(
+                 patch_size=2, in_chans=embed_dim[2], embed_dim=embed_dim[3])
+
+         # class token
+         self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim[2]))
+         self.cls_upsample = nn.Linear(embed_dim[2], embed_dim[3])
+
+         self.pos_drop = nn.Dropout(p=drop_rate)
+         dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depth))]  # stochastic depth decay rule
+         num_heads = [dim // head_dim for dim in embed_dim]
+         self.blocks1 = nn.ModuleList([
+             CBlock(
+                 dim=embed_dim[0], num_heads=num_heads[0], mlp_ratio=mlp_ratio[0], qkv_bias=qkv_bias, qk_scale=qk_scale,
+                 drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer)
+             for i in range(depth[0])])
+         self.blocks2 = nn.ModuleList([
+             CBlock(
+                 dim=embed_dim[1], num_heads=num_heads[1], mlp_ratio=mlp_ratio[1], qkv_bias=qkv_bias, qk_scale=qk_scale,
+                 drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+depth[0]], norm_layer=norm_layer)
+             for i in range(depth[1])])
+         self.blocks3 = nn.ModuleList([
+             EvoSABlock(
+                 dim=embed_dim[2], num_heads=num_heads[2], mlp_ratio=mlp_ratio[2], qkv_bias=qkv_bias, qk_scale=qk_scale,
+                 drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+depth[0]+depth[1]], norm_layer=norm_layer,
+                 prune_ratio=prune_ratio[2][i], trade_off=trade_off[2][i],
+                 downsample=True if i == depth[2] - 1 else False)
+             for i in range(depth[2])])
+         self.blocks4 = nn.ModuleList([
+             EvoSABlock(
+                 dim=embed_dim[3], num_heads=num_heads[3], mlp_ratio=mlp_ratio[3], qkv_bias=qkv_bias, qk_scale=qk_scale,
+                 drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+depth[0]+depth[1]+depth[2]], norm_layer=norm_layer,
+                 prune_ratio=prune_ratio[3][i], trade_off=trade_off[3][i])
+             for i in range(depth[3])])
+         self.norm = nn.BatchNorm2d(embed_dim[-1])
+         self.norm_cls = nn.LayerNorm(embed_dim[-1])
+
+         # Representation layer
+         if representation_size:
+             self.num_features = representation_size
+             self.pre_logits = nn.Sequential(OrderedDict([
+                 ('fc', nn.Linear(embed_dim[-1], representation_size)),
+                 ('act', nn.Tanh())
+             ]))
+         else:
+             self.pre_logits = nn.Identity()
+
+         # Classifier head
+         self.head = nn.Linear(embed_dim[-1], num_classes) if num_classes > 0 else nn.Identity()
+         self.head_cls = nn.Linear(embed_dim[-1], num_classes) if num_classes > 0 else nn.Identity()
+
+         self.apply(self._init_weights)
+
+     def _init_weights(self, m):
+         if isinstance(m, nn.Linear):
+             trunc_normal_(m.weight, std=.02)
+             if isinstance(m, nn.Linear) and m.bias is not None:
+                 nn.init.constant_(m.bias, 0)
+         elif isinstance(m, nn.LayerNorm):
+             nn.init.constant_(m.bias, 0)
+             nn.init.constant_(m.weight, 1.0)
+
+     @torch.jit.ignore
+     def no_weight_decay(self):
+         return {'pos_embed', 'cls_token'}
+
+     def get_classifier(self):
+         return self.head
+
+     def reset_classifier(self, num_classes, global_pool=''):
+         self.num_classes = num_classes
+         self.head = nn.Linear(self.embed_dim[-1], num_classes) if num_classes > 0 else nn.Identity()
+
+     def forward_features(self, x):
+         B = x.shape[0]
+         x = self.patch_embed1(x)
+         x = self.pos_drop(x)
+         for blk in self.blocks1:
+             x = blk(x)
+         x = self.patch_embed2(x)
+         for blk in self.blocks2:
+             x = blk(x)
+         x = self.patch_embed3(x)
+         # add cls_token in stage3
+         cls_token = self.cls_token.expand(x.shape[0], -1, -1)
+         global global_attn, token_indices
+         global_attn = 0
+         token_indices = torch.arange(x.shape[2] * x.shape[3], dtype=torch.long, device=x.device).unsqueeze(0)
+         token_indices = token_indices.expand(x.shape[0], -1)
+         for blk in self.blocks3:
+             cls_token, x = blk(cls_token, x)
+         # upsample cls_token before stage4
+         cls_token = self.cls_upsample(cls_token)
+         x = self.patch_embed4(x)
+         # whether to reset the global attention? for now, simply use average pooling
+         token_indices = torch.arange(x.shape[2] * x.shape[3], dtype=torch.long, device=x.device).unsqueeze(0)
+         token_indices = token_indices.expand(x.shape[0], -1)
+         for blk in self.blocks4:
+             cls_token, x = blk(cls_token, x)
+         if self.training:
+             # layer normalization for cls_token
+             cls_token = self.norm_cls(cls_token)
+         x = self.norm(x)
+         x = self.pre_logits(x)
+         return cls_token, x
+
+     def forward(self, x):
+         cls_token, x = self.forward_features(x)
+         x = x.flatten(2).mean(-1)
+         if self.training:
+             x = self.head(x), self.head_cls(cls_token.squeeze(1))
+         else:
+             x = self.head(x)
+         return x
+
+
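+ # Output sketch for UniFormer_Light (illustration only): in training mode forward() returns a
+ # tuple (patch-head logits, cls-token-head logits), while in eval mode it returns a single
+ # logits tensor. The equal 0.5/0.5 weighting below is just one plausible way to combine the two
+ # heads and is not prescribed by this file.
+ def _dual_head_loss_sketch(model, images, labels):
+     import torch.nn.functional as F
+     model.train()
+     logits_patch, logits_cls = model(images)
+     return 0.5 * F.cross_entropy(logits_patch, labels) + 0.5 * F.cross_entropy(logits_cls, labels)
+
+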
+ def uniformer_xxs_image(**kwargs):
+     model = UniFormer_Light(
+         depth=[2, 5, 8, 2], conv_stem=True,
+         prune_ratio=[[], [], [1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], [0.5, 0.5]],
+         trade_off=[[], [], [1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], [0.5, 0.5]],
+         embed_dim=[56, 112, 224, 448], head_dim=28, mlp_ratio=[3, 3, 3, 3], qkv_bias=True,
+         **kwargs)
+     model.default_cfg = _cfg()
+     return model
+
+
+ def uniformer_xs_image(**kwargs):
+     model = UniFormer_Light(
+         depth=[3, 5, 9, 3], conv_stem=True,
+         prune_ratio=[[], [], [1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 0.5]],
+         trade_off=[[], [], [1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 0.5]],
+         embed_dim=[64, 128, 256, 512], head_dim=32, mlp_ratio=[3, 3, 3, 3], qkv_bias=True,
+         **kwargs)
+     model.default_cfg = _cfg()
+     return model
+
+
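+ # Usage sketch (illustration only): build the xxs image model, run one random 160x160 input in
+ # eval mode, and read off the top-5 class probabilities. Loading pretrained weights is omitted
+ # here; only the forward interface is shown.
+ def _example_inference():
+     model = uniformer_xxs_image()
+     model.eval()
+     with torch.no_grad():
+         logits = model(torch.rand(1, 3, 160, 160))
+         probs = logits.softmax(dim=-1)
+         top5 = probs.topk(5, dim=-1)
+     return top5.indices, top5.values
+
+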
+ if __name__ == '__main__':
+     import time
+     from fvcore.nn import FlopCountAnalysis
+     from fvcore.nn import flop_count_table
+     import numpy as np
+
+     seed = 4217
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+     torch.cuda.manual_seed(seed)
+     torch.cuda.manual_seed_all(seed)
+
+     model = uniformer_xxs_image()
+     # print(model)
+
+     flops = FlopCountAnalysis(model, torch.rand(1, 3, 160, 160))
+     s = time.time()
+     print(flop_count_table(flops, max_depth=1))
+     print(time.time() - s)