Spaces:
Running
Running
init image
Browse files- app.py +64 -16
- imagenet_class_index.py +1002 -0
- uniformer_light_image.py +535 -0
app.py
CHANGED
@@ -9,7 +9,9 @@ from PIL import Image
|
|
9 |
from decord import VideoReader
|
10 |
from decord import cpu
|
11 |
from uniformer_light_video import uniformer_xxs_video
|
|
|
12 |
from kinetics_class_index import kinetics_classnames
|
|
|
13 |
from transforms import (
|
14 |
GroupNormalize, GroupScale, GroupCenterCrop,
|
15 |
Stack, ToTorchFormatTensor
|
@@ -22,20 +24,24 @@ from huggingface_hub import hf_hub_download
|
|
22 |
# Device on which to run the model
|
23 |
# Set to cuda to load on GPU
|
24 |
device = "cpu"
|
25 |
-
|
|
|
26 |
# Pick a pretrained model
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
# Set to eval mode and move to desired device
|
32 |
-
|
33 |
-
|
34 |
|
35 |
# Create an id to label name mapping
|
36 |
kinetics_id_to_classname = {}
|
37 |
for k, v in kinetics_classnames.items():
|
38 |
kinetics_id_to_classname[k] = v
|
|
|
|
|
|
|
39 |
|
40 |
|
41 |
def get_index(num_frames, num_segments=8):
|
@@ -74,7 +80,7 @@ def load_video(video_path):
|
|
74 |
return torch_imgs
|
75 |
|
76 |
|
77 |
-
def
|
78 |
vid = load_video(video)
|
79 |
|
80 |
# The model expects inputs of shape: B x C x H x W
|
@@ -82,7 +88,7 @@ def inference(video):
|
|
82 |
inputs = vid.reshape(1, TC//3, 3, H, W).permute(0, 2, 1, 3, 4)
|
83 |
|
84 |
with torch.no_grad():
|
85 |
-
prediction =
|
86 |
prediction = F.softmax(prediction, dim=1).flatten()
|
87 |
|
88 |
return {kinetics_id_to_classname[str(i)]: float(prediction[i]) for i in range(400)}
|
@@ -92,6 +98,32 @@ def set_example_video(example: list) -> dict:
|
|
92 |
return gr.Video.update(value=example[0])
|
93 |
|
94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
demo = gr.Blocks()
|
96 |
with demo:
|
97 |
gr.Markdown(
|
@@ -101,17 +133,31 @@ with demo:
|
|
101 |
"""
|
102 |
)
|
103 |
|
104 |
-
with gr.
|
105 |
-
with gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
with gr.Column():
|
107 |
with gr.Row():
|
108 |
-
|
109 |
with gr.Row():
|
110 |
-
|
111 |
with gr.Column():
|
112 |
label = gr.Label(num_top_classes=5)
|
113 |
-
|
114 |
-
|
115 |
|
116 |
gr.Markdown(
|
117 |
"""
|
@@ -119,7 +165,9 @@ with demo:
|
|
119 |
"""
|
120 |
)
|
121 |
|
122 |
-
|
123 |
example_videos.click(fn=set_example_video, inputs=example_videos, outputs=example_videos.components)
|
|
|
|
|
124 |
|
125 |
demo.launch(enable_queue=True)
|
|
|
9 |
from decord import VideoReader
|
10 |
from decord import cpu
|
11 |
from uniformer_light_video import uniformer_xxs_video
|
12 |
+
from uniformer_light_image import uniformer_xxs_image
|
13 |
from kinetics_class_index import kinetics_classnames
|
14 |
+
from imagenet_class_index import imagenet_classnames
|
15 |
from transforms import (
|
16 |
GroupNormalize, GroupScale, GroupCenterCrop,
|
17 |
Stack, ToTorchFormatTensor
|
|
|
24 |
# Device on which to run the model
|
25 |
# Set to cuda to load on GPU
|
26 |
device = "cpu"
|
27 |
+
model_video_path = hf_hub_download(repo_id="Andy1621/uniformer_light", filename="uniformer_xxs16_160_k400.pth")
|
28 |
+
model_image_path = hf_hub_download(repo_id="Andy1621/uniformer_light", filename="uniformer_xxs_160_in1k.pth")
|
29 |
# Pick a pretrained model
|
30 |
+
model_video = uniformer_xxs_video()
|
31 |
+
model_video.load_state_dict(torch.load(model_video_path, map_location='cpu'))
|
32 |
+
model_image = uniformer_xxs_image()
|
33 |
+
model_image.load_state_dict(torch.load(model_image_path, map_location='cpu'))
|
34 |
# Set to eval mode and move to desired device
|
35 |
+
model_video = model_video.to(device).eval()
|
36 |
+
model_image = model_image.to(device).eval()
|
37 |
|
38 |
# Create an id to label name mapping
|
39 |
kinetics_id_to_classname = {}
|
40 |
for k, v in kinetics_classnames.items():
|
41 |
kinetics_id_to_classname[k] = v
|
42 |
+
imagenet_id_to_classname = {}
|
43 |
+
for k, v in imagenet_classnames.items():
|
44 |
+
imagenet_id_to_classname[k] = v[1]
|
45 |
|
46 |
|
47 |
def get_index(num_frames, num_segments=8):
|
|
|
80 |
return torch_imgs
|
81 |
|
82 |
|
83 |
+
def inference_video(video):
|
84 |
vid = load_video(video)
|
85 |
|
86 |
# The model expects inputs of shape: B x C x H x W
|
|
|
88 |
inputs = vid.reshape(1, TC//3, 3, H, W).permute(0, 2, 1, 3, 4)
|
89 |
|
90 |
with torch.no_grad():
|
91 |
+
prediction = model_video(inputs)
|
92 |
prediction = F.softmax(prediction, dim=1).flatten()
|
93 |
|
94 |
return {kinetics_id_to_classname[str(i)]: float(prediction[i]) for i in range(400)}
|
|
|
98 |
return gr.Video.update(value=example[0])
|
99 |
|
100 |
|
101 |
+
def inference_image(img):
|
102 |
+
image = img
|
103 |
+
image_transform = T.Compose(
|
104 |
+
[
|
105 |
+
T.Resize(224),
|
106 |
+
T.CenterCrop(224),
|
107 |
+
T.ToTensor(),
|
108 |
+
T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
|
109 |
+
]
|
110 |
+
)
|
111 |
+
image = image_transform(image)
|
112 |
+
|
113 |
+
# The model expects inputs of shape: B x C x H x W
|
114 |
+
image = image.unsqueeze(0)
|
115 |
+
|
116 |
+
with torch.no_grad():
|
117 |
+
prediction = model_image(image)
|
118 |
+
prediction = F.softmax(prediction, dim=1).flatten()
|
119 |
+
|
120 |
+
return {imagenet_id_to_classname[str(i)]: float(prediction[i]) for i in range(1000)}
|
121 |
+
|
122 |
+
|
123 |
+
def set_example_image(example: list) -> dict:
|
124 |
+
return gr.Image.update(value=example[0])
|
125 |
+
|
126 |
+
|
127 |
demo = gr.Blocks()
|
128 |
with demo:
|
129 |
gr.Markdown(
|
|
|
133 |
"""
|
134 |
)
|
135 |
|
136 |
+
with gr.Tab("Video"):
|
137 |
+
with gr.Box():
|
138 |
+
with gr.Row():
|
139 |
+
with gr.Column():
|
140 |
+
with gr.Row():
|
141 |
+
input_video = gr.Video(label='Input Video').style(height=360)
|
142 |
+
with gr.Row():
|
143 |
+
submit_video_button = gr.Button('Submit')
|
144 |
+
with gr.Column():
|
145 |
+
label = gr.Label(num_top_classes=5)
|
146 |
+
with gr.Row():
|
147 |
+
example_videos = gr.Dataset(components=[input_video], samples=[['./videos/hitting_baseball.mp4'], ['./videos/hoverboarding.mp4'], ['./videos/yoga.mp4']])
|
148 |
+
|
149 |
+
with gr.Tab("Image"):
|
150 |
+
with gr.Box():
|
151 |
+
with gr.Row():
|
152 |
with gr.Column():
|
153 |
with gr.Row():
|
154 |
+
input_image = gr.Image(label='Input Image', type='pil').style(height=360)
|
155 |
with gr.Row():
|
156 |
+
submit_image_button = gr.Button('Submit')
|
157 |
with gr.Column():
|
158 |
label = gr.Label(num_top_classes=5)
|
159 |
+
with gr.Row():
|
160 |
+
example_images = gr.Dataset(components=[input_image], samples=[['./images/cat.png'], ['./images/dog.png'], ['./images/panda.png']])
|
161 |
|
162 |
gr.Markdown(
|
163 |
"""
|
|
|
165 |
"""
|
166 |
)
|
167 |
|
168 |
+
submit_video_button.click(fn=inference_video, inputs=input_video, outputs=label)
|
169 |
example_videos.click(fn=set_example_video, inputs=example_videos, outputs=example_videos.components)
|
170 |
+
submit_image_button.click(fn=inference_image, inputs=input_image, outputs=label)
|
171 |
+
example_images.click(fn=set_example_image, inputs=example_images, outputs=example_images.components)
|
172 |
|
173 |
demo.launch(enable_queue=True)
|
imagenet_class_index.py
ADDED
@@ -0,0 +1,1002 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
imagenet_classnames = {
|
2 |
+
"0": ["n01440764", "tench"],
|
3 |
+
"1": ["n01443537", "goldfish"],
|
4 |
+
"2": ["n01484850", "great_white_shark"],
|
5 |
+
"3": ["n01491361", "tiger_shark"],
|
6 |
+
"4": ["n01494475", "hammerhead"],
|
7 |
+
"5": ["n01496331", "electric_ray"],
|
8 |
+
"6": ["n01498041", "stingray"],
|
9 |
+
"7": ["n01514668", "cock"],
|
10 |
+
"8": ["n01514859", "hen"],
|
11 |
+
"9": ["n01518878", "ostrich"],
|
12 |
+
"10": ["n01530575", "brambling"],
|
13 |
+
"11": ["n01531178", "goldfinch"],
|
14 |
+
"12": ["n01532829", "house_finch"],
|
15 |
+
"13": ["n01534433", "junco"],
|
16 |
+
"14": ["n01537544", "indigo_bunting"],
|
17 |
+
"15": ["n01558993", "robin"],
|
18 |
+
"16": ["n01560419", "bulbul"],
|
19 |
+
"17": ["n01580077", "jay"],
|
20 |
+
"18": ["n01582220", "magpie"],
|
21 |
+
"19": ["n01592084", "chickadee"],
|
22 |
+
"20": ["n01601694", "water_ouzel"],
|
23 |
+
"21": ["n01608432", "kite"],
|
24 |
+
"22": ["n01614925", "bald_eagle"],
|
25 |
+
"23": ["n01616318", "vulture"],
|
26 |
+
"24": ["n01622779", "great_grey_owl"],
|
27 |
+
"25": ["n01629819", "European_fire_salamander"],
|
28 |
+
"26": ["n01630670", "common_newt"],
|
29 |
+
"27": ["n01631663", "eft"],
|
30 |
+
"28": ["n01632458", "spotted_salamander"],
|
31 |
+
"29": ["n01632777", "axolotl"],
|
32 |
+
"30": ["n01641577", "bullfrog"],
|
33 |
+
"31": ["n01644373", "tree_frog"],
|
34 |
+
"32": ["n01644900", "tailed_frog"],
|
35 |
+
"33": ["n01664065", "loggerhead"],
|
36 |
+
"34": ["n01665541", "leatherback_turtle"],
|
37 |
+
"35": ["n01667114", "mud_turtle"],
|
38 |
+
"36": ["n01667778", "terrapin"],
|
39 |
+
"37": ["n01669191", "box_turtle"],
|
40 |
+
"38": ["n01675722", "banded_gecko"],
|
41 |
+
"39": ["n01677366", "common_iguana"],
|
42 |
+
"40": ["n01682714", "American_chameleon"],
|
43 |
+
"41": ["n01685808", "whiptail"],
|
44 |
+
"42": ["n01687978", "agama"],
|
45 |
+
"43": ["n01688243", "frilled_lizard"],
|
46 |
+
"44": ["n01689811", "alligator_lizard"],
|
47 |
+
"45": ["n01692333", "Gila_monster"],
|
48 |
+
"46": ["n01693334", "green_lizard"],
|
49 |
+
"47": ["n01694178", "African_chameleon"],
|
50 |
+
"48": ["n01695060", "Komodo_dragon"],
|
51 |
+
"49": ["n01697457", "African_crocodile"],
|
52 |
+
"50": ["n01698640", "American_alligator"],
|
53 |
+
"51": ["n01704323", "triceratops"],
|
54 |
+
"52": ["n01728572", "thunder_snake"],
|
55 |
+
"53": ["n01728920", "ringneck_snake"],
|
56 |
+
"54": ["n01729322", "hognose_snake"],
|
57 |
+
"55": ["n01729977", "green_snake"],
|
58 |
+
"56": ["n01734418", "king_snake"],
|
59 |
+
"57": ["n01735189", "garter_snake"],
|
60 |
+
"58": ["n01737021", "water_snake"],
|
61 |
+
"59": ["n01739381", "vine_snake"],
|
62 |
+
"60": ["n01740131", "night_snake"],
|
63 |
+
"61": ["n01742172", "boa_constrictor"],
|
64 |
+
"62": ["n01744401", "rock_python"],
|
65 |
+
"63": ["n01748264", "Indian_cobra"],
|
66 |
+
"64": ["n01749939", "green_mamba"],
|
67 |
+
"65": ["n01751748", "sea_snake"],
|
68 |
+
"66": ["n01753488", "horned_viper"],
|
69 |
+
"67": ["n01755581", "diamondback"],
|
70 |
+
"68": ["n01756291", "sidewinder"],
|
71 |
+
"69": ["n01768244", "trilobite"],
|
72 |
+
"70": ["n01770081", "harvestman"],
|
73 |
+
"71": ["n01770393", "scorpion"],
|
74 |
+
"72": ["n01773157", "black_and_gold_garden_spider"],
|
75 |
+
"73": ["n01773549", "barn_spider"],
|
76 |
+
"74": ["n01773797", "garden_spider"],
|
77 |
+
"75": ["n01774384", "black_widow"],
|
78 |
+
"76": ["n01774750", "tarantula"],
|
79 |
+
"77": ["n01775062", "wolf_spider"],
|
80 |
+
"78": ["n01776313", "tick"],
|
81 |
+
"79": ["n01784675", "centipede"],
|
82 |
+
"80": ["n01795545", "black_grouse"],
|
83 |
+
"81": ["n01796340", "ptarmigan"],
|
84 |
+
"82": ["n01797886", "ruffed_grouse"],
|
85 |
+
"83": ["n01798484", "prairie_chicken"],
|
86 |
+
"84": ["n01806143", "peacock"],
|
87 |
+
"85": ["n01806567", "quail"],
|
88 |
+
"86": ["n01807496", "partridge"],
|
89 |
+
"87": ["n01817953", "African_grey"],
|
90 |
+
"88": ["n01818515", "macaw"],
|
91 |
+
"89": ["n01819313", "sulphur-crested_cockatoo"],
|
92 |
+
"90": ["n01820546", "lorikeet"],
|
93 |
+
"91": ["n01824575", "coucal"],
|
94 |
+
"92": ["n01828970", "bee_eater"],
|
95 |
+
"93": ["n01829413", "hornbill"],
|
96 |
+
"94": ["n01833805", "hummingbird"],
|
97 |
+
"95": ["n01843065", "jacamar"],
|
98 |
+
"96": ["n01843383", "toucan"],
|
99 |
+
"97": ["n01847000", "drake"],
|
100 |
+
"98": ["n01855032", "red-breasted_merganser"],
|
101 |
+
"99": ["n01855672", "goose"],
|
102 |
+
"100": ["n01860187", "black_swan"],
|
103 |
+
"101": ["n01871265", "tusker"],
|
104 |
+
"102": ["n01872401", "echidna"],
|
105 |
+
"103": ["n01873310", "platypus"],
|
106 |
+
"104": ["n01877812", "wallaby"],
|
107 |
+
"105": ["n01882714", "koala"],
|
108 |
+
"106": ["n01883070", "wombat"],
|
109 |
+
"107": ["n01910747", "jellyfish"],
|
110 |
+
"108": ["n01914609", "sea_anemone"],
|
111 |
+
"109": ["n01917289", "brain_coral"],
|
112 |
+
"110": ["n01924916", "flatworm"],
|
113 |
+
"111": ["n01930112", "nematode"],
|
114 |
+
"112": ["n01943899", "conch"],
|
115 |
+
"113": ["n01944390", "snail"],
|
116 |
+
"114": ["n01945685", "slug"],
|
117 |
+
"115": ["n01950731", "sea_slug"],
|
118 |
+
"116": ["n01955084", "chiton"],
|
119 |
+
"117": ["n01968897", "chambered_nautilus"],
|
120 |
+
"118": ["n01978287", "Dungeness_crab"],
|
121 |
+
"119": ["n01978455", "rock_crab"],
|
122 |
+
"120": ["n01980166", "fiddler_crab"],
|
123 |
+
"121": ["n01981276", "king_crab"],
|
124 |
+
"122": ["n01983481", "American_lobster"],
|
125 |
+
"123": ["n01984695", "spiny_lobster"],
|
126 |
+
"124": ["n01985128", "crayfish"],
|
127 |
+
"125": ["n01986214", "hermit_crab"],
|
128 |
+
"126": ["n01990800", "isopod"],
|
129 |
+
"127": ["n02002556", "white_stork"],
|
130 |
+
"128": ["n02002724", "black_stork"],
|
131 |
+
"129": ["n02006656", "spoonbill"],
|
132 |
+
"130": ["n02007558", "flamingo"],
|
133 |
+
"131": ["n02009229", "little_blue_heron"],
|
134 |
+
"132": ["n02009912", "American_egret"],
|
135 |
+
"133": ["n02011460", "bittern"],
|
136 |
+
"134": ["n02012849", "crane"],
|
137 |
+
"135": ["n02013706", "limpkin"],
|
138 |
+
"136": ["n02017213", "European_gallinule"],
|
139 |
+
"137": ["n02018207", "American_coot"],
|
140 |
+
"138": ["n02018795", "bustard"],
|
141 |
+
"139": ["n02025239", "ruddy_turnstone"],
|
142 |
+
"140": ["n02027492", "red-backed_sandpiper"],
|
143 |
+
"141": ["n02028035", "redshank"],
|
144 |
+
"142": ["n02033041", "dowitcher"],
|
145 |
+
"143": ["n02037110", "oystercatcher"],
|
146 |
+
"144": ["n02051845", "pelican"],
|
147 |
+
"145": ["n02056570", "king_penguin"],
|
148 |
+
"146": ["n02058221", "albatross"],
|
149 |
+
"147": ["n02066245", "grey_whale"],
|
150 |
+
"148": ["n02071294", "killer_whale"],
|
151 |
+
"149": ["n02074367", "dugong"],
|
152 |
+
"150": ["n02077923", "sea_lion"],
|
153 |
+
"151": ["n02085620", "Chihuahua"],
|
154 |
+
"152": ["n02085782", "Japanese_spaniel"],
|
155 |
+
"153": ["n02085936", "Maltese_dog"],
|
156 |
+
"154": ["n02086079", "Pekinese"],
|
157 |
+
"155": ["n02086240", "Shih-Tzu"],
|
158 |
+
"156": ["n02086646", "Blenheim_spaniel"],
|
159 |
+
"157": ["n02086910", "papillon"],
|
160 |
+
"158": ["n02087046", "toy_terrier"],
|
161 |
+
"159": ["n02087394", "Rhodesian_ridgeback"],
|
162 |
+
"160": ["n02088094", "Afghan_hound"],
|
163 |
+
"161": ["n02088238", "basset"],
|
164 |
+
"162": ["n02088364", "beagle"],
|
165 |
+
"163": ["n02088466", "bloodhound"],
|
166 |
+
"164": ["n02088632", "bluetick"],
|
167 |
+
"165": ["n02089078", "black-and-tan_coonhound"],
|
168 |
+
"166": ["n02089867", "Walker_hound"],
|
169 |
+
"167": ["n02089973", "English_foxhound"],
|
170 |
+
"168": ["n02090379", "redbone"],
|
171 |
+
"169": ["n02090622", "borzoi"],
|
172 |
+
"170": ["n02090721", "Irish_wolfhound"],
|
173 |
+
"171": ["n02091032", "Italian_greyhound"],
|
174 |
+
"172": ["n02091134", "whippet"],
|
175 |
+
"173": ["n02091244", "Ibizan_hound"],
|
176 |
+
"174": ["n02091467", "Norwegian_elkhound"],
|
177 |
+
"175": ["n02091635", "otterhound"],
|
178 |
+
"176": ["n02091831", "Saluki"],
|
179 |
+
"177": ["n02092002", "Scottish_deerhound"],
|
180 |
+
"178": ["n02092339", "Weimaraner"],
|
181 |
+
"179": ["n02093256", "Staffordshire_bullterrier"],
|
182 |
+
"180": ["n02093428", "American_Staffordshire_terrier"],
|
183 |
+
"181": ["n02093647", "Bedlington_terrier"],
|
184 |
+
"182": ["n02093754", "Border_terrier"],
|
185 |
+
"183": ["n02093859", "Kerry_blue_terrier"],
|
186 |
+
"184": ["n02093991", "Irish_terrier"],
|
187 |
+
"185": ["n02094114", "Norfolk_terrier"],
|
188 |
+
"186": ["n02094258", "Norwich_terrier"],
|
189 |
+
"187": ["n02094433", "Yorkshire_terrier"],
|
190 |
+
"188": ["n02095314", "wire-haired_fox_terrier"],
|
191 |
+
"189": ["n02095570", "Lakeland_terrier"],
|
192 |
+
"190": ["n02095889", "Sealyham_terrier"],
|
193 |
+
"191": ["n02096051", "Airedale"],
|
194 |
+
"192": ["n02096177", "cairn"],
|
195 |
+
"193": ["n02096294", "Australian_terrier"],
|
196 |
+
"194": ["n02096437", "Dandie_Dinmont"],
|
197 |
+
"195": ["n02096585", "Boston_bull"],
|
198 |
+
"196": ["n02097047", "miniature_schnauzer"],
|
199 |
+
"197": ["n02097130", "giant_schnauzer"],
|
200 |
+
"198": ["n02097209", "standard_schnauzer"],
|
201 |
+
"199": ["n02097298", "Scotch_terrier"],
|
202 |
+
"200": ["n02097474", "Tibetan_terrier"],
|
203 |
+
"201": ["n02097658", "silky_terrier"],
|
204 |
+
"202": ["n02098105", "soft-coated_wheaten_terrier"],
|
205 |
+
"203": ["n02098286", "West_Highland_white_terrier"],
|
206 |
+
"204": ["n02098413", "Lhasa"],
|
207 |
+
"205": ["n02099267", "flat-coated_retriever"],
|
208 |
+
"206": ["n02099429", "curly-coated_retriever"],
|
209 |
+
"207": ["n02099601", "golden_retriever"],
|
210 |
+
"208": ["n02099712", "Labrador_retriever"],
|
211 |
+
"209": ["n02099849", "Chesapeake_Bay_retriever"],
|
212 |
+
"210": ["n02100236", "German_short-haired_pointer"],
|
213 |
+
"211": ["n02100583", "vizsla"],
|
214 |
+
"212": ["n02100735", "English_setter"],
|
215 |
+
"213": ["n02100877", "Irish_setter"],
|
216 |
+
"214": ["n02101006", "Gordon_setter"],
|
217 |
+
"215": ["n02101388", "Brittany_spaniel"],
|
218 |
+
"216": ["n02101556", "clumber"],
|
219 |
+
"217": ["n02102040", "English_springer"],
|
220 |
+
"218": ["n02102177", "Welsh_springer_spaniel"],
|
221 |
+
"219": ["n02102318", "cocker_spaniel"],
|
222 |
+
"220": ["n02102480", "Sussex_spaniel"],
|
223 |
+
"221": ["n02102973", "Irish_water_spaniel"],
|
224 |
+
"222": ["n02104029", "kuvasz"],
|
225 |
+
"223": ["n02104365", "schipperke"],
|
226 |
+
"224": ["n02105056", "groenendael"],
|
227 |
+
"225": ["n02105162", "malinois"],
|
228 |
+
"226": ["n02105251", "briard"],
|
229 |
+
"227": ["n02105412", "kelpie"],
|
230 |
+
"228": ["n02105505", "komondor"],
|
231 |
+
"229": ["n02105641", "Old_English_sheepdog"],
|
232 |
+
"230": ["n02105855", "Shetland_sheepdog"],
|
233 |
+
"231": ["n02106030", "collie"],
|
234 |
+
"232": ["n02106166", "Border_collie"],
|
235 |
+
"233": ["n02106382", "Bouvier_des_Flandres"],
|
236 |
+
"234": ["n02106550", "Rottweiler"],
|
237 |
+
"235": ["n02106662", "German_shepherd"],
|
238 |
+
"236": ["n02107142", "Doberman"],
|
239 |
+
"237": ["n02107312", "miniature_pinscher"],
|
240 |
+
"238": ["n02107574", "Greater_Swiss_Mountain_dog"],
|
241 |
+
"239": ["n02107683", "Bernese_mountain_dog"],
|
242 |
+
"240": ["n02107908", "Appenzeller"],
|
243 |
+
"241": ["n02108000", "EntleBucher"],
|
244 |
+
"242": ["n02108089", "boxer"],
|
245 |
+
"243": ["n02108422", "bull_mastiff"],
|
246 |
+
"244": ["n02108551", "Tibetan_mastiff"],
|
247 |
+
"245": ["n02108915", "French_bulldog"],
|
248 |
+
"246": ["n02109047", "Great_Dane"],
|
249 |
+
"247": ["n02109525", "Saint_Bernard"],
|
250 |
+
"248": ["n02109961", "Eskimo_dog"],
|
251 |
+
"249": ["n02110063", "malamute"],
|
252 |
+
"250": ["n02110185", "Siberian_husky"],
|
253 |
+
"251": ["n02110341", "dalmatian"],
|
254 |
+
"252": ["n02110627", "affenpinscher"],
|
255 |
+
"253": ["n02110806", "basenji"],
|
256 |
+
"254": ["n02110958", "pug"],
|
257 |
+
"255": ["n02111129", "Leonberg"],
|
258 |
+
"256": ["n02111277", "Newfoundland"],
|
259 |
+
"257": ["n02111500", "Great_Pyrenees"],
|
260 |
+
"258": ["n02111889", "Samoyed"],
|
261 |
+
"259": ["n02112018", "Pomeranian"],
|
262 |
+
"260": ["n02112137", "chow"],
|
263 |
+
"261": ["n02112350", "keeshond"],
|
264 |
+
"262": ["n02112706", "Brabancon_griffon"],
|
265 |
+
"263": ["n02113023", "Pembroke"],
|
266 |
+
"264": ["n02113186", "Cardigan"],
|
267 |
+
"265": ["n02113624", "toy_poodle"],
|
268 |
+
"266": ["n02113712", "miniature_poodle"],
|
269 |
+
"267": ["n02113799", "standard_poodle"],
|
270 |
+
"268": ["n02113978", "Mexican_hairless"],
|
271 |
+
"269": ["n02114367", "timber_wolf"],
|
272 |
+
"270": ["n02114548", "white_wolf"],
|
273 |
+
"271": ["n02114712", "red_wolf"],
|
274 |
+
"272": ["n02114855", "coyote"],
|
275 |
+
"273": ["n02115641", "dingo"],
|
276 |
+
"274": ["n02115913", "dhole"],
|
277 |
+
"275": ["n02116738", "African_hunting_dog"],
|
278 |
+
"276": ["n02117135", "hyena"],
|
279 |
+
"277": ["n02119022", "red_fox"],
|
280 |
+
"278": ["n02119789", "kit_fox"],
|
281 |
+
"279": ["n02120079", "Arctic_fox"],
|
282 |
+
"280": ["n02120505", "grey_fox"],
|
283 |
+
"281": ["n02123045", "tabby"],
|
284 |
+
"282": ["n02123159", "tiger_cat"],
|
285 |
+
"283": ["n02123394", "Persian_cat"],
|
286 |
+
"284": ["n02123597", "Siamese_cat"],
|
287 |
+
"285": ["n02124075", "Egyptian_cat"],
|
288 |
+
"286": ["n02125311", "cougar"],
|
289 |
+
"287": ["n02127052", "lynx"],
|
290 |
+
"288": ["n02128385", "leopard"],
|
291 |
+
"289": ["n02128757", "snow_leopard"],
|
292 |
+
"290": ["n02128925", "jaguar"],
|
293 |
+
"291": ["n02129165", "lion"],
|
294 |
+
"292": ["n02129604", "tiger"],
|
295 |
+
"293": ["n02130308", "cheetah"],
|
296 |
+
"294": ["n02132136", "brown_bear"],
|
297 |
+
"295": ["n02133161", "American_black_bear"],
|
298 |
+
"296": ["n02134084", "ice_bear"],
|
299 |
+
"297": ["n02134418", "sloth_bear"],
|
300 |
+
"298": ["n02137549", "mongoose"],
|
301 |
+
"299": ["n02138441", "meerkat"],
|
302 |
+
"300": ["n02165105", "tiger_beetle"],
|
303 |
+
"301": ["n02165456", "ladybug"],
|
304 |
+
"302": ["n02167151", "ground_beetle"],
|
305 |
+
"303": ["n02168699", "long-horned_beetle"],
|
306 |
+
"304": ["n02169497", "leaf_beetle"],
|
307 |
+
"305": ["n02172182", "dung_beetle"],
|
308 |
+
"306": ["n02174001", "rhinoceros_beetle"],
|
309 |
+
"307": ["n02177972", "weevil"],
|
310 |
+
"308": ["n02190166", "fly"],
|
311 |
+
"309": ["n02206856", "bee"],
|
312 |
+
"310": ["n02219486", "ant"],
|
313 |
+
"311": ["n02226429", "grasshopper"],
|
314 |
+
"312": ["n02229544", "cricket"],
|
315 |
+
"313": ["n02231487", "walking_stick"],
|
316 |
+
"314": ["n02233338", "cockroach"],
|
317 |
+
"315": ["n02236044", "mantis"],
|
318 |
+
"316": ["n02256656", "cicada"],
|
319 |
+
"317": ["n02259212", "leafhopper"],
|
320 |
+
"318": ["n02264363", "lacewing"],
|
321 |
+
"319": ["n02268443", "dragonfly"],
|
322 |
+
"320": ["n02268853", "damselfly"],
|
323 |
+
"321": ["n02276258", "admiral"],
|
324 |
+
"322": ["n02277742", "ringlet"],
|
325 |
+
"323": ["n02279972", "monarch"],
|
326 |
+
"324": ["n02280649", "cabbage_butterfly"],
|
327 |
+
"325": ["n02281406", "sulphur_butterfly"],
|
328 |
+
"326": ["n02281787", "lycaenid"],
|
329 |
+
"327": ["n02317335", "starfish"],
|
330 |
+
"328": ["n02319095", "sea_urchin"],
|
331 |
+
"329": ["n02321529", "sea_cucumber"],
|
332 |
+
"330": ["n02325366", "wood_rabbit"],
|
333 |
+
"331": ["n02326432", "hare"],
|
334 |
+
"332": ["n02328150", "Angora"],
|
335 |
+
"333": ["n02342885", "hamster"],
|
336 |
+
"334": ["n02346627", "porcupine"],
|
337 |
+
"335": ["n02356798", "fox_squirrel"],
|
338 |
+
"336": ["n02361337", "marmot"],
|
339 |
+
"337": ["n02363005", "beaver"],
|
340 |
+
"338": ["n02364673", "guinea_pig"],
|
341 |
+
"339": ["n02389026", "sorrel"],
|
342 |
+
"340": ["n02391049", "zebra"],
|
343 |
+
"341": ["n02395406", "hog"],
|
344 |
+
"342": ["n02396427", "wild_boar"],
|
345 |
+
"343": ["n02397096", "warthog"],
|
346 |
+
"344": ["n02398521", "hippopotamus"],
|
347 |
+
"345": ["n02403003", "ox"],
|
348 |
+
"346": ["n02408429", "water_buffalo"],
|
349 |
+
"347": ["n02410509", "bison"],
|
350 |
+
"348": ["n02412080", "ram"],
|
351 |
+
"349": ["n02415577", "bighorn"],
|
352 |
+
"350": ["n02417914", "ibex"],
|
353 |
+
"351": ["n02422106", "hartebeest"],
|
354 |
+
"352": ["n02422699", "impala"],
|
355 |
+
"353": ["n02423022", "gazelle"],
|
356 |
+
"354": ["n02437312", "Arabian_camel"],
|
357 |
+
"355": ["n02437616", "llama"],
|
358 |
+
"356": ["n02441942", "weasel"],
|
359 |
+
"357": ["n02442845", "mink"],
|
360 |
+
"358": ["n02443114", "polecat"],
|
361 |
+
"359": ["n02443484", "black-footed_ferret"],
|
362 |
+
"360": ["n02444819", "otter"],
|
363 |
+
"361": ["n02445715", "skunk"],
|
364 |
+
"362": ["n02447366", "badger"],
|
365 |
+
"363": ["n02454379", "armadillo"],
|
366 |
+
"364": ["n02457408", "three-toed_sloth"],
|
367 |
+
"365": ["n02480495", "orangutan"],
|
368 |
+
"366": ["n02480855", "gorilla"],
|
369 |
+
"367": ["n02481823", "chimpanzee"],
|
370 |
+
"368": ["n02483362", "gibbon"],
|
371 |
+
"369": ["n02483708", "siamang"],
|
372 |
+
"370": ["n02484975", "guenon"],
|
373 |
+
"371": ["n02486261", "patas"],
|
374 |
+
"372": ["n02486410", "baboon"],
|
375 |
+
"373": ["n02487347", "macaque"],
|
376 |
+
"374": ["n02488291", "langur"],
|
377 |
+
"375": ["n02488702", "colobus"],
|
378 |
+
"376": ["n02489166", "proboscis_monkey"],
|
379 |
+
"377": ["n02490219", "marmoset"],
|
380 |
+
"378": ["n02492035", "capuchin"],
|
381 |
+
"379": ["n02492660", "howler_monkey"],
|
382 |
+
"380": ["n02493509", "titi"],
|
383 |
+
"381": ["n02493793", "spider_monkey"],
|
384 |
+
"382": ["n02494079", "squirrel_monkey"],
|
385 |
+
"383": ["n02497673", "Madagascar_cat"],
|
386 |
+
"384": ["n02500267", "indri"],
|
387 |
+
"385": ["n02504013", "Indian_elephant"],
|
388 |
+
"386": ["n02504458", "African_elephant"],
|
389 |
+
"387": ["n02509815", "lesser_panda"],
|
390 |
+
"388": ["n02510455", "giant_panda"],
|
391 |
+
"389": ["n02514041", "barracouta"],
|
392 |
+
"390": ["n02526121", "eel"],
|
393 |
+
"391": ["n02536864", "coho"],
|
394 |
+
"392": ["n02606052", "rock_beauty"],
|
395 |
+
"393": ["n02607072", "anemone_fish"],
|
396 |
+
"394": ["n02640242", "sturgeon"],
|
397 |
+
"395": ["n02641379", "gar"],
|
398 |
+
"396": ["n02643566", "lionfish"],
|
399 |
+
"397": ["n02655020", "puffer"],
|
400 |
+
"398": ["n02666196", "abacus"],
|
401 |
+
"399": ["n02667093", "abaya"],
|
402 |
+
"400": ["n02669723", "academic_gown"],
|
403 |
+
"401": ["n02672831", "accordion"],
|
404 |
+
"402": ["n02676566", "acoustic_guitar"],
|
405 |
+
"403": ["n02687172", "aircraft_carrier"],
|
406 |
+
"404": ["n02690373", "airliner"],
|
407 |
+
"405": ["n02692877", "airship"],
|
408 |
+
"406": ["n02699494", "altar"],
|
409 |
+
"407": ["n02701002", "ambulance"],
|
410 |
+
"408": ["n02704792", "amphibian"],
|
411 |
+
"409": ["n02708093", "analog_clock"],
|
412 |
+
"410": ["n02727426", "apiary"],
|
413 |
+
"411": ["n02730930", "apron"],
|
414 |
+
"412": ["n02747177", "ashcan"],
|
415 |
+
"413": ["n02749479", "assault_rifle"],
|
416 |
+
"414": ["n02769748", "backpack"],
|
417 |
+
"415": ["n02776631", "bakery"],
|
418 |
+
"416": ["n02777292", "balance_beam"],
|
419 |
+
"417": ["n02782093", "balloon"],
|
420 |
+
"418": ["n02783161", "ballpoint"],
|
421 |
+
"419": ["n02786058", "Band_Aid"],
|
422 |
+
"420": ["n02787622", "banjo"],
|
423 |
+
"421": ["n02788148", "bannister"],
|
424 |
+
"422": ["n02790996", "barbell"],
|
425 |
+
"423": ["n02791124", "barber_chair"],
|
426 |
+
"424": ["n02791270", "barbershop"],
|
427 |
+
"425": ["n02793495", "barn"],
|
428 |
+
"426": ["n02794156", "barometer"],
|
429 |
+
"427": ["n02795169", "barrel"],
|
430 |
+
"428": ["n02797295", "barrow"],
|
431 |
+
"429": ["n02799071", "baseball"],
|
432 |
+
"430": ["n02802426", "basketball"],
|
433 |
+
"431": ["n02804414", "bassinet"],
|
434 |
+
"432": ["n02804610", "bassoon"],
|
435 |
+
"433": ["n02807133", "bathing_cap"],
|
436 |
+
"434": ["n02808304", "bath_towel"],
|
437 |
+
"435": ["n02808440", "bathtub"],
|
438 |
+
"436": ["n02814533", "beach_wagon"],
|
439 |
+
"437": ["n02814860", "beacon"],
|
440 |
+
"438": ["n02815834", "beaker"],
|
441 |
+
"439": ["n02817516", "bearskin"],
|
442 |
+
"440": ["n02823428", "beer_bottle"],
|
443 |
+
"441": ["n02823750", "beer_glass"],
|
444 |
+
"442": ["n02825657", "bell_cote"],
|
445 |
+
"443": ["n02834397", "bib"],
|
446 |
+
"444": ["n02835271", "bicycle-built-for-two"],
|
447 |
+
"445": ["n02837789", "bikini"],
|
448 |
+
"446": ["n02840245", "binder"],
|
449 |
+
"447": ["n02841315", "binoculars"],
|
450 |
+
"448": ["n02843684", "birdhouse"],
|
451 |
+
"449": ["n02859443", "boathouse"],
|
452 |
+
"450": ["n02860847", "bobsled"],
|
453 |
+
"451": ["n02865351", "bolo_tie"],
|
454 |
+
"452": ["n02869837", "bonnet"],
|
455 |
+
"453": ["n02870880", "bookcase"],
|
456 |
+
"454": ["n02871525", "bookshop"],
|
457 |
+
"455": ["n02877765", "bottlecap"],
|
458 |
+
"456": ["n02879718", "bow"],
|
459 |
+
"457": ["n02883205", "bow_tie"],
|
460 |
+
"458": ["n02892201", "brass"],
|
461 |
+
"459": ["n02892767", "brassiere"],
|
462 |
+
"460": ["n02894605", "breakwater"],
|
463 |
+
"461": ["n02895154", "breastplate"],
|
464 |
+
"462": ["n02906734", "broom"],
|
465 |
+
"463": ["n02909870", "bucket"],
|
466 |
+
"464": ["n02910353", "buckle"],
|
467 |
+
"465": ["n02916936", "bulletproof_vest"],
|
468 |
+
"466": ["n02917067", "bullet_train"],
|
469 |
+
"467": ["n02927161", "butcher_shop"],
|
470 |
+
"468": ["n02930766", "cab"],
|
471 |
+
"469": ["n02939185", "caldron"],
|
472 |
+
"470": ["n02948072", "candle"],
|
473 |
+
"471": ["n02950826", "cannon"],
|
474 |
+
"472": ["n02951358", "canoe"],
|
475 |
+
"473": ["n02951585", "can_opener"],
|
476 |
+
"474": ["n02963159", "cardigan"],
|
477 |
+
"475": ["n02965783", "car_mirror"],
|
478 |
+
"476": ["n02966193", "carousel"],
|
479 |
+
"477": ["n02966687", "carpenter's_kit"],
|
480 |
+
"478": ["n02971356", "carton"],
|
481 |
+
"479": ["n02974003", "car_wheel"],
|
482 |
+
"480": ["n02977058", "cash_machine"],
|
483 |
+
"481": ["n02978881", "cassette"],
|
484 |
+
"482": ["n02979186", "cassette_player"],
|
485 |
+
"483": ["n02980441", "castle"],
|
486 |
+
"484": ["n02981792", "catamaran"],
|
487 |
+
"485": ["n02988304", "CD_player"],
|
488 |
+
"486": ["n02992211", "cello"],
|
489 |
+
"487": ["n02992529", "cellular_telephone"],
|
490 |
+
"488": ["n02999410", "chain"],
|
491 |
+
"489": ["n03000134", "chainlink_fence"],
|
492 |
+
"490": ["n03000247", "chain_mail"],
|
493 |
+
"491": ["n03000684", "chain_saw"],
|
494 |
+
"492": ["n03014705", "chest"],
|
495 |
+
"493": ["n03016953", "chiffonier"],
|
496 |
+
"494": ["n03017168", "chime"],
|
497 |
+
"495": ["n03018349", "china_cabinet"],
|
498 |
+
"496": ["n03026506", "Christmas_stocking"],
|
499 |
+
"497": ["n03028079", "church"],
|
500 |
+
"498": ["n03032252", "cinema"],
|
501 |
+
"499": ["n03041632", "cleaver"],
|
502 |
+
"500": ["n03042490", "cliff_dwelling"],
|
503 |
+
"501": ["n03045698", "cloak"],
|
504 |
+
"502": ["n03047690", "clog"],
|
505 |
+
"503": ["n03062245", "cocktail_shaker"],
|
506 |
+
"504": ["n03063599", "coffee_mug"],
|
507 |
+
"505": ["n03063689", "coffeepot"],
|
508 |
+
"506": ["n03065424", "coil"],
|
509 |
+
"507": ["n03075370", "combination_lock"],
|
510 |
+
"508": ["n03085013", "computer_keyboard"],
|
511 |
+
"509": ["n03089624", "confectionery"],
|
512 |
+
"510": ["n03095699", "container_ship"],
|
513 |
+
"511": ["n03100240", "convertible"],
|
514 |
+
"512": ["n03109150", "corkscrew"],
|
515 |
+
"513": ["n03110669", "cornet"],
|
516 |
+
"514": ["n03124043", "cowboy_boot"],
|
517 |
+
"515": ["n03124170", "cowboy_hat"],
|
518 |
+
"516": ["n03125729", "cradle"],
|
519 |
+
"517": ["n03126707", "crane"],
|
520 |
+
"518": ["n03127747", "crash_helmet"],
|
521 |
+
"519": ["n03127925", "crate"],
|
522 |
+
"520": ["n03131574", "crib"],
|
523 |
+
"521": ["n03133878", "Crock_Pot"],
|
524 |
+
"522": ["n03134739", "croquet_ball"],
|
525 |
+
"523": ["n03141823", "crutch"],
|
526 |
+
"524": ["n03146219", "cuirass"],
|
527 |
+
"525": ["n03160309", "dam"],
|
528 |
+
"526": ["n03179701", "desk"],
|
529 |
+
"527": ["n03180011", "desktop_computer"],
|
530 |
+
"528": ["n03187595", "dial_telephone"],
|
531 |
+
"529": ["n03188531", "diaper"],
|
532 |
+
"530": ["n03196217", "digital_clock"],
|
533 |
+
"531": ["n03197337", "digital_watch"],
|
534 |
+
"532": ["n03201208", "dining_table"],
|
535 |
+
"533": ["n03207743", "dishrag"],
|
536 |
+
"534": ["n03207941", "dishwasher"],
|
537 |
+
"535": ["n03208938", "disk_brake"],
|
538 |
+
"536": ["n03216828", "dock"],
|
539 |
+
"537": ["n03218198", "dogsled"],
|
540 |
+
"538": ["n03220513", "dome"],
|
541 |
+
"539": ["n03223299", "doormat"],
|
542 |
+
"540": ["n03240683", "drilling_platform"],
|
543 |
+
"541": ["n03249569", "drum"],
|
544 |
+
"542": ["n03250847", "drumstick"],
|
545 |
+
"543": ["n03255030", "dumbbell"],
|
546 |
+
"544": ["n03259280", "Dutch_oven"],
|
547 |
+
"545": ["n03271574", "electric_fan"],
|
548 |
+
"546": ["n03272010", "electric_guitar"],
|
549 |
+
"547": ["n03272562", "electric_locomotive"],
|
550 |
+
"548": ["n03290653", "entertainment_center"],
|
551 |
+
"549": ["n03291819", "envelope"],
|
552 |
+
"550": ["n03297495", "espresso_maker"],
|
553 |
+
"551": ["n03314780", "face_powder"],
|
554 |
+
"552": ["n03325584", "feather_boa"],
|
555 |
+
"553": ["n03337140", "file"],
|
556 |
+
"554": ["n03344393", "fireboat"],
|
557 |
+
"555": ["n03345487", "fire_engine"],
|
558 |
+
"556": ["n03347037", "fire_screen"],
|
559 |
+
"557": ["n03355925", "flagpole"],
|
560 |
+
"558": ["n03372029", "flute"],
|
561 |
+
"559": ["n03376595", "folding_chair"],
|
562 |
+
"560": ["n03379051", "football_helmet"],
|
563 |
+
"561": ["n03384352", "forklift"],
|
564 |
+
"562": ["n03388043", "fountain"],
|
565 |
+
"563": ["n03388183", "fountain_pen"],
|
566 |
+
"564": ["n03388549", "four-poster"],
|
567 |
+
"565": ["n03393912", "freight_car"],
|
568 |
+
"566": ["n03394916", "French_horn"],
|
569 |
+
"567": ["n03400231", "frying_pan"],
|
570 |
+
"568": ["n03404251", "fur_coat"],
|
571 |
+
"569": ["n03417042", "garbage_truck"],
|
572 |
+
"570": ["n03424325", "gasmask"],
|
573 |
+
"571": ["n03425413", "gas_pump"],
|
574 |
+
"572": ["n03443371", "goblet"],
|
575 |
+
"573": ["n03444034", "go-kart"],
|
576 |
+
"574": ["n03445777", "golf_ball"],
|
577 |
+
"575": ["n03445924", "golfcart"],
|
578 |
+
"576": ["n03447447", "gondola"],
|
579 |
+
"577": ["n03447721", "gong"],
|
580 |
+
"578": ["n03450230", "gown"],
|
581 |
+
"579": ["n03452741", "grand_piano"],
|
582 |
+
"580": ["n03457902", "greenhouse"],
|
583 |
+
"581": ["n03459775", "grille"],
|
584 |
+
"582": ["n03461385", "grocery_store"],
|
585 |
+
"583": ["n03467068", "guillotine"],
|
586 |
+
"584": ["n03476684", "hair_slide"],
|
587 |
+
"585": ["n03476991", "hair_spray"],
|
588 |
+
"586": ["n03478589", "half_track"],
|
589 |
+
"587": ["n03481172", "hammer"],
|
590 |
+
"588": ["n03482405", "hamper"],
|
591 |
+
"589": ["n03483316", "hand_blower"],
|
592 |
+
"590": ["n03485407", "hand-held_computer"],
|
593 |
+
"591": ["n03485794", "handkerchief"],
|
594 |
+
"592": ["n03492542", "hard_disc"],
|
595 |
+
"593": ["n03494278", "harmonica"],
|
596 |
+
"594": ["n03495258", "harp"],
|
597 |
+
"595": ["n03496892", "harvester"],
|
598 |
+
"596": ["n03498962", "hatchet"],
|
599 |
+
"597": ["n03527444", "holster"],
|
600 |
+
"598": ["n03529860", "home_theater"],
|
601 |
+
"599": ["n03530642", "honeycomb"],
|
602 |
+
"600": ["n03532672", "hook"],
|
603 |
+
"601": ["n03534580", "hoopskirt"],
|
604 |
+
"602": ["n03535780", "horizontal_bar"],
|
605 |
+
"603": ["n03538406", "horse_cart"],
|
606 |
+
"604": ["n03544143", "hourglass"],
|
607 |
+
"605": ["n03584254", "iPod"],
|
608 |
+
"606": ["n03584829", "iron"],
|
609 |
+
"607": ["n03590841", "jack-o'-lantern"],
|
610 |
+
"608": ["n03594734", "jean"],
|
611 |
+
"609": ["n03594945", "jeep"],
|
612 |
+
"610": ["n03595614", "jersey"],
|
613 |
+
"611": ["n03598930", "jigsaw_puzzle"],
|
614 |
+
"612": ["n03599486", "jinrikisha"],
|
615 |
+
"613": ["n03602883", "joystick"],
|
616 |
+
"614": ["n03617480", "kimono"],
|
617 |
+
"615": ["n03623198", "knee_pad"],
|
618 |
+
"616": ["n03627232", "knot"],
|
619 |
+
"617": ["n03630383", "lab_coat"],
|
620 |
+
"618": ["n03633091", "ladle"],
|
621 |
+
"619": ["n03637318", "lampshade"],
|
622 |
+
"620": ["n03642806", "laptop"],
|
623 |
+
"621": ["n03649909", "lawn_mower"],
|
624 |
+
"622": ["n03657121", "lens_cap"],
|
625 |
+
"623": ["n03658185", "letter_opener"],
|
626 |
+
"624": ["n03661043", "library"],
|
627 |
+
"625": ["n03662601", "lifeboat"],
|
628 |
+
"626": ["n03666591", "lighter"],
|
629 |
+
"627": ["n03670208", "limousine"],
|
630 |
+
"628": ["n03673027", "liner"],
|
631 |
+
"629": ["n03676483", "lipstick"],
|
632 |
+
"630": ["n03680355", "Loafer"],
|
633 |
+
"631": ["n03690938", "lotion"],
|
634 |
+
"632": ["n03691459", "loudspeaker"],
|
635 |
+
"633": ["n03692522", "loupe"],
|
636 |
+
"634": ["n03697007", "lumbermill"],
|
637 |
+
"635": ["n03706229", "magnetic_compass"],
|
638 |
+
"636": ["n03709823", "mailbag"],
|
639 |
+
"637": ["n03710193", "mailbox"],
|
640 |
+
"638": ["n03710637", "maillot"],
|
641 |
+
"639": ["n03710721", "maillot"],
|
642 |
+
"640": ["n03717622", "manhole_cover"],
|
643 |
+
"641": ["n03720891", "maraca"],
|
644 |
+
"642": ["n03721384", "marimba"],
|
645 |
+
"643": ["n03724870", "mask"],
|
646 |
+
"644": ["n03729826", "matchstick"],
|
647 |
+
"645": ["n03733131", "maypole"],
|
648 |
+
"646": ["n03733281", "maze"],
|
649 |
+
"647": ["n03733805", "measuring_cup"],
|
650 |
+
"648": ["n03742115", "medicine_chest"],
|
651 |
+
"649": ["n03743016", "megalith"],
|
652 |
+
"650": ["n03759954", "microphone"],
|
653 |
+
"651": ["n03761084", "microwave"],
|
654 |
+
"652": ["n03763968", "military_uniform"],
|
655 |
+
"653": ["n03764736", "milk_can"],
|
656 |
+
"654": ["n03769881", "minibus"],
|
657 |
+
"655": ["n03770439", "miniskirt"],
|
658 |
+
"656": ["n03770679", "minivan"],
|
659 |
+
"657": ["n03773504", "missile"],
|
660 |
+
"658": ["n03775071", "mitten"],
|
661 |
+
"659": ["n03775546", "mixing_bowl"],
|
662 |
+
"660": ["n03776460", "mobile_home"],
|
663 |
+
"661": ["n03777568", "Model_T"],
|
664 |
+
"662": ["n03777754", "modem"],
|
665 |
+
"663": ["n03781244", "monastery"],
|
666 |
+
"664": ["n03782006", "monitor"],
|
667 |
+
"665": ["n03785016", "moped"],
|
668 |
+
"666": ["n03786901", "mortar"],
|
669 |
+
"667": ["n03787032", "mortarboard"],
|
670 |
+
"668": ["n03788195", "mosque"],
|
671 |
+
"669": ["n03788365", "mosquito_net"],
|
672 |
+
"670": ["n03791053", "motor_scooter"],
|
673 |
+
"671": ["n03792782", "mountain_bike"],
|
674 |
+
"672": ["n03792972", "mountain_tent"],
|
675 |
+
"673": ["n03793489", "mouse"],
|
676 |
+
"674": ["n03794056", "mousetrap"],
|
677 |
+
"675": ["n03796401", "moving_van"],
|
678 |
+
"676": ["n03803284", "muzzle"],
|
679 |
+
"677": ["n03804744", "nail"],
|
680 |
+
"678": ["n03814639", "neck_brace"],
|
681 |
+
"679": ["n03814906", "necklace"],
|
682 |
+
"680": ["n03825788", "nipple"],
|
683 |
+
"681": ["n03832673", "notebook"],
|
684 |
+
"682": ["n03837869", "obelisk"],
|
685 |
+
"683": ["n03838899", "oboe"],
|
686 |
+
"684": ["n03840681", "ocarina"],
|
687 |
+
"685": ["n03841143", "odometer"],
|
688 |
+
"686": ["n03843555", "oil_filter"],
|
689 |
+
"687": ["n03854065", "organ"],
|
690 |
+
"688": ["n03857828", "oscilloscope"],
|
691 |
+
"689": ["n03866082", "overskirt"],
|
692 |
+
"690": ["n03868242", "oxcart"],
|
693 |
+
"691": ["n03868863", "oxygen_mask"],
|
694 |
+
"692": ["n03871628", "packet"],
|
695 |
+
"693": ["n03873416", "paddle"],
|
696 |
+
"694": ["n03874293", "paddlewheel"],
|
697 |
+
"695": ["n03874599", "padlock"],
|
698 |
+
"696": ["n03876231", "paintbrush"],
|
699 |
+
"697": ["n03877472", "pajama"],
|
700 |
+
"698": ["n03877845", "palace"],
|
701 |
+
"699": ["n03884397", "panpipe"],
|
702 |
+
"700": ["n03887697", "paper_towel"],
|
703 |
+
"701": ["n03888257", "parachute"],
|
704 |
+
"702": ["n03888605", "parallel_bars"],
|
705 |
+
"703": ["n03891251", "park_bench"],
|
706 |
+
"704": ["n03891332", "parking_meter"],
|
707 |
+
"705": ["n03895866", "passenger_car"],
|
708 |
+
"706": ["n03899768", "patio"],
|
709 |
+
"707": ["n03902125", "pay-phone"],
|
710 |
+
"708": ["n03903868", "pedestal"],
|
711 |
+
"709": ["n03908618", "pencil_box"],
|
712 |
+
"710": ["n03908714", "pencil_sharpener"],
|
713 |
+
"711": ["n03916031", "perfume"],
|
714 |
+
"712": ["n03920288", "Petri_dish"],
|
715 |
+
"713": ["n03924679", "photocopier"],
|
716 |
+
"714": ["n03929660", "pick"],
|
717 |
+
"715": ["n03929855", "pickelhaube"],
|
718 |
+
"716": ["n03930313", "picket_fence"],
|
719 |
+
"717": ["n03930630", "pickup"],
|
720 |
+
"718": ["n03933933", "pier"],
|
721 |
+
"719": ["n03935335", "piggy_bank"],
|
722 |
+
"720": ["n03937543", "pill_bottle"],
|
723 |
+
"721": ["n03938244", "pillow"],
|
724 |
+
"722": ["n03942813", "ping-pong_ball"],
|
725 |
+
"723": ["n03944341", "pinwheel"],
|
726 |
+
"724": ["n03947888", "pirate"],
|
727 |
+
"725": ["n03950228", "pitcher"],
|
728 |
+
"726": ["n03954731", "plane"],
|
729 |
+
"727": ["n03956157", "planetarium"],
|
730 |
+
"728": ["n03958227", "plastic_bag"],
|
731 |
+
"729": ["n03961711", "plate_rack"],
|
732 |
+
"730": ["n03967562", "plow"],
|
733 |
+
"731": ["n03970156", "plunger"],
|
734 |
+
"732": ["n03976467", "Polaroid_camera"],
|
735 |
+
"733": ["n03976657", "pole"],
|
736 |
+
"734": ["n03977966", "police_van"],
|
737 |
+
"735": ["n03980874", "poncho"],
|
738 |
+
"736": ["n03982430", "pool_table"],
|
739 |
+
"737": ["n03983396", "pop_bottle"],
|
740 |
+
"738": ["n03991062", "pot"],
|
741 |
+
"739": ["n03992509", "potter's_wheel"],
|
742 |
+
"740": ["n03995372", "power_drill"],
|
743 |
+
"741": ["n03998194", "prayer_rug"],
|
744 |
+
"742": ["n04004767", "printer"],
|
745 |
+
"743": ["n04005630", "prison"],
|
746 |
+
"744": ["n04008634", "projectile"],
|
747 |
+
"745": ["n04009552", "projector"],
|
748 |
+
"746": ["n04019541", "puck"],
|
749 |
+
"747": ["n04023962", "punching_bag"],
|
750 |
+
"748": ["n04026417", "purse"],
|
751 |
+
"749": ["n04033901", "quill"],
|
752 |
+
"750": ["n04033995", "quilt"],
|
753 |
+
"751": ["n04037443", "racer"],
|
754 |
+
"752": ["n04039381", "racket"],
|
755 |
+
"753": ["n04040759", "radiator"],
|
756 |
+
"754": ["n04041544", "radio"],
|
757 |
+
"755": ["n04044716", "radio_telescope"],
|
758 |
+
"756": ["n04049303", "rain_barrel"],
|
759 |
+
"757": ["n04065272", "recreational_vehicle"],
|
760 |
+
"758": ["n04067472", "reel"],
|
761 |
+
"759": ["n04069434", "reflex_camera"],
|
762 |
+
"760": ["n04070727", "refrigerator"],
|
763 |
+
"761": ["n04074963", "remote_control"],
|
764 |
+
"762": ["n04081281", "restaurant"],
|
765 |
+
"763": ["n04086273", "revolver"],
|
766 |
+
"764": ["n04090263", "rifle"],
|
767 |
+
"765": ["n04099969", "rocking_chair"],
|
768 |
+
"766": ["n04111531", "rotisserie"],
|
769 |
+
"767": ["n04116512", "rubber_eraser"],
|
770 |
+
"768": ["n04118538", "rugby_ball"],
|
771 |
+
"769": ["n04118776", "rule"],
|
772 |
+
"770": ["n04120489", "running_shoe"],
|
773 |
+
"771": ["n04125021", "safe"],
|
774 |
+
"772": ["n04127249", "safety_pin"],
|
775 |
+
"773": ["n04131690", "saltshaker"],
|
776 |
+
"774": ["n04133789", "sandal"],
|
777 |
+
"775": ["n04136333", "sarong"],
|
778 |
+
"776": ["n04141076", "sax"],
|
779 |
+
"777": ["n04141327", "scabbard"],
|
780 |
+
"778": ["n04141975", "scale"],
|
781 |
+
"779": ["n04146614", "school_bus"],
|
782 |
+
"780": ["n04147183", "schooner"],
|
783 |
+
"781": ["n04149813", "scoreboard"],
|
784 |
+
"782": ["n04152593", "screen"],
|
785 |
+
"783": ["n04153751", "screw"],
|
786 |
+
"784": ["n04154565", "screwdriver"],
|
787 |
+
"785": ["n04162706", "seat_belt"],
|
788 |
+
"786": ["n04179913", "sewing_machine"],
|
789 |
+
"787": ["n04192698", "shield"],
|
790 |
+
"788": ["n04200800", "shoe_shop"],
|
791 |
+
"789": ["n04201297", "shoji"],
|
792 |
+
"790": ["n04204238", "shopping_basket"],
|
793 |
+
"791": ["n04204347", "shopping_cart"],
|
794 |
+
"792": ["n04208210", "shovel"],
|
795 |
+
"793": ["n04209133", "shower_cap"],
|
796 |
+
"794": ["n04209239", "shower_curtain"],
|
797 |
+
"795": ["n04228054", "ski"],
|
798 |
+
"796": ["n04229816", "ski_mask"],
|
799 |
+
"797": ["n04235860", "sleeping_bag"],
|
800 |
+
"798": ["n04238763", "slide_rule"],
|
801 |
+
"799": ["n04239074", "sliding_door"],
|
802 |
+
"800": ["n04243546", "slot"],
|
803 |
+
"801": ["n04251144", "snorkel"],
|
804 |
+
"802": ["n04252077", "snowmobile"],
|
805 |
+
"803": ["n04252225", "snowplow"],
|
806 |
+
"804": ["n04254120", "soap_dispenser"],
|
807 |
+
"805": ["n04254680", "soccer_ball"],
|
808 |
+
"806": ["n04254777", "sock"],
|
809 |
+
"807": ["n04258138", "solar_dish"],
|
810 |
+
"808": ["n04259630", "sombrero"],
|
811 |
+
"809": ["n04263257", "soup_bowl"],
|
812 |
+
"810": ["n04264628", "space_bar"],
|
813 |
+
"811": ["n04265275", "space_heater"],
|
814 |
+
"812": ["n04266014", "space_shuttle"],
|
815 |
+
"813": ["n04270147", "spatula"],
|
816 |
+
"814": ["n04273569", "speedboat"],
|
817 |
+
"815": ["n04275548", "spider_web"],
|
818 |
+
"816": ["n04277352", "spindle"],
|
819 |
+
"817": ["n04285008", "sports_car"],
|
820 |
+
"818": ["n04286575", "spotlight"],
|
821 |
+
"819": ["n04296562", "stage"],
|
822 |
+
"820": ["n04310018", "steam_locomotive"],
|
823 |
+
"821": ["n04311004", "steel_arch_bridge"],
|
824 |
+
"822": ["n04311174", "steel_drum"],
|
825 |
+
"823": ["n04317175", "stethoscope"],
|
826 |
+
"824": ["n04325704", "stole"],
|
827 |
+
"825": ["n04326547", "stone_wall"],
|
828 |
+
"826": ["n04328186", "stopwatch"],
|
829 |
+
"827": ["n04330267", "stove"],
|
830 |
+
"828": ["n04332243", "strainer"],
|
831 |
+
"829": ["n04335435", "streetcar"],
|
832 |
+
"830": ["n04336792", "stretcher"],
|
833 |
+
"831": ["n04344873", "studio_couch"],
|
834 |
+
"832": ["n04346328", "stupa"],
|
835 |
+
"833": ["n04347754", "submarine"],
|
836 |
+
"834": ["n04350905", "suit"],
|
837 |
+
"835": ["n04355338", "sundial"],
|
838 |
+
"836": ["n04355933", "sunglass"],
|
839 |
+
"837": ["n04356056", "sunglasses"],
|
840 |
+
"838": ["n04357314", "sunscreen"],
|
841 |
+
"839": ["n04366367", "suspension_bridge"],
|
842 |
+
"840": ["n04367480", "swab"],
|
843 |
+
"841": ["n04370456", "sweatshirt"],
|
844 |
+
"842": ["n04371430", "swimming_trunks"],
|
845 |
+
"843": ["n04371774", "swing"],
|
846 |
+
"844": ["n04372370", "switch"],
|
847 |
+
"845": ["n04376876", "syringe"],
|
848 |
+
"846": ["n04380533", "table_lamp"],
|
849 |
+
"847": ["n04389033", "tank"],
|
850 |
+
"848": ["n04392985", "tape_player"],
|
851 |
+
"849": ["n04398044", "teapot"],
|
852 |
+
"850": ["n04399382", "teddy"],
|
853 |
+
"851": ["n04404412", "television"],
|
854 |
+
"852": ["n04409515", "tennis_ball"],
|
855 |
+
"853": ["n04417672", "thatch"],
|
856 |
+
"854": ["n04418357", "theater_curtain"],
|
857 |
+
"855": ["n04423845", "thimble"],
|
858 |
+
"856": ["n04428191", "thresher"],
|
859 |
+
"857": ["n04429376", "throne"],
|
860 |
+
"858": ["n04435653", "tile_roof"],
|
861 |
+
"859": ["n04442312", "toaster"],
|
862 |
+
"860": ["n04443257", "tobacco_shop"],
|
863 |
+
"861": ["n04447861", "toilet_seat"],
|
864 |
+
"862": ["n04456115", "torch"],
|
865 |
+
"863": ["n04458633", "totem_pole"],
|
866 |
+
"864": ["n04461696", "tow_truck"],
|
867 |
+
"865": ["n04462240", "toyshop"],
|
868 |
+
"866": ["n04465501", "tractor"],
|
869 |
+
"867": ["n04467665", "trailer_truck"],
|
870 |
+
"868": ["n04476259", "tray"],
|
871 |
+
"869": ["n04479046", "trench_coat"],
|
872 |
+
"870": ["n04482393", "tricycle"],
|
873 |
+
"871": ["n04483307", "trimaran"],
|
874 |
+
"872": ["n04485082", "tripod"],
|
875 |
+
"873": ["n04486054", "triumphal_arch"],
|
876 |
+
"874": ["n04487081", "trolleybus"],
|
877 |
+
"875": ["n04487394", "trombone"],
|
878 |
+
"876": ["n04493381", "tub"],
|
879 |
+
"877": ["n04501370", "turnstile"],
|
880 |
+
"878": ["n04505470", "typewriter_keyboard"],
|
881 |
+
"879": ["n04507155", "umbrella"],
|
882 |
+
"880": ["n04509417", "unicycle"],
|
883 |
+
"881": ["n04515003", "upright"],
|
884 |
+
"882": ["n04517823", "vacuum"],
|
885 |
+
"883": ["n04522168", "vase"],
|
886 |
+
"884": ["n04523525", "vault"],
|
887 |
+
"885": ["n04525038", "velvet"],
|
888 |
+
"886": ["n04525305", "vending_machine"],
|
889 |
+
"887": ["n04532106", "vestment"],
|
890 |
+
"888": ["n04532670", "viaduct"],
|
891 |
+
"889": ["n04536866", "violin"],
|
892 |
+
"890": ["n04540053", "volleyball"],
|
893 |
+
"891": ["n04542943", "waffle_iron"],
|
894 |
+
"892": ["n04548280", "wall_clock"],
|
895 |
+
"893": ["n04548362", "wallet"],
|
896 |
+
"894": ["n04550184", "wardrobe"],
|
897 |
+
"895": ["n04552348", "warplane"],
|
898 |
+
"896": ["n04553703", "washbasin"],
|
899 |
+
"897": ["n04554684", "washer"],
|
900 |
+
"898": ["n04557648", "water_bottle"],
|
901 |
+
"899": ["n04560804", "water_jug"],
|
902 |
+
"900": ["n04562935", "water_tower"],
|
903 |
+
"901": ["n04579145", "whiskey_jug"],
|
904 |
+
"902": ["n04579432", "whistle"],
|
905 |
+
"903": ["n04584207", "wig"],
|
906 |
+
"904": ["n04589890", "window_screen"],
|
907 |
+
"905": ["n04590129", "window_shade"],
|
908 |
+
"906": ["n04591157", "Windsor_tie"],
|
909 |
+
"907": ["n04591713", "wine_bottle"],
|
910 |
+
"908": ["n04592741", "wing"],
|
911 |
+
"909": ["n04596742", "wok"],
|
912 |
+
"910": ["n04597913", "wooden_spoon"],
|
913 |
+
"911": ["n04599235", "wool"],
|
914 |
+
"912": ["n04604644", "worm_fence"],
|
915 |
+
"913": ["n04606251", "wreck"],
|
916 |
+
"914": ["n04612504", "yawl"],
|
917 |
+
"915": ["n04613696", "yurt"],
|
918 |
+
"916": ["n06359193", "web_site"],
|
919 |
+
"917": ["n06596364", "comic_book"],
|
920 |
+
"918": ["n06785654", "crossword_puzzle"],
|
921 |
+
"919": ["n06794110", "street_sign"],
|
922 |
+
"920": ["n06874185", "traffic_light"],
|
923 |
+
"921": ["n07248320", "book_jacket"],
|
924 |
+
"922": ["n07565083", "menu"],
|
925 |
+
"923": ["n07579787", "plate"],
|
926 |
+
"924": ["n07583066", "guacamole"],
|
927 |
+
"925": ["n07584110", "consomme"],
|
928 |
+
"926": ["n07590611", "hot_pot"],
|
929 |
+
"927": ["n07613480", "trifle"],
|
930 |
+
"928": ["n07614500", "ice_cream"],
|
931 |
+
"929": ["n07615774", "ice_lolly"],
|
932 |
+
"930": ["n07684084", "French_loaf"],
|
933 |
+
"931": ["n07693725", "bagel"],
|
934 |
+
"932": ["n07695742", "pretzel"],
|
935 |
+
"933": ["n07697313", "cheeseburger"],
|
936 |
+
"934": ["n07697537", "hotdog"],
|
937 |
+
"935": ["n07711569", "mashed_potato"],
|
938 |
+
"936": ["n07714571", "head_cabbage"],
|
939 |
+
"937": ["n07714990", "broccoli"],
|
940 |
+
"938": ["n07715103", "cauliflower"],
|
941 |
+
"939": ["n07716358", "zucchini"],
|
942 |
+
"940": ["n07716906", "spaghetti_squash"],
|
943 |
+
"941": ["n07717410", "acorn_squash"],
|
944 |
+
"942": ["n07717556", "butternut_squash"],
|
945 |
+
"943": ["n07718472", "cucumber"],
|
946 |
+
"944": ["n07718747", "artichoke"],
|
947 |
+
"945": ["n07720875", "bell_pepper"],
|
948 |
+
"946": ["n07730033", "cardoon"],
|
949 |
+
"947": ["n07734744", "mushroom"],
|
950 |
+
"948": ["n07742313", "Granny_Smith"],
|
951 |
+
"949": ["n07745940", "strawberry"],
|
952 |
+
"950": ["n07747607", "orange"],
|
953 |
+
"951": ["n07749582", "lemon"],
|
954 |
+
"952": ["n07753113", "fig"],
|
955 |
+
"953": ["n07753275", "pineapple"],
|
956 |
+
"954": ["n07753592", "banana"],
|
957 |
+
"955": ["n07754684", "jackfruit"],
|
958 |
+
"956": ["n07760859", "custard_apple"],
|
959 |
+
"957": ["n07768694", "pomegranate"],
|
960 |
+
"958": ["n07802026", "hay"],
|
961 |
+
"959": ["n07831146", "carbonara"],
|
962 |
+
"960": ["n07836838", "chocolate_sauce"],
|
963 |
+
"961": ["n07860988", "dough"],
|
964 |
+
"962": ["n07871810", "meat_loaf"],
|
965 |
+
"963": ["n07873807", "pizza"],
|
966 |
+
"964": ["n07875152", "potpie"],
|
967 |
+
"965": ["n07880968", "burrito"],
|
968 |
+
"966": ["n07892512", "red_wine"],
|
969 |
+
"967": ["n07920052", "espresso"],
|
970 |
+
"968": ["n07930864", "cup"],
|
971 |
+
"969": ["n07932039", "eggnog"],
|
972 |
+
"970": ["n09193705", "alp"],
|
973 |
+
"971": ["n09229709", "bubble"],
|
974 |
+
"972": ["n09246464", "cliff"],
|
975 |
+
"973": ["n09256479", "coral_reef"],
|
976 |
+
"974": ["n09288635", "geyser"],
|
977 |
+
"975": ["n09332890", "lakeside"],
|
978 |
+
"976": ["n09399592", "promontory"],
|
979 |
+
"977": ["n09421951", "sandbar"],
|
980 |
+
"978": ["n09428293", "seashore"],
|
981 |
+
"979": ["n09468604", "valley"],
|
982 |
+
"980": ["n09472597", "volcano"],
|
983 |
+
"981": ["n09835506", "ballplayer"],
|
984 |
+
"982": ["n10148035", "groom"],
|
985 |
+
"983": ["n10565667", "scuba_diver"],
|
986 |
+
"984": ["n11879895", "rapeseed"],
|
987 |
+
"985": ["n11939491", "daisy"],
|
988 |
+
"986": ["n12057211", "yellow_lady's_slipper"],
|
989 |
+
"987": ["n12144580", "corn"],
|
990 |
+
"988": ["n12267677", "acorn"],
|
991 |
+
"989": ["n12620546", "hip"],
|
992 |
+
"990": ["n12768682", "buckeye"],
|
993 |
+
"991": ["n12985857", "coral_fungus"],
|
994 |
+
"992": ["n12998815", "agaric"],
|
995 |
+
"993": ["n13037406", "gyromitra"],
|
996 |
+
"994": ["n13040303", "stinkhorn"],
|
997 |
+
"995": ["n13044778", "earthstar"],
|
998 |
+
"996": ["n13052670", "hen-of-the-woods"],
|
999 |
+
"997": ["n13054560", "bolete"],
|
1000 |
+
"998": ["n13133613", "ear"],
|
1001 |
+
"999": ["n15075141", "toilet_tissue"]
|
1002 |
+
}
|
uniformer_light_image.py
ADDED
@@ -0,0 +1,535 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# All rights reserved.
|
2 |
+
from collections import OrderedDict
|
3 |
+
import torch
|
4 |
+
import torch.nn as nn
|
5 |
+
from functools import partial
|
6 |
+
import torch.nn.functional as F
|
7 |
+
import math
|
8 |
+
from timm.models.vision_transformer import _cfg
|
9 |
+
from timm.models.registry import register_model
|
10 |
+
from timm.models.layers import trunc_normal_, DropPath, to_2tuple
|
11 |
+
|
12 |
+
|
13 |
+
layer_scale = False
|
14 |
+
init_value = 1e-6
|
15 |
+
global_attn = None
|
16 |
+
token_indices = None
|
17 |
+
|
18 |
+
|
19 |
+
# code is from https://github.com/YifanXu74/Evo-ViT
|
20 |
+
def easy_gather(x, indices):
    """Select per-batch tokens of ``x`` by index (from Evo-ViT).

    Args:
        x: tensor of shape (B, N, C).
        indices: long tensor of shape (B, N_new); token indices into dim 1.

    Returns:
        Tensor of shape (B, N_new, C) containing the selected tokens.
    """
    batch, num_tokens, channels = x.shape
    keep = indices.shape[1]
    # Flatten the batch dimension and shift each row's indices so they
    # address the correct batch slice of the flattened token tensor.
    base = torch.arange(batch, dtype=torch.long, device=x.device).view(batch, 1) * num_tokens
    flat_idx = (indices + base).view(-1)
    # Gather only the requested (informative) tokens.
    return x.reshape(batch * num_tokens, channels)[flat_idx].reshape(batch, keep, channels)
|
30 |
+
|
31 |
+
|
32 |
+
# code is from https://github.com/YifanXu74/Evo-ViT
|
33 |
+
def merge_tokens(x_drop, score):
    """Collapse pruned tokens into a single representative token (from Evo-ViT).

    Args:
        x_drop: tensor of shape (B, N_drop, C), the tokens being dropped.
        score: tensor of shape (B, N_drop), an importance score per dropped token.

    Returns:
        Tensor of shape (B, 1, C): the score-weighted average of ``x_drop``.
    """
    # Normalize scores into weights that sum to 1 over the dropped tokens.
    weights = score / score.sum(dim=1, keepdim=True)
    weighted = weights.unsqueeze(-1) * x_drop
    return weighted.sum(dim=1, keepdim=True)
|
39 |
+
|
40 |
+
|
41 |
+
class Mlp(nn.Module):
    """Token-wise feed-forward network: Linear -> activation -> Linear.

    Hidden and output widths default to ``in_features`` when not given.
    Dropout is applied after the activation and after the final projection,
    reusing a single Dropout module.
    """

    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        hidden_features = hidden_features or in_features
        out_features = out_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        # fc1 -> act -> drop -> fc2 -> drop
        hidden = self.drop(self.act(self.fc1(x)))
        return self.drop(self.fc2(hidden))
|
58 |
+
|
59 |
+
|
60 |
+
class CMlp(nn.Module):
    """Convolutional MLP: 1x1 Conv -> activation -> 1x1 Conv on (B, C, H, W).

    The 1x1 convolutions act as per-pixel linear layers. Hidden and output
    channel counts default to ``in_features``; a single Dropout module is
    applied after the activation and after the final projection.
    """

    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        hidden_features = hidden_features or in_features
        out_features = out_features or in_features
        self.fc1 = nn.Conv2d(in_features, hidden_features, 1)
        self.act = act_layer()
        self.fc2 = nn.Conv2d(hidden_features, out_features, 1)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        # fc1 -> act -> drop -> fc2 -> drop
        hidden = self.drop(self.act(self.fc1(x)))
        return self.drop(self.fc2(hidden))
|
77 |
+
|
78 |
+
|
79 |
+
class Attention(nn.Module):
    """Multi-head self-attention that also maintains a running global token score.

    In addition to standard attention, `forward` updates the module-level
    ``global_attn`` tensor with the class token's mean attention over the other
    tokens (EMA weighted by ``trade_off``). This score is used elsewhere for
    token pruning. NOTE(review): callers must initialize the module global
    ``global_attn`` (e.g. to an int sentinel) before the first forward pass;
    it is shared state across blocks — confirm against the model driver.
    """

    def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0., trade_off=1):
        # dim: embedding dimension; num_heads: number of attention heads.
        # trade_off: EMA weight for updating the global token score.
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
        self.scale = qk_scale or head_dim ** -0.5

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)
        # updating weight for global score
        self.trade_off = trade_off

    def forward(self, x):
        # x: (B, N, C) token sequence; token 0 is the class token.
        B, N, C = x.shape
        # Project to q, k, v: each (B, num_heads, N, head_dim).
        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
        q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple)

        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = attn.softmax(dim=-1)

        # update global score
        # global_attn holds the head-averaged attention of the class token
        # to the remaining tokens, smoothed across layers via trade_off.
        global global_attn
        tradeoff = self.trade_off
        if isinstance(global_attn, int):
            # First use (int sentinel): initialize from this layer's attention.
            global_attn = torch.mean(attn[:, :, 0, 1:], dim=1)
        elif global_attn.shape[1] == N - 1:
            # no additional token and no pruning, update all global scores
            cls_attn = torch.mean(attn[:, :, 0, 1:], dim=1)
            global_attn = (1 - tradeoff) * global_attn + tradeoff * cls_attn
        else:
            # only update the informative tokens
            # the first one is class token
            # the last one is representative token
            cls_attn = torch.mean(attn[:, :, 0, 1:-1], dim=1)
            if self.training:
                # Out-of-place update so autograd can track the EMA.
                temp_attn = (1 - tradeoff) * global_attn[:, :(N - 2)] + tradeoff * cls_attn
                global_attn = torch.cat((temp_attn, global_attn[:, (N - 2):]), dim=1)
            else:
                # no use torch.cat() for fast inference
                global_attn[:, :(N - 2)] = (1 - tradeoff) * global_attn[:, :(N - 2)] + tradeoff * cls_attn

        attn = self.attn_drop(attn)

        # Standard attention output followed by the output projection.
        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
        x = self.proj(x)
        x = self.proj_drop(x)
        return x
|
129 |
+
|
130 |
+
|
131 |
+
class CBlock(nn.Module):
    """Convolutional UniFormer block.

    Structure: depthwise 3x3 conv positional encoding, then a local "attention"
    branch (1x1 conv -> depthwise 5x5 conv -> 1x1 conv) and a convolutional MLP,
    each preceded by BatchNorm and wrapped in a DropPath residual. When the
    module-level ``layer_scale`` flag is set, both residual branches are scaled
    by learnable per-channel gammas initialized to ``init_value``.
    """

    def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
                 drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm):
        super().__init__()
        # Depthwise conv acts as a conditional positional encoding.
        self.pos_embed = nn.Conv2d(dim, dim, 3, padding=1, groups=dim)
        self.norm1 = nn.BatchNorm2d(dim)
        self.conv1 = nn.Conv2d(dim, dim, 1)
        self.conv2 = nn.Conv2d(dim, dim, 1)
        # Depthwise 5x5 conv plays the role of token mixing ("attention").
        self.attn = nn.Conv2d(dim, dim, 5, padding=2, groups=dim)
        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.norm2 = nn.BatchNorm2d(dim)
        self.mlp = CMlp(in_features=dim, hidden_features=int(dim * mlp_ratio), act_layer=act_layer, drop=drop)
        global layer_scale
        self.ls = layer_scale
        if self.ls:
            global init_value
            print(f"Use layer_scale: {layer_scale}, init_values: {init_value}")
            self.gamma_1 = nn.Parameter(init_value * torch.ones((1, dim, 1, 1)), requires_grad=True)
            self.gamma_2 = nn.Parameter(init_value * torch.ones((1, dim, 1, 1)), requires_grad=True)

    def forward(self, x):
        x = x + self.pos_embed(x)
        if self.ls:
            x = x + self.drop_path(self.gamma_1 * self.conv2(self.attn(self.conv1(self.norm1(x)))))
            x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x)))
        else:
            x = x + self.drop_path(self.conv2(self.attn(self.conv1(self.norm1(x)))))
            x = x + self.drop_path(self.mlp(self.norm2(x)))
        return x
|
162 |
+
|
class EvoSABlock(nn.Module):
    """Evolving self-attention block with token pruning and merging.

    Keeps the ``prune_ratio`` fraction of most informative patch tokens
    (ranked by the module-level ``global_attn`` score), merges the dropped
    tokens into one representative token, runs attention + MLP over
    [class token | informative tokens | representative token], then scatters
    tokens back to their original spatial positions.
    """

    def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
                 drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm, prune_ratio=1,
                 trade_off=0, downsample=False):
        super().__init__()
        # Conditional positional encoding via a 3x3 depthwise conv.
        self.pos_embed = nn.Conv2d(dim, dim, 3, padding=1, groups=dim)
        self.norm1 = norm_layer(dim)
        self.attn = Attention(
            dim,
            num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
            attn_drop=attn_drop, proj_drop=drop, trade_off=trade_off)
        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
        # Fraction of patch tokens kept; 1 means no pruning in this block.
        self.prune_ratio = prune_ratio
        # When True, downsample global_attn 2x at the end of the stage.
        self.downsample = downsample
        if downsample:
            self.avgpool = nn.AvgPool2d(kernel_size=2, stride=2)
        global layer_scale
        self.ls = layer_scale
        if self.ls:
            global init_value
            print(f"Use layer_scale: {layer_scale}, init_values: {init_value}")
            self.gamma_1 = nn.Parameter(init_value * torch.ones((dim)),requires_grad=True)
            self.gamma_2 = nn.Parameter(init_value * torch.ones((dim)),requires_grad=True)
            if self.prune_ratio != 1:
                # Extra scale for the "fast update" applied to dropped tokens.
                self.gamma_3 = nn.Parameter(init_value * torch.ones((dim)),requires_grad=True)

    def forward(self, cls_token, x):
        """cls_token: (B, 1, C); x: (B, C, H, W) feature map.
        Returns the updated (cls_token, feature map) pair."""
        x = x + self.pos_embed(x)
        B, C, H, W = x.shape
        # (B, C, H, W) -> (B, N, C) token sequence.
        x = x.flatten(2).transpose(1, 2)

        if self.prune_ratio == 1:
            # No pruning: plain pre-norm attention + MLP over [cls | patches].
            x = torch.cat([cls_token, x], dim=1)
            if self.ls:
                x = x + self.drop_path(self.gamma_1 * self.attn(self.norm1(x)))
                x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x)))
            else:
                x = x + self.drop_path(self.attn(self.norm1(x)))
                x = x + self.drop_path(self.mlp(self.norm2(x)))
            cls_token, x = x[:, :1], x[:, 1:]
            x = x.transpose(1, 2).reshape(B, C, H, W)
            return cls_token, x
        else:
            global global_attn, token_indices
            # Calculate the number of informative tokens.
            N = x.shape[1]
            N_ = int(N * self.prune_ratio)
            # Sort tokens by descending global attention score.
            indices = torch.argsort(global_attn, dim=1, descending=True)

            # Concatenate x, global attention and token indices => x_ga_ti,
            # then rearrange the bundle once according to the new order.
            x_ga_ti = torch.cat((x, global_attn.unsqueeze(-1), token_indices.unsqueeze(-1)), dim=-1)
            x_ga_ti = easy_gather(x_ga_ti, indices)
            x_sorted, global_attn, token_indices = x_ga_ti[:, :, :-2], x_ga_ti[:, :, -2], x_ga_ti[:, :, -1]

            # Informative tokens (top-N_ by score).
            x_info = x_sorted[:, :N_]
            # Merge dropped tokens into a single representative token.
            x_drop = x_sorted[:, N_:]
            score = global_attn[:, N_:]
            # B x N_drop x C => B x 1 x C
            rep_token = merge_tokens(x_drop, score)
            # New sequence: [cls | informative | representative].
            x = torch.cat((cls_token, x_info, rep_token), dim=1)

            if self.ls:
                # Slow update: attention + MLP on the shortened sequence;
                # the representative token's residuals are accumulated so
                # they can also be broadcast to the dropped tokens below.
                fast_update = 0
                tmp_x = self.attn(self.norm1(x))
                fast_update = fast_update + tmp_x[:, -1:]
                x = x + self.drop_path(self.gamma_1 * tmp_x)
                tmp_x = self.mlp(self.norm2(x))
                fast_update = fast_update + tmp_x[:, -1:]
                x = x + self.drop_path(self.gamma_2 * tmp_x)
                # Fast update: propagate the representative token's residual
                # to every dropped token.
                x_drop = x_drop + self.gamma_3 * fast_update.expand(-1, N - N_, -1)
            else:
                # Slow update (no layer scale).
                fast_update = 0
                tmp_x = self.attn(self.norm1(x))
                fast_update = fast_update + tmp_x[:, -1:]
                x = x + self.drop_path(tmp_x)
                tmp_x = self.mlp(self.norm2(x))
                fast_update = fast_update + tmp_x[:, -1:]
                x = x + self.drop_path(tmp_x)
                # Fast update for the dropped tokens.
                x_drop = x_drop + fast_update.expand(-1, N - N_, -1)

            cls_token, x = x[:, :1, :], x[:, 1:-1, :]
            if self.training:
                x_sorted = torch.cat((x, x_drop), dim=1)
            else:
                # In-place writes avoid torch.cat for faster inference.
                x_sorted[:, N_:] = x_drop
                x_sorted[:, :N_] = x

            # Recover the original token order.
            # Scale for normalization (preserved across the downsample below).
            old_global_scale = torch.sum(global_attn, dim=1, keepdim=True)
            # Recover order: invert the permutation via argsort of indices.
            indices = torch.argsort(token_indices, dim=1)
            x_ga_ti = torch.cat((x_sorted, global_attn.unsqueeze(-1), token_indices.unsqueeze(-1)), dim=-1)
            x_ga_ti = easy_gather(x_ga_ti, indices)
            x_patch, global_attn, token_indices = x_ga_ti[:, :, :-2], x_ga_ti[:, :, -2], x_ga_ti[:, :, -1]
            x_patch = x_patch.transpose(1, 2).reshape(B, C, H, W)

            if self.downsample:
                # Downsample global attention to match the next stage's grid.
                global_attn = global_attn.reshape(B, 1, H, W)
                global_attn = self.avgpool(global_attn).view(B, -1)
                # Normalize so the total global attention mass is unchanged.
                new_global_scale = torch.sum(global_attn, dim=1, keepdim=True)
                scale = old_global_scale / new_global_scale
                global_attn = global_attn * scale

            return cls_token, x_patch
class PatchEmbed(nn.Module):
    """Image to Patch Embedding.

    A strided convolution downsamples the NCHW input by ``patch_size``;
    LayerNorm is then applied over the channel dimension and the result is
    returned in NCHW layout.
    """

    def __init__(self, patch_size=16, in_chans=3, embed_dim=768):
        super().__init__()
        self.norm = nn.LayerNorm(embed_dim)
        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)

    def forward(self, x):
        patches = self.proj(x)
        n, _, h, w = patches.shape
        # NCHW -> (N, H*W, C) so LayerNorm normalizes over channels.
        tokens = patches.flatten(2).transpose(1, 2)
        tokens = self.norm(tokens)
        # Restore NCHW layout for the convolutional stages downstream.
        return tokens.reshape(n, h, w, -1).permute(0, 3, 1, 2).contiguous()
class head_embedding(nn.Module):
    """Convolutional stem: two stride-2 3x3 convs (4x total downsampling),
    each followed by BatchNorm, with a GELU in between."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        mid_channels = out_channels // 2
        stem = [
            nn.Conv2d(in_channels, mid_channels, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)),
            nn.BatchNorm2d(mid_channels),
            nn.GELU(),
            nn.Conv2d(mid_channels, out_channels, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)),
            nn.BatchNorm2d(out_channels),
        ]
        self.proj = nn.Sequential(*stem)

    def forward(self, x):
        return self.proj(x)
class middle_embedding(nn.Module):
    """Between-stage downsampling: one stride-2 3x3 conv plus BatchNorm
    (halves the spatial resolution)."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        downsample = nn.Conv2d(in_channels, out_channels, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.proj = nn.Sequential(downsample, nn.BatchNorm2d(out_channels))

    def forward(self, x):
        return self.proj(x)
class UniFormer_Light(nn.Module):
    """ Vision Transformer
    A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale` -
        https://arxiv.org/abs/2010.11929

    Light-weight UniFormer: convolutional blocks (CBlock) in stages 1-2 and
    evolving self-attention blocks (EvoSABlock, with token pruning/merging)
    in stages 3-4.  A class token is introduced at stage 3 and linearly
    upsampled before stage 4; during training an auxiliary classifier on the
    class token is returned alongside the main head.
    """
    def __init__(self, depth=[3, 4, 8, 3], in_chans=3, num_classes=1000, embed_dim=[64, 128, 320, 512],
                 head_dim=64, mlp_ratio=[4., 4., 4., 4.], qkv_bias=True, qk_scale=None, representation_size=None,
                 drop_rate=0., attn_drop_rate=0., drop_path_rate=0., norm_layer=None, conv_stem=False,
                 prune_ratio=[[], [], [1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 0.5]],
                 trade_off=[[], [], [1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 0.5]]):
        """
        Args:
            depth (list): number of blocks per stage
            in_chans (int): number of input channels
            num_classes (int): number of classes for classification head
            embed_dim (list): embedding dimension per stage
            head_dim (int): head dimension
            mlp_ratio (list): ratio of mlp hidden dim to embedding dim
            qkv_bias (bool): enable bias for qkv if True
            qk_scale (float): override default qk scale of head_dim ** -0.5 if set
            representation_size (Optional[int]): enable and set representation layer (pre-logits) to this value if set
            drop_rate (float): dropout rate
            attn_drop_rate (float): attention dropout rate
            drop_path_rate (float): stochastic depth rate
            norm_layer: (nn.Module): normalization layer
            conv_stem (bool): use the two-conv head_embedding stem if True
            prune_ratio (list): per-stage, per-block fraction of tokens kept
            trade_off (list): per-stage, per-block EMA weight for global attention
        """
        super().__init__()
        self.num_classes = num_classes
        self.num_features = self.embed_dim = embed_dim  # num_features for consistency with other models
        norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)
        if conv_stem:
            self.patch_embed1 = head_embedding(in_channels=in_chans, out_channels=embed_dim[0])
            self.patch_embed2 = PatchEmbed(
                patch_size=2, in_chans=embed_dim[0], embed_dim=embed_dim[1])
            self.patch_embed3 = PatchEmbed(
                patch_size=2, in_chans=embed_dim[1], embed_dim=embed_dim[2])
            self.patch_embed4 = PatchEmbed(
                patch_size=2, in_chans=embed_dim[2], embed_dim=embed_dim[3])
        else:
            self.patch_embed1 = PatchEmbed(
                patch_size=4, in_chans=in_chans, embed_dim=embed_dim[0])
            self.patch_embed2 = PatchEmbed(
                patch_size=2, in_chans=embed_dim[0], embed_dim=embed_dim[1])
            self.patch_embed3 = PatchEmbed(
                patch_size=2, in_chans=embed_dim[1], embed_dim=embed_dim[2])
            self.patch_embed4 = PatchEmbed(
                patch_size=2, in_chans=embed_dim[2], embed_dim=embed_dim[3])

        # Class token is introduced at stage 3 (width embed_dim[2]) and
        # projected up to the stage-4 width before the last stage.
        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim[2]))
        self.cls_upsample = nn.Linear(embed_dim[2], embed_dim[3])

        self.pos_drop = nn.Dropout(p=drop_rate)
        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depth))]  # stochastic depth decay rule
        num_heads = [dim // head_dim for dim in embed_dim]
        self.blocks1 = nn.ModuleList([
            CBlock(
                dim=embed_dim[0], num_heads=num_heads[0], mlp_ratio=mlp_ratio[0], qkv_bias=qkv_bias, qk_scale=qk_scale,
                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer)
            for i in range(depth[0])])
        self.blocks2 = nn.ModuleList([
            CBlock(
                dim=embed_dim[1], num_heads=num_heads[1], mlp_ratio=mlp_ratio[1], qkv_bias=qkv_bias, qk_scale=qk_scale,
                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+depth[0]], norm_layer=norm_layer)
            for i in range(depth[1])])
        self.blocks3 = nn.ModuleList([
            EvoSABlock(
                dim=embed_dim[2], num_heads=num_heads[2], mlp_ratio=mlp_ratio[2], qkv_bias=qkv_bias, qk_scale=qk_scale,
                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+depth[0]+depth[1]], norm_layer=norm_layer,
                prune_ratio=prune_ratio[2][i], trade_off=trade_off[2][i],
                # Last block of stage 3 downsamples global_attn for stage 4.
                downsample=True if i == depth[2] - 1 else False)
            for i in range(depth[2])])
        self.blocks4 = nn.ModuleList([
            EvoSABlock(
                dim=embed_dim[3], num_heads=num_heads[3], mlp_ratio=mlp_ratio[3], qkv_bias=qkv_bias, qk_scale=qk_scale,
                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i+depth[0]+depth[1]+depth[2]], norm_layer=norm_layer,
                prune_ratio=prune_ratio[3][i], trade_off=trade_off[3][i])
            for i in range(depth[3])])
        self.norm = nn.BatchNorm2d(embed_dim[-1])
        self.norm_cls = nn.LayerNorm(embed_dim[-1])

        # Representation layer
        if representation_size:
            self.num_features = representation_size
            # Bug fix: embed_dim is a per-stage list; nn.Linear needs the
            # final-stage width (the original passed the whole list, which
            # raises TypeError whenever this path is taken).
            self.pre_logits = nn.Sequential(OrderedDict([
                ('fc', nn.Linear(embed_dim[-1], representation_size)),
                ('act', nn.Tanh())
            ]))
        else:
            self.pre_logits = nn.Identity()

        # Classifier head (plus auxiliary cls-token head used in training).
        self.head = nn.Linear(embed_dim[-1], num_classes) if num_classes > 0 else nn.Identity()
        self.head_cls = nn.Linear(embed_dim[-1], num_classes) if num_classes > 0 else nn.Identity()

        self.apply(self._init_weights)

    def _init_weights(self, m):
        """Truncated-normal init for Linear weights; standard LayerNorm init."""
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    @torch.jit.ignore
    def no_weight_decay(self):
        """Parameter names excluded from weight decay."""
        return {'pos_embed', 'cls_token'}

    def get_classifier(self):
        """Return the main classification head."""
        return self.head

    def reset_classifier(self, num_classes, global_pool=''):
        """Replace the classification head for ``num_classes`` outputs.

        ``global_pool`` is accepted for timm-style API compatibility and
        is unused here.
        """
        self.num_classes = num_classes
        # Bug fix: self.embed_dim is a per-stage list; use the final-stage
        # width, consistent with the head built in __init__ (the original
        # passed the list itself, which raises TypeError in nn.Linear).
        self.head = nn.Linear(self.embed_dim[-1], num_classes) if num_classes > 0 else nn.Identity()

    def forward_features(self, x):
        """Run the four stages; returns (cls_token, NCHW feature map)."""
        x = self.patch_embed1(x)
        x = self.pos_drop(x)
        for blk in self.blocks1:
            x = blk(x)
        x = self.patch_embed2(x)
        for blk in self.blocks2:
            x = blk(x)
        x = self.patch_embed3(x)
        # Add cls_token in stage 3 and reset the module-level pruning state.
        cls_token = self.cls_token.expand(x.shape[0], -1, -1)
        global global_attn, token_indices
        global_attn = 0
        token_indices = torch.arange(x.shape[2] * x.shape[3], dtype=torch.long, device=x.device).unsqueeze(0)
        token_indices = token_indices.expand(x.shape[0], -1)
        for blk in self.blocks3:
            cls_token, x = blk(cls_token, x)
        # Upsample cls_token to the stage-4 width before stage 4.
        cls_token = self.cls_upsample(cls_token)
        x = self.patch_embed4(x)
        # Whether reset global attention? Now simple avgpool (done in the
        # last stage-3 block); only the token indices are re-initialized.
        token_indices = torch.arange(x.shape[2] * x.shape[3], dtype=torch.long, device=x.device).unsqueeze(0)
        token_indices = token_indices.expand(x.shape[0], -1)
        for blk in self.blocks4:
            cls_token, x = blk(cls_token, x)
        if self.training:
            # Layer normalization for the auxiliary cls_token head.
            cls_token = self.norm_cls(cls_token)
        x = self.norm(x)
        x = self.pre_logits(x)
        return cls_token, x

    def forward(self, x):
        """Training: returns (main logits, auxiliary cls-token logits).
        Evaluation: returns main logits only."""
        cls_token, x = self.forward_features(x)
        # Global average pooling over the spatial grid.
        x = x.flatten(2).mean(-1)
        if self.training:
            x = self.head(x), self.head_cls(cls_token.squeeze(1))
        else:
            x = self.head(x)
        return x
def uniformer_xxs_image(**kwargs):
    """Build the UniFormer-Light XXS image model (conv stem variant).

    Stage depths [2, 5, 8, 2]; stages 3-4 prune tokens (keep ratio 0.5
    after the first stage-3 block) with matching EMA trade-off weights.
    """
    stage3_schedule = [1] + [0.5] * 7
    stage4_schedule = [0.5, 0.5]
    model = UniFormer_Light(
        depth=[2, 5, 8, 2],
        conv_stem=True,
        prune_ratio=[[], [], stage3_schedule, stage4_schedule],
        trade_off=[[], [], list(stage3_schedule), list(stage4_schedule)],
        embed_dim=[56, 112, 224, 448],
        head_dim=28,
        mlp_ratio=[3, 3, 3, 3],
        qkv_bias=True,
        **kwargs)
    model.default_cfg = _cfg()
    return model
def uniformer_xs_image(**kwargs):
    """Build the UniFormer-Light XS image model (conv stem variant).

    Stage depths [3, 5, 9, 3]; stages 3-4 prune tokens (keep ratio 0.5
    after the first stage-3 block) with matching EMA trade-off weights.
    """
    stage3_schedule = [1] + [0.5] * 8
    stage4_schedule = [0.5] * 3
    model = UniFormer_Light(
        depth=[3, 5, 9, 3],
        conv_stem=True,
        prune_ratio=[[], [], stage3_schedule, stage4_schedule],
        trade_off=[[], [], list(stage3_schedule), list(stage4_schedule)],
        embed_dim=[64, 128, 256, 512],
        head_dim=32,
        mlp_ratio=[3, 3, 3, 3],
        qkv_bias=True,
        **kwargs)
    model.default_cfg = _cfg()
    return model
if __name__ == '__main__':
    # Quick FLOP/parameter profiling of the XXS image model with fvcore.
    import time
    from fvcore.nn import FlopCountAnalysis
    from fvcore.nn import flop_count_table
    import numpy as np

    # Fix all RNG seeds for a reproducible profiling run.
    seed = 4217
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    model = uniformer_xxs_image()
    # print(model)

    # Count FLOPs on a single 160x160 RGB input and time the table dump.
    flops = FlopCountAnalysis(model, torch.rand(1, 3, 160, 160))
    s = time.time()
    print(flop_count_table(flops, max_depth=1))
    print(time.time()-s)