TexR6 committed on
Commit
d7b0f75
1 Parent(s): 2d5dff2

initial commit

Browse files
__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ __version__ = "1.0.3"
2
+ from .model import VisionTransformer, VALID_MODELS
3
+ from .utils import (
4
+ Params,
5
+ vision_transformer,
6
+ get_model_params,
7
+ )
app.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import PIL
3
+ import ast
4
+ import cv2
5
+ import json
6
+ import torch
7
+ import pickle
8
+ import torchvision
9
+ import numpy as np
10
+ import gradio as gr
11
+ from PIL import Image
12
+ from typing import Tuple, Dict
13
+ import matplotlib.pyplot as plt
14
+ from timeit import default_timer as timer
15
+ from torchvision import datasets, transforms
16
+
17
+ import warnings
18
+ warnings.filterwarnings('ignore')
19
+
20
# Example images offered in both tabs; sorted so the order does not depend on
# the arbitrary ordering returned by os.listdir.
example_list = [["examples/" + example] for example in sorted(os.listdir("examples"))]

# Human-readable class names, indexed by the classifier's output position.
with open('labels/imagenet1k-simple-labels.json') as f:
    class_names = json.load(f)

from model import VisionTransformer
from capture_weights import vit_weights

# Classifier used by the "Image Prediction" tab, loaded on CPU.
vision_transformer = VisionTransformer.from_name('ViT-B_16', num_classes=1000)
model_weights = torch.load('pretrained_weights/ViT-B_16_imagenet21k_imagenet2012.pth',
                           map_location=torch.device('cpu'))
vision_transformer.load_state_dict(model_weights)
# Modules are created in training mode; switch to eval so any dropout layers
# are disabled and repeated predictions for the same image are identical.
vision_transformer.eval()

# Preprocessing shared by both tabs: resize to 384x384, convert to a tensor and
# normalise each channel with the given mean / std.
data_transforms = transforms.Compose([
    transforms.Resize(size=(384, 384)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225],)])
38
+
39
def inv_normalize(tensor):
    """Min-max rescale an image tensor into the 0-255 range.

    The smallest element maps to 0 and the largest to just under 256, so a
    later truncating cast to ``uint8`` produces values in 0..255.

    Args:
        tensor: A non-empty ``torch.Tensor``.

    Returns:
        A tensor of the same shape. A constant input (max == min) yields all
        zeros instead of the NaNs a 0/0 division would produce.
    """
    low = tensor.min()
    span = tensor.max() - low
    if span == 0:
        # Every element equals the minimum: (tensor - low) is already zero,
        # so skip the division that would turn it into NaN.
        return (tensor - low) * 0.0
    return (tensor - low) / span * (256 - 1e-5)
43
+
44
def inv_transform(tensor, normalize=True):
    """Convert an image tensor back to a PIL image.

    Args:
        tensor: Image tensor whose first axis is moved to the end before the
            conversion (i.e. channel-first layout is expected).
        normalize: When True (the default) the values are first rescaled with
            ``inv_normalize``. When False they are used as-is and are simply
            cast to ``uint8``, so the caller must already supply 0..255 data.

    Returns:
        A ``PIL.Image.Image`` built from the ``uint8`` array.
    """
    # The flag used to be accepted but ignored; honour it now.
    if normalize:
        tensor = inv_normalize(tensor)
    array = tensor.detach().cpu().numpy()
    array = array.transpose(1, 2, 0).astype(np.uint8)
    return PIL.Image.fromarray(array)
50
+
51
def predict_image(image) -> Tuple[Dict, float]:
    """Classify an image and report how long the prediction took.

    Args:
        image: Input accepted by ``data_transforms`` (the interface passes a
            PIL image).

    Returns:
        A pair ``(predictions, seconds)``: ``predictions`` maps the three most
        probable class names to their softmax probabilities, ``seconds`` is the
        elapsed time rounded to five decimals. Preprocessing happens before
        the timer starts and is therefore not included.
    """
    pixels = data_transforms(image)
    began = timer()
    with torch.inference_mode():
        # The model is fed a batch of one; unpack its single row of logits.
        (scores,) = vision_transformer(pixels[None])
        top_probs, top_ids = torch.topk(torch.softmax(scores, dim=0), 3)
        labelled = {
            class_names[class_id]: prob.item()
            for class_id, prob in zip(top_ids, top_probs)
        }
    elapsed = round(timer() - began, 5)
    return labelled, elapsed
64
+
65
def get_attention_map(img, num_layer=5, get_mask=False):
    """Compute an attention-rollout visualisation for one image.

    The per-layer attention matrices returned by ``vit_weights`` are averaged
    over heads, combined with an identity matrix (to account for residual
    connections), row-normalised and multiplied cumulatively layer by layer.
    Row 0 of the selected layer's result (minus its first column) is reshaped
    to a square grid and resized to the image size.

    Args:
        img: PIL image; ``img.size`` is used as the output size.
        num_layer: Index into the stacked per-layer results (0-based).
        get_mask: When True return only the normalised mask; otherwise return
            the image multiplied by the mask.

    Returns:
        ``numpy.ndarray``: the float mask scaled so its maximum is 1 when
        ``get_mask`` is true, else a ``uint8`` masked image.
    """
    x = data_transforms(img)

    # Pure inference: do not record an autograd graph for the forward pass and
    # the rollout below (the model's parameters require grad by default).
    with torch.no_grad():
        _, att_mat = vit_weights(x.unsqueeze(0))

        # Stack the per-layer tensors and drop the batch axis of size one.
        att_mat = torch.stack(att_mat).squeeze(1)
        # Take the mean of the attention weights across the heads.
        att_mat = torch.mean(att_mat, dim=1)

        # To account for residual connections, add an identity matrix to the
        # attention matrix and re-normalize the weights.
        residual_att = torch.eye(att_mat.size(1))
        aug_att_mat = att_mat + residual_att
        aug_att_mat = aug_att_mat / aug_att_mat.sum(dim=-1).unsqueeze(-1)

        # Cumulatively multiply the weight matrices, layer by layer.
        joint_attentions = torch.zeros(aug_att_mat.size())
        joint_attentions[0] = aug_att_mat[0]
        for n in range(1, aug_att_mat.size(0)):
            joint_attentions[n] = torch.matmul(aug_att_mat[n], joint_attentions[n - 1])

        v = joint_attentions[num_layer]
        grid_size = int(np.sqrt(aug_att_mat.size(-1)))
        # Row 0, columns 1..: the first token's attention to all other tokens.
        mask = v[0, 1:].reshape(grid_size, grid_size).numpy()

    if get_mask:
        attn_map = cv2.resize(mask / mask.max(), img.size)
    else:
        # Add a trailing axis so the mask broadcasts over the colour channels.
        mask = cv2.resize(mask / mask.max(), img.size)[..., np.newaxis]
        attn_map = (mask * img).astype("uint8")
    return attn_map
95
+
96
# --- Tab 1: attention-map visualisation --------------------------------------
# Components are instantiated in the same order as before: inputs, then output.
_attention_inputs = [
    gr.Image(type="pil", label="Image"),
    gr.Dropdown(
        choices=[str(layer) for layer in range(1, 13)],
        label="Attention Layer",
        value="6",
        # "index" passes the position of the selected choice to the callback.
        type="index",
    ),
    gr.Checkbox(label="Show Mask?"),
]
_attention_output = gr.Image(type="pil", label="Attention Map").style(height=400)

attention_interface = gr.Interface(
    fn=get_attention_map,
    inputs=_attention_inputs,
    outputs=_attention_output,
    examples=example_list,
    title="Attention Maps 🔍",
    description="The ViT Base architecture has 12 transformer Encoder layers (12 attention heads in each).",
    article="From the dropdown menu, select the Encoder layer (tick the checkbox to visualize only the mask).",
)

# --- Tab 2: image classification ---------------------------------------------
_classification_input = gr.Image(type="pil", label="Image")
_classification_outputs = [
    gr.Label(num_top_classes=3, label="Predictions"),
    gr.Number(label="Prediction time (secs)"),
]

classification_interface = gr.Interface(
    fn=predict_image,
    inputs=_classification_input,
    outputs=_classification_outputs,
    examples=example_list,
    title="Object Identification ✅",
    description="ImageNet object identification using pretrained ViT Base (Patch Size: 16 | Image Size: 384) architecture.",
    article="Upload an image from the example list or choose one of your own [[ImageNet Classes](https://github.com/anishathalye/imagenet-simple-labels/blob/master/imagenet-simple-labels.json)].",
)

# Both interfaces are exposed as tabs of a single app.
demo = gr.TabbedInterface(
    [attention_interface, classification_interface],
    ["Visualize Attention Maps", "Image Prediction"],
    title="ImageNet 1K 📷",
)

if __name__ == "__main__":
    demo.launch()
capture_weights.py ADDED
@@ -0,0 +1,326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import numpy as np
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+
6
+ from torch.cuda.amp import autocast
7
+
8
+ from utils import (get_width_and_height_from_size, load_pretrained_weights, get_model_params)
9
+
10
+ VALID_MODELS = ('ViT-B_16', 'ViT-B_32', 'ViT-L_16', 'ViT-L_32')
11
+
12
class PositionEmbs(nn.Module):
    """Learnable position embedding added to a token sequence.

    Args:
        num_patches: Number of patch tokens; the embedding holds one extra
            position on top of this.
        emb_dim: Size of each token embedding.
        dropout_rate: Dropout probability applied after the addition; no
            dropout layer is created when it is not positive.
    """

    def __init__(self, num_patches, emb_dim, dropout_rate=0.1):
        super().__init__()
        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, emb_dim))
        self.dropout = nn.Dropout(dropout_rate) if dropout_rate > 0 else None

    @autocast()
    def forward(self, x):
        """Return ``x`` plus the position embedding, with dropout if configured."""
        embedded = x + self.pos_embedding
        if self.dropout is None:
            return embedded
        return self.dropout(embedded)
29
+
30
class MlpBlock(nn.Module):
    """Transformer feed-forward block: Linear -> GELU -> Linear.

    Args:
        in_dim: Size of the input features.
        mlp_dim: Size of the hidden layer.
        out_dim: Size of the output features.
        dropout_rate: Dropout probability applied after the activation and
            after the second linear layer; no dropout layers are created when
            it is not positive.
    """

    def __init__(self, in_dim, mlp_dim, out_dim, dropout_rate=0.1):
        super(MlpBlock, self).__init__()

        # init layers
        self.fc1 = nn.Linear(in_dim, mlp_dim)
        self.fc2 = nn.Linear(mlp_dim, out_dim)
        self.act = nn.GELU()
        if dropout_rate > 0.0:
            self.dropout1 = nn.Dropout(dropout_rate)
            self.dropout2 = nn.Dropout(dropout_rate)
        else:
            self.dropout1 = None
            self.dropout2 = None

    @autocast()
    def forward(self, x):
        """Apply the two-layer MLP to ``x`` and return the result."""
        out = self.fc1(x)
        out = self.act(out)
        if self.dropout1 is not None:
            out = self.dropout1(out)

        out = self.fc2(out)
        # dropout2 is None when dropout is disabled; calling it unguarded
        # raised "TypeError: 'NoneType' object is not callable".
        if self.dropout2 is not None:
            out = self.dropout2(out)
        return out
57
+
58
+
59
class LinearGeneral(nn.Module):
    """Linear layer whose weight can span several input and output axes.

    Args:
        in_dim: Tuple with the sizes of the input axes to contract over.
        feat_dim: Tuple with the sizes of the output axes.
    """

    def __init__(self, in_dim=(768, ), feat_dim=(12, 64)):
        super().__init__()

        weight_shape = (*in_dim, *feat_dim)
        self.weight = nn.Parameter(torch.randn(weight_shape))
        self.bias = nn.Parameter(torch.zeros(tuple(feat_dim)))

    @autocast()
    def forward(self, x, dims):
        """Contract ``x`` with the weight over ``dims`` and add the bias.

        ``dims`` is forwarded unchanged to ``torch.tensordot``.
        """
        contracted = torch.tensordot(x, self.weight, dims=dims)
        return contracted + self.bias
70
+
71
+
72
class SelfAttention(nn.Module):
    """Multi-head self-attention that also returns its attention weights.

    Args:
        in_dim: Size of each input token embedding.
        heads: Number of attention heads; each head gets ``in_dim // heads``
            features.
        dropout_rate: Probability for the dropout layer stored on the module.
    """

    def __init__(self, in_dim, heads=8, dropout_rate=0.1):
        super().__init__()
        self.heads = heads
        self.head_dim = in_dim // heads
        self.scale = self.head_dim**0.5

        # Created in the order query, key, value so random initialisation
        # consumes the generator exactly as before.
        for name in ("query", "key", "value"):
            setattr(self, name, LinearGeneral((in_dim, ), (self.heads, self.head_dim)))
        self.out = LinearGeneral((self.heads, self.head_dim), (in_dim, ))

        # NOTE(review): this layer is stored but forward() never applies it.
        self.dropout = nn.Dropout(dropout_rate) if dropout_rate > 0 else None

    @autocast()
    def forward(self, x):
        """Attend over the tokens of ``x``.

        Returns:
            A pair ``(out, attn_weights)``: the projected attention output and
            the softmax-normalised attention weights per head.
        """
        # Unpacking fails early for inputs that are not rank 3.
        _batch, _tokens, _width = x.shape

        heads_first = (0, 2, 1, 3)
        q, k, v = (
            proj(x, dims=([2], [0])).permute(*heads_first)
            for proj in (self.query, self.key, self.value)
        )

        scores = torch.matmul(q, k.transpose(-2, -1)) / self.scale
        attn_weights = F.softmax(scores, dim=-1)

        # Weighted sum of the values, then move the head axis back behind the
        # token axis before merging heads in the output projection.
        context = torch.matmul(attn_weights, v).permute(*heads_first)
        merged = self.out(context, dims=([2, 3], [0, 1]))

        return merged, attn_weights
109
+
110
+
111
class EncoderBlock(nn.Module):
    """One transformer encoder layer: pre-norm attention followed by a pre-norm MLP.

    Args:
        in_dim: Size of each token embedding.
        mlp_dim: Hidden size of the feed-forward block.
        num_heads: Number of attention heads.
        dropout_rate: Dropout probability after attention and inside the MLP.
        attn_dropout_rate: Dropout probability handed to the attention module.
    """

    def __init__(self, in_dim, mlp_dim, num_heads, dropout_rate=0.1, attn_dropout_rate=0.1):
        super().__init__()

        self.norm1 = nn.LayerNorm(in_dim)
        self.attn = SelfAttention(in_dim, heads=num_heads, dropout_rate=attn_dropout_rate)
        self.dropout = nn.Dropout(dropout_rate) if dropout_rate > 0 else None
        self.norm2 = nn.LayerNorm(in_dim)
        self.mlp = MlpBlock(in_dim, mlp_dim, in_dim, dropout_rate)

    @autocast()
    def forward(self, x):
        """Run the layer on ``x``.

        Returns:
            A pair ``(out, attn_weights)``: the layer output and the attention
            weights produced by the attention sub-layer.
        """
        # Attention sub-layer with residual connection.
        attended, attn_weights = self.attn(self.norm1(x))
        if self.dropout is not None:
            attended = self.dropout(attended)
        attended += x

        # Feed-forward sub-layer with residual connection.
        transformed = self.mlp(self.norm2(attended))
        transformed += attended
        return transformed, attn_weights
138
+
139
+
140
class Encoder(nn.Module):
    """Stack of encoder blocks preceded by position embeddings.

    Args:
        num_patches: Number of patch tokens (passed to the position embedding).
        emb_dim: Size of each token embedding.
        mlp_dim: Hidden size of every block's feed-forward part.
        num_layers: Number of encoder blocks.
        num_heads: Attention heads per block.
        dropout_rate: Dropout probability for embeddings and blocks.
        attn_dropout_rate: Dropout probability handed to the attention modules.
    """

    def __init__(self,
                 num_patches,
                 emb_dim,
                 mlp_dim,
                 num_layers=12,
                 num_heads=12,
                 dropout_rate=0.1,
                 attn_dropout_rate=0.0):
        super().__init__()

        # positional embedding
        self.pos_embedding = PositionEmbs(num_patches, emb_dim, dropout_rate)

        # encoder blocks, all operating on emb_dim-sized tokens
        self.encoder_layers = nn.ModuleList([
            EncoderBlock(emb_dim, mlp_dim, num_heads, dropout_rate, attn_dropout_rate)
            for _ in range(num_layers)
        ])
        self.norm = nn.LayerNorm(emb_dim)

    @autocast()
    def forward(self, x):
        """Encode ``x``.

        Returns:
            A pair ``(out, attn_weights)``: the layer-normalised output of the
            last block and a list with each block's attention weights, in
            block order.
        """
        hidden = self.pos_embedding(x)

        per_layer_weights = []
        for block in self.encoder_layers:
            hidden, block_weights = block(hidden)
            per_layer_weights.append(block_weights)

        return self.norm(hidden), per_layer_weights
173
+
174
+
175
class VisionTransformer(nn.Module):
    """Vision Transformer that returns logits together with attention weights.

    Most easily created with the ``from_name`` or ``from_pretrained`` methods.

    Args:
        params (namedtuple): A set of Params. Read for ``emb_dim``,
            ``mlp_dim``, ``num_layers``, ``num_heads``, ``dropout_rate``,
            ``attn_dropout_rate``, ``num_classes``, ``image_size`` and
            ``patch_size``.

    References:
        [1] https://arxiv.org/abs/2010.11929 (An Image is Worth 16x16 Words:
            Transformers for Image Recognition at Scale)

    Example:
        >>> model = VisionTransformer.from_name('ViT-B_16', num_classes=1000)
        >>> model.eval()
        >>> logits, attn_weights = model(images)  # images: (batch, 3, H, W)
    """

    def __init__(self, params=None):
        super().__init__()
        self._params = params
        width = self._params.emb_dim
        patch = self.patch_size

        # Non-overlapping patches are embedded by a strided convolution.
        self.embedding = nn.Conv2d(3, width, kernel_size=patch, stride=patch)
        # class token
        self.cls_token = nn.Parameter(torch.zeros(1, 1, width))

        # transformer
        self.transformer = Encoder(num_patches=self.num_patches,
                                   emb_dim=width,
                                   mlp_dim=self._params.mlp_dim,
                                   num_layers=self._params.num_layers,
                                   num_heads=self._params.num_heads,
                                   dropout_rate=self._params.dropout_rate,
                                   attn_dropout_rate=self._params.attn_dropout_rate)

        # classifier
        self.classifier = nn.Linear(width, self._params.num_classes)

    @property
    def image_size(self):
        """Configured image size, as returned by get_width_and_height_from_size."""
        return get_width_and_height_from_size(self._params.image_size)

    @property
    def patch_size(self):
        """Configured patch size, as returned by get_width_and_height_from_size."""
        return get_width_and_height_from_size(self._params.patch_size)

    @property
    def num_patches(self):
        """Number of whole patches that fit into the configured image size."""
        img_h, img_w = self.image_size
        patch_h, patch_w = self.patch_size
        return (img_h // patch_h) * (img_w // patch_w)

    @autocast()
    def extract_features(self, x):
        """Embed ``x`` into tokens and run the encoder.

        Returns:
            A pair ``(feat, attn_weights)`` as produced by the encoder; token 0
            of ``feat`` corresponds to the prepended class token.
        """
        patches = self.embedding(x)                 # (n, c, gh, gw)
        tokens = patches.flatten(2).transpose(1, 2)  # (n, gh * gw, c)

        # prepend class token, one copy per batch element
        cls_tokens = self.cls_token.repeat(tokens.shape[0], 1, 1)
        tokens = torch.cat([cls_tokens, tokens], dim=1)

        # transformer
        return self.transformer(tokens)

    @autocast()
    def forward(self, x):
        """Classify ``x``.

        Returns:
            A pair ``(logits, attn_weights)``: class scores computed from the
            class token, and the encoder's per-layer attention weights.
        """
        feat, attn_weights = self.extract_features(x)

        # classifier on the class-token feature only
        return self.classifier(feat[:, 0]), attn_weights

    @classmethod
    def from_name(cls, model_name, in_channels=3, **override_params):
        """Create a vision transformer model according to its name.

        Args:
            model_name (str): Name for vision transformer.
            in_channels (int): Input data's channel number.
            override_params (other key word params):
                Params to override model's global_params.
                Optional key:
                    'image_size', 'patch_size',
                    'emb_dim', 'mlp_dim',
                    'num_heads', 'num_layers',
                    'num_classes', 'attn_dropout_rate',
                    'dropout_rate'

        Returns:
            A vision transformer model.

        Raises:
            ValueError: If ``model_name`` is not one of VALID_MODELS.
        """
        cls._check_model_name_is_valid(model_name)
        model = cls(get_model_params(model_name, override_params))
        model._change_in_channels(in_channels)
        return model

    @classmethod
    def from_pretrained(cls, model_name, weights_path=None, in_channels=3, num_classes=1000, **override_params):
        """Create a vision transformer model by name and load pretrained weights.

        Args:
            model_name (str): Name for vision transformer.
            weights_path (None or str):
                str: path to pretrained weights file on the local disk.
                None: use pretrained weights downloaded from the Internet.
            in_channels (int): Input data's channel number.
            num_classes (int):
                Number of categories for classification.
                It controls the output size for final linear layer.
            override_params (other key word params):
                Params to override model's global_params.
                Optional key:
                    'image_size', 'patch_size',
                    'emb_dim', 'mlp_dim',
                    'num_heads', 'num_layers',
                    'num_classes', 'attn_dropout_rate',
                    'dropout_rate'

        Returns:
            A pretrained vision transformer model.
        """
        model = cls.from_name(model_name, num_classes=num_classes, **override_params)
        # The classifier weights are requested only for the 1000-class head.
        keep_classifier = (num_classes == 1000)
        load_pretrained_weights(model, model_name, weights_path=weights_path, load_fc=keep_classifier)
        model._change_in_channels(in_channels)
        return model

    @classmethod
    def _check_model_name_is_valid(cls, model_name):
        """Validate a model name.

        Args:
            model_name (str): Name for vision transformer.

        Raises:
            ValueError: If ``model_name`` is not one of VALID_MODELS.
        """
        if model_name in VALID_MODELS:
            return
        raise ValueError('model_name should be one of: ' + ', '.join(VALID_MODELS))

    def _change_in_channels(self, in_channels):
        """Replace the patch-embedding convolution when in_channels is not 3.

        The replacement layer is freshly initialised.

        Args:
            in_channels (int): Input data's channel number.
        """
        if in_channels == 3:
            return
        self.embedding = nn.Conv2d(in_channels,
                                   self._params.emb_dim,
                                   kernel_size=self.patch_size,
                                   stride=self.patch_size)
322
+
323
# Module-level model instance imported by the app to obtain attention weights.
vit_weights = VisionTransformer.from_name('ViT-B_16', num_classes=1000)
model_weights = torch.load('pretrained_weights/ViT-B_16_imagenet21k_imagenet2012.pth',
                           map_location=torch.device('cpu'))
vit_weights.load_state_dict(model_weights)
# Modules are created in training mode; switch to eval so the dropout layers
# are inactive and the returned attention weights are deterministic.
vit_weights.eval()
examples/carrier.jpg ADDED
examples/chicken.jpg ADDED
examples/eagle.jpg ADDED
examples/parachute.jpg ADDED
labels/imagenet1K_labels.txt ADDED
@@ -0,0 +1,1000 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {0: 'tench, Tinca tinca',
2
+ 1: 'goldfish, Carassius auratus',
3
+ 2: 'great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias',
4
+ 3: 'tiger shark, Galeocerdo cuvieri',
5
+ 4: 'hammerhead, hammerhead shark',
6
+ 5: 'electric ray, crampfish, numbfish, torpedo',
7
+ 6: 'stingray',
8
+ 7: 'cock',
9
+ 8: 'hen',
10
+ 9: 'ostrich, Struthio camelus',
11
+ 10: 'brambling, Fringilla montifringilla',
12
+ 11: 'goldfinch, Carduelis carduelis',
13
+ 12: 'house finch, linnet, Carpodacus mexicanus',
14
+ 13: 'junco, snowbird',
15
+ 14: 'indigo bunting, indigo finch, indigo bird, Passerina cyanea',
16
+ 15: 'robin, American robin, Turdus migratorius',
17
+ 16: 'bulbul',
18
+ 17: 'jay',
19
+ 18: 'magpie',
20
+ 19: 'chickadee',
21
+ 20: 'water ouzel, dipper',
22
+ 21: 'kite',
23
+ 22: 'bald eagle, American eagle, Haliaeetus leucocephalus',
24
+ 23: 'vulture',
25
+ 24: 'great grey owl, great gray owl, Strix nebulosa',
26
+ 25: 'European fire salamander, Salamandra salamandra',
27
+ 26: 'common newt, Triturus vulgaris',
28
+ 27: 'eft',
29
+ 28: 'spotted salamander, Ambystoma maculatum',
30
+ 29: 'axolotl, mud puppy, Ambystoma mexicanum',
31
+ 30: 'bullfrog, Rana catesbeiana',
32
+ 31: 'tree frog, tree-frog',
33
+ 32: 'tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui',
34
+ 33: 'loggerhead, loggerhead turtle, Caretta caretta',
35
+ 34: 'leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea',
36
+ 35: 'mud turtle',
37
+ 36: 'terrapin',
38
+ 37: 'box turtle, box tortoise',
39
+ 38: 'banded gecko',
40
+ 39: 'common iguana, iguana, Iguana iguana',
41
+ 40: 'American chameleon, anole, Anolis carolinensis',
42
+ 41: 'whiptail, whiptail lizard',
43
+ 42: 'agama',
44
+ 43: 'frilled lizard, Chlamydosaurus kingi',
45
+ 44: 'alligator lizard',
46
+ 45: 'Gila monster, Heloderma suspectum',
47
+ 46: 'green lizard, Lacerta viridis',
48
+ 47: 'African chameleon, Chamaeleo chamaeleon',
49
+ 48: 'Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis',
50
+ 49: 'African crocodile, Nile crocodile, Crocodylus niloticus',
51
+ 50: 'American alligator, Alligator mississipiensis',
52
+ 51: 'triceratops',
53
+ 52: 'thunder snake, worm snake, Carphophis amoenus',
54
+ 53: 'ringneck snake, ring-necked snake, ring snake',
55
+ 54: 'hognose snake, puff adder, sand viper',
56
+ 55: 'green snake, grass snake',
57
+ 56: 'king snake, kingsnake',
58
+ 57: 'garter snake, grass snake',
59
+ 58: 'water snake',
60
+ 59: 'vine snake',
61
+ 60: 'night snake, Hypsiglena torquata',
62
+ 61: 'boa constrictor, Constrictor constrictor',
63
+ 62: 'rock python, rock snake, Python sebae',
64
+ 63: 'Indian cobra, Naja naja',
65
+ 64: 'green mamba',
66
+ 65: 'sea snake',
67
+ 66: 'horned viper, cerastes, sand viper, horned asp, Cerastes cornutus',
68
+ 67: 'diamondback, diamondback rattlesnake, Crotalus adamanteus',
69
+ 68: 'sidewinder, horned rattlesnake, Crotalus cerastes',
70
+ 69: 'trilobite',
71
+ 70: 'harvestman, daddy longlegs, Phalangium opilio',
72
+ 71: 'scorpion',
73
+ 72: 'black and gold garden spider, Argiope aurantia',
74
+ 73: 'barn spider, Araneus cavaticus',
75
+ 74: 'garden spider, Aranea diademata',
76
+ 75: 'black widow, Latrodectus mactans',
77
+ 76: 'tarantula',
78
+ 77: 'wolf spider, hunting spider',
79
+ 78: 'tick',
80
+ 79: 'centipede',
81
+ 80: 'black grouse',
82
+ 81: 'ptarmigan',
83
+ 82: 'ruffed grouse, partridge, Bonasa umbellus',
84
+ 83: 'prairie chicken, prairie grouse, prairie fowl',
85
+ 84: 'peacock',
86
+ 85: 'quail',
87
+ 86: 'partridge',
88
+ 87: 'African grey, African gray, Psittacus erithacus',
89
+ 88: 'macaw',
90
+ 89: 'sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita',
91
+ 90: 'lorikeet',
92
+ 91: 'coucal',
93
+ 92: 'bee eater',
94
+ 93: 'hornbill',
95
+ 94: 'hummingbird',
96
+ 95: 'jacamar',
97
+ 96: 'toucan',
98
+ 97: 'drake',
99
+ 98: 'red-breasted merganser, Mergus serrator',
100
+ 99: 'goose',
101
+ 100: 'black swan, Cygnus atratus',
102
+ 101: 'tusker',
103
+ 102: 'echidna, spiny anteater, anteater',
104
+ 103: 'platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus',
105
+ 104: 'wallaby, brush kangaroo',
106
+ 105: 'koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus',
107
+ 106: 'wombat',
108
+ 107: 'jellyfish',
109
+ 108: 'sea anemone, anemone',
110
+ 109: 'brain coral',
111
+ 110: 'flatworm, platyhelminth',
112
+ 111: 'nematode, nematode worm, roundworm',
113
+ 112: 'conch',
114
+ 113: 'snail',
115
+ 114: 'slug',
116
+ 115: 'sea slug, nudibranch',
117
+ 116: 'chiton, coat-of-mail shell, sea cradle, polyplacophore',
118
+ 117: 'chambered nautilus, pearly nautilus, nautilus',
119
+ 118: 'Dungeness crab, Cancer magister',
120
+ 119: 'rock crab, Cancer irroratus',
121
+ 120: 'fiddler crab',
122
+ 121: 'king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica',
123
+ 122: 'American lobster, Northern lobster, Maine lobster, Homarus americanus',
124
+ 123: 'spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish',
125
+ 124: 'crayfish, crawfish, crawdad, crawdaddy',
126
+ 125: 'hermit crab',
127
+ 126: 'isopod',
128
+ 127: 'white stork, Ciconia ciconia',
129
+ 128: 'black stork, Ciconia nigra',
130
+ 129: 'spoonbill',
131
+ 130: 'flamingo',
132
+ 131: 'little blue heron, Egretta caerulea',
133
+ 132: 'American egret, great white heron, Egretta albus',
134
+ 133: 'bittern',
135
+ 134: 'crane',
136
+ 135: 'limpkin, Aramus pictus',
137
+ 136: 'European gallinule, Porphyrio porphyrio',
138
+ 137: 'American coot, marsh hen, mud hen, water hen, Fulica americana',
139
+ 138: 'bustard',
140
+ 139: 'ruddy turnstone, Arenaria interpres',
141
+ 140: 'red-backed sandpiper, dunlin, Erolia alpina',
142
+ 141: 'redshank, Tringa totanus',
143
+ 142: 'dowitcher',
144
+ 143: 'oystercatcher, oyster catcher',
145
+ 144: 'pelican',
146
+ 145: 'king penguin, Aptenodytes patagonica',
147
+ 146: 'albatross, mollymawk',
148
+ 147: 'grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus',
149
+ 148: 'killer whale, killer, orca, grampus, sea wolf, Orcinus orca',
150
+ 149: 'dugong, Dugong dugon',
151
+ 150: 'sea lion',
152
+ 151: 'Chihuahua',
153
+ 152: 'Japanese spaniel',
154
+ 153: 'Maltese dog, Maltese terrier, Maltese',
155
+ 154: 'Pekinese, Pekingese, Peke',
156
+ 155: 'Shih-Tzu',
157
+ 156: 'Blenheim spaniel',
158
+ 157: 'papillon',
159
+ 158: 'toy terrier',
160
+ 159: 'Rhodesian ridgeback',
161
+ 160: 'Afghan hound, Afghan',
162
+ 161: 'basset, basset hound',
163
+ 162: 'beagle',
164
+ 163: 'bloodhound, sleuthhound',
165
+ 164: 'bluetick',
166
+ 165: 'black-and-tan coonhound',
167
+ 166: 'Walker hound, Walker foxhound',
168
+ 167: 'English foxhound',
169
+ 168: 'redbone',
170
+ 169: 'borzoi, Russian wolfhound',
171
+ 170: 'Irish wolfhound',
172
+ 171: 'Italian greyhound',
173
+ 172: 'whippet',
174
+ 173: 'Ibizan hound, Ibizan Podenco',
175
+ 174: 'Norwegian elkhound, elkhound',
176
+ 175: 'otterhound, otter hound',
177
+ 176: 'Saluki, gazelle hound',
178
+ 177: 'Scottish deerhound, deerhound',
179
+ 178: 'Weimaraner',
180
+ 179: 'Staffordshire bullterrier, Staffordshire bull terrier',
181
+ 180: 'American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier',
182
+ 181: 'Bedlington terrier',
183
+ 182: 'Border terrier',
184
+ 183: 'Kerry blue terrier',
185
+ 184: 'Irish terrier',
186
+ 185: 'Norfolk terrier',
187
+ 186: 'Norwich terrier',
188
+ 187: 'Yorkshire terrier',
189
+ 188: 'wire-haired fox terrier',
190
+ 189: 'Lakeland terrier',
191
+ 190: 'Sealyham terrier, Sealyham',
192
+ 191: 'Airedale, Airedale terrier',
193
+ 192: 'cairn, cairn terrier',
194
+ 193: 'Australian terrier',
195
+ 194: 'Dandie Dinmont, Dandie Dinmont terrier',
196
+ 195: 'Boston bull, Boston terrier',
197
+ 196: 'miniature schnauzer',
198
+ 197: 'giant schnauzer',
199
+ 198: 'standard schnauzer',
200
+ 199: 'Scotch terrier, Scottish terrier, Scottie',
201
+ 200: 'Tibetan terrier, chrysanthemum dog',
202
+ 201: 'silky terrier, Sydney silky',
203
+ 202: 'soft-coated wheaten terrier',
204
+ 203: 'West Highland white terrier',
205
+ 204: 'Lhasa, Lhasa apso',
206
+ 205: 'flat-coated retriever',
207
+ 206: 'curly-coated retriever',
208
+ 207: 'golden retriever',
209
+ 208: 'Labrador retriever',
210
+ 209: 'Chesapeake Bay retriever',
211
+ 210: 'German short-haired pointer',
212
+ 211: 'vizsla, Hungarian pointer',
213
+ 212: 'English setter',
214
+ 213: 'Irish setter, red setter',
215
+ 214: 'Gordon setter',
216
+ 215: 'Brittany spaniel',
217
+ 216: 'clumber, clumber spaniel',
218
+ 217: 'English springer, English springer spaniel',
219
+ 218: 'Welsh springer spaniel',
220
+ 219: 'cocker spaniel, English cocker spaniel, cocker',
221
+ 220: 'Sussex spaniel',
222
+ 221: 'Irish water spaniel',
223
+ 222: 'kuvasz',
224
+ 223: 'schipperke',
225
+ 224: 'groenendael',
226
+ 225: 'malinois',
227
+ 226: 'briard',
228
+ 227: 'kelpie',
229
+ 228: 'komondor',
230
+ 229: 'Old English sheepdog, bobtail',
231
+ 230: 'Shetland sheepdog, Shetland sheep dog, Shetland',
232
+ 231: 'collie',
233
+ 232: 'Border collie',
234
+ 233: 'Bouvier des Flandres, Bouviers des Flandres',
235
+ 234: 'Rottweiler',
236
+ 235: 'German shepherd, German shepherd dog, German police dog, alsatian',
237
+ 236: 'Doberman, Doberman pinscher',
238
+ 237: 'miniature pinscher',
239
+ 238: 'Greater Swiss Mountain dog',
240
+ 239: 'Bernese mountain dog',
241
+ 240: 'Appenzeller',
242
+ 241: 'EntleBucher',
243
+ 242: 'boxer',
244
+ 243: 'bull mastiff',
245
+ 244: 'Tibetan mastiff',
246
+ 245: 'French bulldog',
247
+ 246: 'Great Dane',
248
+ 247: 'Saint Bernard, St Bernard',
249
+ 248: 'Eskimo dog, husky',
250
+ 249: 'malamute, malemute, Alaskan malamute',
251
+ 250: 'Siberian husky',
252
+ 251: 'dalmatian, coach dog, carriage dog',
253
+ 252: 'affenpinscher, monkey pinscher, monkey dog',
254
+ 253: 'basenji',
255
+ 254: 'pug, pug-dog',
256
+ 255: 'Leonberg',
257
+ 256: 'Newfoundland, Newfoundland dog',
258
+ 257: 'Great Pyrenees',
259
+ 258: 'Samoyed, Samoyede',
260
+ 259: 'Pomeranian',
261
+ 260: 'chow, chow chow',
262
+ 261: 'keeshond',
263
+ 262: 'Brabancon griffon',
264
+ 263: 'Pembroke, Pembroke Welsh corgi',
265
+ 264: 'Cardigan, Cardigan Welsh corgi',
266
+ 265: 'toy poodle',
267
+ 266: 'miniature poodle',
268
+ 267: 'standard poodle',
269
+ 268: 'Mexican hairless',
270
+ 269: 'timber wolf, grey wolf, gray wolf, Canis lupus',
271
+ 270: 'white wolf, Arctic wolf, Canis lupus tundrarum',
272
+ 271: 'red wolf, maned wolf, Canis rufus, Canis niger',
273
+ 272: 'coyote, prairie wolf, brush wolf, Canis latrans',
274
+ 273: 'dingo, warrigal, warragal, Canis dingo',
275
+ 274: 'dhole, Cuon alpinus',
276
+ 275: 'African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus',
277
+ 276: 'hyena, hyaena',
278
+ 277: 'red fox, Vulpes vulpes',
279
+ 278: 'kit fox, Vulpes macrotis',
280
+ 279: 'Arctic fox, white fox, Alopex lagopus',
281
+ 280: 'grey fox, gray fox, Urocyon cinereoargenteus',
282
+ 281: 'tabby, tabby cat',
283
+ 282: 'tiger cat',
284
+ 283: 'Persian cat',
285
+ 284: 'Siamese cat, Siamese',
286
+ 285: 'Egyptian cat',
287
+ 286: 'cougar, puma, catamount, mountain lion, painter, panther, Felis concolor',
288
+ 287: 'lynx, catamount',
289
+ 288: 'leopard, Panthera pardus',
290
+ 289: 'snow leopard, ounce, Panthera uncia',
291
+ 290: 'jaguar, panther, Panthera onca, Felis onca',
292
+ 291: 'lion, king of beasts, Panthera leo',
293
+ 292: 'tiger, Panthera tigris',
294
+ 293: 'cheetah, chetah, Acinonyx jubatus',
295
+ 294: 'brown bear, bruin, Ursus arctos',
296
+ 295: 'American black bear, black bear, Ursus americanus, Euarctos americanus',
297
+ 296: 'ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus',
298
+ 297: 'sloth bear, Melursus ursinus, Ursus ursinus',
299
+ 298: 'mongoose',
300
+ 299: 'meerkat, mierkat',
301
+ 300: 'tiger beetle',
302
+ 301: 'ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle',
303
+ 302: 'ground beetle, carabid beetle',
304
+ 303: 'long-horned beetle, longicorn, longicorn beetle',
305
+ 304: 'leaf beetle, chrysomelid',
306
+ 305: 'dung beetle',
307
+ 306: 'rhinoceros beetle',
308
+ 307: 'weevil',
309
+ 308: 'fly',
310
+ 309: 'bee',
311
+ 310: 'ant, emmet, pismire',
312
+ 311: 'grasshopper, hopper',
313
+ 312: 'cricket',
314
+ 313: 'walking stick, walkingstick, stick insect',
315
+ 314: 'cockroach, roach',
316
+ 315: 'mantis, mantid',
317
+ 316: 'cicada, cicala',
318
+ 317: 'leafhopper',
319
+ 318: 'lacewing, lacewing fly',
320
+ 319: "dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk",
321
+ 320: 'damselfly',
322
+ 321: 'admiral',
323
+ 322: 'ringlet, ringlet butterfly',
324
+ 323: 'monarch, monarch butterfly, milkweed butterfly, Danaus plexippus',
325
+ 324: 'cabbage butterfly',
326
+ 325: 'sulphur butterfly, sulfur butterfly',
327
+ 326: 'lycaenid, lycaenid butterfly',
328
+ 327: 'starfish, sea star',
329
+ 328: 'sea urchin',
330
+ 329: 'sea cucumber, holothurian',
331
+ 330: 'wood rabbit, cottontail, cottontail rabbit',
332
+ 331: 'hare',
333
+ 332: 'Angora, Angora rabbit',
334
+ 333: 'hamster',
335
+ 334: 'porcupine, hedgehog',
336
+ 335: 'fox squirrel, eastern fox squirrel, Sciurus niger',
337
+ 336: 'marmot',
338
+ 337: 'beaver',
339
+ 338: 'guinea pig, Cavia cobaya',
340
+ 339: 'sorrel',
341
+ 340: 'zebra',
342
+ 341: 'hog, pig, grunter, squealer, Sus scrofa',
343
+ 342: 'wild boar, boar, Sus scrofa',
344
+ 343: 'warthog',
345
+ 344: 'hippopotamus, hippo, river horse, Hippopotamus amphibius',
346
+ 345: 'ox',
347
+ 346: 'water buffalo, water ox, Asiatic buffalo, Bubalus bubalis',
348
+ 347: 'bison',
349
+ 348: 'ram, tup',
350
+ 349: 'bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis',
351
+ 350: 'ibex, Capra ibex',
352
+ 351: 'hartebeest',
353
+ 352: 'impala, Aepyceros melampus',
354
+ 353: 'gazelle',
355
+ 354: 'Arabian camel, dromedary, Camelus dromedarius',
356
+ 355: 'llama',
357
+ 356: 'weasel',
358
+ 357: 'mink',
359
+ 358: 'polecat, fitch, foulmart, foumart, Mustela putorius',
360
+ 359: 'black-footed ferret, ferret, Mustela nigripes',
361
+ 360: 'otter',
362
+ 361: 'skunk, polecat, wood pussy',
363
+ 362: 'badger',
364
+ 363: 'armadillo',
365
+ 364: 'three-toed sloth, ai, Bradypus tridactylus',
366
+ 365: 'orangutan, orang, orangutang, Pongo pygmaeus',
367
+ 366: 'gorilla, Gorilla gorilla',
368
+ 367: 'chimpanzee, chimp, Pan troglodytes',
369
+ 368: 'gibbon, Hylobates lar',
370
+ 369: 'siamang, Hylobates syndactylus, Symphalangus syndactylus',
371
+ 370: 'guenon, guenon monkey',
372
+ 371: 'patas, hussar monkey, Erythrocebus patas',
373
+ 372: 'baboon',
374
+ 373: 'macaque',
375
+ 374: 'langur',
376
+ 375: 'colobus, colobus monkey',
377
+ 376: 'proboscis monkey, Nasalis larvatus',
378
+ 377: 'marmoset',
379
+ 378: 'capuchin, ringtail, Cebus capucinus',
380
+ 379: 'howler monkey, howler',
381
+ 380: 'titi, titi monkey',
382
+ 381: 'spider monkey, Ateles geoffroyi',
383
+ 382: 'squirrel monkey, Saimiri sciureus',
384
+ 383: 'Madagascar cat, ring-tailed lemur, Lemur catta',
385
+ 384: 'indri, indris, Indri indri, Indri brevicaudatus',
386
+ 385: 'Indian elephant, Elephas maximus',
387
+ 386: 'African elephant, Loxodonta africana',
388
+ 387: 'lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens',
389
+ 388: 'giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca',
390
+ 389: 'barracouta, snoek',
391
+ 390: 'eel',
392
+ 391: 'coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch',
393
+ 392: 'rock beauty, Holocanthus tricolor',
394
+ 393: 'anemone fish',
395
+ 394: 'sturgeon',
396
+ 395: 'gar, garfish, garpike, billfish, Lepisosteus osseus',
397
+ 396: 'lionfish',
398
+ 397: 'puffer, pufferfish, blowfish, globefish',
399
+ 398: 'abacus',
400
+ 399: 'abaya',
401
+ 400: "academic gown, academic robe, judge's robe",
402
+ 401: 'accordion, piano accordion, squeeze box',
403
+ 402: 'acoustic guitar',
404
+ 403: 'aircraft carrier, carrier, flattop, attack aircraft carrier',
405
+ 404: 'airliner',
406
+ 405: 'airship, dirigible',
407
+ 406: 'altar',
408
+ 407: 'ambulance',
409
+ 408: 'amphibian, amphibious vehicle',
410
+ 409: 'analog clock',
411
+ 410: 'apiary, bee house',
412
+ 411: 'apron',
413
+ 412: 'ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin',
414
+ 413: 'assault rifle, assault gun',
415
+ 414: 'backpack, back pack, knapsack, packsack, rucksack, haversack',
416
+ 415: 'bakery, bakeshop, bakehouse',
417
+ 416: 'balance beam, beam',
418
+ 417: 'balloon',
419
+ 418: 'ballpoint, ballpoint pen, ballpen, Biro',
420
+ 419: 'Band Aid',
421
+ 420: 'banjo',
422
+ 421: 'bannister, banister, balustrade, balusters, handrail',
423
+ 422: 'barbell',
424
+ 423: 'barber chair',
425
+ 424: 'barbershop',
426
+ 425: 'barn',
427
+ 426: 'barometer',
428
+ 427: 'barrel, cask',
429
+ 428: 'barrow, garden cart, lawn cart, wheelbarrow',
430
+ 429: 'baseball',
431
+ 430: 'basketball',
432
+ 431: 'bassinet',
433
+ 432: 'bassoon',
434
+ 433: 'bathing cap, swimming cap',
435
+ 434: 'bath towel',
436
+ 435: 'bathtub, bathing tub, bath, tub',
437
+ 436: 'beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon',
438
+ 437: 'beacon, lighthouse, beacon light, pharos',
439
+ 438: 'beaker',
440
+ 439: 'bearskin, busby, shako',
441
+ 440: 'beer bottle',
442
+ 441: 'beer glass',
443
+ 442: 'bell cote, bell cot',
444
+ 443: 'bib',
445
+ 444: 'bicycle-built-for-two, tandem bicycle, tandem',
446
+ 445: 'bikini, two-piece',
447
+ 446: 'binder, ring-binder',
448
+ 447: 'binoculars, field glasses, opera glasses',
449
+ 448: 'birdhouse',
450
+ 449: 'boathouse',
451
+ 450: 'bobsled, bobsleigh, bob',
452
+ 451: 'bolo tie, bolo, bola tie, bola',
453
+ 452: 'bonnet, poke bonnet',
454
+ 453: 'bookcase',
455
+ 454: 'bookshop, bookstore, bookstall',
456
+ 455: 'bottlecap',
457
+ 456: 'bow',
458
+ 457: 'bow tie, bow-tie, bowtie',
459
+ 458: 'brass, memorial tablet, plaque',
460
+ 459: 'brassiere, bra, bandeau',
461
+ 460: 'breakwater, groin, groyne, mole, bulwark, seawall, jetty',
462
+ 461: 'breastplate, aegis, egis',
463
+ 462: 'broom',
464
+ 463: 'bucket, pail',
465
+ 464: 'buckle',
466
+ 465: 'bulletproof vest',
467
+ 466: 'bullet train, bullet',
468
+ 467: 'butcher shop, meat market',
469
+ 468: 'cab, hack, taxi, taxicab',
470
+ 469: 'caldron, cauldron',
471
+ 470: 'candle, taper, wax light',
472
+ 471: 'cannon',
473
+ 472: 'canoe',
474
+ 473: 'can opener, tin opener',
475
+ 474: 'cardigan',
476
+ 475: 'car mirror',
477
+ 476: 'carousel, carrousel, merry-go-round, roundabout, whirligig',
478
+ 477: "carpenter's kit, tool kit",
479
+ 478: 'carton',
480
+ 479: 'car wheel',
481
+ 480: 'cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM',
482
+ 481: 'cassette',
483
+ 482: 'cassette player',
484
+ 483: 'castle',
485
+ 484: 'catamaran',
486
+ 485: 'CD player',
487
+ 486: 'cello, violoncello',
488
+ 487: 'cellular telephone, cellular phone, cellphone, cell, mobile phone',
489
+ 488: 'chain',
490
+ 489: 'chainlink fence',
491
+ 490: 'chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour',
492
+ 491: 'chain saw, chainsaw',
493
+ 492: 'chest',
494
+ 493: 'chiffonier, commode',
495
+ 494: 'chime, bell, gong',
496
+ 495: 'china cabinet, china closet',
497
+ 496: 'Christmas stocking',
498
+ 497: 'church, church building',
499
+ 498: 'cinema, movie theater, movie theatre, movie house, picture palace',
500
+ 499: 'cleaver, meat cleaver, chopper',
501
+ 500: 'cliff dwelling',
502
+ 501: 'cloak',
503
+ 502: 'clog, geta, patten, sabot',
504
+ 503: 'cocktail shaker',
505
+ 504: 'coffee mug',
506
+ 505: 'coffeepot',
507
+ 506: 'coil, spiral, volute, whorl, helix',
508
+ 507: 'combination lock',
509
+ 508: 'computer keyboard, keypad',
510
+ 509: 'confectionery, confectionary, candy store',
511
+ 510: 'container ship, containership, container vessel',
512
+ 511: 'convertible',
513
+ 512: 'corkscrew, bottle screw',
514
+ 513: 'cornet, horn, trumpet, trump',
515
+ 514: 'cowboy boot',
516
+ 515: 'cowboy hat, ten-gallon hat',
517
+ 516: 'cradle',
518
+ 517: 'crane',
519
+ 518: 'crash helmet',
520
+ 519: 'crate',
521
+ 520: 'crib, cot',
522
+ 521: 'Crock Pot',
523
+ 522: 'croquet ball',
524
+ 523: 'crutch',
525
+ 524: 'cuirass',
526
+ 525: 'dam, dike, dyke',
527
+ 526: 'desk',
528
+ 527: 'desktop computer',
529
+ 528: 'dial telephone, dial phone',
530
+ 529: 'diaper, nappy, napkin',
531
+ 530: 'digital clock',
532
+ 531: 'digital watch',
533
+ 532: 'dining table, board',
534
+ 533: 'dishrag, dishcloth',
535
+ 534: 'dishwasher, dish washer, dishwashing machine',
536
+ 535: 'disk brake, disc brake',
537
+ 536: 'dock, dockage, docking facility',
538
+ 537: 'dogsled, dog sled, dog sleigh',
539
+ 538: 'dome',
540
+ 539: 'doormat, welcome mat',
541
+ 540: 'drilling platform, offshore rig',
542
+ 541: 'drum, membranophone, tympan',
543
+ 542: 'drumstick',
544
+ 543: 'dumbbell',
545
+ 544: 'Dutch oven',
546
+ 545: 'electric fan, blower',
547
+ 546: 'electric guitar',
548
+ 547: 'electric locomotive',
549
+ 548: 'entertainment center',
550
+ 549: 'envelope',
551
+ 550: 'espresso maker',
552
+ 551: 'face powder',
553
+ 552: 'feather boa, boa',
554
+ 553: 'file, file cabinet, filing cabinet',
555
+ 554: 'fireboat',
556
+ 555: 'fire engine, fire truck',
557
+ 556: 'fire screen, fireguard',
558
+ 557: 'flagpole, flagstaff',
559
+ 558: 'flute, transverse flute',
560
+ 559: 'folding chair',
561
+ 560: 'football helmet',
562
+ 561: 'forklift',
563
+ 562: 'fountain',
564
+ 563: 'fountain pen',
565
+ 564: 'four-poster',
566
+ 565: 'freight car',
567
+ 566: 'French horn, horn',
568
+ 567: 'frying pan, frypan, skillet',
569
+ 568: 'fur coat',
570
+ 569: 'garbage truck, dustcart',
571
+ 570: 'gasmask, respirator, gas helmet',
572
+ 571: 'gas pump, gasoline pump, petrol pump, island dispenser',
573
+ 572: 'goblet',
574
+ 573: 'go-kart',
575
+ 574: 'golf ball',
576
+ 575: 'golfcart, golf cart',
577
+ 576: 'gondola',
578
+ 577: 'gong, tam-tam',
579
+ 578: 'gown',
580
+ 579: 'grand piano, grand',
581
+ 580: 'greenhouse, nursery, glasshouse',
582
+ 581: 'grille, radiator grille',
583
+ 582: 'grocery store, grocery, food market, market',
584
+ 583: 'guillotine',
585
+ 584: 'hair slide',
586
+ 585: 'hair spray',
587
+ 586: 'half track',
588
+ 587: 'hammer',
589
+ 588: 'hamper',
590
+ 589: 'hand blower, blow dryer, blow drier, hair dryer, hair drier',
591
+ 590: 'hand-held computer, hand-held microcomputer',
592
+ 591: 'handkerchief, hankie, hanky, hankey',
593
+ 592: 'hard disc, hard disk, fixed disk',
594
+ 593: 'harmonica, mouth organ, harp, mouth harp',
595
+ 594: 'harp',
596
+ 595: 'harvester, reaper',
597
+ 596: 'hatchet',
598
+ 597: 'holster',
599
+ 598: 'home theater, home theatre',
600
+ 599: 'honeycomb',
601
+ 600: 'hook, claw',
602
+ 601: 'hoopskirt, crinoline',
603
+ 602: 'horizontal bar, high bar',
604
+ 603: 'horse cart, horse-cart',
605
+ 604: 'hourglass',
606
+ 605: 'iPod',
607
+ 606: 'iron, smoothing iron',
608
+ 607: "jack-o'-lantern",
609
+ 608: 'jean, blue jean, denim',
610
+ 609: 'jeep, landrover',
611
+ 610: 'jersey, T-shirt, tee shirt',
612
+ 611: 'jigsaw puzzle',
613
+ 612: 'jinrikisha, ricksha, rickshaw',
614
+ 613: 'joystick',
615
+ 614: 'kimono',
616
+ 615: 'knee pad',
617
+ 616: 'knot',
618
+ 617: 'lab coat, laboratory coat',
619
+ 618: 'ladle',
620
+ 619: 'lampshade, lamp shade',
621
+ 620: 'laptop, laptop computer',
622
+ 621: 'lawn mower, mower',
623
+ 622: 'lens cap, lens cover',
624
+ 623: 'letter opener, paper knife, paperknife',
625
+ 624: 'library',
626
+ 625: 'lifeboat',
627
+ 626: 'lighter, light, igniter, ignitor',
628
+ 627: 'limousine, limo',
629
+ 628: 'liner, ocean liner',
630
+ 629: 'lipstick, lip rouge',
631
+ 630: 'Loafer',
632
+ 631: 'lotion',
633
+ 632: 'loudspeaker, speaker, speaker unit, loudspeaker system, speaker system',
634
+ 633: "loupe, jeweler's loupe",
635
+ 634: 'lumbermill, sawmill',
636
+ 635: 'magnetic compass',
637
+ 636: 'mailbag, postbag',
638
+ 637: 'mailbox, letter box',
639
+ 638: 'maillot',
640
+ 639: 'maillot, tank suit',
641
+ 640: 'manhole cover',
642
+ 641: 'maraca',
643
+ 642: 'marimba, xylophone',
644
+ 643: 'mask',
645
+ 644: 'matchstick',
646
+ 645: 'maypole',
647
+ 646: 'maze, labyrinth',
648
+ 647: 'measuring cup',
649
+ 648: 'medicine chest, medicine cabinet',
650
+ 649: 'megalith, megalithic structure',
651
+ 650: 'microphone, mike',
652
+ 651: 'microwave, microwave oven',
653
+ 652: 'military uniform',
654
+ 653: 'milk can',
655
+ 654: 'minibus',
656
+ 655: 'miniskirt, mini',
657
+ 656: 'minivan',
658
+ 657: 'missile',
659
+ 658: 'mitten',
660
+ 659: 'mixing bowl',
661
+ 660: 'mobile home, manufactured home',
662
+ 661: 'Model T',
663
+ 662: 'modem',
664
+ 663: 'monastery',
665
+ 664: 'monitor',
666
+ 665: 'moped',
667
+ 666: 'mortar',
668
+ 667: 'mortarboard',
669
+ 668: 'mosque',
670
+ 669: 'mosquito net',
671
+ 670: 'motor scooter, scooter',
672
+ 671: 'mountain bike, all-terrain bike, off-roader',
673
+ 672: 'mountain tent',
674
+ 673: 'mouse, computer mouse',
675
+ 674: 'mousetrap',
676
+ 675: 'moving van',
677
+ 676: 'muzzle',
678
+ 677: 'nail',
679
+ 678: 'neck brace',
680
+ 679: 'necklace',
681
+ 680: 'nipple',
682
+ 681: 'notebook, notebook computer',
683
+ 682: 'obelisk',
684
+ 683: 'oboe, hautboy, hautbois',
685
+ 684: 'ocarina, sweet potato',
686
+ 685: 'odometer, hodometer, mileometer, milometer',
687
+ 686: 'oil filter',
688
+ 687: 'organ, pipe organ',
689
+ 688: 'oscilloscope, scope, cathode-ray oscilloscope, CRO',
690
+ 689: 'overskirt',
691
+ 690: 'oxcart',
692
+ 691: 'oxygen mask',
693
+ 692: 'packet',
694
+ 693: 'paddle, boat paddle',
695
+ 694: 'paddlewheel, paddle wheel',
696
+ 695: 'padlock',
697
+ 696: 'paintbrush',
698
+ 697: "pajama, pyjama, pj's, jammies",
699
+ 698: 'palace',
700
+ 699: 'panpipe, pandean pipe, syrinx',
701
+ 700: 'paper towel',
702
+ 701: 'parachute, chute',
703
+ 702: 'parallel bars, bars',
704
+ 703: 'park bench',
705
+ 704: 'parking meter',
706
+ 705: 'passenger car, coach, carriage',
707
+ 706: 'patio, terrace',
708
+ 707: 'pay-phone, pay-station',
709
+ 708: 'pedestal, plinth, footstall',
710
+ 709: 'pencil box, pencil case',
711
+ 710: 'pencil sharpener',
712
+ 711: 'perfume, essence',
713
+ 712: 'Petri dish',
714
+ 713: 'photocopier',
715
+ 714: 'pick, plectrum, plectron',
716
+ 715: 'pickelhaube',
717
+ 716: 'picket fence, paling',
718
+ 717: 'pickup, pickup truck',
719
+ 718: 'pier',
720
+ 719: 'piggy bank, penny bank',
721
+ 720: 'pill bottle',
722
+ 721: 'pillow',
723
+ 722: 'ping-pong ball',
724
+ 723: 'pinwheel',
725
+ 724: 'pirate, pirate ship',
726
+ 725: 'pitcher, ewer',
727
+ 726: "plane, carpenter's plane, woodworking plane",
728
+ 727: 'planetarium',
729
+ 728: 'plastic bag',
730
+ 729: 'plate rack',
731
+ 730: 'plow, plough',
732
+ 731: "plunger, plumber's helper",
733
+ 732: 'Polaroid camera, Polaroid Land camera',
734
+ 733: 'pole',
735
+ 734: 'police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria',
736
+ 735: 'poncho',
737
+ 736: 'pool table, billiard table, snooker table',
738
+ 737: 'pop bottle, soda bottle',
739
+ 738: 'pot, flowerpot',
740
+ 739: "potter's wheel",
741
+ 740: 'power drill',
742
+ 741: 'prayer rug, prayer mat',
743
+ 742: 'printer',
744
+ 743: 'prison, prison house',
745
+ 744: 'projectile, missile',
746
+ 745: 'projector',
747
+ 746: 'puck, hockey puck',
748
+ 747: 'punching bag, punch bag, punching ball, punchball',
749
+ 748: 'purse',
750
+ 749: 'quill, quill pen',
751
+ 750: 'quilt, comforter, comfort, puff',
752
+ 751: 'racer, race car, racing car',
753
+ 752: 'racket, racquet',
754
+ 753: 'radiator',
755
+ 754: 'radio, wireless',
756
+ 755: 'radio telescope, radio reflector',
757
+ 756: 'rain barrel',
758
+ 757: 'recreational vehicle, RV, R.V.',
759
+ 758: 'reel',
760
+ 759: 'reflex camera',
761
+ 760: 'refrigerator, icebox',
762
+ 761: 'remote control, remote',
763
+ 762: 'restaurant, eating house, eating place, eatery',
764
+ 763: 'revolver, six-gun, six-shooter',
765
+ 764: 'rifle',
766
+ 765: 'rocking chair, rocker',
767
+ 766: 'rotisserie',
768
+ 767: 'rubber eraser, rubber, pencil eraser',
769
+ 768: 'rugby ball',
770
+ 769: 'rule, ruler',
771
+ 770: 'running shoe',
772
+ 771: 'safe',
773
+ 772: 'safety pin',
774
+ 773: 'saltshaker, salt shaker',
775
+ 774: 'sandal',
776
+ 775: 'sarong',
777
+ 776: 'sax, saxophone',
778
+ 777: 'scabbard',
779
+ 778: 'scale, weighing machine',
780
+ 779: 'school bus',
781
+ 780: 'schooner',
782
+ 781: 'scoreboard',
783
+ 782: 'screen, CRT screen',
784
+ 783: 'screw',
785
+ 784: 'screwdriver',
786
+ 785: 'seat belt, seatbelt',
787
+ 786: 'sewing machine',
788
+ 787: 'shield, buckler',
789
+ 788: 'shoe shop, shoe-shop, shoe store',
790
+ 789: 'shoji',
791
+ 790: 'shopping basket',
792
+ 791: 'shopping cart',
793
+ 792: 'shovel',
794
+ 793: 'shower cap',
795
+ 794: 'shower curtain',
796
+ 795: 'ski',
797
+ 796: 'ski mask',
798
+ 797: 'sleeping bag',
799
+ 798: 'slide rule, slipstick',
800
+ 799: 'sliding door',
801
+ 800: 'slot, one-armed bandit',
802
+ 801: 'snorkel',
803
+ 802: 'snowmobile',
804
+ 803: 'snowplow, snowplough',
805
+ 804: 'soap dispenser',
806
+ 805: 'soccer ball',
807
+ 806: 'sock',
808
+ 807: 'solar dish, solar collector, solar furnace',
809
+ 808: 'sombrero',
810
+ 809: 'soup bowl',
811
+ 810: 'space bar',
812
+ 811: 'space heater',
813
+ 812: 'space shuttle',
814
+ 813: 'spatula',
815
+ 814: 'speedboat',
816
+ 815: "spider web, spider's web",
817
+ 816: 'spindle',
818
+ 817: 'sports car, sport car',
819
+ 818: 'spotlight, spot',
820
+ 819: 'stage',
821
+ 820: 'steam locomotive',
822
+ 821: 'steel arch bridge',
823
+ 822: 'steel drum',
824
+ 823: 'stethoscope',
825
+ 824: 'stole',
826
+ 825: 'stone wall',
827
+ 826: 'stopwatch, stop watch',
828
+ 827: 'stove',
829
+ 828: 'strainer',
830
+ 829: 'streetcar, tram, tramcar, trolley, trolley car',
831
+ 830: 'stretcher',
832
+ 831: 'studio couch, day bed',
833
+ 832: 'stupa, tope',
834
+ 833: 'submarine, pigboat, sub, U-boat',
835
+ 834: 'suit, suit of clothes',
836
+ 835: 'sundial',
837
+ 836: 'sunglass',
838
+ 837: 'sunglasses, dark glasses, shades',
839
+ 838: 'sunscreen, sunblock, sun blocker',
840
+ 839: 'suspension bridge',
841
+ 840: 'swab, swob, mop',
842
+ 841: 'sweatshirt',
843
+ 842: 'swimming trunks, bathing trunks',
844
+ 843: 'swing',
845
+ 844: 'switch, electric switch, electrical switch',
846
+ 845: 'syringe',
847
+ 846: 'table lamp',
848
+ 847: 'tank, army tank, armored combat vehicle, armoured combat vehicle',
849
+ 848: 'tape player',
850
+ 849: 'teapot',
851
+ 850: 'teddy, teddy bear',
852
+ 851: 'television, television system',
853
+ 852: 'tennis ball',
854
+ 853: 'thatch, thatched roof',
855
+ 854: 'theater curtain, theatre curtain',
856
+ 855: 'thimble',
857
+ 856: 'thresher, thrasher, threshing machine',
858
+ 857: 'throne',
859
+ 858: 'tile roof',
860
+ 859: 'toaster',
861
+ 860: 'tobacco shop, tobacconist shop, tobacconist',
862
+ 861: 'toilet seat',
863
+ 862: 'torch',
864
+ 863: 'totem pole',
865
+ 864: 'tow truck, tow car, wrecker',
866
+ 865: 'toyshop',
867
+ 866: 'tractor',
868
+ 867: 'trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi',
869
+ 868: 'tray',
870
+ 869: 'trench coat',
871
+ 870: 'tricycle, trike, velocipede',
872
+ 871: 'trimaran',
873
+ 872: 'tripod',
874
+ 873: 'triumphal arch',
875
+ 874: 'trolleybus, trolley coach, trackless trolley',
876
+ 875: 'trombone',
877
+ 876: 'tub, vat',
878
+ 877: 'turnstile',
879
+ 878: 'typewriter keyboard',
880
+ 879: 'umbrella',
881
+ 880: 'unicycle, monocycle',
882
+ 881: 'upright, upright piano',
883
+ 882: 'vacuum, vacuum cleaner',
884
+ 883: 'vase',
885
+ 884: 'vault',
886
+ 885: 'velvet',
887
+ 886: 'vending machine',
888
+ 887: 'vestment',
889
+ 888: 'viaduct',
890
+ 889: 'violin, fiddle',
891
+ 890: 'volleyball',
892
+ 891: 'waffle iron',
893
+ 892: 'wall clock',
894
+ 893: 'wallet, billfold, notecase, pocketbook',
895
+ 894: 'wardrobe, closet, press',
896
+ 895: 'warplane, military plane',
897
+ 896: 'washbasin, handbasin, washbowl, lavabo, wash-hand basin',
898
+ 897: 'washer, automatic washer, washing machine',
899
+ 898: 'water bottle',
900
+ 899: 'water jug',
901
+ 900: 'water tower',
902
+ 901: 'whiskey jug',
903
+ 902: 'whistle',
904
+ 903: 'wig',
905
+ 904: 'window screen',
906
+ 905: 'window shade',
907
+ 906: 'Windsor tie',
908
+ 907: 'wine bottle',
909
+ 908: 'wing',
910
+ 909: 'wok',
911
+ 910: 'wooden spoon',
912
+ 911: 'wool, woolen, woollen',
913
+ 912: 'worm fence, snake fence, snake-rail fence, Virginia fence',
914
+ 913: 'wreck',
915
+ 914: 'yawl',
916
+ 915: 'yurt',
917
+ 916: 'web site, website, internet site, site',
918
+ 917: 'comic book',
919
+ 918: 'crossword puzzle, crossword',
920
+ 919: 'street sign',
921
+ 920: 'traffic light, traffic signal, stoplight',
922
+ 921: 'book jacket, dust cover, dust jacket, dust wrapper',
923
+ 922: 'menu',
924
+ 923: 'plate',
925
+ 924: 'guacamole',
926
+ 925: 'consomme',
927
+ 926: 'hot pot, hotpot',
928
+ 927: 'trifle',
929
+ 928: 'ice cream, icecream',
930
+ 929: 'ice lolly, lolly, lollipop, popsicle',
931
+ 930: 'French loaf',
932
+ 931: 'bagel, beigel',
933
+ 932: 'pretzel',
934
+ 933: 'cheeseburger',
935
+ 934: 'hotdog, hot dog, red hot',
936
+ 935: 'mashed potato',
937
+ 936: 'head cabbage',
938
+ 937: 'broccoli',
939
+ 938: 'cauliflower',
940
+ 939: 'zucchini, courgette',
941
+ 940: 'spaghetti squash',
942
+ 941: 'acorn squash',
943
+ 942: 'butternut squash',
944
+ 943: 'cucumber, cuke',
945
+ 944: 'artichoke, globe artichoke',
946
+ 945: 'bell pepper',
947
+ 946: 'cardoon',
948
+ 947: 'mushroom',
949
+ 948: 'Granny Smith',
950
+ 949: 'strawberry',
951
+ 950: 'orange',
952
+ 951: 'lemon',
953
+ 952: 'fig',
954
+ 953: 'pineapple, ananas',
955
+ 954: 'banana',
956
+ 955: 'jackfruit, jak, jack',
957
+ 956: 'custard apple',
958
+ 957: 'pomegranate',
959
+ 958: 'hay',
960
+ 959: 'carbonara',
961
+ 960: 'chocolate sauce, chocolate syrup',
962
+ 961: 'dough',
963
+ 962: 'meat loaf, meatloaf',
964
+ 963: 'pizza, pizza pie',
965
+ 964: 'potpie',
966
+ 965: 'burrito',
967
+ 966: 'red wine',
968
+ 967: 'espresso',
969
+ 968: 'cup',
970
+ 969: 'eggnog',
971
+ 970: 'alp',
972
+ 971: 'bubble',
973
+ 972: 'cliff, drop, drop-off',
974
+ 973: 'coral reef',
975
+ 974: 'geyser',
976
+ 975: 'lakeside, lakeshore',
977
+ 976: 'promontory, headland, head, foreland',
978
+ 977: 'sandbar, sand bar',
979
+ 978: 'seashore, coast, seacoast, sea-coast',
980
+ 979: 'valley, vale',
981
+ 980: 'volcano',
982
+ 981: 'ballplayer, baseball player',
983
+ 982: 'groom, bridegroom',
984
+ 983: 'scuba diver',
985
+ 984: 'rapeseed',
986
+ 985: 'daisy',
987
+ 986: "yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum",
988
+ 987: 'corn',
989
+ 988: 'acorn',
990
+ 989: 'hip, rose hip, rosehip',
991
+ 990: 'buckeye, horse chestnut, conker',
992
+ 991: 'coral fungus',
993
+ 992: 'agaric',
994
+ 993: 'gyromitra',
995
+ 994: 'stinkhorn, carrion fungus',
996
+ 995: 'earthstar',
997
+ 996: 'hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa',
998
+ 997: 'bolete',
999
+ 998: 'ear, spike, capitulum',
1000
+ 999: 'toilet tissue, toilet paper, bathroom tissue'}
labels/imagenet1k-simple-labels.json ADDED
@@ -0,0 +1,1000 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ["tench",
2
+ "goldfish",
3
+ "great white shark",
4
+ "tiger shark",
5
+ "hammerhead shark",
6
+ "electric ray",
7
+ "stingray",
8
+ "cock",
9
+ "hen",
10
+ "ostrich",
11
+ "brambling",
12
+ "goldfinch",
13
+ "house finch",
14
+ "junco",
15
+ "indigo bunting",
16
+ "American robin",
17
+ "bulbul",
18
+ "jay",
19
+ "magpie",
20
+ "chickadee",
21
+ "American dipper",
22
+ "kite",
23
+ "bald eagle",
24
+ "vulture",
25
+ "great grey owl",
26
+ "fire salamander",
27
+ "smooth newt",
28
+ "newt",
29
+ "spotted salamander",
30
+ "axolotl",
31
+ "American bullfrog",
32
+ "tree frog",
33
+ "tailed frog",
34
+ "loggerhead sea turtle",
35
+ "leatherback sea turtle",
36
+ "mud turtle",
37
+ "terrapin",
38
+ "box turtle",
39
+ "banded gecko",
40
+ "green iguana",
41
+ "Carolina anole",
42
+ "desert grassland whiptail lizard",
43
+ "agama",
44
+ "frilled-necked lizard",
45
+ "alligator lizard",
46
+ "Gila monster",
47
+ "European green lizard",
48
+ "chameleon",
49
+ "Komodo dragon",
50
+ "Nile crocodile",
51
+ "American alligator",
52
+ "triceratops",
53
+ "worm snake",
54
+ "ring-necked snake",
55
+ "eastern hog-nosed snake",
56
+ "smooth green snake",
57
+ "kingsnake",
58
+ "garter snake",
59
+ "water snake",
60
+ "vine snake",
61
+ "night snake",
62
+ "boa constrictor",
63
+ "African rock python",
64
+ "Indian cobra",
65
+ "green mamba",
66
+ "sea snake",
67
+ "Saharan horned viper",
68
+ "eastern diamondback rattlesnake",
69
+ "sidewinder",
70
+ "trilobite",
71
+ "harvestman",
72
+ "scorpion",
73
+ "yellow garden spider",
74
+ "barn spider",
75
+ "European garden spider",
76
+ "southern black widow",
77
+ "tarantula",
78
+ "wolf spider",
79
+ "tick",
80
+ "centipede",
81
+ "black grouse",
82
+ "ptarmigan",
83
+ "ruffed grouse",
84
+ "prairie grouse",
85
+ "peacock",
86
+ "quail",
87
+ "partridge",
88
+ "grey parrot",
89
+ "macaw",
90
+ "sulphur-crested cockatoo",
91
+ "lorikeet",
92
+ "coucal",
93
+ "bee eater",
94
+ "hornbill",
95
+ "hummingbird",
96
+ "jacamar",
97
+ "toucan",
98
+ "duck",
99
+ "red-breasted merganser",
100
+ "goose",
101
+ "black swan",
102
+ "tusker",
103
+ "echidna",
104
+ "platypus",
105
+ "wallaby",
106
+ "koala",
107
+ "wombat",
108
+ "jellyfish",
109
+ "sea anemone",
110
+ "brain coral",
111
+ "flatworm",
112
+ "nematode",
113
+ "conch",
114
+ "snail",
115
+ "slug",
116
+ "sea slug",
117
+ "chiton",
118
+ "chambered nautilus",
119
+ "Dungeness crab",
120
+ "rock crab",
121
+ "fiddler crab",
122
+ "red king crab",
123
+ "American lobster",
124
+ "spiny lobster",
125
+ "crayfish",
126
+ "hermit crab",
127
+ "isopod",
128
+ "white stork",
129
+ "black stork",
130
+ "spoonbill",
131
+ "flamingo",
132
+ "little blue heron",
133
+ "great egret",
134
+ "bittern",
135
+ "crane (bird)",
136
+ "limpkin",
137
+ "common gallinule",
138
+ "American coot",
139
+ "bustard",
140
+ "ruddy turnstone",
141
+ "dunlin",
142
+ "common redshank",
143
+ "dowitcher",
144
+ "oystercatcher",
145
+ "pelican",
146
+ "king penguin",
147
+ "albatross",
148
+ "grey whale",
149
+ "killer whale",
150
+ "dugong",
151
+ "sea lion",
152
+ "Chihuahua",
153
+ "Japanese Chin",
154
+ "Maltese",
155
+ "Pekingese",
156
+ "Shih Tzu",
157
+ "King Charles Spaniel",
158
+ "Papillon",
159
+ "toy terrier",
160
+ "Rhodesian Ridgeback",
161
+ "Afghan Hound",
162
+ "Basset Hound",
163
+ "Beagle",
164
+ "Bloodhound",
165
+ "Bluetick Coonhound",
166
+ "Black and Tan Coonhound",
167
+ "Treeing Walker Coonhound",
168
+ "English foxhound",
169
+ "Redbone Coonhound",
170
+ "borzoi",
171
+ "Irish Wolfhound",
172
+ "Italian Greyhound",
173
+ "Whippet",
174
+ "Ibizan Hound",
175
+ "Norwegian Elkhound",
176
+ "Otterhound",
177
+ "Saluki",
178
+ "Scottish Deerhound",
179
+ "Weimaraner",
180
+ "Staffordshire Bull Terrier",
181
+ "American Staffordshire Terrier",
182
+ "Bedlington Terrier",
183
+ "Border Terrier",
184
+ "Kerry Blue Terrier",
185
+ "Irish Terrier",
186
+ "Norfolk Terrier",
187
+ "Norwich Terrier",
188
+ "Yorkshire Terrier",
189
+ "Wire Fox Terrier",
190
+ "Lakeland Terrier",
191
+ "Sealyham Terrier",
192
+ "Airedale Terrier",
193
+ "Cairn Terrier",
194
+ "Australian Terrier",
195
+ "Dandie Dinmont Terrier",
196
+ "Boston Terrier",
197
+ "Miniature Schnauzer",
198
+ "Giant Schnauzer",
199
+ "Standard Schnauzer",
200
+ "Scottish Terrier",
201
+ "Tibetan Terrier",
202
+ "Australian Silky Terrier",
203
+ "Soft-coated Wheaten Terrier",
204
+ "West Highland White Terrier",
205
+ "Lhasa Apso",
206
+ "Flat-Coated Retriever",
207
+ "Curly-coated Retriever",
208
+ "Golden Retriever",
209
+ "Labrador Retriever",
210
+ "Chesapeake Bay Retriever",
211
+ "German Shorthaired Pointer",
212
+ "Vizsla",
213
+ "English Setter",
214
+ "Irish Setter",
215
+ "Gordon Setter",
216
+ "Brittany",
217
+ "Clumber Spaniel",
218
+ "English Springer Spaniel",
219
+ "Welsh Springer Spaniel",
220
+ "Cocker Spaniels",
221
+ "Sussex Spaniel",
222
+ "Irish Water Spaniel",
223
+ "Kuvasz",
224
+ "Schipperke",
225
+ "Groenendael",
226
+ "Malinois",
227
+ "Briard",
228
+ "Australian Kelpie",
229
+ "Komondor",
230
+ "Old English Sheepdog",
231
+ "Shetland Sheepdog",
232
+ "collie",
233
+ "Border Collie",
234
+ "Bouvier des Flandres",
235
+ "Rottweiler",
236
+ "German Shepherd Dog",
237
+ "Dobermann",
238
+ "Miniature Pinscher",
239
+ "Greater Swiss Mountain Dog",
240
+ "Bernese Mountain Dog",
241
+ "Appenzeller Sennenhund",
242
+ "Entlebucher Sennenhund",
243
+ "Boxer",
244
+ "Bullmastiff",
245
+ "Tibetan Mastiff",
246
+ "French Bulldog",
247
+ "Great Dane",
248
+ "St. Bernard",
249
+ "husky",
250
+ "Alaskan Malamute",
251
+ "Siberian Husky",
252
+ "Dalmatian",
253
+ "Affenpinscher",
254
+ "Basenji",
255
+ "pug",
256
+ "Leonberger",
257
+ "Newfoundland",
258
+ "Pyrenean Mountain Dog",
259
+ "Samoyed",
260
+ "Pomeranian",
261
+ "Chow Chow",
262
+ "Keeshond",
263
+ "Griffon Bruxellois",
264
+ "Pembroke Welsh Corgi",
265
+ "Cardigan Welsh Corgi",
266
+ "Toy Poodle",
267
+ "Miniature Poodle",
268
+ "Standard Poodle",
269
+ "Mexican hairless dog",
270
+ "grey wolf",
271
+ "Alaskan tundra wolf",
272
+ "red wolf",
273
+ "coyote",
274
+ "dingo",
275
+ "dhole",
276
+ "African wild dog",
277
+ "hyena",
278
+ "red fox",
279
+ "kit fox",
280
+ "Arctic fox",
281
+ "grey fox",
282
+ "tabby cat",
283
+ "tiger cat",
284
+ "Persian cat",
285
+ "Siamese cat",
286
+ "Egyptian Mau",
287
+ "cougar",
288
+ "lynx",
289
+ "leopard",
290
+ "snow leopard",
291
+ "jaguar",
292
+ "lion",
293
+ "tiger",
294
+ "cheetah",
295
+ "brown bear",
296
+ "American black bear",
297
+ "polar bear",
298
+ "sloth bear",
299
+ "mongoose",
300
+ "meerkat",
301
+ "tiger beetle",
302
+ "ladybug",
303
+ "ground beetle",
304
+ "longhorn beetle",
305
+ "leaf beetle",
306
+ "dung beetle",
307
+ "rhinoceros beetle",
308
+ "weevil",
309
+ "fly",
310
+ "bee",
311
+ "ant",
312
+ "grasshopper",
313
+ "cricket",
314
+ "stick insect",
315
+ "cockroach",
316
+ "mantis",
317
+ "cicada",
318
+ "leafhopper",
319
+ "lacewing",
320
+ "dragonfly",
321
+ "damselfly",
322
+ "red admiral",
323
+ "ringlet",
324
+ "monarch butterfly",
325
+ "small white",
326
+ "sulphur butterfly",
327
+ "gossamer-winged butterfly",
328
+ "starfish",
329
+ "sea urchin",
330
+ "sea cucumber",
331
+ "cottontail rabbit",
332
+ "hare",
333
+ "Angora rabbit",
334
+ "hamster",
335
+ "porcupine",
336
+ "fox squirrel",
337
+ "marmot",
338
+ "beaver",
339
+ "guinea pig",
340
+ "common sorrel",
341
+ "zebra",
342
+ "pig",
343
+ "wild boar",
344
+ "warthog",
345
+ "hippopotamus",
346
+ "ox",
347
+ "water buffalo",
348
+ "bison",
349
+ "ram",
350
+ "bighorn sheep",
351
+ "Alpine ibex",
352
+ "hartebeest",
353
+ "impala",
354
+ "gazelle",
355
+ "dromedary",
356
+ "llama",
357
+ "weasel",
358
+ "mink",
359
+ "European polecat",
360
+ "black-footed ferret",
361
+ "otter",
362
+ "skunk",
363
+ "badger",
364
+ "armadillo",
365
+ "three-toed sloth",
366
+ "orangutan",
367
+ "gorilla",
368
+ "chimpanzee",
369
+ "gibbon",
370
+ "siamang",
371
+ "guenon",
372
+ "patas monkey",
373
+ "baboon",
374
+ "macaque",
375
+ "langur",
376
+ "black-and-white colobus",
377
+ "proboscis monkey",
378
+ "marmoset",
379
+ "white-headed capuchin",
380
+ "howler monkey",
381
+ "titi",
382
+ "Geoffroy's spider monkey",
383
+ "common squirrel monkey",
384
+ "ring-tailed lemur",
385
+ "indri",
386
+ "Asian elephant",
387
+ "African bush elephant",
388
+ "red panda",
389
+ "giant panda",
390
+ "snoek",
391
+ "eel",
392
+ "coho salmon",
393
+ "rock beauty",
394
+ "clownfish",
395
+ "sturgeon",
396
+ "garfish",
397
+ "lionfish",
398
+ "pufferfish",
399
+ "abacus",
400
+ "abaya",
401
+ "academic gown",
402
+ "accordion",
403
+ "acoustic guitar",
404
+ "aircraft carrier",
405
+ "airliner",
406
+ "airship",
407
+ "altar",
408
+ "ambulance",
409
+ "amphibious vehicle",
410
+ "analog clock",
411
+ "apiary",
412
+ "apron",
413
+ "waste container",
414
+ "assault rifle",
415
+ "backpack",
416
+ "bakery",
417
+ "balance beam",
418
+ "balloon",
419
+ "ballpoint pen",
420
+ "Band-Aid",
421
+ "banjo",
422
+ "baluster",
423
+ "barbell",
424
+ "barber chair",
425
+ "barbershop",
426
+ "barn",
427
+ "barometer",
428
+ "barrel",
429
+ "wheelbarrow",
430
+ "baseball",
431
+ "basketball",
432
+ "bassinet",
433
+ "bassoon",
434
+ "swimming cap",
435
+ "bath towel",
436
+ "bathtub",
437
+ "station wagon",
438
+ "lighthouse",
439
+ "beaker",
440
+ "military cap",
441
+ "beer bottle",
442
+ "beer glass",
443
+ "bell-cot",
444
+ "bib",
445
+ "tandem bicycle",
446
+ "bikini",
447
+ "ring binder",
448
+ "binoculars",
449
+ "birdhouse",
450
+ "boathouse",
451
+ "bobsleigh",
452
+ "bolo tie",
453
+ "poke bonnet",
454
+ "bookcase",
455
+ "bookstore",
456
+ "bottle cap",
457
+ "bow",
458
+ "bow tie",
459
+ "brass",
460
+ "bra",
461
+ "breakwater",
462
+ "breastplate",
463
+ "broom",
464
+ "bucket",
465
+ "buckle",
466
+ "bulletproof vest",
467
+ "high-speed train",
468
+ "butcher shop",
469
+ "taxicab",
470
+ "cauldron",
471
+ "candle",
472
+ "cannon",
473
+ "canoe",
474
+ "can opener",
475
+ "cardigan",
476
+ "car mirror",
477
+ "carousel",
478
+ "tool kit",
479
+ "carton",
480
+ "car wheel",
481
+ "automated teller machine",
482
+ "cassette",
483
+ "cassette player",
484
+ "castle",
485
+ "catamaran",
486
+ "CD player",
487
+ "cello",
488
+ "mobile phone",
489
+ "chain",
490
+ "chain-link fence",
491
+ "chain mail",
492
+ "chainsaw",
493
+ "chest",
494
+ "chiffonier",
495
+ "chime",
496
+ "china cabinet",
497
+ "Christmas stocking",
498
+ "church",
499
+ "movie theater",
500
+ "cleaver",
501
+ "cliff dwelling",
502
+ "cloak",
503
+ "clogs",
504
+ "cocktail shaker",
505
+ "coffee mug",
506
+ "coffeemaker",
507
+ "coil",
508
+ "combination lock",
509
+ "computer keyboard",
510
+ "confectionery store",
511
+ "container ship",
512
+ "convertible",
513
+ "corkscrew",
514
+ "cornet",
515
+ "cowboy boot",
516
+ "cowboy hat",
517
+ "cradle",
518
+ "crane (machine)",
519
+ "crash helmet",
520
+ "crate",
521
+ "infant bed",
522
+ "Crock Pot",
523
+ "croquet ball",
524
+ "crutch",
525
+ "cuirass",
526
+ "dam",
527
+ "desk",
528
+ "desktop computer",
529
+ "rotary dial telephone",
530
+ "diaper",
531
+ "digital clock",
532
+ "digital watch",
533
+ "dining table",
534
+ "dishcloth",
535
+ "dishwasher",
536
+ "disc brake",
537
+ "dock",
538
+ "dog sled",
539
+ "dome",
540
+ "doormat",
541
+ "drilling rig",
542
+ "drum",
543
+ "drumstick",
544
+ "dumbbell",
545
+ "Dutch oven",
546
+ "electric fan",
547
+ "electric guitar",
548
+ "electric locomotive",
549
+ "entertainment center",
550
+ "envelope",
551
+ "espresso machine",
552
+ "face powder",
553
+ "feather boa",
554
+ "filing cabinet",
555
+ "fireboat",
556
+ "fire engine",
557
+ "fire screen sheet",
558
+ "flagpole",
559
+ "flute",
560
+ "folding chair",
561
+ "football helmet",
562
+ "forklift",
563
+ "fountain",
564
+ "fountain pen",
565
+ "four-poster bed",
566
+ "freight car",
567
+ "French horn",
568
+ "frying pan",
569
+ "fur coat",
570
+ "garbage truck",
571
+ "gas mask",
572
+ "gas pump",
573
+ "goblet",
574
+ "go-kart",
575
+ "golf ball",
576
+ "golf cart",
577
+ "gondola",
578
+ "gong",
579
+ "gown",
580
+ "grand piano",
581
+ "greenhouse",
582
+ "grille",
583
+ "grocery store",
584
+ "guillotine",
585
+ "barrette",
586
+ "hair spray",
587
+ "half-track",
588
+ "hammer",
589
+ "hamper",
590
+ "hair dryer",
591
+ "hand-held computer",
592
+ "handkerchief",
593
+ "hard disk drive",
594
+ "harmonica",
595
+ "harp",
596
+ "harvester",
597
+ "hatchet",
598
+ "holster",
599
+ "home theater",
600
+ "honeycomb",
601
+ "hook",
602
+ "hoop skirt",
603
+ "horizontal bar",
604
+ "horse-drawn vehicle",
605
+ "hourglass",
606
+ "iPod",
607
+ "clothes iron",
608
+ "jack-o'-lantern",
609
+ "jeans",
610
+ "jeep",
611
+ "T-shirt",
612
+ "jigsaw puzzle",
613
+ "pulled rickshaw",
614
+ "joystick",
615
+ "kimono",
616
+ "knee pad",
617
+ "knot",
618
+ "lab coat",
619
+ "ladle",
620
+ "lampshade",
621
+ "laptop computer",
622
+ "lawn mower",
623
+ "lens cap",
624
+ "paper knife",
625
+ "library",
626
+ "lifeboat",
627
+ "lighter",
628
+ "limousine",
629
+ "ocean liner",
630
+ "lipstick",
631
+ "slip-on shoe",
632
+ "lotion",
633
+ "speaker",
634
+ "loupe",
635
+ "sawmill",
636
+ "magnetic compass",
637
+ "mail bag",
638
+ "mailbox",
639
+ "tights",
640
+ "tank suit",
641
+ "manhole cover",
642
+ "maraca",
643
+ "marimba",
644
+ "mask",
645
+ "match",
646
+ "maypole",
647
+ "maze",
648
+ "measuring cup",
649
+ "medicine chest",
650
+ "megalith",
651
+ "microphone",
652
+ "microwave oven",
653
+ "military uniform",
654
+ "milk can",
655
+ "minibus",
656
+ "miniskirt",
657
+ "minivan",
658
+ "missile",
659
+ "mitten",
660
+ "mixing bowl",
661
+ "mobile home",
662
+ "Model T",
663
+ "modem",
664
+ "monastery",
665
+ "monitor",
666
+ "moped",
667
+ "mortar",
668
+ "square academic cap",
669
+ "mosque",
670
+ "mosquito net",
671
+ "scooter",
672
+ "mountain bike",
673
+ "tent",
674
+ "computer mouse",
675
+ "mousetrap",
676
+ "moving van",
677
+ "muzzle",
678
+ "nail",
679
+ "neck brace",
680
+ "necklace",
681
+ "nipple",
682
+ "notebook computer",
683
+ "obelisk",
684
+ "oboe",
685
+ "ocarina",
686
+ "odometer",
687
+ "oil filter",
688
+ "organ",
689
+ "oscilloscope",
690
+ "overskirt",
691
+ "bullock cart",
692
+ "oxygen mask",
693
+ "packet",
694
+ "paddle",
695
+ "paddle wheel",
696
+ "padlock",
697
+ "paintbrush",
698
+ "pajamas",
699
+ "palace",
700
+ "pan flute",
701
+ "paper towel",
702
+ "parachute",
703
+ "parallel bars",
704
+ "park bench",
705
+ "parking meter",
706
+ "passenger car",
707
+ "patio",
708
+ "payphone",
709
+ "pedestal",
710
+ "pencil case",
711
+ "pencil sharpener",
712
+ "perfume",
713
+ "Petri dish",
714
+ "photocopier",
715
+ "plectrum",
716
+ "Pickelhaube",
717
+ "picket fence",
718
+ "pickup truck",
719
+ "pier",
720
+ "piggy bank",
721
+ "pill bottle",
722
+ "pillow",
723
+ "ping-pong ball",
724
+ "pinwheel",
725
+ "pirate ship",
726
+ "pitcher",
727
+ "hand plane",
728
+ "planetarium",
729
+ "plastic bag",
730
+ "plate rack",
731
+ "plow",
732
+ "plunger",
733
+ "Polaroid camera",
734
+ "pole",
735
+ "police van",
736
+ "poncho",
737
+ "billiard table",
738
+ "soda bottle",
739
+ "pot",
740
+ "potter's wheel",
741
+ "power drill",
742
+ "prayer rug",
743
+ "printer",
744
+ "prison",
745
+ "projectile",
746
+ "projector",
747
+ "hockey puck",
748
+ "punching bag",
749
+ "purse",
750
+ "quill",
751
+ "quilt",
752
+ "race car",
753
+ "racket",
754
+ "radiator",
755
+ "radio",
756
+ "radio telescope",
757
+ "rain barrel",
758
+ "recreational vehicle",
759
+ "reel",
760
+ "reflex camera",
761
+ "refrigerator",
762
+ "remote control",
763
+ "restaurant",
764
+ "revolver",
765
+ "rifle",
766
+ "rocking chair",
767
+ "rotisserie",
768
+ "eraser",
769
+ "rugby ball",
770
+ "ruler",
771
+ "running shoe",
772
+ "safe",
773
+ "safety pin",
774
+ "salt shaker",
775
+ "sandal",
776
+ "sarong",
777
+ "saxophone",
778
+ "scabbard",
779
+ "weighing scale",
780
+ "school bus",
781
+ "schooner",
782
+ "scoreboard",
783
+ "CRT screen",
784
+ "screw",
785
+ "screwdriver",
786
+ "seat belt",
787
+ "sewing machine",
788
+ "shield",
789
+ "shoe store",
790
+ "shoji",
791
+ "shopping basket",
792
+ "shopping cart",
793
+ "shovel",
794
+ "shower cap",
795
+ "shower curtain",
796
+ "ski",
797
+ "ski mask",
798
+ "sleeping bag",
799
+ "slide rule",
800
+ "sliding door",
801
+ "slot machine",
802
+ "snorkel",
803
+ "snowmobile",
804
+ "snowplow",
805
+ "soap dispenser",
806
+ "soccer ball",
807
+ "sock",
808
+ "solar thermal collector",
809
+ "sombrero",
810
+ "soup bowl",
811
+ "space bar",
812
+ "space heater",
813
+ "space shuttle",
814
+ "spatula",
815
+ "motorboat",
816
+ "spider web",
817
+ "spindle",
818
+ "sports car",
819
+ "spotlight",
820
+ "stage",
821
+ "steam locomotive",
822
+ "through arch bridge",
823
+ "steel drum",
824
+ "stethoscope",
825
+ "scarf",
826
+ "stone wall",
827
+ "stopwatch",
828
+ "stove",
829
+ "strainer",
830
+ "tram",
831
+ "stretcher",
832
+ "couch",
833
+ "stupa",
834
+ "submarine",
835
+ "suit",
836
+ "sundial",
837
+ "sunglass",
838
+ "sunglasses",
839
+ "sunscreen",
840
+ "suspension bridge",
841
+ "mop",
842
+ "sweatshirt",
843
+ "swimsuit",
844
+ "swing",
845
+ "switch",
846
+ "syringe",
847
+ "table lamp",
848
+ "tank",
849
+ "tape player",
850
+ "teapot",
851
+ "teddy bear",
852
+ "television",
853
+ "tennis ball",
854
+ "thatched roof",
855
+ "front curtain",
856
+ "thimble",
857
+ "threshing machine",
858
+ "throne",
859
+ "tile roof",
860
+ "toaster",
861
+ "tobacco shop",
862
+ "toilet seat",
863
+ "torch",
864
+ "totem pole",
865
+ "tow truck",
866
+ "toy store",
867
+ "tractor",
868
+ "semi-trailer truck",
869
+ "tray",
870
+ "trench coat",
871
+ "tricycle",
872
+ "trimaran",
873
+ "tripod",
874
+ "triumphal arch",
875
+ "trolleybus",
876
+ "trombone",
877
+ "tub",
878
+ "turnstile",
879
+ "typewriter keyboard",
880
+ "umbrella",
881
+ "unicycle",
882
+ "upright piano",
883
+ "vacuum cleaner",
884
+ "vase",
885
+ "vault",
886
+ "velvet",
887
+ "vending machine",
888
+ "vestment",
889
+ "viaduct",
890
+ "violin",
891
+ "volleyball",
892
+ "waffle iron",
893
+ "wall clock",
894
+ "wallet",
895
+ "wardrobe",
896
+ "military aircraft",
897
+ "sink",
898
+ "washing machine",
899
+ "water bottle",
900
+ "water jug",
901
+ "water tower",
902
+ "whiskey jug",
903
+ "whistle",
904
+ "wig",
905
+ "window screen",
906
+ "window shade",
907
+ "Windsor tie",
908
+ "wine bottle",
909
+ "wing",
910
+ "wok",
911
+ "wooden spoon",
912
+ "wool",
913
+ "split-rail fence",
914
+ "shipwreck",
915
+ "yawl",
916
+ "yurt",
917
+ "website",
918
+ "comic book",
919
+ "crossword",
920
+ "traffic sign",
921
+ "traffic light",
922
+ "dust jacket",
923
+ "menu",
924
+ "plate",
925
+ "guacamole",
926
+ "consomme",
927
+ "hot pot",
928
+ "trifle",
929
+ "ice cream",
930
+ "ice pop",
931
+ "baguette",
932
+ "bagel",
933
+ "pretzel",
934
+ "cheeseburger",
935
+ "hot dog",
936
+ "mashed potato",
937
+ "cabbage",
938
+ "broccoli",
939
+ "cauliflower",
940
+ "zucchini",
941
+ "spaghetti squash",
942
+ "acorn squash",
943
+ "butternut squash",
944
+ "cucumber",
945
+ "artichoke",
946
+ "bell pepper",
947
+ "cardoon",
948
+ "mushroom",
949
+ "Granny Smith",
950
+ "strawberry",
951
+ "orange",
952
+ "lemon",
953
+ "fig",
954
+ "pineapple",
955
+ "banana",
956
+ "jackfruit",
957
+ "custard apple",
958
+ "pomegranate",
959
+ "hay",
960
+ "carbonara",
961
+ "chocolate syrup",
962
+ "dough",
963
+ "meatloaf",
964
+ "pizza",
965
+ "pot pie",
966
+ "burrito",
967
+ "red wine",
968
+ "espresso",
969
+ "cup",
970
+ "eggnog",
971
+ "alp",
972
+ "bubble",
973
+ "cliff",
974
+ "coral reef",
975
+ "geyser",
976
+ "lakeshore",
977
+ "promontory",
978
+ "shoal",
979
+ "seashore",
980
+ "valley",
981
+ "volcano",
982
+ "baseball player",
983
+ "bridegroom",
984
+ "scuba diver",
985
+ "rapeseed",
986
+ "daisy",
987
+ "yellow lady's slipper",
988
+ "corn",
989
+ "acorn",
990
+ "rose hip",
991
+ "horse chestnut seed",
992
+ "coral fungus",
993
+ "agaric",
994
+ "gyromitra",
995
+ "stinkhorn mushroom",
996
+ "earth star",
997
+ "hen-of-the-woods",
998
+ "bolete",
999
+ "ear",
1000
+ "toilet paper"]
labels/imagenet21k_wordnet_lemmas.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.py ADDED
@@ -0,0 +1,359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import numpy as np
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+
6
+ from resnet import StdConv2d
7
+ from utils import (get_width_and_height_from_size, load_pretrained_weights,
8
+ get_model_params)
9
+
10
+ VALID_MODELS = ('ViT-B_16', 'ViT-B_32', 'ViT-L_16', 'ViT-L_32', 'R50+ViT-B_16')
11
+
12
+
13
class PositionEmbs(nn.Module):
    """Learned position embedding added to a token sequence.

    Args:
        num_patches (int): Number of patch tokens; one extra slot is
            allocated on top of this (``num_patches + 1`` positions).
        emb_dim (int): Size of each token embedding.
        dropout_rate (float): Dropout probability applied after the
            embedding is added; dropout is skipped entirely when it is
            not positive.
    """

    def __init__(self, num_patches, emb_dim, dropout_rate=0.1):
        super().__init__()
        initial = torch.randn(1, num_patches + 1, emb_dim)
        self.pos_embedding = nn.Parameter(initial)
        self.dropout = nn.Dropout(dropout_rate) if dropout_rate > 0 else None

    def forward(self, x):
        """Return ``x`` plus the position embedding (then dropout, if any)."""
        embedded = x + self.pos_embedding
        if self.dropout is None:
            return embedded
        return self.dropout(embedded)
30
+
31
+
32
class MlpBlock(nn.Module):
    """Transformer feed-forward block: Linear -> GELU -> Linear.

    Args:
        in_dim (int): Size of the input features.
        mlp_dim (int): Size of the hidden layer.
        out_dim (int): Size of the output features.
        dropout_rate (float): Dropout probability applied after the
            activation and after the second linear layer. When it is not
            positive, no dropout layers are created.
    """

    def __init__(self, in_dim, mlp_dim, out_dim, dropout_rate=0.1):
        super(MlpBlock, self).__init__()

        # init layers
        self.fc1 = nn.Linear(in_dim, mlp_dim)
        self.fc2 = nn.Linear(mlp_dim, out_dim)
        self.act = nn.GELU()
        if dropout_rate > 0.0:
            self.dropout1 = nn.Dropout(dropout_rate)
            self.dropout2 = nn.Dropout(dropout_rate)
        else:
            self.dropout1 = None
            self.dropout2 = None

    def forward(self, x):
        """Apply the feed-forward block to ``x``."""
        out = self.fc1(x)
        out = self.act(out)
        if self.dropout1 is not None:
            out = self.dropout1(out)

        out = self.fc2(out)
        # dropout2 is None when dropout is disabled; calling it
        # unconditionally would raise "NoneType is not callable".
        if self.dropout2 is not None:
            out = self.dropout2(out)
        return out
58
+
59
+
60
class LinearGeneral(nn.Module):
    """Linear map implemented as a tensor contraction.

    The weight has shape ``in_dim + feat_dim`` and the bias has shape
    ``feat_dim``.

    Args:
        in_dim (tuple): Shape of the axes that are contracted away.
        feat_dim (tuple): Shape of the axes that are produced.
    """

    def __init__(self, in_dim=(768, ), feat_dim=(12, 64)):
        super().__init__()
        weight_shape = tuple(in_dim) + tuple(feat_dim)
        self.weight = nn.Parameter(torch.randn(weight_shape))
        self.bias = nn.Parameter(torch.zeros(tuple(feat_dim)))

    def forward(self, x, dims):
        """Contract ``x`` with the weight over ``dims`` and add the bias.

        Args:
            x (Tensor): Input tensor.
            dims: Passed straight to ``torch.tensordot`` (the axes of
                ``x`` and of the weight to contract).
        """
        contracted = torch.tensordot(x, self.weight, dims=dims)
        return contracted + self.bias
70
+
71
+
72
class SelfAttention(nn.Module):
    """Multi-head scaled dot-product self-attention.

    Args:
        in_dim (int): Size of each input token embedding.
        heads (int): Number of attention heads; each head works on
            ``in_dim // heads`` features.
        dropout_rate (float): Dropout probability applied to the attention
            weights. No dropout layer is created when it is not positive.
    """

    def __init__(self, in_dim, heads=8, dropout_rate=0.1):
        super(SelfAttention, self).__init__()
        self.heads = heads
        self.head_dim = in_dim // heads
        # Scores are divided by sqrt(head_dim) in forward().
        self.scale = self.head_dim**0.5

        self.query = LinearGeneral((in_dim, ), (self.heads, self.head_dim))
        self.key = LinearGeneral((in_dim, ), (self.heads, self.head_dim))
        self.value = LinearGeneral((in_dim, ), (self.heads, self.head_dim))
        self.out = LinearGeneral((self.heads, self.head_dim), (in_dim, ))

        if dropout_rate > 0:
            self.dropout = nn.Dropout(dropout_rate)
        else:
            self.dropout = None

    def forward(self, x):
        """Attend over the token axis of ``x``.

        Args:
            x (Tensor): Tokens of shape (batch, tokens, in_dim).

        Returns:
            Tensor: Attended tokens of shape (batch, tokens, in_dim).
        """
        # Project the last axis to (heads, head_dim):
        # (batch, tokens, in_dim) -> (batch, tokens, heads, head_dim)
        q = self.query(x, dims=([2], [0]))
        k = self.key(x, dims=([2], [0]))
        v = self.value(x, dims=([2], [0]))

        # -> (batch, heads, tokens, head_dim)
        q = q.permute(0, 2, 1, 3)
        k = k.permute(0, 2, 1, 3)
        v = v.permute(0, 2, 1, 3)

        attn_weights = torch.matmul(q, k.transpose(-2, -1)) / self.scale
        attn_weights = F.softmax(attn_weights, dim=-1)
        # The dropout layer was previously constructed but never used, so
        # ``dropout_rate`` had no effect. It is a no-op in eval mode.
        if self.dropout is not None:
            attn_weights = self.dropout(attn_weights)
        out = torch.matmul(attn_weights, v)
        # back to (batch, tokens, heads, head_dim)
        out = out.permute(0, 2, 1, 3)

        # Merge the heads: contract (heads, head_dim) down to in_dim.
        out = self.out(out, dims=([2, 3], [0, 1]))

        return out
108
+
109
+
110
class EncoderBlock(nn.Module):
    """One transformer encoder layer.

    LayerNorm is applied before the attention and before the MLP, and each
    sub-layer output is added back onto its input (residual connection).

    Args:
        in_dim (int): Token embedding size (also the output size).
        mlp_dim (int): Hidden size of the feed-forward block.
        num_heads (int): Number of attention heads.
        dropout_rate (float): Dropout for the attention output and the MLP.
        attn_dropout_rate (float): Dropout rate handed to SelfAttention.
    """

    def __init__(self,
                 in_dim,
                 mlp_dim,
                 num_heads,
                 dropout_rate=0.1,
                 attn_dropout_rate=0.1):
        super().__init__()

        self.norm1 = nn.LayerNorm(in_dim)
        self.attn = SelfAttention(in_dim,
                                  heads=num_heads,
                                  dropout_rate=attn_dropout_rate)
        self.dropout = nn.Dropout(dropout_rate) if dropout_rate > 0 else None
        self.norm2 = nn.LayerNorm(in_dim)
        self.mlp = MlpBlock(in_dim, mlp_dim, in_dim, dropout_rate)

    def forward(self, x):
        """Run the attention and MLP sub-layers, each with a residual add."""
        # Attention sub-layer.
        attended = self.attn(self.norm1(x))
        if self.dropout is not None:
            attended = self.dropout(attended)
        attended += x

        # Feed-forward sub-layer.
        result = self.mlp(self.norm2(attended))
        result += attended
        return result
143
+
144
+
145
class Encoder(nn.Module):
    """Stack of transformer encoder layers with a learned position embedding.

    Args:
        num_patches (int): Number of patch tokens (forwarded to PositionEmbs).
        emb_dim (int): Token embedding size.
        mlp_dim (int): Hidden size of each layer's feed-forward block.
        num_layers (int): Number of encoder layers.
        num_heads (int): Number of attention heads per layer.
        dropout_rate (float): Dropout rate for embeddings and layers.
        attn_dropout_rate (float): Dropout rate handed to each layer's
            attention.
    """

    def __init__(self,
                 num_patches,
                 emb_dim,
                 mlp_dim,
                 num_layers=12,
                 num_heads=12,
                 dropout_rate=0.1,
                 attn_dropout_rate=0.0):
        super().__init__()

        # positional embedding
        self.pos_embedding = PositionEmbs(num_patches, emb_dim, dropout_rate)

        # encoder blocks, all operating at the embedding width
        blocks = [
            EncoderBlock(emb_dim, mlp_dim, num_heads, dropout_rate,
                         attn_dropout_rate) for _ in range(num_layers)
        ]
        self.encoder_layers = nn.ModuleList(blocks)
        self.norm = nn.LayerNorm(emb_dim)

    def forward(self, x):
        """Add position embeddings, run every layer, then a final LayerNorm."""
        hidden = self.pos_embedding(x)
        for block in self.encoder_layers:
            hidden = block(hidden)
        return self.norm(hidden)
176
+
177
+
178
class VisionTransformer(nn.Module):
    """Vision Transformer.

    Most easily loaded with the .from_name or .from_pretrained methods.

    Args:
        params (namedtuple): A set of Params. Although the default is
            ``None``, the constructor reads attributes from it, so a real
            Params instance is required.

    References:
        [1] https://arxiv.org/abs/2010.11929 (An Image is Worth 16x16 Words:
            Transformers for Image Recognition at Scale)

    Example:
        >>> import torch
        >>> from vision_transformer_pytorch import VisionTransformer
        >>> # NOTE: the input size must yield exactly ``num_patches`` tokens,
        >>> # i.e. it has to match the model's configured image_size.
        >>> inputs = torch.rand(1, 3, 256, 256)
        >>> model = VisionTransformer.from_pretrained('ViT-B_16')
        >>> model.eval()
        >>> outputs = model(inputs)
    """

    def __init__(self, params=None):
        super(VisionTransformer, self).__init__()
        self._params = params

        if self._params.resnet:
            # Hybrid model: a ResNet backbone produces the feature map and a
            # 1x1 convolution projects it to the embedding size.
            self.resnet = self._params.resnet()
            self.embedding = nn.Conv2d(self.resnet.width * 16,
                                       self._params.emb_dim,
                                       kernel_size=1,
                                       stride=1)
        else:
            # Plain ViT: non-overlapping patches are embedded by a
            # convolution whose kernel and stride equal the patch size.
            self.embedding = nn.Conv2d(3,
                                       self._params.emb_dim,
                                       kernel_size=self.patch_size,
                                       stride=self.patch_size)
        # class token
        self.cls_token = nn.Parameter(torch.zeros(1, 1, self._params.emb_dim))

        # transformer
        self.transformer = Encoder(
            num_patches=self.num_patches,
            emb_dim=self._params.emb_dim,
            mlp_dim=self._params.mlp_dim,
            num_layers=self._params.num_layers,
            num_heads=self._params.num_heads,
            dropout_rate=self._params.dropout_rate,
            attn_dropout_rate=self._params.attn_dropout_rate)

        # classifier
        self.classifier = nn.Linear(self._params.emb_dim,
                                    self._params.num_classes)

    @property
    def image_size(self):
        """Configured input size as a pair."""
        return get_width_and_height_from_size(self._params.image_size)

    @property
    def patch_size(self):
        """Configured patch size as a pair."""
        return get_width_and_height_from_size(self._params.patch_size)

    @property
    def num_patches(self):
        """Number of patch tokens produced for the configured image size."""
        h, w = self.image_size
        fh, fw = self.patch_size
        if hasattr(self, 'resnet'):
            # The backbone shrinks the image by its downsample factor.
            gh, gw = h // fh // self.resnet.downsample, w // fw // self.resnet.downsample
        else:
            gh, gw = h // fh, w // fw
        return gh * gw

    def extract_features(self, x):
        """Return the encoder output for every token, class token first.

        Args:
            x (Tensor): Batch of images, (n, channels, height, width).

        Returns:
            Tensor: Encoded tokens of shape (n, 1 + gh * gw, emb_dim).
        """
        if hasattr(self, 'resnet'):
            x = self.resnet(x)

        emb = self.embedding(x)  # (n, c, gh, gw)
        emb = emb.permute(0, 2, 3, 1)  # (n, gh, gw, c)
        b, h, w, c = emb.shape
        emb = emb.reshape(b, h * w, c)

        # prepend class token
        cls_token = self.cls_token.repeat(b, 1, 1)
        emb = torch.cat([cls_token, emb], dim=1)

        # transformer
        feat = self.transformer(emb)
        return feat

    def forward(self, x):
        """Return classification logits computed from the class token."""
        feat = self.extract_features(x)

        # classifier
        logits = self.classifier(feat[:, 0])
        return logits

    @classmethod
    def from_name(cls, model_name, in_channels=3, **override_params):
        """Create a vision transformer model according to name.

        Args:
            model_name (str): Name for vision transformer.
            in_channels (int): Input data's channel number.
            override_params (other key word params):
                Params to override model's global_params.
                Optional key:
                    'image_size', 'patch_size',
                    'emb_dim', 'mlp_dim',
                    'num_heads', 'num_layers',
                    'num_classes', 'attn_dropout_rate',
                    'dropout_rate'

        Returns:
            A vision transformer model.

        Raises:
            ValueError: If ``model_name`` is not one of VALID_MODELS.
        """
        cls._check_model_name_is_valid(model_name)
        params = get_model_params(model_name, override_params)
        model = cls(params)
        model._change_in_channels(in_channels)
        return model

    @classmethod
    def from_pretrained(cls,
                        model_name,
                        weights_path=None,
                        in_channels=3,
                        num_classes=1000,
                        **override_params):
        """Create a vision transformer model and load pretrained weights.

        Args:
            model_name (str): Name for vision transformer.
            weights_path (None or str):
                str: path to pretrained weights file on the local disk.
                None: use pretrained weights downloaded from the Internet.
            in_channels (int): Input data's channel number. When it is not 3
                the first convolution is replaced by a newly constructed
                layer after the weights are loaded.
            num_classes (int):
                Number of categories for classification.
                It controls the output size for final linear layer.
            override_params (other key word params):
                Params to override model's global_params.
                Optional key:
                    'image_size', 'patch_size',
                    'emb_dim', 'mlp_dim',
                    'num_heads', 'num_layers',
                    'attn_dropout_rate', 'dropout_rate'

        Returns:
            A pretrained vision transformer model.

        Raises:
            ValueError: If ``model_name`` is not one of VALID_MODELS.
        """
        model = cls.from_name(model_name,
                              num_classes=num_classes,
                              **override_params)
        load_pretrained_weights(model,
                                model_name,
                                weights_path=weights_path,
                                load_fc=(num_classes == 1000))
        model._change_in_channels(in_channels)
        return model

    @classmethod
    def _check_model_name_is_valid(cls, model_name):
        """Validate the model name.

        Args:
            model_name (str): Name for vision transformer.

        Raises:
            ValueError: If ``model_name`` is not one of VALID_MODELS.
        """
        if model_name not in VALID_MODELS:
            raise ValueError('model_name should be one of: ' +
                             ', '.join(VALID_MODELS))

    def _change_in_channels(self, in_channels):
        """Adjust model's first convolution layer to in_channels, if in_channels not equals 3.

        The replaced layer is newly constructed, so any weights previously
        held by that layer are discarded.

        Args:
            in_channels (int): Input data's channel number.
        """
        if in_channels != 3:
            if hasattr(self, 'resnet'):
                # nn.Sequential only supports integer (or slice) indexing for
                # item assignment, so ``root['conv'] = ...`` raises TypeError.
                # Assigning the attribute replaces the named sub-module in
                # place and keeps its position in the sequence.
                self.resnet.root.conv = StdConv2d(in_channels,
                                                  self.resnet.width,
                                                  kernel_size=7,
                                                  stride=2,
                                                  bias=False,
                                                  padding=3)
            else:
                self.embedding = nn.Conv2d(in_channels,
                                           self._params.emb_dim,
                                           kernel_size=self.patch_size,
                                           stride=self.patch_size)
pretrained_weights/ViT-B_16_imagenet21k_imagenet2012.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33cadfad17534e3bf51a17fe31561bbf8e650f17801cd715e71804254c1e8ef3
3
+ size 347471723
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ torch==1.13.1
2
+ torchvision==0.14.1
3
+ gradio==3.16.2
resnet.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+ from os.path import join as pjoin
6
+ from collections import OrderedDict
7
+
8
+
9
def weight_standardize(w, dim, eps):
    """Subtract the mean and divide by the standard deviation over ``dim``.

    Args:
        w (Tensor): Tensor to standardize.
        dim (int or list of int): Axes over which the statistics are taken.
        eps (float): Added to the standard deviation to avoid division by
            zero.

    Returns:
        Tensor: Standardized tensor with the same shape as ``w``.
    """
    # keepdim=True keeps the reduced axes as size 1 so the statistics
    # broadcast against ``w`` for any choice of ``dim``. Without it the
    # result is only shape-compatible when the leading axes are reduced
    # (and is identical to this in that case).
    w = w - torch.mean(w, dim=dim, keepdim=True)
    w = w / (torch.std(w, dim=dim, keepdim=True) + eps)
    return w
14
+
15
+
16
def np2th(weights, conv=False):
    """Convert a NumPy array to a torch tensor.

    Args:
        weights (numpy.ndarray): Array to convert.
        conv (bool): When true, the axes are first reordered with the
            permutation (3, 2, 0, 1), i.e. HWIO -> OIHW for a convolution
            kernel.

    Returns:
        Tensor: Tensor created with ``torch.from_numpy``.
    """
    array = weights.transpose(3, 2, 0, 1) if conv else weights
    return torch.from_numpy(array)
21
+
22
+
23
class StdConv2d(nn.Conv2d):
    """Conv2d whose weight is standardized on every forward pass."""

    def forward(self, x):
        """Convolve ``x`` with the standardized weight."""
        # NOTE(review): statistics are taken over axes [0, 1, 2] of the
        # weight. nn.Conv2d stores weights as (out, in, kH, kW), so this may
        # be a leftover from an HWIO layout -- confirm the intended axes.
        kernel = weight_standardize(self.weight, dim=[0, 1, 2], eps=1e-5)
        return F.conv2d(x,
                        kernel,
                        bias=self.bias,
                        stride=self.stride,
                        padding=self.padding,
                        dilation=self.dilation,
                        groups=self.groups)
28
+
29
+
30
def conv3x3(in_channels, out_channels, stride=1, groups=1, bias=False):
    """Build a 3x3 StdConv2d with padding 1."""
    options = {
        'kernel_size': 3,
        'stride': stride,
        'padding': 1,
        'bias': bias,
        'groups': groups,
    }
    return StdConv2d(in_channels, out_channels, **options)
38
+
39
+
40
def conv1x1(in_channels, out_channels, stride=1, bias=False):
    """Build a 1x1 StdConv2d with no padding."""
    options = {
        'kernel_size': 1,
        'stride': stride,
        'padding': 0,
        'bias': bias,
    }
    return StdConv2d(in_channels, out_channels, **options)
47
+
48
+
49
class PreActBottleneck(nn.Module):
    """Bottleneck residual unit (1x1 -> 3x3 -> 1x1) with GroupNorm.

    As written, each convolution is followed by GroupNorm, with ReLU after
    the first two and after the residual sum.

    Args:
        in_channels (int): Channels of the input feature map.
        out_channels (int, optional): Channels of the output; defaults to
            ``in_channels``.
        mid_channels (int, optional): Channels inside the bottleneck;
            defaults to ``out_channels // 4``.
        stride (int): Stride of the 3x3 convolution and of the projection.
    """

    def __init__(self,
                 in_channels,
                 out_channels=None,
                 mid_channels=None,
                 stride=1):
        super().__init__()
        if not out_channels:
            out_channels = in_channels
        if not mid_channels:
            mid_channels = out_channels // 4

        self.gn1 = nn.GroupNorm(32, mid_channels, eps=1e-6)
        self.conv1 = conv1x1(in_channels, mid_channels, bias=False)
        self.gn2 = nn.GroupNorm(32, mid_channels, eps=1e-6)
        # The stride lives on the 3x3 convolution (original code has it on
        # conv1).
        self.conv2 = conv3x3(mid_channels, mid_channels, stride, bias=False)
        self.gn3 = nn.GroupNorm(32, out_channels, eps=1e-6)
        self.conv3 = conv1x1(mid_channels, out_channels, bias=False)
        self.relu = nn.ReLU(inplace=True)

        needs_projection = stride != 1 or in_channels != out_channels
        if needs_projection:
            # Project the shortcut so it matches the main branch's shape.
            self.downsample = conv1x1(in_channels,
                                      out_channels,
                                      stride,
                                      bias=False)
            self.gn_proj = nn.GroupNorm(out_channels, out_channels)

    def forward(self, x):
        """Return ``relu(shortcut(x) + branch(x))``."""
        # Shortcut branch: identity unless a projection was created.
        if hasattr(self, 'downsample'):
            shortcut = self.gn_proj(self.downsample(x))
        else:
            shortcut = x

        # Main branch.
        branch = self.relu(self.gn1(self.conv1(x)))
        branch = self.relu(self.gn2(self.conv2(branch)))
        branch = self.gn3(self.conv3(branch))

        return self.relu(shortcut + branch)
93
+
94
+
95
class ResNetV2(nn.Module):
    """ResNet backbone made of a stem (``root``) and three stages (``body``).

    Args:
        block_units (sequence of int): Number of bottleneck units in each of
            the three stages.
        width_factor (number): Multiplier on the base width of 64 channels.
    """

    def __init__(self, block_units, width_factor):
        super().__init__()
        width = int(64 * width_factor)
        self.width = width
        self.downsample = 16  # four stride=2 conv2d layer

        stem = OrderedDict()
        stem['conv'] = StdConv2d(3,
                                 width,
                                 kernel_size=7,
                                 stride=2,
                                 bias=False,
                                 padding=3)
        stem['gn'] = nn.GroupNorm(32, width, eps=1e-6)
        stem['relu'] = nn.ReLU(inplace=True)
        stem['pool'] = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)
        self.root = nn.Sequential(stem)

        # (in_channels, out_channels, mid_channels, stride of first unit)
        stage_specs = (
            (width, width * 4, width, 1),
            (width * 4, width * 8, width * 2, 2),
            (width * 8, width * 16, width * 4, 2),
        )
        stages = OrderedDict()
        for index, spec in enumerate(stage_specs):
            stages[f'block{index + 1:d}'] = self._make_stage(
                *spec, num_units=block_units[index])
        self.body = nn.Sequential(stages)

    @staticmethod
    def _make_stage(in_channels, out_channels, mid_channels, stride,
                    num_units):
        """Build one stage: 'unit1' changes shape, 'unit2'.. keep it."""
        units = OrderedDict()
        units['unit1'] = PreActBottleneck(in_channels=in_channels,
                                          out_channels=out_channels,
                                          mid_channels=mid_channels,
                                          stride=stride)
        for i in range(2, num_units + 1):
            units[f'unit{i:d}'] = PreActBottleneck(in_channels=out_channels,
                                                   out_channels=out_channels,
                                                   mid_channels=mid_channels)
        return nn.Sequential(units)

    def forward(self, x):
        """Run the stem and then the three stages."""
        return self.body(self.root(x))
161
+
162
+
163
def resnet50():
    """Build a ResNetV2 with stage depths (3, 4, 9) and width factor 1."""
    stage_depths = (3, 4, 9)
    return ResNetV2(block_units=stage_depths, width_factor=1)
utils.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import math
3
+ import torch
4
+ import collections
5
+
6
+ from torch import nn
7
+ from functools import partial
8
+ from torch.utils import model_zoo
9
+ from torch.nn import functional as F
10
+
11
+ from resnet import resnet50
12
+
13
+ ################################################################################
14
+ ### Helper functions for model architecture
15
+ ################################################################################
16
+
17
+ # Params: namedtuple
18
+ # get_width_and_height_from_size: obtain (H, W) from a size
19
+
20
+ # Parameters for the entire model (stem, all blocks, and head)
21
Params = collections.namedtuple(
    'Params',
    (
        'image_size',
        'patch_size',
        'emb_dim',
        'mlp_dim',
        'num_heads',
        'num_layers',
        'num_classes',
        'attn_dropout_rate',
        'dropout_rate',
        'resnet',
    ),
)

# Give every Params field a default of None so that Params() can be built
# from any subset of keyword arguments.
Params.__new__.__defaults__ = tuple(None for _ in Params._fields)
28
+
29
+
30
def get_width_and_height_from_size(x):
    """Obtain height and width from x.

    Args:
        x (int, tuple or list): Data size.

    Returns:
        ``(x, x)`` when ``x`` is an int; otherwise ``x`` itself, returned
        unchanged (its length is not checked).

    Raises:
        TypeError: If ``x`` is not an int, list or tuple.
    """
    if isinstance(x, int):
        return x, x
    if isinstance(x, (list, tuple)):
        return x
    # Name the offending type so the failure is diagnosable at the call site.
    raise TypeError('size must be an int, list or tuple, got {}'.format(
        type(x).__name__))
43
+
44
+
45
+ ################################################################################
46
+ ### Helper functions for loading model params
47
+ ################################################################################
48
+
49
+ # vision_transformer and get_model_params:
50
+ # Functions to build the Params for a vision transformer model
51
+ # url_map: Dict of URLs for pretrained weights
52
+ # load_pretrained_weights: A function to load pretrained weights
53
+
54
+
55
def vision_transformer(model_name):
    """Create Params for vision transformer model.

    Args:
        model_name (str): Model name to be queried; must be one of the keys
            of the preset table below.

    Returns:
        Params populated with the preset values for ``model_name``.

    Raises:
        KeyError: If ``model_name`` is not in the preset table.
    """
    # Order of the values inside each preset tuple.
    field_names = ('image_size', 'patch_size', 'emb_dim', 'mlp_dim',
                   'num_heads', 'num_layers', 'num_classes',
                   'attn_dropout_rate', 'dropout_rate', 'resnet')

    presets = {
        'ViT-B_16': (384, 16, 768, 3072, 12, 12, 1000, 0.0, 0.1, None),
        'ViT-B_32': (384, 32, 768, 3072, 12, 12, 1000, 0.0, 0.1, None),
        'ViT-L_16': (384, 16, 1024, 4096, 16, 24, 1000, 0.0, 0.1, None),
        'ViT-L_32': (384, 32, 1024, 4096, 16, 24, 1000, 0.0, 0.1, None),
        'R50+ViT-B_16': (384, 1, 768, 3072, 12, 12, 1000, 0.0, 0.1, resnet50),
    }

    # Pass the values by keyword so the result does not depend on the
    # declaration order of the Params fields.
    return Params(**dict(zip(field_names, presets[model_name])))
84
+
85
+
86
def get_model_params(model_name, override_params):
    """Return the Params for ``model_name`` with optional overrides applied.

    Args:
        model_name (str): Model's name.
        override_params (dict): Field names mapped to replacement values;
            a falsy value (``None`` or an empty dict) leaves the presets
            untouched.

    Returns:
        The resulting Params.
    """
    base_params = vision_transformer(model_name)
    if not override_params:
        return base_params
    # namedtuple._replace rejects names that are not Params fields
    # (ValueError on older Pythons; the docs state TypeError from 3.13).
    return base_params._replace(**override_params)
100
+
101
+
102
# Weights trained with standard methods; for details see the paper
# "An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale".
# Every file lives under the same release directory and is named after its model.
url_map = {
    name:
    'https://github.com/tczhangzhi/VisionTransformer-PyTorch/releases/download/1.0.1/'
    f'{name}_imagenet21k_imagenet2012.pth'
    for name in ('ViT-B_16', 'ViT-B_32', 'ViT-L_16', 'ViT-L_32',
                 'R50+ViT-B_16')
}
116
+
117
+
118
def load_pretrained_weights(model,
                            model_name,
                            weights_path=None,
                            load_fc=True,
                            advprop=False):
    """Loads pretrained weights from weights path or download using url.

    Args:
        model (Module): The whole model of vision transformer.
        model_name (str): Model name of vision transformer; used as the
            ``url_map`` key when downloading and in the final log line.
        weights_path (None, str or os.PathLike):
            str / PathLike: path to pretrained weights file on the local disk.
            None: use pretrained weights downloaded from the Internet.
        load_fc (bool): Whether to load pretrained weights for fc layer at
            the end of the model (keys ``classifier.weight`` and
            ``classifier.bias``).
        advprop (bool): Unused; kept so existing call sites keep working.

    Raises:
        AssertionError: If the checkpoint lacks keys the model needs, or
            contains keys the model does not have.
        KeyError: If a download is required and ``model_name`` is not in
            ``url_map``.
    """
    import os  # function-scope import: only needed for the PathLike check

    # Deserialize onto the CPU so checkpoints saved from a GPU also load on
    # CPU-only hosts; load_state_dict copies into the model's own tensors.
    if isinstance(weights_path, (str, os.PathLike)):
        state_dict = torch.load(weights_path, map_location='cpu')
    else:
        state_dict = model_zoo.load_url(url_map[model_name],
                                        map_location='cpu')

    if load_fc:
        ret = model.load_state_dict(state_dict, strict=False)
        assert not ret.missing_keys, 'Missing keys when loading pretrained weights: {}'.format(
            ret.missing_keys)
    else:
        # Drop the classifier head if present; a checkpoint saved without
        # a head is equally acceptable here.
        state_dict.pop('classifier.weight', None)
        state_dict.pop('classifier.bias', None)
        ret = model.load_state_dict(state_dict, strict=False)
        assert set(ret.missing_keys) == {
            'classifier.weight', 'classifier.bias'
        }, 'Missing keys when loading pretrained weights: {}'.format(
            ret.missing_keys)
    # Applies to both branches: the checkpoint must not carry extra keys.
    assert not ret.unexpected_keys, 'Unexpected keys when loading pretrained weights: {}'.format(
        ret.unexpected_keys)

    print('Loaded pretrained weights for {}'.format(model_name))