Spaces:
Sleeping
Sleeping
initial commit
Browse files- __init__.py +7 -0
- app.py +124 -0
- capture_weights.py +326 -0
- examples/carrier.jpg +0 -0
- examples/chicken.jpg +0 -0
- examples/eagle.jpg +0 -0
- examples/parachute.jpg +0 -0
- labels/imagenet1K_labels.txt +1000 -0
- labels/imagenet1k-simple-labels.json +1000 -0
- labels/imagenet21k_wordnet_lemmas.txt +0 -0
- model.py +359 -0
- pretrained_weights/ViT-B_16_imagenet21k_imagenet2012.pth +3 -0
- requirements.txt +3 -0
- resnet.py +164 -0
- utils.py +152 -0
__init__.py
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
__version__ = "1.0.3"
|
2 |
+
from .model import VisionTransformer, VALID_MODELS
|
3 |
+
from .utils import (
|
4 |
+
Params,
|
5 |
+
vision_transformer,
|
6 |
+
get_model_params,
|
7 |
+
)
|
app.py
ADDED
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import PIL
|
3 |
+
import ast
|
4 |
+
import cv2
|
5 |
+
import json
|
6 |
+
import torch
|
7 |
+
import pickle
|
8 |
+
import torchvision
|
9 |
+
import numpy as np
|
10 |
+
import gradio as gr
|
11 |
+
from PIL import Image
|
12 |
+
from typing import Tuple, Dict
|
13 |
+
import matplotlib.pyplot as plt
|
14 |
+
from timeit import default_timer as timer
|
15 |
+
from torchvision import datasets, transforms
|
16 |
+
|
17 |
+
import warnings
|
18 |
+
# Silence every warning category for the whole process so library notices do
# not clutter the demo output.
warnings.filterwarnings('ignore')

# One single-element row per entry of the examples directory. os.listdir()
# returns entries in arbitrary order, so sort them to keep the example
# gallery stable between restarts.
example_list = [["examples/" + example] for example in sorted(os.listdir("examples"))]

# Class names loaded from the JSON label file; predict_image() indexes this
# object with the class ids returned by the classifier. The encoding is given
# explicitly so the result does not depend on the platform's locale.
with open('labels/imagenet1k-simple-labels.json', encoding='utf-8') as f:
    class_names = json.load(f)
|
24 |
+
|
25 |
+
from model import VisionTransformer
|
26 |
+
from capture_weights import vit_weights
|
27 |
+
|
28 |
+
# Build the classifier used by predict_image() and restore its checkpoint,
# mapping all tensors onto the CPU.
vision_transformer = VisionTransformer.from_name('ViT-B_16', num_classes=1000)
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
model_weights = torch.load('pretrained_weights/ViT-B_16_imagenet21k_imagenet2012.pth',
                           map_location=torch.device('cpu'))
vision_transformer.load_state_dict(model_weights)
# nn.Module instances start in training mode; switch to evaluation mode so
# any dropout layers are disabled and repeated predictions agree.
vision_transformer.eval()
|
32 |
+
|
33 |
+
# Preprocessing applied to every input image before it reaches a model:
# resize to a fixed square, convert to a tensor, then normalise per channel.
_INPUT_SIZE = (384, 384)
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]

data_transforms = transforms.Compose([
    transforms.Resize(size=_INPUT_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
])
|
38 |
+
|
39 |
+
def inv_normalize(tensor):
    """Min-max rescale a tensor into the half-open range [0, 256).

    The smallest element maps to 0 and the largest to just under 256, so a
    later truncating cast to ``uint8`` yields values in 0..255.

    Args:
        tensor: Tensor to rescale.

    Returns:
        The rescaled tensor. When every element of ``tensor`` is identical
        the result is all zeros instead of the NaNs a 0 / 0 division would
        produce.
    """
    low = tensor.min()
    span = tensor.max() - low
    if span == 0:
        # Constant input: (tensor - low) is already all zeros. Multiplying by
        # 0.0 keeps the floating-point result type of the normal path.
        return (tensor - low) * 0.0
    return (tensor - low) / span * (256 - 1e-5)
|
43 |
+
|
44 |
+
def inv_transform(tensor, normalize=True):
    """Convert a channel-first tensor back to a PIL image.

    Args:
        tensor: Tensor whose axes are transposed with ``(1, 2, 0)`` before the
            image is built, i.e. it is indexed as (channel, height, width).
        normalize: When true (the default), rescale the values with
            ``inv_normalize`` first. When false, the values are cast to
            ``uint8`` as they are. This flag used to be accepted but ignored.

    Returns:
        The image built by ``PIL.Image.fromarray`` from the ``uint8`` array.
    """
    if normalize:
        tensor = inv_normalize(tensor)
    array = tensor.detach().cpu().numpy()
    array = array.transpose(1, 2, 0).astype(np.uint8)
    return PIL.Image.fromarray(array)
|
50 |
+
|
51 |
+
def predict_image(image) -> Tuple[Dict, float]:
    """Classify one image and time the model call.

    Args:
        image: Input accepted by ``data_transforms``.

    Returns:
        A pair ``(scores, seconds)``: ``scores`` maps the names of the three
        most probable classes to their softmax probabilities, and ``seconds``
        is the time spent after preprocessing, rounded to five decimals.
    """
    input_tensor = data_transforms(image)
    started = timer()
    with torch.inference_mode():
        # The batch holds a single image, so exactly one row of logits is
        # unpacked from the model output.
        (logits,) = vision_transformer(input_tensor[None])
        top_probs, top_ids = torch.softmax(logits, dim=0).topk(3)
    scores = {
        class_names[class_id]: probability
        for class_id, probability in zip(top_ids.tolist(), top_probs.tolist())
    }
    elapsed = round(timer() - started, 5)
    return scores, elapsed
|
64 |
+
|
65 |
+
def get_attention_map(img, num_layer=5, get_mask=False):
    """Compute an attention-rollout map for ``img`` at one encoder layer.

    The per-layer attention weights returned by ``vit_weights`` are averaged
    over heads, mixed with the identity to account for residual connections,
    and multiplied layer by layer. The class-token row of the selected
    layer's joint attention becomes the mask.

    Args:
        img: PIL image; ``img.size`` is used as the output (width, height).
        num_layer: Index into the stacked per-layer joint attentions.
        get_mask: When true, return the resized mask itself; otherwise
            return the input image multiplied by the mask.

    Returns:
        numpy.ndarray: the mask scaled so its maximum is 1 (``get_mask``
        true), or a ``uint8`` array holding the masked image.
    """
    x = data_transforms(img)

    # Inference only: keep autograd from recording the forward pass and the
    # rollout arithmetic, which would otherwise retain every activation.
    with torch.no_grad():
        _, att_mat = vit_weights(x.unsqueeze(0))

        # Stack the per-layer weights and drop the size-1 batch axis.
        att_mat = torch.stack(att_mat).squeeze(1)
        # Take the mean of the attention weights across the heads.
        att_mat = torch.mean(att_mat, dim=1)

        # To account for residual connections, add an identity matrix to the
        # attention matrix and re-normalize the weights.
        residual_att = torch.eye(att_mat.size(1))
        aug_att_mat = att_mat + residual_att
        aug_att_mat = aug_att_mat / aug_att_mat.sum(dim=-1).unsqueeze(-1)

        # Recursively multiply the weight matrices.
        joint_attentions = torch.zeros(aug_att_mat.size())
        joint_attentions[0] = aug_att_mat[0]
        for n in range(1, aug_att_mat.size(0)):
            joint_attentions[n] = torch.matmul(aug_att_mat[n], joint_attentions[n - 1])

        v = joint_attentions[num_layer]
        # The token count is patches + 1, so truncating the square root gives
        # the side length of the patch grid.
        grid_size = int(np.sqrt(aug_att_mat.size(-1)))
        # Row 0 is the class token; columns 1: are its weights on the patches.
        mask = v[0, 1:].reshape(grid_size, grid_size).detach().numpy()

    resized = cv2.resize(mask / mask.max(), img.size)
    if get_mask:
        return resized
    # Add a trailing axis so the mask broadcasts over the colour channels.
    return (resized[..., np.newaxis] * np.asarray(img)).astype("uint8")
|
95 |
+
|
96 |
+
# Tab that renders attention maps: an image, an encoder-layer selector whose
# value is passed to the callback as a zero-based index, and a mask toggle.
_layer_choices = [str(layer) for layer in range(1, 13)]

attention_interface = gr.Interface(
    fn=get_attention_map,
    inputs=[
        gr.Image(type="pil", label="Image"),
        gr.Dropdown(choices=_layer_choices, label="Attention Layer", value="6", type="index"),
        gr.Checkbox(label="Show Mask?"),
    ],
    outputs=gr.Image(type="pil", label="Attention Map").style(height=400),
    examples=example_list,
    title="Attention Maps 🔍",
    description="The ViT Base architecture has 12 transformer Encoder layers (12 attention heads in each).",
    article="From the dropdown menu, select the Encoder layer (tick the checkbox to visualize only the mask).",
)
|
108 |
+
|
109 |
+
# Tab that classifies an image: shows the top three labels and the time the
# prediction took.
classification_interface = gr.Interface(
    fn=predict_image,
    inputs=gr.Image(type="pil", label="Image"),
    outputs=[
        gr.Label(num_top_classes=3, label="Predictions"),
        gr.Number(label="Prediction time (secs)"),
    ],
    examples=example_list,
    title="Object Identification ✅",
    description="ImageNet object identification using pretrained ViT Base (Patch Size: 16 | Image Size: 384) architecture.",
    article="Upload an image from the example list or choose one of your own [[ImageNet Classes](https://github.com/anishathalye/imagenet-simple-labels/blob/master/imagenet-simple-labels.json)].",
)
|
119 |
+
|
120 |
+
# Combine both interfaces into one tabbed app; tab titles are listed in the
# same order as the interfaces.
_tabs = [attention_interface, classification_interface]
_tab_titles = ["Visualize Attention Maps", "Image Prediction"]
demo = gr.TabbedInterface(_tabs, _tab_titles, title="ImageNet 1K 📷")

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|
capture_weights.py
ADDED
@@ -0,0 +1,326 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import numpy as np
|
3 |
+
import torch.nn as nn
|
4 |
+
import torch.nn.functional as F
|
5 |
+
|
6 |
+
from torch.cuda.amp import autocast
|
7 |
+
|
8 |
+
from utils import (get_width_and_height_from_size, load_pretrained_weights, get_model_params)
|
9 |
+
|
10 |
+
# Model names accepted by VisionTransformer._check_model_name_is_valid.
VALID_MODELS = ('ViT-B_16', 'ViT-B_32', 'ViT-L_16', 'ViT-L_32')
|
11 |
+
|
12 |
+
class PositionEmbs(nn.Module):
    """Add a learnable position embedding to a token sequence.

    Args:
        num_patches: Number of patch tokens; the embedding holds one extra
            row on top of these.
        emb_dim: Size of each embedding vector.
        dropout_rate: Dropout probability applied to the sum; no dropout
            module is created when it is not positive.
    """

    def __init__(self, num_patches, emb_dim, dropout_rate=0.1):
        super().__init__()
        embedding_shape = (1, num_patches + 1, emb_dim)
        self.pos_embedding = nn.Parameter(torch.randn(*embedding_shape))
        self.dropout = nn.Dropout(dropout_rate) if dropout_rate > 0 else None

    @autocast()
    def forward(self, x):
        """Return ``x`` plus the position embedding, with optional dropout."""
        summed = x + self.pos_embedding
        if self.dropout is None:
            return summed
        return self.dropout(summed)
|
29 |
+
|
30 |
+
class MlpBlock(nn.Module):
    """Transformer feed-forward block: Linear -> GELU -> Linear.

    Args:
        in_dim: Size of the input features.
        mlp_dim: Size of the hidden layer.
        out_dim: Size of the output features.
        dropout_rate: Dropout probability used after the activation and
            after the second linear layer. No dropout modules are created
            when it is not positive.
    """

    def __init__(self, in_dim, mlp_dim, out_dim, dropout_rate=0.1):
        super(MlpBlock, self).__init__()

        # init layers
        self.fc1 = nn.Linear(in_dim, mlp_dim)
        self.fc2 = nn.Linear(mlp_dim, out_dim)
        self.act = nn.GELU()
        if dropout_rate > 0.0:
            self.dropout1 = nn.Dropout(dropout_rate)
            self.dropout2 = nn.Dropout(dropout_rate)
        else:
            self.dropout1 = None
            self.dropout2 = None

    @autocast()
    def forward(self, x):
        """Apply the block to ``x`` and return the transformed tensor."""
        out = self.fc1(x)
        out = self.act(out)
        if self.dropout1 is not None:
            out = self.dropout1(out)

        out = self.fc2(out)
        # dropout2 is None when dropout is disabled; calling it
        # unconditionally raised TypeError for dropout_rate == 0.
        if self.dropout2 is not None:
            out = self.dropout2(out)
        return out
|
57 |
+
|
58 |
+
|
59 |
+
class LinearGeneral(nn.Module):
    """Linear map over arbitrary axes, implemented with ``torch.tensordot``.

    Args:
        in_dim: Sizes of the input axes that are contracted away.
        feat_dim: Sizes of the output axes that are produced.
    """

    def __init__(self, in_dim=(768, ), feat_dim=(12, 64)):
        super().__init__()
        weight_shape = (*in_dim, *feat_dim)
        self.weight = nn.Parameter(torch.randn(weight_shape))
        self.bias = nn.Parameter(torch.zeros(tuple(feat_dim)))

    @autocast()
    def forward(self, x, dims):
        """Contract ``x`` with the weight along ``dims`` and add the bias."""
        projected = torch.tensordot(x, self.weight, dims=dims)
        return projected + self.bias
|
70 |
+
|
71 |
+
|
72 |
+
class SelfAttention(nn.Module):
    """Multi-head self-attention that also returns its attention weights.

    Args:
        in_dim: Size of the input features; split evenly across the heads.
        heads: Number of attention heads.
        dropout_rate: Probability for the dropout module stored on the layer.
    """

    def __init__(self, in_dim, heads=8, dropout_rate=0.1):
        super().__init__()
        self.heads = heads
        self.head_dim = in_dim // heads
        self.scale = self.head_dim**0.5

        per_head = (self.heads, self.head_dim)
        self.query = LinearGeneral((in_dim, ), per_head)
        self.key = LinearGeneral((in_dim, ), per_head)
        self.value = LinearGeneral((in_dim, ), per_head)
        self.out = LinearGeneral(per_head, (in_dim, ))

        # NOTE: this module is constructed here but forward() never applies it.
        self.dropout = nn.Dropout(dropout_rate) if dropout_rate > 0 else None

    @autocast()
    def forward(self, x):
        """Return ``(output, attention_weights)`` for the input sequence ``x``."""
        # The unpacking requires a three-axis input; the sizes are not used.
        _batch, _tokens, _ = x.shape

        contract_features = ([2], [0])
        # Project, then move the head axis in front of the token axis.
        q = self.query(x, dims=contract_features).permute(0, 2, 1, 3)
        k = self.key(x, dims=contract_features).permute(0, 2, 1, 3)
        v = self.value(x, dims=contract_features).permute(0, 2, 1, 3)

        scores = torch.matmul(q, k.transpose(-2, -1)) / self.scale
        attn_weights = F.softmax(scores, dim=-1)

        # Weighted sum of the values, with the token axis moved back in front
        # of the head axis so the output projection can contract both.
        context = torch.matmul(attn_weights, v).permute(0, 2, 1, 3)
        projected = self.out(context, dims=([2, 3], [0, 1]))

        return projected, attn_weights
|
109 |
+
|
110 |
+
|
111 |
+
class EncoderBlock(nn.Module):
    """One pre-norm transformer encoder layer: attention, then MLP.

    Args:
        in_dim: Size of the token features.
        mlp_dim: Hidden size of the feed-forward block.
        num_heads: Number of attention heads.
        dropout_rate: Dropout probability after attention and inside the MLP.
        attn_dropout_rate: Dropout probability handed to the attention layer.
    """

    def __init__(self, in_dim, mlp_dim, num_heads, dropout_rate=0.1, attn_dropout_rate=0.1):
        super().__init__()

        self.norm1 = nn.LayerNorm(in_dim)
        self.attn = SelfAttention(in_dim, heads=num_heads, dropout_rate=attn_dropout_rate)
        self.dropout = nn.Dropout(dropout_rate) if dropout_rate > 0 else None
        self.norm2 = nn.LayerNorm(in_dim)
        self.mlp = MlpBlock(in_dim, mlp_dim, in_dim, dropout_rate)

    @autocast()
    def forward(self, x):
        """Return ``(output, attention_weights)`` for the token sequence ``x``."""
        # Attention sub-layer with a residual connection.
        attended, attn_weights = self.attn(self.norm1(x))
        if self.dropout is not None:
            attended = self.dropout(attended)
        attended += x

        # Feed-forward sub-layer with a residual connection.
        transformed = self.mlp(self.norm2(attended))
        transformed += attended
        return transformed, attn_weights
|
138 |
+
|
139 |
+
|
140 |
+
class Encoder(nn.Module):
    """Stack of encoder blocks that collects every block's attention weights.

    Args:
        num_patches: Number of patch tokens, passed to the position embedding.
        emb_dim: Size of the token features.
        mlp_dim: Hidden size of each block's feed-forward part.
        num_layers: Number of encoder blocks.
        num_heads: Number of attention heads per block.
        dropout_rate: Dropout probability for embeddings and blocks.
        attn_dropout_rate: Dropout probability handed to the attention layers.
    """

    def __init__(self,
                 num_patches,
                 emb_dim,
                 mlp_dim,
                 num_layers=12,
                 num_heads=12,
                 dropout_rate=0.1,
                 attn_dropout_rate=0.0):
        super().__init__()

        # positional embedding
        self.pos_embedding = PositionEmbs(num_patches, emb_dim, dropout_rate)

        # encoder blocks
        self.encoder_layers = nn.ModuleList([
            EncoderBlock(emb_dim, mlp_dim, num_heads, dropout_rate, attn_dropout_rate)
            for _ in range(num_layers)
        ])
        self.norm = nn.LayerNorm(emb_dim)

    @autocast()
    def forward(self, x):
        """Return the normalised features and a list of per-block attention weights."""
        hidden = self.pos_embedding(x)

        collected = []
        for block in self.encoder_layers:
            hidden, block_attention = block(hidden)
            collected.append(block_attention)

        return self.norm(hidden), collected
|
173 |
+
|
174 |
+
|
175 |
+
class VisionTransformer(nn.Module):
    """Vision Transformer whose forward pass also returns attention weights.

    Most easily created with the ``from_name`` or ``from_pretrained``
    class methods.

    Args:
        params (namedtuple): A set of Params.

    References:
        [1] https://arxiv.org/abs/2010.11929 (An Image is Worth 16x16 Words:
            Transformers for Image Recognition at Scale)
    """

    def __init__(self, params=None):
        super().__init__()
        self._params = params
        cfg = self._params

        # Patch embedding: a convolution whose kernel and stride both equal
        # the patch size.
        self.embedding = nn.Conv2d(3, cfg.emb_dim, kernel_size=self.patch_size, stride=self.patch_size)

        # class token
        self.cls_token = nn.Parameter(torch.zeros(1, 1, cfg.emb_dim))

        # transformer
        self.transformer = Encoder(
            num_patches=self.num_patches,
            emb_dim=cfg.emb_dim,
            mlp_dim=cfg.mlp_dim,
            num_layers=cfg.num_layers,
            num_heads=cfg.num_heads,
            dropout_rate=cfg.dropout_rate,
            attn_dropout_rate=cfg.attn_dropout_rate,
        )

        # classifier
        self.classifier = nn.Linear(cfg.emb_dim, cfg.num_classes)

    @property
    def image_size(self):
        """Image size from the params, passed through ``get_width_and_height_from_size``."""
        return get_width_and_height_from_size(self._params.image_size)

    @property
    def patch_size(self):
        """Patch size from the params, passed through ``get_width_and_height_from_size``."""
        return get_width_and_height_from_size(self._params.patch_size)

    @property
    def num_patches(self):
        """Number of whole patches that fit into the configured image size."""
        height, width = self.image_size
        patch_height, patch_width = self.patch_size
        return (height // patch_height) * (width // patch_width)

    @autocast()
    def extract_features(self, x):
        """Embed ``x`` into tokens and run them through the encoder.

        Returns:
            ``(features, attention_weights)`` as produced by the encoder.
        """
        patches = self.embedding(x)             # (n, c, gh, gw)
        patches = patches.permute(0, 2, 3, 1)   # (n, gh, gw, c)
        batch, grid_h, grid_w, channels = patches.shape
        tokens = patches.reshape(batch, grid_h * grid_w, channels)

        # prepend class token
        class_tokens = self.cls_token.repeat(batch, 1, 1)
        tokens = torch.cat([class_tokens, tokens], dim=1)

        # transformer
        feat, attn_weights = self.transformer(tokens)
        return feat, attn_weights

    @autocast()
    def forward(self, x):
        """Return ``(logits, attention_weights)`` for a batch of images."""
        feat, attn_weights = self.extract_features(x)

        # The classifier reads only the first token of each sequence.
        class_feature = feat[:, 0]
        return self.classifier(class_feature), attn_weights

    @classmethod
    def from_name(cls, model_name, in_channels=3, **override_params):
        """Create a vision transformer from a model name.

        Args:
            model_name (str): Name for vision transformer.
            in_channels (int): Input data's channel number.
            override_params (other key word params):
                Params to override model's global_params.
                Optional key:
                    'image_size', 'patch_size',
                    'emb_dim', 'mlp_dim',
                    'num_heads', 'num_layers',
                    'num_classes', 'attn_dropout_rate',
                    'dropout_rate'

        Returns:
            A vision transformer model.

        Raises:
            ValueError: If ``model_name`` is not in ``VALID_MODELS``.
        """
        cls._check_model_name_is_valid(model_name)
        model = cls(get_model_params(model_name, override_params))
        model._change_in_channels(in_channels)
        return model

    @classmethod
    def from_pretrained(cls, model_name, weights_path=None, in_channels=3, num_classes=1000, **override_params):
        """Create a vision transformer and load pretrained weights into it.

        Args:
            model_name (str): Name for vision transformer.
            weights_path (None or str):
                str: path to pretrained weights file on the local disk.
                None: use pretrained weights downloaded from the Internet.
            in_channels (int): Input data's channel number.
            num_classes (int):
                Number of categories for classification.
                It controls the output size for final linear layer.
            override_params (other key word params):
                Params to override model's global_params.
                Optional key:
                    'image_size', 'patch_size',
                    'emb_dim', 'mlp_dim',
                    'num_heads', 'num_layers',
                    'num_classes', 'attn_dropout_rate',
                    'dropout_rate'

        Returns:
            A pretrained vision transformer model.
        """
        model = cls.from_name(model_name, num_classes=num_classes, **override_params)
        # load_fc is only requested when the head has 1000 outputs.
        keep_head = (num_classes == 1000)
        load_pretrained_weights(model, model_name, weights_path=weights_path, load_fc=keep_head)
        model._change_in_channels(in_channels)
        return model

    @classmethod
    def _check_model_name_is_valid(cls, model_name):
        """Validate a model name.

        Args:
            model_name (str): Name for vision transformer.

        Raises:
            ValueError: If ``model_name`` is not in ``VALID_MODELS``.
        """
        if model_name in VALID_MODELS:
            return
        raise ValueError('model_name should be one of: ' + ', '.join(VALID_MODELS))

    def _change_in_channels(self, in_channels):
        """Replace the patch-embedding convolution when ``in_channels`` is not 3.

        Args:
            in_channels (int): Input data's channel number.
        """
        if in_channels == 3:
            return
        self.embedding = nn.Conv2d(in_channels,
                                   self._params.emb_dim,
                                   kernel_size=self.patch_size,
                                   stride=self.patch_size)
|
322 |
+
|
323 |
+
# Module-level model whose forward pass returns (logits, attention weights);
# its checkpoint is restored with all tensors mapped onto the CPU.
vit_weights = VisionTransformer.from_name('ViT-B_16', num_classes=1000)
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
model_weights = torch.load('pretrained_weights/ViT-B_16_imagenet21k_imagenet2012.pth',
                           map_location=torch.device('cpu'))
vit_weights.load_state_dict(model_weights)
# nn.Module instances start in training mode; switch to evaluation mode so
# the dropout layers are disabled and the attention weights are repeatable.
vit_weights.eval()
|
examples/carrier.jpg
ADDED
examples/chicken.jpg
ADDED
examples/eagle.jpg
ADDED
examples/parachute.jpg
ADDED
labels/imagenet1K_labels.txt
ADDED
@@ -0,0 +1,1000 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{0: 'tench, Tinca tinca',
|
2 |
+
1: 'goldfish, Carassius auratus',
|
3 |
+
2: 'great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias',
|
4 |
+
3: 'tiger shark, Galeocerdo cuvieri',
|
5 |
+
4: 'hammerhead, hammerhead shark',
|
6 |
+
5: 'electric ray, crampfish, numbfish, torpedo',
|
7 |
+
6: 'stingray',
|
8 |
+
7: 'cock',
|
9 |
+
8: 'hen',
|
10 |
+
9: 'ostrich, Struthio camelus',
|
11 |
+
10: 'brambling, Fringilla montifringilla',
|
12 |
+
11: 'goldfinch, Carduelis carduelis',
|
13 |
+
12: 'house finch, linnet, Carpodacus mexicanus',
|
14 |
+
13: 'junco, snowbird',
|
15 |
+
14: 'indigo bunting, indigo finch, indigo bird, Passerina cyanea',
|
16 |
+
15: 'robin, American robin, Turdus migratorius',
|
17 |
+
16: 'bulbul',
|
18 |
+
17: 'jay',
|
19 |
+
18: 'magpie',
|
20 |
+
19: 'chickadee',
|
21 |
+
20: 'water ouzel, dipper',
|
22 |
+
21: 'kite',
|
23 |
+
22: 'bald eagle, American eagle, Haliaeetus leucocephalus',
|
24 |
+
23: 'vulture',
|
25 |
+
24: 'great grey owl, great gray owl, Strix nebulosa',
|
26 |
+
25: 'European fire salamander, Salamandra salamandra',
|
27 |
+
26: 'common newt, Triturus vulgaris',
|
28 |
+
27: 'eft',
|
29 |
+
28: 'spotted salamander, Ambystoma maculatum',
|
30 |
+
29: 'axolotl, mud puppy, Ambystoma mexicanum',
|
31 |
+
30: 'bullfrog, Rana catesbeiana',
|
32 |
+
31: 'tree frog, tree-frog',
|
33 |
+
32: 'tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui',
|
34 |
+
33: 'loggerhead, loggerhead turtle, Caretta caretta',
|
35 |
+
34: 'leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea',
|
36 |
+
35: 'mud turtle',
|
37 |
+
36: 'terrapin',
|
38 |
+
37: 'box turtle, box tortoise',
|
39 |
+
38: 'banded gecko',
|
40 |
+
39: 'common iguana, iguana, Iguana iguana',
|
41 |
+
40: 'American chameleon, anole, Anolis carolinensis',
|
42 |
+
41: 'whiptail, whiptail lizard',
|
43 |
+
42: 'agama',
|
44 |
+
43: 'frilled lizard, Chlamydosaurus kingi',
|
45 |
+
44: 'alligator lizard',
|
46 |
+
45: 'Gila monster, Heloderma suspectum',
|
47 |
+
46: 'green lizard, Lacerta viridis',
|
48 |
+
47: 'African chameleon, Chamaeleo chamaeleon',
|
49 |
+
48: 'Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis',
|
50 |
+
49: 'African crocodile, Nile crocodile, Crocodylus niloticus',
|
51 |
+
50: 'American alligator, Alligator mississipiensis',
|
52 |
+
51: 'triceratops',
|
53 |
+
52: 'thunder snake, worm snake, Carphophis amoenus',
|
54 |
+
53: 'ringneck snake, ring-necked snake, ring snake',
|
55 |
+
54: 'hognose snake, puff adder, sand viper',
|
56 |
+
55: 'green snake, grass snake',
|
57 |
+
56: 'king snake, kingsnake',
|
58 |
+
57: 'garter snake, grass snake',
|
59 |
+
58: 'water snake',
|
60 |
+
59: 'vine snake',
|
61 |
+
60: 'night snake, Hypsiglena torquata',
|
62 |
+
61: 'boa constrictor, Constrictor constrictor',
|
63 |
+
62: 'rock python, rock snake, Python sebae',
|
64 |
+
63: 'Indian cobra, Naja naja',
|
65 |
+
64: 'green mamba',
|
66 |
+
65: 'sea snake',
|
67 |
+
66: 'horned viper, cerastes, sand viper, horned asp, Cerastes cornutus',
|
68 |
+
67: 'diamondback, diamondback rattlesnake, Crotalus adamanteus',
|
69 |
+
68: 'sidewinder, horned rattlesnake, Crotalus cerastes',
|
70 |
+
69: 'trilobite',
|
71 |
+
70: 'harvestman, daddy longlegs, Phalangium opilio',
|
72 |
+
71: 'scorpion',
|
73 |
+
72: 'black and gold garden spider, Argiope aurantia',
|
74 |
+
73: 'barn spider, Araneus cavaticus',
|
75 |
+
74: 'garden spider, Aranea diademata',
|
76 |
+
75: 'black widow, Latrodectus mactans',
|
77 |
+
76: 'tarantula',
|
78 |
+
77: 'wolf spider, hunting spider',
|
79 |
+
78: 'tick',
|
80 |
+
79: 'centipede',
|
81 |
+
80: 'black grouse',
|
82 |
+
81: 'ptarmigan',
|
83 |
+
82: 'ruffed grouse, partridge, Bonasa umbellus',
|
84 |
+
83: 'prairie chicken, prairie grouse, prairie fowl',
|
85 |
+
84: 'peacock',
|
86 |
+
85: 'quail',
|
87 |
+
86: 'partridge',
|
88 |
+
87: 'African grey, African gray, Psittacus erithacus',
|
89 |
+
88: 'macaw',
|
90 |
+
89: 'sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita',
|
91 |
+
90: 'lorikeet',
|
92 |
+
91: 'coucal',
|
93 |
+
92: 'bee eater',
|
94 |
+
93: 'hornbill',
|
95 |
+
94: 'hummingbird',
|
96 |
+
95: 'jacamar',
|
97 |
+
96: 'toucan',
|
98 |
+
97: 'drake',
|
99 |
+
98: 'red-breasted merganser, Mergus serrator',
|
100 |
+
99: 'goose',
|
101 |
+
100: 'black swan, Cygnus atratus',
|
102 |
+
101: 'tusker',
|
103 |
+
102: 'echidna, spiny anteater, anteater',
|
104 |
+
103: 'platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus',
|
105 |
+
104: 'wallaby, brush kangaroo',
|
106 |
+
105: 'koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus',
|
107 |
+
106: 'wombat',
|
108 |
+
107: 'jellyfish',
|
109 |
+
108: 'sea anemone, anemone',
|
110 |
+
109: 'brain coral',
|
111 |
+
110: 'flatworm, platyhelminth',
|
112 |
+
111: 'nematode, nematode worm, roundworm',
|
113 |
+
112: 'conch',
|
114 |
+
113: 'snail',
|
115 |
+
114: 'slug',
|
116 |
+
115: 'sea slug, nudibranch',
|
117 |
+
116: 'chiton, coat-of-mail shell, sea cradle, polyplacophore',
|
118 |
+
117: 'chambered nautilus, pearly nautilus, nautilus',
|
119 |
+
118: 'Dungeness crab, Cancer magister',
|
120 |
+
119: 'rock crab, Cancer irroratus',
|
121 |
+
120: 'fiddler crab',
|
122 |
+
121: 'king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica',
|
123 |
+
122: 'American lobster, Northern lobster, Maine lobster, Homarus americanus',
|
124 |
+
123: 'spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish',
|
125 |
+
124: 'crayfish, crawfish, crawdad, crawdaddy',
|
126 |
+
125: 'hermit crab',
|
127 |
+
126: 'isopod',
|
128 |
+
127: 'white stork, Ciconia ciconia',
|
129 |
+
128: 'black stork, Ciconia nigra',
|
130 |
+
129: 'spoonbill',
|
131 |
+
130: 'flamingo',
|
132 |
+
131: 'little blue heron, Egretta caerulea',
|
133 |
+
132: 'American egret, great white heron, Egretta albus',
|
134 |
+
133: 'bittern',
|
135 |
+
134: 'crane',
|
136 |
+
135: 'limpkin, Aramus pictus',
|
137 |
+
136: 'European gallinule, Porphyrio porphyrio',
|
138 |
+
137: 'American coot, marsh hen, mud hen, water hen, Fulica americana',
|
139 |
+
138: 'bustard',
|
140 |
+
139: 'ruddy turnstone, Arenaria interpres',
|
141 |
+
140: 'red-backed sandpiper, dunlin, Erolia alpina',
|
142 |
+
141: 'redshank, Tringa totanus',
|
143 |
+
142: 'dowitcher',
|
144 |
+
143: 'oystercatcher, oyster catcher',
|
145 |
+
144: 'pelican',
|
146 |
+
145: 'king penguin, Aptenodytes patagonica',
|
147 |
+
146: 'albatross, mollymawk',
|
148 |
+
147: 'grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus',
|
149 |
+
148: 'killer whale, killer, orca, grampus, sea wolf, Orcinus orca',
|
150 |
+
149: 'dugong, Dugong dugon',
|
151 |
+
150: 'sea lion',
|
152 |
+
151: 'Chihuahua',
|
153 |
+
152: 'Japanese spaniel',
|
154 |
+
153: 'Maltese dog, Maltese terrier, Maltese',
|
155 |
+
154: 'Pekinese, Pekingese, Peke',
|
156 |
+
155: 'Shih-Tzu',
|
157 |
+
156: 'Blenheim spaniel',
|
158 |
+
157: 'papillon',
|
159 |
+
158: 'toy terrier',
|
160 |
+
159: 'Rhodesian ridgeback',
|
161 |
+
160: 'Afghan hound, Afghan',
|
162 |
+
161: 'basset, basset hound',
|
163 |
+
162: 'beagle',
|
164 |
+
163: 'bloodhound, sleuthhound',
|
165 |
+
164: 'bluetick',
|
166 |
+
165: 'black-and-tan coonhound',
|
167 |
+
166: 'Walker hound, Walker foxhound',
|
168 |
+
167: 'English foxhound',
|
169 |
+
168: 'redbone',
|
170 |
+
169: 'borzoi, Russian wolfhound',
|
171 |
+
170: 'Irish wolfhound',
|
172 |
+
171: 'Italian greyhound',
|
173 |
+
172: 'whippet',
|
174 |
+
173: 'Ibizan hound, Ibizan Podenco',
|
175 |
+
174: 'Norwegian elkhound, elkhound',
|
176 |
+
175: 'otterhound, otter hound',
|
177 |
+
176: 'Saluki, gazelle hound',
|
178 |
+
177: 'Scottish deerhound, deerhound',
|
179 |
+
178: 'Weimaraner',
|
180 |
+
179: 'Staffordshire bullterrier, Staffordshire bull terrier',
|
181 |
+
180: 'American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier',
|
182 |
+
181: 'Bedlington terrier',
|
183 |
+
182: 'Border terrier',
|
184 |
+
183: 'Kerry blue terrier',
|
185 |
+
184: 'Irish terrier',
|
186 |
+
185: 'Norfolk terrier',
|
187 |
+
186: 'Norwich terrier',
|
188 |
+
187: 'Yorkshire terrier',
|
189 |
+
188: 'wire-haired fox terrier',
|
190 |
+
189: 'Lakeland terrier',
|
191 |
+
190: 'Sealyham terrier, Sealyham',
|
192 |
+
191: 'Airedale, Airedale terrier',
|
193 |
+
192: 'cairn, cairn terrier',
|
194 |
+
193: 'Australian terrier',
|
195 |
+
194: 'Dandie Dinmont, Dandie Dinmont terrier',
|
196 |
+
195: 'Boston bull, Boston terrier',
|
197 |
+
196: 'miniature schnauzer',
|
198 |
+
197: 'giant schnauzer',
|
199 |
+
198: 'standard schnauzer',
|
200 |
+
199: 'Scotch terrier, Scottish terrier, Scottie',
|
201 |
+
200: 'Tibetan terrier, chrysanthemum dog',
|
202 |
+
201: 'silky terrier, Sydney silky',
|
203 |
+
202: 'soft-coated wheaten terrier',
|
204 |
+
203: 'West Highland white terrier',
|
205 |
+
204: 'Lhasa, Lhasa apso',
|
206 |
+
205: 'flat-coated retriever',
|
207 |
+
206: 'curly-coated retriever',
|
208 |
+
207: 'golden retriever',
|
209 |
+
208: 'Labrador retriever',
|
210 |
+
209: 'Chesapeake Bay retriever',
|
211 |
+
210: 'German short-haired pointer',
|
212 |
+
211: 'vizsla, Hungarian pointer',
|
213 |
+
212: 'English setter',
|
214 |
+
213: 'Irish setter, red setter',
|
215 |
+
214: 'Gordon setter',
|
216 |
+
215: 'Brittany spaniel',
|
217 |
+
216: 'clumber, clumber spaniel',
|
218 |
+
217: 'English springer, English springer spaniel',
|
219 |
+
218: 'Welsh springer spaniel',
|
220 |
+
219: 'cocker spaniel, English cocker spaniel, cocker',
|
221 |
+
220: 'Sussex spaniel',
|
222 |
+
221: 'Irish water spaniel',
|
223 |
+
222: 'kuvasz',
|
224 |
+
223: 'schipperke',
|
225 |
+
224: 'groenendael',
|
226 |
+
225: 'malinois',
|
227 |
+
226: 'briard',
|
228 |
+
227: 'kelpie',
|
229 |
+
228: 'komondor',
|
230 |
+
229: 'Old English sheepdog, bobtail',
|
231 |
+
230: 'Shetland sheepdog, Shetland sheep dog, Shetland',
|
232 |
+
231: 'collie',
|
233 |
+
232: 'Border collie',
|
234 |
+
233: 'Bouvier des Flandres, Bouviers des Flandres',
|
235 |
+
234: 'Rottweiler',
|
236 |
+
235: 'German shepherd, German shepherd dog, German police dog, alsatian',
|
237 |
+
236: 'Doberman, Doberman pinscher',
|
238 |
+
237: 'miniature pinscher',
|
239 |
+
238: 'Greater Swiss Mountain dog',
|
240 |
+
239: 'Bernese mountain dog',
|
241 |
+
240: 'Appenzeller',
|
242 |
+
241: 'EntleBucher',
|
243 |
+
242: 'boxer',
|
244 |
+
243: 'bull mastiff',
|
245 |
+
244: 'Tibetan mastiff',
|
246 |
+
245: 'French bulldog',
|
247 |
+
246: 'Great Dane',
|
248 |
+
247: 'Saint Bernard, St Bernard',
|
249 |
+
248: 'Eskimo dog, husky',
|
250 |
+
249: 'malamute, malemute, Alaskan malamute',
|
251 |
+
250: 'Siberian husky',
|
252 |
+
251: 'dalmatian, coach dog, carriage dog',
|
253 |
+
252: 'affenpinscher, monkey pinscher, monkey dog',
|
254 |
+
253: 'basenji',
|
255 |
+
254: 'pug, pug-dog',
|
256 |
+
255: 'Leonberg',
|
257 |
+
256: 'Newfoundland, Newfoundland dog',
|
258 |
+
257: 'Great Pyrenees',
|
259 |
+
258: 'Samoyed, Samoyede',
|
260 |
+
259: 'Pomeranian',
|
261 |
+
260: 'chow, chow chow',
|
262 |
+
261: 'keeshond',
|
263 |
+
262: 'Brabancon griffon',
|
264 |
+
263: 'Pembroke, Pembroke Welsh corgi',
|
265 |
+
264: 'Cardigan, Cardigan Welsh corgi',
|
266 |
+
265: 'toy poodle',
|
267 |
+
266: 'miniature poodle',
|
268 |
+
267: 'standard poodle',
|
269 |
+
268: 'Mexican hairless',
|
270 |
+
269: 'timber wolf, grey wolf, gray wolf, Canis lupus',
|
271 |
+
270: 'white wolf, Arctic wolf, Canis lupus tundrarum',
|
272 |
+
271: 'red wolf, maned wolf, Canis rufus, Canis niger',
|
273 |
+
272: 'coyote, prairie wolf, brush wolf, Canis latrans',
|
274 |
+
273: 'dingo, warrigal, warragal, Canis dingo',
|
275 |
+
274: 'dhole, Cuon alpinus',
|
276 |
+
275: 'African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus',
|
277 |
+
276: 'hyena, hyaena',
|
278 |
+
277: 'red fox, Vulpes vulpes',
|
279 |
+
278: 'kit fox, Vulpes macrotis',
|
280 |
+
279: 'Arctic fox, white fox, Alopex lagopus',
|
281 |
+
280: 'grey fox, gray fox, Urocyon cinereoargenteus',
|
282 |
+
281: 'tabby, tabby cat',
|
283 |
+
282: 'tiger cat',
|
284 |
+
283: 'Persian cat',
|
285 |
+
284: 'Siamese cat, Siamese',
|
286 |
+
285: 'Egyptian cat',
|
287 |
+
286: 'cougar, puma, catamount, mountain lion, painter, panther, Felis concolor',
|
288 |
+
287: 'lynx, catamount',
|
289 |
+
288: 'leopard, Panthera pardus',
|
290 |
+
289: 'snow leopard, ounce, Panthera uncia',
|
291 |
+
290: 'jaguar, panther, Panthera onca, Felis onca',
|
292 |
+
291: 'lion, king of beasts, Panthera leo',
|
293 |
+
292: 'tiger, Panthera tigris',
|
294 |
+
293: 'cheetah, chetah, Acinonyx jubatus',
|
295 |
+
294: 'brown bear, bruin, Ursus arctos',
|
296 |
+
295: 'American black bear, black bear, Ursus americanus, Euarctos americanus',
|
297 |
+
296: 'ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus',
|
298 |
+
297: 'sloth bear, Melursus ursinus, Ursus ursinus',
|
299 |
+
298: 'mongoose',
|
300 |
+
299: 'meerkat, mierkat',
|
301 |
+
300: 'tiger beetle',
|
302 |
+
301: 'ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle',
|
303 |
+
302: 'ground beetle, carabid beetle',
|
304 |
+
303: 'long-horned beetle, longicorn, longicorn beetle',
|
305 |
+
304: 'leaf beetle, chrysomelid',
|
306 |
+
305: 'dung beetle',
|
307 |
+
306: 'rhinoceros beetle',
|
308 |
+
307: 'weevil',
|
309 |
+
308: 'fly',
|
310 |
+
309: 'bee',
|
311 |
+
310: 'ant, emmet, pismire',
|
312 |
+
311: 'grasshopper, hopper',
|
313 |
+
312: 'cricket',
|
314 |
+
313: 'walking stick, walkingstick, stick insect',
|
315 |
+
314: 'cockroach, roach',
|
316 |
+
315: 'mantis, mantid',
|
317 |
+
316: 'cicada, cicala',
|
318 |
+
317: 'leafhopper',
|
319 |
+
318: 'lacewing, lacewing fly',
|
320 |
+
319: "dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk",
|
321 |
+
320: 'damselfly',
|
322 |
+
321: 'admiral',
|
323 |
+
322: 'ringlet, ringlet butterfly',
|
324 |
+
323: 'monarch, monarch butterfly, milkweed butterfly, Danaus plexippus',
|
325 |
+
324: 'cabbage butterfly',
|
326 |
+
325: 'sulphur butterfly, sulfur butterfly',
|
327 |
+
326: 'lycaenid, lycaenid butterfly',
|
328 |
+
327: 'starfish, sea star',
|
329 |
+
328: 'sea urchin',
|
330 |
+
329: 'sea cucumber, holothurian',
|
331 |
+
330: 'wood rabbit, cottontail, cottontail rabbit',
|
332 |
+
331: 'hare',
|
333 |
+
332: 'Angora, Angora rabbit',
|
334 |
+
333: 'hamster',
|
335 |
+
334: 'porcupine, hedgehog',
|
336 |
+
335: 'fox squirrel, eastern fox squirrel, Sciurus niger',
|
337 |
+
336: 'marmot',
|
338 |
+
337: 'beaver',
|
339 |
+
338: 'guinea pig, Cavia cobaya',
|
340 |
+
339: 'sorrel',
|
341 |
+
340: 'zebra',
|
342 |
+
341: 'hog, pig, grunter, squealer, Sus scrofa',
|
343 |
+
342: 'wild boar, boar, Sus scrofa',
|
344 |
+
343: 'warthog',
|
345 |
+
344: 'hippopotamus, hippo, river horse, Hippopotamus amphibius',
|
346 |
+
345: 'ox',
|
347 |
+
346: 'water buffalo, water ox, Asiatic buffalo, Bubalus bubalis',
|
348 |
+
347: 'bison',
|
349 |
+
348: 'ram, tup',
|
350 |
+
349: 'bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis',
|
351 |
+
350: 'ibex, Capra ibex',
|
352 |
+
351: 'hartebeest',
|
353 |
+
352: 'impala, Aepyceros melampus',
|
354 |
+
353: 'gazelle',
|
355 |
+
354: 'Arabian camel, dromedary, Camelus dromedarius',
|
356 |
+
355: 'llama',
|
357 |
+
356: 'weasel',
|
358 |
+
357: 'mink',
|
359 |
+
358: 'polecat, fitch, foulmart, foumart, Mustela putorius',
|
360 |
+
359: 'black-footed ferret, ferret, Mustela nigripes',
|
361 |
+
360: 'otter',
|
362 |
+
361: 'skunk, polecat, wood pussy',
|
363 |
+
362: 'badger',
|
364 |
+
363: 'armadillo',
|
365 |
+
364: 'three-toed sloth, ai, Bradypus tridactylus',
|
366 |
+
365: 'orangutan, orang, orangutang, Pongo pygmaeus',
|
367 |
+
366: 'gorilla, Gorilla gorilla',
|
368 |
+
367: 'chimpanzee, chimp, Pan troglodytes',
|
369 |
+
368: 'gibbon, Hylobates lar',
|
370 |
+
369: 'siamang, Hylobates syndactylus, Symphalangus syndactylus',
|
371 |
+
370: 'guenon, guenon monkey',
|
372 |
+
371: 'patas, hussar monkey, Erythrocebus patas',
|
373 |
+
372: 'baboon',
|
374 |
+
373: 'macaque',
|
375 |
+
374: 'langur',
|
376 |
+
375: 'colobus, colobus monkey',
|
377 |
+
376: 'proboscis monkey, Nasalis larvatus',
|
378 |
+
377: 'marmoset',
|
379 |
+
378: 'capuchin, ringtail, Cebus capucinus',
|
380 |
+
379: 'howler monkey, howler',
|
381 |
+
380: 'titi, titi monkey',
|
382 |
+
381: 'spider monkey, Ateles geoffroyi',
|
383 |
+
382: 'squirrel monkey, Saimiri sciureus',
|
384 |
+
383: 'Madagascar cat, ring-tailed lemur, Lemur catta',
|
385 |
+
384: 'indri, indris, Indri indri, Indri brevicaudatus',
|
386 |
+
385: 'Indian elephant, Elephas maximus',
|
387 |
+
386: 'African elephant, Loxodonta africana',
|
388 |
+
387: 'lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens',
|
389 |
+
388: 'giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca',
|
390 |
+
389: 'barracouta, snoek',
|
391 |
+
390: 'eel',
|
392 |
+
391: 'coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch',
|
393 |
+
392: 'rock beauty, Holocanthus tricolor',
|
394 |
+
393: 'anemone fish',
|
395 |
+
394: 'sturgeon',
|
396 |
+
395: 'gar, garfish, garpike, billfish, Lepisosteus osseus',
|
397 |
+
396: 'lionfish',
|
398 |
+
397: 'puffer, pufferfish, blowfish, globefish',
|
399 |
+
398: 'abacus',
|
400 |
+
399: 'abaya',
|
401 |
+
400: "academic gown, academic robe, judge's robe",
|
402 |
+
401: 'accordion, piano accordion, squeeze box',
|
403 |
+
402: 'acoustic guitar',
|
404 |
+
403: 'aircraft carrier, carrier, flattop, attack aircraft carrier',
|
405 |
+
404: 'airliner',
|
406 |
+
405: 'airship, dirigible',
|
407 |
+
406: 'altar',
|
408 |
+
407: 'ambulance',
|
409 |
+
408: 'amphibian, amphibious vehicle',
|
410 |
+
409: 'analog clock',
|
411 |
+
410: 'apiary, bee house',
|
412 |
+
411: 'apron',
|
413 |
+
412: 'ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin',
|
414 |
+
413: 'assault rifle, assault gun',
|
415 |
+
414: 'backpack, back pack, knapsack, packsack, rucksack, haversack',
|
416 |
+
415: 'bakery, bakeshop, bakehouse',
|
417 |
+
416: 'balance beam, beam',
|
418 |
+
417: 'balloon',
|
419 |
+
418: 'ballpoint, ballpoint pen, ballpen, Biro',
|
420 |
+
419: 'Band Aid',
|
421 |
+
420: 'banjo',
|
422 |
+
421: 'bannister, banister, balustrade, balusters, handrail',
|
423 |
+
422: 'barbell',
|
424 |
+
423: 'barber chair',
|
425 |
+
424: 'barbershop',
|
426 |
+
425: 'barn',
|
427 |
+
426: 'barometer',
|
428 |
+
427: 'barrel, cask',
|
429 |
+
428: 'barrow, garden cart, lawn cart, wheelbarrow',
|
430 |
+
429: 'baseball',
|
431 |
+
430: 'basketball',
|
432 |
+
431: 'bassinet',
|
433 |
+
432: 'bassoon',
|
434 |
+
433: 'bathing cap, swimming cap',
|
435 |
+
434: 'bath towel',
|
436 |
+
435: 'bathtub, bathing tub, bath, tub',
|
437 |
+
436: 'beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon',
|
438 |
+
437: 'beacon, lighthouse, beacon light, pharos',
|
439 |
+
438: 'beaker',
|
440 |
+
439: 'bearskin, busby, shako',
|
441 |
+
440: 'beer bottle',
|
442 |
+
441: 'beer glass',
|
443 |
+
442: 'bell cote, bell cot',
|
444 |
+
443: 'bib',
|
445 |
+
444: 'bicycle-built-for-two, tandem bicycle, tandem',
|
446 |
+
445: 'bikini, two-piece',
|
447 |
+
446: 'binder, ring-binder',
|
448 |
+
447: 'binoculars, field glasses, opera glasses',
|
449 |
+
448: 'birdhouse',
|
450 |
+
449: 'boathouse',
|
451 |
+
450: 'bobsled, bobsleigh, bob',
|
452 |
+
451: 'bolo tie, bolo, bola tie, bola',
|
453 |
+
452: 'bonnet, poke bonnet',
|
454 |
+
453: 'bookcase',
|
455 |
+
454: 'bookshop, bookstore, bookstall',
|
456 |
+
455: 'bottlecap',
|
457 |
+
456: 'bow',
|
458 |
+
457: 'bow tie, bow-tie, bowtie',
|
459 |
+
458: 'brass, memorial tablet, plaque',
|
460 |
+
459: 'brassiere, bra, bandeau',
|
461 |
+
460: 'breakwater, groin, groyne, mole, bulwark, seawall, jetty',
|
462 |
+
461: 'breastplate, aegis, egis',
|
463 |
+
462: 'broom',
|
464 |
+
463: 'bucket, pail',
|
465 |
+
464: 'buckle',
|
466 |
+
465: 'bulletproof vest',
|
467 |
+
466: 'bullet train, bullet',
|
468 |
+
467: 'butcher shop, meat market',
|
469 |
+
468: 'cab, hack, taxi, taxicab',
|
470 |
+
469: 'caldron, cauldron',
|
471 |
+
470: 'candle, taper, wax light',
|
472 |
+
471: 'cannon',
|
473 |
+
472: 'canoe',
|
474 |
+
473: 'can opener, tin opener',
|
475 |
+
474: 'cardigan',
|
476 |
+
475: 'car mirror',
|
477 |
+
476: 'carousel, carrousel, merry-go-round, roundabout, whirligig',
|
478 |
+
477: "carpenter's kit, tool kit",
|
479 |
+
478: 'carton',
|
480 |
+
479: 'car wheel',
|
481 |
+
480: 'cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM',
|
482 |
+
481: 'cassette',
|
483 |
+
482: 'cassette player',
|
484 |
+
483: 'castle',
|
485 |
+
484: 'catamaran',
|
486 |
+
485: 'CD player',
|
487 |
+
486: 'cello, violoncello',
|
488 |
+
487: 'cellular telephone, cellular phone, cellphone, cell, mobile phone',
|
489 |
+
488: 'chain',
|
490 |
+
489: 'chainlink fence',
|
491 |
+
490: 'chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour',
|
492 |
+
491: 'chain saw, chainsaw',
|
493 |
+
492: 'chest',
|
494 |
+
493: 'chiffonier, commode',
|
495 |
+
494: 'chime, bell, gong',
|
496 |
+
495: 'china cabinet, china closet',
|
497 |
+
496: 'Christmas stocking',
|
498 |
+
497: 'church, church building',
|
499 |
+
498: 'cinema, movie theater, movie theatre, movie house, picture palace',
|
500 |
+
499: 'cleaver, meat cleaver, chopper',
|
501 |
+
500: 'cliff dwelling',
|
502 |
+
501: 'cloak',
|
503 |
+
502: 'clog, geta, patten, sabot',
|
504 |
+
503: 'cocktail shaker',
|
505 |
+
504: 'coffee mug',
|
506 |
+
505: 'coffeepot',
|
507 |
+
506: 'coil, spiral, volute, whorl, helix',
|
508 |
+
507: 'combination lock',
|
509 |
+
508: 'computer keyboard, keypad',
|
510 |
+
509: 'confectionery, confectionary, candy store',
|
511 |
+
510: 'container ship, containership, container vessel',
|
512 |
+
511: 'convertible',
|
513 |
+
512: 'corkscrew, bottle screw',
|
514 |
+
513: 'cornet, horn, trumpet, trump',
|
515 |
+
514: 'cowboy boot',
|
516 |
+
515: 'cowboy hat, ten-gallon hat',
|
517 |
+
516: 'cradle',
|
518 |
+
517: 'crane',
|
519 |
+
518: 'crash helmet',
|
520 |
+
519: 'crate',
|
521 |
+
520: 'crib, cot',
|
522 |
+
521: 'Crock Pot',
|
523 |
+
522: 'croquet ball',
|
524 |
+
523: 'crutch',
|
525 |
+
524: 'cuirass',
|
526 |
+
525: 'dam, dike, dyke',
|
527 |
+
526: 'desk',
|
528 |
+
527: 'desktop computer',
|
529 |
+
528: 'dial telephone, dial phone',
|
530 |
+
529: 'diaper, nappy, napkin',
|
531 |
+
530: 'digital clock',
|
532 |
+
531: 'digital watch',
|
533 |
+
532: 'dining table, board',
|
534 |
+
533: 'dishrag, dishcloth',
|
535 |
+
534: 'dishwasher, dish washer, dishwashing machine',
|
536 |
+
535: 'disk brake, disc brake',
|
537 |
+
536: 'dock, dockage, docking facility',
|
538 |
+
537: 'dogsled, dog sled, dog sleigh',
|
539 |
+
538: 'dome',
|
540 |
+
539: 'doormat, welcome mat',
|
541 |
+
540: 'drilling platform, offshore rig',
|
542 |
+
541: 'drum, membranophone, tympan',
|
543 |
+
542: 'drumstick',
|
544 |
+
543: 'dumbbell',
|
545 |
+
544: 'Dutch oven',
|
546 |
+
545: 'electric fan, blower',
|
547 |
+
546: 'electric guitar',
|
548 |
+
547: 'electric locomotive',
|
549 |
+
548: 'entertainment center',
|
550 |
+
549: 'envelope',
|
551 |
+
550: 'espresso maker',
|
552 |
+
551: 'face powder',
|
553 |
+
552: 'feather boa, boa',
|
554 |
+
553: 'file, file cabinet, filing cabinet',
|
555 |
+
554: 'fireboat',
|
556 |
+
555: 'fire engine, fire truck',
|
557 |
+
556: 'fire screen, fireguard',
|
558 |
+
557: 'flagpole, flagstaff',
|
559 |
+
558: 'flute, transverse flute',
|
560 |
+
559: 'folding chair',
|
561 |
+
560: 'football helmet',
|
562 |
+
561: 'forklift',
|
563 |
+
562: 'fountain',
|
564 |
+
563: 'fountain pen',
|
565 |
+
564: 'four-poster',
|
566 |
+
565: 'freight car',
|
567 |
+
566: 'French horn, horn',
|
568 |
+
567: 'frying pan, frypan, skillet',
|
569 |
+
568: 'fur coat',
|
570 |
+
569: 'garbage truck, dustcart',
|
571 |
+
570: 'gasmask, respirator, gas helmet',
|
572 |
+
571: 'gas pump, gasoline pump, petrol pump, island dispenser',
|
573 |
+
572: 'goblet',
|
574 |
+
573: 'go-kart',
|
575 |
+
574: 'golf ball',
|
576 |
+
575: 'golfcart, golf cart',
|
577 |
+
576: 'gondola',
|
578 |
+
577: 'gong, tam-tam',
|
579 |
+
578: 'gown',
|
580 |
+
579: 'grand piano, grand',
|
581 |
+
580: 'greenhouse, nursery, glasshouse',
|
582 |
+
581: 'grille, radiator grille',
|
583 |
+
582: 'grocery store, grocery, food market, market',
|
584 |
+
583: 'guillotine',
|
585 |
+
584: 'hair slide',
|
586 |
+
585: 'hair spray',
|
587 |
+
586: 'half track',
|
588 |
+
587: 'hammer',
|
589 |
+
588: 'hamper',
|
590 |
+
589: 'hand blower, blow dryer, blow drier, hair dryer, hair drier',
|
591 |
+
590: 'hand-held computer, hand-held microcomputer',
|
592 |
+
591: 'handkerchief, hankie, hanky, hankey',
|
593 |
+
592: 'hard disc, hard disk, fixed disk',
|
594 |
+
593: 'harmonica, mouth organ, harp, mouth harp',
|
595 |
+
594: 'harp',
|
596 |
+
595: 'harvester, reaper',
|
597 |
+
596: 'hatchet',
|
598 |
+
597: 'holster',
|
599 |
+
598: 'home theater, home theatre',
|
600 |
+
599: 'honeycomb',
|
601 |
+
600: 'hook, claw',
|
602 |
+
601: 'hoopskirt, crinoline',
|
603 |
+
602: 'horizontal bar, high bar',
|
604 |
+
603: 'horse cart, horse-cart',
|
605 |
+
604: 'hourglass',
|
606 |
+
605: 'iPod',
|
607 |
+
606: 'iron, smoothing iron',
|
608 |
+
607: "jack-o'-lantern",
|
609 |
+
608: 'jean, blue jean, denim',
|
610 |
+
609: 'jeep, landrover',
|
611 |
+
610: 'jersey, T-shirt, tee shirt',
|
612 |
+
611: 'jigsaw puzzle',
|
613 |
+
612: 'jinrikisha, ricksha, rickshaw',
|
614 |
+
613: 'joystick',
|
615 |
+
614: 'kimono',
|
616 |
+
615: 'knee pad',
|
617 |
+
616: 'knot',
|
618 |
+
617: 'lab coat, laboratory coat',
|
619 |
+
618: 'ladle',
|
620 |
+
619: 'lampshade, lamp shade',
|
621 |
+
620: 'laptop, laptop computer',
|
622 |
+
621: 'lawn mower, mower',
|
623 |
+
622: 'lens cap, lens cover',
|
624 |
+
623: 'letter opener, paper knife, paperknife',
|
625 |
+
624: 'library',
|
626 |
+
625: 'lifeboat',
|
627 |
+
626: 'lighter, light, igniter, ignitor',
|
628 |
+
627: 'limousine, limo',
|
629 |
+
628: 'liner, ocean liner',
|
630 |
+
629: 'lipstick, lip rouge',
|
631 |
+
630: 'Loafer',
|
632 |
+
631: 'lotion',
|
633 |
+
632: 'loudspeaker, speaker, speaker unit, loudspeaker system, speaker system',
|
634 |
+
633: "loupe, jeweler's loupe",
|
635 |
+
634: 'lumbermill, sawmill',
|
636 |
+
635: 'magnetic compass',
|
637 |
+
636: 'mailbag, postbag',
|
638 |
+
637: 'mailbox, letter box',
|
639 |
+
638: 'maillot',
|
640 |
+
639: 'maillot, tank suit',
|
641 |
+
640: 'manhole cover',
|
642 |
+
641: 'maraca',
|
643 |
+
642: 'marimba, xylophone',
|
644 |
+
643: 'mask',
|
645 |
+
644: 'matchstick',
|
646 |
+
645: 'maypole',
|
647 |
+
646: 'maze, labyrinth',
|
648 |
+
647: 'measuring cup',
|
649 |
+
648: 'medicine chest, medicine cabinet',
|
650 |
+
649: 'megalith, megalithic structure',
|
651 |
+
650: 'microphone, mike',
|
652 |
+
651: 'microwave, microwave oven',
|
653 |
+
652: 'military uniform',
|
654 |
+
653: 'milk can',
|
655 |
+
654: 'minibus',
|
656 |
+
655: 'miniskirt, mini',
|
657 |
+
656: 'minivan',
|
658 |
+
657: 'missile',
|
659 |
+
658: 'mitten',
|
660 |
+
659: 'mixing bowl',
|
661 |
+
660: 'mobile home, manufactured home',
|
662 |
+
661: 'Model T',
|
663 |
+
662: 'modem',
|
664 |
+
663: 'monastery',
|
665 |
+
664: 'monitor',
|
666 |
+
665: 'moped',
|
667 |
+
666: 'mortar',
|
668 |
+
667: 'mortarboard',
|
669 |
+
668: 'mosque',
|
670 |
+
669: 'mosquito net',
|
671 |
+
670: 'motor scooter, scooter',
|
672 |
+
671: 'mountain bike, all-terrain bike, off-roader',
|
673 |
+
672: 'mountain tent',
|
674 |
+
673: 'mouse, computer mouse',
|
675 |
+
674: 'mousetrap',
|
676 |
+
675: 'moving van',
|
677 |
+
676: 'muzzle',
|
678 |
+
677: 'nail',
|
679 |
+
678: 'neck brace',
|
680 |
+
679: 'necklace',
|
681 |
+
680: 'nipple',
|
682 |
+
681: 'notebook, notebook computer',
|
683 |
+
682: 'obelisk',
|
684 |
+
683: 'oboe, hautboy, hautbois',
|
685 |
+
684: 'ocarina, sweet potato',
|
686 |
+
685: 'odometer, hodometer, mileometer, milometer',
|
687 |
+
686: 'oil filter',
|
688 |
+
687: 'organ, pipe organ',
|
689 |
+
688: 'oscilloscope, scope, cathode-ray oscilloscope, CRO',
|
690 |
+
689: 'overskirt',
|
691 |
+
690: 'oxcart',
|
692 |
+
691: 'oxygen mask',
|
693 |
+
692: 'packet',
|
694 |
+
693: 'paddle, boat paddle',
|
695 |
+
694: 'paddlewheel, paddle wheel',
|
696 |
+
695: 'padlock',
|
697 |
+
696: 'paintbrush',
|
698 |
+
697: "pajama, pyjama, pj's, jammies",
|
699 |
+
698: 'palace',
|
700 |
+
699: 'panpipe, pandean pipe, syrinx',
|
701 |
+
700: 'paper towel',
|
702 |
+
701: 'parachute, chute',
|
703 |
+
702: 'parallel bars, bars',
|
704 |
+
703: 'park bench',
|
705 |
+
704: 'parking meter',
|
706 |
+
705: 'passenger car, coach, carriage',
|
707 |
+
706: 'patio, terrace',
|
708 |
+
707: 'pay-phone, pay-station',
|
709 |
+
708: 'pedestal, plinth, footstall',
|
710 |
+
709: 'pencil box, pencil case',
|
711 |
+
710: 'pencil sharpener',
|
712 |
+
711: 'perfume, essence',
|
713 |
+
712: 'Petri dish',
|
714 |
+
713: 'photocopier',
|
715 |
+
714: 'pick, plectrum, plectron',
|
716 |
+
715: 'pickelhaube',
|
717 |
+
716: 'picket fence, paling',
|
718 |
+
717: 'pickup, pickup truck',
|
719 |
+
718: 'pier',
|
720 |
+
719: 'piggy bank, penny bank',
|
721 |
+
720: 'pill bottle',
|
722 |
+
721: 'pillow',
|
723 |
+
722: 'ping-pong ball',
|
724 |
+
723: 'pinwheel',
|
725 |
+
724: 'pirate, pirate ship',
|
726 |
+
725: 'pitcher, ewer',
|
727 |
+
726: "plane, carpenter's plane, woodworking plane",
|
728 |
+
727: 'planetarium',
|
729 |
+
728: 'plastic bag',
|
730 |
+
729: 'plate rack',
|
731 |
+
730: 'plow, plough',
|
732 |
+
731: "plunger, plumber's helper",
|
733 |
+
732: 'Polaroid camera, Polaroid Land camera',
|
734 |
+
733: 'pole',
|
735 |
+
734: 'police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria',
|
736 |
+
735: 'poncho',
|
737 |
+
736: 'pool table, billiard table, snooker table',
|
738 |
+
737: 'pop bottle, soda bottle',
|
739 |
+
738: 'pot, flowerpot',
|
740 |
+
739: "potter's wheel",
|
741 |
+
740: 'power drill',
|
742 |
+
741: 'prayer rug, prayer mat',
|
743 |
+
742: 'printer',
|
744 |
+
743: 'prison, prison house',
|
745 |
+
744: 'projectile, missile',
|
746 |
+
745: 'projector',
|
747 |
+
746: 'puck, hockey puck',
|
748 |
+
747: 'punching bag, punch bag, punching ball, punchball',
|
749 |
+
748: 'purse',
|
750 |
+
749: 'quill, quill pen',
|
751 |
+
750: 'quilt, comforter, comfort, puff',
|
752 |
+
751: 'racer, race car, racing car',
|
753 |
+
752: 'racket, racquet',
|
754 |
+
753: 'radiator',
|
755 |
+
754: 'radio, wireless',
|
756 |
+
755: 'radio telescope, radio reflector',
|
757 |
+
756: 'rain barrel',
|
758 |
+
757: 'recreational vehicle, RV, R.V.',
|
759 |
+
758: 'reel',
|
760 |
+
759: 'reflex camera',
|
761 |
+
760: 'refrigerator, icebox',
|
762 |
+
761: 'remote control, remote',
|
763 |
+
762: 'restaurant, eating house, eating place, eatery',
|
764 |
+
763: 'revolver, six-gun, six-shooter',
|
765 |
+
764: 'rifle',
|
766 |
+
765: 'rocking chair, rocker',
|
767 |
+
766: 'rotisserie',
|
768 |
+
767: 'rubber eraser, rubber, pencil eraser',
|
769 |
+
768: 'rugby ball',
|
770 |
+
769: 'rule, ruler',
|
771 |
+
770: 'running shoe',
|
772 |
+
771: 'safe',
|
773 |
+
772: 'safety pin',
|
774 |
+
773: 'saltshaker, salt shaker',
|
775 |
+
774: 'sandal',
|
776 |
+
775: 'sarong',
|
777 |
+
776: 'sax, saxophone',
|
778 |
+
777: 'scabbard',
|
779 |
+
778: 'scale, weighing machine',
|
780 |
+
779: 'school bus',
|
781 |
+
780: 'schooner',
|
782 |
+
781: 'scoreboard',
|
783 |
+
782: 'screen, CRT screen',
|
784 |
+
783: 'screw',
|
785 |
+
784: 'screwdriver',
|
786 |
+
785: 'seat belt, seatbelt',
|
787 |
+
786: 'sewing machine',
|
788 |
+
787: 'shield, buckler',
|
789 |
+
788: 'shoe shop, shoe-shop, shoe store',
|
790 |
+
789: 'shoji',
|
791 |
+
790: 'shopping basket',
|
792 |
+
791: 'shopping cart',
|
793 |
+
792: 'shovel',
|
794 |
+
793: 'shower cap',
|
795 |
+
794: 'shower curtain',
|
796 |
+
795: 'ski',
|
797 |
+
796: 'ski mask',
|
798 |
+
797: 'sleeping bag',
|
799 |
+
798: 'slide rule, slipstick',
|
800 |
+
799: 'sliding door',
|
801 |
+
800: 'slot, one-armed bandit',
|
802 |
+
801: 'snorkel',
|
803 |
+
802: 'snowmobile',
|
804 |
+
803: 'snowplow, snowplough',
|
805 |
+
804: 'soap dispenser',
|
806 |
+
805: 'soccer ball',
|
807 |
+
806: 'sock',
|
808 |
+
807: 'solar dish, solar collector, solar furnace',
|
809 |
+
808: 'sombrero',
|
810 |
+
809: 'soup bowl',
|
811 |
+
810: 'space bar',
|
812 |
+
811: 'space heater',
|
813 |
+
812: 'space shuttle',
|
814 |
+
813: 'spatula',
|
815 |
+
814: 'speedboat',
|
816 |
+
815: "spider web, spider's web",
|
817 |
+
816: 'spindle',
|
818 |
+
817: 'sports car, sport car',
|
819 |
+
818: 'spotlight, spot',
|
820 |
+
819: 'stage',
|
821 |
+
820: 'steam locomotive',
|
822 |
+
821: 'steel arch bridge',
|
823 |
+
822: 'steel drum',
|
824 |
+
823: 'stethoscope',
|
825 |
+
824: 'stole',
|
826 |
+
825: 'stone wall',
|
827 |
+
826: 'stopwatch, stop watch',
|
828 |
+
827: 'stove',
|
829 |
+
828: 'strainer',
|
830 |
+
829: 'streetcar, tram, tramcar, trolley, trolley car',
|
831 |
+
830: 'stretcher',
|
832 |
+
831: 'studio couch, day bed',
|
833 |
+
832: 'stupa, tope',
|
834 |
+
833: 'submarine, pigboat, sub, U-boat',
|
835 |
+
834: 'suit, suit of clothes',
|
836 |
+
835: 'sundial',
|
837 |
+
836: 'sunglass',
|
838 |
+
837: 'sunglasses, dark glasses, shades',
|
839 |
+
838: 'sunscreen, sunblock, sun blocker',
|
840 |
+
839: 'suspension bridge',
|
841 |
+
840: 'swab, swob, mop',
|
842 |
+
841: 'sweatshirt',
|
843 |
+
842: 'swimming trunks, bathing trunks',
|
844 |
+
843: 'swing',
|
845 |
+
844: 'switch, electric switch, electrical switch',
|
846 |
+
845: 'syringe',
|
847 |
+
846: 'table lamp',
|
848 |
+
847: 'tank, army tank, armored combat vehicle, armoured combat vehicle',
|
849 |
+
848: 'tape player',
|
850 |
+
849: 'teapot',
|
851 |
+
850: 'teddy, teddy bear',
|
852 |
+
851: 'television, television system',
|
853 |
+
852: 'tennis ball',
|
854 |
+
853: 'thatch, thatched roof',
|
855 |
+
854: 'theater curtain, theatre curtain',
|
856 |
+
855: 'thimble',
|
857 |
+
856: 'thresher, thrasher, threshing machine',
|
858 |
+
857: 'throne',
|
859 |
+
858: 'tile roof',
|
860 |
+
859: 'toaster',
|
861 |
+
860: 'tobacco shop, tobacconist shop, tobacconist',
|
862 |
+
861: 'toilet seat',
|
863 |
+
862: 'torch',
|
864 |
+
863: 'totem pole',
|
865 |
+
864: 'tow truck, tow car, wrecker',
|
866 |
+
865: 'toyshop',
|
867 |
+
866: 'tractor',
|
868 |
+
867: 'trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi',
|
869 |
+
868: 'tray',
|
870 |
+
869: 'trench coat',
|
871 |
+
870: 'tricycle, trike, velocipede',
|
872 |
+
871: 'trimaran',
|
873 |
+
872: 'tripod',
|
874 |
+
873: 'triumphal arch',
|
875 |
+
874: 'trolleybus, trolley coach, trackless trolley',
|
876 |
+
875: 'trombone',
|
877 |
+
876: 'tub, vat',
|
878 |
+
877: 'turnstile',
|
879 |
+
878: 'typewriter keyboard',
|
880 |
+
879: 'umbrella',
|
881 |
+
880: 'unicycle, monocycle',
|
882 |
+
881: 'upright, upright piano',
|
883 |
+
882: 'vacuum, vacuum cleaner',
|
884 |
+
883: 'vase',
|
885 |
+
884: 'vault',
|
886 |
+
885: 'velvet',
|
887 |
+
886: 'vending machine',
|
888 |
+
887: 'vestment',
|
889 |
+
888: 'viaduct',
|
890 |
+
889: 'violin, fiddle',
|
891 |
+
890: 'volleyball',
|
892 |
+
891: 'waffle iron',
|
893 |
+
892: 'wall clock',
|
894 |
+
893: 'wallet, billfold, notecase, pocketbook',
|
895 |
+
894: 'wardrobe, closet, press',
|
896 |
+
895: 'warplane, military plane',
|
897 |
+
896: 'washbasin, handbasin, washbowl, lavabo, wash-hand basin',
|
898 |
+
897: 'washer, automatic washer, washing machine',
|
899 |
+
898: 'water bottle',
|
900 |
+
899: 'water jug',
|
901 |
+
900: 'water tower',
|
902 |
+
901: 'whiskey jug',
|
903 |
+
902: 'whistle',
|
904 |
+
903: 'wig',
|
905 |
+
904: 'window screen',
|
906 |
+
905: 'window shade',
|
907 |
+
906: 'Windsor tie',
|
908 |
+
907: 'wine bottle',
|
909 |
+
908: 'wing',
|
910 |
+
909: 'wok',
|
911 |
+
910: 'wooden spoon',
|
912 |
+
911: 'wool, woolen, woollen',
|
913 |
+
912: 'worm fence, snake fence, snake-rail fence, Virginia fence',
|
914 |
+
913: 'wreck',
|
915 |
+
914: 'yawl',
|
916 |
+
915: 'yurt',
|
917 |
+
916: 'web site, website, internet site, site',
|
918 |
+
917: 'comic book',
|
919 |
+
918: 'crossword puzzle, crossword',
|
920 |
+
919: 'street sign',
|
921 |
+
920: 'traffic light, traffic signal, stoplight',
|
922 |
+
921: 'book jacket, dust cover, dust jacket, dust wrapper',
|
923 |
+
922: 'menu',
|
924 |
+
923: 'plate',
|
925 |
+
924: 'guacamole',
|
926 |
+
925: 'consomme',
|
927 |
+
926: 'hot pot, hotpot',
|
928 |
+
927: 'trifle',
|
929 |
+
928: 'ice cream, icecream',
|
930 |
+
929: 'ice lolly, lolly, lollipop, popsicle',
|
931 |
+
930: 'French loaf',
|
932 |
+
931: 'bagel, beigel',
|
933 |
+
932: 'pretzel',
|
934 |
+
933: 'cheeseburger',
|
935 |
+
934: 'hotdog, hot dog, red hot',
|
936 |
+
935: 'mashed potato',
|
937 |
+
936: 'head cabbage',
|
938 |
+
937: 'broccoli',
|
939 |
+
938: 'cauliflower',
|
940 |
+
939: 'zucchini, courgette',
|
941 |
+
940: 'spaghetti squash',
|
942 |
+
941: 'acorn squash',
|
943 |
+
942: 'butternut squash',
|
944 |
+
943: 'cucumber, cuke',
|
945 |
+
944: 'artichoke, globe artichoke',
|
946 |
+
945: 'bell pepper',
|
947 |
+
946: 'cardoon',
|
948 |
+
947: 'mushroom',
|
949 |
+
948: 'Granny Smith',
|
950 |
+
949: 'strawberry',
|
951 |
+
950: 'orange',
|
952 |
+
951: 'lemon',
|
953 |
+
952: 'fig',
|
954 |
+
953: 'pineapple, ananas',
|
955 |
+
954: 'banana',
|
956 |
+
955: 'jackfruit, jak, jack',
|
957 |
+
956: 'custard apple',
|
958 |
+
957: 'pomegranate',
|
959 |
+
958: 'hay',
|
960 |
+
959: 'carbonara',
|
961 |
+
960: 'chocolate sauce, chocolate syrup',
|
962 |
+
961: 'dough',
|
963 |
+
962: 'meat loaf, meatloaf',
|
964 |
+
963: 'pizza, pizza pie',
|
965 |
+
964: 'potpie',
|
966 |
+
965: 'burrito',
|
967 |
+
966: 'red wine',
|
968 |
+
967: 'espresso',
|
969 |
+
968: 'cup',
|
970 |
+
969: 'eggnog',
|
971 |
+
970: 'alp',
|
972 |
+
971: 'bubble',
|
973 |
+
972: 'cliff, drop, drop-off',
|
974 |
+
973: 'coral reef',
|
975 |
+
974: 'geyser',
|
976 |
+
975: 'lakeside, lakeshore',
|
977 |
+
976: 'promontory, headland, head, foreland',
|
978 |
+
977: 'sandbar, sand bar',
|
979 |
+
978: 'seashore, coast, seacoast, sea-coast',
|
980 |
+
979: 'valley, vale',
|
981 |
+
980: 'volcano',
|
982 |
+
981: 'ballplayer, baseball player',
|
983 |
+
982: 'groom, bridegroom',
|
984 |
+
983: 'scuba diver',
|
985 |
+
984: 'rapeseed',
|
986 |
+
985: 'daisy',
|
987 |
+
986: "yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum",
|
988 |
+
987: 'corn',
|
989 |
+
988: 'acorn',
|
990 |
+
989: 'hip, rose hip, rosehip',
|
991 |
+
990: 'buckeye, horse chestnut, conker',
|
992 |
+
991: 'coral fungus',
|
993 |
+
992: 'agaric',
|
994 |
+
993: 'gyromitra',
|
995 |
+
994: 'stinkhorn, carrion fungus',
|
996 |
+
995: 'earthstar',
|
997 |
+
996: 'hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa',
|
998 |
+
997: 'bolete',
|
999 |
+
998: 'ear, spike, capitulum',
|
1000 |
+
999: 'toilet tissue, toilet paper, bathroom tissue'}
|
labels/imagenet1k-simple-labels.json
ADDED
@@ -0,0 +1,1000 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
["tench",
|
2 |
+
"goldfish",
|
3 |
+
"great white shark",
|
4 |
+
"tiger shark",
|
5 |
+
"hammerhead shark",
|
6 |
+
"electric ray",
|
7 |
+
"stingray",
|
8 |
+
"cock",
|
9 |
+
"hen",
|
10 |
+
"ostrich",
|
11 |
+
"brambling",
|
12 |
+
"goldfinch",
|
13 |
+
"house finch",
|
14 |
+
"junco",
|
15 |
+
"indigo bunting",
|
16 |
+
"American robin",
|
17 |
+
"bulbul",
|
18 |
+
"jay",
|
19 |
+
"magpie",
|
20 |
+
"chickadee",
|
21 |
+
"American dipper",
|
22 |
+
"kite",
|
23 |
+
"bald eagle",
|
24 |
+
"vulture",
|
25 |
+
"great grey owl",
|
26 |
+
"fire salamander",
|
27 |
+
"smooth newt",
|
28 |
+
"newt",
|
29 |
+
"spotted salamander",
|
30 |
+
"axolotl",
|
31 |
+
"American bullfrog",
|
32 |
+
"tree frog",
|
33 |
+
"tailed frog",
|
34 |
+
"loggerhead sea turtle",
|
35 |
+
"leatherback sea turtle",
|
36 |
+
"mud turtle",
|
37 |
+
"terrapin",
|
38 |
+
"box turtle",
|
39 |
+
"banded gecko",
|
40 |
+
"green iguana",
|
41 |
+
"Carolina anole",
|
42 |
+
"desert grassland whiptail lizard",
|
43 |
+
"agama",
|
44 |
+
"frilled-necked lizard",
|
45 |
+
"alligator lizard",
|
46 |
+
"Gila monster",
|
47 |
+
"European green lizard",
|
48 |
+
"chameleon",
|
49 |
+
"Komodo dragon",
|
50 |
+
"Nile crocodile",
|
51 |
+
"American alligator",
|
52 |
+
"triceratops",
|
53 |
+
"worm snake",
|
54 |
+
"ring-necked snake",
|
55 |
+
"eastern hog-nosed snake",
|
56 |
+
"smooth green snake",
|
57 |
+
"kingsnake",
|
58 |
+
"garter snake",
|
59 |
+
"water snake",
|
60 |
+
"vine snake",
|
61 |
+
"night snake",
|
62 |
+
"boa constrictor",
|
63 |
+
"African rock python",
|
64 |
+
"Indian cobra",
|
65 |
+
"green mamba",
|
66 |
+
"sea snake",
|
67 |
+
"Saharan horned viper",
|
68 |
+
"eastern diamondback rattlesnake",
|
69 |
+
"sidewinder",
|
70 |
+
"trilobite",
|
71 |
+
"harvestman",
|
72 |
+
"scorpion",
|
73 |
+
"yellow garden spider",
|
74 |
+
"barn spider",
|
75 |
+
"European garden spider",
|
76 |
+
"southern black widow",
|
77 |
+
"tarantula",
|
78 |
+
"wolf spider",
|
79 |
+
"tick",
|
80 |
+
"centipede",
|
81 |
+
"black grouse",
|
82 |
+
"ptarmigan",
|
83 |
+
"ruffed grouse",
|
84 |
+
"prairie grouse",
|
85 |
+
"peacock",
|
86 |
+
"quail",
|
87 |
+
"partridge",
|
88 |
+
"grey parrot",
|
89 |
+
"macaw",
|
90 |
+
"sulphur-crested cockatoo",
|
91 |
+
"lorikeet",
|
92 |
+
"coucal",
|
93 |
+
"bee eater",
|
94 |
+
"hornbill",
|
95 |
+
"hummingbird",
|
96 |
+
"jacamar",
|
97 |
+
"toucan",
|
98 |
+
"duck",
|
99 |
+
"red-breasted merganser",
|
100 |
+
"goose",
|
101 |
+
"black swan",
|
102 |
+
"tusker",
|
103 |
+
"echidna",
|
104 |
+
"platypus",
|
105 |
+
"wallaby",
|
106 |
+
"koala",
|
107 |
+
"wombat",
|
108 |
+
"jellyfish",
|
109 |
+
"sea anemone",
|
110 |
+
"brain coral",
|
111 |
+
"flatworm",
|
112 |
+
"nematode",
|
113 |
+
"conch",
|
114 |
+
"snail",
|
115 |
+
"slug",
|
116 |
+
"sea slug",
|
117 |
+
"chiton",
|
118 |
+
"chambered nautilus",
|
119 |
+
"Dungeness crab",
|
120 |
+
"rock crab",
|
121 |
+
"fiddler crab",
|
122 |
+
"red king crab",
|
123 |
+
"American lobster",
|
124 |
+
"spiny lobster",
|
125 |
+
"crayfish",
|
126 |
+
"hermit crab",
|
127 |
+
"isopod",
|
128 |
+
"white stork",
|
129 |
+
"black stork",
|
130 |
+
"spoonbill",
|
131 |
+
"flamingo",
|
132 |
+
"little blue heron",
|
133 |
+
"great egret",
|
134 |
+
"bittern",
|
135 |
+
"crane (bird)",
|
136 |
+
"limpkin",
|
137 |
+
"common gallinule",
|
138 |
+
"American coot",
|
139 |
+
"bustard",
|
140 |
+
"ruddy turnstone",
|
141 |
+
"dunlin",
|
142 |
+
"common redshank",
|
143 |
+
"dowitcher",
|
144 |
+
"oystercatcher",
|
145 |
+
"pelican",
|
146 |
+
"king penguin",
|
147 |
+
"albatross",
|
148 |
+
"grey whale",
|
149 |
+
"killer whale",
|
150 |
+
"dugong",
|
151 |
+
"sea lion",
|
152 |
+
"Chihuahua",
|
153 |
+
"Japanese Chin",
|
154 |
+
"Maltese",
|
155 |
+
"Pekingese",
|
156 |
+
"Shih Tzu",
|
157 |
+
"King Charles Spaniel",
|
158 |
+
"Papillon",
|
159 |
+
"toy terrier",
|
160 |
+
"Rhodesian Ridgeback",
|
161 |
+
"Afghan Hound",
|
162 |
+
"Basset Hound",
|
163 |
+
"Beagle",
|
164 |
+
"Bloodhound",
|
165 |
+
"Bluetick Coonhound",
|
166 |
+
"Black and Tan Coonhound",
|
167 |
+
"Treeing Walker Coonhound",
|
168 |
+
"English foxhound",
|
169 |
+
"Redbone Coonhound",
|
170 |
+
"borzoi",
|
171 |
+
"Irish Wolfhound",
|
172 |
+
"Italian Greyhound",
|
173 |
+
"Whippet",
|
174 |
+
"Ibizan Hound",
|
175 |
+
"Norwegian Elkhound",
|
176 |
+
"Otterhound",
|
177 |
+
"Saluki",
|
178 |
+
"Scottish Deerhound",
|
179 |
+
"Weimaraner",
|
180 |
+
"Staffordshire Bull Terrier",
|
181 |
+
"American Staffordshire Terrier",
|
182 |
+
"Bedlington Terrier",
|
183 |
+
"Border Terrier",
|
184 |
+
"Kerry Blue Terrier",
|
185 |
+
"Irish Terrier",
|
186 |
+
"Norfolk Terrier",
|
187 |
+
"Norwich Terrier",
|
188 |
+
"Yorkshire Terrier",
|
189 |
+
"Wire Fox Terrier",
|
190 |
+
"Lakeland Terrier",
|
191 |
+
"Sealyham Terrier",
|
192 |
+
"Airedale Terrier",
|
193 |
+
"Cairn Terrier",
|
194 |
+
"Australian Terrier",
|
195 |
+
"Dandie Dinmont Terrier",
|
196 |
+
"Boston Terrier",
|
197 |
+
"Miniature Schnauzer",
|
198 |
+
"Giant Schnauzer",
|
199 |
+
"Standard Schnauzer",
|
200 |
+
"Scottish Terrier",
|
201 |
+
"Tibetan Terrier",
|
202 |
+
"Australian Silky Terrier",
|
203 |
+
"Soft-coated Wheaten Terrier",
|
204 |
+
"West Highland White Terrier",
|
205 |
+
"Lhasa Apso",
|
206 |
+
"Flat-Coated Retriever",
|
207 |
+
"Curly-coated Retriever",
|
208 |
+
"Golden Retriever",
|
209 |
+
"Labrador Retriever",
|
210 |
+
"Chesapeake Bay Retriever",
|
211 |
+
"German Shorthaired Pointer",
|
212 |
+
"Vizsla",
|
213 |
+
"English Setter",
|
214 |
+
"Irish Setter",
|
215 |
+
"Gordon Setter",
|
216 |
+
"Brittany",
|
217 |
+
"Clumber Spaniel",
|
218 |
+
"English Springer Spaniel",
|
219 |
+
"Welsh Springer Spaniel",
|
220 |
+
"Cocker Spaniels",
|
221 |
+
"Sussex Spaniel",
|
222 |
+
"Irish Water Spaniel",
|
223 |
+
"Kuvasz",
|
224 |
+
"Schipperke",
|
225 |
+
"Groenendael",
|
226 |
+
"Malinois",
|
227 |
+
"Briard",
|
228 |
+
"Australian Kelpie",
|
229 |
+
"Komondor",
|
230 |
+
"Old English Sheepdog",
|
231 |
+
"Shetland Sheepdog",
|
232 |
+
"collie",
|
233 |
+
"Border Collie",
|
234 |
+
"Bouvier des Flandres",
|
235 |
+
"Rottweiler",
|
236 |
+
"German Shepherd Dog",
|
237 |
+
"Dobermann",
|
238 |
+
"Miniature Pinscher",
|
239 |
+
"Greater Swiss Mountain Dog",
|
240 |
+
"Bernese Mountain Dog",
|
241 |
+
"Appenzeller Sennenhund",
|
242 |
+
"Entlebucher Sennenhund",
|
243 |
+
"Boxer",
|
244 |
+
"Bullmastiff",
|
245 |
+
"Tibetan Mastiff",
|
246 |
+
"French Bulldog",
|
247 |
+
"Great Dane",
|
248 |
+
"St. Bernard",
|
249 |
+
"husky",
|
250 |
+
"Alaskan Malamute",
|
251 |
+
"Siberian Husky",
|
252 |
+
"Dalmatian",
|
253 |
+
"Affenpinscher",
|
254 |
+
"Basenji",
|
255 |
+
"pug",
|
256 |
+
"Leonberger",
|
257 |
+
"Newfoundland",
|
258 |
+
"Pyrenean Mountain Dog",
|
259 |
+
"Samoyed",
|
260 |
+
"Pomeranian",
|
261 |
+
"Chow Chow",
|
262 |
+
"Keeshond",
|
263 |
+
"Griffon Bruxellois",
|
264 |
+
"Pembroke Welsh Corgi",
|
265 |
+
"Cardigan Welsh Corgi",
|
266 |
+
"Toy Poodle",
|
267 |
+
"Miniature Poodle",
|
268 |
+
"Standard Poodle",
|
269 |
+
"Mexican hairless dog",
|
270 |
+
"grey wolf",
|
271 |
+
"Alaskan tundra wolf",
|
272 |
+
"red wolf",
|
273 |
+
"coyote",
|
274 |
+
"dingo",
|
275 |
+
"dhole",
|
276 |
+
"African wild dog",
|
277 |
+
"hyena",
|
278 |
+
"red fox",
|
279 |
+
"kit fox",
|
280 |
+
"Arctic fox",
|
281 |
+
"grey fox",
|
282 |
+
"tabby cat",
|
283 |
+
"tiger cat",
|
284 |
+
"Persian cat",
|
285 |
+
"Siamese cat",
|
286 |
+
"Egyptian Mau",
|
287 |
+
"cougar",
|
288 |
+
"lynx",
|
289 |
+
"leopard",
|
290 |
+
"snow leopard",
|
291 |
+
"jaguar",
|
292 |
+
"lion",
|
293 |
+
"tiger",
|
294 |
+
"cheetah",
|
295 |
+
"brown bear",
|
296 |
+
"American black bear",
|
297 |
+
"polar bear",
|
298 |
+
"sloth bear",
|
299 |
+
"mongoose",
|
300 |
+
"meerkat",
|
301 |
+
"tiger beetle",
|
302 |
+
"ladybug",
|
303 |
+
"ground beetle",
|
304 |
+
"longhorn beetle",
|
305 |
+
"leaf beetle",
|
306 |
+
"dung beetle",
|
307 |
+
"rhinoceros beetle",
|
308 |
+
"weevil",
|
309 |
+
"fly",
|
310 |
+
"bee",
|
311 |
+
"ant",
|
312 |
+
"grasshopper",
|
313 |
+
"cricket",
|
314 |
+
"stick insect",
|
315 |
+
"cockroach",
|
316 |
+
"mantis",
|
317 |
+
"cicada",
|
318 |
+
"leafhopper",
|
319 |
+
"lacewing",
|
320 |
+
"dragonfly",
|
321 |
+
"damselfly",
|
322 |
+
"red admiral",
|
323 |
+
"ringlet",
|
324 |
+
"monarch butterfly",
|
325 |
+
"small white",
|
326 |
+
"sulphur butterfly",
|
327 |
+
"gossamer-winged butterfly",
|
328 |
+
"starfish",
|
329 |
+
"sea urchin",
|
330 |
+
"sea cucumber",
|
331 |
+
"cottontail rabbit",
|
332 |
+
"hare",
|
333 |
+
"Angora rabbit",
|
334 |
+
"hamster",
|
335 |
+
"porcupine",
|
336 |
+
"fox squirrel",
|
337 |
+
"marmot",
|
338 |
+
"beaver",
|
339 |
+
"guinea pig",
|
340 |
+
"common sorrel",
|
341 |
+
"zebra",
|
342 |
+
"pig",
|
343 |
+
"wild boar",
|
344 |
+
"warthog",
|
345 |
+
"hippopotamus",
|
346 |
+
"ox",
|
347 |
+
"water buffalo",
|
348 |
+
"bison",
|
349 |
+
"ram",
|
350 |
+
"bighorn sheep",
|
351 |
+
"Alpine ibex",
|
352 |
+
"hartebeest",
|
353 |
+
"impala",
|
354 |
+
"gazelle",
|
355 |
+
"dromedary",
|
356 |
+
"llama",
|
357 |
+
"weasel",
|
358 |
+
"mink",
|
359 |
+
"European polecat",
|
360 |
+
"black-footed ferret",
|
361 |
+
"otter",
|
362 |
+
"skunk",
|
363 |
+
"badger",
|
364 |
+
"armadillo",
|
365 |
+
"three-toed sloth",
|
366 |
+
"orangutan",
|
367 |
+
"gorilla",
|
368 |
+
"chimpanzee",
|
369 |
+
"gibbon",
|
370 |
+
"siamang",
|
371 |
+
"guenon",
|
372 |
+
"patas monkey",
|
373 |
+
"baboon",
|
374 |
+
"macaque",
|
375 |
+
"langur",
|
376 |
+
"black-and-white colobus",
|
377 |
+
"proboscis monkey",
|
378 |
+
"marmoset",
|
379 |
+
"white-headed capuchin",
|
380 |
+
"howler monkey",
|
381 |
+
"titi",
|
382 |
+
"Geoffroy's spider monkey",
|
383 |
+
"common squirrel monkey",
|
384 |
+
"ring-tailed lemur",
|
385 |
+
"indri",
|
386 |
+
"Asian elephant",
|
387 |
+
"African bush elephant",
|
388 |
+
"red panda",
|
389 |
+
"giant panda",
|
390 |
+
"snoek",
|
391 |
+
"eel",
|
392 |
+
"coho salmon",
|
393 |
+
"rock beauty",
|
394 |
+
"clownfish",
|
395 |
+
"sturgeon",
|
396 |
+
"garfish",
|
397 |
+
"lionfish",
|
398 |
+
"pufferfish",
|
399 |
+
"abacus",
|
400 |
+
"abaya",
|
401 |
+
"academic gown",
|
402 |
+
"accordion",
|
403 |
+
"acoustic guitar",
|
404 |
+
"aircraft carrier",
|
405 |
+
"airliner",
|
406 |
+
"airship",
|
407 |
+
"altar",
|
408 |
+
"ambulance",
|
409 |
+
"amphibious vehicle",
|
410 |
+
"analog clock",
|
411 |
+
"apiary",
|
412 |
+
"apron",
|
413 |
+
"waste container",
|
414 |
+
"assault rifle",
|
415 |
+
"backpack",
|
416 |
+
"bakery",
|
417 |
+
"balance beam",
|
418 |
+
"balloon",
|
419 |
+
"ballpoint pen",
|
420 |
+
"Band-Aid",
|
421 |
+
"banjo",
|
422 |
+
"baluster",
|
423 |
+
"barbell",
|
424 |
+
"barber chair",
|
425 |
+
"barbershop",
|
426 |
+
"barn",
|
427 |
+
"barometer",
|
428 |
+
"barrel",
|
429 |
+
"wheelbarrow",
|
430 |
+
"baseball",
|
431 |
+
"basketball",
|
432 |
+
"bassinet",
|
433 |
+
"bassoon",
|
434 |
+
"swimming cap",
|
435 |
+
"bath towel",
|
436 |
+
"bathtub",
|
437 |
+
"station wagon",
|
438 |
+
"lighthouse",
|
439 |
+
"beaker",
|
440 |
+
"military cap",
|
441 |
+
"beer bottle",
|
442 |
+
"beer glass",
|
443 |
+
"bell-cot",
|
444 |
+
"bib",
|
445 |
+
"tandem bicycle",
|
446 |
+
"bikini",
|
447 |
+
"ring binder",
|
448 |
+
"binoculars",
|
449 |
+
"birdhouse",
|
450 |
+
"boathouse",
|
451 |
+
"bobsleigh",
|
452 |
+
"bolo tie",
|
453 |
+
"poke bonnet",
|
454 |
+
"bookcase",
|
455 |
+
"bookstore",
|
456 |
+
"bottle cap",
|
457 |
+
"bow",
|
458 |
+
"bow tie",
|
459 |
+
"brass",
|
460 |
+
"bra",
|
461 |
+
"breakwater",
|
462 |
+
"breastplate",
|
463 |
+
"broom",
|
464 |
+
"bucket",
|
465 |
+
"buckle",
|
466 |
+
"bulletproof vest",
|
467 |
+
"high-speed train",
|
468 |
+
"butcher shop",
|
469 |
+
"taxicab",
|
470 |
+
"cauldron",
|
471 |
+
"candle",
|
472 |
+
"cannon",
|
473 |
+
"canoe",
|
474 |
+
"can opener",
|
475 |
+
"cardigan",
|
476 |
+
"car mirror",
|
477 |
+
"carousel",
|
478 |
+
"tool kit",
|
479 |
+
"carton",
|
480 |
+
"car wheel",
|
481 |
+
"automated teller machine",
|
482 |
+
"cassette",
|
483 |
+
"cassette player",
|
484 |
+
"castle",
|
485 |
+
"catamaran",
|
486 |
+
"CD player",
|
487 |
+
"cello",
|
488 |
+
"mobile phone",
|
489 |
+
"chain",
|
490 |
+
"chain-link fence",
|
491 |
+
"chain mail",
|
492 |
+
"chainsaw",
|
493 |
+
"chest",
|
494 |
+
"chiffonier",
|
495 |
+
"chime",
|
496 |
+
"china cabinet",
|
497 |
+
"Christmas stocking",
|
498 |
+
"church",
|
499 |
+
"movie theater",
|
500 |
+
"cleaver",
|
501 |
+
"cliff dwelling",
|
502 |
+
"cloak",
|
503 |
+
"clogs",
|
504 |
+
"cocktail shaker",
|
505 |
+
"coffee mug",
|
506 |
+
"coffeemaker",
|
507 |
+
"coil",
|
508 |
+
"combination lock",
|
509 |
+
"computer keyboard",
|
510 |
+
"confectionery store",
|
511 |
+
"container ship",
|
512 |
+
"convertible",
|
513 |
+
"corkscrew",
|
514 |
+
"cornet",
|
515 |
+
"cowboy boot",
|
516 |
+
"cowboy hat",
|
517 |
+
"cradle",
|
518 |
+
"crane (machine)",
|
519 |
+
"crash helmet",
|
520 |
+
"crate",
|
521 |
+
"infant bed",
|
522 |
+
"Crock Pot",
|
523 |
+
"croquet ball",
|
524 |
+
"crutch",
|
525 |
+
"cuirass",
|
526 |
+
"dam",
|
527 |
+
"desk",
|
528 |
+
"desktop computer",
|
529 |
+
"rotary dial telephone",
|
530 |
+
"diaper",
|
531 |
+
"digital clock",
|
532 |
+
"digital watch",
|
533 |
+
"dining table",
|
534 |
+
"dishcloth",
|
535 |
+
"dishwasher",
|
536 |
+
"disc brake",
|
537 |
+
"dock",
|
538 |
+
"dog sled",
|
539 |
+
"dome",
|
540 |
+
"doormat",
|
541 |
+
"drilling rig",
|
542 |
+
"drum",
|
543 |
+
"drumstick",
|
544 |
+
"dumbbell",
|
545 |
+
"Dutch oven",
|
546 |
+
"electric fan",
|
547 |
+
"electric guitar",
|
548 |
+
"electric locomotive",
|
549 |
+
"entertainment center",
|
550 |
+
"envelope",
|
551 |
+
"espresso machine",
|
552 |
+
"face powder",
|
553 |
+
"feather boa",
|
554 |
+
"filing cabinet",
|
555 |
+
"fireboat",
|
556 |
+
"fire engine",
|
557 |
+
"fire screen sheet",
|
558 |
+
"flagpole",
|
559 |
+
"flute",
|
560 |
+
"folding chair",
|
561 |
+
"football helmet",
|
562 |
+
"forklift",
|
563 |
+
"fountain",
|
564 |
+
"fountain pen",
|
565 |
+
"four-poster bed",
|
566 |
+
"freight car",
|
567 |
+
"French horn",
|
568 |
+
"frying pan",
|
569 |
+
"fur coat",
|
570 |
+
"garbage truck",
|
571 |
+
"gas mask",
|
572 |
+
"gas pump",
|
573 |
+
"goblet",
|
574 |
+
"go-kart",
|
575 |
+
"golf ball",
|
576 |
+
"golf cart",
|
577 |
+
"gondola",
|
578 |
+
"gong",
|
579 |
+
"gown",
|
580 |
+
"grand piano",
|
581 |
+
"greenhouse",
|
582 |
+
"grille",
|
583 |
+
"grocery store",
|
584 |
+
"guillotine",
|
585 |
+
"barrette",
|
586 |
+
"hair spray",
|
587 |
+
"half-track",
|
588 |
+
"hammer",
|
589 |
+
"hamper",
|
590 |
+
"hair dryer",
|
591 |
+
"hand-held computer",
|
592 |
+
"handkerchief",
|
593 |
+
"hard disk drive",
|
594 |
+
"harmonica",
|
595 |
+
"harp",
|
596 |
+
"harvester",
|
597 |
+
"hatchet",
|
598 |
+
"holster",
|
599 |
+
"home theater",
|
600 |
+
"honeycomb",
|
601 |
+
"hook",
|
602 |
+
"hoop skirt",
|
603 |
+
"horizontal bar",
|
604 |
+
"horse-drawn vehicle",
|
605 |
+
"hourglass",
|
606 |
+
"iPod",
|
607 |
+
"clothes iron",
|
608 |
+
"jack-o'-lantern",
|
609 |
+
"jeans",
|
610 |
+
"jeep",
|
611 |
+
"T-shirt",
|
612 |
+
"jigsaw puzzle",
|
613 |
+
"pulled rickshaw",
|
614 |
+
"joystick",
|
615 |
+
"kimono",
|
616 |
+
"knee pad",
|
617 |
+
"knot",
|
618 |
+
"lab coat",
|
619 |
+
"ladle",
|
620 |
+
"lampshade",
|
621 |
+
"laptop computer",
|
622 |
+
"lawn mower",
|
623 |
+
"lens cap",
|
624 |
+
"paper knife",
|
625 |
+
"library",
|
626 |
+
"lifeboat",
|
627 |
+
"lighter",
|
628 |
+
"limousine",
|
629 |
+
"ocean liner",
|
630 |
+
"lipstick",
|
631 |
+
"slip-on shoe",
|
632 |
+
"lotion",
|
633 |
+
"speaker",
|
634 |
+
"loupe",
|
635 |
+
"sawmill",
|
636 |
+
"magnetic compass",
|
637 |
+
"mail bag",
|
638 |
+
"mailbox",
|
639 |
+
"tights",
|
640 |
+
"tank suit",
|
641 |
+
"manhole cover",
|
642 |
+
"maraca",
|
643 |
+
"marimba",
|
644 |
+
"mask",
|
645 |
+
"match",
|
646 |
+
"maypole",
|
647 |
+
"maze",
|
648 |
+
"measuring cup",
|
649 |
+
"medicine chest",
|
650 |
+
"megalith",
|
651 |
+
"microphone",
|
652 |
+
"microwave oven",
|
653 |
+
"military uniform",
|
654 |
+
"milk can",
|
655 |
+
"minibus",
|
656 |
+
"miniskirt",
|
657 |
+
"minivan",
|
658 |
+
"missile",
|
659 |
+
"mitten",
|
660 |
+
"mixing bowl",
|
661 |
+
"mobile home",
|
662 |
+
"Model T",
|
663 |
+
"modem",
|
664 |
+
"monastery",
|
665 |
+
"monitor",
|
666 |
+
"moped",
|
667 |
+
"mortar",
|
668 |
+
"square academic cap",
|
669 |
+
"mosque",
|
670 |
+
"mosquito net",
|
671 |
+
"scooter",
|
672 |
+
"mountain bike",
|
673 |
+
"tent",
|
674 |
+
"computer mouse",
|
675 |
+
"mousetrap",
|
676 |
+
"moving van",
|
677 |
+
"muzzle",
|
678 |
+
"nail",
|
679 |
+
"neck brace",
|
680 |
+
"necklace",
|
681 |
+
"nipple",
|
682 |
+
"notebook computer",
|
683 |
+
"obelisk",
|
684 |
+
"oboe",
|
685 |
+
"ocarina",
|
686 |
+
"odometer",
|
687 |
+
"oil filter",
|
688 |
+
"organ",
|
689 |
+
"oscilloscope",
|
690 |
+
"overskirt",
|
691 |
+
"bullock cart",
|
692 |
+
"oxygen mask",
|
693 |
+
"packet",
|
694 |
+
"paddle",
|
695 |
+
"paddle wheel",
|
696 |
+
"padlock",
|
697 |
+
"paintbrush",
|
698 |
+
"pajamas",
|
699 |
+
"palace",
|
700 |
+
"pan flute",
|
701 |
+
"paper towel",
|
702 |
+
"parachute",
|
703 |
+
"parallel bars",
|
704 |
+
"park bench",
|
705 |
+
"parking meter",
|
706 |
+
"passenger car",
|
707 |
+
"patio",
|
708 |
+
"payphone",
|
709 |
+
"pedestal",
|
710 |
+
"pencil case",
|
711 |
+
"pencil sharpener",
|
712 |
+
"perfume",
|
713 |
+
"Petri dish",
|
714 |
+
"photocopier",
|
715 |
+
"plectrum",
|
716 |
+
"Pickelhaube",
|
717 |
+
"picket fence",
|
718 |
+
"pickup truck",
|
719 |
+
"pier",
|
720 |
+
"piggy bank",
|
721 |
+
"pill bottle",
|
722 |
+
"pillow",
|
723 |
+
"ping-pong ball",
|
724 |
+
"pinwheel",
|
725 |
+
"pirate ship",
|
726 |
+
"pitcher",
|
727 |
+
"hand plane",
|
728 |
+
"planetarium",
|
729 |
+
"plastic bag",
|
730 |
+
"plate rack",
|
731 |
+
"plow",
|
732 |
+
"plunger",
|
733 |
+
"Polaroid camera",
|
734 |
+
"pole",
|
735 |
+
"police van",
|
736 |
+
"poncho",
|
737 |
+
"billiard table",
|
738 |
+
"soda bottle",
|
739 |
+
"pot",
|
740 |
+
"potter's wheel",
|
741 |
+
"power drill",
|
742 |
+
"prayer rug",
|
743 |
+
"printer",
|
744 |
+
"prison",
|
745 |
+
"projectile",
|
746 |
+
"projector",
|
747 |
+
"hockey puck",
|
748 |
+
"punching bag",
|
749 |
+
"purse",
|
750 |
+
"quill",
|
751 |
+
"quilt",
|
752 |
+
"race car",
|
753 |
+
"racket",
|
754 |
+
"radiator",
|
755 |
+
"radio",
|
756 |
+
"radio telescope",
|
757 |
+
"rain barrel",
|
758 |
+
"recreational vehicle",
|
759 |
+
"reel",
|
760 |
+
"reflex camera",
|
761 |
+
"refrigerator",
|
762 |
+
"remote control",
|
763 |
+
"restaurant",
|
764 |
+
"revolver",
|
765 |
+
"rifle",
|
766 |
+
"rocking chair",
|
767 |
+
"rotisserie",
|
768 |
+
"eraser",
|
769 |
+
"rugby ball",
|
770 |
+
"ruler",
|
771 |
+
"running shoe",
|
772 |
+
"safe",
|
773 |
+
"safety pin",
|
774 |
+
"salt shaker",
|
775 |
+
"sandal",
|
776 |
+
"sarong",
|
777 |
+
"saxophone",
|
778 |
+
"scabbard",
|
779 |
+
"weighing scale",
|
780 |
+
"school bus",
|
781 |
+
"schooner",
|
782 |
+
"scoreboard",
|
783 |
+
"CRT screen",
|
784 |
+
"screw",
|
785 |
+
"screwdriver",
|
786 |
+
"seat belt",
|
787 |
+
"sewing machine",
|
788 |
+
"shield",
|
789 |
+
"shoe store",
|
790 |
+
"shoji",
|
791 |
+
"shopping basket",
|
792 |
+
"shopping cart",
|
793 |
+
"shovel",
|
794 |
+
"shower cap",
|
795 |
+
"shower curtain",
|
796 |
+
"ski",
|
797 |
+
"ski mask",
|
798 |
+
"sleeping bag",
|
799 |
+
"slide rule",
|
800 |
+
"sliding door",
|
801 |
+
"slot machine",
|
802 |
+
"snorkel",
|
803 |
+
"snowmobile",
|
804 |
+
"snowplow",
|
805 |
+
"soap dispenser",
|
806 |
+
"soccer ball",
|
807 |
+
"sock",
|
808 |
+
"solar thermal collector",
|
809 |
+
"sombrero",
|
810 |
+
"soup bowl",
|
811 |
+
"space bar",
|
812 |
+
"space heater",
|
813 |
+
"space shuttle",
|
814 |
+
"spatula",
|
815 |
+
"motorboat",
|
816 |
+
"spider web",
|
817 |
+
"spindle",
|
818 |
+
"sports car",
|
819 |
+
"spotlight",
|
820 |
+
"stage",
|
821 |
+
"steam locomotive",
|
822 |
+
"through arch bridge",
|
823 |
+
"steel drum",
|
824 |
+
"stethoscope",
|
825 |
+
"scarf",
|
826 |
+
"stone wall",
|
827 |
+
"stopwatch",
|
828 |
+
"stove",
|
829 |
+
"strainer",
|
830 |
+
"tram",
|
831 |
+
"stretcher",
|
832 |
+
"couch",
|
833 |
+
"stupa",
|
834 |
+
"submarine",
|
835 |
+
"suit",
|
836 |
+
"sundial",
|
837 |
+
"sunglass",
|
838 |
+
"sunglasses",
|
839 |
+
"sunscreen",
|
840 |
+
"suspension bridge",
|
841 |
+
"mop",
|
842 |
+
"sweatshirt",
|
843 |
+
"swimsuit",
|
844 |
+
"swing",
|
845 |
+
"switch",
|
846 |
+
"syringe",
|
847 |
+
"table lamp",
|
848 |
+
"tank",
|
849 |
+
"tape player",
|
850 |
+
"teapot",
|
851 |
+
"teddy bear",
|
852 |
+
"television",
|
853 |
+
"tennis ball",
|
854 |
+
"thatched roof",
|
855 |
+
"front curtain",
|
856 |
+
"thimble",
|
857 |
+
"threshing machine",
|
858 |
+
"throne",
|
859 |
+
"tile roof",
|
860 |
+
"toaster",
|
861 |
+
"tobacco shop",
|
862 |
+
"toilet seat",
|
863 |
+
"torch",
|
864 |
+
"totem pole",
|
865 |
+
"tow truck",
|
866 |
+
"toy store",
|
867 |
+
"tractor",
|
868 |
+
"semi-trailer truck",
|
869 |
+
"tray",
|
870 |
+
"trench coat",
|
871 |
+
"tricycle",
|
872 |
+
"trimaran",
|
873 |
+
"tripod",
|
874 |
+
"triumphal arch",
|
875 |
+
"trolleybus",
|
876 |
+
"trombone",
|
877 |
+
"tub",
|
878 |
+
"turnstile",
|
879 |
+
"typewriter keyboard",
|
880 |
+
"umbrella",
|
881 |
+
"unicycle",
|
882 |
+
"upright piano",
|
883 |
+
"vacuum cleaner",
|
884 |
+
"vase",
|
885 |
+
"vault",
|
886 |
+
"velvet",
|
887 |
+
"vending machine",
|
888 |
+
"vestment",
|
889 |
+
"viaduct",
|
890 |
+
"violin",
|
891 |
+
"volleyball",
|
892 |
+
"waffle iron",
|
893 |
+
"wall clock",
|
894 |
+
"wallet",
|
895 |
+
"wardrobe",
|
896 |
+
"military aircraft",
|
897 |
+
"sink",
|
898 |
+
"washing machine",
|
899 |
+
"water bottle",
|
900 |
+
"water jug",
|
901 |
+
"water tower",
|
902 |
+
"whiskey jug",
|
903 |
+
"whistle",
|
904 |
+
"wig",
|
905 |
+
"window screen",
|
906 |
+
"window shade",
|
907 |
+
"Windsor tie",
|
908 |
+
"wine bottle",
|
909 |
+
"wing",
|
910 |
+
"wok",
|
911 |
+
"wooden spoon",
|
912 |
+
"wool",
|
913 |
+
"split-rail fence",
|
914 |
+
"shipwreck",
|
915 |
+
"yawl",
|
916 |
+
"yurt",
|
917 |
+
"website",
|
918 |
+
"comic book",
|
919 |
+
"crossword",
|
920 |
+
"traffic sign",
|
921 |
+
"traffic light",
|
922 |
+
"dust jacket",
|
923 |
+
"menu",
|
924 |
+
"plate",
|
925 |
+
"guacamole",
|
926 |
+
"consomme",
|
927 |
+
"hot pot",
|
928 |
+
"trifle",
|
929 |
+
"ice cream",
|
930 |
+
"ice pop",
|
931 |
+
"baguette",
|
932 |
+
"bagel",
|
933 |
+
"pretzel",
|
934 |
+
"cheeseburger",
|
935 |
+
"hot dog",
|
936 |
+
"mashed potato",
|
937 |
+
"cabbage",
|
938 |
+
"broccoli",
|
939 |
+
"cauliflower",
|
940 |
+
"zucchini",
|
941 |
+
"spaghetti squash",
|
942 |
+
"acorn squash",
|
943 |
+
"butternut squash",
|
944 |
+
"cucumber",
|
945 |
+
"artichoke",
|
946 |
+
"bell pepper",
|
947 |
+
"cardoon",
|
948 |
+
"mushroom",
|
949 |
+
"Granny Smith",
|
950 |
+
"strawberry",
|
951 |
+
"orange",
|
952 |
+
"lemon",
|
953 |
+
"fig",
|
954 |
+
"pineapple",
|
955 |
+
"banana",
|
956 |
+
"jackfruit",
|
957 |
+
"custard apple",
|
958 |
+
"pomegranate",
|
959 |
+
"hay",
|
960 |
+
"carbonara",
|
961 |
+
"chocolate syrup",
|
962 |
+
"dough",
|
963 |
+
"meatloaf",
|
964 |
+
"pizza",
|
965 |
+
"pot pie",
|
966 |
+
"burrito",
|
967 |
+
"red wine",
|
968 |
+
"espresso",
|
969 |
+
"cup",
|
970 |
+
"eggnog",
|
971 |
+
"alp",
|
972 |
+
"bubble",
|
973 |
+
"cliff",
|
974 |
+
"coral reef",
|
975 |
+
"geyser",
|
976 |
+
"lakeshore",
|
977 |
+
"promontory",
|
978 |
+
"shoal",
|
979 |
+
"seashore",
|
980 |
+
"valley",
|
981 |
+
"volcano",
|
982 |
+
"baseball player",
|
983 |
+
"bridegroom",
|
984 |
+
"scuba diver",
|
985 |
+
"rapeseed",
|
986 |
+
"daisy",
|
987 |
+
"yellow lady's slipper",
|
988 |
+
"corn",
|
989 |
+
"acorn",
|
990 |
+
"rose hip",
|
991 |
+
"horse chestnut seed",
|
992 |
+
"coral fungus",
|
993 |
+
"agaric",
|
994 |
+
"gyromitra",
|
995 |
+
"stinkhorn mushroom",
|
996 |
+
"earth star",
|
997 |
+
"hen-of-the-woods",
|
998 |
+
"bolete",
|
999 |
+
"ear",
|
1000 |
+
"toilet paper"]
|
labels/imagenet21k_wordnet_lemmas.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
model.py
ADDED
@@ -0,0 +1,359 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import numpy as np
|
3 |
+
import torch.nn as nn
|
4 |
+
import torch.nn.functional as F
|
5 |
+
|
6 |
+
from resnet import StdConv2d
|
7 |
+
from utils import (get_width_and_height_from_size, load_pretrained_weights,
|
8 |
+
get_model_params)
|
9 |
+
|
10 |
+
# Identifiers of the Vision Transformer variants; presumably the model names
# accepted by the builders in this package -- TODO confirm against callers.
VALID_MODELS = ('ViT-B_16', 'ViT-B_32', 'ViT-L_16', 'ViT-L_32', 'R50+ViT-B_16')
|
11 |
+
|
12 |
+
|
13 |
+
class PositionEmbs(nn.Module):
    """Add a learnable position embedding to a token sequence.

    The embedding is a parameter of shape ``(1, num_patches + 1, emb_dim)``
    initialised from a standard normal distribution. It is added to the
    input, and the sum is passed through dropout when ``dropout_rate`` is
    positive.
    """

    def __init__(self, num_patches, emb_dim, dropout_rate=0.1):
        super().__init__()
        # One position more than the number of patches (presumably the slot
        # of a prepended class token -- confirm against the caller).
        embedding_shape = (1, num_patches + 1, emb_dim)
        self.pos_embedding = nn.Parameter(torch.randn(embedding_shape))
        # A dropout layer only exists for a positive rate; None disables it.
        self.dropout = nn.Dropout(dropout_rate) if dropout_rate > 0 else None

    def forward(self, x):
        """Return ``x + pos_embedding``, with dropout applied if configured."""
        embedded = x + self.pos_embedding
        if not self.dropout:
            return embedded
        return self.dropout(embedded)
|
30 |
+
|
31 |
+
|
32 |
+
class MlpBlock(nn.Module):
    """Transformer feed-forward block: Linear -> GELU -> Linear.

    When ``dropout_rate`` is positive, dropout is applied after the
    activation and again after the second linear layer. Otherwise both
    dropout slots are ``None`` and are skipped.

    Args:
        in_dim: size of the last dimension of the input.
        mlp_dim: size of the hidden layer.
        out_dim: size of the last dimension of the output.
        dropout_rate: dropout probability; values ``<= 0`` disable dropout.
    """

    def __init__(self, in_dim, mlp_dim, out_dim, dropout_rate=0.1):
        super(MlpBlock, self).__init__()

        # init layers
        self.fc1 = nn.Linear(in_dim, mlp_dim)
        self.fc2 = nn.Linear(mlp_dim, out_dim)
        self.act = nn.GELU()
        if dropout_rate > 0.0:
            self.dropout1 = nn.Dropout(dropout_rate)
            self.dropout2 = nn.Dropout(dropout_rate)
        else:
            self.dropout1 = None
            self.dropout2 = None

    def forward(self, x):
        """Apply the feed-forward block to ``x`` and return the result."""
        out = self.fc1(x)
        out = self.act(out)
        if self.dropout1 is not None:
            out = self.dropout1(out)

        out = self.fc2(out)
        # Guarded like dropout1: with dropout disabled this slot is None and
        # calling it unconditionally would raise TypeError.
        if self.dropout2 is not None:
            out = self.dropout2(out)
        return out
|
58 |
+
|
59 |
+
|
60 |
+
class LinearGeneral(nn.Module):
    """Linear map that contracts chosen input axes against a weight tensor.

    ``weight`` has shape ``(*in_dim, *feat_dim)`` and is initialised from a
    standard normal distribution; ``bias`` has shape ``feat_dim`` and starts
    at zero.
    """

    def __init__(self, in_dim=(768, ), feat_dim=(12, 64)):
        super().__init__()

        initial_weight = torch.randn(*in_dim, *feat_dim)
        initial_bias = torch.zeros(*feat_dim)
        self.weight = nn.Parameter(initial_weight)
        self.bias = nn.Parameter(initial_bias)

    def forward(self, x, dims):
        """Contract ``x`` with ``weight`` over ``dims`` and add the bias.

        ``dims`` is passed straight to ``torch.tensordot``.
        """
        contracted = torch.tensordot(x, self.weight, dims=dims)
        return contracted + self.bias
|
70 |
+
|
71 |
+
|
72 |
+
class SelfAttention(nn.Module):
    """Multi-head scaled dot-product self-attention.

    Queries, keys and values are projected from the input's last dimension
    (``in_dim``) to ``(heads, head_dim)`` with ``head_dim = in_dim // heads``.
    Attention scores are divided by ``sqrt(head_dim)`` before the softmax, and
    the per-head results are projected back to ``in_dim``.

    Args:
        in_dim: size of the last dimension of the input and of the output.
        heads: number of attention heads.
        dropout_rate: dropout probability applied to the attention weights;
            values ``<= 0`` disable dropout.
    """

    def __init__(self, in_dim, heads=8, dropout_rate=0.1):
        super(SelfAttention, self).__init__()
        self.heads = heads
        self.head_dim = in_dim // heads
        self.scale = self.head_dim**0.5

        self.query = LinearGeneral((in_dim, ), (self.heads, self.head_dim))
        self.key = LinearGeneral((in_dim, ), (self.heads, self.head_dim))
        self.value = LinearGeneral((in_dim, ), (self.heads, self.head_dim))
        self.out = LinearGeneral((self.heads, self.head_dim), (in_dim, ))

        if dropout_rate > 0:
            self.dropout = nn.Dropout(dropout_rate)
        else:
            self.dropout = None

    def forward(self, x):
        """Apply self-attention to a 3-D input and return a same-shaped tensor.

        Raises:
            ValueError: if ``x`` is not 3-dimensional.
        """
        if x.dim() != 3:
            raise ValueError(
                f"expected a 3-D input (batch, tokens, in_dim), got {x.dim()}-D")

        # Project the last axis to (heads, head_dim): result is (b, n, h, d).
        q = self.query(x, dims=([2], [0]))
        k = self.key(x, dims=([2], [0]))
        v = self.value(x, dims=([2], [0]))

        # Move the head axis forward: (b, h, n, d).
        q = q.permute(0, 2, 1, 3)
        k = k.permute(0, 2, 1, 3)
        v = v.permute(0, 2, 1, 3)

        attn_weights = torch.matmul(q, k.transpose(-2, -1)) / self.scale
        attn_weights = F.softmax(attn_weights, dim=-1)
        # The configured dropout was previously built but never used; apply it
        # to the attention probabilities (a no-op in eval mode).
        if self.dropout is not None:
            attn_weights = self.dropout(attn_weights)
        out = torch.matmul(attn_weights, v)
        # Back to (b, n, h, d) so the output projection can merge (h, d).
        out = out.permute(0, 2, 1, 3)

        out = self.out(out, dims=([2, 3], [0, 1]))

        return out
|
108 |
+
|
109 |
+
|
110 |
+
class EncoderBlock(nn.Module):
    """One transformer encoder layer: attention and MLP, each with a residual.

    Args:
        in_dim (int): Size of the token embeddings.
        mlp_dim (int): Hidden size of the feed-forward block.
        num_heads (int): Number of attention heads.
        dropout_rate (float): Dropout probability after attention and inside
            the feed-forward block; not greater than 0 disables it here.
        attn_dropout_rate (float): Dropout rate handed to the attention module.
    """
    def __init__(self,
                 in_dim,
                 mlp_dim,
                 num_heads,
                 dropout_rate=0.1,
                 attn_dropout_rate=0.1):
        super().__init__()

        self.norm1 = nn.LayerNorm(in_dim)
        self.attn = SelfAttention(in_dim,
                                  heads=num_heads,
                                  dropout_rate=attn_dropout_rate)
        self.dropout = nn.Dropout(dropout_rate) if dropout_rate > 0 else None
        self.norm2 = nn.LayerNorm(in_dim)
        self.mlp = MlpBlock(in_dim, mlp_dim, in_dim, dropout_rate)

    def forward(self, x):
        """Apply the normalised attention and MLP sub-layers to ``x``."""
        # attention sub-layer with skip connection
        attended = self.attn(self.norm1(x))
        if self.dropout is not None:
            attended = self.dropout(attended)
        hidden = attended + x

        # feed-forward sub-layer with skip connection
        return self.mlp(self.norm2(hidden)) + hidden
|
143 |
+
|
144 |
+
|
145 |
+
class Encoder(nn.Module):
    """Stack of encoder blocks preceded by position embeddings.

    Args:
        num_patches (int): Number of patch tokens (excluding the extra token).
        emb_dim (int): Size of the token embeddings.
        mlp_dim (int): Hidden size of each feed-forward block.
        num_layers (int): Number of encoder blocks.
        num_heads (int): Number of attention heads per block.
        dropout_rate (float): Dropout rate for embeddings and blocks.
        attn_dropout_rate (float): Dropout rate handed to the attention modules.
    """
    def __init__(self,
                 num_patches,
                 emb_dim,
                 mlp_dim,
                 num_layers=12,
                 num_heads=12,
                 dropout_rate=0.1,
                 attn_dropout_rate=0.0):
        super().__init__()

        # positional embedding
        self.pos_embedding = PositionEmbs(num_patches, emb_dim, dropout_rate)

        # encoder blocks, all operating at the embedding width
        blocks = [
            EncoderBlock(emb_dim, mlp_dim, num_heads, dropout_rate,
                         attn_dropout_rate) for _ in range(num_layers)
        ]
        self.encoder_layers = nn.ModuleList(blocks)
        self.norm = nn.LayerNorm(emb_dim)

    def forward(self, x):
        """Embed positions, run every block in order, then layer-normalise."""
        tokens = self.pos_embedding(x)
        for block in self.encoder_layers:
            tokens = block(tokens)
        return self.norm(tokens)
|
176 |
+
|
177 |
+
|
178 |
+
class VisionTransformer(nn.Module):
    """ Vision Transformer.
    Most easily loaded with the .from_name or .from_pretrained methods.

    The position embedding is sized from ``params.image_size``, so inputs
    must have that spatial size (384 for the built-in presets).

    Args:
        params (namedtuple): A set of Params.
    References:
        [1] https://arxiv.org/abs/2010.11929 (An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale)
    Example:
        >>> import torch
        >>> from vision_transformer_pytorch import VisionTransformer
        >>> inputs = torch.rand(1, 3, 384, 384)
        >>> model = VisionTransformer.from_pretrained('ViT-B_16')
        >>> model.eval()
        >>> outputs = model(inputs)
    """
    def __init__(self, params=None):
        super(VisionTransformer, self).__init__()
        if params is None:
            # Previously this failed later with an opaque AttributeError.
            raise ValueError(
                'params must be provided; build the model with '
                'VisionTransformer.from_name or .from_pretrained')
        self._params = params

        if self._params.resnet:
            # hybrid model: a ResNet produces the feature map that is embedded
            self.resnet = self._params.resnet()
            self.embedding = nn.Conv2d(self.resnet.width * 16,
                                       self._params.emb_dim,
                                       kernel_size=1,
                                       stride=1)
        else:
            # non-overlapping patches: kernel and stride both equal patch size
            self.embedding = nn.Conv2d(3,
                                       self._params.emb_dim,
                                       kernel_size=self.patch_size,
                                       stride=self.patch_size)
        # class token
        self.cls_token = nn.Parameter(torch.zeros(1, 1, self._params.emb_dim))

        # transformer
        self.transformer = Encoder(
            num_patches=self.num_patches,
            emb_dim=self._params.emb_dim,
            mlp_dim=self._params.mlp_dim,
            num_layers=self._params.num_layers,
            num_heads=self._params.num_heads,
            dropout_rate=self._params.dropout_rate,
            attn_dropout_rate=self._params.attn_dropout_rate)

        # classifier
        self.classifier = nn.Linear(self._params.emb_dim,
                                    self._params.num_classes)

    @property
    def image_size(self):
        """Input image size as an (H, W) pair."""
        return get_width_and_height_from_size(self._params.image_size)

    @property
    def patch_size(self):
        """Patch size as an (H, W) pair."""
        return get_width_and_height_from_size(self._params.patch_size)

    @property
    def num_patches(self):
        """Number of patch tokens produced for an image of ``image_size``.

        With a ResNet backbone the grid is additionally divided by the
        backbone's ``downsample`` factor.
        """
        h, w = self.image_size
        fh, fw = self.patch_size
        if hasattr(self, 'resnet'):
            gh, gw = h // fh // self.resnet.downsample, w // fw // self.resnet.downsample
        else:
            gh, gw = h // fh, w // fw
        return gh * gw

    def extract_features(self, x):
        """Return the encoder output for every token (class token first)."""
        if hasattr(self, 'resnet'):
            x = self.resnet(x)

        emb = self.embedding(x)  # (n, c, gh, gw)
        emb = emb.permute(0, 2, 3, 1)  # (n, gh, gw, c)
        b, h, w, c = emb.shape
        emb = emb.reshape(b, h * w, c)

        # prepend class token; expand is enough because torch.cat copies
        cls_token = self.cls_token.expand(b, -1, -1)
        emb = torch.cat([cls_token, emb], dim=1)

        # transformer
        feat = self.transformer(emb)
        return feat

    def forward(self, x):
        """Return classification logits computed from the class token."""
        feat = self.extract_features(x)

        # classifier
        logits = self.classifier(feat[:, 0])
        return logits

    @classmethod
    def from_name(cls, model_name, in_channels=3, **override_params):
        """Create a vision transformer model according to name.
        Args:
            model_name (str): Name for vision transformer.
            in_channels (int): Input data's channel number.
            override_params (other key word params):
                Params to override model's global_params.
                Optional key:
                    'image_size', 'patch_size',
                    'emb_dim', 'mlp_dim',
                    'num_heads', 'num_layers',
                    'num_classes', 'attn_dropout_rate',
                    'dropout_rate'
        Returns:
            A vision transformer model.
        Raises:
            ValueError: If ``model_name`` is not one of VALID_MODELS.
        """
        cls._check_model_name_is_valid(model_name)
        params = get_model_params(model_name, override_params)
        model = cls(params)
        model._change_in_channels(in_channels)
        return model

    @classmethod
    def from_pretrained(cls,
                        model_name,
                        weights_path=None,
                        in_channels=3,
                        num_classes=1000,
                        **override_params):
        """Create a vision transformer model and load pretrained weights.
        Args:
            model_name (str): Name for vision transformer.
            weights_path (None or str):
                str: path to pretrained weights file on the local disk.
                None: use pretrained weights downloaded from the Internet.
            in_channels (int): Input data's channel number.
            num_classes (int):
                Number of categories for classification.
                It controls the output size for final linear layer.
                The pretrained classifier is only loaded when this is 1000.
            override_params (other key word params):
                Params to override model's global_params.
                Optional key:
                    'image_size', 'patch_size',
                    'emb_dim', 'mlp_dim',
                    'num_heads', 'num_layers',
                    'attn_dropout_rate', 'dropout_rate'
        Returns:
            A pretrained vision transformer model.
        """
        model = cls.from_name(model_name,
                              num_classes=num_classes,
                              **override_params)
        load_pretrained_weights(model,
                                model_name,
                                weights_path=weights_path,
                                load_fc=(num_classes == 1000))
        # Done after loading: when in_channels != 3 the first convolution is
        # replaced by a freshly initialised one.
        model._change_in_channels(in_channels)
        return model

    @classmethod
    def _check_model_name_is_valid(cls, model_name):
        """Validates model name.
        Args:
            model_name (str): Name for vision transformer.
        Raises:
            ValueError: If ``model_name`` is not in VALID_MODELS.
        """
        if model_name not in VALID_MODELS:
            raise ValueError('model_name should be one of: ' +
                             ', '.join(VALID_MODELS))

    def _change_in_channels(self, in_channels):
        """Adjust model's first convolution layer to in_channels, if in_channels not equals 3.
        Args:
            in_channels (int): Input data's channel number.
        """
        if in_channels != 3:
            if hasattr(self, 'resnet'):
                # nn.Sequential only supports integer/slice item assignment;
                # the string key used before raised TypeError. Attribute
                # assignment replaces the named 'conv' submodule in place.
                self.resnet.root.conv = StdConv2d(in_channels,
                                                  self.resnet.width,
                                                  kernel_size=7,
                                                  stride=2,
                                                  bias=False,
                                                  padding=3)
            else:
                self.embedding = nn.Conv2d(in_channels,
                                           self._params.emb_dim,
                                           kernel_size=self.patch_size,
                                           stride=self.patch_size)
|
pretrained_weights/ViT-B_16_imagenet21k_imagenet2012.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33cadfad17534e3bf51a17fe31561bbf8e650f17801cd715e71804254c1e8ef3
|
3 |
+
size 347471723
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
torch==1.13.1
|
2 |
+
torchvision==0.14.1
|
3 |
+
gradio==3.16.2
|
resnet.py
ADDED
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
import torch.nn.functional as F
|
4 |
+
|
5 |
+
from os.path import join as pjoin
|
6 |
+
from collections import OrderedDict
|
7 |
+
|
8 |
+
|
9 |
+
def weight_standardize(w, dim, eps):
    """Subtracts mean and divides by standard deviation.

    Args:
        w (Tensor): Weights to standardize.
        dim (int or list of int): Axes the statistics are computed over.
        eps (float): Added to the standard deviation before dividing.
    Returns:
        Tensor with the same shape as ``w``.
    """
    # keepdim=True keeps the reduced axes as size 1 so the statistics line up
    # with the axes they were computed over. Without it they were broadcast
    # against the trailing axes, which is only right when ``dim`` covers the
    # leading axes (results for that case are unchanged).
    w = w - torch.mean(w, dim=dim, keepdim=True)
    w = w / (torch.std(w, dim=dim, keepdim=True) + eps)
    return w
|
14 |
+
|
15 |
+
|
16 |
+
def np2th(weights, conv=False):
    """Turn a NumPy array into a tensor, reordering conv kernels HWIO -> OIHW.

    The axes are only permuted when ``conv`` is true.
    """
    array = weights.transpose([3, 2, 0, 1]) if conv else weights
    return torch.from_numpy(array)
|
21 |
+
|
22 |
+
|
23 |
+
class StdConv2d(nn.Conv2d):
    """Conv2d whose kernel is standardized per output channel on every call."""
    def forward(self, x):
        """Convolve ``x`` with the standardized kernel."""
        # nn.Conv2d stores its weight as (out, in/groups, kH, kW), so the
        # statistics of one filter live on dims (1, 2, 3). The previous
        # dims [0, 1, 2] suit an HWIO kernel: on this layout they reduced
        # over the output channels and broadcast along kernel width.
        w = self.weight
        centered = w - w.mean(dim=(1, 2, 3), keepdim=True)
        # Population standard deviation (divide by N), as numpy-style std.
        # NOTE(review): assumed to be what the converted checkpoints expect
        # -- confirm against the reference implementation.
        std = centered.pow(2).mean(dim=(1, 2, 3), keepdim=True).sqrt()
        w = centered / (std + 1e-5)
        return F.conv2d(x, w, self.bias, self.stride, self.padding,
                        self.dilation, self.groups)
|
28 |
+
|
29 |
+
|
30 |
+
def conv3x3(in_channels, out_channels, stride=1, groups=1, bias=False):
    """Build a 3x3 StdConv2d with padding 1."""
    options = dict(kernel_size=3,
                   stride=stride,
                   padding=1,
                   bias=bias,
                   groups=groups)
    return StdConv2d(in_channels, out_channels, **options)
|
38 |
+
|
39 |
+
|
40 |
+
def conv1x1(in_channels, out_channels, stride=1, bias=False):
    """Build a 1x1 StdConv2d without padding."""
    options = dict(kernel_size=1, stride=stride, padding=0, bias=bias)
    return StdConv2d(in_channels, out_channels, **options)
|
47 |
+
|
48 |
+
|
49 |
+
class PreActBottleneck(nn.Module):
    """Pre-activation (v2) bottleneck block.

    Three convolutions (1x1, 3x3, 1x1), each followed by GroupNorm, are
    added to a shortcut. The shortcut is projected by a strided 1x1
    convolution plus GroupNorm when the stride or channel count changes.

    Args:
        in_channels (int): Channels of the input.
        out_channels (int): Channels of the output; defaults to ``in_channels``.
        mid_channels (int): Channels inside the bottleneck; defaults to
            ``out_channels // 4``.
        stride (int): Stride of the 3x3 convolution and of the projection.
    """
    def __init__(self,
                 in_channels,
                 out_channels=None,
                 mid_channels=None,
                 stride=1):
        super().__init__()
        if not out_channels:
            out_channels = in_channels
        if not mid_channels:
            mid_channels = out_channels // 4

        self.gn1 = nn.GroupNorm(32, mid_channels, eps=1e-6)
        self.conv1 = conv1x1(in_channels, mid_channels, bias=False)
        self.gn2 = nn.GroupNorm(32, mid_channels, eps=1e-6)
        # Original code has the stride on conv1!!
        self.conv2 = conv3x3(mid_channels, mid_channels, stride, bias=False)
        self.gn3 = nn.GroupNorm(32, out_channels, eps=1e-6)
        self.conv3 = conv1x1(mid_channels, out_channels, bias=False)
        self.relu = nn.ReLU(inplace=True)

        needs_projection = stride != 1 or in_channels != out_channels
        if needs_projection:
            # Projection also with pre-activation according to paper.
            self.downsample = conv1x1(in_channels,
                                      out_channels,
                                      stride,
                                      bias=False)
            self.gn_proj = nn.GroupNorm(out_channels, out_channels)

    def forward(self, x):
        """Return ReLU(shortcut(x) + bottleneck(x))."""
        # Residual branch
        shortcut = x
        if hasattr(self, 'downsample'):
            shortcut = self.gn_proj(self.downsample(x))

        # Unit's branch
        branch = self.conv1(x)
        branch = self.relu(self.gn1(branch))
        branch = self.conv2(branch)
        branch = self.relu(self.gn2(branch))
        branch = self.gn3(self.conv3(branch))

        return self.relu(shortcut + branch)
|
93 |
+
|
94 |
+
|
95 |
+
class ResNetV2(nn.Module):
    """Implementation of Pre-activation (v2) ResNet mode.

    Args:
        block_units (sequence of int): Number of bottleneck units in each of
            the three stages.
        width_factor (int or float): Multiplier on the base width of 64.
    """
    def __init__(self, block_units, width_factor):
        super().__init__()
        width = int(64 * width_factor)
        self.width = width
        # Overall stride: the root conv, the max-pool and the first unit of
        # block2 and block3 each use stride 2.
        self.downsample = 16

        self.root = nn.Sequential(
            OrderedDict([
                ('conv',
                 StdConv2d(3,
                           width,
                           kernel_size=7,
                           stride=2,
                           bias=False,
                           padding=3)),
                ('gn', nn.GroupNorm(32, width, eps=1e-6)),
                ('relu', nn.ReLU(inplace=True)),
                ('pool', nn.MaxPool2d(kernel_size=3, stride=2, padding=0)),
            ]))

        def make_stage(num_units, in_ch, out_ch, mid_ch, stride):
            # unit1 changes width (and maybe resolution); the rest keep both
            units = OrderedDict()
            units['unit1'] = PreActBottleneck(in_channels=in_ch,
                                              out_channels=out_ch,
                                              mid_channels=mid_ch,
                                              stride=stride)
            for i in range(2, num_units + 1):
                units[f'unit{i:d}'] = PreActBottleneck(in_channels=out_ch,
                                                       out_channels=out_ch,
                                                       mid_channels=mid_ch)
            return nn.Sequential(units)

        # (in, out, mid, stride) per stage, built in order block1..block3
        stage_specs = (
            (width, width * 4, width, 1),
            (width * 4, width * 8, width * 2, 2),
            (width * 8, width * 16, width * 4, 2),
        )
        stages = OrderedDict()
        for index, (in_ch, out_ch, mid_ch, stride) in enumerate(stage_specs):
            stages[f'block{index + 1:d}'] = make_stage(block_units[index],
                                                       in_ch, out_ch, mid_ch,
                                                       stride)
        self.body = nn.Sequential(stages)

    def forward(self, x):
        """Run the root stem followed by the three stages."""
        return self.body(self.root(x))
|
161 |
+
|
162 |
+
|
163 |
+
def resnet50():
    """Build a ResNetV2 with (3, 4, 9) units per stage and width factor 1."""
    units_per_stage = (3, 4, 9)
    return ResNetV2(block_units=units_per_stage, width_factor=1)
|
utils.py
ADDED
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import math
|
3 |
+
import torch
|
4 |
+
import collections
|
5 |
+
|
6 |
+
from torch import nn
|
7 |
+
from functools import partial
|
8 |
+
from torch.utils import model_zoo
|
9 |
+
from torch.nn import functional as F
|
10 |
+
|
11 |
+
from resnet import resnet50
|
12 |
+
|
13 |
+
################################################################################
|
14 |
+
### Help functions for model architecture
|
15 |
+
################################################################################
|
16 |
+
|
17 |
+
# Params: namedtuple
|
18 |
+
# get_width_and_height_from_size: turn an int or an (H, W) pair into (H, W)
|
19 |
+
|
20 |
+
# Parameters for the entire model (stem, all blocks, and head).
# 'resnet' is either None or a callable that builds the backbone.
Params = collections.namedtuple('Params', [
    'image_size', 'patch_size', 'emb_dim', 'mlp_dim', 'num_heads', 'num_layers',
    'num_classes', 'attn_dropout_rate', 'dropout_rate', 'resnet'
])

# Make every Params field optional by defaulting it to None
Params.__new__.__defaults__ = (None, ) * len(Params._fields)
|
28 |
+
|
29 |
+
|
30 |
+
def get_width_and_height_from_size(x):
    """Obtain height and width from x.
    Args:
        x (int, tuple or list): Data size.
    Returns:
        size: A tuple or list (H,W). An int is duplicated into ``(x, x)``;
        a list or tuple is returned unchanged.
    Raises:
        TypeError: If ``x`` is not an int, list or tuple.
    """
    if isinstance(x, int):
        return x, x
    if isinstance(x, (list, tuple)):
        return x
    # Name the offending type so a bad config value is easy to track down.
    raise TypeError('size must be an int, list or tuple, got {}'.format(
        type(x).__name__))
|
43 |
+
|
44 |
+
|
45 |
+
################################################################################
|
46 |
+
### Helper functions for loading model params
|
47 |
+
################################################################################
|
48 |
+
|
49 |
+
# get_model_params and vision_transformer:
#     Functions to get Params for the vision transformer
# url_map: Dict of URLs for pretrained weights
# load_pretrained_weights: A function to load pretrained weights
|
53 |
+
|
54 |
+
|
55 |
+
def vision_transformer(model_name):
    """Look up the preset hyper-parameters of a named model.
    Args:
        model_name (str): Model name to be queried.
    Returns:
        Params built from the preset registered under ``model_name``.
    """
    # order of the values inside each preset tuple below
    field_names = ('image_size', 'patch_size', 'emb_dim', 'mlp_dim',
                   'num_heads', 'num_layers', 'num_classes',
                   'attn_dropout_rate', 'dropout_rate', 'resnet')
    presets = {
        'ViT-B_16': (384, 16, 768, 3072, 12, 12, 1000, 0.0, 0.1, None),
        'ViT-B_32': (384, 32, 768, 3072, 12, 12, 1000, 0.0, 0.1, None),
        'ViT-L_16': (384, 16, 1024, 4096, 16, 24, 1000, 0.0, 0.1, None),
        'ViT-L_32': (384, 32, 1024, 4096, 16, 24, 1000, 0.0, 0.1, None),
        'R50+ViT-B_16': (384, 1, 768, 3072, 12, 12, 1000, 0.0, 0.1, resnet50),
    }
    values = presets[model_name]
    return Params(**dict(zip(field_names, values)))
|
84 |
+
|
85 |
+
|
86 |
+
def get_model_params(model_name, override_params):
    """Return the Params of a named model with optional overrides applied.
    Args:
        model_name (str): Model's name.
        override_params (dict): Fields to replace; may be empty or None.
    Returns:
        params
    """
    base = vision_transformer(model_name)
    if not override_params:
        return base
    # ValueError will be raised here if override_params has fields not included in params.
    return base._replace(**override_params)
|
100 |
+
|
101 |
+
|
102 |
+
# Download URLs of the pretrained weights, keyed by model name.
# Trained with standard methods; check more details in the paper
# (An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale)
url_map = {
    'ViT-B_16':
    'https://github.com/tczhangzhi/VisionTransformer-PyTorch/releases/download/1.0.1/ViT-B_16_imagenet21k_imagenet2012.pth',
    'ViT-B_32':
    'https://github.com/tczhangzhi/VisionTransformer-PyTorch/releases/download/1.0.1/ViT-B_32_imagenet21k_imagenet2012.pth',
    'ViT-L_16':
    'https://github.com/tczhangzhi/VisionTransformer-PyTorch/releases/download/1.0.1/ViT-L_16_imagenet21k_imagenet2012.pth',
    'ViT-L_32':
    'https://github.com/tczhangzhi/VisionTransformer-PyTorch/releases/download/1.0.1/ViT-L_32_imagenet21k_imagenet2012.pth',
    'R50+ViT-B_16':
    'https://github.com/tczhangzhi/VisionTransformer-PyTorch/releases/download/1.0.1/R50+ViT-B_16_imagenet21k_imagenet2012.pth',
}
|
116 |
+
|
117 |
+
|
118 |
+
def load_pretrained_weights(model,
                            model_name,
                            weights_path=None,
                            load_fc=True,
                            advprop=False):
    """Loads pretrained weights from weights path or download using url.
    Args:
        model (Module): The whole model of vision transformer.
        model_name (str): Model name of vision transformer.
        weights_path (None or str):
            str: path to pretrained weights file on the local disk.
            None: use pretrained weights downloaded from the Internet.
        load_fc (bool): Whether to load pretrained weights for fc layer at the end of the model.
        advprop (bool): Unused; kept so existing callers keep working.
    Raises:
        AssertionError: If the checkpoint lacks keys the model needs or
            contains keys the model does not have.
    """
    # Load onto the CPU first so checkpoints saved from a GPU also work on
    # CPU-only hosts; load_state_dict copies into the model's own tensors.
    if isinstance(weights_path, str):
        state_dict = torch.load(weights_path, map_location='cpu')
    else:
        state_dict = model_zoo.load_url(url_map[model_name],
                                        map_location='cpu')

    if load_fc:
        ret = model.load_state_dict(state_dict, strict=False)
        assert not ret.missing_keys, 'Missing keys when loading pretrained weights: {}'.format(
            ret.missing_keys)
    else:
        # Drop the head if present; a checkpoint saved without it is fine.
        state_dict.pop('classifier.weight', None)
        state_dict.pop('classifier.bias', None)
        ret = model.load_state_dict(state_dict, strict=False)
        assert set(ret.missing_keys) == {
            'classifier.weight', 'classifier.bias'
        }, 'Missing keys when loading pretrained weights: {}'.format(
            ret.missing_keys)
    assert not ret.unexpected_keys, 'Unexpected keys when loading pretrained weights: {}'.format(
        ret.unexpected_keys)

    print('Loaded pretrained weights for {}'.format(model_name))
|