Spaces:

TexR6
/

AttentionMaps

Sleeping

App Files Files Community

TexR6 committed on Feb 3, 2023

Commit

d7b0f75

•

1 Parent(s): 2d5dff2

initial commit

Browse files

Files changed (15) hide show

__init__.py +7 -0
app.py +124 -0
capture_weights.py +326 -0
examples/carrier.jpg +0 -0
examples/chicken.jpg +0 -0
examples/eagle.jpg +0 -0
examples/parachute.jpg +0 -0
labels/imagenet1K_labels.txt +1000 -0
labels/imagenet1k-simple-labels.json +1000 -0
labels/imagenet21k_wordnet_lemmas.txt +0 -0
model.py +359 -0
pretrained_weights/ViT-B_16_imagenet21k_imagenet2012.pth +3 -0
requirements.txt +3 -0
resnet.py +164 -0
utils.py +152 -0

__init__.py ADDED Viewed

	@@ -0,0 +1,7 @@

+__version__ = "1.0.3"
+from .model import VisionTransformer, VALID_MODELS
+from .utils import (
+    Params,
+    vision_transformer,
+    get_model_params,
+)

app.py ADDED Viewed

	@@ -0,0 +1,124 @@

+import os
+import PIL
+import ast
+import cv2
+import json
+import torch
+import pickle
+import torchvision
+import numpy as np
+import gradio as gr
+from PIL import Image
+from typing import Tuple, Dict
+import matplotlib.pyplot as plt
+from timeit import default_timer as timer
+from torchvision import datasets, transforms
+import warnings
+warnings.filterwarnings('ignore')
+example_list = [["examples/" + example] for example in os.listdir("examples")]
+with open('labels/imagenet1k-simple-labels.json') as f:
+    class_names = json.load(f)
+from model import VisionTransformer
+from capture_weights import vit_weights
+vision_transformer = VisionTransformer.from_name('ViT-B_16', num_classes=1000)
+model_weights = torch.load('pretrained_weights/ViT-B_16_imagenet21k_imagenet2012.pth',
+                           map_location=torch.device('cpu'))
+vision_transformer.load_state_dict(model_weights)
+data_transforms = transforms.Compose([
+    transforms.Resize(size=(384, 384)),
+    transforms.ToTensor(),
+    transforms.Normalize(mean=[0.485, 0.456, 0.406],
+                         std=[0.229, 0.224, 0.225],)])
+def inv_normalize(tensor):
+    """Normalize an image tensor back to the 0-255 range."""
+    tensor = (tensor - tensor.min()) / (tensor.max() - tensor.min()) * (256 - 1e-5)
+    return tensor
+def inv_transform(tensor, normalize=True):
+    """Convert a tensor back to an image."""
+    tensor = inv_normalize(tensor)
+    array = tensor.detach().cpu().numpy()
+    array = array.transpose(1, 2, 0).astype(np.uint8)
+    return PIL.Image.fromarray(array)
+def predict_image(image) -> Tuple[Dict, float]:
+    """Return prediction classes with probabilities for an input image."""
+    input_tensor = data_transforms(image)
+    start_time = timer()
+    prediction_dict = {}
+    with torch.inference_mode():
+        [logits] = vision_transformer(input_tensor[None])
+        probs = torch.softmax(logits, dim=0)
+        topk_prob, topk_id = torch.topk(probs, 3)
+        for i in range(topk_prob.size(0)):
+            prediction_dict[class_names[topk_id[i]]] = topk_prob[i].item()
+    prediction_time = round(timer() - start_time, 5)
+    return prediction_dict, prediction_time
+def get_attention_map(img, num_layer=5, get_mask=False):
+    x = data_transforms(img)
+    logits, att_mat = vit_weights(x.unsqueeze(0))
+    att_mat = torch.stack(att_mat).squeeze(1)
+    # Take the mean of the attention weights across 12 heads
+    att_mat = torch.mean(att_mat, dim=1)
+    # To account for residual connections, we add an identity matrix to the
+    # attention matrix and re-normalize the weights.
+    residual_att = torch.eye(att_mat.size(1))
+    aug_att_mat = att_mat + residual_att
+    aug_att_mat = aug_att_mat / aug_att_mat.sum(dim=-1).unsqueeze(-1)
+    # Recursively multiply the weight matrices
+    joint_attentions = torch.zeros(aug_att_mat.size())
+    joint_attentions[0] = aug_att_mat[0]
+    for n in range(1, aug_att_mat.size(0)):
+        joint_attentions[n] = torch.matmul(aug_att_mat[n], joint_attentions[n-1])
+    v = joint_attentions[num_layer]
+    grid_size = int(np.sqrt(aug_att_mat.size(-1)))
+    mask = v[0, 1:].reshape(grid_size, grid_size).detach().numpy()
+    if get_mask:
+        attn_map = cv2.resize(mask / mask.max(), img.size)
+    else:
+        mask = cv2.resize(mask / mask.max(), img.size)[..., np.newaxis]
+        attn_map = (mask * img).astype("uint8")
+    return attn_map
+attention_interface = gr.Interface(
+    fn=get_attention_map,
+    inputs=[gr.Image(type="pil", label="Image"),
+            gr.Dropdown(choices=["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
+                        label="Attention Layer", value="6", type="index"),
+            gr.Checkbox(label="Show Mask?")],
+    outputs=gr.Image(type="pil", label="Attention Map").style(height=400),
+    examples=example_list,
+    title="Attention Maps 🔍",
+    description="The ViT Base architecture has 12 transformer Encoder layers (12 attention heads in each).",
+    article="From the dropdown menu, select the Encoder layer (tick the checkbox to visualize only the mask)."
+)
+classification_interface = gr.Interface(
+    fn=predict_image,
+    inputs=gr.Image(type="pil", label="Image"),
+    outputs=[gr.Label(num_top_classes=3, label="Predictions"),
+              gr.Number(label="Prediction time (secs)")],
+    examples=example_list,
+    title="Object Identification ✅",
+    description="ImageNet object identification using pretrained ViT Base (Patch Size: 16 | Image Size: 384) architecture.",
+    article="Upload an image from the example list or choose one of your own [[ImageNet Classes](https://github.com/anishathalye/imagenet-simple-labels/blob/master/imagenet-simple-labels.json)]."
+)
+demo = gr.TabbedInterface([attention_interface, classification_interface],
+                          ["Visualize Attention Maps", "Image Prediction"], title="ImageNet 1K 📷")
+if __name__ == "__main__":
+    demo.launch()

capture_weights.py ADDED Viewed

	@@ -0,0 +1,326 @@

+import torch
+import numpy as np
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.cuda.amp import autocast
+from utils import (get_width_and_height_from_size, load_pretrained_weights, get_model_params)
+VALID_MODELS = ('ViT-B_16', 'ViT-B_32', 'ViT-L_16', 'ViT-L_32')
+class PositionEmbs(nn.Module):
+    def __init__(self, num_patches, emb_dim, dropout_rate=0.1):
+        super(PositionEmbs, self).__init__()
+        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, emb_dim))
+        if dropout_rate > 0:
+            self.dropout = nn.Dropout(dropout_rate)
+        else:
+            self.dropout = None
+    @autocast()
+    def forward(self, x):
+        out = x + self.pos_embedding
+        if self.dropout:
+            out = self.dropout(out)
+        return out
+class MlpBlock(nn.Module):
+    """ Transformer Feed-Forward Block """
+    def __init__(self, in_dim, mlp_dim, out_dim, dropout_rate=0.1):
+        super(MlpBlock, self).__init__()
+        # init layers
+        self.fc1 = nn.Linear(in_dim, mlp_dim)
+        self.fc2 = nn.Linear(mlp_dim, out_dim)
+        self.act = nn.GELU()
+        if dropout_rate > 0.0:
+            self.dropout1 = nn.Dropout(dropout_rate)
+            self.dropout2 = nn.Dropout(dropout_rate)
+        else:
+            self.dropout1 = None
+            self.dropout2 = None
+    @autocast()
+    def forward(self, x):
+        out = self.fc1(x)
+        out = self.act(out)
+        if self.dropout1:
+            out = self.dropout1(out)
+        out = self.fc2(out)
+        out = self.dropout2(out)
+        return out
+class LinearGeneral(nn.Module):
+    def __init__(self, in_dim=(768, ), feat_dim=(12, 64)):
+        super(LinearGeneral, self).__init__()
+        self.weight = nn.Parameter(torch.randn(*in_dim, *feat_dim))
+        self.bias = nn.Parameter(torch.zeros(*feat_dim))
+    @autocast()
+    def forward(self, x, dims):
+        a = torch.tensordot(x, self.weight, dims=dims) + self.bias
+        return a
+class SelfAttention(nn.Module):
+    def __init__(self, in_dim, heads=8, dropout_rate=0.1):
+        super(SelfAttention, self).__init__()
+        self.heads = heads
+        self.head_dim = in_dim // heads
+        self.scale = self.head_dim**0.5
+        self.query = LinearGeneral((in_dim, ), (self.heads, self.head_dim))
+        self.key = LinearGeneral((in_dim, ), (self.heads, self.head_dim))
+        self.value = LinearGeneral((in_dim, ), (self.heads, self.head_dim))
+        self.out = LinearGeneral((self.heads, self.head_dim), (in_dim, ))
+        if dropout_rate > 0:
+            self.dropout = nn.Dropout(dropout_rate)
+        else:
+            self.dropout = None
+    @autocast()
+    def forward(self, x):
+        b, n, _ = x.shape
+        q = self.query(x, dims=([2], [0]))
+        k = self.key(x, dims=([2], [0]))
+        v = self.value(x, dims=([2], [0]))
+        q = q.permute(0, 2, 1, 3)
+        k = k.permute(0, 2, 1, 3)
+        v = v.permute(0, 2, 1, 3)
+        attn_weights = torch.matmul(q, k.transpose(-2, -1)) / self.scale
+        attn_weights = F.softmax(attn_weights, dim=-1)
+        out = torch.matmul(attn_weights, v)
+        out = out.permute(0, 2, 1, 3)
+        out = self.out(out, dims=([2, 3], [0, 1]))
+        return out, attn_weights
+class EncoderBlock(nn.Module):
+    def __init__(self, in_dim, mlp_dim, num_heads, dropout_rate=0.1, attn_dropout_rate=0.1):
+        super(EncoderBlock, self).__init__()
+        self.norm1 = nn.LayerNorm(in_dim)
+        self.attn = SelfAttention(in_dim, heads=num_heads, dropout_rate=attn_dropout_rate)
+        if dropout_rate > 0:
+            self.dropout = nn.Dropout(dropout_rate)
+        else:
+            self.dropout = None
+        self.norm2 = nn.LayerNorm(in_dim)
+        self.mlp = MlpBlock(in_dim, mlp_dim, in_dim, dropout_rate)
+    @autocast()
+    def forward(self, x):
+        residual = x
+        out = self.norm1(x)
+        out, attn_weights = self.attn(out)
+        if self.dropout:
+            out = self.dropout(out)
+        out += residual
+        residual = out
+        out = self.norm2(out)
+        out = self.mlp(out)
+        out += residual
+        return out, attn_weights
+class Encoder(nn.Module):
+    def __init__(self,
+                 num_patches,
+                 emb_dim,
+                 mlp_dim,
+                 num_layers=12,
+                 num_heads=12,
+                 dropout_rate=0.1,
+                 attn_dropout_rate=0.0):
+        super(Encoder, self).__init__()
+        # positional embedding
+        self.pos_embedding = PositionEmbs(num_patches, emb_dim, dropout_rate)
+        # encoder blocks
+        in_dim = emb_dim
+        self.encoder_layers = nn.ModuleList()
+        for i in range(num_layers):
+            layer = EncoderBlock(in_dim, mlp_dim, num_heads, dropout_rate, attn_dropout_rate)
+            self.encoder_layers.append(layer)
+        self.norm = nn.LayerNorm(in_dim)
+    @autocast()
+    def forward(self, x):
+        attn_weights = []
+        out = self.pos_embedding(x)
+        for layer in self.encoder_layers:
+            out, weights = layer(out)
+            attn_weights.append(weights)
+        out = self.norm(out)
+        return out, attn_weights
+class VisionTransformer(nn.Module):
+    """ Vision Transformer.
+        Most easily loaded with the .from_name or .from_pretrained methods.
+        Args:
+            params (namedtuple): A set of Params.
+        References:
+            [1] https://arxiv.org/abs/2010.11929 (An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale)
+        Example:
+            import torch
+            >>> from vision_transformer_pytorch import VisionTransformer
+            >>> inputs = torch.rand(1, 3, 256, 256)
+            >>> model = VisionTransformer.from_pretrained('ViT-B_16')
+            >>> model.eval()
+            >>> outputs = model(inputs)
+    """
+    def __init__(self, params=None):
+        super(VisionTransformer, self).__init__()
+        self._params = params
+        self.embedding = nn.Conv2d(3, self._params.emb_dim, kernel_size=self.patch_size, stride=self.patch_size)
+        # class token
+        self.cls_token = nn.Parameter(torch.zeros(1, 1, self._params.emb_dim))
+        # transformer
+        self.transformer = Encoder(num_patches=self.num_patches,
+                                   emb_dim=self._params.emb_dim,
+                                   mlp_dim=self._params.mlp_dim,
+                                   num_layers=self._params.num_layers,
+                                   num_heads=self._params.num_heads,
+                                   dropout_rate=self._params.dropout_rate,
+                                   attn_dropout_rate=self._params.attn_dropout_rate)
+        # classfier
+        self.classifier = nn.Linear(self._params.emb_dim, self._params.num_classes)
+    @property
+    def image_size(self):
+        return get_width_and_height_from_size(self._params.image_size)
+    @property
+    def patch_size(self):
+        return get_width_and_height_from_size(self._params.patch_size)
+    @property
+    def num_patches(self):
+        h, w = self.image_size
+        fh, fw = self.patch_size
+        gh, gw = h // fh, w // fw
+        return gh * gw
+    @autocast()
+    def extract_features(self, x):
+        emb = self.embedding(x)  # (n, c, gh, gw)
+        emb = emb.permute(0, 2, 3, 1)  # (n, gh, hw, c)
+        b, h, w, c = emb.shape
+        emb = emb.reshape(b, h * w, c)
+        # prepend class token
+        cls_token = self.cls_token.repeat(b, 1, 1)
+        emb = torch.cat([cls_token, emb], dim=1)
+        # transformer
+        feat, attn_weights = self.transformer(emb)
+        return feat, attn_weights
+    @autocast()
+    def forward(self, x):
+        feat, attn_weights = self.extract_features(x)
+        # classifier
+        logits = self.classifier(feat[:, 0])
+        return logits, attn_weights
+    @classmethod
+    def from_name(cls, model_name, in_channels=3, **override_params):
+        """create an vision transformer model according to name.
+        Args:
+            model_name (str): Name for vision transformer.
+            in_channels (int): Input data's channel number.
+            override_params (other key word params):
+                Params to override model's global_params.
+                Optional key:
+                    'image_size', 'patch_size',
+                    'emb_dim', 'mlp_dim',
+                    'num_heads', 'num_layers',
+                    'num_classes', 'attn_dropout_rate',
+                    'dropout_rate'
+        Returns:
+            An vision transformer model.
+        """
+        cls._check_model_name_is_valid(model_name)
+        params = get_model_params(model_name, override_params)
+        model = cls(params)
+        model._change_in_channels(in_channels)
+        return model
+    @classmethod
+    def from_pretrained(cls, model_name, weights_path=None, in_channels=3, num_classes=1000, **override_params):
+        """create an vision transformer model according to name.
+        Args:
+            model_name (str): Name for vision transformer.
+            weights_path (None or str):
+                str: path to pretrained weights file on the local disk.
+                None: use pretrained weights downloaded from the Internet.
+            in_channels (int): Input data's channel number.
+            num_classes (int):
+                Number of categories for classification.
+                It controls the output size for final linear layer.
+            override_params (other key word params):
+                Params to override model's global_params.
+                Optional key:
+                    'image_size', 'patch_size',
+                    'emb_dim', 'mlp_dim',
+                    'num_heads', 'num_layers',
+                    'num_classes', 'attn_dropout_rate',
+                    'dropout_rate'
+        Returns:
+            A pretrained vision transformer model.
+        """
+        model = cls.from_name(model_name, num_classes=num_classes, **override_params)
+        load_pretrained_weights(model, model_name, weights_path=weights_path, load_fc=(num_classes == 1000))
+        model._change_in_channels(in_channels)
+        return model
+    @classmethod
+    def _check_model_name_is_valid(cls, model_name):
+        """Validates model name.
+        Args:
+            model_name (str): Name for vision transformer.
+        Returns:
+            bool: Is a valid name or not.
+        """
+        if model_name not in VALID_MODELS:
+            raise ValueError('model_name should be one of: ' + ', '.join(VALID_MODELS))
+    def _change_in_channels(self, in_channels):
+        """Adjust model's first convolution layer to in_channels, if in_channels not equals 3.
+        Args:
+            in_channels (int): Input data's channel number.
+        """
+        if in_channels != 3:
+            self.embedding = nn.Conv2d(in_channels,
+                                       self._params.emb_dim,
+                                       kernel_size=self.patch_size,
+                                       stride=self.patch_size)
+vit_weights = VisionTransformer.from_name('ViT-B_16', num_classes=1000)
+model_weights = torch.load('pretrained_weights/ViT-B_16_imagenet21k_imagenet2012.pth',
+                           map_location=torch.device('cpu'))
+vit_weights.load_state_dict(model_weights)

examples/carrier.jpg ADDED Viewed

examples/chicken.jpg ADDED Viewed

examples/eagle.jpg ADDED Viewed

examples/parachute.jpg ADDED Viewed

labels/imagenet1K_labels.txt ADDED Viewed

	@@ -0,0 +1,1000 @@

+{0: 'tench, Tinca tinca',
+ 1: 'goldfish, Carassius auratus',
+ 2: 'great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias',
+ 3: 'tiger shark, Galeocerdo cuvieri',
+ 4: 'hammerhead, hammerhead shark',
+ 5: 'electric ray, crampfish, numbfish, torpedo',
+ 6: 'stingray',
+ 7: 'cock',
+ 8: 'hen',
+ 9: 'ostrich, Struthio camelus',
+ 10: 'brambling, Fringilla montifringilla',
+ 11: 'goldfinch, Carduelis carduelis',
+ 12: 'house finch, linnet, Carpodacus mexicanus',
+ 13: 'junco, snowbird',
+ 14: 'indigo bunting, indigo finch, indigo bird, Passerina cyanea',
+ 15: 'robin, American robin, Turdus migratorius',
+ 16: 'bulbul',
+ 17: 'jay',
+ 18: 'magpie',
+ 19: 'chickadee',
+ 20: 'water ouzel, dipper',
+ 21: 'kite',
+ 22: 'bald eagle, American eagle, Haliaeetus leucocephalus',
+ 23: 'vulture',
+ 24: 'great grey owl, great gray owl, Strix nebulosa',
+ 25: 'European fire salamander, Salamandra salamandra',
+ 26: 'common newt, Triturus vulgaris',
+ 27: 'eft',
+ 28: 'spotted salamander, Ambystoma maculatum',
+ 29: 'axolotl, mud puppy, Ambystoma mexicanum',
+ 30: 'bullfrog, Rana catesbeiana',
+ 31: 'tree frog, tree-frog',
+ 32: 'tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui',
+ 33: 'loggerhead, loggerhead turtle, Caretta caretta',
+ 34: 'leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea',
+ 35: 'mud turtle',
+ 36: 'terrapin',
+ 37: 'box turtle, box tortoise',
+ 38: 'banded gecko',
+ 39: 'common iguana, iguana, Iguana iguana',
+ 40: 'American chameleon, anole, Anolis carolinensis',
+ 41: 'whiptail, whiptail lizard',
+ 42: 'agama',
+ 43: 'frilled lizard, Chlamydosaurus kingi',
+ 44: 'alligator lizard',
+ 45: 'Gila monster, Heloderma suspectum',
+ 46: 'green lizard, Lacerta viridis',
+ 47: 'African chameleon, Chamaeleo chamaeleon',
+ 48: 'Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis',
+ 49: 'African crocodile, Nile crocodile, Crocodylus niloticus',
+ 50: 'American alligator, Alligator mississipiensis',
+ 51: 'triceratops',
+ 52: 'thunder snake, worm snake, Carphophis amoenus',
+ 53: 'ringneck snake, ring-necked snake, ring snake',
+ 54: 'hognose snake, puff adder, sand viper',
+ 55: 'green snake, grass snake',
+ 56: 'king snake, kingsnake',
+ 57: 'garter snake, grass snake',
+ 58: 'water snake',
+ 59: 'vine snake',
+ 60: 'night snake, Hypsiglena torquata',
+ 61: 'boa constrictor, Constrictor constrictor',
+ 62: 'rock python, rock snake, Python sebae',
+ 63: 'Indian cobra, Naja naja',
+ 64: 'green mamba',
+ 65: 'sea snake',
+ 66: 'horned viper, cerastes, sand viper, horned asp, Cerastes cornutus',
+ 67: 'diamondback, diamondback rattlesnake, Crotalus adamanteus',
+ 68: 'sidewinder, horned rattlesnake, Crotalus cerastes',
+ 69: 'trilobite',
+ 70: 'harvestman, daddy longlegs, Phalangium opilio',
+ 71: 'scorpion',
+ 72: 'black and gold garden spider, Argiope aurantia',
+ 73: 'barn spider, Araneus cavaticus',
+ 74: 'garden spider, Aranea diademata',
+ 75: 'black widow, Latrodectus mactans',
+ 76: 'tarantula',
+ 77: 'wolf spider, hunting spider',
+ 78: 'tick',
+ 79: 'centipede',
+ 80: 'black grouse',
+ 81: 'ptarmigan',
+ 82: 'ruffed grouse, partridge, Bonasa umbellus',
+ 83: 'prairie chicken, prairie grouse, prairie fowl',
+ 84: 'peacock',
+ 85: 'quail',
+ 86: 'partridge',
+ 87: 'African grey, African gray, Psittacus erithacus',
+ 88: 'macaw',
+ 89: 'sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita',
+ 90: 'lorikeet',
+ 91: 'coucal',
+ 92: 'bee eater',
+ 93: 'hornbill',
+ 94: 'hummingbird',
+ 95: 'jacamar',
+ 96: 'toucan',
+ 97: 'drake',
+ 98: 'red-breasted merganser, Mergus serrator',
+ 99: 'goose',
+ 100: 'black swan, Cygnus atratus',
+ 101: 'tusker',
+ 102: 'echidna, spiny anteater, anteater',
+ 103: 'platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus',
+ 104: 'wallaby, brush kangaroo',
+ 105: 'koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus',
+ 106: 'wombat',
+ 107: 'jellyfish',
+ 108: 'sea anemone, anemone',
+ 109: 'brain coral',
+ 110: 'flatworm, platyhelminth',
+ 111: 'nematode, nematode worm, roundworm',
+ 112: 'conch',
+ 113: 'snail',
+ 114: 'slug',
+ 115: 'sea slug, nudibranch',
+ 116: 'chiton, coat-of-mail shell, sea cradle, polyplacophore',
+ 117: 'chambered nautilus, pearly nautilus, nautilus',
+ 118: 'Dungeness crab, Cancer magister',
+ 119: 'rock crab, Cancer irroratus',
+ 120: 'fiddler crab',
+ 121: 'king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica',
+ 122: 'American lobster, Northern lobster, Maine lobster, Homarus americanus',
+ 123: 'spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish',
+ 124: 'crayfish, crawfish, crawdad, crawdaddy',
+ 125: 'hermit crab',
+ 126: 'isopod',
+ 127: 'white stork, Ciconia ciconia',
+ 128: 'black stork, Ciconia nigra',
+ 129: 'spoonbill',
+ 130: 'flamingo',
+ 131: 'little blue heron, Egretta caerulea',
+ 132: 'American egret, great white heron, Egretta albus',
+ 133: 'bittern',
+ 134: 'crane',
+ 135: 'limpkin, Aramus pictus',
+ 136: 'European gallinule, Porphyrio porphyrio',
+ 137: 'American coot, marsh hen, mud hen, water hen, Fulica americana',
+ 138: 'bustard',
+ 139: 'ruddy turnstone, Arenaria interpres',
+ 140: 'red-backed sandpiper, dunlin, Erolia alpina',
+ 141: 'redshank, Tringa totanus',
+ 142: 'dowitcher',
+ 143: 'oystercatcher, oyster catcher',
+ 144: 'pelican',
+ 145: 'king penguin, Aptenodytes patagonica',
+ 146: 'albatross, mollymawk',
+ 147: 'grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus',
+ 148: 'killer whale, killer, orca, grampus, sea wolf, Orcinus orca',
+ 149: 'dugong, Dugong dugon',
+ 150: 'sea lion',
+ 151: 'Chihuahua',
+ 152: 'Japanese spaniel',
+ 153: 'Maltese dog, Maltese terrier, Maltese',
+ 154: 'Pekinese, Pekingese, Peke',
+ 155: 'Shih-Tzu',
+ 156: 'Blenheim spaniel',
+ 157: 'papillon',
+ 158: 'toy terrier',
+ 159: 'Rhodesian ridgeback',
+ 160: 'Afghan hound, Afghan',
+ 161: 'basset, basset hound',
+ 162: 'beagle',
+ 163: 'bloodhound, sleuthhound',
+ 164: 'bluetick',
+ 165: 'black-and-tan coonhound',
+ 166: 'Walker hound, Walker foxhound',
+ 167: 'English foxhound',
+ 168: 'redbone',
+ 169: 'borzoi, Russian wolfhound',
+ 170: 'Irish wolfhound',
+ 171: 'Italian greyhound',
+ 172: 'whippet',
+ 173: 'Ibizan hound, Ibizan Podenco',
+ 174: 'Norwegian elkhound, elkhound',
+ 175: 'otterhound, otter hound',
+ 176: 'Saluki, gazelle hound',
+ 177: 'Scottish deerhound, deerhound',
+ 178: 'Weimaraner',
+ 179: 'Staffordshire bullterrier, Staffordshire bull terrier',
+ 180: 'American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier',
+ 181: 'Bedlington terrier',
+ 182: 'Border terrier',
+ 183: 'Kerry blue terrier',
+ 184: 'Irish terrier',
+ 185: 'Norfolk terrier',
+ 186: 'Norwich terrier',
+ 187: 'Yorkshire terrier',
+ 188: 'wire-haired fox terrier',
+ 189: 'Lakeland terrier',
+ 190: 'Sealyham terrier, Sealyham',
+ 191: 'Airedale, Airedale terrier',
+ 192: 'cairn, cairn terrier',
+ 193: 'Australian terrier',
+ 194: 'Dandie Dinmont, Dandie Dinmont terrier',
+ 195: 'Boston bull, Boston terrier',
+ 196: 'miniature schnauzer',
+ 197: 'giant schnauzer',
+ 198: 'standard schnauzer',
+ 199: 'Scotch terrier, Scottish terrier, Scottie',
+ 200: 'Tibetan terrier, chrysanthemum dog',
+ 201: 'silky terrier, Sydney silky',
+ 202: 'soft-coated wheaten terrier',
+ 203: 'West Highland white terrier',
+ 204: 'Lhasa, Lhasa apso',
+ 205: 'flat-coated retriever',
+ 206: 'curly-coated retriever',
+ 207: 'golden retriever',
+ 208: 'Labrador retriever',
+ 209: 'Chesapeake Bay retriever',
+ 210: 'German short-haired pointer',
+ 211: 'vizsla, Hungarian pointer',
+ 212: 'English setter',
+ 213: 'Irish setter, red setter',
+ 214: 'Gordon setter',
+ 215: 'Brittany spaniel',
+ 216: 'clumber, clumber spaniel',
+ 217: 'English springer, English springer spaniel',
+ 218: 'Welsh springer spaniel',
+ 219: 'cocker spaniel, English cocker spaniel, cocker',
+ 220: 'Sussex spaniel',
+ 221: 'Irish water spaniel',
+ 222: 'kuvasz',
+ 223: 'schipperke',
+ 224: 'groenendael',
+ 225: 'malinois',
+ 226: 'briard',
+ 227: 'kelpie',
+ 228: 'komondor',
+ 229: 'Old English sheepdog, bobtail',
+ 230: 'Shetland sheepdog, Shetland sheep dog, Shetland',
+ 231: 'collie',
+ 232: 'Border collie',
+ 233: 'Bouvier des Flandres, Bouviers des Flandres',
+ 234: 'Rottweiler',
+ 235: 'German shepherd, German shepherd dog, German police dog, alsatian',
+ 236: 'Doberman, Doberman pinscher',
+ 237: 'miniature pinscher',
+ 238: 'Greater Swiss Mountain dog',
+ 239: 'Bernese mountain dog',
+ 240: 'Appenzeller',
+ 241: 'EntleBucher',
+ 242: 'boxer',
+ 243: 'bull mastiff',
+ 244: 'Tibetan mastiff',
+ 245: 'French bulldog',
+ 246: 'Great Dane',
+ 247: 'Saint Bernard, St Bernard',
+ 248: 'Eskimo dog, husky',
+ 249: 'malamute, malemute, Alaskan malamute',
+ 250: 'Siberian husky',
+ 251: 'dalmatian, coach dog, carriage dog',
+ 252: 'affenpinscher, monkey pinscher, monkey dog',
+ 253: 'basenji',
+ 254: 'pug, pug-dog',
+ 255: 'Leonberg',
+ 256: 'Newfoundland, Newfoundland dog',
+ 257: 'Great Pyrenees',
+ 258: 'Samoyed, Samoyede',
+ 259: 'Pomeranian',
+ 260: 'chow, chow chow',
+ 261: 'keeshond',
+ 262: 'Brabancon griffon',
+ 263: 'Pembroke, Pembroke Welsh corgi',
+ 264: 'Cardigan, Cardigan Welsh corgi',
+ 265: 'toy poodle',
+ 266: 'miniature poodle',
+ 267: 'standard poodle',
+ 268: 'Mexican hairless',
+ 269: 'timber wolf, grey wolf, gray wolf, Canis lupus',
+ 270: 'white wolf, Arctic wolf, Canis lupus tundrarum',
+ 271: 'red wolf, maned wolf, Canis rufus, Canis niger',
+ 272: 'coyote, prairie wolf, brush wolf, Canis latrans',
+ 273: 'dingo, warrigal, warragal, Canis dingo',
+ 274: 'dhole, Cuon alpinus',
+ 275: 'African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus',
+ 276: 'hyena, hyaena',
+ 277: 'red fox, Vulpes vulpes',
+ 278: 'kit fox, Vulpes macrotis',
+ 279: 'Arctic fox, white fox, Alopex lagopus',
+ 280: 'grey fox, gray fox, Urocyon cinereoargenteus',
+ 281: 'tabby, tabby cat',
+ 282: 'tiger cat',
+ 283: 'Persian cat',
+ 284: 'Siamese cat, Siamese',
+ 285: 'Egyptian cat',
+ 286: 'cougar, puma, catamount, mountain lion, painter, panther, Felis concolor',
+ 287: 'lynx, catamount',
+ 288: 'leopard, Panthera pardus',
+ 289: 'snow leopard, ounce, Panthera uncia',
+ 290: 'jaguar, panther, Panthera onca, Felis onca',
+ 291: 'lion, king of beasts, Panthera leo',
+ 292: 'tiger, Panthera tigris',
+ 293: 'cheetah, chetah, Acinonyx jubatus',
+ 294: 'brown bear, bruin, Ursus arctos',
+ 295: 'American black bear, black bear, Ursus americanus, Euarctos americanus',
+ 296: 'ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus',
+ 297: 'sloth bear, Melursus ursinus, Ursus ursinus',
+ 298: 'mongoose',
+ 299: 'meerkat, mierkat',
+ 300: 'tiger beetle',
+ 301: 'ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle',
+ 302: 'ground beetle, carabid beetle',
+ 303: 'long-horned beetle, longicorn, longicorn beetle',
+ 304: 'leaf beetle, chrysomelid',
+ 305: 'dung beetle',
+ 306: 'rhinoceros beetle',
+ 307: 'weevil',
+ 308: 'fly',
+ 309: 'bee',
+ 310: 'ant, emmet, pismire',
+ 311: 'grasshopper, hopper',
+ 312: 'cricket',
+ 313: 'walking stick, walkingstick, stick insect',
+ 314: 'cockroach, roach',
+ 315: 'mantis, mantid',
+ 316: 'cicada, cicala',
+ 317: 'leafhopper',
+ 318: 'lacewing, lacewing fly',
+ 319: "dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk",
+ 320: 'damselfly',
+ 321: 'admiral',
+ 322: 'ringlet, ringlet butterfly',
+ 323: 'monarch, monarch butterfly, milkweed butterfly, Danaus plexippus',
+ 324: 'cabbage butterfly',
+ 325: 'sulphur butterfly, sulfur butterfly',
+ 326: 'lycaenid, lycaenid butterfly',
+ 327: 'starfish, sea star',
+ 328: 'sea urchin',
+ 329: 'sea cucumber, holothurian',
+ 330: 'wood rabbit, cottontail, cottontail rabbit',
+ 331: 'hare',
+ 332: 'Angora, Angora rabbit',
+ 333: 'hamster',
+ 334: 'porcupine, hedgehog',
+ 335: 'fox squirrel, eastern fox squirrel, Sciurus niger',
+ 336: 'marmot',
+ 337: 'beaver',
+ 338: 'guinea pig, Cavia cobaya',
+ 339: 'sorrel',
+ 340: 'zebra',
+ 341: 'hog, pig, grunter, squealer, Sus scrofa',
+ 342: 'wild boar, boar, Sus scrofa',
+ 343: 'warthog',
+ 344: 'hippopotamus, hippo, river horse, Hippopotamus amphibius',
+ 345: 'ox',
+ 346: 'water buffalo, water ox, Asiatic buffalo, Bubalus bubalis',
+ 347: 'bison',
+ 348: 'ram, tup',
+ 349: 'bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis',
+ 350: 'ibex, Capra ibex',
+ 351: 'hartebeest',
+ 352: 'impala, Aepyceros melampus',
+ 353: 'gazelle',
+ 354: 'Arabian camel, dromedary, Camelus dromedarius',
+ 355: 'llama',
+ 356: 'weasel',
+ 357: 'mink',
+ 358: 'polecat, fitch, foulmart, foumart, Mustela putorius',
+ 359: 'black-footed ferret, ferret, Mustela nigripes',
+ 360: 'otter',
+ 361: 'skunk, polecat, wood pussy',
+ 362: 'badger',
+ 363: 'armadillo',
+ 364: 'three-toed sloth, ai, Bradypus tridactylus',
+ 365: 'orangutan, orang, orangutang, Pongo pygmaeus',
+ 366: 'gorilla, Gorilla gorilla',
+ 367: 'chimpanzee, chimp, Pan troglodytes',
+ 368: 'gibbon, Hylobates lar',
+ 369: 'siamang, Hylobates syndactylus, Symphalangus syndactylus',
+ 370: 'guenon, guenon monkey',
+ 371: 'patas, hussar monkey, Erythrocebus patas',
+ 372: 'baboon',
+ 373: 'macaque',
+ 374: 'langur',
+ 375: 'colobus, colobus monkey',
+ 376: 'proboscis monkey, Nasalis larvatus',
+ 377: 'marmoset',
+ 378: 'capuchin, ringtail, Cebus capucinus',
+ 379: 'howler monkey, howler',
+ 380: 'titi, titi monkey',
+ 381: 'spider monkey, Ateles geoffroyi',
+ 382: 'squirrel monkey, Saimiri sciureus',
+ 383: 'Madagascar cat, ring-tailed lemur, Lemur catta',
+ 384: 'indri, indris, Indri indri, Indri brevicaudatus',
+ 385: 'Indian elephant, Elephas maximus',
+ 386: 'African elephant, Loxodonta africana',
+ 387: 'lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens',
+ 388: 'giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca',
+ 389: 'barracouta, snoek',
+ 390: 'eel',
+ 391: 'coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch',
+ 392: 'rock beauty, Holocanthus tricolor',
+ 393: 'anemone fish',
+ 394: 'sturgeon',
+ 395: 'gar, garfish, garpike, billfish, Lepisosteus osseus',
+ 396: 'lionfish',
+ 397: 'puffer, pufferfish, blowfish, globefish',
+ 398: 'abacus',
+ 399: 'abaya',
+ 400: "academic gown, academic robe, judge's robe",
+ 401: 'accordion, piano accordion, squeeze box',
+ 402: 'acoustic guitar',
+ 403: 'aircraft carrier, carrier, flattop, attack aircraft carrier',
+ 404: 'airliner',
+ 405: 'airship, dirigible',
+ 406: 'altar',
+ 407: 'ambulance',
+ 408: 'amphibian, amphibious vehicle',
+ 409: 'analog clock',
+ 410: 'apiary, bee house',
+ 411: 'apron',
+ 412: 'ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin',
+ 413: 'assault rifle, assault gun',
+ 414: 'backpack, back pack, knapsack, packsack, rucksack, haversack',
+ 415: 'bakery, bakeshop, bakehouse',
+ 416: 'balance beam, beam',
+ 417: 'balloon',
+ 418: 'ballpoint, ballpoint pen, ballpen, Biro',
+ 419: 'Band Aid',
+ 420: 'banjo',
+ 421: 'bannister, banister, balustrade, balusters, handrail',
+ 422: 'barbell',
+ 423: 'barber chair',
+ 424: 'barbershop',
+ 425: 'barn',
+ 426: 'barometer',
+ 427: 'barrel, cask',
+ 428: 'barrow, garden cart, lawn cart, wheelbarrow',
+ 429: 'baseball',
+ 430: 'basketball',
+ 431: 'bassinet',
+ 432: 'bassoon',
+ 433: 'bathing cap, swimming cap',
+ 434: 'bath towel',
+ 435: 'bathtub, bathing tub, bath, tub',
+ 436: 'beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon',
+ 437: 'beacon, lighthouse, beacon light, pharos',
+ 438: 'beaker',
+ 439: 'bearskin, busby, shako',
+ 440: 'beer bottle',
+ 441: 'beer glass',
+ 442: 'bell cote, bell cot',
+ 443: 'bib',
+ 444: 'bicycle-built-for-two, tandem bicycle, tandem',
+ 445: 'bikini, two-piece',
+ 446: 'binder, ring-binder',
+ 447: 'binoculars, field glasses, opera glasses',
+ 448: 'birdhouse',
+ 449: 'boathouse',
+ 450: 'bobsled, bobsleigh, bob',
+ 451: 'bolo tie, bolo, bola tie, bola',
+ 452: 'bonnet, poke bonnet',
+ 453: 'bookcase',
+ 454: 'bookshop, bookstore, bookstall',
+ 455: 'bottlecap',
+ 456: 'bow',
+ 457: 'bow tie, bow-tie, bowtie',
+ 458: 'brass, memorial tablet, plaque',
+ 459: 'brassiere, bra, bandeau',
+ 460: 'breakwater, groin, groyne, mole, bulwark, seawall, jetty',
+ 461: 'breastplate, aegis, egis',
+ 462: 'broom',
+ 463: 'bucket, pail',
+ 464: 'buckle',
+ 465: 'bulletproof vest',
+ 466: 'bullet train, bullet',
+ 467: 'butcher shop, meat market',
+ 468: 'cab, hack, taxi, taxicab',
+ 469: 'caldron, cauldron',
+ 470: 'candle, taper, wax light',
+ 471: 'cannon',
+ 472: 'canoe',
+ 473: 'can opener, tin opener',
+ 474: 'cardigan',
+ 475: 'car mirror',
+ 476: 'carousel, carrousel, merry-go-round, roundabout, whirligig',
+ 477: "carpenter's kit, tool kit",
+ 478: 'carton',
+ 479: 'car wheel',
+ 480: 'cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM',
+ 481: 'cassette',
+ 482: 'cassette player',
+ 483: 'castle',
+ 484: 'catamaran',
+ 485: 'CD player',
+ 486: 'cello, violoncello',
+ 487: 'cellular telephone, cellular phone, cellphone, cell, mobile phone',
+ 488: 'chain',
+ 489: 'chainlink fence',
+ 490: 'chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour',
+ 491: 'chain saw, chainsaw',
+ 492: 'chest',
+ 493: 'chiffonier, commode',
+ 494: 'chime, bell, gong',
+ 495: 'china cabinet, china closet',
+ 496: 'Christmas stocking',
+ 497: 'church, church building',
+ 498: 'cinema, movie theater, movie theatre, movie house, picture palace',
+ 499: 'cleaver, meat cleaver, chopper',
+ 500: 'cliff dwelling',
+ 501: 'cloak',
+ 502: 'clog, geta, patten, sabot',
+ 503: 'cocktail shaker',
+ 504: 'coffee mug',
+ 505: 'coffeepot',
+ 506: 'coil, spiral, volute, whorl, helix',
+ 507: 'combination lock',
+ 508: 'computer keyboard, keypad',
+ 509: 'confectionery, confectionary, candy store',
+ 510: 'container ship, containership, container vessel',
+ 511: 'convertible',
+ 512: 'corkscrew, bottle screw',
+ 513: 'cornet, horn, trumpet, trump',
+ 514: 'cowboy boot',
+ 515: 'cowboy hat, ten-gallon hat',
+ 516: 'cradle',
+ 517: 'crane',
+ 518: 'crash helmet',
+ 519: 'crate',
+ 520: 'crib, cot',
+ 521: 'Crock Pot',
+ 522: 'croquet ball',
+ 523: 'crutch',
+ 524: 'cuirass',
+ 525: 'dam, dike, dyke',
+ 526: 'desk',
+ 527: 'desktop computer',
+ 528: 'dial telephone, dial phone',
+ 529: 'diaper, nappy, napkin',
+ 530: 'digital clock',
+ 531: 'digital watch',
+ 532: 'dining table, board',
+ 533: 'dishrag, dishcloth',
+ 534: 'dishwasher, dish washer, dishwashing machine',
+ 535: 'disk brake, disc brake',
+ 536: 'dock, dockage, docking facility',
+ 537: 'dogsled, dog sled, dog sleigh',
+ 538: 'dome',
+ 539: 'doormat, welcome mat',
+ 540: 'drilling platform, offshore rig',
+ 541: 'drum, membranophone, tympan',
+ 542: 'drumstick',
+ 543: 'dumbbell',
+ 544: 'Dutch oven',
+ 545: 'electric fan, blower',
+ 546: 'electric guitar',
+ 547: 'electric locomotive',
+ 548: 'entertainment center',
+ 549: 'envelope',
+ 550: 'espresso maker',
+ 551: 'face powder',
+ 552: 'feather boa, boa',
+ 553: 'file, file cabinet, filing cabinet',
+ 554: 'fireboat',
+ 555: 'fire engine, fire truck',
+ 556: 'fire screen, fireguard',
+ 557: 'flagpole, flagstaff',
+ 558: 'flute, transverse flute',
+ 559: 'folding chair',
+ 560: 'football helmet',
+ 561: 'forklift',
+ 562: 'fountain',
+ 563: 'fountain pen',
+ 564: 'four-poster',
+ 565: 'freight car',
+ 566: 'French horn, horn',
+ 567: 'frying pan, frypan, skillet',
+ 568: 'fur coat',
+ 569: 'garbage truck, dustcart',
+ 570: 'gasmask, respirator, gas helmet',
+ 571: 'gas pump, gasoline pump, petrol pump, island dispenser',
+ 572: 'goblet',
+ 573: 'go-kart',
+ 574: 'golf ball',
+ 575: 'golfcart, golf cart',
+ 576: 'gondola',
+ 577: 'gong, tam-tam',
+ 578: 'gown',
+ 579: 'grand piano, grand',
+ 580: 'greenhouse, nursery, glasshouse',
+ 581: 'grille, radiator grille',
+ 582: 'grocery store, grocery, food market, market',
+ 583: 'guillotine',
+ 584: 'hair slide',
+ 585: 'hair spray',
+ 586: 'half track',
+ 587: 'hammer',
+ 588: 'hamper',
+ 589: 'hand blower, blow dryer, blow drier, hair dryer, hair drier',
+ 590: 'hand-held computer, hand-held microcomputer',
+ 591: 'handkerchief, hankie, hanky, hankey',
+ 592: 'hard disc, hard disk, fixed disk',
+ 593: 'harmonica, mouth organ, harp, mouth harp',
+ 594: 'harp',
+ 595: 'harvester, reaper',
+ 596: 'hatchet',
+ 597: 'holster',
+ 598: 'home theater, home theatre',
+ 599: 'honeycomb',
+ 600: 'hook, claw',
+ 601: 'hoopskirt, crinoline',
+ 602: 'horizontal bar, high bar',
+ 603: 'horse cart, horse-cart',
+ 604: 'hourglass',
+ 605: 'iPod',
+ 606: 'iron, smoothing iron',
+ 607: "jack-o'-lantern",
+ 608: 'jean, blue jean, denim',
+ 609: 'jeep, landrover',
+ 610: 'jersey, T-shirt, tee shirt',
+ 611: 'jigsaw puzzle',
+ 612: 'jinrikisha, ricksha, rickshaw',
+ 613: 'joystick',
+ 614: 'kimono',
+ 615: 'knee pad',
+ 616: 'knot',
+ 617: 'lab coat, laboratory coat',
+ 618: 'ladle',
+ 619: 'lampshade, lamp shade',
+ 620: 'laptop, laptop computer',
+ 621: 'lawn mower, mower',
+ 622: 'lens cap, lens cover',
+ 623: 'letter opener, paper knife, paperknife',
+ 624: 'library',
+ 625: 'lifeboat',
+ 626: 'lighter, light, igniter, ignitor',
+ 627: 'limousine, limo',
+ 628: 'liner, ocean liner',
+ 629: 'lipstick, lip rouge',
+ 630: 'Loafer',
+ 631: 'lotion',
+ 632: 'loudspeaker, speaker, speaker unit, loudspeaker system, speaker system',
+ 633: "loupe, jeweler's loupe",
+ 634: 'lumbermill, sawmill',
+ 635: 'magnetic compass',
+ 636: 'mailbag, postbag',
+ 637: 'mailbox, letter box',
+ 638: 'maillot',
+ 639: 'maillot, tank suit',
+ 640: 'manhole cover',
+ 641: 'maraca',
+ 642: 'marimba, xylophone',
+ 643: 'mask',
+ 644: 'matchstick',
+ 645: 'maypole',
+ 646: 'maze, labyrinth',
+ 647: 'measuring cup',
+ 648: 'medicine chest, medicine cabinet',
+ 649: 'megalith, megalithic structure',
+ 650: 'microphone, mike',
+ 651: 'microwave, microwave oven',
+ 652: 'military uniform',
+ 653: 'milk can',
+ 654: 'minibus',
+ 655: 'miniskirt, mini',
+ 656: 'minivan',
+ 657: 'missile',
+ 658: 'mitten',
+ 659: 'mixing bowl',
+ 660: 'mobile home, manufactured home',
+ 661: 'Model T',
+ 662: 'modem',
+ 663: 'monastery',
+ 664: 'monitor',
+ 665: 'moped',
+ 666: 'mortar',
+ 667: 'mortarboard',
+ 668: 'mosque',
+ 669: 'mosquito net',
+ 670: 'motor scooter, scooter',
+ 671: 'mountain bike, all-terrain bike, off-roader',
+ 672: 'mountain tent',
+ 673: 'mouse, computer mouse',
+ 674: 'mousetrap',
+ 675: 'moving van',
+ 676: 'muzzle',
+ 677: 'nail',
+ 678: 'neck brace',
+ 679: 'necklace',
+ 680: 'nipple',
+ 681: 'notebook, notebook computer',
+ 682: 'obelisk',
+ 683: 'oboe, hautboy, hautbois',
+ 684: 'ocarina, sweet potato',
+ 685: 'odometer, hodometer, mileometer, milometer',
+ 686: 'oil filter',
+ 687: 'organ, pipe organ',
+ 688: 'oscilloscope, scope, cathode-ray oscilloscope, CRO',
+ 689: 'overskirt',
+ 690: 'oxcart',
+ 691: 'oxygen mask',
+ 692: 'packet',
+ 693: 'paddle, boat paddle',
+ 694: 'paddlewheel, paddle wheel',
+ 695: 'padlock',
+ 696: 'paintbrush',
+ 697: "pajama, pyjama, pj's, jammies",
+ 698: 'palace',
+ 699: 'panpipe, pandean pipe, syrinx',
+ 700: 'paper towel',
+ 701: 'parachute, chute',
+ 702: 'parallel bars, bars',
+ 703: 'park bench',
+ 704: 'parking meter',
+ 705: 'passenger car, coach, carriage',
+ 706: 'patio, terrace',
+ 707: 'pay-phone, pay-station',
+ 708: 'pedestal, plinth, footstall',
+ 709: 'pencil box, pencil case',
+ 710: 'pencil sharpener',
+ 711: 'perfume, essence',
+ 712: 'Petri dish',
+ 713: 'photocopier',
+ 714: 'pick, plectrum, plectron',
+ 715: 'pickelhaube',
+ 716: 'picket fence, paling',
+ 717: 'pickup, pickup truck',
+ 718: 'pier',
+ 719: 'piggy bank, penny bank',
+ 720: 'pill bottle',
+ 721: 'pillow',
+ 722: 'ping-pong ball',
+ 723: 'pinwheel',
+ 724: 'pirate, pirate ship',
+ 725: 'pitcher, ewer',
+ 726: "plane, carpenter's plane, woodworking plane",
+ 727: 'planetarium',
+ 728: 'plastic bag',
+ 729: 'plate rack',
+ 730: 'plow, plough',
+ 731: "plunger, plumber's helper",
+ 732: 'Polaroid camera, Polaroid Land camera',
+ 733: 'pole',
+ 734: 'police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria',
+ 735: 'poncho',
+ 736: 'pool table, billiard table, snooker table',
+ 737: 'pop bottle, soda bottle',
+ 738: 'pot, flowerpot',
+ 739: "potter's wheel",
+ 740: 'power drill',
+ 741: 'prayer rug, prayer mat',
+ 742: 'printer',
+ 743: 'prison, prison house',
+ 744: 'projectile, missile',
+ 745: 'projector',
+ 746: 'puck, hockey puck',
+ 747: 'punching bag, punch bag, punching ball, punchball',
+ 748: 'purse',
+ 749: 'quill, quill pen',
+ 750: 'quilt, comforter, comfort, puff',
+ 751: 'racer, race car, racing car',
+ 752: 'racket, racquet',
+ 753: 'radiator',
+ 754: 'radio, wireless',
+ 755: 'radio telescope, radio reflector',
+ 756: 'rain barrel',
+ 757: 'recreational vehicle, RV, R.V.',
+ 758: 'reel',
+ 759: 'reflex camera',
+ 760: 'refrigerator, icebox',
+ 761: 'remote control, remote',
+ 762: 'restaurant, eating house, eating place, eatery',
+ 763: 'revolver, six-gun, six-shooter',
+ 764: 'rifle',
+ 765: 'rocking chair, rocker',
+ 766: 'rotisserie',
+ 767: 'rubber eraser, rubber, pencil eraser',
+ 768: 'rugby ball',
+ 769: 'rule, ruler',
+ 770: 'running shoe',
+ 771: 'safe',
+ 772: 'safety pin',
+ 773: 'saltshaker, salt shaker',
+ 774: 'sandal',
+ 775: 'sarong',
+ 776: 'sax, saxophone',
+ 777: 'scabbard',
+ 778: 'scale, weighing machine',
+ 779: 'school bus',
+ 780: 'schooner',
+ 781: 'scoreboard',
+ 782: 'screen, CRT screen',
+ 783: 'screw',
+ 784: 'screwdriver',
+ 785: 'seat belt, seatbelt',
+ 786: 'sewing machine',
+ 787: 'shield, buckler',
+ 788: 'shoe shop, shoe-shop, shoe store',
+ 789: 'shoji',
+ 790: 'shopping basket',
+ 791: 'shopping cart',
+ 792: 'shovel',
+ 793: 'shower cap',
+ 794: 'shower curtain',
+ 795: 'ski',
+ 796: 'ski mask',
+ 797: 'sleeping bag',
+ 798: 'slide rule, slipstick',
+ 799: 'sliding door',
+ 800: 'slot, one-armed bandit',
+ 801: 'snorkel',
+ 802: 'snowmobile',
+ 803: 'snowplow, snowplough',
+ 804: 'soap dispenser',
+ 805: 'soccer ball',
+ 806: 'sock',
+ 807: 'solar dish, solar collector, solar furnace',
+ 808: 'sombrero',
+ 809: 'soup bowl',
+ 810: 'space bar',
+ 811: 'space heater',
+ 812: 'space shuttle',
+ 813: 'spatula',
+ 814: 'speedboat',
+ 815: "spider web, spider's web",
+ 816: 'spindle',
+ 817: 'sports car, sport car',
+ 818: 'spotlight, spot',
+ 819: 'stage',
+ 820: 'steam locomotive',
+ 821: 'steel arch bridge',
+ 822: 'steel drum',
+ 823: 'stethoscope',
+ 824: 'stole',
+ 825: 'stone wall',
+ 826: 'stopwatch, stop watch',
+ 827: 'stove',
+ 828: 'strainer',
+ 829: 'streetcar, tram, tramcar, trolley, trolley car',
+ 830: 'stretcher',
+ 831: 'studio couch, day bed',
+ 832: 'stupa, tope',
+ 833: 'submarine, pigboat, sub, U-boat',
+ 834: 'suit, suit of clothes',
+ 835: 'sundial',
+ 836: 'sunglass',
+ 837: 'sunglasses, dark glasses, shades',
+ 838: 'sunscreen, sunblock, sun blocker',
+ 839: 'suspension bridge',
+ 840: 'swab, swob, mop',
+ 841: 'sweatshirt',
+ 842: 'swimming trunks, bathing trunks',
+ 843: 'swing',
+ 844: 'switch, electric switch, electrical switch',
+ 845: 'syringe',
+ 846: 'table lamp',
+ 847: 'tank, army tank, armored combat vehicle, armoured combat vehicle',
+ 848: 'tape player',
+ 849: 'teapot',
+ 850: 'teddy, teddy bear',
+ 851: 'television, television system',
+ 852: 'tennis ball',
+ 853: 'thatch, thatched roof',
+ 854: 'theater curtain, theatre curtain',
+ 855: 'thimble',
+ 856: 'thresher, thrasher, threshing machine',
+ 857: 'throne',
+ 858: 'tile roof',
+ 859: 'toaster',
+ 860: 'tobacco shop, tobacconist shop, tobacconist',
+ 861: 'toilet seat',
+ 862: 'torch',
+ 863: 'totem pole',
+ 864: 'tow truck, tow car, wrecker',
+ 865: 'toyshop',
+ 866: 'tractor',
+ 867: 'trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi',
+ 868: 'tray',
+ 869: 'trench coat',
+ 870: 'tricycle, trike, velocipede',
+ 871: 'trimaran',
+ 872: 'tripod',
+ 873: 'triumphal arch',
+ 874: 'trolleybus, trolley coach, trackless trolley',
+ 875: 'trombone',
+ 876: 'tub, vat',
+ 877: 'turnstile',
+ 878: 'typewriter keyboard',
+ 879: 'umbrella',
+ 880: 'unicycle, monocycle',
+ 881: 'upright, upright piano',
+ 882: 'vacuum, vacuum cleaner',
+ 883: 'vase',
+ 884: 'vault',
+ 885: 'velvet',
+ 886: 'vending machine',
+ 887: 'vestment',
+ 888: 'viaduct',
+ 889: 'violin, fiddle',
+ 890: 'volleyball',
+ 891: 'waffle iron',
+ 892: 'wall clock',
+ 893: 'wallet, billfold, notecase, pocketbook',
+ 894: 'wardrobe, closet, press',
+ 895: 'warplane, military plane',
+ 896: 'washbasin, handbasin, washbowl, lavabo, wash-hand basin',
+ 897: 'washer, automatic washer, washing machine',
+ 898: 'water bottle',
+ 899: 'water jug',
+ 900: 'water tower',
+ 901: 'whiskey jug',
+ 902: 'whistle',
+ 903: 'wig',
+ 904: 'window screen',
+ 905: 'window shade',
+ 906: 'Windsor tie',
+ 907: 'wine bottle',
+ 908: 'wing',
+ 909: 'wok',
+ 910: 'wooden spoon',
+ 911: 'wool, woolen, woollen',
+ 912: 'worm fence, snake fence, snake-rail fence, Virginia fence',
+ 913: 'wreck',
+ 914: 'yawl',
+ 915: 'yurt',
+ 916: 'web site, website, internet site, site',
+ 917: 'comic book',
+ 918: 'crossword puzzle, crossword',
+ 919: 'street sign',
+ 920: 'traffic light, traffic signal, stoplight',
+ 921: 'book jacket, dust cover, dust jacket, dust wrapper',
+ 922: 'menu',
+ 923: 'plate',
+ 924: 'guacamole',
+ 925: 'consomme',
+ 926: 'hot pot, hotpot',
+ 927: 'trifle',
+ 928: 'ice cream, icecream',
+ 929: 'ice lolly, lolly, lollipop, popsicle',
+ 930: 'French loaf',
+ 931: 'bagel, beigel',
+ 932: 'pretzel',
+ 933: 'cheeseburger',
+ 934: 'hotdog, hot dog, red hot',
+ 935: 'mashed potato',
+ 936: 'head cabbage',
+ 937: 'broccoli',
+ 938: 'cauliflower',
+ 939: 'zucchini, courgette',
+ 940: 'spaghetti squash',
+ 941: 'acorn squash',
+ 942: 'butternut squash',
+ 943: 'cucumber, cuke',
+ 944: 'artichoke, globe artichoke',
+ 945: 'bell pepper',
+ 946: 'cardoon',
+ 947: 'mushroom',
+ 948: 'Granny Smith',
+ 949: 'strawberry',
+ 950: 'orange',
+ 951: 'lemon',
+ 952: 'fig',
+ 953: 'pineapple, ananas',
+ 954: 'banana',
+ 955: 'jackfruit, jak, jack',
+ 956: 'custard apple',
+ 957: 'pomegranate',
+ 958: 'hay',
+ 959: 'carbonara',
+ 960: 'chocolate sauce, chocolate syrup',
+ 961: 'dough',
+ 962: 'meat loaf, meatloaf',
+ 963: 'pizza, pizza pie',
+ 964: 'potpie',
+ 965: 'burrito',
+ 966: 'red wine',
+ 967: 'espresso',
+ 968: 'cup',
+ 969: 'eggnog',
+ 970: 'alp',
+ 971: 'bubble',
+ 972: 'cliff, drop, drop-off',
+ 973: 'coral reef',
+ 974: 'geyser',
+ 975: 'lakeside, lakeshore',
+ 976: 'promontory, headland, head, foreland',
+ 977: 'sandbar, sand bar',
+ 978: 'seashore, coast, seacoast, sea-coast',
+ 979: 'valley, vale',
+ 980: 'volcano',
+ 981: 'ballplayer, baseball player',
+ 982: 'groom, bridegroom',
+ 983: 'scuba diver',
+ 984: 'rapeseed',
+ 985: 'daisy',
+ 986: "yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum",
+ 987: 'corn',
+ 988: 'acorn',
+ 989: 'hip, rose hip, rosehip',
+ 990: 'buckeye, horse chestnut, conker',
+ 991: 'coral fungus',
+ 992: 'agaric',
+ 993: 'gyromitra',
+ 994: 'stinkhorn, carrion fungus',
+ 995: 'earthstar',
+ 996: 'hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa',
+ 997: 'bolete',
+ 998: 'ear, spike, capitulum',
+ 999: 'toilet tissue, toilet paper, bathroom tissue'}

labels/imagenet1k-simple-labels.json ADDED Viewed

	@@ -0,0 +1,1000 @@

+["tench",
+"goldfish",
+"great white shark",
+"tiger shark",
+"hammerhead shark",
+"electric ray",
+"stingray",
+"cock",
+"hen",
+"ostrich",
+"brambling",
+"goldfinch",
+"house finch",
+"junco",
+"indigo bunting",
+"American robin",
+"bulbul",
+"jay",
+"magpie",
+"chickadee",
+"American dipper",
+"kite",
+"bald eagle",
+"vulture",
+"great grey owl",
+"fire salamander",
+"smooth newt",
+"newt",
+"spotted salamander",
+"axolotl",
+"American bullfrog",
+"tree frog",
+"tailed frog",
+"loggerhead sea turtle",
+"leatherback sea turtle",
+"mud turtle",
+"terrapin",
+"box turtle",
+"banded gecko",
+"green iguana",
+"Carolina anole",
+"desert grassland whiptail lizard",
+"agama",
+"frilled-necked lizard",
+"alligator lizard",
+"Gila monster",
+"European green lizard",
+"chameleon",
+"Komodo dragon",
+"Nile crocodile",
+"American alligator",
+"triceratops",
+"worm snake",
+"ring-necked snake",
+"eastern hog-nosed snake",
+"smooth green snake",
+"kingsnake",
+"garter snake",
+"water snake",
+"vine snake",
+"night snake",
+"boa constrictor",
+"African rock python",
+"Indian cobra",
+"green mamba",
+"sea snake",
+"Saharan horned viper",
+"eastern diamondback rattlesnake",
+"sidewinder",
+"trilobite",
+"harvestman",
+"scorpion",
+"yellow garden spider",
+"barn spider",
+"European garden spider",
+"southern black widow",
+"tarantula",
+"wolf spider",
+"tick",
+"centipede",
+"black grouse",
+"ptarmigan",
+"ruffed grouse",
+"prairie grouse",
+"peacock",
+"quail",
+"partridge",
+"grey parrot",
+"macaw",
+"sulphur-crested cockatoo",
+"lorikeet",
+"coucal",
+"bee eater",
+"hornbill",
+"hummingbird",
+"jacamar",
+"toucan",
+"duck",
+"red-breasted merganser",
+"goose",
+"black swan",
+"tusker",
+"echidna",
+"platypus",
+"wallaby",
+"koala",
+"wombat",
+"jellyfish",
+"sea anemone",
+"brain coral",
+"flatworm",
+"nematode",
+"conch",
+"snail",
+"slug",
+"sea slug",
+"chiton",
+"chambered nautilus",
+"Dungeness crab",
+"rock crab",
+"fiddler crab",
+"red king crab",
+"American lobster",
+"spiny lobster",
+"crayfish",
+"hermit crab",
+"isopod",
+"white stork",
+"black stork",
+"spoonbill",
+"flamingo",
+"little blue heron",
+"great egret",
+"bittern",
+"crane (bird)",
+"limpkin",
+"common gallinule",
+"American coot",
+"bustard",
+"ruddy turnstone",
+"dunlin",
+"common redshank",
+"dowitcher",
+"oystercatcher",
+"pelican",
+"king penguin",
+"albatross",
+"grey whale",
+"killer whale",
+"dugong",
+"sea lion",
+"Chihuahua",
+"Japanese Chin",
+"Maltese",
+"Pekingese",
+"Shih Tzu",
+"King Charles Spaniel",
+"Papillon",
+"toy terrier",
+"Rhodesian Ridgeback",
+"Afghan Hound",
+"Basset Hound",
+"Beagle",
+"Bloodhound",
+"Bluetick Coonhound",
+"Black and Tan Coonhound",
+"Treeing Walker Coonhound",
+"English foxhound",
+"Redbone Coonhound",
+"borzoi",
+"Irish Wolfhound",
+"Italian Greyhound",
+"Whippet",
+"Ibizan Hound",
+"Norwegian Elkhound",
+"Otterhound",
+"Saluki",
+"Scottish Deerhound",
+"Weimaraner",
+"Staffordshire Bull Terrier",
+"American Staffordshire Terrier",
+"Bedlington Terrier",
+"Border Terrier",
+"Kerry Blue Terrier",
+"Irish Terrier",
+"Norfolk Terrier",
+"Norwich Terrier",
+"Yorkshire Terrier",
+"Wire Fox Terrier",
+"Lakeland Terrier",
+"Sealyham Terrier",
+"Airedale Terrier",
+"Cairn Terrier",
+"Australian Terrier",
+"Dandie Dinmont Terrier",
+"Boston Terrier",
+"Miniature Schnauzer",
+"Giant Schnauzer",
+"Standard Schnauzer",
+"Scottish Terrier",
+"Tibetan Terrier",
+"Australian Silky Terrier",
+"Soft-coated Wheaten Terrier",
+"West Highland White Terrier",
+"Lhasa Apso",
+"Flat-Coated Retriever",
+"Curly-coated Retriever",
+"Golden Retriever",
+"Labrador Retriever",
+"Chesapeake Bay Retriever",
+"German Shorthaired Pointer",
+"Vizsla",
+"English Setter",
+"Irish Setter",
+"Gordon Setter",
+"Brittany",
+"Clumber Spaniel",
+"English Springer Spaniel",
+"Welsh Springer Spaniel",
+"Cocker Spaniels",
+"Sussex Spaniel",
+"Irish Water Spaniel",
+"Kuvasz",
+"Schipperke",
+"Groenendael",
+"Malinois",
+"Briard",
+"Australian Kelpie",
+"Komondor",
+"Old English Sheepdog",
+"Shetland Sheepdog",
+"collie",
+"Border Collie",
+"Bouvier des Flandres",
+"Rottweiler",
+"German Shepherd Dog",
+"Dobermann",
+"Miniature Pinscher",
+"Greater Swiss Mountain Dog",
+"Bernese Mountain Dog",
+"Appenzeller Sennenhund",
+"Entlebucher Sennenhund",
+"Boxer",
+"Bullmastiff",
+"Tibetan Mastiff",
+"French Bulldog",
+"Great Dane",
+"St. Bernard",
+"husky",
+"Alaskan Malamute",
+"Siberian Husky",
+"Dalmatian",
+"Affenpinscher",
+"Basenji",
+"pug",
+"Leonberger",
+"Newfoundland",
+"Pyrenean Mountain Dog",
+"Samoyed",
+"Pomeranian",
+"Chow Chow",
+"Keeshond",
+"Griffon Bruxellois",
+"Pembroke Welsh Corgi",
+"Cardigan Welsh Corgi",
+"Toy Poodle",
+"Miniature Poodle",
+"Standard Poodle",
+"Mexican hairless dog",
+"grey wolf",
+"Alaskan tundra wolf",
+"red wolf",
+"coyote",
+"dingo",
+"dhole",
+"African wild dog",
+"hyena",
+"red fox",
+"kit fox",
+"Arctic fox",
+"grey fox",
+"tabby cat",
+"tiger cat",
+"Persian cat",
+"Siamese cat",
+"Egyptian Mau",
+"cougar",
+"lynx",
+"leopard",
+"snow leopard",
+"jaguar",
+"lion",
+"tiger",
+"cheetah",
+"brown bear",
+"American black bear",
+"polar bear",
+"sloth bear",
+"mongoose",
+"meerkat",
+"tiger beetle",
+"ladybug",
+"ground beetle",
+"longhorn beetle",
+"leaf beetle",
+"dung beetle",
+"rhinoceros beetle",
+"weevil",
+"fly",
+"bee",
+"ant",
+"grasshopper",
+"cricket",
+"stick insect",
+"cockroach",
+"mantis",
+"cicada",
+"leafhopper",
+"lacewing",
+"dragonfly",
+"damselfly",
+"red admiral",
+"ringlet",
+"monarch butterfly",
+"small white",
+"sulphur butterfly",
+"gossamer-winged butterfly",
+"starfish",
+"sea urchin",
+"sea cucumber",
+"cottontail rabbit",
+"hare",
+"Angora rabbit",
+"hamster",
+"porcupine",
+"fox squirrel",
+"marmot",
+"beaver",
+"guinea pig",
+"common sorrel",
+"zebra",
+"pig",
+"wild boar",
+"warthog",
+"hippopotamus",
+"ox",
+"water buffalo",
+"bison",
+"ram",
+"bighorn sheep",
+"Alpine ibex",
+"hartebeest",
+"impala",
+"gazelle",
+"dromedary",
+"llama",
+"weasel",
+"mink",
+"European polecat",
+"black-footed ferret",
+"otter",
+"skunk",
+"badger",
+"armadillo",
+"three-toed sloth",
+"orangutan",
+"gorilla",
+"chimpanzee",
+"gibbon",
+"siamang",
+"guenon",
+"patas monkey",
+"baboon",
+"macaque",
+"langur",
+"black-and-white colobus",
+"proboscis monkey",
+"marmoset",
+"white-headed capuchin",
+"howler monkey",
+"titi",
+"Geoffroy's spider monkey",
+"common squirrel monkey",
+"ring-tailed lemur",
+"indri",
+"Asian elephant",
+"African bush elephant",
+"red panda",
+"giant panda",
+"snoek",
+"eel",
+"coho salmon",
+"rock beauty",
+"clownfish",
+"sturgeon",
+"garfish",
+"lionfish",
+"pufferfish",
+"abacus",
+"abaya",
+"academic gown",
+"accordion",
+"acoustic guitar",
+"aircraft carrier",
+"airliner",
+"airship",
+"altar",
+"ambulance",
+"amphibious vehicle",
+"analog clock",
+"apiary",
+"apron",
+"waste container",
+"assault rifle",
+"backpack",
+"bakery",
+"balance beam",
+"balloon",
+"ballpoint pen",
+"Band-Aid",
+"banjo",
+"baluster",
+"barbell",
+"barber chair",
+"barbershop",
+"barn",
+"barometer",
+"barrel",
+"wheelbarrow",
+"baseball",
+"basketball",
+"bassinet",
+"bassoon",
+"swimming cap",
+"bath towel",
+"bathtub",
+"station wagon",
+"lighthouse",
+"beaker",
+"military cap",
+"beer bottle",
+"beer glass",
+"bell-cot",
+"bib",
+"tandem bicycle",
+"bikini",
+"ring binder",
+"binoculars",
+"birdhouse",
+"boathouse",
+"bobsleigh",
+"bolo tie",
+"poke bonnet",
+"bookcase",
+"bookstore",
+"bottle cap",
+"bow",
+"bow tie",
+"brass",
+"bra",
+"breakwater",
+"breastplate",
+"broom",
+"bucket",
+"buckle",
+"bulletproof vest",
+"high-speed train",
+"butcher shop",
+"taxicab",
+"cauldron",
+"candle",
+"cannon",
+"canoe",
+"can opener",
+"cardigan",
+"car mirror",
+"carousel",
+"tool kit",
+"carton",
+"car wheel",
+"automated teller machine",
+"cassette",
+"cassette player",
+"castle",
+"catamaran",
+"CD player",
+"cello",
+"mobile phone",
+"chain",
+"chain-link fence",
+"chain mail",
+"chainsaw",
+"chest",
+"chiffonier",
+"chime",
+"china cabinet",
+"Christmas stocking",
+"church",
+"movie theater",
+"cleaver",
+"cliff dwelling",
+"cloak",
+"clogs",
+"cocktail shaker",
+"coffee mug",
+"coffeemaker",
+"coil",
+"combination lock",
+"computer keyboard",
+"confectionery store",
+"container ship",
+"convertible",
+"corkscrew",
+"cornet",
+"cowboy boot",
+"cowboy hat",
+"cradle",
+"crane (machine)",
+"crash helmet",
+"crate",
+"infant bed",
+"Crock Pot",
+"croquet ball",
+"crutch",
+"cuirass",
+"dam",
+"desk",
+"desktop computer",
+"rotary dial telephone",
+"diaper",
+"digital clock",
+"digital watch",
+"dining table",
+"dishcloth",
+"dishwasher",
+"disc brake",
+"dock",
+"dog sled",
+"dome",
+"doormat",
+"drilling rig",
+"drum",
+"drumstick",
+"dumbbell",
+"Dutch oven",
+"electric fan",
+"electric guitar",
+"electric locomotive",
+"entertainment center",
+"envelope",
+"espresso machine",
+"face powder",
+"feather boa",
+"filing cabinet",
+"fireboat",
+"fire engine",
+"fire screen sheet",
+"flagpole",
+"flute",
+"folding chair",
+"football helmet",
+"forklift",
+"fountain",
+"fountain pen",
+"four-poster bed",
+"freight car",
+"French horn",
+"frying pan",
+"fur coat",
+"garbage truck",
+"gas mask",
+"gas pump",
+"goblet",
+"go-kart",
+"golf ball",
+"golf cart",
+"gondola",
+"gong",
+"gown",
+"grand piano",
+"greenhouse",
+"grille",
+"grocery store",
+"guillotine",
+"barrette",
+"hair spray",
+"half-track",
+"hammer",
+"hamper",
+"hair dryer",
+"hand-held computer",
+"handkerchief",
+"hard disk drive",
+"harmonica",
+"harp",
+"harvester",
+"hatchet",
+"holster",
+"home theater",
+"honeycomb",
+"hook",
+"hoop skirt",
+"horizontal bar",
+"horse-drawn vehicle",
+"hourglass",
+"iPod",
+"clothes iron",
+"jack-o'-lantern",
+"jeans",
+"jeep",
+"T-shirt",
+"jigsaw puzzle",
+"pulled rickshaw",
+"joystick",
+"kimono",
+"knee pad",
+"knot",
+"lab coat",
+"ladle",
+"lampshade",
+"laptop computer",
+"lawn mower",
+"lens cap",
+"paper knife",
+"library",
+"lifeboat",
+"lighter",
+"limousine",
+"ocean liner",
+"lipstick",
+"slip-on shoe",
+"lotion",
+"speaker",
+"loupe",
+"sawmill",
+"magnetic compass",
+"mail bag",
+"mailbox",
+"tights",
+"tank suit",
+"manhole cover",
+"maraca",
+"marimba",
+"mask",
+"match",
+"maypole",
+"maze",
+"measuring cup",
+"medicine chest",
+"megalith",
+"microphone",
+"microwave oven",
+"military uniform",
+"milk can",
+"minibus",
+"miniskirt",
+"minivan",
+"missile",
+"mitten",
+"mixing bowl",
+"mobile home",
+"Model T",
+"modem",
+"monastery",
+"monitor",
+"moped",
+"mortar",
+"square academic cap",
+"mosque",
+"mosquito net",
+"scooter",
+"mountain bike",
+"tent",
+"computer mouse",
+"mousetrap",
+"moving van",
+"muzzle",
+"nail",
+"neck brace",
+"necklace",
+"nipple",
+"notebook computer",
+"obelisk",
+"oboe",
+"ocarina",
+"odometer",
+"oil filter",
+"organ",
+"oscilloscope",
+"overskirt",
+"bullock cart",
+"oxygen mask",
+"packet",
+"paddle",
+"paddle wheel",
+"padlock",
+"paintbrush",
+"pajamas",
+"palace",
+"pan flute",
+"paper towel",
+"parachute",
+"parallel bars",
+"park bench",
+"parking meter",
+"passenger car",
+"patio",
+"payphone",
+"pedestal",
+"pencil case",
+"pencil sharpener",
+"perfume",
+"Petri dish",
+"photocopier",
+"plectrum",
+"Pickelhaube",
+"picket fence",
+"pickup truck",
+"pier",
+"piggy bank",
+"pill bottle",
+"pillow",
+"ping-pong ball",
+"pinwheel",
+"pirate ship",
+"pitcher",
+"hand plane",
+"planetarium",
+"plastic bag",
+"plate rack",
+"plow",
+"plunger",
+"Polaroid camera",
+"pole",
+"police van",
+"poncho",
+"billiard table",
+"soda bottle",
+"pot",
+"potter's wheel",
+"power drill",
+"prayer rug",
+"printer",
+"prison",
+"projectile",
+"projector",
+"hockey puck",
+"punching bag",
+"purse",
+"quill",
+"quilt",
+"race car",
+"racket",
+"radiator",
+"radio",
+"radio telescope",
+"rain barrel",
+"recreational vehicle",
+"reel",
+"reflex camera",
+"refrigerator",
+"remote control",
+"restaurant",
+"revolver",
+"rifle",
+"rocking chair",
+"rotisserie",
+"eraser",
+"rugby ball",
+"ruler",
+"running shoe",
+"safe",
+"safety pin",
+"salt shaker",
+"sandal",
+"sarong",
+"saxophone",
+"scabbard",
+"weighing scale",
+"school bus",
+"schooner",
+"scoreboard",
+"CRT screen",
+"screw",
+"screwdriver",
+"seat belt",
+"sewing machine",
+"shield",
+"shoe store",
+"shoji",
+"shopping basket",
+"shopping cart",
+"shovel",
+"shower cap",
+"shower curtain",
+"ski",
+"ski mask",
+"sleeping bag",
+"slide rule",
+"sliding door",
+"slot machine",
+"snorkel",
+"snowmobile",
+"snowplow",
+"soap dispenser",
+"soccer ball",
+"sock",
+"solar thermal collector",
+"sombrero",
+"soup bowl",
+"space bar",
+"space heater",
+"space shuttle",
+"spatula",
+"motorboat",
+"spider web",
+"spindle",
+"sports car",
+"spotlight",
+"stage",
+"steam locomotive",
+"through arch bridge",
+"steel drum",
+"stethoscope",
+"scarf",
+"stone wall",
+"stopwatch",
+"stove",
+"strainer",
+"tram",
+"stretcher",
+"couch",
+"stupa",
+"submarine",
+"suit",
+"sundial",
+"sunglass",
+"sunglasses",
+"sunscreen",
+"suspension bridge",
+"mop",
+"sweatshirt",
+"swimsuit",
+"swing",
+"switch",
+"syringe",
+"table lamp",
+"tank",
+"tape player",
+"teapot",
+"teddy bear",
+"television",
+"tennis ball",
+"thatched roof",
+"front curtain",
+"thimble",
+"threshing machine",
+"throne",
+"tile roof",
+"toaster",
+"tobacco shop",
+"toilet seat",
+"torch",
+"totem pole",
+"tow truck",
+"toy store",
+"tractor",
+"semi-trailer truck",
+"tray",
+"trench coat",
+"tricycle",
+"trimaran",
+"tripod",
+"triumphal arch",
+"trolleybus",
+"trombone",
+"tub",
+"turnstile",
+"typewriter keyboard",
+"umbrella",
+"unicycle",
+"upright piano",
+"vacuum cleaner",
+"vase",
+"vault",
+"velvet",
+"vending machine",
+"vestment",
+"viaduct",
+"violin",
+"volleyball",
+"waffle iron",
+"wall clock",
+"wallet",
+"wardrobe",
+"military aircraft",
+"sink",
+"washing machine",
+"water bottle",
+"water jug",
+"water tower",
+"whiskey jug",
+"whistle",
+"wig",
+"window screen",
+"window shade",
+"Windsor tie",
+"wine bottle",
+"wing",
+"wok",
+"wooden spoon",
+"wool",
+"split-rail fence",
+"shipwreck",
+"yawl",
+"yurt",
+"website",
+"comic book",
+"crossword",
+"traffic sign",
+"traffic light",
+"dust jacket",
+"menu",
+"plate",
+"guacamole",
+"consomme",
+"hot pot",
+"trifle",
+"ice cream",
+"ice pop",
+"baguette",
+"bagel",
+"pretzel",
+"cheeseburger",
+"hot dog",
+"mashed potato",
+"cabbage",
+"broccoli",
+"cauliflower",
+"zucchini",
+"spaghetti squash",
+"acorn squash",
+"butternut squash",
+"cucumber",
+"artichoke",
+"bell pepper",
+"cardoon",
+"mushroom",
+"Granny Smith",
+"strawberry",
+"orange",
+"lemon",
+"fig",
+"pineapple",
+"banana",
+"jackfruit",
+"custard apple",
+"pomegranate",
+"hay",
+"carbonara",
+"chocolate syrup",
+"dough",
+"meatloaf",
+"pizza",
+"pot pie",
+"burrito",
+"red wine",
+"espresso",
+"cup",
+"eggnog",
+"alp",
+"bubble",
+"cliff",
+"coral reef",
+"geyser",
+"lakeshore",
+"promontory",
+"shoal",
+"seashore",
+"valley",
+"volcano",
+"baseball player",
+"bridegroom",
+"scuba diver",
+"rapeseed",
+"daisy",
+"yellow lady's slipper",
+"corn",
+"acorn",
+"rose hip",
+"horse chestnut seed",
+"coral fungus",
+"agaric",
+"gyromitra",
+"stinkhorn mushroom",
+"earth star",
+"hen-of-the-woods",
+"bolete",
+"ear",
+"toilet paper"]

labels/imagenet21k_wordnet_lemmas.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.py ADDED Viewed

	@@ -0,0 +1,359 @@

+import torch
+import numpy as np
+import torch.nn as nn
+import torch.nn.functional as F
+from resnet import StdConv2d
+from utils import (get_width_and_height_from_size, load_pretrained_weights,
+                    get_model_params)
+# Model names accepted by VisionTransformer.from_name / .from_pretrained;
+# any other name is rejected by VisionTransformer._check_model_name_is_valid.
+VALID_MODELS = ('ViT-B_16', 'ViT-B_32', 'ViT-L_16', 'ViT-L_32', 'R50+ViT-B_16')
+class PositionEmbs(nn.Module):
+    def __init__(self, num_patches, emb_dim, dropout_rate=0.1):
+        super(PositionEmbs, self).__init__()
+        self.pos_embedding = nn.Parameter(
+            torch.randn(1, num_patches + 1, emb_dim))
+        if dropout_rate > 0:
+            self.dropout = nn.Dropout(dropout_rate)
+        else:
+            self.dropout = None
+    def forward(self, x):
+        out = x + self.pos_embedding
+        if self.dropout:
+            out = self.dropout(out)
+        return out
+class MlpBlock(nn.Module):
+    """ Transformer Feed-Forward Block """
+    def __init__(self, in_dim, mlp_dim, out_dim, dropout_rate=0.1):
+        super(MlpBlock, self).__init__()
+        # init layers
+        self.fc1 = nn.Linear(in_dim, mlp_dim)
+        self.fc2 = nn.Linear(mlp_dim, out_dim)
+        self.act = nn.GELU()
+        if dropout_rate > 0.0:
+            self.dropout1 = nn.Dropout(dropout_rate)
+            self.dropout2 = nn.Dropout(dropout_rate)
+        else:
+            self.dropout1 = None
+            self.dropout2 = None
+    def forward(self, x):
+        out = self.fc1(x)
+        out = self.act(out)
+        if self.dropout1:
+            out = self.dropout1(out)
+        out = self.fc2(out)
+        out = self.dropout2(out)
+        return out
+class LinearGeneral(nn.Module):
+    def __init__(self, in_dim=(768, ), feat_dim=(12, 64)):
+        super(LinearGeneral, self).__init__()
+        self.weight = nn.Parameter(torch.randn(*in_dim, *feat_dim))
+        self.bias = nn.Parameter(torch.zeros(*feat_dim))
+    def forward(self, x, dims):
+        a = torch.tensordot(x, self.weight, dims=dims) + self.bias
+        return a
+class SelfAttention(nn.Module):
+    def __init__(self, in_dim, heads=8, dropout_rate=0.1):
+        super(SelfAttention, self).__init__()
+        self.heads = heads
+        self.head_dim = in_dim // heads
+        self.scale = self.head_dim**0.5
+        self.query = LinearGeneral((in_dim, ), (self.heads, self.head_dim))
+        self.key = LinearGeneral((in_dim, ), (self.heads, self.head_dim))
+        self.value = LinearGeneral((in_dim, ), (self.heads, self.head_dim))
+        self.out = LinearGeneral((self.heads, self.head_dim), (in_dim, ))
+        if dropout_rate > 0:
+            self.dropout = nn.Dropout(dropout_rate)
+        else:
+            self.dropout = None
+    def forward(self, x):
+        b, n, _ = x.shape
+        q = self.query(x, dims=([2], [0]))
+        k = self.key(x, dims=([2], [0]))
+        v = self.value(x, dims=([2], [0]))
+        q = q.permute(0, 2, 1, 3)
+        k = k.permute(0, 2, 1, 3)
+        v = v.permute(0, 2, 1, 3)
+        attn_weights = torch.matmul(q, k.transpose(-2, -1)) / self.scale
+        attn_weights = F.softmax(attn_weights, dim=-1)
+        out = torch.matmul(attn_weights, v)
+        out = out.permute(0, 2, 1, 3)
+        out = self.out(out, dims=([2, 3], [0, 1]))
+        return out
+class EncoderBlock(nn.Module):
+    def __init__(self,
+                 in_dim,
+                 mlp_dim,
+                 num_heads,
+                 dropout_rate=0.1,
+                 attn_dropout_rate=0.1):
+        super(EncoderBlock, self).__init__()
+        self.norm1 = nn.LayerNorm(in_dim)
+        self.attn = SelfAttention(in_dim,
+                                  heads=num_heads,
+                                  dropout_rate=attn_dropout_rate)
+        if dropout_rate > 0:
+            self.dropout = nn.Dropout(dropout_rate)
+        else:
+            self.dropout = None
+        self.norm2 = nn.LayerNorm(in_dim)
+        self.mlp = MlpBlock(in_dim, mlp_dim, in_dim, dropout_rate)
+    def forward(self, x):
+        residual = x
+        out = self.norm1(x)
+        out = self.attn(out)
+        if self.dropout:
+            out = self.dropout(out)
+        out += residual
+        residual = out
+        out = self.norm2(out)
+        out = self.mlp(out)
+        out += residual
+        return out
+class Encoder(nn.Module):
+    def __init__(self,
+                 num_patches,
+                 emb_dim,
+                 mlp_dim,
+                 num_layers=12,
+                 num_heads=12,
+                 dropout_rate=0.1,
+                 attn_dropout_rate=0.0):
+        super(Encoder, self).__init__()
+        # positional embedding
+        self.pos_embedding = PositionEmbs(num_patches, emb_dim, dropout_rate)
+        # encoder blocks
+        in_dim = emb_dim
+        self.encoder_layers = nn.ModuleList()
+        for i in range(num_layers):
+            layer = EncoderBlock(in_dim, mlp_dim, num_heads, dropout_rate,
+                                 attn_dropout_rate)
+            self.encoder_layers.append(layer)
+        self.norm = nn.LayerNorm(in_dim)
+    def forward(self, x):
+        out = self.pos_embedding(x)
+        for layer in self.encoder_layers:
+            out = layer(out)
+        out = self.norm(out)
+        return out
+class VisionTransformer(nn.Module):
+    """ Vision Transformer.
+        Most easily loaded with the .from_name or .from_pretrained methods.
+        Args:
+            params (namedtuple): A set of Params.
+        References:
+            [1] https://arxiv.org/abs/2010.11929 (An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale)
+        Example:
+            import torch
+            >>> from vision_transformer_pytorch import VisionTransformer
+            >>> inputs = torch.rand(1, 3, 256, 256)
+            >>> model = VisionTransformer.from_pretrained('ViT-B_16')
+            >>> model.eval()
+            >>> outputs = model(inputs)
+    """
+    def __init__(self, params=None):
+        super(VisionTransformer, self).__init__()
+        self._params = params
+        if self._params.resnet:
+            self.resnet = self._params.resnet()
+            self.embedding = nn.Conv2d(self.resnet.width * 16,
+                                       self._params.emb_dim,
+                                       kernel_size=1,
+                                       stride=1)
+        else:
+            self.embedding = nn.Conv2d(3,
+                                       self._params.emb_dim,
+                                       kernel_size=self.patch_size,
+                                       stride=self.patch_size)
+        # class token
+        self.cls_token = nn.Parameter(torch.zeros(1, 1, self._params.emb_dim))
+        # transformer
+        self.transformer = Encoder(
+            num_patches=self.num_patches,
+            emb_dim=self._params.emb_dim,
+            mlp_dim=self._params.mlp_dim,
+            num_layers=self._params.num_layers,
+            num_heads=self._params.num_heads,
+            dropout_rate=self._params.dropout_rate,
+            attn_dropout_rate=self._params.attn_dropout_rate)
+        # classfier
+        self.classifier = nn.Linear(self._params.emb_dim,
+                                    self._params.num_classes)
+    @property
+    def image_size(self):
+        return get_width_and_height_from_size(self._params.image_size)
+    @property
+    def patch_size(self):
+        return get_width_and_height_from_size(self._params.patch_size)
+    @property
+    def num_patches(self):
+        h, w = self.image_size
+        fh, fw = self.patch_size
+        if hasattr(self, 'resnet'):
+            gh, gw = h // fh // self.resnet.downsample, w // fw // self.resnet.downsample
+        else:
+            gh, gw = h // fh, w // fw
+        return gh * gw
+    def extract_features(self, x):
+        if hasattr(self, 'resnet'):
+            x = self.resnet(x)
+        emb = self.embedding(x)  # (n, c, gh, gw)
+        emb = emb.permute(0, 2, 3, 1)  # (n, gh, hw, c)
+        b, h, w, c = emb.shape
+        emb = emb.reshape(b, h * w, c)
+        # prepend class token
+        cls_token = self.cls_token.repeat(b, 1, 1)
+        emb = torch.cat([cls_token, emb], dim=1)
+        # transformer
+        feat = self.transformer(emb)
+        return feat
+    def forward(self, x):
+        feat = self.extract_features(x)
+        # classifier
+        logits = self.classifier(feat[:, 0])
+        return logits
+    @classmethod
+    def from_name(cls, model_name, in_channels=3, **override_params):
+        """create an vision transformer model according to name.
+        Args:
+            model_name (str): Name for vision transformer.
+            in_channels (int): Input data's channel number.
+            override_params (other key word params):
+                Params to override model's global_params.
+                Optional key:
+                    'image_size', 'patch_size',
+                    'emb_dim', 'mlp_dim',
+                    'num_heads', 'num_layers',
+                    'num_classes', 'attn_dropout_rate',
+                    'dropout_rate'
+        Returns:
+            An vision transformer model.
+        """
+        cls._check_model_name_is_valid(model_name)
+        params = get_model_params(model_name, override_params)
+        model = cls(params)
+        model._change_in_channels(in_channels)
+        return model
+    @classmethod
+    def from_pretrained(cls,
+                        model_name,
+                        weights_path=None,
+                        in_channels=3,
+                        num_classes=1000,
+                        **override_params):
+        """create an vision transformer model according to name.
+        Args:
+            model_name (str): Name for vision transformer.
+            weights_path (None or str):
+                str: path to pretrained weights file on the local disk.
+                None: use pretrained weights downloaded from the Internet.
+            in_channels (int): Input data's channel number.
+            num_classes (int):
+                Number of categories for classification.
+                It controls the output size for final linear layer.
+            override_params (other key word params):
+                Params to override model's global_params.
+                Optional key:
+                    'image_size', 'patch_size',
+                    'emb_dim', 'mlp_dim',
+                    'num_heads', 'num_layers',
+                    'num_classes', 'attn_dropout_rate',
+                    'dropout_rate'
+        Returns:
+            A pretrained vision transformer model.
+        """
+        model = cls.from_name(model_name,
+                              num_classes=num_classes,
+                              **override_params)
+        load_pretrained_weights(model,
+                                model_name,
+                                weights_path=weights_path,
+                                load_fc=(num_classes == 1000))
+        model._change_in_channels(in_channels)
+        return model
+    @classmethod
+    def _check_model_name_is_valid(cls, model_name):
+        """Validates model name.
+        Args:
+            model_name (str): Name for vision transformer.
+        Returns:
+            bool: Is a valid name or not.
+        """
+        if model_name not in VALID_MODELS:
+            raise ValueError('model_name should be one of: ' +
+                             ', '.join(VALID_MODELS))
+    def _change_in_channels(self, in_channels):
+        """Adjust model's first convolution layer to in_channels, if in_channels not equals 3.
+        Args:
+            in_channels (int): Input data's channel number.
+        """
+        if in_channels != 3:
+            if hasattr(self, 'resnet'):
+                self.resnet.root['conv'] = StdConv2d(in_channels,
+                                                     self.resnet.width,
+                                                     kernel_size=7,
+                                                     stride=2,
+                                                     bias=False,
+                                                     padding=3)
+            else:
+                self.embedding = nn.Conv2d(in_channels,
+                                           self._params.emb_dim,
+                                           kernel_size=self.patch_size,
+                                           stride=self.patch_size)

pretrained_weights/ViT-B_16_imagenet21k_imagenet2012.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:33cadfad17534e3bf51a17fe31561bbf8e650f17801cd715e71804254c1e8ef3
+size 347471723

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+torch==1.13.1
+torchvision==0.14.1
+gradio==3.16.2

resnet.py ADDED Viewed

	@@ -0,0 +1,164 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from os.path import join as pjoin
+from collections import OrderedDict
+def weight_standardize(w, dim, eps):
+    """Subtracts mean and divides by standard deviation."""
+    w = w - torch.mean(w, dim=dim)
+    w = w / (torch.std(w, dim=dim) + eps)
+    return w
+def np2th(weights, conv=False):
+    """Possibly convert HWIO to OIHW."""
+    if conv:
+        weights = weights.transpose([3, 2, 0, 1])
+    return torch.from_numpy(weights)
+class StdConv2d(nn.Conv2d):
+    def forward(self, x):
+        w = weight_standardize(self.weight, [0, 1, 2], 1e-5)
+        return F.conv2d(x, w, self.bias, self.stride, self.padding,
+                        self.dilation, self.groups)
+def conv3x3(in_channels, out_channels, stride=1, groups=1, bias=False):
+    return StdConv2d(in_channels,
+                     out_channels,
+                     kernel_size=3,
+                     stride=stride,
+                     padding=1,
+                     bias=bias,
+                     groups=groups)
+def conv1x1(in_channels, out_channels, stride=1, bias=False):
+    return StdConv2d(in_channels,
+                     out_channels,
+                     kernel_size=1,
+                     stride=stride,
+                     padding=0,
+                     bias=bias)
+class PreActBottleneck(nn.Module):
+    """Pre-activation (v2) bottleneck block.
+    """
+    def __init__(self,
+                 in_channels,
+                 out_channels=None,
+                 mid_channels=None,
+                 stride=1):
+        super().__init__()
+        out_channels = out_channels or in_channels
+        mid_channels = mid_channels or out_channels // 4
+        self.gn1 = nn.GroupNorm(32, mid_channels, eps=1e-6)
+        self.conv1 = conv1x1(in_channels, mid_channels, bias=False)
+        self.gn2 = nn.GroupNorm(32, mid_channels, eps=1e-6)
+        self.conv2 = conv3x3(mid_channels, mid_channels, stride,
+                             bias=False)  # Original code has it on conv1!!
+        self.gn3 = nn.GroupNorm(32, out_channels, eps=1e-6)
+        self.conv3 = conv1x1(mid_channels, out_channels, bias=False)
+        self.relu = nn.ReLU(inplace=True)
+        if (stride != 1 or in_channels != out_channels):
+            # Projection also with pre-activation according to paper.
+            self.downsample = conv1x1(in_channels,
+                                      out_channels,
+                                      stride,
+                                      bias=False)
+            self.gn_proj = nn.GroupNorm(out_channels, out_channels)
+    def forward(self, x):
+        # Residual branch
+        residual = x
+        if hasattr(self, 'downsample'):
+            residual = self.downsample(x)
+            residual = self.gn_proj(residual)
+        # Unit's branch
+        y = self.relu(self.gn1(self.conv1(x)))
+        y = self.relu(self.gn2(self.conv2(y)))
+        y = self.gn3(self.conv3(y))
+        y = self.relu(residual + y)
+        return y
+class ResNetV2(nn.Module):
+    """Implementation of Pre-activation (v2) ResNet mode."""
+    def __init__(self, block_units, width_factor):
+        super().__init__()
+        width = int(64 * width_factor)
+        self.width = width
+        self.downsample = 16  # four stride=2 conv2d layer
+        # The following will be unreadable if we split lines.
+        # pylint: disable=line-too-long
+        self.root = nn.Sequential(
+            OrderedDict([('conv',
+                          StdConv2d(3,
+                                    width,
+                                    kernel_size=7,
+                                    stride=2,
+                                    bias=False,
+                                    padding=3)),
+                         ('gn', nn.GroupNorm(32, width, eps=1e-6)),
+                         ('relu', nn.ReLU(inplace=True)),
+                         ('pool',
+                          nn.MaxPool2d(kernel_size=3, stride=2, padding=0))]))
+        self.body = nn.Sequential(
+            OrderedDict([
+                ('block1',
+                 nn.Sequential(
+                     OrderedDict([('unit1',
+                                   PreActBottleneck(in_channels=width,
+                                                    out_channels=width * 4,
+                                                    mid_channels=width))] +
+                                 [(f'unit{i:d}',
+                                   PreActBottleneck(in_channels=width * 4,
+                                                    out_channels=width * 4,
+                                                    mid_channels=width))
+                                  for i in range(2, block_units[0] + 1)], ))),
+                ('block2',
+                 nn.Sequential(
+                     OrderedDict([('unit1',
+                                   PreActBottleneck(in_channels=width * 4,
+                                                    out_channels=width * 8,
+                                                    mid_channels=width * 2,
+                                                    stride=2))] +
+                                 [(f'unit{i:d}',
+                                   PreActBottleneck(in_channels=width * 8,
+                                                    out_channels=width * 8,
+                                                    mid_channels=width * 2))
+                                  for i in range(2, block_units[1] + 1)], ))),
+                ('block3',
+                 nn.Sequential(
+                     OrderedDict([('unit1',
+                                   PreActBottleneck(in_channels=width * 8,
+                                                    out_channels=width * 16,
+                                                    mid_channels=width * 4,
+                                                    stride=2))] +
+                                 [(f'unit{i:d}',
+                                   PreActBottleneck(in_channels=width * 16,
+                                                    out_channels=width * 16,
+                                                    mid_channels=width * 4))
+                                  for i in range(2, block_units[2] + 1)], ))),
+            ]))
+    def forward(self, x):
+        x = self.root(x)
+        x = self.body(x)
+        return x
+def resnet50():
+    return ResNetV2(block_units=(3, 4, 9), width_factor=1)

utils.py ADDED Viewed

	@@ -0,0 +1,152 @@

+import re
+import math
+import torch
+import collections
+from torch import nn
+from functools import partial
+from torch.utils import model_zoo
+from torch.nn import functional as F
+from resnet import resnet50
+################################################################################
+### Help functions for model architecture
+################################################################################
+# Params: namedtuple of model hyper-parameters
+# get_width_and_height_from_size: normalizes a size argument to a pair
+# Parameters for the entire model (patch embedding, encoder, and head);
+# `resnet` holds an optional backbone factory (None for the pure-ViT presets).
+Params = collections.namedtuple('Params', [
+    'image_size', 'patch_size', 'emb_dim', 'mlp_dim', 'num_heads', 'num_layers',
+    'num_classes', 'attn_dropout_rate', 'dropout_rate', 'resnet'
+])
+# Every Params field defaults to None, so any subset may be passed.
+Params.__new__.__defaults__ = (None, ) * len(Params._fields)
+def get_width_and_height_from_size(x):
+    """Obtain height and width from x.
+    Args:
+        x (int, tuple or list): Data size.
+    Returns:
+        size: A tuple or list (H,W).
+    """
+    if isinstance(x, int):
+        return x, x
+    if isinstance(x, list) or isinstance(x, tuple):
+        return x
+    else:
+        raise TypeError()
+################################################################################
+### Helper functions for loading model params
+################################################################################
+# vision_transformer and get_model_params:
+#     Functions to build the Params for a named vision transformer
+# url_map: Dict mapping model names to pretrained-weight URLs
+# load_pretrained_weights: A function to load pretrained weights
+def vision_transformer(model_name):
+    """Create Params for vision transformer model.
+    Args:
+        model_name (str): Model name to be queried.
+    Returns:
+        Params(params_dict[model_name])
+    """
+    params_dict = {
+        'ViT-B_16': (384, 16, 768, 3072, 12, 12, 1000, 0.0, 0.1, None),
+        'ViT-B_32': (384, 32, 768, 3072, 12, 12, 1000, 0.0, 0.1, None),
+        'ViT-L_16': (384, 16, 1024, 4096, 16, 24, 1000, 0.0, 0.1, None),
+        'ViT-L_32': (384, 32, 1024, 4096, 16, 24, 1000, 0.0, 0.1, None),
+        'R50+ViT-B_16': (384, 1, 768, 3072, 12, 12, 1000, 0.0, 0.1, resnet50),
+    }
+    image_size, patch_size, emb_dim, mlp_dim, num_heads, num_layers, num_classes, attn_dropout_rate, dropout_rate, resnet = params_dict[
+        model_name]
+    params = Params(image_size=image_size,
+                    patch_size=patch_size,
+                    emb_dim=emb_dim,
+                    mlp_dim=mlp_dim,
+                    num_heads=num_heads,
+                    num_layers=num_layers,
+                    num_classes=num_classes,
+                    attn_dropout_rate=attn_dropout_rate,
+                    dropout_rate=dropout_rate,
+                    resnet=resnet)
+    return params
+def get_model_params(model_name, override_params):
+    """Get the block args and global params for a given model name.
+    Args:
+        model_name (str): Model's name.
+        override_params (dict): A dict to modify params.
+    Returns:
+        params
+    """
+    params = vision_transformer(model_name)
+    if override_params:
+        # ValueError will be raised here if override_params has fields not included in params.
+        params = params._replace(**override_params)
+    return params
+# Download URLs of the pretrained checkpoints, keyed by model name (the same
+# names as the presets in vision_transformer).
+# Trained with standard methods; the file names suggest ImageNet-21k
+# pre-training followed by ImageNet-2012 fine-tuning (TODO confirm).
+# check more details in paper(An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale)
+url_map = {
+    'ViT-B_16':
+    'https://github.com/tczhangzhi/VisionTransformer-PyTorch/releases/download/1.0.1/ViT-B_16_imagenet21k_imagenet2012.pth',
+    'ViT-B_32':
+    'https://github.com/tczhangzhi/VisionTransformer-PyTorch/releases/download/1.0.1/ViT-B_32_imagenet21k_imagenet2012.pth',
+    'ViT-L_16':
+    'https://github.com/tczhangzhi/VisionTransformer-PyTorch/releases/download/1.0.1/ViT-L_16_imagenet21k_imagenet2012.pth',
+    'ViT-L_32':
+    'https://github.com/tczhangzhi/VisionTransformer-PyTorch/releases/download/1.0.1/ViT-L_32_imagenet21k_imagenet2012.pth',
+    'R50+ViT-B_16':
+    'https://github.com/tczhangzhi/VisionTransformer-PyTorch/releases/download/1.0.1/R50+ViT-B_16_imagenet21k_imagenet2012.pth',
+}
+def load_pretrained_weights(model,
+                            model_name,
+                            weights_path=None,
+                            load_fc=True,
+                            advprop=False):
+    """Loads pretrained weights from weights path or download using url.
+    Args:
+        model (Module): The whole model of vision transformer.
+        model_name (str): Model name of vision transformer.
+        weights_path (None or str):
+            str: path to pretrained weights file on the local disk.
+            None: use pretrained weights downloaded from the Internet.
+        load_fc (bool): Whether to load pretrained weights for fc layer at the end of the model.
+    """
+    if isinstance(weights_path, str):
+        state_dict = torch.load(weights_path)
+    else:
+        state_dict = model_zoo.load_url(url_map[model_name])
+    if load_fc:
+        ret = model.load_state_dict(state_dict, strict=False)
+        assert not ret.missing_keys, 'Missing keys when loading pretrained weights: {}'.format(
+            ret.missing_keys)
+    else:
+        state_dict.pop('classifier.weight')
+        state_dict.pop('classifier.bias')
+        ret = model.load_state_dict(state_dict, strict=False)
+        assert set(ret.missing_keys) == set([
+            'classifier.weight', 'classifier.bias'
+        ]), 'Missing keys when loading pretrained weights: {}'.format(
+            ret.missing_keys)
+    assert not ret.unexpected_keys, 'Missing keys when loading pretrained weights: {}'.format(
+        ret.unexpected_keys)
+    print('Loaded pretrained weights for {}'.format(model_name))