WwYc committed on
Commit 3d27aee
Parent: 971cce4

Upload 707 files

This view is limited to 50 files because the commit contains too many changes; the raw diff has the full list.
Files changed (50):
  1. .gitattributes +6 -0
  2. ViT_DeiT/.gitignore +11 -0
  3. ViT_DeiT/.ipynb_checkpoints/DeiT_example-checkpoint.ipynb +0 -0
  4. ViT_DeiT/.ipynb_checkpoints/example-checkpoint.ipynb +0 -0
  5. ViT_DeiT/LICENSE +21 -0
  6. ViT_DeiT/VIT-EXPL.py +96 -0
  7. ViT_DeiT/baselines/ViT/ViT_LRP.py +437 -0
  8. ViT_DeiT/baselines/ViT/ViT_explanation_generator.py +83 -0
  9. ViT_DeiT/baselines/ViT/ViT_new.py +238 -0
  10. ViT_DeiT/baselines/ViT/ViT_orig_LRP.py +425 -0
  11. ViT_DeiT/baselines/ViT/__pycache__/ViT_LRP.cpython-38.pyc +0 -0
  12. ViT_DeiT/baselines/ViT/__pycache__/ViT_explanation_generator.cpython-38.pyc +0 -0
  13. ViT_DeiT/baselines/ViT/__pycache__/helpers.cpython-38.pyc +0 -0
  14. ViT_DeiT/baselines/ViT/__pycache__/layer_helpers.cpython-38.pyc +0 -0
  15. ViT_DeiT/baselines/ViT/__pycache__/weight_init.cpython-38.pyc +0 -0
  16. ViT_DeiT/baselines/ViT/generate_visualizations.py +208 -0
  17. ViT_DeiT/baselines/ViT/helpers.py +295 -0
  18. ViT_DeiT/baselines/ViT/imagenet_seg_eval.py +334 -0
  19. ViT_DeiT/baselines/ViT/layer_helpers.py +21 -0
  20. ViT_DeiT/baselines/ViT/misc_functions.py +68 -0
  21. ViT_DeiT/baselines/ViT/pertubation_eval_from_hdf5.py +233 -0
  22. ViT_DeiT/baselines/ViT/weight_init.py +60 -0
  23. ViT_DeiT/data/VOC.py +372 -0
  24. ViT_DeiT/data/__init__.py +0 -0
  25. ViT_DeiT/data/imagenet.py +74 -0
  26. ViT_DeiT/data/imagenet_utils.py +1002 -0
  27. ViT_DeiT/data/transforms.py +442 -0
  28. ViT_DeiT/dataset/expl_hdf5.py +51 -0
  29. ViT_DeiT/modules/__init__.py +0 -0
  30. ViT_DeiT/modules/__pycache__/__init__.cpython-38.pyc +0 -0
  31. ViT_DeiT/modules/__pycache__/layers_ours.cpython-38.pyc +0 -0
  32. ViT_DeiT/modules/layers_lrp.py +261 -0
  33. ViT_DeiT/modules/layers_ours.py +280 -0
  34. ViT_DeiT/requirements.txt +15 -0
  35. ViT_DeiT/samples/CLS2IDX.py +1000 -0
  36. ViT_DeiT/samples/__pycache__/CLS2IDX.cpython-38.pyc +0 -0
  37. ViT_DeiT/samples/catdog.png +0 -0
  38. ViT_DeiT/samples/dogbird.png +0 -0
  39. ViT_DeiT/samples/dogcat2.png +0 -0
  40. ViT_DeiT/samples/el1.png +0 -0
  41. ViT_DeiT/samples/el2.png +0 -0
  42. ViT_DeiT/samples/el3.png +0 -0
  43. ViT_DeiT/samples/el4.png +0 -0
  44. ViT_DeiT/samples/el5.png +0 -0
  45. ViT_DeiT/utils/__init__.py +0 -0
  46. ViT_DeiT/utils/__pycache__/__init__.cpython-38.pyc +0 -0
  47. ViT_DeiT/utils/confusionmatrix.py +88 -0
  48. ViT_DeiT/utils/iou.py +93 -0
  49. ViT_DeiT/utils/metric.py +12 -0
  50. ViT_DeiT/utils/metrices.py +208 -0
.gitattributes CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ ViT_DeiT/venv/Scripts/_hashlib.pyd filter=lfs diff=lfs merge=lfs -text
+ ViT_DeiT/venv/Scripts/_ssl.pyd filter=lfs diff=lfs merge=lfs -text
+ ViT_DeiT/venv/Scripts/python36.dll filter=lfs diff=lfs merge=lfs -text
+ ViT_DeiT/venv/Scripts/sqlite3.dll filter=lfs diff=lfs merge=lfs -text
+ ViT_DeiT/venv/Scripts/tcl86t.dll filter=lfs diff=lfs merge=lfs -text
+ ViT_DeiT/venv/Scripts/tk86t.dll filter=lfs diff=lfs merge=lfs -text
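The six new entries route the bundled virtual-environment binaries under ViT_DeiT/venv/Scripts/ through Git LFS. Lines of this form are what `git lfs track "<pattern>"` appends to .gitattributes; a wildcard pattern such as `ViT_DeiT/venv/Scripts/*.dll` (a hypothetical example, not what this commit used) would yield an equivalent `filter=lfs diff=lfs merge=lfs -text` entry.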
ViT_DeiT/.gitignore ADDED
@@ -0,0 +1,11 @@
+ *.pyc
+ all_good_vis/
+ __pycache__
+ *.tar
+ .idea
+ run/
+ baselines/ViT/experiments/
+ baselines/ViT/visualizations/
+ bert_models/
+ data/movies/
+
ViT_DeiT/.ipynb_checkpoints/DeiT_example-checkpoint.ipynb ADDED
The diff for this file is too large to render.
ViT_DeiT/.ipynb_checkpoints/example-checkpoint.ipynb ADDED
The diff for this file is too large to render.
ViT_DeiT/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2020 Hila Chefer
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
ViT_DeiT/VIT-EXPL.py ADDED
@@ -0,0 +1,96 @@
+ import os
+
+ from PIL import Image
+ import torchvision.transforms as transforms
+ import matplotlib.pyplot as plt
+ import pylab
+ import torch
+ import numpy as np
+ import cv2
+ from samples.CLS2IDX import CLS2IDX
+ from baselines.ViT.ViT_LRP import vit_base_patch16_224 as vit_LRP
+ from baselines.ViT.ViT_explanation_generator import LRP
+
+ normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
+ transform = transforms.Compose([
+     transforms.Resize(256),
+     transforms.CenterCrop(224),
+     transforms.ToTensor(),
+     normalize,
+ ])
+ use_thresholding = False
+ def show_cam_on_image(img, mask):
+     heatmap = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)
+     heatmap = np.float32(heatmap) / 255
+     cam = heatmap + np.float32(img)
+     cam = cam / np.max(cam)
+     return cam
+
+ # initialize ViT pretrained
+ model = vit_LRP(pretrained=True).cuda()
+ model.eval()
+ attribution_generator = LRP(model)
+
+ def generate_visualization(original_image, class_index=None):
+     transformer_attribution = attribution_generator.generate_LRP(original_image.unsqueeze(0).cuda(), method="transformer_attribution", index=class_index).detach()
+     transformer_attribution = transformer_attribution.reshape(1, 1, 14, 14)
+     transformer_attribution = torch.nn.functional.interpolate(transformer_attribution, scale_factor=16, mode='bilinear')
+     transformer_attribution = transformer_attribution.reshape(224, 224).data.cpu().numpy()
+     transformer_attribution = (transformer_attribution - transformer_attribution.min()) / (transformer_attribution.max() - transformer_attribution.min())
+
+     if use_thresholding:
+         transformer_attribution = transformer_attribution * 255
+         transformer_attribution = transformer_attribution.astype(np.uint8)
+         ret, transformer_attribution = cv2.threshold(transformer_attribution, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+         transformer_attribution[transformer_attribution == 255] = 1
+
+     image_transformer_attribution = original_image.permute(1, 2, 0).data.cpu().numpy()
+     image_transformer_attribution = (image_transformer_attribution - image_transformer_attribution.min()) / (image_transformer_attribution.max() - image_transformer_attribution.min())
+     vis = show_cam_on_image(image_transformer_attribution, transformer_attribution)
+     vis = np.uint8(255 * vis)
+     vis = cv2.cvtColor(np.array(vis), cv2.COLOR_RGB2BGR)
+     return vis
+
+
+ def print_top_classes(predictions, **kwargs):
+     # Print Top-5 predictions
+     prob = torch.softmax(predictions, dim=1)
+     class_indices = predictions.data.topk(5, dim=1)[1][0].tolist()
+     max_str_len = 0
+     class_names = []
+     for cls_idx in class_indices:
+         class_names.append(CLS2IDX[cls_idx])
+         if len(CLS2IDX[cls_idx]) > max_str_len:
+             max_str_len = len(CLS2IDX[cls_idx])
+
+     print('Top 5 classes:')
+     for cls_idx in class_indices:
+         output_string = '\t{} : {}'.format(cls_idx, CLS2IDX[cls_idx])
+         output_string += ' ' * (max_str_len - len(CLS2IDX[cls_idx])) + '\t\t'
+         output_string += 'value = {:.3f}\t prob = {:.1f}%'.format(predictions[0, cls_idx], 100 * prob[0, cls_idx])
+         print(output_string)
+
+
+ image = Image.open('samples/dogcat2.png')
+ dog_cat_image = transform(image)
+
+ fig, axs = plt.subplots(1, 3)
+ axs[0].imshow(image);
+ axs[0].axis('off');
+
+ output = model(dog_cat_image.unsqueeze(0).cuda())
+ print_top_classes(output)
+
+ # cat - the predicted class
+ cat = generate_visualization(dog_cat_image)
+
+ # dog
+ # generate visualization for class 243: 'bull mastiff'
+ dog = generate_visualization(dog_cat_image, class_index=243)
+
+
+ axs[1].imshow(cat);
+ axs[1].axis('off');
+ axs[2].imshow(dog);
+ axs[2].axis('off');
+ pylab.show()
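In outline, the script above loads an LRP-enabled ViT-B/16, runs the LRP explanation generator, reshapes the per-patch relevance onto the 14x14 token grid, and upsamples it to image resolution. A condensed sketch of those steps (same modules as above; CUDA is assumed because `generate_LRP` moves its one-hot target to the GPU):

    model = vit_LRP(pretrained=True).cuda().eval()               # relprop-enabled ViT-B/16
    generator = LRP(model)
    # img: a normalized 3x224x224 tensor produced by `transform` above
    attr = generator.generate_LRP(img.unsqueeze(0).cuda(),
                                  method="transformer_attribution").detach()
    attr = attr.reshape(1, 1, 14, 14)                            # one relevance score per image patch
    attr = torch.nn.functional.interpolate(attr, scale_factor=16, mode='bilinear')  # 224x224 heatmap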
ViT_DeiT/baselines/ViT/ViT_LRP.py ADDED
@@ -0,0 +1,437 @@
1
+ """ Vision Transformer (ViT) in PyTorch
2
+ Hacked together by / Copyright 2020 Ross Wightman
3
+ """
4
+ import torch
5
+ import torch.nn as nn
6
+ from einops import rearrange
7
+ from modules.layers_ours import *
8
+
9
+ from baselines.ViT.helpers import load_pretrained
10
+ from baselines.ViT.weight_init import trunc_normal_
11
+ from baselines.ViT.layer_helpers import to_2tuple
12
+
13
+
14
+ def _cfg(url='', **kwargs):
15
+ return {
16
+ 'url': url,
17
+ 'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': None,
18
+ 'crop_pct': .9, 'interpolation': 'bicubic',
19
+ 'first_conv': 'patch_embed.proj', 'classifier': 'head',
20
+ **kwargs
21
+ }
22
+
23
+
24
+ default_cfgs = {
25
+ # patch models
26
+ 'vit_small_patch16_224': _cfg(
27
+ url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/vit_small_p16_224-15ec54c9.pth',
28
+ ),
29
+ 'vit_base_patch16_224': _cfg(
30
+ url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth',
31
+ mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5),
32
+ ),
33
+ 'vit_large_patch16_224': _cfg(
34
+ url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_p16_224-4ee7a4dc.pth',
35
+ mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
36
+ }
37
+
38
+ def compute_rollout_attention(all_layer_matrices, start_layer=0):
39
+ # adding residual consideration
40
+ num_tokens = all_layer_matrices[0].shape[1]
41
+ batch_size = all_layer_matrices[0].shape[0]
42
+ eye = torch.eye(num_tokens).expand(batch_size, num_tokens, num_tokens).to(all_layer_matrices[0].device)
43
+ all_layer_matrices = [all_layer_matrices[i] + eye for i in range(len(all_layer_matrices))]
44
+ # all_layer_matrices = [all_layer_matrices[i] / all_layer_matrices[i].sum(dim=-1, keepdim=True)
45
+ # for i in range(len(all_layer_matrices))]
46
+ joint_attention = all_layer_matrices[start_layer]
47
+ for i in range(start_layer+1, len(all_layer_matrices)):
48
+ joint_attention = all_layer_matrices[i].bmm(joint_attention)
49
+ return joint_attention
50
+
51
+ class Mlp(nn.Module):
52
+ def __init__(self, in_features, hidden_features=None, out_features=None, drop=0.):
53
+ super().__init__()
54
+ out_features = out_features or in_features
55
+ hidden_features = hidden_features or in_features
56
+ self.fc1 = Linear(in_features, hidden_features)
57
+ self.act = GELU()
58
+ self.fc2 = Linear(hidden_features, out_features)
59
+ self.drop = Dropout(drop)
60
+
61
+ def forward(self, x):
62
+ x = self.fc1(x)
63
+ x = self.act(x)
64
+ x = self.drop(x)
65
+ x = self.fc2(x)
66
+ x = self.drop(x)
67
+ return x
68
+
69
+ def relprop(self, cam, **kwargs):
70
+ cam = self.drop.relprop(cam, **kwargs)
71
+ cam = self.fc2.relprop(cam, **kwargs)
72
+ cam = self.act.relprop(cam, **kwargs)
73
+ cam = self.fc1.relprop(cam, **kwargs)
74
+ return cam
75
+
76
+
77
+ class Attention(nn.Module):
78
+ def __init__(self, dim, num_heads=8, qkv_bias=False,attn_drop=0., proj_drop=0.):
79
+ super().__init__()
80
+ self.num_heads = num_heads
81
+ head_dim = dim // num_heads
82
+ # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
83
+ self.scale = head_dim ** -0.5
84
+
85
+ # A = Q*K^T
86
+ self.matmul1 = einsum('bhid,bhjd->bhij')
87
+ # attn = A*V
88
+ self.matmul2 = einsum('bhij,bhjd->bhid')
89
+
90
+ self.qkv = Linear(dim, dim * 3, bias=qkv_bias)
91
+ self.attn_drop = Dropout(attn_drop)
92
+ self.proj = Linear(dim, dim)
93
+ self.proj_drop = Dropout(proj_drop)
94
+ self.softmax = Softmax(dim=-1)
95
+
96
+ self.attn_cam = None
97
+ self.attn = None
98
+ self.v = None
99
+ self.v_cam = None
100
+ self.attn_gradients = None
101
+
102
+ def get_attn(self):
103
+ return self.attn
104
+
105
+ def save_attn(self, attn):
106
+ self.attn = attn
107
+
108
+ def save_attn_cam(self, cam):
109
+ self.attn_cam = cam
110
+
111
+ def get_attn_cam(self):
112
+ return self.attn_cam
113
+
114
+ def get_v(self):
115
+ return self.v
116
+
117
+ def save_v(self, v):
118
+ self.v = v
119
+
120
+ def save_v_cam(self, cam):
121
+ self.v_cam = cam
122
+
123
+ def get_v_cam(self):
124
+ return self.v_cam
125
+
126
+ def save_attn_gradients(self, attn_gradients):
127
+ self.attn_gradients = attn_gradients
128
+
129
+ def get_attn_gradients(self):
130
+ return self.attn_gradients
131
+
132
+ def forward(self, x):
133
+ b, n, _, h = *x.shape, self.num_heads
134
+ qkv = self.qkv(x)
135
+ q, k, v = rearrange(qkv, 'b n (qkv h d) -> qkv b h n d', qkv=3, h=h)
136
+
137
+ self.save_v(v)
138
+
139
+ dots = self.matmul1([q, k]) * self.scale
140
+
141
+ attn = self.softmax(dots)
142
+ attn = self.attn_drop(attn)
143
+
144
+ self.save_attn(attn)
145
+ attn.register_hook(self.save_attn_gradients)
146
+
147
+ out = self.matmul2([attn, v])
148
+ out = rearrange(out, 'b h n d -> b n (h d)')
149
+
150
+ out = self.proj(out)
151
+ out = self.proj_drop(out)
152
+ return out
153
+
154
+ def relprop(self, cam, **kwargs):
155
+ cam = self.proj_drop.relprop(cam, **kwargs)
156
+ cam = self.proj.relprop(cam, **kwargs)
157
+ cam = rearrange(cam, 'b n (h d) -> b h n d', h=self.num_heads)
158
+
159
+ # attn = A*V
160
+ (cam1, cam_v)= self.matmul2.relprop(cam, **kwargs)
161
+ cam1 /= 2
162
+ cam_v /= 2
163
+
164
+ self.save_v_cam(cam_v)
165
+ self.save_attn_cam(cam1)
166
+
167
+ cam1 = self.attn_drop.relprop(cam1, **kwargs)
168
+ cam1 = self.softmax.relprop(cam1, **kwargs)
169
+
170
+ # A = Q*K^T
171
+ (cam_q, cam_k) = self.matmul1.relprop(cam1, **kwargs)
172
+ cam_q /= 2
173
+ cam_k /= 2
174
+
175
+ cam_qkv = rearrange([cam_q, cam_k, cam_v], 'qkv b h n d -> b n (qkv h d)', qkv=3, h=self.num_heads)
176
+
177
+ return self.qkv.relprop(cam_qkv, **kwargs)
178
+
179
+
180
+ class Block(nn.Module):
181
+
182
+ def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, drop=0., attn_drop=0.):
183
+ super().__init__()
184
+ self.norm1 = LayerNorm(dim, eps=1e-6)
185
+ self.attn = Attention(
186
+ dim, num_heads=num_heads, qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop)
187
+ self.norm2 = LayerNorm(dim, eps=1e-6)
188
+ mlp_hidden_dim = int(dim * mlp_ratio)
189
+ self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, drop=drop)
190
+
191
+ self.add1 = Add()
192
+ self.add2 = Add()
193
+ self.clone1 = Clone()
194
+ self.clone2 = Clone()
195
+
196
+ def forward(self, x):
197
+ x1, x2 = self.clone1(x, 2)
198
+ x = self.add1([x1, self.attn(self.norm1(x2))])
199
+ x1, x2 = self.clone2(x, 2)
200
+ x = self.add2([x1, self.mlp(self.norm2(x2))])
201
+ return x
202
+
203
+ def relprop(self, cam, **kwargs):
204
+ (cam1, cam2) = self.add2.relprop(cam, **kwargs)
205
+ cam2 = self.mlp.relprop(cam2, **kwargs)
206
+ cam2 = self.norm2.relprop(cam2, **kwargs)
207
+ cam = self.clone2.relprop((cam1, cam2), **kwargs)
208
+
209
+ (cam1, cam2) = self.add1.relprop(cam, **kwargs)
210
+ cam2 = self.attn.relprop(cam2, **kwargs)
211
+ cam2 = self.norm1.relprop(cam2, **kwargs)
212
+ cam = self.clone1.relprop((cam1, cam2), **kwargs)
213
+ return cam
214
+
215
+
216
+ class PatchEmbed(nn.Module):
217
+ """ Image to Patch Embedding
218
+ """
219
+ def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
220
+ super().__init__()
221
+ img_size = to_2tuple(img_size)
222
+ patch_size = to_2tuple(patch_size)
223
+ num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0])
224
+ self.img_size = img_size
225
+ self.patch_size = patch_size
226
+ self.num_patches = num_patches
227
+
228
+ self.proj = Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
229
+
230
+ def forward(self, x):
231
+ B, C, H, W = x.shape
232
+ # FIXME look at relaxing size constraints
233
+ assert H == self.img_size[0] and W == self.img_size[1], \
234
+ f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
235
+ x = self.proj(x).flatten(2).transpose(1, 2)
236
+ return x
237
+
238
+ def relprop(self, cam, **kwargs):
239
+ cam = cam.transpose(1,2)
240
+ cam = cam.reshape(cam.shape[0], cam.shape[1],
241
+ (self.img_size[0] // self.patch_size[0]), (self.img_size[1] // self.patch_size[1]))
242
+ return self.proj.relprop(cam, **kwargs)
243
+
244
+
245
+ class VisionTransformer(nn.Module):
246
+ """ Vision Transformer with support for patch or hybrid CNN input stage
247
+ """
248
+ def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dim=768, depth=12,
249
+ num_heads=12, mlp_ratio=4., qkv_bias=False, mlp_head=False, drop_rate=0., attn_drop_rate=0.):
250
+ super().__init__()
251
+ self.num_classes = num_classes
252
+ self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models
253
+ self.patch_embed = PatchEmbed(
254
+ img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim)
255
+ num_patches = self.patch_embed.num_patches
256
+
257
+ self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
258
+ self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
259
+
260
+ self.blocks = nn.ModuleList([
261
+ Block(
262
+ dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
263
+ drop=drop_rate, attn_drop=attn_drop_rate)
264
+ for i in range(depth)])
265
+
266
+ self.norm = LayerNorm(embed_dim)
267
+ if mlp_head:
268
+ # paper diagram suggests 'MLP head', but results in 4M extra parameters vs paper
269
+ self.head = Mlp(embed_dim, int(embed_dim * mlp_ratio), num_classes)
270
+ else:
271
+ # with a single Linear layer as head, the param count within rounding of paper
272
+ self.head = Linear(embed_dim, num_classes)
273
+
274
+ # FIXME not quite sure what the proper weight init is supposed to be,
275
+ # normal / trunc normal w/ std == .02 similar to other Bert like transformers
276
+ trunc_normal_(self.pos_embed, std=.02) # embeddings same as weights?
277
+ trunc_normal_(self.cls_token, std=.02)
278
+ self.apply(self._init_weights)
279
+
280
+ self.pool = IndexSelect()
281
+ self.add = Add()
282
+
283
+ self.inp_grad = None
284
+
285
+ def save_inp_grad(self,grad):
286
+ self.inp_grad = grad
287
+
288
+ def get_inp_grad(self):
289
+ return self.inp_grad
290
+
291
+
292
+ def _init_weights(self, m):
293
+ if isinstance(m, nn.Linear):
294
+ trunc_normal_(m.weight, std=.02)
295
+ if isinstance(m, nn.Linear) and m.bias is not None:
296
+ nn.init.constant_(m.bias, 0)
297
+ elif isinstance(m, nn.LayerNorm):
298
+ nn.init.constant_(m.bias, 0)
299
+ nn.init.constant_(m.weight, 1.0)
300
+
301
+ @property
302
+ def no_weight_decay(self):
303
+ return {'pos_embed', 'cls_token'}
304
+
305
+ def forward(self, x):
306
+ B = x.shape[0]
307
+ x = self.patch_embed(x)
308
+
309
+ cls_tokens = self.cls_token.expand(B, -1, -1) # stole cls_tokens impl from Phil Wang, thanks
310
+ x = torch.cat((cls_tokens, x), dim=1)
311
+ x = self.add([x, self.pos_embed])
312
+
313
+ x.register_hook(self.save_inp_grad)
314
+
315
+ for blk in self.blocks:
316
+ x = blk(x)
317
+
318
+ x = self.norm(x)
319
+ x = self.pool(x, dim=1, indices=torch.tensor(0, device=x.device))
320
+ x = x.squeeze(1)
321
+ x = self.head(x)
322
+ return x
323
+
324
+ def relprop(self, cam=None,method="transformer_attribution", is_ablation=False, start_layer=0, **kwargs):
325
+ # print(kwargs)
326
+ # print("conservation 1", cam.sum())
327
+ cam = self.head.relprop(cam, **kwargs)
328
+ cam = cam.unsqueeze(1)
329
+ cam = self.pool.relprop(cam, **kwargs)
330
+ cam = self.norm.relprop(cam, **kwargs)
331
+ for blk in reversed(self.blocks):
332
+ cam = blk.relprop(cam, **kwargs)
333
+
334
+ # print("conservation 2", cam.sum())
335
+ # print("min", cam.min())
336
+
337
+ if method == "full":
338
+ (cam, _) = self.add.relprop(cam, **kwargs)
339
+ cam = cam[:, 1:]
340
+ cam = self.patch_embed.relprop(cam, **kwargs)
341
+ # sum on channels
342
+ cam = cam.sum(dim=1)
343
+ return cam
344
+
345
+ elif method == "rollout":
346
+ # cam rollout
347
+ attn_cams = []
348
+ for blk in self.blocks:
349
+ attn_heads = blk.attn.get_attn_cam().clamp(min=0)
350
+ avg_heads = (attn_heads.sum(dim=1) / attn_heads.shape[1]).detach()
351
+ attn_cams.append(avg_heads)
352
+ cam = compute_rollout_attention(attn_cams, start_layer=start_layer)
353
+ cam = cam[:, 0, 1:]
354
+ return cam
355
+
356
+ # our method, method name grad is legacy
357
+ elif method == "transformer_attribution" or method == "grad":
358
+ cams = []
359
+ for blk in self.blocks:
360
+ grad = blk.attn.get_attn_gradients()
361
+ cam = blk.attn.get_attn_cam()
362
+ cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
363
+ grad = grad[0].reshape(-1, grad.shape[-1], grad.shape[-1])
364
+ cam = grad * cam
365
+ cam = cam.clamp(min=0).mean(dim=0)
366
+ cams.append(cam.unsqueeze(0))
367
+ rollout = compute_rollout_attention(cams, start_layer=start_layer)
368
+ cam = rollout[:, 0, 1:]
369
+ return cam
370
+
371
+ elif method == "last_layer":
372
+ cam = self.blocks[-1].attn.get_attn_cam()
373
+ cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
374
+ if is_ablation:
375
+ grad = self.blocks[-1].attn.get_attn_gradients()
376
+ grad = grad[0].reshape(-1, grad.shape[-1], grad.shape[-1])
377
+ cam = grad * cam
378
+ cam = cam.clamp(min=0).mean(dim=0)
379
+ cam = cam[0, 1:]
380
+ return cam
381
+
382
+ elif method == "last_layer_attn":
383
+ cam = self.blocks[-1].attn.get_attn()
384
+ cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
385
+ cam = cam.clamp(min=0).mean(dim=0)
386
+ cam = cam[0, 1:]
387
+ return cam
388
+
389
+ elif method == "second_layer":
390
+ cam = self.blocks[1].attn.get_attn_cam()
391
+ cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
392
+ if is_ablation:
393
+ grad = self.blocks[1].attn.get_attn_gradients()
394
+ grad = grad[0].reshape(-1, grad.shape[-1], grad.shape[-1])
395
+ cam = grad * cam
396
+ cam = cam.clamp(min=0).mean(dim=0)
397
+ cam = cam[0, 1:]
398
+ return cam
399
+
400
+
401
+ def _conv_filter(state_dict, patch_size=16):
402
+ """ convert patch embedding weight from manual patchify + linear proj to conv"""
403
+ out_dict = {}
404
+ for k, v in state_dict.items():
405
+ if 'patch_embed.proj.weight' in k:
406
+ v = v.reshape((v.shape[0], 3, patch_size, patch_size))
407
+ out_dict[k] = v
408
+ return out_dict
409
+
410
+ def vit_base_patch16_224(pretrained=False, **kwargs):
411
+ model = VisionTransformer(
412
+ patch_size=16, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True, **kwargs)
413
+ model.default_cfg = default_cfgs['vit_base_patch16_224']
414
+ if pretrained:
415
+ load_pretrained(
416
+ model, num_classes=model.num_classes, in_chans=kwargs.get('in_chans', 3), filter_fn=_conv_filter)
417
+ return model
418
+
419
+ def vit_large_patch16_224(pretrained=False, **kwargs):
420
+ model = VisionTransformer(
421
+ patch_size=16, embed_dim=1024, depth=24, num_heads=16, mlp_ratio=4, qkv_bias=True, **kwargs)
422
+ model.default_cfg = default_cfgs['vit_large_patch16_224']
423
+ if pretrained:
424
+ load_pretrained(model, num_classes=model.num_classes, in_chans=kwargs.get('in_chans', 3))
425
+ return model
426
+
427
+ def deit_base_patch16_224(pretrained=False, **kwargs):
428
+ model = VisionTransformer(
429
+ patch_size=16, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True, **kwargs)
430
+ model.default_cfg = _cfg()
431
+ if pretrained:
432
+ checkpoint = torch.hub.load_state_dict_from_url(
433
+ url="https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth",
434
+ map_location="cpu", check_hash=True
435
+ )
436
+ model.load_state_dict(checkpoint["model"])
437
+ return model
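The `compute_rollout_attention` helper above implements attention rollout: each layer's attention map gets an identity matrix added for the residual connection, and the per-layer matrices are chained by matrix multiplication from `start_layer` upward. A toy sketch with made-up 3-token attention maps (not from the repository) illustrates the computation:

    import torch
    # one image, three tokens, two layers; rows sum to 1 like softmaxed attention
    A1 = torch.tensor([[[0.6, 0.2, 0.2], [0.3, 0.5, 0.2], [0.1, 0.1, 0.8]]])
    A2 = torch.tensor([[[0.4, 0.3, 0.3], [0.2, 0.6, 0.2], [0.3, 0.3, 0.4]]])
    eye = torch.eye(3).unsqueeze(0)
    joint = (A2 + eye).bmm(A1 + eye)   # same order as the loop: later layers multiply on the left
    relevance = joint[:, 0, 1:]        # row 0 = [CLS]; columns 1: = patch tokens, as used in relprop()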
ViT_DeiT/baselines/ViT/ViT_explanation_generator.py ADDED
@@ -0,0 +1,83 @@
+ import argparse
+ import torch
+ import numpy as np
+ from numpy import *
+
+ # compute rollout between attention layers
+ def compute_rollout_attention(all_layer_matrices, start_layer=0):
+     # adding residual consideration- code adapted from https://github.com/samiraabnar/attention_flow
+     num_tokens = all_layer_matrices[0].shape[1]
+     batch_size = all_layer_matrices[0].shape[0]
+     eye = torch.eye(num_tokens).expand(batch_size, num_tokens, num_tokens).to(all_layer_matrices[0].device)
+     all_layer_matrices = [all_layer_matrices[i] + eye for i in range(len(all_layer_matrices))]
+     matrices_aug = [all_layer_matrices[i] / all_layer_matrices[i].sum(dim=-1, keepdim=True)
+                     for i in range(len(all_layer_matrices))]
+     joint_attention = matrices_aug[start_layer]
+     for i in range(start_layer+1, len(matrices_aug)):
+         joint_attention = matrices_aug[i].bmm(joint_attention)
+     return joint_attention
+
+ class LRP:
+     def __init__(self, model):
+         self.model = model
+         self.model.eval()
+
+     def generate_LRP(self, input, index=None, method="transformer_attribution", is_ablation=False, start_layer=0):
+         output = self.model(input)
+         kwargs = {"alpha": 1}
+         if index == None:
+             index = np.argmax(output.cpu().data.numpy(), axis=-1)
+
+         one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
+         one_hot[0, index] = 1
+         one_hot_vector = one_hot
+         one_hot = torch.from_numpy(one_hot).requires_grad_(True)
+         one_hot = torch.sum(one_hot.cuda() * output)
+
+         self.model.zero_grad()
+         one_hot.backward(retain_graph=True)
+
+         return self.model.relprop(torch.tensor(one_hot_vector).to(input.device), method=method, is_ablation=is_ablation,
+                                   start_layer=start_layer, **kwargs)
+
+
+
+ class Baselines:
+     def __init__(self, model):
+         self.model = model
+         self.model.eval()
+
+     def generate_cam_attn(self, input, index=None):
+         output = self.model(input.cuda(), register_hook=True)
+         if index == None:
+             index = np.argmax(output.cpu().data.numpy())
+
+         one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
+         one_hot[0][index] = 1
+         one_hot = torch.from_numpy(one_hot).requires_grad_(True)
+         one_hot = torch.sum(one_hot.cuda() * output)
+
+         self.model.zero_grad()
+         one_hot.backward(retain_graph=True)
+         #################### attn
+         grad = self.model.blocks[-1].attn.get_attn_gradients()
+         cam = self.model.blocks[-1].attn.get_attention_map()
+         cam = cam[0, :, 0, 1:].reshape(-1, 14, 14)
+         grad = grad[0, :, 0, 1:].reshape(-1, 14, 14)
+         grad = grad.mean(dim=[1, 2], keepdim=True)
+         cam = (cam * grad).mean(0).clamp(min=0)
+         cam = (cam - cam.min()) / (cam.max() - cam.min())
+
+         return cam
+         #################### attn
+
+     def generate_rollout(self, input, start_layer=0):
+         self.model(input)
+         blocks = self.model.blocks
+         all_layer_attentions = []
+         for blk in blocks:
+             attn_heads = blk.attn.get_attention_map()
+             avg_heads = (attn_heads.sum(dim=1) / attn_heads.shape[1]).detach()
+             all_layer_attentions.append(avg_heads)
+         rollout = compute_rollout_attention(all_layer_attentions, start_layer=start_layer)
+         return rollout[:,0, 1:]
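Taken together with the model files, typical usage of the two generator classes looks like the following sketch (CUDA assumed, since both classes move tensors to the GPU; `batch` stands for a normalized (B, 3, 224, 224) image tensor):

    from baselines.ViT.ViT_new import vit_base_patch16_224            # plain ViT, for Baselines
    from baselines.ViT.ViT_LRP import vit_base_patch16_224 as vit_LRP # relprop-enabled ViT, for LRP
    from baselines.ViT.ViT_explanation_generator import Baselines, LRP

    baselines = Baselines(vit_base_patch16_224(pretrained=True).cuda())
    rollout_map = baselines.generate_rollout(batch.cuda(), start_layer=1)        # (B, 196) patch scores

    lrp = LRP(vit_LRP(pretrained=True).cuda())
    attr_map = lrp.generate_LRP(batch.cuda(), method="transformer_attribution")  # (B, 196) patch scores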
ViT_DeiT/baselines/ViT/ViT_new.py ADDED
@@ -0,0 +1,238 @@
1
+ """ Vision Transformer (ViT) in PyTorch
2
+ Hacked together by / Copyright 2020 Ross Wightman
3
+ """
4
+ import torch
5
+ import torch.nn as nn
6
+ from functools import partial
7
+ from einops import rearrange
8
+
9
+ from baselines.ViT.helpers import load_pretrained
10
+ from baselines.ViT.weight_init import trunc_normal_
11
+ from baselines.ViT.layer_helpers import to_2tuple
12
+
13
+
14
+ def _cfg(url='', **kwargs):
15
+ return {
16
+ 'url': url,
17
+ 'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': None,
18
+ 'crop_pct': .9, 'interpolation': 'bicubic',
19
+ 'first_conv': 'patch_embed.proj', 'classifier': 'head',
20
+ **kwargs
21
+ }
22
+
23
+
24
+ default_cfgs = {
25
+ # patch models
26
+ 'vit_small_patch16_224': _cfg(
27
+ url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/vit_small_p16_224-15ec54c9.pth',
28
+ ),
29
+ 'vit_base_patch16_224': _cfg(
30
+ url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth',
31
+ mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5),
32
+ ),
33
+ 'vit_large_patch16_224': _cfg(
34
+ url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_p16_224-4ee7a4dc.pth',
35
+ mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
36
+ }
37
+
38
+ class Mlp(nn.Module):
39
+ def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
40
+ super().__init__()
41
+ out_features = out_features or in_features
42
+ hidden_features = hidden_features or in_features
43
+ self.fc1 = nn.Linear(in_features, hidden_features)
44
+ self.act = act_layer()
45
+ self.fc2 = nn.Linear(hidden_features, out_features)
46
+ self.drop = nn.Dropout(drop)
47
+
48
+ def forward(self, x):
49
+ x = self.fc1(x)
50
+ x = self.act(x)
51
+ x = self.drop(x)
52
+ x = self.fc2(x)
53
+ x = self.drop(x)
54
+ return x
55
+
56
+
57
+ class Attention(nn.Module):
58
+ def __init__(self, dim, num_heads=8, qkv_bias=False,attn_drop=0., proj_drop=0.):
59
+ super().__init__()
60
+ self.num_heads = num_heads
61
+ head_dim = dim // num_heads
62
+ # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
63
+ self.scale = head_dim ** -0.5
64
+
65
+ self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
66
+ self.attn_drop = nn.Dropout(attn_drop)
67
+ self.proj = nn.Linear(dim, dim)
68
+ self.proj_drop = nn.Dropout(proj_drop)
69
+
70
+ self.attn_gradients = None
71
+ self.attention_map = None
72
+
73
+ def save_attn_gradients(self, attn_gradients):
74
+ self.attn_gradients = attn_gradients
75
+
76
+ def get_attn_gradients(self):
77
+ return self.attn_gradients
78
+
79
+ def save_attention_map(self, attention_map):
80
+ self.attention_map = attention_map
81
+
82
+ def get_attention_map(self):
83
+ return self.attention_map
84
+
85
+ def forward(self, x, register_hook=False):
86
+ b, n, _, h = *x.shape, self.num_heads
87
+
88
+ # self.save_output(x)
89
+ # x.register_hook(self.save_output_grad)
90
+
91
+ qkv = self.qkv(x)
92
+ q, k, v = rearrange(qkv, 'b n (qkv h d) -> qkv b h n d', qkv = 3, h = h)
93
+
94
+ dots = torch.einsum('bhid,bhjd->bhij', q, k) * self.scale
95
+
96
+ attn = dots.softmax(dim=-1)
97
+ attn = self.attn_drop(attn)
98
+
99
+ out = torch.einsum('bhij,bhjd->bhid', attn, v)
100
+
101
+ self.save_attention_map(attn)
102
+ if register_hook:
103
+ attn.register_hook(self.save_attn_gradients)
104
+
105
+ out = rearrange(out, 'b h n d -> b n (h d)')
106
+ out = self.proj(out)
107
+ out = self.proj_drop(out)
108
+ return out
109
+
110
+
111
+ class Block(nn.Module):
112
+
113
+ def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, drop=0., attn_drop=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm):
114
+ super().__init__()
115
+ self.norm1 = norm_layer(dim)
116
+ self.attn = Attention(
117
+ dim, num_heads=num_heads, qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop)
118
+ self.norm2 = norm_layer(dim)
119
+ mlp_hidden_dim = int(dim * mlp_ratio)
120
+ self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
121
+
122
+ def forward(self, x, register_hook=False):
123
+ x = x + self.attn(self.norm1(x), register_hook=register_hook)
124
+ x = x + self.mlp(self.norm2(x))
125
+ return x
126
+
127
+
128
+ class PatchEmbed(nn.Module):
129
+ """ Image to Patch Embedding
130
+ """
131
+ def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
132
+ super().__init__()
133
+ img_size = to_2tuple(img_size)
134
+ patch_size = to_2tuple(patch_size)
135
+ num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0])
136
+ self.img_size = img_size
137
+ self.patch_size = patch_size
138
+ self.num_patches = num_patches
139
+
140
+ self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
141
+
142
+ def forward(self, x):
143
+ B, C, H, W = x.shape
144
+ # FIXME look at relaxing size constraints
145
+ assert H == self.img_size[0] and W == self.img_size[1], \
146
+ f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
147
+ x = self.proj(x).flatten(2).transpose(1, 2)
148
+ return x
149
+
150
+ class VisionTransformer(nn.Module):
151
+ """ Vision Transformer
152
+ """
153
+ def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dim=768, depth=12,
154
+ num_heads=12, mlp_ratio=4., qkv_bias=False, drop_rate=0., attn_drop_rate=0., norm_layer=nn.LayerNorm):
155
+ super().__init__()
156
+ self.num_classes = num_classes
157
+ self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models
158
+ self.patch_embed = PatchEmbed(
159
+ img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim)
160
+ num_patches = self.patch_embed.num_patches
161
+
162
+ self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
163
+ self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
164
+ self.pos_drop = nn.Dropout(p=drop_rate)
165
+
166
+ self.blocks = nn.ModuleList([
167
+ Block(
168
+ dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
169
+ drop=drop_rate, attn_drop=attn_drop_rate, norm_layer=norm_layer)
170
+ for i in range(depth)])
171
+ self.norm = norm_layer(embed_dim)
172
+
173
+ # Classifier head
174
+ self.head = nn.Linear(embed_dim, num_classes) if num_classes > 0 else nn.Identity()
175
+
176
+ trunc_normal_(self.pos_embed, std=.02)
177
+ trunc_normal_(self.cls_token, std=.02)
178
+ self.apply(self._init_weights)
179
+
180
+ def _init_weights(self, m):
181
+ if isinstance(m, nn.Linear):
182
+ trunc_normal_(m.weight, std=.02)
183
+ if isinstance(m, nn.Linear) and m.bias is not None:
184
+ nn.init.constant_(m.bias, 0)
185
+ elif isinstance(m, nn.LayerNorm):
186
+ nn.init.constant_(m.bias, 0)
187
+ nn.init.constant_(m.weight, 1.0)
188
+
189
+ @torch.jit.ignore
190
+ def no_weight_decay(self):
191
+ return {'pos_embed', 'cls_token'}
192
+
193
+ def forward(self, x, register_hook=False):
194
+ B = x.shape[0]
195
+ x = self.patch_embed(x)
196
+
197
+ cls_tokens = self.cls_token.expand(B, -1, -1) # stole cls_tokens impl from Phil Wang, thanks
198
+ x = torch.cat((cls_tokens, x), dim=1)
199
+ x = x + self.pos_embed
200
+ x = self.pos_drop(x)
201
+
202
+ for blk in self.blocks:
203
+ x = blk(x, register_hook=register_hook)
204
+
205
+ x = self.norm(x)
206
+ x = x[:, 0]
207
+ x = self.head(x)
208
+ return x
209
+
210
+
211
+ def _conv_filter(state_dict, patch_size=16):
212
+ """ convert patch embedding weight from manual patchify + linear proj to conv"""
213
+ out_dict = {}
214
+ for k, v in state_dict.items():
215
+ if 'patch_embed.proj.weight' in k:
216
+ v = v.reshape((v.shape[0], 3, patch_size, patch_size))
217
+ out_dict[k] = v
218
+ return out_dict
219
+
220
+
221
+ def vit_base_patch16_224(pretrained=False, **kwargs):
222
+ model = VisionTransformer(
223
+ patch_size=16, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True,
224
+ norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
225
+ model.default_cfg = default_cfgs['vit_base_patch16_224']
226
+ if pretrained:
227
+ load_pretrained(
228
+ model, num_classes=model.num_classes, in_chans=kwargs.get('in_chans', 3), filter_fn=_conv_filter)
229
+ return model
230
+
231
+ def vit_large_patch16_224(pretrained=False, **kwargs):
232
+ model = VisionTransformer(
233
+ patch_size=16, embed_dim=1024, depth=24, num_heads=16, mlp_ratio=4, qkv_bias=True,
234
+ norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
235
+ model.default_cfg = default_cfgs['vit_large_patch16_224']
236
+ if pretrained:
237
+ load_pretrained(model, num_classes=model.num_classes, in_chans=kwargs.get('in_chans', 3))
238
+ return model
ViT_DeiT/baselines/ViT/ViT_orig_LRP.py ADDED
@@ -0,0 +1,425 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ Vision Transformer (ViT) in PyTorch
2
+ Hacked together by / Copyright 2020 Ross Wightman
3
+ """
4
+ import torch
5
+ import torch.nn as nn
6
+ from einops import rearrange
7
+ from modules.layers_lrp import *
8
+
9
+ from baselines.ViT.helpers import load_pretrained
10
+ from baselines.ViT.weight_init import trunc_normal_
11
+ from baselines.ViT.layer_helpers import to_2tuple
12
+
13
+
14
+ def _cfg(url='', **kwargs):
15
+ return {
16
+ 'url': url,
17
+ 'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': None,
18
+ 'crop_pct': .9, 'interpolation': 'bicubic',
19
+ 'first_conv': 'patch_embed.proj', 'classifier': 'head',
20
+ **kwargs
21
+ }
22
+
23
+
24
+ default_cfgs = {
25
+ # patch models
26
+ 'vit_small_patch16_224': _cfg(
27
+ url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/vit_small_p16_224-15ec54c9.pth',
28
+ ),
29
+ 'vit_base_patch16_224': _cfg(
30
+ url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth',
31
+ mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5),
32
+ ),
33
+ 'vit_large_patch16_224': _cfg(
34
+ url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_p16_224-4ee7a4dc.pth',
35
+ mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
36
+ }
37
+
38
+ def compute_rollout_attention(all_layer_matrices, start_layer=0):
39
+ # adding residual consideration
40
+ num_tokens = all_layer_matrices[0].shape[1]
41
+ batch_size = all_layer_matrices[0].shape[0]
42
+ eye = torch.eye(num_tokens).expand(batch_size, num_tokens, num_tokens).to(all_layer_matrices[0].device)
43
+ all_layer_matrices = [all_layer_matrices[i] + eye for i in range(len(all_layer_matrices))]
44
+ # all_layer_matrices = [all_layer_matrices[i] / all_layer_matrices[i].sum(dim=-1, keepdim=True)
45
+ # for i in range(len(all_layer_matrices))]
46
+ joint_attention = all_layer_matrices[start_layer]
47
+ for i in range(start_layer+1, len(all_layer_matrices)):
48
+ joint_attention = all_layer_matrices[i].bmm(joint_attention)
49
+ return joint_attention
50
+
51
+ class Mlp(nn.Module):
52
+ def __init__(self, in_features, hidden_features=None, out_features=None, drop=0.):
53
+ super().__init__()
54
+ out_features = out_features or in_features
55
+ hidden_features = hidden_features or in_features
56
+ self.fc1 = Linear(in_features, hidden_features)
57
+ self.act = GELU()
58
+ self.fc2 = Linear(hidden_features, out_features)
59
+ self.drop = Dropout(drop)
60
+
61
+ def forward(self, x):
62
+ x = self.fc1(x)
63
+ x = self.act(x)
64
+ x = self.drop(x)
65
+ x = self.fc2(x)
66
+ x = self.drop(x)
67
+ return x
68
+
69
+ def relprop(self, cam, **kwargs):
70
+ cam = self.drop.relprop(cam, **kwargs)
71
+ cam = self.fc2.relprop(cam, **kwargs)
72
+ cam = self.act.relprop(cam, **kwargs)
73
+ cam = self.fc1.relprop(cam, **kwargs)
74
+ return cam
75
+
76
+
77
+ class Attention(nn.Module):
78
+ def __init__(self, dim, num_heads=8, qkv_bias=False,attn_drop=0., proj_drop=0.):
79
+ super().__init__()
80
+ self.num_heads = num_heads
81
+ head_dim = dim // num_heads
82
+ # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
83
+ self.scale = head_dim ** -0.5
84
+
85
+ # A = Q*K^T
86
+ self.matmul1 = einsum('bhid,bhjd->bhij')
87
+ # attn = A*V
88
+ self.matmul2 = einsum('bhij,bhjd->bhid')
89
+
90
+ self.qkv = Linear(dim, dim * 3, bias=qkv_bias)
91
+ self.attn_drop = Dropout(attn_drop)
92
+ self.proj = Linear(dim, dim)
93
+ self.proj_drop = Dropout(proj_drop)
94
+ self.softmax = Softmax(dim=-1)
95
+
96
+ self.attn_cam = None
97
+ self.attn = None
98
+ self.v = None
99
+ self.v_cam = None
100
+ self.attn_gradients = None
101
+
102
+ def get_attn(self):
103
+ return self.attn
104
+
105
+ def save_attn(self, attn):
106
+ self.attn = attn
107
+
108
+ def save_attn_cam(self, cam):
109
+ self.attn_cam = cam
110
+
111
+ def get_attn_cam(self):
112
+ return self.attn_cam
113
+
114
+ def get_v(self):
115
+ return self.v
116
+
117
+ def save_v(self, v):
118
+ self.v = v
119
+
120
+ def save_v_cam(self, cam):
121
+ self.v_cam = cam
122
+
123
+ def get_v_cam(self):
124
+ return self.v_cam
125
+
126
+ def save_attn_gradients(self, attn_gradients):
127
+ self.attn_gradients = attn_gradients
128
+
129
+ def get_attn_gradients(self):
130
+ return self.attn_gradients
131
+
132
+ def forward(self, x):
133
+ b, n, _, h = *x.shape, self.num_heads
134
+ qkv = self.qkv(x)
135
+ q, k, v = rearrange(qkv, 'b n (qkv h d) -> qkv b h n d', qkv=3, h=h)
136
+
137
+ self.save_v(v)
138
+
139
+ dots = self.matmul1([q, k]) * self.scale
140
+
141
+ attn = self.softmax(dots)
142
+ attn = self.attn_drop(attn)
143
+
144
+ self.save_attn(attn)
145
+ attn.register_hook(self.save_attn_gradients)
146
+
147
+ out = self.matmul2([attn, v])
148
+ out = rearrange(out, 'b h n d -> b n (h d)')
149
+
150
+ out = self.proj(out)
151
+ out = self.proj_drop(out)
152
+ return out
153
+
154
+ def relprop(self, cam, **kwargs):
155
+ cam = self.proj_drop.relprop(cam, **kwargs)
156
+ cam = self.proj.relprop(cam, **kwargs)
157
+ cam = rearrange(cam, 'b n (h d) -> b h n d', h=self.num_heads)
158
+
159
+ # attn = A*V
160
+ (cam1, cam_v)= self.matmul2.relprop(cam, **kwargs)
161
+ cam1 /= 2
162
+ cam_v /= 2
163
+
164
+ self.save_v_cam(cam_v)
165
+ self.save_attn_cam(cam1)
166
+
167
+ cam1 = self.attn_drop.relprop(cam1, **kwargs)
168
+ cam1 = self.softmax.relprop(cam1, **kwargs)
169
+
170
+ # A = Q*K^T
171
+ (cam_q, cam_k) = self.matmul1.relprop(cam1, **kwargs)
172
+ cam_q /= 2
173
+ cam_k /= 2
174
+
175
+ cam_qkv = rearrange([cam_q, cam_k, cam_v], 'qkv b h n d -> b n (qkv h d)', qkv=3, h=self.num_heads)
176
+
177
+ return self.qkv.relprop(cam_qkv, **kwargs)
178
+
179
+
180
+ class Block(nn.Module):
181
+
182
+ def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, drop=0., attn_drop=0.):
183
+ super().__init__()
184
+ self.norm1 = LayerNorm(dim, eps=1e-6)
185
+ self.attn = Attention(
186
+ dim, num_heads=num_heads, qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop)
187
+ self.norm2 = LayerNorm(dim, eps=1e-6)
188
+ mlp_hidden_dim = int(dim * mlp_ratio)
189
+ self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, drop=drop)
190
+
191
+ self.add1 = Add()
192
+ self.add2 = Add()
193
+ self.clone1 = Clone()
194
+ self.clone2 = Clone()
195
+
196
+ def forward(self, x):
197
+ x1, x2 = self.clone1(x, 2)
198
+ x = self.add1([x1, self.attn(self.norm1(x2))])
199
+ x1, x2 = self.clone2(x, 2)
200
+ x = self.add2([x1, self.mlp(self.norm2(x2))])
201
+ return x
202
+
203
+ def relprop(self, cam, **kwargs):
204
+ (cam1, cam2) = self.add2.relprop(cam, **kwargs)
205
+ cam2 = self.mlp.relprop(cam2, **kwargs)
206
+ cam2 = self.norm2.relprop(cam2, **kwargs)
207
+ cam = self.clone2.relprop((cam1, cam2), **kwargs)
208
+
209
+ (cam1, cam2) = self.add1.relprop(cam, **kwargs)
210
+ cam2 = self.attn.relprop(cam2, **kwargs)
211
+ cam2 = self.norm1.relprop(cam2, **kwargs)
212
+ cam = self.clone1.relprop((cam1, cam2), **kwargs)
213
+ return cam
214
+
215
+
216
+ class PatchEmbed(nn.Module):
217
+ """ Image to Patch Embedding
218
+ """
219
+ def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
220
+ super().__init__()
221
+ img_size = to_2tuple(img_size)
222
+ patch_size = to_2tuple(patch_size)
223
+ num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0])
224
+ self.img_size = img_size
225
+ self.patch_size = patch_size
226
+ self.num_patches = num_patches
227
+
228
+ self.proj = Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
229
+
230
+ def forward(self, x):
231
+ B, C, H, W = x.shape
232
+ # FIXME look at relaxing size constraints
233
+ assert H == self.img_size[0] and W == self.img_size[1], \
234
+ f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
235
+ x = self.proj(x).flatten(2).transpose(1, 2)
236
+ return x
237
+
238
+ def relprop(self, cam, **kwargs):
239
+ cam = cam.transpose(1,2)
240
+ cam = cam.reshape(cam.shape[0], cam.shape[1],
241
+ (self.img_size[0] // self.patch_size[0]), (self.img_size[1] // self.patch_size[1]))
242
+ return self.proj.relprop(cam, **kwargs)
243
+
244
+
245
+ class VisionTransformer(nn.Module):
246
+ """ Vision Transformer with support for patch or hybrid CNN input stage
247
+ """
248
+ def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dim=768, depth=12,
249
+ num_heads=12, mlp_ratio=4., qkv_bias=False, mlp_head=False, drop_rate=0., attn_drop_rate=0.):
250
+ super().__init__()
251
+ self.num_classes = num_classes
252
+ self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models
253
+ self.patch_embed = PatchEmbed(
254
+ img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim)
255
+ num_patches = self.patch_embed.num_patches
256
+
257
+ self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
258
+ self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
259
+
260
+ self.blocks = nn.ModuleList([
261
+ Block(
262
+ dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
263
+ drop=drop_rate, attn_drop=attn_drop_rate)
264
+ for i in range(depth)])
265
+
266
+ self.norm = LayerNorm(embed_dim)
267
+ if mlp_head:
268
+ # paper diagram suggests 'MLP head', but results in 4M extra parameters vs paper
269
+ self.head = Mlp(embed_dim, int(embed_dim * mlp_ratio), num_classes)
270
+ else:
271
+ # with a single Linear layer as head, the param count within rounding of paper
272
+ self.head = Linear(embed_dim, num_classes)
273
+
274
+ # FIXME not quite sure what the proper weight init is supposed to be,
275
+ # normal / trunc normal w/ std == .02 similar to other Bert like transformers
276
+ trunc_normal_(self.pos_embed, std=.02) # embeddings same as weights?
277
+ trunc_normal_(self.cls_token, std=.02)
278
+ self.apply(self._init_weights)
279
+
280
+ self.pool = IndexSelect()
281
+ self.add = Add()
282
+
283
+ self.inp_grad = None
284
+
285
+ def save_inp_grad(self,grad):
286
+ self.inp_grad = grad
287
+
288
+ def get_inp_grad(self):
289
+ return self.inp_grad
290
+
291
+
292
+ def _init_weights(self, m):
293
+ if isinstance(m, nn.Linear):
294
+ trunc_normal_(m.weight, std=.02)
295
+ if isinstance(m, nn.Linear) and m.bias is not None:
296
+ nn.init.constant_(m.bias, 0)
297
+ elif isinstance(m, nn.LayerNorm):
298
+ nn.init.constant_(m.bias, 0)
299
+ nn.init.constant_(m.weight, 1.0)
300
+
301
+ @property
302
+ def no_weight_decay(self):
303
+ return {'pos_embed', 'cls_token'}
304
+
305
+ def forward(self, x):
306
+ B = x.shape[0]
307
+ x = self.patch_embed(x)
308
+
309
+ cls_tokens = self.cls_token.expand(B, -1, -1) # stole cls_tokens impl from Phil Wang, thanks
310
+ x = torch.cat((cls_tokens, x), dim=1)
311
+ x = self.add([x, self.pos_embed])
312
+
313
+ x.register_hook(self.save_inp_grad)
314
+
315
+ for blk in self.blocks:
316
+ x = blk(x)
317
+
318
+ x = self.norm(x)
319
+ x = self.pool(x, dim=1, indices=torch.tensor(0, device=x.device))
320
+ x = x.squeeze(1)
321
+ x = self.head(x)
322
+ return x
323
+
324
+ def relprop(self, cam=None,method="grad", is_ablation=False, start_layer=0, **kwargs):
325
+ # print(kwargs)
326
+ # print("conservation 1", cam.sum())
327
+ cam = self.head.relprop(cam, **kwargs)
328
+ cam = cam.unsqueeze(1)
329
+ cam = self.pool.relprop(cam, **kwargs)
330
+ cam = self.norm.relprop(cam, **kwargs)
331
+ for blk in reversed(self.blocks):
332
+ cam = blk.relprop(cam, **kwargs)
333
+
334
+ # print("conservation 2", cam.sum())
335
+ # print("min", cam.min())
336
+
337
+ if method == "full":
338
+ (cam, _) = self.add.relprop(cam, **kwargs)
339
+ cam = cam[:, 1:]
340
+ cam = self.patch_embed.relprop(cam, **kwargs)
341
+ # sum on channels
342
+ cam = cam.sum(dim=1)
343
+ return cam
344
+
345
+ elif method == "rollout":
346
+ # cam rollout
347
+ attn_cams = []
348
+ for blk in self.blocks:
349
+ attn_heads = blk.attn.get_attn_cam().clamp(min=0)
350
+ avg_heads = (attn_heads.sum(dim=1) / attn_heads.shape[1]).detach()
351
+ attn_cams.append(avg_heads)
352
+ cam = compute_rollout_attention(attn_cams, start_layer=start_layer)
353
+ cam = cam[:, 0, 1:]
354
+ return cam
355
+
356
+ elif method == "grad":
357
+ cams = []
358
+ for blk in self.blocks:
359
+ grad = blk.attn.get_attn_gradients()
360
+ cam = blk.attn.get_attn_cam()
361
+ cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
362
+ grad = grad[0].reshape(-1, grad.shape[-1], grad.shape[-1])
363
+ cam = grad * cam
364
+ cam = cam.clamp(min=0).mean(dim=0)
365
+ cams.append(cam.unsqueeze(0))
366
+ rollout = compute_rollout_attention(cams, start_layer=start_layer)
367
+ cam = rollout[:, 0, 1:]
368
+ return cam
369
+
370
+ elif method == "last_layer":
371
+ cam = self.blocks[-1].attn.get_attn_cam()
372
+ cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
373
+ if is_ablation:
374
+ grad = self.blocks[-1].attn.get_attn_gradients()
375
+ grad = grad[0].reshape(-1, grad.shape[-1], grad.shape[-1])
376
+ cam = grad * cam
377
+ cam = cam.clamp(min=0).mean(dim=0)
378
+ cam = cam[0, 1:]
379
+ return cam
380
+
381
+ elif method == "last_layer_attn":
382
+ cam = self.blocks[-1].attn.get_attn()
383
+ cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
384
+ cam = cam.clamp(min=0).mean(dim=0)
385
+ cam = cam[0, 1:]
386
+ return cam
387
+
388
+ elif method == "second_layer":
389
+ cam = self.blocks[1].attn.get_attn_cam()
390
+ cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
391
+ if is_ablation:
392
+ grad = self.blocks[1].attn.get_attn_gradients()
393
+ grad = grad[0].reshape(-1, grad.shape[-1], grad.shape[-1])
394
+ cam = grad * cam
395
+ cam = cam.clamp(min=0).mean(dim=0)
396
+ cam = cam[0, 1:]
397
+ return cam
398
+
399
+
400
+ def _conv_filter(state_dict, patch_size=16):
401
+ """ convert patch embedding weight from manual patchify + linear proj to conv"""
402
+ out_dict = {}
403
+ for k, v in state_dict.items():
404
+ if 'patch_embed.proj.weight' in k:
405
+ v = v.reshape((v.shape[0], 3, patch_size, patch_size))
406
+ out_dict[k] = v
407
+ return out_dict
408
+
409
+
410
+ def vit_base_patch16_224(pretrained=False, **kwargs):
411
+ model = VisionTransformer(
412
+ patch_size=16, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True, **kwargs)
413
+ model.default_cfg = default_cfgs['vit_base_patch16_224']
414
+ if pretrained:
415
+ load_pretrained(
416
+ model, num_classes=model.num_classes, in_chans=kwargs.get('in_chans', 3), filter_fn=_conv_filter)
417
+ return model
418
+
419
+ def vit_large_patch16_224(pretrained=False, **kwargs):
420
+ model = VisionTransformer(
421
+ patch_size=16, embed_dim=1024, depth=24, num_heads=16, mlp_ratio=4, qkv_bias=True, **kwargs)
422
+ model.default_cfg = default_cfgs['vit_large_patch16_224']
423
+ if pretrained:
424
+ load_pretrained(model, num_classes=model.num_classes, in_chans=kwargs.get('in_chans', 3))
425
+ return model
ViT_DeiT/baselines/ViT/__pycache__/ViT_LRP.cpython-38.pyc ADDED
Binary file (14 kB).
ViT_DeiT/baselines/ViT/__pycache__/ViT_explanation_generator.cpython-38.pyc ADDED
Binary file (3.44 kB).
ViT_DeiT/baselines/ViT/__pycache__/helpers.cpython-38.pyc ADDED
Binary file (7.25 kB).
ViT_DeiT/baselines/ViT/__pycache__/layer_helpers.cpython-38.pyc ADDED
Binary file (766 Bytes).
ViT_DeiT/baselines/ViT/__pycache__/weight_init.cpython-38.pyc ADDED
Binary file (1.92 kB).
ViT_DeiT/baselines/ViT/generate_visualizations.py ADDED
@@ -0,0 +1,208 @@
1
+ import os
2
+ from tqdm import tqdm
3
+ import h5py
4
+
5
+ import argparse
6
+
7
+ # Import saliency methods and models
8
+ from misc_functions import *
9
+
10
+ from ViT_explanation_generator import Baselines, LRP
11
+ from ViT_new import vit_base_patch16_224
12
+ from ViT_LRP import vit_base_patch16_224 as vit_LRP
13
+ from ViT_orig_LRP import vit_base_patch16_224 as vit_orig_LRP
14
+
15
+ from torchvision.datasets import ImageNet
16
+
17
+
18
+ def normalize(tensor,
19
+ mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]):
20
+ dtype = tensor.dtype
21
+ mean = torch.as_tensor(mean, dtype=dtype, device=tensor.device)
22
+ std = torch.as_tensor(std, dtype=dtype, device=tensor.device)
23
+ tensor.sub_(mean[None, :, None, None]).div_(std[None, :, None, None])
24
+ return tensor
25
+
26
+
27
+ def compute_saliency_and_save(args):
28
+ first = True
29
+ with h5py.File(os.path.join(args.method_dir, 'results.hdf5'), 'a') as f:
30
+ data_cam = f.create_dataset('vis',
31
+ (1, 1, 224, 224),
32
+ maxshape=(None, 1, 224, 224),
33
+ dtype=np.float32,
34
+ compression="gzip")
35
+ data_image = f.create_dataset('image',
36
+ (1, 3, 224, 224),
37
+ maxshape=(None, 3, 224, 224),
38
+ dtype=np.float32,
39
+ compression="gzip")
40
+ data_target = f.create_dataset('target',
41
+ (1,),
42
+ maxshape=(None,),
43
+ dtype=np.int32,
44
+ compression="gzip")
45
+ for batch_idx, (data, target) in enumerate(tqdm(sample_loader)):
46
+ if first:
47
+ first = False
48
+ data_cam.resize(data_cam.shape[0] + data.shape[0] - 1, axis=0)
49
+ data_image.resize(data_image.shape[0] + data.shape[0] - 1, axis=0)
50
+ data_target.resize(data_target.shape[0] + data.shape[0] - 1, axis=0)
51
+ else:
52
+ data_cam.resize(data_cam.shape[0] + data.shape[0], axis=0)
53
+ data_image.resize(data_image.shape[0] + data.shape[0], axis=0)
54
+ data_target.resize(data_target.shape[0] + data.shape[0], axis=0)
55
+
56
+ # Add data
57
+ data_image[-data.shape[0]:] = data.data.cpu().numpy()
58
+ data_target[-data.shape[0]:] = target.data.cpu().numpy()
59
+
60
+ target = target.to(device)
61
+
62
+ data = normalize(data)
63
+ data = data.to(device)
64
+ data.requires_grad_()
65
+
66
+ index = None
67
+ if args.vis_class == 'target':
68
+ index = target
69
+
70
+ if args.method == 'rollout':
71
+ Res = baselines.generate_rollout(data, start_layer=1).reshape(data.shape[0], 1, 14, 14)
72
+ # Res = Res - Res.mean()
73
+
74
+ elif args.method == 'lrp':
75
+ Res = lrp.generate_LRP(data, start_layer=1, index=index).reshape(data.shape[0], 1, 14, 14)
76
+ # Res = Res - Res.mean()
77
+
78
+ elif args.method == 'transformer_attribution':
79
+ Res = lrp.generate_LRP(data, start_layer=1, method="grad", index=index).reshape(data.shape[0], 1, 14, 14)
80
+ # Res = Res - Res.mean()
81
+
82
+ elif args.method == 'full_lrp':
83
+ Res = orig_lrp.generate_LRP(data, method="full", index=index).reshape(data.shape[0], 1, 224, 224)
84
+ # Res = Res - Res.mean()
85
+
86
+ elif args.method == 'lrp_last_layer':
87
+ Res = orig_lrp.generate_LRP(data, method="last_layer", is_ablation=args.is_ablation, index=index) \
88
+ .reshape(data.shape[0], 1, 14, 14)
89
+ # Res = Res - Res.mean()
90
+
91
+ elif args.method == 'attn_last_layer':
92
+ Res = lrp.generate_LRP(data, method="last_layer_attn", is_ablation=args.is_ablation) \
93
+ .reshape(data.shape[0], 1, 14, 14)
94
+
95
+ elif args.method == 'attn_gradcam':
96
+ Res = baselines.generate_cam_attn(data, index=index).reshape(data.shape[0], 1, 14, 14)
97
+
98
+ if args.method != 'full_lrp' and args.method != 'input_grads':
99
+ Res = torch.nn.functional.interpolate(Res, scale_factor=16, mode='bilinear').cuda()
100
+ Res = (Res - Res.min()) / (Res.max() - Res.min())
101
+
102
+ data_cam[-data.shape[0]:] = Res.data.cpu().numpy()
103
+
104
+
105
+ if __name__ == "__main__":
106
+ parser = argparse.ArgumentParser(description='Train a segmentation')
107
+ parser.add_argument('--batch-size', type=int,
108
+ default=1,
109
+ help='')
110
+ parser.add_argument('--method', type=str,
111
+ default='grad_rollout',
112
+ choices=['rollout', 'lrp', 'transformer_attribution', 'full_lrp', 'lrp_last_layer',
113
+ 'attn_last_layer', 'attn_gradcam'],
114
+ help='')
115
+ parser.add_argument('--lmd', type=float,
116
+ default=10,
117
+ help='')
118
+ parser.add_argument('--vis-class', type=str,
119
+ default='top',
120
+ choices=['top', 'target', 'index'],
121
+ help='')
122
+ parser.add_argument('--class-id', type=int,
123
+ default=0,
124
+ help='')
125
+ parser.add_argument('--cls-agn', action='store_true',
126
+ default=False,
127
+ help='')
128
+ parser.add_argument('--no-ia', action='store_true',
129
+ default=False,
130
+ help='')
131
+ parser.add_argument('--no-fx', action='store_true',
132
+ default=False,
133
+ help='')
134
+ parser.add_argument('--no-fgx', action='store_true',
135
+ default=False,
136
+ help='')
137
+ parser.add_argument('--no-m', action='store_true',
138
+ default=False,
139
+ help='')
140
+ parser.add_argument('--no-reg', action='store_true',
141
+ default=False,
142
+ help='')
143
+ parser.add_argument('--is-ablation', type=bool,
144
+ default=False,
145
+ help='')
146
+ parser.add_argument('--imagenet-validation-path', type=str,
147
+ required=True,
148
+ help='')
149
+ args = parser.parse_args()
150
+
151
+ # PATH variables
152
+ PATH = os.path.dirname(os.path.abspath(__file__)) + '/'
153
+ os.makedirs(os.path.join(PATH, 'visualizations'), exist_ok=True)
154
+
155
+ try:
156
+ os.remove(os.path.join(PATH, 'visualizations/{}/{}/results.hdf5'.format(args.method,
157
+ args.vis_class)))
158
+ except OSError:
159
+ pass
160
+
161
+
162
+ os.makedirs(os.path.join(PATH, 'visualizations/{}'.format(args.method)), exist_ok=True)
163
+ if args.vis_class == 'index':
164
+ os.makedirs(os.path.join(PATH, 'visualizations/{}/{}_{}'.format(args.method,
165
+ args.vis_class,
166
+ args.class_id)), exist_ok=True)
167
+ args.method_dir = os.path.join(PATH, 'visualizations/{}/{}_{}'.format(args.method,
168
+ args.vis_class,
169
+ args.class_id))
170
+ else:
171
+ ablation_fold = 'ablation' if args.is_ablation else 'not_ablation'
172
+ os.makedirs(os.path.join(PATH, 'visualizations/{}/{}/{}'.format(args.method,
173
+ args.vis_class, ablation_fold)), exist_ok=True)
174
+ args.method_dir = os.path.join(PATH, 'visualizations/{}/{}/{}'.format(args.method,
175
+ args.vis_class, ablation_fold))
176
+
177
+ cuda = torch.cuda.is_available()
178
+ device = torch.device("cuda" if cuda else "cpu")
179
+
180
+ # Model
181
+ model = vit_base_patch16_224(pretrained=True).cuda()
182
+ baselines = Baselines(model)
183
+
184
+ # LRP
185
+ model_LRP = vit_LRP(pretrained=True).cuda()
186
+ model_LRP.eval()
187
+ lrp = LRP(model_LRP)
188
+
189
+ # orig LRP
190
+ model_orig_LRP = vit_orig_LRP(pretrained=True).cuda()
191
+ model_orig_LRP.eval()
192
+ orig_lrp = LRP(model_orig_LRP)
193
+
194
+ # Dataset loader for sample images
195
+ transform = transforms.Compose([
196
+ transforms.Resize((224, 224)),
197
+ transforms.ToTensor(),
198
+ ])
199
+
200
+ imagenet_ds = ImageNet(args.imagenet_validation_path, split='val', download=False, transform=transform)
201
+ sample_loader = torch.utils.data.DataLoader(
202
+ imagenet_ds,
203
+ batch_size=args.batch_size,
204
+ shuffle=False,
205
+ num_workers=4
206
+ )
207
+
208
+ compute_saliency_and_save(args)
ViT_DeiT/baselines/ViT/helpers.py ADDED
@@ -0,0 +1,295 @@
1
+ """ Model creation / weight loading / state_dict helpers
2
+
3
+ Hacked together by / Copyright 2020 Ross Wightman
4
+ """
5
+ import logging
6
+ import os
7
+ import math
8
+ from collections import OrderedDict
9
+ from copy import deepcopy
10
+ from typing import Callable
11
+
12
+ import torch
13
+ import torch.nn as nn
14
+ import torch.utils.model_zoo as model_zoo
15
+
16
+ _logger = logging.getLogger(__name__)
17
+
18
+
19
+ def load_state_dict(checkpoint_path, use_ema=False):
20
+ if checkpoint_path and os.path.isfile(checkpoint_path):
21
+ checkpoint = torch.load(checkpoint_path, map_location='cpu')
22
+ state_dict_key = 'state_dict'
23
+ if isinstance(checkpoint, dict):
24
+ if use_ema and 'state_dict_ema' in checkpoint:
25
+ state_dict_key = 'state_dict_ema'
26
+ if state_dict_key and state_dict_key in checkpoint:
27
+ new_state_dict = OrderedDict()
28
+ for k, v in checkpoint[state_dict_key].items():
29
+ # strip `module.` prefix
30
+ name = k[7:] if k.startswith('module') else k
31
+ new_state_dict[name] = v
32
+ state_dict = new_state_dict
33
+ else:
34
+ state_dict = checkpoint
35
+ _logger.info("Loaded {} from checkpoint '{}'".format(state_dict_key, checkpoint_path))
36
+ return state_dict
37
+ else:
38
+ _logger.error("No checkpoint found at '{}'".format(checkpoint_path))
39
+ raise FileNotFoundError()
40
+
41
+
42
+ def load_checkpoint(model, checkpoint_path, use_ema=False, strict=True):
43
+ state_dict = load_state_dict(checkpoint_path, use_ema)
44
+ model.load_state_dict(state_dict, strict=strict)
45
+
46
+
47
+ def resume_checkpoint(model, checkpoint_path, optimizer=None, loss_scaler=None, log_info=True):
48
+ resume_epoch = None
49
+ if os.path.isfile(checkpoint_path):
50
+ checkpoint = torch.load(checkpoint_path, map_location='cpu')
51
+ if isinstance(checkpoint, dict) and 'state_dict' in checkpoint:
52
+ if log_info:
53
+ _logger.info('Restoring model state from checkpoint...')
54
+ new_state_dict = OrderedDict()
55
+ for k, v in checkpoint['state_dict'].items():
56
+ name = k[7:] if k.startswith('module') else k
57
+ new_state_dict[name] = v
58
+ model.load_state_dict(new_state_dict)
59
+
60
+ if optimizer is not None and 'optimizer' in checkpoint:
61
+ if log_info:
62
+ _logger.info('Restoring optimizer state from checkpoint...')
63
+ optimizer.load_state_dict(checkpoint['optimizer'])
64
+
65
+ if loss_scaler is not None and loss_scaler.state_dict_key in checkpoint:
66
+ if log_info:
67
+ _logger.info('Restoring AMP loss scaler state from checkpoint...')
68
+ loss_scaler.load_state_dict(checkpoint[loss_scaler.state_dict_key])
69
+
70
+ if 'epoch' in checkpoint:
71
+ resume_epoch = checkpoint['epoch']
72
+ if 'version' in checkpoint and checkpoint['version'] > 1:
73
+ resume_epoch += 1 # start at the next epoch, old checkpoints incremented before save
74
+
75
+ if log_info:
76
+ _logger.info("Loaded checkpoint '{}' (epoch {})".format(checkpoint_path, checkpoint['epoch']))
77
+ else:
78
+ model.load_state_dict(checkpoint)
79
+ if log_info:
80
+ _logger.info("Loaded checkpoint '{}'".format(checkpoint_path))
81
+ return resume_epoch
82
+ else:
83
+ _logger.error("No checkpoint found at '{}'".format(checkpoint_path))
84
+ raise FileNotFoundError()
85
+
86
+
87
+ def load_pretrained(model, cfg=None, num_classes=1000, in_chans=3, filter_fn=None, strict=True):
88
+ if cfg is None:
89
+ cfg = getattr(model, 'default_cfg')
90
+ if cfg is None or 'url' not in cfg or not cfg['url']:
91
+ _logger.warning("Pretrained model URL is invalid, using random initialization.")
92
+ return
93
+
94
+ state_dict = model_zoo.load_url(cfg['url'], progress=False, map_location='cpu')
95
+
96
+ if filter_fn is not None:
97
+ state_dict = filter_fn(state_dict)
98
+
99
+ if in_chans == 1:
100
+ conv1_name = cfg['first_conv']
101
+ _logger.info('Converting first conv (%s) pretrained weights from 3 to 1 channel' % conv1_name)
102
+ conv1_weight = state_dict[conv1_name + '.weight']
103
+ # Some weights are in torch.half, ensure it's float for sum on CPU
104
+ conv1_type = conv1_weight.dtype
105
+ conv1_weight = conv1_weight.float()
106
+ O, I, J, K = conv1_weight.shape
107
+ if I > 3:
108
+ assert conv1_weight.shape[1] % 3 == 0
109
+ # For models with space2depth stems
110
+ conv1_weight = conv1_weight.reshape(O, I // 3, 3, J, K)
111
+ conv1_weight = conv1_weight.sum(dim=2, keepdim=False)
112
+ else:
113
+ conv1_weight = conv1_weight.sum(dim=1, keepdim=True)
114
+ conv1_weight = conv1_weight.to(conv1_type)
115
+ state_dict[conv1_name + '.weight'] = conv1_weight
116
+ elif in_chans != 3:
117
+ conv1_name = cfg['first_conv']
118
+ conv1_weight = state_dict[conv1_name + '.weight']
119
+ conv1_type = conv1_weight.dtype
120
+ conv1_weight = conv1_weight.float()
121
+ O, I, J, K = conv1_weight.shape
122
+ if I != 3:
123
+ _logger.warning('Deleting first conv (%s) from pretrained weights.' % conv1_name)
124
+ del state_dict[conv1_name + '.weight']
125
+ strict = False
126
+ else:
127
+ # NOTE this strategy should be better than random init, but there could be other combinations of
128
+ # the original RGB input layer weights that'd work better for specific cases.
129
+ _logger.info('Repeating first conv (%s) weights in channel dim.' % conv1_name)
130
+ repeat = int(math.ceil(in_chans / 3))
131
+ conv1_weight = conv1_weight.repeat(1, repeat, 1, 1)[:, :in_chans, :, :]
132
+ conv1_weight *= (3 / float(in_chans))
133
+ conv1_weight = conv1_weight.to(conv1_type)
134
+ state_dict[conv1_name + '.weight'] = conv1_weight
135
+
136
+ classifier_name = cfg['classifier']
137
+ if num_classes == 1000 and cfg['num_classes'] == 1001:
138
+ # special case for imagenet trained models with extra background class in pretrained weights
139
+ classifier_weight = state_dict[classifier_name + '.weight']
140
+ state_dict[classifier_name + '.weight'] = classifier_weight[1:]
141
+ classifier_bias = state_dict[classifier_name + '.bias']
142
+ state_dict[classifier_name + '.bias'] = classifier_bias[1:]
143
+ elif num_classes != cfg['num_classes']:
144
+ # completely discard fully connected for all other differences between pretrained and created model
145
+ del state_dict[classifier_name + '.weight']
146
+ del state_dict[classifier_name + '.bias']
147
+ strict = False
148
+
149
+ model.load_state_dict(state_dict, strict=strict)
150
+
151
+
152
+ def extract_layer(model, layer):
153
+ layer = layer.split('.')
154
+ module = model
155
+ if hasattr(model, 'module') and layer[0] != 'module':
156
+ module = model.module
157
+ if not hasattr(model, 'module') and layer[0] == 'module':
158
+ layer = layer[1:]
159
+ for l in layer:
160
+ if hasattr(module, l):
161
+ if not l.isdigit():
162
+ module = getattr(module, l)
163
+ else:
164
+ module = module[int(l)]
165
+ else:
166
+ return module
167
+ return module
168
+
169
+
170
+ def set_layer(model, layer, val):
171
+ layer = layer.split('.')
172
+ module = model
173
+ if hasattr(model, 'module') and layer[0] != 'module':
174
+ module = model.module
175
+ lst_index = 0
176
+ module2 = module
177
+ for l in layer:
178
+ if hasattr(module2, l):
179
+ if not l.isdigit():
180
+ module2 = getattr(module2, l)
181
+ else:
182
+ module2 = module2[int(l)]
183
+ lst_index += 1
184
+ lst_index -= 1
185
+ for l in layer[:lst_index]:
186
+ if not l.isdigit():
187
+ module = getattr(module, l)
188
+ else:
189
+ module = module[int(l)]
190
+ l = layer[lst_index]
191
+ setattr(module, l, val)
192
+
193
+
194
+ def adapt_model_from_string(parent_module, model_string):
195
+ separator = '***'
196
+ state_dict = {}
197
+ lst_shape = model_string.split(separator)
198
+ for k in lst_shape:
199
+ k = k.split(':')
200
+ key = k[0]
201
+ shape = k[1][1:-1].split(',')
202
+ if shape[0] != '':
203
+ state_dict[key] = [int(i) for i in shape]
204
+
205
+ new_module = deepcopy(parent_module)
206
+ for n, m in parent_module.named_modules():
207
+ old_module = extract_layer(parent_module, n)
208
+ if isinstance(old_module, nn.Conv2d) or isinstance(old_module, Conv2dSame):
209
+ if isinstance(old_module, Conv2dSame):
210
+ conv = Conv2dSame
211
+ else:
212
+ conv = nn.Conv2d
213
+ s = state_dict[n + '.weight']
214
+ in_channels = s[1]
215
+ out_channels = s[0]
216
+ g = 1
217
+ if old_module.groups > 1:
218
+ in_channels = out_channels
219
+ g = in_channels
220
+ new_conv = conv(
221
+ in_channels=in_channels, out_channels=out_channels, kernel_size=old_module.kernel_size,
222
+ bias=old_module.bias is not None, padding=old_module.padding, dilation=old_module.dilation,
223
+ groups=g, stride=old_module.stride)
224
+ set_layer(new_module, n, new_conv)
225
+ if isinstance(old_module, nn.BatchNorm2d):
226
+ new_bn = nn.BatchNorm2d(
227
+ num_features=state_dict[n + '.weight'][0], eps=old_module.eps, momentum=old_module.momentum,
228
+ affine=old_module.affine, track_running_stats=True)
229
+ set_layer(new_module, n, new_bn)
230
+ if isinstance(old_module, nn.Linear):
231
+ # FIXME extra checks to ensure this is actually the FC classifier layer and not a diff Linear layer?
232
+ num_features = state_dict[n + '.weight'][1]
233
+ new_fc = nn.Linear(
234
+ in_features=num_features, out_features=old_module.out_features, bias=old_module.bias is not None)
235
+ set_layer(new_module, n, new_fc)
236
+ if hasattr(new_module, 'num_features'):
237
+ new_module.num_features = num_features
238
+ new_module.eval()
239
+ parent_module.eval()
240
+
241
+ return new_module
242
+
243
+
244
+ def adapt_model_from_file(parent_module, model_variant):
245
+ adapt_file = os.path.join(os.path.dirname(__file__), 'pruned', model_variant + '.txt')
246
+ with open(adapt_file, 'r') as f:
247
+ return adapt_model_from_string(parent_module, f.read().strip())
248
+
249
+
250
+ def build_model_with_cfg(
251
+ model_cls: Callable,
252
+ variant: str,
253
+ pretrained: bool,
254
+ default_cfg: dict,
255
+ model_cfg: dict = None,
256
+ feature_cfg: dict = None,
257
+ pretrained_strict: bool = True,
258
+ pretrained_filter_fn: Callable = None,
259
+ **kwargs):
260
+ pruned = kwargs.pop('pruned', False)
261
+ features = False
262
+ feature_cfg = feature_cfg or {}
263
+
264
+ if kwargs.pop('features_only', False):
265
+ features = True
266
+ feature_cfg.setdefault('out_indices', (0, 1, 2, 3, 4))
267
+ if 'out_indices' in kwargs:
268
+ feature_cfg['out_indices'] = kwargs.pop('out_indices')
269
+
270
+ model = model_cls(**kwargs) if model_cfg is None else model_cls(cfg=model_cfg, **kwargs)
271
+ model.default_cfg = deepcopy(default_cfg)
272
+
273
+ if pruned:
274
+ model = adapt_model_from_file(model, variant)
275
+
276
+ if pretrained:
277
+ load_pretrained(
278
+ model,
279
+ num_classes=kwargs.get('num_classes', 0),
280
+ in_chans=kwargs.get('in_chans', 3),
281
+ filter_fn=pretrained_filter_fn, strict=pretrained_strict)
282
+
283
+ if features:
284
+ feature_cls = FeatureListNet
285
+ if 'feature_cls' in feature_cfg:
286
+ feature_cls = feature_cfg.pop('feature_cls')
287
+ if isinstance(feature_cls, str):
288
+ feature_cls = feature_cls.lower()
289
+ if 'hook' in feature_cls:
290
+ feature_cls = FeatureHookNet
291
+ else:
292
+ assert False, f'Unknown feature class {feature_cls}'
293
+ model = feature_cls(model, **feature_cfg)
294
+
295
+ return model
ViT_DeiT/baselines/ViT/imagenet_seg_eval.py ADDED
@@ -0,0 +1,334 @@
1
+ import numpy as np
2
+ import torch
3
+ import torchvision.transforms as transforms
4
+ from torch.utils.data import DataLoader
5
+ from numpy import *
6
+ import argparse
7
+ from PIL import Image
8
+ import imageio
9
+ import os
10
+ from tqdm import tqdm
11
+ from utils.metrices import *
12
+
13
+ from utils import render
14
+ from utils.saver import Saver
15
+ from utils.iou import IoU
16
+
17
+ from data.Imagenet import Imagenet_Segmentation
18
+
19
+ from ViT_explanation_generator import Baselines, LRP
20
+ from ViT_new import vit_base_patch16_224
21
+ from ViT_LRP import vit_base_patch16_224 as vit_LRP
22
+ from ViT_orig_LRP import vit_base_patch16_224 as vit_orig_LRP
23
+
24
+ from sklearn.metrics import precision_recall_curve
25
+ import matplotlib.pyplot as plt
26
+
27
+ import torch.nn.functional as F
28
+
29
+ plt.switch_backend('agg')
30
+
31
+
32
+ # hyperparameters
33
+ num_workers = 0
34
+ batch_size = 1
35
+
36
+ cls = ['airplane',
37
+ 'bicycle',
38
+ 'bird',
39
+ 'boat',
40
+ 'bottle',
41
+ 'bus',
42
+ 'car',
43
+ 'cat',
44
+ 'chair',
45
+ 'cow',
46
+ 'dining table',
47
+ 'dog',
48
+ 'horse',
49
+ 'motobike',
50
+ 'person',
51
+ 'potted plant',
52
+ 'sheep',
53
+ 'sofa',
54
+ 'train',
55
+ 'tv'
56
+ ]
57
+
58
+ # Args
59
+ parser = argparse.ArgumentParser(description='Training multi-class classifier')
60
+ parser.add_argument('--arc', type=str, default='vgg', metavar='N',
61
+ help='Model architecture')
62
+ parser.add_argument('--train_dataset', type=str, default='imagenet', metavar='N',
63
+ help='Testing Dataset')
64
+ parser.add_argument('--method', type=str,
65
+ default='grad_rollout',
66
+ choices=[ 'rollout', 'lrp','transformer_attribution', 'full_lrp', 'lrp_last_layer',
67
+ 'attn_last_layer', 'attn_gradcam'],
68
+ help='')
69
+ parser.add_argument('--thr', type=float, default=0.,
70
+ help='threshold')
71
+ parser.add_argument('--K', type=int, default=1,
72
+ help='new - top K results')
73
+ parser.add_argument('--save-img', action='store_true',
74
+ default=False,
75
+ help='')
76
+ parser.add_argument('--no-ia', action='store_true',
77
+ default=False,
78
+ help='')
79
+ parser.add_argument('--no-fx', action='store_true',
80
+ default=False,
81
+ help='')
82
+ parser.add_argument('--no-fgx', action='store_true',
83
+ default=False,
84
+ help='')
85
+ parser.add_argument('--no-m', action='store_true',
86
+ default=False,
87
+ help='')
88
+ parser.add_argument('--no-reg', action='store_true',
89
+ default=False,
90
+ help='')
91
+ parser.add_argument('--is-ablation', type=bool,
92
+ default=False,
93
+ help='')
94
+ parser.add_argument('--imagenet-seg-path', type=str, required=True)
95
+ args = parser.parse_args()
96
+
97
+ args.checkname = args.method + '_' + args.arc
98
+
99
+ alpha = 2
100
+
101
+ cuda = torch.cuda.is_available()
102
+ device = torch.device("cuda" if cuda else "cpu")
103
+
104
+ # Define Saver
105
+ saver = Saver(args)
106
+ saver.results_dir = os.path.join(saver.experiment_dir, 'results')
107
+ if not os.path.exists(saver.results_dir):
108
+ os.makedirs(saver.results_dir)
109
+ if not os.path.exists(os.path.join(saver.results_dir, 'input')):
110
+ os.makedirs(os.path.join(saver.results_dir, 'input'))
111
+ if not os.path.exists(os.path.join(saver.results_dir, 'explain')):
112
+ os.makedirs(os.path.join(saver.results_dir, 'explain'))
113
+
114
+ args.exp_img_path = os.path.join(saver.results_dir, 'explain/img')
115
+ if not os.path.exists(args.exp_img_path):
116
+ os.makedirs(args.exp_img_path)
117
+ args.exp_np_path = os.path.join(saver.results_dir, 'explain/np')
118
+ if not os.path.exists(args.exp_np_path):
119
+ os.makedirs(args.exp_np_path)
120
+
121
+ # Data
122
+ normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
123
+ test_img_trans = transforms.Compose([
124
+ transforms.Resize((224, 224)),
125
+ transforms.ToTensor(),
126
+ normalize,
127
+ ])
128
+ test_lbl_trans = transforms.Compose([
129
+ transforms.Resize((224, 224), Image.NEAREST),
130
+ ])
131
+
132
+ ds = Imagenet_Segmentation(args.imagenet_seg_path,
133
+ transform=test_img_trans, target_transform=test_lbl_trans)
134
+ dl = DataLoader(ds, batch_size=batch_size, shuffle=False, num_workers=1, drop_last=False)
135
+
136
+ # Model
137
+ model = vit_base_patch16_224(pretrained=True).cuda()
138
+ baselines = Baselines(model)
139
+
140
+ # LRP
141
+ model_LRP = vit_LRP(pretrained=True).cuda()
142
+ model_LRP.eval()
143
+ lrp = LRP(model_LRP)
144
+
145
+ # orig LRP
146
+ model_orig_LRP = vit_orig_LRP(pretrained=True).cuda()
147
+ model_orig_LRP.eval()
148
+ orig_lrp = LRP(model_orig_LRP)
149
+
150
+ metric = IoU(2, ignore_index=-1)
151
+
152
+ iterator = tqdm(dl)
153
+
154
+ model.eval()
155
+
156
+
157
+ def compute_pred(output):
158
+ pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
159
+ # pred[0, 0] = 282
160
+ # print('Pred cls : ' + str(pred))
161
+ T = pred.squeeze().cpu().numpy()
162
+ T = np.expand_dims(T, 0)
163
+ T = (T[:, np.newaxis] == np.arange(1000)) * 1.0
164
+ T = torch.from_numpy(T).type(torch.FloatTensor)
165
+ Tt = T.cuda()
166
+
167
+ return Tt
168
+
169
+
170
+ def eval_batch(image, labels, evaluator, index):
171
+ evaluator.zero_grad()
172
+ # Save input image
173
+ if args.save_img:
174
+ img = image[0].permute(1, 2, 0).data.cpu().numpy()
175
+ img = 255 * (img - img.min()) / (img.max() - img.min())
176
+ img = img.astype('uint8')
177
+ Image.fromarray(img, 'RGB').save(os.path.join(saver.results_dir, 'input/{}_input.png'.format(index)))
178
+ Image.fromarray((labels.repeat(3, 1, 1).permute(1, 2, 0).data.cpu().numpy() * 255).astype('uint8'), 'RGB').save(
179
+ os.path.join(saver.results_dir, 'input/{}_mask.png'.format(index)))
180
+
181
+ image.requires_grad = True
182
+
183
+ image = image.requires_grad_()
184
+ predictions = evaluator(image)
185
+
186
+ # segmentation test for the rollout baseline
187
+ if args.method == 'rollout':
188
+ Res = baselines.generate_rollout(image.cuda(), start_layer=1).reshape(batch_size, 1, 14, 14)
189
+
190
+ # segmentation test for the LRP baseline (this is full LRP, not partial)
191
+ elif args.method == 'full_lrp':
192
+ Res = orig_lrp.generate_LRP(image.cuda(), method="full").reshape(batch_size, 1, 224, 224)
193
+
194
+ # segmentation test for our method
195
+ elif args.method == 'transformer_attribution':
196
+ Res = lrp.generate_LRP(image.cuda(), start_layer=1, method="transformer_attribution").reshape(batch_size, 1, 14, 14)
197
+
198
+ # segmentation test for the partial LRP baseline (last attn layer)
199
+ elif args.method == 'lrp_last_layer':
200
+ Res = orig_lrp.generate_LRP(image.cuda(), method="last_layer", is_ablation=args.is_ablation)\
201
+ .reshape(batch_size, 1, 14, 14)
202
+
203
+ # segmentation test for the raw attention baseline (last attn layer)
204
+ elif args.method == 'attn_last_layer':
205
+ Res = orig_lrp.generate_LRP(image.cuda(), method="last_layer_attn", is_ablation=args.is_ablation)\
206
+ .reshape(batch_size, 1, 14, 14)
207
+
208
+ # segmentation test for the GradCam baseline (last attn layer)
209
+ elif args.method == 'attn_gradcam':
210
+ Res = baselines.generate_cam_attn(image.cuda()).reshape(batch_size, 1, 14, 14)
211
+
212
+ if args.method != 'full_lrp':
213
+ # interpolate to full image size (224,224)
214
+ Res = torch.nn.functional.interpolate(Res, scale_factor=16, mode='bilinear').cuda()
215
+
216
+ # threshold between FG and BG is the mean
217
+ Res = (Res - Res.min()) / (Res.max() - Res.min())
218
+
219
+ ret = Res.mean()
220
+
221
+ Res_1 = Res.gt(ret).type(Res.type())
222
+ Res_0 = Res.le(ret).type(Res.type())
223
+
224
+ Res_1_AP = Res
225
+ Res_0_AP = 1-Res
226
+
227
+ Res_1[Res_1 != Res_1] = 0
228
+ Res_0[Res_0 != Res_0] = 0
229
+ Res_1_AP[Res_1_AP != Res_1_AP] = 0
230
+ Res_0_AP[Res_0_AP != Res_0_AP] = 0
231
+
232
+
233
+ # TEST
234
+ pred = Res.clamp(min=args.thr) / Res.max()
235
+ pred = pred.view(-1).data.cpu().numpy()
236
+ target = labels.view(-1).data.cpu().numpy()
237
+ # print("target", target.shape)
238
+
239
+ output = torch.cat((Res_0, Res_1), 1)
240
+ output_AP = torch.cat((Res_0_AP, Res_1_AP), 1)
241
+
242
+ if args.save_img:
243
+ # Save predicted mask
244
+ mask = F.interpolate(Res_1, [64, 64], mode='bilinear')
245
+ mask = mask[0].squeeze().data.cpu().numpy()
246
+ # mask = Res_1[0].squeeze().data.cpu().numpy()
247
+ mask = 255 * mask
248
+ mask = mask.astype('uint8')
249
+ imageio.imsave(os.path.join(args.exp_img_path, 'mask_' + str(index) + '.jpg'), mask)
250
+
251
+ relevance = F.interpolate(Res, [64, 64], mode='bilinear')
252
+ relevance = relevance[0].permute(1, 2, 0).data.cpu().numpy()
253
+ # relevance = Res[0].permute(1, 2, 0).data.cpu().numpy()
254
+ hm = np.sum(relevance, axis=-1)
255
+ maps = (render.hm_to_rgb(hm, scaling=3, sigma=1, cmap='seismic') * 255).astype(np.uint8)
256
+ imageio.imsave(os.path.join(args.exp_img_path, 'heatmap_' + str(index) + '.jpg'), maps)
257
+
258
+ # Evaluate Segmentation
259
+ batch_inter, batch_union, batch_correct, batch_label = 0, 0, 0, 0
260
+ batch_ap, batch_f1 = 0, 0
261
+
262
+ # Segmentation results
263
+ correct, labeled = batch_pix_accuracy(output[0].data.cpu(), labels[0])
264
+ inter, union = batch_intersection_union(output[0].data.cpu(), labels[0], 2)
265
+ batch_correct += correct
266
+ batch_label += labeled
267
+ batch_inter += inter
268
+ batch_union += union
269
+ # print("output", output.shape)
270
+ # print("ap labels", labels.shape)
271
+ # ap = np.nan_to_num(get_ap_scores(output, labels))
272
+ ap = np.nan_to_num(get_ap_scores(output_AP, labels))
273
+ f1 = np.nan_to_num(get_f1_scores(output[0, 1].data.cpu(), labels[0]))
274
+ batch_ap += ap
275
+ batch_f1 += f1
276
+
277
+ return batch_correct, batch_label, batch_inter, batch_union, batch_ap, batch_f1, pred, target
278
+
279
+
280
+ total_inter, total_union, total_correct, total_label = np.int64(0), np.int64(0), np.int64(0), np.int64(0)
281
+ total_ap, total_f1 = [], []
282
+
283
+ predictions, targets = [], []
284
+ for batch_idx, (image, labels) in enumerate(iterator):
285
+
286
+ if args.method == "blur":
287
+ images = (image[0].cuda(), image[1].cuda())
288
+ else:
289
+ images = image.cuda()
290
+ labels = labels.cuda()
291
+ # print("image", image.shape)
292
+ # print("lables", labels.shape)
293
+
294
+ correct, labeled, inter, union, ap, f1, pred, target = eval_batch(images, labels, model, batch_idx)
295
+
296
+ predictions.append(pred)
297
+ targets.append(target)
298
+
299
+ total_correct += correct.astype('int64')
300
+ total_label += labeled.astype('int64')
301
+ total_inter += inter.astype('int64')
302
+ total_union += union.astype('int64')
303
+ total_ap += [ap]
304
+ total_f1 += [f1]
305
+ pixAcc = np.float64(1.0) * total_correct / (np.spacing(1, dtype=np.float64) + total_label)
306
+ IoU = np.float64(1.0) * total_inter / (np.spacing(1, dtype=np.float64) + total_union)
307
+ mIoU = IoU.mean()
308
+ mAp = np.mean(total_ap)
309
+ mF1 = np.mean(total_f1)
310
+ iterator.set_description('pixAcc: %.4f, mIoU: %.4f, mAP: %.4f, mF1: %.4f' % (pixAcc, mIoU, mAp, mF1))
311
+
312
+ predictions = np.concatenate(predictions)
313
+ targets = np.concatenate(targets)
314
+ pr, rc, thr = precision_recall_curve(targets, predictions)
315
+ np.save(os.path.join(saver.experiment_dir, 'precision.npy'), pr)
316
+ np.save(os.path.join(saver.experiment_dir, 'recall.npy'), rc)
317
+
318
+ plt.figure()
319
+ plt.plot(rc, pr)
320
+ plt.savefig(os.path.join(saver.experiment_dir, 'PR_curve_{}.png'.format(args.method)))
321
+
322
+ txtfile = os.path.join(saver.experiment_dir, 'result_mIoU_%.4f.txt' % mIoU)
323
+ # txtfile = 'result_mIoU_%.4f.txt' % mIoU
324
+ fh = open(txtfile, 'w')
325
+ print("Mean IoU over %d classes: %.4f\n" % (2, mIoU))
326
+ print("Pixel-wise Accuracy: %2.2f%%\n" % (pixAcc * 100))
327
+ print("Mean AP over %d classes: %.4f\n" % (2, mAp))
328
+ print("Mean F1 over %d classes: %.4f\n" % (2, mF1))
329
+
330
+ fh.write("Mean IoU over %d classes: %.4f\n" % (2, mIoU))
331
+ fh.write("Pixel-wise Accuracy: %2.2f%%\n" % (pixAcc * 100))
332
+ fh.write("Mean AP over %d classes: %.4f\n" % (2, mAp))
333
+ fh.write("Mean F1 over %d classes: %.4f\n" % (2, mF1))
334
+ fh.close()
ViT_DeiT/baselines/ViT/layer_helpers.py ADDED
@@ -0,0 +1,21 @@
1
+ """ Layer/Module Helpers
2
+ Hacked together by / Copyright 2020 Ross Wightman
3
+ """
4
+ from itertools import repeat
5
+ import collections.abc
6
+
7
+
8
+ # From PyTorch internals
9
+ def _ntuple(n):
10
+ def parse(x):
11
+ if isinstance(x, collections.abc.Iterable):
12
+ return x
13
+ return tuple(repeat(x, n))
14
+ return parse
15
+
16
+
17
+ to_1tuple = _ntuple(1)
18
+ to_2tuple = _ntuple(2)
19
+ to_3tuple = _ntuple(3)
20
+ to_4tuple = _ntuple(4)
21
+ to_ntuple = _ntuple
ViT_DeiT/baselines/ViT/misc_functions.py ADDED
@@ -0,0 +1,68 @@
1
+ #
2
+ # Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/
3
+ # Written by Suraj Srinivas <suraj.srinivas@idiap.ch>
4
+ #
5
+
6
+ """ Misc helper functions """
7
+
8
+ import cv2
9
+ import numpy as np
10
+ import subprocess
11
+
12
+ import torch
13
+ import torchvision.transforms as transforms
14
+
15
+
16
+ class NormalizeInverse(transforms.Normalize):
17
+ # Undo normalization on images
18
+
19
+ def __init__(self, mean, std):
20
+ mean = torch.as_tensor(mean)
21
+ std = torch.as_tensor(std)
22
+ std_inv = 1 / (std + 1e-7)
23
+ mean_inv = -mean * std_inv
24
+ super(NormalizeInverse, self).__init__(mean=mean_inv, std=std_inv)
25
+
26
+ def __call__(self, tensor):
27
+ return super(NormalizeInverse, self).__call__(tensor.clone())
28
+
29
+
30
+ def create_folder(folder_name):
31
+ try:
32
+ subprocess.call(['mkdir', '-p', folder_name])
33
+ except OSError:
34
+ None
35
+
36
+
37
+ def save_saliency_map(image, saliency_map, filename):
38
+ """
39
+ Save saliency map on image.
40
+
41
+ Args:
42
+ image: Tensor of size (3,H,W)
43
+ saliency_map: Tensor of size (1,H,W)
44
+ filename: string with complete path and file extension
45
+
46
+ """
47
+
48
+ image = image.data.cpu().numpy()
49
+ saliency_map = saliency_map.data.cpu().numpy()
50
+
51
+ saliency_map = saliency_map - saliency_map.min()
52
+ saliency_map = saliency_map / saliency_map.max()
53
+ saliency_map = saliency_map.clip(0, 1)
54
+
55
+ saliency_map = np.uint8(saliency_map * 255).transpose(1, 2, 0)
56
+ saliency_map = cv2.resize(saliency_map, (224, 224))
57
+
58
+ image = np.uint8(image * 255).transpose(1, 2, 0)
59
+ image = cv2.resize(image, (224, 224))
60
+
61
+ # Apply JET colormap
62
+ color_heatmap = cv2.applyColorMap(saliency_map, cv2.COLORMAP_JET)
63
+
64
+ # Combine image with heatmap
65
+ img_with_heatmap = np.float32(color_heatmap) + np.float32(image)
66
+ img_with_heatmap = img_with_heatmap / np.max(img_with_heatmap)
67
+
68
+ cv2.imwrite(filename, np.uint8(255 * img_with_heatmap))
ViT_DeiT/baselines/ViT/pertubation_eval_from_hdf5.py ADDED
@@ -0,0 +1,233 @@
1
+
2
+ import torch
3
+ import os
4
+ from tqdm import tqdm
5
+ import numpy as np
6
+ import argparse
7
+
8
+ # Import saliency methods and models
9
+ from ViT_explanation_generator import Baselines
10
+ from ViT_new import vit_base_patch16_224
11
+ # from models.vgg import vgg19
12
+ import glob
13
+
14
+ from dataset.expl_hdf5 import ImagenetResults
15
+
16
+
17
+ def normalize(tensor,
18
+ mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]):
19
+ dtype = tensor.dtype
20
+ mean = torch.as_tensor(mean, dtype=dtype, device=tensor.device)
21
+ std = torch.as_tensor(std, dtype=dtype, device=tensor.device)
22
+ tensor.sub_(mean[None, :, None, None]).div_(std[None, :, None, None])
23
+ return tensor
24
+
25
+
26
+ def eval(args):
27
+ num_samples = 0
28
+ num_correct_model = np.zeros((len(imagenet_ds,)))
29
+ dissimilarity_model = np.zeros((len(imagenet_ds,)))
30
+ model_index = 0
31
+
32
+ if args.scale == 'per':
33
+ base_size = 224 * 224
34
+ perturbation_steps = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
35
+ elif args.scale == '100':
36
+ base_size = 100
37
+ perturbation_steps = [5, 10, 15, 20, 25, 30, 35, 40, 45]
38
+ else:
39
+ raise Exception('scale not valid')
40
+
41
+ num_correct_pertub = np.zeros((9, len(imagenet_ds)))
42
+ dissimilarity_pertub = np.zeros((9, len(imagenet_ds)))
43
+ logit_diff_pertub = np.zeros((9, len(imagenet_ds)))
44
+ prob_diff_pertub = np.zeros((9, len(imagenet_ds)))
45
+ perturb_index = 0
46
+
47
+ for batch_idx, (data, vis, target) in enumerate(tqdm(sample_loader)):
48
+ # Update the number of samples
49
+ num_samples += len(data)
50
+
51
+ data = data.to(device)
52
+ vis = vis.to(device)
53
+ target = target.to(device)
54
+ norm_data = normalize(data.clone())
55
+
56
+ # Compute model accuracy
57
+ pred = model(norm_data)
58
+ pred_probabilities = torch.softmax(pred, dim=1)
59
+ pred_org_logit = pred.data.max(1, keepdim=True)[0].squeeze(1)
60
+ pred_org_prob = pred_probabilities.data.max(1, keepdim=True)[0].squeeze(1)
61
+ pred_class = pred.data.max(1, keepdim=True)[1].squeeze(1)
62
+ tgt_pred = (target == pred_class).type(target.type()).data.cpu().numpy()
63
+ num_correct_model[model_index:model_index+len(tgt_pred)] = tgt_pred
64
+
65
+ probs = torch.softmax(pred, dim=1)
66
+ target_probs = torch.gather(probs, 1, target[:, None])[:, 0]
67
+ second_probs = probs.data.topk(2, dim=1)[0][:, 1]
68
+ temp = torch.log(target_probs / second_probs).data.cpu().numpy()
69
+ dissimilarity_model[model_index:model_index+len(temp)] = temp
70
+
71
+ if args.wrong:
72
+ wid = np.argwhere(tgt_pred == 0).flatten()
73
+ if len(wid) == 0:
74
+ continue
75
+ wid = torch.from_numpy(wid).to(vis.device)
76
+ vis = vis.index_select(0, wid)
77
+ data = data.index_select(0, wid)
78
+ target = target.index_select(0, wid)
79
+
80
+ # Save original shape
81
+ org_shape = data.shape
82
+
83
+ if args.neg:
84
+ vis = -vis
85
+
86
+ vis = vis.reshape(org_shape[0], -1)
87
+
88
+ for i in range(len(perturbation_steps)):
89
+ _data = data.clone()
90
+
91
+ _, idx = torch.topk(vis, int(base_size * perturbation_steps[i]), dim=-1)
92
+ idx = idx.unsqueeze(1).repeat(1, org_shape[1], 1)
93
+ _data = _data.reshape(org_shape[0], org_shape[1], -1)
94
+ _data = _data.scatter_(-1, idx, 0)
95
+ _data = _data.reshape(*org_shape)
96
+
97
+ _norm_data = normalize(_data)
98
+
99
+ out = model(_norm_data)
100
+
101
+ pred_probabilities = torch.softmax(out, dim=1)
102
+ pred_prob = pred_probabilities.data.max(1, keepdim=True)[0].squeeze(1)
103
+ diff = (pred_prob - pred_org_prob).data.cpu().numpy()
104
+ prob_diff_pertub[i, perturb_index:perturb_index+len(diff)] = diff
105
+
106
+ pred_logit = out.data.max(1, keepdim=True)[0].squeeze(1)
107
+ diff = (pred_logit - pred_org_logit).data.cpu().numpy()
108
+ logit_diff_pertub[i, perturb_index:perturb_index+len(diff)] = diff
109
+
110
+ target_class = out.data.max(1, keepdim=True)[1].squeeze(1)
111
+ temp = (target == target_class).type(target.type()).data.cpu().numpy()
112
+ num_correct_pertub[i, perturb_index:perturb_index+len(temp)] = temp
113
+
114
+ probs_pertub = torch.softmax(out, dim=1)
115
+ target_probs = torch.gather(probs_pertub, 1, target[:, None])[:, 0]
116
+ second_probs = probs_pertub.data.topk(2, dim=1)[0][:, 1]
117
+ temp = torch.log(target_probs / second_probs).data.cpu().numpy()
118
+ dissimilarity_pertub[i, perturb_index:perturb_index+len(temp)] = temp
119
+
120
+ model_index += len(target)
121
+ perturb_index += len(target)
122
+
123
+ np.save(os.path.join(args.experiment_dir, 'model_hits.npy'), num_correct_model)
124
+ np.save(os.path.join(args.experiment_dir, 'model_dissimilarities.npy'), dissimilarity_model)
125
+ np.save(os.path.join(args.experiment_dir, 'perturbations_hits.npy'), num_correct_pertub[:, :perturb_index])
126
+ np.save(os.path.join(args.experiment_dir, 'perturbations_dissimilarities.npy'), dissimilarity_pertub[:, :perturb_index])
127
+ np.save(os.path.join(args.experiment_dir, 'perturbations_logit_diff.npy'), logit_diff_pertub[:, :perturb_index])
128
+ np.save(os.path.join(args.experiment_dir, 'perturbations_prob_diff.npy'), prob_diff_pertub[:, :perturb_index])
129
+
130
+ print(np.mean(num_correct_model), np.std(num_correct_model))
131
+ print(np.mean(dissimilarity_model), np.std(dissimilarity_model))
132
+ print(perturbation_steps)
133
+ print(np.mean(num_correct_pertub, axis=1), np.std(num_correct_pertub, axis=1))
134
+ print(np.mean(dissimilarity_pertub, axis=1), np.std(dissimilarity_pertub, axis=1))
135
+
136
+
137
+ if __name__ == "__main__":
138
+ parser = argparse.ArgumentParser(description='Train a segmentation')
139
+ parser.add_argument('--batch-size', type=int,
140
+ default=16,
141
+ help='')
142
+ parser.add_argument('--neg', type=bool,
143
+ default=True,
144
+ help='')
145
+ parser.add_argument('--value', action='store_true',
146
+ default=False,
147
+ help='')
148
+ parser.add_argument('--scale', type=str,
149
+ default='per',
150
+ choices=['per', '100'],
151
+ help='')
152
+ parser.add_argument('--method', type=str,
153
+ default='grad_rollout',
154
+ choices=['rollout', 'lrp', 'transformer_attribution', 'full_lrp', 'v_gradcam', 'lrp_last_layer',
155
+ 'lrp_second_layer', 'gradcam',
156
+ 'attn_last_layer', 'attn_gradcam', 'input_grads'],
157
+ help='')
158
+ parser.add_argument('--vis-class', type=str,
159
+ default='top',
160
+ choices=['top', 'target', 'index'],
161
+ help='')
162
+ parser.add_argument('--wrong', action='store_true',
163
+ default=False,
164
+ help='')
165
+ parser.add_argument('--class-id', type=int,
166
+ default=0,
167
+ help='')
168
+ parser.add_argument('--is-ablation', type=bool,
169
+ default=False,
170
+ help='')
171
+ args = parser.parse_args()
172
+
173
+ torch.multiprocessing.set_start_method('spawn')
174
+
175
+ # PATH variables
176
+ PATH = os.path.dirname(os.path.abspath(__file__)) + '/'
177
+ dataset = PATH + 'dataset/'
178
+ os.makedirs(os.path.join(PATH, 'experiments'), exist_ok=True)
179
+ os.makedirs(os.path.join(PATH, 'experiments/perturbations'), exist_ok=True)
180
+
181
+ exp_name = args.method
182
+ exp_name += '_neg' if args.neg else '_pos'
183
+ print(exp_name)
184
+
185
+ if args.vis_class == 'index':
186
+ args.runs_dir = os.path.join(PATH, 'experiments/perturbations/{}/{}_{}'.format(exp_name,
187
+ args.vis_class,
188
+ args.class_id))
189
+ else:
190
+ ablation_fold = 'ablation' if args.is_ablation else 'not_ablation'
191
+ args.runs_dir = os.path.join(PATH, 'experiments/perturbations/{}/{}/{}'.format(exp_name,
192
+ args.vis_class, ablation_fold))
193
+ # args.runs_dir = os.path.join(PATH, 'experiments/perturbations/{}/{}'.format(exp_name,
194
+ # args.vis_class))
195
+
196
+ if args.wrong:
197
+ args.runs_dir += '_wrong'
198
+
199
+ experiments = sorted(glob.glob(os.path.join(args.runs_dir, 'experiment_*')))
200
+ experiment_id = int(experiments[-1].split('_')[-1]) + 1 if experiments else 0
201
+ args.experiment_dir = os.path.join(args.runs_dir, 'experiment_{}'.format(str(experiment_id)))
202
+ os.makedirs(args.experiment_dir, exist_ok=True)
203
+
204
+ cuda = torch.cuda.is_available()
205
+ device = torch.device("cuda" if cuda else "cpu")
206
+
207
+ if args.vis_class == 'index':
208
+ vis_method_dir = os.path.join(PATH,'visualizations/{}/{}_{}'.format(args.method,
209
+ args.vis_class,
210
+ args.class_id))
211
+ else:
212
+ ablation_fold = 'ablation' if args.is_ablation else 'not_ablation'
213
+ vis_method_dir = os.path.join(PATH,'visualizations/{}/{}/{}'.format(args.method,
214
+ args.vis_class, ablation_fold))
215
+ # vis_method_dir = os.path.join(PATH, 'visualizations/{}/{}'.format(args.method,
216
+ # args.vis_class))
217
+
218
+ # imagenet_ds = ImagenetResults('visualizations/{}'.format(args.method))
219
+ imagenet_ds = ImagenetResults(vis_method_dir)
220
+
221
+ # Model
222
+ model = vit_base_patch16_224(pretrained=True).cuda()
223
+ model.eval()
224
+
225
+ save_path = PATH + 'results/'
226
+
227
+ sample_loader = torch.utils.data.DataLoader(
228
+ imagenet_ds,
229
+ batch_size=args.batch_size,
230
+ num_workers=2,
231
+ shuffle=False)
232
+
233
+ eval(args)
ViT_DeiT/baselines/ViT/weight_init.py ADDED
@@ -0,0 +1,60 @@
1
+ import torch
2
+ import math
3
+ import warnings
4
+
5
+
6
+ def _no_grad_trunc_normal_(tensor, mean, std, a, b):
7
+ # Cut & paste from PyTorch official master until it's in a few official releases - RW
8
+ # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
9
+ def norm_cdf(x):
10
+ # Computes standard normal cumulative distribution function
11
+ return (1. + math.erf(x / math.sqrt(2.))) / 2.
12
+
13
+ if (mean < a - 2 * std) or (mean > b + 2 * std):
14
+ warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. "
15
+ "The distribution of values may be incorrect.",
16
+ stacklevel=2)
17
+
18
+ with torch.no_grad():
19
+ # Values are generated by using a truncated uniform distribution and
20
+ # then using the inverse CDF for the normal distribution.
21
+ # Get upper and lower cdf values
22
+ l = norm_cdf((a - mean) / std)
23
+ u = norm_cdf((b - mean) / std)
24
+
25
+ # Uniformly fill tensor with values from [l, u], then translate to
26
+ # [2l-1, 2u-1].
27
+ tensor.uniform_(2 * l - 1, 2 * u - 1)
28
+
29
+ # Use inverse cdf transform for normal distribution to get truncated
30
+ # standard normal
31
+ tensor.erfinv_()
32
+
33
+ # Transform to proper mean, std
34
+ tensor.mul_(std * math.sqrt(2.))
35
+ tensor.add_(mean)
36
+
37
+ # Clamp to ensure it's in the proper range
38
+ tensor.clamp_(min=a, max=b)
39
+ return tensor
40
+
41
+
42
+ def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.):
43
+ # type: (Tensor, float, float, float, float) -> Tensor
44
+ r"""Fills the input Tensor with values drawn from a truncated
45
+ normal distribution. The values are effectively drawn from the
46
+ normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
47
+ with values outside :math:`[a, b]` redrawn until they are within
48
+ the bounds. The method used for generating the random values works
49
+ best when :math:`a \leq \text{mean} \leq b`.
50
+ Args:
51
+ tensor: an n-dimensional `torch.Tensor`
52
+ mean: the mean of the normal distribution
53
+ std: the standard deviation of the normal distribution
54
+ a: the minimum cutoff value
55
+ b: the maximum cutoff value
56
+ Examples:
57
+ >>> w = torch.empty(3, 5)
58
+ >>> nn.init.trunc_normal_(w)
59
+ """
60
+ return _no_grad_trunc_normal_(tensor, mean, std, a, b)
ViT_DeiT/data/VOC.py ADDED
@@ -0,0 +1,372 @@
1
+ import os
2
+ import tarfile
3
+ import torch
4
+ import torch.utils.data as data
5
+ import numpy as np
6
+ import h5py
7
+
8
+ from PIL import Image
9
+ from scipy import io
10
+ from torchvision.datasets.utils import download_url
11
+
12
+ DATASET_YEAR_DICT = {
13
+ '2012': {
14
+ 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar',
15
+ 'filename': 'VOCtrainval_11-May-2012.tar',
16
+ 'md5': '6cd6e144f989b92b3379bac3b3de84fd',
17
+ 'base_dir': 'VOCdevkit/VOC2012'
18
+ },
19
+ '2011': {
20
+ 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2011/VOCtrainval_25-May-2011.tar',
21
+ 'filename': 'VOCtrainval_25-May-2011.tar',
22
+ 'md5': '6c3384ef61512963050cb5d687e5bf1e',
23
+ 'base_dir': 'TrainVal/VOCdevkit/VOC2011'
24
+ },
25
+ '2010': {
26
+ 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar',
27
+ 'filename': 'VOCtrainval_03-May-2010.tar',
28
+ 'md5': 'da459979d0c395079b5c75ee67908abb',
29
+ 'base_dir': 'VOCdevkit/VOC2010'
30
+ },
31
+ '2009': {
32
+ 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2009/VOCtrainval_11-May-2009.tar',
33
+ 'filename': 'VOCtrainval_11-May-2009.tar',
34
+ 'md5': '59065e4b188729180974ef6572f6a212',
35
+ 'base_dir': 'VOCdevkit/VOC2009'
36
+ },
37
+ '2008': {
38
+ 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2008/VOCtrainval_14-Jul-2008.tar',
39
+ 'filename': 'VOCtrainval_11-May-2012.tar',
40
+ 'md5': '2629fa636546599198acfcfbfcf1904a',
41
+ 'base_dir': 'VOCdevkit/VOC2008'
42
+ },
43
+ '2007': {
44
+ 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar',
45
+ 'filename': 'VOCtrainval_06-Nov-2007.tar',
46
+ 'md5': 'c52e279531787c972589f7e41ab4ae64',
47
+ 'base_dir': 'VOCdevkit/VOC2007'
48
+ }
49
+ }
50
+
51
+
52
+ class VOCSegmentation(data.Dataset):
53
+ """`Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Segmentation Dataset.
54
+
55
+ Args:
56
+ root (string): Root directory of the VOC Dataset.
57
+ year (string, optional): The dataset year, supports years 2007 to 2012.
58
+ image_set (string, optional): Select the image_set to use, ``train``, ``trainval`` or ``val``
59
+ download (bool, optional): If true, downloads the dataset from the internet and
60
+ puts it in root directory. If dataset is already downloaded, it is not
61
+ downloaded again.
62
+ transform (callable, optional): A function/transform that takes in an PIL image
63
+ and returns a transformed version. E.g, ``transforms.RandomCrop``
64
+ target_transform (callable, optional): A function/transform that takes in the
65
+ target and transforms it.
66
+ """
67
+
68
+ CLASSES = 20
69
+ CLASSES_NAMES = [
70
+ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
71
+ 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
72
+ 'motorbike', 'person', 'potted-plant', 'sheep', 'sofa', 'train',
73
+ 'tvmonitor', 'ambigious'
74
+ ]
75
+
76
+ def __init__(self,
77
+ root,
78
+ year='2012',
79
+ image_set='train',
80
+ download=False,
81
+ transform=None,
82
+ target_transform=None):
83
+ self.root = os.path.expanduser(root)
84
+ self.year = year
85
+ self.url = DATASET_YEAR_DICT[year]['url']
86
+ self.filename = DATASET_YEAR_DICT[year]['filename']
87
+ self.md5 = DATASET_YEAR_DICT[year]['md5']
88
+ self.transform = transform
89
+ self.target_transform = target_transform
90
+ self.image_set = image_set
91
+ base_dir = DATASET_YEAR_DICT[year]['base_dir']
92
+ voc_root = os.path.join(self.root, base_dir)
93
+ image_dir = os.path.join(voc_root, 'JPEGImages')
94
+ mask_dir = os.path.join(voc_root, 'SegmentationClass')
95
+
96
+ if download:
97
+ download_extract(self.url, self.root, self.filename, self.md5)
98
+
99
+ if not os.path.isdir(voc_root):
100
+ raise RuntimeError('Dataset not found or corrupted.' +
101
+ ' You can use download=True to download it')
102
+
103
+ splits_dir = os.path.join(voc_root, 'ImageSets/Segmentation')
104
+
105
+ split_f = os.path.join(splits_dir, image_set.rstrip('\n') + '.txt')
106
+
107
+ if not os.path.exists(split_f):
108
+ raise ValueError(
109
+ 'Wrong image_set entered! Please use image_set="train" '
110
+ 'or image_set="trainval" or image_set="val"')
111
+
112
+ with open(os.path.join(split_f), "r") as f:
113
+ file_names = [x.strip() for x in f.readlines()]
114
+
115
+ self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names]
116
+ self.masks = [os.path.join(mask_dir, x + ".png") for x in file_names]
117
+ assert (len(self.images) == len(self.masks))
118
+
119
+ def __getitem__(self, index):
120
+ """
121
+ Args:
122
+ index (int): Index
123
+
124
+ Returns:
125
+ tuple: (image, target) where target is the image segmentation.
126
+ """
127
+ img = Image.open(self.images[index]).convert('RGB')
128
+ target = Image.open(self.masks[index])
129
+
130
+ if self.transform is not None:
131
+ img = self.transform(img)
132
+
133
+ if self.target_transform is not None:
134
+ target = np.array(self.target_transform(target)).astype('int32')
135
+ target[target == 255] = -1
136
+ target = torch.from_numpy(target).long()
137
+
138
+ return img, target
139
+
140
+ @staticmethod
141
+ def _mask_transform(mask):
142
+ target = np.array(mask).astype('int32')
143
+ target[target == 255] = -1
144
+ return torch.from_numpy(target).long()
145
+
146
+ def __len__(self):
147
+ return len(self.images)
148
+
149
+ @property
150
+ def pred_offset(self):
151
+ return 0
152
+
153
+
154
+ class VOCClassification(data.Dataset):
155
+ """`Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Segmentation Dataset.
156
+
157
+ Args:
158
+ root (string): Root directory of the VOC Dataset.
159
+ year (string, optional): The dataset year, supports years 2007 to 2012.
160
+ image_set (string, optional): Select the image_set to use, ``train``, ``trainval`` or ``val``
161
+ download (bool, optional): If true, downloads the dataset from the internet and
162
+ puts it in root directory. If dataset is already downloaded, it is not
163
+ downloaded again.
164
+ transform (callable, optional): A function/transform that takes in an PIL image
165
+ and returns a transformed version. E.g, ``transforms.RandomCrop``
166
+ """
167
+ CLASSES = 20
168
+
169
+ def __init__(self,
170
+ root,
171
+ year='2012',
172
+ image_set='train',
173
+ download=False,
174
+ transform=None):
175
+ self.root = os.path.expanduser(root)
176
+ self.year = year
177
+ self.url = DATASET_YEAR_DICT[year]['url']
178
+ self.filename = DATASET_YEAR_DICT[year]['filename']
179
+ self.md5 = DATASET_YEAR_DICT[year]['md5']
180
+ self.transform = transform
181
+ self.image_set = image_set
182
+ base_dir = DATASET_YEAR_DICT[year]['base_dir']
183
+ voc_root = os.path.join(self.root, base_dir)
184
+ image_dir = os.path.join(voc_root, 'JPEGImages')
185
+ mask_dir = os.path.join(voc_root, 'SegmentationClass')
186
+
187
+ if download:
188
+ download_extract(self.url, self.root, self.filename, self.md5)
189
+
190
+ if not os.path.isdir(voc_root):
191
+ raise RuntimeError('Dataset not found or corrupted.' +
192
+ ' You can use download=True to download it')
193
+
194
+ splits_dir = os.path.join(voc_root, 'ImageSets/Segmentation')
195
+
196
+ split_f = os.path.join(splits_dir, image_set.rstrip('\n') + '.txt')
197
+
198
+ if not os.path.exists(split_f):
199
+ raise ValueError(
200
+ 'Wrong image_set entered! Please use image_set="train" '
201
+ 'or image_set="trainval" or image_set="val"')
202
+
203
+ with open(os.path.join(split_f), "r") as f:
204
+ file_names = [x.strip() for x in f.readlines()]
205
+
206
+ self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names]
207
+ self.masks = [os.path.join(mask_dir, x + ".png") for x in file_names]
208
+ assert (len(self.images) == len(self.masks))
209
+
210
+ def __getitem__(self, index):
211
+ """
212
+ Args:
213
+ index (int): Index
214
+
215
+ Returns:
216
+ tuple: (image, target) where target is the image segmentation.
217
+ """
218
+ img = Image.open(self.images[index]).convert('RGB')
219
+ target = Image.open(self.masks[index])
220
+
221
+ # if self.transform is not None:
222
+ # img = self.transform(img)
223
+ if self.transform is not None:
224
+ img, target = self.transform(img, target)
225
+
226
+ visible_classes = np.unique(target)
227
+ labels = torch.zeros(self.CLASSES)
228
+ for id in visible_classes:
229
+ if id not in (0, 255):
230
+ labels[id - 1].fill_(1)
231
+
232
+ return img, labels
233
+
234
+ def __len__(self):
235
+ return len(self.images)
236
+
237
+
238
+ class VOCSBDClassification(data.Dataset):
239
+ """`Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Segmentation Dataset.
240
+
241
+ Args:
242
+ root (string): Root directory of the VOC Dataset.
243
+ year (string, optional): The dataset year, supports years 2007 to 2012.
244
+ image_set (string, optional): Select the image_set to use, ``train``, ``trainval`` or ``val``
245
+ download (bool, optional): If true, downloads the dataset from the internet and
246
+ puts it in root directory. If dataset is already downloaded, it is not
247
+ downloaded again.
248
+ transform (callable, optional): A function/transform that takes in an PIL image
249
+ and returns a transformed version. E.g, ``transforms.RandomCrop``
250
+ """
251
+ CLASSES = 20
252
+
253
+ def __init__(self,
254
+ root,
255
+ sbd_root,
256
+ year='2012',
257
+ image_set='train',
258
+ download=False,
259
+ transform=None):
260
+ self.root = os.path.expanduser(root)
261
+ self.sbd_root = os.path.expanduser(sbd_root)
262
+ self.year = year
263
+ self.url = DATASET_YEAR_DICT[year]['url']
264
+ self.filename = DATASET_YEAR_DICT[year]['filename']
265
+ self.md5 = DATASET_YEAR_DICT[year]['md5']
266
+ self.transform = transform
267
+ self.image_set = image_set
268
+ base_dir = DATASET_YEAR_DICT[year]['base_dir']
269
+ voc_root = os.path.join(self.root, base_dir)
270
+ image_dir = os.path.join(voc_root, 'JPEGImages')
271
+ mask_dir = os.path.join(voc_root, 'SegmentationClass')
272
+ sbd_image_dir = os.path.join(sbd_root, 'img')
273
+ sbd_mask_dir = os.path.join(sbd_root, 'cls')
274
+
275
+ if download:
276
+ download_extract(self.url, self.root, self.filename, self.md5)
277
+
278
+ if not os.path.isdir(voc_root):
279
+ raise RuntimeError('Dataset not found or corrupted.' +
280
+ ' You can use download=True to download it')
281
+
282
+ splits_dir = os.path.join(voc_root, 'ImageSets/Segmentation')
283
+
284
+ split_f = os.path.join(splits_dir, image_set.rstrip('\n') + '.txt')
285
+ sbd_split = os.path.join(sbd_root, 'train.txt')
286
+
287
+ if not os.path.exists(split_f):
288
+ raise ValueError(
289
+ 'Wrong image_set entered! Please use image_set="train" '
290
+ 'or image_set="trainval" or image_set="val"')
291
+
292
+ with open(os.path.join(split_f), "r") as f:
293
+ voc_file_names = [x.strip() for x in f.readlines()]
294
+
295
+ with open(os.path.join(sbd_split), "r") as f:
296
+ sbd_file_names = [x.strip() for x in f.readlines()]
297
+
298
+ self.images = [os.path.join(image_dir, x + ".jpg") for x in voc_file_names]
299
+ self.images += [os.path.join(sbd_image_dir, x + ".jpg") for x in sbd_file_names]
300
+ self.masks = [os.path.join(mask_dir, x + ".png") for x in voc_file_names]
301
+ self.masks += [os.path.join(sbd_mask_dir, x + ".mat") for x in sbd_file_names]
302
+ assert (len(self.images) == len(self.masks))
303
+
304
+ def __getitem__(self, index):
305
+ """
306
+ Args:
307
+ index (int): Index
308
+
309
+ Returns:
310
+ tuple: (image, labels) where labels is a multi-hot vector over the 20 VOC classes present in the image.
311
+ """
312
+ img = Image.open(self.images[index]).convert('RGB')
313
+ mask_path = self.masks[index]
314
+ if mask_path[-3:] == 'mat':
315
+ target = io.loadmat(mask_path, struct_as_record=False, squeeze_me=True)['GTcls'].Segmentation
316
+ target = Image.fromarray(target, mode='P')
317
+ else:
318
+ target = Image.open(self.masks[index])
319
+
320
+ if self.transform is not None:
321
+ img, target = self.transform(img, target)
322
+
323
+ visible_classes = np.unique(target)
324
+ labels = torch.zeros(self.CLASSES)
325
+ for id in visible_classes:
326
+ if id not in (0, 255):
327
+ labels[id - 1].fill_(1)
328
+
329
+ return img, labels
330
+
331
+ def __len__(self):
332
+ return len(self.images)
333
+
334
+
335
+ def download_extract(url, root, filename, md5):
336
+ download_url(url, root, filename, md5)
337
+ with tarfile.open(os.path.join(root, filename), "r") as tar:
338
+ tar.extractall(path=root)
339
+
340
+
341
+ class VOCResults(data.Dataset):
342
+ CLASSES = 20
343
+ CLASSES_NAMES = [
344
+ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
345
+ 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
346
+ 'motorbike', 'person', 'potted-plant', 'sheep', 'sofa', 'train',
347
+ 'tvmonitor', 'ambigious'
348
+ ]
349
+
350
+ def __init__(self, path):
351
+ super(VOCResults, self).__init__()
352
+
353
+ self.path = os.path.join(path, 'results.hdf5')
354
+ self.data = None
355
+
356
+ print('Reading dataset length...')
357
+ with h5py.File(self.path , 'r') as f:
358
+ self.data_length = len(f['/image'])
359
+
360
+ def __len__(self):
361
+ return self.data_length
362
+
363
+ def __getitem__(self, item):
364
+ if self.data is None:
365
+ self.data = h5py.File(self.path, 'r')
366
+
367
+ image = torch.tensor(self.data['image'][item])
368
+ vis = torch.tensor(self.data['vis'][item])
369
+ target = torch.tensor(self.data['target'][item])
370
+ class_pred = torch.tensor(self.data['class_pred'][item])
371
+
372
+ return image, vis, target, class_pred
ViT_DeiT/data/__init__.py ADDED
File without changes
ViT_DeiT/data/imagenet.py ADDED
@@ -0,0 +1,74 @@
1
+ import os
2
+ import torch
3
+ import torch.utils.data as data
4
+ import numpy as np
5
+
6
+ from PIL import Image
7
+ import h5py
8
+
9
+ __all__ = ['ImagenetResults']
10
+
11
+
12
+ class Imagenet_Segmentation(data.Dataset):
13
+ CLASSES = 2
14
+
15
+ def __init__(self,
16
+ path,
17
+ transform=None,
18
+ target_transform=None):
19
+ self.path = path
20
+ self.transform = transform
21
+ self.target_transform = target_transform
22
+ self.h5py = None
23
+ tmp = h5py.File(path, 'r')
24
+ self.data_length = len(tmp['/value/img'])
25
+ tmp.close()
26
+ del tmp
27
+
28
+ def __getitem__(self, index):
29
+
30
+ if self.h5py is None:
31
+ self.h5py = h5py.File(self.path, 'r')
32
+
33
+ img = np.array(self.h5py[self.h5py['/value/img'][index, 0]]).transpose((2, 1, 0))
34
+ target = np.array(self.h5py[self.h5py[self.h5py['/value/gt'][index, 0]][0, 0]]).transpose((1, 0))
35
+
36
+ img = Image.fromarray(img).convert('RGB')
37
+ target = Image.fromarray(target)
38
+
39
+ if self.transform is not None:
40
+ img = self.transform(img)
41
+
42
+ if self.target_transform is not None:
43
+ target = np.array(self.target_transform(target)).astype('int32')
44
+ target = torch.from_numpy(target).long()
45
+
46
+ return img, target
47
+
48
+ def __len__(self):
49
+ return self.data_length
50
+
51
+
52
+ class ImagenetResults(data.Dataset):
53
+ def __init__(self, path):
54
+ super(ImagenetResults, self).__init__()
55
+
56
+ self.path = os.path.join(path, 'results.hdf5')
57
+ self.data = None
58
+
59
+ print('Reading dataset length...')
60
+ with h5py.File(self.path, 'r') as f:
61
+ self.data_length = len(f['/image'])
62
+
63
+ def __len__(self):
64
+ return self.data_length
65
+
66
+ def __getitem__(self, item):
67
+ if self.data is None:
68
+ self.data = h5py.File(self.path, 'r')
69
+
70
+ image = torch.tensor(self.data['image'][item])
71
+ vis = torch.tensor(self.data['vis'][item])
72
+ target = torch.tensor(self.data['target'][item]).long()
73
+
74
+ return image, vis, target
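
A short consumption sketch (illustration only, with a placeholder path): ImagenetResults streams a precomputed results.hdf5 file and yields the input image, the saved relevance map, and the ground-truth label per sample, ready for downstream evaluation.

import torch
from data.imagenet import ImagenetResults

ds = ImagenetResults('/path/to/visualizations/method_name')   # hypothetical directory containing results.hdf5
loader = torch.utils.data.DataLoader(ds, batch_size=16, shuffle=False)
for image, vis, target in loader:
    # image: input batch, vis: per-pixel relevance maps, target: class indices
    pass
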
ViT_DeiT/data/imagenet_utils.py ADDED
@@ -0,0 +1,1002 @@
1
+ CLS2IDX = {
2
+ 0: 'tench, Tinca tinca',
3
+ 1: 'goldfish, Carassius auratus',
4
+ 2: 'great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias',
5
+ 3: 'tiger shark, Galeocerdo cuvieri',
6
+ 4: 'hammerhead, hammerhead shark',
7
+ 5: 'electric ray, crampfish, numbfish, torpedo',
8
+ 6: 'stingray',
9
+ 7: 'cock',
10
+ 8: 'hen',
11
+ 9: 'ostrich, Struthio camelus',
12
+ 10: 'brambling, Fringilla montifringilla',
13
+ 11: 'goldfinch, Carduelis carduelis',
14
+ 12: 'house finch, linnet, Carpodacus mexicanus',
15
+ 13: 'junco, snowbird',
16
+ 14: 'indigo bunting, indigo finch, indigo bird, Passerina cyanea',
17
+ 15: 'robin, American robin, Turdus migratorius',
18
+ 16: 'bulbul',
19
+ 17: 'jay',
20
+ 18: 'magpie',
21
+ 19: 'chickadee',
22
+ 20: 'water ouzel, dipper',
23
+ 21: 'kite',
24
+ 22: 'bald eagle, American eagle, Haliaeetus leucocephalus',
25
+ 23: 'vulture',
26
+ 24: 'great grey owl, great gray owl, Strix nebulosa',
27
+ 25: 'European fire salamander, Salamandra salamandra',
28
+ 26: 'common newt, Triturus vulgaris',
29
+ 27: 'eft',
30
+ 28: 'spotted salamander, Ambystoma maculatum',
31
+ 29: 'axolotl, mud puppy, Ambystoma mexicanum',
32
+ 30: 'bullfrog, Rana catesbeiana',
33
+ 31: 'tree frog, tree-frog',
34
+ 32: 'tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui',
35
+ 33: 'loggerhead, loggerhead turtle, Caretta caretta',
36
+ 34: 'leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea',
37
+ 35: 'mud turtle',
38
+ 36: 'terrapin',
39
+ 37: 'box turtle, box tortoise',
40
+ 38: 'banded gecko',
41
+ 39: 'common iguana, iguana, Iguana iguana',
42
+ 40: 'American chameleon, anole, Anolis carolinensis',
43
+ 41: 'whiptail, whiptail lizard',
44
+ 42: 'agama',
45
+ 43: 'frilled lizard, Chlamydosaurus kingi',
46
+ 44: 'alligator lizard',
47
+ 45: 'Gila monster, Heloderma suspectum',
48
+ 46: 'green lizard, Lacerta viridis',
49
+ 47: 'African chameleon, Chamaeleo chamaeleon',
50
+ 48: 'Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis',
51
+ 49: 'African crocodile, Nile crocodile, Crocodylus niloticus',
52
+ 50: 'American alligator, Alligator mississipiensis',
53
+ 51: 'triceratops',
54
+ 52: 'thunder snake, worm snake, Carphophis amoenus',
55
+ 53: 'ringneck snake, ring-necked snake, ring snake',
56
+ 54: 'hognose snake, puff adder, sand viper',
57
+ 55: 'green snake, grass snake',
58
+ 56: 'king snake, kingsnake',
59
+ 57: 'garter snake, grass snake',
60
+ 58: 'water snake',
61
+ 59: 'vine snake',
62
+ 60: 'night snake, Hypsiglena torquata',
63
+ 61: 'boa constrictor, Constrictor constrictor',
64
+ 62: 'rock python, rock snake, Python sebae',
65
+ 63: 'Indian cobra, Naja naja',
66
+ 64: 'green mamba',
67
+ 65: 'sea snake',
68
+ 66: 'horned viper, cerastes, sand viper, horned asp, Cerastes cornutus',
69
+ 67: 'diamondback, diamondback rattlesnake, Crotalus adamanteus',
70
+ 68: 'sidewinder, horned rattlesnake, Crotalus cerastes',
71
+ 69: 'trilobite',
72
+ 70: 'harvestman, daddy longlegs, Phalangium opilio',
73
+ 71: 'scorpion',
74
+ 72: 'black and gold garden spider, Argiope aurantia',
75
+ 73: 'barn spider, Araneus cavaticus',
76
+ 74: 'garden spider, Aranea diademata',
77
+ 75: 'black widow, Latrodectus mactans',
78
+ 76: 'tarantula',
79
+ 77: 'wolf spider, hunting spider',
80
+ 78: 'tick',
81
+ 79: 'centipede',
82
+ 80: 'black grouse',
83
+ 81: 'ptarmigan',
84
+ 82: 'ruffed grouse, partridge, Bonasa umbellus',
85
+ 83: 'prairie chicken, prairie grouse, prairie fowl',
86
+ 84: 'peacock',
87
+ 85: 'quail',
88
+ 86: 'partridge',
89
+ 87: 'African grey, African gray, Psittacus erithacus',
90
+ 88: 'macaw',
91
+ 89: 'sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita',
92
+ 90: 'lorikeet',
93
+ 91: 'coucal',
94
+ 92: 'bee eater',
95
+ 93: 'hornbill',
96
+ 94: 'hummingbird',
97
+ 95: 'jacamar',
98
+ 96: 'toucan',
99
+ 97: 'drake',
100
+ 98: 'red-breasted merganser, Mergus serrator',
101
+ 99: 'goose',
102
+ 100: 'black swan, Cygnus atratus',
103
+ 101: 'tusker',
104
+ 102: 'echidna, spiny anteater, anteater',
105
+ 103: 'platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus',
106
+ 104: 'wallaby, brush kangaroo',
107
+ 105: 'koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus',
108
+ 106: 'wombat',
109
+ 107: 'jellyfish',
110
+ 108: 'sea anemone, anemone',
111
+ 109: 'brain coral',
112
+ 110: 'flatworm, platyhelminth',
113
+ 111: 'nematode, nematode worm, roundworm',
114
+ 112: 'conch',
115
+ 113: 'snail',
116
+ 114: 'slug',
117
+ 115: 'sea slug, nudibranch',
118
+ 116: 'chiton, coat-of-mail shell, sea cradle, polyplacophore',
119
+ 117: 'chambered nautilus, pearly nautilus, nautilus',
120
+ 118: 'Dungeness crab, Cancer magister',
121
+ 119: 'rock crab, Cancer irroratus',
122
+ 120: 'fiddler crab',
123
+ 121: 'king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica',
124
+ 122: 'American lobster, Northern lobster, Maine lobster, Homarus americanus',
125
+ 123: 'spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish',
126
+ 124: 'crayfish, crawfish, crawdad, crawdaddy',
127
+ 125: 'hermit crab',
128
+ 126: 'isopod',
129
+ 127: 'white stork, Ciconia ciconia',
130
+ 128: 'black stork, Ciconia nigra',
131
+ 129: 'spoonbill',
132
+ 130: 'flamingo',
133
+ 131: 'little blue heron, Egretta caerulea',
134
+ 132: 'American egret, great white heron, Egretta albus',
135
+ 133: 'bittern',
136
+ 134: 'crane',
137
+ 135: 'limpkin, Aramus pictus',
138
+ 136: 'European gallinule, Porphyrio porphyrio',
139
+ 137: 'American coot, marsh hen, mud hen, water hen, Fulica americana',
140
+ 138: 'bustard',
141
+ 139: 'ruddy turnstone, Arenaria interpres',
142
+ 140: 'red-backed sandpiper, dunlin, Erolia alpina',
143
+ 141: 'redshank, Tringa totanus',
144
+ 142: 'dowitcher',
145
+ 143: 'oystercatcher, oyster catcher',
146
+ 144: 'pelican',
147
+ 145: 'king penguin, Aptenodytes patagonica',
148
+ 146: 'albatross, mollymawk',
149
+ 147: 'grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus',
150
+ 148: 'killer whale, killer, orca, grampus, sea wolf, Orcinus orca',
151
+ 149: 'dugong, Dugong dugon',
152
+ 150: 'sea lion',
153
+ 151: 'Chihuahua',
154
+ 152: 'Japanese spaniel',
155
+ 153: 'Maltese dog, Maltese terrier, Maltese',
156
+ 154: 'Pekinese, Pekingese, Peke',
157
+ 155: 'Shih-Tzu',
158
+ 156: 'Blenheim spaniel',
159
+ 157: 'papillon',
160
+ 158: 'toy terrier',
161
+ 159: 'Rhodesian ridgeback',
162
+ 160: 'Afghan hound, Afghan',
163
+ 161: 'basset, basset hound',
164
+ 162: 'beagle',
165
+ 163: 'bloodhound, sleuthhound',
166
+ 164: 'bluetick',
167
+ 165: 'black-and-tan coonhound',
168
+ 166: 'Walker hound, Walker foxhound',
169
+ 167: 'English foxhound',
170
+ 168: 'redbone',
171
+ 169: 'borzoi, Russian wolfhound',
172
+ 170: 'Irish wolfhound',
173
+ 171: 'Italian greyhound',
174
+ 172: 'whippet',
175
+ 173: 'Ibizan hound, Ibizan Podenco',
176
+ 174: 'Norwegian elkhound, elkhound',
177
+ 175: 'otterhound, otter hound',
178
+ 176: 'Saluki, gazelle hound',
179
+ 177: 'Scottish deerhound, deerhound',
180
+ 178: 'Weimaraner',
181
+ 179: 'Staffordshire bullterrier, Staffordshire bull terrier',
182
+ 180: 'American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier',
183
+ 181: 'Bedlington terrier',
184
+ 182: 'Border terrier',
185
+ 183: 'Kerry blue terrier',
186
+ 184: 'Irish terrier',
187
+ 185: 'Norfolk terrier',
188
+ 186: 'Norwich terrier',
189
+ 187: 'Yorkshire terrier',
190
+ 188: 'wire-haired fox terrier',
191
+ 189: 'Lakeland terrier',
192
+ 190: 'Sealyham terrier, Sealyham',
193
+ 191: 'Airedale, Airedale terrier',
194
+ 192: 'cairn, cairn terrier',
195
+ 193: 'Australian terrier',
196
+ 194: 'Dandie Dinmont, Dandie Dinmont terrier',
197
+ 195: 'Boston bull, Boston terrier',
198
+ 196: 'miniature schnauzer',
199
+ 197: 'giant schnauzer',
200
+ 198: 'standard schnauzer',
201
+ 199: 'Scotch terrier, Scottish terrier, Scottie',
202
+ 200: 'Tibetan terrier, chrysanthemum dog',
203
+ 201: 'silky terrier, Sydney silky',
204
+ 202: 'soft-coated wheaten terrier',
205
+ 203: 'West Highland white terrier',
206
+ 204: 'Lhasa, Lhasa apso',
207
+ 205: 'flat-coated retriever',
208
+ 206: 'curly-coated retriever',
209
+ 207: 'golden retriever',
210
+ 208: 'Labrador retriever',
211
+ 209: 'Chesapeake Bay retriever',
212
+ 210: 'German short-haired pointer',
213
+ 211: 'vizsla, Hungarian pointer',
214
+ 212: 'English setter',
215
+ 213: 'Irish setter, red setter',
216
+ 214: 'Gordon setter',
217
+ 215: 'Brittany spaniel',
218
+ 216: 'clumber, clumber spaniel',
219
+ 217: 'English springer, English springer spaniel',
220
+ 218: 'Welsh springer spaniel',
221
+ 219: 'cocker spaniel, English cocker spaniel, cocker',
222
+ 220: 'Sussex spaniel',
223
+ 221: 'Irish water spaniel',
224
+ 222: 'kuvasz',
225
+ 223: 'schipperke',
226
+ 224: 'groenendael',
227
+ 225: 'malinois',
228
+ 226: 'briard',
229
+ 227: 'kelpie',
230
+ 228: 'komondor',
231
+ 229: 'Old English sheepdog, bobtail',
232
+ 230: 'Shetland sheepdog, Shetland sheep dog, Shetland',
233
+ 231: 'collie',
234
+ 232: 'Border collie',
235
+ 233: 'Bouvier des Flandres, Bouviers des Flandres',
236
+ 234: 'Rottweiler',
237
+ 235: 'German shepherd, German shepherd dog, German police dog, alsatian',
238
+ 236: 'Doberman, Doberman pinscher',
239
+ 237: 'miniature pinscher',
240
+ 238: 'Greater Swiss Mountain dog',
241
+ 239: 'Bernese mountain dog',
242
+ 240: 'Appenzeller',
243
+ 241: 'EntleBucher',
244
+ 242: 'boxer',
245
+ 243: 'bull mastiff',
246
+ 244: 'Tibetan mastiff',
247
+ 245: 'French bulldog',
248
+ 246: 'Great Dane',
249
+ 247: 'Saint Bernard, St Bernard',
250
+ 248: 'Eskimo dog, husky',
251
+ 249: 'malamute, malemute, Alaskan malamute',
252
+ 250: 'Siberian husky',
253
+ 251: 'dalmatian, coach dog, carriage dog',
254
+ 252: 'affenpinscher, monkey pinscher, monkey dog',
255
+ 253: 'basenji',
256
+ 254: 'pug, pug-dog',
257
+ 255: 'Leonberg',
258
+ 256: 'Newfoundland, Newfoundland dog',
259
+ 257: 'Great Pyrenees',
260
+ 258: 'Samoyed, Samoyede',
261
+ 259: 'Pomeranian',
262
+ 260: 'chow, chow chow',
263
+ 261: 'keeshond',
264
+ 262: 'Brabancon griffon',
265
+ 263: 'Pembroke, Pembroke Welsh corgi',
266
+ 264: 'Cardigan, Cardigan Welsh corgi',
267
+ 265: 'toy poodle',
268
+ 266: 'miniature poodle',
269
+ 267: 'standard poodle',
270
+ 268: 'Mexican hairless',
271
+ 269: 'timber wolf, grey wolf, gray wolf, Canis lupus',
272
+ 270: 'white wolf, Arctic wolf, Canis lupus tundrarum',
273
+ 271: 'red wolf, maned wolf, Canis rufus, Canis niger',
274
+ 272: 'coyote, prairie wolf, brush wolf, Canis latrans',
275
+ 273: 'dingo, warrigal, warragal, Canis dingo',
276
+ 274: 'dhole, Cuon alpinus',
277
+ 275: 'African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus',
278
+ 276: 'hyena, hyaena',
279
+ 277: 'red fox, Vulpes vulpes',
280
+ 278: 'kit fox, Vulpes macrotis',
281
+ 279: 'Arctic fox, white fox, Alopex lagopus',
282
+ 280: 'grey fox, gray fox, Urocyon cinereoargenteus',
283
+ 281: 'tabby, tabby cat',
284
+ 282: 'tiger cat',
285
+ 283: 'Persian cat',
286
+ 284: 'Siamese cat, Siamese',
287
+ 285: 'Egyptian cat',
288
+ 286: 'cougar, puma, catamount, mountain lion, painter, panther, Felis concolor',
289
+ 287: 'lynx, catamount',
290
+ 288: 'leopard, Panthera pardus',
291
+ 289: 'snow leopard, ounce, Panthera uncia',
292
+ 290: 'jaguar, panther, Panthera onca, Felis onca',
293
+ 291: 'lion, king of beasts, Panthera leo',
294
+ 292: 'tiger, Panthera tigris',
295
+ 293: 'cheetah, chetah, Acinonyx jubatus',
296
+ 294: 'brown bear, bruin, Ursus arctos',
297
+ 295: 'American black bear, black bear, Ursus americanus, Euarctos americanus',
298
+ 296: 'ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus',
299
+ 297: 'sloth bear, Melursus ursinus, Ursus ursinus',
300
+ 298: 'mongoose',
301
+ 299: 'meerkat, mierkat',
302
+ 300: 'tiger beetle',
303
+ 301: 'ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle',
304
+ 302: 'ground beetle, carabid beetle',
305
+ 303: 'long-horned beetle, longicorn, longicorn beetle',
306
+ 304: 'leaf beetle, chrysomelid',
307
+ 305: 'dung beetle',
308
+ 306: 'rhinoceros beetle',
309
+ 307: 'weevil',
310
+ 308: 'fly',
311
+ 309: 'bee',
312
+ 310: 'ant, emmet, pismire',
313
+ 311: 'grasshopper, hopper',
314
+ 312: 'cricket',
315
+ 313: 'walking stick, walkingstick, stick insect',
316
+ 314: 'cockroach, roach',
317
+ 315: 'mantis, mantid',
318
+ 316: 'cicada, cicala',
319
+ 317: 'leafhopper',
320
+ 318: 'lacewing, lacewing fly',
321
+ 319: "dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk",
322
+ 320: 'damselfly',
323
+ 321: 'admiral',
324
+ 322: 'ringlet, ringlet butterfly',
325
+ 323: 'monarch, monarch butterfly, milkweed butterfly, Danaus plexippus',
326
+ 324: 'cabbage butterfly',
327
+ 325: 'sulphur butterfly, sulfur butterfly',
328
+ 326: 'lycaenid, lycaenid butterfly',
329
+ 327: 'starfish, sea star',
330
+ 328: 'sea urchin',
331
+ 329: 'sea cucumber, holothurian',
332
+ 330: 'wood rabbit, cottontail, cottontail rabbit',
333
+ 331: 'hare',
334
+ 332: 'Angora, Angora rabbit',
335
+ 333: 'hamster',
336
+ 334: 'porcupine, hedgehog',
337
+ 335: 'fox squirrel, eastern fox squirrel, Sciurus niger',
338
+ 336: 'marmot',
339
+ 337: 'beaver',
340
+ 338: 'guinea pig, Cavia cobaya',
341
+ 339: 'sorrel',
342
+ 340: 'zebra',
343
+ 341: 'hog, pig, grunter, squealer, Sus scrofa',
344
+ 342: 'wild boar, boar, Sus scrofa',
345
+ 343: 'warthog',
346
+ 344: 'hippopotamus, hippo, river horse, Hippopotamus amphibius',
347
+ 345: 'ox',
348
+ 346: 'water buffalo, water ox, Asiatic buffalo, Bubalus bubalis',
349
+ 347: 'bison',
350
+ 348: 'ram, tup',
351
+ 349: 'bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis',
352
+ 350: 'ibex, Capra ibex',
353
+ 351: 'hartebeest',
354
+ 352: 'impala, Aepyceros melampus',
355
+ 353: 'gazelle',
356
+ 354: 'Arabian camel, dromedary, Camelus dromedarius',
357
+ 355: 'llama',
358
+ 356: 'weasel',
359
+ 357: 'mink',
360
+ 358: 'polecat, fitch, foulmart, foumart, Mustela putorius',
361
+ 359: 'black-footed ferret, ferret, Mustela nigripes',
362
+ 360: 'otter',
363
+ 361: 'skunk, polecat, wood pussy',
364
+ 362: 'badger',
365
+ 363: 'armadillo',
366
+ 364: 'three-toed sloth, ai, Bradypus tridactylus',
367
+ 365: 'orangutan, orang, orangutang, Pongo pygmaeus',
368
+ 366: 'gorilla, Gorilla gorilla',
369
+ 367: 'chimpanzee, chimp, Pan troglodytes',
370
+ 368: 'gibbon, Hylobates lar',
371
+ 369: 'siamang, Hylobates syndactylus, Symphalangus syndactylus',
372
+ 370: 'guenon, guenon monkey',
373
+ 371: 'patas, hussar monkey, Erythrocebus patas',
374
+ 372: 'baboon',
375
+ 373: 'macaque',
376
+ 374: 'langur',
377
+ 375: 'colobus, colobus monkey',
378
+ 376: 'proboscis monkey, Nasalis larvatus',
379
+ 377: 'marmoset',
380
+ 378: 'capuchin, ringtail, Cebus capucinus',
381
+ 379: 'howler monkey, howler',
382
+ 380: 'titi, titi monkey',
383
+ 381: 'spider monkey, Ateles geoffroyi',
384
+ 382: 'squirrel monkey, Saimiri sciureus',
385
+ 383: 'Madagascar cat, ring-tailed lemur, Lemur catta',
386
+ 384: 'indri, indris, Indri indri, Indri brevicaudatus',
387
+ 385: 'Indian elephant, Elephas maximus',
388
+ 386: 'African elephant, Loxodonta africana',
389
+ 387: 'lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens',
390
+ 388: 'giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca',
391
+ 389: 'barracouta, snoek',
392
+ 390: 'eel',
393
+ 391: 'coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch',
394
+ 392: 'rock beauty, Holocanthus tricolor',
395
+ 393: 'anemone fish',
396
+ 394: 'sturgeon',
397
+ 395: 'gar, garfish, garpike, billfish, Lepisosteus osseus',
398
+ 396: 'lionfish',
399
+ 397: 'puffer, pufferfish, blowfish, globefish',
400
+ 398: 'abacus',
401
+ 399: 'abaya',
402
+ 400: "academic gown, academic robe, judge's robe",
403
+ 401: 'accordion, piano accordion, squeeze box',
404
+ 402: 'acoustic guitar',
405
+ 403: 'aircraft carrier, carrier, flattop, attack aircraft carrier',
406
+ 404: 'airliner',
407
+ 405: 'airship, dirigible',
408
+ 406: 'altar',
409
+ 407: 'ambulance',
410
+ 408: 'amphibian, amphibious vehicle',
411
+ 409: 'analog clock',
412
+ 410: 'apiary, bee house',
413
+ 411: 'apron',
414
+ 412: 'ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin',
415
+ 413: 'assault rifle, assault gun',
416
+ 414: 'backpack, back pack, knapsack, packsack, rucksack, haversack',
417
+ 415: 'bakery, bakeshop, bakehouse',
418
+ 416: 'balance beam, beam',
419
+ 417: 'balloon',
420
+ 418: 'ballpoint, ballpoint pen, ballpen, Biro',
421
+ 419: 'Band Aid',
422
+ 420: 'banjo',
423
+ 421: 'bannister, banister, balustrade, balusters, handrail',
424
+ 422: 'barbell',
425
+ 423: 'barber chair',
426
+ 424: 'barbershop',
427
+ 425: 'barn',
428
+ 426: 'barometer',
429
+ 427: 'barrel, cask',
430
+ 428: 'barrow, garden cart, lawn cart, wheelbarrow',
431
+ 429: 'baseball',
432
+ 430: 'basketball',
433
+ 431: 'bassinet',
434
+ 432: 'bassoon',
435
+ 433: 'bathing cap, swimming cap',
436
+ 434: 'bath towel',
437
+ 435: 'bathtub, bathing tub, bath, tub',
438
+ 436: 'beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon',
439
+ 437: 'beacon, lighthouse, beacon light, pharos',
440
+ 438: 'beaker',
441
+ 439: 'bearskin, busby, shako',
442
+ 440: 'beer bottle',
443
+ 441: 'beer glass',
444
+ 442: 'bell cote, bell cot',
445
+ 443: 'bib',
446
+ 444: 'bicycle-built-for-two, tandem bicycle, tandem',
447
+ 445: 'bikini, two-piece',
448
+ 446: 'binder, ring-binder',
449
+ 447: 'binoculars, field glasses, opera glasses',
450
+ 448: 'birdhouse',
451
+ 449: 'boathouse',
452
+ 450: 'bobsled, bobsleigh, bob',
453
+ 451: 'bolo tie, bolo, bola tie, bola',
454
+ 452: 'bonnet, poke bonnet',
455
+ 453: 'bookcase',
456
+ 454: 'bookshop, bookstore, bookstall',
457
+ 455: 'bottlecap',
458
+ 456: 'bow',
459
+ 457: 'bow tie, bow-tie, bowtie',
460
+ 458: 'brass, memorial tablet, plaque',
461
+ 459: 'brassiere, bra, bandeau',
462
+ 460: 'breakwater, groin, groyne, mole, bulwark, seawall, jetty',
463
+ 461: 'breastplate, aegis, egis',
464
+ 462: 'broom',
465
+ 463: 'bucket, pail',
466
+ 464: 'buckle',
467
+ 465: 'bulletproof vest',
468
+ 466: 'bullet train, bullet',
469
+ 467: 'butcher shop, meat market',
470
+ 468: 'cab, hack, taxi, taxicab',
471
+ 469: 'caldron, cauldron',
472
+ 470: 'candle, taper, wax light',
473
+ 471: 'cannon',
474
+ 472: 'canoe',
475
+ 473: 'can opener, tin opener',
476
+ 474: 'cardigan',
477
+ 475: 'car mirror',
478
+ 476: 'carousel, carrousel, merry-go-round, roundabout, whirligig',
479
+ 477: "carpenter's kit, tool kit",
480
+ 478: 'carton',
481
+ 479: 'car wheel',
482
+ 480: 'cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM',
483
+ 481: 'cassette',
484
+ 482: 'cassette player',
485
+ 483: 'castle',
486
+ 484: 'catamaran',
487
+ 485: 'CD player',
488
+ 486: 'cello, violoncello',
489
+ 487: 'cellular telephone, cellular phone, cellphone, cell, mobile phone',
490
+ 488: 'chain',
491
+ 489: 'chainlink fence',
492
+ 490: 'chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour',
493
+ 491: 'chain saw, chainsaw',
494
+ 492: 'chest',
495
+ 493: 'chiffonier, commode',
496
+ 494: 'chime, bell, gong',
497
+ 495: 'china cabinet, china closet',
498
+ 496: 'Christmas stocking',
499
+ 497: 'church, church building',
500
+ 498: 'cinema, movie theater, movie theatre, movie house, picture palace',
501
+ 499: 'cleaver, meat cleaver, chopper',
502
+ 500: 'cliff dwelling',
503
+ 501: 'cloak',
504
+ 502: 'clog, geta, patten, sabot',
505
+ 503: 'cocktail shaker',
506
+ 504: 'coffee mug',
507
+ 505: 'coffeepot',
508
+ 506: 'coil, spiral, volute, whorl, helix',
509
+ 507: 'combination lock',
510
+ 508: 'computer keyboard, keypad',
511
+ 509: 'confectionery, confectionary, candy store',
512
+ 510: 'container ship, containership, container vessel',
513
+ 511: 'convertible',
514
+ 512: 'corkscrew, bottle screw',
515
+ 513: 'cornet, horn, trumpet, trump',
516
+ 514: 'cowboy boot',
517
+ 515: 'cowboy hat, ten-gallon hat',
518
+ 516: 'cradle',
519
+ 517: 'crane',
520
+ 518: 'crash helmet',
521
+ 519: 'crate',
522
+ 520: 'crib, cot',
523
+ 521: 'Crock Pot',
524
+ 522: 'croquet ball',
525
+ 523: 'crutch',
526
+ 524: 'cuirass',
527
+ 525: 'dam, dike, dyke',
528
+ 526: 'desk',
529
+ 527: 'desktop computer',
530
+ 528: 'dial telephone, dial phone',
531
+ 529: 'diaper, nappy, napkin',
532
+ 530: 'digital clock',
533
+ 531: 'digital watch',
534
+ 532: 'dining table, board',
535
+ 533: 'dishrag, dishcloth',
536
+ 534: 'dishwasher, dish washer, dishwashing machine',
537
+ 535: 'disk brake, disc brake',
538
+ 536: 'dock, dockage, docking facility',
539
+ 537: 'dogsled, dog sled, dog sleigh',
540
+ 538: 'dome',
541
+ 539: 'doormat, welcome mat',
542
+ 540: 'drilling platform, offshore rig',
543
+ 541: 'drum, membranophone, tympan',
544
+ 542: 'drumstick',
545
+ 543: 'dumbbell',
546
+ 544: 'Dutch oven',
547
+ 545: 'electric fan, blower',
548
+ 546: 'electric guitar',
549
+ 547: 'electric locomotive',
550
+ 548: 'entertainment center',
551
+ 549: 'envelope',
552
+ 550: 'espresso maker',
553
+ 551: 'face powder',
554
+ 552: 'feather boa, boa',
555
+ 553: 'file, file cabinet, filing cabinet',
556
+ 554: 'fireboat',
557
+ 555: 'fire engine, fire truck',
558
+ 556: 'fire screen, fireguard',
559
+ 557: 'flagpole, flagstaff',
560
+ 558: 'flute, transverse flute',
561
+ 559: 'folding chair',
562
+ 560: 'football helmet',
563
+ 561: 'forklift',
564
+ 562: 'fountain',
565
+ 563: 'fountain pen',
566
+ 564: 'four-poster',
567
+ 565: 'freight car',
568
+ 566: 'French horn, horn',
569
+ 567: 'frying pan, frypan, skillet',
570
+ 568: 'fur coat',
571
+ 569: 'garbage truck, dustcart',
572
+ 570: 'gasmask, respirator, gas helmet',
573
+ 571: 'gas pump, gasoline pump, petrol pump, island dispenser',
574
+ 572: 'goblet',
575
+ 573: 'go-kart',
576
+ 574: 'golf ball',
577
+ 575: 'golfcart, golf cart',
578
+ 576: 'gondola',
579
+ 577: 'gong, tam-tam',
580
+ 578: 'gown',
581
+ 579: 'grand piano, grand',
582
+ 580: 'greenhouse, nursery, glasshouse',
583
+ 581: 'grille, radiator grille',
584
+ 582: 'grocery store, grocery, food market, market',
585
+ 583: 'guillotine',
586
+ 584: 'hair slide',
587
+ 585: 'hair spray',
588
+ 586: 'half track',
589
+ 587: 'hammer',
590
+ 588: 'hamper',
591
+ 589: 'hand blower, blow dryer, blow drier, hair dryer, hair drier',
592
+ 590: 'hand-held computer, hand-held microcomputer',
593
+ 591: 'handkerchief, hankie, hanky, hankey',
594
+ 592: 'hard disc, hard disk, fixed disk',
595
+ 593: 'harmonica, mouth organ, harp, mouth harp',
596
+ 594: 'harp',
597
+ 595: 'harvester, reaper',
598
+ 596: 'hatchet',
599
+ 597: 'holster',
600
+ 598: 'home theater, home theatre',
601
+ 599: 'honeycomb',
602
+ 600: 'hook, claw',
603
+ 601: 'hoopskirt, crinoline',
604
+ 602: 'horizontal bar, high bar',
605
+ 603: 'horse cart, horse-cart',
606
+ 604: 'hourglass',
607
+ 605: 'iPod',
608
+ 606: 'iron, smoothing iron',
609
+ 607: "jack-o'-lantern",
610
+ 608: 'jean, blue jean, denim',
611
+ 609: 'jeep, landrover',
612
+ 610: 'jersey, T-shirt, tee shirt',
613
+ 611: 'jigsaw puzzle',
614
+ 612: 'jinrikisha, ricksha, rickshaw',
615
+ 613: 'joystick',
616
+ 614: 'kimono',
617
+ 615: 'knee pad',
618
+ 616: 'knot',
619
+ 617: 'lab coat, laboratory coat',
620
+ 618: 'ladle',
621
+ 619: 'lampshade, lamp shade',
622
+ 620: 'laptop, laptop computer',
623
+ 621: 'lawn mower, mower',
624
+ 622: 'lens cap, lens cover',
625
+ 623: 'letter opener, paper knife, paperknife',
626
+ 624: 'library',
627
+ 625: 'lifeboat',
628
+ 626: 'lighter, light, igniter, ignitor',
629
+ 627: 'limousine, limo',
630
+ 628: 'liner, ocean liner',
631
+ 629: 'lipstick, lip rouge',
632
+ 630: 'Loafer',
633
+ 631: 'lotion',
634
+ 632: 'loudspeaker, speaker, speaker unit, loudspeaker system, speaker system',
635
+ 633: "loupe, jeweler's loupe",
636
+ 634: 'lumbermill, sawmill',
637
+ 635: 'magnetic compass',
638
+ 636: 'mailbag, postbag',
639
+ 637: 'mailbox, letter box',
640
+ 638: 'maillot',
641
+ 639: 'maillot, tank suit',
642
+ 640: 'manhole cover',
643
+ 641: 'maraca',
644
+ 642: 'marimba, xylophone',
645
+ 643: 'mask',
646
+ 644: 'matchstick',
647
+ 645: 'maypole',
648
+ 646: 'maze, labyrinth',
649
+ 647: 'measuring cup',
650
+ 648: 'medicine chest, medicine cabinet',
651
+ 649: 'megalith, megalithic structure',
652
+ 650: 'microphone, mike',
653
+ 651: 'microwave, microwave oven',
654
+ 652: 'military uniform',
655
+ 653: 'milk can',
656
+ 654: 'minibus',
657
+ 655: 'miniskirt, mini',
658
+ 656: 'minivan',
659
+ 657: 'missile',
660
+ 658: 'mitten',
661
+ 659: 'mixing bowl',
662
+ 660: 'mobile home, manufactured home',
663
+ 661: 'Model T',
664
+ 662: 'modem',
665
+ 663: 'monastery',
666
+ 664: 'monitor',
667
+ 665: 'moped',
668
+ 666: 'mortar',
669
+ 667: 'mortarboard',
670
+ 668: 'mosque',
671
+ 669: 'mosquito net',
672
+ 670: 'motor scooter, scooter',
673
+ 671: 'mountain bike, all-terrain bike, off-roader',
674
+ 672: 'mountain tent',
675
+ 673: 'mouse, computer mouse',
676
+ 674: 'mousetrap',
677
+ 675: 'moving van',
678
+ 676: 'muzzle',
679
+ 677: 'nail',
680
+ 678: 'neck brace',
681
+ 679: 'necklace',
682
+ 680: 'nipple',
683
+ 681: 'notebook, notebook computer',
684
+ 682: 'obelisk',
685
+ 683: 'oboe, hautboy, hautbois',
686
+ 684: 'ocarina, sweet potato',
687
+ 685: 'odometer, hodometer, mileometer, milometer',
688
+ 686: 'oil filter',
689
+ 687: 'organ, pipe organ',
690
+ 688: 'oscilloscope, scope, cathode-ray oscilloscope, CRO',
691
+ 689: 'overskirt',
692
+ 690: 'oxcart',
693
+ 691: 'oxygen mask',
694
+ 692: 'packet',
695
+ 693: 'paddle, boat paddle',
696
+ 694: 'paddlewheel, paddle wheel',
697
+ 695: 'padlock',
698
+ 696: 'paintbrush',
699
+ 697: "pajama, pyjama, pj's, jammies",
700
+ 698: 'palace',
701
+ 699: 'panpipe, pandean pipe, syrinx',
702
+ 700: 'paper towel',
703
+ 701: 'parachute, chute',
704
+ 702: 'parallel bars, bars',
705
+ 703: 'park bench',
706
+ 704: 'parking meter',
707
+ 705: 'passenger car, coach, carriage',
708
+ 706: 'patio, terrace',
709
+ 707: 'pay-phone, pay-station',
710
+ 708: 'pedestal, plinth, footstall',
711
+ 709: 'pencil box, pencil case',
712
+ 710: 'pencil sharpener',
713
+ 711: 'perfume, essence',
714
+ 712: 'Petri dish',
715
+ 713: 'photocopier',
716
+ 714: 'pick, plectrum, plectron',
717
+ 715: 'pickelhaube',
718
+ 716: 'picket fence, paling',
719
+ 717: 'pickup, pickup truck',
720
+ 718: 'pier',
721
+ 719: 'piggy bank, penny bank',
722
+ 720: 'pill bottle',
723
+ 721: 'pillow',
724
+ 722: 'ping-pong ball',
725
+ 723: 'pinwheel',
726
+ 724: 'pirate, pirate ship',
727
+ 725: 'pitcher, ewer',
728
+ 726: "plane, carpenter's plane, woodworking plane",
729
+ 727: 'planetarium',
730
+ 728: 'plastic bag',
731
+ 729: 'plate rack',
732
+ 730: 'plow, plough',
733
+ 731: "plunger, plumber's helper",
734
+ 732: 'Polaroid camera, Polaroid Land camera',
735
+ 733: 'pole',
736
+ 734: 'police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria',
737
+ 735: 'poncho',
738
+ 736: 'pool table, billiard table, snooker table',
739
+ 737: 'pop bottle, soda bottle',
740
+ 738: 'pot, flowerpot',
741
+ 739: "potter's wheel",
742
+ 740: 'power drill',
743
+ 741: 'prayer rug, prayer mat',
744
+ 742: 'printer',
745
+ 743: 'prison, prison house',
746
+ 744: 'projectile, missile',
747
+ 745: 'projector',
748
+ 746: 'puck, hockey puck',
749
+ 747: 'punching bag, punch bag, punching ball, punchball',
750
+ 748: 'purse',
751
+ 749: 'quill, quill pen',
752
+ 750: 'quilt, comforter, comfort, puff',
753
+ 751: 'racer, race car, racing car',
754
+ 752: 'racket, racquet',
755
+ 753: 'radiator',
756
+ 754: 'radio, wireless',
757
+ 755: 'radio telescope, radio reflector',
758
+ 756: 'rain barrel',
759
+ 757: 'recreational vehicle, RV, R.V.',
760
+ 758: 'reel',
761
+ 759: 'reflex camera',
762
+ 760: 'refrigerator, icebox',
763
+ 761: 'remote control, remote',
764
+ 762: 'restaurant, eating house, eating place, eatery',
765
+ 763: 'revolver, six-gun, six-shooter',
766
+ 764: 'rifle',
767
+ 765: 'rocking chair, rocker',
768
+ 766: 'rotisserie',
769
+ 767: 'rubber eraser, rubber, pencil eraser',
770
+ 768: 'rugby ball',
771
+ 769: 'rule, ruler',
772
+ 770: 'running shoe',
773
+ 771: 'safe',
774
+ 772: 'safety pin',
775
+ 773: 'saltshaker, salt shaker',
776
+ 774: 'sandal',
777
+ 775: 'sarong',
778
+ 776: 'sax, saxophone',
779
+ 777: 'scabbard',
780
+ 778: 'scale, weighing machine',
781
+ 779: 'school bus',
782
+ 780: 'schooner',
783
+ 781: 'scoreboard',
784
+ 782: 'screen, CRT screen',
785
+ 783: 'screw',
786
+ 784: 'screwdriver',
787
+ 785: 'seat belt, seatbelt',
788
+ 786: 'sewing machine',
789
+ 787: 'shield, buckler',
790
+ 788: 'shoe shop, shoe-shop, shoe store',
791
+ 789: 'shoji',
792
+ 790: 'shopping basket',
793
+ 791: 'shopping cart',
794
+ 792: 'shovel',
795
+ 793: 'shower cap',
796
+ 794: 'shower curtain',
797
+ 795: 'ski',
798
+ 796: 'ski mask',
799
+ 797: 'sleeping bag',
800
+ 798: 'slide rule, slipstick',
801
+ 799: 'sliding door',
802
+ 800: 'slot, one-armed bandit',
803
+ 801: 'snorkel',
804
+ 802: 'snowmobile',
805
+ 803: 'snowplow, snowplough',
806
+ 804: 'soap dispenser',
807
+ 805: 'soccer ball',
808
+ 806: 'sock',
809
+ 807: 'solar dish, solar collector, solar furnace',
810
+ 808: 'sombrero',
811
+ 809: 'soup bowl',
812
+ 810: 'space bar',
813
+ 811: 'space heater',
814
+ 812: 'space shuttle',
815
+ 813: 'spatula',
816
+ 814: 'speedboat',
817
+ 815: "spider web, spider's web",
818
+ 816: 'spindle',
819
+ 817: 'sports car, sport car',
820
+ 818: 'spotlight, spot',
821
+ 819: 'stage',
822
+ 820: 'steam locomotive',
823
+ 821: 'steel arch bridge',
824
+ 822: 'steel drum',
825
+ 823: 'stethoscope',
826
+ 824: 'stole',
827
+ 825: 'stone wall',
828
+ 826: 'stopwatch, stop watch',
829
+ 827: 'stove',
830
+ 828: 'strainer',
831
+ 829: 'streetcar, tram, tramcar, trolley, trolley car',
832
+ 830: 'stretcher',
833
+ 831: 'studio couch, day bed',
834
+ 832: 'stupa, tope',
835
+ 833: 'submarine, pigboat, sub, U-boat',
836
+ 834: 'suit, suit of clothes',
837
+ 835: 'sundial',
838
+ 836: 'sunglass',
839
+ 837: 'sunglasses, dark glasses, shades',
840
+ 838: 'sunscreen, sunblock, sun blocker',
841
+ 839: 'suspension bridge',
842
+ 840: 'swab, swob, mop',
843
+ 841: 'sweatshirt',
844
+ 842: 'swimming trunks, bathing trunks',
845
+ 843: 'swing',
846
+ 844: 'switch, electric switch, electrical switch',
847
+ 845: 'syringe',
848
+ 846: 'table lamp',
849
+ 847: 'tank, army tank, armored combat vehicle, armoured combat vehicle',
850
+ 848: 'tape player',
851
+ 849: 'teapot',
852
+ 850: 'teddy, teddy bear',
853
+ 851: 'television, television system',
854
+ 852: 'tennis ball',
855
+ 853: 'thatch, thatched roof',
856
+ 854: 'theater curtain, theatre curtain',
857
+ 855: 'thimble',
858
+ 856: 'thresher, thrasher, threshing machine',
859
+ 857: 'throne',
860
+ 858: 'tile roof',
861
+ 859: 'toaster',
862
+ 860: 'tobacco shop, tobacconist shop, tobacconist',
863
+ 861: 'toilet seat',
864
+ 862: 'torch',
865
+ 863: 'totem pole',
866
+ 864: 'tow truck, tow car, wrecker',
867
+ 865: 'toyshop',
868
+ 866: 'tractor',
869
+ 867: 'trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi',
870
+ 868: 'tray',
871
+ 869: 'trench coat',
872
+ 870: 'tricycle, trike, velocipede',
873
+ 871: 'trimaran',
874
+ 872: 'tripod',
875
+ 873: 'triumphal arch',
876
+ 874: 'trolleybus, trolley coach, trackless trolley',
877
+ 875: 'trombone',
878
+ 876: 'tub, vat',
879
+ 877: 'turnstile',
880
+ 878: 'typewriter keyboard',
881
+ 879: 'umbrella',
882
+ 880: 'unicycle, monocycle',
883
+ 881: 'upright, upright piano',
884
+ 882: 'vacuum, vacuum cleaner',
885
+ 883: 'vase',
886
+ 884: 'vault',
887
+ 885: 'velvet',
888
+ 886: 'vending machine',
889
+ 887: 'vestment',
890
+ 888: 'viaduct',
891
+ 889: 'violin, fiddle',
892
+ 890: 'volleyball',
893
+ 891: 'waffle iron',
894
+ 892: 'wall clock',
895
+ 893: 'wallet, billfold, notecase, pocketbook',
896
+ 894: 'wardrobe, closet, press',
897
+ 895: 'warplane, military plane',
898
+ 896: 'washbasin, handbasin, washbowl, lavabo, wash-hand basin',
899
+ 897: 'washer, automatic washer, washing machine',
900
+ 898: 'water bottle',
901
+ 899: 'water jug',
902
+ 900: 'water tower',
903
+ 901: 'whiskey jug',
904
+ 902: 'whistle',
905
+ 903: 'wig',
906
+ 904: 'window screen',
907
+ 905: 'window shade',
908
+ 906: 'Windsor tie',
909
+ 907: 'wine bottle',
910
+ 908: 'wing',
911
+ 909: 'wok',
912
+ 910: 'wooden spoon',
913
+ 911: 'wool, woolen, woollen',
914
+ 912: 'worm fence, snake fence, snake-rail fence, Virginia fence',
915
+ 913: 'wreck',
916
+ 914: 'yawl',
917
+ 915: 'yurt',
918
+ 916: 'web site, website, internet site, site',
919
+ 917: 'comic book',
920
+ 918: 'crossword puzzle, crossword',
921
+ 919: 'street sign',
922
+ 920: 'traffic light, traffic signal, stoplight',
923
+ 921: 'book jacket, dust cover, dust jacket, dust wrapper',
924
+ 922: 'menu',
925
+ 923: 'plate',
926
+ 924: 'guacamole',
927
+ 925: 'consomme',
928
+ 926: 'hot pot, hotpot',
929
+ 927: 'trifle',
930
+ 928: 'ice cream, icecream',
931
+ 929: 'ice lolly, lolly, lollipop, popsicle',
932
+ 930: 'French loaf',
933
+ 931: 'bagel, beigel',
934
+ 932: 'pretzel',
935
+ 933: 'cheeseburger',
936
+ 934: 'hotdog, hot dog, red hot',
937
+ 935: 'mashed potato',
938
+ 936: 'head cabbage',
939
+ 937: 'broccoli',
940
+ 938: 'cauliflower',
941
+ 939: 'zucchini, courgette',
942
+ 940: 'spaghetti squash',
943
+ 941: 'acorn squash',
944
+ 942: 'butternut squash',
945
+ 943: 'cucumber, cuke',
946
+ 944: 'artichoke, globe artichoke',
947
+ 945: 'bell pepper',
948
+ 946: 'cardoon',
949
+ 947: 'mushroom',
950
+ 948: 'Granny Smith',
951
+ 949: 'strawberry',
952
+ 950: 'orange',
953
+ 951: 'lemon',
954
+ 952: 'fig',
955
+ 953: 'pineapple, ananas',
956
+ 954: 'banana',
957
+ 955: 'jackfruit, jak, jack',
958
+ 956: 'custard apple',
959
+ 957: 'pomegranate',
960
+ 958: 'hay',
961
+ 959: 'carbonara',
962
+ 960: 'chocolate sauce, chocolate syrup',
963
+ 961: 'dough',
964
+ 962: 'meat loaf, meatloaf',
965
+ 963: 'pizza, pizza pie',
966
+ 964: 'potpie',
967
+ 965: 'burrito',
968
+ 966: 'red wine',
969
+ 967: 'espresso',
970
+ 968: 'cup',
971
+ 969: 'eggnog',
972
+ 970: 'alp',
973
+ 971: 'bubble',
974
+ 972: 'cliff, drop, drop-off',
975
+ 973: 'coral reef',
976
+ 974: 'geyser',
977
+ 975: 'lakeside, lakeshore',
978
+ 976: 'promontory, headland, head, foreland',
979
+ 977: 'sandbar, sand bar',
980
+ 978: 'seashore, coast, seacoast, sea-coast',
981
+ 979: 'valley, vale',
982
+ 980: 'volcano',
983
+ 981: 'ballplayer, baseball player',
984
+ 982: 'groom, bridegroom',
985
+ 983: 'scuba diver',
986
+ 984: 'rapeseed',
987
+ 985: 'daisy',
988
+ 986: "yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum",
989
+ 987: 'corn',
990
+ 988: 'acorn',
991
+ 989: 'hip, rose hip, rosehip',
992
+ 990: 'buckeye, horse chestnut, conker',
993
+ 991: 'coral fungus',
994
+ 992: 'agaric',
995
+ 993: 'gyromitra',
996
+ 994: 'stinkhorn, carrion fungus',
997
+ 995: 'earthstar',
998
+ 996: 'hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa',
999
+ 997: 'bolete',
1000
+ 998: 'ear, spike, capitulum',
1001
+ 999: 'toilet tissue, toilet paper, bathroom tissue'
1002
+ }
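
A small illustrative snippet (not in the original file): despite its name, CLS2IDX maps an ImageNet class index to its human-readable label, which is handy when printing predictions.

import torch
from data.imagenet_utils import CLS2IDX

logits = torch.randn(1, 1000)              # stand-in for model(image); (batch, 1000) shape assumed
top_idx = logits.argmax(dim=-1).item()
print(f'predicted class {top_idx}: {CLS2IDX[top_idx]}')
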
ViT_DeiT/data/transforms.py ADDED
@@ -0,0 +1,442 @@
1
+ from __future__ import division
2
+ import sys
3
+ import random
4
+ from PIL import Image
5
+
6
+ try:
7
+ import accimage
8
+ except ImportError:
9
+ accimage = None
10
+ import numbers
11
+ import collections
12
+
13
+ from torchvision.transforms import functional as F
14
+
15
+ if sys.version_info < (3, 3):
16
+ Sequence = collections.Sequence
17
+ Iterable = collections.Iterable
18
+ else:
19
+ Sequence = collections.abc.Sequence
20
+ Iterable = collections.abc.Iterable
21
+
22
+ _pil_interpolation_to_str = {
23
+ Image.NEAREST: 'PIL.Image.NEAREST',
24
+ Image.BILINEAR: 'PIL.Image.BILINEAR',
25
+ Image.BICUBIC: 'PIL.Image.BICUBIC',
26
+ Image.LANCZOS: 'PIL.Image.LANCZOS',
27
+ Image.HAMMING: 'PIL.Image.HAMMING',
28
+ Image.BOX: 'PIL.Image.BOX',
29
+ }
30
+
31
+
32
+ class Compose(object):
33
+ """Composes several transforms together.
34
+
35
+ Args:
36
+ transforms (list of ``Transform`` objects): list of transforms to compose.
37
+
38
+ Example:
39
+ >>> transforms.Compose([
40
+ >>> transforms.CenterCrop(10),
41
+ >>> transforms.ToTensor(),
42
+ >>> ])
43
+ """
44
+
45
+ def __init__(self, transforms):
46
+ self.transforms = transforms
47
+
48
+ def __call__(self, img, tgt):
49
+ for t in self.transforms:
50
+ img, tgt = t(img, tgt)
51
+ return img, tgt
52
+
53
+ def __repr__(self):
54
+ format_string = self.__class__.__name__ + '('
55
+ for t in self.transforms:
56
+ format_string += '\n'
57
+ format_string += ' {0}'.format(t)
58
+ format_string += '\n)'
59
+ return format_string
60
+
61
+
62
+ class Resize(object):
63
+ """Resize the input PIL Image to the given size.
64
+
65
+ Args:
66
+ size (sequence or int): Desired output size. If size is a sequence like
67
+ (h, w), output size will be matched to this. If size is an int,
68
+ smaller edge of the image will be matched to this number.
69
+ i.e., if height > width, then image will be rescaled to
70
+ (size * height / width, size)
71
+ interpolation (int, optional): Desired interpolation. Default is
72
+ ``PIL.Image.BILINEAR``
73
+ """
74
+
75
+ def __init__(self, size, interpolation=Image.BILINEAR):
76
+ assert isinstance(size, int) or (isinstance(size, Iterable) and len(size) == 2)
77
+ self.size = size
78
+ self.interpolation = interpolation
79
+
80
+ def __call__(self, img, tgt):
81
+ """
82
+ Args:
83
+ img (PIL Image): Image to be scaled.
84
+
85
+ Returns:
86
+ PIL Image: Rescaled image.
87
+ """
88
+ return F.resize(img, self.size, self.interpolation), F.resize(tgt, self.size, Image.NEAREST)
89
+
90
+ def __repr__(self):
91
+ interpolate_str = _pil_interpolation_to_str[self.interpolation]
92
+ return self.__class__.__name__ + '(size={0}, interpolation={1})'.format(self.size, interpolate_str)
93
+
94
+
95
+ class CenterCrop(object):
96
+ """Crops the given PIL Image at the center.
97
+
98
+ Args:
99
+ size (sequence or int): Desired output size of the crop. If size is an
100
+ int instead of sequence like (h, w), a square crop (size, size) is
101
+ made.
102
+ """
103
+
104
+ def __init__(self, size):
105
+ if isinstance(size, numbers.Number):
106
+ self.size = (int(size), int(size))
107
+ else:
108
+ self.size = size
109
+
110
+ def __call__(self, img, tgt):
111
+ """
112
+ Args:
113
+ img (PIL Image): Image to be cropped.
114
+
115
+ Returns:
116
+ PIL Image: Cropped image.
117
+ """
118
+ return F.center_crop(img, self.size), F.center_crop(tgt, self.size)
119
+
120
+ def __repr__(self):
121
+ return self.__class__.__name__ + '(size={0})'.format(self.size)
122
+
123
+
124
+ class RandomCrop(object):
125
+ """Crop the given PIL Image at a random location.
126
+
127
+ Args:
128
+ size (sequence or int): Desired output size of the crop. If size is an
129
+ int instead of sequence like (h, w), a square crop (size, size) is
130
+ made.
131
+ padding (int or sequence, optional): Optional padding on each border
132
+ of the image. Default is None, i.e. no padding. If a sequence of length
133
+ 4 is provided, it is used to pad left, top, right, bottom borders
134
+ respectively. If a sequence of length 2 is provided, it is used to
135
+ pad left/right, top/bottom borders, respectively.
136
+ pad_if_needed (boolean): It will pad the image if smaller than the
137
+ desired size to avoid raising an exception.
138
+ fill: Pixel fill value for constant fill. Default is 0. If a tuple of
139
+ length 3, it is used to fill R, G, B channels respectively.
140
+ This value is only used when the padding_mode is constant
141
+ padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant.
142
+
143
+ - constant: pads with a constant value, this value is specified with fill
144
+
145
+ - edge: pads with the last value on the edge of the image
146
+
147
+ - reflect: pads with reflection of image (without repeating the last value on the edge)
148
+
149
+ padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
150
+ will result in [3, 2, 1, 2, 3, 4, 3, 2]
151
+
152
+ - symmetric: pads with reflection of image (repeating the last value on the edge)
153
+
154
+ padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
155
+ will result in [2, 1, 1, 2, 3, 4, 4, 3]
156
+
157
+ """
158
+
159
+ def __init__(self, size, padding=None, pad_if_needed=False, fill=0, padding_mode='constant'):
160
+ if isinstance(size, numbers.Number):
161
+ self.size = (int(size), int(size))
162
+ else:
163
+ self.size = size
164
+ self.padding = padding
165
+ self.pad_if_needed = pad_if_needed
166
+ self.fill = fill
167
+ self.padding_mode = padding_mode
168
+
169
+ @staticmethod
170
+ def get_params(img, output_size):
171
+ """Get parameters for ``crop`` for a random crop.
172
+
173
+ Args:
174
+ img (PIL Image): Image to be cropped.
175
+ output_size (tuple): Expected output size of the crop.
176
+
177
+ Returns:
178
+ tuple: params (i, j, h, w) to be passed to ``crop`` for random crop.
179
+ """
180
+ w, h = img.size
181
+ th, tw = output_size
182
+ if w == tw and h == th:
183
+ return 0, 0, h, w
184
+
185
+ i = random.randint(0, h - th)
186
+ j = random.randint(0, w - tw)
187
+ return i, j, th, tw
188
+
189
+ def __call__(self, img, tgt):
190
+ """
191
+ Args:
192
+ img (PIL Image): Image to be cropped.
193
+
194
+ Returns:
195
+ PIL Image: Cropped image.
196
+ """
197
+ if self.padding is not None:
198
+ img = F.pad(img, self.padding, self.fill, self.padding_mode)
199
+ tgt = F.pad(tgt, self.padding, self.fill, self.padding_mode)
200
+
201
+ # pad the width if needed
202
+ if self.pad_if_needed and img.size[0] < self.size[1]:
203
+ img = F.pad(img, (self.size[1] - img.size[0], 0), self.fill, self.padding_mode)
204
+ tgt = F.pad(tgt, (self.size[1] - img.size[0], 0), self.fill, self.padding_mode)
205
+ # pad the height if needed
206
+ if self.pad_if_needed and img.size[1] < self.size[0]:
207
+ img = F.pad(img, (0, self.size[0] - img.size[1]), self.fill, self.padding_mode)
208
+ tgt = F.pad(tgt, (0, self.size[0] - img.size[1]), self.fill, self.padding_mode)
209
+
210
+ i, j, h, w = self.get_params(img, self.size)
211
+
212
+ return F.crop(img, i, j, h, w), F.crop(tgt, i, j, h, w)
213
+
214
+ def __repr__(self):
215
+ return self.__class__.__name__ + '(size={0}, padding={1})'.format(self.size, self.padding)
216
+
217
+
218
+ class RandomHorizontalFlip(object):
219
+ """Horizontally flip the given PIL Image randomly with a given probability.
220
+
221
+ Args:
222
+ p (float): probability of the image being flipped. Default value is 0.5
223
+ """
224
+
225
+ def __init__(self, p=0.5):
226
+ self.p = p
227
+
228
+ def __call__(self, img, tgt):
229
+ """
230
+ Args:
231
+ img (PIL Image): Image to be flipped.
232
+
233
+ Returns:
234
+ PIL Image: Randomly flipped image.
235
+ """
236
+ if random.random() < self.p:
237
+ return F.hflip(img), F.hflip(tgt)
238
+
239
+ return img, tgt
240
+
241
+ def __repr__(self):
242
+ return self.__class__.__name__ + '(p={})'.format(self.p)
243
+
244
+
245
+ class RandomVerticalFlip(object):
246
+ """Vertically flip the given PIL Image randomly with a given probability.
247
+
248
+ Args:
249
+ p (float): probability of the image being flipped. Default value is 0.5
250
+ """
251
+
252
+ def __init__(self, p=0.5):
253
+ self.p = p
254
+
255
+ def __call__(self, img, tgt):
256
+ """
257
+ Args:
258
+ img (PIL Image): Image to be flipped.
259
+
260
+ Returns:
261
+ PIL Image: Randomly flipped image.
262
+ """
263
+ if random.random() < self.p:
264
+ return F.vflip(img), F.vflip(tgt)
265
+ return img, tgt
266
+
267
+ def __repr__(self):
268
+ return self.__class__.__name__ + '(p={})'.format(self.p)
269
+
270
+
271
+ class Lambda(object):
272
+ """Apply a user-defined lambda as a transform.
273
+
274
+ Args:
275
+ lambd (function): Lambda/function to be used for transform.
276
+ """
277
+
278
+ def __init__(self, lambd):
279
+ assert callable(lambd), repr(type(lambd).__name__) + " object is not callable"
280
+ self.lambd = lambd
281
+
282
+ def __call__(self, img, tgt):
283
+ return self.lambd(img, tgt)
284
+
285
+ def __repr__(self):
286
+ return self.__class__.__name__ + '()'
287
+
288
+
289
+ class ColorJitter(object):
290
+ """Randomly change the brightness, contrast and saturation of an image.
291
+
292
+ Args:
293
+ brightness (float or tuple of float (min, max)): How much to jitter brightness.
294
+ brightness_factor is chosen uniformly from [max(0, 1 - brightness), 1 + brightness]
295
+ or the given [min, max]. Should be non negative numbers.
296
+ contrast (float or tuple of float (min, max)): How much to jitter contrast.
297
+ contrast_factor is chosen uniformly from [max(0, 1 - contrast), 1 + contrast]
298
+ or the given [min, max]. Should be non negative numbers.
299
+ saturation (float or tuple of float (min, max)): How much to jitter saturation.
300
+ saturation_factor is chosen uniformly from [max(0, 1 - saturation), 1 + saturation]
301
+ or the given [min, max]. Should be non negative numbers.
302
+ hue (float or tuple of float (min, max)): How much to jitter hue.
303
+ hue_factor is chosen uniformly from [-hue, hue] or the given [min, max].
304
+ Should have 0 <= hue <= 0.5 or -0.5 <= min <= max <= 0.5.
305
+ """
306
+ def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
307
+ self.brightness = self._check_input(brightness, 'brightness')
308
+ self.contrast = self._check_input(contrast, 'contrast')
309
+ self.saturation = self._check_input(saturation, 'saturation')
310
+ self.hue = self._check_input(hue, 'hue', center=0, bound=(-0.5, 0.5),
311
+ clip_first_on_zero=False)
312
+
313
+ def _check_input(self, value, name, center=1, bound=(0, float('inf')), clip_first_on_zero=True):
314
+ if isinstance(value, numbers.Number):
315
+ if value < 0:
316
+ raise ValueError("If {} is a single number, it must be non negative.".format(name))
317
+ value = [center - value, center + value]
318
+ if clip_first_on_zero:
319
+ value[0] = max(value[0], 0)
320
+ elif isinstance(value, (tuple, list)) and len(value) == 2:
321
+ if not bound[0] <= value[0] <= value[1] <= bound[1]:
322
+ raise ValueError("{} values should be between {}".format(name, bound))
323
+ else:
324
+ raise TypeError("{} should be a single number or a list/tuple with length 2.".format(name))
325
+
326
+ # if value is 0 or (1., 1.) for brightness/contrast/saturation
327
+ # or (0., 0.) for hue, do nothing
328
+ if value[0] == value[1] == center:
329
+ value = None
330
+ return value
331
+
332
+ @staticmethod
333
+ def get_params(brightness, contrast, saturation, hue):
334
+ """Get a randomized transform to be applied on image.
335
+
336
+ Arguments are same as that of __init__.
337
+
338
+ Returns:
339
+ Transform which randomly adjusts brightness, contrast and
340
+ saturation in a random order.
341
+ """
342
+ transforms = []
343
+
344
+ if brightness is not None:
345
+ brightness_factor = random.uniform(brightness[0], brightness[1])
346
+ transforms.append(Lambda(lambda img, tgt: (F.adjust_brightness(img, brightness_factor), tgt)))
347
+
348
+ if contrast is not None:
349
+ contrast_factor = random.uniform(contrast[0], contrast[1])
350
+ transforms.append(Lambda(lambda img, tgt: (F.adjust_contrast(img, contrast_factor), tgt)))
351
+
352
+ if saturation is not None:
353
+ saturation_factor = random.uniform(saturation[0], saturation[1])
354
+ transforms.append(Lambda(lambda img, tgt: (F.adjust_saturation(img, saturation_factor), tgt)))
355
+
356
+ if hue is not None:
357
+ hue_factor = random.uniform(hue[0], hue[1])
358
+ transforms.append(Lambda(lambda img, tgt: (F.adjust_hue(img, hue_factor), tgt)))
359
+
360
+ random.shuffle(transforms)
361
+ transform = Compose(transforms)
362
+
363
+ return transform
364
+
365
+ def __call__(self, img, tgt):
366
+ """
367
+ Args:
368
+ img (PIL Image): Input image.
369
+
370
+ Returns:
371
+ PIL Image: Color jittered image.
372
+ """
373
+ transform = self.get_params(self.brightness, self.contrast,
374
+ self.saturation, self.hue)
375
+ return transform(img, tgt)
376
+
377
+ def __repr__(self):
378
+ format_string = self.__class__.__name__ + '('
379
+ format_string += 'brightness={0}'.format(self.brightness)
380
+ format_string += ', contrast={0}'.format(self.contrast)
381
+ format_string += ', saturation={0}'.format(self.saturation)
382
+ format_string += ', hue={0})'.format(self.hue)
383
+ return format_string
384
+
385
+
386
+ class Normalize(object):
387
+ """Normalize a tensor image with mean and standard deviation.
388
+ Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels, this transform
389
+ will normalize each channel of the input ``torch.*Tensor`` i.e.
390
+ ``input[channel] = (input[channel] - mean[channel]) / std[channel]``
391
+
392
+ .. note::
393
+ This transform acts out of place, i.e., it does not mutate the input tensor.
394
+
395
+ Args:
396
+ mean (sequence): Sequence of means for each channel.
397
+ std (sequence): Sequence of standard deviations for each channel.
398
+ """
399
+
400
+ def __init__(self, mean, std, inplace=False):
401
+ self.mean = mean
402
+ self.std = std
403
+ self.inplace = inplace
404
+
405
+ def __call__(self, img, tgt):
406
+ """
407
+ Args:
408
+ img (Tensor): Tensor image of size (C, H, W) to be normalized.
409
+
410
+ Returns:
411
+ Tensor: Normalized Tensor image.
412
+ """
413
+ # return F.normalize(img, self.mean, self.std, self.inplace), tgt
414
+ return F.normalize(img, self.mean, self.std), tgt
415
+
416
+ def __repr__(self):
417
+ return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)
418
+
419
+
420
+ class ToTensor(object):
421
+ """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.
422
+
423
+ Converts a PIL Image or numpy.ndarray (H x W x C) in the range
424
+ [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]
425
+ if the PIL Image belongs to one of the modes (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1)
426
+ or if the numpy.ndarray has dtype = np.uint8
427
+
428
+ In the other cases, tensors are returned without scaling.
429
+ """
430
+
431
+ def __call__(self, img, tgt):
432
+ """
433
+ Args:
434
+ img (PIL Image or numpy.ndarray): Image to be converted to tensor.
435
+
436
+ Returns:
437
+ Tensor: Converted image.
438
+ """
439
+ return F.to_tensor(img), tgt
440
+
441
+ def __repr__(self):
442
+ return self.__class__.__name__ + '()'
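The transforms above all operate on (img, tgt) pairs, so the segmentation target stays registered with the image through every augmentation. A minimal usage sketch, assuming the paired Compose defined earlier in this file, ViT_DeiT/ as the working directory, and illustrative sample paths and ImageNet normalization stats:

from PIL import Image
from data import transforms as T   # the paired transforms defined in this file

joint = T.Compose([
    T.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

img = Image.open('samples/catdog.png').convert('RGB')
tgt = Image.open('samples/catdog.png').convert('L')   # placeholder segmentation target
img_t, tgt_t = joint(img, tgt)                        # the target is passed through unchanged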
ViT_DeiT/dataset/expl_hdf5.py ADDED
@@ -0,0 +1,51 @@
1
+ import torch
2
+ from torch.utils.data import Dataset
3
+ import h5py
4
+ import os
5
+
6
+
7
+ class ImagenetResults(Dataset):
8
+ def __init__(self, path):
9
+ super(ImagenetResults, self).__init__()
10
+
11
+ self.path = os.path.join(path, 'results.hdf5')
12
+ self.data = None
13
+
14
+ print('Reading dataset length...')
15
+ with h5py.File(self.path, 'r') as f:
16
+ # tmp = h5py.File(self.path , 'r')
17
+ self.data_length = len(f['/image'])
18
+
19
+ def __len__(self):
20
+ return self.data_length
21
+
22
+ def __getitem__(self, item):
23
+ if self.data is None:
24
+ self.data = h5py.File(self.path, 'r')
25
+
26
+ image = torch.tensor(self.data['image'][item])
27
+ vis = torch.tensor(self.data['vis'][item])
28
+ target = torch.tensor(self.data['target'][item]).long()
29
+
30
+ return image, vis, target
31
+
32
+
33
+ if __name__ == '__main__':
34
+ from utils import render
35
+ import imageio
36
+ import numpy as np
37
+
38
+ ds = ImagenetResults('../visualizations/fullgrad')
39
+ sample_loader = torch.utils.data.DataLoader(
40
+ ds,
41
+ batch_size=5,
42
+ shuffle=False)
43
+
44
+ iterator = iter(sample_loader)
45
+ image, vis, target = next(iterator)
46
+
47
+ maps = (render.hm_to_rgb(vis[0].data.cpu().numpy(), scaling=3, sigma=1, cmap='seismic') * 255).astype(np.uint8)
48
+
49
+ # imageio.imsave('../delete_hm.jpg', maps)
50
+
51
+ print(len(ds))
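Note that results.hdf5 is opened lazily in __getitem__ rather than in __init__, which lets each DataLoader worker process open its own file handle instead of inheriting one across the fork. A minimal loading sketch, assuming ViT_DeiT/ as the working directory and a hypothetical results directory produced beforehand by the visualization script:

import torch
from dataset.expl_hdf5 import ImagenetResults

ds = ImagenetResults('visualizations/transformer_attribution')   # hypothetical output dir
loader = torch.utils.data.DataLoader(ds, batch_size=32, shuffle=False, num_workers=4)

images, vis, targets = next(iter(loader))
print(images.shape, vis.shape, targets.shape)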
ViT_DeiT/modules/__init__.py ADDED
File without changes
ViT_DeiT/modules/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (165 Bytes). View file
 
ViT_DeiT/modules/__pycache__/layers_ours.cpython-38.pyc ADDED
Binary file (9.94 kB). View file
 
ViT_DeiT/modules/layers_lrp.py ADDED
@@ -0,0 +1,261 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+ __all__ = ['forward_hook', 'Clone', 'Add', 'Cat', 'ReLU', 'GELU', 'Dropout', 'BatchNorm2d', 'Linear', 'MaxPool2d',
6
+ 'AdaptiveAvgPool2d', 'AvgPool2d', 'Conv2d', 'Sequential', 'safe_divide', 'einsum', 'Softmax', 'IndexSelect',
7
+ 'LayerNorm', 'AddEye']
8
+
9
+
10
+ def safe_divide(a, b):
11
+ den = b.clamp(min=1e-9) + b.clamp(max=1e-9)
12
+ den = den + den.eq(0).type(den.type()) * 1e-9
13
+ return a / den * b.ne(0).type(b.type())
14
+
15
+
16
+ def forward_hook(self, input, output):
17
+ if type(input[0]) in (list, tuple):
18
+ self.X = []
19
+ for i in input[0]:
20
+ x = i.detach()
21
+ x.requires_grad = True
22
+ self.X.append(x)
23
+ else:
24
+ self.X = input[0].detach()
25
+ self.X.requires_grad = True
26
+
27
+ self.Y = output
28
+
29
+
30
+ def backward_hook(self, grad_input, grad_output):
31
+ self.grad_input = grad_input
32
+ self.grad_output = grad_output
33
+
34
+
35
+ class RelProp(nn.Module):
36
+ def __init__(self):
37
+ super(RelProp, self).__init__()
38
+ # if not self.training:
39
+ self.register_forward_hook(forward_hook)
40
+
41
+ def gradprop(self, Z, X, S):
42
+ C = torch.autograd.grad(Z, X, S, retain_graph=True)
43
+ return C
44
+
45
+ def relprop(self, R, alpha):
46
+ return R
47
+
48
+
49
+ class RelPropSimple(RelProp):
50
+ def relprop(self, R, alpha):
51
+ Z = self.forward(self.X)
52
+ S = safe_divide(R, Z)
53
+ C = self.gradprop(Z, self.X, S)
54
+
55
+ if torch.is_tensor(self.X) == False:
56
+ outputs = []
57
+ outputs.append(self.X[0] * C[0])
58
+ outputs.append(self.X[1] * C[1])
59
+ else:
60
+ outputs = self.X * (C[0])
61
+ return outputs
62
+
63
+ class AddEye(RelPropSimple):
64
+ # input of shape B, C, seq_len, seq_len
65
+ def forward(self, input):
66
+ return input + torch.eye(input.shape[2]).expand_as(input).to(input.device)
67
+
68
+ class ReLU(nn.ReLU, RelProp):
69
+ pass
70
+
71
+ class GELU(nn.GELU, RelProp):
72
+ pass
73
+
74
+ class Softmax(nn.Softmax, RelProp):
75
+ pass
76
+
77
+ class LayerNorm(nn.LayerNorm, RelProp):
78
+ pass
79
+
80
+ class Dropout(nn.Dropout, RelProp):
81
+ pass
82
+
83
+
84
+ class MaxPool2d(nn.MaxPool2d, RelPropSimple):
85
+ pass
86
+
87
+ class LayerNorm(nn.LayerNorm, RelProp):
88
+ pass
89
+
90
+ class AdaptiveAvgPool2d(nn.AdaptiveAvgPool2d, RelPropSimple):
91
+ pass
92
+
93
+
94
+ class AvgPool2d(nn.AvgPool2d, RelPropSimple):
95
+ pass
96
+
97
+
98
+ class Add(RelPropSimple):
99
+ def forward(self, inputs):
100
+ return torch.add(*inputs)
101
+
102
+ class einsum(RelPropSimple):
103
+ def __init__(self, equation):
104
+ super().__init__()
105
+ self.equation = equation
106
+ def forward(self, *operands):
107
+ return torch.einsum(self.equation, *operands)
108
+
109
+ class IndexSelect(RelProp):
110
+ def forward(self, inputs, dim, indices):
111
+ self.__setattr__('dim', dim)
112
+ self.__setattr__('indices', indices)
113
+
114
+ return torch.index_select(inputs, dim, indices)
115
+
116
+ def relprop(self, R, alpha):
117
+ Z = self.forward(self.X, self.dim, self.indices)
118
+ S = safe_divide(R, Z)
119
+ C = self.gradprop(Z, self.X, S)
120
+
121
+ if torch.is_tensor(self.X) == False:
122
+ outputs = []
123
+ outputs.append(self.X[0] * C[0])
124
+ outputs.append(self.X[1] * C[1])
125
+ else:
126
+ outputs = self.X * (C[0])
127
+ return outputs
128
+
129
+
130
+
131
+ class Clone(RelProp):
132
+ def forward(self, input, num):
133
+ self.__setattr__('num', num)
134
+ outputs = []
135
+ for _ in range(num):
136
+ outputs.append(input)
137
+
138
+ return outputs
139
+
140
+ def relprop(self, R, alpha):
141
+ Z = []
142
+ for _ in range(self.num):
143
+ Z.append(self.X)
144
+ S = [safe_divide(r, z) for r, z in zip(R, Z)]
145
+ C = self.gradprop(Z, self.X, S)[0]
146
+
147
+ R = self.X * C
148
+
149
+ return R
150
+
151
+ class Cat(RelProp):
152
+ def forward(self, inputs, dim):
153
+ self.__setattr__('dim', dim)
154
+ return torch.cat(inputs, dim)
155
+
156
+ def relprop(self, R, alpha):
157
+ Z = self.forward(self.X, self.dim)
158
+ S = safe_divide(R, Z)
159
+ C = self.gradprop(Z, self.X, S)
160
+
161
+ outputs = []
162
+ for x, c in zip(self.X, C):
163
+ outputs.append(x * c)
164
+
165
+ return outputs
166
+
167
+
168
+ class Sequential(nn.Sequential):
169
+ def relprop(self, R, alpha):
170
+ for m in reversed(self._modules.values()):
171
+ R = m.relprop(R, alpha)
172
+ return R
173
+
174
+
175
+ class BatchNorm2d(nn.BatchNorm2d, RelProp):
176
+ def relprop(self, R, alpha):
177
+ X = self.X
178
+ beta = 1 - alpha
179
+ weight = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3) / (
180
+ (self.running_var.unsqueeze(0).unsqueeze(2).unsqueeze(3).pow(2) + self.eps).pow(0.5))
181
+ Z = X * weight + 1e-9
182
+ S = R / Z
183
+ Ca = S * weight
184
+ R = self.X * (Ca)
185
+ return R
186
+
187
+
188
+ class Linear(nn.Linear, RelProp):
189
+ def relprop(self, R, alpha):
190
+ beta = alpha - 1
191
+ pw = torch.clamp(self.weight, min=0)
192
+ nw = torch.clamp(self.weight, max=0)
193
+ px = torch.clamp(self.X, min=0)
194
+ nx = torch.clamp(self.X, max=0)
195
+
196
+ def f(w1, w2, x1, x2):
197
+ Z1 = F.linear(x1, w1)
198
+ Z2 = F.linear(x2, w2)
199
+ S1 = safe_divide(R, Z1)
200
+ S2 = safe_divide(R, Z2)
201
+ C1 = x1 * torch.autograd.grad(Z1, x1, S1)[0]
202
+ C2 = x2 * torch.autograd.grad(Z2, x2, S2)[0]
203
+
204
+ return C1 + C2
205
+
206
+ activator_relevances = f(pw, nw, px, nx)
207
+ inhibitor_relevances = f(nw, pw, px, nx)
208
+
209
+ R = alpha * activator_relevances - beta * inhibitor_relevances
210
+
211
+ return R
212
+
213
+
214
+ class Conv2d(nn.Conv2d, RelProp):
215
+ def gradprop2(self, DY, weight):
216
+ Z = self.forward(self.X)
217
+
218
+ output_padding = self.X.size()[2] - (
219
+ (Z.size()[2] - 1) * self.stride[0] - 2 * self.padding[0] + self.kernel_size[0])
220
+
221
+ return F.conv_transpose2d(DY, weight, stride=self.stride, padding=self.padding, output_padding=output_padding)
222
+
223
+ def relprop(self, R, alpha):
224
+ if self.X.shape[1] == 3:
225
+ pw = torch.clamp(self.weight, min=0)
226
+ nw = torch.clamp(self.weight, max=0)
227
+ X = self.X
228
+ L = self.X * 0 + \
229
+ torch.min(torch.min(torch.min(self.X, dim=1, keepdim=True)[0], dim=2, keepdim=True)[0], dim=3,
230
+ keepdim=True)[0]
231
+ H = self.X * 0 + \
232
+ torch.max(torch.max(torch.max(self.X, dim=1, keepdim=True)[0], dim=2, keepdim=True)[0], dim=3,
233
+ keepdim=True)[0]
234
+ Za = torch.conv2d(X, self.weight, bias=None, stride=self.stride, padding=self.padding) - \
235
+ torch.conv2d(L, pw, bias=None, stride=self.stride, padding=self.padding) - \
236
+ torch.conv2d(H, nw, bias=None, stride=self.stride, padding=self.padding) + 1e-9
237
+
238
+ S = R / Za
239
+ C = X * self.gradprop2(S, self.weight) - L * self.gradprop2(S, pw) - H * self.gradprop2(S, nw)
240
+ R = C
241
+ else:
242
+ beta = alpha - 1
243
+ pw = torch.clamp(self.weight, min=0)
244
+ nw = torch.clamp(self.weight, max=0)
245
+ px = torch.clamp(self.X, min=0)
246
+ nx = torch.clamp(self.X, max=0)
247
+
248
+ def f(w1, w2, x1, x2):
249
+ Z1 = F.conv2d(x1, w1, bias=None, stride=self.stride, padding=self.padding)
250
+ Z2 = F.conv2d(x2, w2, bias=None, stride=self.stride, padding=self.padding)
251
+ S1 = safe_divide(R, Z1)
252
+ S2 = safe_divide(R, Z2)
253
+ C1 = x1 * self.gradprop(Z1, x1, S1)[0]
254
+ C2 = x2 * self.gradprop(Z2, x2, S2)[0]
255
+ return C1 + C2
256
+
257
+ activator_relevances = f(pw, nw, px, nx)
258
+ inhibitor_relevances = f(nw, pw, px, nx)
259
+
260
+ R = alpha * activator_relevances - beta * inhibitor_relevances
261
+ return R
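Each wrapper caches its forward inputs through the registered forward_hook, so after an ordinary forward pass relevance can be pushed back layer by layer with relprop; the Sequential subclass simply walks its children in reverse. A minimal sketch of driving the alpha-beta rule on a toy stack (illustrative only, assuming ViT_DeiT/ is on the import path):

import torch
from modules.layers_lrp import Sequential, Linear, ReLU

model = Sequential(Linear(16, 32), ReLU(), Linear(32, 10))
x = torch.randn(4, 16)
logits = model(x)                          # forward hooks cache each layer's input in .X

R = torch.zeros_like(logits)               # one-hot relevance at the predicted class
R[torch.arange(4), logits.argmax(dim=1)] = 1.0
R_input = model.relprop(R, alpha=1)        # alpha=1 / beta=0 variant of the rule
print(R_input.shape)                       # torch.Size([4, 16])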
ViT_DeiT/modules/layers_ours.py ADDED
@@ -0,0 +1,280 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+ __all__ = ['forward_hook', 'Clone', 'Add', 'Cat', 'ReLU', 'GELU', 'Dropout', 'BatchNorm2d', 'Linear', 'MaxPool2d',
6
+ 'AdaptiveAvgPool2d', 'AvgPool2d', 'Conv2d', 'Sequential', 'safe_divide', 'einsum', 'Softmax', 'IndexSelect',
7
+ 'LayerNorm', 'AddEye']
8
+
9
+
10
+ def safe_divide(a, b):
11
+ den = b.clamp(min=1e-9) + b.clamp(max=1e-9)
12
+ den = den + den.eq(0).type(den.type()) * 1e-9
13
+ return a / den * b.ne(0).type(b.type())
14
+
15
+
16
+ def forward_hook(self, input, output):
17
+ if type(input[0]) in (list, tuple):
18
+ self.X = []
19
+ for i in input[0]:
20
+ x = i.detach()
21
+ x.requires_grad = True
22
+ self.X.append(x)
23
+ else:
24
+ self.X = input[0].detach()
25
+ self.X.requires_grad = True
26
+
27
+ self.Y = output
28
+
29
+
30
+ def backward_hook(self, grad_input, grad_output):
31
+ self.grad_input = grad_input
32
+ self.grad_output = grad_output
33
+
34
+
35
+ class RelProp(nn.Module):
36
+ def __init__(self):
37
+ super(RelProp, self).__init__()
38
+ # if not self.training:
39
+ self.register_forward_hook(forward_hook)
40
+
41
+ def gradprop(self, Z, X, S):
42
+ C = torch.autograd.grad(Z, X, S, retain_graph=True)
43
+ return C
44
+
45
+ def relprop(self, R, alpha):
46
+ return R
47
+
48
+ class RelPropSimple(RelProp):
49
+ def relprop(self, R, alpha):
50
+ Z = self.forward(self.X)
51
+ S = safe_divide(R, Z)
52
+ C = self.gradprop(Z, self.X, S)
53
+
54
+ if torch.is_tensor(self.X) == False:
55
+ outputs = []
56
+ outputs.append(self.X[0] * C[0])
57
+ outputs.append(self.X[1] * C[1])
58
+ else:
59
+ outputs = self.X * (C[0])
60
+ return outputs
61
+
62
+ class AddEye(RelPropSimple):
63
+ # input of shape B, C, seq_len, seq_len
64
+ def forward(self, input):
65
+ return input + torch.eye(input.shape[2]).expand_as(input).to(input.device)
66
+
67
+ class ReLU(nn.ReLU, RelProp):
68
+ pass
69
+
70
+ class GELU(nn.GELU, RelProp):
71
+ pass
72
+
73
+ class Softmax(nn.Softmax, RelProp):
74
+ pass
75
+
76
+ class LayerNorm(nn.LayerNorm, RelProp):
77
+ pass
78
+
79
+ class Dropout(nn.Dropout, RelProp):
80
+ pass
81
+
82
+
83
+ class MaxPool2d(nn.MaxPool2d, RelPropSimple):
84
+ pass
85
+
86
+ class LayerNorm(nn.LayerNorm, RelProp):
87
+ pass
88
+
89
+ class AdaptiveAvgPool2d(nn.AdaptiveAvgPool2d, RelPropSimple):
90
+ pass
91
+
92
+
93
+ class AvgPool2d(nn.AvgPool2d, RelPropSimple):
94
+ pass
95
+
96
+
97
+ class Add(RelPropSimple):
98
+ def forward(self, inputs):
99
+ return torch.add(*inputs)
100
+
101
+ def relprop(self, R, alpha):
102
+ Z = self.forward(self.X)
103
+ S = safe_divide(R, Z)
104
+ C = self.gradprop(Z, self.X, S)
105
+
106
+ a = self.X[0] * C[0]
107
+ b = self.X[1] * C[1]
108
+
109
+ a_sum = a.sum()
110
+ b_sum = b.sum()
111
+
112
+ a_fact = safe_divide(a_sum.abs(), a_sum.abs() + b_sum.abs()) * R.sum()
113
+ b_fact = safe_divide(b_sum.abs(), a_sum.abs() + b_sum.abs()) * R.sum()
114
+
115
+ a = a * safe_divide(a_fact, a.sum())
116
+ b = b * safe_divide(b_fact, b.sum())
117
+
118
+ outputs = [a, b]
119
+
120
+ return outputs
121
+
122
+ class einsum(RelPropSimple):
123
+ def __init__(self, equation):
124
+ super().__init__()
125
+ self.equation = equation
126
+ def forward(self, *operands):
127
+ return torch.einsum(self.equation, *operands)
128
+
129
+ class IndexSelect(RelProp):
130
+ def forward(self, inputs, dim, indices):
131
+ self.__setattr__('dim', dim)
132
+ self.__setattr__('indices', indices)
133
+
134
+ return torch.index_select(inputs, dim, indices)
135
+
136
+ def relprop(self, R, alpha):
137
+ Z = self.forward(self.X, self.dim, self.indices)
138
+ S = safe_divide(R, Z)
139
+ C = self.gradprop(Z, self.X, S)
140
+
141
+ if torch.is_tensor(self.X) == False:
142
+ outputs = []
143
+ outputs.append(self.X[0] * C[0])
144
+ outputs.append(self.X[1] * C[1])
145
+ else:
146
+ outputs = self.X * (C[0])
147
+ return outputs
148
+
149
+
150
+
151
+ class Clone(RelProp):
152
+ def forward(self, input, num):
153
+ self.__setattr__('num', num)
154
+ outputs = []
155
+ for _ in range(num):
156
+ outputs.append(input)
157
+
158
+ return outputs
159
+
160
+ def relprop(self, R, alpha):
161
+ Z = []
162
+ for _ in range(self.num):
163
+ Z.append(self.X)
164
+ S = [safe_divide(r, z) for r, z in zip(R, Z)]
165
+ C = self.gradprop(Z, self.X, S)[0]
166
+
167
+ R = self.X * C
168
+
169
+ return R
170
+
171
+ class Cat(RelProp):
172
+ def forward(self, inputs, dim):
173
+ self.__setattr__('dim', dim)
174
+ return torch.cat(inputs, dim)
175
+
176
+ def relprop(self, R, alpha):
177
+ Z = self.forward(self.X, self.dim)
178
+ S = safe_divide(R, Z)
179
+ C = self.gradprop(Z, self.X, S)
180
+
181
+ outputs = []
182
+ for x, c in zip(self.X, C):
183
+ outputs.append(x * c)
184
+
185
+ return outputs
186
+
187
+
188
+ class Sequential(nn.Sequential):
189
+ def relprop(self, R, alpha):
190
+ for m in reversed(self._modules.values()):
191
+ R = m.relprop(R, alpha)
192
+ return R
193
+
194
+ class BatchNorm2d(nn.BatchNorm2d, RelProp):
195
+ def relprop(self, R, alpha):
196
+ X = self.X
197
+ beta = 1 - alpha
198
+ weight = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3) / (
199
+ (self.running_var.unsqueeze(0).unsqueeze(2).unsqueeze(3).pow(2) + self.eps).pow(0.5))
200
+ Z = X * weight + 1e-9
201
+ S = R / Z
202
+ Ca = S * weight
203
+ R = self.X * (Ca)
204
+ return R
205
+
206
+
207
+ class Linear(nn.Linear, RelProp):
208
+ def relprop(self, R, alpha):
209
+ beta = alpha - 1
210
+ pw = torch.clamp(self.weight, min=0)
211
+ nw = torch.clamp(self.weight, max=0)
212
+ px = torch.clamp(self.X, min=0)
213
+ nx = torch.clamp(self.X, max=0)
214
+
215
+ def f(w1, w2, x1, x2):
216
+ Z1 = F.linear(x1, w1)
217
+ Z2 = F.linear(x2, w2)
218
+ S1 = safe_divide(R, Z1 + Z2)
219
+ S2 = safe_divide(R, Z1 + Z2)
220
+ C1 = x1 * torch.autograd.grad(Z1, x1, S1)[0]
221
+ C2 = x2 * torch.autograd.grad(Z2, x2, S2)[0]
222
+
223
+ return C1 + C2
224
+
225
+ activator_relevances = f(pw, nw, px, nx)
226
+ inhibitor_relevances = f(nw, pw, px, nx)
227
+
228
+ R = alpha * activator_relevances - beta * inhibitor_relevances
229
+
230
+ return R
231
+
232
+
233
+ class Conv2d(nn.Conv2d, RelProp):
234
+ def gradprop2(self, DY, weight):
235
+ Z = self.forward(self.X)
236
+
237
+ output_padding = self.X.size()[2] - (
238
+ (Z.size()[2] - 1) * self.stride[0] - 2 * self.padding[0] + self.kernel_size[0])
239
+
240
+ return F.conv_transpose2d(DY, weight, stride=self.stride, padding=self.padding, output_padding=output_padding)
241
+
242
+ def relprop(self, R, alpha):
243
+ if self.X.shape[1] == 3:
244
+ pw = torch.clamp(self.weight, min=0)
245
+ nw = torch.clamp(self.weight, max=0)
246
+ X = self.X
247
+ L = self.X * 0 + \
248
+ torch.min(torch.min(torch.min(self.X, dim=1, keepdim=True)[0], dim=2, keepdim=True)[0], dim=3,
249
+ keepdim=True)[0]
250
+ H = self.X * 0 + \
251
+ torch.max(torch.max(torch.max(self.X, dim=1, keepdim=True)[0], dim=2, keepdim=True)[0], dim=3,
252
+ keepdim=True)[0]
253
+ Za = torch.conv2d(X, self.weight, bias=None, stride=self.stride, padding=self.padding) - \
254
+ torch.conv2d(L, pw, bias=None, stride=self.stride, padding=self.padding) - \
255
+ torch.conv2d(H, nw, bias=None, stride=self.stride, padding=self.padding) + 1e-9
256
+
257
+ S = R / Za
258
+ C = X * self.gradprop2(S, self.weight) - L * self.gradprop2(S, pw) - H * self.gradprop2(S, nw)
259
+ R = C
260
+ else:
261
+ beta = alpha - 1
262
+ pw = torch.clamp(self.weight, min=0)
263
+ nw = torch.clamp(self.weight, max=0)
264
+ px = torch.clamp(self.X, min=0)
265
+ nx = torch.clamp(self.X, max=0)
266
+
267
+ def f(w1, w2, x1, x2):
268
+ Z1 = F.conv2d(x1, w1, bias=None, stride=self.stride, padding=self.padding)
269
+ Z2 = F.conv2d(x2, w2, bias=None, stride=self.stride, padding=self.padding)
270
+ S1 = safe_divide(R, Z1)
271
+ S2 = safe_divide(R, Z2)
272
+ C1 = x1 * self.gradprop(Z1, x1, S1)[0]
273
+ C2 = x2 * self.gradprop(Z2, x2, S2)[0]
274
+ return C1 + C2
275
+
276
+ activator_relevances = f(pw, nw, px, nx)
277
+ inhibitor_relevances = f(nw, pw, px, nx)
278
+
279
+ R = alpha * activator_relevances - beta * inhibitor_relevances
280
+ return R
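layers_ours.py mirrors layers_lrp.py but changes two propagation rules: Add renormalizes the relevance of the two summands so that the total relevance flowing through a residual connection is conserved, and Linear divides by the combined pre-activation Z1 + Z2 rather than by each term separately. A small illustrative check of the conservation property of the Add rule, assuming ViT_DeiT/ is on the import path:

import torch
from modules.layers_ours import Add

add = Add()
a, b = torch.randn(2, 5), torch.randn(2, 5)
out = add([a, b])                    # the forward hook caches both inputs in add.X
R = torch.rand(2, 5)
Ra, Rb = add.relprop(R, alpha=1)
print(torch.allclose(Ra.sum() + Rb.sum(), R.sum()))   # True: the relevance sum is preserved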
ViT_DeiT/requirements.txt ADDED
@@ -0,0 +1,15 @@
1
+ Pillow>=8.1.1
2
+ einops == 0.3.0
3
+ h5py == 2.8.0
4
+ imageio == 2.9.0
5
+ matplotlib == 3.3.2
6
+ opencv_python
7
+ scikit_image == 0.17.2
8
+ scipy == 1.5.2
9
+ sklearn
10
+ torch == 1.7.0
11
+ torchvision == 0.8.1
12
+ tqdm == 4.51.0
13
+ transformers == 3.5.1
14
+ utils == 1.0.1
15
+ Pygments>=2.7.4
ViT_DeiT/samples/CLS2IDX.py ADDED
@@ -0,0 +1,1000 @@
1
+ CLS2IDX = {0: 'tench, Tinca tinca',
2
+ 1: 'goldfish, Carassius auratus',
3
+ 2: 'great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias',
4
+ 3: 'tiger shark, Galeocerdo cuvieri',
5
+ 4: 'hammerhead, hammerhead shark',
6
+ 5: 'electric ray, crampfish, numbfish, torpedo',
7
+ 6: 'stingray',
8
+ 7: 'cock',
9
+ 8: 'hen',
10
+ 9: 'ostrich, Struthio camelus',
11
+ 10: 'brambling, Fringilla montifringilla',
12
+ 11: 'goldfinch, Carduelis carduelis',
13
+ 12: 'house finch, linnet, Carpodacus mexicanus',
14
+ 13: 'junco, snowbird',
15
+ 14: 'indigo bunting, indigo finch, indigo bird, Passerina cyanea',
16
+ 15: 'robin, American robin, Turdus migratorius',
17
+ 16: 'bulbul',
18
+ 17: 'jay',
19
+ 18: 'magpie',
20
+ 19: 'chickadee',
21
+ 20: 'water ouzel, dipper',
22
+ 21: 'kite',
23
+ 22: 'bald eagle, American eagle, Haliaeetus leucocephalus',
24
+ 23: 'vulture',
25
+ 24: 'great grey owl, great gray owl, Strix nebulosa',
26
+ 25: 'European fire salamander, Salamandra salamandra',
27
+ 26: 'common newt, Triturus vulgaris',
28
+ 27: 'eft',
29
+ 28: 'spotted salamander, Ambystoma maculatum',
30
+ 29: 'axolotl, mud puppy, Ambystoma mexicanum',
31
+ 30: 'bullfrog, Rana catesbeiana',
32
+ 31: 'tree frog, tree-frog',
33
+ 32: 'tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui',
34
+ 33: 'loggerhead, loggerhead turtle, Caretta caretta',
35
+ 34: 'leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea',
36
+ 35: 'mud turtle',
37
+ 36: 'terrapin',
38
+ 37: 'box turtle, box tortoise',
39
+ 38: 'banded gecko',
40
+ 39: 'common iguana, iguana, Iguana iguana',
41
+ 40: 'American chameleon, anole, Anolis carolinensis',
42
+ 41: 'whiptail, whiptail lizard',
43
+ 42: 'agama',
44
+ 43: 'frilled lizard, Chlamydosaurus kingi',
45
+ 44: 'alligator lizard',
46
+ 45: 'Gila monster, Heloderma suspectum',
47
+ 46: 'green lizard, Lacerta viridis',
48
+ 47: 'African chameleon, Chamaeleo chamaeleon',
49
+ 48: 'Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis',
50
+ 49: 'African crocodile, Nile crocodile, Crocodylus niloticus',
51
+ 50: 'American alligator, Alligator mississipiensis',
52
+ 51: 'triceratops',
53
+ 52: 'thunder snake, worm snake, Carphophis amoenus',
54
+ 53: 'ringneck snake, ring-necked snake, ring snake',
55
+ 54: 'hognose snake, puff adder, sand viper',
56
+ 55: 'green snake, grass snake',
57
+ 56: 'king snake, kingsnake',
58
+ 57: 'garter snake, grass snake',
59
+ 58: 'water snake',
60
+ 59: 'vine snake',
61
+ 60: 'night snake, Hypsiglena torquata',
62
+ 61: 'boa constrictor, Constrictor constrictor',
63
+ 62: 'rock python, rock snake, Python sebae',
64
+ 63: 'Indian cobra, Naja naja',
65
+ 64: 'green mamba',
66
+ 65: 'sea snake',
67
+ 66: 'horned viper, cerastes, sand viper, horned asp, Cerastes cornutus',
68
+ 67: 'diamondback, diamondback rattlesnake, Crotalus adamanteus',
69
+ 68: 'sidewinder, horned rattlesnake, Crotalus cerastes',
70
+ 69: 'trilobite',
71
+ 70: 'harvestman, daddy longlegs, Phalangium opilio',
72
+ 71: 'scorpion',
73
+ 72: 'black and gold garden spider, Argiope aurantia',
74
+ 73: 'barn spider, Araneus cavaticus',
75
+ 74: 'garden spider, Aranea diademata',
76
+ 75: 'black widow, Latrodectus mactans',
77
+ 76: 'tarantula',
78
+ 77: 'wolf spider, hunting spider',
79
+ 78: 'tick',
80
+ 79: 'centipede',
81
+ 80: 'black grouse',
82
+ 81: 'ptarmigan',
83
+ 82: 'ruffed grouse, partridge, Bonasa umbellus',
84
+ 83: 'prairie chicken, prairie grouse, prairie fowl',
85
+ 84: 'peacock',
86
+ 85: 'quail',
87
+ 86: 'partridge',
88
+ 87: 'African grey, African gray, Psittacus erithacus',
89
+ 88: 'macaw',
90
+ 89: 'sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita',
91
+ 90: 'lorikeet',
92
+ 91: 'coucal',
93
+ 92: 'bee eater',
94
+ 93: 'hornbill',
95
+ 94: 'hummingbird',
96
+ 95: 'jacamar',
97
+ 96: 'toucan',
98
+ 97: 'drake',
99
+ 98: 'red-breasted merganser, Mergus serrator',
100
+ 99: 'goose',
101
+ 100: 'black swan, Cygnus atratus',
102
+ 101: 'tusker',
103
+ 102: 'echidna, spiny anteater, anteater',
104
+ 103: 'platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus',
105
+ 104: 'wallaby, brush kangaroo',
106
+ 105: 'koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus',
107
+ 106: 'wombat',
108
+ 107: 'jellyfish',
109
+ 108: 'sea anemone, anemone',
110
+ 109: 'brain coral',
111
+ 110: 'flatworm, platyhelminth',
112
+ 111: 'nematode, nematode worm, roundworm',
113
+ 112: 'conch',
114
+ 113: 'snail',
115
+ 114: 'slug',
116
+ 115: 'sea slug, nudibranch',
117
+ 116: 'chiton, coat-of-mail shell, sea cradle, polyplacophore',
118
+ 117: 'chambered nautilus, pearly nautilus, nautilus',
119
+ 118: 'Dungeness crab, Cancer magister',
120
+ 119: 'rock crab, Cancer irroratus',
121
+ 120: 'fiddler crab',
122
+ 121: 'king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica',
123
+ 122: 'American lobster, Northern lobster, Maine lobster, Homarus americanus',
124
+ 123: 'spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish',
125
+ 124: 'crayfish, crawfish, crawdad, crawdaddy',
126
+ 125: 'hermit crab',
127
+ 126: 'isopod',
128
+ 127: 'white stork, Ciconia ciconia',
129
+ 128: 'black stork, Ciconia nigra',
130
+ 129: 'spoonbill',
131
+ 130: 'flamingo',
132
+ 131: 'little blue heron, Egretta caerulea',
133
+ 132: 'American egret, great white heron, Egretta albus',
134
+ 133: 'bittern',
135
+ 134: 'crane',
136
+ 135: 'limpkin, Aramus pictus',
137
+ 136: 'European gallinule, Porphyrio porphyrio',
138
+ 137: 'American coot, marsh hen, mud hen, water hen, Fulica americana',
139
+ 138: 'bustard',
140
+ 139: 'ruddy turnstone, Arenaria interpres',
141
+ 140: 'red-backed sandpiper, dunlin, Erolia alpina',
142
+ 141: 'redshank, Tringa totanus',
143
+ 142: 'dowitcher',
144
+ 143: 'oystercatcher, oyster catcher',
145
+ 144: 'pelican',
146
+ 145: 'king penguin, Aptenodytes patagonica',
147
+ 146: 'albatross, mollymawk',
148
+ 147: 'grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus',
149
+ 148: 'killer whale, killer, orca, grampus, sea wolf, Orcinus orca',
150
+ 149: 'dugong, Dugong dugon',
151
+ 150: 'sea lion',
152
+ 151: 'Chihuahua',
153
+ 152: 'Japanese spaniel',
154
+ 153: 'Maltese dog, Maltese terrier, Maltese',
155
+ 154: 'Pekinese, Pekingese, Peke',
156
+ 155: 'Shih-Tzu',
157
+ 156: 'Blenheim spaniel',
158
+ 157: 'papillon',
159
+ 158: 'toy terrier',
160
+ 159: 'Rhodesian ridgeback',
161
+ 160: 'Afghan hound, Afghan',
162
+ 161: 'basset, basset hound',
163
+ 162: 'beagle',
164
+ 163: 'bloodhound, sleuthhound',
165
+ 164: 'bluetick',
166
+ 165: 'black-and-tan coonhound',
167
+ 166: 'Walker hound, Walker foxhound',
168
+ 167: 'English foxhound',
169
+ 168: 'redbone',
170
+ 169: 'borzoi, Russian wolfhound',
171
+ 170: 'Irish wolfhound',
172
+ 171: 'Italian greyhound',
173
+ 172: 'whippet',
174
+ 173: 'Ibizan hound, Ibizan Podenco',
175
+ 174: 'Norwegian elkhound, elkhound',
176
+ 175: 'otterhound, otter hound',
177
+ 176: 'Saluki, gazelle hound',
178
+ 177: 'Scottish deerhound, deerhound',
179
+ 178: 'Weimaraner',
180
+ 179: 'Staffordshire bullterrier, Staffordshire bull terrier',
181
+ 180: 'American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier',
182
+ 181: 'Bedlington terrier',
183
+ 182: 'Border terrier',
184
+ 183: 'Kerry blue terrier',
185
+ 184: 'Irish terrier',
186
+ 185: 'Norfolk terrier',
187
+ 186: 'Norwich terrier',
188
+ 187: 'Yorkshire terrier',
189
+ 188: 'wire-haired fox terrier',
190
+ 189: 'Lakeland terrier',
191
+ 190: 'Sealyham terrier, Sealyham',
192
+ 191: 'Airedale, Airedale terrier',
193
+ 192: 'cairn, cairn terrier',
194
+ 193: 'Australian terrier',
195
+ 194: 'Dandie Dinmont, Dandie Dinmont terrier',
196
+ 195: 'Boston bull, Boston terrier',
197
+ 196: 'miniature schnauzer',
198
+ 197: 'giant schnauzer',
199
+ 198: 'standard schnauzer',
200
+ 199: 'Scotch terrier, Scottish terrier, Scottie',
201
+ 200: 'Tibetan terrier, chrysanthemum dog',
202
+ 201: 'silky terrier, Sydney silky',
203
+ 202: 'soft-coated wheaten terrier',
204
+ 203: 'West Highland white terrier',
205
+ 204: 'Lhasa, Lhasa apso',
206
+ 205: 'flat-coated retriever',
207
+ 206: 'curly-coated retriever',
208
+ 207: 'golden retriever',
209
+ 208: 'Labrador retriever',
210
+ 209: 'Chesapeake Bay retriever',
211
+ 210: 'German short-haired pointer',
212
+ 211: 'vizsla, Hungarian pointer',
213
+ 212: 'English setter',
214
+ 213: 'Irish setter, red setter',
215
+ 214: 'Gordon setter',
216
+ 215: 'Brittany spaniel',
217
+ 216: 'clumber, clumber spaniel',
218
+ 217: 'English springer, English springer spaniel',
219
+ 218: 'Welsh springer spaniel',
220
+ 219: 'cocker spaniel, English cocker spaniel, cocker',
221
+ 220: 'Sussex spaniel',
222
+ 221: 'Irish water spaniel',
223
+ 222: 'kuvasz',
224
+ 223: 'schipperke',
225
+ 224: 'groenendael',
226
+ 225: 'malinois',
227
+ 226: 'briard',
228
+ 227: 'kelpie',
229
+ 228: 'komondor',
230
+ 229: 'Old English sheepdog, bobtail',
231
+ 230: 'Shetland sheepdog, Shetland sheep dog, Shetland',
232
+ 231: 'collie',
233
+ 232: 'Border collie',
234
+ 233: 'Bouvier des Flandres, Bouviers des Flandres',
235
+ 234: 'Rottweiler',
236
+ 235: 'German shepherd, German shepherd dog, German police dog, alsatian',
237
+ 236: 'Doberman, Doberman pinscher',
238
+ 237: 'miniature pinscher',
239
+ 238: 'Greater Swiss Mountain dog',
240
+ 239: 'Bernese mountain dog',
241
+ 240: 'Appenzeller',
242
+ 241: 'EntleBucher',
243
+ 242: 'boxer',
244
+ 243: 'bull mastiff',
245
+ 244: 'Tibetan mastiff',
246
+ 245: 'French bulldog',
247
+ 246: 'Great Dane',
248
+ 247: 'Saint Bernard, St Bernard',
249
+ 248: 'Eskimo dog, husky',
250
+ 249: 'malamute, malemute, Alaskan malamute',
251
+ 250: 'Siberian husky',
252
+ 251: 'dalmatian, coach dog, carriage dog',
253
+ 252: 'affenpinscher, monkey pinscher, monkey dog',
254
+ 253: 'basenji',
255
+ 254: 'pug, pug-dog',
256
+ 255: 'Leonberg',
257
+ 256: 'Newfoundland, Newfoundland dog',
258
+ 257: 'Great Pyrenees',
259
+ 258: 'Samoyed, Samoyede',
260
+ 259: 'Pomeranian',
261
+ 260: 'chow, chow chow',
262
+ 261: 'keeshond',
263
+ 262: 'Brabancon griffon',
264
+ 263: 'Pembroke, Pembroke Welsh corgi',
265
+ 264: 'Cardigan, Cardigan Welsh corgi',
266
+ 265: 'toy poodle',
267
+ 266: 'miniature poodle',
268
+ 267: 'standard poodle',
269
+ 268: 'Mexican hairless',
270
+ 269: 'timber wolf, grey wolf, gray wolf, Canis lupus',
271
+ 270: 'white wolf, Arctic wolf, Canis lupus tundrarum',
272
+ 271: 'red wolf, maned wolf, Canis rufus, Canis niger',
273
+ 272: 'coyote, prairie wolf, brush wolf, Canis latrans',
274
+ 273: 'dingo, warrigal, warragal, Canis dingo',
275
+ 274: 'dhole, Cuon alpinus',
276
+ 275: 'African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus',
277
+ 276: 'hyena, hyaena',
278
+ 277: 'red fox, Vulpes vulpes',
279
+ 278: 'kit fox, Vulpes macrotis',
280
+ 279: 'Arctic fox, white fox, Alopex lagopus',
281
+ 280: 'grey fox, gray fox, Urocyon cinereoargenteus',
282
+ 281: 'tabby, tabby cat',
283
+ 282: 'tiger cat',
284
+ 283: 'Persian cat',
285
+ 284: 'Siamese cat, Siamese',
286
+ 285: 'Egyptian cat',
287
+ 286: 'cougar, puma, catamount, mountain lion, painter, panther, Felis concolor',
288
+ 287: 'lynx, catamount',
289
+ 288: 'leopard, Panthera pardus',
290
+ 289: 'snow leopard, ounce, Panthera uncia',
291
+ 290: 'jaguar, panther, Panthera onca, Felis onca',
292
+ 291: 'lion, king of beasts, Panthera leo',
293
+ 292: 'tiger, Panthera tigris',
294
+ 293: 'cheetah, chetah, Acinonyx jubatus',
295
+ 294: 'brown bear, bruin, Ursus arctos',
296
+ 295: 'American black bear, black bear, Ursus americanus, Euarctos americanus',
297
+ 296: 'ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus',
298
+ 297: 'sloth bear, Melursus ursinus, Ursus ursinus',
299
+ 298: 'mongoose',
300
+ 299: 'meerkat, mierkat',
301
+ 300: 'tiger beetle',
302
+ 301: 'ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle',
303
+ 302: 'ground beetle, carabid beetle',
304
+ 303: 'long-horned beetle, longicorn, longicorn beetle',
305
+ 304: 'leaf beetle, chrysomelid',
306
+ 305: 'dung beetle',
307
+ 306: 'rhinoceros beetle',
308
+ 307: 'weevil',
309
+ 308: 'fly',
310
+ 309: 'bee',
311
+ 310: 'ant, emmet, pismire',
312
+ 311: 'grasshopper, hopper',
313
+ 312: 'cricket',
314
+ 313: 'walking stick, walkingstick, stick insect',
315
+ 314: 'cockroach, roach',
316
+ 315: 'mantis, mantid',
317
+ 316: 'cicada, cicala',
318
+ 317: 'leafhopper',
319
+ 318: 'lacewing, lacewing fly',
320
+ 319: "dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk",
321
+ 320: 'damselfly',
322
+ 321: 'admiral',
323
+ 322: 'ringlet, ringlet butterfly',
324
+ 323: 'monarch, monarch butterfly, milkweed butterfly, Danaus plexippus',
325
+ 324: 'cabbage butterfly',
326
+ 325: 'sulphur butterfly, sulfur butterfly',
327
+ 326: 'lycaenid, lycaenid butterfly',
328
+ 327: 'starfish, sea star',
329
+ 328: 'sea urchin',
330
+ 329: 'sea cucumber, holothurian',
331
+ 330: 'wood rabbit, cottontail, cottontail rabbit',
332
+ 331: 'hare',
333
+ 332: 'Angora, Angora rabbit',
334
+ 333: 'hamster',
335
+ 334: 'porcupine, hedgehog',
336
+ 335: 'fox squirrel, eastern fox squirrel, Sciurus niger',
337
+ 336: 'marmot',
338
+ 337: 'beaver',
339
+ 338: 'guinea pig, Cavia cobaya',
340
+ 339: 'sorrel',
341
+ 340: 'zebra',
342
+ 341: 'hog, pig, grunter, squealer, Sus scrofa',
343
+ 342: 'wild boar, boar, Sus scrofa',
344
+ 343: 'warthog',
345
+ 344: 'hippopotamus, hippo, river horse, Hippopotamus amphibius',
346
+ 345: 'ox',
347
+ 346: 'water buffalo, water ox, Asiatic buffalo, Bubalus bubalis',
348
+ 347: 'bison',
349
+ 348: 'ram, tup',
350
+ 349: 'bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis',
351
+ 350: 'ibex, Capra ibex',
352
+ 351: 'hartebeest',
353
+ 352: 'impala, Aepyceros melampus',
354
+ 353: 'gazelle',
355
+ 354: 'Arabian camel, dromedary, Camelus dromedarius',
356
+ 355: 'llama',
357
+ 356: 'weasel',
358
+ 357: 'mink',
359
+ 358: 'polecat, fitch, foulmart, foumart, Mustela putorius',
360
+ 359: 'black-footed ferret, ferret, Mustela nigripes',
361
+ 360: 'otter',
362
+ 361: 'skunk, polecat, wood pussy',
363
+ 362: 'badger',
364
+ 363: 'armadillo',
365
+ 364: 'three-toed sloth, ai, Bradypus tridactylus',
366
+ 365: 'orangutan, orang, orangutang, Pongo pygmaeus',
367
+ 366: 'gorilla, Gorilla gorilla',
368
+ 367: 'chimpanzee, chimp, Pan troglodytes',
369
+ 368: 'gibbon, Hylobates lar',
370
+ 369: 'siamang, Hylobates syndactylus, Symphalangus syndactylus',
371
+ 370: 'guenon, guenon monkey',
372
+ 371: 'patas, hussar monkey, Erythrocebus patas',
373
+ 372: 'baboon',
374
+ 373: 'macaque',
375
+ 374: 'langur',
376
+ 375: 'colobus, colobus monkey',
377
+ 376: 'proboscis monkey, Nasalis larvatus',
378
+ 377: 'marmoset',
379
+ 378: 'capuchin, ringtail, Cebus capucinus',
380
+ 379: 'howler monkey, howler',
381
+ 380: 'titi, titi monkey',
382
+ 381: 'spider monkey, Ateles geoffroyi',
383
+ 382: 'squirrel monkey, Saimiri sciureus',
384
+ 383: 'Madagascar cat, ring-tailed lemur, Lemur catta',
385
+ 384: 'indri, indris, Indri indri, Indri brevicaudatus',
386
+ 385: 'Indian elephant, Elephas maximus',
387
+ 386: 'African elephant, Loxodonta africana',
388
+ 387: 'lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens',
389
+ 388: 'giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca',
390
+ 389: 'barracouta, snoek',
391
+ 390: 'eel',
392
+ 391: 'coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch',
393
+ 392: 'rock beauty, Holocanthus tricolor',
394
+ 393: 'anemone fish',
395
+ 394: 'sturgeon',
396
+ 395: 'gar, garfish, garpike, billfish, Lepisosteus osseus',
397
+ 396: 'lionfish',
398
+ 397: 'puffer, pufferfish, blowfish, globefish',
399
+ 398: 'abacus',
400
+ 399: 'abaya',
401
+ 400: "academic gown, academic robe, judge's robe",
402
+ 401: 'accordion, piano accordion, squeeze box',
403
+ 402: 'acoustic guitar',
404
+ 403: 'aircraft carrier, carrier, flattop, attack aircraft carrier',
405
+ 404: 'airliner',
406
+ 405: 'airship, dirigible',
407
+ 406: 'altar',
408
+ 407: 'ambulance',
409
+ 408: 'amphibian, amphibious vehicle',
410
+ 409: 'analog clock',
411
+ 410: 'apiary, bee house',
412
+ 411: 'apron',
413
+ 412: 'ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin',
414
+ 413: 'assault rifle, assault gun',
415
+ 414: 'backpack, back pack, knapsack, packsack, rucksack, haversack',
416
+ 415: 'bakery, bakeshop, bakehouse',
417
+ 416: 'balance beam, beam',
418
+ 417: 'balloon',
419
+ 418: 'ballpoint, ballpoint pen, ballpen, Biro',
420
+ 419: 'Band Aid',
421
+ 420: 'banjo',
422
+ 421: 'bannister, banister, balustrade, balusters, handrail',
423
+ 422: 'barbell',
424
+ 423: 'barber chair',
425
+ 424: 'barbershop',
426
+ 425: 'barn',
427
+ 426: 'barometer',
428
+ 427: 'barrel, cask',
429
+ 428: 'barrow, garden cart, lawn cart, wheelbarrow',
430
+ 429: 'baseball',
431
+ 430: 'basketball',
432
+ 431: 'bassinet',
433
+ 432: 'bassoon',
434
+ 433: 'bathing cap, swimming cap',
435
+ 434: 'bath towel',
436
+ 435: 'bathtub, bathing tub, bath, tub',
437
+ 436: 'beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon',
438
+ 437: 'beacon, lighthouse, beacon light, pharos',
439
+ 438: 'beaker',
440
+ 439: 'bearskin, busby, shako',
441
+ 440: 'beer bottle',
442
+ 441: 'beer glass',
443
+ 442: 'bell cote, bell cot',
444
+ 443: 'bib',
445
+ 444: 'bicycle-built-for-two, tandem bicycle, tandem',
446
+ 445: 'bikini, two-piece',
447
+ 446: 'binder, ring-binder',
448
+ 447: 'binoculars, field glasses, opera glasses',
449
+ 448: 'birdhouse',
450
+ 449: 'boathouse',
451
+ 450: 'bobsled, bobsleigh, bob',
452
+ 451: 'bolo tie, bolo, bola tie, bola',
453
+ 452: 'bonnet, poke bonnet',
454
+ 453: 'bookcase',
455
+ 454: 'bookshop, bookstore, bookstall',
456
+ 455: 'bottlecap',
457
+ 456: 'bow',
458
+ 457: 'bow tie, bow-tie, bowtie',
459
+ 458: 'brass, memorial tablet, plaque',
460
+ 459: 'brassiere, bra, bandeau',
461
+ 460: 'breakwater, groin, groyne, mole, bulwark, seawall, jetty',
462
+ 461: 'breastplate, aegis, egis',
463
+ 462: 'broom',
464
+ 463: 'bucket, pail',
465
+ 464: 'buckle',
466
+ 465: 'bulletproof vest',
467
+ 466: 'bullet train, bullet',
468
+ 467: 'butcher shop, meat market',
469
+ 468: 'cab, hack, taxi, taxicab',
470
+ 469: 'caldron, cauldron',
471
+ 470: 'candle, taper, wax light',
472
+ 471: 'cannon',
473
+ 472: 'canoe',
474
+ 473: 'can opener, tin opener',
475
+ 474: 'cardigan',
476
+ 475: 'car mirror',
477
+ 476: 'carousel, carrousel, merry-go-round, roundabout, whirligig',
478
+ 477: "carpenter's kit, tool kit",
479
+ 478: 'carton',
480
+ 479: 'car wheel',
481
+ 480: 'cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM',
482
+ 481: 'cassette',
483
+ 482: 'cassette player',
484
+ 483: 'castle',
485
+ 484: 'catamaran',
486
+ 485: 'CD player',
487
+ 486: 'cello, violoncello',
488
+ 487: 'cellular telephone, cellular phone, cellphone, cell, mobile phone',
489
+ 488: 'chain',
490
+ 489: 'chainlink fence',
491
+ 490: 'chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour',
492
+ 491: 'chain saw, chainsaw',
493
+ 492: 'chest',
494
+ 493: 'chiffonier, commode',
495
+ 494: 'chime, bell, gong',
496
+ 495: 'china cabinet, china closet',
497
+ 496: 'Christmas stocking',
498
+ 497: 'church, church building',
499
+ 498: 'cinema, movie theater, movie theatre, movie house, picture palace',
500
+ 499: 'cleaver, meat cleaver, chopper',
501
+ 500: 'cliff dwelling',
502
+ 501: 'cloak',
503
+ 502: 'clog, geta, patten, sabot',
504
+ 503: 'cocktail shaker',
505
+ 504: 'coffee mug',
506
+ 505: 'coffeepot',
507
+ 506: 'coil, spiral, volute, whorl, helix',
508
+ 507: 'combination lock',
509
+ 508: 'computer keyboard, keypad',
510
+ 509: 'confectionery, confectionary, candy store',
511
+ 510: 'container ship, containership, container vessel',
512
+ 511: 'convertible',
513
+ 512: 'corkscrew, bottle screw',
514
+ 513: 'cornet, horn, trumpet, trump',
515
+ 514: 'cowboy boot',
516
+ 515: 'cowboy hat, ten-gallon hat',
517
+ 516: 'cradle',
518
+ 517: 'crane',
519
+ 518: 'crash helmet',
520
+ 519: 'crate',
521
+ 520: 'crib, cot',
522
+ 521: 'Crock Pot',
523
+ 522: 'croquet ball',
524
+ 523: 'crutch',
525
+ 524: 'cuirass',
526
+ 525: 'dam, dike, dyke',
527
+ 526: 'desk',
528
+ 527: 'desktop computer',
529
+ 528: 'dial telephone, dial phone',
530
+ 529: 'diaper, nappy, napkin',
531
+ 530: 'digital clock',
532
+ 531: 'digital watch',
533
+ 532: 'dining table, board',
534
+ 533: 'dishrag, dishcloth',
535
+ 534: 'dishwasher, dish washer, dishwashing machine',
536
+ 535: 'disk brake, disc brake',
537
+ 536: 'dock, dockage, docking facility',
538
+ 537: 'dogsled, dog sled, dog sleigh',
539
+ 538: 'dome',
540
+ 539: 'doormat, welcome mat',
541
+ 540: 'drilling platform, offshore rig',
542
+ 541: 'drum, membranophone, tympan',
543
+ 542: 'drumstick',
544
+ 543: 'dumbbell',
545
+ 544: 'Dutch oven',
546
+ 545: 'electric fan, blower',
547
+ 546: 'electric guitar',
548
+ 547: 'electric locomotive',
549
+ 548: 'entertainment center',
550
+ 549: 'envelope',
551
+ 550: 'espresso maker',
552
+ 551: 'face powder',
553
+ 552: 'feather boa, boa',
554
+ 553: 'file, file cabinet, filing cabinet',
555
+ 554: 'fireboat',
556
+ 555: 'fire engine, fire truck',
557
+ 556: 'fire screen, fireguard',
558
+ 557: 'flagpole, flagstaff',
559
+ 558: 'flute, transverse flute',
560
+ 559: 'folding chair',
561
+ 560: 'football helmet',
562
+ 561: 'forklift',
563
+ 562: 'fountain',
564
+ 563: 'fountain pen',
565
+ 564: 'four-poster',
566
+ 565: 'freight car',
567
+ 566: 'French horn, horn',
568
+ 567: 'frying pan, frypan, skillet',
569
+ 568: 'fur coat',
570
+ 569: 'garbage truck, dustcart',
571
+ 570: 'gasmask, respirator, gas helmet',
572
+ 571: 'gas pump, gasoline pump, petrol pump, island dispenser',
573
+ 572: 'goblet',
574
+ 573: 'go-kart',
575
+ 574: 'golf ball',
576
+ 575: 'golfcart, golf cart',
577
+ 576: 'gondola',
578
+ 577: 'gong, tam-tam',
579
+ 578: 'gown',
580
+ 579: 'grand piano, grand',
581
+ 580: 'greenhouse, nursery, glasshouse',
582
+ 581: 'grille, radiator grille',
583
+ 582: 'grocery store, grocery, food market, market',
584
+ 583: 'guillotine',
585
+ 584: 'hair slide',
586
+ 585: 'hair spray',
587
+ 586: 'half track',
588
+ 587: 'hammer',
589
+ 588: 'hamper',
590
+ 589: 'hand blower, blow dryer, blow drier, hair dryer, hair drier',
591
+ 590: 'hand-held computer, hand-held microcomputer',
592
+ 591: 'handkerchief, hankie, hanky, hankey',
593
+ 592: 'hard disc, hard disk, fixed disk',
594
+ 593: 'harmonica, mouth organ, harp, mouth harp',
595
+ 594: 'harp',
596
+ 595: 'harvester, reaper',
597
+ 596: 'hatchet',
598
+ 597: 'holster',
599
+ 598: 'home theater, home theatre',
600
+ 599: 'honeycomb',
601
+ 600: 'hook, claw',
602
+ 601: 'hoopskirt, crinoline',
603
+ 602: 'horizontal bar, high bar',
604
+ 603: 'horse cart, horse-cart',
605
+ 604: 'hourglass',
606
+ 605: 'iPod',
607
+ 606: 'iron, smoothing iron',
608
+ 607: "jack-o'-lantern",
609
+ 608: 'jean, blue jean, denim',
610
+ 609: 'jeep, landrover',
611
+ 610: 'jersey, T-shirt, tee shirt',
612
+ 611: 'jigsaw puzzle',
613
+ 612: 'jinrikisha, ricksha, rickshaw',
614
+ 613: 'joystick',
615
+ 614: 'kimono',
616
+ 615: 'knee pad',
617
+ 616: 'knot',
618
+ 617: 'lab coat, laboratory coat',
619
+ 618: 'ladle',
620
+ 619: 'lampshade, lamp shade',
621
+ 620: 'laptop, laptop computer',
622
+ 621: 'lawn mower, mower',
623
+ 622: 'lens cap, lens cover',
624
+ 623: 'letter opener, paper knife, paperknife',
625
+ 624: 'library',
626
+ 625: 'lifeboat',
627
+ 626: 'lighter, light, igniter, ignitor',
628
+ 627: 'limousine, limo',
629
+ 628: 'liner, ocean liner',
630
+ 629: 'lipstick, lip rouge',
631
+ 630: 'Loafer',
632
+ 631: 'lotion',
633
+ 632: 'loudspeaker, speaker, speaker unit, loudspeaker system, speaker system',
634
+ 633: "loupe, jeweler's loupe",
635
+ 634: 'lumbermill, sawmill',
636
+ 635: 'magnetic compass',
637
+ 636: 'mailbag, postbag',
638
+ 637: 'mailbox, letter box',
639
+ 638: 'maillot',
640
+ 639: 'maillot, tank suit',
641
+ 640: 'manhole cover',
642
+ 641: 'maraca',
643
+ 642: 'marimba, xylophone',
644
+ 643: 'mask',
645
+ 644: 'matchstick',
646
+ 645: 'maypole',
647
+ 646: 'maze, labyrinth',
648
+ 647: 'measuring cup',
649
+ 648: 'medicine chest, medicine cabinet',
650
+ 649: 'megalith, megalithic structure',
651
+ 650: 'microphone, mike',
652
+ 651: 'microwave, microwave oven',
653
+ 652: 'military uniform',
654
+ 653: 'milk can',
655
+ 654: 'minibus',
656
+ 655: 'miniskirt, mini',
657
+ 656: 'minivan',
658
+ 657: 'missile',
659
+ 658: 'mitten',
660
+ 659: 'mixing bowl',
661
+ 660: 'mobile home, manufactured home',
662
+ 661: 'Model T',
663
+ 662: 'modem',
664
+ 663: 'monastery',
665
+ 664: 'monitor',
666
+ 665: 'moped',
667
+ 666: 'mortar',
668
+ 667: 'mortarboard',
669
+ 668: 'mosque',
670
+ 669: 'mosquito net',
671
+ 670: 'motor scooter, scooter',
672
+ 671: 'mountain bike, all-terrain bike, off-roader',
673
+ 672: 'mountain tent',
674
+ 673: 'mouse, computer mouse',
675
+ 674: 'mousetrap',
676
+ 675: 'moving van',
677
+ 676: 'muzzle',
678
+ 677: 'nail',
679
+ 678: 'neck brace',
680
+ 679: 'necklace',
681
+ 680: 'nipple',
682
+ 681: 'notebook, notebook computer',
683
+ 682: 'obelisk',
684
+ 683: 'oboe, hautboy, hautbois',
685
+ 684: 'ocarina, sweet potato',
686
+ 685: 'odometer, hodometer, mileometer, milometer',
687
+ 686: 'oil filter',
688
+ 687: 'organ, pipe organ',
689
+ 688: 'oscilloscope, scope, cathode-ray oscilloscope, CRO',
690
+ 689: 'overskirt',
691
+ 690: 'oxcart',
692
+ 691: 'oxygen mask',
693
+ 692: 'packet',
694
+ 693: 'paddle, boat paddle',
695
+ 694: 'paddlewheel, paddle wheel',
696
+ 695: 'padlock',
697
+ 696: 'paintbrush',
698
+ 697: "pajama, pyjama, pj's, jammies",
699
+ 698: 'palace',
700
+ 699: 'panpipe, pandean pipe, syrinx',
701
+ 700: 'paper towel',
702
+ 701: 'parachute, chute',
703
+ 702: 'parallel bars, bars',
704
+ 703: 'park bench',
705
+ 704: 'parking meter',
706
+ 705: 'passenger car, coach, carriage',
707
+ 706: 'patio, terrace',
708
+ 707: 'pay-phone, pay-station',
709
+ 708: 'pedestal, plinth, footstall',
710
+ 709: 'pencil box, pencil case',
711
+ 710: 'pencil sharpener',
712
+ 711: 'perfume, essence',
713
+ 712: 'Petri dish',
714
+ 713: 'photocopier',
715
+ 714: 'pick, plectrum, plectron',
716
+ 715: 'pickelhaube',
717
+ 716: 'picket fence, paling',
718
+ 717: 'pickup, pickup truck',
719
+ 718: 'pier',
720
+ 719: 'piggy bank, penny bank',
721
+ 720: 'pill bottle',
722
+ 721: 'pillow',
723
+ 722: 'ping-pong ball',
724
+ 723: 'pinwheel',
725
+ 724: 'pirate, pirate ship',
726
+ 725: 'pitcher, ewer',
727
+ 726: "plane, carpenter's plane, woodworking plane",
728
+ 727: 'planetarium',
729
+ 728: 'plastic bag',
730
+ 729: 'plate rack',
731
+ 730: 'plow, plough',
732
+ 731: "plunger, plumber's helper",
733
+ 732: 'Polaroid camera, Polaroid Land camera',
734
+ 733: 'pole',
735
+ 734: 'police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria',
736
+ 735: 'poncho',
737
+ 736: 'pool table, billiard table, snooker table',
738
+ 737: 'pop bottle, soda bottle',
739
+ 738: 'pot, flowerpot',
740
+ 739: "potter's wheel",
741
+ 740: 'power drill',
742
+ 741: 'prayer rug, prayer mat',
743
+ 742: 'printer',
744
+ 743: 'prison, prison house',
745
+ 744: 'projectile, missile',
746
+ 745: 'projector',
747
+ 746: 'puck, hockey puck',
748
+ 747: 'punching bag, punch bag, punching ball, punchball',
749
+ 748: 'purse',
750
+ 749: 'quill, quill pen',
751
+ 750: 'quilt, comforter, comfort, puff',
752
+ 751: 'racer, race car, racing car',
753
+ 752: 'racket, racquet',
754
+ 753: 'radiator',
755
+ 754: 'radio, wireless',
756
+ 755: 'radio telescope, radio reflector',
757
+ 756: 'rain barrel',
758
+ 757: 'recreational vehicle, RV, R.V.',
759
+ 758: 'reel',
760
+ 759: 'reflex camera',
761
+ 760: 'refrigerator, icebox',
762
+ 761: 'remote control, remote',
763
+ 762: 'restaurant, eating house, eating place, eatery',
764
+ 763: 'revolver, six-gun, six-shooter',
765
+ 764: 'rifle',
766
+ 765: 'rocking chair, rocker',
767
+ 766: 'rotisserie',
768
+ 767: 'rubber eraser, rubber, pencil eraser',
769
+ 768: 'rugby ball',
770
+ 769: 'rule, ruler',
771
+ 770: 'running shoe',
772
+ 771: 'safe',
773
+ 772: 'safety pin',
774
+ 773: 'saltshaker, salt shaker',
775
+ 774: 'sandal',
776
+ 775: 'sarong',
777
+ 776: 'sax, saxophone',
778
+ 777: 'scabbard',
779
+ 778: 'scale, weighing machine',
780
+ 779: 'school bus',
781
+ 780: 'schooner',
782
+ 781: 'scoreboard',
783
+ 782: 'screen, CRT screen',
784
+ 783: 'screw',
785
+ 784: 'screwdriver',
786
+ 785: 'seat belt, seatbelt',
787
+ 786: 'sewing machine',
788
+ 787: 'shield, buckler',
789
+ 788: 'shoe shop, shoe-shop, shoe store',
790
+ 789: 'shoji',
791
+ 790: 'shopping basket',
792
+ 791: 'shopping cart',
793
+ 792: 'shovel',
794
+ 793: 'shower cap',
795
+ 794: 'shower curtain',
796
+ 795: 'ski',
797
+ 796: 'ski mask',
798
+ 797: 'sleeping bag',
799
+ 798: 'slide rule, slipstick',
800
+ 799: 'sliding door',
801
+ 800: 'slot, one-armed bandit',
802
+ 801: 'snorkel',
803
+ 802: 'snowmobile',
804
+ 803: 'snowplow, snowplough',
805
+ 804: 'soap dispenser',
806
+ 805: 'soccer ball',
807
+ 806: 'sock',
808
+ 807: 'solar dish, solar collector, solar furnace',
809
+ 808: 'sombrero',
810
+ 809: 'soup bowl',
811
+ 810: 'space bar',
812
+ 811: 'space heater',
813
+ 812: 'space shuttle',
814
+ 813: 'spatula',
815
+ 814: 'speedboat',
816
+ 815: "spider web, spider's web",
817
+ 816: 'spindle',
818
+ 817: 'sports car, sport car',
819
+ 818: 'spotlight, spot',
820
+ 819: 'stage',
821
+ 820: 'steam locomotive',
822
+ 821: 'steel arch bridge',
823
+ 822: 'steel drum',
824
+ 823: 'stethoscope',
825
+ 824: 'stole',
826
+ 825: 'stone wall',
827
+ 826: 'stopwatch, stop watch',
828
+ 827: 'stove',
829
+ 828: 'strainer',
830
+ 829: 'streetcar, tram, tramcar, trolley, trolley car',
831
+ 830: 'stretcher',
832
+ 831: 'studio couch, day bed',
833
+ 832: 'stupa, tope',
834
+ 833: 'submarine, pigboat, sub, U-boat',
835
+ 834: 'suit, suit of clothes',
836
+ 835: 'sundial',
837
+ 836: 'sunglass',
838
+ 837: 'sunglasses, dark glasses, shades',
839
+ 838: 'sunscreen, sunblock, sun blocker',
840
+ 839: 'suspension bridge',
841
+ 840: 'swab, swob, mop',
842
+ 841: 'sweatshirt',
843
+ 842: 'swimming trunks, bathing trunks',
844
+ 843: 'swing',
845
+ 844: 'switch, electric switch, electrical switch',
846
+ 845: 'syringe',
847
+ 846: 'table lamp',
848
+ 847: 'tank, army tank, armored combat vehicle, armoured combat vehicle',
849
+ 848: 'tape player',
850
+ 849: 'teapot',
851
+ 850: 'teddy, teddy bear',
852
+ 851: 'television, television system',
853
+ 852: 'tennis ball',
854
+ 853: 'thatch, thatched roof',
855
+ 854: 'theater curtain, theatre curtain',
856
+ 855: 'thimble',
857
+ 856: 'thresher, thrasher, threshing machine',
858
+ 857: 'throne',
859
+ 858: 'tile roof',
860
+ 859: 'toaster',
861
+ 860: 'tobacco shop, tobacconist shop, tobacconist',
862
+ 861: 'toilet seat',
863
+ 862: 'torch',
864
+ 863: 'totem pole',
865
+ 864: 'tow truck, tow car, wrecker',
866
+ 865: 'toyshop',
867
+ 866: 'tractor',
868
+ 867: 'trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi',
869
+ 868: 'tray',
870
+ 869: 'trench coat',
871
+ 870: 'tricycle, trike, velocipede',
872
+ 871: 'trimaran',
873
+ 872: 'tripod',
874
+ 873: 'triumphal arch',
875
+ 874: 'trolleybus, trolley coach, trackless trolley',
876
+ 875: 'trombone',
877
+ 876: 'tub, vat',
878
+ 877: 'turnstile',
879
+ 878: 'typewriter keyboard',
880
+ 879: 'umbrella',
881
+ 880: 'unicycle, monocycle',
882
+ 881: 'upright, upright piano',
883
+ 882: 'vacuum, vacuum cleaner',
884
+ 883: 'vase',
885
+ 884: 'vault',
886
+ 885: 'velvet',
887
+ 886: 'vending machine',
888
+ 887: 'vestment',
889
+ 888: 'viaduct',
890
+ 889: 'violin, fiddle',
891
+ 890: 'volleyball',
892
+ 891: 'waffle iron',
893
+ 892: 'wall clock',
894
+ 893: 'wallet, billfold, notecase, pocketbook',
895
+ 894: 'wardrobe, closet, press',
896
+ 895: 'warplane, military plane',
897
+ 896: 'washbasin, handbasin, washbowl, lavabo, wash-hand basin',
898
+ 897: 'washer, automatic washer, washing machine',
899
+ 898: 'water bottle',
900
+ 899: 'water jug',
901
+ 900: 'water tower',
902
+ 901: 'whiskey jug',
903
+ 902: 'whistle',
904
+ 903: 'wig',
905
+ 904: 'window screen',
906
+ 905: 'window shade',
907
+ 906: 'Windsor tie',
908
+ 907: 'wine bottle',
909
+ 908: 'wing',
910
+ 909: 'wok',
911
+ 910: 'wooden spoon',
912
+ 911: 'wool, woolen, woollen',
913
+ 912: 'worm fence, snake fence, snake-rail fence, Virginia fence',
914
+ 913: 'wreck',
915
+ 914: 'yawl',
916
+ 915: 'yurt',
917
+ 916: 'web site, website, internet site, site',
918
+ 917: 'comic book',
919
+ 918: 'crossword puzzle, crossword',
920
+ 919: 'street sign',
921
+ 920: 'traffic light, traffic signal, stoplight',
922
+ 921: 'book jacket, dust cover, dust jacket, dust wrapper',
923
+ 922: 'menu',
924
+ 923: 'plate',
925
+ 924: 'guacamole',
926
+ 925: 'consomme',
927
+ 926: 'hot pot, hotpot',
928
+ 927: 'trifle',
929
+ 928: 'ice cream, icecream',
930
+ 929: 'ice lolly, lolly, lollipop, popsicle',
931
+ 930: 'French loaf',
932
+ 931: 'bagel, beigel',
933
+ 932: 'pretzel',
934
+ 933: 'cheeseburger',
935
+ 934: 'hotdog, hot dog, red hot',
936
+ 935: 'mashed potato',
937
+ 936: 'head cabbage',
938
+ 937: 'broccoli',
939
+ 938: 'cauliflower',
940
+ 939: 'zucchini, courgette',
941
+ 940: 'spaghetti squash',
942
+ 941: 'acorn squash',
943
+ 942: 'butternut squash',
944
+ 943: 'cucumber, cuke',
945
+ 944: 'artichoke, globe artichoke',
946
+ 945: 'bell pepper',
947
+ 946: 'cardoon',
948
+ 947: 'mushroom',
949
+ 948: 'Granny Smith',
950
+ 949: 'strawberry',
951
+ 950: 'orange',
952
+ 951: 'lemon',
953
+ 952: 'fig',
954
+ 953: 'pineapple, ananas',
955
+ 954: 'banana',
956
+ 955: 'jackfruit, jak, jack',
957
+ 956: 'custard apple',
958
+ 957: 'pomegranate',
959
+ 958: 'hay',
960
+ 959: 'carbonara',
961
+ 960: 'chocolate sauce, chocolate syrup',
962
+ 961: 'dough',
963
+ 962: 'meat loaf, meatloaf',
964
+ 963: 'pizza, pizza pie',
965
+ 964: 'potpie',
966
+ 965: 'burrito',
967
+ 966: 'red wine',
968
+ 967: 'espresso',
969
+ 968: 'cup',
970
+ 969: 'eggnog',
971
+ 970: 'alp',
972
+ 971: 'bubble',
973
+ 972: 'cliff, drop, drop-off',
974
+ 973: 'coral reef',
975
+ 974: 'geyser',
976
+ 975: 'lakeside, lakeshore',
977
+ 976: 'promontory, headland, head, foreland',
978
+ 977: 'sandbar, sand bar',
979
+ 978: 'seashore, coast, seacoast, sea-coast',
980
+ 979: 'valley, vale',
981
+ 980: 'volcano',
982
+ 981: 'ballplayer, baseball player',
983
+ 982: 'groom, bridegroom',
984
+ 983: 'scuba diver',
985
+ 984: 'rapeseed',
986
+ 985: 'daisy',
987
+ 986: "yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum",
988
+ 987: 'corn',
989
+ 988: 'acorn',
990
+ 989: 'hip, rose hip, rosehip',
991
+ 990: 'buckeye, horse chestnut, conker',
992
+ 991: 'coral fungus',
993
+ 992: 'agaric',
994
+ 993: 'gyromitra',
995
+ 994: 'stinkhorn, carrion fungus',
996
+ 995: 'earthstar',
997
+ 996: 'hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa',
998
+ 997: 'bolete',
999
+ 998: 'ear, spike, capitulum',
1000
+ 999: 'toilet tissue, toilet paper, bathroom tissue'}
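A minimal sketch of how this index-to-label mapping is typically consumed (the classifier, preprocessing, and import path here are illustrative assumptions, not part of this commit):

    # Hypothetical usage: turn a classifier's top-1 logit index into a readable label.
    import torch
    from samples.CLS2IDX import CLS2IDX  # import path assumed from this repo layout

    def top1_label(logits: torch.Tensor) -> str:
        # logits: tensor of shape (1, 1000) from any ImageNet-1k classifier
        idx = logits.argmax(dim=-1).item()
        return CLS2IDX[idx]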
ViT_DeiT/samples/__pycache__/CLS2IDX.cpython-38.pyc ADDED
Binary file (33.4 kB).
ViT_DeiT/samples/catdog.png ADDED
ViT_DeiT/samples/dogbird.png ADDED
ViT_DeiT/samples/dogcat2.png ADDED
ViT_DeiT/samples/el1.png ADDED
ViT_DeiT/samples/el2.png ADDED
ViT_DeiT/samples/el3.png ADDED
ViT_DeiT/samples/el4.png ADDED
ViT_DeiT/samples/el5.png ADDED
ViT_DeiT/utils/__init__.py ADDED
File without changes
ViT_DeiT/utils/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (163 Bytes).
ViT_DeiT/utils/confusionmatrix.py ADDED
@@ -0,0 +1,88 @@
1
+ import numpy as np
2
+ import torch
3
+ from . import metric
4
+
5
+
6
+ class ConfusionMatrix(metric.Metric):
7
+ """Constructs a confusion matrix for a multi-class classification problems.
8
+ Does not support multi-label, multi-class problems.
9
+ Keyword arguments:
10
+ - num_classes (int): number of classes in the classification problem.
11
+ - normalized (boolean, optional): Determines whether or not the confusion
12
+ matrix is normalized. Default: False.
13
+ Modified from: https://github.com/pytorch/tnt/blob/master/torchnet/meter/confusionmeter.py
14
+ """
15
+
16
+ def __init__(self, num_classes, normalized=False):
17
+ super().__init__()
18
+
19
+ self.conf = np.ndarray((num_classes, num_classes), dtype=np.int32)
20
+ self.normalized = normalized
21
+ self.num_classes = num_classes
22
+ self.reset()
23
+
24
+ def reset(self):
25
+ self.conf.fill(0)
26
+
27
+ def add(self, predicted, target):
28
+ """Computes the confusion matrix
29
+ The shape of the confusion matrix is K x K, where K is the number
30
+ of classes.
31
+ Keyword arguments:
32
+ - predicted (Tensor or numpy.ndarray): Can be an N x K tensor/array of
33
+ predicted scores obtained from the model for N examples and K classes,
34
+ or an N-tensor/array of integer values between 0 and K-1.
35
+ - target (Tensor or numpy.ndarray): Can be an N x K tensor/array of
36
+ ground-truth classes for N examples and K classes, or an N-tensor/array
37
+ of integer values between 0 and K-1.
38
+ """
39
+ # If target and/or predicted are tensors, convert them to numpy arrays
40
+ if torch.is_tensor(predicted):
41
+ predicted = predicted.cpu().numpy()
42
+ if torch.is_tensor(target):
43
+ target = target.cpu().numpy()
44
+
45
+ assert predicted.shape[0] == target.shape[0], \
46
+ 'number of targets and predicted outputs do not match'
47
+
48
+ if np.ndim(predicted) != 1:
49
+ assert predicted.shape[1] == self.num_classes, \
50
+ 'number of predictions does not match size of confusion matrix'
51
+ predicted = np.argmax(predicted, 1)
52
+ else:
53
+ assert (predicted.max() < self.num_classes) and (predicted.min() >= 0), \
54
+ 'predicted values are not between 0 and k-1'
55
+
56
+ if np.ndim(target) != 1:
57
+ assert target.shape[1] == self.num_classes, \
58
+ 'Onehot target does not match size of confusion matrix'
59
+ assert (target >= 0).all() and (target <= 1).all(), \
60
+ 'in one-hot encoding, target values should be 0 or 1'
61
+ assert (target.sum(1) == 1).all(), \
62
+ 'multi-label setting is not supported'
63
+ target = np.argmax(target, 1)
64
+ else:
65
+ assert (target.max() < self.num_classes) and (target.min() >= 0), \
66
+ 'target values are not between 0 and k-1'
67
+
68
+ # hack for bincounting 2 arrays together
69
+ x = predicted + self.num_classes * target
70
+ bincount_2d = np.bincount(
71
+ x.astype(np.int32), minlength=self.num_classes**2)
72
+ assert bincount_2d.size == self.num_classes**2
73
+ conf = bincount_2d.reshape((self.num_classes, self.num_classes))
74
+
75
+ self.conf += conf
76
+
77
+ def value(self):
78
+ """
79
+ Returns:
80
+ Confusion matrix of K rows and K columns, where rows correspond
81
+ to ground-truth targets and columns correspond to predicted
82
+ targets.
83
+ """
84
+ if self.normalized:
85
+ conf = self.conf.astype(np.float32)
86
+ return conf / conf.sum(1).clip(min=1e-12)[:, None]
87
+ else:
88
+ return self.conf
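A minimal usage sketch for the ConfusionMatrix metric above (the data and import path are illustrative assumptions):

    import torch
    from utils.confusionmatrix import ConfusionMatrix  # import path assumed

    cm = ConfusionMatrix(num_classes=3)

    # N x K score tensors are argmax-ed internally; N-tensors of integer labels also work.
    scores = torch.tensor([[0.9, 0.05, 0.05],
                           [0.1, 0.80, 0.10],
                           [0.2, 0.20, 0.60]])
    targets = torch.tensor([0, 1, 2])
    cm.add(scores, targets)
    cm.add(torch.tensor([2, 1]), torch.tensor([0, 1]))  # integer predictions

    conf = cm.value()  # 3 x 3 numpy array; rows are ground truth, columns are predictions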
ViT_DeiT/utils/iou.py ADDED
@@ -0,0 +1,93 @@
1
+ import torch
2
+ import numpy as np
3
+ from . import metric
4
+ from .confusionmatrix import ConfusionMatrix
5
+
6
+
7
+ class IoU(metric.Metric):
8
+ """Computes the intersection over union (IoU) per class and corresponding
9
+ mean (mIoU).
10
+
11
+ Intersection over union (IoU) is a common evaluation metric for semantic
12
+ segmentation. The predictions are first accumulated in a confusion matrix
13
+ and the IoU is computed from it as follows:
14
+
15
+ IoU = true_positive / (true_positive + false_positive + false_negative).
16
+
17
+ Keyword arguments:
18
+ - num_classes (int): number of classes in the classification problem
19
+ - normalized (boolean, optional): Determines whether or not the confusion
20
+ matrix is normalized. Default: False.
21
+ - ignore_index (int or iterable, optional): Index of the classes to ignore
22
+ when computing the IoU. Can be an int, or any iterable of ints.
23
+ """
24
+
25
+ def __init__(self, num_classes, normalized=False, ignore_index=None):
26
+ super().__init__()
27
+ self.conf_metric = ConfusionMatrix(num_classes, normalized)
28
+
29
+ if ignore_index is None:
30
+ self.ignore_index = None
31
+ elif isinstance(ignore_index, int):
32
+ self.ignore_index = (ignore_index,)
33
+ else:
34
+ try:
35
+ self.ignore_index = tuple(ignore_index)
36
+ except TypeError:
37
+ raise ValueError("'ignore_index' must be an int or iterable")
38
+
39
+ def reset(self):
40
+ self.conf_metric.reset()
41
+
42
+ def add(self, predicted, target):
43
+ """Adds the predicted and target pair to the IoU metric.
44
+
45
+ Keyword arguments:
46
+ - predicted (Tensor): Can be a (N, K, H, W) tensor of
47
+ predicted scores obtained from the model for N examples and K classes,
48
+ or (N, H, W) tensor of integer values between 0 and K-1.
49
+ - target (Tensor): Can be a (N, K, H, W) tensor of
50
+ target scores for N examples and K classes, or (N, H, W) tensor of
51
+ integer values between 0 and K-1.
52
+
53
+ """
54
+ # Dimensions check
55
+ assert predicted.size(0) == target.size(0), \
56
+ 'number of targets and predicted outputs do not match'
57
+ assert predicted.dim() == 3 or predicted.dim() == 4, \
58
+ "predictions must be of dimension (N, H, W) or (N, K, H, W)"
59
+ assert target.dim() == 3 or target.dim() == 4, \
60
+ "targets must be of dimension (N, H, W) or (N, K, H, W)"
61
+
62
+ # If the tensor is in categorical format convert it to integer format
63
+ if predicted.dim() == 4:
64
+ _, predicted = predicted.max(1)
65
+ if target.dim() == 4:
66
+ _, target = target.max(1)
67
+
68
+ self.conf_metric.add(predicted.view(-1), target.view(-1))
69
+
70
+ def value(self):
71
+ """Computes the IoU and mean IoU.
72
+
73
+ The mean computation ignores NaN elements of the IoU array.
74
+
75
+ Returns:
76
+ Tuple: (IoU, mIoU). The first output is the per class IoU,
77
+ for K classes it's numpy.ndarray with K elements. The second output,
78
+ is the mean IoU.
79
+ """
80
+ conf_matrix = self.conf_metric.value()
81
+ if self.ignore_index is not None:
82
+ for index in self.ignore_index:
83
+ conf_matrix[:, index] = 0
84
+ conf_matrix[index, :] = 0
85
+ true_positive = np.diag(conf_matrix)
86
+ false_positive = np.sum(conf_matrix, 0) - true_positive
87
+ false_negative = np.sum(conf_matrix, 1) - true_positive
88
+
89
+ # Just in case we get a division by 0, ignore/hide the error
90
+ with np.errstate(divide='ignore', invalid='ignore'):
91
+ iou = true_positive / (true_positive + false_positive + false_negative)
92
+
93
+ return iou, np.nanmean(iou)
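A minimal usage sketch for the IoU metric above on a toy segmentation batch (shapes follow the docstring; the class count, ignore index, and import path are illustrative assumptions):

    import torch
    from utils.iou import IoU  # import path assumed

    metric = IoU(num_classes=3, ignore_index=0)  # treat class 0 as background here

    scores = torch.rand(2, 3, 4, 4)           # (N, K, H, W) predicted scores
    targets = torch.randint(0, 3, (2, 4, 4))  # (N, H, W) integer labels in [0, K-1]
    metric.add(scores, targets)

    per_class_iou, mean_iou = metric.value()  # per-class IoU (NaN where undefined) and its nanmean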
ViT_DeiT/utils/metric.py ADDED
@@ -0,0 +1,12 @@
1
+ class Metric(object):
2
+ """Base class for all metrics.
3
+ From: https://github.com/pytorch/tnt/blob/master/torchnet/meter/meter.py
4
+ """
5
+ def reset(self):
6
+ pass
7
+
8
+ def add(self):
9
+ pass
10
+
11
+ def value(self):
12
+ pass
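Since Metric only fixes the reset/add/value interface, a new metric is just a subclass that fills in those three methods. A hypothetical example for illustration (not part of this commit), assuming integer label tensors:

    class PixelAccuracy(Metric):  # hypothetical subclass; Metric imported from utils.metric
        def __init__(self):
            self.reset()

        def reset(self):
            self.correct = 0
            self.total = 0

        def add(self, predicted, target):
            # predicted, target: integer label tensors of the same shape
            self.correct += (predicted == target).sum().item()
            self.total += target.numel()

        def value(self):
            return self.correct / max(self.total, 1)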
ViT_DeiT/utils/metrices.py ADDED
@@ -0,0 +1,208 @@
1
+ import numpy as np
2
+ import torch
3
+ from sklearn.metrics import f1_score, average_precision_score
4
+ from sklearn.metrics import precision_recall_curve, roc_curve
5
+
6
+ SMOOTH = 1e-6
7
+ __all__ = ['get_f1_scores', 'get_ap_scores', 'batch_pix_accuracy', 'batch_intersection_union', 'get_iou', 'get_pr',
8
+ 'get_roc', 'get_ap_multiclass']
9
+
10
+
11
+ def get_iou(outputs: torch.Tensor, labels: torch.Tensor):
12
+ # You can comment out this line if you are passing tensors of equal shape
13
+ # But if you are passing output from UNet or something it will most probably
14
+ # have the BATCH x 1 x H x W shape
15
+ outputs = outputs.squeeze(1) # BATCH x 1 x H x W => BATCH x H x W
16
+ labels = labels.squeeze(1) # BATCH x 1 x H x W => BATCH x H x W
17
+
18
+ intersection = (outputs & labels).float().sum((1, 2)) # Will be zero if Truth=0 or Prediction=0
19
+ union = (outputs | labels).float().sum((1, 2)) # Will be zero if both are 0
20
+
21
+ iou = (intersection + SMOOTH) / (union + SMOOTH) # We smooth our division to avoid 0/0
22
+
23
+ return iou.cpu().numpy()
24
+
25
+
26
+ def get_f1_scores(predict, target, ignore_index=-1):
27
+ # Tensor process
28
+ batch_size = predict.shape[0]
29
+ predict = predict.data.cpu().numpy().reshape(-1)
30
+ target = target.data.cpu().numpy().reshape(-1)
31
+ pb = predict[target != ignore_index].reshape(batch_size, -1)
32
+ tb = target[target != ignore_index].reshape(batch_size, -1)
33
+
34
+ total = []
35
+ for p, t in zip(pb, tb):
36
+ total.append(np.nan_to_num(f1_score(t, p)))
37
+
38
+ return total
39
+
40
+
41
+ def get_roc(predict, target, ignore_index=-1):
42
+ target_expand = target.unsqueeze(1).expand_as(predict)
43
+ target_expand_numpy = target_expand.data.cpu().numpy().reshape(-1)
44
+ # Tensor process
45
+ x = torch.zeros_like(target_expand)
46
+ t = target.unsqueeze(1).clamp(min=0)
47
+ target_1hot = x.scatter_(1, t, 1)
48
+ batch_size = predict.shape[0]
49
+ predict = predict.data.cpu().numpy().reshape(-1)
50
+ target = target_1hot.data.cpu().numpy().reshape(-1)
51
+ pb = predict[target_expand_numpy != ignore_index].reshape(batch_size, -1)
52
+ tb = target[target_expand_numpy != ignore_index].reshape(batch_size, -1)
53
+
54
+ total = []
55
+ for p, t in zip(pb, tb):
56
+ total.append(roc_curve(t, p))
57
+
58
+ return total
59
+
60
+
61
+ def get_pr(predict, target, ignore_index=-1):
62
+ target_expand = target.unsqueeze(1).expand_as(predict)
63
+ target_expand_numpy = target_expand.data.cpu().numpy().reshape(-1)
64
+ # Tensor process
65
+ x = torch.zeros_like(target_expand)
66
+ t = target.unsqueeze(1).clamp(min=0)
67
+ target_1hot = x.scatter_(1, t, 1)
68
+ batch_size = predict.shape[0]
69
+ predict = predict.data.cpu().numpy().reshape(-1)
70
+ target = target_1hot.data.cpu().numpy().reshape(-1)
71
+ pb = predict[target_expand_numpy != ignore_index].reshape(batch_size, -1)
72
+ tb = target[target_expand_numpy != ignore_index].reshape(batch_size, -1)
73
+
74
+ total = []
75
+ for p, t in zip(pb, tb):
76
+ total.append(precision_recall_curve(t, p))
77
+
78
+ return total
79
+
80
+
81
+ def get_ap_scores(predict, target, ignore_index=-1):
82
+ total = []
83
+ for pred, tgt in zip(predict, target):
84
+ target_expand = tgt.unsqueeze(0).expand_as(pred)
85
+ target_expand_numpy = target_expand.data.cpu().numpy().reshape(-1)
86
+
87
+ # Tensor process
88
+ x = torch.zeros_like(target_expand)
89
+ t = tgt.unsqueeze(0).clamp(min=0).long()
90
+ target_1hot = x.scatter_(0, t, 1)
91
+ predict_flat = pred.data.cpu().numpy().reshape(-1)
92
+ target_flat = target_1hot.data.cpu().numpy().reshape(-1)
93
+
94
+ p = predict_flat[target_expand_numpy != ignore_index]
95
+ t = target_flat[target_expand_numpy != ignore_index]
96
+
97
+ total.append(np.nan_to_num(average_precision_score(t, p)))
98
+
99
+ return total
100
+
101
+
102
+ def get_ap_multiclass(predict, target):
103
+ total = []
104
+ for pred, tgt in zip(predict, target):
105
+ predict_flat = pred.data.cpu().numpy().reshape(-1)
106
+ target_flat = tgt.data.cpu().numpy().reshape(-1)
107
+
108
+ total.append(np.nan_to_num(average_precision_score(target_flat, predict_flat)))
109
+
110
+ return total
111
+
112
+
113
+ def batch_precision_recall(predict, target, thr=0.5):
114
+ """Batch Precision Recall
115
+ Args:
116
+ predict: input 4D tensor
117
+ target: label 4D tensor
118
+ """
119
+ # _, predict = torch.max(predict, 1)
120
+
121
+ predict = predict > thr
122
+ predict = predict.data.cpu().numpy() + 1
123
+ target = target.data.cpu().numpy() + 1
124
+
125
+ tp = np.sum(((predict == 2) * (target == 2)) * (target > 0))
126
+ fp = np.sum(((predict == 2) * (target == 1)) * (target > 0))
127
+ fn = np.sum(((predict == 1) * (target == 2)) * (target > 0))
128
+
129
+ precision = float(np.nan_to_num(tp / (tp + fp)))
130
+ recall = float(np.nan_to_num(tp / (tp + fn)))
131
+
132
+ return precision, recall
133
+
134
+
135
+ def batch_pix_accuracy(predict, target):
136
+ """Batch Pixel Accuracy
137
+ Args:
138
+ predict: input 3D tensor
139
+ target: label 3D tensor
140
+ """
141
+
142
+ # for thr in np.linspace(0, 1, slices):
143
+
144
+ _, predict = torch.max(predict, 0)
145
+ predict = predict.cpu().numpy() + 1
146
+ target = target.cpu().numpy() + 1
147
+ pixel_labeled = np.sum(target > 0)
148
+ pixel_correct = np.sum((predict == target) * (target > 0))
149
+ assert pixel_correct <= pixel_labeled, \
150
+ "Correct area should be smaller than Labeled"
151
+ return pixel_correct, pixel_labeled
152
+
153
+
154
+ def batch_intersection_union(predict, target, nclass):
155
+ """Batch Intersection of Union
156
+ Args:
157
+ predict: input 3D tensor
158
+ target: label 3D tensor
159
+ nclass: number of categories (int)
160
+ """
161
+ _, predict = torch.max(predict, 0)
162
+ mini = 1
163
+ maxi = nclass
164
+ nbins = nclass
165
+ predict = predict.cpu().numpy() + 1
166
+ target = target.cpu().numpy() + 1
167
+
168
+ predict = predict * (target > 0).astype(predict.dtype)
169
+ intersection = predict * (predict == target)
170
+ # areas of intersection and union
171
+ area_inter, _ = np.histogram(intersection, bins=nbins, range=(mini, maxi))
172
+ area_pred, _ = np.histogram(predict, bins=nbins, range=(mini, maxi))
173
+ area_lab, _ = np.histogram(target, bins=nbins, range=(mini, maxi))
174
+ area_union = area_pred + area_lab - area_inter
175
+ assert (area_inter <= area_union).all(), \
176
+ "Intersection area should be smaller than Union area"
177
+ return area_inter, area_union
178
+
179
+
180
+ # ref https://github.com/CSAILVision/sceneparsing/blob/master/evaluationCode/utils_eval.py
181
+ def pixel_accuracy(im_pred, im_lab):
182
+ im_pred = np.asarray(im_pred)
183
+ im_lab = np.asarray(im_lab)
184
+
185
+ # Remove classes from unlabeled pixels in gt image.
186
+ # We should not penalize detections in unlabeled portions of the image.
187
+ pixel_labeled = np.sum(im_lab > 0)
188
+ pixel_correct = np.sum((im_pred == im_lab) * (im_lab > 0))
189
+ # pixel_accuracy = 1.0 * pixel_correct / pixel_labeled
190
+ return pixel_correct, pixel_labeled
191
+
192
+
193
+ def intersection_and_union(im_pred, im_lab, num_class):
194
+ im_pred = np.asarray(im_pred)
195
+ im_lab = np.asarray(im_lab)
196
+ # Remove classes from unlabeled pixels in gt image.
197
+ im_pred = im_pred * (im_lab > 0)
198
+ # Compute area intersection:
199
+ intersection = im_pred * (im_pred == im_lab)
200
+ area_inter, _ = np.histogram(intersection, bins=num_class - 1,
201
+ range=(1, num_class - 1))
202
+ # Compute area union:
203
+ area_pred, _ = np.histogram(im_pred, bins=num_class - 1,
204
+ range=(1, num_class - 1))
205
+ area_lab, _ = np.histogram(im_lab, bins=num_class - 1,
206
+ range=(1, num_class - 1))
207
+ area_union = area_pred + area_lab - area_inter
208
+ return area_inter, area_union
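A minimal usage sketch for a few of the helpers above on a toy batch (shapes follow the docstrings; the data and import path are illustrative assumptions):

    import torch
    from utils.metrices import get_iou, batch_pix_accuracy, batch_intersection_union  # path assumed

    # get_iou expects binary masks shaped (N, 1, H, W); it returns one IoU per image.
    pred_mask = (torch.rand(2, 1, 8, 8) > 0.5).int()
    gt_mask = (torch.rand(2, 1, 8, 8) > 0.5).int()
    per_image_iou = get_iou(pred_mask, gt_mask)

    # batch_pix_accuracy / batch_intersection_union take a (K, H, W) score map and an (H, W) label map.
    scores = torch.rand(2, 8, 8)          # K = 2 classes
    labels = torch.randint(0, 2, (8, 8))
    correct, labeled = batch_pix_accuracy(scores, labels)
    inter, union = batch_intersection_union(scores, labels, nclass=2)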