Spaces:

AnnonSubmission
/

xai-cl

Sleeping

App Files Files Community

Annonymous committed on Nov 19, 2022

Commit

b157c29

•

1 Parent(s): 2c29c28

Upload 4 files

Browse files

Files changed (4) hide show

ssl_models/barlow_twins.py +77 -0
ssl_models/dino.py +184 -0
ssl_models/simclr2.py +214 -0
ssl_models/simsiam.py +91 -0

ssl_models/barlow_twins.py ADDED Viewed

	@@ -0,0 +1,77 @@

+import torch
+import torch.nn as nn
+import torchvision
# Module-wide target device: the GPU when CUDA is usable, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
# Attribution for the code below (kept as a bare string, as in the original).
"""from https://github.com/facebookresearch/barlowtwins"""
def off_diagonal(x):
    """Return the off-diagonal entries of a square matrix as a 1-D tensor.

    The entries come out in row-major order. A non-square input trips the
    assertion.
    """
    rows, cols = x.shape
    assert rows == cols
    # Dropping the final element and reshaping to (n - 1, n + 1) lines up every
    # remaining diagonal entry in column 0, so slicing that column off leaves
    # exactly the off-diagonal values.
    shifted = x.flatten()[:-1].view(rows - 1, rows + 1)
    without_diagonal = shifted[:, 1:]
    return without_diagonal.flatten()
class BarlowTwins(nn.Module):
    """Barlow Twins model: ResNet-50 backbone, MLP projector and the loss.

    Args:
        projector: dash-separated widths of the projector MLP layers. The
            default reproduces the previously hard-coded '8192-8192-8192'.
        lambd: weight of the off-diagonal (redundancy reduction) term. The
            default reproduces the previously hard-coded 0.0051.
    """

    def __init__(self, projector='8192-8192-8192', lambd=0.0051):
        super().__init__()
        self.lambd = lambd
        self.backbone = torchvision.models.resnet50(zero_init_residual=True)
        # The classifier is replaced so the backbone outputs its pooled features.
        self.backbone.fc = nn.Identity()
        # projector: (Linear -> BatchNorm -> ReLU) blocks, then a final Linear
        sizes = [2048] + [int(width) for width in projector.split('-')]
        layers = []
        for in_dim, out_dim in zip(sizes[:-2], sizes[1:-1]):
            layers.append(nn.Linear(in_dim, out_dim, bias=False))
            layers.append(nn.BatchNorm1d(out_dim))
            layers.append(nn.ReLU(inplace=True))
        layers.append(nn.Linear(sizes[-2], sizes[-1], bias=False))
        self.projector = nn.Sequential(*layers)
        # normalization layer for the representations z1 and z2
        self.bn = nn.BatchNorm1d(sizes[-1], affine=False)

    def forward(self, y1, y2):
        """Return the Barlow Twins loss for two batches of views.

        y1 and y2 are presumably image batches of equal size accepted by the
        torchvision ResNet-50 -- confirm against the caller.
        """
        z1 = self.projector(self.backbone(y1))
        z2 = self.projector(self.backbone(y2))
        # empirical cross-correlation matrix
        c = self.bn(z1).T @ self.bn(z2)
        # Average over the batch. Without this the entries scale with the batch
        # size and the diagonal can never approach the target value of 1.
        c.div_(z1.shape[0])
        # The diagonal is modified in place; the off-diagonal entries read on
        # the next line are untouched by it.
        on_diag = torch.diagonal(c).add_(-1).pow_(2).sum()
        off_diag = off_diagonal(c).pow_(2).sum()
        return on_diag + self.lambd * off_diag
class ResNet(nn.Module):
    """Feature extractor built from a backbone minus its last two children.

    For a torchvision ResNet the two dropped children are presumably the
    global pooling and classifier layers -- confirm against the backbone used.
    """

    def __init__(self, backbone):
        super().__init__()
        trunk = list(backbone.children())
        del trunk[-2:]
        self.net = nn.Sequential(*trunk)

    def forward(self, x):
        """Run the trunk and average its output over dimensions 2 and 3."""
        feature_map = self.net(x)
        return feature_map.mean(dim=[2, 3])
class RestructuredBarlowTwins(nn.Module):
    """Single-input view of a BarlowTwins model: encoder followed by projector.

    `model` must expose `backbone` and `projector` attributes; the projector
    module is reused as-is under the name `contrastive_head`.
    """

    def __init__(self, model):
        super().__init__()
        self.encoder = ResNet(model.backbone)
        self.contrastive_head = model.projector

    def forward(self, x):
        """Encode x and pass the features through the contrastive head."""
        features = self.encoder(x)
        return self.contrastive_head(features)
def get_barlow_twins_model(ckpt_path = 'barlow_twins.pth'):
    """Load a Barlow Twins checkpoint and return the restructured model.

    The file is read from 'pretrained_models/barlow_models/' + ckpt_path onto
    the CPU. Its 'model' entry is loaded into a fresh BarlowTwins after every
    "module." substring is removed from the parameter names. The wrapped model
    is moved to the module-level `device`.
    """
    model = BarlowTwins()
    checkpoint = torch.load('pretrained_models/barlow_models/' + ckpt_path, map_location='cpu')
    weights = {}
    for name, tensor in checkpoint['model'].items():
        weights[name.replace("module.", "")] = tensor
    model.load_state_dict(weights)
    return RestructuredBarlowTwins(model).to(device)

ssl_models/dino.py ADDED Viewed

	@@ -0,0 +1,184 @@

+import torch
+import torch.nn as nn
+import torchvision
+import torch.nn.functional as F
+import numpy as np
+import pathlib
import os

# Keep a handle on the genuine PosixPath class before any aliasing.
temp = pathlib.PosixPath
# Aliasing PosixPath to WindowsPath is presumably meant to let Windows unpickle
# checkpoints that contain PosixPath objects -- confirm with the checkpoint's
# origin. It is applied on Windows only: on other platforms WindowsPath cannot
# be instantiated, so the alias would break pathlib.Path() process-wide.
if os.name == "nt":
    pathlib.PosixPath = pathlib.WindowsPath
# Module-wide target device: the GPU when CUDA is usable, the CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
""" from https://github.com/facebookresearch/dino"""
class DINOHead(nn.Module):
    """DINO projection head: an MLP, L2 normalisation, then a weight-normalised
    bias-free linear output layer.

    Args:
        in_dim: input feature width.
        out_dim: width of the final output layer.
        use_bn: insert BatchNorm1d after each hidden Linear.
        norm_last_layer: freeze the output layer's weight-norm magnitude.
        nlayers: number of Linear layers in the MLP (values below 1 act as 1).
        hidden_dim: width of the hidden MLP layers.
        bottleneck_dim: width of the MLP output fed to the last layer.
    """

    def __init__(self, in_dim, out_dim, use_bn, norm_last_layer, nlayers, hidden_dim, bottleneck_dim):
        super().__init__()
        depth = max(nlayers, 1)
        if depth == 1:
            self.mlp = nn.Linear(in_dim, bottleneck_dim)
        else:
            def hidden_block(width_in):
                # Linear -> (optional BatchNorm) -> GELU
                block = [nn.Linear(width_in, hidden_dim)]
                if use_bn:
                    block.append(nn.BatchNorm1d(hidden_dim))
                block.append(nn.GELU())
                return block

            stack = hidden_block(in_dim)
            for _ in range(depth - 2):
                stack.extend(hidden_block(hidden_dim))
            stack.append(nn.Linear(hidden_dim, bottleneck_dim))
            self.mlp = nn.Sequential(*stack)
        output_layer = nn.Linear(bottleneck_dim, out_dim, bias=False)
        self.last_layer = nn.utils.weight_norm(output_layer)
        # The magnitude starts at 1 and is frozen when norm_last_layer is set.
        self.last_layer.weight_g.data.fill_(1)
        if norm_last_layer:
            self.last_layer.weight_g.requires_grad = False

    def forward(self, x):
        """Project x, L2-normalise along the last dimension, apply the last layer."""
        bottleneck = F.normalize(self.mlp(x), dim=-1, p=2)
        return self.last_layer(bottleneck)
class MultiCropWrapper(nn.Module):
    """Chain a backbone and a head into one module.

    The backbone passed in is modified in place: its `fc` and `head`
    attributes are both set to identity modules.
    """

    def __init__(self, backbone, head):
        super().__init__()
        fc_passthrough, head_passthrough = nn.Identity(), nn.Identity()
        backbone.fc = fc_passthrough
        backbone.head = head_passthrough
        self.backbone = backbone
        self.head = head

    def forward(self, x):
        """Return head(backbone(x))."""
        features = self.backbone(x)
        return self.head(features)
class DINOLoss(nn.Module):
    """Cross-entropy between a centred, sharpened teacher distribution and the
    student distribution.

    The teacher temperature schedule is a linear warm-up from
    `warmup_teacher_temp` to `teacher_temp` over `warmup_teacher_temp_epochs`
    entries, then `teacher_temp` up to `nepochs` entries. `forward` always
    reads the entry at index `nepochs - 1`. `center_momentum` is stored but
    not used by any method of this class.
    """

    def __init__(self, out_dim, warmup_teacher_temp, teacher_temp, warmup_teacher_temp_epochs, nepochs,
                 student_temp=0.1, center_momentum=0.9):
        super().__init__()
        self.student_temp = student_temp
        self.center_momentum = center_momentum
        self.register_buffer("center", torch.zeros(1, out_dim))
        self.nepochs = nepochs
        warmup = np.linspace(warmup_teacher_temp, teacher_temp, warmup_teacher_temp_epochs)
        plateau = np.ones(nepochs - warmup_teacher_temp_epochs) * teacher_temp
        self.teacher_temp_schedule = np.concatenate((warmup, plateau))

    def forward(self, student_output, teacher_output):
        """Return the batch mean of the teacher/student cross-entropy."""
        log_student = F.log_softmax(student_output / self.student_temp, dim=-1)
        final_temp = self.teacher_temp_schedule[self.nepochs - 1]    # last one
        centred = (teacher_output - self.center) / final_temp
        # The teacher distribution is a fixed target: no gradient flows into it.
        targets = F.softmax(centred, dim=-1).detach()
        per_sample = torch.sum(-targets * log_student, dim=-1)
        return per_sample.mean()
class ResNet(nn.Module):
    """Backbone stripped of its last two child modules, with spatial averaging.

    For a torchvision ResNet the two dropped children are presumably the
    pooling and classifier layers -- confirm against the backbone used.
    """

    def __init__(self, backbone):
        super().__init__()
        kept_children = list(backbone.children())[:-2]
        self.net = nn.Sequential(*kept_children)

    def forward(self, x):
        """Apply the kept layers and take the mean over dimensions 2 and 3."""
        activations = self.net(x)
        pooled = activations.mean(dim=[2, 3])
        return pooled
class RestructuredDINO(nn.Module):
    """Hold the student and teacher branches of DINO side by side.

    The teacher branch uses the unsuffixed names `encoder` and
    `contrastive_head`; the student branch uses the `_student` suffix. Each
    argument must expose `backbone` and `head` attributes.
    """

    def __init__(self, student, teacher):
        super().__init__()
        self.encoder_student = ResNet(student.backbone)
        self.encoder = ResNet(teacher.backbone)
        self.contrastive_head_student = student.head
        self.contrastive_head = teacher.head

    def forward(self, x, run_teacher):
        """Run x through the teacher branch if run_teacher is truthy, else the student."""
        if run_teacher:
            encoder, head = self.encoder, self.contrastive_head
        else:
            encoder, head = self.encoder_student, self.contrastive_head_student
        return head(encoder(x))
def get_dino_model_without_loss(ckpt_path = 'dino_resnet50_pretrain_full_checkpoint.pth'):
    """Load the student and teacher DINO networks from a checkpoint.

    The file is read from 'pretrained_models/dino_models/' + ckpt_path onto
    the CPU. Its 'student' and 'teacher' entries are loaded into ResNet-50
    backbones with DINO heads after every "module." substring is removed from
    the parameter names. The result is a RestructuredDINO on `device`.
    """
    checkpoint = torch.load('pretrained_models/dino_models/' + ckpt_path, map_location='cpu')
    raw_student = checkpoint['student']
    raw_teacher = checkpoint['teacher']

    def strip_module(weights):
        return {name.replace("module.", ""): tensor for name, tensor in weights.items()}

    student_weights = strip_module(raw_student)
    teacher_weights = strip_module(raw_teacher)
    student_backbone = torchvision.models.resnet50()
    teacher_backbone = torchvision.models.resnet50()
    # Read the feature width before MultiCropWrapper replaces `fc`.
    embed_dim = student_backbone.fc.weight.shape[1]
    head_config = dict(in_dim=embed_dim, out_dim=60000, use_bn=True, norm_last_layer=True,
                       nlayers=2, hidden_dim=4096, bottleneck_dim=256)
    student_head = DINOHead(**head_config)
    teacher_head = DINOHead(**head_config)
    # The weight-normalised output layer is swapped for a plain bias-free
    # Linear, presumably to match the parameter names stored in this
    # checkpoint -- verify against the checkpoint.
    for head in (student_head, teacher_head):
        head.last_layer = nn.Linear(256, 60000, bias = False)
    student = MultiCropWrapper(student_backbone, student_head)
    teacher = MultiCropWrapper(teacher_backbone, teacher_head)
    student.load_state_dict(student_weights)
    teacher.load_state_dict(teacher_weights)
    return RestructuredDINO(student, teacher).to(device)
def get_dino_model_with_loss(ckpt_path = 'dino_rn50_checkpoint.pth'):
    """Load the DINO networks and their loss module from a training checkpoint.

    The file is read from 'pretrained_models/dino_models/' + ckpt_path onto
    the CPU and must contain 'student', 'teacher', 'args' and 'dino_loss'.
    Head and loss hyper-parameters come from 'args', except the head depth
    (3), hidden width (2048) and bottleneck width (256), which are fixed here.

    Returns:
        (RestructuredDINO, DINOLoss), both moved to the module-level `device`.
    """
    checkpoint = torch.load('pretrained_models/dino_models/' + ckpt_path, map_location='cpu')
    raw_student = checkpoint['student']
    raw_teacher = checkpoint['teacher']
    train_args = vars(checkpoint['args'])
    loss_state = checkpoint['dino_loss']

    def strip_module(weights):
        return {name.replace("module.", ""): tensor for name, tensor in weights.items()}

    student_weights = strip_module(raw_student)
    teacher_weights = strip_module(raw_teacher)
    student_backbone = torchvision.models.resnet50()
    teacher_backbone = torchvision.models.resnet50()
    # Read the feature width before MultiCropWrapper replaces `fc`.
    embed_dim = student_backbone.fc.weight.shape[1]

    def build_head():
        return DINOHead(in_dim = embed_dim,
                        out_dim = train_args['out_dim'],
                        use_bn = train_args['use_bn_in_head'],
                        norm_last_layer = train_args['norm_last_layer'],
                        nlayers = 3,
                        hidden_dim = 2048,
                        bottleneck_dim = 256)

    student_head = build_head()
    teacher_head = build_head()
    loss = DINOLoss(out_dim = train_args['out_dim'],
                    warmup_teacher_temp = train_args['warmup_teacher_temp'],
                    teacher_temp = train_args['teacher_temp'],
                    warmup_teacher_temp_epochs = train_args['warmup_teacher_temp_epochs'],
                    nepochs = train_args['epochs'])
    student = MultiCropWrapper(student_backbone, student_head)
    teacher = MultiCropWrapper(teacher_backbone, teacher_head)
    student.load_state_dict(student_weights)
    teacher.load_state_dict(teacher_weights)
    loss.load_state_dict(loss_state)
    restructured_model = RestructuredDINO(student, teacher)
    return restructured_model.to(device), loss.to(device)

ssl_models/simclr2.py ADDED Viewed

	@@ -0,0 +1,214 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
# Module-wide target device: the GPU when CUDA is usable, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
# Attribution for the code below (kept as a bare string, as in the original).
"""
from https://github.com/Separius/SimCLRv2-Pytorch
"""
# Epsilon passed to the batch-norm layers built in this module.
BATCH_NORM_EPSILON = 1e-5
# Not referenced by the code visible here; kept for parity with the original.
BATCH_NORM_DECAY = 0.9  # == pytorch's default value as well
class BatchNormRelu(nn.Sequential):
    """BatchNorm2d followed by ReLU, or by an identity when relu is false.

    The second slot is always present, so the sequential has two children in
    either case.
    """

    def __init__(self, num_channels, relu=True):
        norm = nn.BatchNorm2d(num_channels, eps=BATCH_NORM_EPSILON)
        if relu:
            activation = nn.ReLU()
        else:
            activation = nn.Identity()
        super().__init__(norm, activation)
def conv(in_channels, out_channels, kernel_size=3, stride=1, bias=False):
    """Build a Conv2d padded by (kernel_size - 1) // 2 on each side.

    With an odd kernel and stride 1 this padding keeps the spatial size.
    """
    half_kernel = (kernel_size - 1) // 2
    return nn.Conv2d(
        in_channels,
        out_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=half_kernel,
        bias=bias,
    )
class SelectiveKernel(nn.Module):
    """Two-branch convolution whose branches are mixed by learned softmax weights.

    One convolution produces 2 * out_channels channels, which are split into
    two branches. A small 1x1 bottleneck on the spatially averaged branch sum
    produces per-channel logits for the softmax over the two branches.
    """

    def __init__(self, in_channels, out_channels, stride, sk_ratio, min_dim=32):
        super().__init__()
        assert sk_ratio > 0.0
        doubled = 2 * out_channels
        self.main_conv = nn.Sequential(conv(in_channels, doubled, stride=stride),
                                       BatchNormRelu(doubled))
        # The bottleneck width never drops below min_dim.
        mid_dim = max(int(out_channels * sk_ratio), min_dim)
        self.mixing_conv = nn.Sequential(conv(out_channels, mid_dim, kernel_size=1),
                                         BatchNormRelu(mid_dim),
                                         conv(mid_dim, doubled, kernel_size=1))

    def forward(self, x):
        """Return the softmax-weighted sum of the two branches."""
        features = self.main_conv(x)
        branches = torch.stack(torch.chunk(features, 2, dim=1), dim=0)  # 2, B, C, H, W
        pooled = branches.sum(dim=0).mean(dim=[2, 3], keepdim=True)
        logits = self.mixing_conv(pooled)
        logits = torch.stack(torch.chunk(logits, 2, dim=1), dim=0)  # 2, B, C, 1, 1
        attention = F.softmax(logits, dim=0)
        return (branches * attention).sum(dim=0)
class Projection(nn.Module):
    """Shortcut branch that changes channel count and stride, then batch-norms.

    With sk_ratio > 0 the stride is applied by a zero-padded 2x2 average pool
    placed before a 1x1 convolution; otherwise by a strided 1x1 convolution.
    """

    def __init__(self, in_channels, out_channels, stride, sk_ratio=0):
        super().__init__()
        if sk_ratio > 0:
            pad = nn.ZeroPad2d((0, 1, 0, 1))
            pool = nn.AvgPool2d(kernel_size=2, stride=stride, padding=0)
            pointwise = conv(in_channels, out_channels, kernel_size=1)
            shortcut = nn.Sequential(pad, pool, pointwise)
        else:
            shortcut = conv(in_channels, out_channels, kernel_size=1, stride=stride)
        self.shortcut = shortcut
        self.bn = BatchNormRelu(out_channels, relu=False)

    def forward(self, x):
        """Apply the shortcut branch, then batch norm without ReLU."""
        projected = self.shortcut(x)
        return self.bn(projected)
class BottleneckBlock(nn.Module):
    """Residual bottleneck: 1x1 reduce, 3x3 (or selective kernel), 1x1 expand.

    The output has out_channels * 4 channels. When use_projection is false
    the input is added back unchanged, so it must already have that shape.
    """

    expansion = 4

    def __init__(self, in_channels, out_channels, stride, sk_ratio=0, use_projection=False):
        super().__init__()
        if use_projection:
            self.projection = Projection(in_channels, out_channels * 4, stride, sk_ratio)
        else:
            self.projection = nn.Identity()
        reduce_stage = [conv(in_channels, out_channels, kernel_size=1), BatchNormRelu(out_channels)]
        if sk_ratio > 0:
            spatial_stage = [SelectiveKernel(out_channels, out_channels, stride, sk_ratio)]
        else:
            spatial_stage = [conv(out_channels, out_channels, stride=stride), BatchNormRelu(out_channels)]
        # No ReLU after the last batch norm: it is applied after the residual sum.
        expand_stage = [conv(out_channels, out_channels * 4, kernel_size=1),
                        BatchNormRelu(out_channels * 4, relu=False)]
        self.net = nn.Sequential(*reduce_stage, *spatial_stage, *expand_stage)

    def forward(self, x):
        """Return relu(shortcut(x) + net(x))."""
        residual = self.projection(x)
        return F.relu(residual + self.net(x))
class Blocks(nn.Module):
    """A stage of num_blocks bottleneck blocks.

    Only the first block uses a projection shortcut and the requested stride;
    the remaining blocks run at stride 1 on the expanded channel count, which
    is exposed as `channels_out`.
    """

    def __init__(self, num_blocks, in_channels, out_channels, stride, sk_ratio=0):
        super().__init__()
        entry_block = BottleneckBlock(in_channels, out_channels, stride, sk_ratio, True)
        self.blocks = nn.ModuleList([entry_block])
        self.channels_out = out_channels * BottleneckBlock.expansion
        remaining = num_blocks - 1
        while remaining > 0:
            self.blocks.append(BottleneckBlock(self.channels_out, out_channels, 1, sk_ratio))
            remaining -= 1

    def forward(self, x):
        """Apply every block in order."""
        out = x
        for block in self.blocks:
            out = block(out)
        return out
class Stem(nn.Sequential):
    """Input stem: strided convolution(s), batch norm + ReLU, then max pooling.

    With sk_ratio > 0 the stem is three 3x3 convolutions (the first strided);
    otherwise it is a single strided 7x7 convolution. Either way it ends at
    2 * (64 * width_multiplier // 2) channels.
    """

    def __init__(self, sk_ratio, width_multiplier):
        channels = 64 * width_multiplier // 2
        if sk_ratio > 0:
            layers = [
                conv(3, channels, stride=2),
                BatchNormRelu(channels),
                conv(channels, channels),
                BatchNormRelu(channels),
                conv(channels, channels * 2),
            ]
        else:
            layers = [conv(3, channels * 2, kernel_size=7, stride=2)]
        layers.append(BatchNormRelu(channels * 2))
        layers.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
        super().__init__(*layers)
class ResNet(nn.Module):
    """SimCLRv2 ResNet: a stem, four bottleneck stages and a 1000-way classifier.

    Args:
        layers: block count for each of the four stages (indices 0-3 are read).
        width_multiplier: scales the base widths 64/128/256/512.
        sk_ratio: selective-kernel ratio; 0 disables selective kernels.

    `channels_out` holds the channel count of the last stage.
    """

    def __init__(self, layers, width_multiplier, sk_ratio):
        super().__init__()
        stages = [Stem(sk_ratio, width_multiplier)]
        channels_in = 64 * width_multiplier
        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
        for index, (base_width, stride) in enumerate(stage_specs):
            stage = Blocks(layers[index], channels_in, base_width * width_multiplier, stride, sk_ratio)
            stages.append(stage)
            channels_in = stage.channels_out
        self.channels_out = channels_in
        self.net = nn.Sequential(*stages)
        self.fc = nn.Linear(channels_in, 1000)

    def forward(self, x, apply_fc=False):
        """Return features averaged over dims 2 and 3, through `fc` if apply_fc."""
        pooled = self.net(x).mean(dim=[2, 3])
        return self.fc(pooled) if apply_fc else pooled
class ContrastiveHead(nn.Module):
    """Projection MLP of bias-free Linear + BatchNorm1d layers.

    Every layer but the last keeps the width at channels_in and ends with a
    ReLU. The last layer maps to out_dim, has no ReLU, and has its batch-norm
    bias explicitly zero-initialised.
    """

    def __init__(self, channels_in, out_dim=128, num_layers=3):
        super().__init__()
        self.layers = nn.ModuleList()
        final_index = num_layers - 1
        for index in range(num_layers):
            is_final = index == final_index
            width = out_dim if is_final else channels_in
            self.layers.append(nn.Linear(channels_in, width, bias=False))
            norm = nn.BatchNorm1d(width, eps=BATCH_NORM_EPSILON, affine=True)
            if is_final:
                nn.init.zeros_(norm.bias)
            self.layers.append(norm)
            if not is_final:
                self.layers.append(nn.ReLU())

    def forward(self, x):
        """Apply every layer in order."""
        out = x
        for layer in self.layers:
            out = layer(out)
        return out
def get_resnet(depth=50, width_multiplier=1, sk_ratio=0):  # sk_ratio=0.0625 is recommended
    """Build a (ResNet, ContrastiveHead) pair for the given depth.

    Supported depths are 50, 101, 152 and 200; any other value raises
    KeyError.
    """
    blocks_per_stage = {
        50: [3, 4, 6, 3],
        101: [3, 4, 23, 3],
        152: [3, 8, 36, 3],
        200: [3, 24, 36, 3],
    }
    stage_depths = blocks_per_stage[depth]
    resnet = ResNet(stage_depths, width_multiplier, sk_ratio)
    head = ContrastiveHead(resnet.channels_out)
    return resnet, head
def name_to_params(checkpoint):
    """Parse (depth, width, sk_ratio) out of a SimCLRv2 checkpoint file name.

    The depth comes from the first of 'r50_', 'r101_', 'r152_' found in the
    name, the width from the first of '_1x_', '_2x_', '_3x_'. sk_ratio is
    0.0625 when the name contains '_sk1', else 0.

    Raises:
        NotImplementedError: if no depth tag, or no width tag, is present.
    """
    sk_ratio = 0.0625 if '_sk1' in checkpoint else 0

    depth_tags = (('r50_', 50), ('r101_', 101), ('r152_', 152))
    depth = next((value for tag, value in depth_tags if tag in checkpoint), None)
    if depth is None:
        raise NotImplementedError

    width_tags = (('_1x_', 1), ('_2x_', 2), ('_3x_', 3))
    width = next((value for tag, value in width_tags if tag in checkpoint), None)
    if width is None:
        raise NotImplementedError

    return depth, width, sk_ratio
class SimCLRv2(nn.Module):
    """Pair an encoder with a contrastive head and run them back to back."""

    def __init__(self, model, head):
        super().__init__()
        self.encoder = model
        self.contrastive_head = head

    def forward(self, x):
        """Return contrastive_head(encoder(x))."""
        features = self.encoder(x)
        return self.contrastive_head(features)
def get_simclr2_model(ckpt_path):
    """Load a SimCLRv2 checkpoint and return encoder + contrastive head.

    Depth, width and selective-kernel ratio are parsed from the file name by
    name_to_params. The file is read from
    'pretrained_models/simclr2_models/' + ckpt_path, and its 'resnet' and
    'head' entries are loaded into the freshly built networks.

    Args:
        ckpt_path: checkpoint file name inside the SimCLRv2 model directory.

    Returns:
        A SimCLRv2 module moved to the module-level `device`.

    Raises:
        NotImplementedError: if the file name carries no known depth/width tag.
    """
    depth, width, sk_ratio = name_to_params(ckpt_path)
    model, head = get_resnet(depth, width, sk_ratio)
    # Deserialize onto the CPU first, as the other loaders in this project do,
    # so a checkpoint saved from GPU tensors still loads on a CPU-only host.
    # The model is moved to `device` at the end.
    checkpoint = torch.load('pretrained_models/simclr2_models/' + ckpt_path, map_location='cpu')
    model.load_state_dict(checkpoint['resnet'])
    head.load_state_dict(checkpoint['head'])
    # The classifier is removed after loading. Calling the encoder with
    # apply_fc=True is no longer possible afterwards.
    del model.fc
    simclr2 = SimCLRv2(model, head)
    return simclr2.to(device)

ssl_models/simsiam.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import torch
+import torch.nn as nn
+import torchvision
# Module-wide target device: the GPU when CUDA is usable, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
# Attribution for the code below (kept as a bare string, as in the original).
"""from https://github.com/facebookresearch/simsiam"""
class SimSiam(nn.Module):
    """SimSiam model: an encoder with a 3-layer projector, plus a 2-layer predictor.

    Args:
        base_encoder: callable accepting `num_classes` and
            `zero_init_residual` keywords and returning a module with a
            Linear `fc` attribute.
        dim: feature dimension produced by the projector.
        pred_dim: hidden dimension of the predictor.
    """

    def __init__(self, base_encoder, dim, pred_dim):
        super(SimSiam, self).__init__()
        # create the encoder
        # num_classes is the output fc dimension, zero-initialize last BNs
        self.encoder = base_encoder(num_classes=dim, zero_init_residual=True)

        # build a 3-layer projector around the encoder's original fc layer
        output_fc = self.encoder.fc
        prev_dim = output_fc.weight.shape[1]
        projector_layers = [
            nn.Linear(prev_dim, prev_dim, bias=False),
            nn.BatchNorm1d(prev_dim),
            nn.ReLU(inplace=True),  # first layer
            nn.Linear(prev_dim, prev_dim, bias=False),
            nn.BatchNorm1d(prev_dim),
            nn.ReLU(inplace=True),  # second layer
            output_fc,
            nn.BatchNorm1d(dim, affine=False),  # output layer
        ]
        self.encoder.fc = nn.Sequential(*projector_layers)
        # hack: not use bias as it is followed by BN
        self.encoder.fc[6].bias.requires_grad = False

        # build a 2-layer predictor
        predictor_layers = [
            nn.Linear(dim, pred_dim, bias=False),
            nn.BatchNorm1d(pred_dim),
            nn.ReLU(inplace=True),  # hidden layer
            nn.Linear(pred_dim, dim),  # output layer
        ]
        self.predictor = nn.Sequential(*predictor_layers)

    def forward(self, x1, x2):
        """Return the symmetrised negative cosine similarity for two views.

        Both encoder outputs are detached before they reach the predictor, so
        no gradient reaches the encoder through this loss.
        """
        z1 = self.encoder(x1).detach()  # NxC
        z2 = self.encoder(x2).detach()  # NxC
        p1 = self.predictor(z1)  # NxC
        p2 = self.predictor(z2)  # NxC
        cosine = nn.CosineSimilarity(dim=1)
        first_term = cosine(p1, z2).mean()
        second_term = cosine(p2, z1).mean()
        return -(first_term + second_term) * 0.5
class ResNet(nn.Module):
    """Wrap all but the last two children of a backbone and pool spatially.

    For a torchvision ResNet the two dropped children are presumably the
    pooling and classifier layers -- confirm against the backbone used.
    """

    def __init__(self, backbone):
        super().__init__()
        children = list(backbone.children())
        self.net = nn.Sequential(*children[:-2])

    def forward(self, x):
        """Return the trunk output averaged over dimensions 2 and 3."""
        out = self.net(x)
        return out.mean(dim=[2, 3])
class RestructuredSimSiam(nn.Module):
    """Split a SimSiam model into encoder trunk, projector MLP and predictor.

    `model.encoder.fc` is reused as `mlp_encoder` and `model.predictor` as
    `contrastive_head`. The bias of projector layer 6 is kept frozen.
    """

    def __init__(self, model):
        super().__init__()
        self.encoder = ResNet(model.encoder)
        projector = model.encoder.fc
        self.mlp_encoder = projector
        self.mlp_encoder[6].bias.requires_grad = False
        self.contrastive_head = model.predictor

    def forward(self, x, run_head = True):
        """Return projector features, passed through the predictor if run_head."""
        # don't detach since we will do backprop for explainability
        features = self.mlp_encoder(self.encoder(x))
        if not run_head:
            return features
        return self.contrastive_head(features)
def get_simsiam(ckpt_path = 'checkpoint_0099.pth.tar'):
    """Load a SimSiam checkpoint and return the restructured model.

    A ResNet-50 SimSiam (dim 2048, predictor dim 512) is built, then the file
    at 'pretrained_models/simsiam_models/' + ckpt_path is read onto the CPU.
    Its 'state_dict' entry is loaded after every "module." substring is
    removed from the parameter names. The wrapped model is moved to `device`.
    """
    model = SimSiam(base_encoder = torchvision.models.resnet50,
                    dim = 2048,
                    pred_dim = 512)
    checkpoint = torch.load('pretrained_models/simsiam_models/'+ ckpt_path, map_location='cpu')
    weights = {}
    for name, tensor in checkpoint['state_dict'].items():
        weights[name.replace("module.", "")] = tensor
    model.load_state_dict(weights)
    return RestructuredSimSiam(model).to(device)