gavinyuan committed
Commit a104d3f
1 Parent(s): 523fb10

update: app.py import FSGenerator
app.py CHANGED
@@ -14,7 +14,7 @@ import numpy as np
 from PIL import Image
 import tqdm
 
-# from modules.networks.faceshifter import FSGenerator
+from modules.networks.faceshifter import FSGenerator
 # from inference.alignment import norm_crop, norm_crop_with_M, paste_back
 # from inference.utils import save, get_5_from_98, get_detector, get_lmk
 # from inference.PIPNet.lib.tools import get_lmk_model, demo_image
modules/layers/discriminator.py ADDED
@@ -0,0 +1,153 @@
+import numpy as np
+import torch
+import torch.nn as nn
+import torchvision
+
+
+def weights_init(m):
+    classname = m.__class__.__name__
+    if classname.find("Conv") != -1:
+        m.weight.data.normal_(0.0, 0.02)
+    elif classname.find("BatchNorm2d") != -1:
+        m.weight.data.normal_(1.0, 0.02)
+        m.bias.data.fill_(0)
+
+
+class MultiscaleDiscriminator(nn.Module):
+    def __init__(
+        self,
+        input_nc,
+        ndf=64,
+        n_layers=3,
+        norm_layer=nn.BatchNorm2d,
+        use_sigmoid=False,
+        num_D=3,
+        getIntermFeat=False,
+        finetune=False,
+    ):
+        super(MultiscaleDiscriminator, self).__init__()
+        self.num_D = num_D
+        self.n_layers = n_layers
+        self.getIntermFeat = getIntermFeat
+
+        for i in range(num_D):
+            netD = NLayerDiscriminator(
+                input_nc, ndf, n_layers, norm_layer, use_sigmoid, getIntermFeat
+            )
+            if getIntermFeat:
+                for j in range(n_layers + 2):
+                    setattr(
+                        self,
+                        "scale" + str(i) + "_layer" + str(j),
+                        getattr(netD, "model" + str(j)),
+                    )
+            else:
+                setattr(self, "layer" + str(i), netD.model)
+
+        self.downsample = nn.AvgPool2d(
+            3, stride=2, padding=[1, 1], count_include_pad=False
+        )
+        # weights_init inspects each submodule's class name, so it must be
+        # applied recursively; a bare weights_init(self) call would be a no-op.
+        self.apply(weights_init)
+
+        if finetune:
+            self.requires_grad_(False)
+            for name, param in self.named_parameters():
+                if 'layer0' in name:
+                    param.requires_grad = True
+
+    def singleD_forward(self, model, input):
+        if self.getIntermFeat:
+            result = [input]
+            for i in range(len(model)):
+                result.append(model[i](result[-1]))
+            return result[1:]
+        else:
+            return [model(input)]
+
+    def forward(self, input):
+        num_D = self.num_D
+        result = []
+        input_downsampled = input
+        for i in range(num_D):
+            if self.getIntermFeat:
+                model = [
+                    getattr(self, "scale" + str(num_D - 1 - i) + "_layer" + str(j))
+                    for j in range(self.n_layers + 2)
+                ]
+            else:
+                model = getattr(self, "layer" + str(num_D - 1 - i))
+            result.append(self.singleD_forward(model, input_downsampled))
+            if i != (num_D - 1):
+                input_downsampled = self.downsample(input_downsampled)
+        return result
+
+
+# Defines the PatchGAN discriminator with the specified arguments.
+class NLayerDiscriminator(nn.Module):
+    def __init__(
+        self,
+        input_nc,
+        ndf=64,
+        n_layers=3,
+        norm_layer=nn.BatchNorm2d,
+        use_sigmoid=False,
+        getIntermFeat=False,
+    ):
+        super(NLayerDiscriminator, self).__init__()
+        self.getIntermFeat = getIntermFeat
+        self.n_layers = n_layers
+
+        kw = 4
+        padw = int(np.ceil((kw - 1.0) / 2))
+        sequence = [
+            [
+                nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw),
+                nn.LeakyReLU(0.2, True),
+            ]
+        ]
+
+        nf = ndf
+        for n in range(1, n_layers):
+            nf_prev = nf
+            nf = min(nf * 2, 512)
+            sequence += [
+                [
+                    nn.Conv2d(nf_prev, nf, kernel_size=kw, stride=2, padding=padw),
+                    norm_layer(nf),
+                    nn.LeakyReLU(0.2, True),
+                ]
+            ]
+
+        nf_prev = nf
+        nf = min(nf * 2, 512)
+        sequence += [
+            [
+                nn.Conv2d(nf_prev, nf, kernel_size=kw, stride=1, padding=padw),
+                norm_layer(nf),
+                nn.LeakyReLU(0.2, True),
+            ]
+        ]
+
+        sequence += [[nn.Conv2d(nf, 1, kernel_size=kw, stride=1, padding=padw)]]
+
+        if use_sigmoid:
+            sequence += [[nn.Sigmoid()]]
+
+        if getIntermFeat:
+            for n in range(len(sequence)):
+                setattr(self, "model" + str(n), nn.Sequential(*sequence[n]))
+        else:
+            sequence_stream = []
+            for n in range(len(sequence)):
+                sequence_stream += sequence[n]
+            self.model = nn.Sequential(*sequence_stream)
+
+    def forward(self, input):
+        if self.getIntermFeat:
+            res = [input]
+            for n in range(self.n_layers + 2):
+                model = getattr(self, "model" + str(n))
+                res.append(model(res[-1]))
+            return res[1:]
+        else:
+            return self.model(input)
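
Usage sketch (editor's addition, not part of the commit): run the multiscale discriminator on random input and print the final patch-logit map per scale; shapes assume 256x256 RGB.

# Editor's sketch: exercising MultiscaleDiscriminator on random data.
import torch

netD = MultiscaleDiscriminator(input_nc=3, num_D=3, getIntermFeat=False)
x = torch.randn(2, 3, 256, 256)
with torch.no_grad():
    outs = netD(x)                # list with one entry per scale
for scale, out in enumerate(outs):
    print(scale, out[-1].shape)   # last element is the patch-logit map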
modules/layers/faceshifter/hear_layers.py ADDED
@@ -0,0 +1,60 @@
+import torch
+from torch import nn
+
+
+def conv4x4(in_c, out_c):
+    return nn.Sequential(
+        nn.Conv2d(in_c, out_c, kernel_size=4, stride=2, padding=1),
+        nn.BatchNorm2d(out_c),
+        nn.LeakyReLU(0.1, inplace=True),
+    )
+
+
+def deconv4x4(in_c, out_c):
+    return nn.Sequential(
+        nn.ConvTranspose2d(in_c, out_c, kernel_size=4, stride=2, padding=1),
+        nn.BatchNorm2d(out_c),
+        nn.LeakyReLU(0.1, inplace=True),
+    )
+
+
+class Hear_Net(nn.Module):
+    def __init__(self):
+        super(Hear_Net, self).__init__()
+        self.down1 = conv4x4(6, 64)
+        self.down2 = conv4x4(64, 128)
+        self.down3 = conv4x4(128, 256)
+        self.down4 = conv4x4(256, 512)
+        self.down5 = conv4x4(512, 512)
+
+        self.up1 = deconv4x4(512, 512)
+        self.up2 = deconv4x4(512 * 2, 256)
+        self.up3 = deconv4x4(256 * 2, 128)
+        self.up4 = deconv4x4(128 * 2, 64)
+        self.up5 = nn.Conv2d(64 * 2, 3, kernel_size=3, stride=1, padding=1)
+
+    def forward(self, x):  # input: (B, 6, 256, 256)
+        c1 = self.down1(x)
+        c2 = self.down2(c1)
+        c3 = self.down3(c2)
+        c4 = self.down4(c3)
+        c5 = self.down5(c4)
+
+        m1 = self.up1(c5)
+        m1 = torch.cat((c4, m1), dim=1)
+        m2 = self.up2(m1)
+        m2 = torch.cat((c3, m2), dim=1)
+        m3 = self.up3(m2)
+        m3 = torch.cat((c2, m3), dim=1)
+        m4 = self.up4(m3)
+        m4 = torch.cat((c1, m4), dim=1)
+
+        out = nn.functional.interpolate(m4, scale_factor=2, mode='bilinear', align_corners=True)
+        out = self.up5(out)
+        return torch.tanh(out)  # output: (B, 3, 256, 256)
+
+
+if __name__ == '__main__':
+    y_cat = torch.randn(5, 6, 256, 256)
+    hear = Hear_Net()
+    y_st = hear(y_cat)
+    print(y_st.shape)
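
For orientation, a sketch of how this network is fed in FaceShifter-style pipelines (an assumption based on the FaceShifter paper, not something this commit wires up): the 6 input channels are the stage-1 swap result concatenated with the heuristic error map of the target.

# Editor's sketch; Y_st and Y_tt are stand-ins for real stage-1 outputs.
import torch

Xt = torch.randn(2, 3, 256, 256)    # target frames
Y_st = torch.randn(2, 3, 256, 256)  # stage-1 swap result
Y_tt = torch.randn(2, 3, 256, 256)  # target reconstructed from itself
delta_Yt = Xt - Y_tt                # heuristic occlusion/error map
hear = Hear_Net()
refined = hear(torch.cat((Y_st, delta_Yt), dim=1))
print(refined.shape)                # torch.Size([2, 3, 256, 256])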
modules/layers/faceshifter/layers.py ADDED
@@ -0,0 +1,388 @@
+"""
+This file is only for testing mask regularization.
+If it works, it will be merged with `layers.py`.
+"""
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class AADLayer(nn.Module):
+    def __init__(self, c_x, attr_c, c_id=256):
+        super(AADLayer, self).__init__()
+        self.attr_c = attr_c
+        self.c_id = c_id
+        self.c_x = c_x
+
+        self.conv1 = nn.Conv2d(
+            attr_c, c_x, kernel_size=1, stride=1, padding=0, bias=True
+        )
+        self.conv2 = nn.Conv2d(
+            attr_c, c_x, kernel_size=1, stride=1, padding=0, bias=True
+        )
+        self.fc1 = nn.Linear(c_id, c_x)
+        self.fc2 = nn.Linear(c_id, c_x)
+        self.norm = nn.InstanceNorm2d(c_x, affine=False)
+
+        self.conv_h = nn.Conv2d(c_x, 1, kernel_size=1, stride=1, padding=0, bias=True)
+
+    def forward(self, h_in, z_attr, z_id):
+        # h_in:   (B, c_x, n, n)
+        # z_id:   (B, c_id)
+        # z_attr: (B, attr_c, n, n)
+        h = self.norm(h_in)
+        gamma_attr = self.conv1(z_attr)
+        beta_attr = self.conv2(z_attr)
+
+        gamma_id = self.fc1(z_id)
+        beta_id = self.fc2(z_id)
+        A = gamma_attr * h + beta_attr
+        gamma_id = gamma_id.reshape(h.shape[0], self.c_x, 1, 1).expand_as(h)
+        beta_id = beta_id.reshape(h.shape[0], self.c_x, 1, 1).expand_as(h)
+        I = gamma_id * h + beta_id
+
+        M = torch.sigmoid(self.conv_h(h))
+
+        out = (torch.ones_like(M).to(M.device) - M) * A + M * I
+        return out, torch.mean(torch.ones_like(M).to(M.device) - M, dim=[1, 2, 3])
+
+
+class AAD_ResBlk(nn.Module):
+    def __init__(self, cin, cout, c_attr, c_id=256):
+        super(AAD_ResBlk, self).__init__()
+        self.cin = cin
+        self.cout = cout
+
+        self.AAD1 = AADLayer(cin, c_attr, c_id)
+        self.conv1 = nn.Conv2d(cin, cin, kernel_size=3, stride=1, padding=1, bias=False)
+        self.relu1 = nn.ReLU(inplace=True)
+
+        self.AAD2 = AADLayer(cin, c_attr, c_id)
+        self.conv2 = nn.Conv2d(
+            cin, cout, kernel_size=3, stride=1, padding=1, bias=False
+        )
+        self.relu2 = nn.ReLU(inplace=True)
+
+        if cin != cout:
+            self.AAD3 = AADLayer(cin, c_attr, c_id)
+            self.conv3 = nn.Conv2d(
+                cin, cout, kernel_size=3, stride=1, padding=1, bias=False
+            )
+            self.relu3 = nn.ReLU(inplace=True)
+
+    def forward(self, h, z_attr, z_id):
+        x, m1_ = self.AAD1(h, z_attr, z_id)
+        x = self.relu1(x)
+        x = self.conv1(x)
+
+        x, m2_ = self.AAD2(x, z_attr, z_id)
+        x = self.relu2(x)
+        x = self.conv2(x)
+
+        m = m1_ + m2_
+
+        if self.cin != self.cout:
+            h, m3_ = self.AAD3(h, z_attr, z_id)
+            h = self.relu3(h)
+            h = self.conv3(h)
+            m += m3_
+        x = x + h
+
+        return x, m
+
+
+def weight_init(m):
+    if isinstance(m, nn.Linear):
+        m.weight.data.normal_(0, 0.001)
+        m.bias.data.zero_()
+    if isinstance(m, nn.Conv2d):
+        nn.init.xavier_normal_(m.weight.data)
+
+    if isinstance(m, nn.ConvTranspose2d):
+        nn.init.xavier_normal_(m.weight.data)
+
+
+def conv4x4(in_c, out_c, norm=nn.BatchNorm2d):
+    return nn.Sequential(
+        nn.Conv2d(
+            in_channels=in_c,
+            out_channels=out_c,
+            kernel_size=4,
+            stride=2,
+            padding=1,
+            bias=False,
+        ),
+        norm(out_c),
+        nn.LeakyReLU(0.1, inplace=True),
+    )
+
+
+class deconv4x4(nn.Module):
+    def __init__(self, in_c, out_c, norm=nn.BatchNorm2d):
+        super(deconv4x4, self).__init__()
+        self.deconv = nn.ConvTranspose2d(
+            in_channels=in_c,
+            out_channels=out_c,
+            kernel_size=4,
+            stride=2,
+            padding=1,
+            bias=False,
+        )
+        self.bn = norm(out_c)
+        self.lrelu = nn.LeakyReLU(0.1, inplace=True)
+
+    def forward(self, input, skip):
+        x = self.deconv(input)
+        x = self.bn(x)
+        x = self.lrelu(x)
+        return torch.cat((x, skip), dim=1)
+
+
+class MLAttrEncoder(nn.Module):
+    def __init__(self, finetune=False, downup=False):
+        super(MLAttrEncoder, self).__init__()
+
+        self.downup = downup
+        if self.downup:
+            self.conv00 = conv4x4(3, 16)
+            self.conv01 = conv4x4(16, 32)
+            self.deconv7 = deconv4x4(64, 16)
+
+        self.conv1 = conv4x4(3, 32)
+        self.conv2 = conv4x4(32, 64)
+        self.conv3 = conv4x4(64, 128)
+        self.conv4 = conv4x4(128, 256)
+        self.conv5 = conv4x4(256, 512)
+        self.conv6 = conv4x4(512, 1024)
+        self.conv7 = conv4x4(1024, 1024)
+
+        self.deconv1 = deconv4x4(1024, 1024)
+        self.deconv2 = deconv4x4(2048, 512)
+        self.deconv3 = deconv4x4(1024, 256)
+        self.deconv4 = deconv4x4(512, 128)
+        self.deconv5 = deconv4x4(256, 64)
+        self.deconv6 = deconv4x4(128, 32)
+
+        self.apply(weight_init)
+
+        self.finetune = finetune
+        if finetune:
+            for name, param in self.named_parameters():
+                param.requires_grad = False
+            if self.downup:
+                self.conv00.requires_grad_(True)
+                self.conv01.requires_grad_(True)
+                self.deconv7.requires_grad_(True)
+
+    def forward(self, Xt):
+        if self.downup:
+            feat0 = self.conv00(Xt)     # (16, 256, 256)
+            feat1 = self.conv01(feat0)  # (32, 128, 128)
+        else:
+            feat0 = None
+            feat1 = self.conv1(Xt)
+            # 32x128x128
+
+        feat2 = self.conv2(feat1)
+        # 64x64x64
+        feat3 = self.conv3(feat2)
+        # 128x32x32
+        feat4 = self.conv4(feat3)
+        # 256x16x16
+        feat5 = self.conv5(feat4)
+        # 512x8x8
+        feat6 = self.conv6(feat5)
+        # 1024x4x4
+
+        if self.downup:
+            z_attr1 = self.conv7(feat6)
+            # 1024x2x2
+            z_attr2 = self.deconv1(z_attr1, feat6)
+            z_attr3 = self.deconv2(z_attr2, feat5)
+            z_attr4 = self.deconv3(z_attr3, feat4)
+            z_attr5 = self.deconv4(z_attr4, feat3)
+            z_attr6 = self.deconv5(z_attr5, feat2)
+            z_attr7 = self.deconv6(z_attr6, feat1)  # (128,64,64)+(32,128,128)->(64,128,128)
+            z_attr8 = self.deconv7(z_attr7, feat0)  # (64,128,128)+(16,256,256)->(32,256,256)
+            z_attr9 = F.interpolate(
+                z_attr8, scale_factor=2, mode="bilinear", align_corners=True
+            )  # (32,512,512)
+            return (
+                z_attr1,
+                z_attr2,
+                z_attr3,
+                z_attr4,
+                z_attr5,
+                z_attr6,
+                z_attr7,
+                z_attr8,
+                z_attr9,
+            )
+        else:
+            z_attr1 = self.conv7(feat6)
+            # 1024x2x2
+            z_attr2 = self.deconv1(z_attr1, feat6)
+            z_attr3 = self.deconv2(z_attr2, feat5)
+            z_attr4 = self.deconv3(z_attr3, feat4)
+            z_attr5 = self.deconv4(z_attr4, feat3)
+            z_attr6 = self.deconv5(z_attr5, feat2)
+            z_attr7 = self.deconv6(z_attr6, feat1)
+            z_attr8 = F.interpolate(
+                z_attr7, scale_factor=2, mode="bilinear", align_corners=True
+            )
+            return (
+                z_attr1,
+                z_attr2,
+                z_attr3,
+                z_attr4,
+                z_attr5,
+                z_attr6,
+                z_attr7,
+                z_attr8,
+            )
+
+
+class AADGenerator(nn.Module):
+    def __init__(self, c_id=256, finetune=False, downup=False):
+        super(AADGenerator, self).__init__()
+        self.up1 = nn.ConvTranspose2d(c_id, 1024, kernel_size=2, stride=1, padding=0)
+        self.AADBlk1 = AAD_ResBlk(1024, 1024, 1024, c_id)
+        self.AADBlk2 = AAD_ResBlk(1024, 1024, 2048, c_id)
+        self.AADBlk3 = AAD_ResBlk(1024, 1024, 1024, c_id)
+        self.AADBlk4 = AAD_ResBlk(1024, 512, 512, c_id)
+        self.AADBlk5 = AAD_ResBlk(512, 256, 256, c_id)
+        self.AADBlk6 = AAD_ResBlk(256, 128, 128, c_id)
+        self.AADBlk7 = AAD_ResBlk(128, 64, 64, c_id)
+        self.AADBlk8 = AAD_ResBlk(64, 3, 64, c_id)
+
+        self.downup = downup
+        if downup:
+            self.AADBlk8_0 = AAD_ResBlk(64, 32, 32, c_id)
+            self.AADBlk8_1 = AAD_ResBlk(32, 3, 32, c_id)
+
+        self.apply(weight_init)
+
+        if finetune:
+            # note: this branch assumes downup=True, since only then do
+            # AADBlk8_0 / AADBlk8_1 exist
+            for name, param in self.named_parameters():
+                param.requires_grad = False
+            self.AADBlk8_0.requires_grad_(True)
+            self.AADBlk8_1.requires_grad_(True)
+
+    def forward(self, z_attr, z_id):
+        m = self.up1(z_id.reshape(z_id.shape[0], -1, 1, 1))
+        scale = z_attr[0].shape[2] // 2  # adaptive support for 512x512, 1024x1024
+        m = F.interpolate(m, scale_factor=scale, mode='bilinear', align_corners=True)
+        m2, m2_ = self.AADBlk1(m, z_attr[0], z_id)
+        m2 = F.interpolate(m2, scale_factor=2, mode="bilinear", align_corners=True)
+        m3, m3_ = self.AADBlk2(m2, z_attr[1], z_id)
+        m3 = F.interpolate(m3, scale_factor=2, mode="bilinear", align_corners=True)
+        m4, m4_ = self.AADBlk3(m3, z_attr[2], z_id)
+        m4 = F.interpolate(m4, scale_factor=2, mode="bilinear", align_corners=True)
+        m5, m5_ = self.AADBlk4(m4, z_attr[3], z_id)
+        m5 = F.interpolate(m5, scale_factor=2, mode="bilinear", align_corners=True)
+        m6, m6_ = self.AADBlk5(m5, z_attr[4], z_id)
+        m6 = F.interpolate(m6, scale_factor=2, mode="bilinear", align_corners=True)
+        m7, m7_ = self.AADBlk6(m6, z_attr[5], z_id)
+        m7 = F.interpolate(m7, scale_factor=2, mode="bilinear", align_corners=True)
+        m8, m8_ = self.AADBlk7(m7, z_attr[6], z_id)
+        m8 = F.interpolate(m8, scale_factor=2, mode="bilinear", align_corners=True)
+
+        if self.downup:
+            y0, m9_ = self.AADBlk8_0(m8, z_attr[7], z_id)
+            y0 = F.interpolate(y0, scale_factor=2, mode='bilinear', align_corners=True)
+            y1, m10_ = self.AADBlk8_1(y0, z_attr[8], z_id)
+            y = torch.tanh(y1)
+        else:
+            y, m9_ = self.AADBlk8(m8, z_attr[7], z_id)
+            y = torch.tanh(y)
+        return y  # , m  # yuange
+
+
+class AEI_Net(nn.Module):
+    def __init__(self, c_id=512, finetune=False, downup=False):
+        super(AEI_Net, self).__init__()
+        self.encoder = MLAttrEncoder(finetune=finetune, downup=downup)
+        self.generator = AADGenerator(c_id, finetune=finetune, downup=downup)
+
+    def forward(self, Xt, z_id):
+        attr = self.encoder(Xt)
+        Y = self.generator(attr, z_id)  # yuange
+        return Y, attr
+
+    def get_attr(self, X):
+        return self.encoder(X)
+
+    def trainable_params(self):
+        train_params = []
+        for param in self.parameters():
+            if param.requires_grad:
+                train_params.append(param)
+        return train_params
+
+
+if __name__ == "__main__":
+    aie = AEI_Net(512).eval()
+    x = aie(torch.randn(1, 3, 512, 512), torch.randn(1, 512))
+
+    # def numel(m: torch.nn.Module, only_trainable: bool = False):
+    #     """
+    #     returns the total number of parameters used by `m` (only counting
+    #     shared parameters once); if `only_trainable` is True, then only
+    #     includes parameters with `requires_grad = True`
+    #     """
+    #     parameters = list(m.parameters())
+    #     if only_trainable:
+    #         parameters = [p for p in parameters if p.requires_grad]
+    #     unique = {p.data_ptr(): p for p in parameters}.values()
+    #     return sum(p.numel() for p in unique)
+    #
+    # print(numel(aie, True))
+    # print(x[0].size())
+    # print(len(x[-1]))
+
+    # Profiling lives under the __main__ guard so importing this module
+    # does not trigger a forward pass.
+    import thop
+
+    img = torch.randn(1, 3, 256, 256)
+    latent = torch.randn(1, 512)
+    net = aie
+    flops, params = thop.profile(net, inputs=(img, latent), verbose=False)
+    print('#Params=%.2fM, GFLOPS=%.2f' % (params / 1e6, flops / 1e9))
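
A shape trace (editor's addition): for a 256x256 input with downup=False, the encoder emits an 8-level attribute pyramid from 1024x2x2 up to 64x256x256, and the generator decodes it back to image resolution.

# Editor's sketch: end-to-end shapes of AEI_Net at 256x256.
import torch

net = AEI_Net(c_id=512).eval()
with torch.no_grad():
    y, attr = net(torch.randn(1, 3, 256, 256), torch.randn(1, 512))
print(y.shape)             # torch.Size([1, 3, 256, 256])
for a in attr:
    print(tuple(a.shape))  # (1,1024,2,2) ... (1,64,128,128), (1,64,256,256)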
modules/layers/simswap/base_model.py ADDED
@@ -0,0 +1,140 @@
+import os
+import torch
+import sys
+
+
+class BaseModel(torch.nn.Module):
+    def name(self):
+        return 'BaseModel'
+
+    def initialize(self, opt):
+        self.opt = opt
+        self.gpu_ids = opt.gpu_ids
+        self.isTrain = opt.isTrain
+        self.Tensor = torch.cuda.FloatTensor if self.gpu_ids else torch.Tensor
+        self.save_dir = os.path.join(opt.checkpoints_dir, opt.name)
+
+    def set_input(self, input):
+        self.input = input
+
+    def forward(self):
+        pass
+
+    # used at test time, no backprop
+    def test(self):
+        pass
+
+    def get_image_paths(self):
+        pass
+
+    def optimize_parameters(self):
+        pass
+
+    def get_current_visuals(self):
+        return self.input
+
+    def get_current_errors(self):
+        return {}
+
+    def save(self, label):
+        pass
+
+    # helper saving function that can be used by subclasses
+    def save_network(self, network, network_label, epoch_label, gpu_ids=None):
+        save_filename = '{}_net_{}.pth'.format(epoch_label, network_label)
+        save_path = os.path.join(self.save_dir, save_filename)
+        torch.save(network.cpu().state_dict(), save_path)
+        if torch.cuda.is_available():
+            network.cuda()
+
+    def save_optim(self, network, network_label, epoch_label, gpu_ids=None):
+        save_filename = '{}_optim_{}.pth'.format(epoch_label, network_label)
+        save_path = os.path.join(self.save_dir, save_filename)
+        torch.save(network.state_dict(), save_path)
+
+    # helper loading function that can be used by subclasses
+    def load_network(self, network, network_label, epoch_label, save_dir=''):
+        save_filename = '%s_net_%s.pth' % (epoch_label, network_label)
+        if not save_dir:
+            save_dir = self.save_dir
+        save_path = os.path.join(save_dir, save_filename)
+        if not os.path.isfile(save_path):
+            print('%s does not exist yet!' % save_path)
+            if network_label == 'G':
+                # raising a bare string is invalid in Python 3
+                raise RuntimeError('Generator must exist!')
+        else:
+            try:
+                network.load_state_dict(torch.load(save_path))
+            except Exception:
+                pretrained_dict = torch.load(save_path)
+                model_dict = network.state_dict()
+                try:
+                    pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
+                    network.load_state_dict(pretrained_dict)
+                    if self.opt.verbose:
+                        print('Pretrained network %s has excessive layers; only loading layers that are used' % network_label)
+                except Exception:
+                    print('Pretrained network %s has fewer layers; the following are not initialized:' % network_label)
+                    for k, v in pretrained_dict.items():
+                        if v.size() == model_dict[k].size():
+                            model_dict[k] = v
+
+                    if sys.version_info >= (3, 0):
+                        not_initialized = set()
+                    else:
+                        from sets import Set
+                        not_initialized = Set()
+
+                    for k, v in model_dict.items():
+                        if k not in pretrained_dict or v.size() != pretrained_dict[k].size():
+                            not_initialized.add(k.split('.')[0])
+
+                    print(sorted(not_initialized))
+                    network.load_state_dict(model_dict)
+
+    # helper loading function that can be used by subclasses
+    def load_optim(self, network, network_label, epoch_label, save_dir=''):
+        save_filename = '%s_optim_%s.pth' % (epoch_label, network_label)
+        if not save_dir:
+            save_dir = self.save_dir
+        save_path = os.path.join(save_dir, save_filename)
+        if not os.path.isfile(save_path):
+            print('%s does not exist yet!' % save_path)
+            if network_label == 'G':
+                raise RuntimeError('Generator must exist!')
+        else:
+            try:
+                network.load_state_dict(torch.load(save_path, map_location=torch.device("cpu")))
+            except Exception:
+                pretrained_dict = torch.load(save_path, map_location=torch.device("cpu"))
+                model_dict = network.state_dict()
+                try:
+                    pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
+                    network.load_state_dict(pretrained_dict)
+                    if self.opt.verbose:
+                        print('Pretrained network %s has excessive layers; only loading layers that are used' % network_label)
+                except Exception:
+                    print('Pretrained network %s has fewer layers; the following are not initialized:' % network_label)
+                    for k, v in pretrained_dict.items():
+                        if v.size() == model_dict[k].size():
+                            model_dict[k] = v
+
+                    if sys.version_info >= (3, 0):
+                        not_initialized = set()
+                    else:
+                        from sets import Set
+                        not_initialized = Set()
+
+                    for k, v in model_dict.items():
+                        if k not in pretrained_dict or v.size() != pretrained_dict[k].size():
+                            not_initialized.add(k.split('.')[0])
+
+                    print(sorted(not_initialized))
+                    network.load_state_dict(model_dict)
+
+    def update_learning_rate(self):
+        pass
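
A minimal subclass sketch (editor's addition; the opt namespace below is illustrative, mirroring the usual pix2pixHD-style options such as gpu_ids, isTrain, checkpoints_dir, name, and verbose):

# Editor's sketch: the contract BaseModel expects from subclasses.
from types import SimpleNamespace
import torch.nn as nn

class ToyModel(BaseModel):
    def name(self):
        return 'ToyModel'

    def initialize(self, opt):
        BaseModel.initialize(self, opt)
        self.netG = nn.Linear(4, 4)

opt = SimpleNamespace(gpu_ids=[], isTrain=True, verbose=False,
                      checkpoints_dir='./checkpoints', name='toy')
model = ToyModel()
model.initialize(opt)
# model.save_network(model.netG, 'G', 'latest')  # -> ./checkpoints/toy/latest_net_G.pth
# model.load_network(model.netG, 'G', 'latest')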
modules/layers/simswap/fs_networks_fix.py ADDED
@@ -0,0 +1,223 @@
+"""
+Copyright (C) 2019 NVIDIA Corporation. All rights reserved.
+Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
+"""
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import kornia  # kept for the (commented) warp_affine alignment path below
+
+
+class InstanceNorm(nn.Module):
+    def __init__(self, epsilon=1e-8):
+        """
+        @notice: avoid in-place ops.
+        https://discuss.pytorch.org/t/encounter-the-runtimeerror-one-of-the-variables-needed-for-gradient-computation-has-been-modified-by-an-inplace-operation/836/3
+        """
+        super(InstanceNorm, self).__init__()
+        self.epsilon = epsilon
+
+    def forward(self, x):
+        x = x - torch.mean(x, (2, 3), True)
+        tmp = torch.mul(x, x)  # or x ** 2
+        tmp = torch.rsqrt(torch.mean(tmp, (2, 3), True) + self.epsilon)
+        return x * tmp
+
+
+class ApplyStyle(nn.Module):
+    """
+    @ref: https://github.com/lernapparat/lernapparat/blob/master/style_gan/pytorch_style_gan.ipynb
+    """
+    def __init__(self, latent_size, channels):
+        super(ApplyStyle, self).__init__()
+        self.linear = nn.Linear(latent_size, channels * 2)
+
+    def forward(self, x, latent):
+        style = self.linear(latent)  # style => [batch_size, n_channels*2]
+        shape = [-1, 2, x.size(1), 1, 1]
+        style = style.view(shape)    # [batch_size, 2, n_channels, ...]
+        # x = x * (style[:, 0] + 1.) + style[:, 1]
+        x = x * (style[:, 0] * 1 + 1.) + style[:, 1] * 1
+        return x
+
+
+class ResnetBlock_Adain(nn.Module):
+    def __init__(self, dim, latent_size, padding_type, activation=nn.ReLU(True)):
+        super(ResnetBlock_Adain, self).__init__()
+
+        p = 0
+        conv1 = []
+        if padding_type == 'reflect':
+            conv1 += [nn.ReflectionPad2d(1)]
+        elif padding_type == 'replicate':
+            conv1 += [nn.ReplicationPad2d(1)]
+        elif padding_type == 'zero':
+            p = 1
+        else:
+            raise NotImplementedError('padding [%s] is not implemented' % padding_type)
+        conv1 += [nn.Conv2d(dim, dim, kernel_size=3, padding=p), InstanceNorm()]
+        self.conv1 = nn.Sequential(*conv1)
+        self.style1 = ApplyStyle(latent_size, dim)
+        self.act1 = activation
+
+        p = 0
+        conv2 = []
+        if padding_type == 'reflect':
+            conv2 += [nn.ReflectionPad2d(1)]
+        elif padding_type == 'replicate':
+            conv2 += [nn.ReplicationPad2d(1)]
+        elif padding_type == 'zero':
+            p = 1
+        else:
+            raise NotImplementedError('padding [%s] is not implemented' % padding_type)
+        conv2 += [nn.Conv2d(dim, dim, kernel_size=3, padding=p), InstanceNorm()]
+        self.conv2 = nn.Sequential(*conv2)
+        self.style2 = ApplyStyle(latent_size, dim)
+
+    def forward(self, x, dlatents_in_slice):
+        y = self.conv1(x)
+        y = self.style1(y, dlatents_in_slice)
+        y = self.act1(y)
+        y = self.conv2(y)
+        y = self.style2(y, dlatents_in_slice)
+        out = x + y
+        return out
+
+
+class Generator_Adain_Upsample(nn.Module):
+    def __init__(self, input_nc, output_nc, latent_size, n_blocks=6, deep=False,
+                 norm_layer=nn.BatchNorm2d,
+                 padding_type='reflect',
+                 mouth_net_param: dict = None,
+                 ):
+        assert n_blocks >= 0
+        super(Generator_Adain_Upsample, self).__init__()
+
+        self.latent_size = latent_size
+
+        # guard against the default None so the module can be built without
+        # a MouthNet configuration
+        self.mouth_net_param = mouth_net_param
+        if mouth_net_param and mouth_net_param.get('use'):
+            self.latent_size += mouth_net_param.get('feature_dim')
+
+        activation = nn.ReLU(True)
+
+        self.deep = deep
+
+        self.first_layer = nn.Sequential(nn.ReflectionPad2d(3), nn.Conv2d(input_nc, 64, kernel_size=7, padding=0),
+                                         norm_layer(64), activation)
+        ### downsample
+        self.down1 = nn.Sequential(nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
+                                   norm_layer(128), activation)
+        self.down2 = nn.Sequential(nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1),
+                                   norm_layer(256), activation)
+        self.down3 = nn.Sequential(nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1),
+                                   norm_layer(512), activation)
+
+        if self.deep:
+            self.down4 = nn.Sequential(nn.Conv2d(512, 512, kernel_size=3, stride=2, padding=1),
+                                       norm_layer(512), activation)
+
+        ### resnet blocks
+        BN = []
+        for i in range(n_blocks):
+            BN += [
+                ResnetBlock_Adain(512, latent_size=self.latent_size,
+                                  padding_type=padding_type, activation=activation)]
+        self.BottleNeck = nn.Sequential(*BN)
+
+        if self.deep:
+            self.up4 = nn.Sequential(
+                nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
+                nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
+                nn.BatchNorm2d(512), activation
+            )
+        self.up3 = nn.Sequential(
+            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
+            nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1),
+            nn.BatchNorm2d(256), activation
+        )
+        self.up2 = nn.Sequential(
+            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
+            nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1),
+            nn.BatchNorm2d(128), activation
+        )
+        self.up1 = nn.Sequential(
+            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
+            nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1),
+            nn.BatchNorm2d(64), activation
+        )
+        self.last_layer = nn.Sequential(nn.ReflectionPad2d(3), nn.Conv2d(64, output_nc, kernel_size=7, padding=0))
+
+        self.register_buffer(
+            name="trans_matrix",
+            tensor=torch.tensor(
+                [
+                    [
+                        [1.07695457, -0.03625215, -1.56352194],
+                        [0.03625215, 1.07695457, -5.32134629],
+                    ]
+                ],
+                requires_grad=False,
+            ).float(),
+        )
+
+    def forward(self, source, target, net_arc, mouth_net=None):
+        x = target  # 3*224*224
+        if net_arc is None:
+            id_vector = source
+        else:
+            with torch.no_grad():
+                ''' 1. get id '''
+                # M = self.trans_matrix.repeat(source.size()[0], 1, 1)
+                # source = kornia.geometry.transform.warp_affine(source, M, (256, 256))
+                resize_input = F.interpolate(source, size=112, mode="bilinear", align_corners=True)
+                id_vector = F.normalize(net_arc(resize_input), dim=-1, p=2)
+
+                ''' 2. get mouth feature '''
+                if mouth_net is not None:
+                    w1, h1, w2, h2 = self.mouth_net_param.get('crop_param')
+                    mouth_input = resize_input[:, :, h1:h2, w1:w2]
+                    mouth_feat = mouth_net(mouth_input)
+                    id_vector = torch.cat([id_vector, mouth_feat], dim=-1)  # (B, dim_id+dim_mouth)
+
+        skip1 = self.first_layer(x)
+        skip2 = self.down1(skip1)
+        skip3 = self.down2(skip2)
+        if self.deep:
+            skip4 = self.down3(skip3)
+            x = self.down4(skip4)
+        else:
+            x = self.down3(skip3)
+        bot = []
+        bot.append(x)
+        features = []
+        for i in range(len(self.BottleNeck)):
+            x = self.BottleNeck[i](x, id_vector)
+            bot.append(x)
+
+        if self.deep:
+            x = self.up4(x)
+            features.append(x)
+        x = self.up3(x)
+        features.append(x)
+        x = self.up2(x)
+        features.append(x)
+        x = self.up1(x)
+        features.append(x)
+        x = self.last_layer(x)
+        # x = (x + 1) / 2
+
+        # return x, bot, features, dlatents
+        return x
+
+
+if __name__ == "__main__":
+    import thop
+
+    img = torch.randn(1, 3, 256, 256)
+    latent = torch.randn(1, 512)
+    net = Generator_Adain_Upsample(input_nc=3, output_nc=3, latent_size=512, n_blocks=9,
+                                   mouth_net_param={"use": False})
+    flops, params = thop.profile(net, inputs=(latent, img, None, None), verbose=False)
+    print('#Params=%.2fM, GFLOPS=%.2f' % (params / 1e6, flops / 1e9))
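
Usage sketch (editor's addition): forward has two identity paths, selected by net_arc.

# Editor's sketch: Generator_Adain_Upsample's two identity paths.
import torch
import torch.nn.functional as F

G = Generator_Adain_Upsample(input_nc=3, output_nc=3, latent_size=512,
                             n_blocks=9, mouth_net_param={"use": False}).eval()
target = torch.randn(1, 3, 256, 256)

# Path 1: a precomputed, L2-normalized id vector (net_arc=None).
id_vec = F.normalize(torch.randn(1, 512), dim=-1)
with torch.no_grad():
    y = G(id_vec, target, net_arc=None)
print(y.shape)  # torch.Size([1, 3, 256, 256])

# Path 2 (not run here): pass the source image together with an
# ArcFace-style embedder; forward resizes it to 112x112 internally.
# y = G(source_img, target, net_arc=arcface_backbone)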
modules/layers/simswap/pg_modules/blocks.py ADDED
@@ -0,0 +1,325 @@
+import functools
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.nn.utils import spectral_norm
+
+
+### single layers
+
+
+def conv2d(*args, **kwargs):
+    return spectral_norm(nn.Conv2d(*args, **kwargs))
+
+
+def convTranspose2d(*args, **kwargs):
+    return spectral_norm(nn.ConvTranspose2d(*args, **kwargs))
+
+
+def embedding(*args, **kwargs):
+    return spectral_norm(nn.Embedding(*args, **kwargs))
+
+
+def linear(*args, **kwargs):
+    return spectral_norm(nn.Linear(*args, **kwargs))
+
+
+def NormLayer(c, mode='batch'):
+    if mode == 'group':
+        return nn.GroupNorm(c // 2, c)
+    elif mode == 'batch':
+        return nn.BatchNorm2d(c)
+
+
+### Activations
+
+
+class GLU(nn.Module):
+    def forward(self, x):
+        nc = x.size(1)
+        assert nc % 2 == 0, "channels don't divide 2!"
+        nc = nc // 2
+        return x[:, :nc] * torch.sigmoid(x[:, nc:])
+
+
+class Swish(nn.Module):
+    def forward(self, feat):
+        return feat * torch.sigmoid(feat)
+
+
+### Upblocks
+
+
+class InitLayer(nn.Module):
+    def __init__(self, nz, channel, sz=4):
+        super().__init__()
+
+        self.init = nn.Sequential(
+            convTranspose2d(nz, channel * 2, sz, 1, 0, bias=False),
+            NormLayer(channel * 2),
+            GLU(),
+        )
+
+    def forward(self, noise):
+        noise = noise.view(noise.shape[0], -1, 1, 1)
+        return self.init(noise)
+
+
+def UpBlockSmall(in_planes, out_planes):
+    block = nn.Sequential(
+        nn.Upsample(scale_factor=2, mode='nearest'),
+        conv2d(in_planes, out_planes * 2, 3, 1, 1, bias=False),
+        NormLayer(out_planes * 2), GLU())
+    return block
+
+
+class UpBlockSmallCond(nn.Module):
+    def __init__(self, in_planes, out_planes, z_dim):
+        super().__init__()
+        self.in_planes = in_planes
+        self.out_planes = out_planes
+        self.up = nn.Upsample(scale_factor=2, mode='nearest')
+        self.conv = conv2d(in_planes, out_planes * 2, 3, 1, 1, bias=False)
+
+        which_bn = functools.partial(CCBN, which_linear=linear, input_size=z_dim)
+        self.bn = which_bn(2 * out_planes)
+        self.act = GLU()
+
+    def forward(self, x, c):
+        x = self.up(x)
+        x = self.conv(x)
+        x = self.bn(x, c)
+        x = self.act(x)
+        return x
+
+
+def UpBlockBig(in_planes, out_planes):
+    block = nn.Sequential(
+        nn.Upsample(scale_factor=2, mode='nearest'),
+        conv2d(in_planes, out_planes * 2, 3, 1, 1, bias=False),
+        NoiseInjection(),
+        NormLayer(out_planes * 2), GLU(),
+        conv2d(out_planes, out_planes * 2, 3, 1, 1, bias=False),
+        NoiseInjection(),
+        NormLayer(out_planes * 2), GLU()
+    )
+    return block
+
+
+class UpBlockBigCond(nn.Module):
+    def __init__(self, in_planes, out_planes, z_dim):
+        super().__init__()
+        self.in_planes = in_planes
+        self.out_planes = out_planes
+        self.up = nn.Upsample(scale_factor=2, mode='nearest')
+        self.conv1 = conv2d(in_planes, out_planes * 2, 3, 1, 1, bias=False)
+        self.conv2 = conv2d(out_planes, out_planes * 2, 3, 1, 1, bias=False)
+
+        which_bn = functools.partial(CCBN, which_linear=linear, input_size=z_dim)
+        self.bn1 = which_bn(2 * out_planes)
+        self.bn2 = which_bn(2 * out_planes)
+        self.act = GLU()
+        self.noise = NoiseInjection()
+
+    def forward(self, x, c):
+        # block 1
+        x = self.up(x)
+        x = self.conv1(x)
+        x = self.noise(x)
+        x = self.bn1(x, c)
+        x = self.act(x)
+
+        # block 2
+        x = self.conv2(x)
+        x = self.noise(x)
+        x = self.bn2(x, c)
+        x = self.act(x)
+
+        return x
+
+
+class SEBlock(nn.Module):
+    def __init__(self, ch_in, ch_out):
+        super().__init__()
+        self.main = nn.Sequential(
+            nn.AdaptiveAvgPool2d(4),
+            conv2d(ch_in, ch_out, 4, 1, 0, bias=False),
+            Swish(),
+            conv2d(ch_out, ch_out, 1, 1, 0, bias=False),
+            nn.Sigmoid(),
+        )
+
+    def forward(self, feat_small, feat_big):
+        return feat_big * self.main(feat_small)
+
+
+### Downblocks
+
+
+class SeparableConv2d(nn.Module):
+    def __init__(self, in_channels, out_channels, kernel_size, bias=False):
+        super(SeparableConv2d, self).__init__()
+        self.depthwise = conv2d(in_channels, in_channels, kernel_size=kernel_size,
+                                groups=in_channels, bias=bias, padding=1)
+        self.pointwise = conv2d(in_channels, out_channels,
+                                kernel_size=1, bias=bias)
+
+    def forward(self, x):
+        out = self.depthwise(x)
+        out = self.pointwise(out)
+        return out
+
+
+class DownBlock(nn.Module):
+    def __init__(self, in_planes, out_planes, separable=False):
+        super().__init__()
+        if not separable:
+            self.main = nn.Sequential(
+                conv2d(in_planes, out_planes, 4, 2, 1),
+                NormLayer(out_planes),
+                nn.LeakyReLU(0.2, inplace=True),
+            )
+        else:
+            self.main = nn.Sequential(
+                SeparableConv2d(in_planes, out_planes, 3),
+                NormLayer(out_planes),
+                nn.LeakyReLU(0.2, inplace=True),
+                nn.AvgPool2d(2, 2),
+            )
+
+    def forward(self, feat):
+        return self.main(feat)
+
+
+class DownBlockPatch(nn.Module):
+    def __init__(self, in_planes, out_planes, separable=False):
+        super().__init__()
+        self.main = nn.Sequential(
+            DownBlock(in_planes, out_planes, separable),
+            conv2d(out_planes, out_planes, 1, 1, 0, bias=False),
+            NormLayer(out_planes),
+            nn.LeakyReLU(0.2, inplace=True),
+        )
+
+    def forward(self, feat):
+        return self.main(feat)
+
+
+### CSM
+
+
+class ResidualConvUnit(nn.Module):
+    # `activation` and `bn` are accepted for interface compatibility but unused.
+    def __init__(self, cin, activation, bn):
+        super().__init__()
+        self.conv = nn.Conv2d(cin, cin, kernel_size=3, stride=1, padding=1, bias=True)
+        self.skip_add = nn.quantized.FloatFunctional()
+
+    def forward(self, x):
+        return self.skip_add.add(self.conv(x), x)
+
+
+class FeatureFusionBlock(nn.Module):
+    # `deconv`, `bn`, and `lowest` are accepted for interface compatibility but unused.
+    def __init__(self, features, activation, deconv=False, bn=False, expand=False, align_corners=True, lowest=False):
+        super().__init__()
+
+        self.deconv = deconv
+        self.align_corners = align_corners
+
+        self.expand = expand
+        out_features = features
+        if self.expand:
+            out_features = features // 2
+
+        self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1)
+        self.skip_add = nn.quantized.FloatFunctional()
+
+    def forward(self, *xs):
+        output = xs[0]
+
+        if len(xs) == 2:
+            output = self.skip_add.add(output, xs[1])
+
+        output = nn.functional.interpolate(
+            output, scale_factor=2, mode="bilinear", align_corners=self.align_corners
+        )
+
+        output = self.out_conv(output)
+
+        return output
+
+
+### Misc
+
+
+class NoiseInjection(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.weight = nn.Parameter(torch.zeros(1), requires_grad=True)
+
+    def forward(self, feat, noise=None):
+        if noise is None:
+            batch, _, height, width = feat.shape
+            noise = torch.randn(batch, 1, height, width).to(feat.device)
+
+        return feat + self.weight * noise
+
+
+class CCBN(nn.Module):
+    ''' conditional batchnorm '''
+    def __init__(self, output_size, input_size, which_linear, eps=1e-5, momentum=0.1):
+        super().__init__()
+        self.output_size, self.input_size = output_size, input_size
+
+        # Prepare gain and bias layers
+        self.gain = which_linear(input_size, output_size)
+        self.bias = which_linear(input_size, output_size)
+
+        # epsilon to avoid dividing by 0
+        self.eps = eps
+        # Momentum
+        self.momentum = momentum
+
+        self.register_buffer('stored_mean', torch.zeros(output_size))
+        self.register_buffer('stored_var', torch.ones(output_size))
+
+    def forward(self, x, y):
+        # Calculate class-conditional gains and biases
+        gain = (1 + self.gain(y)).view(y.size(0), -1, 1, 1)
+        bias = self.bias(y).view(y.size(0), -1, 1, 1)
+        out = F.batch_norm(x, self.stored_mean, self.stored_var, None, None,
+                           self.training, 0.1, self.eps)
+        return out * gain + bias
+
+
+class Interpolate(nn.Module):
+    """Interpolation module."""
+
+    def __init__(self, size, mode='bilinear', align_corners=False):
+        """Init.
+        Args:
+            size (int or tuple): target output size
+            mode (str): interpolation mode
+        """
+        super(Interpolate, self).__init__()
+
+        self.interp = nn.functional.interpolate
+        self.size = size
+        self.mode = mode
+        self.align_corners = align_corners
+
+    def forward(self, x):
+        """Forward pass.
+        Args:
+            x (tensor): input
+        Returns:
+            tensor: interpolated data
+        """
+
+        x = self.interp(
+            x,
+            size=self.size,
+            mode=self.mode,
+            align_corners=self.align_corners,
+        )
+
+        return x
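
Composition sketch (editor's addition): the FastGAN-style pieces above chain directly; the channel counts below are arbitrary.

# Editor's sketch: wiring InitLayer, UpBlockSmall, SEBlock, and DownBlock.
import torch

init = InitLayer(nz=256, channel=512)            # noise -> 512 x 4 x 4
up1, up2 = UpBlockSmall(512, 256), UpBlockSmall(256, 128)
se = SEBlock(512, 128)                           # gate big maps with small ones
down = DownBlock(128, 64)

z = torch.randn(2, 256)
f4 = init(z)               # (2, 512, 4, 4)
f8 = up1(f4)               # (2, 256, 8, 8)
f16 = se(f4, up2(f8))      # (2, 128, 16, 16), modulated by the 4x4 features
print(down(f16).shape)     # torch.Size([2, 64, 8, 8])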
modules/layers/simswap/pg_modules/diffaug.py ADDED
@@ -0,0 +1,76 @@
+# Differentiable Augmentation for Data-Efficient GAN Training
+# Shengyu Zhao, Zhijian Liu, Ji Lin, Jun-Yan Zhu, and Song Han
+# https://arxiv.org/pdf/2006.10738
+
+import torch
+import torch.nn.functional as F
+
+
+def DiffAugment(x, policy='', channels_first=True):
+    if policy:
+        if not channels_first:
+            x = x.permute(0, 3, 1, 2)
+        for p in policy.split(','):
+            for f in AUGMENT_FNS[p]:
+                x = f(x)
+        if not channels_first:
+            x = x.permute(0, 2, 3, 1)
+        x = x.contiguous()
+    return x
+
+
+def rand_brightness(x):
+    x = x + (torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) - 0.5)
+    return x
+
+
+def rand_saturation(x):
+    x_mean = x.mean(dim=1, keepdim=True)
+    x = (x - x_mean) * (torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) * 2) + x_mean
+    return x
+
+
+def rand_contrast(x):
+    x_mean = x.mean(dim=[1, 2, 3], keepdim=True)
+    x = (x - x_mean) * (torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) + 0.5) + x_mean
+    return x
+
+
+def rand_translation(x, ratio=0.125):
+    shift_x, shift_y = int(x.size(2) * ratio + 0.5), int(x.size(3) * ratio + 0.5)
+    translation_x = torch.randint(-shift_x, shift_x + 1, size=[x.size(0), 1, 1], device=x.device)
+    translation_y = torch.randint(-shift_y, shift_y + 1, size=[x.size(0), 1, 1], device=x.device)
+    grid_batch, grid_x, grid_y = torch.meshgrid(
+        torch.arange(x.size(0), dtype=torch.long, device=x.device),
+        torch.arange(x.size(2), dtype=torch.long, device=x.device),
+        torch.arange(x.size(3), dtype=torch.long, device=x.device),
+    )
+    grid_x = torch.clamp(grid_x + translation_x + 1, 0, x.size(2) + 1)
+    grid_y = torch.clamp(grid_y + translation_y + 1, 0, x.size(3) + 1)
+    x_pad = F.pad(x, [1, 1, 1, 1, 0, 0, 0, 0])
+    x = x_pad.permute(0, 2, 3, 1).contiguous()[grid_batch, grid_x, grid_y].permute(0, 3, 1, 2)
+    return x
+
+
+def rand_cutout(x, ratio=0.2):
+    cutout_size = int(x.size(2) * ratio + 0.5), int(x.size(3) * ratio + 0.5)
+    offset_x = torch.randint(0, x.size(2) + (1 - cutout_size[0] % 2), size=[x.size(0), 1, 1], device=x.device)
+    offset_y = torch.randint(0, x.size(3) + (1 - cutout_size[1] % 2), size=[x.size(0), 1, 1], device=x.device)
+    grid_batch, grid_x, grid_y = torch.meshgrid(
+        torch.arange(x.size(0), dtype=torch.long, device=x.device),
+        torch.arange(cutout_size[0], dtype=torch.long, device=x.device),
+        torch.arange(cutout_size[1], dtype=torch.long, device=x.device),
+    )
+    grid_x = torch.clamp(grid_x + offset_x - cutout_size[0] // 2, min=0, max=x.size(2) - 1)
+    grid_y = torch.clamp(grid_y + offset_y - cutout_size[1] // 2, min=0, max=x.size(3) - 1)
+    mask = torch.ones(x.size(0), x.size(2), x.size(3), dtype=x.dtype, device=x.device)
+    mask[grid_batch, grid_x, grid_y] = 0
+    x = x * mask.unsqueeze(1)
+    return x
+
+
+AUGMENT_FNS = {
+    'color': [rand_brightness, rand_saturation, rand_contrast],
+    'translation': [rand_translation],
+    'cutout': [rand_cutout],
+}
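
Usage sketch (editor's addition): the paper's recipe applies the same policy to real and generated batches inside the discriminator step.

# Editor's sketch: DiffAugment in a discriminator update.
import torch

policy = 'color,translation,cutout'
real = torch.rand(4, 3, 64, 64) * 2 - 1
fake = torch.rand(4, 3, 64, 64) * 2 - 1   # stand-in for G(z)

d_real = DiffAugment(real, policy=policy)
d_fake = DiffAugment(fake, policy=policy)
print(d_real.shape, d_fake.shape)         # shapes are unchanged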
modules/layers/simswap/pg_modules/projected_discriminator.py ADDED
@@ -0,0 +1,191 @@
+from functools import partial
+import numpy as np
+import torch
+import torch.nn as nn
+
+from modules.layers.simswap.pg_modules.blocks import DownBlock, DownBlockPatch, conv2d
+from modules.layers.simswap.pg_modules.projector import F_RandomProj
+from modules.layers.simswap.pg_modules.diffaug import DiffAugment
+
+
+class SingleDisc(nn.Module):
+    def __init__(self, nc=None, ndf=None, start_sz=256, end_sz=8, head=None, separable=False, patch=False):
+        super().__init__()
+        channel_dict = {4: 512, 8: 512, 16: 256, 32: 128, 64: 64, 128: 64,
+                        256: 32, 512: 16, 1024: 8}
+
+        # interpolate for start sizes that are not powers of two
+        if start_sz not in channel_dict.keys():
+            sizes = np.array(list(channel_dict.keys()))
+            start_sz = sizes[np.argmin(abs(sizes - start_sz))]
+        self.start_sz = start_sz
+
+        # if given ndf, allocate all layers with the same ndf
+        if ndf is None:
+            nfc = channel_dict
+        else:
+            nfc = {k: ndf for k, v in channel_dict.items()}
+
+        # for feature map discriminators with nfc not in channel_dict
+        # this is the case for the pretrained backbone (midas.pretrained)
+        if nc is not None and head is None:
+            nfc[start_sz] = nc
+
+        layers = []
+
+        # Head if the initial input is the full modality
+        if head:
+            layers += [conv2d(nc, nfc[256], 3, 1, 1, bias=False),
+                       nn.LeakyReLU(0.2, inplace=True)]
+
+        # Down Blocks
+        DB = partial(DownBlockPatch, separable=separable) if patch else partial(DownBlock, separable=separable)
+        while start_sz > end_sz:
+            layers.append(DB(nfc[start_sz], nfc[start_sz // 2]))
+            start_sz = start_sz // 2
+
+        layers.append(conv2d(nfc[end_sz], 1, 4, 1, 0, bias=False))
+        self.main = nn.Sequential(*layers)
+
+    def forward(self, x, c):
+        # `c` is unused here; it is kept so conditional and unconditional
+        # discriminators share a call signature.
+        return self.main(x)
+
+
+class SingleDiscCond(nn.Module):
+    def __init__(self, nc=None, ndf=None, start_sz=256, end_sz=8, head=None, separable=False, patch=False, c_dim=1000, cmap_dim=64, embedding_dim=128):
+        super().__init__()
+        self.cmap_dim = cmap_dim
+
+        # midas channels
+        channel_dict = {4: 512, 8: 512, 16: 256, 32: 128, 64: 64, 128: 64,
+                        256: 32, 512: 16, 1024: 8}
+
+        # interpolate for start sizes that are not powers of two
+        if start_sz not in channel_dict.keys():
+            sizes = np.array(list(channel_dict.keys()))
+            start_sz = sizes[np.argmin(abs(sizes - start_sz))]
+        self.start_sz = start_sz
+
+        # if given ndf, allocate all layers with the same ndf
+        if ndf is None:
+            nfc = channel_dict
+        else:
+            nfc = {k: ndf for k, v in channel_dict.items()}
+
+        # for feature map discriminators with nfc not in channel_dict
+        # this is the case for the pretrained backbone (midas.pretrained)
+        if nc is not None and head is None:
+            nfc[start_sz] = nc
+
+        layers = []
+
+        # Head if the initial input is the full modality
+        if head:
+            layers += [conv2d(nc, nfc[256], 3, 1, 1, bias=False),
+                       nn.LeakyReLU(0.2, inplace=True)]
+
+        # Down Blocks
+        DB = partial(DownBlockPatch, separable=separable) if patch else partial(DownBlock, separable=separable)
+        while start_sz > end_sz:
+            layers.append(DB(nfc[start_sz], nfc[start_sz // 2]))
+            start_sz = start_sz // 2
+        self.main = nn.Sequential(*layers)
+
+        # additions for conditioning on class information
+        self.cls = conv2d(nfc[end_sz], self.cmap_dim, 4, 1, 0, bias=False)
+        self.embed = nn.Embedding(num_embeddings=c_dim, embedding_dim=embedding_dim)
+        self.embed_proj = nn.Sequential(
+            nn.Linear(self.embed.embedding_dim, self.cmap_dim),
+            nn.LeakyReLU(0.2, inplace=True),
+        )
+
+    def forward(self, x, c):
+        h = self.main(x)
+        out = self.cls(h)
+
+        # conditioning via projection
+        cmap = self.embed_proj(self.embed(c.argmax(1))).unsqueeze(-1).unsqueeze(-1)
+        out = (out * cmap).sum(dim=1, keepdim=True) * (1 / np.sqrt(self.cmap_dim))
+
+        return out
+
+
+class MultiScaleD(nn.Module):
+    def __init__(
+        self,
+        channels,
+        resolutions,
+        num_discs=4,
+        proj_type=2,  # 0 = no projection, 1 = cross channel mixing, 2 = cross scale mixing
+        cond=0,
+        separable=False,
+        patch=False,
+        **kwargs,
+    ):
+        super().__init__()
+
+        assert num_discs in [1, 2, 3, 4]
+
+        # the first disc is on the lowest level of the backbone
+        self.disc_in_channels = channels[:num_discs]
+        self.disc_in_res = resolutions[:num_discs]
+        Disc = SingleDiscCond if cond else SingleDisc
+
+        mini_discs = []
+        for i, (cin, res) in enumerate(zip(self.disc_in_channels, self.disc_in_res)):
+            start_sz = res if not patch else 16
+            mini_discs.append((str(i), Disc(nc=cin, start_sz=start_sz, end_sz=8, separable=separable, patch=patch)))
+        self.mini_discs = nn.ModuleDict(mini_discs)
+
+    def forward(self, features, c):
+        all_logits = []
+        for k, disc in self.mini_discs.items():
+            res = disc(features[k], c).view(features[k].size(0), -1)
+            all_logits.append(res)
+
+        all_logits = torch.cat(all_logits, dim=1)
+        return all_logits
+
+
+class ProjectedDiscriminator(torch.nn.Module):
+    def __init__(
+        self,
+        diffaug=True,
+        interp224=True,
+        backbone_kwargs={},
+        **kwargs
+    ):
+        super().__init__()
+        self.diffaug = diffaug
+        self.interp224 = interp224
+        self.feature_network = F_RandomProj(**backbone_kwargs)
+        self.discriminator = MultiScaleD(
+            channels=self.feature_network.CHANNELS,
+            resolutions=self.feature_network.RESOLUTIONS,
+            **backbone_kwargs,
+        )
+
+    def train(self, mode=True):
+        # the pretrained feature network stays frozen in eval mode
+        self.feature_network = self.feature_network.train(False)
+        self.discriminator = self.discriminator.train(mode)
+        return self
+
+    def eval(self):
+        return self.train(False)
+
+    def get_feature(self, x):
+        features = self.feature_network(x, get_features=True)
+        return features
+
+    def forward(self, x, c):
+        # if self.diffaug:
+        #     x = DiffAugment(x, policy='color,translation,cutout')
+
+        # if self.interp224:
+        #     x = F.interpolate(x, 224, mode='bilinear', align_corners=False)
+
+        features, backbone_features = self.feature_network(x)
+        logits = self.discriminator(features, c)
+
+        return logits, backbone_features
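
Usage sketch (editor's addition): MultiScaleD can be exercised standalone with fake backbone features; the channel/resolution values are illustrative. ProjectedDiscriminator itself additionally needs the EfficientNet-Lite0 checkpoint hard-coded in projector.py.

# Editor's sketch: feeding MultiScaleD a dict of fake pyramid features.
import torch

channels, resolutions = [32, 64, 128, 256], [64, 32, 16, 8]
D = MultiScaleD(channels=channels, resolutions=resolutions, num_discs=4)
feats = {str(i): torch.randn(2, c, r, r)
         for i, (c, r) in enumerate(zip(channels, resolutions))}
logits = D(feats, c=None)
print(logits.shape)  # (2, total patch logits concatenated across scales)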
modules/layers/simswap/pg_modules/projector.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import timm
4
+ from modules.layers.simswap.pg_modules.blocks import FeatureFusionBlock
5
+
6
+
7
+ def _make_scratch_ccm(scratch, in_channels, cout, expand=False):
+     # shapes
+     out_channels = [cout, cout*2, cout*4, cout*8] if expand else [cout]*4
+
+     scratch.layer0_ccm = nn.Conv2d(in_channels[0], out_channels[0], kernel_size=1, stride=1, padding=0, bias=True)
+     scratch.layer1_ccm = nn.Conv2d(in_channels[1], out_channels[1], kernel_size=1, stride=1, padding=0, bias=True)
+     scratch.layer2_ccm = nn.Conv2d(in_channels[2], out_channels[2], kernel_size=1, stride=1, padding=0, bias=True)
+     scratch.layer3_ccm = nn.Conv2d(in_channels[3], out_channels[3], kernel_size=1, stride=1, padding=0, bias=True)
+
+     scratch.CHANNELS = out_channels
+
+     return scratch
+
+
+ def _make_scratch_csm(scratch, in_channels, cout, expand):
+     scratch.layer3_csm = FeatureFusionBlock(in_channels[3], nn.ReLU(False), expand=expand, lowest=True)
+     scratch.layer2_csm = FeatureFusionBlock(in_channels[2], nn.ReLU(False), expand=expand)
+     scratch.layer1_csm = FeatureFusionBlock(in_channels[1], nn.ReLU(False), expand=expand)
+     scratch.layer0_csm = FeatureFusionBlock(in_channels[0], nn.ReLU(False))
+
+     # the last refinenet does not expand, to save channels at higher resolutions
+     scratch.CHANNELS = [cout, cout, cout*2, cout*4] if expand else [cout]*4
+
+     return scratch
+
+
+ def _make_efficientnet(model):
+     pretrained = nn.Module()
+     pretrained.layer0 = nn.Sequential(model.conv_stem, model.bn1, model.act1, *model.blocks[0:2])
+     pretrained.layer1 = nn.Sequential(*model.blocks[2:3])
+     pretrained.layer2 = nn.Sequential(*model.blocks[3:5])
+     pretrained.layer3 = nn.Sequential(*model.blocks[5:9])
+     return pretrained
+
+
+ def calc_channels(pretrained, inp_res=224):
+     channels = []
+     tmp = torch.zeros(1, 3, inp_res, inp_res)
+
+     # forward pass
+     tmp = pretrained.layer0(tmp)
+     channels.append(tmp.shape[1])
+     tmp = pretrained.layer1(tmp)
+     channels.append(tmp.shape[1])
+     tmp = pretrained.layer2(tmp)
+     channels.append(tmp.shape[1])
+     tmp = pretrained.layer3(tmp)
+     channels.append(tmp.shape[1])
+
+     return channels
+
+
+ def _make_projector(im_res, cout, proj_type, expand=False):
+     assert proj_type in [0, 1, 2], "Invalid projection type"
+
+     ### Build pretrained feature network
+     model = timm.create_model('tf_efficientnet_lite0', pretrained=False,
+                               checkpoint_path='/gavin/code/FaceSwapping/modules/third_party/efficientnet/'
+                                               'tf_efficientnet_lite0-0aa007d2.pth')
+     pretrained = _make_efficientnet(model)
+
+     # Determine the resolution of the feature maps; this is later used to calculate the number
+     # of down blocks in the discriminators. Interestingly, the best results are achieved
+     # by fixing this to 256, i.e., we use the same number of down blocks per discriminator
+     # independent of the dataset resolution.
+     im_res = 256
+     pretrained.RESOLUTIONS = [im_res//4, im_res//8, im_res//16, im_res//32]
+     pretrained.CHANNELS = calc_channels(pretrained)
+
+     if proj_type == 0: return pretrained, None
+
+     ### Build CCM
+     scratch = nn.Module()
+     scratch = _make_scratch_ccm(scratch, in_channels=pretrained.CHANNELS, cout=cout, expand=expand)
+     pretrained.CHANNELS = scratch.CHANNELS
+
+     if proj_type == 1: return pretrained, scratch
+
+     ### Build CSM
+     scratch = _make_scratch_csm(scratch, in_channels=scratch.CHANNELS, cout=cout, expand=expand)
+
+     # CSM upsamples x2, so the feature map resolution doubles
+     pretrained.RESOLUTIONS = [res*2 for res in pretrained.RESOLUTIONS]
+     pretrained.CHANNELS = scratch.CHANNELS
+
+     return pretrained, scratch
+
+
+ class F_RandomProj(nn.Module):
+     def __init__(
+         self,
+         im_res=256,
+         cout=64,
+         expand=True,
+         proj_type=2,  # 0 = no projection, 1 = cross channel mixing, 2 = cross scale mixing
+         **kwargs,
+     ):
+         super().__init__()
+         self.proj_type = proj_type
+         self.cout = cout
+         self.expand = expand
+
+         # build pretrained feature network and random decoder (scratch)
+         self.pretrained, self.scratch = _make_projector(im_res=im_res, cout=self.cout, proj_type=self.proj_type, expand=self.expand)
+         self.CHANNELS = self.pretrained.CHANNELS
+         self.RESOLUTIONS = self.pretrained.RESOLUTIONS
+
+     def forward(self, x, get_features=False):
+         # predict feature maps
+         out0 = self.pretrained.layer0(x)
+         out1 = self.pretrained.layer1(out0)
+         out2 = self.pretrained.layer2(out1)
+         out3 = self.pretrained.layer3(out2)
+
+         # start enumerating at the lowest layer (this is where we put the first discriminator)
+         backbone_features = {
+             '0': out0,
+             '1': out1,
+             '2': out2,
+             '3': out3,
+         }
+         if get_features:
+             return backbone_features
+
+         if self.proj_type == 0: return backbone_features
+
+         out0_channel_mixed = self.scratch.layer0_ccm(backbone_features['0'])
+         out1_channel_mixed = self.scratch.layer1_ccm(backbone_features['1'])
+         out2_channel_mixed = self.scratch.layer2_ccm(backbone_features['2'])
+         out3_channel_mixed = self.scratch.layer3_ccm(backbone_features['3'])
+
+         out = {
+             '0': out0_channel_mixed,
+             '1': out1_channel_mixed,
+             '2': out2_channel_mixed,
+             '3': out3_channel_mixed,
+         }
+
+         if self.proj_type == 1: return out
+
+         # from bottom to top
+         out3_scale_mixed = self.scratch.layer3_csm(out3_channel_mixed)
+         out2_scale_mixed = self.scratch.layer2_csm(out3_scale_mixed, out2_channel_mixed)
+         out1_scale_mixed = self.scratch.layer1_csm(out2_scale_mixed, out1_channel_mixed)
+         out0_scale_mixed = self.scratch.layer0_csm(out1_scale_mixed, out0_channel_mixed)
+
+         out = {
+             '0': out0_scale_mixed,
+             '1': out1_scale_mixed,
+             '2': out2_scale_mixed,
+             '3': out3_scale_mixed,
+         }
+
+         return out, backbone_features
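A minimal, hypothetical usage sketch of F_RandomProj (it assumes the EfficientNet-Lite0 checkpoint path above resolves): with proj_type=2 the forward pass returns both the cross-scale-mixed pyramid and the raw backbone features.

    proj = F_RandomProj(im_res=256, cout=64, expand=True, proj_type=2)
    x = torch.randn(1, 3, 256, 256)
    mixed, backbone = proj(x)
    for key in mixed:
        # four pyramid levels; channels follow proj.CHANNELS, sizes follow proj.RESOLUTIONS
        print(key, mixed[key].shape, backbone[key].shape)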
modules/layers/smoothswap/id_embedder.py ADDED
@@ -0,0 +1,50 @@
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+
+ from modules.layers.smoothswap.resnet import resnet50
+
+
+ class IdentityHead(nn.Module):
+     def __init__(self):
+         super(IdentityHead, self).__init__()
+         self.fc1 = nn.Sequential(
+             nn.Linear(512 * 4, 1024),
+             nn.BatchNorm1d(num_features=1024),
+             nn.LeakyReLU(negative_slope=0.2, inplace=True)
+         )
+         self.fc2 = nn.Sequential(
+             nn.Linear(1024, 512),
+             nn.BatchNorm1d(num_features=512)
+         )
+
+         for m in self.modules():
+             if isinstance(m, (nn.BatchNorm2d,)):
+                 nn.init.constant_(m.weight, 1)
+                 nn.init.constant_(m.bias, 0)
+
+     def forward(self, x):
+         x = self.fc1(x)
+         x = self.fc2(x)
+         x = F.normalize(x)
+         return x
+
+
+ class IdentityEmbedder(nn.Module):
+     def __init__(self):
+         super(IdentityEmbedder, self).__init__()
+
+         self.backbone = resnet50(pretrained=False)
+         self.head = IdentityHead()
+
+     def forward(self, x_src):
+         x_src = self.backbone(x_src)
+         x_src = self.head(x_src)
+         return x_src
+
+
+ if __name__ == '__main__':
+     img = torch.randn((11, 3, 256, 256)).cuda()
+     net = IdentityEmbedder().cuda()
+     out = net(img)
+     print(out.shape)
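Because IdentityHead ends with F.normalize, the embeddings are unit-norm, so identity similarity reduces to a dot product. A small illustrative check (CPU, random weights):

    emb = IdentityEmbedder().eval()
    with torch.no_grad():
        e1 = emb(torch.randn(2, 3, 256, 256))
        e2 = emb(torch.randn(2, 3, 256, 256))
    cos = (e1 * e2).sum(dim=1)  # equals F.cosine_similarity(e1, e2) for unit-norm vectors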
modules/layers/smoothswap/resnet.py ADDED
@@ -0,0 +1,359 @@
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ from torch.hub import load_state_dict_from_url  # used by _resnet when pretrained=True
+
+
+ __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
+            'resnet152', 'resnext50_32x4d', 'resnext101_32x8d',
+            'wide_resnet50_2', 'wide_resnet101_2']
+
+
+ model_urls = {
+     'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
+     'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
+     'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
+     'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
+     'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
+     'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
+     'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
+     'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth',
+     'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth',
+ }
+
+
+ def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
+     """3x3 convolution with padding"""
+     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
+                      padding=dilation, groups=groups, bias=False, dilation=dilation)
+
+
+ def conv1x1(in_planes, out_planes, stride=1):
+     """1x1 convolution"""
+     return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
+
+
+ class BasicBlock(nn.Module):
+     expansion = 1
+
+     def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
+                  base_width=64, dilation=1, norm_layer=None):
+         super(BasicBlock, self).__init__()
+         if norm_layer is None:
+             norm_layer = nn.BatchNorm2d
+         if groups != 1 or base_width != 64:
+             raise ValueError('BasicBlock only supports groups=1 and base_width=64')
+         if dilation > 1:
+             raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
+         # Both self.conv1 and self.downsample layers downsample the input when stride != 1
+         self.conv1 = conv3x3(inplanes, planes, stride)
+         self.bn1 = norm_layer(planes)
+         self.relu = nn.ReLU(inplace=True)
+         self.conv2 = conv3x3(planes, planes)
+         self.bn2 = norm_layer(planes)
+         self.downsample = downsample
+         self.stride = stride
+
+     def forward(self, x):
+         identity = x
+
+         out = self.conv1(x)
+         out = self.bn1(out)
+         out = self.relu(out)
+
+         out = self.conv2(out)
+         out = self.bn2(out)
+
+         if self.downsample is not None:
+             identity = self.downsample(x)
+
+         out += identity
+         out = self.relu(out)
+
+         return out
+
+
+ class Bottleneck(nn.Module):
+     # Bottleneck in torchvision places the stride for downsampling at the 3x3 convolution (self.conv2),
+     # while the original implementation places the stride at the first 1x1 convolution (self.conv1)
+     # according to "Deep Residual Learning for Image Recognition" (https://arxiv.org/abs/1512.03385).
+     # This variant is also known as ResNet V1.5 and improves accuracy according to
+     # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
+
+     expansion = 4
+
+     def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
+                  base_width=64, dilation=1, norm_layer=None):
+         super(Bottleneck, self).__init__()
+         if norm_layer is None:
+             norm_layer = nn.BatchNorm2d
+         width = int(planes * (base_width / 64.)) * groups
+         # Both self.conv2 and self.downsample layers downsample the input when stride != 1
+         self.conv1 = conv1x1(inplanes, width)
+         self.bn1 = norm_layer(width)
+         self.conv2 = conv3x3(width, width, stride, groups, dilation)
+         self.bn2 = norm_layer(width)
+         self.conv3 = conv1x1(width, planes * self.expansion)
+         self.bn3 = norm_layer(planes * self.expansion)
+         self.relu = nn.ReLU(inplace=True)
+         self.downsample = downsample
+         self.stride = stride
+
+     def forward(self, x):
+         identity = x
+
+         out = self.conv1(x)
+         out = self.bn1(out)
+         out = self.relu(out)
+
+         out = self.conv2(out)
+         out = self.bn2(out)
+         out = self.relu(out)
+
+         out = self.conv3(out)
+         out = self.bn3(out)
+
+         if self.downsample is not None:
+             identity = self.downsample(x)
+
+         out += identity
+         out = self.relu(out)
+
+         return out
+
+
+ class ResNet(nn.Module):
+
+     def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
+                  groups=1, width_per_group=64, replace_stride_with_dilation=None,
+                  norm_layer=None):
+         super(ResNet, self).__init__()
+         if norm_layer is None:
+             norm_layer = nn.BatchNorm2d
+         self._norm_layer = norm_layer
+
+         self.inplanes = 64
+         self.dilation = 1
+         if replace_stride_with_dilation is None:
+             # each element in the tuple indicates if we should replace
+             # the 2x2 stride with a dilated convolution instead
+             replace_stride_with_dilation = [False, False, False]
+         if len(replace_stride_with_dilation) != 3:
+             raise ValueError("replace_stride_with_dilation should be None "
+                              "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
+         self.groups = groups
+         self.base_width = width_per_group
+         self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
+                                bias=False)
+         self.bn1 = norm_layer(self.inplanes)
+         self.relu = nn.ReLU(inplace=True)
+         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+         self.layer1 = self._make_layer(block, 64, layers[0])
+         self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
+                                        dilate=replace_stride_with_dilation[0])
+         self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
+                                        dilate=replace_stride_with_dilation[1])
+         self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
+                                        dilate=replace_stride_with_dilation[2])
+         self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
+
+         ''' head '''
+         # op1. vanilla ResNet
+         # self.fc = nn.Linear(512 * block.expansion, num_classes)
+
+         # op2. smooth-swap resnet
+         # the FC head is defined in id_embedder.py
+
+         for m in self.modules():
+             if isinstance(m, nn.Conv2d):
+                 nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+             elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
+                 nn.init.constant_(m.weight, 1)
+                 nn.init.constant_(m.bias, 0)
+
+         # Zero-initialize the last BN in each residual branch,
+         # so that the residual branch starts with zeros, and each residual block behaves like an identity.
+         # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
+         if zero_init_residual:
+             for m in self.modules():
+                 if isinstance(m, Bottleneck):
+                     nn.init.constant_(m.bn3.weight, 0)
+                 elif isinstance(m, BasicBlock):
+                     nn.init.constant_(m.bn2.weight, 0)
+
+     def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
+         norm_layer = self._norm_layer
+         downsample = None
+         previous_dilation = self.dilation
+         if dilate:
+             self.dilation *= stride
+             stride = 1
+         if stride != 1 or self.inplanes != planes * block.expansion:
+             downsample = nn.Sequential(
+                 conv1x1(self.inplanes, planes * block.expansion, stride),
+                 norm_layer(planes * block.expansion),
+             )
+
+         layers = []
+         layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
+                             self.base_width, previous_dilation, norm_layer))
+         self.inplanes = planes * block.expansion
+         for _ in range(1, blocks):
+             layers.append(block(self.inplanes, planes, groups=self.groups,
+                                 base_width=self.base_width, dilation=self.dilation,
+                                 norm_layer=norm_layer))
+
+         return nn.Sequential(*layers)
+
+     def _forward_impl(self, x):
+         # See note [TorchScript super()]
+         x = self.conv1(x)
+         x = self.bn1(x)
+         x = self.relu(x)
+         x = self.maxpool(x)
+
+         x = self.layer1(x)
+         x = self.layer2(x)
+         x = self.layer3(x)
+         x = self.layer4(x)
+
+         x = self.avgpool(x)
+         x = torch.flatten(x, 1)
+
+         return x
+
+     def forward(self, x):
+         return self._forward_impl(x)
+
+
+ def _resnet(arch, block, layers, pretrained, progress, **kwargs):
+     model = ResNet(block, layers, **kwargs)
+     if pretrained:
+         state_dict = load_state_dict_from_url(model_urls[arch],
+                                               progress=progress)
+         model.load_state_dict(state_dict)
+     return model
+
+
+ def resnet18(pretrained=False, progress=True, **kwargs):
+     r"""ResNet-18 model from
+     `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
+
+     Args:
+         pretrained (bool): If True, returns a model pre-trained on ImageNet
+         progress (bool): If True, displays a progress bar of the download to stderr
+     """
+     return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress,
+                    **kwargs)
+
+
+ def resnet34(pretrained=False, progress=True, **kwargs):
+     r"""ResNet-34 model from
+     `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
+
+     Args:
+         pretrained (bool): If True, returns a model pre-trained on ImageNet
+         progress (bool): If True, displays a progress bar of the download to stderr
+     """
+     return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress,
+                    **kwargs)
+
+
+ def resnet50(pretrained=False, progress=True, **kwargs):
+     r"""ResNet-50 model from
+     `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
+
+     Args:
+         pretrained (bool): If True, returns a model pre-trained on ImageNet
+         progress (bool): If True, displays a progress bar of the download to stderr
+     """
+     return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
+                    **kwargs)
+
+
+ def resnet101(pretrained=False, progress=True, **kwargs):
+     r"""ResNet-101 model from
+     `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
+
+     Args:
+         pretrained (bool): If True, returns a model pre-trained on ImageNet
+         progress (bool): If True, displays a progress bar of the download to stderr
+     """
+     return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress,
+                    **kwargs)
+
+
+ def resnet152(pretrained=False, progress=True, **kwargs):
+     r"""ResNet-152 model from
+     `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
+
+     Args:
+         pretrained (bool): If True, returns a model pre-trained on ImageNet
+         progress (bool): If True, displays a progress bar of the download to stderr
+     """
+     return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress,
+                    **kwargs)
+
+
+ def resnext50_32x4d(pretrained=False, progress=True, **kwargs):
+     r"""ResNeXt-50 32x4d model from
+     `"Aggregated Residual Transformations for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_
+
+     Args:
+         pretrained (bool): If True, returns a model pre-trained on ImageNet
+         progress (bool): If True, displays a progress bar of the download to stderr
+     """
+     kwargs['groups'] = 32
+     kwargs['width_per_group'] = 4
+     return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3],
+                    pretrained, progress, **kwargs)
+
+
+ def resnext101_32x8d(pretrained=False, progress=True, **kwargs):
+     r"""ResNeXt-101 32x8d model from
+     `"Aggregated Residual Transformations for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_
+
+     Args:
+         pretrained (bool): If True, returns a model pre-trained on ImageNet
+         progress (bool): If True, displays a progress bar of the download to stderr
+     """
+     kwargs['groups'] = 32
+     kwargs['width_per_group'] = 8
+     return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3],
+                    pretrained, progress, **kwargs)
+
+
+ def wide_resnet50_2(pretrained=False, progress=True, **kwargs):
+     r"""Wide ResNet-50-2 model from
+     `"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_
+
+     The model is the same as ResNet except for the bottleneck number of channels,
+     which is twice as large in every block. The number of channels in the outer 1x1
+     convolutions is the same, e.g. the last block in ResNet-50 has 2048-512-2048
+     channels, and in Wide ResNet-50-2 it has 2048-1024-2048.
+
+     Args:
+         pretrained (bool): If True, returns a model pre-trained on ImageNet
+         progress (bool): If True, displays a progress bar of the download to stderr
+     """
+     kwargs['width_per_group'] = 64 * 2
+     return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3],
+                    pretrained, progress, **kwargs)
+
+
+ def wide_resnet101_2(pretrained=False, progress=True, **kwargs):
+     r"""Wide ResNet-101-2 model from
+     `"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_
+
+     The model is the same as ResNet except for the bottleneck number of channels,
+     which is twice as large in every block. The number of channels in the outer 1x1
+     convolutions is the same, e.g. the last block in ResNet-50 has 2048-512-2048
+     channels, and in Wide ResNet-50-2 it has 2048-1024-2048.
+
+     Args:
+         pretrained (bool): If True, returns a model pre-trained on ImageNet
+         progress (bool): If True, displays a progress bar of the download to stderr
+     """
+     kwargs['width_per_group'] = 64 * 2
+     return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3],
+                    pretrained, progress, **kwargs)
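With the classification fc commented out, this backbone ends at the flattened global average pool: 512 * Bottleneck.expansion = 2048 channels for resnet50, exactly the 512 * 4 input that IdentityHead's first Linear expects. A quick shape check (illustrative):

    net = resnet50(pretrained=False)
    feat = net(torch.randn(2, 3, 256, 256))
    print(feat.shape)  # torch.Size([2, 2048])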
modules/networks/faceshifter.py ADDED
@@ -0,0 +1,162 @@
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+
+ import os
+ import kornia
+ import warnings
+
+ from modules.layers.faceshifter.layers import AEI_Net
+ from modules.layers.faceshifter.hear_layers import Hear_Net
+ from third_party.arcface import iresnet100, MouthNet
+
+ make_abs_path = lambda fn: os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), fn))
+
+
+ class FSGenerator(nn.Module):
+     def __init__(self,
+                  id_ckpt: str = None,
+                  id_dim: int = 512,
+                  mouth_net_param: dict = None,
+                  in_size: int = 256,
+                  finetune: bool = False,
+                  downup: bool = False,
+                  ):
+         super(FSGenerator, self).__init__()
+
+         ''' MouthNet '''
+         self.use_mouth_net = mouth_net_param.get('use')
+         self.mouth_feat_dim = 0
+         self.mouth_net = None
+         if self.use_mouth_net:
+             self.mouth_feat_dim = mouth_net_param.get('feature_dim')
+             self.mouth_crop_param = mouth_net_param.get('crop_param')
+             mouth_weight_path = make_abs_path(mouth_net_param.get('weight_path'))
+             self.mouth_net = MouthNet(
+                 bisenet=None,
+                 feature_dim=self.mouth_feat_dim,
+                 crop_param=self.mouth_crop_param
+             )
+             self.mouth_net.load_backbone(mouth_weight_path)
+             print("[FaceShifter Generator] MouthNet loaded from %s" % mouth_weight_path)
+             self.mouth_net.eval()
+             self.mouth_net.requires_grad_(False)
+
+         self.G = AEI_Net(c_id=id_dim + self.mouth_feat_dim, finetune=finetune, downup=downup)
+         self.iresnet = iresnet100()
+         if id_ckpt is not None:
+             self.iresnet.load_state_dict(torch.load(id_ckpt, "cpu"))
+         else:
+             warnings.warn("Face ID backbone [%s] not found!" % id_ckpt)
+             raise FileNotFoundError("Face ID backbone [%s] not found!" % id_ckpt)
+         self.iresnet.eval()
+         self.register_buffer(
+             name="trans_matrix",
+             tensor=torch.tensor(
+                 [
+                     [
+                         [1.07695457, -0.03625215, -1.56352194 * (in_size / 256)],
+                         [0.03625215, 1.07695457, -5.32134629 * (in_size / 256)],
+                     ]
+                 ],
+                 requires_grad=False,
+             ).float(),
+         )
+         self.in_size = in_size
+
+         self.iresnet.requires_grad_(False)
+
+     def forward(self, source, target, infer=False):
+         with torch.no_grad():
+             ''' 1. get id '''
+             if infer:
+                 resize_input = F.interpolate(source, size=112, mode="bilinear", align_corners=True)
+                 id_vector = F.normalize(self.iresnet(resize_input), dim=-1, p=2)
+             else:
+                 M = self.trans_matrix.repeat(source.size()[0], 1, 1)
+                 source = kornia.geometry.transform.warp_affine(source, M, (self.in_size, self.in_size))
+
+                 # import cv2
+                 # from tricks import Trick
+                 # cv2.imwrite('warped_source.png', Trick.tensor_to_arr(source)[0, :, :, ::-1])
+
+                 resize_input = F.interpolate(source, size=112, mode="bilinear", align_corners=True)
+                 id_vector = F.normalize(self.iresnet(resize_input), dim=-1, p=2)
+
+             ''' 2. get mouth feature '''
+             if self.use_mouth_net:
+                 w1, h1, w2, h2 = self.mouth_crop_param
+                 mouth_input = resize_input[:, :, h1:h2, w1:w2]  # 112 -> mouth crop
+                 mouth_feat = self.mouth_net(mouth_input)
+                 id_vector = torch.cat([id_vector, mouth_feat], dim=-1)  # (B, dim_id + dim_mouth)
+
+         x, att = self.G(target, id_vector)
+         return x, id_vector, att
+
+     def get_recon(self):
+         return self.G.get_recon_tensor()
+
+     def get_att(self, x):
+         return self.G.get_attr(x)
+
+
+ class FSHearNet(nn.Module):
+     def __init__(self, aei_path: str):
+         super(FSHearNet, self).__init__()
+         ''' Stage I. AEI_Net '''
+         self.aei = FSGenerator(
+             id_ckpt=make_abs_path("../../modules/third_party/arcface/weights/ms1mv3_arcface_r100_fp16/backbone.pth")
+         ).requires_grad_(False)
+         print('Loading pre-trained AEI-Net from %s...' % aei_path)
+         self._load_pretrained_aei(aei_path)
+         print('Loaded.')
+
+         ''' Stage II. HEAR_Net '''
+         self.hear = Hear_Net()
+
+     def _load_pretrained_aei(self, path: str):
+         if '.ckpt' in path:
+             from trainer.faceshifter.extract_ckpt import extract_generator
+             pth_folder = make_abs_path('../../trainer/faceshifter/extracted_ckpt')
+             pth_name = 'hear_tmp.pth'
+             assert '.pth' in pth_name
+             state_dict = extract_generator(load_path=path, path=os.path.join(pth_folder, pth_name))
+             self.aei.load_state_dict(state_dict, strict=False)
+             self.aei.eval()
+         elif '.pth' in path:
+             self.aei.load_state_dict(torch.load(path, "cpu"), strict=False)
+             self.aei.eval()
+         else:
+             raise FileNotFoundError('%s (.ckpt or .pth) not found.' % path)
+
+     def forward(self, source, target):
+         with torch.no_grad():
+             y_hat_st, _, _ = self.aei(source, target, infer=True)
+             y_hat_tt, _, _ = self.aei(target, target, infer=True)
+             delta_y_t = target - y_hat_tt
+             y_cat = torch.cat([y_hat_st, delta_y_t], dim=1)  # (B, 6, 256, 256)
+
+         y_st = self.hear(y_cat)
+
+         return y_st, y_hat_st  # both (B, 3, 256, 256)
+
+
+ if __name__ == '__main__':
+
+     source = torch.randn(8, 3, 512, 512)
+     target = torch.randn(8, 3, 512, 512)
+     net = FSGenerator(
+         id_ckpt="/apdcephfs_cq2/share_1290939/gavinyuan/code/FaceShifter/faceswap/faceswap/checkpoints/"
+                 "face_id/ms1mv3_arcface_r100_fp16_backbone.pth",
+         mouth_net_param={
+             'use': False
+         }
+     )
+     result, _, _ = net(source, target)
+     print('result:', result.shape)
+
+     # stage2 = FSHearNet(
+     #     aei_path=make_abs_path("../../trainer/faceshifter/out/faceshifter_vanilla/epoch=32-step=509999.ckpt")
+     # )
+     # final_out, _ = stage2(source, target)
+     # print('final out:', final_out.shape)
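FSGenerator reads mouth_net_param as a plain dict; when 'use' is true it also expects 'feature_dim', 'crop_param' (w1, h1, w2, h2 in the 112x112 id-crop coordinates), and 'weight_path'. The values below are illustrative placeholders, not defaults shipped with the repo:

    mouth_net_param = {
        'use': True,
        'feature_dim': 128,               # appended to the 512-d ArcFace id vector
        'crop_param': (28, 56, 84, 112),  # hypothetical mouth region (w1, h1, w2, h2)
        'weight_path': 'third_party/arcface/weights/mouth_net.pth',  # hypothetical path
    }
    net = FSGenerator(id_ckpt='path/to/backbone.pth', mouth_net_param=mouth_net_param)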
modules/networks/simswap.py ADDED
@@ -0,0 +1,230 @@
+ #!/usr/bin/env python3
+ # -*- coding:utf-8 -*-
+ #############################################################
+ # File: fs_model_fix_idnorm_donggp_saveoptim copy.py
+ # Created Date: Wednesday January 12th 2022
+ # Author: Chen Xuanhong
+ # Email: chenxuanhongzju@outlook.com
+ # Last Modified: Thursday, 21st April 2022 8:13:37 pm
+ # Modified By: Chen Xuanhong
+ # Copyright (c) 2022 Shanghai Jiao Tong University
+ #############################################################
+
+
+ import torch
+ import torch.nn as nn
+
+ from modules.layers.simswap.base_model import BaseModel
+ from modules.layers.simswap.fs_networks_fix import Generator_Adain_Upsample
+
+ from modules.layers.simswap.pg_modules.projected_discriminator import ProjectedDiscriminator
+
+
+ def compute_grad2(d_out, x_in):
+     batch_size = x_in.size(0)
+     grad_dout = torch.autograd.grad(
+         outputs=d_out.sum(), inputs=x_in,
+         create_graph=True, retain_graph=True, only_inputs=True
+     )[0]
+     grad_dout2 = grad_dout.pow(2)
+     assert (grad_dout2.size() == x_in.size())
+     reg = grad_dout2.view(batch_size, -1).sum(1)
+     return reg
+
+
+ class fsModel(BaseModel):
+     def name(self):
+         return 'fsModel'
+
+     def initialize(self, opt):
+         BaseModel.initialize(self, opt)
+         # if opt.resize_or_crop != 'none' or not opt.isTrain:  # when training at full res this causes OOM
+         self.isTrain = opt.isTrain
+
+         # Generator network
+         self.netG = Generator_Adain_Upsample(input_nc=3, output_nc=3, latent_size=512, n_blocks=9, deep=opt.Gdeep)
+         self.netG.cuda()
+
+         # Id network
+         from third_party.arcface import iresnet100
+         netArc_pth = "/apdcephfs_cq2/share_1290939/gavinyuan/code/FaceShifter/faceswap/faceswap/" \
+                      "checkpoints/face_id/ms1mv3_arcface_r100_fp16_backbone.pth"  # opt.Arc_path
+         self.netArc = iresnet100(pretrained=False, fp16=False)
+         self.netArc.load_state_dict(torch.load(netArc_pth, map_location="cpu"))
+         # netArc_checkpoint = opt.Arc_path
+         # netArc_checkpoint = torch.load(netArc_checkpoint, map_location=torch.device("cpu"))
+         # self.netArc = netArc_checkpoint['model'].module
+         self.netArc = self.netArc.cuda()
+         self.netArc.eval()
+         self.netArc.requires_grad_(False)
+         if not self.isTrain:
+             pretrained_path = opt.checkpoints_dir
+             self.load_network(self.netG, 'G', opt.which_epoch, pretrained_path)
+             return
+         self.netD = ProjectedDiscriminator(diffaug=False, interp224=False, **{})
+         # self.netD.feature_network.requires_grad_(False)
+         self.netD.cuda()
+
+         if self.isTrain:
+             # define loss functions
+             self.criterionFeat = nn.L1Loss()
+             self.criterionRec = nn.L1Loss()
+
+             # initialize optimizers
+             # optimizer G
+             params = list(self.netG.parameters())
+             self.optimizer_G = torch.optim.Adam(params, lr=opt.lr, betas=(opt.beta1, 0.99), eps=1e-8)
+
+             # optimizer D
+             params = list(self.netD.parameters())
+             self.optimizer_D = torch.optim.Adam(params, lr=opt.lr, betas=(opt.beta1, 0.99), eps=1e-8)
+
+             # load networks
+             if opt.continue_train:
+                 pretrained_path = '' if not self.isTrain else opt.load_pretrain
+                 # print(pretrained_path)
+                 self.load_network(self.netG, 'G', opt.which_epoch, pretrained_path)
+                 self.load_network(self.netD, 'D', opt.which_epoch, pretrained_path)
+                 self.load_optim(self.optimizer_G, 'G', opt.which_epoch, pretrained_path)
+                 self.load_optim(self.optimizer_D, 'D', opt.which_epoch, pretrained_path)
+             torch.cuda.empty_cache()
+
+     def cosin_metric(self, x1, x2):
+         # return np.dot(x1, x2) / (np.linalg.norm(x1) * np.linalg.norm(x2))
+         return torch.sum(x1 * x2, dim=1) / (torch.norm(x1, dim=1) * torch.norm(x2, dim=1))
+
+     def save(self, which_epoch):
+         self.save_network(self.netG, 'G', which_epoch)
+         self.save_network(self.netD, 'D', which_epoch)
+         self.save_optim(self.optimizer_G, 'G', which_epoch)
+         self.save_optim(self.optimizer_D, 'D', which_epoch)
+         '''if self.gen_features:
+             self.save_network(self.netE, 'E', which_epoch, self.gpu_ids)'''
+
+     def update_fixed_params(self):
+         raise ValueError('Not used')
+         # after fixing the global generator for a number of iterations, also start finetuning it
+         params = list(self.netG.parameters())
+         if self.gen_features:
+             params += list(self.netE.parameters())
+         self.optimizer_G = torch.optim.Adam(params, lr=self.opt.lr, betas=(self.opt.beta1, 0.999))
+         if self.opt.verbose:
+             print('------------ Now also finetuning global generator -----------')
+
+     def update_learning_rate(self):
+         raise ValueError('Not used')
+         lrd = self.opt.lr / self.opt.niter_decay
+         lr = self.old_lr - lrd
+         for param_group in self.optimizer_D.param_groups:
+             param_group['lr'] = lr
+         for param_group in self.optimizer_G.param_groups:
+             param_group['lr'] = lr
+         if self.opt.verbose:
+             print('update learning rate: %f -> %f' % (self.old_lr, lr))
+         self.old_lr = lr
+
+
+ if __name__ == "__main__":
+     import os
+     import argparse
+
+     def str2bool(v):
+         return v.lower() in ('true')
+
+     class TrainOptions:
+         def __init__(self):
+             self.parser = argparse.ArgumentParser()
+             self.initialized = False
+
+         def initialize(self):
+             self.parser.add_argument('--name', type=str, default='simswap',
+                                      help='name of the experiment. It decides where to store samples and models')
+             self.parser.add_argument('--gpu_ids', default='0')
+             self.parser.add_argument('--checkpoints_dir', type=str, default='./checkpoints',
+                                      help='models are saved here')
+             self.parser.add_argument('--isTrain', type=str2bool, default='True')
+
+             # input/output sizes
+             self.parser.add_argument('--batchSize', type=int, default=8, help='input batch size')
+
+             # for displays
+             self.parser.add_argument('--use_tensorboard', type=str2bool, default='False')
+
+             # for training
+             self.parser.add_argument('--dataset', type=str, default="/path/to/VGGFace2",
+                                      help='path to the face swapping dataset')
+             self.parser.add_argument('--continue_train', type=str2bool, default='False',
+                                      help='continue training: load the latest model')
+             self.parser.add_argument('--load_pretrain', type=str, default='./checkpoints/simswap224_test',
+                                      help='load the pretrained model from the specified location')
+             self.parser.add_argument('--which_epoch', type=str, default='10000',
+                                      help='which epoch to load? set to latest to use the latest cached model')
+             self.parser.add_argument('--phase', type=str, default='train', help='train, val, test, etc')
+             self.parser.add_argument('--niter', type=int, default=10000, help='# of iterations at the starting learning rate')
+             self.parser.add_argument('--niter_decay', type=int, default=10000,
+                                      help='# of iterations to linearly decay the learning rate to zero')
+             self.parser.add_argument('--beta1', type=float, default=0.0, help='momentum term of adam')
+             self.parser.add_argument('--lr', type=float, default=0.0004, help='initial learning rate for adam')
+             self.parser.add_argument('--Gdeep', type=str2bool, default='False')
+
+             # for discriminators
+             self.parser.add_argument('--lambda_feat', type=float, default=10.0, help='weight for feature matching loss')
+             self.parser.add_argument('--lambda_id', type=float, default=30.0, help='weight for id loss')
+             self.parser.add_argument('--lambda_rec', type=float, default=10.0, help='weight for reconstruction loss')
+
+             self.parser.add_argument("--Arc_path", type=str, default='arcface_model/arcface_checkpoint.tar',
+                                      help="path to the ArcFace identity checkpoint")
+             self.parser.add_argument("--total_step", type=int, default=1000000, help='total training steps')
+             self.parser.add_argument("--log_frep", type=int, default=200, help='frequency of printing log information')
+             self.parser.add_argument("--sample_freq", type=int, default=1000, help='frequency of sampling')
+             self.parser.add_argument("--model_freq", type=int, default=10000, help='frequency of saving the model')
+
+             self.isTrain = True
+
+         def parse(self, save=True):
+             if not self.initialized:
+                 self.initialize()
+             self.opt = self.parser.parse_args()
+             self.opt.isTrain = self.isTrain  # train or test
+
+             args = vars(self.opt)
+
+             print('------------ Options -------------')
+             for k, v in sorted(args.items()):
+                 print('%s: %s' % (str(k), str(v)))
+             print('-------------- End ----------------')
+
+             # save to the disk
+             # if self.opt.isTrain:
+             #     expr_dir = os.path.join(self.opt.checkpoints_dir, self.opt.name)
+             #     util.mkdirs(expr_dir)
+             #     if save and not self.opt.continue_train:
+             #         file_name = os.path.join(expr_dir, 'opt.txt')
+             #         with open(file_name, 'wt') as opt_file:
+             #             opt_file.write('------------ Options -------------\n')
+             #             for k, v in sorted(args.items()):
+             #                 opt_file.write('%s: %s\n' % (str(k), str(v)))
+             #             opt_file.write('-------------- End ----------------\n')
+             return self.opt
+
+     source = torch.randn(8, 3, 256, 256).cuda()
+     target = torch.randn(8, 3, 256, 256).cuda()
+
+     opt = TrainOptions().parse()
+     model = fsModel()
+     model.initialize(opt)
+
+     import torch.nn.functional as F
+     img_id_112 = F.interpolate(source, size=(112, 112), mode='bicubic')
+     latent_id = model.netArc(img_id_112)
+     latent_id = F.normalize(latent_id, p=2, dim=1)
+
+     img_fake = model.netG(target, latent_id)
+     gen_logits, _ = model.netD(img_fake.detach(), None)
+     loss_Dgen = (F.relu(torch.ones_like(gen_logits) + gen_logits)).mean()
+
+     real_logits, _ = model.netD(source, None)
+
+     print('img_fake:', img_fake.shape, 'real_logits:', real_logits.shape)
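The __main__ demo above computes only the fake half of a hinge GAN loss. For completeness, the matching real half and the generator term would look like this (a standard hinge formulation sketched with the same variable names; not code from the repo):

    loss_Dreal = (F.relu(torch.ones_like(real_logits) - real_logits)).mean()
    loss_D = loss_Dgen + loss_Dreal               # discriminator objective
    gen_logits_G, _ = model.netD(img_fake, None)  # no detach for the generator step
    loss_G = (-gen_logits_G).mean()               # generator objective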
third_party/arcface/__init__.py ADDED
@@ -0,0 +1,2 @@
+ from third_party.arcface.iresnet import iresnet18, iresnet34, iresnet50, iresnet100
+ from third_party.arcface.mouth_net import MouthNet
third_party/arcface/dataloaderx.py ADDED
@@ -0,0 +1,67 @@
+ """
+ A copy from https://github.com/deepinsight/insightface/blob/master/recognition/arcface_torch/dataset.py
+ """
+
+ import queue as Queue
+ import threading
+
+ import torch
+ from torch.utils.data import DataLoader
+
+
+ class BackgroundGenerator(threading.Thread):
+     def __init__(self, generator, local_rank, max_prefetch=6):
+         super(BackgroundGenerator, self).__init__()
+         self.queue = Queue.Queue(max_prefetch)
+         self.generator = generator
+         self.local_rank = local_rank
+         self.daemon = True
+         self.start()
+
+     def run(self):
+         torch.cuda.set_device(self.local_rank)
+         for item in self.generator:
+             self.queue.put(item)
+         self.queue.put(None)
+
+     def next(self):
+         next_item = self.queue.get()
+         if next_item is None:
+             raise StopIteration
+         return next_item
+
+     def __next__(self):
+         return self.next()
+
+     def __iter__(self):
+         return self
+
+
+ class DataLoaderX(DataLoader):
+     def __init__(self, local_rank, **kwargs):
+         super(DataLoaderX, self).__init__(**kwargs)
+         self.stream = torch.cuda.Stream(local_rank)
+         self.local_rank = local_rank
+
+     def __iter__(self):
+         self.iter = super(DataLoaderX, self).__iter__()
+         self.iter = BackgroundGenerator(self.iter, self.local_rank)
+         self.preload()
+         return self
+
+     def preload(self):
+         self.batch = next(self.iter, None)
+         if self.batch is None:
+             return None
+         with torch.cuda.stream(self.stream):
+             for k in range(len(self.batch)):
+                 self.batch[k] = self.batch[k].to(device=self.local_rank,
+                                                  non_blocking=True)
+
+     def __next__(self):
+         torch.cuda.current_stream().wait_stream(self.stream)
+         batch = self.batch
+         if batch is None:
+             raise StopIteration
+         self.preload()
+         return batch
@@ -0,0 +1,311 @@
 
+ import torch
+ from torch import nn
+
+ __all__ = ["iresnet18", "iresnet34", "iresnet50", "iresnet100", "iresnet200"]
+
+
+ def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
+     """3x3 convolution with padding"""
+     return nn.Conv2d(
+         in_planes,
+         out_planes,
+         kernel_size=3,
+         stride=stride,
+         padding=dilation,
+         groups=groups,
+         bias=False,
+         dilation=dilation,
+     )
+
+
+ def conv1x1(in_planes, out_planes, stride=1):
+     """1x1 convolution"""
+     return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
+
+
+ class IBasicBlock(nn.Module):
+     expansion = 1
+
+     def __init__(
+         self,
+         inplanes,
+         planes,
+         stride=1,
+         downsample=None,
+         groups=1,
+         base_width=64,
+         dilation=1,
+     ):
+         super(IBasicBlock, self).__init__()
+         if groups != 1 or base_width != 64:
+             raise ValueError("BasicBlock only supports groups=1 and base_width=64")
+         if dilation > 1:
+             raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
+         self.bn1 = nn.BatchNorm2d(inplanes, eps=1e-05)
+         self.conv1 = conv3x3(inplanes, planes)
+         self.bn2 = nn.BatchNorm2d(planes, eps=1e-05)
+         self.prelu = nn.PReLU(planes)
+         self.conv2 = conv3x3(planes, planes, stride)
+         self.bn3 = nn.BatchNorm2d(planes, eps=1e-05)
+         self.downsample = downsample
+         self.stride = stride
+
+     def forward(self, x):
+         identity = x
+         out = self.bn1(x)
+         out = self.conv1(out)
+         out = self.bn2(out)
+         out = self.prelu(out)
+         out = self.conv2(out)
+         out = self.bn3(out)
+         if self.downsample is not None:
+             identity = self.downsample(x)
+         out += identity
+         return out
+
+
+ class IResNet(nn.Module):
+     def __init__(
+         self,
+         block,
+         layers,
+         dropout=0,
+         num_features=512,
+         zero_init_residual=False,
+         groups=1,
+         width_per_group=64,
+         replace_stride_with_dilation=None,
+         fp16=False,
+         fc_scale=7 * 7,
+     ):
+         super(IResNet, self).__init__()
+         self.fp16 = fp16
+         self.inplanes = 64
+         self.dilation = 1
+         self.fc_scale = fc_scale
+         if replace_stride_with_dilation is None:
+             replace_stride_with_dilation = [False, False, False]
+         if len(replace_stride_with_dilation) != 3:
+             raise ValueError(
+                 "replace_stride_with_dilation should be None "
+                 "or a 3-element tuple, got {}".format(replace_stride_with_dilation)
+             )
+         self.groups = groups
+         self.base_width = width_per_group
+         self.conv1 = nn.Conv2d(
+             3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False
+         )
+         self.bn1 = nn.BatchNorm2d(self.inplanes, eps=1e-05)
+         self.prelu = nn.PReLU(self.inplanes)
+         self.layer1 = self._make_layer(block, 64, layers[0], stride=2)
+         self.layer2 = self._make_layer(
+             block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0]
+         )
+         self.layer3 = self._make_layer(
+             block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1]
+         )
+         self.layer4 = self._make_layer(
+             block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2]
+         )
+         self.bn2 = nn.BatchNorm2d(512 * block.expansion, eps=1e-05)
+         self.dropout = nn.Dropout(p=dropout, inplace=True)
+         self.fc = nn.Linear(512 * block.expansion * self.fc_scale, num_features)
+         self.features = nn.BatchNorm1d(num_features, eps=1e-05)
+         nn.init.constant_(self.features.weight, 1.0)
+         self.features.weight.requires_grad = False
+
+         for m in self.modules():
+             if isinstance(m, nn.Conv2d):
+                 nn.init.normal_(m.weight, 0, 0.1)
+             elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
+                 nn.init.constant_(m.weight, 1)
+                 nn.init.constant_(m.bias, 0)
+
+         if zero_init_residual:
+             for m in self.modules():
+                 if isinstance(m, IBasicBlock):
+                     nn.init.constant_(m.bn2.weight, 0)
+
+     def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
+         downsample = None
+         previous_dilation = self.dilation
+         if dilate:
+             self.dilation *= stride
+             stride = 1
+         if stride != 1 or self.inplanes != planes * block.expansion:
+             downsample = nn.Sequential(
+                 conv1x1(self.inplanes, planes * block.expansion, stride),
+                 nn.BatchNorm2d(planes * block.expansion, eps=1e-05),
+             )
+         layers = []
+         layers.append(
+             block(
+                 self.inplanes,
+                 planes,
+                 stride,
+                 downsample,
+                 self.groups,
+                 self.base_width,
+                 previous_dilation,
+             )
+         )
+         self.inplanes = planes * block.expansion
+         for _ in range(1, blocks):
+             layers.append(
+                 block(
+                     self.inplanes,
+                     planes,
+                     groups=self.groups,
+                     base_width=self.base_width,
+                     dilation=self.dilation,
+                 )
+             )
+
+         return nn.Sequential(*layers)
+
+     def forward(self, x):
+         with torch.cuda.amp.autocast(self.fp16):
+             x = self.conv1(x)
+             x = self.bn1(x)
+             x = self.prelu(x)
+             x = self.layer1(x)
+             x = self.layer2(x)
+             x = self.layer3(x)
+             x = self.layer4(x)
+             x = self.bn2(x)
+             # print(x.shape)
+             x = torch.flatten(x, 1)
+             x = self.dropout(x)
+         x = self.fc(x.float() if self.fp16 else x)
+         x = self.features(x)
+         return x
+
+
+ def _iresnet(arch, block, layers, pretrained, progress, **kwargs):
+     model = IResNet(block, layers, **kwargs)
+     if pretrained:
+         model_dir = {
+             'iresnet18': './weights/r18-backbone.pth',
+             'iresnet34': './weights/r34-backbone.pth',
+             'iresnet50': './weights/r50-backbone.pth',
+             'iresnet100': './weights/r100-backbone.pth',
+         }
+         pre_trained_weights = torch.load(model_dir[arch], map_location=torch.device('cpu'))
+
+         tmp_dict = {}
+         for key in pre_trained_weights:
+             # if 'features' in key or 'fc' in key:
+             #     print('skip %s' % key)
+             #     continue
+             tmp_dict[key] = pre_trained_weights[key]
+
+         # add model layers that are missing from the checkpoint, so that
+         # load_state_dict(strict=False) below succeeds
+         model_dict = model.state_dict()
+         for key in model_dict:
+             if key not in tmp_dict:
+                 tmp_dict[key] = model_dict[key]
+
+         model.load_state_dict(tmp_dict, strict=False)
+         print("load pre-trained iresnet from %s" % model_dir[arch])
+
+     return model
+
+
+ def iresnet18(pretrained=False, progress=True, **kwargs):
+     return _iresnet(
+         "iresnet18", IBasicBlock, [2, 2, 2, 2], pretrained, progress, **kwargs
+     )
+
+
+ def iresnet34(pretrained=False, progress=True, **kwargs):
+     return _iresnet(
+         "iresnet34", IBasicBlock, [3, 4, 6, 3], pretrained, progress, **kwargs
+     )
+
+
+ def iresnet50(pretrained=False, progress=True, **kwargs):
+     return _iresnet(
+         "iresnet50", IBasicBlock, [3, 4, 14, 3], pretrained, progress, **kwargs
+     )
+
+
+ def iresnet100(pretrained=False, progress=True, **kwargs):
+     return _iresnet(
+         "iresnet100", IBasicBlock, [3, 13, 30, 3], pretrained, progress, **kwargs
+     )
+
+
+ def iresnet200(pretrained=False, progress=True, **kwargs):
+     return _iresnet(
+         "iresnet200", IBasicBlock, [6, 26, 60, 6], pretrained, progress, **kwargs
+     )
+
+
+ @torch.no_grad()
+ def identification(folder: str = './images', target_idx: int = 0):
+     import os
+     from PIL import Image
+     import torch
+     import torchvision.transforms as transforms
+     import torch.nn.functional as F
+     import kornia
+     import numpy as np
+
+     os.makedirs('crop', exist_ok=True)
+     img_list = os.listdir(folder)
+     img_list.sort()
+     n = len(img_list)
+     trans = transforms.Compose([
+         transforms.Resize(256),
+         transforms.CenterCrop(224),
+         transforms.ToTensor(),
+         # transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
+         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+     ])
+     trans_matrix = torch.tensor(
+         [[[1.07695457, -0.03625215, -1.56352194],
+           [0.03625215, 1.07695457, -5.32134629]]],
+         requires_grad=False).float().cuda()
+
+     fid_model = iresnet50(pretrained=True).cuda().eval()
+
+     def save_tensor_to_img(tensor: torch.Tensor, path: str, scale=255):
+         tensor = tensor.permute(0, 2, 3, 1)[0]  # in [0,1]
+         tensor = tensor.clamp(0, 1)
+         tensor = tensor * scale
+         tensor_np = tensor.cpu().numpy().astype(np.uint8)
+         if tensor_np.shape[-1] == 1:  # channel dim
+             tensor_np = tensor_np.repeat(3, axis=-1)
+         tensor_img = Image.fromarray(tensor_np)
+         tensor_img.save(path)
+
+     feats = torch.zeros((n, 512), dtype=torch.float32).cuda()
+     for idx, img_path in enumerate(img_list):
+         img_pil = Image.open(os.path.join(folder, img_path)).convert('RGB')
+         img_tensor = trans(img_pil).unsqueeze(0).cuda()
+
+         # img_tensor = kornia.geometry.transform.warp_affine(img_tensor, trans_matrix, (256, 256))
+         save_tensor_to_img(img_tensor / 2 + 0.5, path=os.path.join('./crop', img_path))
+         img_tensor = F.interpolate(img_tensor, size=112, mode="bilinear", align_corners=True)  # to 112
+
+         feat = fid_model(img_tensor)
+         feats[idx] = feat
+
+     target_feat = feats[target_idx].unsqueeze(0)
+     cosine_sim = F.cosine_similarity(target_feat, feats, 1)
+     print(cosine_sim.shape)
+
+     print('====== similarity with %s ======' % img_list[target_idx])
+     for idx in range(n):
+         print('[%d] %s = %.2f' % (idx, img_list[idx], float(cosine_sim[idx].cpu())))
+
+
+ if __name__ == '__main__':
+     import argparse
+
+     parser = argparse.ArgumentParser(description="arcface")
+     parser.add_argument("-i", "--target_idx", type=int, default=0)
+     args = parser.parse_args()
+
+     identification(target_idx=args.target_idx)
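A note on fc_scale = 7 * 7: the stem convolution keeps stride 1 and each of the four stages halves the resolution, so the usual 112x112 ArcFace crop reaches layer4 at 112 / 2^4 = 7x7; other input sizes need a matching fc_scale. A quick check (illustrative):

    net = iresnet100(pretrained=False).eval()
    out = net(torch.randn(2, 3, 112, 112))
    print(out.shape)  # torch.Size([2, 512])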
third_party/arcface/load_dataset.py ADDED
@@ -0,0 +1,202 @@
+ import os
+ import numbers
+
+ import torch
+ import mxnet as mx
+ from PIL import Image
+ from torch.utils import data
+ from torchvision import transforms
+
+ import numpy as np
+
+
+ """ Original mxnet dataset
+ """
+ class MXFaceDataset(data.Dataset):
+     def __init__(self, root_dir, crop_param=(0, 0, 112, 112)):
+         super(MXFaceDataset, self).__init__()
+         self.transform = transforms.Compose([
+             # transforms.ToPILImage(),
+             transforms.RandomHorizontalFlip(),
+             transforms.ToTensor(),
+             transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
+         ])
+         self.root_dir = root_dir
+         self.crop_param = crop_param
+         path_imgrec = os.path.join(root_dir, 'train.rec')
+         path_imgidx = os.path.join(root_dir, 'train.idx')
+         self.imgrec = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r')
+         s = self.imgrec.read_idx(0)
+         header, _ = mx.recordio.unpack(s)
+         if header.flag > 0:
+             self.header0 = (int(header.label[0]), int(header.label[1]))
+             self.imgidx = np.array(range(1, int(header.label[0])))
+         else:
+             self.imgidx = np.array(list(self.imgrec.keys))
+
+     def __getitem__(self, index):
+         idx = self.imgidx[index]
+         s = self.imgrec.read_idx(idx)
+         header, img = mx.recordio.unpack(s)
+         label = header.label
+         if not isinstance(label, numbers.Number):
+             label = label[0]
+         label = torch.tensor(label, dtype=torch.long)
+         sample = mx.image.imdecode(img).asnumpy()
+         if self.transform is not None:
+             sample: Image.Image = transforms.ToPILImage()(sample)
+             sample = sample.crop(self.crop_param)
+             sample = self.transform(sample)
+         return sample, label
+
+     def __len__(self):
+         return len(self.imgidx)
+
+
+ """ MXNet binary dataset reader.
+ Refer to https://github.com/deepinsight/insightface.
+ """
+ import pickle
+ from typing import List
+ from mxnet import ndarray as nd
+ class ReadMXNet(object):
+     def __init__(self, val_targets, rec_prefix, image_size=(112, 112)):
+         self.ver_list: List[object] = []
+         self.ver_name_list: List[str] = []
+         self.rec_prefix = rec_prefix
+         self.val_targets = val_targets
+
+     def init_dataset(self, val_targets, data_dir, image_size):
+         for name in val_targets:
+             path = os.path.join(data_dir, name + ".bin")
+             if os.path.exists(path):
+                 data_set = self.load_bin(path, image_size)
+                 self.ver_list.append(data_set)
+                 self.ver_name_list.append(name)
+
+     def load_bin(self, path, image_size):
+         try:
+             with open(path, 'rb') as f:
+                 bins, issame_list = pickle.load(f)  # py2
+         except UnicodeDecodeError as e:
+             with open(path, 'rb') as f:
+                 bins, issame_list = pickle.load(f, encoding='bytes')  # py3
+         data_list = []
+         # for flip in [0, 1]:
+         #     data = torch.empty((len(issame_list) * 2, 3, image_size[0], image_size[1]))
+         #     data_list.append(data)
+         for idx in range(len(issame_list) * 2):
+             _bin = bins[idx]
+             img = mx.image.imdecode(_bin)
+             if img.shape[1] != image_size[0]:
+                 img = mx.image.resize_short(img, image_size[0])
+             img = nd.transpose(img, axes=(2, 0, 1))  # (C, H, W)
+
+             img = nd.transpose(img, axes=(1, 2, 0))  # back to (H, W, C)
+             fig = Image.fromarray(img.asnumpy(), mode='RGB')
+             data_list.append(fig)
+             # data_list[flip][idx][:] = torch.from_numpy(img.asnumpy())
+             if idx % 1000 == 0:
+                 print('loading bin', idx)
+
+             # # save img to '/home/yuange/dataset/LFW/rgb-arcface'
+             # save_name = 'ind_' + str(idx) + '.bmp'
+             # save_name = os.path.join('/home/yuange/dataset/LFW/rgb-arcface', save_name)
+             # fig.save(save_name)
+
+         print('load finished', len(data_list))
+         return data_list, issame_list
+
+
+ """
+ Evaluation Benchmark
+ """
+ class EvalDataset(data.Dataset):
+     def __init__(self,
+                  target: str = 'lfw',
+                  rec_folder: str = '',
+                  transform=None,
+                  crop_param=(0, 0, 112, 112)
+                  ):
+         print("=> Pre-loading images ...")
+         self.target = target
+         self.rec_folder = rec_folder
+         mx_reader = ReadMXNet(target, rec_folder)
+         path = os.path.join(rec_folder, target + ".bin")
+         all_img, issame_list = mx_reader.load_bin(path, (112, 112))
+         self.all_img = all_img
+         self.issame_list = []
+         for i in range(len(issame_list)):
+             flag = 0 if issame_list[i] else 1  # 0: is same
+             self.issame_list.append(flag)
+
+         self.transform = transform
+         if self.transform is None:
+             self.transform = transforms.Compose([
+                 transforms.ToTensor(),
+                 transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
+             ])
+         self.crop_param = crop_param
+
+     def __getitem__(self, index):
+         img1 = self.all_img[index * 2]
+         img2 = self.all_img[index * 2 + 1]
+         same = self.issame_list[index]
+
+         save_index = 11
+         if index == save_index:
+             img1.save('img1_ori.jpg')
+             img2.save('img2_ori.jpg')
+
+         img1 = img1.crop(self.crop_param)
+         img2 = img2.crop(self.crop_param)
+         if index == save_index:
+             img1.save('img1_crop.jpg')
+             img2.save('img2_crop.jpg')
+
+         img1 = self.transform(img1)
+         img2 = self.transform(img2)
+
+         return img1, img2, same
+
+     def __len__(self):
+         return len(self.issame_list)
+
+
+ if __name__ == '__main__':
+
+     import time
+
+     np.random.seed(1)
+     torch.manual_seed(1)
+     torch.cuda.manual_seed(1)
+     torch.cuda.manual_seed_all(1)
+     mx.random.seed(1)
+
+     is_gray = False
+
+     # NOTE: FaceByRandOccMask is not defined in this file; it comes from elsewhere in the repo.
+     train_set = FaceByRandOccMask(
+         root_dir='/tmp/train_tmp/casia',
+         local_rank=0,
+         use_norm=True,
+         is_gray=is_gray,
+     )
+     start = time.time()
+     for idx in range(100):
+         face, mask, label = train_set.__getitem__(idx)
+         if idx < 15:
+             face = ((face + 1) * 128).numpy().astype(np.uint8)
+             face = np.transpose(face, (1, 2, 0))
+             if is_gray:
+                 face = Image.fromarray(face[:, :, 0], mode='L')
+             else:
+                 face = Image.fromarray(face, mode='RGB')
+             face.save('face_{}.jpg'.format(idx))
+     print('time cost: %d ms' % (int((time.time() - start) * 1000)))
+ print('time cost: %d ms' % (int((time.time() - start) * 1000)))
third_party/arcface/margin_loss.py ADDED
@@ -0,0 +1,463 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+import torch
+import torch.nn.functional as F
+from torch import nn
+
+from torch.nn import Parameter
+
+import numpy as np
+
+__all__ = ['Softmax', 'AMCosFace', 'AMArcFace', ]
+
+
+MIN_NUM_PATCHES = 16
+
+
+""" All losses can run in 'torch.distributed.DistributedDataParallel'.
+"""
+
+class Softmax(nn.Module):
+    r"""Implementation of Softmax (normal classification head):
+    Args:
+        in_features: dimension (d_in) of input feature (B, d_in)
+        out_features: dimension (d_out) of output feature (B, d_out)
+        device_id: the ID of GPU where the model will be trained by data parallel (or DP). (not used)
+            if device_id=None, it will be trained on model parallel (or DDP). (recommended!)
+    """
+    def __init__(self,
+                 in_features: int,
+                 out_features: int,
+                 device_id,
+                 ):
+        super(Softmax, self).__init__()
+        self.in_features = in_features
+        self.out_features = out_features
+        self.device_id = device_id
+
+        self.weight = Parameter(torch.FloatTensor(out_features, in_features))
+        self.bias = Parameter(torch.FloatTensor(out_features))
+        nn.init.xavier_uniform_(self.weight)
+        nn.init.zeros_(self.bias)
+
+    def forward(self, embedding, label):
+        """
+        :param embedding: learned face representation
+        :param label:
+            - label >= 0: ground truth identity
+            - label = -1: invalid identity for this GPU (refer to 'PartialFC')
+            + Example: label = torch.tensor([-1, 4, -1, 5, 3, -1])
+        :return:
+        """
+        if self.device_id is None:
+            """ Regular linear layer.
+            """
+            out = F.linear(embedding, self.weight, self.bias)
+        else:
+            raise ValueError('DataParallel is not implemented yet.')
+            # NOTE: everything below is unreachable legacy DataParallel code.
+            x = embedding
+            sub_weights = torch.chunk(self.weight, len(self.device_id), dim=0)
+            sub_biases = torch.chunk(self.bias, len(self.device_id), dim=0)
+            temp_x = x.cuda(self.device_id[0])
+            weight = sub_weights[0].cuda(self.device_id[0])
+            bias = sub_biases[0].cuda(self.device_id[0])
+            out = F.linear(temp_x, weight, bias)
+            for i in range(1, len(self.device_id)):
+                temp_x = x.cuda(self.device_id[i])
+                weight = sub_weights[i].cuda(self.device_id[i])
+                bias = sub_biases[i].cuda(self.device_id[i])
+                out = torch.cat((out, F.linear(temp_x, weight, bias).cuda(self.device_id[0])), dim=1)
+        return out
+
+
+""" Not Used """
+class ArcFace(nn.Module):
+    r"""Implementation of ArcFace (https://arxiv.org/pdf/1801.07698v1.pdf):
+    Args:
+        in_features: size of each input sample
+        out_features: size of each output sample
+        device_id: the ID of GPU where the model will be trained by model parallel.
+            if device_id=None, it will be trained on CPU without model parallel.
+        s: norm of input feature
+        m: margin
+        cos(theta + m)
+    """
+
+    def __init__(self, in_features, out_features, device_id, s=64.0, m=0.50, easy_margin=False):
+        super(ArcFace, self).__init__()
+        self.in_features = in_features
+        self.out_features = out_features
+        self.device_id = device_id
+
+        self.s = s
+        self.m = m
+        print('ArcFace, s=%.1f, m=%.2f' % (s, m))
+
+        self.weight = Parameter(torch.FloatTensor(out_features, in_features))
+        nn.init.xavier_uniform_(self.weight)
+
+        self.easy_margin = easy_margin
+        self.cos_m = np.cos(m)
+        self.sin_m = np.sin(m)
+        self.th = np.cos(np.pi - m)
+        self.mm = np.sin(np.pi - m) * m
+
+    def forward(self, input, label):
+        # --------------------------- cos(theta) & phi(theta) ---------------------------
+        if self.device_id is None:
+            cosine = F.linear(F.normalize(input), F.normalize(self.weight))
+        else:
+            x = input
+            sub_weights = torch.chunk(self.weight, len(self.device_id), dim=0)
+            temp_x = x.cuda(self.device_id[0])
+            weight = sub_weights[0].cuda(self.device_id[0])
+            cosine = F.linear(F.normalize(temp_x), F.normalize(weight))
+            for i in range(1, len(self.device_id)):
+                temp_x = x.cuda(self.device_id[i])
+                weight = sub_weights[i].cuda(self.device_id[i])
+                cosine = torch.cat((cosine, F.linear(F.normalize(temp_x), F.normalize(weight)).cuda(self.device_id[0])),
+                                   dim=1)
+        sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
+        phi = cosine * self.cos_m - sine * self.sin_m
+        if self.easy_margin:
+            phi = torch.where(cosine > 0, phi, cosine)
+        else:
+            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
+        # --------------------------- convert label to one-hot ---------------------------
+        one_hot = torch.zeros(cosine.size())
+        if self.device_id is not None:
+            one_hot = one_hot.cuda(self.device_id[0])
+        else:
+            one_hot = one_hot.cuda()
+        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
+        # ------------- torch.where(out_i = {x_i if condition_i else y_i}) -------------
+        output = (one_hot * phi) + (
+                (1.0 - one_hot) * cosine)  # you can use torch.where if your torch.__version__ is 0.4
+        output *= self.s
+
+        return output
+
+
+""" Not Used """
+class CosFace(nn.Module):
+    r"""Implementation of CosFace (https://arxiv.org/pdf/1801.09414.pdf):
+    Args:
+        in_features: size of each input sample
+        out_features: size of each output sample
+        device_id: the ID of GPU where the model will be trained by model parallel.
+            if device_id=None, it will be trained on CPU without model parallel.
+        s: norm of input feature
+        m: margin
+        cos(theta) - m
+    """
+
+    def __init__(self, in_features, out_features, device_id, s=64.0, m=0.4):
+        super(CosFace, self).__init__()
+        print('CosFace, s=%.1f, m=%.2f' % (s, m))
+        self.in_features = in_features
+        self.out_features = out_features
+        self.device_id = device_id
+        self.s = s
+        self.m = m
+        self.weight = Parameter(torch.FloatTensor(out_features, in_features))
+        nn.init.xavier_uniform_(self.weight)
+
+    def forward(self, input, label):
+        # --------------------------- cos(theta) & phi(theta) ---------------------------
+        if self.device_id is None:
+            cosine = F.linear(F.normalize(input), F.normalize(self.weight))
+        else:
+            x = input
+            sub_weights = torch.chunk(self.weight, len(self.device_id), dim=0)
+            temp_x = x.cuda(self.device_id[0])
+            weight = sub_weights[0].cuda(self.device_id[0])
+            cosine = F.linear(F.normalize(temp_x), F.normalize(weight))
+            for i in range(1, len(self.device_id)):
+                temp_x = x.cuda(self.device_id[i])
+                weight = sub_weights[i].cuda(self.device_id[i])
+                cosine = torch.cat((cosine, F.linear(F.normalize(temp_x), F.normalize(weight)).cuda(self.device_id[0])),
+                                   dim=1)
+        phi = cosine - self.m
+        # --------------------------- convert label to one-hot ---------------------------
+        one_hot = torch.zeros(cosine.size()).cuda()
+        if self.device_id is not None:
+            one_hot = one_hot.cuda(self.device_id[0])
+            # one_hot = one_hot.cuda() if cosine.is_cuda else one_hot
+            one_hot.scatter_(1, label.cuda(self.device_id[0]).view(-1, 1).long(), 1)
+        else:
+            one_hot.scatter_(1, label.view(-1, 1).long(), 1)
+        # ------------- torch.where(out_i = {x_i if condition_i else y_i}) -------------
+        output = (one_hot * phi) + (
+                (1.0 - one_hot) * cosine)  # you can use torch.where if your torch.__version__ is 0.4
+        output *= self.s
+
+        return output
+
+    def __repr__(self):
+        return self.__class__.__name__ + '(' \
+               + 'in_features = ' + str(self.in_features) \
+               + ', out_features = ' + str(self.out_features) \
+               + ', s = ' + str(self.s) \
+               + ', m = ' + str(self.m) + ')'
+
+
+class AMCosFace(nn.Module):
+    r"""Implementation of Adaptive Margin CosFace:
+        cos(theta) - m + k * (theta - a)
+    When k is 0, AMCosFace degenerates into CosFace.
+    Args:
+        in_features: dimension (d_in) of input feature (B, d_in)
+        out_features: dimension (d_out) of output feature (B, d_out)
+        device_id: the ID of GPU where the model will be trained by data parallel (or DP). (not used)
+            if device_id=None, it will be trained on model parallel (or DDP). (recommended!)
+        s: norm of input feature
+        m: margin
+        a: AM Loss
+        k: AM Loss
+    """
+    def __init__(self,
+                 in_features: int,
+                 out_features: int,
+                 device_id,
+                 s: float = 64.0,
+                 m: float = 0.4,
+                 a: float = 1.2,
+                 k: float = 0.1,
+                 ):
+        super(AMCosFace, self).__init__()
+        print('AMCosFace, s=%.1f, m=%.2f, a=%.2f, k=%.2f' % (s, m, a, k))
+        self.in_features = in_features
+        self.out_features = out_features
+        self.device_id = device_id
+
+        self.s = s
+        self.m = m
+        self.a = a
+        self.k = k
+
+        """ Weight Matrix W (d_out, d_in) """
+        self.weight = Parameter(torch.FloatTensor(out_features, in_features))
+        nn.init.xavier_uniform_(self.weight)
+
+    def forward(self, embedding, label):
+        """
+        :param embedding: learned face representation
+        :param label:
+            - label >= 0: ground truth identity
+            - label = -1: invalid identity for this GPU (refer to 'PartialFC')
+            + Example: label = torch.tensor([-1, 4, -1, 5, 3, -1])
+        :return:
+        """
+        if self.device_id is None:
+            """ - embedding: shape is (B, d_in)
+                - weight: shape is (d_out, d_in)
+                - cosine: shape is (B, d_out)
+                + F.normalize is very important here.
+            """
+            cosine = F.linear(F.normalize(embedding), F.normalize(self.weight))  # y = xA^T + b
+        else:
+            raise ValueError('DataParallel is not implemented yet.')
+            # NOTE: everything below is unreachable legacy DataParallel code.
+            x = embedding
+            sub_weights = torch.chunk(self.weight, len(self.device_id), dim=0)
+            temp_x = x.cuda(self.device_id[0])
+            weight = sub_weights[0].cuda(self.device_id[0])
+            cosine = F.linear(F.normalize(temp_x), F.normalize(weight))
+            for i in range(1, len(self.device_id)):
+                temp_x = x.cuda(self.device_id[i])
+                weight = sub_weights[i].cuda(self.device_id[i])
+                cosine = torch.cat((cosine, F.linear(F.normalize(temp_x),
+                                                     F.normalize(weight)).cuda(self.device_id[0])),
+                                   dim=1)
+
+        """ - index: the index of valid identity in label, shape is (d_valid, )
+            + torch.where() returns a tuple indicating the index of each dimension
+            + Example: index = torch.tensor([1, 3, 4])
+        """
+        index = torch.where(label != -1)[0]
+
+        """ - m_hot: one-hot tensor of margin m_2, shape is (d_valid, d_out)
+            + torch.Tensor.scatter_(dim, index, source) is usually used to generate a one-hot tensor
+            + Example: label = torch.tensor([-1, 4, -1, 5, 3, -1])
+                       index = torch.tensor([1, 3, 4])  # d_valid = index.shape[0] = 3
+                       m_hot = torch.tensor([[0, 0, 0, 0, m, 0],
+                                             [0, 0, 0, 0, 0, m],
+                                             [0, 0, 0, m, 0, 0],
+                                             ])
+        """
+        m_hot = torch.zeros(index.size()[0], cosine.size()[1], device=cosine.device)
+        m_hot.scatter_(1, label[index, None], self.m)
+
+        """ logit(theta) = cos(theta) - m_2 + k * (theta - a)
+            - theta = cosine.acos_()
+            + Example: m_hot = torch.tensor([[0, 0, 0, 0, m-k(theta[0,4]-a), 0],
+                                             [0, 0, 0, 0, 0, m-k(theta[1,5]-a)],
+                                             [0, 0, 0, m-k(theta[2,3]-a), 0, 0],
+                                             ])
+        """
+        a = self.a
+        k = self.k
+        m_hot[range(0, index.size()[0]), label[index]] -= k * (cosine[index, label[index]].acos_() - a)
+        cosine[index] -= m_hot
+
+        """ Because we have used F.normalize, we should rescale the logit term by s.
+        """
+        output = cosine * self.s
+
+        return output
+
+    def __repr__(self):
+        return self.__class__.__name__ + '(' \
+               + 'in_features = ' + str(self.in_features) \
+               + ', out_features = ' + str(self.out_features) \
+               + ', s = ' + str(self.s) \
+               + ', m = ' + str(self.m) \
+               + ', a = ' + str(self.a) \
+               + ', k = ' + str(self.k) \
+               + ')'
+
+
+class AMArcFace(nn.Module):
+    r"""Implementation of Adaptive Margin ArcFace:
+        cos(theta + m - k * (theta - a))
+    When k is 0, AMArcFace degenerates into ArcFace.
+    Args:
+        in_features: dimension (d_in) of input feature (B, d_in)
+        out_features: dimension (d_out) of output feature (B, d_out)
+        device_id: the ID of GPU where the model will be trained by data parallel (or DP). (not used)
+            if device_id=None, it will be trained on model parallel (or DDP). (recommended!)
+        s: norm of input feature
+        m: margin
+        a: AM Loss
+        k: AM Loss
+    """
+    def __init__(self,
+                 in_features: int,
+                 out_features: int,
+                 device_id,
+                 s: float = 64.0,
+                 m: float = 0.5,
+                 a: float = 1.2,
+                 k: float = 0.1,
+                 ):
+        super(AMArcFace, self).__init__()
+        print('AMArcFace, s=%.1f, m=%.2f, a=%.2f, k=%.2f' % (s, m, a, k))
+        self.in_features = in_features
+        self.out_features = out_features
+        self.device_id = device_id
+
+        self.s = s
+        self.m = m
+        self.a = a
+        self.k = k
+
+        """ Weight Matrix W (d_out, d_in) """
+        self.weight = Parameter(torch.FloatTensor(out_features, in_features))
+        nn.init.xavier_uniform_(self.weight)
+
+    def forward(self, embedding, label):
+        """
+        :param embedding: learned face representation
+        :param label:
+            - label >= 0: ground truth identity
+            - label = -1: invalid identity for this GPU (refer to 'PartialFC')
+            + Example: label = torch.tensor([-1, 4, -1, 5, 3, -1])
+        :return:
+        """
+        if self.device_id is None:
+            """ - embedding: shape is (B, d_in)
+                - weight: shape is (d_out, d_in)
+                - cosine: shape is (B, d_out)
+                + F.normalize is very important here.
+            """
+            cosine = F.linear(F.normalize(embedding), F.normalize(self.weight))  # y = xA^T + b
+        else:
+            raise ValueError('DataParallel is not implemented yet.')
+            # NOTE: everything below is unreachable legacy DataParallel code.
+            x = embedding
+            sub_weights = torch.chunk(self.weight, len(self.device_id), dim=0)
+            temp_x = x.cuda(self.device_id[0])
+            weight = sub_weights[0].cuda(self.device_id[0])
+            cosine = F.linear(F.normalize(temp_x), F.normalize(weight))
+            for i in range(1, len(self.device_id)):
+                temp_x = x.cuda(self.device_id[i])
+                weight = sub_weights[i].cuda(self.device_id[i])
+                cosine = torch.cat((cosine, F.linear(F.normalize(temp_x),
+                                                     F.normalize(weight)).cuda(self.device_id[0])),
+                                   dim=1)
+
+        """ - index: the index of valid identity in label, shape is (d_valid, )
+            + torch.where() returns a tuple indicating the index of each dimension
+            + Example: index = torch.tensor([1, 3, 4])
+        """
+        index = torch.where(label != -1)[0]
+
+        """ - m_hot: one-hot tensor of margin m_2, shape is (d_valid, d_out)
+            + torch.Tensor.scatter_(dim, index, source) is usually used to generate a one-hot tensor
+            + Example: label = torch.tensor([-1, 4, -1, 5, 3, -1])
+                       index = torch.tensor([1, 3, 4])  # d_valid = index.shape[0] = 3
+                       m_hot = torch.tensor([[0, 0, 0, 0, m, 0],
+                                             [0, 0, 0, 0, 0, m],
+                                             [0, 0, 0, m, 0, 0],
+                                             ])
+        """
+        m_hot = torch.zeros(index.size()[0], cosine.size()[1], device=cosine.device)
+        m_hot.scatter_(1, label[index, None], self.m)
+
+        """ logit(theta) = cos(theta + m_2 - k * (theta - a))
+            - theta = cosine.acos_()
+            + Example: m_hot = torch.tensor([[0, 0, 0, 0, m-k(theta[0,4]-a), 0],
+                                             [0, 0, 0, 0, 0, m-k(theta[1,5]-a)],
+                                             [0, 0, 0, m-k(theta[2,3]-a), 0, 0],
+                                             ])
+        """
+        a = self.a
+        k = self.k
+        m_hot[range(0, index.size()[0]), label[index]] -= k * (cosine[index, label[index]].acos_() - a)
+
+        cosine.acos_()
+        cosine[index] += m_hot
+        cosine.cos_().mul_(self.s)
+        return cosine
+
+    def __repr__(self):
+        return self.__class__.__name__ + '(' \
+               + 'in_features = ' + str(self.in_features) \
+               + ', out_features = ' + str(self.out_features) \
+               + ', s = ' + str(self.s) \
+               + ', m = ' + str(self.m) \
+               + ', a = ' + str(self.a) \
+               + ', k = ' + str(self.k) \
+               + ')'
+
+
+if __name__ == '__main__':
+    cosine = torch.randn(6, 8) / 100
+    cosine[0][2] = 0.3
+    cosine[1][4] = 0.4
+    cosine[2][6] = 0.5
+    cosine[3][5] = 0.6
+    cosine[4][3] = 0.7
+    cosine[5][0] = 0.8
+    label = torch.tensor([-1, 4, -1, 5, 3, -1])
+
+    # layer = AMCosFace(in_features=8,
+    #                   out_features=8,
+    #                   device_id=None,
+    #                   m=0.35, s=1.0,
+    #                   a=1.2, k=0.1)
+
+    # layer = Softmax(in_features=8,
+    #                 out_features=8,
+    #                 device_id=None)
+
+    layer = AMArcFace(in_features=8,
+                      out_features=8,
+                      device_id=None,
+                      m=0.5, s=1.0,
+                      a=1.2, k=0.1)
+
+    logit = layer(cosine, label)
+    logit = F.softmax(logit, dim=-1)
+
+    from utils.vis_tensor import plot_tensor
+    plot_tensor((cosine, logit),
+                ('embedding', 'logit'),
+                'AMArc.jpg')
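To make the two adaptive margins concrete, here is a scalar sanity check of the target-logit formulas implemented above (a sketch using only the standard library; the numbers are made up):

import math

# AMCosFace target logit: cos(theta) - m + k * (theta - a).
# With k = 0 this reduces to CosFace's cos(theta) - m.
m, a, k = 0.4, 1.2, 0.1
cos_theta = 0.7
theta = math.acos(cos_theta)              # ~0.795 rad

logit_cos = cos_theta - m + k * (theta - a)   # ~0.7 - 0.4 + 0.1 * (-0.405) ~ 0.259

# AMArcFace instead moves the margin inside the angle:
# cos(theta + m - k * (theta - a)), reducing to ArcFace when k = 0.
logit_arc = math.cos(theta + 0.5 - k * (theta - a))

Since theta < a here, the adaptive term k * (theta - a) is negative, so easy samples (small theta) receive a slightly larger effective margin than plain CosFace/ArcFace would apply.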
third_party/arcface/mouth_net.py ADDED
@@ -0,0 +1,117 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import math
+
+from third_party.arcface.iresnet import iresnet50, iresnet100
+
+
+class MouthNet(nn.Module):
+    def __init__(self,
+                 bisenet: nn.Module,
+                 feature_dim: int = 64,
+                 crop_param: tuple = (0, 0, 112, 112),
+                 iresnet_pretrained: bool = False,
+                 ):
+        super(MouthNet, self).__init__()
+
+        crop_size = (crop_param[3] - crop_param[1], crop_param[2] - crop_param[0])  # (H, W)
+        fc_scale = int(math.ceil(crop_size[0] / 112 * 7) * math.ceil(crop_size[1] / 112 * 7))
+
+        self.bisenet = bisenet
+        self.backbone = iresnet50(
+            pretrained=iresnet_pretrained,
+            num_features=feature_dim,
+            fp16=False,
+            fc_scale=fc_scale,
+        )
+
+        self.register_buffer(
+            name="vgg_mean",
+            tensor=torch.tensor([[[0.485]], [[0.456]], [[0.406]]], requires_grad=False),
+        )
+        self.register_buffer(
+            name="vgg_std",
+            tensor=torch.tensor([[[0.229]], [[0.224]], [[0.225]]], requires_grad=False),
+        )
+
+    def forward(self, x):
+        # with torch.no_grad():
+        #     x_mouth_mask = self.get_any_mask(x, par=[11, 12, 13], normalized=True)  # (B,1,H,W), in [0,1], 1: chosen
+        x_mouth_mask = 1  # parsing-based masking is disabled; the input crop alone selects the mouth
+        x_mouth = x * x_mouth_mask  # (B,3,112,112)
+        mouth_feature = self.backbone(x_mouth)
+        return mouth_feature
+
+    def get_any_mask(self, img, par, normalized=False):
+        # [0 'background', 1 'skin', 2 'l_brow', 3 'r_brow', 4 'l_eye', 5 'r_eye',
+        #  6 'eye_g', 7 'l_ear', 8 'r_ear', 9 'ear_r', 10 'nose', 11 'mouth', 12 'u_lip',
+        #  13 'l_lip', 14 'neck', 15 'neck_l', 16 'cloth', 17 'hair', 18 'hat']
+        ori_size = img.size()[-1]
+        with torch.no_grad():
+            img = F.interpolate(img, size=512, mode="nearest")
+            if not normalized:
+                img = img * 0.5 + 0.5
+                img = img.sub(self.vgg_mean.detach()).div(self.vgg_std.detach())
+            out = self.bisenet(img)[0]
+            parsing = out.softmax(1).argmax(1)
+        mask = torch.zeros_like(parsing)
+        for p in par:
+            mask = mask + ((parsing == p).float())
+        mask = mask.unsqueeze(1)
+        mask = F.interpolate(mask, size=ori_size, mode="bilinear", align_corners=True)
+        return mask
+
+    def save_backbone(self, path: str):
+        torch.save(self.backbone.state_dict(), path)
+
+    def load_backbone(self, path: str):
+        self.backbone.load_state_dict(torch.load(path))
+
+
+if __name__ == "__main__":
+    from third_party.bisenet.bisenet import BiSeNet
+
+    bisenet = BiSeNet(19)
+    bisenet.load_state_dict(
+        torch.load(
+            "/gavin/datasets/hanbang/79999_iter.pth",
+            map_location="cpu",
+        )
+    )
+    bisenet.eval()
+    bisenet.requires_grad_(False)
+
+    crop_param = (28, 56, 84, 112)
+
+    import numpy as np
+    img = np.random.randn(112, 112, 3) * 255
+    from PIL import Image
+    img = Image.fromarray(img.astype(np.uint8))
+    img = img.crop(crop_param)
+
+    from torchvision import transforms
+    trans = transforms.ToTensor()
+    img = trans(img).unsqueeze(0)
+    img = img.repeat(3, 1, 1, 1)
+    print(img.shape)
+
+    net = MouthNet(
+        bisenet=bisenet,
+        feature_dim=64,
+        crop_param=crop_param
+    )
+    mouth_feat = net(img)
+    print(mouth_feat.shape)
+
+    import thop
+
+    crop_size = (crop_param[3] - crop_param[1], crop_param[2] - crop_param[0])  # (H, W)
+    fc_scale = int(math.ceil(crop_size[0] / 112 * 7) * math.ceil(crop_size[1] / 112 * 7))
+    backbone = iresnet100(
+        pretrained=False,
+        num_features=64,
+        fp16=False,
+        # fc_scale=fc_scale,
+    )
+    flops, params = thop.profile(backbone, inputs=(torch.randn(1, 3, 112, 112),), verbose=False)
+    print('#Params=%.2fM, GFLOPS=%.2f' % (params / 1e6, flops / 1e9))
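A quick check of the `fc_scale` arithmetic in `MouthNet.__init__` (the helper name `fc_scale_for` is ours, for illustration only): the iresnet backbone reduces a 112x112 input to a 7x7 final feature map, so a (w1, h1, w2, h2) crop shrinks the flattened FC input proportionally in each spatial dimension.

import math

def fc_scale_for(crop_param):
    # Mirrors the two lines in MouthNet.__init__: each spatial dim of the
    # 7x7 final feature map scales by crop_size / 112, rounded up.
    h = crop_param[3] - crop_param[1]
    w = crop_param[2] - crop_param[0]
    return int(math.ceil(h / 112 * 7) * math.ceil(w / 112 * 7))

print(fc_scale_for((0, 0, 112, 112)))   # 49 -> full face, 7 * 7
print(fc_scale_for((28, 56, 84, 112)))  # 16 -> 56x56 mouth crop, 4 * 4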
third_party/arcface/mouth_net_eval.py ADDED
@@ -0,0 +1,69 @@
+import argparse
+import pytorch_lightning as pl
+import numpy as np
+import torch
+
+from third_party.arcface.mouth_net_pl import MouthNetPL
+from third_party.arcface.mouth_net import MouthNet
+
+
+class MouthTest(object):
+    def __init__(self):
+        self.dataset_len = 400
+
+        self.fixer_crop_param = (28, 56, 84, 112)
+        self.fixer_casia_model = MouthNet(
+            bisenet=None,
+            feature_dim=128,
+            crop_param=self.fixer_crop_param
+        ).cuda()
+        fixer_path = "/gavin/code/FaceSwapping/modules/third_party/arcface/weights/fixer_net_casia_28_56_84_112.pth"
+        self.fixer_casia_model.load_backbone(fixer_path)
+        self.fixer_casia_model.eval()
+        self.fixer_t = np.zeros((self.dataset_len, 128), dtype=np.float32)
+        self.fixer_s = np.zeros_like(self.fixer_t, dtype=np.float32)  # each embedding repeats 10 times in ffplus
+        self.fixer_r = np.zeros_like(self.fixer_t, dtype=np.float32)
+        print('Fixer model loaded.')
+
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser()
+    args = parser.parse_args()
+    args.val_targets = []
+    args.rec_folder = "/gavin/datasets/msml/ms1m-retinaface"
+
+    fixer_net = MouthNetPL.load_from_checkpoint(
+        "/apdcephfs/share_1290939/gavinyuan/out/fixernet_casia/epoch=22-step=10999-v1.ckpt",
+        map_location='cpu', strict=False,
+        num_classes=10572,
+        batch_size=128,
+        dim_feature=128,
+        rec_folder=args.rec_folder,
+        header_type="AMCosFace",
+        crop=(28, 56, 84, 112),
+    )
+
+    lower_net_1 = MouthNetPL.load_from_checkpoint(
+        "/apdcephfs/share_1290939/gavinyuan/out/mouth_net_1/epoch=24-step=242999.ckpt",
+        map_location='cpu', strict=False,
+        num_classes=93431,
+        batch_size=128,
+        dim_feature=128,
+        rec_folder=args.rec_folder,
+        header_type="AMArcFace",
+        crop=(28, 56, 84, 112),
+    )
+
+    # test_net = fixer_net
+    test_net = lower_net_1
+    trainer = pl.Trainer(
+        logger=False,
+        gpus=1,
+        distributed_backend='dp',
+        benchmark=True,
+    )
+    trainer.test(test_net)
+
+    # print('Fixer model loading...')
+    # m_test = MouthTest()
third_party/arcface/mouth_net_pl.py ADDED
@@ -0,0 +1,358 @@
+import os.path
+
+import torch
+import torchvision
+import torch.nn.functional as F
+from torch.utils.data import DataLoader
+import pytorch_lightning as pl
+
+import numpy as np
+import sklearn
+from sklearn.metrics import roc_curve, auc
+from scipy.spatial.distance import cdist
+
+from third_party.arcface.mouth_net import MouthNet
+from third_party.arcface.margin_loss import Softmax, AMArcFace, AMCosFace
+from third_party.arcface.load_dataset import MXFaceDataset, EvalDataset
+from third_party.bisenet.bisenet import BiSeNet
+
+
+class MouthNetPL(pl.LightningModule):
+    def __init__(
+        self,
+        num_classes: int,
+        batch_size: int = 256,
+        dim_feature: int = 128,
+        header_type: str = 'AMArcFace',
+        header_params: tuple = (64.0, 0.5, 0.0, 0.0),  # (s, m, a, k)
+        rec_folder: str = "/gavin/datasets/msml/ms1m-retinaface",
+        learning_rate: float = 0.1,
+        crop: tuple = (0, 0, 112, 112),  # (w1, h1, w2, h2)
+    ):
+        super(MouthNetPL, self).__init__()
+
+        # self.img_size = (112, 112)
+
+        ''' mouth feature extractor '''
+        # NOTE: the parsing network is loaded but not handed to MouthNet
+        # (bisenet=None below); masking is disabled in MouthNet.forward.
+        bisenet = BiSeNet(19)
+        bisenet.load_state_dict(
+            torch.load(
+                "/gavin/datasets/hanbang/79999_iter.pth",
+                map_location="cpu",
+            )
+        )
+        bisenet.eval()
+        bisenet.requires_grad_(False)
+        self.mouth_net = MouthNet(
+            bisenet=None,
+            feature_dim=dim_feature,
+            crop_param=crop,
+            iresnet_pretrained=False,
+        )
+
+        ''' head & loss '''
+        self.automatic_optimization = False
+        self.dim_feature = dim_feature
+        self.num_classes = num_classes
+        self._prepare_header(header_type, header_params)
+        self.cls_criterion = torch.nn.CrossEntropyLoss()
+        self.learning_rate = learning_rate
+
+        ''' dataset '''
+        assert os.path.exists(rec_folder)
+        self.rec_folder = rec_folder
+        self.batch_size = batch_size
+        self.crop_param = crop
+
+        ''' validation '''
+
+    def _prepare_header(self, head_type, header_params):
+        dim_in = self.dim_feature
+        dim_out = self.num_classes
+
+        """ Get hyper-params of header """
+        s, m, a, k = header_params
+
+        """ Choose the header """
+        if 'Softmax' in head_type:
+            self.classification = Softmax(dim_in, dim_out, device_id=None)
+        elif 'AMCosFace' in head_type:
+            self.classification = AMCosFace(dim_in, dim_out,
+                                            device_id=None,
+                                            s=s, m=m,
+                                            a=a, k=k,
+                                            )
+        elif 'AMArcFace' in head_type:
+            self.classification = AMArcFace(dim_in, dim_out,
+                                            device_id=None,
+                                            s=s, m=m,
+                                            a=a, k=k,
+                                            )
+        else:
+            raise ValueError('Header type error!')
+
+    def forward(self, x, label=None):
+        feat = self.mouth_net(x)
+        if self.training:
+            assert label is not None
+            cls = self.classification(feat, label)
+            return feat, cls
+        else:
+            return feat
+
+    def training_step(self, batch, batch_idx):
+        opt = self.optimizers(use_pl_optimizer=True)
+        img, label = batch
+
+        mouth_feat, final_cls = self(img, label)
+
+        cls_loss = self.cls_criterion(final_cls, label)
+
+        opt.zero_grad()
+        self.manual_backward(cls_loss)
+        torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=5, norm_type=2)
+        opt.step()
+
+        ''' loss logging '''
+        self.logging_dict({"cls_loss": cls_loss}, prefix="train / ")
+        self.logging_lr()
+        if batch_idx % 50 == 0 and self.local_rank == 0:
+            print('loss=', cls_loss)
+
+        return cls_loss
+
+    def training_epoch_end(self, outputs):
+        sch = self.lr_schedulers()
+        sch.step()
+
+        lr = -1
+        opts = self.trainer.optimizers
+        for opt in opts:
+            for param_group in opt.param_groups:
+                lr = param_group["lr"]
+                break
+        print('learning rate changed to %.6f' % lr)
+
+    # def validation_step(self, batch, batch_idx):
+    #     return self.test_step(batch, batch_idx)
+    #
+    # def validation_step_end(self, outputs):
+    #     return self.test_step_end(outputs)
+    #
+    # def validation_epoch_end(self, outputs):
+    #     return self.test_step_end(outputs)
+
+    @staticmethod
+    def save_tensor(tensor: torch.Tensor, path: str, b_idx: int = 0):
+        tensor = (tensor + 1.) * 127.5
+        img = tensor.permute(0, 2, 3, 1)[b_idx].cpu().numpy()
+        from PIL import Image
+        img_pil = Image.fromarray(img.astype(np.uint8))
+        img_pil.save(path)
+
+    def test_step(self, batch, batch_idx):
+        img1, img2, same = batch
+        feat1 = self.mouth_net(img1)
+        feat2 = self.mouth_net(img2)
+        return feat1, feat2, same
+
+    def test_step_end(self, outputs):
+        feat1, feat2, same = outputs
+        feat1 = feat1.cpu().numpy()
+        feat2 = feat2.cpu().numpy()
+        same = same.cpu().numpy()
+
+        feat1 = sklearn.preprocessing.normalize(feat1)
+        feat2 = sklearn.preprocessing.normalize(feat2)
+
+        predict_label = []
+        num = feat1.shape[0]
+        for i in range(num):
+            dis_cos = cdist(feat1[i, None], feat2[i, None], metric='cosine')
+            predict_label.append(dis_cos[0, 0])
+        predict_label = np.array(predict_label)
+
+        return {
+            "pred": predict_label,
+            "gt": same,
+        }
+
+    def test_epoch_end(self, outputs):
+        print(outputs)
+        pred, same = None, None
+        for batch_output in outputs:
+            if pred is None and same is None:
+                pred = batch_output["pred"]
+                same = batch_output["gt"]
+            else:
+                pred = np.concatenate([pred, batch_output["pred"]])
+                same = np.concatenate([same, batch_output["gt"]])
+        print(pred.shape, same.shape)
+
+        fpr, tpr, threshold = roc_curve(same, pred)
+        acc = tpr[np.argmin(np.abs(tpr - (1 - fpr)))]  # choose the threshold where TPR = 1 - FPR
+        print("=> verification finished, acc=%.4f" % (acc))
+
+        ''' save pth '''
+        pth_path = "./weights/fixer_net_casia_%s.pth" % ('_'.join((str(x) for x in self.crop_param)))
+        self.mouth_net.save_backbone(pth_path)
+        print("=> model save to %s" % pth_path)
+        mouth_net = MouthNet(
+            bisenet=None,
+            feature_dim=self.dim_feature,
+            crop_param=self.crop_param
+        )
+        mouth_net.load_backbone(pth_path)
+        print("=> MouthNet pth checked")
+
+        return acc
+
+    def logging_dict(self, log_dict, prefix=None):
+        for key, val in log_dict.items():
+            if prefix is not None:
+                key = prefix + key
+            self.log(key, val)
+
+    def logging_lr(self):
+        opts = self.trainer.optimizers
+        for idx, opt in enumerate(opts):
+            lr = None
+            for param_group in opt.param_groups:
+                lr = param_group["lr"]
+                break
+            self.log(f"lr_{idx}", lr)
+
+    def configure_optimizers(self):
+        params = list(self.parameters())
+        learning_rate = self.learning_rate / 512 * self.batch_size * torch.cuda.device_count()
+        optimizer = torch.optim.SGD(params, lr=learning_rate,
+                                    momentum=0.9, weight_decay=5e-4)
+        print('lr is set as %.5f due to the global batch_size %d' % (learning_rate,
+              self.batch_size * torch.cuda.device_count()))
+
+        def lr_step_func(epoch):
+            return ((epoch + 1) / (4 + 1)) ** 2 if epoch < 0 else 0.1 ** len(
+                [m for m in [11, 17, 22] if m - 1 <= epoch])  # 0.1, 0.01, 0.001, 0.0001
+        scheduler = torch.optim.lr_scheduler.LambdaLR(
+            optimizer=optimizer, lr_lambda=lr_step_func)
+
+        return [optimizer], [scheduler]
+
+    def train_dataloader(self):
+        dataset = MXFaceDataset(
+            root_dir=self.rec_folder,
+            crop_param=self.crop_param,
+        )
+        train_loader = DataLoader(
+            dataset, self.batch_size, num_workers=24, shuffle=True, drop_last=True
+        )
+        return train_loader
+
+    def val_dataloader(self):
+        return self.test_dataloader()
+
+    def test_dataloader(self):
+        dataset = EvalDataset(
+            rec_folder=self.rec_folder,
+            target='lfw',
+            crop_param=self.crop_param
+        )
+        test_loader = DataLoader(
+            dataset, 20, num_workers=12, shuffle=False, drop_last=False
+        )
+        return test_loader
+
+
+def start_train():
+    import os
+    import argparse
+    import torch
+    import pytorch_lightning as pl
+    from pytorch_lightning.callbacks import ModelCheckpoint
+    import wandb
+    from pytorch_lightning.loggers import WandbLogger
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-g",
+        "--gpus",
+        type=str,
+        default=None,
+        help="Number of gpus to use (e.g. '0,1,2,3'). Will use all if not given.",
+    )
+    parser.add_argument("-n", "--name", type=str, required=True, help="Name of the run.")
+    parser.add_argument("-pj", "--project", type=str, default="mouthnet", help="Name of the project.")
+
+    parser.add_argument("-rp", "--resume_checkpoint_path",
+                        type=str, default=None, help="path of checkpoint for resuming", )
+    parser.add_argument("-p", "--saving_folder",
+                        type=str, default="/apdcephfs/share_1290939/gavinyuan/out", help="saving folder", )
+    parser.add_argument("--wandb_resume",
+                        type=str, default=None, help="resume wandb logging from the input id", )
+
+    parser.add_argument("--header_type", type=str, default="AMArcFace", help="loss type.")
+
+    parser.add_argument("-bs", "--batch_size", type=int, default=128, help="bs.")
+    parser.add_argument("-fs", "--fast_dev_run", type=bool, default=False, help="pytorch.lightning fast_dev_run")
+    args = parser.parse_args()
+    args.val_targets = []
+    # args.rec_folder = "/gavin/datasets/msml/ms1m-retinaface"
+    # num_classes = 93431
+    args.rec_folder = "/gavin/datasets/msml/casia"
+    num_classes = 10572
+
+    save_path = os.path.join(args.saving_folder, args.name)
+    os.makedirs(save_path, exist_ok=True)
+    checkpoint_callback = ModelCheckpoint(
+        dirpath=save_path,
+        monitor="train / cls_loss",
+        save_top_k=10,
+        verbose=True,
+        every_n_train_steps=200,
+    )
+
+    torch.cuda.empty_cache()
+    mouth_net = MouthNetPL(
+        num_classes=num_classes,
+        batch_size=args.batch_size,
+        dim_feature=128,
+        rec_folder=args.rec_folder,
+        header_type=args.header_type,
+        crop=(28, 56, 84, 112)
+    )
+
+    if args.wandb_resume is None:
+        resume = "allow"
+        wandb_id = wandb.util.generate_id()
+    else:
+        resume = True
+        wandb_id = args.wandb_resume
+    logger = WandbLogger(
+        project=args.project,
+        entity="gavinyuan",
+        name=args.name,
+        resume=resume,
+        id=wandb_id,
+    )
+
+    trainer = pl.Trainer(
+        gpus=-1 if args.gpus is None else torch.cuda.device_count(),
+        callbacks=[checkpoint_callback],
+        logger=logger,
+        weights_save_path=save_path,
+        resume_from_checkpoint=args.resume_checkpoint_path,
+        gradient_clip_val=0,
+        max_epochs=25,
+        num_sanity_val_steps=1,
+        fast_dev_run=args.fast_dev_run,
+        val_check_interval=50,
+        progress_bar_refresh_rate=1,
+        distributed_backend="ddp",
+        benchmark=True,
+    )
+    trainer.fit(mouth_net)
+
+
+if __name__ == "__main__":
+    start_train()
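A sketch of the learning-rate schedule that `configure_optimizers` builds (the helper `lr_at` is hypothetical; the 0.1 base rate, the linear batch-size scaling, and the [11, 17, 22] milestones are taken from the code above):

def lr_at(epoch, base_lr=0.1, batch_size=128, n_gpus=1):
    # Linear scaling rule: base_lr / 512 * global batch size.
    lr = base_lr / 512 * batch_size * n_gpus
    # 10x decay at each milestone (the m - 1 <= epoch test in lr_step_func).
    decay = 0.1 ** len([m for m in [11, 17, 22] if m - 1 <= epoch])
    return lr * decay

print(lr_at(0))    # 0.025
print(lr_at(10))   # 0.0025  (first decay fires at epoch 10)
print(lr_at(24))   # 2.5e-05 (all three decays applied)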
third_party/arcface/resnet.py ADDED
@@ -0,0 +1,2 @@
+import torch
+from torchvision.models import resnet50
third_party/arcface/utils_callbacks.py ADDED
@@ -0,0 +1,141 @@
+import logging
+import os
+import time
+from typing import List
+
+import torch
+
+from third_party.arcface import verification
+
+
+class AverageMeter(object):
+    """ Computes and stores the average and current value
+    """
+    def __init__(self):
+        self.val = None
+        self.avg = None
+        self.sum = None
+        self.count = None
+        self.reset()
+
+    def reset(self):
+        self.val = 0
+        self.avg = 0
+        self.sum = 0
+        self.count = 0
+
+    def update(self, val, n=1):
+        self.val = val
+        self.sum += val * n
+        self.count += n
+        self.avg = self.sum / self.count
+
+
+class CallBackVerification(object):
+    def __init__(self, frequent, rank, val_targets, rec_prefix, image_size=(112, 112),
+                 is_gray=False):
+        self.frequent: int = frequent
+        self.rank: int = rank
+        self.highest_acc: float = 0.0
+        self.highest_acc_list: List[float] = [0.0] * len(val_targets)
+        self.ver_list: List[object] = []
+        self.ver_name_list: List[str] = []
+        if self.rank == 0:
+            self.init_dataset(val_targets=val_targets, data_dir=rec_prefix, image_size=image_size)
+        self.is_gray = is_gray
+
+    def ver_test(self, backbone: torch.nn.Module, global_step: int):
+        results = []
+        for i in range(len(self.ver_list)):
+            acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test(
+                self.ver_list[i], backbone, 10, 10,
+                is_gray=self.is_gray)
+            # logging.info('[%s][%d]XNorm: %f' % (self.ver_name_list[i], global_step, xnorm))
+            # logging.info('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (self.ver_name_list[i], global_step, acc2, std2))
+            print('[%s][%d]XNorm: %f' % (self.ver_name_list[i], global_step, xnorm))
+            print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (self.ver_name_list[i], global_step, acc2, std2))
+            if acc2 > self.highest_acc_list[i]:
+                self.highest_acc_list[i] = acc2
+            # logging.info(
+            #     '[%s][%d]Accuracy-Highest: %1.5f' % (self.ver_name_list[i], global_step, self.highest_acc_list[i]))
+            print(
+                '[%s][%d]Accuracy-Highest: %1.5f' % (self.ver_name_list[i], global_step, self.highest_acc_list[i]))
+            results.append(acc2)
+
+    def init_dataset(self, val_targets, data_dir, image_size):
+        for name in val_targets:
+            path = os.path.join(data_dir, name + ".bin")
+            if os.path.exists(path):
+                data_set = verification.load_bin(path, image_size)
+                self.ver_list.append(data_set)
+                self.ver_name_list.append(name)
+
+    def __call__(self, num_update, backbone: torch.nn.Module):
+        if self.rank == 0 and num_update > 0 and num_update % self.frequent == 0:
+            backbone.eval()
+            self.ver_test(backbone, num_update)
+            backbone.train()
+
+
+class CallBackLogging(object):
+    def __init__(self, frequent, rank, total_step, batch_size, world_size, writer=None):
+        self.frequent: int = frequent
+        self.rank: int = rank
+        self.time_start = time.time()
+        self.total_step: int = total_step
+        self.batch_size: int = batch_size
+        self.world_size: int = world_size
+        self.writer = writer
+
+        self.init = False
+        self.tic = 0
+
+    def __call__(self, global_step, loss: AverageMeter, epoch: int, fp16: bool, grad_scaler: torch.cuda.amp.GradScaler):
+        if self.rank == 0 and global_step > 0 and global_step % self.frequent == 0:
+            if self.init:
+                try:
+                    speed: float = self.frequent * self.batch_size / (time.time() - self.tic)
+                    speed_total = speed * self.world_size
+                except ZeroDivisionError:
+                    speed_total = float('inf')
+
+                time_now = (time.time() - self.time_start) / 3600
+                time_total = time_now / ((global_step + 1) / self.total_step)
+                time_for_end = time_total - time_now
+                if self.writer is not None:
+                    self.writer.add_scalar('time_for_end', time_for_end, global_step)
+                    self.writer.add_scalar('loss', loss.avg, global_step)
+                if fp16:
+                    msg = "Speed %.2f samples/sec Loss %.4f Epoch: %d Global Step: %d " \
+                          "Fp16 Grad Scale: %2.f Required: %1.f hours" % (
+                              speed_total, loss.avg, epoch, global_step, grad_scaler.get_scale(), time_for_end
+                          )
+                else:
+                    msg = "Speed %.2f samples/sec Loss %.4f Epoch: %d Global Step: %d Required: %1.f hours" % (
+                        speed_total, loss.avg, epoch, global_step, time_for_end
+                    )
+                logging.info(msg)
+                loss.reset()
+                self.tic = time.time()
+            else:
+                self.init = True
+                self.tic = time.time()
+
+
+class CallBackModelCheckpoint(object):
+    def __init__(self, rank, output="./"):
+        self.rank: int = rank
+        self.output: str = output
+
+    def __call__(self,
+                 global_step,
+                 backbone: torch.nn.Module,
+                 partial_fc=None,
+                 awloss=None, ):
+        print('CallBackModelCheckpoint...')
+        if global_step > 100 and self.rank == 0:
+            torch.save(backbone.module.state_dict(), os.path.join(self.output, "backbone.pth"))
+        if global_step > 100 and partial_fc is not None:
+            partial_fc.save_params()
+        if global_step > 100 and awloss is not None:
+            torch.save(awloss.state_dict(), os.path.join(self.output, "awloss.pth"))
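A minimal usage sketch for `AverageMeter` (the numbers are made up): `update(val, n)` accumulates a sum weighted by batch size, so `avg` is the per-sample mean rather than a mean of batch means.

meter = AverageMeter()
meter.update(0.50, n=128)   # batch 1: mean loss 0.50 over 128 samples
meter.update(0.30, n=64)    # batch 2: mean loss 0.30 over 64 samples
print(meter.val)            # 0.30 (latest value)
print(meter.avg)            # (0.5 * 128 + 0.3 * 64) / 192 ~ 0.4333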
third_party/arcface/verification.py ADDED
@@ -0,0 +1,440 @@
+"""Helper for evaluation on the Labeled Faces in the Wild dataset
+"""
+
+# MIT License
+#
+# Copyright (c) 2016 David Sandberg
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+
+import datetime
+import os
+import pickle
+
+import mxnet as mx
+import numpy as np
+import sklearn
+import torch
+from mxnet import ndarray as nd
+from scipy import interpolate
+from sklearn.decomposition import PCA
+from sklearn.model_selection import KFold
+
+
+class LFold:
+    def __init__(self, n_splits=2, shuffle=False):
+        self.n_splits = n_splits
+        if self.n_splits > 1:
+            self.k_fold = KFold(n_splits=n_splits, shuffle=shuffle)
+
+    def split(self, indices):
+        if self.n_splits > 1:
+            return self.k_fold.split(indices)
+        else:
+            return [(indices, indices)]
+
+
+def calculate_roc(thresholds,
+                  embeddings1,
+                  embeddings2,
+                  actual_issame,
+                  nrof_folds=10,
+                  pca=0):
+    assert (embeddings1.shape[0] == embeddings2.shape[0])
+    assert (embeddings1.shape[1] == embeddings2.shape[1])
+    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
+    nrof_thresholds = len(thresholds)
+    k_fold = LFold(n_splits=nrof_folds, shuffle=False)
+
+    tprs = np.zeros((nrof_folds, nrof_thresholds))
+    fprs = np.zeros((nrof_folds, nrof_thresholds))
+    accuracy = np.zeros((nrof_folds))
+    indices = np.arange(nrof_pairs)
+
+    if pca == 0:
+        diff = np.subtract(embeddings1, embeddings2)
+        dist = np.sum(np.square(diff), 1)
+        print('dist', dist.min(), dist.max())
+
+    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
+        if pca > 0:
+            print('doing pca on', fold_idx)
+            embed1_train = embeddings1[train_set]
+            embed2_train = embeddings2[train_set]
+            _embed_train = np.concatenate((embed1_train, embed2_train), axis=0)
+            pca_model = PCA(n_components=pca)
+            pca_model.fit(_embed_train)
+            embed1 = pca_model.transform(embeddings1)
+            embed2 = pca_model.transform(embeddings2)
+            embed1 = sklearn.preprocessing.normalize(embed1)
+            embed2 = sklearn.preprocessing.normalize(embed2)
+            diff = np.subtract(embed1, embed2)
+            dist = np.sum(np.square(diff), 1)
+
+        # Find the best threshold for the fold
+        acc_train = np.zeros((nrof_thresholds))
+        for threshold_idx, threshold in enumerate(thresholds):
+            _, _, acc_train[threshold_idx] = calculate_accuracy(
+                threshold, dist[train_set], actual_issame[train_set])
+        best_threshold_index = np.argmax(acc_train)
+        for threshold_idx, threshold in enumerate(thresholds):
+            tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _ = calculate_accuracy(
+                threshold, dist[test_set],
+                actual_issame[test_set])
+        _, _, accuracy[fold_idx] = calculate_accuracy(
+            thresholds[best_threshold_index], dist[test_set],
+            actual_issame[test_set])
+
+    tpr = np.mean(tprs, 0)
+    fpr = np.mean(fprs, 0)
+    return tpr, fpr, accuracy
+
+
+def calculate_accuracy(threshold, dist, actual_issame):
+    predict_issame = np.less(dist, threshold)
+    tp = np.sum(np.logical_and(predict_issame, actual_issame))
+    fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
+    tn = np.sum(
+        np.logical_and(np.logical_not(predict_issame),
+                       np.logical_not(actual_issame)))
+    fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame))
+
+    tpr = 0 if (tp + fn == 0) else float(tp) / float(tp + fn)
+    fpr = 0 if (fp + tn == 0) else float(fp) / float(fp + tn)
+    acc = float(tp + tn) / dist.size
+    return tpr, fpr, acc
+
+
+def calculate_val(thresholds,
+                  embeddings1,
+                  embeddings2,
+                  actual_issame,
+                  far_target,
+                  nrof_folds=10):
+    assert (embeddings1.shape[0] == embeddings2.shape[0])
+    assert (embeddings1.shape[1] == embeddings2.shape[1])
+    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
+    nrof_thresholds = len(thresholds)
+    k_fold = LFold(n_splits=nrof_folds, shuffle=False)
+
+    val = np.zeros(nrof_folds)
+    far = np.zeros(nrof_folds)
+
+    diff = np.subtract(embeddings1, embeddings2)
+    dist = np.sum(np.square(diff), 1)
+    indices = np.arange(nrof_pairs)
+
+    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
+
+        # Find the threshold that gives FAR = far_target
+        far_train = np.zeros(nrof_thresholds)
+        for threshold_idx, threshold in enumerate(thresholds):
+            _, far_train[threshold_idx] = calculate_val_far(
+                threshold, dist[train_set], actual_issame[train_set])
+        if np.max(far_train) >= far_target:
+            f = interpolate.interp1d(far_train, thresholds, kind='slinear')
+            threshold = f(far_target)
+        else:
+            threshold = 0.0
+
+        val[fold_idx], far[fold_idx] = calculate_val_far(
+            threshold, dist[test_set], actual_issame[test_set])
+
+    val_mean = np.mean(val)
+    far_mean = np.mean(far)
+    val_std = np.std(val)
+    return val_mean, val_std, far_mean
+
+
+def calculate_val_far(threshold, dist, actual_issame):
+    predict_issame = np.less(dist, threshold)
+    true_accept = np.sum(np.logical_and(predict_issame, actual_issame))
+    false_accept = np.sum(
+        np.logical_and(predict_issame, np.logical_not(actual_issame)))
+    n_same = np.sum(actual_issame)
+    n_diff = np.sum(np.logical_not(actual_issame))
+    # print(true_accept, false_accept)
+    # print(actual_issame)
+    # print(n_same, n_diff)
+    val = float(true_accept) / float(n_same)
+    far = float(false_accept) / float(n_diff)
+    return val, far
+
+
+def evaluate(embeddings, actual_issame, nrof_folds=10, pca=0):
+    # Calculate evaluation metrics
+    thresholds = np.arange(0, 4, 0.01)
+    embeddings1 = embeddings[0::2]
+    embeddings2 = embeddings[1::2]
+    tpr, fpr, accuracy = calculate_roc(thresholds,
+                                       embeddings1,
+                                       embeddings2,
+                                       np.asarray(actual_issame),
+                                       nrof_folds=nrof_folds,
+                                       pca=pca)
+    thresholds = np.arange(0, 4, 0.001)
+    val, val_std, far = calculate_val(thresholds,
+                                      embeddings1,
+                                      embeddings2,
+                                      np.asarray(actual_issame),
+                                      1e-3,
+                                      nrof_folds=nrof_folds)
+    return tpr, fpr, accuracy, val, val_std, far
+
+
+@torch.no_grad()
+def load_bin(path, image_size):
+    try:
+        with open(path, 'rb') as f:
+            bins, issame_list = pickle.load(f)  # py2
+    except UnicodeDecodeError:
+        with open(path, 'rb') as f:
+            bins, issame_list = pickle.load(f, encoding='bytes')  # py3
+    data_list = []
+    for flip in [0, 1]:
+        data = torch.empty((len(issame_list) * 2, 3, image_size[0], image_size[1]))
+        data_list.append(data)
+    for idx in range(len(issame_list) * 2):
+        _bin = bins[idx]
+        img = mx.image.imdecode(_bin)
+        if img.shape[1] != image_size[0]:
+            img = mx.image.resize_short(img, image_size[0])
+        img = nd.transpose(img, axes=(2, 0, 1))  # (C, H, W)
+        for flip in [0, 1]:
+            if flip == 1:
+                img = mx.ndarray.flip(data=img, axis=2)
+            data_list[flip][idx][:] = torch.from_numpy(img.asnumpy())
+        if idx % 1000 == 0:
+            print('loading bin', idx)
+
+        # # save img to '/home/yuange/dataset/LFW/rgb-arcface'
+        # img = nd.transpose(img, axes=(1, 2, 0))  # (H, W, C)
+        # save_name = 'ind_' + str(idx) + '.bmp'
+        # import os
+        # save_name = os.path.join('/home/yuange/dataset/LFW/rgb-arcface', save_name)
+        # import PIL.Image as Image
+        # fig = Image.fromarray(img.asnumpy(), mode='RGB')
+        # fig.save(save_name)
+
+    print('load finished', data_list[0].shape)
+    return data_list, issame_list
+
+
+@torch.no_grad()
+def test(data_set, backbone, batch_size, nfolds=10,
+         is_gray=False, ):
+    print('testing verification..')
+    data_list = data_set[0]
+    issame_list = data_set[1]
+    embeddings_list = []
+    time_consumed = 0.0
+    for i in range(len(data_list)):
+        data = data_list[i]  # (12000, 3, 112, 112)
+
+        print(data.shape)
+        if is_gray:
+            data = (0.2989 * data[:, 0] +
+                    0.5870 * data[:, 1] +
+                    0.1140 * data[:, 2]) / 3
+            data = data[:, None, :, :]
+            print(data.shape)
+
+        embeddings = None
+        ba = 0
+        while ba < data.shape[0]:
+            bb = min(ba + batch_size, data.shape[0])
+            count = bb - ba
+            _data = data[bb - batch_size: bb]
+            time0 = datetime.datetime.now()
+
+            if not is_gray:
+                img = ((_data / 255) - 0.5) / 0.5
+            else:
+                img = _data / 255
+
+            # mouth_net returns a feature whether in training or testing
+            feature = backbone(img.cuda(0))
+            net_out: torch.Tensor = feature
+
+            _embeddings = net_out.detach().cpu().numpy()
+            time_now = datetime.datetime.now()
+            diff = time_now - time0
+            time_consumed += diff.total_seconds()
+            if embeddings is None:
+                embeddings = np.zeros((data.shape[0], _embeddings.shape[1]))
+            embeddings[ba:bb, :] = _embeddings[(batch_size - count):, :]
+            ba = bb
+        embeddings_list.append(embeddings)
+
+    print('emb_list', len(embeddings_list), embeddings_list[0].size, embeddings_list[1].size)
+    _xnorm = 0.0
+    _xnorm_cnt = 0
+    for embed in embeddings_list:
+        for i in range(embed.shape[0]):
+            _em = embed[i]
+            _norm = np.linalg.norm(_em)
+            _xnorm += _norm
+            _xnorm_cnt += 1
+    _xnorm /= _xnorm_cnt
+
+    embeddings = embeddings_list[0].copy()
+    embeddings = sklearn.preprocessing.normalize(embeddings)
+    acc1 = 0.0
+    std1 = 0.0
+    embeddings = embeddings_list[0] + embeddings_list[1]  # fuse original and flipped views
+    embeddings = sklearn.preprocessing.normalize(embeddings)
+    print('embeddings.shape', embeddings.shape)
+    print('infer time', time_consumed)
+    _, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list, nrof_folds=nfolds)
+    acc2, std2 = np.mean(accuracy), np.std(accuracy)
+    return acc1, std1, acc2, std2, _xnorm, embeddings_list
+
+
+def dumpR(data_set,
+          backbone,
+          batch_size,
+          name='',
+          data_extra=None,
+          label_shape=None):
+    print('dump verification embedding..')
+    data_list = data_set[0]
+    issame_list = data_set[1]
+    embeddings_list = []
+    time_consumed = 0.0
+    for i in range(len(data_list)):
+        data = data_list[i]
+        embeddings = None
+        ba = 0
+        while ba < data.shape[0]:
+            bb = min(ba + batch_size, data.shape[0])
+            count = bb - ba
+
+            _data = nd.slice_axis(data, axis=0, begin=bb - batch_size, end=bb)
+            time0 = datetime.datetime.now()
+            # NOTE: `_label`, `_data_extra` and `model` below are leftovers from the
+            # original mxnet implementation and are not defined in this file.
+            if data_extra is None:
+                db = mx.io.DataBatch(data=(_data,), label=(_label,))
+            else:
+                db = mx.io.DataBatch(data=(_data, _data_extra),
+                                     label=(_label,))
+            model.forward(db, is_train=False)
+            net_out = model.get_outputs()
+            _embeddings = net_out[0].asnumpy()
+            time_now = datetime.datetime.now()
+            diff = time_now - time0
+            time_consumed += diff.total_seconds()
+            if embeddings is None:
+                embeddings = np.zeros((data.shape[0], _embeddings.shape[1]))
+            embeddings[ba:bb, :] = _embeddings[(batch_size - count):, :]
+            ba = bb
+        embeddings_list.append(embeddings)
+    embeddings = embeddings_list[0] + embeddings_list[1]
+    embeddings = sklearn.preprocessing.normalize(embeddings)
+    actual_issame = np.asarray(issame_list)
+    outname = os.path.join('temp.bin')
+    with open(outname, 'wb') as f:
+        pickle.dump((embeddings, issame_list),
+                    f,
+                    protocol=pickle.HIGHEST_PROTOCOL)
+
+
+# if __name__ == '__main__':
+#
+#     parser = argparse.ArgumentParser(description='do verification')
+#     # general
+#     parser.add_argument('--data-dir', default='', help='')
+#     parser.add_argument('--model',
+#                         default='../model/softmax,50',
+#                         help='path to load model.')
+#     parser.add_argument('--target',
+#                         default='lfw,cfp_ff,cfp_fp,agedb_30',
+#                         help='test targets.')
+#     parser.add_argument('--gpu', default=0, type=int, help='gpu id')
+#     parser.add_argument('--batch-size', default=32, type=int, help='')
+#     parser.add_argument('--max', default='', type=str, help='')
+#     parser.add_argument('--mode', default=0, type=int, help='')
+#     parser.add_argument('--nfolds', default=10, type=int, help='')
+#     args = parser.parse_args()
+#     image_size = [112, 112]
+#     print('image_size', image_size)
+#     ctx = mx.gpu(args.gpu)
+#     nets = []
+#     vec = args.model.split(',')
+#     prefix = args.model.split(',')[0]
+#     epochs = []
+#     if len(vec) == 1:
+#         pdir = os.path.dirname(prefix)
+#         for fname in os.listdir(pdir):
+#             if not fname.endswith('.params'):
+#                 continue
+#             _file = os.path.join(pdir, fname)
+#             if _file.startswith(prefix):
+#                 epoch = int(fname.split('.')[0].split('-')[1])
+#                 epochs.append(epoch)
+#         epochs = sorted(epochs, reverse=True)
+#         if len(args.max) > 0:
+#             _max = [int(x) for x in args.max.split(',')]
+#             assert len(_max) == 2
+#             if len(epochs) > _max[1]:
+#                 epochs = epochs[_max[0]:_max[1]]
+#
+#     else:
+#         epochs = [int(x) for x in vec[1].split('|')]
+#     print('model number', len(epochs))
+#     time0 = datetime.datetime.now()
+#     for epoch in epochs:
+#         print('loading', prefix, epoch)
+#         sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
+#         # arg_params, aux_params = ch_dev(arg_params, aux_params, ctx)
+#         all_layers = sym.get_internals()
+#         sym = all_layers['fc1_output']
+#         model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
+#         # model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))], label_shapes=[('softmax_label', (args.batch_size,))])
+#         model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0],
+#                                           image_size[1]))])
+#         model.set_params(arg_params, aux_params)
+#         nets.append(model)
+#     time_now = datetime.datetime.now()
+#     diff = time_now - time0
+#     print('model loading time', diff.total_seconds())
+#
+#     ver_list = []
+#     ver_name_list = []
+#     for name in args.target.split(','):
+#         path = os.path.join(args.data_dir, name + ".bin")
+#         if os.path.exists(path):
+#             print('loading.. ', name)
+#             data_set = load_bin(path, image_size)
+#             ver_list.append(data_set)
+#             ver_name_list.append(name)
+#
+#     if args.mode == 0:
+#         for i in range(len(ver_list)):
+#             results = []
+#             for model in nets:
+#                 acc1, std1, acc2, std2, xnorm, embeddings_list = test(
+#                     ver_list[i], model, args.batch_size, args.nfolds)
+#                 print('[%s]XNorm: %f' % (ver_name_list[i], xnorm))
+#                 print('[%s]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], acc1, std1))
+#                 print('[%s]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], acc2, std2))
+#                 results.append(acc2)
+#             print('Max of [%s] is %1.5f' % (ver_name_list[i], np.max(results)))
+#     elif args.mode == 1:
+#         raise ValueError
+#     else:
+#         model = nets[0]
+#         dumpR(ver_list[0], model, args.batch_size, args.target)
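To see the metric helpers in isolation, a toy check of `calculate_accuracy` (made-up squared distances; a pair counts as "same" when its distance falls below the threshold):

import numpy as np

dist = np.array([0.3, 0.9, 1.5, 0.4])           # squared L2 distances of 4 pairs
actual_issame = np.array([True, True, False, False])

# At threshold 1.0, pairs 0, 1, and 3 are predicted "same":
tpr, fpr, acc = calculate_accuracy(1.0, dist, actual_issame)
print(tpr, fpr, acc)                             # 1.0 0.5 0.75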