Reevee committed · Commit f39e999 · Parent(s): first

This view is limited to 50 files because the commit contains too many changes.
- .gitattributes +2 -0
- README.md +11 -0
- app.py +31 -0
- configs/__init__.py +0 -0
- configs/data_configs.py +41 -0
- configs/paths_config.py +23 -0
- configs/transforms_config.py +152 -0
- criteria/__init__.py +0 -0
- criteria/id_loss.py +44 -0
- criteria/lpips/__init__.py +0 -0
- criteria/lpips/lpips.py +35 -0
- criteria/lpips/networks.py +96 -0
- criteria/lpips/utils.py +30 -0
- criteria/moco_loss.py +69 -0
- criteria/w_norm.py +14 -0
- datasets/__init__.py +0 -0
- datasets/augmentations.py +110 -0
- datasets/gt_res_dataset.py +32 -0
- datasets/images_dataset.py +33 -0
- datasets/inference_dataset.py +22 -0
- dnnlib/__init__.py +9 -0
- dnnlib/util.py +477 -0
- legacy.py +384 -0
- model_build.py +95 -0
- models/__init__.py +0 -0
- models/encoders/__init__.py +0 -0
- models/encoders/helpers.py +119 -0
- models/encoders/model_irse.py +84 -0
- models/encoders/psp_encoders.py +186 -0
- models/mtcnn/__init__.py +0 -0
- models/mtcnn/mtcnn.py +156 -0
- models/mtcnn/mtcnn_pytorch/__init__.py +0 -0
- models/mtcnn/mtcnn_pytorch/src/__init__.py +2 -0
- models/mtcnn/mtcnn_pytorch/src/align_trans.py +304 -0
- models/mtcnn/mtcnn_pytorch/src/box_utils.py +238 -0
- models/mtcnn/mtcnn_pytorch/src/detector.py +126 -0
- models/mtcnn/mtcnn_pytorch/src/first_stage.py +101 -0
- models/mtcnn/mtcnn_pytorch/src/get_nets.py +171 -0
- models/mtcnn/mtcnn_pytorch/src/matlab_cp2tform.py +350 -0
- models/mtcnn/mtcnn_pytorch/src/visualization_utils.py +31 -0
- models/psp.py +118 -0
- models/stylegan2/__init__.py +0 -0
- models/stylegan2/model.py +674 -0
- models/stylegan2/op/__init__.py +2 -0
- models/stylegan2/op/fused_act.py +37 -0
- models/stylegan2/op/upfirdn2d.py +60 -0
- pretrained/ohayou_face.pkl +3 -0
- pretrained/ohayou_face.pt +3 -0
- requirements.txt +10 -0
- torch_utils/__init__.py +9 -0
.gitattributes
ADDED
@@ -0,0 +1,2 @@
+pretrained/ohayou_face.pt filter=lfs diff=lfs merge=lfs -text
+pretrained/ohayou_face.pkl filter=lfs diff=lfs merge=lfs -text
README.md
ADDED
@@ -0,0 +1,11 @@
+---
+title: Ohayou_Face
+emoji: ⚡
+colorFrom: red
+colorTo: yellow
+sdk: gradio
+app_file: app.py
+pinned: false
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
app.py
ADDED
@@ -0,0 +1,31 @@
+import os
+from PIL import Image
+import gradio as gr
+from torchvision import transforms
+import easydict
+import torch
+import numpy as np
+import model_build
+
+
+psp = model_build.build_psp()
+stylegan2 = model_build.build_stylegan2()
+
+pretransform = transforms.Compose([
+    transforms.Resize((256, 256)),
+    transforms.ToTensor(),
+    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
+
+def pipeline(img):
+    img = model_build.img_preprocess(img, pretransform)
+    with torch.no_grad():
+        _, latent_space = psp(img.float(), randomize_noise=True, resize=False, return_latents=True)
+        img = stylegan2(latent_space, noise_mode='none')
+    img = Image.fromarray(np.array((img.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8).squeeze(0)[20:-20, :, :]))
+    img.save('output.png')
+    return 'output.png'
+
+examples = [['momoi_out.png', False], ['churuki_out.png', False], ['fgfgfggf.png', False], ['dsfd.png', False]]
+description = "Male images don't work well. A 1:1 ratio image is recommended (you can crop to a square after uploading). If the background is not a solid color, it may blend into the hair color. Inference takes about 5 seconds on average, but can take longer under heavy traffic. 남성 이미지에는 잘 작동하지 않음. 1:1비율 권장(업로드 후 정사각형 자르기 가능), 배경이 단색이 아니면 머리색과 섞일 수 있음. 트래픽이 많으면 5초 이상 걸릴 수 있음. Email: krkmfn@gmail.com"
+gr.Interface(pipeline, [gr.inputs.Image(type="pil")], gr.outputs.Image(type="file"), description=description, allow_flagging=False, examples=examples, allow_screenshot=False, enable_queue=False).launch()
+
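The output-conversion line in `pipeline` is dense; for reference, it is the standard StyleGAN2 mapping from a float image in [-1, 1] to 8-bit RGB. A standalone sketch of just that step, with a random tensor standing in for the generator output (the `[20:-20]` row crop in app.py additionally trims the top and bottom of the frame):

```python
import torch
import numpy as np
from PIL import Image

# Stand-in for a generator output of shape (1, 3, H, W) with values in [-1, 1].
fake = torch.rand(1, 3, 512, 512) * 2 - 1

# [-1, 1] -> [0, 255]: scale by 127.5, shift by 128, clamp, cast to uint8,
# and move channels last (NCHW -> NHWC) so PIL can consume the array.
arr = (fake.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8)
Image.fromarray(np.array(arr.squeeze(0))).save('preview.png')
```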
configs/__init__.py
ADDED
File without changes
configs/data_configs.py
ADDED
@@ -0,0 +1,41 @@
+from configs import transforms_config
+from configs.paths_config import dataset_paths
+
+
+DATASETS = {
+    'ffhq_encode': {
+        'transforms': transforms_config.EncodeTransforms,
+        'train_source_root': dataset_paths['ffhq'],
+        'train_target_root': dataset_paths['ffhq'],
+        'test_source_root': dataset_paths['celeba_test'],
+        'test_target_root': dataset_paths['celeba_test'],
+    },
+    'furry': {
+        'transforms': transforms_config.FrontalizationTransforms,
+        'train_source_root': dataset_paths['anime'],
+        'train_target_root': dataset_paths['anime'],
+        'test_source_root': dataset_paths['gogal'],
+        'test_target_root': dataset_paths['gogal'],
+    },
+    'celebs_sketch_to_face': {
+        'transforms': transforms_config.SketchToImageTransforms,
+        'train_source_root': dataset_paths['celeba_train_sketch'],
+        'train_target_root': dataset_paths['celeba_train'],
+        'test_source_root': dataset_paths['celeba_test_sketch'],
+        'test_target_root': dataset_paths['celeba_test'],
+    },
+    'celebs_seg_to_face': {
+        'transforms': transforms_config.SegToImageTransforms,
+        'train_source_root': dataset_paths['celeba_train_segmentation'],
+        'train_target_root': dataset_paths['celeba_train'],
+        'test_source_root': dataset_paths['celeba_test_segmentation'],
+        'test_target_root': dataset_paths['celeba_test'],
+    },
+    'celebs_super_resolution': {
+        'transforms': transforms_config.SuperResTransforms,
+        'train_source_root': dataset_paths['celeba_train'],
+        'train_target_root': dataset_paths['celeba_train'],
+        'test_source_root': dataset_paths['celeba_test'],
+        'test_target_root': dataset_paths['celeba_test'],
+    },
+}
configs/paths_config.py
ADDED
@@ -0,0 +1,23 @@
+dataset_paths = {
+    'celeba_train': '',
+    'celeba_test': '',
+    'celeba_train_sketch': '',
+    'celeba_test_sketch': '',
+    'celeba_train_segmentation': '',
+    'celeba_test_segmentation': '',
+    'ffhq': '',
+    'anime': '/content/drive/MyDrive/Dataset/anime',
+    'gogal': '/content/drive/MyDrive/All Data/고갈왕'
+}
+
+model_paths = {
+    'stylegan_ffhq': 'pretrained_models/stylegan2-ffhq-config-f.pt',
+    'ir_se50': 'pretrained_models/model_ir_se50.pth',
+    'circular_face': 'pretrained_models/CurricularFace_Backbone.pth',
+    'mtcnn_pnet': 'pretrained_models/mtcnn/pnet.npy',
+    'mtcnn_rnet': 'pretrained_models/mtcnn/rnet.npy',
+    'mtcnn_onet': 'pretrained_models/mtcnn/onet.npy',
+    'shape_predictor': 'shape_predictor_68_face_landmarks.dat',
+    'moco': 'pretrained_models/moco_v2_800ep_pretrain.pth.tar',
+    'anime': '/content/drive/MyDrive/StyleGAN2-ada/result/pretrained/anime_face.pt'
+}
configs/transforms_config.py
ADDED
@@ -0,0 +1,152 @@
+from abc import abstractmethod
+import torchvision.transforms as transforms
+from datasets import augmentations
+
+
+class TransformsConfig(object):
+
+    def __init__(self, opts):
+        self.opts = opts
+
+    @abstractmethod
+    def get_transforms(self):
+        pass
+
+
+class EncodeTransforms(TransformsConfig):
+
+    def __init__(self, opts):
+        super(EncodeTransforms, self).__init__(opts)
+
+    def get_transforms(self):
+        transforms_dict = {
+            'transform_gt_train': transforms.Compose([
+                transforms.Resize((256, 256)),
+                transforms.RandomHorizontalFlip(0.5),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_source': None,
+            'transform_test': transforms.Compose([
+                transforms.Resize((256, 256)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_inference': transforms.Compose([
+                transforms.Resize((256, 256)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
+        }
+        return transforms_dict
+
+
+class FrontalizationTransforms(TransformsConfig):
+
+    def __init__(self, opts):
+        super(FrontalizationTransforms, self).__init__(opts)
+
+    def get_transforms(self):
+        transforms_dict = {
+            'transform_gt_train': transforms.Compose([
+                transforms.Resize((256, 256)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_source': transforms.Compose([
+                transforms.Resize((256, 256)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_test': transforms.Compose([
+                transforms.Resize((256, 256)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_inference': transforms.Compose([
+                transforms.Resize((256, 256)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
+        }
+        return transforms_dict
+
+
+class SketchToImageTransforms(TransformsConfig):
+
+    def __init__(self, opts):
+        super(SketchToImageTransforms, self).__init__(opts)
+
+    def get_transforms(self):
+        transforms_dict = {
+            'transform_gt_train': transforms.Compose([
+                transforms.Resize((256, 256)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_source': transforms.Compose([
+                transforms.Resize((256, 256)),
+                transforms.ToTensor()]),
+            'transform_test': transforms.Compose([
+                transforms.Resize((256, 256)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_inference': transforms.Compose([
+                transforms.Resize((256, 256)),
+                transforms.ToTensor()]),
+        }
+        return transforms_dict
+
+
+class SegToImageTransforms(TransformsConfig):
+
+    def __init__(self, opts):
+        super(SegToImageTransforms, self).__init__(opts)
+
+    def get_transforms(self):
+        transforms_dict = {
+            'transform_gt_train': transforms.Compose([
+                transforms.Resize((256, 256)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_source': transforms.Compose([
+                transforms.Resize((256, 256)),
+                augmentations.ToOneHot(self.opts.label_nc),
+                transforms.ToTensor()]),
+            'transform_test': transforms.Compose([
+                transforms.Resize((256, 256)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_inference': transforms.Compose([
+                transforms.Resize((256, 256)),
+                augmentations.ToOneHot(self.opts.label_nc),
+                transforms.ToTensor()])
+        }
+        return transforms_dict
+
+
+class SuperResTransforms(TransformsConfig):
+
+    def __init__(self, opts):
+        super(SuperResTransforms, self).__init__(opts)
+
+    def get_transforms(self):
+        if self.opts.resize_factors is None:
+            self.opts.resize_factors = '1,2,4,8,16,32'
+        factors = [int(f) for f in self.opts.resize_factors.split(",")]
+        print("Performing down-sampling with factors: {}".format(factors))
+        transforms_dict = {
+            'transform_gt_train': transforms.Compose([
+                transforms.Resize((256, 256)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_source': transforms.Compose([
+                transforms.Resize((256, 256)),
+                augmentations.BilinearResize(factors=factors),
+                transforms.Resize((256, 256)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_test': transforms.Compose([
+                transforms.Resize((256, 256)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_inference': transforms.Compose([
+                transforms.Resize((256, 256)),
+                augmentations.BilinearResize(factors=factors),
+                transforms.Resize((256, 256)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
+        }
+        return transforms_dict
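A minimal usage sketch of these configs. The `opts` object here is a hypothetical stand-in; the real one comes from the training argument parser, but `EncodeTransforms` reads no fields from it, so an empty EasyDict suffices:

```python
import easydict
from configs.transforms_config import EncodeTransforms

opts = easydict.EasyDict({})                    # hypothetical minimal opts
tfms = EncodeTransforms(opts).get_transforms()
inference_tfm = tfms['transform_inference']     # Resize -> ToTensor -> Normalize to [-1, 1]
```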
criteria/__init__.py
ADDED
File without changes
criteria/id_loss.py
ADDED
@@ -0,0 +1,44 @@
+import torch
+from torch import nn
+from configs.paths_config import model_paths
+from models.encoders.model_irse import Backbone
+
+
+class IDLoss(nn.Module):
+    def __init__(self):
+        super(IDLoss, self).__init__()
+        print('Loading ResNet ArcFace')
+        self.facenet = Backbone(input_size=112, num_layers=50, drop_ratio=0.6, mode='ir_se')
+        self.facenet.load_state_dict(torch.load(model_paths['ir_se50']))
+        self.face_pool = torch.nn.AdaptiveAvgPool2d((112, 112))
+        self.facenet.eval()
+
+    def extract_feats(self, x):
+        x = x[:, :, 35:223, 32:220]  # Crop interesting region
+        x = self.face_pool(x)
+        x_feats = self.facenet(x)
+        return x_feats
+
+    def forward(self, y_hat, y, x):
+        n_samples = x.shape[0]
+        x_feats = self.extract_feats(x)
+        y_feats = self.extract_feats(y)  # Otherwise use the feature from there
+        y_hat_feats = self.extract_feats(y_hat)
+        y_feats = y_feats.detach()
+        loss = 0
+        sim_improvement = 0
+        id_logs = []
+        count = 0
+        for i in range(n_samples):
+            diff_target = y_hat_feats[i].dot(y_feats[i])
+            diff_input = y_hat_feats[i].dot(x_feats[i])
+            diff_views = y_feats[i].dot(x_feats[i])
+            id_logs.append({'diff_target': float(diff_target),
+                            'diff_input': float(diff_input),
+                            'diff_views': float(diff_views)})
+            loss += 1 - diff_target
+            id_diff = float(diff_target) - float(diff_views)
+            sim_improvement += id_diff
+            count += 1
+
+        return loss / count, sim_improvement / count, id_logs
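Assuming the IR-SE50 backbone L2-normalizes its embeddings (which the standard model_irse implementation does), the dot products above are cosine similarities, so each sample contributes 1 - cos(f(ŷ), f(y)) to the loss. A toy numeric sketch with random unit vectors standing in for real ArcFace embeddings:

```python
import torch
import torch.nn.functional as F

# Random unit vectors stand in for embeddings of output / target / input.
y_hat_f = F.normalize(torch.randn(512), dim=0)
y_f = F.normalize(torch.randn(512), dim=0)
x_f = F.normalize(torch.randn(512), dim=0)

loss_term = 1 - y_hat_f.dot(y_f)                             # the 'diff_target' term
improvement = float(y_hat_f.dot(y_f)) - float(y_f.dot(x_f))  # gain over input-target similarity
```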
criteria/lpips/__init__.py
ADDED
File without changes
criteria/lpips/lpips.py
ADDED
@@ -0,0 +1,35 @@
+import torch
+import torch.nn as nn
+
+from criteria.lpips.networks import get_network, LinLayers
+from criteria.lpips.utils import get_state_dict
+
+
+class LPIPS(nn.Module):
+    r"""Creates a criterion that measures
+    Learned Perceptual Image Patch Similarity (LPIPS).
+    Arguments:
+        net_type (str): the network type to compare the features:
+                        'alex' | 'squeeze' | 'vgg'. Default: 'alex'.
+        version (str): the version of LPIPS. Default: 0.1.
+    """
+    def __init__(self, net_type: str = 'alex', version: str = '0.1'):
+
+        assert version in ['0.1'], 'v0.1 is only supported now'
+
+        super(LPIPS, self).__init__()
+
+        # pretrained network
+        self.net = get_network(net_type).to("cuda")
+
+        # linear layers
+        self.lin = LinLayers(self.net.n_channels_list).to("cuda")
+        self.lin.load_state_dict(get_state_dict(net_type, version))
+
+    def forward(self, x: torch.Tensor, y: torch.Tensor):
+        feat_x, feat_y = self.net(x), self.net(y)
+
+        diff = [(fx - fy) ** 2 for fx, fy in zip(feat_x, feat_y)]
+        res = [l(d).mean((2, 3), True) for d, l in zip(diff, self.lin)]
+
+        return torch.sum(torch.cat(res, 0)) / x.shape[0]
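A usage sketch. Note the module hard-codes `.to("cuda")`, so a GPU is required, and constructing it downloads the torchvision backbone weights plus the LPIPS linear weights (via `get_state_dict`) on first use:

```python
import torch
from criteria.lpips.lpips import LPIPS

lpips_loss = LPIPS(net_type='alex')
# Inputs are assumed to be RGB batches in [-1, 1], matching the
# Normalize([0.5]*3, [0.5]*3) transforms used elsewhere in this repo.
x = torch.rand(2, 3, 256, 256, device='cuda') * 2 - 1
y = torch.rand(2, 3, 256, 256, device='cuda') * 2 - 1
distance = lpips_loss(x, y)   # scalar, averaged over the batch
```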
criteria/lpips/networks.py
ADDED
@@ -0,0 +1,96 @@
+from typing import Sequence
+
+from itertools import chain
+
+import torch
+import torch.nn as nn
+from torchvision import models
+
+from criteria.lpips.utils import normalize_activation
+
+
+def get_network(net_type: str):
+    if net_type == 'alex':
+        return AlexNet()
+    elif net_type == 'squeeze':
+        return SqueezeNet()
+    elif net_type == 'vgg':
+        return VGG16()
+    else:
+        raise NotImplementedError('choose net_type from [alex, squeeze, vgg].')
+
+
+class LinLayers(nn.ModuleList):
+    def __init__(self, n_channels_list: Sequence[int]):
+        super(LinLayers, self).__init__([
+            nn.Sequential(
+                nn.Identity(),
+                nn.Conv2d(nc, 1, 1, 1, 0, bias=False)
+            ) for nc in n_channels_list
+        ])
+
+        for param in self.parameters():
+            param.requires_grad = False
+
+
+class BaseNet(nn.Module):
+    def __init__(self):
+        super(BaseNet, self).__init__()
+
+        # register buffer
+        self.register_buffer(
+            'mean', torch.Tensor([-.030, -.088, -.188])[None, :, None, None])
+        self.register_buffer(
+            'std', torch.Tensor([.458, .448, .450])[None, :, None, None])
+
+    def set_requires_grad(self, state: bool):
+        for param in chain(self.parameters(), self.buffers()):
+            param.requires_grad = state
+
+    def z_score(self, x: torch.Tensor):
+        return (x - self.mean) / self.std
+
+    def forward(self, x: torch.Tensor):
+        x = self.z_score(x)
+
+        output = []
+        for i, (_, layer) in enumerate(self.layers._modules.items(), 1):
+            x = layer(x)
+            if i in self.target_layers:
+                output.append(normalize_activation(x))
+            if len(output) == len(self.target_layers):
+                break
+        return output
+
+
+class SqueezeNet(BaseNet):
+    def __init__(self):
+        super(SqueezeNet, self).__init__()
+
+        self.layers = models.squeezenet1_1(True).features
+        self.target_layers = [2, 5, 8, 10, 11, 12, 13]
+        self.n_channels_list = [64, 128, 256, 384, 384, 512, 512]
+
+        self.set_requires_grad(False)
+
+
+class AlexNet(BaseNet):
+    def __init__(self):
+        super(AlexNet, self).__init__()
+
+        self.layers = models.alexnet(True).features
+        self.target_layers = [2, 5, 8, 10, 12]
+        self.n_channels_list = [64, 192, 384, 256, 256]
+
+        self.set_requires_grad(False)
+
+
+class VGG16(BaseNet):
+    def __init__(self):
+        super(VGG16, self).__init__()
+
+        self.layers = models.vgg16(True).features
+        self.target_layers = [4, 9, 16, 23, 30]
+        self.n_channels_list = [64, 128, 256, 512, 512]
+
+        self.set_requires_grad(False)
criteria/lpips/utils.py
ADDED
@@ -0,0 +1,30 @@
+from collections import OrderedDict
+
+import torch
+
+
+def normalize_activation(x, eps=1e-10):
+    norm_factor = torch.sqrt(torch.sum(x ** 2, dim=1, keepdim=True))
+    return x / (norm_factor + eps)
+
+
+def get_state_dict(net_type: str = 'alex', version: str = '0.1'):
+    # build url
+    url = 'https://raw.githubusercontent.com/richzhang/PerceptualSimilarity/' \
+        + f'master/lpips/weights/v{version}/{net_type}.pth'
+
+    # download
+    old_state_dict = torch.hub.load_state_dict_from_url(
+        url, progress=True,
+        map_location=None if torch.cuda.is_available() else torch.device('cpu')
+    )
+
+    # rename keys
+    new_state_dict = OrderedDict()
+    for key, val in old_state_dict.items():
+        new_key = key
+        new_key = new_key.replace('lin', '')
+        new_key = new_key.replace('model.', '')
+        new_state_dict[new_key] = val
+
+    return new_state_dict
criteria/moco_loss.py
ADDED
@@ -0,0 +1,69 @@
+import torch
+from torch import nn
+import torch.nn.functional as F
+from configs.paths_config import model_paths
+
+
+class MocoLoss(nn.Module):
+
+    def __init__(self):
+        super(MocoLoss, self).__init__()
+        print("Loading MOCO model from path: {}".format(model_paths["moco"]))
+        self.model = self.__load_model()
+        self.model.cuda()
+        self.model.eval()
+
+    @staticmethod
+    def __load_model():
+        import torchvision.models as models
+        model = models.__dict__["resnet50"]()
+        # freeze all layers but the last fc
+        for name, param in model.named_parameters():
+            if name not in ['fc.weight', 'fc.bias']:
+                param.requires_grad = False
+        checkpoint = torch.load(model_paths['moco'], map_location="cpu")
+        state_dict = checkpoint['state_dict']
+        # rename moco pre-trained keys
+        for k in list(state_dict.keys()):
+            # retain only encoder_q up to before the embedding layer
+            if k.startswith('module.encoder_q') and not k.startswith('module.encoder_q.fc'):
+                # remove prefix
+                state_dict[k[len("module.encoder_q."):]] = state_dict[k]
+            # delete renamed or unused k
+            del state_dict[k]
+        msg = model.load_state_dict(state_dict, strict=False)
+        assert set(msg.missing_keys) == {"fc.weight", "fc.bias"}
+        # remove output layer
+        model = nn.Sequential(*list(model.children())[:-1]).cuda()
+        return model
+
+    def extract_feats(self, x):
+        x = F.interpolate(x, size=224)
+        x_feats = self.model(x)
+        x_feats = nn.functional.normalize(x_feats, dim=1)
+        x_feats = x_feats.squeeze()
+        return x_feats
+
+    def forward(self, y_hat, y, x):
+        n_samples = x.shape[0]
+        x_feats = self.extract_feats(x)
+        y_feats = self.extract_feats(y)
+        y_hat_feats = self.extract_feats(y_hat)
+        y_feats = y_feats.detach()
+        loss = 0
+        sim_improvement = 0
+        sim_logs = []
+        count = 0
+        for i in range(n_samples):
+            diff_target = y_hat_feats[i].dot(y_feats[i])
+            diff_input = y_hat_feats[i].dot(x_feats[i])
+            diff_views = y_feats[i].dot(x_feats[i])
+            sim_logs.append({'diff_target': float(diff_target),
+                             'diff_input': float(diff_input),
+                             'diff_views': float(diff_views)})
+            loss += 1 - diff_target
+            sim_diff = float(diff_target) - float(diff_views)
+            sim_improvement += sim_diff
+            count += 1
+
+        return loss / count, sim_improvement / count, sim_logs
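The key-renaming loop in `__load_model` is worth seeing in isolation: it keeps only the MoCo query encoder's weights, strips their prefix, and drops everything else. On a toy state dict:

```python
state_dict = {
    'module.encoder_q.conv1.weight': 1,
    'module.encoder_q.fc.weight': 2,     # embedding head: dropped
    'module.encoder_k.conv1.weight': 3,  # key encoder: dropped
}
for k in list(state_dict.keys()):
    if k.startswith('module.encoder_q') and not k.startswith('module.encoder_q.fc'):
        state_dict[k[len('module.encoder_q.'):]] = state_dict[k]
    del state_dict[k]        # every original key is removed
print(state_dict)            # {'conv1.weight': 1}
```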
criteria/w_norm.py
ADDED
@@ -0,0 +1,14 @@
+import torch
+from torch import nn
+
+
+class WNormLoss(nn.Module):
+
+    def __init__(self, start_from_latent_avg=True):
+        super(WNormLoss, self).__init__()
+        self.start_from_latent_avg = start_from_latent_avg
+
+    def forward(self, latent, latent_avg=None):
+        if self.start_from_latent_avg:
+            latent = latent - latent_avg
+        return torch.sum(latent.norm(2, dim=(1, 2))) / latent.shape[0]
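Equivalently, this loss is the batch mean of the Frobenius norm of each latent's offset from the average latent. A sketch with hypothetical W+ shapes (18 style vectors of width 512 per image):

```python
import torch

latent = torch.randn(4, 18, 512)   # batch of W+ codes
latent_avg = torch.zeros(512)      # broadcasts across batch and style dimensions

delta = latent - latent_avg
loss = delta.norm(2, dim=(1, 2)).sum() / latent.shape[0]
```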
datasets/__init__.py
ADDED
File without changes
datasets/augmentations.py
ADDED
@@ -0,0 +1,110 @@
+import numpy as np
+import torch
+from torch import nn
+from torch.nn import functional as F
+from torchvision import transforms
+
+
+class ToOneHot(object):
+    """ Convert the input PIL image to a one-hot torch tensor """
+    def __init__(self, n_classes=None):
+        self.n_classes = n_classes
+
+    def onehot_initialization(self, a):
+        if self.n_classes is None:
+            self.n_classes = len(np.unique(a))
+        out = np.zeros(a.shape + (self.n_classes, ), dtype=int)
+        out[self.__all_idx(a, axis=2)] = 1
+        return out
+
+    def __all_idx(self, idx, axis):
+        grid = np.ogrid[tuple(map(slice, idx.shape))]
+        grid.insert(axis, idx)
+        return tuple(grid)
+
+    def __call__(self, img):
+        img = np.array(img)
+        one_hot = self.onehot_initialization(img)
+        return one_hot
+
+
+class BilinearResize(object):
+    def __init__(self, factors=[1, 2, 4, 8, 16, 32]):
+        self.factors = factors
+
+    def __call__(self, image):
+        factor = np.random.choice(self.factors, size=1)[0]
+        D = BicubicDownSample(factor=factor, cuda=False)
+        img_tensor = transforms.ToTensor()(image).unsqueeze(0)
+        img_tensor_lr = D(img_tensor)[0].clamp(0, 1)
+        img_low_res = transforms.ToPILImage()(img_tensor_lr)
+        return img_low_res
+
+
+class BicubicDownSample(nn.Module):
+    def bicubic_kernel(self, x, a=-0.50):
+        """
+        This equation is exactly copied from the website below:
+        https://clouard.users.greyc.fr/Pantheon/experiments/rescaling/index-en.html#bicubic
+        """
+        abs_x = torch.abs(x)
+        if abs_x <= 1.:
+            return (a + 2.) * torch.pow(abs_x, 3.) - (a + 3.) * torch.pow(abs_x, 2.) + 1
+        elif 1. < abs_x < 2.:
+            return a * torch.pow(abs_x, 3) - 5. * a * torch.pow(abs_x, 2.) + 8. * a * abs_x - 4. * a
+        else:
+            return 0.0
+
+    def __init__(self, factor=4, cuda=True, padding='reflect'):
+        super().__init__()
+        self.factor = factor
+        size = factor * 4
+        k = torch.tensor([self.bicubic_kernel((i - torch.floor(torch.tensor(size / 2)) + 0.5) / factor)
+                          for i in range(size)], dtype=torch.float32)
+        k = k / torch.sum(k)
+        k1 = torch.reshape(k, shape=(1, 1, size, 1))
+        self.k1 = torch.cat([k1, k1, k1], dim=0)
+        k2 = torch.reshape(k, shape=(1, 1, 1, size))
+        self.k2 = torch.cat([k2, k2, k2], dim=0)
+        self.cuda = '.cuda' if cuda else ''
+        self.padding = padding
+        for param in self.parameters():
+            param.requires_grad = False
+
+    def forward(self, x, nhwc=False, clip_round=False, byte_output=False):
+        filter_height = self.factor * 4
+        filter_width = self.factor * 4
+        stride = self.factor
+
+        pad_along_height = max(filter_height - stride, 0)
+        pad_along_width = max(filter_width - stride, 0)
+        filters1 = self.k1.type('torch{}.FloatTensor'.format(self.cuda))
+        filters2 = self.k2.type('torch{}.FloatTensor'.format(self.cuda))
+
+        # compute actual padding values for each side
+        pad_top = pad_along_height // 2
+        pad_bottom = pad_along_height - pad_top
+        pad_left = pad_along_width // 2
+        pad_right = pad_along_width - pad_left
+
+        # apply mirror padding
+        if nhwc:
+            x = torch.transpose(torch.transpose(x, 2, 3), 1, 2)  # NHWC to NCHW
+
+        # downscaling performed by 1-d convolution
+        x = F.pad(x, (0, 0, pad_top, pad_bottom), self.padding)
+        x = F.conv2d(input=x, weight=filters1, stride=(stride, 1), groups=3)
+        if clip_round:
+            x = torch.clamp(torch.round(x), 0.0, 255.)
+
+        x = F.pad(x, (pad_left, pad_right, 0, 0), self.padding)
+        x = F.conv2d(input=x, weight=filters2, stride=(1, stride), groups=3)
+        if clip_round:
+            x = torch.clamp(torch.round(x), 0.0, 255.)
+
+        if nhwc:
+            x = torch.transpose(torch.transpose(x, 1, 3), 1, 2)
+        if byte_output:
+            return x.type('torch{}.ByteTensor'.format(self.cuda))
+        else:
+            return x
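BilinearResize is how the super-resolution transforms simulate low-quality inputs: pick a random factor, bicubic-downsample, and let the following Resize bring the image back to 256x256. A usage sketch ('face.png' is a placeholder path):

```python
from PIL import Image
from datasets.augmentations import BilinearResize

degrade = BilinearResize(factors=[2, 4, 8])
low_res = degrade(Image.open('face.png').convert('RGB'))
```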
datasets/gt_res_dataset.py
ADDED
@@ -0,0 +1,32 @@
+#!/usr/bin/python
+# encoding: utf-8
+import os
+from torch.utils.data import Dataset
+from PIL import Image
+
+
+class GTResDataset(Dataset):
+
+    def __init__(self, root_path, gt_dir=None, transform=None, transform_train=None):
+        self.pairs = []
+        for f in os.listdir(root_path):
+            image_path = os.path.join(root_path, f)
+            gt_path = os.path.join(gt_dir, f)
+            if f.endswith(".jpg") or f.endswith(".png"):
+                self.pairs.append([image_path, gt_path.replace('.png', '.jpg'), None])
+        self.transform = transform
+        self.transform_train = transform_train
+
+    def __len__(self):
+        return len(self.pairs)
+
+    def __getitem__(self, index):
+        from_path, to_path, _ = self.pairs[index]
+        from_im = Image.open(from_path).convert('RGB')
+        to_im = Image.open(to_path).convert('RGB')
+
+        if self.transform:
+            to_im = self.transform(to_im)
+            from_im = self.transform(from_im)
+
+        return from_im, to_im
datasets/images_dataset.py
ADDED
@@ -0,0 +1,33 @@
+from torch.utils.data import Dataset
+from PIL import Image
+from utils import data_utils
+
+
+class ImagesDataset(Dataset):
+
+    def __init__(self, source_root, target_root, opts, target_transform=None, source_transform=None):
+        self.source_paths = sorted(data_utils.make_dataset(source_root))
+        self.target_paths = sorted(data_utils.make_dataset(target_root))
+        self.source_transform = source_transform
+        self.target_transform = target_transform
+        self.opts = opts
+
+    def __len__(self):
+        return len(self.source_paths)
+
+    def __getitem__(self, index):
+        from_path = self.source_paths[index]
+        from_im = Image.open(from_path)
+        from_im = from_im.convert('RGB') if self.opts.label_nc == 0 else from_im.convert('L')
+
+        to_path = self.target_paths[index]
+        to_im = Image.open(to_path).convert('RGB')
+        if self.target_transform:
+            to_im = self.target_transform(to_im)
+
+        if self.source_transform:
+            from_im = self.source_transform(from_im)
+        else:
+            from_im = to_im
+
+        return from_im, to_im
datasets/inference_dataset.py
ADDED
@@ -0,0 +1,22 @@
+from torch.utils.data import Dataset
+from PIL import Image
+from utils import data_utils
+
+
+class InferenceDataset(Dataset):
+
+    def __init__(self, root, opts, transform=None):
+        self.paths = sorted(data_utils.make_dataset(root))
+        self.transform = transform
+        self.opts = opts
+
+    def __len__(self):
+        return len(self.paths)
+
+    def __getitem__(self, index):
+        from_path = self.paths[index]
+        from_im = Image.open(from_path)
+        from_im = from_im.convert('RGB') if self.opts.label_nc == 0 else from_im.convert('L')
+        if self.transform:
+            from_im = self.transform(from_im)
+        return from_im
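Putting the dataset and transform pieces together for inference, a sketch: the `utils.data_utils` helper these datasets import is not among the files shown here, and 'imgs/' is a placeholder directory.

```python
import easydict
from torch.utils.data import DataLoader
from configs.transforms_config import EncodeTransforms
from datasets.inference_dataset import InferenceDataset

opts = easydict.EasyDict({'label_nc': 0})   # 0 keeps images in RGB
tfm = EncodeTransforms(opts).get_transforms()['transform_inference']
loader = DataLoader(InferenceDataset('imgs/', opts, transform=tfm),
                    batch_size=4, shuffle=False)
```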
dnnlib/__init__.py
ADDED
@@ -0,0 +1,9 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
+#
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.  Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+from .util import EasyDict, make_cache_dir_path
dnnlib/util.py
ADDED
@@ -0,0 +1,477 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
+#
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.  Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+"""Miscellaneous utility classes and functions."""
+
+import ctypes
+import fnmatch
+import importlib
+import inspect
+import numpy as np
+import os
+import shutil
+import sys
+import types
+import io
+import pickle
+import re
+import requests
+import html
+import hashlib
+import glob
+import tempfile
+import urllib
+import urllib.request
+import uuid
+
+from distutils.util import strtobool
+from typing import Any, List, Tuple, Union
+
+
+# Util classes
+# ------------------------------------------------------------------------------------------
+
+
+class EasyDict(dict):
+    """Convenience class that behaves like a dict but allows access with the attribute syntax."""
+
+    def __getattr__(self, name: str) -> Any:
+        try:
+            return self[name]
+        except KeyError:
+            raise AttributeError(name)
+
+    def __setattr__(self, name: str, value: Any) -> None:
+        self[name] = value
+
+    def __delattr__(self, name: str) -> None:
+        del self[name]
+
+
+class Logger(object):
+    """Redirect stderr to stdout, optionally print stdout to a file, and optionally force flushing on both stdout and the file."""
+
+    def __init__(self, file_name: str = None, file_mode: str = "w", should_flush: bool = True):
+        self.file = None
+
+        if file_name is not None:
+            self.file = open(file_name, file_mode)
+
+        self.should_flush = should_flush
+        self.stdout = sys.stdout
+        self.stderr = sys.stderr
+
+        sys.stdout = self
+        sys.stderr = self
+
+    def __enter__(self) -> "Logger":
+        return self
+
+    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
+        self.close()
+
+    def write(self, text: Union[str, bytes]) -> None:
+        """Write text to stdout (and a file) and optionally flush."""
+        if isinstance(text, bytes):
+            text = text.decode()
+        if len(text) == 0: # workaround for a bug in VSCode debugger: sys.stdout.write(''); sys.stdout.flush() => crash
+            return
+
+        if self.file is not None:
+            self.file.write(text)
+
+        self.stdout.write(text)
+
+        if self.should_flush:
+            self.flush()
+
+    def flush(self) -> None:
+        """Flush written text to both stdout and a file, if open."""
+        if self.file is not None:
+            self.file.flush()
+
+        self.stdout.flush()
+
+    def close(self) -> None:
+        """Flush, close possible files, and remove stdout/stderr mirroring."""
+        self.flush()
+
+        # if using multiple loggers, prevent closing in wrong order
+        if sys.stdout is self:
+            sys.stdout = self.stdout
+        if sys.stderr is self:
+            sys.stderr = self.stderr
+
+        if self.file is not None:
+            self.file.close()
+            self.file = None
+
+
+# Cache directories
+# ------------------------------------------------------------------------------------------
+
+_dnnlib_cache_dir = None
+
+def set_cache_dir(path: str) -> None:
+    global _dnnlib_cache_dir
+    _dnnlib_cache_dir = path
+
+def make_cache_dir_path(*paths: str) -> str:
+    if _dnnlib_cache_dir is not None:
+        return os.path.join(_dnnlib_cache_dir, *paths)
+    if 'DNNLIB_CACHE_DIR' in os.environ:
+        return os.path.join(os.environ['DNNLIB_CACHE_DIR'], *paths)
+    if 'HOME' in os.environ:
+        return os.path.join(os.environ['HOME'], '.cache', 'dnnlib', *paths)
+    if 'USERPROFILE' in os.environ:
+        return os.path.join(os.environ['USERPROFILE'], '.cache', 'dnnlib', *paths)
+    return os.path.join(tempfile.gettempdir(), '.cache', 'dnnlib', *paths)
+
+# Small util functions
+# ------------------------------------------------------------------------------------------
+
+
+def format_time(seconds: Union[int, float]) -> str:
+    """Convert the seconds to human readable string with days, hours, minutes and seconds."""
+    s = int(np.rint(seconds))
+
+    if s < 60:
+        return "{0}s".format(s)
+    elif s < 60 * 60:
+        return "{0}m {1:02}s".format(s // 60, s % 60)
+    elif s < 24 * 60 * 60:
+        return "{0}h {1:02}m {2:02}s".format(s // (60 * 60), (s // 60) % 60, s % 60)
+    else:
+        return "{0}d {1:02}h {2:02}m".format(s // (24 * 60 * 60), (s // (60 * 60)) % 24, (s // 60) % 60)
+
+
+def ask_yes_no(question: str) -> bool:
+    """Ask the user the question until the user inputs a valid answer."""
+    while True:
+        try:
+            print("{0} [y/n]".format(question))
+            return strtobool(input().lower())
+        except ValueError:
+            pass
+
+
+def tuple_product(t: Tuple) -> Any:
+    """Calculate the product of the tuple elements."""
+    result = 1
+
+    for v in t:
+        result *= v
+
+    return result
+
+
+_str_to_ctype = {
+    "uint8": ctypes.c_ubyte,
+    "uint16": ctypes.c_uint16,
+    "uint32": ctypes.c_uint32,
+    "uint64": ctypes.c_uint64,
+    "int8": ctypes.c_byte,
+    "int16": ctypes.c_int16,
+    "int32": ctypes.c_int32,
+    "int64": ctypes.c_int64,
+    "float32": ctypes.c_float,
+    "float64": ctypes.c_double
+}
+
+
+def get_dtype_and_ctype(type_obj: Any) -> Tuple[np.dtype, Any]:
+    """Given a type name string (or an object having a __name__ attribute), return matching Numpy and ctypes types that have the same size in bytes."""
+    type_str = None
+
+    if isinstance(type_obj, str):
+        type_str = type_obj
+    elif hasattr(type_obj, "__name__"):
+        type_str = type_obj.__name__
+    elif hasattr(type_obj, "name"):
+        type_str = type_obj.name
+    else:
+        raise RuntimeError("Cannot infer type name from input")
+
+    assert type_str in _str_to_ctype.keys()
+
+    my_dtype = np.dtype(type_str)
+    my_ctype = _str_to_ctype[type_str]
+
+    assert my_dtype.itemsize == ctypes.sizeof(my_ctype)
+
+    return my_dtype, my_ctype
+
+
+def is_pickleable(obj: Any) -> bool:
+    try:
+        with io.BytesIO() as stream:
+            pickle.dump(obj, stream)
+        return True
+    except:
+        return False
+
+
+# Functionality to import modules/objects by name, and call functions by name
+# ------------------------------------------------------------------------------------------
+
+def get_module_from_obj_name(obj_name: str) -> Tuple[types.ModuleType, str]:
+    """Searches for the underlying module behind the name to some python object.
+    Returns the module and the object name (original name with module part removed)."""
+
+    # allow convenience shorthands, substitute them by full names
+    obj_name = re.sub("^np.", "numpy.", obj_name)
+    obj_name = re.sub("^tf.", "tensorflow.", obj_name)
+
+    # list alternatives for (module_name, local_obj_name)
+    parts = obj_name.split(".")
+    name_pairs = [(".".join(parts[:i]), ".".join(parts[i:])) for i in range(len(parts), 0, -1)]
+
+    # try each alternative in turn
+    for module_name, local_obj_name in name_pairs:
+        try:
+            module = importlib.import_module(module_name) # may raise ImportError
+            get_obj_from_module(module, local_obj_name) # may raise AttributeError
+            return module, local_obj_name
+        except:
+            pass
+
+    # maybe some of the modules themselves contain errors?
+    for module_name, _local_obj_name in name_pairs:
+        try:
+            importlib.import_module(module_name) # may raise ImportError
+        except ImportError:
+            if not str(sys.exc_info()[1]).startswith("No module named '" + module_name + "'"):
+                raise
+
+    # maybe the requested attribute is missing?
+    for module_name, local_obj_name in name_pairs:
+        try:
+            module = importlib.import_module(module_name) # may raise ImportError
+            get_obj_from_module(module, local_obj_name) # may raise AttributeError
+        except ImportError:
+            pass
+
+    # we are out of luck, but we have no idea why
+    raise ImportError(obj_name)
+
+
+def get_obj_from_module(module: types.ModuleType, obj_name: str) -> Any:
+    """Traverses the object name and returns the last (rightmost) python object."""
+    if obj_name == '':
+        return module
+    obj = module
+    for part in obj_name.split("."):
+        obj = getattr(obj, part)
+    return obj
+
+
+def get_obj_by_name(name: str) -> Any:
+    """Finds the python object with the given name."""
+    module, obj_name = get_module_from_obj_name(name)
+    return get_obj_from_module(module, obj_name)
+
+
+def call_func_by_name(*args, func_name: str = None, **kwargs) -> Any:
+    """Finds the python object with the given name and calls it as a function."""
+    assert func_name is not None
+    func_obj = get_obj_by_name(func_name)
+    assert callable(func_obj)
+    return func_obj(*args, **kwargs)
+
+
+def construct_class_by_name(*args, class_name: str = None, **kwargs) -> Any:
+    """Finds the python class with the given name and constructs it with the given arguments."""
+    return call_func_by_name(*args, func_name=class_name, **kwargs)
+
+
+def get_module_dir_by_obj_name(obj_name: str) -> str:
+    """Get the directory path of the module containing the given object name."""
+    module, _ = get_module_from_obj_name(obj_name)
+    return os.path.dirname(inspect.getfile(module))
+
+
+def is_top_level_function(obj: Any) -> bool:
+    """Determine whether the given object is a top-level function, i.e., defined at module scope using 'def'."""
+    return callable(obj) and obj.__name__ in sys.modules[obj.__module__].__dict__
+
+
+def get_top_level_function_name(obj: Any) -> str:
+    """Return the fully-qualified name of a top-level function."""
+    assert is_top_level_function(obj)
+    module = obj.__module__
+    if module == '__main__':
+        module = os.path.splitext(os.path.basename(sys.modules[module].__file__))[0]
+    return module + "." + obj.__name__
+
+
+# File system helpers
+# ------------------------------------------------------------------------------------------
+
+def list_dir_recursively_with_ignore(dir_path: str, ignores: List[str] = None, add_base_to_relative: bool = False) -> List[Tuple[str, str]]:
+    """List all files recursively in a given directory while ignoring given file and directory names.
+    Returns list of tuples containing both absolute and relative paths."""
+    assert os.path.isdir(dir_path)
+    base_name = os.path.basename(os.path.normpath(dir_path))
+
+    if ignores is None:
+        ignores = []
+
+    result = []
+
+    for root, dirs, files in os.walk(dir_path, topdown=True):
+        for ignore_ in ignores:
+            dirs_to_remove = [d for d in dirs if fnmatch.fnmatch(d, ignore_)]
+
+            # dirs need to be edited in-place
+            for d in dirs_to_remove:
+                dirs.remove(d)
+
+            files = [f for f in files if not fnmatch.fnmatch(f, ignore_)]
+
+        absolute_paths = [os.path.join(root, f) for f in files]
+        relative_paths = [os.path.relpath(p, dir_path) for p in absolute_paths]
+
+        if add_base_to_relative:
+            relative_paths = [os.path.join(base_name, p) for p in relative_paths]
+
+        assert len(absolute_paths) == len(relative_paths)
+        result += zip(absolute_paths, relative_paths)
+
+    return result
+
+
+def copy_files_and_create_dirs(files: List[Tuple[str, str]]) -> None:
+    """Takes in a list of tuples of (src, dst) paths and copies files.
+    Will create all necessary directories."""
+    for file in files:
+        target_dir_name = os.path.dirname(file[1])
+
+        # will create all intermediate-level directories
+        if not os.path.exists(target_dir_name):
+            os.makedirs(target_dir_name)
+
+        shutil.copyfile(file[0], file[1])
+
+
+# URL helpers
+# ------------------------------------------------------------------------------------------
+
+def is_url(obj: Any, allow_file_urls: bool = False) -> bool:
+    """Determine whether the given object is a valid URL string."""
+    if not isinstance(obj, str) or not "://" in obj:
+        return False
+    if allow_file_urls and obj.startswith('file://'):
+        return True
+    try:
+        res = requests.compat.urlparse(obj)
+        if not res.scheme or not res.netloc or not "." in res.netloc:
+            return False
+        res = requests.compat.urlparse(requests.compat.urljoin(obj, "/"))
+        if not res.scheme or not res.netloc or not "." in res.netloc:
+            return False
+    except:
+        return False
+    return True
+
+
+def open_url(url: str, cache_dir: str = None, num_attempts: int = 10, verbose: bool = True, return_filename: bool = False, cache: bool = True) -> Any:
+    """Download the given URL and return a binary-mode file object to access the data."""
+    assert num_attempts >= 1
+    assert not (return_filename and (not cache))
+
+    # Doesn't look like an URL scheme so interpret it as a local filename.
+    if not re.match('^[a-z]+://', url):
+        return url if return_filename else open(url, "rb")
+
+    # Handle file URLs. This code handles unusual file:// patterns that
+    # arise on Windows:
+    #
+    # file:///c:/foo.txt
+    #
+    # which would translate to a local '/c:/foo.txt' filename that's
+    # invalid. Drop the forward slash for such pathnames.
+    #
+    # If you touch this code path, you should test it on both Linux and
+    # Windows.
+    #
+    # Some internet resources suggest using urllib.request.url2pathname() but
+    # but that converts forward slashes to backslashes and this causes
+    # its own set of problems.
+    if url.startswith('file://'):
+        filename = urllib.parse.urlparse(url).path
+        if re.match(r'^/[a-zA-Z]:', filename):
+            filename = filename[1:]
+        return filename if return_filename else open(filename, "rb")
+
+    assert is_url(url)
+
+    # Lookup from cache.
+    if cache_dir is None:
+        cache_dir = make_cache_dir_path('downloads')
+
+    url_md5 = hashlib.md5(url.encode("utf-8")).hexdigest()
+    if cache:
+        cache_files = glob.glob(os.path.join(cache_dir, url_md5 + "_*"))
+        if len(cache_files) == 1:
+            filename = cache_files[0]
+            return filename if return_filename else open(filename, "rb")
+
+    # Download.
+    url_name = None
+    url_data = None
+    with requests.Session() as session:
+        if verbose:
+            print("Downloading %s ..." % url, end="", flush=True)
+        for attempts_left in reversed(range(num_attempts)):
+            try:
+                with session.get(url) as res:
+                    res.raise_for_status()
+                    if len(res.content) == 0:
+                        raise IOError("No data received")
+
+                    if len(res.content) < 8192:
+                        content_str = res.content.decode("utf-8")
+                        if "download_warning" in res.headers.get("Set-Cookie", ""):
+                            links = [html.unescape(link) for link in content_str.split('"') if "export=download" in link]
+                            if len(links) == 1:
+                                url = requests.compat.urljoin(url, links[0])
+                                raise IOError("Google Drive virus checker nag")
+                        if "Google Drive - Quota exceeded" in content_str:
+                            raise IOError("Google Drive download quota exceeded -- please try again later")
+
+                    match = re.search(r'filename="([^"]*)"', res.headers.get("Content-Disposition", ""))
+                    url_name = match[1] if match else url
+                    url_data = res.content
+                    if verbose:
+                        print(" done")
+                    break
+            except KeyboardInterrupt:
+                raise
+            except:
+                if not attempts_left:
+                    if verbose:
+                        print(" failed")
+                    raise
+                if verbose:
+                    print(".", end="", flush=True)
+
+    # Save to cache.
+    if cache:
+        safe_name = re.sub(r"[^0-9a-zA-Z-._]", "_", url_name)
+        cache_file = os.path.join(cache_dir, url_md5 + "_" + safe_name)
+        temp_file = os.path.join(cache_dir, "tmp_" + uuid.uuid4().hex + "_" + url_md5 + "_" + safe_name)
+        os.makedirs(cache_dir, exist_ok=True)
+        with open(temp_file, "wb") as f:
+            f.write(url_data)
+        os.replace(temp_file, cache_file) # atomic
+        if return_filename:
+            return cache_file
+
+    # Return data as file object.
+    assert not return_filename
+    return io.BytesIO(url_data)
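Two small behaviors worth noting in dnnlib: EasyDict writes attribute assignments through to the underlying dict, and open_url falls back to a plain `open()` for local paths while caching real downloads under `~/.cache/dnnlib/downloads` by default. A short sketch:

```python
import dnnlib
from dnnlib.util import format_time, open_url

cfg = dnnlib.EasyDict(lr=0.002)
cfg.batch = 8                       # attribute access writes through to the dict
assert cfg['batch'] == 8 and cfg.lr == 0.002

print(format_time(3700))            # '1h 01m 40s'

with open_url('pretrained/ohayou_face.pkl') as f:   # no URL scheme -> local open()
    magic = f.read(2)
```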
legacy.py
ADDED
@@ -0,0 +1,384 @@
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

import click
import pickle
import re
import copy
import numpy as np
import torch
import dnnlib
from torch_utils import misc

#----------------------------------------------------------------------------

# !!! custom
def load_network_pkl(f, force_fp16=False, custom=False, **ex_kwargs):
# def load_network_pkl(f, force_fp16=False):
    data = _LegacyUnpickler(f).load()
    # data = pickle.load(f, encoding='latin1')

    # Legacy TensorFlow pickle => convert.
    if isinstance(data, tuple) and len(data) == 3 and all(isinstance(net, _TFNetworkStub) for net in data):
        tf_G, tf_D, tf_Gs = data
        G = convert_tf_generator(tf_G, custom=custom, **ex_kwargs)
        D = convert_tf_discriminator(tf_D)
        G_ema = convert_tf_generator(tf_Gs, custom=custom, **ex_kwargs)
        data = dict(G=G, D=D, G_ema=G_ema)
        # !!! custom
        assert isinstance(data['G'], torch.nn.Module)
        assert isinstance(data['D'], torch.nn.Module)
        nets = ['G', 'D', 'G_ema']
    elif isinstance(data, _TFNetworkStub):
        G_ema = convert_tf_generator(data, custom=custom, **ex_kwargs)
        data = dict(G_ema=G_ema)
        nets = ['G_ema']
    else:
        # !!! custom
        if custom is True:
            G_ema = custom_generator(data, **ex_kwargs)
            data = dict(G_ema=G_ema)
            nets = ['G_ema']
        else:
            nets = []
            for name in ['G', 'D', 'G_ema']:
                if name in data.keys():
                    nets.append(name)
    # print(nets)

    # Add missing fields.
    if 'training_set_kwargs' not in data:
        data['training_set_kwargs'] = None
    if 'augment_pipe' not in data:
        data['augment_pipe'] = None

    # Validate contents.
    assert isinstance(data['G_ema'], torch.nn.Module)
    assert isinstance(data['training_set_kwargs'], (dict, type(None)))
    assert isinstance(data['augment_pipe'], (torch.nn.Module, type(None)))

    # Force FP16.
    if force_fp16:
        for key in nets: # !!! custom
            old = data[key]
            kwargs = copy.deepcopy(old.init_kwargs)
            if key.startswith('G'):
                kwargs.synthesis_kwargs = dnnlib.EasyDict(kwargs.get('synthesis_kwargs', {}))
                kwargs.synthesis_kwargs.num_fp16_res = 4
                kwargs.synthesis_kwargs.conv_clamp = 256
            if key.startswith('D'):
                kwargs.num_fp16_res = 4
                kwargs.conv_clamp = 256
            if kwargs != old.init_kwargs:
                new = type(old)(**kwargs).eval().requires_grad_(False)
                misc.copy_params_and_buffers(old, new, require_all=True)
                data[key] = new
    return data

#----------------------------------------------------------------------------

class _TFNetworkStub(dnnlib.EasyDict):
    pass

class _LegacyUnpickler(pickle.Unpickler):
    def find_class(self, module, name):
        if module == 'dnnlib.tflib.network' and name == 'Network':
            return _TFNetworkStub
        return super().find_class(module, name)

#----------------------------------------------------------------------------

def _collect_tf_params(tf_net):
    # pylint: disable=protected-access
    tf_params = dict()
    def recurse(prefix, tf_net):
        for name, value in tf_net.variables:
            tf_params[prefix + name] = value
        for name, comp in tf_net.components.items():
            recurse(prefix + name + '/', comp)
    recurse('', tf_net)
    return tf_params

#----------------------------------------------------------------------------

def _populate_module_params(module, *patterns):
    for name, tensor in misc.named_params_and_buffers(module):
        found = False
        value = None
        for pattern, value_fn in zip(patterns[0::2], patterns[1::2]):
            match = re.fullmatch(pattern, name)
            if match:
                found = True
                if value_fn is not None:
                    value = value_fn(*match.groups())
                break
        try:
            assert found
            if value is not None:
                tensor.copy_(torch.from_numpy(np.array(value)))
        except:
            print(name, list(tensor.shape))
            raise

#----------------------------------------------------------------------------

# !!! custom
def custom_generator(data, **ex_kwargs):
    from training import stylegan2_multi as networks
    try: # saved? (with new fix)
        fmap_base = data['G_ema'].synthesis.fmap_base
    except: # default from original configs
        fmap_base = 32768 if data['G_ema'].img_resolution >= 512 else 16384
    kwargs = dnnlib.EasyDict(
        z_dim = data['G_ema'].z_dim,
        c_dim = data['G_ema'].c_dim,
        w_dim = data['G_ema'].w_dim,
        img_resolution = data['G_ema'].img_resolution,
        img_channels = data['G_ema'].img_channels,
        init_res = [4,4], # hacky
        mapping_kwargs = dnnlib.EasyDict(num_layers = data['G_ema'].mapping.num_layers),
        synthesis_kwargs = dnnlib.EasyDict(channel_base = fmap_base, **ex_kwargs),
    )
    G_out = networks.Generator(**kwargs).eval().requires_grad_(False)
    misc.copy_params_and_buffers(data['G_ema'], G_out, require_all=False)
    return G_out

# !!! custom
def convert_tf_generator(tf_G, custom=False, **ex_kwargs):
# def convert_tf_generator(tf_G):
    if tf_G.version < 4:
        raise ValueError('TensorFlow pickle version too low')

    # Collect kwargs.
    tf_kwargs = tf_G.static_kwargs
    known_kwargs = set()
    def kwarg(tf_name, default=None, none=None):
        known_kwargs.add(tf_name)
        val = tf_kwargs.get(tf_name, default)
        return val if val is not None else none

    # Convert kwargs.
    kwargs = dnnlib.EasyDict(
        z_dim = kwarg('latent_size', 512),
        c_dim = kwarg('label_size', 0),
        w_dim = kwarg('dlatent_size', 512),
        img_resolution = kwarg('resolution', 1024),
        img_channels = kwarg('num_channels', 3),
        mapping_kwargs = dnnlib.EasyDict(
            num_layers = kwarg('mapping_layers', 8),
            embed_features = kwarg('label_fmaps', None),
            layer_features = kwarg('mapping_fmaps', None),
            activation = kwarg('mapping_nonlinearity', 'lrelu'),
            lr_multiplier = kwarg('mapping_lrmul', 0.01),
            w_avg_beta = kwarg('w_avg_beta', 0.995, none=1),
        ),
        synthesis_kwargs = dnnlib.EasyDict(
            channel_base = kwarg('fmap_base', 16384) * 2,
            channel_max = kwarg('fmap_max', 512),
            num_fp16_res = kwarg('num_fp16_res', 0),
            conv_clamp = kwarg('conv_clamp', None),
            architecture = kwarg('architecture', 'skip'),
            resample_filter = kwarg('resample_kernel', [1,3,3,1]),
            use_noise = kwarg('use_noise', True),
            activation = kwarg('nonlinearity', 'lrelu'),
        ),
        # !!! custom
        # init_res = kwarg('init_res', [4,4]),
    )

    # Check for unknown kwargs.
    kwarg('truncation_psi')
    kwarg('truncation_cutoff')
    kwarg('style_mixing_prob')
    kwarg('structure')
    unknown_kwargs = list(set(tf_kwargs.keys()) - known_kwargs)
    # !!! custom
    if custom:
        kwargs.init_res = [4,4]
        kwargs.synthesis_kwargs = dnnlib.EasyDict(**kwargs.synthesis_kwargs, **ex_kwargs)
    if len(unknown_kwargs) > 0:
        print('Unknown TensorFlow data! This may result in problems with your converted model.')
        print(unknown_kwargs)
        #raise ValueError('Unknown TensorFlow kwargs:', unknown_kwargs)
        # raise ValueError('Unknown TensorFlow kwarg', unknown_kwargs[0])
    # try:
    #     if ex_kwargs['verbose'] is True: print(kwargs.synthesis_kwargs)
    # except: pass

    # Collect params.
    tf_params = _collect_tf_params(tf_G)
    for name, value in list(tf_params.items()):
        match = re.fullmatch(r'ToRGB_lod(\d+)/(.*)', name)
        if match:
            r = kwargs.img_resolution // (2 ** int(match.group(1)))
            tf_params[f'{r}x{r}/ToRGB/{match.group(2)}'] = value
            kwargs.synthesis_kwargs.architecture = 'orig'
    #for name, value in tf_params.items(): print(f'{name:<50s}{list(value.shape)}')

    # Convert params.
    if custom:
        from training import stylegan2_multi as networks
    else:
        from training import networks
    G = networks.Generator(**kwargs).eval().requires_grad_(False)
    # pylint: disable=unnecessary-lambda
    _populate_module_params(G,
        r'mapping\.w_avg', lambda: tf_params[f'dlatent_avg'],
        r'mapping\.embed\.weight', lambda: tf_params[f'mapping/LabelEmbed/weight'].transpose(),
        r'mapping\.embed\.bias', lambda: tf_params[f'mapping/LabelEmbed/bias'],
        r'mapping\.fc(\d+)\.weight', lambda i: tf_params[f'mapping/Dense{i}/weight'].transpose(),
        r'mapping\.fc(\d+)\.bias', lambda i: tf_params[f'mapping/Dense{i}/bias'],
        r'synthesis\.b4\.const', lambda: tf_params[f'synthesis/4x4/Const/const'][0],
        r'synthesis\.b4\.conv1\.weight', lambda: tf_params[f'synthesis/4x4/Conv/weight'].transpose(3, 2, 0, 1),
        r'synthesis\.b4\.conv1\.bias', lambda: tf_params[f'synthesis/4x4/Conv/bias'],
        r'synthesis\.b4\.conv1\.noise_const', lambda: tf_params[f'synthesis/noise0'][0, 0],
        r'synthesis\.b4\.conv1\.noise_strength', lambda: tf_params[f'synthesis/4x4/Conv/noise_strength'],
        r'synthesis\.b4\.conv1\.affine\.weight', lambda: tf_params[f'synthesis/4x4/Conv/mod_weight'].transpose(),
        r'synthesis\.b4\.conv1\.affine\.bias', lambda: tf_params[f'synthesis/4x4/Conv/mod_bias'] + 1,
        r'synthesis\.b(\d+)\.conv0\.weight', lambda r: tf_params[f'synthesis/{r}x{r}/Conv0_up/weight'][::-1, ::-1].transpose(3, 2, 0, 1),
        r'synthesis\.b(\d+)\.conv0\.bias', lambda r: tf_params[f'synthesis/{r}x{r}/Conv0_up/bias'],
        r'synthesis\.b(\d+)\.conv0\.noise_const', lambda r: tf_params[f'synthesis/noise{int(np.log2(int(r)))*2-5}'][0, 0],
        r'synthesis\.b(\d+)\.conv0\.noise_strength', lambda r: tf_params[f'synthesis/{r}x{r}/Conv0_up/noise_strength'],
        r'synthesis\.b(\d+)\.conv0\.affine\.weight', lambda r: tf_params[f'synthesis/{r}x{r}/Conv0_up/mod_weight'].transpose(),
        r'synthesis\.b(\d+)\.conv0\.affine\.bias', lambda r: tf_params[f'synthesis/{r}x{r}/Conv0_up/mod_bias'] + 1,
        r'synthesis\.b(\d+)\.conv1\.weight', lambda r: tf_params[f'synthesis/{r}x{r}/Conv1/weight'].transpose(3, 2, 0, 1),
        r'synthesis\.b(\d+)\.conv1\.bias', lambda r: tf_params[f'synthesis/{r}x{r}/Conv1/bias'],
        r'synthesis\.b(\d+)\.conv1\.noise_const', lambda r: tf_params[f'synthesis/noise{int(np.log2(int(r)))*2-4}'][0, 0],
        r'synthesis\.b(\d+)\.conv1\.noise_strength', lambda r: tf_params[f'synthesis/{r}x{r}/Conv1/noise_strength'],
        r'synthesis\.b(\d+)\.conv1\.affine\.weight', lambda r: tf_params[f'synthesis/{r}x{r}/Conv1/mod_weight'].transpose(),
        r'synthesis\.b(\d+)\.conv1\.affine\.bias', lambda r: tf_params[f'synthesis/{r}x{r}/Conv1/mod_bias'] + 1,
        r'synthesis\.b(\d+)\.torgb\.weight', lambda r: tf_params[f'synthesis/{r}x{r}/ToRGB/weight'].transpose(3, 2, 0, 1),
        r'synthesis\.b(\d+)\.torgb\.bias', lambda r: tf_params[f'synthesis/{r}x{r}/ToRGB/bias'],
        r'synthesis\.b(\d+)\.torgb\.affine\.weight', lambda r: tf_params[f'synthesis/{r}x{r}/ToRGB/mod_weight'].transpose(),
        r'synthesis\.b(\d+)\.torgb\.affine\.bias', lambda r: tf_params[f'synthesis/{r}x{r}/ToRGB/mod_bias'] + 1,
        r'synthesis\.b(\d+)\.skip\.weight', lambda r: tf_params[f'synthesis/{r}x{r}/Skip/weight'][::-1, ::-1].transpose(3, 2, 0, 1),
        r'.*\.resample_filter', None,
    )
    return G

#----------------------------------------------------------------------------

def convert_tf_discriminator(tf_D):
    if tf_D.version < 4:
        raise ValueError('TensorFlow pickle version too low')

    # Collect kwargs.
    tf_kwargs = tf_D.static_kwargs
    known_kwargs = set()
    def kwarg(tf_name, default=None):
        known_kwargs.add(tf_name)
        return tf_kwargs.get(tf_name, default)

    # Convert kwargs.
    kwargs = dnnlib.EasyDict(
        c_dim = kwarg('label_size', 0),
        img_resolution = kwarg('resolution', 1024),
        img_channels = kwarg('num_channels', 3),
        architecture = kwarg('architecture', 'resnet'),
        channel_base = kwarg('fmap_base', 16384) * 2,
        channel_max = kwarg('fmap_max', 512),
        num_fp16_res = kwarg('num_fp16_res', 0),
        conv_clamp = kwarg('conv_clamp', None),
        cmap_dim = kwarg('mapping_fmaps', None),
        block_kwargs = dnnlib.EasyDict(
            activation = kwarg('nonlinearity', 'lrelu'),
            resample_filter = kwarg('resample_kernel', [1,3,3,1]),
            freeze_layers = kwarg('freeze_layers', 0),
        ),
        mapping_kwargs = dnnlib.EasyDict(
            num_layers = kwarg('mapping_layers', 0),
            embed_features = kwarg('mapping_fmaps', None),
            layer_features = kwarg('mapping_fmaps', None),
            activation = kwarg('nonlinearity', 'lrelu'),
            lr_multiplier = kwarg('mapping_lrmul', 0.1),
        ),
        epilogue_kwargs = dnnlib.EasyDict(
            mbstd_group_size = kwarg('mbstd_group_size', None),
            mbstd_num_channels = kwarg('mbstd_num_features', 1),
            activation = kwarg('nonlinearity', 'lrelu'),
        ),
        # !!! custom
        # init_res = kwarg('init_res', [4,4]),
    )

    # Check for unknown kwargs.
    kwarg('structure')
    unknown_kwargs = list(set(tf_kwargs.keys()) - known_kwargs)
    if len(unknown_kwargs) > 0:
        print('Unknown TensorFlow data! This may result in problems with your converted model.')
        print(unknown_kwargs)
        # originally this repo threw errors:
        # raise ValueError('Unknown TensorFlow kwarg', unknown_kwargs[0])

    # Collect params.
    tf_params = _collect_tf_params(tf_D)
    for name, value in list(tf_params.items()):
        match = re.fullmatch(r'FromRGB_lod(\d+)/(.*)', name)
        if match:
            r = kwargs.img_resolution // (2 ** int(match.group(1)))
            tf_params[f'{r}x{r}/FromRGB/{match.group(2)}'] = value
            kwargs.architecture = 'orig'
    #for name, value in tf_params.items(): print(f'{name:<50s}{list(value.shape)}')

    # Convert params.
    from training import networks
    D = networks.Discriminator(**kwargs).eval().requires_grad_(False)
    # pylint: disable=unnecessary-lambda
    _populate_module_params(D,
        r'b(\d+)\.fromrgb\.weight', lambda r: tf_params[f'{r}x{r}/FromRGB/weight'].transpose(3, 2, 0, 1),
        r'b(\d+)\.fromrgb\.bias', lambda r: tf_params[f'{r}x{r}/FromRGB/bias'],
        r'b(\d+)\.conv(\d+)\.weight', lambda r, i: tf_params[f'{r}x{r}/Conv{i}{["","_down"][int(i)]}/weight'].transpose(3, 2, 0, 1),
        r'b(\d+)\.conv(\d+)\.bias', lambda r, i: tf_params[f'{r}x{r}/Conv{i}{["","_down"][int(i)]}/bias'],
        r'b(\d+)\.skip\.weight', lambda r: tf_params[f'{r}x{r}/Skip/weight'].transpose(3, 2, 0, 1),
        r'mapping\.embed\.weight', lambda: tf_params[f'LabelEmbed/weight'].transpose(),
        r'mapping\.embed\.bias', lambda: tf_params[f'LabelEmbed/bias'],
        r'mapping\.fc(\d+)\.weight', lambda i: tf_params[f'Mapping{i}/weight'].transpose(),
        r'mapping\.fc(\d+)\.bias', lambda i: tf_params[f'Mapping{i}/bias'],
        r'b4\.conv\.weight', lambda: tf_params[f'4x4/Conv/weight'].transpose(3, 2, 0, 1),
        r'b4\.conv\.bias', lambda: tf_params[f'4x4/Conv/bias'],
        r'b4\.fc\.weight', lambda: tf_params[f'4x4/Dense0/weight'].transpose(),
        r'b4\.fc\.bias', lambda: tf_params[f'4x4/Dense0/bias'],
        r'b4\.out\.weight', lambda: tf_params[f'Output/weight'].transpose(),
        r'b4\.out\.bias', lambda: tf_params[f'Output/bias'],
        r'.*\.resample_filter', None,
    )
    return D

#----------------------------------------------------------------------------

@click.command()
@click.option('--source', help='Input pickle', required=True, metavar='PATH')
@click.option('--dest', help='Output pickle', required=True, metavar='PATH')
@click.option('--force-fp16', help='Force the networks to use FP16', type=bool, default=False, metavar='BOOL', show_default=True)
def convert_network_pickle(source, dest, force_fp16):
    """Convert legacy network pickle into the native PyTorch format.

    The tool is able to load the main network configurations exported using the TensorFlow version of StyleGAN2 or StyleGAN2-ADA.
    It does not support e.g. StyleGAN2-ADA comparison methods, StyleGAN2 configs A-D, or StyleGAN1 networks.

    Example:

    \b
    python legacy.py \\
        --source=https://nvlabs-fi-cdn.nvidia.com/stylegan2/networks/stylegan2-cat-config-f.pkl \\
        --dest=stylegan2-cat-config-f.pkl
    """
    print(f'Loading "{source}"...')
    with dnnlib.util.open_url(source) as f:
        data = load_network_pkl(f, force_fp16=force_fp16)
    print(f'Saving "{dest}"...')
    with open(dest, 'wb') as f:
        pickle.dump(data, f)
    print('Done.')

#----------------------------------------------------------------------------

if __name__ == "__main__":
    convert_network_pickle() # pylint: disable=no-value-for-parameter

#----------------------------------------------------------------------------
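The same conversion the CLI wraps can also be driven from Python. A minimal sketch, with placeholder file names:

import pickle
import dnnlib
import legacy

# Load a legacy TF pickle (or an already-converted one) and re-save it
# in the native PyTorch format; both paths are placeholders.
with dnnlib.util.open_url('network-snapshot-tf.pkl') as f:
    data = legacy.load_network_pkl(f, force_fp16=False)
with open('network-snapshot-pt.pkl', 'wb') as f:
    pickle.dump(data, f)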
model_build.py
ADDED
@@ -0,0 +1,95 @@
import os
import glob

import numpy as np
from numpy import linalg
import PIL.Image as Image
import torch
from torchvision import transforms
from tqdm import tqdm
from argparse import Namespace
import easydict

import legacy
import dnnlib

from opensimplex import OpenSimplex

from configs import data_configs
from models.psp import pSp


def build_stylegan2(
    increment = 0.01,
    network_pkl = 'pretrained/ohayou_face.pkl', # generator checkpoint shipped with this Space
    process = 'image', #['image', 'interpolation','truncation','interpolation-truncation']
    random_seed = 0,
    diameter = 100.0,
    scale_type = 'pad', #['pad', 'padside', 'symm','symmside']
    size = [512, 512],
    seeds = [0],
    space = 'z', #['z', 'w']
    fps = 24,
    frames = 240,
    noise_mode = 'none', #['const', 'random', 'none']
    outdir = 'path',
    projected_w = 'path',
    easing = 'linear',
    device = 'cpu'
):
    G_kwargs = dnnlib.EasyDict()
    G_kwargs.size = size
    G_kwargs.scale_type = scale_type

    device = torch.device(device)
    with dnnlib.util.open_url(network_pkl) as f:
        # G = legacy.load_network_pkl(f)['G_ema'].to(device) # type: ignore
        G = legacy.load_network_pkl(f, custom=True, **G_kwargs)['G_ema'].to(device) # type: ignore

    return G.synthesis


def build_psp():
    test_opts = easydict.EasyDict({
        # arguments for inference script
        'checkpoint_path' : 'pretrained/ohayou_face.pt', # pSp checkpoint shipped with this Space
        'couple_outputs' : False,
        'resize_outputs' : False,

        'test_batch_size' : 1,
        'test_workers' : 1,

        # arguments for style-mixing script
        'n_images' : None,
        'n_outputs_to_generate' : 5,
        'mix_alpha' : None,
        'latent_mask' : None,

        # arguments for super-resolution
        'resize_factors' : None,
    })

    # update test options with options used during training
    ckpt = torch.load(test_opts.checkpoint_path, map_location='cpu')
    opts = ckpt['opts']
    opts.update(vars(test_opts))
    if 'learn_in_w' not in opts:
        opts['learn_in_w'] = False
    opts = Namespace(**opts)
    opts.device = 'cpu'
    net = pSp(opts)
    net.eval()
    return net

def img_preprocess(img, transform):
    if (img.mode == 'RGBA') or (img.mode == 'P'):
        img = img.convert('RGBA')  # palette images may carry transparency; normalize first
        background = Image.new("RGB", img.size, (255, 255, 255))
        background.paste(img, mask=img.split()[3]) # 3 is the alpha channel
        img = background
    assert img.mode == 'RGB'
    img = transform(img)
    img = img.unsqueeze(dim=0)
    return img
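A minimal usage sketch for img_preprocess; the file name and transform below are illustrative assumptions, not values defined in this file:

from PIL import Image
from torchvision import transforms
from model_build import img_preprocess

to_tensor = transforms.Compose([transforms.Resize((256, 256)), transforms.ToTensor()])
batch = img_preprocess(Image.open('face.png'), to_tensor)  # RGBA/P inputs get a white background
print(batch.shape)  # torch.Size([1, 3, 256, 256])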
models/__init__.py
ADDED
File without changes
models/encoders/__init__.py
ADDED
File without changes
models/encoders/helpers.py
ADDED
@@ -0,0 +1,119 @@
from collections import namedtuple
import torch
from torch.nn import Conv2d, BatchNorm2d, PReLU, ReLU, Sigmoid, MaxPool2d, AdaptiveAvgPool2d, Sequential, Module

"""
ArcFace implementation from [TreB1eN](https://github.com/TreB1eN/InsightFace_Pytorch)
"""


class Flatten(Module):
    def forward(self, input):
        return input.view(input.size(0), -1)


def l2_norm(input, axis=1):
    norm = torch.norm(input, 2, axis, True)
    output = torch.div(input, norm)
    return output


class Bottleneck(namedtuple('Block', ['in_channel', 'depth', 'stride'])):
    """ A named tuple describing a ResNet block. """


def get_block(in_channel, depth, num_units, stride=2):
    return [Bottleneck(in_channel, depth, stride)] + [Bottleneck(depth, depth, 1) for i in range(num_units - 1)]


def get_blocks(num_layers):
    if num_layers == 50:
        blocks = [
            get_block(in_channel=64, depth=64, num_units=3),
            get_block(in_channel=64, depth=128, num_units=4),
            get_block(in_channel=128, depth=256, num_units=14),
            get_block(in_channel=256, depth=512, num_units=3)
        ]
    elif num_layers == 100:
        blocks = [
            get_block(in_channel=64, depth=64, num_units=3),
            get_block(in_channel=64, depth=128, num_units=13),
            get_block(in_channel=128, depth=256, num_units=30),
            get_block(in_channel=256, depth=512, num_units=3)
        ]
    elif num_layers == 152:
        blocks = [
            get_block(in_channel=64, depth=64, num_units=3),
            get_block(in_channel=64, depth=128, num_units=8),
            get_block(in_channel=128, depth=256, num_units=36),
            get_block(in_channel=256, depth=512, num_units=3)
        ]
    else:
        raise ValueError("Invalid number of layers: {}. Must be one of [50, 100, 152]".format(num_layers))
    return blocks


class SEModule(Module):
    def __init__(self, channels, reduction):
        super(SEModule, self).__init__()
        self.avg_pool = AdaptiveAvgPool2d(1)
        self.fc1 = Conv2d(channels, channels // reduction, kernel_size=1, padding=0, bias=False)
        self.relu = ReLU(inplace=True)
        self.fc2 = Conv2d(channels // reduction, channels, kernel_size=1, padding=0, bias=False)
        self.sigmoid = Sigmoid()

    def forward(self, x):
        module_input = x
        x = self.avg_pool(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.sigmoid(x)
        return module_input * x


class bottleneck_IR(Module):
    def __init__(self, in_channel, depth, stride):
        super(bottleneck_IR, self).__init__()
        if in_channel == depth:
            self.shortcut_layer = MaxPool2d(1, stride)
        else:
            self.shortcut_layer = Sequential(
                Conv2d(in_channel, depth, (1, 1), stride, bias=False),
                BatchNorm2d(depth)
            )
        self.res_layer = Sequential(
            BatchNorm2d(in_channel),
            Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False), PReLU(depth),
            Conv2d(depth, depth, (3, 3), stride, 1, bias=False), BatchNorm2d(depth)
        )

    def forward(self, x):
        shortcut = self.shortcut_layer(x)
        res = self.res_layer(x)
        return res + shortcut


class bottleneck_IR_SE(Module):
    def __init__(self, in_channel, depth, stride):
        super(bottleneck_IR_SE, self).__init__()
        if in_channel == depth:
            self.shortcut_layer = MaxPool2d(1, stride)
        else:
            self.shortcut_layer = Sequential(
                Conv2d(in_channel, depth, (1, 1), stride, bias=False),
                BatchNorm2d(depth)
            )
        self.res_layer = Sequential(
            BatchNorm2d(in_channel),
            Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False),
            PReLU(depth),
            Conv2d(depth, depth, (3, 3), stride, 1, bias=False),
            BatchNorm2d(depth),
            SEModule(depth, 16)
        )

    def forward(self, x):
        shortcut = self.shortcut_layer(x)
        res = self.res_layer(x)
        return res + shortcut
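A quick shape check for the squeeze-and-excitation block above (a sketch; assumes this repo is on the import path):

import torch
from models.encoders.helpers import SEModule, get_blocks

se = SEModule(channels=64, reduction=16)
x = torch.randn(2, 64, 32, 32)
assert se(x).shape == x.shape  # channel-wise re-weighting keeps the shape
print(sum(len(block) for block in get_blocks(50)))  # 24 bottleneck units in ir-50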
models/encoders/model_irse.py
ADDED
@@ -0,0 +1,84 @@
from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, PReLU, Dropout, Sequential, Module
from models.encoders.helpers import get_blocks, Flatten, bottleneck_IR, bottleneck_IR_SE, l2_norm

"""
Modified Backbone implementation from [TreB1eN](https://github.com/TreB1eN/InsightFace_Pytorch)
"""


class Backbone(Module):
    def __init__(self, input_size, num_layers, mode='ir', drop_ratio=0.4, affine=True):
        super(Backbone, self).__init__()
        assert input_size in [112, 224], "input_size should be 112 or 224"
        assert num_layers in [50, 100, 152], "num_layers should be 50, 100 or 152"
        assert mode in ['ir', 'ir_se'], "mode should be ir or ir_se"
        blocks = get_blocks(num_layers)
        if mode == 'ir':
            unit_module = bottleneck_IR
        elif mode == 'ir_se':
            unit_module = bottleneck_IR_SE
        self.input_layer = Sequential(Conv2d(3, 64, (3, 3), 1, 1, bias=False),
                                      BatchNorm2d(64),
                                      PReLU(64))
        if input_size == 112:
            self.output_layer = Sequential(BatchNorm2d(512),
                                           Dropout(drop_ratio),
                                           Flatten(),
                                           Linear(512 * 7 * 7, 512),
                                           BatchNorm1d(512, affine=affine))
        else:
            self.output_layer = Sequential(BatchNorm2d(512),
                                           Dropout(drop_ratio),
                                           Flatten(),
                                           Linear(512 * 14 * 14, 512),
                                           BatchNorm1d(512, affine=affine))

        modules = []
        for block in blocks:
            for bottleneck in block:
                modules.append(unit_module(bottleneck.in_channel,
                                           bottleneck.depth,
                                           bottleneck.stride))
        self.body = Sequential(*modules)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.body(x)
        x = self.output_layer(x)
        return l2_norm(x)


def IR_50(input_size):
    """Constructs a ir-50 model."""
    model = Backbone(input_size, num_layers=50, mode='ir', drop_ratio=0.4, affine=False)
    return model


def IR_101(input_size):
    """Constructs a ir-101 model."""
    model = Backbone(input_size, num_layers=100, mode='ir', drop_ratio=0.4, affine=False)
    return model


def IR_152(input_size):
    """Constructs a ir-152 model."""
    model = Backbone(input_size, num_layers=152, mode='ir', drop_ratio=0.4, affine=False)
    return model


def IR_SE_50(input_size):
    """Constructs a ir_se-50 model."""
    model = Backbone(input_size, num_layers=50, mode='ir_se', drop_ratio=0.4, affine=False)
    return model


def IR_SE_101(input_size):
    """Constructs a ir_se-101 model."""
    model = Backbone(input_size, num_layers=100, mode='ir_se', drop_ratio=0.4, affine=False)
    return model


def IR_SE_152(input_size):
    """Constructs a ir_se-152 model."""
    model = Backbone(input_size, num_layers=152, mode='ir_se', drop_ratio=0.4, affine=False)
    return model
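A sketch of embedding dummy 112x112 crops with the IR-SE-50 constructor above (weights are randomly initialized here; in practice a pretrained ArcFace checkpoint would be loaded first):

import torch
from models.encoders.model_irse import IR_SE_50

net = IR_SE_50(input_size=112).eval()
with torch.no_grad():
    emb = net(torch.randn(4, 3, 112, 112))
print(emb.shape)        # torch.Size([4, 512])
print(emb.norm(dim=1))  # ~1.0 per row, courtesy of l2_norm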
models/encoders/psp_encoders.py
ADDED
@@ -0,0 +1,186 @@
import numpy as np
import torch
import torch.nn.functional as F
from torch import nn
from torch.nn import Linear, Conv2d, BatchNorm2d, PReLU, Sequential, Module

from models.encoders.helpers import get_blocks, Flatten, bottleneck_IR, bottleneck_IR_SE
from models.stylegan2.model import EqualLinear


class GradualStyleBlock(Module):
    def __init__(self, in_c, out_c, spatial):
        super(GradualStyleBlock, self).__init__()
        self.out_c = out_c
        self.spatial = spatial
        num_pools = int(np.log2(spatial))
        modules = []
        modules += [Conv2d(in_c, out_c, kernel_size=3, stride=2, padding=1),
                    nn.LeakyReLU()]
        for i in range(num_pools - 1):
            modules += [
                Conv2d(out_c, out_c, kernel_size=3, stride=2, padding=1),
                nn.LeakyReLU()
            ]
        self.convs = nn.Sequential(*modules)
        self.linear = EqualLinear(out_c, out_c, lr_mul=1)

    def forward(self, x):
        x = self.convs(x)
        x = x.view(-1, self.out_c)
        x = self.linear(x)
        return x


class GradualStyleEncoder(Module):
    def __init__(self, num_layers, mode='ir', opts=None):
        super(GradualStyleEncoder, self).__init__()
        assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152'
        assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se'
        blocks = get_blocks(num_layers)
        if mode == 'ir':
            unit_module = bottleneck_IR
        elif mode == 'ir_se':
            unit_module = bottleneck_IR_SE
        self.input_layer = Sequential(Conv2d(opts.input_nc, 64, (3, 3), 1, 1, bias=False),
                                      BatchNorm2d(64),
                                      PReLU(64))
        modules = []
        for block in blocks:
            for bottleneck in block:
                modules.append(unit_module(bottleneck.in_channel,
                                           bottleneck.depth,
                                           bottleneck.stride))
        self.body = Sequential(*modules)

        self.styles = nn.ModuleList()
        self.style_count = opts.n_styles
        self.coarse_ind = 3
        self.middle_ind = 7
        for i in range(self.style_count):
            if i < self.coarse_ind:
                style = GradualStyleBlock(512, 512, 16)
            elif i < self.middle_ind:
                style = GradualStyleBlock(512, 512, 32)
            else:
                style = GradualStyleBlock(512, 512, 64)
            self.styles.append(style)
        self.latlayer1 = nn.Conv2d(256, 512, kernel_size=1, stride=1, padding=0)
        self.latlayer2 = nn.Conv2d(128, 512, kernel_size=1, stride=1, padding=0)

    def _upsample_add(self, x, y):
        '''Upsample and add two feature maps.
        Args:
          x: (Variable) top feature map to be upsampled.
          y: (Variable) lateral feature map.
        Returns:
          (Variable) added feature map.
        Note in PyTorch, when input size is odd, the upsampled feature map
        with `F.upsample(..., scale_factor=2, mode='nearest')`
        maybe not equal to the lateral feature map size.
        e.g.
        original input size: [N,_,15,15] ->
        conv2d feature map size: [N,_,8,8] ->
        upsampled feature map size: [N,_,16,16]
        So we choose bilinear upsample which supports arbitrary output sizes.
        '''
        _, _, H, W = y.size()
        return F.interpolate(x, size=(H, W), mode='bilinear', align_corners=True) + y

    def forward(self, x):
        x = self.input_layer(x)

        latents = []
        modulelist = list(self.body._modules.values())
        for i, l in enumerate(modulelist):
            x = l(x)
            if i == 6:
                c1 = x
            elif i == 20:
                c2 = x
            elif i == 23:
                c3 = x

        for j in range(self.coarse_ind):
            latents.append(self.styles[j](c3))

        p2 = self._upsample_add(c3, self.latlayer1(c2))
        for j in range(self.coarse_ind, self.middle_ind):
            latents.append(self.styles[j](p2))

        p1 = self._upsample_add(p2, self.latlayer2(c1))
        for j in range(self.middle_ind, self.style_count):
            latents.append(self.styles[j](p1))

        out = torch.stack(latents, dim=1)
        return out


class BackboneEncoderUsingLastLayerIntoW(Module):
    def __init__(self, num_layers, mode='ir', opts=None):
        super(BackboneEncoderUsingLastLayerIntoW, self).__init__()
        print('Using BackboneEncoderUsingLastLayerIntoW')
        assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152'
        assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se'
        blocks = get_blocks(num_layers)
        if mode == 'ir':
            unit_module = bottleneck_IR
        elif mode == 'ir_se':
            unit_module = bottleneck_IR_SE
        self.input_layer = Sequential(Conv2d(opts.input_nc, 64, (3, 3), 1, 1, bias=False),
                                      BatchNorm2d(64),
                                      PReLU(64))
        self.output_pool = torch.nn.AdaptiveAvgPool2d((1, 1))
        self.linear = EqualLinear(512, 512, lr_mul=1)
        modules = []
        for block in blocks:
            for bottleneck in block:
                modules.append(unit_module(bottleneck.in_channel,
                                           bottleneck.depth,
                                           bottleneck.stride))
        self.body = Sequential(*modules)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.body(x)
        x = self.output_pool(x)
        x = x.view(-1, 512)
        x = self.linear(x)
        return x


class BackboneEncoderUsingLastLayerIntoWPlus(Module):
    def __init__(self, num_layers, mode='ir', opts=None):
        super(BackboneEncoderUsingLastLayerIntoWPlus, self).__init__()
        print('Using BackboneEncoderUsingLastLayerIntoWPlus')
        assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152'
        assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se'
        blocks = get_blocks(num_layers)
        if mode == 'ir':
            unit_module = bottleneck_IR
        elif mode == 'ir_se':
            unit_module = bottleneck_IR_SE
        self.n_styles = opts.n_styles
        self.input_layer = Sequential(Conv2d(opts.input_nc, 64, (3, 3), 1, 1, bias=False),
                                      BatchNorm2d(64),
                                      PReLU(64))
        self.output_layer_2 = Sequential(BatchNorm2d(512),
                                         torch.nn.AdaptiveAvgPool2d((7, 7)),
                                         Flatten(),
                                         Linear(512 * 7 * 7, 512))
        self.linear = EqualLinear(512, 512 * self.n_styles, lr_mul=1)
        modules = []
        for block in blocks:
            for bottleneck in block:
                modules.append(unit_module(bottleneck.in_channel,
                                           bottleneck.depth,
                                           bottleneck.stride))
        self.body = Sequential(*modules)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.body(x)
        x = self.output_layer_2(x)
        x = self.linear(x)
        x = x.view(-1, self.n_styles, 512)
        return x
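A sketch of instantiating the feature-pyramid encoder above. input_nc and n_styles are the only opts fields the class reads; the values below are illustrative (18 styles matches a 1024px StyleGAN2):

import easydict
import torch
from models.encoders.psp_encoders import GradualStyleEncoder

opts = easydict.EasyDict({'input_nc': 3, 'n_styles': 18})
encoder = GradualStyleEncoder(50, 'ir_se', opts).eval()
with torch.no_grad():
    w_plus = encoder(torch.randn(1, 3, 256, 256))
print(w_plus.shape)  # torch.Size([1, 18, 512]): one style vector per generator layer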
models/mtcnn/__init__.py
ADDED
File without changes
models/mtcnn/mtcnn.py
ADDED
@@ -0,0 +1,156 @@
import numpy as np
import torch
from PIL import Image
from models.mtcnn.mtcnn_pytorch.src.get_nets import PNet, RNet, ONet
from models.mtcnn.mtcnn_pytorch.src.box_utils import nms, calibrate_box, get_image_boxes, convert_to_square
from models.mtcnn.mtcnn_pytorch.src.first_stage import run_first_stage
from models.mtcnn.mtcnn_pytorch.src.align_trans import get_reference_facial_points, warp_and_crop_face

device = 'cuda:0'


class MTCNN():
    def __init__(self):
        print(device)
        self.pnet = PNet().to(device)
        self.rnet = RNet().to(device)
        self.onet = ONet().to(device)
        self.pnet.eval()
        self.rnet.eval()
        self.onet.eval()
        self.refrence = get_reference_facial_points(default_square=True)

    def align(self, img):
        _, landmarks = self.detect_faces(img)
        if len(landmarks) == 0:
            return None, None
        facial5points = [[landmarks[0][j], landmarks[0][j + 5]] for j in range(5)]
        warped_face, tfm = warp_and_crop_face(np.array(img), facial5points, self.refrence, crop_size=(112, 112))
        return Image.fromarray(warped_face), tfm

    def align_multi(self, img, limit=None, min_face_size=30.0):
        boxes, landmarks = self.detect_faces(img, min_face_size)
        if limit:
            boxes = boxes[:limit]
            landmarks = landmarks[:limit]
        faces = []
        tfms = []
        for landmark in landmarks:
            facial5points = [[landmark[j], landmark[j + 5]] for j in range(5)]
            warped_face, tfm = warp_and_crop_face(np.array(img), facial5points, self.refrence, crop_size=(112, 112))
            faces.append(Image.fromarray(warped_face))
            tfms.append(tfm)
        return boxes, faces, tfms

    def detect_faces(self, image, min_face_size=20.0,
                     thresholds=[0.15, 0.25, 0.35],
                     nms_thresholds=[0.7, 0.7, 0.7]):
        """
        Arguments:
            image: an instance of PIL.Image.
            min_face_size: a float number.
            thresholds: a list of length 3.
            nms_thresholds: a list of length 3.

        Returns:
            two float numpy arrays of shapes [n_boxes, 4] and [n_boxes, 10],
            bounding boxes and facial landmarks.
        """

        # BUILD AN IMAGE PYRAMID
        width, height = image.size
        min_length = min(height, width)

        min_detection_size = 12
        factor = 0.707  # sqrt(0.5)

        # scales for scaling the image
        scales = []

        # scales the image so that
        # minimum size that we can detect equals to
        # minimum face size that we want to detect
        m = min_detection_size / min_face_size
        min_length *= m

        factor_count = 0
        while min_length > min_detection_size:
            scales.append(m * factor ** factor_count)
            min_length *= factor
            factor_count += 1

        # STAGE 1

        # it will be returned
        bounding_boxes = []

        with torch.no_grad():
            # run P-Net on different scales
            for s in scales:
                boxes = run_first_stage(image, self.pnet, scale=s, threshold=thresholds[0])
                bounding_boxes.append(boxes)

            # collect boxes (and offsets, and scores) from different scales
            bounding_boxes = [i for i in bounding_boxes if i is not None]
            bounding_boxes = np.vstack(bounding_boxes)

            keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
            bounding_boxes = bounding_boxes[keep]

            # use offsets predicted by pnet to transform bounding boxes
            bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:])
            # shape [n_boxes, 5]

            bounding_boxes = convert_to_square(bounding_boxes)
            bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

            # STAGE 2

            img_boxes = get_image_boxes(bounding_boxes, image, size=24)
            img_boxes = torch.FloatTensor(img_boxes).to(device)

            output = self.rnet(img_boxes)
            offsets = output[0].cpu().data.numpy()  # shape [n_boxes, 4]
            probs = output[1].cpu().data.numpy()  # shape [n_boxes, 2]

            keep = np.where(probs[:, 1] > thresholds[1])[0]
            bounding_boxes = bounding_boxes[keep]
            bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
            offsets = offsets[keep]

            keep = nms(bounding_boxes, nms_thresholds[1])
            bounding_boxes = bounding_boxes[keep]
            bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
            bounding_boxes = convert_to_square(bounding_boxes)
            bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

            # STAGE 3

            img_boxes = get_image_boxes(bounding_boxes, image, size=48)
            if len(img_boxes) == 0:
                return [], []
            img_boxes = torch.FloatTensor(img_boxes).to(device)
            output = self.onet(img_boxes)
            landmarks = output[0].cpu().data.numpy()  # shape [n_boxes, 10]
            offsets = output[1].cpu().data.numpy()  # shape [n_boxes, 4]
            probs = output[2].cpu().data.numpy()  # shape [n_boxes, 2]

            keep = np.where(probs[:, 1] > thresholds[2])[0]
            bounding_boxes = bounding_boxes[keep]
            bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
            offsets = offsets[keep]
            landmarks = landmarks[keep]

            # compute landmark points
            width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
            height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
            xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
            landmarks[:, 0:5] = np.expand_dims(xmin, 1) + np.expand_dims(width, 1) * landmarks[:, 0:5]
            landmarks[:, 5:10] = np.expand_dims(ymin, 1) + np.expand_dims(height, 1) * landmarks[:, 5:10]

            bounding_boxes = calibrate_box(bounding_boxes, offsets)
            keep = nms(bounding_boxes, nms_thresholds[2], mode='min')
            bounding_boxes = bounding_boxes[keep]
            landmarks = landmarks[keep]

        return bounding_boxes, landmarks
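A usage sketch for the three-stage detector above. Note that this module pins device to 'cuda:0', so as written it needs a GPU, and it assumes the P/R/O-Net weight files loaded by get_nets are present; the image path is a placeholder:

from PIL import Image
from models.mtcnn.mtcnn import MTCNN

mtcnn = MTCNN()
img = Image.open('photo.jpg').convert('RGB')
boxes, landmarks = mtcnn.detect_faces(img)  # scored boxes and 5-point landmarks
face, tfm = mtcnn.align(img)                # 112x112 crop plus the affine used
if face is not None:
    face.save('aligned.jpg')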
models/mtcnn/mtcnn_pytorch/__init__.py
ADDED
File without changes
models/mtcnn/mtcnn_pytorch/src/__init__.py
ADDED
@@ -0,0 +1,2 @@
from .visualization_utils import show_bboxes
from .detector import detect_faces
models/mtcnn/mtcnn_pytorch/src/align_trans.py
ADDED
@@ -0,0 +1,304 @@
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""
|
3 |
+
Created on Mon Apr 24 15:43:29 2017
|
4 |
+
@author: zhaoy
|
5 |
+
"""
|
6 |
+
import numpy as np
|
7 |
+
import cv2
|
8 |
+
|
9 |
+
# from scipy.linalg import lstsq
|
10 |
+
# from scipy.ndimage import geometric_transform # , map_coordinates
|
11 |
+
|
12 |
+
from models.mtcnn.mtcnn_pytorch.src.matlab_cp2tform import get_similarity_transform_for_cv2
|
13 |
+
|
14 |
+
# reference facial points, a list of coordinates (x,y)
|
15 |
+
REFERENCE_FACIAL_POINTS = [
|
16 |
+
[30.29459953, 51.69630051],
|
17 |
+
[65.53179932, 51.50139999],
|
18 |
+
[48.02519989, 71.73660278],
|
19 |
+
[33.54930115, 92.3655014],
|
20 |
+
[62.72990036, 92.20410156]
|
21 |
+
]
|
22 |
+
|
23 |
+
DEFAULT_CROP_SIZE = (96, 112)
|
24 |
+
|
25 |
+
|
26 |
+
class FaceWarpException(Exception):
|
27 |
+
def __str__(self):
|
28 |
+
return 'In File {}:{}'.format(
|
29 |
+
__file__, super.__str__(self))
|
30 |
+
|
31 |
+
|
32 |
+
def get_reference_facial_points(output_size=None,
|
33 |
+
inner_padding_factor=0.0,
|
34 |
+
outer_padding=(0, 0),
|
35 |
+
default_square=False):
|
36 |
+
"""
|
37 |
+
Function:
|
38 |
+
----------
|
39 |
+
get reference 5 key points according to crop settings:
|
40 |
+
0. Set default crop_size:
|
41 |
+
if default_square:
|
42 |
+
crop_size = (112, 112)
|
43 |
+
else:
|
44 |
+
crop_size = (96, 112)
|
45 |
+
1. Pad the crop_size by inner_padding_factor in each side;
|
46 |
+
2. Resize crop_size into (output_size - outer_padding*2),
|
47 |
+
pad into output_size with outer_padding;
|
48 |
+
3. Output reference_5point;
|
49 |
+
Parameters:
|
50 |
+
----------
|
51 |
+
@output_size: (w, h) or None
|
52 |
+
size of aligned face image
|
53 |
+
@inner_padding_factor: (w_factor, h_factor)
|
54 |
+
padding factor for inner (w, h)
|
55 |
+
@outer_padding: (w_pad, h_pad)
|
56 |
+
each row is a pair of coordinates (x, y)
|
57 |
+
@default_square: True or False
|
58 |
+
if True:
|
59 |
+
default crop_size = (112, 112)
|
60 |
+
else:
|
61 |
+
default crop_size = (96, 112);
|
62 |
+
!!! make sure, if output_size is not None:
|
63 |
+
(output_size - outer_padding)
|
64 |
+
= some_scale * (default crop_size * (1.0 + inner_padding_factor))
|
65 |
+
Returns:
|
66 |
+
----------
|
67 |
+
@reference_5point: 5x2 np.array
|
68 |
+
each row is a pair of transformed coordinates (x, y)
|
69 |
+
"""
|
70 |
+
# print('\n===> get_reference_facial_points():')
|
71 |
+
|
72 |
+
# print('---> Params:')
|
73 |
+
# print(' output_size: ', output_size)
|
74 |
+
# print(' inner_padding_factor: ', inner_padding_factor)
|
75 |
+
# print(' outer_padding:', outer_padding)
|
76 |
+
# print(' default_square: ', default_square)
|
77 |
+
|
78 |
+
tmp_5pts = np.array(REFERENCE_FACIAL_POINTS)
|
79 |
+
tmp_crop_size = np.array(DEFAULT_CROP_SIZE)
|
80 |
+
|
81 |
+
# 0) make the inner region a square
|
82 |
+
if default_square:
|
83 |
+
size_diff = max(tmp_crop_size) - tmp_crop_size
|
84 |
+
tmp_5pts += size_diff / 2
|
85 |
+
tmp_crop_size += size_diff
|
86 |
+
|
87 |
+
# print('---> default:')
|
88 |
+
# print(' crop_size = ', tmp_crop_size)
|
89 |
+
# print(' reference_5pts = ', tmp_5pts)
|
90 |
+
|
91 |
+
if (output_size and
|
92 |
+
output_size[0] == tmp_crop_size[0] and
|
93 |
+
output_size[1] == tmp_crop_size[1]):
|
94 |
+
# print('output_size == DEFAULT_CROP_SIZE {}: return default reference points'.format(tmp_crop_size))
|
95 |
+
return tmp_5pts
|
96 |
+
|
97 |
+
if (inner_padding_factor == 0 and
|
98 |
+
outer_padding == (0, 0)):
|
99 |
+
if output_size is None:
|
100 |
+
# print('No paddings to do: return default reference points')
|
101 |
+
return tmp_5pts
|
102 |
+
else:
|
103 |
+
raise FaceWarpException(
|
104 |
+
'No paddings to do, output_size must be None or {}'.format(tmp_crop_size))
|
105 |
+
|
106 |
+
# check output size
|
107 |
+
if not (0 <= inner_padding_factor <= 1.0):
|
108 |
+
raise FaceWarpException('Not (0 <= inner_padding_factor <= 1.0)')
|
109 |
+
|
110 |
+
if ((inner_padding_factor > 0 or outer_padding[0] > 0 or outer_padding[1] > 0)
|
111 |
+
and output_size is None):
|
112 |
+
output_size = tmp_crop_size * \
|
113 |
+
(1 + inner_padding_factor * 2).astype(np.int32)
|
114 |
+
output_size += np.array(outer_padding)
|
115 |
+
# print(' deduced from paddings, output_size = ', output_size)
|
116 |
+
|
117 |
+
if not (outer_padding[0] < output_size[0]
|
118 |
+
and outer_padding[1] < output_size[1]):
|
119 |
+
raise FaceWarpException('Not (outer_padding[0] < output_size[0]'
|
120 |
+
'and outer_padding[1] < output_size[1])')
|
121 |
+
|
122 |
+
# 1) pad the inner region according inner_padding_factor
|
123 |
+
# print('---> STEP1: pad the inner region according inner_padding_factor')
|
124 |
+
if inner_padding_factor > 0:
|
125 |
+
size_diff = tmp_crop_size * inner_padding_factor * 2
|
126 |
+
tmp_5pts += size_diff / 2
|
127 |
+
tmp_crop_size += np.round(size_diff).astype(np.int32)
|
128 |
+
|
129 |
+
# print(' crop_size = ', tmp_crop_size)
|
130 |
+
# print(' reference_5pts = ', tmp_5pts)
|

    # 2) resize the padded inner region
    size_bf_outer_pad = np.array(output_size) - np.array(outer_padding) * 2

    if size_bf_outer_pad[0] * tmp_crop_size[1] != size_bf_outer_pad[1] * tmp_crop_size[0]:
        raise FaceWarpException('Must have (output_size - outer_padding) '
                                '= some_scale * (crop_size * (1.0 + inner_padding_factor))')

    scale_factor = size_bf_outer_pad[0].astype(np.float32) / tmp_crop_size[0]
    tmp_5pts = tmp_5pts * scale_factor
    tmp_crop_size = size_bf_outer_pad

    # 3) add outer_padding to make output_size
    reference_5point = tmp_5pts + np.array(outer_padding)
    tmp_crop_size = output_size

    return reference_5point


def get_affine_transform_matrix(src_pts, dst_pts):
    """
    Function:
    ----------
    get affine transform matrix 'tfm' from src_pts to dst_pts

    Parameters:
    ----------
    @src_pts: Kx2 np.array
        source points matrix, each row is a pair of coordinates (x, y)
    @dst_pts: Kx2 np.array
        destination points matrix, each row is a pair of coordinates (x, y)

    Returns:
    ----------
    @tfm: 2x3 np.array
        transform matrix from src_pts to dst_pts
    """

    tfm = np.float32([[1, 0, 0], [0, 1, 0]])
    n_pts = src_pts.shape[0]
    ones = np.ones((n_pts, 1), src_pts.dtype)
    src_pts_ = np.hstack([src_pts, ones])
    dst_pts_ = np.hstack([dst_pts, ones])

    # least-squares fit of the augmented transform; rcond=None silences the
    # NumPy FutureWarning and keeps the current default behavior
    A, res, rank, s = np.linalg.lstsq(src_pts_, dst_pts_, rcond=None)

    if rank == 3:
        tfm = np.float32([
            [A[0, 0], A[1, 0], A[2, 0]],
            [A[0, 1], A[1, 1], A[2, 1]]
        ])
    elif rank == 2:
        tfm = np.float32([
            [A[0, 0], A[1, 0], 0],
            [A[0, 1], A[1, 1], 0]
        ])

    return tfm


def warp_and_crop_face(src_img,
                       facial_pts,
                       reference_pts=None,
                       crop_size=(96, 112),
                       align_type='similarity'):
    """
    Function:
    ----------
    apply an affine/similarity transform to src_img and crop out the face

    Parameters:
    ----------
    @src_img: HxWx3 np.array
        input image
    @facial_pts: could be
        1) a list of K coordinates (x, y)
        or
        2) Kx2 or 2xK np.array
            each row or col is a pair of coordinates (x, y)
    @reference_pts: could be
        1) a list of K coordinates (x, y)
        or
        2) Kx2 or 2xK np.array
            each row or col is a pair of coordinates (x, y)
        or
        3) None
            if None, use default reference facial points
    @crop_size: (w, h)
        output face image size
    @align_type: transform type, could be one of
        1) 'similarity': use similarity transform
        2) 'cv2_affine': use the first 3 points to do affine transform,
            by calling cv2.getAffineTransform()
        3) 'affine': use all points to do affine transform

    Returns:
    ----------
    @face_img: output face image with size (w, h) = @crop_size
    @tfm: the 2x3 transform matrix that was applied
    """

    if reference_pts is None:
        if crop_size[0] == 96 and crop_size[1] == 112:
            reference_pts = REFERENCE_FACIAL_POINTS
        else:
            default_square = False
            inner_padding_factor = 0
            outer_padding = (0, 0)
            output_size = crop_size

            reference_pts = get_reference_facial_points(output_size,
                                                        inner_padding_factor,
                                                        outer_padding,
                                                        default_square)

    ref_pts = np.float32(reference_pts)
    ref_pts_shp = ref_pts.shape
    if max(ref_pts_shp) < 3 or min(ref_pts_shp) != 2:
        raise FaceWarpException(
            'reference_pts.shape must be (K,2) or (2,K) and K>2')

    if ref_pts_shp[0] == 2:
        ref_pts = ref_pts.T

    src_pts = np.float32(facial_pts)
    src_pts_shp = src_pts.shape
    if max(src_pts_shp) < 3 or min(src_pts_shp) != 2:
        raise FaceWarpException(
            'facial_pts.shape must be (K,2) or (2,K) and K>2')

    if src_pts_shp[0] == 2:
        src_pts = src_pts.T

    if src_pts.shape != ref_pts.shape:
        raise FaceWarpException(
            'facial_pts and reference_pts must have the same shape')

    # string equality, not identity: the original used 'is', which only
    # happens to work because of small-string interning
    if align_type == 'cv2_affine':
        tfm = cv2.getAffineTransform(src_pts[0:3], ref_pts[0:3])
    elif align_type == 'affine':
        tfm = get_affine_transform_matrix(src_pts, ref_pts)
    else:
        tfm = get_similarity_transform_for_cv2(src_pts, ref_pts)

    face_img = cv2.warpAffine(src_img, tfm, (crop_size[0], crop_size[1]))

    return face_img, tfm
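A minimal usage sketch for warp_and_crop_face (not part of the commit): the input path is a placeholder, and the landmark reshaping assumes the [x1..x5, y1..y5] row layout returned by the detector defined later in this commit.

import numpy as np
from PIL import Image

img = Image.open('face.jpg').convert('RGB')      # hypothetical input image
_, landmarks = detect_faces(img)                 # rows of shape [10]: x1..x5, y1..y5
points = landmarks[0].reshape(2, 5).T            # -> 5x2 array of (x, y) pairs
face_img, tfm = warp_and_crop_face(np.array(img), points,
                                   crop_size=(112, 112),
                                   align_type='similarity')
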
models/mtcnn/mtcnn_pytorch/src/box_utils.py
ADDED
@@ -0,0 +1,238 @@
import numpy as np
from PIL import Image


def nms(boxes, overlap_threshold=0.5, mode='union'):
    """Non-maximum suppression.

    Arguments:
        boxes: a float numpy array of shape [n, 5],
            where each row is (xmin, ymin, xmax, ymax, score).
        overlap_threshold: a float number.
        mode: 'union' or 'min'.

    Returns:
        list with indices of the selected boxes
    """

    # if there are no boxes, return the empty list
    if len(boxes) == 0:
        return []

    # list of picked indices
    pick = []

    # grab the coordinates of the bounding boxes
    x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)]

    area = (x2 - x1 + 1.0) * (y2 - y1 + 1.0)
    ids = np.argsort(score)  # in increasing order

    while len(ids) > 0:

        # grab index of the largest value
        last = len(ids) - 1
        i = ids[last]
        pick.append(i)

        # compute intersections
        # of the box with the largest score
        # with the rest of boxes

        # left top corner of intersection boxes
        ix1 = np.maximum(x1[i], x1[ids[:last]])
        iy1 = np.maximum(y1[i], y1[ids[:last]])

        # right bottom corner of intersection boxes
        ix2 = np.minimum(x2[i], x2[ids[:last]])
        iy2 = np.minimum(y2[i], y2[ids[:last]])

        # width and height of intersection boxes
        w = np.maximum(0.0, ix2 - ix1 + 1.0)
        h = np.maximum(0.0, iy2 - iy1 + 1.0)

        # intersections' areas
        inter = w * h
        if mode == 'min':
            overlap = inter / np.minimum(area[i], area[ids[:last]])
        elif mode == 'union':
            # intersection over union (IoU)
            overlap = inter / (area[i] + area[ids[:last]] - inter)

        # delete all boxes where overlap is too big
        ids = np.delete(
            ids,
            np.concatenate([[last], np.where(overlap > overlap_threshold)[0]])
        )

    return pick


def convert_to_square(bboxes):
    """Convert bounding boxes to a square form.

    Arguments:
        bboxes: a float numpy array of shape [n, 5].

    Returns:
        a float numpy array of shape [n, 5],
            squared bounding boxes.
    """

    square_bboxes = np.zeros_like(bboxes)
    x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]
    h = y2 - y1 + 1.0
    w = x2 - x1 + 1.0
    max_side = np.maximum(h, w)
    square_bboxes[:, 0] = x1 + w * 0.5 - max_side * 0.5
    square_bboxes[:, 1] = y1 + h * 0.5 - max_side * 0.5
    square_bboxes[:, 2] = square_bboxes[:, 0] + max_side - 1.0
    square_bboxes[:, 3] = square_bboxes[:, 1] + max_side - 1.0
    return square_bboxes


def calibrate_box(bboxes, offsets):
    """Transform bounding boxes to be more like true bounding boxes.
    'offsets' is one of the outputs of the nets.

    Arguments:
        bboxes: a float numpy array of shape [n, 5].
        offsets: a float numpy array of shape [n, 4].

    Returns:
        a float numpy array of shape [n, 5].
    """
    x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]
    w = x2 - x1 + 1.0
    h = y2 - y1 + 1.0
    w = np.expand_dims(w, 1)
    h = np.expand_dims(h, 1)

    # this is what is happening here:
    # tx1, ty1, tx2, ty2 = [offsets[:, i] for i in range(4)]
    # x1_true = x1 + tx1*w
    # y1_true = y1 + ty1*h
    # x2_true = x2 + tx2*w
    # y2_true = y2 + ty2*h
    # below is just a more compact form of this

    translation = np.hstack([w, h, w, h]) * offsets
    bboxes[:, 0:4] = bboxes[:, 0:4] + translation
    return bboxes


def get_image_boxes(bounding_boxes, img, size=24):
    """Cut out boxes from the image.

    Arguments:
        bounding_boxes: a float numpy array of shape [n, 5].
        img: an instance of PIL.Image.
        size: an integer, size of cutouts.

    Returns:
        a float numpy array of shape [n, 3, size, size].
    """

    num_boxes = len(bounding_boxes)
    width, height = img.size

    [dy, edy, dx, edx, y, ey, x, ex, w, h] = correct_bboxes(bounding_boxes, width, height)
    img_boxes = np.zeros((num_boxes, 3, size, size), 'float32')

    for i in range(num_boxes):
        img_box = np.zeros((h[i], w[i], 3), 'uint8')

        img_array = np.asarray(img, 'uint8')
        img_box[dy[i]:(edy[i] + 1), dx[i]:(edx[i] + 1), :] = \
            img_array[y[i]:(ey[i] + 1), x[i]:(ex[i] + 1), :]

        # resize
        img_box = Image.fromarray(img_box)
        img_box = img_box.resize((size, size), Image.BILINEAR)
        img_box = np.asarray(img_box, 'float32')

        img_boxes[i, :, :, :] = _preprocess(img_box)

    return img_boxes


def correct_bboxes(bboxes, width, height):
    """Crop boxes that are too big and get coordinates
    with respect to cutouts.

    Arguments:
        bboxes: a float numpy array of shape [n, 5],
            where each row is (xmin, ymin, xmax, ymax, score).
        width: a float number.
        height: a float number.

    Returns:
        dy, dx, edy, edx: int numpy arrays of shape [n],
            coordinates of the boxes with respect to the cutouts.
        y, x, ey, ex: int numpy arrays of shape [n],
            corrected ymin, xmin, ymax, xmax.
        h, w: int numpy arrays of shape [n],
            just heights and widths of boxes.

        in the following order:
            [dy, edy, dx, edx, y, ey, x, ex, w, h].
    """

    x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]
    w, h = x2 - x1 + 1.0, y2 - y1 + 1.0
    num_boxes = bboxes.shape[0]

    # 'e' stands for end
    # (x, y) -> (ex, ey)
    x, y, ex, ey = x1, y1, x2, y2

    # we need to cut out a box from the image.
    # (x, y, ex, ey) are corrected coordinates of the box
    # in the image.
    # (dx, dy, edx, edy) are coordinates of the box in the cutout
    # from the image.
    dx, dy = np.zeros((num_boxes,)), np.zeros((num_boxes,))
    edx, edy = w.copy() - 1.0, h.copy() - 1.0

    # if box's bottom right corner is too far right
    ind = np.where(ex > width - 1.0)[0]
    edx[ind] = w[ind] + width - 2.0 - ex[ind]
    ex[ind] = width - 1.0

    # if box's bottom right corner is too low
    ind = np.where(ey > height - 1.0)[0]
    edy[ind] = h[ind] + height - 2.0 - ey[ind]
    ey[ind] = height - 1.0

    # if box's top left corner is too far left
    ind = np.where(x < 0.0)[0]
    dx[ind] = 0.0 - x[ind]
    x[ind] = 0.0

    # if box's top left corner is too high
    ind = np.where(y < 0.0)[0]
    dy[ind] = 0.0 - y[ind]
    y[ind] = 0.0

    return_list = [dy, edy, dx, edx, y, ey, x, ex, w, h]
    return_list = [i.astype('int32') for i in return_list]

    return return_list


def _preprocess(img):
    """Preprocessing step before feeding the network.

    Arguments:
        img: a float numpy array of shape [h, w, c].

    Returns:
        a float numpy array of shape [1, c, h, w].
    """
    img = img.transpose((2, 0, 1))
    img = np.expand_dims(img, 0)
    img = (img - 127.5) * 0.0078125
    return img
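A quick illustration of nms (a sketch, not part of the commit): two heavily overlapping boxes collapse to the higher-scoring one, while a distant box survives.

import numpy as np

boxes = np.array([
    [10, 10, 50, 50, 0.9],      # kept: highest score
    [12, 12, 52, 52, 0.8],      # suppressed: IoU with the first box ~ 0.83 > 0.5
    [100, 100, 140, 140, 0.7],  # kept: no overlap with the others
])
print(nms(boxes, overlap_threshold=0.5, mode='union'))  # -> [0, 2]
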
models/mtcnn/mtcnn_pytorch/src/detector.py
ADDED
@@ -0,0 +1,126 @@
import numpy as np
import torch
from .get_nets import PNet, RNet, ONet
from .box_utils import nms, calibrate_box, get_image_boxes, convert_to_square
from .first_stage import run_first_stage


def detect_faces(image, min_face_size=20.0,
                 thresholds=[0.6, 0.7, 0.8],
                 nms_thresholds=[0.7, 0.7, 0.7]):
    """
    Arguments:
        image: an instance of PIL.Image.
        min_face_size: a float number.
        thresholds: a list of length 3.
        nms_thresholds: a list of length 3.

    Returns:
        two float numpy arrays of shapes [n_boxes, 4] and [n_boxes, 10],
        bounding boxes and facial landmarks.
    """

    # LOAD MODELS
    pnet = PNet()
    rnet = RNet()
    onet = ONet()
    # switch all three nets to inference mode (ONet in particular has dropout)
    pnet.eval()
    rnet.eval()
    onet.eval()

    # BUILD AN IMAGE PYRAMID
    width, height = image.size
    min_length = min(height, width)

    min_detection_size = 12
    factor = 0.707  # sqrt(0.5)

    # scales for scaling the image
    scales = []

    # scale the image so that the
    # minimum size that we can detect equals
    # the minimum face size that we want to detect
    m = min_detection_size / min_face_size
    min_length *= m

    factor_count = 0
    while min_length > min_detection_size:
        scales.append(m * factor ** factor_count)
        min_length *= factor
        factor_count += 1

    # STAGE 1

    # it will be returned
    bounding_boxes = []

    with torch.no_grad():
        # run P-Net on different scales
        for s in scales:
            boxes = run_first_stage(image, pnet, scale=s, threshold=thresholds[0])
            bounding_boxes.append(boxes)

        # collect boxes (and offsets, and scores) from different scales
        bounding_boxes = [i for i in bounding_boxes if i is not None]
        if len(bounding_boxes) == 0:
            # no candidate windows at any scale; np.vstack would fail here
            return [], []
        bounding_boxes = np.vstack(bounding_boxes)

        keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
        bounding_boxes = bounding_boxes[keep]

        # use offsets predicted by pnet to transform bounding boxes
        bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:])
        # shape [n_boxes, 5]

        bounding_boxes = convert_to_square(bounding_boxes)
        bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

        # STAGE 2

        img_boxes = get_image_boxes(bounding_boxes, image, size=24)
        img_boxes = torch.FloatTensor(img_boxes)

        output = rnet(img_boxes)
        offsets = output[0].data.numpy()  # shape [n_boxes, 4]
        probs = output[1].data.numpy()  # shape [n_boxes, 2]

        keep = np.where(probs[:, 1] > thresholds[1])[0]
        bounding_boxes = bounding_boxes[keep]
        bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
        offsets = offsets[keep]

        keep = nms(bounding_boxes, nms_thresholds[1])
        bounding_boxes = bounding_boxes[keep]
        bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
        bounding_boxes = convert_to_square(bounding_boxes)
        bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

        # STAGE 3

        img_boxes = get_image_boxes(bounding_boxes, image, size=48)
        if len(img_boxes) == 0:
            return [], []
        img_boxes = torch.FloatTensor(img_boxes)
        output = onet(img_boxes)
        landmarks = output[0].data.numpy()  # shape [n_boxes, 10]
        offsets = output[1].data.numpy()  # shape [n_boxes, 4]
        probs = output[2].data.numpy()  # shape [n_boxes, 2]

        keep = np.where(probs[:, 1] > thresholds[2])[0]
        bounding_boxes = bounding_boxes[keep]
        bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
        offsets = offsets[keep]
        landmarks = landmarks[keep]

        # compute landmark points
        width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
        height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
        xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
        landmarks[:, 0:5] = np.expand_dims(xmin, 1) + np.expand_dims(width, 1) * landmarks[:, 0:5]
        landmarks[:, 5:10] = np.expand_dims(ymin, 1) + np.expand_dims(height, 1) * landmarks[:, 5:10]

        bounding_boxes = calibrate_box(bounding_boxes, offsets)
        keep = nms(bounding_boxes, nms_thresholds[2], mode='min')
        bounding_boxes = bounding_boxes[keep]
        landmarks = landmarks[keep]

    return bounding_boxes, landmarks
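Putting the three stages together (a usage sketch, not part of the commit; the file name is a placeholder):

from PIL import Image
# from models.mtcnn.mtcnn_pytorch.src.detector import detect_faces  # import path in this repo

img = Image.open('photo.jpg').convert('RGB')   # hypothetical input
boxes, landmarks = detect_faces(img)
# boxes:     [n, 5]  -> (xmin, ymin, xmax, ymax, score)
# landmarks: [n, 10] -> (x1..x5, y1..y5) for eyes, nose tip, mouth corners
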
models/mtcnn/mtcnn_pytorch/src/first_stage.py
ADDED
@@ -0,0 +1,101 @@
import math

import numpy as np
import torch
from PIL import Image

from .box_utils import nms, _preprocess

# fall back to CPU when CUDA is unavailable; the previous hard-coded
# 'cuda:0' crashes on CPU-only hosts
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


def run_first_stage(image, net, scale, threshold):
    """Run P-Net, generate bounding boxes, and do NMS.

    Arguments:
        image: an instance of PIL.Image.
        net: an instance of pytorch's nn.Module, P-Net.
        scale: a float number,
            scale width and height of the image by this number.
        threshold: a float number,
            threshold on the probability of a face when generating
            bounding boxes from predictions of the net.

    Returns:
        a float numpy array of shape [n_boxes, 9],
            bounding boxes with scores and offsets (4 + 1 + 4).
    """

    # scale the image and convert it to a float array
    width, height = image.size
    sw, sh = math.ceil(width * scale), math.ceil(height * scale)
    img = image.resize((sw, sh), Image.BILINEAR)
    img = np.asarray(img, 'float32')

    img = torch.FloatTensor(_preprocess(img)).to(device)
    with torch.no_grad():
        output = net(img)
        probs = output[1].cpu().data.numpy()[0, 1, :, :]
        offsets = output[0].cpu().data.numpy()
        # probs: probability of a face at each sliding window
        # offsets: transformations to true bounding boxes

        boxes = _generate_bboxes(probs, offsets, scale, threshold)
        if len(boxes) == 0:
            return None

        keep = nms(boxes[:, 0:5], overlap_threshold=0.5)
    return boxes[keep]


def _generate_bboxes(probs, offsets, scale, threshold):
    """Generate bounding boxes at places
    where there is probably a face.

    Arguments:
        probs: a float numpy array of shape [n, m].
        offsets: a float numpy array of shape [1, 4, n, m].
        scale: a float number,
            width and height of the image were scaled by this number.
        threshold: a float number.

    Returns:
        a float numpy array of shape [n_boxes, 9]
    """

    # applying P-Net is equivalent, in some sense, to
    # moving a 12x12 window with stride 2
    stride = 2
    cell_size = 12

    # indices of boxes where there is probably a face
    inds = np.where(probs > threshold)

    if inds[0].size == 0:
        return np.array([])

    # transformations of bounding boxes
    tx1, ty1, tx2, ty2 = [offsets[0, i, inds[0], inds[1]] for i in range(4)]
    # they are defined as:
    # w = x2 - x1 + 1
    # h = y2 - y1 + 1
    # x1_true = x1 + tx1*w
    # x2_true = x2 + tx2*w
    # y1_true = y1 + ty1*h
    # y2_true = y2 + ty2*h

    offsets = np.array([tx1, ty1, tx2, ty2])
    score = probs[inds[0], inds[1]]

    # P-Net is applied to scaled images,
    # so we need to rescale bounding boxes back;
    # the +1.0 shifts the 0-based window indices to 1-based pixel coordinates
    bounding_boxes = np.vstack([
        np.round((stride * inds[1] + 1.0) / scale),
        np.round((stride * inds[0] + 1.0) / scale),
        np.round((stride * inds[1] + 1.0 + cell_size) / scale),
        np.round((stride * inds[0] + 1.0 + cell_size) / scale),
        score, offsets
    ])

    return bounding_boxes.T
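The pyramid logic in detect_faces maps onto run_first_stage like this (a numeric sketch, assuming a 400x400 image and the defaults above):

min_detection_size, factor = 12, 0.707
min_face_size, min_length = 20.0, 400.0

m = min_detection_size / min_face_size   # 0.6
min_length *= m                          # 240.0
scales = []
while min_length > min_detection_size:
    scales.append(m * factor ** len(scales))
    min_length *= factor
# scales ~ [0.6, 0.424, 0.3, ...] (9 entries here): each smaller scale lets
# the fixed 12x12 P-Net window cover a larger face in the original image.
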
models/mtcnn/mtcnn_pytorch/src/get_nets.py
ADDED
@@ -0,0 +1,171 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from collections import OrderedDict
import numpy as np

from configs.paths_config import model_paths
PNET_PATH = model_paths["mtcnn_pnet"]
ONET_PATH = model_paths["mtcnn_onet"]
RNET_PATH = model_paths["mtcnn_rnet"]


class Flatten(nn.Module):

    def __init__(self):
        super(Flatten, self).__init__()

    def forward(self, x):
        """
        Arguments:
            x: a float tensor with shape [batch_size, c, h, w].
        Returns:
            a float tensor with shape [batch_size, c*h*w].
        """

        # without this, the pretrained model doesn't work
        x = x.transpose(3, 2).contiguous()

        return x.view(x.size(0), -1)


class PNet(nn.Module):

    def __init__(self):
        super().__init__()

        # suppose we have input with size HxW, then
        # after first layer: H - 2,
        # after pool: ceil((H - 2)/2),
        # after second conv: ceil((H - 2)/2) - 2,
        # after last conv: ceil((H - 2)/2) - 4,
        # and the same for W

        self.features = nn.Sequential(OrderedDict([
            ('conv1', nn.Conv2d(3, 10, 3, 1)),
            ('prelu1', nn.PReLU(10)),
            ('pool1', nn.MaxPool2d(2, 2, ceil_mode=True)),

            ('conv2', nn.Conv2d(10, 16, 3, 1)),
            ('prelu2', nn.PReLU(16)),

            ('conv3', nn.Conv2d(16, 32, 3, 1)),
            ('prelu3', nn.PReLU(32))
        ]))

        self.conv4_1 = nn.Conv2d(32, 2, 1, 1)
        self.conv4_2 = nn.Conv2d(32, 4, 1, 1)

        weights = np.load(PNET_PATH, allow_pickle=True)[()]
        for n, p in self.named_parameters():
            p.data = torch.FloatTensor(weights[n])

    def forward(self, x):
        """
        Arguments:
            x: a float tensor with shape [batch_size, 3, h, w].
        Returns:
            b: a float tensor with shape [batch_size, 4, h', w'].
            a: a float tensor with shape [batch_size, 2, h', w'].
        """
        x = self.features(x)
        a = self.conv4_1(x)
        b = self.conv4_2(x)
        # the two class scores live in the channel dimension of the
        # [batch, 2, h', w'] map, so normalize over dim=1 (the original
        # dim=-1 would softmax across image width instead)
        a = F.softmax(a, dim=1)
        return b, a


class RNet(nn.Module):

    def __init__(self):
        super().__init__()

        self.features = nn.Sequential(OrderedDict([
            ('conv1', nn.Conv2d(3, 28, 3, 1)),
            ('prelu1', nn.PReLU(28)),
            ('pool1', nn.MaxPool2d(3, 2, ceil_mode=True)),

            ('conv2', nn.Conv2d(28, 48, 3, 1)),
            ('prelu2', nn.PReLU(48)),
            ('pool2', nn.MaxPool2d(3, 2, ceil_mode=True)),

            ('conv3', nn.Conv2d(48, 64, 2, 1)),
            ('prelu3', nn.PReLU(64)),

            ('flatten', Flatten()),
            ('conv4', nn.Linear(576, 128)),
            ('prelu4', nn.PReLU(128))
        ]))

        self.conv5_1 = nn.Linear(128, 2)
        self.conv5_2 = nn.Linear(128, 4)

        weights = np.load(RNET_PATH, allow_pickle=True)[()]
        for n, p in self.named_parameters():
            p.data = torch.FloatTensor(weights[n])

    def forward(self, x):
        """
        Arguments:
            x: a float tensor with shape [batch_size, 3, h, w].
        Returns:
            b: a float tensor with shape [batch_size, 4].
            a: a float tensor with shape [batch_size, 2].
        """
        x = self.features(x)
        a = self.conv5_1(x)
        b = self.conv5_2(x)
        a = F.softmax(a, dim=-1)
        return b, a


class ONet(nn.Module):

    def __init__(self):
        super().__init__()

        self.features = nn.Sequential(OrderedDict([
            ('conv1', nn.Conv2d(3, 32, 3, 1)),
            ('prelu1', nn.PReLU(32)),
            ('pool1', nn.MaxPool2d(3, 2, ceil_mode=True)),

            ('conv2', nn.Conv2d(32, 64, 3, 1)),
            ('prelu2', nn.PReLU(64)),
            ('pool2', nn.MaxPool2d(3, 2, ceil_mode=True)),

            ('conv3', nn.Conv2d(64, 64, 3, 1)),
            ('prelu3', nn.PReLU(64)),
            ('pool3', nn.MaxPool2d(2, 2, ceil_mode=True)),

            ('conv4', nn.Conv2d(64, 128, 2, 1)),
            ('prelu4', nn.PReLU(128)),

            ('flatten', Flatten()),
            ('conv5', nn.Linear(1152, 256)),
            ('drop5', nn.Dropout(0.25)),
            ('prelu5', nn.PReLU(256)),
        ]))

        self.conv6_1 = nn.Linear(256, 2)
        self.conv6_2 = nn.Linear(256, 4)
        self.conv6_3 = nn.Linear(256, 10)

        weights = np.load(ONET_PATH, allow_pickle=True)[()]
        for n, p in self.named_parameters():
            p.data = torch.FloatTensor(weights[n])

    def forward(self, x):
        """
        Arguments:
            x: a float tensor with shape [batch_size, 3, h, w].
        Returns:
            c: a float tensor with shape [batch_size, 10].
            b: a float tensor with shape [batch_size, 4].
            a: a float tensor with shape [batch_size, 2].
        """
        x = self.features(x)
        a = self.conv6_1(x)
        b = self.conv6_2(x)
        c = self.conv6_3(x)
        a = F.softmax(a, dim=-1)
        return c, b, a
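The three nets load their parameters from .npy dictionaries keyed by parameter name. A quick way to sanity-check a checkpoint against a module (a sketch, assuming the mtcnn weight files referenced by configs.paths_config are present):

import numpy as np

weights = np.load(PNET_PATH, allow_pickle=True)[()]   # dict: param name -> ndarray
net = PNet()
for name, param in net.named_parameters():
    assert name in weights, f'missing key: {name}'
    assert tuple(param.shape) == weights[name].shape, f'shape mismatch: {name}'
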
models/mtcnn/mtcnn_pytorch/src/matlab_cp2tform.py
ADDED
@@ -0,0 +1,350 @@
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 11 06:54:28 2017

@author: zhaoyafei
"""

import numpy as np
from numpy.linalg import inv, norm, lstsq
from numpy.linalg import matrix_rank as rank


class MatlabCp2tormException(Exception):
    def __str__(self):
        return 'In File {}:{}'.format(
            __file__, super().__str__())


def tformfwd(trans, uv):
    """
    Function:
    ----------
    apply affine transform 'trans' to uv

    Parameters:
    ----------
    @trans: 3x3 np.array
        transform matrix
    @uv: Kx2 np.array
        each row is a pair of coordinates (x, y)

    Returns:
    ----------
    @xy: Kx2 np.array
        each row is a pair of transformed coordinates (x, y)
    """
    uv = np.hstack((
        uv, np.ones((uv.shape[0], 1))
    ))
    xy = np.dot(uv, trans)
    xy = xy[:, 0:-1]
    return xy


def tforminv(trans, uv):
    """
    Function:
    ----------
    apply the inverse of affine transform 'trans' to uv

    Parameters:
    ----------
    @trans: 3x3 np.array
        transform matrix
    @uv: Kx2 np.array
        each row is a pair of coordinates (x, y)

    Returns:
    ----------
    @xy: Kx2 np.array
        each row is a pair of inverse-transformed coordinates (x, y)
    """
    Tinv = inv(trans)
    xy = tformfwd(Tinv, uv)
    return xy


def findNonreflectiveSimilarity(uv, xy, options=None):
    # respect a caller-supplied options dict; the original overwrote it
    if options is None:
        options = {'K': 2}

    K = options['K']
    M = xy.shape[0]
    x = xy[:, 0].reshape((-1, 1))  # use reshape to keep a column vector
    y = xy[:, 1].reshape((-1, 1))  # use reshape to keep a column vector

    tmp1 = np.hstack((x, y, np.ones((M, 1)), np.zeros((M, 1))))
    tmp2 = np.hstack((y, -x, np.zeros((M, 1)), np.ones((M, 1))))
    X = np.vstack((tmp1, tmp2))

    u = uv[:, 0].reshape((-1, 1))  # use reshape to keep a column vector
    v = uv[:, 1].reshape((-1, 1))  # use reshape to keep a column vector
    U = np.vstack((u, v))

    # We know that X * r = U
    if rank(X) >= 2 * K:
        r, _, _, _ = lstsq(X, U, rcond=None)
        r = np.squeeze(r)
    else:
        raise Exception('cp2tform:twoUniquePointsReq')

    sc = r[0]
    ss = r[1]
    tx = r[2]
    ty = r[3]

    Tinv = np.array([
        [sc, -ss, 0],
        [ss, sc, 0],
        [tx, ty, 1]
    ])

    T = inv(Tinv)

    T[:, 2] = np.array([0, 0, 1])

    return T, Tinv


def findSimilarity(uv, xy, options=None):
    if options is None:
        options = {'K': 2}

    # Solve for trans1
    trans1, trans1_inv = findNonreflectiveSimilarity(uv, xy, options)

    # Solve for trans2

    # manually reflect the xy data across the Y-axis
    # (copy first: the original aliased xy and mutated the caller's array)
    xyR = xy.copy()
    xyR[:, 0] = -1 * xyR[:, 0]

    trans2r, trans2r_inv = findNonreflectiveSimilarity(uv, xyR, options)

    # manually reflect the tform to undo the reflection done on xyR
    TreflectY = np.array([
        [-1, 0, 0],
        [0, 1, 0],
        [0, 0, 1]
    ])

    trans2 = np.dot(trans2r, TreflectY)

    # Figure out if trans1 or trans2 is better
    xy1 = tformfwd(trans1, uv)
    norm1 = norm(xy1 - xy)

    xy2 = tformfwd(trans2, uv)
    norm2 = norm(xy2 - xy)

    if norm1 <= norm2:
        return trans1, trans1_inv
    else:
        trans2_inv = inv(trans2)
        return trans2, trans2_inv


def get_similarity_transform(src_pts, dst_pts, reflective=True):
    """
    Function:
    ----------
    Find Similarity Transform Matrix 'trans':
        u = src_pts[:, 0]
        v = src_pts[:, 1]
        x = dst_pts[:, 0]
        y = dst_pts[:, 1]
        [x, y, 1] = [u, v, 1] * trans

    Parameters:
    ----------
    @src_pts: Kx2 np.array
        source points, each row is a pair of coordinates (x, y)
    @dst_pts: Kx2 np.array
        destination points, each row is a pair of transformed
        coordinates (x, y)
    @reflective: True or False
        if True:
            use reflective similarity transform
        else:
            use non-reflective similarity transform

    Returns:
    ----------
    @trans: 3x3 np.array
        transform matrix from uv to xy
    @trans_inv: 3x3 np.array
        inverse of trans, transform matrix from xy to uv
    """

    if reflective:
        trans, trans_inv = findSimilarity(src_pts, dst_pts)
    else:
        trans, trans_inv = findNonreflectiveSimilarity(src_pts, dst_pts)

    return trans, trans_inv


def cvt_tform_mat_for_cv2(trans):
    """
    Function:
    ----------
    Convert Transform Matrix 'trans' into 'cv2_trans' which could be
    directly used by cv2.warpAffine():
        u = src_pts[:, 0]
        v = src_pts[:, 1]
        x = dst_pts[:, 0]
        y = dst_pts[:, 1]
        [x, y].T = cv_trans * [u, v, 1].T

    Parameters:
    ----------
    @trans: 3x3 np.array
        transform matrix from uv to xy

    Returns:
    ----------
    @cv2_trans: 2x3 np.array
        transform matrix from src_pts to dst_pts, could be directly used
        for cv2.warpAffine()
    """
    cv2_trans = trans[:, 0:2].T

    return cv2_trans


def get_similarity_transform_for_cv2(src_pts, dst_pts, reflective=True):
    """
    Function:
    ----------
    Find Similarity Transform Matrix 'cv2_trans' which could be
    directly used by cv2.warpAffine():
        u = src_pts[:, 0]
        v = src_pts[:, 1]
        x = dst_pts[:, 0]
        y = dst_pts[:, 1]
        [x, y].T = cv_trans * [u, v, 1].T

    Parameters:
    ----------
    @src_pts: Kx2 np.array
        source points, each row is a pair of coordinates (x, y)
    @dst_pts: Kx2 np.array
        destination points, each row is a pair of transformed
        coordinates (x, y)
    @reflective: True or False
        if True:
            use reflective similarity transform
        else:
            use non-reflective similarity transform

    Returns:
    ----------
    @cv2_trans: 2x3 np.array
        transform matrix from src_pts to dst_pts, could be directly used
        for cv2.warpAffine()
    """
    trans, trans_inv = get_similarity_transform(src_pts, dst_pts, reflective)
    cv2_trans = cvt_tform_mat_for_cv2(trans)

    return cv2_trans


if __name__ == '__main__':
    """
    u = [0, 6, -2]
    v = [0, 3, 5]
    x = [-1, 0, 4]
    y = [-1, -10, 4]

    # In Matlab, run:
    #
    #   uv = [u'; v'];
    #   xy = [x'; y'];
    #   tform_sim = cp2tform(uv, xy, 'similarity');
    #
    #   trans = tform_sim.tdata.T
    #   ans =
    #       -0.0764   -1.6190         0
    #        1.6190   -0.0764         0
    #       -3.2156    0.0290    1.0000
    #   trans_inv = tform_sim.tdata.Tinv
    #   ans =
    #       -0.0291    0.6163         0
    #       -0.6163   -0.0291         0
    #       -0.0756    1.9826    1.0000
    #   xy_m = tformfwd(tform_sim, u, v)
    #   xy_m =
    #       -3.2156    0.0290
    #        1.1833   -9.9143
    #        5.0323    2.8853
    #   uv_m = tforminv(tform_sim, x, y)
    #   uv_m =
    #        0.5698    1.3953
    #        6.0872    2.2733
    #       -2.6570    4.3314
    """
    u = [0, 6, -2]
    v = [0, 3, 5]
    x = [-1, 0, 4]
    y = [-1, -10, 4]

    uv = np.array((u, v)).T
    xy = np.array((x, y)).T

    print('\n--->uv:')
    print(uv)
    print('\n--->xy:')
    print(xy)

    trans, trans_inv = get_similarity_transform(uv, xy)

    print('\n--->trans matrix:')
    print(trans)

    print('\n--->trans_inv matrix:')
    print(trans_inv)

    print('\n---> apply transform to uv')
    print('\nxy_m = uv_augmented * trans')
    uv_aug = np.hstack((
        uv, np.ones((uv.shape[0], 1))
    ))
    xy_m = np.dot(uv_aug, trans)
    print(xy_m)

    print('\nxy_m = tformfwd(trans, uv)')
    xy_m = tformfwd(trans, uv)
    print(xy_m)

    print('\n---> apply inverse transform to xy')
    print('\nuv_m = xy_augmented * trans_inv')
    xy_aug = np.hstack((
        xy, np.ones((xy.shape[0], 1))
    ))
    uv_m = np.dot(xy_aug, trans_inv)
    print(uv_m)

    print('\nuv_m = tformfwd(trans_inv, xy)')
    uv_m = tformfwd(trans_inv, xy)
    print(uv_m)

    uv_m = tforminv(trans, xy)
    print('\nuv_m = tforminv(trans, xy)')
    print(uv_m)
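The least-squares fit above stacks the K point pairs into a 2K x 4 system whose four unknowns (sc, ss, tx, ty) parameterize rotation-plus-scale and translation. A round-trip check (a sketch, not part of the commit):

import numpy as np

src = np.float32([[0, 0], [6, 3], [-2, 5]])
dst = np.float32([[-1, -1], [0, -10], [4, 4]])

T, Tinv = findNonreflectiveSimilarity(src, dst)
print(np.round(tformfwd(T, src), 4))                 # ~ dst (least-squares fit)
print(np.round(tformfwd(Tinv, tformfwd(T, src)), 4)) # ~ src (inverse undoes it)
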
models/mtcnn/mtcnn_pytorch/src/visualization_utils.py
ADDED
@@ -0,0 +1,31 @@
from PIL import ImageDraw


def show_bboxes(img, bounding_boxes, facial_landmarks=()):
    """Draw bounding boxes and facial landmarks.

    Arguments:
        img: an instance of PIL.Image.
        bounding_boxes: a float numpy array of shape [n, 5].
        facial_landmarks: a float numpy array of shape [n, 10].

    Returns:
        an instance of PIL.Image.
    """

    img_copy = img.copy()
    draw = ImageDraw.Draw(img_copy)

    for b in bounding_boxes:
        draw.rectangle([
            (b[0], b[1]), (b[2], b[3])
        ], outline='white')

    for p in facial_landmarks:
        for i in range(5):
            draw.ellipse([
                (p[i] - 1.0, p[i + 5] - 1.0),
                (p[i] + 1.0, p[i + 5] + 1.0)
            ], outline='blue')

    return img_copy
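A sketch tying the detector and this helper together (file names are placeholders):

from PIL import Image

img = Image.open('group.jpg').convert('RGB')   # hypothetical input
boxes, landmarks = detect_faces(img)
annotated = show_bboxes(img, boxes, landmarks)
annotated.save('group_annotated.jpg')
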
models/psp.py
ADDED
@@ -0,0 +1,118 @@
"""
This file defines the core research contribution
"""
import matplotlib
matplotlib.use('Agg')
import math

import torch
from torch import nn
from models.encoders import psp_encoders
from models.stylegan2.model import Generator
from configs.paths_config import model_paths


def get_keys(d, name):
    if 'state_dict' in d:
        d = d['state_dict']
    d_filt = {k[len(name) + 1:]: v for k, v in d.items() if k[:len(name)] == name}
    return d_filt


class pSp(nn.Module):

    def __init__(self, opts):
        super(pSp, self).__init__()
        self.set_opts(opts)
        # compute number of style inputs based on the output resolution
        self.opts.n_styles = int(math.log(self.opts.output_size, 2)) * 2 - 2
        # Define architecture
        self.encoder = self.set_encoder()
        self.decoder = Generator(self.opts.output_size, 512, 8)
        self.face_pool = torch.nn.AdaptiveAvgPool2d((256, 256))
        # Load weights if needed
        self.load_weights()

    def set_encoder(self):
        if self.opts.encoder_type == 'GradualStyleEncoder':
            encoder = psp_encoders.GradualStyleEncoder(50, 'ir_se', self.opts)
        elif self.opts.encoder_type == 'BackboneEncoderUsingLastLayerIntoW':
            encoder = psp_encoders.BackboneEncoderUsingLastLayerIntoW(50, 'ir_se', self.opts)
        elif self.opts.encoder_type == 'BackboneEncoderUsingLastLayerIntoWPlus':
            encoder = psp_encoders.BackboneEncoderUsingLastLayerIntoWPlus(50, 'ir_se', self.opts)
        else:
            raise Exception('{} is not a valid encoder type'.format(self.opts.encoder_type))
        return encoder

    def load_weights(self):
        if self.opts.checkpoint_path is not None:
            print('Loading pSp from checkpoint: {}'.format(self.opts.checkpoint_path))
            ckpt = torch.load(self.opts.checkpoint_path, map_location='cpu')
            self.encoder.load_state_dict(get_keys(ckpt, 'encoder'), strict=True)
            self.decoder.load_state_dict(get_keys(ckpt, 'decoder'), strict=True)
            self.__load_latent_avg(ckpt)
        else:
            print('Loading encoders weights from irse50!')
            encoder_ckpt = torch.load(model_paths['ir_se50'])
            # if input to encoder is not an RGB image, do not load the input layer weights
            if self.opts.label_nc != 0:
                encoder_ckpt = {k: v for k, v in encoder_ckpt.items() if "input_layer" not in k}
            self.encoder.load_state_dict(encoder_ckpt, strict=False)
            print('Loading decoder weights from pretrained!')
            ckpt = torch.load(self.opts.stylegan_weights)
            self.decoder.load_state_dict(ckpt['g_ema'], strict=False)
            if self.opts.learn_in_w:
                self.__load_latent_avg(ckpt, repeat=1)
            else:
                self.__load_latent_avg(ckpt, repeat=self.opts.n_styles)

    def forward(self, x, resize=True, latent_mask=None, input_code=False, randomize_noise=True,
                inject_latent=None, return_latents=False, alpha=None):
        if input_code:
            codes = x
        else:
            codes = self.encoder(x)
            # normalize with respect to the center of an average face
            if self.opts.start_from_latent_avg:
                if self.opts.learn_in_w:
                    codes = codes + self.latent_avg.repeat(codes.shape[0], 1)
                else:
                    codes = codes + self.latent_avg.repeat(codes.shape[0], 1, 1)

        if latent_mask is not None:
            for i in latent_mask:
                if inject_latent is not None:
                    if alpha is not None:
                        codes[:, i] = alpha * inject_latent[:, i] + (1 - alpha) * codes[:, i]
                    else:
                        codes[:, i] = inject_latent[:, i]
                else:
                    codes[:, i] = 0

        input_is_latent = not input_code

        if return_latents:
            # the decoder returns an (images, latents) tuple here; it is passed
            # through unchanged so callers can unpack both (see app.py)
            result_latent = self.decoder([codes],
                                         input_is_latent=input_is_latent,
                                         randomize_noise=randomize_noise,
                                         return_latents=return_latents)
            return result_latent
        else:
            images, result_latent = self.decoder([codes],
                                                 input_is_latent=input_is_latent,
                                                 randomize_noise=randomize_noise,
                                                 return_latents=return_latents)

            if resize:
                images = self.face_pool(images)

            return images

    def set_opts(self, opts):
        self.opts = opts

    def __load_latent_avg(self, ckpt, repeat=None):
        if 'latent_avg' in ckpt:
            self.latent_avg = ckpt['latent_avg'].to(self.opts.device)
            if repeat is not None:
                self.latent_avg = self.latent_avg.repeat(repeat, 1)
        else:
            self.latent_avg = None
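The opts object only needs attribute access, so an easydict.EasyDict (already a dependency of app.py) is enough to instantiate pSp; app.py goes through model_build.build_psp() for this. A hand-rolled sketch with illustrative option values, not values taken from this commit:

from easydict import EasyDict
from models.psp import pSp

opts = EasyDict(
    encoder_type='GradualStyleEncoder',
    checkpoint_path='pretrained/ohayou_face.pt',  # checkpoint shipped in this repo
    output_size=256,          # illustrative; must match the trained generator
    input_nc=3,
    label_nc=0,
    learn_in_w=False,
    start_from_latent_avg=True,
    device='cpu',
)
net = pSp(opts).eval()
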
models/stylegan2/__init__.py
ADDED
File without changes
|
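models/stylegan2/model.py below re-implements the StyleGAN2 generator (rosinality's PyTorch port). Its central trick, weight modulation/demodulation in ModulatedConv2d, is worth a standalone sketch before reading the file: the style vector scales the kernel per input channel, then each output channel is renormalized to unit norm (a sketch, not part of the commit):

import torch

batch, cin, cout, k = 2, 8, 16, 3
weight = torch.randn(1, cout, cin, k, k)
style = torch.randn(batch, 1, cin, 1, 1)           # output of the modulation FC

w = weight * style                                  # modulate per input channel
demod = torch.rsqrt(w.pow(2).sum([2, 3, 4]) + 1e-8)
w = w * demod.view(batch, cout, 1, 1, 1)            # demodulate per output channel
print(w.pow(2).sum([2, 3, 4]).sqrt())               # ~ all ones
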
models/stylegan2/model.py
ADDED
@@ -0,0 +1,674 @@
1 |
+
import math
|
2 |
+
import random
|
3 |
+
import torch
|
4 |
+
from torch import nn
|
5 |
+
from torch.nn import functional as F
|
6 |
+
|
7 |
+
from models.stylegan2.op import FusedLeakyReLU, fused_leaky_relu, upfirdn2d
|
8 |
+
|
9 |
+
|
10 |
+
class PixelNorm(nn.Module):
|
11 |
+
def __init__(self):
|
12 |
+
super().__init__()
|
13 |
+
|
14 |
+
def forward(self, input):
|
15 |
+
return input * torch.rsqrt(torch.mean(input ** 2, dim=1, keepdim=True) + 1e-8)
|
16 |
+
|
17 |
+
|
18 |
+
def make_kernel(k):
|
19 |
+
k = torch.tensor(k, dtype=torch.float32)
|
20 |
+
|
21 |
+
if k.ndim == 1:
|
22 |
+
k = k[None, :] * k[:, None]
|
23 |
+
|
24 |
+
k /= k.sum()
|
25 |
+
|
26 |
+
return k
|
27 |
+
|
28 |
+
|
29 |
+
class Upsample(nn.Module):
|
30 |
+
def __init__(self, kernel, factor=2):
|
31 |
+
super().__init__()
|
32 |
+
|
33 |
+
self.factor = factor
|
34 |
+
kernel = make_kernel(kernel) * (factor ** 2)
|
35 |
+
self.register_buffer('kernel', kernel)
|
36 |
+
|
37 |
+
p = kernel.shape[0] - factor
|
38 |
+
|
39 |
+
pad0 = (p + 1) // 2 + factor - 1
|
40 |
+
pad1 = p // 2
|
41 |
+
|
42 |
+
self.pad = (pad0, pad1)
|
43 |
+
|
44 |
+
def forward(self, input):
|
45 |
+
out = upfirdn2d(input, self.kernel, up=self.factor, down=1, pad=self.pad)
|
46 |
+
|
47 |
+
return out
|
48 |
+
|
49 |
+
|
50 |
+
class Downsample(nn.Module):
|
51 |
+
def __init__(self, kernel, factor=2):
|
52 |
+
super().__init__()
|
53 |
+
|
54 |
+
self.factor = factor
|
55 |
+
kernel = make_kernel(kernel)
|
56 |
+
self.register_buffer('kernel', kernel)
|
57 |
+
|
58 |
+
p = kernel.shape[0] - factor
|
59 |
+
|
60 |
+
pad0 = (p + 1) // 2
|
61 |
+
pad1 = p // 2
|
62 |
+
|
63 |
+
self.pad = (pad0, pad1)
|
64 |
+
|
65 |
+
def forward(self, input):
|
66 |
+
out = upfirdn2d(input, self.kernel, up=1, down=self.factor, pad=self.pad)
|
67 |
+
|
68 |
+
return out
|
69 |
+
|
70 |
+
|
71 |
+
class Blur(nn.Module):
|
72 |
+
def __init__(self, kernel, pad, upsample_factor=1):
|
73 |
+
super().__init__()
|
74 |
+
|
75 |
+
kernel = make_kernel(kernel)
|
76 |
+
|
77 |
+
if upsample_factor > 1:
|
78 |
+
kernel = kernel * (upsample_factor ** 2)
|
79 |
+
|
80 |
+
self.register_buffer('kernel', kernel)
|
81 |
+
|
82 |
+
self.pad = pad
|
83 |
+
|
84 |
+
def forward(self, input):
|
85 |
+
out = upfirdn2d(input, self.kernel, pad=self.pad)
|
86 |
+
|
87 |
+
return out
|
88 |
+
|
89 |
+
|
90 |
+
class EqualConv2d(nn.Module):
|
91 |
+
def __init__(
|
92 |
+
self, in_channel, out_channel, kernel_size, stride=1, padding=0, bias=True
|
93 |
+
):
|
94 |
+
super().__init__()
|
95 |
+
|
96 |
+
self.weight = nn.Parameter(
|
97 |
+
torch.randn(out_channel, in_channel, kernel_size, kernel_size)
|
98 |
+
)
|
99 |
+
self.scale = 1 / math.sqrt(in_channel * kernel_size ** 2)
|
100 |
+
|
101 |
+
self.stride = stride
|
102 |
+
self.padding = padding
|
103 |
+
|
104 |
+
if bias:
|
105 |
+
self.bias = nn.Parameter(torch.zeros(out_channel))
|
106 |
+
|
107 |
+
else:
|
108 |
+
self.bias = None
|
109 |
+
|
110 |
+
def forward(self, input):
|
111 |
+
out = F.conv2d(
|
112 |
+
input,
|
113 |
+
self.weight * self.scale,
|
114 |
+
bias=self.bias,
|
115 |
+
stride=self.stride,
|
116 |
+
padding=self.padding,
|
117 |
+
)
|
118 |
+
|
119 |
+
return out
|
120 |
+
|
121 |
+
def __repr__(self):
|
122 |
+
return (
|
123 |
+
f'{self.__class__.__name__}({self.weight.shape[1]}, {self.weight.shape[0]},'
|
124 |
+
f' {self.weight.shape[2]}, stride={self.stride}, padding={self.padding})'
|
125 |
+
)
|
126 |
+
|
127 |
+
|
128 |
+
class EqualLinear(nn.Module):
|
129 |
+
def __init__(
|
130 |
+
self, in_dim, out_dim, bias=True, bias_init=0, lr_mul=1, activation=None
|
131 |
+
):
|
132 |
+
super().__init__()
|
133 |
+
|
134 |
+
self.weight = nn.Parameter(torch.randn(out_dim, in_dim).div_(lr_mul))
|
135 |
+
|
136 |
+
if bias:
|
137 |
+
self.bias = nn.Parameter(torch.zeros(out_dim).fill_(bias_init))
|
138 |
+
|
139 |
+
else:
|
140 |
+
self.bias = None
|
141 |
+
|
142 |
+
self.activation = activation
|
143 |
+
|
144 |
+
self.scale = (1 / math.sqrt(in_dim)) * lr_mul
|
145 |
+
self.lr_mul = lr_mul
|
146 |
+
|
147 |
+
def forward(self, input):
|
148 |
+
if self.activation:
|
149 |
+
out = F.linear(input, self.weight * self.scale)
|
150 |
+
out = fused_leaky_relu(out, self.bias * self.lr_mul)
|
151 |
+
|
152 |
+
else:
|
153 |
+
out = F.linear(
|
154 |
+
input, self.weight * self.scale, bias=self.bias * self.lr_mul
|
155 |
+
)
|
156 |
+
|
157 |
+
return out
|
158 |
+
|
159 |
+
def __repr__(self):
|
160 |
+
return (
|
161 |
+
f'{self.__class__.__name__}({self.weight.shape[1]}, {self.weight.shape[0]})'
|
        )


class ScaledLeakyReLU(nn.Module):
    def __init__(self, negative_slope=0.2):
        super().__init__()

        self.negative_slope = negative_slope

    def forward(self, input):
        out = F.leaky_relu(input, negative_slope=self.negative_slope)

        return out * math.sqrt(2)


class ModulatedConv2d(nn.Module):
    def __init__(
        self,
        in_channel,
        out_channel,
        kernel_size,
        style_dim,
        demodulate=True,
        upsample=False,
        downsample=False,
        blur_kernel=[1, 3, 3, 1],
    ):
        super().__init__()

        self.eps = 1e-8
        self.kernel_size = kernel_size
        self.in_channel = in_channel
        self.out_channel = out_channel
        self.upsample = upsample
        self.downsample = downsample

        if upsample:
            factor = 2
            p = (len(blur_kernel) - factor) - (kernel_size - 1)
            pad0 = (p + 1) // 2 + factor - 1
            pad1 = p // 2 + 1

            self.blur = Blur(blur_kernel, pad=(pad0, pad1), upsample_factor=factor)

        if downsample:
            factor = 2
            p = (len(blur_kernel) - factor) + (kernel_size - 1)
            pad0 = (p + 1) // 2
            pad1 = p // 2

            self.blur = Blur(blur_kernel, pad=(pad0, pad1))

        fan_in = in_channel * kernel_size ** 2
        self.scale = 1 / math.sqrt(fan_in)
        self.padding = kernel_size // 2

        self.weight = nn.Parameter(
            torch.randn(1, out_channel, in_channel, kernel_size, kernel_size)
        )

        self.modulation = EqualLinear(style_dim, in_channel, bias_init=1)

        self.demodulate = demodulate

    def __repr__(self):
        return (
            f'{self.__class__.__name__}({self.in_channel}, {self.out_channel}, {self.kernel_size}, '
            f'upsample={self.upsample}, downsample={self.downsample})'
        )

    def forward(self, input, style):
        batch, in_channel, height, width = input.shape

        style = self.modulation(style).view(batch, 1, in_channel, 1, 1)
        weight = self.scale * self.weight * style

        if self.demodulate:
            demod = torch.rsqrt(weight.pow(2).sum([2, 3, 4]) + 1e-8)
            weight = weight * demod.view(batch, self.out_channel, 1, 1, 1)

        weight = weight.view(
            batch * self.out_channel, in_channel, self.kernel_size, self.kernel_size
        )

        if self.upsample:
            input = input.view(1, batch * in_channel, height, width)
            weight = weight.view(
                batch, self.out_channel, in_channel, self.kernel_size, self.kernel_size
            )
            weight = weight.transpose(1, 2).reshape(
                batch * in_channel, self.out_channel, self.kernel_size, self.kernel_size
            )
            out = F.conv_transpose2d(input, weight, padding=0, stride=2, groups=batch)
            _, _, height, width = out.shape
            out = out.view(batch, self.out_channel, height, width)
            out = self.blur(out)

        elif self.downsample:
            input = self.blur(input)
            _, _, height, width = input.shape
            input = input.view(1, batch * in_channel, height, width)
            out = F.conv2d(input, weight, padding=0, stride=2, groups=batch)
            _, _, height, width = out.shape
            out = out.view(batch, self.out_channel, height, width)

        else:
            input = input.view(1, batch * in_channel, height, width)
            out = F.conv2d(input, weight, padding=self.padding, groups=batch)
            _, _, height, width = out.shape
            out = out.view(batch, self.out_channel, height, width)

        return out

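# Illustrative sketch (not part of the original file): the shape bookkeeping
# above reduces to the following per-sample computation, with s the style
# scale from self.modulation and W the base filter:
#
#   W'  = scale * W * s                            # modulate input channels
#   d   = rsqrt(sum(W'**2 over in, kh, kw) + eps)
#   W'' = W' * d                                   # demodulate each output filter
#
# Folding the batch into the group dimension (groups=batch) then lets one
# grouped convolution apply a different filter bank to every sample.
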
class NoiseInjection(nn.Module):
    def __init__(self):
        super().__init__()

        self.weight = nn.Parameter(torch.zeros(1))

    def forward(self, image, noise=None):
        if noise is None:
            batch, _, height, width = image.shape
            noise = image.new_empty(batch, 1, height, width).normal_()

        return image + self.weight * noise


class ConstantInput(nn.Module):
    def __init__(self, channel, size=4):
        super().__init__()

        self.input = nn.Parameter(torch.randn(1, channel, size, size))

    def forward(self, input):
        batch = input.shape[0]
        out = self.input.repeat(batch, 1, 1, 1)

        return out

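# Note (not part of the original file): NoiseInjection adds a single learned
# scalar weight times per-pixel Gaussian noise, e.g.
#
#   y = NoiseInjection()(torch.randn(1, 8, 4, 4))  # same shape as the input
#
# Since the weight is initialized to zero, a fresh layer passes images
# through unchanged.
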
class StyledConv(nn.Module):
    def __init__(
        self,
        in_channel,
        out_channel,
        kernel_size,
        style_dim,
        upsample=False,
        blur_kernel=[1, 3, 3, 1],
        demodulate=True,
    ):
        super().__init__()

        self.conv = ModulatedConv2d(
            in_channel,
            out_channel,
            kernel_size,
            style_dim,
            upsample=upsample,
            blur_kernel=blur_kernel,
            demodulate=demodulate,
        )

        self.noise = NoiseInjection()
        # self.bias = nn.Parameter(torch.zeros(1, out_channel, 1, 1))
        # self.activate = ScaledLeakyReLU(0.2)
        self.activate = FusedLeakyReLU(out_channel)

    def forward(self, input, style, noise=None):
        out = self.conv(input, style)
        out = self.noise(out, noise=noise)
        # out = out + self.bias
        out = self.activate(out)

        return out


class ToRGB(nn.Module):
    def __init__(self, in_channel, style_dim, upsample=True, blur_kernel=[1, 3, 3, 1]):
        super().__init__()

        if upsample:
            self.upsample = Upsample(blur_kernel)

        self.conv = ModulatedConv2d(in_channel, 3, 1, style_dim, demodulate=False)
        self.bias = nn.Parameter(torch.zeros(1, 3, 1, 1))

    def forward(self, input, style, skip=None):
        out = self.conv(input, style)
        out = out + self.bias

        if skip is not None:
            skip = self.upsample(skip)

            out = out + skip

        return out

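# Note (not part of the original file): ToRGB projects features to a
# 3-channel image with a demodulation-free 1x1 modulated convolution plus
# bias; when the previous RGB output is passed as `skip`, it is upsampled and
# added in, so the final image is a sum of RGB contributions from every
# resolution.
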
class Generator(nn.Module):
    def __init__(
        self,
        size,
        style_dim,
        n_mlp,
        channel_multiplier=2,
        blur_kernel=[1, 3, 3, 1],
        lr_mlp=0.01,
    ):
        super().__init__()

        self.size = size

        self.style_dim = style_dim

        layers = [PixelNorm()]

        for i in range(n_mlp):
            layers.append(
                EqualLinear(
                    style_dim, style_dim, lr_mul=lr_mlp, activation='fused_lrelu'
                )
            )

        self.style = nn.Sequential(*layers)

        self.channels = {
            4: 512,
            8: 512,
            16: 512,
            32: 512,
            64: 256 * channel_multiplier,
            128: 128 * channel_multiplier,
            256: 64 * channel_multiplier,
            512: 32 * channel_multiplier,
            1024: 16 * channel_multiplier,
        }

        self.input = ConstantInput(self.channels[4])
        self.conv1 = StyledConv(
            self.channels[4], self.channels[4], 3, style_dim, blur_kernel=blur_kernel
        )
        self.to_rgb1 = ToRGB(self.channels[4], style_dim, upsample=False)

        self.log_size = int(math.log(size, 2))
        self.num_layers = (self.log_size - 2) * 2 + 1

        self.convs = nn.ModuleList()
        self.upsamples = nn.ModuleList()
        self.to_rgbs = nn.ModuleList()
        self.noises = nn.Module()

        in_channel = self.channels[4]

        for layer_idx in range(self.num_layers):
            res = (layer_idx + 5) // 2
            shape = [1, 1, 2 ** res, 2 ** res]
            self.noises.register_buffer(f'noise_{layer_idx}', torch.randn(*shape))

        for i in range(3, self.log_size + 1):
            out_channel = self.channels[2 ** i]

            self.convs.append(
                StyledConv(
                    in_channel,
                    out_channel,
                    3,
                    style_dim,
                    upsample=True,
                    blur_kernel=blur_kernel,
                )
            )

            self.convs.append(
                StyledConv(
                    out_channel, out_channel, 3, style_dim, blur_kernel=blur_kernel
                )
            )

            self.to_rgbs.append(ToRGB(out_channel, style_dim))

            in_channel = out_channel

        self.n_latent = self.log_size * 2 - 2

    def make_noise(self):
        device = self.input.input.device

        noises = [torch.randn(1, 1, 2 ** 2, 2 ** 2, device=device)]

        for i in range(3, self.log_size + 1):
            for _ in range(2):
                noises.append(torch.randn(1, 1, 2 ** i, 2 ** i, device=device))

        return noises

    def mean_latent(self, n_latent):
        latent_in = torch.randn(
            n_latent, self.style_dim, device=self.input.input.device
        )
        latent = self.style(latent_in).mean(0, keepdim=True)

        return latent

    def get_latent(self, input):
        return self.style(input)

    def forward(
        self,
        styles,
        return_latents=False,
        return_features=False,
        inject_index=None,
        truncation=1,
        truncation_latent=None,
        input_is_latent=False,
        noise=None,
        randomize_noise=True,
    ):
        if not input_is_latent:
            styles = [self.style(s) for s in styles]

        if noise is None:
            if randomize_noise:
                noise = [None] * self.num_layers
            else:
                noise = [
                    getattr(self.noises, f'noise_{i}') for i in range(self.num_layers)
                ]

        if truncation < 1:
            style_t = []

            for style in styles:
                style_t.append(
                    truncation_latent + truncation * (style - truncation_latent)
                )

            styles = style_t

        if len(styles) < 2:
            inject_index = self.n_latent

            if styles[0].ndim < 3:
                latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
            else:
                latent = styles[0]

        else:
            if inject_index is None:
                inject_index = random.randint(1, self.n_latent - 1)

            latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
            latent2 = styles[1].unsqueeze(1).repeat(1, self.n_latent - inject_index, 1)

            latent = torch.cat([latent, latent2], 1)

        if return_latents:
            return latent

        out = self.input(latent)
        out = self.conv1(out, latent[:, 0], noise=noise[0])

        skip = self.to_rgb1(out, latent[:, 1])

        i = 1
        for conv1, conv2, noise1, noise2, to_rgb in zip(
            self.convs[::2], self.convs[1::2], noise[1::2], noise[2::2], self.to_rgbs
        ):
            out = conv1(out, latent[:, i], noise=noise1)
            out = conv2(out, latent[:, i + 1], noise=noise2)
            skip = to_rgb(out, latent[:, i + 2], skip)

            i += 2

        image = skip

        if return_features:
            return image, out
        else:
            return image, None

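# Hedged usage sketch (not part of the original file); the hyperparameters
# below are illustrative, not this Space's checkpoint configuration:
#
#   g = Generator(size=256, style_dim=512, n_mlp=8)
#   z = torch.randn(1, 512)
#   img, _ = g([z], truncation=0.7, truncation_latent=g.mean_latent(4096))
#   # img: (1, 3, 256, 256), values roughly in [-1, 1]
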
class ConvLayer(nn.Sequential):
    def __init__(
        self,
        in_channel,
        out_channel,
        kernel_size,
        downsample=False,
        blur_kernel=[1, 3, 3, 1],
        bias=True,
        activate=True,
    ):
        layers = []

        if downsample:
            factor = 2
            p = (len(blur_kernel) - factor) + (kernel_size - 1)
            pad0 = (p + 1) // 2
            pad1 = p // 2

            layers.append(Blur(blur_kernel, pad=(pad0, pad1)))

            stride = 2
            self.padding = 0

        else:
            stride = 1
            self.padding = kernel_size // 2

        layers.append(
            EqualConv2d(
                in_channel,
                out_channel,
                kernel_size,
                padding=self.padding,
                stride=stride,
                bias=bias and not activate,
            )
        )

        if activate:
            if bias:
                layers.append(FusedLeakyReLU(out_channel))

            else:
                layers.append(ScaledLeakyReLU(0.2))

        super().__init__(*layers)

class ResBlock(nn.Module):
    def __init__(self, in_channel, out_channel, blur_kernel=[1, 3, 3, 1]):
        super().__init__()

        self.conv1 = ConvLayer(in_channel, in_channel, 3)
        self.conv2 = ConvLayer(in_channel, out_channel, 3, downsample=True)

        self.skip = ConvLayer(
            in_channel, out_channel, 1, downsample=True, activate=False, bias=False
        )

    def forward(self, input):
        out = self.conv1(input)
        out = self.conv2(out)

        skip = self.skip(input)
        out = (out + skip) / math.sqrt(2)

        return out

class Discriminator(nn.Module):
    def __init__(self, size, channel_multiplier=2, blur_kernel=[1, 3, 3, 1]):
        super().__init__()

        channels = {
            4: 512,
            8: 512,
            16: 512,
            32: 512,
            64: 256 * channel_multiplier,
            128: 128 * channel_multiplier,
            256: 64 * channel_multiplier,
            512: 32 * channel_multiplier,
            1024: 16 * channel_multiplier,
        }

        convs = [ConvLayer(3, channels[size], 1)]

        log_size = int(math.log(size, 2))

        in_channel = channels[size]

        for i in range(log_size, 2, -1):
            out_channel = channels[2 ** (i - 1)]

            convs.append(ResBlock(in_channel, out_channel, blur_kernel))

            in_channel = out_channel

        self.convs = nn.Sequential(*convs)

        self.stddev_group = 4
        self.stddev_feat = 1

        self.final_conv = ConvLayer(in_channel + 1, channels[4], 3)
        self.final_linear = nn.Sequential(
            EqualLinear(channels[4] * 4 * 4, channels[4], activation='fused_lrelu'),
            EqualLinear(channels[4], 1),
        )

    def forward(self, input):
        out = self.convs(input)

        batch, channel, height, width = out.shape
        group = min(batch, self.stddev_group)
        stddev = out.view(
            group, -1, self.stddev_feat, channel // self.stddev_feat, height, width
        )
        stddev = torch.sqrt(stddev.var(0, unbiased=False) + 1e-8)
        stddev = stddev.mean([2, 3, 4], keepdim=True).squeeze(2)
        stddev = stddev.repeat(group, 1, height, width)
        out = torch.cat([out, stddev], 1)

        out = self.final_conv(out)

        out = out.view(batch, -1)
        out = self.final_linear(out)

        return out
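# Note (not part of the original file): the stddev block in forward() is
# minibatch standard deviation — each group of up to stddev_group samples
# contributes one extra feature map holding the mean per-pixel standard
# deviation across the group, which lets the discriminator penalize
# low-variety batches.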
models/stylegan2/op/__init__.py
ADDED
@@ -0,0 +1,2 @@
from .fused_act import FusedLeakyReLU, fused_leaky_relu
from .upfirdn2d import upfirdn2d
models/stylegan2/op/fused_act.py
ADDED
@@ -0,0 +1,37 @@
import torch
from torch import nn
from torch.nn import functional as F


class FusedLeakyReLU(nn.Module):
    def __init__(self, channel, negative_slope=0.2, scale=2 ** 0.5):
        super().__init__()

        self.bias = nn.Parameter(torch.zeros(channel))
        self.negative_slope = negative_slope
        self.scale = scale

    def forward(self, input):
        return fused_leaky_relu(input, self.bias, self.negative_slope, self.scale)


def fused_leaky_relu(input, bias=None, negative_slope=0.2, scale=2 ** 0.5):
    # Pure-PyTorch fallback for the original fused CUDA op: add the
    # per-channel bias, apply leaky ReLU, and rescale to preserve the
    # activation magnitude. Works on any device, so the previous CPU-only
    # guard (which silently returned None on GPU) is gone, and the
    # negative_slope argument is now honored instead of a hard-coded 0.2.
    if bias is not None:
        rest_dim = [1] * (input.ndim - bias.ndim - 1)
        return (
            F.leaky_relu(
                input + bias.view(1, bias.shape[0], *rest_dim),
                negative_slope=negative_slope,
            )
            * scale
        )

    else:
        return F.leaky_relu(input, negative_slope=negative_slope) * scale
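# Quick check (illustrative, not part of the original file):
#
#   act = FusedLeakyReLU(8)
#   y = act(torch.randn(2, 8, 4, 4))  # same shape; output scaled by sqrt(2)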
models/stylegan2/op/upfirdn2d.py
ADDED
@@ -0,0 +1,60 @@
import torch
from torch.nn import functional as F


def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)):
    out = upfirdn2d_native(
        input, kernel, up, up, down, down, pad[0], pad[1], pad[0], pad[1]
    )

    return out


def upfirdn2d_native(
    input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1
):
    # Pure-PyTorch upsample-FIR-downsample: insert zeros to upsample, pad,
    # convolve with the flipped FIR kernel, then downsample by striding.
    _, channel, in_h, in_w = input.shape
    input = input.reshape(-1, in_h, in_w, 1)

    _, in_h, in_w, minor = input.shape
    kernel_h, kernel_w = kernel.shape

    # Upsample by inserting up_x - 1 / up_y - 1 zeros between samples.
    out = input.view(-1, in_h, 1, in_w, 1, minor)
    out = F.pad(out, [0, 0, 0, up_x - 1, 0, 0, 0, up_y - 1])
    out = out.view(-1, in_h * up_y, in_w * up_x, minor)

    # Apply the (possibly negative) padding: pad with zeros, then crop.
    out = F.pad(
        out, [0, 0, max(pad_x0, 0), max(pad_x1, 0), max(pad_y0, 0), max(pad_y1, 0)]
    )
    out = out[
        :,
        max(-pad_y0, 0) : out.shape[1] - max(-pad_y1, 0),
        max(-pad_x0, 0) : out.shape[2] - max(-pad_x1, 0),
        :,
    ]

    # Filter with the flipped kernel (true convolution, not cross-correlation).
    out = out.permute(0, 3, 1, 2)
    out = out.reshape(
        [-1, 1, in_h * up_y + pad_y0 + pad_y1, in_w * up_x + pad_x0 + pad_x1]
    )
    w = torch.flip(kernel, [0, 1]).view(1, 1, kernel_h, kernel_w)
    out = F.conv2d(out, w)
    out = out.reshape(
        -1,
        minor,
        in_h * up_y + pad_y0 + pad_y1 - kernel_h + 1,
        in_w * up_x + pad_x0 + pad_x1 - kernel_w + 1,
    )
    out = out.permute(0, 2, 3, 1)

    # Downsample by keeping every down_y-th row and down_x-th column.
    out = out[:, ::down_y, ::down_x, :]

    out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h + down_y) // down_y
    out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w + down_x) // down_x

    return out.view(-1, channel, out_h, out_w)
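# Illustrative example (assumed shapes, not part of the original file),
# mirroring how the Blur/Upsample layers call this with a 4-tap binomial
# kernel:
#
#   k = torch.tensor([1., 3., 3., 1.])
#   k = k[None, :] * k[:, None]
#   k = k / k.sum()
#   x = torch.randn(1, 3, 8, 8)
#   y = upfirdn2d(x, k, up=2, down=1, pad=(2, 1))  # y: (1, 3, 16, 16)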
pretrained/ohayou_face.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:89abef3962a9ca6b214f1447e7050725b73d41822d7381e1f4d0f96ac8035381
size 363965331
pretrained/ohayou_face.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c63de7970a7af6cc5b5c0cf677eb16095f2aaabd68dab41fcc3851bb5c7464f9
size 1077486507
requirements.txt
ADDED
@@ -0,0 +1,10 @@
torch
numpy
torchvision
Pillow
tqdm
imageio
scipy
easydict
opensimplex==0.3
ninja
torch_utils/__init__.py
ADDED
@@ -0,0 +1,9 @@
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

# empty