aliabd committed on
Commit
bca104a
•
1 Parent(s): c775144

copied all files from repo

Files changed (12)
  1. LICENSE +21 -0
  2. dataset.py +167 -0
  3. distributed.py +126 -0
  4. gradiodemo.py +84 -0
  5. inference.ipynb +0 -0
  6. inference_colab.ipynb +0 -0
  7. model.py +757 -0
  8. requirements.txt +10 -0
  9. teaser.gif +0 -0
  10. teaser.png +0 -0
  11. train.py +458 -0
  12. util.py +161 -0
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2021 Min Jin Chong
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
dataset.py ADDED
@@ -0,0 +1,167 @@
+ import torch.utils.data as data
+
+ from PIL import Image
+
+ import os
+ import os.path
+ from io import BytesIO
+
+ import lmdb
+ from torch.utils.data import Dataset
+
+ class MultiResolutionDataset(Dataset):
+     def __init__(self, path, transform, resolution=256):
+         self.env = lmdb.open(
+             path,
+             max_readers=32,
+             readonly=True,
+             lock=False,
+             readahead=False,
+             meminit=False,
+         )
+
+         if not self.env:
+             raise IOError('Cannot open lmdb dataset', path)
+
+         with self.env.begin(write=False) as txn:
+             self.length = int(txn.get('length'.encode('utf-8')).decode('utf-8'))
+
+         self.resolution = resolution
+         self.transform = transform
+
+     def __len__(self):
+         return self.length
+
+     def __getitem__(self, index):
+         with self.env.begin(write=False) as txn:
+             key = f'{self.resolution}-{str(index).zfill(5)}'.encode('utf-8')
+             img_bytes = txn.get(key)
+
+         buffer = BytesIO(img_bytes)
+         img = Image.open(buffer)
+         img = self.transform(img)
+
+         return img
+
+
+ def has_file_allowed_extension(filename, extensions):
+     """Checks if a file is an allowed extension.
+
+     Args:
+         filename (string): path to a file
+
+     Returns:
+         bool: True if the filename ends with a known image extension
+     """
+     filename_lower = filename.lower()
+     return any(filename_lower.endswith(ext) for ext in extensions)
+
+
+ def find_classes(dir):
+     classes = [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))]
+     classes.sort()
+     class_to_idx = {classes[i]: i for i in range(len(classes))}
+     return classes, class_to_idx
+
+
+ def make_dataset(dir, extensions):
+     images = []
+     for root, _, fnames in sorted(os.walk(dir)):
+         for fname in sorted(fnames):
+             if has_file_allowed_extension(fname, extensions):
+                 path = os.path.join(root, fname)
+                 item = (path, 0)
+                 images.append(item)
+
+     return images
+
+
+ class DatasetFolder(data.Dataset):
+     def __init__(self, root, loader, extensions, transform=None, target_transform=None):
+         # classes, class_to_idx = find_classes(root)
+         samples = make_dataset(root, extensions)
+         if len(samples) == 0:
+             raise(RuntimeError("Found 0 files in subfolders of: " + root + "\n"
+                                "Supported extensions are: " + ",".join(extensions)))
+
+         self.root = root
+         self.loader = loader
+         self.extensions = extensions
+         self.samples = samples
+
+         self.transform = transform
+         self.target_transform = target_transform
+
+     def __getitem__(self, index):
+         """
+         Args:
+             index (int): Index
+
+         Returns:
+             tuple: (sample, target) where target is class_index of the target class.
+         """
+         path, target = self.samples[index]
+         sample = self.loader(path)
+         if self.transform is not None:
+             sample = self.transform(sample)
+         if self.target_transform is not None:
+             target = self.target_transform(target)
+
+         return sample
+
+     def __len__(self):
+         return len(self.samples)
+
+     def __repr__(self):
+         fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
+         fmt_str += '    Number of datapoints: {}\n'.format(self.__len__())
+         fmt_str += '    Root Location: {}\n'.format(self.root)
+         tmp = '    Transforms (if any): '
+         fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
+         tmp = '    Target Transforms (if any): '
+         fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
+         return fmt_str
+
+
+ IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif']
+
+
+ def pil_loader(path):
+     # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
+     with open(path, 'rb') as f:
+         img = Image.open(f)
+         return img.convert('RGB')
+
+
+ def default_loader(path):
+     return pil_loader(path)
+
+
+ class ImageFolder(DatasetFolder):
+     def __init__(self, root, transform1=None, transform2=None, target_transform=None,
+                  loader=default_loader):
+         super(ImageFolder, self).__init__(root, loader, IMG_EXTENSIONS,
+                                           transform=transform1,
+                                           target_transform=target_transform)
+         self.imgs = self.samples
+         self.transform2 = transform2
+
+     def set_stage(self, stage):
+         if stage == 'last':
+             self.transform = self.transform2
+
+ class ListFolder(Dataset):
+     def __init__(self, txt, transform):
+         with open(txt) as f:
+             imgpaths = f.readlines()
+         self.imgpaths = [x.strip() for x in imgpaths]
+         self.transform = transform
+
+     def __getitem__(self, idx):
+         path = self.imgpaths[idx]
+         image = Image.open(path)
+         return self.transform(image)
+
+     def __len__(self):
+         return len(self.imgpaths)
+
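For context, a minimal usage sketch (not part of the commit): `ImageFolder` here returns only the transformed image tensor, so a `DataLoader` batch is a single tensor rather than an `(image, label)` pair as in torchvision. The directory path below is hypothetical.

```python
from torchvision import transforms
from torch.utils import data
from dataset import ImageFolder

transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
dataset = ImageFolder('photos/trainA', transform)   # hypothetical image directory
loader = data.DataLoader(dataset, batch_size=4, shuffle=True, drop_last=True)
batch = next(iter(loader))                          # tensor of shape [4, 3, 256, 256]
```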
distributed.py ADDED
@@ -0,0 +1,126 @@
+ import math
+ import pickle
+
+ import torch
+ from torch import distributed as dist
+ from torch.utils.data.sampler import Sampler
+
+
+ def get_rank():
+     if not dist.is_available():
+         return 0
+
+     if not dist.is_initialized():
+         return 0
+
+     return dist.get_rank()
+
+
+ def synchronize():
+     if not dist.is_available():
+         return
+
+     if not dist.is_initialized():
+         return
+
+     world_size = dist.get_world_size()
+
+     if world_size == 1:
+         return
+
+     dist.barrier()
+
+
+ def get_world_size():
+     if not dist.is_available():
+         return 1
+
+     if not dist.is_initialized():
+         return 1
+
+     return dist.get_world_size()
+
+
+ def reduce_sum(tensor):
+     if not dist.is_available():
+         return tensor
+
+     if not dist.is_initialized():
+         return tensor
+
+     tensor = tensor.clone()
+     dist.all_reduce(tensor, op=dist.ReduceOp.SUM)
+
+     return tensor
+
+
+ def gather_grad(params):
+     world_size = get_world_size()
+
+     if world_size == 1:
+         return
+
+     for param in params:
+         if param.grad is not None:
+             dist.all_reduce(param.grad.data, op=dist.ReduceOp.SUM)
+             param.grad.data.div_(world_size)
+
+
+ def all_gather(data):
+     world_size = get_world_size()
+
+     if world_size == 1:
+         return [data]
+
+     buffer = pickle.dumps(data)
+     storage = torch.ByteStorage.from_buffer(buffer)
+     tensor = torch.ByteTensor(storage).to('cuda')
+
+     local_size = torch.IntTensor([tensor.numel()]).to('cuda')
+     size_list = [torch.IntTensor([0]).to('cuda') for _ in range(world_size)]
+     dist.all_gather(size_list, local_size)
+     size_list = [int(size.item()) for size in size_list]
+     max_size = max(size_list)
+
+     tensor_list = []
+     for _ in size_list:
+         tensor_list.append(torch.ByteTensor(size=(max_size,)).to('cuda'))
+
+     if local_size != max_size:
+         padding = torch.ByteTensor(size=(max_size - local_size,)).to('cuda')
+         tensor = torch.cat((tensor, padding), 0)
+
+     dist.all_gather(tensor_list, tensor)
+
+     data_list = []
+
+     for size, tensor in zip(size_list, tensor_list):
+         buffer = tensor.cpu().numpy().tobytes()[:size]
+         data_list.append(pickle.loads(buffer))
+
+     return data_list
+
+
+ def reduce_loss_dict(loss_dict):
+     world_size = get_world_size()
+
+     if world_size < 2:
+         return loss_dict
+
+     with torch.no_grad():
+         keys = []
+         losses = []
+
+         for k in sorted(loss_dict.keys()):
+             keys.append(k)
+             losses.append(loss_dict[k])
+
+         losses = torch.stack(losses, 0)
+         dist.reduce(losses, dst=0)
+
+         if dist.get_rank() == 0:
+             losses /= world_size
+
+         reduced_losses = {k: v for k, v in zip(keys, losses)}
+
+     return reduced_losses
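A small illustration (not part of the commit) of how `reduce_loss_dict` is meant to be called, mirroring its use in `train.py`: on a single process (`world_size < 2`) it returns the dict unchanged, while under an initialized `torch.distributed` group it averages each entry onto rank 0.

```python
import torch
from distributed import reduce_loss_dict, get_rank

# Each value must be a tensor so the dict can be stacked and reduced across ranks.
loss_dict = {'D_adv': torch.tensor(0.7), 'G_adv': torch.tensor(1.3)}
reduced = reduce_loss_dict(loss_dict)

if get_rank() == 0:
    # Only rank 0 holds the averaged values in the distributed case.
    print({k: v.item() for k, v in reduced.items()})
```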
gradiodemo.py ADDED
@@ -0,0 +1,84 @@
+ import os
+ import numpy as np
+ import torch
+ from torch import nn
+ from torch.nn import functional as F
+ from torch.utils import data
+ from torchvision import transforms, utils
+ from tqdm import tqdm
+ torch.backends.cudnn.benchmark = True
+ import copy
+ from util import *
+ from PIL import Image
+
+ from model import *
+ import moviepy.video.io.ImageSequenceClip
+ import scipy
+ import kornia.augmentation as K
+
+ from base64 import b64encode
+ import gradio as gr
+ from torchvision import transforms
+
+ torch.hub.download_url_to_file('https://i.imgur.com/HiOTPNg.png', 'mona.png')
+ torch.hub.download_url_to_file('https://i.imgur.com/Cw8HcTN.png', 'painting.png')
+
+ device = 'cpu'
+ latent_dim = 8
+ n_mlp = 5
+ num_down = 3
+
+ G_A2B = Generator(256, 4, latent_dim, n_mlp, channel_multiplier=1, lr_mlp=.01, n_res=1).to(device).eval()
+
+ ensure_checkpoint_exists('GNR_checkpoint.pt')
+ ckpt = torch.load('GNR_checkpoint.pt', map_location=device)
+
+ G_A2B.load_state_dict(ckpt['G_A2B_ema'])
+
+ # mean latent
+ truncation = 1
+ with torch.no_grad():
+     mean_style = G_A2B.mapping(torch.randn([1000, latent_dim]).to(device)).mean(0, keepdim=True)
+
+
+ test_transform = transforms.Compose([
+     transforms.Resize((256, 256)),
+     transforms.ToTensor(),
+     transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), inplace=True)
+ ])
+ plt.rcParams['figure.dpi'] = 200
+
+ torch.manual_seed(84986)
+
+ num_styles = 1
+ style = torch.randn([num_styles, latent_dim]).to(device)
+
+
+ def inference(input_im):
+     real_A = test_transform(input_im).unsqueeze(0).to(device)
+
+     with torch.no_grad():
+         A2B_content, _ = G_A2B.encode(real_A)
+         fake_A2B = G_A2B.decode(A2B_content.repeat(num_styles, 1, 1, 1), style)
+         std = (0.5, 0.5, 0.5)
+         mean = (0.5, 0.5, 0.5)
+         z = fake_A2B * torch.tensor(std).view(3, 1, 1)
+         z = z + torch.tensor(mean).view(3, 1, 1)
+         tensor_to_pil = transforms.ToPILImage(mode='RGB')(z.squeeze())
+         return tensor_to_pil
+
+ title = "GANsNRoses"
+ description = "demo for GANsNRoses. To use it, simply upload your image, or click one of the examples to load them. Read more at the links below."
+ article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2106.06561'>GANs N' Roses: Stable, Controllable, Diverse Image to Image Translation (works for videos too!)</a> | <a href='https://github.com/mchong6/GANsNRoses'>Github Repo</a></p>"
+
+ gr.Interface(
+     inference,
+     [gr.inputs.Image(type="pil", label="Input")],
+     gr.outputs.Image(type="pil", label="Output"),
+     title=title,
+     description=description,
+     article=article,
+     examples=[
+         ["mona.png"],
+         ["painting.png"]
+     ]).launch()
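Roughly what `gr.Interface` does per request, shown as a hedged sketch (not part of the commit) that assumes it runs in the `gradiodemo.py` module context; the output filename is hypothetical. The `std`/`mean` arithmetic in `inference` simply inverts the `Normalize(0.5, 0.5)` transform, mapping the generator output from [-1, 1] back to [0, 1] before conversion to a PIL image.

```python
from PIL import Image

# 'mona.png' is downloaded by torch.hub at the top of this script.
im = Image.open('mona.png').convert('RGB')
out = inference(im)          # returns a PIL.Image translated to the anime domain
out.save('mona_anime.png')   # hypothetical output filename
```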
inference.ipynb ADDED
The diff for this file is too large to render. See raw diff
inference_colab.ipynb ADDED
The diff for this file is too large to render. See raw diff
model.py ADDED
@@ -0,0 +1,757 @@
+ import torchvision
+ import math
+ import random
+ import functools
+ import operator
+
+ import torch
+ from torch import nn
+ from torch.nn import functional as F
+ from torch.autograd import Function
+
+ from op import FusedLeakyReLU, fused_leaky_relu, upfirdn2d
+ n_latent = 11
+
+
+ channels = {
+     4: 512,
+     8: 512,
+     16: 512,
+     32: 512,
+     64: 256,
+     128: 128,
+     256: 64,
+     512: 32,
+     1024: 16,
+ }
+
+ class LambdaLR():
+     def __init__(self, n_epochs, offset, decay_start_epoch):
+         assert ((n_epochs - decay_start_epoch) > 0), "Decay must start before the training session ends!"
+         self.n_epochs = n_epochs
+         self.offset = offset
+         self.decay_start_epoch = decay_start_epoch
+
+     def step(self, epoch):
+         return 1.0 - max(0, epoch + self.offset - self.decay_start_epoch)/(self.n_epochs - self.decay_start_epoch)
+
+
+ class PixelNorm(nn.Module):
+     def __init__(self):
+         super().__init__()
+
+     def forward(self, input):
+         return input * torch.rsqrt(torch.mean(input ** 2, dim=1, keepdim=True) + 1e-8)
+
+ def make_kernel(k):
+     k = torch.tensor(k, dtype=torch.float32)
+
+     if k.ndim == 1:
+         k = k[None, :] * k[:, None]
+
+     k /= k.sum()
+
+     return k
+
+ class Upsample(nn.Module):
+     def __init__(self, kernel, factor=2):
+         super().__init__()
+
+         self.factor = factor
+         kernel = make_kernel(kernel) * (factor ** 2)
+         self.register_buffer('kernel', kernel)
+
+         p = kernel.shape[0] - factor
+
+         pad0 = (p + 1) // 2 + factor - 1
+         pad1 = p // 2
+
+         self.pad = (pad0, pad1)
+
+     def forward(self, input):
+         out = upfirdn2d(input, self.kernel, up=self.factor, down=1, pad=self.pad)
+
+         return out
+
+
+ class Downsample(nn.Module):
+     def __init__(self, kernel, factor=2):
+         super().__init__()
+
+         self.factor = factor
+         kernel = make_kernel(kernel)
+         self.register_buffer('kernel', kernel)
+
+         p = kernel.shape[0] - factor
+
+         pad0 = (p + 1) // 2
+         pad1 = p // 2
+
+         self.pad = (pad0, pad1)
+
+     def forward(self, input):
+         out = upfirdn2d(input, self.kernel, up=1, down=self.factor, pad=self.pad)
+
+         return out
+
+
+ class Blur(nn.Module):
+     def __init__(self, kernel, pad, upsample_factor=1):
+         super().__init__()
+
+         kernel = make_kernel(kernel)
+
+         if upsample_factor > 1:
+             kernel = kernel * (upsample_factor ** 2)
+
+         self.register_buffer('kernel', kernel)
+
+         self.pad = pad
+
+     def forward(self, input):
+         out = upfirdn2d(input, self.kernel, pad=self.pad)
+
+         return out
+
+
+ class EqualConv2d(nn.Module):
+     def __init__(
+         self, in_channel, out_channel, kernel_size, stride=1, padding=0, bias=True
+     ):
+         super().__init__()
+
+         self.weight = nn.Parameter(
+             torch.randn(out_channel, in_channel, kernel_size, kernel_size)
+         )
+         self.scale = 1 / math.sqrt(in_channel * kernel_size ** 2)
+
+         self.stride = stride
+         self.padding = padding
+
+         if bias:
+             self.bias = nn.Parameter(torch.zeros(out_channel))
+
+         else:
+             self.bias = None
+
+     def forward(self, input):
+         out = F.conv2d(
+             input,
+             self.weight * self.scale,
+             bias=self.bias,
+             stride=self.stride,
+             padding=self.padding,
+         )
+
+         return out
+
+     def __repr__(self):
+         return (
+             f'{self.__class__.__name__}({self.weight.shape[1]}, {self.weight.shape[0]},'
+             f' {self.weight.shape[2]}, stride={self.stride}, padding={self.padding})'
+         )
+
+
+ class EqualLinear(nn.Module):
+     def __init__(
+         self, in_dim, out_dim, bias=True, bias_init=0, lr_mul=1, activation=None
+     ):
+         super().__init__()
+
+         self.weight = nn.Parameter(torch.randn(out_dim, in_dim).div_(lr_mul))
+
+         if bias:
+             self.bias = nn.Parameter(torch.zeros(out_dim).fill_(bias_init))
+
+         else:
+             self.bias = None
+
+         self.activation = activation
+
+         self.scale = (1 / math.sqrt(in_dim)) * lr_mul
+         self.lr_mul = lr_mul
+
+     def forward(self, input):
+         bias = self.bias*self.lr_mul if self.bias is not None else None
+         if self.activation:
+             out = F.linear(input, self.weight * self.scale)
+             out = fused_leaky_relu(out, bias)
+
+         else:
+             out = F.linear(
+                 input, self.weight * self.scale, bias=bias
+             )
+
+         return out
+
+     def __repr__(self):
+         return (
+             f'{self.__class__.__name__}({self.weight.shape[1]}, {self.weight.shape[0]})'
+         )
+
+
+ class ScaledLeakyReLU(nn.Module):
+     def __init__(self, negative_slope=0.2):
+         super().__init__()
+
+         self.negative_slope = negative_slope
+
+     def forward(self, input):
+         out = F.leaky_relu(input, negative_slope=self.negative_slope)
+
+         return out * math.sqrt(2)
+
+
+ class ModulatedConv2d(nn.Module):
+     def __init__(
+         self,
+         in_channel,
+         out_channel,
+         kernel_size,
+         style_dim,
+         use_style=True,
+         demodulate=True,
+         upsample=False,
+         downsample=False,
+         blur_kernel=[1, 3, 3, 1],
+     ):
+         super().__init__()
+
+         self.eps = 1e-8
+         self.kernel_size = kernel_size
+         self.in_channel = in_channel
+         self.out_channel = out_channel
+         self.upsample = upsample
+         self.downsample = downsample
+         self.use_style = use_style
+
+         if upsample:
+             factor = 2
+             p = (len(blur_kernel) - factor) - (kernel_size - 1)
+             pad0 = (p + 1) // 2 + factor - 1
+             pad1 = p // 2 + 1
+
+             self.blur = Blur(blur_kernel, pad=(pad0, pad1), upsample_factor=factor)
+
+         if downsample:
+             factor = 2
+             p = (len(blur_kernel) - factor) + (kernel_size - 1)
+             pad0 = (p + 1) // 2
+             pad1 = p // 2
+
+             self.blur = Blur(blur_kernel, pad=(pad0, pad1))
+
+         fan_in = in_channel * kernel_size ** 2
+         self.scale = 1 / math.sqrt(fan_in)
+         self.padding = kernel_size // 2
+
+         self.weight = nn.Parameter(
+             torch.randn(1, out_channel, in_channel, kernel_size, kernel_size)
+         )
+
+         if use_style:
+             self.modulation = EqualLinear(style_dim, in_channel, bias_init=1)
+         else:
+             self.modulation = nn.Parameter(torch.Tensor(1, 1, in_channel, 1, 1).fill_(1))
+
+         self.demodulate = demodulate
+
+     def __repr__(self):
+         return (
+             f'{self.__class__.__name__}({self.in_channel}, {self.out_channel}, {self.kernel_size}, '
+             f'upsample={self.upsample}, downsample={self.downsample})'
+         )
+
+     def forward(self, input, style):
+         batch, in_channel, height, width = input.shape
+
+         if self.use_style:
+             style = self.modulation(style).view(batch, 1, in_channel, 1, 1)
+             weight = self.scale * self.weight * style
+         else:
+             weight = self.scale * self.weight.expand(batch, -1, -1, -1, -1) * self.modulation
+
+         if self.demodulate:
+             demod = torch.rsqrt(weight.pow(2).sum([2, 3, 4]) + 1e-8)
+             weight = weight * demod.view(batch, self.out_channel, 1, 1, 1)
+
+         weight = weight.view(
+             batch * self.out_channel, in_channel, self.kernel_size, self.kernel_size
+         )
+
+         if self.upsample:
+             input = input.view(1, batch * in_channel, height, width)
+             weight = weight.view(
+                 batch, self.out_channel, in_channel, self.kernel_size, self.kernel_size
+             )
+             weight = weight.transpose(1, 2).reshape(
+                 batch * in_channel, self.out_channel, self.kernel_size, self.kernel_size
+             )
+             out = F.conv_transpose2d(input, weight, padding=0, stride=2, groups=batch)
+             _, _, height, width = out.shape
+             out = out.view(batch, self.out_channel, height, width)
+             out = self.blur(out)
+
+         elif self.downsample:
+             input = self.blur(input)
+             _, _, height, width = input.shape
+             input = input.view(1, batch * in_channel, height, width)
+             out = F.conv2d(input, weight, padding=0, stride=2, groups=batch)
+             _, _, height, width = out.shape
+             out = out.view(batch, self.out_channel, height, width)
+
+         else:
+             input = input.view(1, batch * in_channel, height, width)
+             out = F.conv2d(input, weight, padding=self.padding, groups=batch)
+             _, _, height, width = out.shape
+             out = out.view(batch, self.out_channel, height, width)
+
+         return out
+
+
+ class NoiseInjection(nn.Module):
+     def __init__(self):
+         super().__init__()
+
+         self.weight = nn.Parameter(torch.zeros(1))
+
+     def forward(self, image, noise=None):
+         if noise is None:
+             batch, _, height, width = image.shape
+             noise = image.new_empty(batch, 1, height, width).normal_()
+
+         return image + self.weight * noise
+
+
+ class ConstantInput(nn.Module):
+     def __init__(self, style_dim):
+         super().__init__()
+
+         self.input = nn.Parameter(torch.randn(1, style_dim))
+
+     def forward(self, input):
+         batch = input.shape[0]
+         out = self.input.repeat(batch, n_latent)
+
+         return out
+
+
+ class StyledConv(nn.Module):
+     def __init__(
+         self,
+         in_channel,
+         out_channel,
+         kernel_size,
+         style_dim,
+         use_style=True,
+         upsample=False,
+         downsample=False,
+         blur_kernel=[1, 3, 3, 1],
+         demodulate=True,
+     ):
+         super().__init__()
+         self.use_style = use_style
+
+         self.conv = ModulatedConv2d(
+             in_channel,
+             out_channel,
+             kernel_size,
+             style_dim,
+             use_style=use_style,
+             upsample=upsample,
+             downsample=downsample,
+             blur_kernel=blur_kernel,
+             demodulate=demodulate,
+         )
+
+         #if use_style:
+         #    self.noise = NoiseInjection()
+         #else:
+         #    self.noise = None
+         #    self.bias = nn.Parameter(torch.zeros(1, out_channel, 1, 1))
+         # self.activate = ScaledLeakyReLU(0.2)
+         self.activate = FusedLeakyReLU(out_channel)
+
+     def forward(self, input, style=None, noise=None):
+         out = self.conv(input, style)
+         #if self.use_style:
+         #    out = self.noise(out, noise=noise)
+         # out = out + self.bias
+         out = self.activate(out)
+
+         return out
+
+
+ class StyledResBlock(nn.Module):
+     def __init__(self, in_channel, style_dim, blur_kernel=[1, 3, 3, 1], demodulate=True):
+         super().__init__()
+
+         self.conv1 = StyledConv(in_channel, in_channel, 3, style_dim, upsample=False, blur_kernel=blur_kernel, demodulate=demodulate)
+         self.conv2 = StyledConv(in_channel, in_channel, 3, style_dim, upsample=False, blur_kernel=blur_kernel, demodulate=demodulate)
+
+     def forward(self, input, style):
+         out = self.conv1(input, style)
+         out = self.conv2(out, style)
+         out = (out + input) / math.sqrt(2)
+
+         return out
+
+ class ToRGB(nn.Module):
+     def __init__(self, in_channel, style_dim, upsample=True, blur_kernel=[1, 3, 3, 1]):
+         super().__init__()
+
+         if upsample:
+             self.upsample = Upsample(blur_kernel)
+
+         self.conv = ModulatedConv2d(in_channel, 3, 1, style_dim, demodulate=False)
+         self.bias = nn.Parameter(torch.zeros(1, 3, 1, 1))
+
+     def forward(self, input, style, skip=None):
+         out = self.conv(input, style)
+         out = out + self.bias
+
+         if skip is not None:
+             skip = self.upsample(skip)
+
+             out = out + skip
+
+         return out
+
+
+ class Generator(nn.Module):
+     def __init__(
+         self,
+         size,
+         num_down,
+         latent_dim,
+         n_mlp,
+         n_res,
+         channel_multiplier=1,
+         blur_kernel=[1, 3, 3, 1],
+         lr_mlp=0.01,
+     ):
+         super().__init__()
+         self.size = size
+
+         style_dim = 512
+
+         mapping = [EqualLinear(latent_dim, style_dim, lr_mul=lr_mlp, activation='fused_lrelu')]
+         for i in range(n_mlp-1):
+             mapping.append(EqualLinear(style_dim, style_dim, lr_mul=lr_mlp, activation='fused_lrelu'))
+
+         self.mapping = nn.Sequential(*mapping)
+
+         self.encoder = Encoder(size, latent_dim, num_down, n_res, channel_multiplier)
+
+         self.log_size = int(math.log(size, 2)) #7
+         in_log_size = self.log_size - num_down #7-2 or 7-3
+         in_size = 2 ** in_log_size
+
+         in_channel = channels[in_size]
+         self.adain_bottleneck = nn.ModuleList()
+         for i in range(n_res):
+             self.adain_bottleneck.append(StyledResBlock(in_channel, style_dim))
+
+         self.conv1 = StyledConv(in_channel, in_channel, 3, style_dim, blur_kernel=blur_kernel)
+         self.to_rgb1 = ToRGB(in_channel, style_dim, upsample=False)
+
+         self.num_layers = (self.log_size - in_log_size) * 2 + 1 #7
+
+         self.convs = nn.ModuleList()
+         self.upsamples = nn.ModuleList()
+         self.to_rgbs = nn.ModuleList()
+         #self.noises = nn.Module()
+
+
+         #for layer_idx in range(self.num_layers):
+         #    res = (layer_idx + (in_log_size*2+1)) // 2 #2,3,3,5 ... -> 4,5,5,6 ...
+         #    shape = [1, 1, 2 ** res, 2 ** res]
+         #    self.noises.register_buffer(f'noise_{layer_idx}', torch.randn(*shape))
+
+         for i in range(in_log_size+1, self.log_size + 1):
+             out_channel = channels[2 ** i]
+
+             self.convs.append(
+                 StyledConv(
+                     in_channel,
+                     out_channel,
+                     3,
+                     style_dim,
+                     upsample=True,
+                     blur_kernel=blur_kernel,
+                 )
+             )
+
+             self.convs.append(
+                 StyledConv(
+                     out_channel, out_channel, 3, style_dim, blur_kernel=blur_kernel
+                 )
+             )
+
+             self.to_rgbs.append(ToRGB(out_channel, style_dim))
+
+             in_channel = out_channel
+
+     def style_encode(self, input):
+         return self.encoder(input)[1]
+
+     def encode(self, input):
+         return self.encoder(input)
+
+     def forward(self, input, z=None):
+         content, style = self.encode(input)
+         if z is None:
+             out = self.decode(content, style)
+         else:
+             out = self.decode(content, z)
+
+         return out, content, style
+
+     def decode(self, input, styles, use_mapping=True):
+         if use_mapping:
+             styles = self.mapping(styles)
+         #styles = styles.repeat(1, n_latent).view(styles.size(0), n_latent, -1)
+         out = input
+         i = 0
+         for conv in self.adain_bottleneck:
+             out = conv(out, styles)
+             i += 1
+
+         out = self.conv1(out, styles, noise=None)
+         skip = self.to_rgb1(out, styles)
+         i += 2
+
+         for conv1, conv2, to_rgb in zip(
+             self.convs[::2], self.convs[1::2], self.to_rgbs
+         ):
+             out = conv1(out, styles, noise=None)
+             out = conv2(out, styles, noise=None)
+             skip = to_rgb(out, styles, skip)
+
+             i += 3
+
+         image = skip
+         return image
+
+ class ConvLayer(nn.Sequential):
+     def __init__(
+         self,
+         in_channel,
+         out_channel,
+         kernel_size,
+         downsample=False,
+         blur_kernel=[1, 3, 3, 1],
+         bias=True,
+         activate=True,
+     ):
+         layers = []
+
+         if downsample:
+             factor = 2
+             p = (len(blur_kernel) - factor) + (kernel_size - 1)
+             pad0 = (p + 1) // 2
+             pad1 = p // 2
+
+             layers.append(Blur(blur_kernel, pad=(pad0, pad1)))
+
+             stride = 2
+             self.padding = 0
+
+         else:
+             stride = 1
+             self.padding = kernel_size // 2
+
+         layers.append(
+             EqualConv2d(
+                 in_channel,
+                 out_channel,
+                 kernel_size,
+                 padding=self.padding,
+                 stride=stride,
+                 bias=bias and not activate,
+             )
+         )
+
+         if activate:
+             if bias:
+                 layers.append(FusedLeakyReLU(out_channel))
+
+             else:
+                 layers.append(ScaledLeakyReLU(0.2))
+
+         super().__init__(*layers)
+
+ class InResBlock(nn.Module):
+     def __init__(self, in_channel, blur_kernel=[1, 3, 3, 1]):
+         super().__init__()
+
+         self.conv1 = StyledConv(in_channel, in_channel, 3, None, blur_kernel=blur_kernel, demodulate=True, use_style=False)
+         self.conv2 = StyledConv(in_channel, in_channel, 3, None, blur_kernel=blur_kernel, demodulate=True, use_style=False)
+
+     def forward(self, input):
+         out = self.conv1(input, None)
+         out = self.conv2(out, None)
+         out = (out + input) / math.sqrt(2)
+
+         return out
+
+ class ResBlock(nn.Module):
+     def __init__(self, in_channel, out_channel, blur_kernel=[1, 3, 3, 1], downsample=True):
+         super().__init__()
+
+         self.conv1 = ConvLayer(in_channel, in_channel, 3)
+         self.conv2 = ConvLayer(in_channel, out_channel, 3, downsample=downsample)
+
+         if downsample or in_channel != out_channel:
+             self.skip = ConvLayer(
+                 in_channel, out_channel, 1, downsample=downsample, activate=False, bias=False
+             )
+         else:
+             self.skip = None
+
+     def forward(self, input):
+         out = self.conv1(input)
+         out = self.conv2(out)
+
+         if self.skip is None:
+             skip = input
+         else:
+             skip = self.skip(input)
+         out = (out + skip) / math.sqrt(2)
+
+         return out
+
+ class Discriminator(nn.Module):
+     def __init__(self, size, channel_multiplier=2, blur_kernel=[1, 3, 3, 1]):
+         super().__init__()
+         self.size = size
+         l_branch = self.make_net_(32)
+         l_branch += [ConvLayer(channels[32], 1, 1, activate=False)]
+         self.l_branch = nn.Sequential(*l_branch)
+
+
+         g_branch = self.make_net_(8)
+         self.g_branch = nn.Sequential(*g_branch)
+         self.g_adv = ConvLayer(channels[8], 1, 1, activate=False)
+
+         self.g_std = nn.Sequential(ConvLayer(channels[8], channels[4], 3, downsample=True),
+                                    nn.Flatten(),
+                                    EqualLinear(channels[4] * 4 * 4, 128, activation='fused_lrelu'),
+                                    )
+         self.g_final = EqualLinear(128, 1, activation=False)
+
+
+     def make_net_(self, out_size):
+         size = self.size
+         convs = [ConvLayer(3, channels[size], 1)]
+         log_size = int(math.log(size, 2))
+         out_log_size = int(math.log(out_size, 2))
+         in_channel = channels[size]
+
+         for i in range(log_size, out_log_size, -1):
+             out_channel = channels[2 ** (i - 1)]
+             convs.append(ResBlock(in_channel, out_channel))
+             in_channel = out_channel
+
+         return convs
+
+     def forward(self, x):
+         l_adv = self.l_branch(x)
+
+         g_act = self.g_branch(x)
+         g_adv = self.g_adv(g_act)
+
+         output = self.g_std(g_act)
+         g_stddev = torch.sqrt(output.var(0, keepdim=True, unbiased=False) + 1e-8).repeat(x.size(0), 1)
+         g_std = self.g_final(g_stddev)
+         return [l_adv, g_adv, g_std]
+
+
+
+ class Encoder(nn.Module):
+     def __init__(self, size, latent_dim, num_down, n_res, channel_multiplier=2, blur_kernel=[1, 3, 3, 1]):
+         super().__init__()
+         stem = [ConvLayer(3, channels[size], 1)]
+         log_size = int(math.log(size, 2))
+         in_channel = channels[size]
+
+         for i in range(log_size, log_size-num_down, -1):
+             out_channel = channels[2 ** (i - 1)]
+             stem.append(ResBlock(in_channel, out_channel, downsample=True))
+             in_channel = out_channel
+         stem += [ResBlock(in_channel, in_channel, downsample=False) for i in range(n_res)]
+         self.stem = nn.Sequential(*stem)
+
+         self.content = nn.Sequential(
+             ConvLayer(in_channel, in_channel, 1),
+             ConvLayer(in_channel, in_channel, 1)
+         )
+         style = []
+         for i in range(log_size-num_down, 2, -1):
+             out_channel = channels[2 ** (i - 1)]
+             style.append(ConvLayer(in_channel, out_channel, 3, downsample=True))
+             in_channel = out_channel
+         style += [
+             nn.Flatten(),
+             EqualLinear(channels[4] * 4 * 4, channels[4], activation='fused_lrelu'),
+             EqualLinear(channels[4], latent_dim),
+         ]
+         self.style = nn.Sequential(*style)
+
+
+     def forward(self, input):
+         act = self.stem(input)
+         content = self.content(act)
+         style = self.style(act)
+         return content, style
+
+ class StyleEncoder(nn.Module):
+     def __init__(self, size, style_dim, channel_multiplier=2, blur_kernel=[1, 3, 3, 1]):
+         super().__init__()
+         convs = [ConvLayer(3, channels[size], 1)]
+
+         log_size = int(math.log(size, 2))
+
+         in_channel = channels[size]
+         num_down = 6
+
+         for i in range(log_size, log_size-num_down, -1):
+             w = 2 ** (i - 1)
+             out_channel = channels[w]
+             convs.append(ConvLayer(in_channel, out_channel, 3, downsample=True))
+             in_channel = out_channel
+
+         convs += [
+             nn.Flatten(),
+             EqualLinear(channels[4] * 4 * 4, channels[4], activation='fused_lrelu'), EqualLinear(channels[4], style_dim),
+         ]
+         self.convs = nn.Sequential(*convs)
+
+     def forward(self, input):
+         style = self.convs(input)
+         return style.view(input.size(0), -1)
+
+ class LatDiscriminator(nn.Module):
+     def __init__(self, style_dim):
+         super().__init__()
+
+         fc = [EqualLinear(style_dim, 256, activation='fused_lrelu')]
+         for i in range(3):
+             fc += [EqualLinear(256, 256, activation='fused_lrelu')]
+         fc += [FCMinibatchStd(256, 256)]
+         fc += [EqualLinear(256, 1)]
+         self.fc = nn.Sequential(*fc)
+
+     def forward(self, input):
+         return [self.fc(input), ]
+
+ class FCMinibatchStd(nn.Module):
+     def __init__(self, in_channel, out_channel):
+         super().__init__()
+         self.fc = EqualLinear(in_channel+1, out_channel, activation='fused_lrelu')
+
+     def forward(self, out):
+         stddev = torch.sqrt(out.var(0, unbiased=False) + 1e-8).mean().view(1, 1).repeat(out.size(0), 1)
+         out = torch.cat([out, stddev], 1)
+         out = self.fc(out)
+         return out
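A sketch (not part of the commit) of the `encode`/`decode` API that `train.py` and `gradiodemo.py` rely on. It assumes the `op/` package (`FusedLeakyReLU`, `fused_leaky_relu`, `upfirdn2d`) from the upstream GANsNRoses/StyleGAN2 code is importable, which typically requires its CUDA/ninja extension to build.

```python
import torch
from model import Generator

# Arguments mirror the train.py defaults: 256px images, 3 downsampling stages,
# an 8-dim style code, a 5-layer mapping network, and one bottleneck res-block.
G = Generator(256, num_down=3, latent_dim=8, n_mlp=5, n_res=1, channel_multiplier=1)

x = torch.randn(1, 3, 256, 256)
content, style = G.encode(x)   # content: spatial code; style: [1, 8] vector
z = torch.randn(1, 8)          # a random style in place of the encoded one
fake = G.decode(content, z)    # [1, 3, 256, 256] translated image
```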
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ tqdm
+ gdown
+ kornia
+ scipy
+ opencv-python
+ moviepy
+ lpips
+ ninja
+ gradio
+ torchvision
teaser.gif ADDED
teaser.png ADDED
train.py ADDED
@@ -0,0 +1,458 @@
+ import argparse
+ import math
+ import random
+ import os
+ from util import *
+ import numpy as np
+ import torch
+ torch.backends.cudnn.benchmark = True
+ from torch import nn, autograd
+ from torch import optim
+ from torch.nn import functional as F
+ from torch.utils import data
+ import torch.distributed as dist
+
+ from torchvision import transforms, utils
+ from tqdm import tqdm
+ from torch.optim import lr_scheduler
+ import copy
+ import kornia.augmentation as K
+ import kornia
+ import lpips
+
+ from model import *
+ from dataset import ImageFolder
+ from distributed import (
+     get_rank,
+     synchronize,
+     reduce_loss_dict,
+     reduce_sum,
+     get_world_size,
+ )
+
+ mse_criterion = nn.MSELoss()
+
+
+ def test(args, genA2B, genB2A, testA_loader, testB_loader, name, step):
+     testA_loader = iter(testA_loader)
+     testB_loader = iter(testB_loader)
+     with torch.no_grad():
+         test_sample_num = 16
+
+         genA2B.eval(), genB2A.eval()
+         A2B = []
+         B2A = []
+         for i in range(test_sample_num):
+             real_A = testA_loader.next()
+             real_B = testB_loader.next()
+
+             real_A, real_B = real_A.cuda(), real_B.cuda()
+
+             A2B_content, A2B_style = genA2B.encode(real_A)
+             B2A_content, B2A_style = genB2A.encode(real_B)
+
+             if i % 2 == 0:
+                 A2B_mod1 = torch.randn([1, args.latent_dim]).cuda()
+                 B2A_mod1 = torch.randn([1, args.latent_dim]).cuda()
+                 A2B_mod2 = torch.randn([1, args.latent_dim]).cuda()
+                 B2A_mod2 = torch.randn([1, args.latent_dim]).cuda()
+
+             fake_B2B, _, _ = genA2B(real_B)
+             fake_A2A, _, _ = genB2A(real_A)
+
+             colsA = [real_A, fake_A2A]
+             colsB = [real_B, fake_B2B]
+
+             fake_A2B_1 = genA2B.decode(A2B_content, A2B_mod1)
+             fake_B2A_1 = genB2A.decode(B2A_content, B2A_mod1)
+
+             fake_A2B_2 = genA2B.decode(A2B_content, A2B_mod2)
+             fake_B2A_2 = genB2A.decode(B2A_content, B2A_mod2)
+
+             fake_A2B_3 = genA2B.decode(A2B_content, B2A_style)
+             fake_B2A_3 = genB2A.decode(B2A_content, A2B_style)
+
+             colsA += [fake_A2B_3, fake_A2B_1, fake_A2B_2]
+             colsB += [fake_B2A_3, fake_B2A_1, fake_B2A_2]
+
+             fake_A2B2A, _, _ = genB2A(fake_A2B_3, A2B_style)
+             fake_B2A2B, _, _ = genA2B(fake_B2A_3, B2A_style)
+             colsA.append(fake_A2B2A)
+             colsB.append(fake_B2A2B)
+
+             fake_A2B2A, _, _ = genB2A(fake_A2B_1, A2B_style)
+             fake_B2A2B, _, _ = genA2B(fake_B2A_1, B2A_style)
+             colsA.append(fake_A2B2A)
+             colsB.append(fake_B2A2B)
+
+             fake_A2B2A, _, _ = genB2A(fake_A2B_2, A2B_style)
+             fake_B2A2B, _, _ = genA2B(fake_B2A_2, B2A_style)
+             colsA.append(fake_A2B2A)
+             colsB.append(fake_B2A2B)
+
+             fake_A2B2A, _, _ = genB2A(fake_A2B_1)
+             fake_B2A2B, _, _ = genA2B(fake_B2A_1)
+             colsA.append(fake_A2B2A)
+             colsB.append(fake_B2A2B)
+
+             colsA = torch.cat(colsA, 2).detach().cpu()
+             colsB = torch.cat(colsB, 2).detach().cpu()
+
+             A2B.append(colsA)
+             B2A.append(colsB)
+         A2B = torch.cat(A2B, 0)
+         B2A = torch.cat(B2A, 0)
+
+         utils.save_image(A2B, f'{im_path}/{name}_A2B_{str(step).zfill(6)}.jpg', normalize=True, range=(-1, 1), nrow=16)
+         utils.save_image(B2A, f'{im_path}/{name}_B2A_{str(step).zfill(6)}.jpg', normalize=True, range=(-1, 1), nrow=16)
+
+         genA2B.train(), genB2A.train()
+
+
+ def train(args, trainA_loader, trainB_loader, testA_loader, testB_loader, G_A2B, G_B2A, D_A, D_B, G_optim, D_optim, device):
+     G_A2B.train(), G_B2A.train(), D_A.train(), D_B.train()
+     trainA_loader = sample_data(trainA_loader)
+     trainB_loader = sample_data(trainB_loader)
+     G_scheduler = lr_scheduler.StepLR(G_optim, step_size=100000, gamma=0.5)
+     D_scheduler = lr_scheduler.StepLR(D_optim, step_size=100000, gamma=0.5)
+
+     pbar = range(args.iter)
+
+     if get_rank() == 0:
+         pbar = tqdm(pbar, initial=args.start_iter, dynamic_ncols=True, smoothing=0.1)
+
+     loss_dict = {}
+     mean_path_length_A2B = 0
+     mean_path_length_B2A = 0
+
+     if args.distributed:
+         G_A2B_module = G_A2B.module
+         G_B2A_module = G_B2A.module
+         D_A_module = D_A.module
+         D_B_module = D_B.module
+         D_L_module = D_L.module
+
+     else:
+         G_A2B_module = G_A2B
+         G_B2A_module = G_B2A
+         D_A_module = D_A
+         D_B_module = D_B
+         D_L_module = D_L
+
+     for idx in pbar:
+         i = idx + args.start_iter
+
+         if i > args.iter:
+             print('Done!')
+             break
+
+         ori_A = next(trainA_loader)
+         ori_B = next(trainB_loader)
+         if isinstance(ori_A, list):
+             ori_A = ori_A[0]
+         if isinstance(ori_B, list):
+             ori_B = ori_B[0]
+
+         ori_A = ori_A.to(device)
+         ori_B = ori_B.to(device)
+         aug_A = aug(ori_A)
+         aug_B = aug(ori_B)
+         A = aug(ori_A[[np.random.randint(args.batch)]].expand_as(ori_A))
+         B = aug(ori_B[[np.random.randint(args.batch)]].expand_as(ori_B))
+
+         if i % args.d_reg_every == 0:
+             aug_A.requires_grad = True
+             aug_B.requires_grad = True
+
+         A2B_content, A2B_style = G_A2B.encode(A)
+         B2A_content, B2A_style = G_B2A.encode(B)
+
+         # get new style
+         aug_A2B_style = G_B2A.style_encode(aug_B)
+         aug_B2A_style = G_A2B.style_encode(aug_A)
+         rand_A2B_style = torch.randn([args.batch, args.latent_dim]).to(device).requires_grad_()
+         rand_B2A_style = torch.randn([args.batch, args.latent_dim]).to(device).requires_grad_()
+
+         # styles
+         idx = torch.randperm(2*args.batch)
+         input_A2B_style = torch.cat([rand_A2B_style, aug_A2B_style], 0)[idx][:args.batch]
+
+         idx = torch.randperm(2*args.batch)
+         input_B2A_style = torch.cat([rand_B2A_style, aug_B2A_style], 0)[idx][:args.batch]
+
+         fake_A2B = G_A2B.decode(A2B_content, input_A2B_style)
+         fake_B2A = G_B2A.decode(B2A_content, input_B2A_style)
+
+
+         # train disc
+         real_A_logit = D_A(aug_A)
+         real_B_logit = D_B(aug_B)
+         real_L_logit1 = D_L(rand_A2B_style)
+         real_L_logit2 = D_L(rand_B2A_style)
+
+         fake_B_logit = D_B(fake_A2B.detach())
+         fake_A_logit = D_A(fake_B2A.detach())
+         fake_L_logit1 = D_L(aug_A2B_style.detach())
+         fake_L_logit2 = D_L(aug_B2A_style.detach())
+
+         # global loss
+         D_loss = d_logistic_loss(real_A_logit, fake_A_logit) +\
+                  d_logistic_loss(real_B_logit, fake_B_logit) +\
+                  d_logistic_loss(real_L_logit1, fake_L_logit1) +\
+                  d_logistic_loss(real_L_logit2, fake_L_logit2)
+
+         loss_dict['D_adv'] = D_loss
+
+         if i % args.d_reg_every == 0:
+             r1_A_loss = d_r1_loss(real_A_logit, aug_A)
+             r1_B_loss = d_r1_loss(real_B_logit, aug_B)
+             r1_L_loss = d_r1_loss(real_L_logit1, rand_A2B_style) + d_r1_loss(real_L_logit2, rand_B2A_style)
+             r1_loss = r1_A_loss + r1_B_loss + r1_L_loss
+             D_r1_loss = (args.r1 / 2 * r1_loss * args.d_reg_every)
+             D_loss += D_r1_loss
+
+         D_optim.zero_grad()
+         D_loss.backward()
+         D_optim.step()
+
+         #Generator
+         # adv loss
+         fake_B_logit = D_B(fake_A2B)
+         fake_A_logit = D_A(fake_B2A)
+         fake_L_logit1 = D_L(aug_A2B_style)
+         fake_L_logit2 = D_L(aug_B2A_style)
+
+         lambda_adv = (1, 1, 1)
+         G_adv_loss = 1 * (g_nonsaturating_loss(fake_A_logit, lambda_adv) +\
+                           g_nonsaturating_loss(fake_B_logit, lambda_adv) +\
+                           2*g_nonsaturating_loss(fake_L_logit1, (1,)) +\
+                           2*g_nonsaturating_loss(fake_L_logit2, (1,)))
+
+         # style consis loss
+         G_con_loss = 50 * (A2B_style.var(0, unbiased=False).sum() + B2A_style.var(0, unbiased=False).sum())
+
+         # cycle recon
+         A2B2A_content, A2B2A_style = G_B2A.encode(fake_A2B)
+         B2A2B_content, B2A2B_style = G_A2B.encode(fake_B2A)
+         fake_A2B2A = G_B2A.decode(A2B2A_content, shuffle_batch(A2B_style))
+         fake_B2A2B = G_A2B.decode(B2A2B_content, shuffle_batch(B2A_style))
+
+         G_cycle_loss = 20 * (F.mse_loss(fake_A2B2A, A) + F.mse_loss(fake_B2A2B, B))
+         lpips_loss = 10 * (lpips_fn(fake_A2B2A, A).mean() + lpips_fn(fake_B2A2B, B).mean()) #10 for anime
+
+         # style reconstruction
+         G_style_loss = 5 * (mse_criterion(A2B2A_style, input_A2B_style) +\
+                             mse_criterion(B2A2B_style, input_B2A_style))
+
+
+         G_loss = G_adv_loss + G_cycle_loss + G_con_loss + lpips_loss + G_style_loss
+
+         loss_dict['G_adv'] = G_adv_loss
+         loss_dict['G_con'] = G_con_loss
+         loss_dict['G_cycle'] = G_cycle_loss
+         loss_dict['lpips'] = lpips_loss
+
+         G_optim.zero_grad()
+         G_loss.backward()
+         G_optim.step()
+
+         G_scheduler.step()
+         D_scheduler.step()
+
+         accumulate(G_A2B_ema, G_A2B_module)
+         accumulate(G_B2A_ema, G_B2A_module)
+
+         loss_reduced = reduce_loss_dict(loss_dict)
+         D_adv_loss_val = loss_reduced['D_adv'].mean().item()
+
+         G_adv_loss_val = loss_reduced['G_adv'].mean().item()
+         G_cycle_loss_val = loss_reduced['G_cycle'].mean().item()
+         G_con_loss_val = loss_reduced['G_con'].mean().item()
+         lpips_val = loss_reduced['lpips'].mean().item()
+
+         if get_rank() == 0:
+             pbar.set_description(
+                 (
+                     f'Dadv: {D_adv_loss_val:.2f}; lpips: {lpips_val:.2f} '
+                     f'Gadv: {G_adv_loss_val:.2f}; Gcycle: {G_cycle_loss_val:.2f}; GMS: {G_con_loss_val:.2f} {G_style_loss.item():.2f}'
+                 )
+             )
+
+             if i % 1000 == 0:
+                 with torch.no_grad():
+                     test(args, G_A2B, G_B2A, testA_loader, testB_loader, 'normal', i)
+                     test(args, G_A2B_ema, G_B2A_ema, testA_loader, testB_loader, 'ema', i)
+
+             if (i+1) % 2000 == 0:
+                 torch.save(
+                     {
+                         'G_A2B': G_A2B_module.state_dict(),
+                         'G_B2A': G_B2A_module.state_dict(),
+                         'G_A2B_ema': G_A2B_ema.state_dict(),
+                         'G_B2A_ema': G_B2A_ema.state_dict(),
+                         'D_A': D_A_module.state_dict(),
+                         'D_B': D_B_module.state_dict(),
+                         'D_L': D_L_module.state_dict(),
+                         'G_optim': G_optim.state_dict(),
+                         'D_optim': D_optim.state_dict(),
+                         'iter': i,
+                     },
+                     os.path.join(model_path, 'ck.pt'),
+                 )
+
+
+ if __name__ == '__main__':
+     device = 'cuda'
+
+     parser = argparse.ArgumentParser()
+
+     parser.add_argument('--iter', type=int, default=300000)
+     parser.add_argument('--batch', type=int, default=4)
+     parser.add_argument('--n_sample', type=int, default=64)
+     parser.add_argument('--size', type=int, default=256)
+     parser.add_argument('--r1', type=float, default=10)
+     parser.add_argument('--lambda_cycle', type=int, default=1)
+     parser.add_argument('--path_regularize', type=float, default=2)
+     parser.add_argument('--path_batch_shrink', type=int, default=2)
+     parser.add_argument('--d_reg_every', type=int, default=16)
+     parser.add_argument('--g_reg_every', type=int, default=4)
+     parser.add_argument('--mixing', type=float, default=0.9)
+     parser.add_argument('--ckpt', type=str, default=None)
+     parser.add_argument('--lr', type=float, default=2e-3)
+     parser.add_argument('--local_rank', type=int, default=0)
+     parser.add_argument('--num_down', type=int, default=3)
+     parser.add_argument('--name', type=str, required=True)
+     parser.add_argument('--d_path', type=str, required=True)
+     parser.add_argument('--latent_dim', type=int, default=8)
+     parser.add_argument('--lr_mlp', type=float, default=0.01)
+     parser.add_argument('--n_res', type=int, default=1)
+
+     args = parser.parse_args()
+
+     n_gpu = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1
+     args.distributed = False
+
+     if args.distributed:
+         torch.cuda.set_device(args.local_rank)
+         torch.distributed.init_process_group(backend='nccl', init_method='env://')
+         synchronize()
+
+     save_path = f'./{args.name}'
+     im_path = os.path.join(save_path, 'sample')
+     model_path = os.path.join(save_path, 'checkpoint')
+     os.makedirs(im_path, exist_ok=True)
+     os.makedirs(model_path, exist_ok=True)
+
+     args.n_mlp = 5
+
+     args.start_iter = 0
+
+     G_A2B = Generator( args.size, args.num_down, args.latent_dim, args.n_mlp, lr_mlp=args.lr_mlp, n_res=args.n_res).to(device)
+     D_A = Discriminator(args.size).to(device)
+     G_B2A = Generator( args.size, args.num_down, args.latent_dim, args.n_mlp, lr_mlp=args.lr_mlp, n_res=args.n_res).to(device)
+     D_B = Discriminator(args.size).to(device)
+     D_L = LatDiscriminator(args.latent_dim).to(device)
+     lpips_fn = lpips.LPIPS(net='vgg').to(device)
+
+     G_A2B_ema = copy.deepcopy(G_A2B).to(device).eval()
+     G_B2A_ema = copy.deepcopy(G_B2A).to(device).eval()
+
+     g_reg_ratio = args.g_reg_every / (args.g_reg_every + 1)
+     d_reg_ratio = args.d_reg_every / (args.d_reg_every + 1)
+
+     G_optim = optim.Adam( list(G_A2B.parameters()) + list(G_B2A.parameters()), lr=args.lr, betas=(0, 0.99))
+     D_optim = optim.Adam(
+         list(D_L.parameters()) + list(D_A.parameters()) + list(D_B.parameters()),
+         lr=args.lr, betas=(0**d_reg_ratio, 0.99**d_reg_ratio))
+
+     if args.ckpt is not None:
+         ckpt = torch.load(args.ckpt, map_location=lambda storage, loc: storage)
+
+         try:
+             ckpt_name = os.path.basename(args.ckpt)
+             args.start_iter = int(os.path.splitext(ckpt_name)[0])
+
+         except ValueError:
+             pass
+
+         G_A2B.load_state_dict(ckpt['G_A2B'])
+         G_B2A.load_state_dict(ckpt['G_B2A'])
+         G_A2B_ema.load_state_dict(ckpt['G_A2B_ema'])
+         G_B2A_ema.load_state_dict(ckpt['G_B2A_ema'])
+         D_A.load_state_dict(ckpt['D_A'])
+         D_B.load_state_dict(ckpt['D_B'])
+         D_L.load_state_dict(ckpt['D_L'])
+
+         G_optim.load_state_dict(ckpt['G_optim'])
+         D_optim.load_state_dict(ckpt['D_optim'])
+         args.start_iter = ckpt['iter']
+
+     if args.distributed:
+         G_A2B = nn.parallel.DistributedDataParallel(
+             G_A2B,
+             device_ids=[args.local_rank],
+             output_device=args.local_rank,
+             broadcast_buffers=False,
+         )
+
+         D_A = nn.parallel.DistributedDataParallel(
+             D_A,
+             device_ids=[args.local_rank],
+             output_device=args.local_rank,
+             broadcast_buffers=False,
+         )
+
+         G_B2A = nn.parallel.DistributedDataParallel(
+             G_B2A,
+             device_ids=[args.local_rank],
+             output_device=args.local_rank,
+             broadcast_buffers=False,
+         )
+
+         D_B = nn.parallel.DistributedDataParallel(
+             D_B,
+             device_ids=[args.local_rank],
+             output_device=args.local_rank,
+             broadcast_buffers=False,
+         )
+         D_L = nn.parallel.DistributedDataParallel(
+             D_L,
+             device_ids=[args.local_rank],
+             output_device=args.local_rank,
+             broadcast_buffers=False,
+         )
+     train_transform = transforms.Compose([
+         transforms.ToTensor(),
+         transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), inplace=True)
+     ])
+
+     test_transform = transforms.Compose([
+         transforms.Resize((args.size, args.size)),
+         transforms.ToTensor(),
+         transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), inplace=True)
+     ])
+
+     aug = nn.Sequential(
+         K.RandomAffine(degrees=(-20, 20), scale=(0.8, 1.2), translate=(0.1, 0.1), shear=0.15),
+         kornia.geometry.transform.Resize(256+30),
+         K.RandomCrop((256, 256)),
+         K.RandomHorizontalFlip(),
+     )
+
+
+     d_path = args.d_path
+     trainA = ImageFolder(os.path.join(d_path, 'trainA'), train_transform)
+     trainB = ImageFolder(os.path.join(d_path, 'trainB'), train_transform)
+     testA = ImageFolder(os.path.join(d_path, 'testA'), test_transform)
+     testB = ImageFolder(os.path.join(d_path, 'testB'), test_transform)
+
+     trainA_loader = data.DataLoader(trainA, batch_size=args.batch,
+                                     sampler=data_sampler(trainA, shuffle=True, distributed=args.distributed), drop_last=True, pin_memory=True, num_workers=5)
+     trainB_loader = data.DataLoader(trainB, batch_size=args.batch,
+                                     sampler=data_sampler(trainB, shuffle=True, distributed=args.distributed), drop_last=True, pin_memory=True, num_workers=5)
+
+     testA_loader = data.DataLoader(testA, batch_size=1, shuffle=False)
+     testB_loader = data.DataLoader(testB, batch_size=1, shuffle=False)
+
+
+     train(args, trainA_loader, trainB_loader, testA_loader, testB_loader, G_A2B, G_B2A, D_A, D_B, G_optim, D_optim, device)
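An illustrative sketch (not part of the commit) of how the multi-branch discriminator output plugs into the loss helpers from `util.py`: `Discriminator.forward` returns a list `[l_adv, g_adv, g_std]`, and `d_logistic_loss` / `g_nonsaturating_loss` zip over such lists, which is why `train.py` passes `lambda_adv = (1, 1, 1)`. The random tensors below stand in for real discriminator logits.

```python
import torch
from util import d_logistic_loss, g_nonsaturating_loss

# Stand-ins for D_A(real) and D_A(fake): one logit tensor per discriminator head.
real_logits = [torch.randn(4, 1), torch.randn(4, 1), torch.randn(4, 1)]
fake_logits = [torch.randn(4, 1), torch.randn(4, 1), torch.randn(4, 1)]

d_loss = d_logistic_loss(real_logits, fake_logits)            # softplus loss summed over heads
g_loss = g_nonsaturating_loss(fake_logits, weights=(1, 1, 1)) # per-head weights, averaged
```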
util.py ADDED
@@ -0,0 +1,161 @@
+ import torch
+ import torch.nn.functional as F
+ from torch.utils import data
+ from torch import nn, autograd
+ import os
+ import matplotlib.pyplot as plt
+
+
+ google_drive_paths = {
+     "GNR_checkpoint.pt": "https://drive.google.com/uc?id=1IMIVke4WDaGayUa7vk_xVw1uqIHikGtC",
+ }
+
+ def ensure_checkpoint_exists(model_weights_filename):
+     if not os.path.isfile(model_weights_filename) and (
+         model_weights_filename in google_drive_paths
+     ):
+         gdrive_url = google_drive_paths[model_weights_filename]
+         try:
+             from gdown import download as drive_download
+
+             drive_download(gdrive_url, model_weights_filename, quiet=False)
+         except ModuleNotFoundError:
+             print(
+                 "gdown module not found.",
+                 "pip3 install gdown or, manually download the checkpoint file:",
+                 gdrive_url
+             )
+
+     if not os.path.isfile(model_weights_filename) and (
+         model_weights_filename not in google_drive_paths
+     ):
+         print(
+             model_weights_filename,
+             " not found, you may need to manually download the model weights."
+         )
+
+ def shuffle_batch(x):
+     return x[torch.randperm(x.size(0))]
+
+ def data_sampler(dataset, shuffle, distributed):
+     if distributed:
+         return data.distributed.DistributedSampler(dataset, shuffle=shuffle)
+
+     if shuffle:
+         return data.RandomSampler(dataset)
+
+     else:
+         return data.SequentialSampler(dataset)
+
+
+ def accumulate(model1, model2, decay=0.999):
+     par1 = dict(model1.named_parameters())
+     par2 = dict(model2.named_parameters())
+
+     for k in par1.keys():
+         par1[k].data.mul_(decay).add_(1 - decay, par2[k].data)
+
+
+ def sample_data(loader):
+     while True:
+         for batch in loader:
+             yield batch
+
+
+ def d_logistic_loss(real_pred, fake_pred):
+     loss = 0
+     for real, fake in zip(real_pred, fake_pred):
+         real_loss = F.softplus(-real)
+         fake_loss = F.softplus(fake)
+         loss += real_loss.mean() + fake_loss.mean()
+
+     return loss
+
+
+ def d_r1_loss(real_pred, real_img):
+     grad_penalty = 0
+     for real in real_pred:
+         grad_real, = autograd.grad(
+             outputs=real.mean(), inputs=real_img, create_graph=True, only_inputs=True
+         )
+         grad_penalty += grad_real.pow(2).view(grad_real.shape[0], -1).sum(1).mean()
+
+     return grad_penalty
+
+
+ def g_nonsaturating_loss(fake_pred, weights):
+     loss = 0
+     for fake, weight in zip(fake_pred, weights):
+         loss += weight*F.softplus(-fake).mean()
+
+     return loss / len(fake_pred)
+
+ def display_image(image, size=None, mode='nearest', unnorm=False, title=''):
+     # image is [3,h,w] or [1,3,h,w] tensor [0,1]
+     if image.is_cuda:
+         image = image.cpu()
+     if size is not None and image.size(-1) != size:
+         image = F.interpolate(image, size=(size, size), mode=mode)
+     if image.dim() == 4:
+         image = image[0]
+     image = image.permute(1, 2, 0).detach().numpy()
+     plt.figure()
+     plt.title(title)
+     plt.axis('off')
+     plt.imshow(image)
+
+ def normalize(x):
+     return ((x+1)/2).clamp(0, 1)
+
+ def get_boundingbox(face, width, height, scale=1.3, minsize=None):
+     """
+     Expects a dlib face to generate a quadratic bounding box.
+     :param face: dlib face class
+     :param width: frame width
+     :param height: frame height
+     :param scale: bounding box size multiplier to get a bigger face region
+     :param minsize: set minimum bounding box size
+     :return: x, y, bounding_box_size in opencv form
+     """
+     x1 = face.left()
+     y1 = face.top()
+     x2 = face.right()
+     y2 = face.bottom()
+     size_bb = int(max(x2 - x1, y2 - y1) * scale)
+     if minsize:
+         if size_bb < minsize:
+             size_bb = minsize
+     center_x, center_y = (x1 + x2) // 2, (y1 + y2) // 2
+
+     # Check for out of bounds, x-y top left corner
+     x1 = max(int(center_x - size_bb // 2), 0)
+     y1 = max(int(center_y - size_bb // 2), 0)
+     # Check for too big bb size for given x, y
+     size_bb = min(width - x1, size_bb)
+     size_bb = min(height - y1, size_bb)
+
+     return x1, y1, size_bb
+
+
+ def preprocess_image(image, cuda=True):
+     """
+     Preprocesses the image such that it can be fed into our network.
+     During this process we invoke PIL to cast it into a PIL image.
+     :param image: numpy image in opencv form (i.e., BGR and of shape
+     :return: pytorch tensor of shape [1, 3, image_size, image_size], not
+         necessarily casted to cuda
+     """
+     # Revert from BGR
+     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+     # Preprocess using the preprocessing function used during training and
+     # casting it to PIL image
+     preprocess = xception_default_data_transforms['test']
+     preprocessed_image = preprocess(pil_image.fromarray(image))
+     # Add first dimension as the network expects a batch
+     preprocessed_image = preprocessed_image.unsqueeze(0)
+     if cuda:
+         preprocessed_image = preprocessed_image.cuda()
+     return preprocessed_image
+
+ def truncate(x, truncation, mean_style):
+     return truncation*x + (1-truncation)*mean_style
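A brief sketch (not part of the commit) of `accumulate`, which `train.py` calls after every optimizer step to maintain EMA copies of the generators; it assumes a PyTorch version contemporary with this code, since the positional `add_(alpha, tensor)` form it relies on is deprecated in newer releases.

```python
import copy
from torch import nn
from util import accumulate

model = nn.Linear(8, 8)                       # stand-in for a generator
model_ema = copy.deepcopy(model).eval()       # EMA copy, never trained directly

# ... after each training step:
accumulate(model_ema, model, decay=0.999)     # model_ema <- 0.999 * model_ema + 0.001 * model
```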