Spaces:
Runtime error
Tolga committed on
Commit 280b585 · 1 Parent(s): 79da588
version1
- app.py +109 -0
- config/vox-256.yaml +74 -0
- model.py +119 -0
- modules/.DS_Store +0 -0
- modules/__pycache__/avd_network.cpython-310.pyc +0 -0
- modules/__pycache__/dense_motion.cpython-310.pyc +0 -0
- modules/__pycache__/inpainting_network.cpython-310.pyc +0 -0
- modules/__pycache__/keypoint_detector.cpython-310.pyc +0 -0
- modules/__pycache__/util.cpython-310.pyc +0 -0
- modules/avd_network.py +65 -0
- modules/bg_motion_predictor.py +24 -0
- modules/dense_motion.py +164 -0
- modules/inpainting_network.py +127 -0
- modules/keypoint_detector.py +27 -0
- modules/model.py +182 -0
- modules/util.py +349 -0
- requirements.txt +27 -0
app.py
ADDED
@@ -0,0 +1,109 @@
import gradio as gr
import torch

import imageio
import imageio_ffmpeg
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from skimage.transform import resize
# from IPython.display import HTML  # only needed for the commented-out preview below; IPython is not in requirements.txt
import warnings
import os
from model import load_checkpoints
from model import make_animation
from skimage import img_as_ubyte
from PIL import Image
import time
warnings.filterwarnings("ignore")

device = torch.device('cpu')  # switch to torch.device('cuda:0') when a GPU is available


dataset_name = 'vox'  # ['vox', 'taichi', 'ted', 'mgif']
source_image_path = './assets/source.png'
driving_video_path = './assets/driving.mp4'
output_video_path = './generated.mp4'
config_path = './config/vox-256.yaml'
checkpoint_path = 'vox.pth.tar'  # checkpoint is expected next to app.py
predict_mode = 'relative'  # ['standard', 'relative', 'avd']
find_best_frame = False  # in relative mode, find_best_frame=True can give better results when animating faces

pixel = 256  # for vox, taichi and mgif the resolution is 256*256
if(dataset_name == 'ted'):  # for ted the resolution is 384*384
    pixel = 384

if find_best_frame:
    #!pip install face_alignment
    pass


def create_video(tt):

    source_image = imageio.imread(f"assets/img_{tt}.jpg")
    reader = imageio.get_reader(f"assets/ref_{tt}.mp4")

    source_image = resize(source_image, (pixel, pixel))[..., :3]

    fps = reader.get_meta_data()['fps']
    driving_video = []
    try:
        for im in reader:
            driving_video.append(im)
    except RuntimeError:
        pass
    reader.close()

    driving_video = [resize(frame, (pixel, pixel))[..., :3] for frame in driving_video]

    def display(source, driving, generated=None):
        fig = plt.figure(figsize=(8 + 4 * (generated is not None), 6))

        ims = []
        for i in range(len(driving)):
            cols = [source]
            cols.append(driving[i])
            if generated is not None:
                cols.append(generated[i])
            im = plt.imshow(np.concatenate(cols, axis=1), animated=True)
            plt.axis('off')
            ims.append([im])

        ani = animation.ArtistAnimation(fig, ims, interval=50, repeat_delay=1000)
        plt.close()
        return ani


    # HTML(display(source_image, driving_video).to_html5_video())
    inpainting, kp_detector, dense_motion_network, avd_network = load_checkpoints(config_path=config_path, checkpoint_path=checkpoint_path, device=device)


    if predict_mode == 'relative' and find_best_frame:
        from model import find_best_frame as _find
        i = _find(source_image, driving_video, device.type == 'cpu')
        print("Best frame: " + str(i))
        driving_forward = driving_video[i:]
        driving_backward = driving_video[:(i+1)][::-1]
        predictions_forward = make_animation(source_image, driving_forward, inpainting, kp_detector, dense_motion_network, avd_network, device=device, mode=predict_mode)
        predictions_backward = make_animation(source_image, driving_backward, inpainting, kp_detector, dense_motion_network, avd_network, device=device, mode=predict_mode)
        predictions = predictions_backward[::-1] + predictions_forward[1:]
    else:
        predictions = make_animation(source_image, driving_video, inpainting, kp_detector, dense_motion_network, avd_network, device=device, mode=predict_mode)

    # save the resulting video
    imageio.mimsave(f"./assets/output_{tt}.mp4", [img_as_ubyte(frame) for frame in predictions], fps=fps)


def greet(img, video):
    tt = str(time.time())
    os.replace(video, f"assets/ref_{tt}.mp4")
    img.save(f"assets/img_{tt}.jpg")
    create_video(tt)
    return f"./assets/output_{tt}.mp4"


iface = gr.Interface(fn=greet, inputs=[gr.inputs.Image(type="pil"), gr.inputs.Video()], outputs=gr.outputs.Video())  # the output must be an output component
iface.launch()
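For local debugging it can help to run the same pipeline that app.py wires up, but without Gradio. The sketch below is an illustration only, not part of the commit: it assumes a portrait at test.png, a short clip at test.mp4 and the vox.pth.tar checkpoint in the working directory, and it reuses load_checkpoints and make_animation from model.py.

# Hypothetical headless run of the same pipeline app.py uses (illustration only).
import imageio
import torch
from skimage import img_as_ubyte
from skimage.transform import resize

from model import load_checkpoints, make_animation

device = torch.device('cpu')
nets = load_checkpoints(config_path='./config/vox-256.yaml',
                        checkpoint_path='vox.pth.tar', device=device)
inpainting, kp_detector, dense_motion_network, avd_network = nets

# test.png / test.mp4 are placeholder inputs you supply yourself
source = resize(imageio.imread('test.png'), (256, 256))[..., :3]
reader = imageio.get_reader('test.mp4')
fps = reader.get_meta_data()['fps']
driving = [resize(frame, (256, 256))[..., :3] for frame in reader]
reader.close()

predictions = make_animation(source, driving, inpainting, kp_detector,
                             dense_motion_network, avd_network,
                             device=device, mode='relative')
imageio.mimsave('headless_output.mp4',
                [img_as_ubyte(frame) for frame in predictions], fps=fps)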
config/vox-256.yaml
ADDED
@@ -0,0 +1,74 @@
dataset_params:
  root_dir: ../vox
  frame_shape: null
  id_sampling: True
  augmentation_params:
    flip_param:
      horizontal_flip: True
      time_flip: True
    jitter_param:
      brightness: 0.1
      contrast: 0.1
      saturation: 0.1
      hue: 0.1


model_params:
  common_params:
    num_tps: 10
    num_channels: 3
    bg: True
    multi_mask: True
  generator_params:
    block_expansion: 64
    max_features: 512
    num_down_blocks: 3
  dense_motion_params:
    block_expansion: 64
    max_features: 1024
    num_blocks: 5
    scale_factor: 0.25
  avd_network_params:
    id_bottle_size: 128
    pose_bottle_size: 128


train_params:
  num_epochs: 100
  num_repeats: 75
  epoch_milestones: [70, 90]
  lr_generator: 2.0e-4
  batch_size: 28
  scales: [1, 0.5, 0.25, 0.125]
  dataloader_workers: 12
  checkpoint_freq: 50
  dropout_epoch: 35
  dropout_maxp: 0.3
  dropout_startp: 0.1
  dropout_inc_epoch: 10
  bg_start: 10
  transform_params:
    sigma_affine: 0.05
    sigma_tps: 0.005
    points_tps: 5
  loss_weights:
    perceptual: [10, 10, 10, 10, 10]
    equivariance_value: 10
    warp_loss: 10
    bg: 10

train_avd_params:
  num_epochs: 200
  num_repeats: 300
  batch_size: 256
  dataloader_workers: 24
  checkpoint_freq: 50
  epoch_milestones: [140, 180]
  lr: 1.0e-3
  lambda_shift: 1
  random_scale: 0.25

visualizer_params:
  kp_size: 5
  draw_border: True
  colormap: 'gist_rainbow'
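The model_params block above is exactly what model.py unpacks when it builds the networks. The sketch below is a reading aid, not new functionality: it loads the YAML and shows which sub-dictionary feeds which constructor, mirroring load_checkpoints, and it assumes it is run from the repository root.

# Illustration of how config sections map to module constructors (run from the repo root).
import yaml

from modules.inpainting_network import InpaintingNetwork
from modules.keypoint_detector import KPDetector
from modules.dense_motion import DenseMotionNetwork
from modules.avd_network import AVDNetwork

with open('./config/vox-256.yaml') as f:
    config = yaml.full_load(f)

common = config['model_params']['common_params']  # num_tps, num_channels, bg, multi_mask
inpainting = InpaintingNetwork(**config['model_params']['generator_params'], **common)
kp_detector = KPDetector(**common)
dense_motion = DenseMotionNetwork(**config['model_params']['dense_motion_params'], **common)
avd = AVDNetwork(num_tps=common['num_tps'], **config['model_params']['avd_network_params'])

print('keypoints per frame:', common['num_tps'] * 5)  # 10 TPS transforms x 5 points = 50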
model.py
ADDED
@@ -0,0 +1,119 @@
import matplotlib
matplotlib.use('Agg')
import sys
import yaml
from argparse import ArgumentParser
from tqdm import tqdm
from scipy.spatial import ConvexHull
import numpy as np
import imageio
from skimage.transform import resize
from skimage import img_as_ubyte
import torch
from modules.inpainting_network import InpaintingNetwork
from modules.keypoint_detector import KPDetector
from modules.dense_motion import DenseMotionNetwork
from modules.avd_network import AVDNetwork

def load_checkpoints(config_path, checkpoint_path, device):
    with open(config_path) as f:
        config = yaml.full_load(f)

    inpainting = InpaintingNetwork(**config['model_params']['generator_params'],
                                   **config['model_params']['common_params'])
    kp_detector = KPDetector(**config['model_params']['common_params'])
    dense_motion_network = DenseMotionNetwork(**config['model_params']['common_params'],
                                              **config['model_params']['dense_motion_params'])
    avd_network = AVDNetwork(num_tps=config['model_params']['common_params']['num_tps'],
                             **config['model_params']['avd_network_params'])
    kp_detector.to(device)
    dense_motion_network.to(device)
    inpainting.to(device)
    avd_network.to(device)

    checkpoint = torch.load(checkpoint_path, map_location=device)

    inpainting.load_state_dict(checkpoint['inpainting_network'])
    kp_detector.load_state_dict(checkpoint['kp_detector'])
    dense_motion_network.load_state_dict(checkpoint['dense_motion_network'])
    if 'avd_network' in checkpoint:
        avd_network.load_state_dict(checkpoint['avd_network'])

    inpainting.eval()
    kp_detector.eval()
    dense_motion_network.eval()
    avd_network.eval()

    return inpainting, kp_detector, dense_motion_network, avd_network

def relative_kp(kp_source, kp_driving, kp_driving_initial):

    source_area = ConvexHull(kp_source['fg_kp'][0].data.cpu().numpy()).volume
    driving_area = ConvexHull(kp_driving_initial['fg_kp'][0].data.cpu().numpy()).volume
    adapt_movement_scale = np.sqrt(source_area) / np.sqrt(driving_area)

    kp_new = {k: v for k, v in kp_driving.items()}

    kp_value_diff = (kp_driving['fg_kp'] - kp_driving_initial['fg_kp'])
    kp_value_diff *= adapt_movement_scale
    kp_new['fg_kp'] = kp_value_diff + kp_source['fg_kp']

    return kp_new

def make_animation(source_image, driving_video, inpainting_network, kp_detector, dense_motion_network, avd_network, device, mode='relative'):
    assert mode in ['standard', 'relative', 'avd']
    with torch.no_grad():
        predictions = []
        source = torch.tensor(source_image[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
        source = source.to(device)
        driving = torch.tensor(np.array(driving_video)[np.newaxis].astype(np.float32)).permute(0, 4, 1, 2, 3).to(device)
        kp_source = kp_detector(source)
        kp_driving_initial = kp_detector(driving[:, :, 0])

        for frame_idx in tqdm(range(driving.shape[2])):
            driving_frame = driving[:, :, frame_idx]
            driving_frame = driving_frame.to(device)
            kp_driving = kp_detector(driving_frame)
            if mode == 'standard':
                kp_norm = kp_driving
            elif mode == 'relative':
                kp_norm = relative_kp(kp_source=kp_source, kp_driving=kp_driving,
                                      kp_driving_initial=kp_driving_initial)
            elif mode == 'avd':
                kp_norm = avd_network(kp_source, kp_driving)
            dense_motion = dense_motion_network(source_image=source, kp_driving=kp_norm,
                                                kp_source=kp_source, bg_param=None,
                                                dropout_flag=False)
            out = inpainting_network(source, dense_motion)

            predictions.append(np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0])
        return predictions


def find_best_frame(source, driving, cpu):
    import face_alignment

    def normalize_kp(kp):
        kp = kp - kp.mean(axis=0, keepdims=True)
        area = ConvexHull(kp[:, :2]).volume
        area = np.sqrt(area)
        kp[:, :2] = kp[:, :2] / area
        return kp

    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=True,
                                      device='cpu' if cpu else 'cuda')
    kp_source = fa.get_landmarks(255 * source)[0]
    kp_source = normalize_kp(kp_source)
    norm = float('inf')
    frame_num = 0
    for i, image in tqdm(enumerate(driving)):
        try:
            kp_driving = fa.get_landmarks(255 * image)[0]
            kp_driving = normalize_kp(kp_driving)
            new_norm = (np.abs(kp_source - kp_driving) ** 2).sum()
            if new_norm < norm:
                norm = new_norm
                frame_num = i
        except:
            pass
    return frame_num
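relative_kp is what makes 'relative' mode robust to a size mismatch between the source face and the first driving frame: the keypoint displacement is rescaled by the square root of the ratio of the two convex-hull areas before being added to the source keypoints. A toy check with made-up keypoint tensors, assuming it is run from the repository root (illustration only):

# Toy check of relative_kp with random keypoints (illustration only).
import torch
from model import relative_kp

torch.manual_seed(0)
kp_source = {'fg_kp': torch.rand(1, 50, 2) * 2 - 1}            # 50 keypoints in [-1, 1]
kp_driving_initial = {'fg_kp': torch.rand(1, 50, 2) * 2 - 1}
# driving keypoints = initial driving keypoints shifted by a constant offset
kp_driving = {'fg_kp': kp_driving_initial['fg_kp'] + 0.1}

kp_new = relative_kp(kp_source, kp_driving, kp_driving_initial)
# the 0.1 offset is rescaled by sqrt(source_area / driving_area), not copied verbatim
print((kp_new['fg_kp'] - kp_source['fg_kp']).mean())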
modules/.DS_Store
ADDED
Binary file (6.15 kB)
modules/__pycache__/avd_network.cpython-310.pyc
ADDED
Binary file (1.57 kB)
modules/__pycache__/dense_motion.cpython-310.pyc
ADDED
Binary file (5.67 kB)
modules/__pycache__/inpainting_network.cpython-310.pyc
ADDED
Binary file (3.75 kB)
modules/__pycache__/keypoint_detector.cpython-310.pyc
ADDED
Binary file (1.12 kB)
modules/__pycache__/util.cpython-310.pyc
ADDED
Binary file (10.8 kB)
modules/avd_network.py
ADDED
@@ -0,0 +1,65 @@
import torch
from torch import nn


class AVDNetwork(nn.Module):
    """
    Animation via Disentanglement network
    """

    def __init__(self, num_tps, id_bottle_size=64, pose_bottle_size=64):
        super(AVDNetwork, self).__init__()
        input_size = 5*2 * num_tps
        self.num_tps = num_tps

        self.id_encoder = nn.Sequential(
            nn.Linear(input_size, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Linear(256, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, id_bottle_size)
        )

        self.pose_encoder = nn.Sequential(
            nn.Linear(input_size, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Linear(256, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, pose_bottle_size)
        )

        self.decoder = nn.Sequential(
            nn.Linear(pose_bottle_size + id_bottle_size, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Linear(256, input_size)
        )

    def forward(self, kp_source, kp_random):

        bs = kp_source['fg_kp'].shape[0]

        pose_emb = self.pose_encoder(kp_random['fg_kp'].view(bs, -1))
        id_emb = self.id_encoder(kp_source['fg_kp'].view(bs, -1))

        rec = self.decoder(torch.cat([pose_emb, id_emb], dim=1))

        rec = {'fg_kp': rec.view(bs, self.num_tps*5, -1)}
        return rec
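AVDNetwork only ever sees flattened keypoint dictionaries: both encoders take a (batch, num_tps*5*2) vector, and the decoder reconstructs keypoints of the same shape, combining identity from kp_source with pose from kp_random. A quick shape check with random inputs, assuming it is run from the repository root (illustration only):

# Shape check for AVDNetwork with random keypoint dictionaries (illustration only).
import torch
from modules.avd_network import AVDNetwork

avd = AVDNetwork(num_tps=10, id_bottle_size=128, pose_bottle_size=128).eval()
kp_source = {'fg_kp': torch.rand(2, 50, 2) * 2 - 1}
kp_random = {'fg_kp': torch.rand(2, 50, 2) * 2 - 1}

with torch.no_grad():
    rec = avd(kp_source, kp_random)
print(rec['fg_kp'].shape)   # torch.Size([2, 50, 2])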
modules/bg_motion_predictor.py
ADDED
@@ -0,0 +1,24 @@
from torch import nn
import torch
from torchvision import models

class BGMotionPredictor(nn.Module):
    """
    Module for background estimation; returns a single transformation parametrized as a 3x3 matrix. The third row is [0 0 1].
    """

    def __init__(self):
        super(BGMotionPredictor, self).__init__()
        self.bg_encoder = models.resnet18(pretrained=False)
        self.bg_encoder.conv1 = nn.Conv2d(6, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        num_features = self.bg_encoder.fc.in_features
        self.bg_encoder.fc = nn.Linear(num_features, 6)
        self.bg_encoder.fc.weight.data.zero_()
        self.bg_encoder.fc.bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))

    def forward(self, source_image, driving_image):
        bs = source_image.shape[0]
        out = torch.eye(3).unsqueeze(0).repeat(bs, 1, 1).type(source_image.type())
        prediction = self.bg_encoder(torch.cat([source_image, driving_image], dim=1))
        out[:, :2, :] = prediction.view(bs, 2, 3)
        return out
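Because the final fully connected layer is zero-initialized with bias [1, 0, 0, 0, 1, 0], the predictor outputs an exact identity transform for every frame pair until training updates it, which is what allows the background branch to be switched on mid-training (bg_start in the config). A small check, run from the repository root (illustration only):

# The freshly constructed predictor returns identity 3x3 matrices (illustration only).
import torch
from modules.bg_motion_predictor import BGMotionPredictor

bg = BGMotionPredictor().eval()
src = torch.rand(2, 3, 64, 64)
drv = torch.rand(2, 3, 64, 64)
with torch.no_grad():
    out = bg(src, drv)
print(out[0])                                               # identity matrix
print(torch.allclose(out, torch.eye(3).expand(2, 3, 3)))    # True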
modules/dense_motion.py
ADDED
@@ -0,0 +1,164 @@
from torch import nn
import torch.nn.functional as F
import torch
from modules.util import Hourglass, AntiAliasInterpolation2d, make_coordinate_grid, kp2gaussian
from modules.util import to_homogeneous, from_homogeneous, UpBlock2d, TPS
import math

class DenseMotionNetwork(nn.Module):
    """
    Module that estimates an optical flow and multi-resolution occlusion masks
    from K TPS transformations and an affine transformation.
    """

    def __init__(self, block_expansion, num_blocks, max_features, num_tps, num_channels,
                 scale_factor=0.25, bg=False, multi_mask=True, kp_variance=0.01):
        super(DenseMotionNetwork, self).__init__()

        if scale_factor != 1:
            self.down = AntiAliasInterpolation2d(num_channels, scale_factor)
        self.scale_factor = scale_factor
        self.multi_mask = multi_mask

        self.hourglass = Hourglass(block_expansion=block_expansion, in_features=(num_channels * (num_tps+1) + num_tps*5+1),
                                   max_features=max_features, num_blocks=num_blocks)

        hourglass_output_size = self.hourglass.out_channels
        self.maps = nn.Conv2d(hourglass_output_size[-1], num_tps + 1, kernel_size=(7, 7), padding=(3, 3))

        if multi_mask:
            up = []
            self.up_nums = int(math.log(1/scale_factor, 2))
            self.occlusion_num = 4

            channel = [hourglass_output_size[-1]//(2**i) for i in range(self.up_nums)]
            for i in range(self.up_nums):
                up.append(UpBlock2d(channel[i], channel[i]//2, kernel_size=3, padding=1))
            self.up = nn.ModuleList(up)

            channel = [hourglass_output_size[-i-1] for i in range(self.occlusion_num-self.up_nums)[::-1]]
            for i in range(self.up_nums):
                channel.append(hourglass_output_size[-1]//(2**(i+1)))
            occlusion = []

            for i in range(self.occlusion_num):
                occlusion.append(nn.Conv2d(channel[i], 1, kernel_size=(7, 7), padding=(3, 3)))
            self.occlusion = nn.ModuleList(occlusion)
        else:
            occlusion = [nn.Conv2d(hourglass_output_size[-1], 1, kernel_size=(7, 7), padding=(3, 3))]
            self.occlusion = nn.ModuleList(occlusion)

        self.num_tps = num_tps
        self.bg = bg
        self.kp_variance = kp_variance


    def create_heatmap_representations(self, source_image, kp_driving, kp_source):

        spatial_size = source_image.shape[2:]
        gaussian_driving = kp2gaussian(kp_driving['fg_kp'], spatial_size=spatial_size, kp_variance=self.kp_variance)
        gaussian_source = kp2gaussian(kp_source['fg_kp'], spatial_size=spatial_size, kp_variance=self.kp_variance)
        heatmap = gaussian_driving - gaussian_source

        zeros = torch.zeros(heatmap.shape[0], 1, spatial_size[0], spatial_size[1]).type(heatmap.type()).to(heatmap.device)
        heatmap = torch.cat([zeros, heatmap], dim=1)

        return heatmap

    def create_transformations(self, source_image, kp_driving, kp_source, bg_param):
        # K TPS transformations
        bs, _, h, w = source_image.shape
        kp_1 = kp_driving['fg_kp']
        kp_2 = kp_source['fg_kp']
        kp_1 = kp_1.view(bs, -1, 5, 2)
        kp_2 = kp_2.view(bs, -1, 5, 2)
        trans = TPS(mode='kp', bs=bs, kp_1=kp_1, kp_2=kp_2)
        driving_to_source = trans.transform_frame(source_image)

        identity_grid = make_coordinate_grid((h, w), type=kp_1.type()).to(kp_1.device)
        identity_grid = identity_grid.view(1, 1, h, w, 2)
        identity_grid = identity_grid.repeat(bs, 1, 1, 1, 1)

        # affine background transformation
        if not (bg_param is None):
            identity_grid = to_homogeneous(identity_grid)
            identity_grid = torch.matmul(bg_param.view(bs, 1, 1, 1, 3, 3), identity_grid.unsqueeze(-1)).squeeze(-1)
            identity_grid = from_homogeneous(identity_grid)

        transformations = torch.cat([identity_grid, driving_to_source], dim=1)
        return transformations

    def create_deformed_source_image(self, source_image, transformations):

        bs, _, h, w = source_image.shape
        source_repeat = source_image.unsqueeze(1).unsqueeze(1).repeat(1, self.num_tps + 1, 1, 1, 1, 1)
        source_repeat = source_repeat.view(bs * (self.num_tps + 1), -1, h, w)
        transformations = transformations.view((bs * (self.num_tps + 1), h, w, -1))
        deformed = F.grid_sample(source_repeat, transformations, align_corners=True)
        deformed = deformed.view((bs, self.num_tps+1, -1, h, w))
        return deformed

    def dropout_softmax(self, X, P):
        '''
        Dropout for TPS transformations. Eq(7) and Eq(8) in the paper.
        '''
        drop = (torch.rand(X.shape[0], X.shape[1]) < (1-P)).type(X.type()).to(X.device)
        drop[..., 0] = 1
        drop = drop.repeat(X.shape[2], X.shape[3], 1, 1).permute(2, 3, 0, 1)

        maxx = X.max(1).values.unsqueeze_(1)
        X = X - maxx
        X_exp = X.exp()
        X[:, 1:, ...] /= (1-P)
        mask_bool = (drop == 0)
        X_exp = X_exp.masked_fill(mask_bool, 0)
        partition = X_exp.sum(dim=1, keepdim=True) + 1e-6
        return X_exp / partition

    def forward(self, source_image, kp_driving, kp_source, bg_param=None, dropout_flag=False, dropout_p=0):
        if self.scale_factor != 1:
            source_image = self.down(source_image)

        bs, _, h, w = source_image.shape

        out_dict = dict()
        heatmap_representation = self.create_heatmap_representations(source_image, kp_driving, kp_source)
        transformations = self.create_transformations(source_image, kp_driving, kp_source, bg_param)
        deformed_source = self.create_deformed_source_image(source_image, transformations)
        out_dict['deformed_source'] = deformed_source
        # out_dict['transformations'] = transformations
        deformed_source = deformed_source.view(bs, -1, h, w)
        input = torch.cat([heatmap_representation, deformed_source], dim=1)
        input = input.view(bs, -1, h, w)

        prediction = self.hourglass(input, mode=1)

        contribution_maps = self.maps(prediction[-1])
        if(dropout_flag):
            contribution_maps = self.dropout_softmax(contribution_maps, dropout_p)
        else:
            contribution_maps = F.softmax(contribution_maps, dim=1)
        out_dict['contribution_maps'] = contribution_maps

        # Combine the K+1 transformations
        # Eq(6) in the paper
        contribution_maps = contribution_maps.unsqueeze(2)
        transformations = transformations.permute(0, 1, 4, 2, 3)
        deformation = (transformations * contribution_maps).sum(dim=1)
        deformation = deformation.permute(0, 2, 3, 1)

        out_dict['deformation'] = deformation  # Optical Flow

        occlusion_map = []
        if self.multi_mask:
            for i in range(self.occlusion_num-self.up_nums):
                occlusion_map.append(torch.sigmoid(self.occlusion[i](prediction[self.up_nums-self.occlusion_num+i])))
            prediction = prediction[-1]
            for i in range(self.up_nums):
                prediction = self.up[i](prediction)
                occlusion_map.append(torch.sigmoid(self.occlusion[i+self.occlusion_num-self.up_nums](prediction)))
        else:
            occlusion_map.append(torch.sigmoid(self.occlusion[0](prediction[-1])))

        out_dict['occlusion_map'] = occlusion_map  # Multi-resolution Occlusion Masks
        return out_dict
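forward returns a dictionary rather than a single flow: 'deformation' is the combined optical flow on the downscaled grid, and 'occlusion_map' is a list of masks at several resolutions when multi_mask is on. The sketch below instantiates the network with the vox-256 settings and prints what comes out for a dummy batch; it is an illustration only, assumes it runs from the repository root, and simply reports whatever shapes the code produces.

# Inspect DenseMotionNetwork outputs for a dummy 256x256 batch (illustration only).
import torch
from modules.dense_motion import DenseMotionNetwork

net = DenseMotionNetwork(block_expansion=64, num_blocks=5, max_features=1024,
                         num_tps=10, num_channels=3, scale_factor=0.25,
                         bg=True, multi_mask=True).eval()

source = torch.rand(1, 3, 256, 256)
kp_driving = {'fg_kp': torch.rand(1, 50, 2) * 2 - 1}
kp_source = {'fg_kp': torch.rand(1, 50, 2) * 2 - 1}

with torch.no_grad():
    out = net(source_image=source, kp_driving=kp_driving, kp_source=kp_source,
              bg_param=None, dropout_flag=False)

print(out['deformation'].shape)                   # flow on the downscaled grid
print([m.shape for m in out['occlusion_map']])    # one mask per resolution
print(out['contribution_maps'].shape)             # softmax over the K+1 transforms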
modules/inpainting_network.py
ADDED
@@ -0,0 +1,127 @@
import torch
from torch import nn
import torch.nn.functional as F
from modules.util import ResBlock2d, SameBlock2d, UpBlock2d, DownBlock2d
from modules.dense_motion import DenseMotionNetwork


class InpaintingNetwork(nn.Module):
    """
    Inpaint the missing regions and reconstruct the Driving image.
    """
    def __init__(self, num_channels, block_expansion, max_features, num_down_blocks, multi_mask=True, **kwargs):
        super(InpaintingNetwork, self).__init__()

        self.num_down_blocks = num_down_blocks
        self.multi_mask = multi_mask
        self.first = SameBlock2d(num_channels, block_expansion, kernel_size=(7, 7), padding=(3, 3))

        down_blocks = []
        up_blocks = []
        resblock = []
        for i in range(num_down_blocks):
            in_features = min(max_features, block_expansion * (2 ** i))
            out_features = min(max_features, block_expansion * (2 ** (i + 1)))
            down_blocks.append(DownBlock2d(in_features, out_features, kernel_size=(3, 3), padding=(1, 1)))
            decoder_in_feature = out_features * 2
            if i == num_down_blocks-1:
                decoder_in_feature = out_features
            up_blocks.append(UpBlock2d(decoder_in_feature, in_features, kernel_size=(3, 3), padding=(1, 1)))
            resblock.append(ResBlock2d(decoder_in_feature, kernel_size=(3, 3), padding=(1, 1)))
            resblock.append(ResBlock2d(decoder_in_feature, kernel_size=(3, 3), padding=(1, 1)))
        self.down_blocks = nn.ModuleList(down_blocks)
        self.up_blocks = nn.ModuleList(up_blocks[::-1])
        self.resblock = nn.ModuleList(resblock[::-1])

        self.final = nn.Conv2d(block_expansion, num_channels, kernel_size=(7, 7), padding=(3, 3))
        self.num_channels = num_channels

    def deform_input(self, inp, deformation):
        _, h_old, w_old, _ = deformation.shape
        _, _, h, w = inp.shape
        if h_old != h or w_old != w:
            deformation = deformation.permute(0, 3, 1, 2)
            deformation = F.interpolate(deformation, size=(h, w), mode='bilinear', align_corners=True)
            deformation = deformation.permute(0, 2, 3, 1)
        return F.grid_sample(inp, deformation, align_corners=True)

    def occlude_input(self, inp, occlusion_map):
        if not self.multi_mask:
            if inp.shape[2] != occlusion_map.shape[2] or inp.shape[3] != occlusion_map.shape[3]:
                occlusion_map = F.interpolate(occlusion_map, size=inp.shape[2:], mode='bilinear', align_corners=True)
        out = inp * occlusion_map
        return out

    def forward(self, source_image, dense_motion):
        out = self.first(source_image)
        encoder_map = [out]
        for i in range(len(self.down_blocks)):
            out = self.down_blocks[i](out)
            encoder_map.append(out)

        output_dict = {}
        output_dict['contribution_maps'] = dense_motion['contribution_maps']
        output_dict['deformed_source'] = dense_motion['deformed_source']

        occlusion_map = dense_motion['occlusion_map']
        output_dict['occlusion_map'] = occlusion_map

        deformation = dense_motion['deformation']
        out_ij = self.deform_input(out.detach(), deformation)
        out = self.deform_input(out, deformation)

        out_ij = self.occlude_input(out_ij, occlusion_map[0].detach())
        out = self.occlude_input(out, occlusion_map[0])

        warped_encoder_maps = []
        warped_encoder_maps.append(out_ij)

        for i in range(self.num_down_blocks):

            out = self.resblock[2*i](out)
            out = self.resblock[2*i+1](out)
            out = self.up_blocks[i](out)

            encode_i = encoder_map[-(i+2)]
            encode_ij = self.deform_input(encode_i.detach(), deformation)
            encode_i = self.deform_input(encode_i, deformation)

            occlusion_ind = 0
            if self.multi_mask:
                occlusion_ind = i+1
            encode_ij = self.occlude_input(encode_ij, occlusion_map[occlusion_ind].detach())
            encode_i = self.occlude_input(encode_i, occlusion_map[occlusion_ind])
            warped_encoder_maps.append(encode_ij)

            if(i == self.num_down_blocks-1):
                break

            out = torch.cat([out, encode_i], 1)

        deformed_source = self.deform_input(source_image, deformation)
        output_dict["deformed"] = deformed_source
        output_dict["warped_encoder_maps"] = warped_encoder_maps

        occlusion_last = occlusion_map[-1]
        if not self.multi_mask:
            occlusion_last = F.interpolate(occlusion_last, size=out.shape[2:], mode='bilinear', align_corners=True)

        out = out * (1 - occlusion_last) + encode_i
        out = self.final(out)
        out = torch.sigmoid(out)
        out = out * (1 - occlusion_last) + deformed_source * occlusion_last
        output_dict["prediction"] = out

        return output_dict

    def get_encode(self, driver_image, occlusion_map):
        out = self.first(driver_image)
        encoder_map = []
        encoder_map.append(self.occlude_input(out.detach(), occlusion_map[-1].detach()))
        for i in range(len(self.down_blocks)):
            out = self.down_blocks[i](out.detach())
            out_mask = self.occlude_input(out.detach(), occlusion_map[2-i].detach())
            encoder_map.append(out_mask.detach())

        return encoder_map
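The last lines of forward are the core compositing step: the inpainted decoder output is only used where the occlusion mask marks the warped source as unreliable, i.e. prediction = generated * (1 - mask) + warped_source * mask. A tiny standalone illustration of that blend with placeholder tensors (pure tensor math, no repo code needed):

# Standalone illustration of the occlusion-weighted blend used in forward().
import torch

warped_source = torch.rand(1, 3, 8, 8)   # source features warped by the flow
generated = torch.rand(1, 3, 8, 8)       # what the decoder inpainted
mask = torch.rand(1, 1, 8, 8)            # 1 = trust the warped source, 0 = use the inpainted output

prediction = generated * (1 - mask) + warped_source * mask
print(prediction.shape)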
modules/keypoint_detector.py
ADDED
@@ -0,0 +1,27 @@
from torch import nn
import torch
from torchvision import models

class KPDetector(nn.Module):
    """
    Predict K*5 keypoints.
    """

    def __init__(self, num_tps, **kwargs):
        super(KPDetector, self).__init__()
        self.num_tps = num_tps

        self.fg_encoder = models.resnet18(pretrained=False)
        num_features = self.fg_encoder.fc.in_features
        self.fg_encoder.fc = nn.Linear(num_features, num_tps*5*2)


    def forward(self, image):

        fg_kp = self.fg_encoder(image)
        bs, _, = fg_kp.shape
        fg_kp = torch.sigmoid(fg_kp)
        fg_kp = fg_kp * 2 - 1
        out = {'fg_kp': fg_kp.view(bs, self.num_tps*5, -1)}

        return out
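The detector is a plain ResNet-18 regressor: it outputs num_tps*5 = 50 keypoints per frame, squashed into [-1, 1] so they line up with the coordinate grid used by the TPS transforms. A minimal shape check, assuming it is run from the repository root (illustration only):

# Minimal shape check for KPDetector (illustration only).
import torch
from modules.keypoint_detector import KPDetector

kp_detector = KPDetector(num_tps=10).eval()
with torch.no_grad():
    out = kp_detector(torch.rand(1, 3, 256, 256))

print(out['fg_kp'].shape)                                      # torch.Size([1, 50, 2])
print(out['fg_kp'].min().item(), out['fg_kp'].max().item())    # values stay within [-1, 1]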
modules/model.py
ADDED
@@ -0,0 +1,182 @@
from torch import nn
import torch
import torch.nn.functional as F
from modules.util import AntiAliasInterpolation2d, TPS
from torchvision import models
import numpy as np


class Vgg19(torch.nn.Module):
    """
    Vgg19 network for perceptual loss. See Sec 3.3.
    """
    def __init__(self, requires_grad=False):
        super(Vgg19, self).__init__()
        vgg_pretrained_features = models.vgg19(pretrained=True).features
        self.slice1 = torch.nn.Sequential()
        self.slice2 = torch.nn.Sequential()
        self.slice3 = torch.nn.Sequential()
        self.slice4 = torch.nn.Sequential()
        self.slice5 = torch.nn.Sequential()
        for x in range(2):
            self.slice1.add_module(str(x), vgg_pretrained_features[x])
        for x in range(2, 7):
            self.slice2.add_module(str(x), vgg_pretrained_features[x])
        for x in range(7, 12):
            self.slice3.add_module(str(x), vgg_pretrained_features[x])
        for x in range(12, 21):
            self.slice4.add_module(str(x), vgg_pretrained_features[x])
        for x in range(21, 30):
            self.slice5.add_module(str(x), vgg_pretrained_features[x])

        self.mean = torch.nn.Parameter(data=torch.Tensor(np.array([0.485, 0.456, 0.406]).reshape((1, 3, 1, 1))),
                                       requires_grad=False)
        self.std = torch.nn.Parameter(data=torch.Tensor(np.array([0.229, 0.224, 0.225]).reshape((1, 3, 1, 1))),
                                      requires_grad=False)

        if not requires_grad:
            for param in self.parameters():
                param.requires_grad = False

    def forward(self, X):
        X = (X - self.mean) / self.std
        h_relu1 = self.slice1(X)
        h_relu2 = self.slice2(h_relu1)
        h_relu3 = self.slice3(h_relu2)
        h_relu4 = self.slice4(h_relu3)
        h_relu5 = self.slice5(h_relu4)
        out = [h_relu1, h_relu2, h_relu3, h_relu4, h_relu5]
        return out


class ImagePyramide(torch.nn.Module):
    """
    Create image pyramide for computing pyramide perceptual loss. See Sec 3.3
    """
    def __init__(self, scales, num_channels):
        super(ImagePyramide, self).__init__()
        downs = {}
        for scale in scales:
            downs[str(scale).replace('.', '-')] = AntiAliasInterpolation2d(num_channels, scale)
        self.downs = nn.ModuleDict(downs)

    def forward(self, x):
        out_dict = {}
        for scale, down_module in self.downs.items():
            out_dict['prediction_' + str(scale).replace('-', '.')] = down_module(x)
        return out_dict


def detach_kp(kp):
    return {key: value.detach() for key, value in kp.items()}


class GeneratorFullModel(torch.nn.Module):
    """
    Merge all generator related updates into single model for better multi-gpu usage
    """

    def __init__(self, kp_extractor, bg_predictor, dense_motion_network, inpainting_network, train_params, *kwargs):
        super(GeneratorFullModel, self).__init__()
        self.kp_extractor = kp_extractor
        self.inpainting_network = inpainting_network
        self.dense_motion_network = dense_motion_network

        self.bg_predictor = None
        if bg_predictor:
            self.bg_predictor = bg_predictor
            self.bg_start = train_params['bg_start']

        self.train_params = train_params
        self.scales = train_params['scales']

        self.pyramid = ImagePyramide(self.scales, inpainting_network.num_channels)
        if torch.cuda.is_available():
            self.pyramid = self.pyramid.cuda()

        self.loss_weights = train_params['loss_weights']
        self.dropout_epoch = train_params['dropout_epoch']
        self.dropout_maxp = train_params['dropout_maxp']
        self.dropout_inc_epoch = train_params['dropout_inc_epoch']
        self.dropout_startp = train_params['dropout_startp']

        if sum(self.loss_weights['perceptual']) != 0:
            self.vgg = Vgg19()
            if torch.cuda.is_available():
                self.vgg = self.vgg.cuda()


    def forward(self, x, epoch):
        kp_source = self.kp_extractor(x['source'])
        kp_driving = self.kp_extractor(x['driving'])
        bg_param = None
        if self.bg_predictor:
            if(epoch >= self.bg_start):
                bg_param = self.bg_predictor(x['source'], x['driving'])

        if(epoch >= self.dropout_epoch):
            dropout_flag = False
            dropout_p = 0
        else:
            # dropout_p will linearly increase from dropout_startp to dropout_maxp
            dropout_flag = True
            dropout_p = min(epoch/self.dropout_inc_epoch * self.dropout_maxp + self.dropout_startp, self.dropout_maxp)

        dense_motion = self.dense_motion_network(source_image=x['source'], kp_driving=kp_driving,
                                                 kp_source=kp_source, bg_param=bg_param,
                                                 dropout_flag=dropout_flag, dropout_p=dropout_p)
        generated = self.inpainting_network(x['source'], dense_motion)
        generated.update({'kp_source': kp_source, 'kp_driving': kp_driving})

        loss_values = {}

        pyramide_real = self.pyramid(x['driving'])
        pyramide_generated = self.pyramid(generated['prediction'])

        # reconstruction loss
        if sum(self.loss_weights['perceptual']) != 0:
            value_total = 0
            for scale in self.scales:
                x_vgg = self.vgg(pyramide_generated['prediction_' + str(scale)])
                y_vgg = self.vgg(pyramide_real['prediction_' + str(scale)])

                for i, weight in enumerate(self.loss_weights['perceptual']):
                    value = torch.abs(x_vgg[i] - y_vgg[i].detach()).mean()
                    value_total += self.loss_weights['perceptual'][i] * value
            loss_values['perceptual'] = value_total

        # equivariance loss
        if self.loss_weights['equivariance_value'] != 0:
            transform_random = TPS(mode='random', bs=x['driving'].shape[0], **self.train_params['transform_params'])
            transform_grid = transform_random.transform_frame(x['driving'])
            transformed_frame = F.grid_sample(x['driving'], transform_grid, padding_mode="reflection", align_corners=True)
            transformed_kp = self.kp_extractor(transformed_frame)

            generated['transformed_frame'] = transformed_frame
            generated['transformed_kp'] = transformed_kp

            warped = transform_random.warp_coordinates(transformed_kp['fg_kp'])
            kp_d = kp_driving['fg_kp']
            value = torch.abs(kp_d - warped).mean()
            loss_values['equivariance_value'] = self.loss_weights['equivariance_value'] * value

        # warp loss
        if self.loss_weights['warp_loss'] != 0:
            occlusion_map = generated['occlusion_map']
            encode_map = self.inpainting_network.get_encode(x['driving'], occlusion_map)
            decode_map = generated['warped_encoder_maps']
            value = 0
            for i in range(len(encode_map)):
                value += torch.abs(encode_map[i]-decode_map[-i-1]).mean()

            loss_values['warp_loss'] = self.loss_weights['warp_loss'] * value

        # bg loss
        if self.bg_predictor and epoch >= self.bg_start and self.loss_weights['bg'] != 0:
            bg_param_reverse = self.bg_predictor(x['driving'], x['source'])
            value = torch.matmul(bg_param, bg_param_reverse)
            eye = torch.eye(3).view(1, 1, 3, 3).type(value.type())
            value = torch.abs(eye - value).mean()
            loss_values['bg'] = self.loss_weights['bg'] * value

        return loss_values, generated
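GeneratorFullModel is only used at training time; it bundles the keypoint extractor, background predictor, dense motion network and inpainting network so that one forward pass returns both the generated frame and a dictionary of losses. A hedged sketch of how a training step could consume it is below; the dataloader, optimizer and epoch loop are assumptions, not part of this commit.

# Hypothetical training step around GeneratorFullModel (illustration only;
# the batch source, optimizer and epoch handling are assumptions, not repo code).
import torch
from modules.model import GeneratorFullModel

def training_step(full_model, optimizer, batch, epoch):
    # full_model is a GeneratorFullModel built from the networks and train_params above;
    # batch is a dict with 'source' and 'driving' image tensors of shape (B, 3, H, W)
    losses, generated = full_model(batch, epoch)
    loss = sum(v.mean() for v in losses.values())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return {k: v.mean().item() for k, v in losses.items()}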
modules/util.py
ADDED
@@ -0,0 +1,349 @@
from torch import nn
import torch.nn.functional as F
import torch


class TPS:
    '''
    TPS transformation, mode 'kp' for Eq(2) in the paper, mode 'random' for equivariance loss.
    '''
    def __init__(self, mode, bs, **kwargs):
        self.bs = bs
        self.mode = mode
        if mode == 'random':
            noise = torch.normal(mean=0, std=kwargs['sigma_affine'] * torch.ones([bs, 2, 3]))
            self.theta = noise + torch.eye(2, 3).view(1, 2, 3)
            self.control_points = make_coordinate_grid((kwargs['points_tps'], kwargs['points_tps']), type=noise.type())
            self.control_points = self.control_points.unsqueeze(0)
            self.control_params = torch.normal(mean=0,
                                               std=kwargs['sigma_tps'] * torch.ones([bs, 1, kwargs['points_tps'] ** 2]))
        elif mode == 'kp':
            kp_1 = kwargs["kp_1"]
            kp_2 = kwargs["kp_2"]
            device = kp_1.device
            kp_type = kp_1.type()
            self.gs = kp_1.shape[1]
            n = kp_1.shape[2]
            K = torch.norm(kp_1[:, :, :, None] - kp_1[:, :, None, :], dim=4, p=2)
            K = K**2
            K = K * torch.log(K+1e-9)

            one1 = torch.ones(self.bs, kp_1.shape[1], kp_1.shape[2], 1).to(device).type(kp_type)
            kp_1p = torch.cat([kp_1, one1], 3)

            zero = torch.zeros(self.bs, kp_1.shape[1], 3, 3).to(device).type(kp_type)
            P = torch.cat([kp_1p, zero], 2)
            L = torch.cat([K, kp_1p.permute(0, 1, 3, 2)], 2)
            L = torch.cat([L, P], 3)

            zero = torch.zeros(self.bs, kp_1.shape[1], 3, 2).to(device).type(kp_type)
            Y = torch.cat([kp_2, zero], 2)
            one = torch.eye(L.shape[2]).expand(L.shape).to(device).type(kp_type)*0.01
            L = L + one

            param = torch.matmul(torch.inverse(L), Y)
            self.theta = param[:, :, n:, :].permute(0, 1, 3, 2)

            self.control_points = kp_1
            self.control_params = param[:, :, :n, :]
        else:
            raise Exception("Error TPS mode")

    def transform_frame(self, frame):
        grid = make_coordinate_grid(frame.shape[2:], type=frame.type()).unsqueeze(0).to(frame.device)
        grid = grid.view(1, frame.shape[2] * frame.shape[3], 2)
        shape = [self.bs, frame.shape[2], frame.shape[3], 2]
        if self.mode == 'kp':
            shape.insert(1, self.gs)
        grid = self.warp_coordinates(grid).view(*shape)
        return grid

    def warp_coordinates(self, coordinates):
        theta = self.theta.type(coordinates.type()).to(coordinates.device)
        control_points = self.control_points.type(coordinates.type()).to(coordinates.device)
        control_params = self.control_params.type(coordinates.type()).to(coordinates.device)

        if self.mode == 'kp':
            transformed = torch.matmul(theta[:, :, :, :2], coordinates.permute(0, 2, 1)) + theta[:, :, :, 2:]

            distances = coordinates.view(coordinates.shape[0], 1, 1, -1, 2) - control_points.view(self.bs, control_points.shape[1], -1, 1, 2)

            distances = distances ** 2
            result = distances.sum(-1)
            result = result * torch.log(result + 1e-9)
            result = torch.matmul(result.permute(0, 1, 3, 2), control_params)
            transformed = transformed.permute(0, 1, 3, 2) + result

        elif self.mode == 'random':
            theta = theta.unsqueeze(1)
            transformed = torch.matmul(theta[:, :, :, :2], coordinates.unsqueeze(-1)) + theta[:, :, :, 2:]
            transformed = transformed.squeeze(-1)
            ances = coordinates.view(coordinates.shape[0], -1, 1, 2) - control_points.view(1, 1, -1, 2)
            distances = ances ** 2

            result = distances.sum(-1)
            result = result * torch.log(result + 1e-9)
            result = result * control_params
            result = result.sum(dim=2).view(self.bs, coordinates.shape[1], 1)
            transformed = transformed + result
        else:
            raise Exception("Error TPS mode")

        return transformed


def kp2gaussian(kp, spatial_size, kp_variance):
    """
    Transform a keypoint into gaussian like representation
    """

    coordinate_grid = make_coordinate_grid(spatial_size, kp.type()).to(kp.device)
    number_of_leading_dimensions = len(kp.shape) - 1
    shape = (1,) * number_of_leading_dimensions + coordinate_grid.shape
    coordinate_grid = coordinate_grid.view(*shape)
    repeats = kp.shape[:number_of_leading_dimensions] + (1, 1, 1)
    coordinate_grid = coordinate_grid.repeat(*repeats)

    # Preprocess kp shape
    shape = kp.shape[:number_of_leading_dimensions] + (1, 1, 2)
    kp = kp.view(*shape)

    mean_sub = (coordinate_grid - kp)

    out = torch.exp(-0.5 * (mean_sub ** 2).sum(-1) / kp_variance)

    return out


def make_coordinate_grid(spatial_size, type):
    """
    Create a meshgrid [-1,1] x [-1,1] of given spatial_size.
    """
    h, w = spatial_size
    x = torch.arange(w).type(type)
    y = torch.arange(h).type(type)

    x = (2 * (x / (w - 1)) - 1)
    y = (2 * (y / (h - 1)) - 1)

    yy = y.view(-1, 1).repeat(1, w)
    xx = x.view(1, -1).repeat(h, 1)

    meshed = torch.cat([xx.unsqueeze_(2), yy.unsqueeze_(2)], 2)

    return meshed


class ResBlock2d(nn.Module):
    """
    Res block, preserve spatial resolution.
    """

    def __init__(self, in_features, kernel_size, padding):
        super(ResBlock2d, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=in_features, out_channels=in_features, kernel_size=kernel_size,
                               padding=padding)
        self.conv2 = nn.Conv2d(in_channels=in_features, out_channels=in_features, kernel_size=kernel_size,
                               padding=padding)
        self.norm1 = nn.InstanceNorm2d(in_features, affine=True)
        self.norm2 = nn.InstanceNorm2d(in_features, affine=True)

    def forward(self, x):
        out = self.norm1(x)
        out = F.relu(out)
        out = self.conv1(out)
        out = self.norm2(out)
        out = F.relu(out)
        out = self.conv2(out)
        out += x
        return out


class UpBlock2d(nn.Module):
    """
    Upsampling block for use in decoder.
    """

    def __init__(self, in_features, out_features, kernel_size=3, padding=1, groups=1):
        super(UpBlock2d, self).__init__()

        self.conv = nn.Conv2d(in_channels=in_features, out_channels=out_features, kernel_size=kernel_size,
                              padding=padding, groups=groups)
        self.norm = nn.InstanceNorm2d(out_features, affine=True)

    def forward(self, x):
        out = F.interpolate(x, scale_factor=2)
        out = self.conv(out)
        out = self.norm(out)
        out = F.relu(out)
        return out


class DownBlock2d(nn.Module):
    """
    Downsampling block for use in encoder.
    """

    def __init__(self, in_features, out_features, kernel_size=3, padding=1, groups=1):
        super(DownBlock2d, self).__init__()
        self.conv = nn.Conv2d(in_channels=in_features, out_channels=out_features, kernel_size=kernel_size,
                              padding=padding, groups=groups)
        self.norm = nn.InstanceNorm2d(out_features, affine=True)
        self.pool = nn.AvgPool2d(kernel_size=(2, 2))

    def forward(self, x):
        out = self.conv(x)
        out = self.norm(out)
        out = F.relu(out)
        out = self.pool(out)
        return out


class SameBlock2d(nn.Module):
    """
    Simple block, preserve spatial resolution.
    """

    def __init__(self, in_features, out_features, groups=1, kernel_size=3, padding=1):
        super(SameBlock2d, self).__init__()
        self.conv = nn.Conv2d(in_channels=in_features, out_channels=out_features,
                              kernel_size=kernel_size, padding=padding, groups=groups)
        self.norm = nn.InstanceNorm2d(out_features, affine=True)

    def forward(self, x):
        out = self.conv(x)
        out = self.norm(out)
        out = F.relu(out)
        return out


class Encoder(nn.Module):
    """
    Hourglass Encoder
    """

    def __init__(self, block_expansion, in_features, num_blocks=3, max_features=256):
        super(Encoder, self).__init__()

        down_blocks = []
        for i in range(num_blocks):
            down_blocks.append(DownBlock2d(in_features if i == 0 else min(max_features, block_expansion * (2 ** i)),
                                           min(max_features, block_expansion * (2 ** (i + 1))),
                                           kernel_size=3, padding=1))
        self.down_blocks = nn.ModuleList(down_blocks)

    def forward(self, x):
        outs = [x]
        # print('encoder:', outs[-1].shape)
        for down_block in self.down_blocks:
            outs.append(down_block(outs[-1]))
            # print('encoder:', outs[-1].shape)
        return outs


class Decoder(nn.Module):
    """
    Hourglass Decoder
    """

    def __init__(self, block_expansion, in_features, num_blocks=3, max_features=256):
        super(Decoder, self).__init__()

        up_blocks = []
        self.out_channels = []
        for i in range(num_blocks)[::-1]:
            in_filters = (1 if i == num_blocks - 1 else 2) * min(max_features, block_expansion * (2 ** (i + 1)))
            self.out_channels.append(in_filters)
            out_filters = min(max_features, block_expansion * (2 ** i))
            up_blocks.append(UpBlock2d(in_filters, out_filters, kernel_size=3, padding=1))

        self.up_blocks = nn.ModuleList(up_blocks)
        self.out_channels.append(block_expansion + in_features)
        # self.out_filters = block_expansion + in_features

    def forward(self, x, mode=0):
        out = x.pop()
        outs = []
        for up_block in self.up_blocks:
            out = up_block(out)
            skip = x.pop()
            out = torch.cat([out, skip], dim=1)
            outs.append(out)
        if(mode == 0):
            return out
        else:
            return outs


class Hourglass(nn.Module):
    """
    Hourglass architecture.
    """

    def __init__(self, block_expansion, in_features, num_blocks=3, max_features=256):
        super(Hourglass, self).__init__()
        self.encoder = Encoder(block_expansion, in_features, num_blocks, max_features)
        self.decoder = Decoder(block_expansion, in_features, num_blocks, max_features)
        self.out_channels = self.decoder.out_channels
        # self.out_filters = self.decoder.out_filters

    def forward(self, x, mode=0):
        return self.decoder(self.encoder(x), mode)


class AntiAliasInterpolation2d(nn.Module):
    """
    Band-limited downsampling, for better preservation of the input signal.
    """
    def __init__(self, channels, scale):
        super(AntiAliasInterpolation2d, self).__init__()
        sigma = (1 / scale - 1) / 2
        kernel_size = 2 * round(sigma * 4) + 1
        self.ka = kernel_size // 2
        self.kb = self.ka - 1 if kernel_size % 2 == 0 else self.ka

        kernel_size = [kernel_size, kernel_size]
        sigma = [sigma, sigma]
        # The gaussian kernel is the product of the
        # gaussian function of each dimension.
        kernel = 1
        meshgrids = torch.meshgrid(
            [
                torch.arange(size, dtype=torch.float32)
                for size in kernel_size
            ]
        )
        for size, std, mgrid in zip(kernel_size, sigma, meshgrids):
            mean = (size - 1) / 2
            kernel *= torch.exp(-(mgrid - mean) ** 2 / (2 * std ** 2))

        # Make sure sum of values in gaussian kernel equals 1.
        kernel = kernel / torch.sum(kernel)
        # Reshape to depthwise convolutional weight
        kernel = kernel.view(1, 1, *kernel.size())
        kernel = kernel.repeat(channels, *[1] * (kernel.dim() - 1))

        self.register_buffer('weight', kernel)
        self.groups = channels
        self.scale = scale

    def forward(self, input):
        if self.scale == 1.0:
            return input

        out = F.pad(input, (self.ka, self.kb, self.ka, self.kb))
        out = F.conv2d(out, weight=self.weight, groups=self.groups)
        out = F.interpolate(out, scale_factor=(self.scale, self.scale))

        return out


def to_homogeneous(coordinates):
    ones_shape = list(coordinates.shape)
    ones_shape[-1] = 1
    ones = torch.ones(ones_shape).type(coordinates.type())

    return torch.cat([coordinates, ones], dim=-1)

def from_homogeneous(coordinates):
    return coordinates[..., :2] / coordinates[..., 2:3]
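Most of the geometry in this repo lives on the [-1, 1] grid produced by make_coordinate_grid, and kp2gaussian paints keypoints onto that grid as Gaussian heatmaps (this is what DenseMotionNetwork subtracts to build its heatmap representation). A small sketch of both helpers in isolation, assuming it is run from the repository root (illustration only):

# make_coordinate_grid and kp2gaussian in isolation (illustration only).
import torch
from modules.util import make_coordinate_grid, kp2gaussian

grid = make_coordinate_grid((4, 4), type=torch.zeros(1).type())
print(grid.shape)                  # torch.Size([4, 4, 2]), values spanning [-1, 1]
print(grid[0, 0], grid[-1, -1])    # corners: (-1, -1) and (1, 1)

kp = torch.zeros(1, 50, 2)         # 50 keypoints, all placed at the image centre
heat = kp2gaussian(kp, spatial_size=(64, 64), kp_variance=0.01)
print(heat.shape)                  # torch.Size([1, 50, 64, 64])
print(heat[0, 0].argmax())         # peak lands near the centre of the 64x64 map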
requirements.txt
ADDED
@@ -0,0 +1,27 @@
gradio
cffi==1.14.6
cycler==0.10.0
decorator==5.1.0
face-alignment==1.3.5
imageio==2.9.0
imageio-ffmpeg==0.4.5
kiwisolver==1.3.2
matplotlib==3.4.3
networkx==2.6.3
numpy==1.20.3
pandas==1.3.3
Pillow==8.3.2
pycparser==2.20
pyparsing==2.4.7
python-dateutil==2.8.2
pytz==2021.1
PyWavelets==1.1.1
PyYAML==5.4.1
scikit-image==0.18.3
scikit-learn==1.0
scipy==1.7.1
six==1.16.0
torch==1.10.0+cu113
torchvision==0.11.0+cu113
tqdm==4.62.3