Spaces: Build error
Commit 0483f57 (0 parents)
Duplicate from PKUWilliamYang/StyleGANEX
Co-authored-by: Shuai Yang <PKUWilliamYang@users.noreply.huggingface.co>
This view is limited to 50 files because it contains too many changes. See raw diff.
- packages.txt +2 -0
- .gitattributes +34 -0
- README.md +10 -0
- app.py +112 -0
- configs/__init__.py +0 -0
- configs/data_configs.py +48 -0
- configs/dataset_config.yml +60 -0
- configs/paths_config.py +25 -0
- configs/transforms_config.py +242 -0
- datasets/__init__.py +0 -0
- datasets/augmentations.py +110 -0
- datasets/ffhq_degradation_dataset.py +235 -0
- datasets/gt_res_dataset.py +32 -0
- datasets/images_dataset.py +33 -0
- datasets/inference_dataset.py +22 -0
- latent_optimization.py +107 -0
- models/__init__.py +0 -0
- models/bisenet/LICENSE +21 -0
- models/bisenet/README.md +68 -0
- models/bisenet/model.py +283 -0
- models/bisenet/resnet.py +109 -0
- models/encoders/__init__.py +0 -0
- models/encoders/helpers.py +119 -0
- models/encoders/model_irse.py +84 -0
- models/encoders/psp_encoders.py +357 -0
- models/mtcnn/__init__.py +0 -0
- models/mtcnn/mtcnn.py +156 -0
- models/mtcnn/mtcnn_pytorch/__init__.py +0 -0
- models/mtcnn/mtcnn_pytorch/src/__init__.py +2 -0
- models/mtcnn/mtcnn_pytorch/src/align_trans.py +304 -0
- models/mtcnn/mtcnn_pytorch/src/box_utils.py +238 -0
- models/mtcnn/mtcnn_pytorch/src/detector.py +126 -0
- models/mtcnn/mtcnn_pytorch/src/first_stage.py +101 -0
- models/mtcnn/mtcnn_pytorch/src/get_nets.py +171 -0
- models/mtcnn/mtcnn_pytorch/src/matlab_cp2tform.py +350 -0
- models/mtcnn/mtcnn_pytorch/src/visualization_utils.py +31 -0
- models/mtcnn/mtcnn_pytorch/src/weights/onet.npy +3 -0
- models/mtcnn/mtcnn_pytorch/src/weights/pnet.npy +3 -0
- models/mtcnn/mtcnn_pytorch/src/weights/rnet.npy +3 -0
- models/psp.py +148 -0
- models/stylegan2/__init__.py +0 -0
- models/stylegan2/lpips/__init__.py +161 -0
- models/stylegan2/lpips/base_model.py +58 -0
- models/stylegan2/lpips/dist_model.py +284 -0
- models/stylegan2/lpips/networks_basic.py +187 -0
- models/stylegan2/lpips/pretrained_networks.py +181 -0
- models/stylegan2/lpips/weights/v0.0/alex.pth +3 -0
- models/stylegan2/lpips/weights/v0.0/squeeze.pth +3 -0
- models/stylegan2/lpips/weights/v0.0/vgg.pth +3 -0
- models/stylegan2/lpips/weights/v0.1/alex.pth +3 -0
packages.txt
ADDED
@@ -0,0 +1,2 @@
+bzip2
+cmake
.gitattributes
ADDED
@@ -0,0 +1,34 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
README.md
ADDED
@@ -0,0 +1,10 @@
+---
+title: StyleGANEX
+sdk: gradio
+emoji: 🐨
+colorFrom: pink
+colorTo: yellow
+app_file: app.py
+pinned: false
+duplicated_from: PKUWilliamYang/StyleGANEX
+---
app.py
ADDED
@@ -0,0 +1,112 @@
+from __future__ import annotations
+
+import argparse
+import pathlib
+import torch
+import gradio as gr
+
+from webUI.app_task import *
+from webUI.styleganex_model import Model
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--device', type=str, default='cpu')
+    parser.add_argument('--theme', type=str)
+    parser.add_argument('--share', action='store_true')
+    parser.add_argument('--port', type=int)
+    parser.add_argument('--disable-queue',
+                        dest='enable_queue',
+                        action='store_false')
+    return parser.parse_args()
+
+DESCRIPTION = '''
+<div align=center>
+<h1 style="font-weight: 900; margin-bottom: 7px;">
+Face Manipulation with <a href="https://github.com/williamyang1991/StyleGANEX">StyleGANEX</a>
+</h1>
+<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.
+<a href="https://huggingface.co/spaces/PKUWilliamYang/StyleGANEX?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a></p>
+<p/>
+<img style="margin-top: 0em" src="https://raw.githubusercontent.com/williamyang1991/tmpfile/master/imgs/example.jpg" alt="example">
+</div>
+'''
+ARTICLE = r"""
+If StyleGANEX is helpful, please help to ⭐ the <a href='https://github.com/williamyang1991/StyleGANEX' target='_blank'>Github Repo</a>. Thanks!
+[![GitHub Stars](https://img.shields.io/github/stars/williamyang1991/StyleGANEX?style=social)](https://github.com/williamyang1991/StyleGANEX)
+---
+📝 **Citation**
+If our work is useful for your research, please consider citing:
+```bibtex
+@article{yang2023styleganex,
+ title = {StyleGANEX: StyleGAN-Based Manipulation Beyond Cropped Aligned Faces},
+ author = {Yang, Shuai and Jiang, Liming and Liu, Ziwei and Loy, Chen Change},
+ journal = {arXiv preprint arXiv:2303.06146},
+ year={2023},
+}
+```
+📋 **License**
+This project is licensed under <a rel="license" href="https://github.com/williamyang1991/VToonify/blob/main/LICENSE.md">S-Lab License 1.0</a>.
+Redistribution and use for non-commercial purposes should follow this license.
+
+📧 **Contact**
+If you have any questions, please feel free to reach me out at <b>williamyang@pku.edu.cn</b>.
+"""
+
+FOOTER = '<div align=center><img id="visitor-badge" alt="visitor badge" src="https://visitor-badge.laobi.icu/badge?page_id=williamyang1991/styleganex" /></div>'
+
+def main():
+    args = parse_args()
+    args.device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    print('*** Now using %s.'%(args.device))
+    model = Model(device=args.device)
+
+
+    torch.hub.download_url_to_file('https://raw.githubusercontent.com/williamyang1991/StyleGANEX/main/data/234_sketch.jpg',
+                                   '234_sketch.jpg')
+    torch.hub.download_url_to_file('https://github.com/williamyang1991/StyleGANEX/raw/main/output/ILip77SbmOE_inversion.pt',
+                                   'ILip77SbmOE_inversion.pt')
+    torch.hub.download_url_to_file('https://raw.githubusercontent.com/williamyang1991/StyleGANEX/main/data/ILip77SbmOE.png',
+                                   'ILip77SbmOE.png')
+    torch.hub.download_url_to_file('https://raw.githubusercontent.com/williamyang1991/StyleGANEX/main/data/ILip77SbmOE_mask.png',
+                                   'ILip77SbmOE_mask.png')
+    torch.hub.download_url_to_file('https://raw.githubusercontent.com/williamyang1991/StyleGANEX/main/data/pexels-daniel-xavier-1239291.jpg',
+                                   'pexels-daniel-xavier-1239291.jpg')
+    torch.hub.download_url_to_file('https://github.com/williamyang1991/StyleGANEX/raw/main/data/529_2.mp4',
+                                   '529_2.mp4')
+    torch.hub.download_url_to_file('https://github.com/williamyang1991/StyleGANEX/raw/main/data/684.mp4',
+                                   '684.mp4')
+    torch.hub.download_url_to_file('https://github.com/williamyang1991/StyleGANEX/raw/main/data/pexels-anthony-shkraba-production-8136210.mp4',
+                                   'pexels-anthony-shkraba-production-8136210.mp4')
+
+
+    with gr.Blocks(css='style.css') as demo:
+        gr.Markdown(DESCRIPTION)
+        with gr.Tabs():
+            with gr.TabItem('Inversion for Editing'):
+                create_demo_inversion(model.process_inversion, allow_optimization=False)
+            with gr.TabItem('Image Face Toonify'):
+                create_demo_toonify(model.process_toonify)
+            with gr.TabItem('Video Face Toonify'):
+                create_demo_vtoonify(model.process_vtoonify, max_frame_num=12)
+            with gr.TabItem('Image Face Editing'):
+                create_demo_editing(model.process_editing)
+            with gr.TabItem('Video Face Editing'):
+                create_demo_vediting(model.process_vediting, max_frame_num=12)
+            with gr.TabItem('Sketch2Face'):
+                create_demo_s2f(model.process_s2f)
+            with gr.TabItem('Mask2Face'):
+                create_demo_m2f(model.process_m2f)
+            with gr.TabItem('SR'):
+                create_demo_sr(model.process_sr)
+        gr.Markdown(ARTICLE)
+        gr.Markdown(FOOTER)
+
+    demo.launch(
+        enable_queue=args.enable_queue,
+        server_port=args.port,
+        share=args.share,
+    )
+
+if __name__ == '__main__':
+    main()
+
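Note (illustrative, not part of the commit): app.py above exposes a standard argparse CLI, so a duplicated Space could in principle be run outside Hugging Face with the same flags. The sketch below assumes the webUI/ package, style.css, and the pretrained weights are available locally.

```python
# Hypothetical local launch of the Gradio demo defined in app.py above.
import subprocess

subprocess.run([
    'python', 'app.py',
    '--device', 'cpu',       # parse_args() default; main() switches to cuda if available
    '--port', '7860',        # forwarded to demo.launch(server_port=...)
    '--disable-queue',       # sets enable_queue=False for demo.launch()
], check=True)
```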
configs/__init__.py
ADDED
File without changes
configs/data_configs.py
ADDED
@@ -0,0 +1,48 @@
+from configs import transforms_config
+from configs.paths_config import dataset_paths
+
+
+DATASETS = {
+    'ffhq_encode': {
+        'transforms': transforms_config.EncodeTransforms,
+        'train_source_root': dataset_paths['ffhq'],
+        'train_target_root': dataset_paths['ffhq'],
+        'test_source_root': dataset_paths['ffhq_test'],
+        'test_target_root': dataset_paths['ffhq_test'],
+    },
+    'ffhq_sketch_to_face': {
+        'transforms': transforms_config.SketchToImageTransforms,
+        'train_source_root': dataset_paths['ffhq_train_sketch'],
+        'train_target_root': dataset_paths['ffhq'],
+        'test_source_root': dataset_paths['ffhq_test_sketch'],
+        'test_target_root': dataset_paths['ffhq_test'],
+    },
+    'ffhq_seg_to_face': {
+        'transforms': transforms_config.SegToImageTransforms,
+        'train_source_root': dataset_paths['ffhq_train_segmentation'],
+        'train_target_root': dataset_paths['ffhq'],
+        'test_source_root': dataset_paths['ffhq_test_segmentation'],
+        'test_target_root': dataset_paths['ffhq_test'],
+    },
+    'ffhq_super_resolution': {
+        'transforms': transforms_config.SuperResTransforms,
+        'train_source_root': dataset_paths['ffhq'],
+        'train_target_root': dataset_paths['ffhq1280'],
+        'test_source_root': dataset_paths['ffhq_test'],
+        'test_target_root': dataset_paths['ffhq1280_test'],
+    },
+    'toonify': {
+        'transforms': transforms_config.ToonifyTransforms,
+        'train_source_root': dataset_paths['toonify_in'],
+        'train_target_root': dataset_paths['toonify_out'],
+        'test_source_root': dataset_paths['toonify_test_in'],
+        'test_target_root': dataset_paths['toonify_test_out'],
+    },
+    'ffhq_edit': {
+        'transforms': transforms_config.EditingTransforms,
+        'train_source_root': dataset_paths['ffhq'],
+        'train_target_root': dataset_paths['ffhq'],
+        'test_source_root': dataset_paths['ffhq_test'],
+        'test_target_root': dataset_paths['ffhq_test'],
+    },
+}
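For context, a minimal sketch of how a training script typically consumes DATASETS above (illustrative only, not part of this commit; `opts` is a hypothetical options namespace holding the fields the transform classes read):

```python
# Look up the per-task transforms and data roots from configs/data_configs.py.
from argparse import Namespace
from configs.data_configs import DATASETS

opts = Namespace(dataset_type='ffhq_encode', resize_factors=None, label_nc=0)

dataset_args = DATASETS[opts.dataset_type]
transforms_dict = dataset_args['transforms'](opts).get_transforms()

print(dataset_args['train_source_root'])   # e.g. data/train/ffhq/realign320x320/
print(sorted(transforms_dict.keys()))      # transform_gt_train, transform_inference, ...
```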
configs/dataset_config.yml
ADDED
@@ -0,0 +1,60 @@
+# dataset and data loader settings
+datasets:
+  train:
+    name: FFHQ
+    type: FFHQDegradationDataset
+    # dataroot_gt: datasets/ffhq/ffhq_512.lmdb
+    dataroot_gt: ../../../../share/shuaiyang/ffhq/realign1280x1280test/
+    io_backend:
+      # type: lmdb
+      type: disk
+
+    use_hflip: true
+    mean: [0.5, 0.5, 0.5]
+    std: [0.5, 0.5, 0.5]
+    out_size: 1280
+    scale: 4
+
+    blur_kernel_size: 41
+    kernel_list: ['iso', 'aniso']
+    kernel_prob: [0.5, 0.5]
+    blur_sigma: [0.1, 10]
+    downsample_range: [4, 40]
+    noise_range: [0, 20]
+    jpeg_range: [60, 100]
+
+    # color jitter and gray
+    #color_jitter_prob: 0.3
+    #color_jitter_shift: 20
+    #color_jitter_pt_prob: 0.3
+    #gray_prob: 0.01
+
+    # If you do not want colorization, please set
+    color_jitter_prob: ~
+    color_jitter_pt_prob: ~
+    gray_prob: 0.01
+    gt_gray: True
+
+    crop_components: true
+    component_path: ./pretrained_models/FFHQ_eye_mouth_landmarks_512.pth
+    eye_enlarge_ratio: 1.4
+
+    # data loader
+    use_shuffle: true
+    num_worker_per_gpu: 6
+    batch_size_per_gpu: 4
+    dataset_enlarge_ratio: 1
+    prefetch_mode: ~
+
+  val:
+    # Please modify accordingly to use your own validation
+    # Or comment the val block if do not need validation during training
+    name: validation
+    type: PairedImageDataset
+    dataroot_lq: datasets/faces/validation/input
+    dataroot_gt: datasets/faces/validation/reference
+    io_backend:
+      type: disk
+    mean: [0.5, 0.5, 0.5]
+    std: [0.5, 0.5, 0.5]
+    scale: 1
configs/paths_config.py
ADDED
@@ -0,0 +1,25 @@
+dataset_paths = {
+    'ffhq': 'data/train/ffhq/realign320x320/',
+    'ffhq_test': 'data/train/ffhq/realign320x320test/',
+    'ffhq1280': 'data/train/ffhq/realign1280x1280/',
+    'ffhq1280_test': 'data/train/ffhq/realign1280x1280test/',
+    'ffhq_train_sketch': 'data/train/ffhq/realign640x640sketch/',
+    'ffhq_test_sketch': 'data/train/ffhq/realign640x640sketchtest/',
+    'ffhq_train_segmentation': 'data/train/ffhq/realign320x320mask/',
+    'ffhq_test_segmentation': 'data/train/ffhq/realign320x320masktest/',
+    'toonify_in': 'data/train/pixar/trainA/',
+    'toonify_out': 'data/train/pixar/trainB/',
+    'toonify_test_in': 'data/train/pixar/testA/',
+    'toonify_test_out': 'data/train/testB/',
+}
+
+model_paths = {
+    'stylegan_ffhq': 'pretrained_models/stylegan2-ffhq-config-f.pt',
+    'ir_se50': 'pretrained_models/model_ir_se50.pth',
+    'circular_face': 'pretrained_models/CurricularFace_Backbone.pth',
+    'mtcnn_pnet': 'pretrained_models/mtcnn/pnet.npy',
+    'mtcnn_rnet': 'pretrained_models/mtcnn/rnet.npy',
+    'mtcnn_onet': 'pretrained_models/mtcnn/onet.npy',
+    'shape_predictor': 'shape_predictor_68_face_landmarks.dat',
+    'moco': 'pretrained_models/moco_v2_800ep_pretrain.pth.tar'
+}
configs/transforms_config.py
ADDED
@@ -0,0 +1,242 @@
+from abc import abstractmethod
+import torchvision.transforms as transforms
+from datasets import augmentations
+
+
+class TransformsConfig(object):
+
+    def __init__(self, opts):
+        self.opts = opts
+
+    @abstractmethod
+    def get_transforms(self):
+        pass
+
+
+class EncodeTransforms(TransformsConfig):
+
+    def __init__(self, opts):
+        super(EncodeTransforms, self).__init__(opts)
+
+    def get_transforms(self):
+        transforms_dict = {
+            'transform_gt_train': transforms.Compose([
+                transforms.Resize((320, 320)),
+                transforms.RandomHorizontalFlip(0.5),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_source': None,
+            'transform_test': transforms.Compose([
+                transforms.Resize((320, 320)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_inference': transforms.Compose([
+                transforms.Resize((320, 320)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
+        }
+        return transforms_dict
+
+
+class FrontalizationTransforms(TransformsConfig):
+
+    def __init__(self, opts):
+        super(FrontalizationTransforms, self).__init__(opts)
+
+    def get_transforms(self):
+        transforms_dict = {
+            'transform_gt_train': transforms.Compose([
+                transforms.Resize((256, 256)),
+                transforms.RandomHorizontalFlip(0.5),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_source': transforms.Compose([
+                transforms.Resize((256, 256)),
+                transforms.RandomHorizontalFlip(0.5),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_test': transforms.Compose([
+                transforms.Resize((256, 256)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_inference': transforms.Compose([
+                transforms.Resize((256, 256)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
+        }
+        return transforms_dict
+
+
+class SketchToImageTransforms(TransformsConfig):
+
+    def __init__(self, opts):
+        super(SketchToImageTransforms, self).__init__(opts)
+
+    def get_transforms(self):
+        transforms_dict = {
+            'transform_gt_train': transforms.Compose([
+                transforms.Resize((320, 320)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_source': transforms.Compose([
+                transforms.Resize((320, 320)),
+                transforms.ToTensor()]),
+            'transform_test': transforms.Compose([
+                transforms.Resize((320, 320)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_inference': transforms.Compose([
+                transforms.Resize((320, 320)),
+                transforms.ToTensor()]),
+        }
+        return transforms_dict
+
+
+class SegToImageTransforms(TransformsConfig):
+
+    def __init__(self, opts):
+        super(SegToImageTransforms, self).__init__(opts)
+
+    def get_transforms(self):
+        transforms_dict = {
+            'transform_gt_train': transforms.Compose([
+                transforms.Resize((320, 320)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_source': transforms.Compose([
+                transforms.Resize((320, 320)),
+                augmentations.ToOneHot(self.opts.label_nc),
+                transforms.ToTensor()]),
+            'transform_test': transforms.Compose([
+                transforms.Resize((320, 320)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_inference': transforms.Compose([
+                transforms.Resize((320, 320)),
+                augmentations.ToOneHot(self.opts.label_nc),
+                transforms.ToTensor()])
+        }
+        return transforms_dict
+
+
+class SuperResTransforms(TransformsConfig):
+
+    def __init__(self, opts):
+        super(SuperResTransforms, self).__init__(opts)
+
+    def get_transforms(self):
+        if self.opts.resize_factors is None:
+            self.opts.resize_factors = '1,2,4,8,16,32'
+        factors = [int(f) for f in self.opts.resize_factors.split(",")]
+        print("Performing down-sampling with factors: {}".format(factors))
+        transforms_dict = {
+            'transform_gt_train': transforms.Compose([
+                transforms.Resize((1280, 1280)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_source': transforms.Compose([
+                transforms.Resize((320, 320)),
+                augmentations.BilinearResize(factors=factors),
+                transforms.Resize((320, 320)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_test': transforms.Compose([
+                transforms.Resize((1280, 1280)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_inference': transforms.Compose([
+                transforms.Resize((320, 320)),
+                augmentations.BilinearResize(factors=factors),
+                transforms.Resize((320, 320)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
+        }
+        return transforms_dict
+
+
+class SuperResTransforms_320(TransformsConfig):
+
+    def __init__(self, opts):
+        super(SuperResTransforms_320, self).__init__(opts)
+
+    def get_transforms(self):
+        if self.opts.resize_factors is None:
+            self.opts.resize_factors = '1,2,4,8,16,32'
+        factors = [int(f) for f in self.opts.resize_factors.split(",")]
+        print("Performing down-sampling with factors: {}".format(factors))
+        transforms_dict = {
+            'transform_gt_train': transforms.Compose([
+                transforms.Resize((320, 320)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_source': transforms.Compose([
+                transforms.Resize((320, 320)),
+                augmentations.BilinearResize(factors=factors),
+                transforms.Resize((320, 320)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_test': transforms.Compose([
+                transforms.Resize((320, 320)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_inference': transforms.Compose([
+                transforms.Resize((320, 320)),
+                augmentations.BilinearResize(factors=factors),
+                transforms.Resize((320, 320)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
+        }
+        return transforms_dict
+
+
+class ToonifyTransforms(TransformsConfig):
+
+    def __init__(self, opts):
+        super(ToonifyTransforms, self).__init__(opts)
+
+    def get_transforms(self):
+        transforms_dict = {
+            'transform_gt_train': transforms.Compose([
+                transforms.Resize((1024, 1024)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_source': transforms.Compose([
+                transforms.Resize((256, 256)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_test': transforms.Compose([
+                transforms.Resize((1024, 1024)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_inference': transforms.Compose([
+                transforms.Resize((256, 256)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
+        }
+        return transforms_dict
+
+class EditingTransforms(TransformsConfig):
+
+    def __init__(self, opts):
+        super(EditingTransforms, self).__init__(opts)
+
+    def get_transforms(self):
+        transforms_dict = {
+            'transform_gt_train': transforms.Compose([
+                transforms.Resize((1280, 1280)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_source': transforms.Compose([
+                transforms.Resize((320, 320)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_test': transforms.Compose([
+                transforms.Resize((1280, 1280)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
+            'transform_inference': transforms.Compose([
+                transforms.Resize((320, 320)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
+        }
+        return transforms_dict
datasets/__init__.py
ADDED
File without changes
datasets/augmentations.py
ADDED
@@ -0,0 +1,110 @@
+import numpy as np
+import torch
+from torch import nn
+from torch.nn import functional as F
+from torchvision import transforms
+
+
+class ToOneHot(object):
+    """ Convert the input PIL image to a one-hot torch tensor """
+    def __init__(self, n_classes=None):
+        self.n_classes = n_classes
+
+    def onehot_initialization(self, a):
+        if self.n_classes is None:
+            self.n_classes = len(np.unique(a))
+        out = np.zeros(a.shape + (self.n_classes, ), dtype=int)
+        out[self.__all_idx(a, axis=2)] = 1
+        return out
+
+    def __all_idx(self, idx, axis):
+        grid = np.ogrid[tuple(map(slice, idx.shape))]
+        grid.insert(axis, idx)
+        return tuple(grid)
+
+    def __call__(self, img):
+        img = np.array(img)
+        one_hot = self.onehot_initialization(img)
+        return one_hot
+
+
+class BilinearResize(object):
+    def __init__(self, factors=[1, 2, 4, 8, 16, 32]):
+        self.factors = factors
+
+    def __call__(self, image):
+        factor = np.random.choice(self.factors, size=1)[0]
+        D = BicubicDownSample(factor=factor, cuda=False)
+        img_tensor = transforms.ToTensor()(image).unsqueeze(0)
+        img_tensor_lr = D(img_tensor)[0].clamp(0, 1)
+        img_low_res = transforms.ToPILImage()(img_tensor_lr)
+        return img_low_res
+
+
+class BicubicDownSample(nn.Module):
+    def bicubic_kernel(self, x, a=-0.50):
+        """
+        This equation is exactly copied from the website below:
+        https://clouard.users.greyc.fr/Pantheon/experiments/rescaling/index-en.html#bicubic
+        """
+        abs_x = torch.abs(x)
+        if abs_x <= 1.:
+            return (a + 2.) * torch.pow(abs_x, 3.) - (a + 3.) * torch.pow(abs_x, 2.) + 1
+        elif 1. < abs_x < 2.:
+            return a * torch.pow(abs_x, 3) - 5. * a * torch.pow(abs_x, 2.) + 8. * a * abs_x - 4. * a
+        else:
+            return 0.0
+
+    def __init__(self, factor=4, cuda=True, padding='reflect'):
+        super().__init__()
+        self.factor = factor
+        size = factor * 4
+        k = torch.tensor([self.bicubic_kernel((i - torch.floor(torch.tensor(size / 2)) + 0.5) / factor)
+                          for i in range(size)], dtype=torch.float32)
+        k = k / torch.sum(k)
+        k1 = torch.reshape(k, shape=(1, 1, size, 1))
+        self.k1 = torch.cat([k1, k1, k1], dim=0)
+        k2 = torch.reshape(k, shape=(1, 1, 1, size))
+        self.k2 = torch.cat([k2, k2, k2], dim=0)
+        self.cuda = '.cuda' if cuda else ''
+        self.padding = padding
+        for param in self.parameters():
+            param.requires_grad = False
+
+    def forward(self, x, nhwc=False, clip_round=False, byte_output=False):
+        filter_height = self.factor * 4
+        filter_width = self.factor * 4
+        stride = self.factor
+
+        pad_along_height = max(filter_height - stride, 0)
+        pad_along_width = max(filter_width - stride, 0)
+        filters1 = self.k1.type('torch{}.FloatTensor'.format(self.cuda))
+        filters2 = self.k2.type('torch{}.FloatTensor'.format(self.cuda))
+
+        # compute actual padding values for each side
+        pad_top = pad_along_height // 2
+        pad_bottom = pad_along_height - pad_top
+        pad_left = pad_along_width // 2
+        pad_right = pad_along_width - pad_left
+
+        # apply mirror padding
+        if nhwc:
+            x = torch.transpose(torch.transpose(x, 2, 3), 1, 2)  # NHWC to NCHW
+
+        # downscaling performed by 1-d convolution
+        x = F.pad(x, (0, 0, pad_top, pad_bottom), self.padding)
+        x = F.conv2d(input=x, weight=filters1, stride=(stride, 1), groups=3)
+        if clip_round:
+            x = torch.clamp(torch.round(x), 0.0, 255.)
+
+        x = F.pad(x, (pad_left, pad_right, 0, 0), self.padding)
+        x = F.conv2d(input=x, weight=filters2, stride=(1, stride), groups=3)
+        if clip_round:
+            x = torch.clamp(torch.round(x), 0.0, 255.)
+
+        if nhwc:
+            x = torch.transpose(torch.transpose(x, 1, 3), 1, 2)
+        if byte_output:
+            return x.type('torch.ByteTensor'.format(self.cuda))
+        else:
+            return x
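For reference, a minimal sketch of how BilinearResize above degrades an image, as it is used inside SuperResTransforms (illustrative only, not part of this commit; the image filename is just an example taken from the files app.py downloads):

```python
# Randomly bicubic-downsample a PIL image, mirroring the SR training pipeline.
from PIL import Image
from datasets.augmentations import BilinearResize

degrade = BilinearResize(factors=[4, 8])             # picks 4x or 8x at random per call
img = Image.open('ILip77SbmOE.png').convert('RGB')   # example image
low_res = degrade(img)                               # PIL image at roughly (W/f, H/f)
low_res.save('ILip77SbmOE_lowres.png')
```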
datasets/ffhq_degradation_dataset.py
ADDED
@@ -0,0 +1,235 @@
+import cv2
+import math
+import numpy as np
+import os.path as osp
+import torch
+import torch.utils.data as data
+from basicsr.data import degradations as degradations
+from basicsr.data.data_util import paths_from_folder
+from basicsr.data.transforms import augment
+from basicsr.utils import FileClient, get_root_logger, imfrombytes, img2tensor
+from basicsr.utils.registry import DATASET_REGISTRY
+from torchvision.transforms.functional import (adjust_brightness, adjust_contrast, adjust_hue, adjust_saturation,
+                                                normalize)
+
+
+@DATASET_REGISTRY.register()
+class FFHQDegradationDataset(data.Dataset):
+    """FFHQ dataset for GFPGAN.
+    It reads high resolution images, and then generate low-quality (LQ) images on-the-fly.
+    Args:
+        opt (dict): Config for train datasets. It contains the following keys:
+            dataroot_gt (str): Data root path for gt.
+            io_backend (dict): IO backend type and other kwarg.
+            mean (list | tuple): Image mean.
+            std (list | tuple): Image std.
+            use_hflip (bool): Whether to horizontally flip.
+        Please see more options in the codes.
+    """
+
+    def __init__(self, opt):
+        super(FFHQDegradationDataset, self).__init__()
+        self.opt = opt
+        # file client (io backend)
+        self.file_client = None
+        self.io_backend_opt = opt['io_backend']
+
+        self.gt_folder = opt['dataroot_gt']
+        self.mean = opt['mean']
+        self.std = opt['std']
+        self.out_size = opt['out_size']
+
+        self.crop_components = opt.get('crop_components', False)  # facial components
+        self.eye_enlarge_ratio = opt.get('eye_enlarge_ratio', 1)  # whether enlarge eye regions
+
+        if self.crop_components:
+            # load component list from a pre-process pth files
+            self.components_list = torch.load(opt.get('component_path'))
+
+        # file client (lmdb io backend)
+        if self.io_backend_opt['type'] == 'lmdb':
+            self.io_backend_opt['db_paths'] = self.gt_folder
+            if not self.gt_folder.endswith('.lmdb'):
+                raise ValueError(f"'dataroot_gt' should end with '.lmdb', but received {self.gt_folder}")
+            with open(osp.join(self.gt_folder, 'meta_info.txt')) as fin:
+                self.paths = [line.split('.')[0] for line in fin]
+        else:
+            # disk backend: scan file list from a folder
+            self.paths = paths_from_folder(self.gt_folder)
+
+        # degradation configurations
+        self.blur_kernel_size = opt['blur_kernel_size']
+        self.kernel_list = opt['kernel_list']
+        self.kernel_prob = opt['kernel_prob']
+        self.blur_sigma = opt['blur_sigma']
+        self.downsample_range = opt['downsample_range']
+        self.noise_range = opt['noise_range']
+        self.jpeg_range = opt['jpeg_range']
+
+        # color jitter
+        self.color_jitter_prob = opt.get('color_jitter_prob')
+        self.color_jitter_pt_prob = opt.get('color_jitter_pt_prob')
+        self.color_jitter_shift = opt.get('color_jitter_shift', 20)
+        # to gray
+        self.gray_prob = opt.get('gray_prob')
+
+        logger = get_root_logger()
+        logger.info(f'Blur: blur_kernel_size {self.blur_kernel_size}, sigma: [{", ".join(map(str, self.blur_sigma))}]')
+        logger.info(f'Downsample: downsample_range [{", ".join(map(str, self.downsample_range))}]')
+        logger.info(f'Noise: [{", ".join(map(str, self.noise_range))}]')
+        logger.info(f'JPEG compression: [{", ".join(map(str, self.jpeg_range))}]')
+
+        if self.color_jitter_prob is not None:
+            logger.info(f'Use random color jitter. Prob: {self.color_jitter_prob}, shift: {self.color_jitter_shift}')
+        if self.gray_prob is not None:
+            logger.info(f'Use random gray. Prob: {self.gray_prob}')
+        self.color_jitter_shift /= 255.
+
+    @staticmethod
+    def color_jitter(img, shift):
+        """jitter color: randomly jitter the RGB values, in numpy formats"""
+        jitter_val = np.random.uniform(-shift, shift, 3).astype(np.float32)
+        img = img + jitter_val
+        img = np.clip(img, 0, 1)
+        return img
+
+    @staticmethod
+    def color_jitter_pt(img, brightness, contrast, saturation, hue):
+        """jitter color: randomly jitter the brightness, contrast, saturation, and hue, in torch Tensor formats"""
+        fn_idx = torch.randperm(4)
+        for fn_id in fn_idx:
+            if fn_id == 0 and brightness is not None:
+                brightness_factor = torch.tensor(1.0).uniform_(brightness[0], brightness[1]).item()
+                img = adjust_brightness(img, brightness_factor)
+
+            if fn_id == 1 and contrast is not None:
+                contrast_factor = torch.tensor(1.0).uniform_(contrast[0], contrast[1]).item()
+                img = adjust_contrast(img, contrast_factor)
+
+            if fn_id == 2 and saturation is not None:
+                saturation_factor = torch.tensor(1.0).uniform_(saturation[0], saturation[1]).item()
+                img = adjust_saturation(img, saturation_factor)
+
+            if fn_id == 3 and hue is not None:
+                hue_factor = torch.tensor(1.0).uniform_(hue[0], hue[1]).item()
+                img = adjust_hue(img, hue_factor)
+        return img
+
+    def get_component_coordinates(self, index, status):
+        """Get facial component (left_eye, right_eye, mouth) coordinates from a pre-loaded pth file"""
+        components_bbox = self.components_list[f'{index:08d}']
+        if status[0]:  # hflip
+            # exchange right and left eye
+            tmp = components_bbox['left_eye']
+            components_bbox['left_eye'] = components_bbox['right_eye']
+            components_bbox['right_eye'] = tmp
+            # modify the width coordinate
+            components_bbox['left_eye'][0] = self.out_size - components_bbox['left_eye'][0]
+            components_bbox['right_eye'][0] = self.out_size - components_bbox['right_eye'][0]
+            components_bbox['mouth'][0] = self.out_size - components_bbox['mouth'][0]
+
+        # get coordinates
+        locations = []
+        for part in ['left_eye', 'right_eye', 'mouth']:
+            mean = components_bbox[part][0:2]
+            mean[0] = mean[0] * 2 + 128  ########
+            mean[1] = mean[1] * 2 + 128  ########
+            half_len = components_bbox[part][2] * 2  ########
+            if 'eye' in part:
+                half_len *= self.eye_enlarge_ratio
+            loc = np.hstack((mean - half_len + 1, mean + half_len))
+            loc = torch.from_numpy(loc).float()
+            locations.append(loc)
+        return locations
+
+    def __getitem__(self, index):
+        if self.file_client is None:
+            self.file_client = FileClient(self.io_backend_opt.pop('type'), **self.io_backend_opt)
+
+        # load gt image
+        # Shape: (h, w, c); channel order: BGR; image range: [0, 1], float32.
+        gt_path = self.paths[index]
+        img_bytes = self.file_client.get(gt_path)
+        img_gt = imfrombytes(img_bytes, float32=True)
+
+        # random horizontal flip
+        img_gt, status = augment(img_gt, hflip=self.opt['use_hflip'], rotation=False, return_status=True)
+        h, w, _ = img_gt.shape
+
+        # get facial component coordinates
+        if self.crop_components:
+            locations = self.get_component_coordinates(index, status)
+            loc_left_eye, loc_right_eye, loc_mouth = locations
+
+        # ------------------------ generate lq image ------------------------ #
+        # blur
+        kernel = degradations.random_mixed_kernels(
+            self.kernel_list,
+            self.kernel_prob,
+            self.blur_kernel_size,
+            self.blur_sigma,
+            self.blur_sigma, [-math.pi, math.pi],
+            noise_range=None)
+        img_lq = cv2.filter2D(img_gt, -1, kernel)
+        # downsample
+        scale = np.random.uniform(self.downsample_range[0], self.downsample_range[1])
+        img_lq = cv2.resize(img_lq, (int(w // scale), int(h // scale)), interpolation=cv2.INTER_LINEAR)
+        # noise
+        if self.noise_range is not None:
+            img_lq = degradations.random_add_gaussian_noise(img_lq, self.noise_range)
+        # jpeg compression
+        if self.jpeg_range is not None:
+            img_lq = degradations.random_add_jpg_compression(img_lq, self.jpeg_range)
+
+        # resize to original size
+        img_lq = cv2.resize(img_lq, (int(w // self.opt['scale']), int(h // self.opt['scale'])), interpolation=cv2.INTER_LINEAR)
+
+        # random color jitter (only for lq)
+        if self.color_jitter_prob is not None and (np.random.uniform() < self.color_jitter_prob):
+            img_lq = self.color_jitter(img_lq, self.color_jitter_shift)
+        # random to gray (only for lq)
+        if self.gray_prob and np.random.uniform() < self.gray_prob:
+            img_lq = cv2.cvtColor(img_lq, cv2.COLOR_BGR2GRAY)
+            img_lq = np.tile(img_lq[:, :, None], [1, 1, 3])
+            if self.opt.get('gt_gray'):  # whether convert GT to gray images
+                img_gt = cv2.cvtColor(img_gt, cv2.COLOR_BGR2GRAY)
+                img_gt = np.tile(img_gt[:, :, None], [1, 1, 3])  # repeat the color channels
+
+        # BGR to RGB, HWC to CHW, numpy to tensor
+        # img_gt, img_lq = img2tensor([img_gt, img_lq], bgr2rgb=True, float32=True)
+        img_gt = img2tensor(img_gt, bgr2rgb=True, float32=True)
+        img_lq = img2tensor(img_lq, bgr2rgb=True, float32=True)
+
+        # random color jitter (pytorch version) (only for lq)
+        if self.color_jitter_pt_prob is not None and (np.random.uniform() < self.color_jitter_pt_prob):
+            brightness = self.opt.get('brightness', (0.5, 1.5))
+            contrast = self.opt.get('contrast', (0.5, 1.5))
+            saturation = self.opt.get('saturation', (0, 1.5))
+            hue = self.opt.get('hue', (-0.1, 0.1))
+            img_lq = self.color_jitter_pt(img_lq, brightness, contrast, saturation, hue)
+
+        # round and clip
+        img_lq = torch.clamp((img_lq * 255.0).round(), 0, 255) / 255.
+
+        # normalize
+        normalize(img_gt, self.mean, self.std, inplace=True)
+        normalize(img_lq, self.mean, self.std, inplace=True)
+
+        '''
+        if self.crop_components:
+            return_dict = {
+                'lq': img_lq,
+                'gt': img_gt,
+                'gt_path': gt_path,
+                'loc_left_eye': loc_left_eye,
+                'loc_right_eye': loc_right_eye,
+                'loc_mouth': loc_mouth
+            }
+            return return_dict
+        else:
+            return {'lq': img_lq, 'gt': img_gt, 'gt_path': gt_path}
+        '''
+        return img_lq, img_gt
+
+    def __len__(self):
+        return len(self.paths)
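For context, a minimal sketch of constructing FFHQDegradationDataset above from an options dict that mirrors the `datasets: train:` block of configs/dataset_config.yml (illustrative only, not part of this commit; the GT path is a placeholder):

```python
# Build the on-the-fly degradation dataset and fetch one (lq, gt) tensor pair.
from datasets.ffhq_degradation_dataset import FFHQDegradationDataset

opt = {
    'dataroot_gt': 'data/train/ffhq/realign1280x1280/',   # placeholder GT folder
    'io_backend': {'type': 'disk'},
    'use_hflip': True,
    'mean': [0.5, 0.5, 0.5], 'std': [0.5, 0.5, 0.5],
    'out_size': 1280, 'scale': 4,
    'blur_kernel_size': 41, 'kernel_list': ['iso', 'aniso'], 'kernel_prob': [0.5, 0.5],
    'blur_sigma': [0.1, 10], 'downsample_range': [4, 40],
    'noise_range': [0, 20], 'jpeg_range': [60, 100],
    'gray_prob': 0.01, 'gt_gray': True,
}

dataset = FFHQDegradationDataset(opt)
img_lq, img_gt = dataset[0]   # degraded input and ground truth, both CHW float tensors
```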
datasets/gt_res_dataset.py
ADDED
@@ -0,0 +1,32 @@
+#!/usr/bin/python
+# encoding: utf-8
+import os
+from torch.utils.data import Dataset
+from PIL import Image
+
+
+class GTResDataset(Dataset):
+
+    def __init__(self, root_path, gt_dir=None, transform=None, transform_train=None):
+        self.pairs = []
+        for f in os.listdir(root_path):
+            image_path = os.path.join(root_path, f)
+            gt_path = os.path.join(gt_dir, f)
+            if f.endswith(".jpg") or f.endswith(".png"):
+                self.pairs.append([image_path, gt_path.replace('.png', '.jpg'), None])
+        self.transform = transform
+        self.transform_train = transform_train
+
+    def __len__(self):
+        return len(self.pairs)
+
+    def __getitem__(self, index):
+        from_path, to_path, _ = self.pairs[index]
+        from_im = Image.open(from_path).convert('RGB')
+        to_im = Image.open(to_path).convert('RGB')
+
+        if self.transform:
+            to_im = self.transform(to_im)
+            from_im = self.transform(from_im)
+
+        return from_im, to_im
datasets/images_dataset.py
ADDED
@@ -0,0 +1,33 @@
+from torch.utils.data import Dataset
+from PIL import Image
+from utils import data_utils
+
+
+class ImagesDataset(Dataset):
+
+    def __init__(self, source_root, target_root, opts, target_transform=None, source_transform=None):
+        self.source_paths = sorted(data_utils.make_dataset(source_root))
+        self.target_paths = sorted(data_utils.make_dataset(target_root))
+        self.source_transform = source_transform
+        self.target_transform = target_transform
+        self.opts = opts
+
+    def __len__(self):
+        return len(self.source_paths)
+
+    def __getitem__(self, index):
+        from_path = self.source_paths[index]
+        from_im = Image.open(from_path)
+        from_im = from_im.convert('RGB') if self.opts.label_nc == 0 else from_im.convert('L')
+
+        to_path = self.target_paths[index]
+        to_im = Image.open(to_path).convert('RGB')
+        if self.target_transform:
+            to_im = self.target_transform(to_im)
+
+        if self.source_transform:
+            from_im = self.source_transform(from_im)
+        else:
+            from_im = to_im
+
+        return from_im, to_im
datasets/inference_dataset.py
ADDED
@@ -0,0 +1,22 @@
+from torch.utils.data import Dataset
+from PIL import Image
+from utils import data_utils
+
+
+class InferenceDataset(Dataset):
+
+    def __init__(self, root, opts, transform=None):
+        self.paths = sorted(data_utils.make_dataset(root))
+        self.transform = transform
+        self.opts = opts
+
+    def __len__(self):
+        return len(self.paths)
+
+    def __getitem__(self, index):
+        from_path = self.paths[index]
+        from_im = Image.open(from_path)
+        from_im = from_im.convert('RGB') if self.opts.label_nc == 0 else from_im.convert('L')
+        if self.transform:
+            from_im = self.transform(from_im)
+        return from_im
latent_optimization.py
ADDED
@@ -0,0 +1,107 @@
+import models.stylegan2.lpips as lpips
+from torch import autograd, optim
+from torchvision import transforms, utils
+from tqdm import tqdm
+import torch
+from scripts.align_all_parallel import align_face
+from utils.inference_utils import noise_regularize, noise_normalize_, get_lr, latent_noise, visualize
+
+def latent_optimization(frame, pspex, landmarkpredictor, step=500, device='cuda'):
+    percept = lpips.PerceptualLoss(
+        model="net-lin", net="vgg", use_gpu=device.startswith("cuda")
+    )
+
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
+    ])
+
+    with torch.no_grad():
+
+        noise_sample = torch.randn(1000, 512, device=device)
+        latent_out = pspex.decoder.style(noise_sample)
+        latent_mean = latent_out.mean(0)
+        latent_std = ((latent_out - latent_mean).pow(2).sum() / 1000) ** 0.5
+
+        y = transform(frame).unsqueeze(dim=0).to(device)
+        I_ = align_face(frame, landmarkpredictor)
+        I_ = transform(I_).unsqueeze(dim=0).to(device)
+        wplus = pspex.encoder(I_) + pspex.latent_avg.unsqueeze(0)
+        _, f = pspex.encoder(y, return_feat=True)
+        latent_in = wplus.detach().clone()
+        feat = [f[0].detach().clone(), f[1].detach().clone()]
+
+
+    # wplus and f to optimize
+    latent_in.requires_grad = True
+    feat[0].requires_grad = True
+    feat[1].requires_grad = True
+
+    noises_single = pspex.decoder.make_noise()
+    basic_height, basic_width = int(y.shape[2]*32/256), int(y.shape[3]*32/256)
+    noises = []
+    for noise in noises_single:
+        noises.append(noise.new_empty(y.shape[0], 1, max(basic_height, int(y.shape[2]*noise.shape[2]/256)),
+                                      max(basic_width, int(y.shape[3]*noise.shape[2]/256))).normal_())
+    for noise in noises:
+        noise.requires_grad = True
+
+    init_lr = 0.05
+    optimizer = optim.Adam(feat + noises, lr=init_lr)
+    optimizer2 = optim.Adam([latent_in], lr=init_lr)
+    noise_weight = 0.05 * 0.2
+
+    pbar = tqdm(range(step))
+    latent_path = []
+
+    for i in pbar:
+        t = i / step
+        lr = get_lr(t, init_lr)
+        optimizer.param_groups[0]["lr"] = lr
+        optimizer2.param_groups[0]["lr"] = get_lr(t, init_lr)
+
+        noise_strength = latent_std * noise_weight * max(0, 1 - t / 0.75) ** 2
+        latent_n = latent_noise(latent_in, noise_strength.item())
+
+        y_hat, _ = pspex.decoder([latent_n], input_is_latent=True, randomize_noise=False,
+                                 first_layer_feature=feat, noise=noises)
+
+        batch, channel, height, width = y_hat.shape
+
+        if height > y.shape[2]:
+            factor = height // y.shape[2]
+
+            y_hat = y_hat.reshape(
+                batch, channel, height // factor, factor, width // factor, factor
+            )
+            y_hat = y_hat.mean([3, 5])
+
+        p_loss = percept(y_hat, y).sum()
+        n_loss = noise_regularize(noises) * 1e3
+
+        loss = p_loss + n_loss
+
+        optimizer.zero_grad()
+        optimizer2.zero_grad()
+        loss.backward()
+        optimizer.step()
+        optimizer2.step()
+
+        noise_normalize_(noises)
+
+        ''' for visualization
+        if (i + 1) % 100 == 0 or i == 0:
+            viz = torch.cat((y_hat,y,y_hat-y), dim=3)
+            visualize(torch.clamp(viz[0].cpu(),-1,1), 60)
+        '''
+
+        pbar.set_description(
+            (
+                f"perceptual: {p_loss.item():.4f}; noise regularize: {n_loss.item():.4f};"
+                f" lr: {lr:.4f}"
+            )
+        )
+
+    return latent_n, feat, noises, wplus, f
models/__init__.py
ADDED
File without changes
models/bisenet/LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2019 zll
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
models/bisenet/README.md
ADDED
@@ -0,0 +1,68 @@
+# face-parsing.PyTorch
+
+<p align="center">
+	<a href="https://github.com/zllrunning/face-parsing.PyTorch">
+		<img class="page-image" src="https://github.com/zllrunning/face-parsing.PyTorch/blob/master/6.jpg" >
+	</a>
+</p>
+
+### Contents
+- [Training](#training)
+- [Demo](#Demo)
+- [References](#references)
+
+## Training
+
+1. Prepare training data:
+    -- download [CelebAMask-HQ dataset](https://github.com/switchablenorms/CelebAMask-HQ)
+
+    -- change file path in the `prepropess_data.py` and run
+```Shell
+python prepropess_data.py
+```
+
+2. Train the model using CelebAMask-HQ dataset:
+Just run the train script:
+```
+$ CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 train.py
+```
+
+If you do not wish to train the model, you can download [our pre-trained model](https://drive.google.com/open?id=154JgKpzCPW82qINcVieuPH3fZ2e0P812) and save it in `res/cp`.
+
+
+## Demo
+1. Evaluate the trained model using:
+```Shell
+# evaluate using GPU
+python test.py
+```
+
+## Face makeup using parsing maps
+[**face-makeup.PyTorch**](https://github.com/zllrunning/face-makeup.PyTorch)
+<table>
+
+<tr>
+    <th> </th>
+    <th>Hair</th>
+    <th>Lip</th>
+</tr>
+
+<!-- Line 1: Original Input -->
+<tr>
+    <td><em>Original Input</em></td>
+    <td><img src="makeup/116_ori.png" height="256" width="256" alt="Original Input"></td>
+    <td><img src="makeup/116_lip_ori.png" height="256" width="256" alt="Original Input"></td>
+</tr>
+
+<!-- Line 3: Color -->
+<tr>
+    <td>Color</td>
+    <td><img src="makeup/116_1.png" height="256" width="256" alt="Color"></td>
+    <td><img src="makeup/116_3.png" height="256" width="256" alt="Color"></td>
+</tr>
+
+</table>
+
+
+## References
+- [BiSeNet](https://github.com/CoinCheung/BiSeNet)
models/bisenet/model.py
ADDED
@@ -0,0 +1,283 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-


import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

from models.bisenet.resnet import Resnet18
# from modules.bn import InPlaceABNSync as BatchNorm2d


class ConvBNReLU(nn.Module):
    def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1, *args, **kwargs):
        super(ConvBNReLU, self).__init__()
        self.conv = nn.Conv2d(in_chan,
                              out_chan,
                              kernel_size=ks,
                              stride=stride,
                              padding=padding,
                              bias=False)
        self.bn = nn.BatchNorm2d(out_chan)
        self.init_weight()

    def forward(self, x):
        x = self.conv(x)
        x = F.relu(self.bn(x))
        return x

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)

class BiSeNetOutput(nn.Module):
    def __init__(self, in_chan, mid_chan, n_classes, *args, **kwargs):
        super(BiSeNetOutput, self).__init__()
        self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1)
        self.conv_out = nn.Conv2d(mid_chan, n_classes, kernel_size=1, bias=False)
        self.init_weight()

    def forward(self, x):
        x = self.conv(x)
        x = self.conv_out(x)
        return x

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)

    def get_params(self):
        wd_params, nowd_params = [], []
        for name, module in self.named_modules():
            if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d):
                wd_params.append(module.weight)
                if not module.bias is None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params += list(module.parameters())
        return wd_params, nowd_params


class AttentionRefinementModule(nn.Module):
    def __init__(self, in_chan, out_chan, *args, **kwargs):
        super(AttentionRefinementModule, self).__init__()
        self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1)
        self.conv_atten = nn.Conv2d(out_chan, out_chan, kernel_size=1, bias=False)
        self.bn_atten = nn.BatchNorm2d(out_chan)
        self.sigmoid_atten = nn.Sigmoid()
        self.init_weight()

    def forward(self, x):
        feat = self.conv(x)
        atten = F.avg_pool2d(feat, feat.size()[2:])
        atten = self.conv_atten(atten)
        atten = self.bn_atten(atten)
        atten = self.sigmoid_atten(atten)
        out = torch.mul(feat, atten)
        return out

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)


class ContextPath(nn.Module):
    def __init__(self, *args, **kwargs):
        super(ContextPath, self).__init__()
        self.resnet = Resnet18()
        self.arm16 = AttentionRefinementModule(256, 128)
        self.arm32 = AttentionRefinementModule(512, 128)
        self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
        self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
        self.conv_avg = ConvBNReLU(512, 128, ks=1, stride=1, padding=0)

        self.init_weight()

    def forward(self, x):
        H0, W0 = x.size()[2:]
        feat8, feat16, feat32 = self.resnet(x)
        H8, W8 = feat8.size()[2:]
        H16, W16 = feat16.size()[2:]
        H32, W32 = feat32.size()[2:]

        avg = F.avg_pool2d(feat32, feat32.size()[2:])
        avg = self.conv_avg(avg)
        avg_up = F.interpolate(avg, (H32, W32), mode='nearest')

        feat32_arm = self.arm32(feat32)
        feat32_sum = feat32_arm + avg_up
        feat32_up = F.interpolate(feat32_sum, (H16, W16), mode='nearest')
        feat32_up = self.conv_head32(feat32_up)

        feat16_arm = self.arm16(feat16)
        feat16_sum = feat16_arm + feat32_up
        feat16_up = F.interpolate(feat16_sum, (H8, W8), mode='nearest')
        feat16_up = self.conv_head16(feat16_up)

        return feat8, feat16_up, feat32_up  # x8, x8, x16

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)

    def get_params(self):
        wd_params, nowd_params = [], []
        for name, module in self.named_modules():
            if isinstance(module, (nn.Linear, nn.Conv2d)):
                wd_params.append(module.weight)
                if not module.bias is None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params += list(module.parameters())
        return wd_params, nowd_params


### This is not used, since I replace this with the resnet feature with the same size
class SpatialPath(nn.Module):
    def __init__(self, *args, **kwargs):
        super(SpatialPath, self).__init__()
        self.conv1 = ConvBNReLU(3, 64, ks=7, stride=2, padding=3)
        self.conv2 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1)
        self.conv3 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1)
        self.conv_out = ConvBNReLU(64, 128, ks=1, stride=1, padding=0)
        self.init_weight()

    def forward(self, x):
        feat = self.conv1(x)
        feat = self.conv2(feat)
        feat = self.conv3(feat)
        feat = self.conv_out(feat)
        return feat

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)

    def get_params(self):
        wd_params, nowd_params = [], []
        for name, module in self.named_modules():
            if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d):
                wd_params.append(module.weight)
                if not module.bias is None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params += list(module.parameters())
        return wd_params, nowd_params


class FeatureFusionModule(nn.Module):
    def __init__(self, in_chan, out_chan, *args, **kwargs):
        super(FeatureFusionModule, self).__init__()
        self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0)
        self.conv1 = nn.Conv2d(out_chan,
                               out_chan//4,
                               kernel_size=1,
                               stride=1,
                               padding=0,
                               bias=False)
        self.conv2 = nn.Conv2d(out_chan//4,
                               out_chan,
                               kernel_size=1,
                               stride=1,
                               padding=0,
                               bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()
        self.init_weight()

    def forward(self, fsp, fcp):
        fcat = torch.cat([fsp, fcp], dim=1)
        feat = self.convblk(fcat)
        atten = F.avg_pool2d(feat, feat.size()[2:])
        atten = self.conv1(atten)
        atten = self.relu(atten)
        atten = self.conv2(atten)
        atten = self.sigmoid(atten)
        feat_atten = torch.mul(feat, atten)
        feat_out = feat_atten + feat
        return feat_out

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)

    def get_params(self):
        wd_params, nowd_params = [], []
        for name, module in self.named_modules():
            if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d):
                wd_params.append(module.weight)
                if not module.bias is None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params += list(module.parameters())
        return wd_params, nowd_params


class BiSeNet(nn.Module):
    def __init__(self, n_classes, *args, **kwargs):
        super(BiSeNet, self).__init__()
        self.cp = ContextPath()
        ## here self.sp is deleted
        self.ffm = FeatureFusionModule(256, 256)
        self.conv_out = BiSeNetOutput(256, 256, n_classes)
        self.conv_out16 = BiSeNetOutput(128, 64, n_classes)
        self.conv_out32 = BiSeNetOutput(128, 64, n_classes)
        self.init_weight()

    def forward(self, x):
        H, W = x.size()[2:]
        feat_res8, feat_cp8, feat_cp16 = self.cp(x)  # here return res3b1 feature
        feat_sp = feat_res8  # use res3b1 feature to replace spatial path feature
        feat_fuse = self.ffm(feat_sp, feat_cp8)

        feat_out = self.conv_out(feat_fuse)
        feat_out16 = self.conv_out16(feat_cp8)
        feat_out32 = self.conv_out32(feat_cp16)

        feat_out = F.interpolate(feat_out, (H, W), mode='bilinear', align_corners=True)
        feat_out16 = F.interpolate(feat_out16, (H, W), mode='bilinear', align_corners=True)
        feat_out32 = F.interpolate(feat_out32, (H, W), mode='bilinear', align_corners=True)
        return feat_out, feat_out16, feat_out32

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)

    def get_params(self):
        wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = [], [], [], []
        for name, child in self.named_children():
            child_wd_params, child_nowd_params = child.get_params()
            if isinstance(child, FeatureFusionModule) or isinstance(child, BiSeNetOutput):
                lr_mul_wd_params += child_wd_params
                lr_mul_nowd_params += child_nowd_params
            else:
                wd_params += child_wd_params
                nowd_params += child_nowd_params
        return wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params


if __name__ == "__main__":
    net = BiSeNet(19)
    net.cuda()
    net.eval()
    in_ten = torch.randn(16, 3, 640, 480).cuda()
    out, out16, out32 = net(in_ten)
    print(out.shape)

    net.get_params()
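`BiSeNet.get_params()` splits the parameters into four lists so that conv/linear weights can be regularised while BatchNorm parameters and biases are not, and so that the fusion/output heads can use a larger learning rate. The sketch below is one plausible way to wire those lists into optimizer parameter groups; the learning rate, weight decay, and the 10x multiplier are placeholder assumptions, not values taken from this repository.

```python
# Sketch: mapping the four lists from BiSeNet.get_params() to SGD parameter groups.
# The hyper-parameter values here are illustrative only.
import torch
from models.bisenet.model import BiSeNet

net = BiSeNet(n_classes=19)
wd, nowd, lr_mul_wd, lr_mul_nowd = net.get_params()
base_lr, weight_decay = 1e-2, 5e-4
optimizer = torch.optim.SGD(
    [
        {'params': wd, 'weight_decay': weight_decay},
        {'params': nowd, 'weight_decay': 0},
        {'params': lr_mul_wd, 'weight_decay': weight_decay, 'lr': base_lr * 10},
        {'params': lr_mul_nowd, 'weight_decay': 0, 'lr': base_lr * 10},
    ],
    lr=base_lr, momentum=0.9)
```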
models/bisenet/resnet.py
ADDED
@@ -0,0 +1,109 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.model_zoo as modelzoo

# from modules.bn import InPlaceABNSync as BatchNorm2d

resnet18_url = 'https://download.pytorch.org/models/resnet18-5c106cde.pth'


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    def __init__(self, in_chan, out_chan, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(in_chan, out_chan, stride)
        self.bn1 = nn.BatchNorm2d(out_chan)
        self.conv2 = conv3x3(out_chan, out_chan)
        self.bn2 = nn.BatchNorm2d(out_chan)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = None
        if in_chan != out_chan or stride != 1:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_chan, out_chan,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_chan),
            )

    def forward(self, x):
        residual = self.conv1(x)
        residual = F.relu(self.bn1(residual))
        residual = self.conv2(residual)
        residual = self.bn2(residual)

        shortcut = x
        if self.downsample is not None:
            shortcut = self.downsample(x)

        out = shortcut + residual
        out = self.relu(out)
        return out


def create_layer_basic(in_chan, out_chan, bnum, stride=1):
    layers = [BasicBlock(in_chan, out_chan, stride=stride)]
    for i in range(bnum-1):
        layers.append(BasicBlock(out_chan, out_chan, stride=1))
    return nn.Sequential(*layers)


class Resnet18(nn.Module):
    def __init__(self):
        super(Resnet18, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = create_layer_basic(64, 64, bnum=2, stride=1)
        self.layer2 = create_layer_basic(64, 128, bnum=2, stride=2)
        self.layer3 = create_layer_basic(128, 256, bnum=2, stride=2)
        self.layer4 = create_layer_basic(256, 512, bnum=2, stride=2)
        self.init_weight()

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(self.bn1(x))
        x = self.maxpool(x)

        x = self.layer1(x)
        feat8 = self.layer2(x)  # 1/8
        feat16 = self.layer3(feat8)  # 1/16
        feat32 = self.layer4(feat16)  # 1/32
        return feat8, feat16, feat32

    def init_weight(self):
        state_dict = modelzoo.load_url(resnet18_url)
        self_state_dict = self.state_dict()
        for k, v in state_dict.items():
            if 'fc' in k: continue
            self_state_dict.update({k: v})
        self.load_state_dict(self_state_dict)

    def get_params(self):
        wd_params, nowd_params = [], []
        for name, module in self.named_modules():
            if isinstance(module, (nn.Linear, nn.Conv2d)):
                wd_params.append(module.weight)
                if not module.bias is None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params += list(module.parameters())
        return wd_params, nowd_params


if __name__ == "__main__":
    net = Resnet18()
    x = torch.randn(16, 3, 224, 224)
    out = net(x)
    print(out[0].size())
    print(out[1].size())
    print(out[2].size())
    net.get_params()
models/encoders/__init__.py
ADDED
File without changes
models/encoders/helpers.py
ADDED
@@ -0,0 +1,119 @@
from collections import namedtuple
import torch
from torch.nn import Conv2d, BatchNorm2d, PReLU, ReLU, Sigmoid, MaxPool2d, AdaptiveAvgPool2d, Sequential, Module

"""
ArcFace implementation from [TreB1eN](https://github.com/TreB1eN/InsightFace_Pytorch)
"""


class Flatten(Module):
    def forward(self, input):
        return input.view(input.size(0), -1)


def l2_norm(input, axis=1):
    norm = torch.norm(input, 2, axis, True)
    output = torch.div(input, norm)
    return output


class Bottleneck(namedtuple('Block', ['in_channel', 'depth', 'stride'])):
    """ A named tuple describing a ResNet block. """


def get_block(in_channel, depth, num_units, stride=2):
    return [Bottleneck(in_channel, depth, stride)] + [Bottleneck(depth, depth, 1) for i in range(num_units - 1)]


def get_blocks(num_layers):
    if num_layers == 50:
        blocks = [
            get_block(in_channel=64, depth=64, num_units=3),
            get_block(in_channel=64, depth=128, num_units=4),
            get_block(in_channel=128, depth=256, num_units=14),
            get_block(in_channel=256, depth=512, num_units=3)
        ]
    elif num_layers == 100:
        blocks = [
            get_block(in_channel=64, depth=64, num_units=3),
            get_block(in_channel=64, depth=128, num_units=13),
            get_block(in_channel=128, depth=256, num_units=30),
            get_block(in_channel=256, depth=512, num_units=3)
        ]
    elif num_layers == 152:
        blocks = [
            get_block(in_channel=64, depth=64, num_units=3),
            get_block(in_channel=64, depth=128, num_units=8),
            get_block(in_channel=128, depth=256, num_units=36),
            get_block(in_channel=256, depth=512, num_units=3)
        ]
    else:
        raise ValueError("Invalid number of layers: {}. Must be one of [50, 100, 152]".format(num_layers))
    return blocks


class SEModule(Module):
    def __init__(self, channels, reduction):
        super(SEModule, self).__init__()
        self.avg_pool = AdaptiveAvgPool2d(1)
        self.fc1 = Conv2d(channels, channels // reduction, kernel_size=1, padding=0, bias=False)
        self.relu = ReLU(inplace=True)
        self.fc2 = Conv2d(channels // reduction, channels, kernel_size=1, padding=0, bias=False)
        self.sigmoid = Sigmoid()

    def forward(self, x):
        module_input = x
        x = self.avg_pool(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.sigmoid(x)
        return module_input * x


class bottleneck_IR(Module):
    def __init__(self, in_channel, depth, stride):
        super(bottleneck_IR, self).__init__()
        if in_channel == depth:
            self.shortcut_layer = MaxPool2d(1, stride)
        else:
            self.shortcut_layer = Sequential(
                Conv2d(in_channel, depth, (1, 1), stride, bias=False),
                BatchNorm2d(depth)
            )
        self.res_layer = Sequential(
            BatchNorm2d(in_channel),
            Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False), PReLU(depth),
            Conv2d(depth, depth, (3, 3), stride, 1, bias=False), BatchNorm2d(depth)
        )

    def forward(self, x):
        shortcut = self.shortcut_layer(x)
        res = self.res_layer(x)
        return res + shortcut


class bottleneck_IR_SE(Module):
    def __init__(self, in_channel, depth, stride):
        super(bottleneck_IR_SE, self).__init__()
        if in_channel == depth:
            self.shortcut_layer = MaxPool2d(1, stride)
        else:
            self.shortcut_layer = Sequential(
                Conv2d(in_channel, depth, (1, 1), stride, bias=False),
                BatchNorm2d(depth)
            )
        self.res_layer = Sequential(
            BatchNorm2d(in_channel),
            Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False),
            PReLU(depth),
            Conv2d(depth, depth, (3, 3), stride, 1, bias=False),
            BatchNorm2d(depth),
            SEModule(depth, 16)
        )

    def forward(self, x):
        shortcut = self.shortcut_layer(x)
        res = self.res_layer(x)
        return res + shortcut
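`get_blocks(num_layers)` only returns a specification (a list of stages, each a list of `Bottleneck(in_channel, depth, stride)` tuples); the encoders in this repo unroll it into a `Sequential` of `bottleneck_IR` / `bottleneck_IR_SE` units. A small sketch of that pattern, assuming the 50-layer configuration:

```python
# Sketch: turning the block spec from get_blocks() into a residual body,
# the same construction used later by Backbone and GradualStyleEncoder.
from torch.nn import Sequential
from models.encoders.helpers import get_blocks, bottleneck_IR_SE

blocks = get_blocks(50)
print([len(b) for b in blocks])   # [3, 4, 14, 3] residual units per stage

modules = [bottleneck_IR_SE(b.in_channel, b.depth, b.stride)
           for block in blocks for b in block]
body = Sequential(*modules)
```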
models/encoders/model_irse.py
ADDED
@@ -0,0 +1,84 @@
from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, PReLU, Dropout, Sequential, Module
from models.encoders.helpers import get_blocks, Flatten, bottleneck_IR, bottleneck_IR_SE, l2_norm

"""
Modified Backbone implementation from [TreB1eN](https://github.com/TreB1eN/InsightFace_Pytorch)
"""


class Backbone(Module):
    def __init__(self, input_size, num_layers, mode='ir', drop_ratio=0.4, affine=True):
        super(Backbone, self).__init__()
        assert input_size in [112, 224], "input_size should be 112 or 224"
        assert num_layers in [50, 100, 152], "num_layers should be 50, 100 or 152"
        assert mode in ['ir', 'ir_se'], "mode should be ir or ir_se"
        blocks = get_blocks(num_layers)
        if mode == 'ir':
            unit_module = bottleneck_IR
        elif mode == 'ir_se':
            unit_module = bottleneck_IR_SE
        self.input_layer = Sequential(Conv2d(3, 64, (3, 3), 1, 1, bias=False),
                                      BatchNorm2d(64),
                                      PReLU(64))
        if input_size == 112:
            self.output_layer = Sequential(BatchNorm2d(512),
                                           Dropout(drop_ratio),
                                           Flatten(),
                                           Linear(512 * 7 * 7, 512),
                                           BatchNorm1d(512, affine=affine))
        else:
            self.output_layer = Sequential(BatchNorm2d(512),
                                           Dropout(drop_ratio),
                                           Flatten(),
                                           Linear(512 * 14 * 14, 512),
                                           BatchNorm1d(512, affine=affine))

        modules = []
        for block in blocks:
            for bottleneck in block:
                modules.append(unit_module(bottleneck.in_channel,
                                           bottleneck.depth,
                                           bottleneck.stride))
        self.body = Sequential(*modules)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.body(x)
        x = self.output_layer(x)
        return l2_norm(x)


def IR_50(input_size):
    """Constructs a ir-50 model."""
    model = Backbone(input_size, num_layers=50, mode='ir', drop_ratio=0.4, affine=False)
    return model


def IR_101(input_size):
    """Constructs a ir-101 model."""
    model = Backbone(input_size, num_layers=100, mode='ir', drop_ratio=0.4, affine=False)
    return model


def IR_152(input_size):
    """Constructs a ir-152 model."""
    model = Backbone(input_size, num_layers=152, mode='ir', drop_ratio=0.4, affine=False)
    return model


def IR_SE_50(input_size):
    """Constructs a ir_se-50 model."""
    model = Backbone(input_size, num_layers=50, mode='ir_se', drop_ratio=0.4, affine=False)
    return model


def IR_SE_101(input_size):
    """Constructs a ir_se-101 model."""
    model = Backbone(input_size, num_layers=100, mode='ir_se', drop_ratio=0.4, affine=False)
    return model


def IR_SE_152(input_size):
    """Constructs a ir_se-152 model."""
    model = Backbone(input_size, num_layers=152, mode='ir_se', drop_ratio=0.4, affine=False)
    return model
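These constructors build the IR / IR-SE face recognition backbone that maps a 112x112 (or 224x224) face crop to an L2-normalised 512-d embedding. A minimal shape-check sketch, with randomly initialised weights (in practice a pre-trained ArcFace-style checkpoint would be loaded before use):

```python
# Sketch: forward a dummy batch through IR_SE_50 and inspect the embedding shape.
import torch
from models.encoders.model_irse import IR_SE_50

model = IR_SE_50(input_size=112).eval()
with torch.no_grad():
    emb = model(torch.randn(2, 3, 112, 112))
print(emb.shape)        # torch.Size([2, 512])
print(emb.norm(dim=1))  # ~1.0 per row, because forward() ends with l2_norm
```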
models/encoders/psp_encoders.py
ADDED
@@ -0,0 +1,357 @@
import numpy as np
import torch
import torch.nn.functional as F
from torch import nn
from torch.nn import Linear, Conv2d, BatchNorm2d, PReLU, Sequential, Module

from models.encoders.helpers import get_blocks, Flatten, bottleneck_IR, bottleneck_IR_SE
from models.stylegan2.model import EqualLinear


class GradualStyleBlock(Module):
    def __init__(self, in_c, out_c, spatial, max_pooling=False):
        super(GradualStyleBlock, self).__init__()
        self.out_c = out_c
        self.spatial = spatial
        self.max_pooling = max_pooling
        num_pools = int(np.log2(spatial))
        modules = []
        modules += [Conv2d(in_c, out_c, kernel_size=3, stride=2, padding=1),
                    nn.LeakyReLU()]
        for i in range(num_pools - 1):
            modules += [
                Conv2d(out_c, out_c, kernel_size=3, stride=2, padding=1),
                nn.LeakyReLU()
            ]
        self.convs = nn.Sequential(*modules)
        self.linear = EqualLinear(out_c, out_c, lr_mul=1)

    def forward(self, x):
        x = self.convs(x)
        # To make E accept more general H*W images, we add global average pooling to
        # resize all features to 1*1*512 before mapping to latent codes
        if self.max_pooling:
            x = F.adaptive_max_pool2d(x, 1) ##### modified
        else:
            x = F.adaptive_avg_pool2d(x, 1) ##### modified
        x = x.view(-1, self.out_c)
        x = self.linear(x)
        return x

class AdaptiveInstanceNorm(nn.Module):
    def __init__(self, fin, style_dim=512):
        super().__init__()

        self.norm = nn.InstanceNorm2d(fin, affine=False)
        self.style = nn.Linear(style_dim, fin * 2)

        self.style.bias.data[:fin] = 1
        self.style.bias.data[fin:] = 0

    def forward(self, input, style):
        style = self.style(style).unsqueeze(2).unsqueeze(3)
        gamma, beta = style.chunk(2, 1)
        out = self.norm(input)
        out = gamma * out + beta
        return out


class FusionLayer(Module): ##### modified
    def __init__(self, inchannel, outchannel, use_skip_torgb=True, use_att=0):
        super(FusionLayer, self).__init__()

        self.transform = nn.Sequential(nn.Conv2d(inchannel, outchannel, kernel_size=3, stride=1, padding=1),
                                       nn.LeakyReLU())
        self.fusion_out = nn.Conv2d(outchannel*2, outchannel, kernel_size=3, stride=1, padding=1)
        self.fusion_out.weight.data *= 0.01
        self.fusion_out.weight[:,0:outchannel,1,1].data += torch.eye(outchannel)

        self.use_skip_torgb = use_skip_torgb
        if use_skip_torgb:
            self.fusion_skip = nn.Conv2d(3+outchannel, 3, kernel_size=3, stride=1, padding=1)
            self.fusion_skip.weight.data *= 0.01
            self.fusion_skip.weight[:,0:3,1,1].data += torch.eye(3)

        self.use_att = use_att
        if use_att:
            modules = []
            modules.append(nn.Linear(512, outchannel))
            for _ in range(use_att):
                modules.append(nn.LeakyReLU(negative_slope=0.2, inplace=True))
                modules.append(nn.Linear(outchannel, outchannel))
            modules.append(nn.LeakyReLU(negative_slope=0.2, inplace=True))
            self.linear = Sequential(*modules)
            self.norm = AdaptiveInstanceNorm(outchannel*2, outchannel)
            self.conv = nn.Conv2d(outchannel*2, 1, 3, 1, 1, bias=True)

    def forward(self, feat, out, skip, editing_w=None):
        x = self.transform(feat)
        # similar to VToonify, use editing vector as condition
        # fuse encoder feature and decoder feature with a predicted attention mask m_E
        # if self.use_att = False, just fuse them with a simple conv layer
        if self.use_att and editing_w is not None:
            label = self.linear(editing_w)
            m_E = (F.relu(self.conv(self.norm(torch.cat([out, abs(out-x)], dim=1), label)))).tanh()
            x = x * m_E
        out = self.fusion_out(torch.cat((out, x), dim=1))
        if self.use_skip_torgb:
            skip = self.fusion_skip(torch.cat((skip, x), dim=1))
        return out, skip


class ResnetBlock(nn.Module):
    def __init__(self, dim):
        super(ResnetBlock, self).__init__()

        self.conv_block = nn.Sequential(Conv2d(dim, dim, 3, 1, 1),
                                        nn.LeakyReLU(),
                                        Conv2d(dim, dim, 3, 1, 1))
        self.relu = nn.LeakyReLU()

    def forward(self, x):
        out = x + self.conv_block(x)
        return self.relu(out)

# trainable light-weight translation network T
# for sketch/mask-to-face translation,
# we add a trainable T to map y to an intermediate domain where E can more easily extract features.
class ResnetGenerator(nn.Module):
    def __init__(self, in_channel=19, res_num=2):
        super(ResnetGenerator, self).__init__()

        modules = []
        modules.append(Conv2d(in_channel, 16, 3, 2, 1))
        modules.append(nn.LeakyReLU())
        modules.append(Conv2d(16, 16, 3, 2, 1))
        modules.append(nn.LeakyReLU())
        for _ in range(res_num):
            modules.append(ResnetBlock(16))
        for _ in range(2):
            modules.append(nn.ConvTranspose2d(16, 16, 3, 2, 1, output_padding=1))
            modules.append(nn.LeakyReLU())
        modules.append(Conv2d(16, 64, 3, 1, 1, bias=False))
        modules.append(BatchNorm2d(64))
        modules.append(PReLU(64))
        self.model = Sequential(*modules)

    def forward(self, input):
        return self.model(input)

class GradualStyleEncoder(Module):
    def __init__(self, num_layers, mode='ir', opts=None):
        super(GradualStyleEncoder, self).__init__()
        assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152'
        assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se'
        blocks = get_blocks(num_layers)
        if mode == 'ir':
            unit_module = bottleneck_IR
        elif mode == 'ir_se':
            unit_module = bottleneck_IR_SE

        # for sketch/mask-to-face translation, add a new network T
        if opts.input_nc != 3:
            self.input_label_layer = ResnetGenerator(opts.input_nc, opts.res_num)

        self.input_layer = Sequential(Conv2d(3, 64, (3, 3), 1, 1, bias=False),
                                      BatchNorm2d(64),
                                      PReLU(64))
        modules = []
        for block in blocks:
            for bottleneck in block:
                modules.append(unit_module(bottleneck.in_channel,
                                           bottleneck.depth,
                                           bottleneck.stride))
        self.body = Sequential(*modules)

        self.styles = nn.ModuleList()
        self.style_count = opts.n_styles
        self.coarse_ind = 3
        self.middle_ind = 7
        for i in range(self.style_count):
            if i < self.coarse_ind:
                style = GradualStyleBlock(512, 512, 16, 'max_pooling' in opts and opts.max_pooling)
            elif i < self.middle_ind:
                style = GradualStyleBlock(512, 512, 32, 'max_pooling' in opts and opts.max_pooling)
            else:
                style = GradualStyleBlock(512, 512, 64, 'max_pooling' in opts and opts.max_pooling)
            self.styles.append(style)
        self.latlayer1 = nn.Conv2d(256, 512, kernel_size=1, stride=1, padding=0)
        self.latlayer2 = nn.Conv2d(128, 512, kernel_size=1, stride=1, padding=0)

        # we concatenate pSp features in the middle layers and
        # add a convolution layer to map the concatenated features to the first-layer input feature f of G.
        self.featlayer = nn.Conv2d(768, 512, kernel_size=1, stride=1, padding=0) ##### modified
        self.skiplayer = nn.Conv2d(768, 3, kernel_size=1, stride=1, padding=0) ##### modified

        # skip connection
        if 'use_skip' in opts and opts.use_skip: ##### modified
            self.fusion = nn.ModuleList()
            channels = [[256,512], [256,512], [256,512], [256,512], [128,512], [64,256], [64,128]]
            # opts.skip_max_layer: how many layers are skipped to the decoder
            for inc, outc in channels[:max(1, min(7, opts.skip_max_layer))]: # from 4 to 256
                self.fusion.append(FusionLayer(inc, outc, opts.use_skip_torgb, opts.use_att))

    def _upsample_add(self, x, y):
        '''Upsample and add two feature maps.
        Args:
          x: (Variable) top feature map to be upsampled.
          y: (Variable) lateral feature map.
        Returns:
          (Variable) added feature map.
        Note in PyTorch, when input size is odd, the upsampled feature map
        with `F.upsample(..., scale_factor=2, mode='nearest')`
        maybe not equal to the lateral feature map size.
        e.g.
        original input size: [N,_,15,15] ->
        conv2d feature map size: [N,_,8,8] ->
        upsampled feature map size: [N,_,16,16]
        So we choose bilinear upsample which supports arbitrary output sizes.
        '''
        _, _, H, W = y.size()
        return F.interpolate(x, size=(H, W), mode='bilinear', align_corners=True) + y

    # return_feat: return f
    # return_full: return f and the skipped encoder features
    # return [out, feats]
    # out is the style latent code w+
    # feats[0] is f for the 1st conv layer, feats[1] is f for the 1st torgb layer
    # feats[2-8] is the skipped encoder features
    def forward(self, x, return_feat=False, return_full=False): ##### modified
        if x.shape[1] != 3:
            x = self.input_label_layer(x)
        else:
            x = self.input_layer(x)
        c256 = x ##### modified

        latents = []
        modulelist = list(self.body._modules.values())
        for i, l in enumerate(modulelist):
            x = l(x)
            if i == 2: ##### modified
                c128 = x
            elif i == 6:
                c1 = x
            elif i == 10: ##### modified
                c21 = x ##### modified
            elif i == 15: ##### modified
                c22 = x ##### modified
            elif i == 20:
                c2 = x
            elif i == 23:
                c3 = x

        for j in range(self.coarse_ind):
            latents.append(self.styles[j](c3))

        p2 = self._upsample_add(c3, self.latlayer1(c2))
        for j in range(self.coarse_ind, self.middle_ind):
            latents.append(self.styles[j](p2))

        p1 = self._upsample_add(p2, self.latlayer2(c1))
        for j in range(self.middle_ind, self.style_count):
            latents.append(self.styles[j](p1))

        out = torch.stack(latents, dim=1)

        if not return_feat:
            return out

        feats = [self.featlayer(torch.cat((c21, c22, c2), dim=1)), self.skiplayer(torch.cat((c21, c22, c2), dim=1))]

        if return_full: ##### modified
            feats += [c2, c2, c22, c21, c1, c128, c256]

        return out, feats


    # only compute the first-layer feature f
    # E_F in the paper
    def get_feat(self, x): ##### modified
        # for sketch/mask-to-face translation
        # use a trainable light-weight translation network T
        if x.shape[1] != 3:
            x = self.input_label_layer(x)
        else:
            x = self.input_layer(x)

        latents = []
        modulelist = list(self.body._modules.values())
        for i, l in enumerate(modulelist):
            x = l(x)
            if i == 10: ##### modified
                c21 = x ##### modified
            elif i == 15: ##### modified
                c22 = x ##### modified
            elif i == 20:
                c2 = x
                break
        return self.featlayer(torch.cat((c21, c22, c2), dim=1))

class BackboneEncoderUsingLastLayerIntoW(Module):
    def __init__(self, num_layers, mode='ir', opts=None):
        super(BackboneEncoderUsingLastLayerIntoW, self).__init__()
        print('Using BackboneEncoderUsingLastLayerIntoW')
        assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152'
        assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se'
        blocks = get_blocks(num_layers)
        if mode == 'ir':
            unit_module = bottleneck_IR
        elif mode == 'ir_se':
            unit_module = bottleneck_IR_SE
        self.input_layer = Sequential(Conv2d(opts.input_nc, 64, (3, 3), 1, 1, bias=False),
                                      BatchNorm2d(64),
                                      PReLU(64))
        self.output_pool = torch.nn.AdaptiveAvgPool2d((1, 1))
        self.linear = EqualLinear(512, 512, lr_mul=1)
        modules = []
        for block in blocks:
            for bottleneck in block:
                modules.append(unit_module(bottleneck.in_channel,
                                           bottleneck.depth,
                                           bottleneck.stride))
        self.body = Sequential(*modules)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.body(x)
        x = self.output_pool(x)
        x = x.view(-1, 512)
        x = self.linear(x)
        return x


class BackboneEncoderUsingLastLayerIntoWPlus(Module):
    def __init__(self, num_layers, mode='ir', opts=None):
        super(BackboneEncoderUsingLastLayerIntoWPlus, self).__init__()
        print('Using BackboneEncoderUsingLastLayerIntoWPlus')
        assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152'
        assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se'
        blocks = get_blocks(num_layers)
        if mode == 'ir':
            unit_module = bottleneck_IR
        elif mode == 'ir_se':
            unit_module = bottleneck_IR_SE
        self.n_styles = opts.n_styles
        self.input_layer = Sequential(Conv2d(opts.input_nc, 64, (3, 3), 1, 1, bias=False),
                                      BatchNorm2d(64),
                                      PReLU(64))
        self.output_layer_2 = Sequential(BatchNorm2d(512),
                                         torch.nn.AdaptiveAvgPool2d((7, 7)),
                                         Flatten(),
                                         Linear(512 * 7 * 7, 512))
        self.linear = EqualLinear(512, 512 * self.n_styles, lr_mul=1)
        modules = []
        for block in blocks:
            for bottleneck in block:
                modules.append(unit_module(bottleneck.in_channel,
                                           bottleneck.depth,
                                           bottleneck.stride))
        self.body = Sequential(*modules)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.body(x)
        x = self.output_layer_2(x)
        x = self.linear(x)
        x = x.view(-1, self.n_styles, 512)
        return x
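`GradualStyleEncoder` pulls all of its configuration from an `opts` namespace (input channels, number of style codes, and the skip-connection settings used by the FusionLayers). The sketch below constructs the encoder with an illustrative `opts`; the field values are assumptions for demonstration, not the options this Space actually passes in.

```python
# Sketch: instantiate the StyleGANEX encoder with a hypothetical opts namespace
# and inspect the outputs of a forward pass on a random 256x256 input.
import torch
from argparse import Namespace
from models.encoders.psp_encoders import GradualStyleEncoder

opts = Namespace(input_nc=3, n_styles=18, use_skip=True, skip_max_layer=7,
                 use_skip_torgb=True, use_att=0, max_pooling=False, res_num=2)
encoder = GradualStyleEncoder(num_layers=50, mode='ir_se', opts=opts).eval()

with torch.no_grad():
    w_plus, feats = encoder(torch.randn(1, 3, 256, 256),
                            return_feat=True, return_full=True)
print(w_plus.shape)    # [1, 18, 512] style codes w+
print(feats[0].shape)  # first-layer feature f fed to the generator
```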
models/mtcnn/__init__.py
ADDED
File without changes
models/mtcnn/mtcnn.py
ADDED
@@ -0,0 +1,156 @@
import numpy as np
import torch
from PIL import Image
from models.mtcnn.mtcnn_pytorch.src.get_nets import PNet, RNet, ONet
from models.mtcnn.mtcnn_pytorch.src.box_utils import nms, calibrate_box, get_image_boxes, convert_to_square
from models.mtcnn.mtcnn_pytorch.src.first_stage import run_first_stage
from models.mtcnn.mtcnn_pytorch.src.align_trans import get_reference_facial_points, warp_and_crop_face

device = 'cuda:0'


class MTCNN():
    def __init__(self):
        print(device)
        self.pnet = PNet().to(device)
        self.rnet = RNet().to(device)
        self.onet = ONet().to(device)
        self.pnet.eval()
        self.rnet.eval()
        self.onet.eval()
        self.refrence = get_reference_facial_points(default_square=True)

    def align(self, img):
        _, landmarks = self.detect_faces(img)
        if len(landmarks) == 0:
            return None, None
        facial5points = [[landmarks[0][j], landmarks[0][j + 5]] for j in range(5)]
        warped_face, tfm = warp_and_crop_face(np.array(img), facial5points, self.refrence, crop_size=(112, 112))
        return Image.fromarray(warped_face), tfm

    def align_multi(self, img, limit=None, min_face_size=30.0):
        boxes, landmarks = self.detect_faces(img, min_face_size)
        if limit:
            boxes = boxes[:limit]
            landmarks = landmarks[:limit]
        faces = []
        tfms = []
        for landmark in landmarks:
            facial5points = [[landmark[j], landmark[j + 5]] for j in range(5)]
            warped_face, tfm = warp_and_crop_face(np.array(img), facial5points, self.refrence, crop_size=(112, 112))
            faces.append(Image.fromarray(warped_face))
            tfms.append(tfm)
        return boxes, faces, tfms

    def detect_faces(self, image, min_face_size=20.0,
                     thresholds=[0.15, 0.25, 0.35],
                     nms_thresholds=[0.7, 0.7, 0.7]):
        """
        Arguments:
            image: an instance of PIL.Image.
            min_face_size: a float number.
            thresholds: a list of length 3.
            nms_thresholds: a list of length 3.

        Returns:
            two float numpy arrays of shapes [n_boxes, 4] and [n_boxes, 10],
            bounding boxes and facial landmarks.
        """

        # BUILD AN IMAGE PYRAMID
        width, height = image.size
        min_length = min(height, width)

        min_detection_size = 12
        factor = 0.707  # sqrt(0.5)

        # scales for scaling the image
        scales = []

        # scales the image so that
        # minimum size that we can detect equals to
        # minimum face size that we want to detect
        m = min_detection_size / min_face_size
        min_length *= m

        factor_count = 0
        while min_length > min_detection_size:
            scales.append(m * factor ** factor_count)
            min_length *= factor
            factor_count += 1

        # STAGE 1

        # it will be returned
        bounding_boxes = []

        with torch.no_grad():
            # run P-Net on different scales
            for s in scales:
                boxes = run_first_stage(image, self.pnet, scale=s, threshold=thresholds[0])
                bounding_boxes.append(boxes)

            # collect boxes (and offsets, and scores) from different scales
            bounding_boxes = [i for i in bounding_boxes if i is not None]
            bounding_boxes = np.vstack(bounding_boxes)

            keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
            bounding_boxes = bounding_boxes[keep]

            # use offsets predicted by pnet to transform bounding boxes
            bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:])
            # shape [n_boxes, 5]

            bounding_boxes = convert_to_square(bounding_boxes)
            bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

            # STAGE 2

            img_boxes = get_image_boxes(bounding_boxes, image, size=24)
            img_boxes = torch.FloatTensor(img_boxes).to(device)

            output = self.rnet(img_boxes)
            offsets = output[0].cpu().data.numpy()  # shape [n_boxes, 4]
            probs = output[1].cpu().data.numpy()  # shape [n_boxes, 2]

            keep = np.where(probs[:, 1] > thresholds[1])[0]
            bounding_boxes = bounding_boxes[keep]
            bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
            offsets = offsets[keep]

            keep = nms(bounding_boxes, nms_thresholds[1])
            bounding_boxes = bounding_boxes[keep]
            bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
            bounding_boxes = convert_to_square(bounding_boxes)
            bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

            # STAGE 3

            img_boxes = get_image_boxes(bounding_boxes, image, size=48)
            if len(img_boxes) == 0:
                return [], []
            img_boxes = torch.FloatTensor(img_boxes).to(device)
            output = self.onet(img_boxes)
            landmarks = output[0].cpu().data.numpy()  # shape [n_boxes, 10]
            offsets = output[1].cpu().data.numpy()  # shape [n_boxes, 4]
            probs = output[2].cpu().data.numpy()  # shape [n_boxes, 2]

            keep = np.where(probs[:, 1] > thresholds[2])[0]
            bounding_boxes = bounding_boxes[keep]
            bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
            offsets = offsets[keep]
            landmarks = landmarks[keep]

            # compute landmark points
            width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
            height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
            xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
            landmarks[:, 0:5] = np.expand_dims(xmin, 1) + np.expand_dims(width, 1) * landmarks[:, 0:5]
            landmarks[:, 5:10] = np.expand_dims(ymin, 1) + np.expand_dims(height, 1) * landmarks[:, 5:10]

            bounding_boxes = calibrate_box(bounding_boxes, offsets)
            keep = nms(bounding_boxes, nms_thresholds[2], mode='min')
            bounding_boxes = bounding_boxes[keep]
            landmarks = landmarks[keep]

        return bounding_boxes, landmarks
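A minimal sketch of how the wrapper above is typically used to detect, crop, and align a face. The image path is a placeholder; note that the module hard-codes `device = 'cuda:0'`, so this assumes a GPU is available.

```python
# Sketch: crop and align one face with the MTCNN wrapper defined above.
from PIL import Image
from models.mtcnn.mtcnn import MTCNN

mtcnn = MTCNN()
img = Image.open('photo.jpg').convert('RGB')

face, tfm = mtcnn.align(img)   # 112x112 aligned face and the 2x3 similarity transform
if face is not None:
    face.save('aligned_face.png')

boxes, landmarks = mtcnn.detect_faces(img)  # raw detections: [n,5] boxes, [n,10] landmarks
```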
models/mtcnn/mtcnn_pytorch/__init__.py
ADDED
File without changes
models/mtcnn/mtcnn_pytorch/src/__init__.py
ADDED
@@ -0,0 +1,2 @@
from .visualization_utils import show_bboxes
from .detector import detect_faces
models/mtcnn/mtcnn_pytorch/src/align_trans.py
ADDED
@@ -0,0 +1,304 @@
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""
|
3 |
+
Created on Mon Apr 24 15:43:29 2017
|
4 |
+
@author: zhaoy
|
5 |
+
"""
|
6 |
+
import numpy as np
|
7 |
+
import cv2
|
8 |
+
|
9 |
+
# from scipy.linalg import lstsq
|
10 |
+
# from scipy.ndimage import geometric_transform # , map_coordinates
|
11 |
+
|
12 |
+
from models.mtcnn.mtcnn_pytorch.src.matlab_cp2tform import get_similarity_transform_for_cv2
|
13 |
+
|
14 |
+
# reference facial points, a list of coordinates (x,y)
|
15 |
+
REFERENCE_FACIAL_POINTS = [
|
16 |
+
[30.29459953, 51.69630051],
|
17 |
+
[65.53179932, 51.50139999],
|
18 |
+
[48.02519989, 71.73660278],
|
19 |
+
[33.54930115, 92.3655014],
|
20 |
+
[62.72990036, 92.20410156]
|
21 |
+
]
|
22 |
+
|
23 |
+
DEFAULT_CROP_SIZE = (96, 112)
|
24 |
+
|
25 |
+
|
26 |
+
class FaceWarpException(Exception):
|
27 |
+
def __str__(self):
|
28 |
+
return 'In File {}:{}'.format(
|
29 |
+
__file__, super.__str__(self))
|
30 |
+
|
31 |
+
|
32 |
+
def get_reference_facial_points(output_size=None,
|
33 |
+
inner_padding_factor=0.0,
|
34 |
+
outer_padding=(0, 0),
|
35 |
+
default_square=False):
|
36 |
+
"""
|
37 |
+
Function:
|
38 |
+
----------
|
39 |
+
get reference 5 key points according to crop settings:
|
40 |
+
0. Set default crop_size:
|
41 |
+
if default_square:
|
42 |
+
crop_size = (112, 112)
|
43 |
+
else:
|
44 |
+
crop_size = (96, 112)
|
45 |
+
1. Pad the crop_size by inner_padding_factor in each side;
|
46 |
+
2. Resize crop_size into (output_size - outer_padding*2),
|
47 |
+
pad into output_size with outer_padding;
|
48 |
+
3. Output reference_5point;
|
49 |
+
Parameters:
|
50 |
+
----------
|
51 |
+
@output_size: (w, h) or None
|
52 |
+
size of aligned face image
|
53 |
+
@inner_padding_factor: (w_factor, h_factor)
|
54 |
+
padding factor for inner (w, h)
|
55 |
+
@outer_padding: (w_pad, h_pad)
|
56 |
+
each row is a pair of coordinates (x, y)
|
57 |
+
@default_square: True or False
|
58 |
+
if True:
|
59 |
+
default crop_size = (112, 112)
|
60 |
+
else:
|
61 |
+
default crop_size = (96, 112);
|
62 |
+
!!! make sure, if output_size is not None:
|
63 |
+
(output_size - outer_padding)
|
64 |
+
= some_scale * (default crop_size * (1.0 + inner_padding_factor))
|
65 |
+
Returns:
|
66 |
+
----------
|
67 |
+
@reference_5point: 5x2 np.array
|
68 |
+
each row is a pair of transformed coordinates (x, y)
|
69 |
+
"""
|
70 |
+
# print('\n===> get_reference_facial_points():')
|
71 |
+
|
72 |
+
# print('---> Params:')
|
73 |
+
# print(' output_size: ', output_size)
|
74 |
+
# print(' inner_padding_factor: ', inner_padding_factor)
|
75 |
+
# print(' outer_padding:', outer_padding)
|
76 |
+
# print(' default_square: ', default_square)
|
77 |
+
|
78 |
+
tmp_5pts = np.array(REFERENCE_FACIAL_POINTS)
|
79 |
+
tmp_crop_size = np.array(DEFAULT_CROP_SIZE)
|
80 |
+
|
81 |
+
# 0) make the inner region a square
|
82 |
+
if default_square:
|
83 |
+
size_diff = max(tmp_crop_size) - tmp_crop_size
|
84 |
+
tmp_5pts += size_diff / 2
|
85 |
+
tmp_crop_size += size_diff
|
86 |
+
|
87 |
+
# print('---> default:')
|
88 |
+
# print(' crop_size = ', tmp_crop_size)
|
89 |
+
# print(' reference_5pts = ', tmp_5pts)
|
90 |
+
|
91 |
+
if (output_size and
|
92 |
+
output_size[0] == tmp_crop_size[0] and
|
93 |
+
output_size[1] == tmp_crop_size[1]):
|
94 |
+
# print('output_size == DEFAULT_CROP_SIZE {}: return default reference points'.format(tmp_crop_size))
|
95 |
+
return tmp_5pts
|
96 |
+
|
97 |
+
if (inner_padding_factor == 0 and
|
98 |
+
outer_padding == (0, 0)):
|
99 |
+
if output_size is None:
|
100 |
+
# print('No paddings to do: return default reference points')
|
101 |
+
return tmp_5pts
|
102 |
+
else:
|
103 |
+
raise FaceWarpException(
|
104 |
+
'No paddings to do, output_size must be None or {}'.format(tmp_crop_size))
|
105 |
+
|
106 |
+
# check output size
|
107 |
+
if not (0 <= inner_padding_factor <= 1.0):
|
108 |
+
raise FaceWarpException('Not (0 <= inner_padding_factor <= 1.0)')
|
109 |
+
|
110 |
+
if ((inner_padding_factor > 0 or outer_padding[0] > 0 or outer_padding[1] > 0)
|
111 |
+
and output_size is None):
|
112 |
+
output_size = tmp_crop_size * \
|
113 |
+
(1 + inner_padding_factor * 2).astype(np.int32)
|
114 |
+
output_size += np.array(outer_padding)
|
115 |
+
# print(' deduced from paddings, output_size = ', output_size)
|
116 |
+
|
117 |
+
if not (outer_padding[0] < output_size[0]
|
118 |
+
and outer_padding[1] < output_size[1]):
|
119 |
+
raise FaceWarpException('Not (outer_padding[0] < output_size[0]'
|
120 |
+
'and outer_padding[1] < output_size[1])')
|
121 |
+
|
122 |
+
# 1) pad the inner region according inner_padding_factor
|
123 |
+
# print('---> STEP1: pad the inner region according inner_padding_factor')
|
124 |
+
if inner_padding_factor > 0:
|
125 |
+
size_diff = tmp_crop_size * inner_padding_factor * 2
|
126 |
+
tmp_5pts += size_diff / 2
|
127 |
+
tmp_crop_size += np.round(size_diff).astype(np.int32)
|
128 |
+
|
129 |
+
# print(' crop_size = ', tmp_crop_size)
|
130 |
+
# print(' reference_5pts = ', tmp_5pts)
|
131 |
+
|
132 |
+
# 2) resize the padded inner region
|
133 |
+
# print('---> STEP2: resize the padded inner region')
|
134 |
+
size_bf_outer_pad = np.array(output_size) - np.array(outer_padding) * 2
|
135 |
+
# print(' crop_size = ', tmp_crop_size)
|
136 |
+
# print(' size_bf_outer_pad = ', size_bf_outer_pad)
|
137 |
+
|
138 |
+
if size_bf_outer_pad[0] * tmp_crop_size[1] != size_bf_outer_pad[1] * tmp_crop_size[0]:
|
139 |
+
raise FaceWarpException('Must have (output_size - outer_padding)'
|
140 |
+
'= some_scale * (crop_size * (1.0 + inner_padding_factor)')
|
141 |
+
|
142 |
+
scale_factor = size_bf_outer_pad[0].astype(np.float32) / tmp_crop_size[0]
|
143 |
+
# print(' resize scale_factor = ', scale_factor)
|
144 |
+
tmp_5pts = tmp_5pts * scale_factor
|
145 |
+
# size_diff = tmp_crop_size * (scale_factor - min(scale_factor))
|
146 |
+
# tmp_5pts = tmp_5pts + size_diff / 2
|
147 |
+
tmp_crop_size = size_bf_outer_pad
|
148 |
+
# print(' crop_size = ', tmp_crop_size)
|
149 |
+
# print(' reference_5pts = ', tmp_5pts)
|
150 |
+
|
151 |
+
# 3) add outer_padding to make output_size
|
152 |
+
reference_5point = tmp_5pts + np.array(outer_padding)
|
153 |
+
tmp_crop_size = output_size
|
154 |
+
# print('---> STEP3: add outer_padding to make output_size')
|
155 |
+
# print(' crop_size = ', tmp_crop_size)
|
156 |
+
# print(' reference_5pts = ', tmp_5pts)
|
157 |
+
|
158 |
+
# print('===> end get_reference_facial_points\n')
|
159 |
+
|
160 |
+
return reference_5point
|
161 |
+
|
162 |
+
|
163 |
+
def get_affine_transform_matrix(src_pts, dst_pts):
|
164 |
+
"""
|
165 |
+
Function:
|
166 |
+
----------
|
167 |
+
get affine transform matrix 'tfm' from src_pts to dst_pts
|
168 |
+
Parameters:
|
169 |
+
----------
|
170 |
+
@src_pts: Kx2 np.array
|
171 |
+
source points matrix, each row is a pair of coordinates (x, y)
|
172 |
+
@dst_pts: Kx2 np.array
|
173 |
+
destination points matrix, each row is a pair of coordinates (x, y)
|
174 |
+
Returns:
|
175 |
+
----------
|
176 |
+
@tfm: 2x3 np.array
|
177 |
+
transform matrix from src_pts to dst_pts
|
178 |
+
"""
|
179 |
+
|
180 |
+
tfm = np.float32([[1, 0, 0], [0, 1, 0]])
|
181 |
+
n_pts = src_pts.shape[0]
|
182 |
+
ones = np.ones((n_pts, 1), src_pts.dtype)
|
183 |
+
src_pts_ = np.hstack([src_pts, ones])
|
184 |
+
dst_pts_ = np.hstack([dst_pts, ones])
|
185 |
+
|
186 |
+
# #print(('src_pts_:\n' + str(src_pts_))
|
187 |
+
# #print(('dst_pts_:\n' + str(dst_pts_))
|
188 |
+
|
189 |
+
A, res, rank, s = np.linalg.lstsq(src_pts_, dst_pts_, rcond=None)
|
190 |
+
|
191 |
+
# #print(('np.linalg.lstsq return A: \n' + str(A))
|
192 |
+
# #print(('np.linalg.lstsq return res: \n' + str(res))
|
193 |
+
# #print(('np.linalg.lstsq return rank: \n' + str(rank))
|
194 |
+
# #print(('np.linalg.lstsq return s: \n' + str(s))
|
195 |
+
|
196 |
+
if rank == 3:
|
197 |
+
tfm = np.float32([
|
198 |
+
[A[0, 0], A[1, 0], A[2, 0]],
|
199 |
+
[A[0, 1], A[1, 1], A[2, 1]]
|
200 |
+
])
|
201 |
+
elif rank == 2:
|
202 |
+
tfm = np.float32([
|
203 |
+
[A[0, 0], A[1, 0], 0],
|
204 |
+
[A[0, 1], A[1, 1], 0]
|
205 |
+
])
|
206 |
+
|
207 |
+
return tfm
|
208 |
+
|
209 |
+
|
210 |
+
def warp_and_crop_face(src_img,
|
211 |
+
facial_pts,
|
212 |
+
reference_pts=None,
|
213 |
+
crop_size=(96, 112),
|
214 |
+
align_type='similarity'):
|
215 |
+
"""
|
216 |
+
Function:
|
217 |
+
----------
|
218 |
+
apply affine transform 'trans' to uv
|
219 |
+
Parameters:
|
220 |
+
----------
|
221 |
+
@src_img: HxWxC np.array
|
222 |
+
input image
|
223 |
+
@facial_pts: could be
|
224 |
+
1)a list of K coordinates (x,y)
|
225 |
+
or
|
226 |
+
2) Kx2 or 2xK np.array
|
227 |
+
each row or col is a pair of coordinates (x, y)
|
228 |
+
@reference_pts: could be
|
229 |
+
1) a list of K coordinates (x,y)
|
230 |
+
or
|
231 |
+
2) Kx2 or 2xK np.array
|
232 |
+
each row or col is a pair of coordinates (x, y)
|
233 |
+
or
|
234 |
+
3) None
|
235 |
+
if None, use default reference facial points
|
236 |
+
@crop_size: (w, h)
|
237 |
+
output face image size
|
238 |
+
@align_type: transform type, could be one of
|
239 |
+
1) 'similarity': use similarity transform
|
240 |
+
2) 'cv2_affine': use the first 3 points to do affine transform,
|
241 |
+
by calling cv2.getAffineTransform()
|
242 |
+
3) 'affine': use all points to do affine transform
|
243 |
+
Returns:
|
244 |
+
----------
|
245 |
+
@face_img: output face image with size (w, h) = @crop_size
|
246 |
+
"""
|
247 |
+
|
248 |
+
if reference_pts is None:
|
249 |
+
if crop_size[0] == 96 and crop_size[1] == 112:
|
250 |
+
reference_pts = REFERENCE_FACIAL_POINTS
|
251 |
+
else:
|
252 |
+
default_square = False
|
253 |
+
inner_padding_factor = 0
|
254 |
+
outer_padding = (0, 0)
|
255 |
+
output_size = crop_size
|
256 |
+
|
257 |
+
reference_pts = get_reference_facial_points(output_size,
|
258 |
+
inner_padding_factor,
|
259 |
+
outer_padding,
|
260 |
+
default_square)
|
261 |
+
|
262 |
+
ref_pts = np.float32(reference_pts)
|
263 |
+
ref_pts_shp = ref_pts.shape
|
264 |
+
if max(ref_pts_shp) < 3 or min(ref_pts_shp) != 2:
|
265 |
+
raise FaceWarpException(
|
266 |
+
'reference_pts.shape must be (K,2) or (2,K) and K>2')
|
267 |
+
|
268 |
+
if ref_pts_shp[0] == 2:
|
269 |
+
ref_pts = ref_pts.T
|
270 |
+
|
271 |
+
src_pts = np.float32(facial_pts)
|
272 |
+
src_pts_shp = src_pts.shape
|
273 |
+
if max(src_pts_shp) < 3 or min(src_pts_shp) != 2:
|
274 |
+
raise FaceWarpException(
|
275 |
+
'facial_pts.shape must be (K,2) or (2,K) and K>2')
|
276 |
+
|
277 |
+
if src_pts_shp[0] == 2:
|
278 |
+
src_pts = src_pts.T
|
279 |
+
|
280 |
+
# #print('--->src_pts:\n', src_pts
|
281 |
+
# #print('--->ref_pts\n', ref_pts
|
282 |
+
|
283 |
+
if src_pts.shape != ref_pts.shape:
|
284 |
+
raise FaceWarpException(
|
285 |
+
'facial_pts and reference_pts must have the same shape')
|
286 |
+
|
287 |
+
if align_type == 'cv2_affine':
|
288 |
+
tfm = cv2.getAffineTransform(src_pts[0:3], ref_pts[0:3])
|
289 |
+
# #print(('cv2.getAffineTransform() returns tfm=\n' + str(tfm))
|
290 |
+
elif align_type == 'affine':
|
291 |
+
tfm = get_affine_transform_matrix(src_pts, ref_pts)
|
292 |
+
# #print(('get_affine_transform_matrix() returns tfm=\n' + str(tfm))
|
293 |
+
else:
|
294 |
+
tfm = get_similarity_transform_for_cv2(src_pts, ref_pts)
|
295 |
+
# #print(('get_similarity_transform_for_cv2() returns tfm=\n' + str(tfm))
|
296 |
+
|
297 |
+
# #print('--->Transform matrix: '
|
298 |
+
# #print(('type(tfm):' + str(type(tfm)))
|
299 |
+
# #print(('tfm.dtype:' + str(tfm.dtype))
|
300 |
+
# #print( tfm
|
301 |
+
|
302 |
+
face_img = cv2.warpAffine(src_img, tfm, (crop_size[0], crop_size[1]))
|
303 |
+
|
304 |
+
return face_img, tfm
|
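
A minimal usage sketch of warp_and_crop_face as defined above. The image path and the five landmark coordinates (eyes, nose tip, mouth corners) are made-up placeholders; with reference_pts=None and the default 96x112 crop size, the built-in REFERENCE_FACIAL_POINTS template is used.

import cv2
import numpy as np
from models.mtcnn.mtcnn_pytorch.src.align_trans import warp_and_crop_face

# hypothetical 5-point landmarks in source-image coordinates:
# left eye, right eye, nose tip, left mouth corner, right mouth corner
facial5points = np.array([[105.8, 109.8], [147.9, 112.5], [121.3, 139.1],
                          [106.1, 155.6], [144.4, 156.4]], dtype=np.float32)

src_img = cv2.imread('face.jpg')  # hypothetical input path
face_img, tfm = warp_and_crop_face(src_img, facial5points,
                                   reference_pts=None,      # fall back to the 96x112 template
                                   crop_size=(96, 112),
                                   align_type='similarity')
print(face_img.shape)              # (112, 96, 3): cv2.warpAffine returns (h, w, channels)
cv2.imwrite('face_aligned.jpg', face_img)
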
models/mtcnn/mtcnn_pytorch/src/box_utils.py
ADDED
@@ -0,0 +1,238 @@
|
1 |
+
import numpy as np
|
2 |
+
from PIL import Image
|
3 |
+
|
4 |
+
|
5 |
+
def nms(boxes, overlap_threshold=0.5, mode='union'):
|
6 |
+
"""Non-maximum suppression.
|
7 |
+
|
8 |
+
Arguments:
|
9 |
+
boxes: a float numpy array of shape [n, 5],
|
10 |
+
where each row is (xmin, ymin, xmax, ymax, score).
|
11 |
+
overlap_threshold: a float number.
|
12 |
+
mode: 'union' or 'min'.
|
13 |
+
|
14 |
+
Returns:
|
15 |
+
list with indices of the selected boxes
|
16 |
+
"""
|
17 |
+
|
18 |
+
# if there are no boxes, return the empty list
|
19 |
+
if len(boxes) == 0:
|
20 |
+
return []
|
21 |
+
|
22 |
+
# list of picked indices
|
23 |
+
pick = []
|
24 |
+
|
25 |
+
# grab the coordinates of the bounding boxes
|
26 |
+
x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)]
|
27 |
+
|
28 |
+
area = (x2 - x1 + 1.0) * (y2 - y1 + 1.0)
|
29 |
+
ids = np.argsort(score) # in increasing order
|
30 |
+
|
31 |
+
while len(ids) > 0:
|
32 |
+
|
33 |
+
# grab index of the largest value
|
34 |
+
last = len(ids) - 1
|
35 |
+
i = ids[last]
|
36 |
+
pick.append(i)
|
37 |
+
|
38 |
+
# compute intersections
|
39 |
+
# of the box with the largest score
|
40 |
+
# with the rest of boxes
|
41 |
+
|
42 |
+
# left top corner of intersection boxes
|
43 |
+
ix1 = np.maximum(x1[i], x1[ids[:last]])
|
44 |
+
iy1 = np.maximum(y1[i], y1[ids[:last]])
|
45 |
+
|
46 |
+
# right bottom corner of intersection boxes
|
47 |
+
ix2 = np.minimum(x2[i], x2[ids[:last]])
|
48 |
+
iy2 = np.minimum(y2[i], y2[ids[:last]])
|
49 |
+
|
50 |
+
# width and height of intersection boxes
|
51 |
+
w = np.maximum(0.0, ix2 - ix1 + 1.0)
|
52 |
+
h = np.maximum(0.0, iy2 - iy1 + 1.0)
|
53 |
+
|
54 |
+
# intersections' areas
|
55 |
+
inter = w * h
|
56 |
+
if mode == 'min':
|
57 |
+
overlap = inter / np.minimum(area[i], area[ids[:last]])
|
58 |
+
elif mode == 'union':
|
59 |
+
# intersection over union (IoU)
|
60 |
+
overlap = inter / (area[i] + area[ids[:last]] - inter)
|
61 |
+
|
62 |
+
# delete all boxes where overlap is too big
|
63 |
+
ids = np.delete(
|
64 |
+
ids,
|
65 |
+
np.concatenate([[last], np.where(overlap > overlap_threshold)[0]])
|
66 |
+
)
|
67 |
+
|
68 |
+
return pick
|
69 |
+
|
70 |
+
|
71 |
+
def convert_to_square(bboxes):
|
72 |
+
"""Convert bounding boxes to a square form.
|
73 |
+
|
74 |
+
Arguments:
|
75 |
+
bboxes: a float numpy array of shape [n, 5].
|
76 |
+
|
77 |
+
Returns:
|
78 |
+
a float numpy array of shape [n, 5],
|
79 |
+
squared bounding boxes.
|
80 |
+
"""
|
81 |
+
|
82 |
+
square_bboxes = np.zeros_like(bboxes)
|
83 |
+
x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]
|
84 |
+
h = y2 - y1 + 1.0
|
85 |
+
w = x2 - x1 + 1.0
|
86 |
+
max_side = np.maximum(h, w)
|
87 |
+
square_bboxes[:, 0] = x1 + w * 0.5 - max_side * 0.5
|
88 |
+
square_bboxes[:, 1] = y1 + h * 0.5 - max_side * 0.5
|
89 |
+
square_bboxes[:, 2] = square_bboxes[:, 0] + max_side - 1.0
|
90 |
+
square_bboxes[:, 3] = square_bboxes[:, 1] + max_side - 1.0
|
91 |
+
return square_bboxes
|
92 |
+
|
93 |
+
|
94 |
+
def calibrate_box(bboxes, offsets):
|
95 |
+
"""Transform bounding boxes to be more like true bounding boxes.
|
96 |
+
'offsets' is one of the outputs of the nets.
|
97 |
+
|
98 |
+
Arguments:
|
99 |
+
bboxes: a float numpy array of shape [n, 5].
|
100 |
+
offsets: a float numpy array of shape [n, 4].
|
101 |
+
|
102 |
+
Returns:
|
103 |
+
a float numpy array of shape [n, 5].
|
104 |
+
"""
|
105 |
+
x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]
|
106 |
+
w = x2 - x1 + 1.0
|
107 |
+
h = y2 - y1 + 1.0
|
108 |
+
w = np.expand_dims(w, 1)
|
109 |
+
h = np.expand_dims(h, 1)
|
110 |
+
|
111 |
+
# this is what happening here:
|
112 |
+
# tx1, ty1, tx2, ty2 = [offsets[:, i] for i in range(4)]
|
113 |
+
# x1_true = x1 + tx1*w
|
114 |
+
# y1_true = y1 + ty1*h
|
115 |
+
# x2_true = x2 + tx2*w
|
116 |
+
# y2_true = y2 + ty2*h
|
117 |
+
# below is just more compact form of this
|
118 |
+
|
119 |
+
# are offsets always such that
|
120 |
+
# x1 < x2 and y1 < y2 ?
|
121 |
+
|
122 |
+
translation = np.hstack([w, h, w, h]) * offsets
|
123 |
+
bboxes[:, 0:4] = bboxes[:, 0:4] + translation
|
124 |
+
return bboxes
|
125 |
+
|
126 |
+
|
127 |
+
def get_image_boxes(bounding_boxes, img, size=24):
|
128 |
+
"""Cut out boxes from the image.
|
129 |
+
|
130 |
+
Arguments:
|
131 |
+
bounding_boxes: a float numpy array of shape [n, 5].
|
132 |
+
img: an instance of PIL.Image.
|
133 |
+
size: an integer, size of cutouts.
|
134 |
+
|
135 |
+
Returns:
|
136 |
+
a float numpy array of shape [n, 3, size, size].
|
137 |
+
"""
|
138 |
+
|
139 |
+
num_boxes = len(bounding_boxes)
|
140 |
+
width, height = img.size
|
141 |
+
|
142 |
+
[dy, edy, dx, edx, y, ey, x, ex, w, h] = correct_bboxes(bounding_boxes, width, height)
|
143 |
+
img_boxes = np.zeros((num_boxes, 3, size, size), 'float32')
|
144 |
+
|
145 |
+
for i in range(num_boxes):
|
146 |
+
img_box = np.zeros((h[i], w[i], 3), 'uint8')
|
147 |
+
|
148 |
+
img_array = np.asarray(img, 'uint8')
|
149 |
+
img_box[dy[i]:(edy[i] + 1), dx[i]:(edx[i] + 1), :] = \
|
150 |
+
img_array[y[i]:(ey[i] + 1), x[i]:(ex[i] + 1), :]
|
151 |
+
|
152 |
+
# resize
|
153 |
+
img_box = Image.fromarray(img_box)
|
154 |
+
img_box = img_box.resize((size, size), Image.BILINEAR)
|
155 |
+
img_box = np.asarray(img_box, 'float32')
|
156 |
+
|
157 |
+
img_boxes[i, :, :, :] = _preprocess(img_box)
|
158 |
+
|
159 |
+
return img_boxes
|
160 |
+
|
161 |
+
|
162 |
+
def correct_bboxes(bboxes, width, height):
|
163 |
+
"""Crop boxes that are too big and get coordinates
|
164 |
+
with respect to cutouts.
|
165 |
+
|
166 |
+
Arguments:
|
167 |
+
bboxes: a float numpy array of shape [n, 5],
|
168 |
+
where each row is (xmin, ymin, xmax, ymax, score).
|
169 |
+
width: a float number.
|
170 |
+
height: a float number.
|
171 |
+
|
172 |
+
Returns:
|
173 |
+
dy, dx, edy, edx: int numpy arrays of shape [n],
|
174 |
+
coordinates of the boxes with respect to the cutouts.
|
175 |
+
y, x, ey, ex: int numpy arrays of shape [n],
|
176 |
+
corrected ymin, xmin, ymax, xmax.
|
177 |
+
h, w: int numpy arrays of shape [n],
|
178 |
+
just heights and widths of boxes.
|
179 |
+
|
180 |
+
in the following order:
|
181 |
+
[dy, edy, dx, edx, y, ey, x, ex, w, h].
|
182 |
+
"""
|
183 |
+
|
184 |
+
x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]
|
185 |
+
w, h = x2 - x1 + 1.0, y2 - y1 + 1.0
|
186 |
+
num_boxes = bboxes.shape[0]
|
187 |
+
|
188 |
+
# 'e' stands for end
|
189 |
+
# (x, y) -> (ex, ey)
|
190 |
+
x, y, ex, ey = x1, y1, x2, y2
|
191 |
+
|
192 |
+
# we need to cut out a box from the image.
|
193 |
+
# (x, y, ex, ey) are corrected coordinates of the box
|
194 |
+
# in the image.
|
195 |
+
# (dx, dy, edx, edy) are coordinates of the box in the cutout
|
196 |
+
# from the image.
|
197 |
+
dx, dy = np.zeros((num_boxes,)), np.zeros((num_boxes,))
|
198 |
+
edx, edy = w.copy() - 1.0, h.copy() - 1.0
|
199 |
+
|
200 |
+
# if box's bottom right corner is too far right
|
201 |
+
ind = np.where(ex > width - 1.0)[0]
|
202 |
+
edx[ind] = w[ind] + width - 2.0 - ex[ind]
|
203 |
+
ex[ind] = width - 1.0
|
204 |
+
|
205 |
+
# if box's bottom right corner is too low
|
206 |
+
ind = np.where(ey > height - 1.0)[0]
|
207 |
+
edy[ind] = h[ind] + height - 2.0 - ey[ind]
|
208 |
+
ey[ind] = height - 1.0
|
209 |
+
|
210 |
+
# if box's top left corner is too far left
|
211 |
+
ind = np.where(x < 0.0)[0]
|
212 |
+
dx[ind] = 0.0 - x[ind]
|
213 |
+
x[ind] = 0.0
|
214 |
+
|
215 |
+
# if box's top left corner is too high
|
216 |
+
ind = np.where(y < 0.0)[0]
|
217 |
+
dy[ind] = 0.0 - y[ind]
|
218 |
+
y[ind] = 0.0
|
219 |
+
|
220 |
+
return_list = [dy, edy, dx, edx, y, ey, x, ex, w, h]
|
221 |
+
return_list = [i.astype('int32') for i in return_list]
|
222 |
+
|
223 |
+
return return_list
|
224 |
+
|
225 |
+
|
226 |
+
def _preprocess(img):
|
227 |
+
"""Preprocessing step before feeding the network.
|
228 |
+
|
229 |
+
Arguments:
|
230 |
+
img: a float numpy array of shape [h, w, c].
|
231 |
+
|
232 |
+
Returns:
|
233 |
+
a float numpy array of shape [1, c, h, w].
|
234 |
+
"""
|
235 |
+
img = img.transpose((2, 0, 1))
|
236 |
+
img = np.expand_dims(img, 0)
|
237 |
+
img = (img - 127.5) * 0.0078125
|
238 |
+
return img
|
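
The helpers above are plain NumPy, so they can be checked in isolation; a small sketch with made-up boxes:

import numpy as np
from models.mtcnn.mtcnn_pytorch.src.box_utils import nms, convert_to_square

# (xmin, ymin, xmax, ymax, score): two near-duplicates plus one separate box
boxes = np.array([[10, 10, 50, 50, 0.90],
                  [12, 12, 52, 52, 0.80],
                  [100, 100, 140, 160, 0.70]], dtype=np.float32)

keep = nms(boxes, overlap_threshold=0.5, mode='union')
print(keep)                            # [0, 2]: the lower-scored duplicate is dropped

squared = convert_to_square(boxes[keep])
print(squared[:, 2] - squared[:, 0])   # widths now match the heights below
print(squared[:, 3] - squared[:, 1])
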
models/mtcnn/mtcnn_pytorch/src/detector.py
ADDED
@@ -0,0 +1,126 @@
|
1 |
+
import numpy as np
|
2 |
+
import torch
|
3 |
+
from torch.autograd import Variable
|
4 |
+
from .get_nets import PNet, RNet, ONet
|
5 |
+
from .box_utils import nms, calibrate_box, get_image_boxes, convert_to_square
|
6 |
+
from .first_stage import run_first_stage
|
7 |
+
|
8 |
+
|
9 |
+
def detect_faces(image, min_face_size=20.0,
|
10 |
+
thresholds=[0.6, 0.7, 0.8],
|
11 |
+
nms_thresholds=[0.7, 0.7, 0.7]):
|
12 |
+
"""
|
13 |
+
Arguments:
|
14 |
+
image: an instance of PIL.Image.
|
15 |
+
min_face_size: a float number.
|
16 |
+
thresholds: a list of length 3.
|
17 |
+
nms_thresholds: a list of length 3.
|
18 |
+
|
19 |
+
Returns:
|
20 |
+
two float numpy arrays of shapes [n_boxes, 4] and [n_boxes, 10],
|
21 |
+
bounding boxes and facial landmarks.
|
22 |
+
"""
|
23 |
+
|
24 |
+
# LOAD MODELS
|
25 |
+
pnet = PNet()
|
26 |
+
rnet = RNet()
|
27 |
+
onet = ONet()
|
28 |
+
onet.eval()
|
29 |
+
|
30 |
+
# BUILD AN IMAGE PYRAMID
|
31 |
+
width, height = image.size
|
32 |
+
min_length = min(height, width)
|
33 |
+
|
34 |
+
min_detection_size = 12
|
35 |
+
factor = 0.707 # sqrt(0.5)
|
36 |
+
|
37 |
+
# scales for scaling the image
|
38 |
+
scales = []
|
39 |
+
|
40 |
+
# scales the image so that
|
41 |
+
# minimum size that we can detect equals to
|
42 |
+
# minimum face size that we want to detect
|
43 |
+
m = min_detection_size / min_face_size
|
44 |
+
min_length *= m
|
45 |
+
|
46 |
+
factor_count = 0
|
47 |
+
while min_length > min_detection_size:
|
48 |
+
scales.append(m * factor ** factor_count)
|
49 |
+
min_length *= factor
|
50 |
+
factor_count += 1
|
51 |
+
|
52 |
+
# STAGE 1
|
53 |
+
|
54 |
+
# it will be returned
|
55 |
+
bounding_boxes = []
|
56 |
+
|
57 |
+
with torch.no_grad():
|
58 |
+
# run P-Net on different scales
|
59 |
+
for s in scales:
|
60 |
+
boxes = run_first_stage(image, pnet, scale=s, threshold=thresholds[0])
|
61 |
+
bounding_boxes.append(boxes)
|
62 |
+
|
63 |
+
# collect boxes (and offsets, and scores) from different scales
|
64 |
+
bounding_boxes = [i for i in bounding_boxes if i is not None]
|
65 |
+
bounding_boxes = np.vstack(bounding_boxes)
|
66 |
+
|
67 |
+
keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
|
68 |
+
bounding_boxes = bounding_boxes[keep]
|
69 |
+
|
70 |
+
# use offsets predicted by pnet to transform bounding boxes
|
71 |
+
bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:])
|
72 |
+
# shape [n_boxes, 5]
|
73 |
+
|
74 |
+
bounding_boxes = convert_to_square(bounding_boxes)
|
75 |
+
bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
|
76 |
+
|
77 |
+
# STAGE 2
|
78 |
+
|
79 |
+
img_boxes = get_image_boxes(bounding_boxes, image, size=24)
|
80 |
+
img_boxes = torch.FloatTensor(img_boxes)
|
81 |
+
|
82 |
+
output = rnet(img_boxes)
|
83 |
+
offsets = output[0].data.numpy() # shape [n_boxes, 4]
|
84 |
+
probs = output[1].data.numpy() # shape [n_boxes, 2]
|
85 |
+
|
86 |
+
keep = np.where(probs[:, 1] > thresholds[1])[0]
|
87 |
+
bounding_boxes = bounding_boxes[keep]
|
88 |
+
bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
|
89 |
+
offsets = offsets[keep]
|
90 |
+
|
91 |
+
keep = nms(bounding_boxes, nms_thresholds[1])
|
92 |
+
bounding_boxes = bounding_boxes[keep]
|
93 |
+
bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
|
94 |
+
bounding_boxes = convert_to_square(bounding_boxes)
|
95 |
+
bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
|
96 |
+
|
97 |
+
# STAGE 3
|
98 |
+
|
99 |
+
img_boxes = get_image_boxes(bounding_boxes, image, size=48)
|
100 |
+
if len(img_boxes) == 0:
|
101 |
+
return [], []
|
102 |
+
img_boxes = torch.FloatTensor(img_boxes)
|
103 |
+
output = onet(img_boxes)
|
104 |
+
landmarks = output[0].data.numpy() # shape [n_boxes, 10]
|
105 |
+
offsets = output[1].data.numpy() # shape [n_boxes, 4]
|
106 |
+
probs = output[2].data.numpy() # shape [n_boxes, 2]
|
107 |
+
|
108 |
+
keep = np.where(probs[:, 1] > thresholds[2])[0]
|
109 |
+
bounding_boxes = bounding_boxes[keep]
|
110 |
+
bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
|
111 |
+
offsets = offsets[keep]
|
112 |
+
landmarks = landmarks[keep]
|
113 |
+
|
114 |
+
# compute landmark points
|
115 |
+
width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
|
116 |
+
height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
|
117 |
+
xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
|
118 |
+
landmarks[:, 0:5] = np.expand_dims(xmin, 1) + np.expand_dims(width, 1) * landmarks[:, 0:5]
|
119 |
+
landmarks[:, 5:10] = np.expand_dims(ymin, 1) + np.expand_dims(height, 1) * landmarks[:, 5:10]
|
120 |
+
|
121 |
+
bounding_boxes = calibrate_box(bounding_boxes, offsets)
|
122 |
+
keep = nms(bounding_boxes, nms_thresholds[2], mode='min')
|
123 |
+
bounding_boxes = bounding_boxes[keep]
|
124 |
+
landmarks = landmarks[keep]
|
125 |
+
|
126 |
+
return bounding_boxes, landmarks
|
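
A hedged end-to-end sketch of detect_faces. It assumes the bundled P/R/O-Net .npy weights resolve through configs.paths_config and that the device selected in first_stage.py is available; the input path is a placeholder.

from PIL import Image
from models.mtcnn.mtcnn_pytorch.src.detector import detect_faces
from models.mtcnn.mtcnn_pytorch.src.visualization_utils import show_bboxes

img = Image.open('group_photo.jpg').convert('RGB')         # placeholder input
bounding_boxes, landmarks = detect_faces(img,
                                         min_face_size=20.0,
                                         thresholds=[0.6, 0.7, 0.8],
                                         nms_thresholds=[0.7, 0.7, 0.7])
# bounding_boxes: [n, 5] rows of (xmin, ymin, xmax, ymax, score)
# landmarks:      [n, 10] rows of (x1..x5, y1..y5)
show_bboxes(img, bounding_boxes, landmarks).save('group_photo_detected.jpg')
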
models/mtcnn/mtcnn_pytorch/src/first_stage.py
ADDED
@@ -0,0 +1,101 @@
|
1 |
+
import torch
|
2 |
+
from torch.autograd import Variable
|
3 |
+
import math
|
4 |
+
from PIL import Image
|
5 |
+
import numpy as np
|
6 |
+
from .box_utils import nms, _preprocess
|
7 |
+
|
8 |
+
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
9 |
+
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
|
10 |
+
|
11 |
+
|
12 |
+
def run_first_stage(image, net, scale, threshold):
|
13 |
+
"""Run P-Net, generate bounding boxes, and do NMS.
|
14 |
+
|
15 |
+
Arguments:
|
16 |
+
image: an instance of PIL.Image.
|
17 |
+
net: an instance of pytorch's nn.Module, P-Net.
|
18 |
+
scale: a float number,
|
19 |
+
scale width and height of the image by this number.
|
20 |
+
threshold: a float number,
|
21 |
+
threshold on the probability of a face when generating
|
22 |
+
bounding boxes from predictions of the net.
|
23 |
+
|
24 |
+
Returns:
|
25 |
+
a float numpy array of shape [n_boxes, 9],
|
26 |
+
bounding boxes with scores and offsets (4 + 1 + 4).
|
27 |
+
"""
|
28 |
+
|
29 |
+
# scale the image and convert it to a float array
|
30 |
+
width, height = image.size
|
31 |
+
sw, sh = math.ceil(width * scale), math.ceil(height * scale)
|
32 |
+
img = image.resize((sw, sh), Image.BILINEAR)
|
33 |
+
img = np.asarray(img, 'float32')
|
34 |
+
|
35 |
+
img = torch.FloatTensor(_preprocess(img)).to(device)
|
36 |
+
with torch.no_grad():
|
37 |
+
output = net(img)
|
38 |
+
probs = output[1].cpu().data.numpy()[0, 1, :, :]
|
39 |
+
offsets = output[0].cpu().data.numpy()
|
40 |
+
# probs: probability of a face at each sliding window
|
41 |
+
# offsets: transformations to true bounding boxes
|
42 |
+
|
43 |
+
boxes = _generate_bboxes(probs, offsets, scale, threshold)
|
44 |
+
if len(boxes) == 0:
|
45 |
+
return None
|
46 |
+
|
47 |
+
keep = nms(boxes[:, 0:5], overlap_threshold=0.5)
|
48 |
+
return boxes[keep]
|
49 |
+
|
50 |
+
|
51 |
+
def _generate_bboxes(probs, offsets, scale, threshold):
|
52 |
+
"""Generate bounding boxes at places
|
53 |
+
where there is probably a face.
|
54 |
+
|
55 |
+
Arguments:
|
56 |
+
probs: a float numpy array of shape [n, m].
|
57 |
+
offsets: a float numpy array of shape [1, 4, n, m].
|
58 |
+
scale: a float number,
|
59 |
+
width and height of the image were scaled by this number.
|
60 |
+
threshold: a float number.
|
61 |
+
|
62 |
+
Returns:
|
63 |
+
a float numpy array of shape [n_boxes, 9]
|
64 |
+
"""
|
65 |
+
|
66 |
+
# applying P-Net is equivalent, in some sense, to
|
67 |
+
# moving 12x12 window with stride 2
|
68 |
+
stride = 2
|
69 |
+
cell_size = 12
|
70 |
+
|
71 |
+
# indices of boxes where there is probably a face
|
72 |
+
inds = np.where(probs > threshold)
|
73 |
+
|
74 |
+
if inds[0].size == 0:
|
75 |
+
return np.array([])
|
76 |
+
|
77 |
+
# transformations of bounding boxes
|
78 |
+
tx1, ty1, tx2, ty2 = [offsets[0, i, inds[0], inds[1]] for i in range(4)]
|
79 |
+
# they are defined as:
|
80 |
+
# w = x2 - x1 + 1
|
81 |
+
# h = y2 - y1 + 1
|
82 |
+
# x1_true = x1 + tx1*w
|
83 |
+
# x2_true = x2 + tx2*w
|
84 |
+
# y1_true = y1 + ty1*h
|
85 |
+
# y2_true = y2 + ty2*h
|
86 |
+
|
87 |
+
offsets = np.array([tx1, ty1, tx2, ty2])
|
88 |
+
score = probs[inds[0], inds[1]]
|
89 |
+
|
90 |
+
# P-Net is applied to scaled images
|
91 |
+
# so we need to rescale bounding boxes back
|
92 |
+
bounding_boxes = np.vstack([
|
93 |
+
np.round((stride * inds[1] + 1.0) / scale),
|
94 |
+
np.round((stride * inds[0] + 1.0) / scale),
|
95 |
+
np.round((stride * inds[1] + 1.0 + cell_size) / scale),
|
96 |
+
np.round((stride * inds[0] + 1.0 + cell_size) / scale),
|
97 |
+
score, offsets
|
98 |
+
])
|
99 |
+
# why one is added?
|
100 |
+
|
101 |
+
return bounding_boxes.T
|
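
_generate_bboxes only does index arithmetic, so its coordinate mapping can be checked with a synthetic probability map (the values below are made up):

import numpy as np
from models.mtcnn.mtcnn_pytorch.src.first_stage import _generate_bboxes

# one confident 12x12 window at grid cell (row=3, col=5), zero offsets, scale 0.5
probs = np.zeros((10, 10), dtype=np.float32)
probs[3, 5] = 0.99
offsets = np.zeros((1, 4, 10, 10), dtype=np.float32)

boxes = _generate_bboxes(probs, offsets, scale=0.5, threshold=0.6)
print(boxes.shape)    # (1, 9): xmin, ymin, xmax, ymax, score, tx1, ty1, tx2, ty2
print(boxes[0, :4])   # [22. 14. 46. 38.]: (stride*col + 1)/scale, (stride*row + 1)/scale, ...
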
models/mtcnn/mtcnn_pytorch/src/get_nets.py
ADDED
@@ -0,0 +1,171 @@
|
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
import torch.nn.functional as F
|
4 |
+
from collections import OrderedDict
|
5 |
+
import numpy as np
|
6 |
+
|
7 |
+
from configs.paths_config import model_paths
|
8 |
+
PNET_PATH = model_paths["mtcnn_pnet"]
|
9 |
+
ONET_PATH = model_paths["mtcnn_onet"]
|
10 |
+
RNET_PATH = model_paths["mtcnn_rnet"]
|
11 |
+
|
12 |
+
|
13 |
+
class Flatten(nn.Module):
|
14 |
+
|
15 |
+
def __init__(self):
|
16 |
+
super(Flatten, self).__init__()
|
17 |
+
|
18 |
+
def forward(self, x):
|
19 |
+
"""
|
20 |
+
Arguments:
|
21 |
+
x: a float tensor with shape [batch_size, c, h, w].
|
22 |
+
Returns:
|
23 |
+
a float tensor with shape [batch_size, c*h*w].
|
24 |
+
"""
|
25 |
+
|
26 |
+
# without this transpose, the pretrained weights don't work
|
27 |
+
x = x.transpose(3, 2).contiguous()
|
28 |
+
|
29 |
+
return x.view(x.size(0), -1)
|
30 |
+
|
31 |
+
|
32 |
+
class PNet(nn.Module):
|
33 |
+
|
34 |
+
def __init__(self):
|
35 |
+
super().__init__()
|
36 |
+
|
37 |
+
# suppose we have input with size HxW, then
|
38 |
+
# after first layer: H - 2,
|
39 |
+
# after pool: ceil((H - 2)/2),
|
40 |
+
# after second conv: ceil((H - 2)/2) - 2,
|
41 |
+
# after last conv: ceil((H - 2)/2) - 4,
|
42 |
+
# and the same for W
|
43 |
+
|
44 |
+
self.features = nn.Sequential(OrderedDict([
|
45 |
+
('conv1', nn.Conv2d(3, 10, 3, 1)),
|
46 |
+
('prelu1', nn.PReLU(10)),
|
47 |
+
('pool1', nn.MaxPool2d(2, 2, ceil_mode=True)),
|
48 |
+
|
49 |
+
('conv2', nn.Conv2d(10, 16, 3, 1)),
|
50 |
+
('prelu2', nn.PReLU(16)),
|
51 |
+
|
52 |
+
('conv3', nn.Conv2d(16, 32, 3, 1)),
|
53 |
+
('prelu3', nn.PReLU(32))
|
54 |
+
]))
|
55 |
+
|
56 |
+
self.conv4_1 = nn.Conv2d(32, 2, 1, 1)
|
57 |
+
self.conv4_2 = nn.Conv2d(32, 4, 1, 1)
|
58 |
+
|
59 |
+
weights = np.load(PNET_PATH, allow_pickle=True)[()]
|
60 |
+
for n, p in self.named_parameters():
|
61 |
+
p.data = torch.FloatTensor(weights[n])
|
62 |
+
|
63 |
+
def forward(self, x):
|
64 |
+
"""
|
65 |
+
Arguments:
|
66 |
+
x: a float tensor with shape [batch_size, 3, h, w].
|
67 |
+
Returns:
|
68 |
+
b: a float tensor with shape [batch_size, 4, h', w'].
|
69 |
+
a: a float tensor with shape [batch_size, 2, h', w'].
|
70 |
+
"""
|
71 |
+
x = self.features(x)
|
72 |
+
a = self.conv4_1(x)
|
73 |
+
b = self.conv4_2(x)
|
74 |
+
a = F.softmax(a, dim=-1)
|
75 |
+
return b, a
|
76 |
+
|
77 |
+
|
78 |
+
class RNet(nn.Module):
|
79 |
+
|
80 |
+
def __init__(self):
|
81 |
+
super().__init__()
|
82 |
+
|
83 |
+
self.features = nn.Sequential(OrderedDict([
|
84 |
+
('conv1', nn.Conv2d(3, 28, 3, 1)),
|
85 |
+
('prelu1', nn.PReLU(28)),
|
86 |
+
('pool1', nn.MaxPool2d(3, 2, ceil_mode=True)),
|
87 |
+
|
88 |
+
('conv2', nn.Conv2d(28, 48, 3, 1)),
|
89 |
+
('prelu2', nn.PReLU(48)),
|
90 |
+
('pool2', nn.MaxPool2d(3, 2, ceil_mode=True)),
|
91 |
+
|
92 |
+
('conv3', nn.Conv2d(48, 64, 2, 1)),
|
93 |
+
('prelu3', nn.PReLU(64)),
|
94 |
+
|
95 |
+
('flatten', Flatten()),
|
96 |
+
('conv4', nn.Linear(576, 128)),
|
97 |
+
('prelu4', nn.PReLU(128))
|
98 |
+
]))
|
99 |
+
|
100 |
+
self.conv5_1 = nn.Linear(128, 2)
|
101 |
+
self.conv5_2 = nn.Linear(128, 4)
|
102 |
+
|
103 |
+
weights = np.load(RNET_PATH, allow_pickle=True)[()]
|
104 |
+
for n, p in self.named_parameters():
|
105 |
+
p.data = torch.FloatTensor(weights[n])
|
106 |
+
|
107 |
+
def forward(self, x):
|
108 |
+
"""
|
109 |
+
Arguments:
|
110 |
+
x: a float tensor with shape [batch_size, 3, h, w].
|
111 |
+
Returns:
|
112 |
+
b: a float tensor with shape [batch_size, 4].
|
113 |
+
a: a float tensor with shape [batch_size, 2].
|
114 |
+
"""
|
115 |
+
x = self.features(x)
|
116 |
+
a = self.conv5_1(x)
|
117 |
+
b = self.conv5_2(x)
|
118 |
+
a = F.softmax(a, dim=-1)
|
119 |
+
return b, a
|
120 |
+
|
121 |
+
|
122 |
+
class ONet(nn.Module):
|
123 |
+
|
124 |
+
def __init__(self):
|
125 |
+
super().__init__()
|
126 |
+
|
127 |
+
self.features = nn.Sequential(OrderedDict([
|
128 |
+
('conv1', nn.Conv2d(3, 32, 3, 1)),
|
129 |
+
('prelu1', nn.PReLU(32)),
|
130 |
+
('pool1', nn.MaxPool2d(3, 2, ceil_mode=True)),
|
131 |
+
|
132 |
+
('conv2', nn.Conv2d(32, 64, 3, 1)),
|
133 |
+
('prelu2', nn.PReLU(64)),
|
134 |
+
('pool2', nn.MaxPool2d(3, 2, ceil_mode=True)),
|
135 |
+
|
136 |
+
('conv3', nn.Conv2d(64, 64, 3, 1)),
|
137 |
+
('prelu3', nn.PReLU(64)),
|
138 |
+
('pool3', nn.MaxPool2d(2, 2, ceil_mode=True)),
|
139 |
+
|
140 |
+
('conv4', nn.Conv2d(64, 128, 2, 1)),
|
141 |
+
('prelu4', nn.PReLU(128)),
|
142 |
+
|
143 |
+
('flatten', Flatten()),
|
144 |
+
('conv5', nn.Linear(1152, 256)),
|
145 |
+
('drop5', nn.Dropout(0.25)),
|
146 |
+
('prelu5', nn.PReLU(256)),
|
147 |
+
]))
|
148 |
+
|
149 |
+
self.conv6_1 = nn.Linear(256, 2)
|
150 |
+
self.conv6_2 = nn.Linear(256, 4)
|
151 |
+
self.conv6_3 = nn.Linear(256, 10)
|
152 |
+
|
153 |
+
weights = np.load(ONET_PATH, allow_pickle=True)[()]
|
154 |
+
for n, p in self.named_parameters():
|
155 |
+
p.data = torch.FloatTensor(weights[n])
|
156 |
+
|
157 |
+
def forward(self, x):
|
158 |
+
"""
|
159 |
+
Arguments:
|
160 |
+
x: a float tensor with shape [batch_size, 3, h, w].
|
161 |
+
Returns:
|
162 |
+
c: a float tensor with shape [batch_size, 10].
|
163 |
+
b: a float tensor with shape [batch_size, 4].
|
164 |
+
a: a float tensor with shape [batch_size, 2].
|
165 |
+
"""
|
166 |
+
x = self.features(x)
|
167 |
+
a = self.conv6_1(x)
|
168 |
+
b = self.conv6_2(x)
|
169 |
+
c = self.conv6_3(x)
|
170 |
+
a = F.softmax(a, dim=-1)
|
171 |
+
return c, b, a
|
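
The three nets are fully defined above; a shape check on random inputs (it assumes the .npy weight files shipped in this repo are found via configs.paths_config, since each constructor loads them):

import torch
from models.mtcnn.mtcnn_pytorch.src.get_nets import PNet, RNet, ONet

pnet, rnet, onet = PNet(), RNet(), ONet()
with torch.no_grad():
    b, a = pnet(torch.randn(1, 3, 12, 12))   # fully convolutional: a 12x12 input gives a 1x1 map
    print(b.shape, a.shape)                  # [1, 4, 1, 1] offsets, [1, 2, 1, 1] face probabilities
    b, a = rnet(torch.randn(1, 3, 24, 24))
    print(b.shape, a.shape)                  # [1, 4], [1, 2]
    c, b, a = onet(torch.randn(1, 3, 48, 48))
    print(c.shape, b.shape, a.shape)         # [1, 10] landmarks, [1, 4], [1, 2]
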
models/mtcnn/mtcnn_pytorch/src/matlab_cp2tform.py
ADDED
@@ -0,0 +1,350 @@
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""
|
3 |
+
Created on Tue Jul 11 06:54:28 2017
|
4 |
+
|
5 |
+
@author: zhaoyafei
|
6 |
+
"""
|
7 |
+
|
8 |
+
import numpy as np
|
9 |
+
from numpy.linalg import inv, norm, lstsq
|
10 |
+
from numpy.linalg import matrix_rank as rank
|
11 |
+
|
12 |
+
|
13 |
+
class MatlabCp2tormException(Exception):
|
14 |
+
def __str__(self):
|
15 |
+
return 'In File {}:{}'.format(
|
16 |
+
__file__, super().__str__())
|
17 |
+
|
18 |
+
|
19 |
+
def tformfwd(trans, uv):
|
20 |
+
"""
|
21 |
+
Function:
|
22 |
+
----------
|
23 |
+
apply affine transform 'trans' to uv
|
24 |
+
|
25 |
+
Parameters:
|
26 |
+
----------
|
27 |
+
@trans: 3x3 np.array
|
28 |
+
transform matrix
|
29 |
+
@uv: Kx2 np.array
|
30 |
+
each row is a pair of coordinates (x, y)
|
31 |
+
|
32 |
+
Returns:
|
33 |
+
----------
|
34 |
+
@xy: Kx2 np.array
|
35 |
+
each row is a pair of transformed coordinates (x, y)
|
36 |
+
"""
|
37 |
+
uv = np.hstack((
|
38 |
+
uv, np.ones((uv.shape[0], 1))
|
39 |
+
))
|
40 |
+
xy = np.dot(uv, trans)
|
41 |
+
xy = xy[:, 0:-1]
|
42 |
+
return xy
|
43 |
+
|
44 |
+
|
45 |
+
def tforminv(trans, uv):
|
46 |
+
"""
|
47 |
+
Function:
|
48 |
+
----------
|
49 |
+
apply the inverse of affine transform 'trans' to uv
|
50 |
+
|
51 |
+
Parameters:
|
52 |
+
----------
|
53 |
+
@trans: 3x3 np.array
|
54 |
+
transform matrix
|
55 |
+
@uv: Kx2 np.array
|
56 |
+
each row is a pair of coordinates (x, y)
|
57 |
+
|
58 |
+
Returns:
|
59 |
+
----------
|
60 |
+
@xy: Kx2 np.array
|
61 |
+
each row is a pair of inverse-transformed coordinates (x, y)
|
62 |
+
"""
|
63 |
+
Tinv = inv(trans)
|
64 |
+
xy = tformfwd(Tinv, uv)
|
65 |
+
return xy
|
66 |
+
|
67 |
+
|
68 |
+
def findNonreflectiveSimilarity(uv, xy, options=None):
|
69 |
+
options = {'K': 2}
|
70 |
+
|
71 |
+
K = options['K']
|
72 |
+
M = xy.shape[0]
|
73 |
+
x = xy[:, 0].reshape((-1, 1)) # use reshape to keep a column vector
|
74 |
+
y = xy[:, 1].reshape((-1, 1)) # use reshape to keep a column vector
|
75 |
+
# print('--->x, y:\n', x, y
|
76 |
+
|
77 |
+
tmp1 = np.hstack((x, y, np.ones((M, 1)), np.zeros((M, 1))))
|
78 |
+
tmp2 = np.hstack((y, -x, np.zeros((M, 1)), np.ones((M, 1))))
|
79 |
+
X = np.vstack((tmp1, tmp2))
|
80 |
+
# print('--->X.shape: ', X.shape
|
81 |
+
# print('X:\n', X
|
82 |
+
|
83 |
+
u = uv[:, 0].reshape((-1, 1)) # use reshape to keep a column vector
|
84 |
+
v = uv[:, 1].reshape((-1, 1)) # use reshape to keep a column vector
|
85 |
+
U = np.vstack((u, v))
|
86 |
+
# print('--->U.shape: ', U.shape
|
87 |
+
# print('U:\n', U
|
88 |
+
|
89 |
+
# We know that X * r = U
|
90 |
+
if rank(X) >= 2 * K:
|
91 |
+
r, _, _, _ = lstsq(X, U, rcond=None) # Make sure this is what I want
|
92 |
+
r = np.squeeze(r)
|
93 |
+
else:
|
94 |
+
raise Exception('cp2tform:twoUniquePointsReq')
|
95 |
+
|
96 |
+
# print('--->r:\n', r
|
97 |
+
|
98 |
+
sc = r[0]
|
99 |
+
ss = r[1]
|
100 |
+
tx = r[2]
|
101 |
+
ty = r[3]
|
102 |
+
|
103 |
+
Tinv = np.array([
|
104 |
+
[sc, -ss, 0],
|
105 |
+
[ss, sc, 0],
|
106 |
+
[tx, ty, 1]
|
107 |
+
])
|
108 |
+
|
109 |
+
# print('--->Tinv:\n', Tinv
|
110 |
+
|
111 |
+
T = inv(Tinv)
|
112 |
+
# print('--->T:\n', T
|
113 |
+
|
114 |
+
T[:, 2] = np.array([0, 0, 1])
|
115 |
+
|
116 |
+
return T, Tinv
|
117 |
+
|
118 |
+
|
119 |
+
def findSimilarity(uv, xy, options=None):
|
120 |
+
options = {'K': 2}
|
121 |
+
|
122 |
+
# uv = np.array(uv)
|
123 |
+
# xy = np.array(xy)
|
124 |
+
|
125 |
+
# Solve for trans1
|
126 |
+
trans1, trans1_inv = findNonreflectiveSimilarity(uv, xy, options)
|
127 |
+
|
128 |
+
# Solve for trans2
|
129 |
+
|
130 |
+
# manually reflect the xy data across the Y-axis
|
131 |
+
xyR = xy.copy()
|
132 |
+
xyR[:, 0] = -1 * xyR[:, 0]
|
133 |
+
|
134 |
+
trans2r, trans2r_inv = findNonreflectiveSimilarity(uv, xyR, options)
|
135 |
+
|
136 |
+
# manually reflect the tform to undo the reflection done on xyR
|
137 |
+
TreflectY = np.array([
|
138 |
+
[-1, 0, 0],
|
139 |
+
[0, 1, 0],
|
140 |
+
[0, 0, 1]
|
141 |
+
])
|
142 |
+
|
143 |
+
trans2 = np.dot(trans2r, TreflectY)
|
144 |
+
|
145 |
+
# Figure out if trans1 or trans2 is better
|
146 |
+
xy1 = tformfwd(trans1, uv)
|
147 |
+
norm1 = norm(xy1 - xy)
|
148 |
+
|
149 |
+
xy2 = tformfwd(trans2, uv)
|
150 |
+
norm2 = norm(xy2 - xy)
|
151 |
+
|
152 |
+
if norm1 <= norm2:
|
153 |
+
return trans1, trans1_inv
|
154 |
+
else:
|
155 |
+
trans2_inv = inv(trans2)
|
156 |
+
return trans2, trans2_inv
|
157 |
+
|
158 |
+
|
159 |
+
def get_similarity_transform(src_pts, dst_pts, reflective=True):
|
160 |
+
"""
|
161 |
+
Function:
|
162 |
+
----------
|
163 |
+
Find Similarity Transform Matrix 'trans':
|
164 |
+
u = src_pts[:, 0]
|
165 |
+
v = src_pts[:, 1]
|
166 |
+
x = dst_pts[:, 0]
|
167 |
+
y = dst_pts[:, 1]
|
168 |
+
[x, y, 1] = [u, v, 1] * trans
|
169 |
+
|
170 |
+
Parameters:
|
171 |
+
----------
|
172 |
+
@src_pts: Kx2 np.array
|
173 |
+
source points, each row is a pair of coordinates (x, y)
|
174 |
+
@dst_pts: Kx2 np.array
|
175 |
+
destination points, each row is a pair of transformed
|
176 |
+
coordinates (x, y)
|
177 |
+
@reflective: True or False
|
178 |
+
if True:
|
179 |
+
use reflective similarity transform
|
180 |
+
else:
|
181 |
+
use non-reflective similarity transform
|
182 |
+
|
183 |
+
Returns:
|
184 |
+
----------
|
185 |
+
@trans: 3x3 np.array
|
186 |
+
transform matrix from uv to xy
|
187 |
+
trans_inv: 3x3 np.array
|
188 |
+
inverse of trans, transform matrix from xy to uv
|
189 |
+
"""
|
190 |
+
|
191 |
+
if reflective:
|
192 |
+
trans, trans_inv = findSimilarity(src_pts, dst_pts)
|
193 |
+
else:
|
194 |
+
trans, trans_inv = findNonreflectiveSimilarity(src_pts, dst_pts)
|
195 |
+
|
196 |
+
return trans, trans_inv
|
197 |
+
|
198 |
+
|
199 |
+
def cvt_tform_mat_for_cv2(trans):
|
200 |
+
"""
|
201 |
+
Function:
|
202 |
+
----------
|
203 |
+
Convert Transform Matrix 'trans' into 'cv2_trans' which could be
|
204 |
+
directly used by cv2.warpAffine():
|
205 |
+
u = src_pts[:, 0]
|
206 |
+
v = src_pts[:, 1]
|
207 |
+
x = dst_pts[:, 0]
|
208 |
+
y = dst_pts[:, 1]
|
209 |
+
[x, y].T = cv_trans * [u, v, 1].T
|
210 |
+
|
211 |
+
Parameters:
|
212 |
+
----------
|
213 |
+
@trans: 3x3 np.array
|
214 |
+
transform matrix from uv to xy
|
215 |
+
|
216 |
+
Returns:
|
217 |
+
----------
|
218 |
+
@cv2_trans: 2x3 np.array
|
219 |
+
transform matrix from src_pts to dst_pts, could be directly used
|
220 |
+
for cv2.warpAffine()
|
221 |
+
"""
|
222 |
+
cv2_trans = trans[:, 0:2].T
|
223 |
+
|
224 |
+
return cv2_trans
|
225 |
+
|
226 |
+
|
227 |
+
def get_similarity_transform_for_cv2(src_pts, dst_pts, reflective=True):
|
228 |
+
"""
|
229 |
+
Function:
|
230 |
+
----------
|
231 |
+
Find Similarity Transform Matrix 'cv2_trans' which could be
|
232 |
+
directly used by cv2.warpAffine():
|
233 |
+
u = src_pts[:, 0]
|
234 |
+
v = src_pts[:, 1]
|
235 |
+
x = dst_pts[:, 0]
|
236 |
+
y = dst_pts[:, 1]
|
237 |
+
[x, y].T = cv_trans * [u, v, 1].T
|
238 |
+
|
239 |
+
Parameters:
|
240 |
+
----------
|
241 |
+
@src_pts: Kx2 np.array
|
242 |
+
source points, each row is a pair of coordinates (x, y)
|
243 |
+
@dst_pts: Kx2 np.array
|
244 |
+
destination points, each row is a pair of transformed
|
245 |
+
coordinates (x, y)
|
246 |
+
reflective: True or False
|
247 |
+
if True:
|
248 |
+
use reflective similarity transform
|
249 |
+
else:
|
250 |
+
use non-reflective similarity transform
|
251 |
+
|
252 |
+
Returns:
|
253 |
+
----------
|
254 |
+
@cv2_trans: 2x3 np.array
|
255 |
+
transform matrix from src_pts to dst_pts, could be directly used
|
256 |
+
for cv2.warpAffine()
|
257 |
+
"""
|
258 |
+
trans, trans_inv = get_similarity_transform(src_pts, dst_pts, reflective)
|
259 |
+
cv2_trans = cvt_tform_mat_for_cv2(trans)
|
260 |
+
|
261 |
+
return cv2_trans
|
262 |
+
|
263 |
+
|
264 |
+
if __name__ == '__main__':
|
265 |
+
"""
|
266 |
+
u = [0, 6, -2]
|
267 |
+
v = [0, 3, 5]
|
268 |
+
x = [-1, 0, 4]
|
269 |
+
y = [-1, -10, 4]
|
270 |
+
|
271 |
+
# In Matlab, run:
|
272 |
+
#
|
273 |
+
# uv = [u'; v'];
|
274 |
+
# xy = [x'; y'];
|
275 |
+
# tform_sim=cp2tform(uv,xy,'similarity');
|
276 |
+
#
|
277 |
+
# trans = tform_sim.tdata.T
|
278 |
+
# ans =
|
279 |
+
# -0.0764 -1.6190 0
|
280 |
+
# 1.6190 -0.0764 0
|
281 |
+
# -3.2156 0.0290 1.0000
|
282 |
+
# trans_inv = tform_sim.tdata.Tinv
|
283 |
+
# ans =
|
284 |
+
#
|
285 |
+
# -0.0291 0.6163 0
|
286 |
+
# -0.6163 -0.0291 0
|
287 |
+
# -0.0756 1.9826 1.0000
|
288 |
+
# xy_m=tformfwd(tform_sim, u,v)
|
289 |
+
#
|
290 |
+
# xy_m =
|
291 |
+
#
|
292 |
+
# -3.2156 0.0290
|
293 |
+
# 1.1833 -9.9143
|
294 |
+
# 5.0323 2.8853
|
295 |
+
# uv_m=tforminv(tform_sim, x,y)
|
296 |
+
#
|
297 |
+
# uv_m =
|
298 |
+
#
|
299 |
+
# 0.5698 1.3953
|
300 |
+
# 6.0872 2.2733
|
301 |
+
# -2.6570 4.3314
|
302 |
+
"""
|
303 |
+
u = [0, 6, -2]
|
304 |
+
v = [0, 3, 5]
|
305 |
+
x = [-1, 0, 4]
|
306 |
+
y = [-1, -10, 4]
|
307 |
+
|
308 |
+
uv = np.array((u, v)).T
|
309 |
+
xy = np.array((x, y)).T
|
310 |
+
|
311 |
+
print('\n--->uv:')
|
312 |
+
print(uv)
|
313 |
+
print('\n--->xy:')
|
314 |
+
print(xy)
|
315 |
+
|
316 |
+
trans, trans_inv = get_similarity_transform(uv, xy)
|
317 |
+
|
318 |
+
print('\n--->trans matrix:')
|
319 |
+
print(trans)
|
320 |
+
|
321 |
+
print('\n--->trans_inv matrix:')
|
322 |
+
print(trans_inv)
|
323 |
+
|
324 |
+
print('\n---> apply transform to uv')
|
325 |
+
print('\nxy_m = uv_augmented * trans')
|
326 |
+
uv_aug = np.hstack((
|
327 |
+
uv, np.ones((uv.shape[0], 1))
|
328 |
+
))
|
329 |
+
xy_m = np.dot(uv_aug, trans)
|
330 |
+
print(xy_m)
|
331 |
+
|
332 |
+
print('\nxy_m = tformfwd(trans, uv)')
|
333 |
+
xy_m = tformfwd(trans, uv)
|
334 |
+
print(xy_m)
|
335 |
+
|
336 |
+
print('\n---> apply inverse transform to xy')
|
337 |
+
print('\nuv_m = xy_augmented * trans_inv')
|
338 |
+
xy_aug = np.hstack((
|
339 |
+
xy, np.ones((xy.shape[0], 1))
|
340 |
+
))
|
341 |
+
uv_m = np.dot(xy_aug, trans_inv)
|
342 |
+
print(uv_m)
|
343 |
+
|
344 |
+
print('\nuv_m = tformfwd(trans_inv, xy)')
|
345 |
+
uv_m = tformfwd(trans_inv, xy)
|
346 |
+
print(uv_m)
|
347 |
+
|
348 |
+
uv_m = tforminv(trans, xy)
|
349 |
+
print('\nuv_m = tforminv(trans, xy)')
|
350 |
+
print(uv_m)
|
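
A short sketch tying get_similarity_transform_for_cv2 to cv2.warpAffine; the point coordinates are illustrative only.

import cv2
import numpy as np
from models.mtcnn.mtcnn_pytorch.src.matlab_cp2tform import get_similarity_transform_for_cv2

src_pts = np.array([[30.3, 51.7], [65.5, 51.5], [48.0, 71.7]], dtype=np.float32)   # detected points
dst_pts = np.array([[30.2946, 51.6963], [65.5318, 51.5014], [48.0252, 71.7366]],
                   dtype=np.float32)                                               # template points

cv2_trans = get_similarity_transform_for_cv2(src_pts, dst_pts)   # 2x3 affine matrix
img = np.zeros((112, 96, 3), dtype=np.uint8)                     # stand-in image
warped = cv2.warpAffine(img, cv2_trans, (96, 112))               # dsize is (width, height)
print(cv2_trans.shape, warped.shape)                             # (2, 3) (112, 96, 3)
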
models/mtcnn/mtcnn_pytorch/src/visualization_utils.py
ADDED
@@ -0,0 +1,31 @@
|
1 |
+
from PIL import ImageDraw
|
2 |
+
|
3 |
+
|
4 |
+
def show_bboxes(img, bounding_boxes, facial_landmarks=[]):
|
5 |
+
"""Draw bounding boxes and facial landmarks.
|
6 |
+
|
7 |
+
Arguments:
|
8 |
+
img: an instance of PIL.Image.
|
9 |
+
bounding_boxes: a float numpy array of shape [n, 5].
|
10 |
+
facial_landmarks: a float numpy array of shape [n, 10].
|
11 |
+
|
12 |
+
Returns:
|
13 |
+
an instance of PIL.Image.
|
14 |
+
"""
|
15 |
+
|
16 |
+
img_copy = img.copy()
|
17 |
+
draw = ImageDraw.Draw(img_copy)
|
18 |
+
|
19 |
+
for b in bounding_boxes:
|
20 |
+
draw.rectangle([
|
21 |
+
(b[0], b[1]), (b[2], b[3])
|
22 |
+
], outline='white')
|
23 |
+
|
24 |
+
for p in facial_landmarks:
|
25 |
+
for i in range(5):
|
26 |
+
draw.ellipse([
|
27 |
+
(p[i] - 1.0, p[i + 5] - 1.0),
|
28 |
+
(p[i] + 1.0, p[i + 5] + 1.0)
|
29 |
+
], outline='blue')
|
30 |
+
|
31 |
+
return img_copy
|
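
show_bboxes needs only PIL, so it can be tried on a blank image with made-up boxes and landmarks (layout x1..x5 then y1..y5, matching detect_faces):

import numpy as np
from PIL import Image
from models.mtcnn.mtcnn_pytorch.src.visualization_utils import show_bboxes

img = Image.new('RGB', (200, 200), 'gray')
boxes = np.array([[40, 40, 160, 180, 0.99]], dtype=np.float32)
landmarks = np.array([[70, 130, 100, 75, 125,       # x1..x5
                       90, 90, 115, 150, 150]],     # y1..y5
                     dtype=np.float32)
show_bboxes(img, boxes, landmarks).save('bboxes_demo.png')
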
models/mtcnn/mtcnn_pytorch/src/weights/onet.npy
ADDED
@@ -0,0 +1,3 @@
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:313141c3646bebb73cb8350a2d5fee4c7f044fb96304b46ccc21aeea8b818f83
|
3 |
+
size 2345483
|
models/mtcnn/mtcnn_pytorch/src/weights/pnet.npy
ADDED
@@ -0,0 +1,3 @@
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:03e19e5c473932ab38f5a6308fe6210624006994a687e858d1dcda53c66f18cb
|
3 |
+
size 41271
|
models/mtcnn/mtcnn_pytorch/src/weights/rnet.npy
ADDED
@@ -0,0 +1,3 @@
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5660aad67688edc9e8a3dd4e47ed120932835e06a8a711a423252a6f2c747083
|
3 |
+
size 604651
|
models/psp.py
ADDED
@@ -0,0 +1,148 @@
|
1 |
+
"""
|
2 |
+
This file defines the core research contribution
|
3 |
+
"""
|
4 |
+
import matplotlib
|
5 |
+
matplotlib.use('Agg')
|
6 |
+
import math
|
7 |
+
|
8 |
+
import torch
|
9 |
+
from torch import nn
|
10 |
+
from models.encoders import psp_encoders
|
11 |
+
from models.stylegan2.model import Generator
|
12 |
+
from configs.paths_config import model_paths
|
13 |
+
import torch.nn.functional as F
|
14 |
+
|
15 |
+
def get_keys(d, name):
|
16 |
+
if 'state_dict' in d:
|
17 |
+
d = d['state_dict']
|
18 |
+
d_filt = {k[len(name) + 1:]: v for k, v in d.items() if k[:len(name)] == name}
|
19 |
+
return d_filt
|
20 |
+
|
21 |
+
|
22 |
+
class pSp(nn.Module):
|
23 |
+
|
24 |
+
def __init__(self, opts, ckpt=None):
|
25 |
+
super(pSp, self).__init__()
|
26 |
+
self.set_opts(opts)
|
27 |
+
# compute number of style inputs based on the output resolution
|
28 |
+
self.opts.n_styles = int(math.log(self.opts.output_size, 2)) * 2 - 2
|
29 |
+
# Define architecture
|
30 |
+
self.encoder = self.set_encoder()
|
31 |
+
self.decoder = Generator(self.opts.output_size, 512, 8)
|
32 |
+
self.face_pool = torch.nn.AdaptiveAvgPool2d((256, 256))
|
33 |
+
# Load weights if needed
|
34 |
+
self.load_weights(ckpt)
|
35 |
+
|
36 |
+
def set_encoder(self):
|
37 |
+
if self.opts.encoder_type == 'GradualStyleEncoder':
|
38 |
+
encoder = psp_encoders.GradualStyleEncoder(50, 'ir_se', self.opts)
|
39 |
+
elif self.opts.encoder_type == 'BackboneEncoderUsingLastLayerIntoW':
|
40 |
+
encoder = psp_encoders.BackboneEncoderUsingLastLayerIntoW(50, 'ir_se', self.opts)
|
41 |
+
elif self.opts.encoder_type == 'BackboneEncoderUsingLastLayerIntoWPlus':
|
42 |
+
encoder = psp_encoders.BackboneEncoderUsingLastLayerIntoWPlus(50, 'ir_se', self.opts)
|
43 |
+
else:
|
44 |
+
raise Exception('{} is not a valid encoder type'.format(self.opts.encoder_type))
|
45 |
+
return encoder
|
46 |
+
|
47 |
+
def load_weights(self, ckpt=None):
|
48 |
+
if self.opts.checkpoint_path is not None:
|
49 |
+
print('Loading pSp from checkpoint: {}'.format(self.opts.checkpoint_path))
|
50 |
+
if ckpt is None:
|
51 |
+
ckpt = torch.load(self.opts.checkpoint_path, map_location='cpu')
|
52 |
+
self.encoder.load_state_dict(get_keys(ckpt, 'encoder'), strict=False)
|
53 |
+
self.decoder.load_state_dict(get_keys(ckpt, 'decoder'), strict=False)
|
54 |
+
self.__load_latent_avg(ckpt)
|
55 |
+
else:
|
56 |
+
print('Loading encoders weights from irse50!')
|
57 |
+
encoder_ckpt = torch.load(model_paths['ir_se50'])
|
58 |
+
# if input to encoder is not an RGB image, do not load the input layer weights
|
59 |
+
if self.opts.label_nc != 0:
|
60 |
+
encoder_ckpt = {k: v for k, v in encoder_ckpt.items() if "input_layer" not in k}
|
61 |
+
self.encoder.load_state_dict(encoder_ckpt, strict=False)
|
62 |
+
print('Loading decoder weights from pretrained!')
|
63 |
+
ckpt = torch.load(self.opts.stylegan_weights)
|
64 |
+
self.decoder.load_state_dict(ckpt['g_ema'], strict=False)
|
65 |
+
if self.opts.learn_in_w:
|
66 |
+
self.__load_latent_avg(ckpt, repeat=1)
|
67 |
+
else:
|
68 |
+
self.__load_latent_avg(ckpt, repeat=self.opts.n_styles)
|
69 |
+
# for video toonification, we load G0' model
|
70 |
+
if self.opts.toonify_weights is not None: ##### modified
|
71 |
+
ckpt = torch.load(self.opts.toonify_weights)
|
72 |
+
self.decoder.load_state_dict(ckpt['g_ema'], strict=False)
|
73 |
+
self.opts.toonify_weights = None
|
74 |
+
|
75 |
+
# x1: image for first-layer feature f.
|
76 |
+
# x2: image for style latent code w+. If not specified, x2=x1.
|
77 |
+
# inject_latent: for sketch/mask-to-face translation, another latent code to fuse with w+
|
78 |
+
# latent_mask: fuse w+ and inject_latent with the mask (1~7 use w+ and 8~18 use inject_latent)
|
79 |
+
# use_feature: use f. Otherwise, use the original StyleGAN first-layer constant 4*4 feature
|
80 |
+
# first_layer_feature_ind: always 0, meaning the 1st layer of G accepts f
|
81 |
+
# use_skip: use skip connection.
|
82 |
+
# zero_noise: use zero noises.
|
83 |
+
# editing_w: the editing vector v for video face editing
|
84 |
+
def forward(self, x1, x2=None, resize=True, latent_mask=None, randomize_noise=True,
|
85 |
+
inject_latent=None, return_latents=False, alpha=None, use_feature=True,
|
86 |
+
first_layer_feature_ind=0, use_skip=False, zero_noise=False, editing_w=None): ##### modified
|
87 |
+
|
88 |
+
feats = None # f and the skipped encoder features
|
89 |
+
codes, feats = self.encoder(x1, return_feat=True, return_full=use_skip) ##### modified
|
90 |
+
if x2 is not None: ##### modified
|
91 |
+
codes = self.encoder(x2) ##### modified
|
92 |
+
# normalize with respect to the center of an average face
|
93 |
+
if self.opts.start_from_latent_avg:
|
94 |
+
if self.opts.learn_in_w:
|
95 |
+
codes = codes + self.latent_avg.repeat(codes.shape[0], 1)
|
96 |
+
else:
|
97 |
+
codes = codes + self.latent_avg.repeat(codes.shape[0], 1, 1)
|
98 |
+
|
99 |
+
# E_W^{1:7}(T(x1)) concatenate E_W^{8:18}(w~)
|
100 |
+
if latent_mask is not None:
|
101 |
+
for i in latent_mask:
|
102 |
+
if inject_latent is not None:
|
103 |
+
if alpha is not None:
|
104 |
+
codes[:, i] = alpha * inject_latent[:, i] + (1 - alpha) * codes[:, i]
|
105 |
+
else:
|
106 |
+
codes[:, i] = inject_latent[:, i]
|
107 |
+
else:
|
108 |
+
codes[:, i] = 0
|
109 |
+
|
110 |
+
first_layer_feats, skip_layer_feats, fusion = None, None, None ##### modified
|
111 |
+
if use_feature: ##### modified
|
112 |
+
first_layer_feats = feats[0:2] # use f
|
113 |
+
if use_skip: ##### modified
|
114 |
+
skip_layer_feats = feats[2:] # use skipped encoder feature
|
115 |
+
fusion = self.encoder.fusion # use fusion layer to fuse encoder feature and decoder feature.
|
116 |
+
|
117 |
+
images, result_latent = self.decoder([codes],
|
118 |
+
input_is_latent=True,
|
119 |
+
randomize_noise=randomize_noise,
|
120 |
+
return_latents=return_latents,
|
121 |
+
first_layer_feature=first_layer_feats,
|
122 |
+
first_layer_feature_ind=first_layer_feature_ind,
|
123 |
+
skip_layer_feature=skip_layer_feats,
|
124 |
+
fusion_block=fusion,
|
125 |
+
zero_noise=zero_noise,
|
126 |
+
editing_w=editing_w) ##### modified
|
127 |
+
|
128 |
+
if resize:
|
129 |
+
if self.opts.output_size == 1024: ##### modified
|
130 |
+
images = F.adaptive_avg_pool2d(images, (images.shape[2]//4, images.shape[3]//4)) ##### modified
|
131 |
+
else:
|
132 |
+
images = self.face_pool(images)
|
133 |
+
|
134 |
+
if return_latents:
|
135 |
+
return images, result_latent
|
136 |
+
else:
|
137 |
+
return images
|
138 |
+
|
139 |
+
def set_opts(self, opts):
|
140 |
+
self.opts = opts
|
141 |
+
|
142 |
+
def __load_latent_avg(self, ckpt, repeat=None):
|
143 |
+
if 'latent_avg' in ckpt:
|
144 |
+
self.latent_avg = ckpt['latent_avg'].to(self.opts.device)
|
145 |
+
if repeat is not None:
|
146 |
+
self.latent_avg = self.latent_avg.repeat(repeat, 1)
|
147 |
+
else:
|
148 |
+
self.latent_avg = None
|
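
Two details of pSp above that are easy to verify in isolation: the number of w+ entries derived from the output resolution, and the per-layer blend applied in forward() when latent_mask and alpha are given (the tensors below are dummies).

import math
import torch

# n_styles as computed in pSp.__init__
for output_size in (256, 512, 1024):
    print(output_size, '->', int(math.log(output_size, 2)) * 2 - 2)   # 14, 16, 18

# the latent_mask/alpha branch of forward(), reproduced on dummy w+ codes
codes = torch.zeros(1, 18, 512)
inject_latent = torch.ones(1, 18, 512)
alpha = 0.5
for i in range(8, 18):                                   # e.g. mix only the fine layers
    codes[:, i] = alpha * inject_latent[:, i] + (1 - alpha) * codes[:, i]
print(codes[0, 0, 0].item(), codes[0, 17, 0].item())     # 0.0 (kept), 0.5 (blended)
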
models/stylegan2/__init__.py
ADDED
File without changes
|
models/stylegan2/lpips/__init__.py
ADDED
@@ -0,0 +1,161 @@
|
1 |
+
|
2 |
+
from __future__ import absolute_import
|
3 |
+
from __future__ import division
|
4 |
+
from __future__ import print_function
|
5 |
+
|
6 |
+
import numpy as np
|
7 |
+
#from skimage.measure import compare_ssim
|
8 |
+
from skimage.metrics import structural_similarity as compare_ssim
|
9 |
+
import torch
|
10 |
+
from torch.autograd import Variable
|
11 |
+
|
12 |
+
from models.stylegan2.lpips import dist_model
|
13 |
+
|
14 |
+
class PerceptualLoss(torch.nn.Module):
|
15 |
+
def __init__(self, model='net-lin', net='alex', colorspace='rgb', spatial=False, use_gpu=True, gpu_ids=[0]): # VGG using our perceptually-learned weights (LPIPS metric)
|
16 |
+
# def __init__(self, model='net', net='vgg', use_gpu=True): # "default" way of using VGG as a perceptual loss
|
17 |
+
super(PerceptualLoss, self).__init__()
|
18 |
+
print('Setting up Perceptual loss...')
|
19 |
+
self.use_gpu = use_gpu
|
20 |
+
self.spatial = spatial
|
21 |
+
self.gpu_ids = gpu_ids
|
22 |
+
self.model = dist_model.DistModel()
|
23 |
+
self.model.initialize(model=model, net=net, use_gpu=use_gpu, colorspace=colorspace, spatial=self.spatial, gpu_ids=gpu_ids)
|
24 |
+
print('...[%s] initialized'%self.model.name())
|
25 |
+
print('...Done')
|
26 |
+
|
27 |
+
def forward(self, pred, target, normalize=False):
|
28 |
+
"""
|
29 |
+
Pred and target are Variables.
|
30 |
+
If normalize is True, assumes the images are between [0,1] and then scales them between [-1,+1]
|
31 |
+
If normalize is False, assumes the images are already between [-1,+1]
|
32 |
+
|
33 |
+
Inputs pred and target are Nx3xHxW
|
34 |
+
Output pytorch Variable N long
|
35 |
+
"""
|
36 |
+
|
37 |
+
if normalize:
|
38 |
+
target = 2 * target - 1
|
39 |
+
pred = 2 * pred - 1
|
40 |
+
|
41 |
+
return self.model.forward(target, pred)
|
42 |
+
|
43 |
+
def normalize_tensor(in_feat,eps=1e-10):
|
44 |
+
norm_factor = torch.sqrt(torch.sum(in_feat**2,dim=1,keepdim=True))
|
45 |
+
return in_feat/(norm_factor+eps)
|
46 |
+
|
47 |
+
def l2(p0, p1, range=255.):
|
48 |
+
return .5*np.mean((p0 / range - p1 / range)**2)
|
49 |
+
|
50 |
+
def psnr(p0, p1, peak=255.):
|
51 |
+
return 10*np.log10(peak**2/np.mean((1.*p0-1.*p1)**2))
|
52 |
+
|
53 |
+
def dssim(p0, p1, range=255.):
|
54 |
+
return (1 - compare_ssim(p0, p1, data_range=range, multichannel=True)) / 2.
|
55 |
+
|
56 |
+
def rgb2lab(in_img,mean_cent=False):
|
57 |
+
from skimage import color
|
58 |
+
img_lab = color.rgb2lab(in_img)
|
59 |
+
if(mean_cent):
|
60 |
+
img_lab[:,:,0] = img_lab[:,:,0]-50
|
61 |
+
return img_lab
|
62 |
+
|
63 |
+
def tensor2np(tensor_obj):
|
64 |
+
# change dimension of a tensor object into a numpy array
|
65 |
+
return tensor_obj[0].cpu().float().numpy().transpose((1,2,0))
|
66 |
+
|
67 |
+
def np2tensor(np_obj):
|
68 |
+
# change dimension of np array into tensor array
|
69 |
+
return torch.Tensor(np_obj[:, :, :, np.newaxis].transpose((3, 2, 0, 1)))
|
70 |
+
|
71 |
+
def tensor2tensorlab(image_tensor,to_norm=True,mc_only=False):
|
72 |
+
# image tensor to lab tensor
|
73 |
+
from skimage import color
|
74 |
+
|
75 |
+
img = tensor2im(image_tensor)
|
76 |
+
img_lab = color.rgb2lab(img)
|
77 |
+
if(mc_only):
|
78 |
+
img_lab[:,:,0] = img_lab[:,:,0]-50
|
79 |
+
if(to_norm and not mc_only):
|
80 |
+
img_lab[:,:,0] = img_lab[:,:,0]-50
|
81 |
+
img_lab = img_lab/100.
|
82 |
+
|
83 |
+
return np2tensor(img_lab)
|
84 |
+
|
85 |
+
def tensorlab2tensor(lab_tensor,return_inbnd=False):
|
86 |
+
from skimage import color
|
87 |
+
import warnings
|
88 |
+
warnings.filterwarnings("ignore")
|
89 |
+
|
90 |
+
lab = tensor2np(lab_tensor)*100.
|
91 |
+
lab[:,:,0] = lab[:,:,0]+50
|
92 |
+
|
93 |
+
rgb_back = 255.*np.clip(color.lab2rgb(lab.astype('float')),0,1)
|
94 |
+
if(return_inbnd):
|
95 |
+
# convert back to lab, see if we match
|
96 |
+
lab_back = color.rgb2lab(rgb_back.astype('uint8'))
|
97 |
+
mask = 1.*np.isclose(lab_back,lab,atol=2.)
|
98 |
+
mask = np2tensor(np.prod(mask,axis=2)[:,:,np.newaxis])
|
99 |
+
return (im2tensor(rgb_back),mask)
|
100 |
+
else:
|
101 |
+
return im2tensor(rgb_back)
|
102 |
+
|
103 |
+
def rgb2lab(input):
|
104 |
+
from skimage import color
|
105 |
+
return color.rgb2lab(input / 255.)
|
106 |
+
|
107 |
+
def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=255./2.):
|
108 |
+
image_numpy = image_tensor[0].cpu().float().numpy()
|
109 |
+
image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + cent) * factor
|
110 |
+
return image_numpy.astype(imtype)
|
111 |
+
|
112 |
+
def im2tensor(image, imtype=np.uint8, cent=1., factor=255./2.):
|
113 |
+
return torch.Tensor((image / factor - cent)
|
114 |
+
[:, :, :, np.newaxis].transpose((3, 2, 0, 1)))
|
115 |
+
|
116 |
+
def tensor2vec(vector_tensor):
|
117 |
+
return vector_tensor.data.cpu().numpy()[:, :, 0, 0]
|
118 |
+
|
119 |
+
def voc_ap(rec, prec, use_07_metric=False):
|
120 |
+
""" ap = voc_ap(rec, prec, [use_07_metric])
|
121 |
+
Compute VOC AP given precision and recall.
|
122 |
+
If use_07_metric is true, uses the
|
123 |
+
VOC 07 11 point method (default:False).
|
124 |
+
"""
|
125 |
+
if use_07_metric:
|
126 |
+
# 11 point metric
|
127 |
+
ap = 0.
|
128 |
+
for t in np.arange(0., 1.1, 0.1):
|
129 |
+
if np.sum(rec >= t) == 0:
|
130 |
+
p = 0
|
131 |
+
else:
|
132 |
+
p = np.max(prec[rec >= t])
|
133 |
+
ap = ap + p / 11.
|
134 |
+
else:
|
135 |
+
# correct AP calculation
|
136 |
+
# first append sentinel values at the end
|
137 |
+
mrec = np.concatenate(([0.], rec, [1.]))
|
138 |
+
mpre = np.concatenate(([0.], prec, [0.]))
|
139 |
+
|
140 |
+
# compute the precision envelope
|
141 |
+
for i in range(mpre.size - 1, 0, -1):
|
142 |
+
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
|
143 |
+
|
144 |
+
# to calculate area under PR curve, look for points
|
145 |
+
# where X axis (recall) changes value
|
146 |
+
i = np.where(mrec[1:] != mrec[:-1])[0]
|
147 |
+
|
148 |
+
# and sum (\Delta recall) * prec
|
149 |
+
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
|
150 |
+
return ap
|
151 |
+
|
152 |
+
def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=255./2.):
|
153 |
+
# def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=1.):
|
154 |
+
image_numpy = image_tensor[0].cpu().float().numpy()
|
155 |
+
image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + cent) * factor
|
156 |
+
return image_numpy.astype(imtype)
|
157 |
+
|
158 |
+
def im2tensor(image, imtype=np.uint8, cent=1., factor=255./2.):
|
159 |
+
# def im2tensor(image, imtype=np.uint8, cent=1., factor=1.):
|
160 |
+
return torch.Tensor((image / factor - cent)
|
161 |
+
[:, :, :, np.newaxis].transpose((3, 2, 0, 1)))
|
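The helpers above are what the LPIPS models in the next files lean on: tensor2im/im2tensor map between [-1,1] CHW tensors and uint8 HWC arrays, dssim turns SSIM into a distance, and voc_ap integrates a precision-recall curve. A minimal usage sketch, assuming the package imports as models.stylegan2.lpips (the same convention dist_model.py uses below) and that numpy, torch, and a scikit-image version providing compare_ssim are installed:

# Sketch only: exercises the conversion helpers above; nothing here is part of the repo.
import numpy as np
import torch
import models.stylegan2.lpips as util

x = torch.rand(1, 3, 64, 64) * 2 - 1          # fake 1x3xHxW image in [-1, 1]
img = util.tensor2im(x)                       # HxWx3 uint8 array in [0, 255]
x_back = util.im2tensor(img)                  # back to a 1x3xHxW float tensor
print(img.shape, img.dtype, tuple(x_back.shape))

# dssim: 0 for identical images, larger for more dissimilar ones
noisy = np.clip(img.astype(np.int16) + np.random.randint(-10, 10, img.shape), 0, 255).astype(np.uint8)
print('dssim:', util.dssim(1. * img, 1. * noisy, range=255.))

# voc_ap integrates precision over recall (sentinel-padded envelope by default)
rec = np.array([0.1, 0.4, 0.8, 1.0])
prec = np.array([1.0, 0.9, 0.7, 0.5])
print('AP:', util.voc_ap(rec, prec))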
models/stylegan2/lpips/base_model.py
ADDED
@@ -0,0 +1,58 @@
import os
import numpy as np
import torch
from torch.autograd import Variable
from pdb import set_trace as st
from IPython import embed

class BaseModel():
    def __init__(self):
        pass

    def name(self):
        return 'BaseModel'

    def initialize(self, use_gpu=True, gpu_ids=[0]):
        self.use_gpu = use_gpu
        self.gpu_ids = gpu_ids

    def forward(self):
        pass

    def optimize_parameters(self):
        pass

    def get_current_visuals(self):
        return self.input

    def get_current_errors(self):
        return {}

    def save(self, label):
        pass

    # helper saving function that can be used by subclasses
    def save_network(self, network, path, network_label, epoch_label):
        save_filename = '%s_net_%s.pth' % (epoch_label, network_label)
        save_path = os.path.join(path, save_filename)
        torch.save(network.state_dict(), save_path)

    # helper loading function that can be used by subclasses
    def load_network(self, network, network_label, epoch_label):
        save_filename = '%s_net_%s.pth' % (epoch_label, network_label)
        save_path = os.path.join(self.save_dir, save_filename)
        print('Loading network from %s' % save_path)
        network.load_state_dict(torch.load(save_path))

    def update_learning_rate(self):
        pass

    def get_image_paths(self):
        return self.image_paths

    def save_done(self, flag=False):
        np.save(os.path.join(self.save_dir, 'done_flag'), flag)
        np.savetxt(os.path.join(self.save_dir, 'done_flag'), [flag,], fmt='%i')
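BaseModel is only a container of no-op hooks plus the save/load helpers; DistModel in the next file overrides the pieces it needs. A rough sketch of how a subclass would use save_network/load_network (the TinyModel class and ./checkpoints directory are made up for illustration):

import os
import torch
from models.stylegan2.lpips.base_model import BaseModel

class TinyModel(BaseModel):                      # hypothetical subclass, illustration only
    def initialize(self, use_gpu=False, gpu_ids=[0]):
        BaseModel.initialize(self, use_gpu=use_gpu, gpu_ids=gpu_ids)
        self.save_dir = './checkpoints'          # load_network resolves paths against save_dir
        self.net = torch.nn.Linear(4, 1)

m = TinyModel()
m.initialize()
os.makedirs(m.save_dir, exist_ok=True)
m.save_network(m.net, m.save_dir, 'tiny', 'latest')   # writes ./checkpoints/latest_net_tiny.pth
m.load_network(m.net, 'tiny', 'latest')               # reads it back via the same naming scheme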
models/stylegan2/lpips/dist_model.py
ADDED
@@ -0,0 +1,284 @@
from __future__ import absolute_import

import sys
import numpy as np
import torch
from torch import nn
import os
from collections import OrderedDict
from torch.autograd import Variable
import itertools
from models.stylegan2.lpips.base_model import BaseModel
from scipy.ndimage import zoom
import fractions
import functools
import skimage.transform
from tqdm import tqdm

from IPython import embed

from models.stylegan2.lpips import networks_basic as networks
import models.stylegan2.lpips as util

class DistModel(BaseModel):
    def name(self):
        return self.model_name

    def initialize(self, model='net-lin', net='alex', colorspace='Lab', pnet_rand=False, pnet_tune=False, model_path=None,
            use_gpu=True, printNet=False, spatial=False,
            is_train=False, lr=.0001, beta1=0.5, version='0.1', gpu_ids=[0]):
        '''
        INPUTS
            model - ['net-lin'] for linearly calibrated network
                    ['net'] for off-the-shelf network
                    ['L2'] for L2 distance in Lab colorspace
                    ['SSIM'] for ssim in RGB colorspace
            net - ['squeeze','alex','vgg']
            model_path - if None, will look in weights/[NET_NAME].pth
            colorspace - ['Lab','RGB'] colorspace to use for L2 and SSIM
            use_gpu - bool - whether or not to use a GPU
            printNet - bool - whether or not to print network architecture out
            spatial - bool - whether to output an array containing varying distances across spatial dimensions
            spatial_shape - if given, output spatial shape. if None then spatial shape is determined automatically via spatial_factor (see below).
            spatial_factor - if given, specifies upsampling factor relative to the largest spatial extent of a convolutional layer. if None then resized to size of input images.
            spatial_order - spline order of filter for upsampling in spatial mode, by default 1 (bilinear).
            is_train - bool - [True] for training mode
            lr - float - initial learning rate
            beta1 - float - initial momentum term for adam
            version - 0.1 for latest, 0.0 was original (with a bug)
            gpu_ids - int array - [0] by default, gpus to use
        '''
        BaseModel.initialize(self, use_gpu=use_gpu, gpu_ids=gpu_ids)

        self.model = model
        self.net = net
        self.is_train = is_train
        self.spatial = spatial
        self.gpu_ids = gpu_ids
        self.model_name = '%s [%s]' % (model, net)

        if(self.model == 'net-lin'): # pretrained net + linear layer
            self.net = networks.PNetLin(pnet_rand=pnet_rand, pnet_tune=pnet_tune, pnet_type=net,
                use_dropout=True, spatial=spatial, version=version, lpips=True)
            kw = {}
            if not use_gpu:
                kw['map_location'] = 'cpu'
            if(model_path is None):
                import inspect
                model_path = os.path.abspath(os.path.join(inspect.getfile(self.initialize), '..', 'weights/v%s/%s.pth' % (version, net)))

            if(not is_train):
                print('Loading model from: %s' % model_path)
                self.net.load_state_dict(torch.load(model_path, **kw), strict=False)

        elif(self.model == 'net'): # pretrained network
            self.net = networks.PNetLin(pnet_rand=pnet_rand, pnet_type=net, lpips=False)
        elif(self.model in ['L2', 'l2']):
            self.net = networks.L2(use_gpu=use_gpu, colorspace=colorspace) # not really a network, only for testing
            self.model_name = 'L2'
        elif(self.model in ['DSSIM', 'dssim', 'SSIM', 'ssim']):
            self.net = networks.DSSIM(use_gpu=use_gpu, colorspace=colorspace)
            self.model_name = 'SSIM'
        else:
            raise ValueError("Model [%s] not recognized." % self.model)

        self.parameters = list(self.net.parameters())

        if self.is_train: # training mode
            # extra network on top to go from distances (d0,d1) => predicted human judgment (h*)
            self.rankLoss = networks.BCERankingLoss()
            self.parameters += list(self.rankLoss.net.parameters())
            self.lr = lr
            self.old_lr = lr
            self.optimizer_net = torch.optim.Adam(self.parameters, lr=lr, betas=(beta1, 0.999))
        else: # test mode
            self.net.eval()

        if(use_gpu):
            self.net.to(gpu_ids[0])
            self.net = torch.nn.DataParallel(self.net, device_ids=gpu_ids)
            if(self.is_train):
                self.rankLoss = self.rankLoss.to(device=gpu_ids[0]) # just put this on GPU0

        if(printNet):
            print('---------- Networks initialized -------------')
            networks.print_network(self.net)
            print('-----------------------------------------------')

    def forward(self, in0, in1, retPerLayer=False):
        ''' Function computes the distance between image patches in0 and in1
        INPUTS
            in0, in1 - torch.Tensor object of shape Nx3xXxY - image patch scaled to [-1,1]
        OUTPUT
            computed distances between in0 and in1
        '''

        return self.net.forward(in0, in1, retPerLayer=retPerLayer)

    # ***** TRAINING FUNCTIONS *****
    def optimize_parameters(self):
        self.forward_train()
        self.optimizer_net.zero_grad()
        self.backward_train()
        self.optimizer_net.step()
        self.clamp_weights()

    def clamp_weights(self):
        for module in self.net.modules():
            if(hasattr(module, 'weight') and module.kernel_size == (1, 1)):
                module.weight.data = torch.clamp(module.weight.data, min=0)

    def set_input(self, data):
        self.input_ref = data['ref']
        self.input_p0 = data['p0']
        self.input_p1 = data['p1']
        self.input_judge = data['judge']

        if(self.use_gpu):
            self.input_ref = self.input_ref.to(device=self.gpu_ids[0])
            self.input_p0 = self.input_p0.to(device=self.gpu_ids[0])
            self.input_p1 = self.input_p1.to(device=self.gpu_ids[0])
            self.input_judge = self.input_judge.to(device=self.gpu_ids[0])

        self.var_ref = Variable(self.input_ref, requires_grad=True)
        self.var_p0 = Variable(self.input_p0, requires_grad=True)
        self.var_p1 = Variable(self.input_p1, requires_grad=True)

    def forward_train(self): # run forward pass
        # print(self.net.module.scaling_layer.shift)
        # print(torch.norm(self.net.module.net.slice1[0].weight).item(), torch.norm(self.net.module.lin0.model[1].weight).item())

        self.d0 = self.forward(self.var_ref, self.var_p0)
        self.d1 = self.forward(self.var_ref, self.var_p1)
        self.acc_r = self.compute_accuracy(self.d0, self.d1, self.input_judge)

        self.var_judge = Variable(1. * self.input_judge).view(self.d0.size())

        self.loss_total = self.rankLoss.forward(self.d0, self.d1, self.var_judge * 2. - 1.)

        return self.loss_total

    def backward_train(self):
        torch.mean(self.loss_total).backward()

    def compute_accuracy(self, d0, d1, judge):
        ''' d0, d1 are Variables, judge is a Tensor '''
        d1_lt_d0 = (d1 < d0).cpu().data.numpy().flatten()
        judge_per = judge.cpu().numpy().flatten()
        return d1_lt_d0 * judge_per + (1 - d1_lt_d0) * (1 - judge_per)

    def get_current_errors(self):
        retDict = OrderedDict([('loss_total', self.loss_total.data.cpu().numpy()),
                               ('acc_r', self.acc_r)])

        for key in retDict.keys():
            retDict[key] = np.mean(retDict[key])

        return retDict

    def get_current_visuals(self):
        zoom_factor = 256 / self.var_ref.data.size()[2]

        ref_img = util.tensor2im(self.var_ref.data)
        p0_img = util.tensor2im(self.var_p0.data)
        p1_img = util.tensor2im(self.var_p1.data)

        ref_img_vis = zoom(ref_img, [zoom_factor, zoom_factor, 1], order=0)
        p0_img_vis = zoom(p0_img, [zoom_factor, zoom_factor, 1], order=0)
        p1_img_vis = zoom(p1_img, [zoom_factor, zoom_factor, 1], order=0)

        return OrderedDict([('ref', ref_img_vis),
                            ('p0', p0_img_vis),
                            ('p1', p1_img_vis)])

    def save(self, path, label):
        if(self.use_gpu):
            self.save_network(self.net.module, path, '', label)
        else:
            self.save_network(self.net, path, '', label)
        self.save_network(self.rankLoss.net, path, 'rank', label)

    def update_learning_rate(self, nepoch_decay):
        lrd = self.lr / nepoch_decay
        lr = self.old_lr - lrd

        for param_group in self.optimizer_net.param_groups:
            param_group['lr'] = lr

        print('update lr [%s] decay: %f -> %f' % (type, self.old_lr, lr))
        self.old_lr = lr

def score_2afc_dataset(data_loader, func, name=''):
    ''' Function computes Two Alternative Forced Choice (2AFC) score using
        distance function 'func' in dataset 'data_loader'
    INPUTS
        data_loader - CustomDatasetDataLoader object - contains a TwoAFCDataset inside
        func - callable distance function - calling d=func(in0,in1) should take 2
            pytorch tensors with shape Nx3xXxY, and return numpy array of length N
    OUTPUTS
        [0] - 2AFC score in [0,1], fraction of time func agrees with human evaluators
        [1] - dictionary with following elements
            d0s,d1s - N arrays containing distances between reference patch to perturbed patches
            gts - N array in [0,1], preferred patch selected by human evaluators
                (closer to "0" for left patch p0, "1" for right patch p1,
                "0.6" means 60pct people preferred right patch, 40pct preferred left)
            scores - N array in [0,1], corresponding to what percentage function agreed with humans
    CONSTS
        N - number of test triplets in data_loader
    '''

    d0s = []
    d1s = []
    gts = []

    for data in tqdm(data_loader.load_data(), desc=name):
        d0s += func(data['ref'], data['p0']).data.cpu().numpy().flatten().tolist()
        d1s += func(data['ref'], data['p1']).data.cpu().numpy().flatten().tolist()
        gts += data['judge'].cpu().numpy().flatten().tolist()

    d0s = np.array(d0s)
    d1s = np.array(d1s)
    gts = np.array(gts)
    scores = (d0s < d1s) * (1. - gts) + (d1s < d0s) * gts + (d1s == d0s) * .5

    return(np.mean(scores), dict(d0s=d0s, d1s=d1s, gts=gts, scores=scores))

def score_jnd_dataset(data_loader, func, name=''):
    ''' Function computes JND score using distance function 'func' in dataset 'data_loader'
    INPUTS
        data_loader - CustomDatasetDataLoader object - contains a JNDDataset inside
        func - callable distance function - calling d=func(in0,in1) should take 2
            pytorch tensors with shape Nx3xXxY, and return pytorch array of length N
    OUTPUTS
        [0] - JND score in [0,1], mAP score (area under precision-recall curve)
        [1] - dictionary with following elements
            ds - N array containing distances between two patches shown to human evaluator
            sames - N array containing fraction of people who thought the two patches were identical
    CONSTS
        N - number of test triplets in data_loader
    '''

    ds = []
    gts = []

    for data in tqdm(data_loader.load_data(), desc=name):
        ds += func(data['p0'], data['p1']).data.cpu().numpy().tolist()
        gts += data['same'].cpu().numpy().flatten().tolist()

    sames = np.array(gts)
    ds = np.array(ds)

    sorted_inds = np.argsort(ds)
    ds_sorted = ds[sorted_inds]
    sames_sorted = sames[sorted_inds]

    TPs = np.cumsum(sames_sorted)
    FPs = np.cumsum(1 - sames_sorted)
    FNs = np.sum(sames_sorted) - TPs

    precs = TPs / (TPs + FPs)
    recs = TPs / (TPs + FNs)
    score = util.voc_ap(recs, precs)

    return(score, dict(ds=ds, sames=sames))
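For inference, DistModel is driven through initialize() followed by forward(); with model='net-lin' and model_path=None it resolves the calibration weights to weights/v0.1/<net>.pth inside this package (the LFS pointer files added below). A CPU-only sketch, which additionally assumes the torchvision AlexNet weights can be downloaded or are cached:

import torch
from models.stylegan2.lpips.dist_model import DistModel

model = DistModel()
model.initialize(model='net-lin', net='alex', use_gpu=False, version='0.1')
print('metric:', model.name())                  # 'net-lin [alex]'

img0 = torch.rand(1, 3, 64, 64) * 2 - 1         # patches scaled to [-1, 1], Nx3xHxW
img1 = torch.rand(1, 3, 64, 64) * 2 - 1
d = model.forward(img0, img1)
print('perceptual distance:', d.item())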
models/stylegan2/lpips/networks_basic.py
ADDED
@@ -0,0 +1,187 @@
from __future__ import absolute_import

import sys
import torch
import torch.nn as nn
import torch.nn.init as init
from torch.autograd import Variable
import numpy as np
from pdb import set_trace as st
from skimage import color
from IPython import embed
from models.stylegan2.lpips import pretrained_networks as pn

import models.stylegan2.lpips as util

def spatial_average(in_tens, keepdim=True):
    return in_tens.mean([2, 3], keepdim=keepdim)

def upsample(in_tens, out_H=64): # assumes scale factor is same for H and W
    in_H = in_tens.shape[2]
    scale_factor = 1. * out_H / in_H

    return nn.Upsample(scale_factor=scale_factor, mode='bilinear', align_corners=False)(in_tens)

# Learned perceptual metric
class PNetLin(nn.Module):
    def __init__(self, pnet_type='vgg', pnet_rand=False, pnet_tune=False, use_dropout=True, spatial=False, version='0.1', lpips=True):
        super(PNetLin, self).__init__()

        self.pnet_type = pnet_type
        self.pnet_tune = pnet_tune
        self.pnet_rand = pnet_rand
        self.spatial = spatial
        self.lpips = lpips
        self.version = version
        self.scaling_layer = ScalingLayer()

        if(self.pnet_type in ['vgg', 'vgg16']):
            net_type = pn.vgg16
            self.chns = [64, 128, 256, 512, 512]
        elif(self.pnet_type == 'alex'):
            net_type = pn.alexnet
            self.chns = [64, 192, 384, 256, 256]
        elif(self.pnet_type == 'squeeze'):
            net_type = pn.squeezenet
            self.chns = [64, 128, 256, 384, 384, 512, 512]
        self.L = len(self.chns)

        self.net = net_type(pretrained=not self.pnet_rand, requires_grad=self.pnet_tune)

        if(lpips):
            self.lin0 = NetLinLayer(self.chns[0], use_dropout=use_dropout)
            self.lin1 = NetLinLayer(self.chns[1], use_dropout=use_dropout)
            self.lin2 = NetLinLayer(self.chns[2], use_dropout=use_dropout)
            self.lin3 = NetLinLayer(self.chns[3], use_dropout=use_dropout)
            self.lin4 = NetLinLayer(self.chns[4], use_dropout=use_dropout)
            self.lins = [self.lin0, self.lin1, self.lin2, self.lin3, self.lin4]
            if(self.pnet_type == 'squeeze'): # 7 layers for squeezenet
                self.lin5 = NetLinLayer(self.chns[5], use_dropout=use_dropout)
                self.lin6 = NetLinLayer(self.chns[6], use_dropout=use_dropout)
                self.lins += [self.lin5, self.lin6]

    def forward(self, in0, in1, retPerLayer=False):
        # v0.0 - original release had a bug, where input was not scaled
        in0_input, in1_input = (self.scaling_layer(in0), self.scaling_layer(in1)) if self.version == '0.1' else (in0, in1)
        outs0, outs1 = self.net.forward(in0_input), self.net.forward(in1_input)
        feats0, feats1, diffs = {}, {}, {}

        for kk in range(self.L):
            feats0[kk], feats1[kk] = util.normalize_tensor(outs0[kk]), util.normalize_tensor(outs1[kk])
            diffs[kk] = (feats0[kk] - feats1[kk]) ** 2

        if(self.lpips):
            if(self.spatial):
                res = [upsample(self.lins[kk].model(diffs[kk]), out_H=in0.shape[2]) for kk in range(self.L)]
            else:
                res = [spatial_average(self.lins[kk].model(diffs[kk]), keepdim=True) for kk in range(self.L)]
        else:
            if(self.spatial):
                res = [upsample(diffs[kk].sum(dim=1, keepdim=True), out_H=in0.shape[2]) for kk in range(self.L)]
            else:
                res = [spatial_average(diffs[kk].sum(dim=1, keepdim=True), keepdim=True) for kk in range(self.L)]

        val = res[0]
        for l in range(1, self.L):
            val += res[l]

        if(retPerLayer):
            return (val, res)
        else:
            return val

class ScalingLayer(nn.Module):
    def __init__(self):
        super(ScalingLayer, self).__init__()
        self.register_buffer('shift', torch.Tensor([-.030, -.088, -.188])[None, :, None, None])
        self.register_buffer('scale', torch.Tensor([.458, .448, .450])[None, :, None, None])

    def forward(self, inp):
        return (inp - self.shift) / self.scale


class NetLinLayer(nn.Module):
    ''' A single linear layer which does a 1x1 conv '''
    def __init__(self, chn_in, chn_out=1, use_dropout=False):
        super(NetLinLayer, self).__init__()

        layers = [nn.Dropout(),] if(use_dropout) else []
        layers += [nn.Conv2d(chn_in, chn_out, 1, stride=1, padding=0, bias=False),]
        self.model = nn.Sequential(*layers)


class Dist2LogitLayer(nn.Module):
    ''' takes 2 distances, puts through fc layers, spits out value between [0,1] (if use_sigmoid is True) '''
    def __init__(self, chn_mid=32, use_sigmoid=True):
        super(Dist2LogitLayer, self).__init__()

        layers = [nn.Conv2d(5, chn_mid, 1, stride=1, padding=0, bias=True),]
        layers += [nn.LeakyReLU(0.2, True),]
        layers += [nn.Conv2d(chn_mid, chn_mid, 1, stride=1, padding=0, bias=True),]
        layers += [nn.LeakyReLU(0.2, True),]
        layers += [nn.Conv2d(chn_mid, 1, 1, stride=1, padding=0, bias=True),]
        if(use_sigmoid):
            layers += [nn.Sigmoid(),]
        self.model = nn.Sequential(*layers)

    def forward(self, d0, d1, eps=0.1):
        return self.model.forward(torch.cat((d0, d1, d0 - d1, d0 / (d1 + eps), d1 / (d0 + eps)), dim=1))

class BCERankingLoss(nn.Module):
    def __init__(self, chn_mid=32):
        super(BCERankingLoss, self).__init__()
        self.net = Dist2LogitLayer(chn_mid=chn_mid)
        # self.parameters = list(self.net.parameters())
        self.loss = torch.nn.BCELoss()

    def forward(self, d0, d1, judge):
        per = (judge + 1.) / 2.
        self.logit = self.net.forward(d0, d1)
        return self.loss(self.logit, per)

# L2, DSSIM metrics
class FakeNet(nn.Module):
    def __init__(self, use_gpu=True, colorspace='Lab'):
        super(FakeNet, self).__init__()
        self.use_gpu = use_gpu
        self.colorspace = colorspace

class L2(FakeNet):

    def forward(self, in0, in1, retPerLayer=None):
        assert(in0.size()[0] == 1) # currently only supports batchSize 1

        if(self.colorspace == 'RGB'):
            (N, C, X, Y) = in0.size()
            value = torch.mean(torch.mean(torch.mean((in0 - in1)**2, dim=1).view(N, 1, X, Y), dim=2).view(N, 1, 1, Y), dim=3).view(N)
            return value
        elif(self.colorspace == 'Lab'):
            value = util.l2(util.tensor2np(util.tensor2tensorlab(in0.data, to_norm=False)),
                util.tensor2np(util.tensor2tensorlab(in1.data, to_norm=False)), range=100.).astype('float')
            ret_var = Variable(torch.Tensor((value,)))
            if(self.use_gpu):
                ret_var = ret_var.cuda()
            return ret_var

class DSSIM(FakeNet):

    def forward(self, in0, in1, retPerLayer=None):
        assert(in0.size()[0] == 1) # currently only supports batchSize 1

        if(self.colorspace == 'RGB'):
            value = util.dssim(1. * util.tensor2im(in0.data), 1. * util.tensor2im(in1.data), range=255.).astype('float')
        elif(self.colorspace == 'Lab'):
            value = util.dssim(util.tensor2np(util.tensor2tensorlab(in0.data, to_norm=False)),
                util.tensor2np(util.tensor2tensorlab(in1.data, to_norm=False)), range=100.).astype('float')
        ret_var = Variable(torch.Tensor((value,)))
        if(self.use_gpu):
            ret_var = ret_var.cuda()
        return ret_var

def print_network(net):
    num_params = 0
    for param in net.parameters():
        num_params += param.numel()
    print('Network', net)
    print('Total number of parameters: %d' % num_params)
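PNetLin can also be exercised on its own. In this sketch pnet_rand=True leaves the backbone untrained (no download, so the distances are meaningless); that flag choice is an assumption made only to demonstrate shapes and the forward path, not how DistModel configures it:

import torch
from models.stylegan2.lpips.networks_basic import PNetLin

net = PNetLin(pnet_type='alex', pnet_rand=True, spatial=False, lpips=True, version='0.1')
net.eval()

in0 = torch.rand(1, 3, 64, 64) * 2 - 1
in1 = torch.rand(1, 3, 64, 64) * 2 - 1
with torch.no_grad():
    val, per_layer = net(in0, in1, retPerLayer=True)
print(tuple(val.shape), [tuple(r.shape) for r in per_layer])   # (1, 1, 1, 1), one entry per feature slice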
models/stylegan2/lpips/pretrained_networks.py
ADDED
@@ -0,0 +1,181 @@
from collections import namedtuple
import torch
from torchvision import models as tv
from IPython import embed

class squeezenet(torch.nn.Module):
    def __init__(self, requires_grad=False, pretrained=True):
        super(squeezenet, self).__init__()
        pretrained_features = tv.squeezenet1_1(pretrained=pretrained).features
        self.slice1 = torch.nn.Sequential()
        self.slice2 = torch.nn.Sequential()
        self.slice3 = torch.nn.Sequential()
        self.slice4 = torch.nn.Sequential()
        self.slice5 = torch.nn.Sequential()
        self.slice6 = torch.nn.Sequential()
        self.slice7 = torch.nn.Sequential()
        self.N_slices = 7
        for x in range(2):
            self.slice1.add_module(str(x), pretrained_features[x])
        for x in range(2, 5):
            self.slice2.add_module(str(x), pretrained_features[x])
        for x in range(5, 8):
            self.slice3.add_module(str(x), pretrained_features[x])
        for x in range(8, 10):
            self.slice4.add_module(str(x), pretrained_features[x])
        for x in range(10, 11):
            self.slice5.add_module(str(x), pretrained_features[x])
        for x in range(11, 12):
            self.slice6.add_module(str(x), pretrained_features[x])
        for x in range(12, 13):
            self.slice7.add_module(str(x), pretrained_features[x])
        if not requires_grad:
            for param in self.parameters():
                param.requires_grad = False

    def forward(self, X):
        h = self.slice1(X)
        h_relu1 = h
        h = self.slice2(h)
        h_relu2 = h
        h = self.slice3(h)
        h_relu3 = h
        h = self.slice4(h)
        h_relu4 = h
        h = self.slice5(h)
        h_relu5 = h
        h = self.slice6(h)
        h_relu6 = h
        h = self.slice7(h)
        h_relu7 = h
        vgg_outputs = namedtuple("SqueezeOutputs", ['relu1', 'relu2', 'relu3', 'relu4', 'relu5', 'relu6', 'relu7'])
        out = vgg_outputs(h_relu1, h_relu2, h_relu3, h_relu4, h_relu5, h_relu6, h_relu7)

        return out


class alexnet(torch.nn.Module):
    def __init__(self, requires_grad=False, pretrained=True):
        super(alexnet, self).__init__()
        alexnet_pretrained_features = tv.alexnet(pretrained=pretrained).features
        self.slice1 = torch.nn.Sequential()
        self.slice2 = torch.nn.Sequential()
        self.slice3 = torch.nn.Sequential()
        self.slice4 = torch.nn.Sequential()
        self.slice5 = torch.nn.Sequential()
        self.N_slices = 5
        for x in range(2):
            self.slice1.add_module(str(x), alexnet_pretrained_features[x])
        for x in range(2, 5):
            self.slice2.add_module(str(x), alexnet_pretrained_features[x])
        for x in range(5, 8):
            self.slice3.add_module(str(x), alexnet_pretrained_features[x])
        for x in range(8, 10):
            self.slice4.add_module(str(x), alexnet_pretrained_features[x])
        for x in range(10, 12):
            self.slice5.add_module(str(x), alexnet_pretrained_features[x])
        if not requires_grad:
            for param in self.parameters():
                param.requires_grad = False

    def forward(self, X):
        h = self.slice1(X)
        h_relu1 = h
        h = self.slice2(h)
        h_relu2 = h
        h = self.slice3(h)
        h_relu3 = h
        h = self.slice4(h)
        h_relu4 = h
        h = self.slice5(h)
        h_relu5 = h
        alexnet_outputs = namedtuple("AlexnetOutputs", ['relu1', 'relu2', 'relu3', 'relu4', 'relu5'])
        out = alexnet_outputs(h_relu1, h_relu2, h_relu3, h_relu4, h_relu5)

        return out

class vgg16(torch.nn.Module):
    def __init__(self, requires_grad=False, pretrained=True):
        super(vgg16, self).__init__()
        vgg_pretrained_features = tv.vgg16(pretrained=pretrained).features
        self.slice1 = torch.nn.Sequential()
        self.slice2 = torch.nn.Sequential()
        self.slice3 = torch.nn.Sequential()
        self.slice4 = torch.nn.Sequential()
        self.slice5 = torch.nn.Sequential()
        self.N_slices = 5
        for x in range(4):
            self.slice1.add_module(str(x), vgg_pretrained_features[x])
        for x in range(4, 9):
            self.slice2.add_module(str(x), vgg_pretrained_features[x])
        for x in range(9, 16):
            self.slice3.add_module(str(x), vgg_pretrained_features[x])
        for x in range(16, 23):
            self.slice4.add_module(str(x), vgg_pretrained_features[x])
        for x in range(23, 30):
            self.slice5.add_module(str(x), vgg_pretrained_features[x])
        if not requires_grad:
            for param in self.parameters():
                param.requires_grad = False

    def forward(self, X):
        h = self.slice1(X)
        h_relu1_2 = h
        h = self.slice2(h)
        h_relu2_2 = h
        h = self.slice3(h)
        h_relu3_3 = h
        h = self.slice4(h)
        h_relu4_3 = h
        h = self.slice5(h)
        h_relu5_3 = h
        vgg_outputs = namedtuple("VggOutputs", ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3', 'relu5_3'])
        out = vgg_outputs(h_relu1_2, h_relu2_2, h_relu3_3, h_relu4_3, h_relu5_3)

        return out


class resnet(torch.nn.Module):
    def __init__(self, requires_grad=False, pretrained=True, num=18):
        super(resnet, self).__init__()
        if(num == 18):
            self.net = tv.resnet18(pretrained=pretrained)
        elif(num == 34):
            self.net = tv.resnet34(pretrained=pretrained)
        elif(num == 50):
            self.net = tv.resnet50(pretrained=pretrained)
        elif(num == 101):
            self.net = tv.resnet101(pretrained=pretrained)
        elif(num == 152):
            self.net = tv.resnet152(pretrained=pretrained)
        self.N_slices = 5

        self.conv1 = self.net.conv1
        self.bn1 = self.net.bn1
        self.relu = self.net.relu
        self.maxpool = self.net.maxpool
        self.layer1 = self.net.layer1
        self.layer2 = self.net.layer2
        self.layer3 = self.net.layer3
        self.layer4 = self.net.layer4

    def forward(self, X):
        h = self.conv1(X)
        h = self.bn1(h)
        h = self.relu(h)
        h_relu1 = h
        h = self.maxpool(h)
        h = self.layer1(h)
        h_conv2 = h
        h = self.layer2(h)
        h_conv3 = h
        h = self.layer3(h)
        h_conv4 = h
        h = self.layer4(h)
        h_conv5 = h

        outputs = namedtuple("Outputs", ['relu1', 'conv2', 'conv3', 'conv4', 'conv5'])
        out = outputs(h_relu1, h_conv2, h_conv3, h_conv4, h_conv5)

        return out
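These wrappers only split a torchvision backbone into slices and return the intermediate activations as a namedtuple. A quick shape check, using untrained weights to avoid a download (pretrained=False is chosen just for this sketch; LPIPS itself uses pretrained=True):

import torch
from models.stylegan2.lpips.pretrained_networks import vgg16

net = vgg16(requires_grad=False, pretrained=False)
x = torch.rand(1, 3, 64, 64)
with torch.no_grad():
    out = net(x)
for name, feat in zip(out._fields, out):
    print(name, tuple(feat.shape))   # relu1_2 (1, 64, 64, 64) ... relu5_3 (1, 512, 4, 4)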
models/stylegan2/lpips/weights/v0.0/alex.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:18720f55913d0af89042f13faa7e536a6ce1444a0914e6db9461355ece1e8cd5
size 5455
models/stylegan2/lpips/weights/v0.0/squeeze.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c27abd3a0145541baa50990817df58d3759c3f8154949f42af3b59b4e042d0bf
size 10057
models/stylegan2/lpips/weights/v0.0/vgg.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b9e4236260c3dd988fc79d2a48d645d885afcbb21f9fd595e6744cf7419b582c
size 6735
models/stylegan2/lpips/weights/v0.1/alex.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:df73285e35b22355a2df87cdb6b70b343713b667eddbda73e1977e0c860835c0
size 6009