hysts (HF staff) committed
Commit a1b524b
1 parent: 6080ed9
.gitattributes CHANGED
@@ -1,3 +1,4 @@
+ *.jpg filter=lfs diff=lfs merge=lfs -text
  *.7z filter=lfs diff=lfs merge=lfs -text
  *.arrow filter=lfs diff=lfs merge=lfs -text
  *.bin filter=lfs diff=lfs merge=lfs -text
.gitmodules ADDED
@@ -0,0 +1,6 @@
+ [submodule "HairCLIP"]
+ path = HairCLIP
+ url = https://github.com/wty-ustc/HairCLIP
+ [submodule "encoder4editing"]
+ path = encoder4editing
+ url = https://github.com/omertov/encoder4editing
.style.yapf ADDED
@@ -0,0 +1,5 @@
+ [style]
+ based_on_style = pep8
+ blank_line_before_nested_class_or_def = false
+ spaces_before_comment = 2
+ split_before_logical_operator = true
HairCLIP ADDED
@@ -0,0 +1 @@
+ Subproject commit 29290cf5bdca0f21ff27e0ec2e93bdd1ebbe3605
app.py ADDED
@@ -0,0 +1,151 @@
+ #!/usr/bin/env python
+
+ from __future__ import annotations
+
+ import argparse
+ import os
+ import pathlib
+ import subprocess
+
+ import gradio as gr
+
+ if os.getenv('SYSTEM') == 'spaces':
+     subprocess.call('git apply ../patch.e4e'.split(), cwd='encoder4editing')
+     subprocess.call('git apply ../patch.hairclip'.split(), cwd='HairCLIP')
+
+ from model import Model
+
+
+ def parse_args() -> argparse.Namespace:
+     parser = argparse.ArgumentParser()
+     parser.add_argument('--device', type=str, default='cpu')
+     parser.add_argument('--theme', type=str)
+     parser.add_argument('--share', action='store_true')
+     parser.add_argument('--port', type=int)
+     parser.add_argument('--disable-queue',
+                         dest='enable_queue',
+                         action='store_false')
+     return parser.parse_args()
+
+
+ def load_hairstyle_list() -> list[str]:
+     with open('HairCLIP/mapper/hairstyle_list.txt') as f:
+         lines = [line.strip() for line in f.readlines()]
+     lines = [line[:-10] for line in lines]
+     return lines
+
+
+ def set_example_image(example: list) -> dict:
+     return gr.Image.update(value=example[0])
+
+
+ def update_step2_components(choice: str) -> tuple[dict, dict]:
+     return (
+         gr.Dropdown.update(visible=choice in ['hairstyle', 'both']),
+         gr.Textbox.update(visible=choice in ['color', 'both']),
+     )
+
+
+ def main():
+     args = parse_args()
+     model = Model(device=args.device)
+
+     css = '''
+ h1#title {
+   text-align: center;
+ }
+ img#teaser {
+   max-width: 1000px;
+   max-height: 600px;
+ }
+ '''
+
+     with gr.Blocks(theme=args.theme, css=css) as demo:
+         gr.Markdown('''<h1 id="title">HairCLIP</h1>
+
+ This is an unofficial demo for <a href="https://github.com/wty-ustc/HairCLIP">https://github.com/wty-ustc/HairCLIP</a>.
+
+ <center><img id="teaser" src="https://raw.githubusercontent.com/wty-ustc/HairCLIP/main/assets/teaser.png" alt="teaser"></center>
+ ''')
+         with gr.Box():
+             gr.Markdown('## Step 1')
+             with gr.Row():
+                 with gr.Column():
+                     with gr.Row():
+                         input_image = gr.Image(label='Input Image',
+                                                type='file')
+                     with gr.Row():
+                         preprocess_button = gr.Button('Preprocess')
+                 with gr.Column():
+                     aligned_face = gr.Image(label='Aligned Face',
+                                             type='pil',
+                                             interactive=False)
+                 with gr.Column():
+                     reconstructed_face = gr.Image(label='Reconstructed Face',
+                                                   type='numpy')
+                     latent = gr.Variable()
+
+             with gr.Row():
+                 paths = sorted(pathlib.Path('images').glob('*.jpg'))
+                 example_images = gr.Dataset(components=[input_image],
+                                             samples=[[path.as_posix()]
+                                                      for path in paths])
+
+         with gr.Box():
+             gr.Markdown('## Step 2')
+             with gr.Row():
+                 with gr.Column():
+                     with gr.Row():
+                         editing_type = gr.Radio(['hairstyle', 'color', 'both'],
+                                                 value='both',
+                                                 type='value',
+                                                 label='Editing Type')
+                     with gr.Row():
+                         hairstyles = load_hairstyle_list()
+                         hairstyle_index = gr.Dropdown(hairstyles,
+                                                       value='afro',
+                                                       type='index',
+                                                       label='Hairstyle')
+                     with gr.Row():
+                         color_description = gr.Textbox(value='red',
+                                                        label='Color')
+                     with gr.Row():
+                         run_button = gr.Button('Run')
+
+                 with gr.Column():
+                     result = gr.Image(label='Result')
+
+         gr.Markdown(
+             '<center><img src="https://visitor-badge.glitch.me/badge?page_id=gradio-blocks.hairclip" alt="visitor badge"/></center>'
+         )
+
+         preprocess_button.click(fn=model.detect_and_align_face,
+                                 inputs=[input_image],
+                                 outputs=[aligned_face])
+         aligned_face.change(fn=model.reconstruct_face,
+                             inputs=[aligned_face],
+                             outputs=[reconstructed_face, latent])
+         editing_type.change(fn=update_step2_components,
+                             inputs=[editing_type],
+                             outputs=[hairstyle_index, color_description])
+         run_button.click(fn=model.generate,
+                          inputs=[
+                              editing_type,
+                              hairstyle_index,
+                              color_description,
+                              latent,
+                          ],
+                          outputs=[result])
+         example_images.click(fn=set_example_image,
+                              inputs=example_images,
+                              outputs=example_images.components)
+
+     demo.launch(
+         enable_queue=args.enable_queue,
+         server_port=args.port,
+         share=args.share,
+     )
+
+
+ if __name__ == '__main__':
+     main()
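
Note on load_hairstyle_list() above: the slice line[:-10] presumably strips a trailing " hairstyle" suffix (10 characters) from each entry of HairCLIP/mapper/hairstyle_list.txt, leaving only the style name for the dropdown. A tiny illustration under that assumption (not part of the commit):

    # Hypothetical example entry; the real entries live in hairstyle_list.txt.
    line = 'afro hairstyle'
    assert line[:-10] == 'afro'  # drop the 10-character " hairstyle" suffix
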
encoder4editing ADDED
@@ -0,0 +1 @@
+ Subproject commit 99ea50578695d2e8a1cf7259d8ee89b23eea942b
images/95UF6LXe-Lo.jpg ADDED
Git LFS Details
  • SHA256: 9ba751a6519822fa683e062ee3a383e748f15b41d4ca87d14c4fa73f9beed845
  • Pointer size: 131 Bytes
  • Size of remote file: 503 kB
images/ILip77SbmOE.jpg ADDED
Git LFS Details
  • SHA256: 3eed82923bc76a90f067415f148d56239fdfa4a1aca9eef1d459bc6050c9dde8
  • Pointer size: 131 Bytes
  • Size of remote file: 939 kB
images/README.md ADDED
@@ -0,0 +1,7 @@
+ These images are freely-usable ones from [Unsplash](https://unsplash.com/).
+
+ - https://unsplash.com/photos/rDEOVtE7vOs
+ - https://unsplash.com/photos/et_78QkMMQs
+ - https://unsplash.com/photos/ILip77SbmOE
+ - https://unsplash.com/photos/95UF6LXe-Lo
+
images/et_78QkMMQs.jpg ADDED
Git LFS Details
  • SHA256: c63a2e9de5eda3cb28012cfc8e4ba9384daeda8cca7a8989ad90b21a1293cc6f
  • Pointer size: 131 Bytes
  • Size of remote file: 371 kB
images/rDEOVtE7vOs.jpg ADDED
Git LFS Details
  • SHA256: b136bf195fef5599f277a563f0eef79af5301d9352d4ebf82bd7a0a061b7bdc0
  • Pointer size: 131 Bytes
  • Size of remote file: 155 kB
model.py ADDED
@@ -0,0 +1,151 @@
+ from __future__ import annotations
+
+ import argparse
+ import os
+ import sys
+ from typing import Callable, Union
+
+ import dlib
+ import huggingface_hub
+ import numpy as np
+ import PIL.Image
+ import torch
+ import torch.nn as nn
+ import torchvision.transforms as T
+
+ sys.path.insert(0, 'encoder4editing')
+
+ from models.psp import pSp
+ from utils.alignment import align_face
+
+ sys.path.insert(0, 'HairCLIP/')
+ sys.path.insert(0, 'HairCLIP/mapper/')
+
+ from mapper.datasets.latents_dataset_inference import LatentsDatasetInference
+ from mapper.hairclip_mapper import HairCLIPMapper
+
+ TOKEN = os.environ['TOKEN']
+
+
+ class Model:
+     def __init__(self, device: Union[torch.device, str]):
+         self.device = torch.device(device)
+         self.landmark_model = self._create_dlib_landmark_model()
+         self.e4e = self._load_e4e()
+         self.hairclip = self._load_hairclip()
+         self.transform = self._create_transform()
+
+     @staticmethod
+     def _create_dlib_landmark_model():
+         path = huggingface_hub.hf_hub_download(
+             'hysts/dlib_face_landmark_model',
+             'shape_predictor_68_face_landmarks.dat',
+             use_auth_token=TOKEN)
+         return dlib.shape_predictor(path)
+
+     def _load_e4e(self) -> nn.Module:
+         ckpt_path = huggingface_hub.hf_hub_download('hysts/e4e',
+                                                     'e4e_ffhq_encode.pt',
+                                                     use_auth_token=TOKEN)
+         ckpt = torch.load(ckpt_path, map_location='cpu')
+         opts = ckpt['opts']
+         opts['device'] = self.device.type
+         opts['checkpoint_path'] = ckpt_path
+         opts = argparse.Namespace(**opts)
+         model = pSp(opts)
+         model.to(self.device)
+         model.eval()
+         return model
+
+     def _load_hairclip(self) -> nn.Module:
+         ckpt_path = huggingface_hub.hf_hub_download('hysts/HairCLIP',
+                                                     'hairclip.pt',
+                                                     use_auth_token=TOKEN)
+         ckpt = torch.load(ckpt_path, map_location='cpu')
+         opts = ckpt['opts']
+         opts['device'] = self.device.type
+         opts['checkpoint_path'] = ckpt_path
+         opts['editing_type'] = 'both'
+         opts['input_type'] = 'text'
+         opts['hairstyle_description'] = 'HairCLIP/mapper/hairstyle_list.txt'
+         opts['color_description'] = 'red'
+         opts = argparse.Namespace(**opts)
+         model = HairCLIPMapper(opts)
+         model.to(self.device)
+         model.eval()
+         return model
+
+     @staticmethod
+     def _create_transform() -> Callable:
+         transform = T.Compose([
+             T.Resize(256),
+             T.CenterCrop(256),
+             T.ToTensor(),
+             T.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
+         ])
+         return transform
+
+     def detect_and_align_face(self, image) -> PIL.Image.Image:
+         image = align_face(filepath=image.name, predictor=self.landmark_model)
+         return image
+
+     @staticmethod
+     def denormalize(tensor: torch.Tensor) -> torch.Tensor:
+         return torch.clamp((tensor + 1) / 2 * 255, 0, 255).to(torch.uint8)
+
+     def postprocess(self, tensor: torch.Tensor) -> np.ndarray:
+         tensor = self.denormalize(tensor)
+         return tensor.cpu().numpy().transpose(1, 2, 0)
+
+     @torch.inference_mode()
+     def reconstruct_face(
+             self, image: PIL.Image.Image) -> tuple[np.ndarray, torch.Tensor]:
+         input_data = self.transform(image).unsqueeze(0).to(self.device)
+         reconstructed_images, latents = self.e4e(input_data,
+                                                  randomize_noise=False,
+                                                  return_latents=True)
+         reconstructed = torch.clamp(reconstructed_images[0].detach(), -1, 1)
+         reconstructed = self.postprocess(reconstructed)
+         return reconstructed, latents[0]
+
+     @torch.inference_mode()
+     def generate(self, editing_type: str, hairstyle_index: int,
+                  color_description: str, latent: torch.Tensor) -> np.ndarray:
+         opts = self.hairclip.opts
+         opts.editing_type = editing_type
+         opts.color_description = color_description
+
+         if editing_type == 'color':
+             hairstyle_index = 0
+
+         device = torch.device(opts.device)
+
+         dataset = LatentsDatasetInference(latents=latent.unsqueeze(0).cpu(),
+                                           opts=opts)
+         w, hairstyle_text_inputs_list, color_text_inputs_list = dataset[0][:3]
+
+         w = w.unsqueeze(0).to(device)
+         hairstyle_text_inputs = hairstyle_text_inputs_list[
+             hairstyle_index].unsqueeze(0).to(device)
+         color_text_inputs = color_text_inputs_list[0].unsqueeze(0).to(device)
+
+         hairstyle_tensor_hairmasked = torch.Tensor([0]).unsqueeze(0).to(device)
+         color_tensor_hairmasked = torch.Tensor([0]).unsqueeze(0).to(device)
+
+         w_hat = w + 0.1 * self.hairclip.mapper(
+             w,
+             hairstyle_text_inputs,
+             color_text_inputs,
+             hairstyle_tensor_hairmasked,
+             color_tensor_hairmasked,
+         )
+         x_hat, _ = self.hairclip.decoder(
+             [w_hat],
+             input_is_latent=True,
+             return_latents=True,
+             randomize_noise=False,
+             truncation=1,
+         )
+         res = torch.clamp(x_hat[0].detach(), -1, 1)
+         res = self.postprocess(res)
+         return res
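
For orientation, the Model class above is what the Gradio callbacks in app.py drive: align the face with dlib, invert it into the StyleGAN2 latent space with e4e, then edit the latent with the HairCLIP mapper. A minimal standalone sketch of that flow (not part of the commit; it assumes the submodules are checked out, the patches applied, the TOKEN environment variable set, and the example images present):

    import types

    from model import Model

    model = Model(device='cpu')

    # detect_and_align_face() reads image.name (app.py passes a Gradio file
    # object), so a small stand-in object with a .name attribute suffices here.
    image_file = types.SimpleNamespace(name='images/95UF6LXe-Lo.jpg')
    aligned = model.detect_and_align_face(image_file)        # PIL.Image
    reconstructed, latent = model.reconstruct_face(aligned)  # np.ndarray, latent tensor
    result = model.generate(editing_type='both',
                            hairstyle_index=0,               # row index in hairstyle_list.txt
                            color_description='red',
                            latent=latent)                   # np.ndarray (H, W, 3)
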
packages.txt ADDED
@@ -0,0 +1,2 @@
+ cmake
+ ninja-build
patch.e4e ADDED
@@ -0,0 +1,131 @@
+ diff --git a/models/stylegan2/op/fused_act.py b/models/stylegan2/op/fused_act.py
+ index 973a84f..6854b97 100644
+ --- a/models/stylegan2/op/fused_act.py
+ +++ b/models/stylegan2/op/fused_act.py
+ @@ -2,17 +2,18 @@ import os
+
+ import torch
+ from torch import nn
+ +from torch.nn import functional as F
+ from torch.autograd import Function
+ from torch.utils.cpp_extension import load
+
+ -module_path = os.path.dirname(__file__)
+ -fused = load(
+ -    'fused',
+ -    sources=[
+ -        os.path.join(module_path, 'fused_bias_act.cpp'),
+ -        os.path.join(module_path, 'fused_bias_act_kernel.cu'),
+ -    ],
+ -)
+ +#module_path = os.path.dirname(__file__)
+ +#fused = load(
+ +#    'fused',
+ +#    sources=[
+ +#        os.path.join(module_path, 'fused_bias_act.cpp'),
+ +#        os.path.join(module_path, 'fused_bias_act_kernel.cu'),
+ +#    ],
+ +#)
+
+
+ class FusedLeakyReLUFunctionBackward(Function):
+ @@ -82,4 +83,18 @@ class FusedLeakyReLU(nn.Module):
+
+
+ def fused_leaky_relu(input, bias, negative_slope=0.2, scale=2 ** 0.5):
+ -    return FusedLeakyReLUFunction.apply(input, bias, negative_slope, scale)
+ +    if input.device.type == "cpu":
+ +        if bias is not None:
+ +            rest_dim = [1] * (input.ndim - bias.ndim - 1)
+ +            return (
+ +                F.leaky_relu(
+ +                    input + bias.view(1, bias.shape[0], *rest_dim), negative_slope=0.2
+ +                )
+ +                * scale
+ +            )
+ +
+ +        else:
+ +            return F.leaky_relu(input, negative_slope=0.2) * scale
+ +
+ +    else:
+ +        return FusedLeakyReLUFunction.apply(input, bias, negative_slope, scale)
+ diff --git a/models/stylegan2/op/upfirdn2d.py b/models/stylegan2/op/upfirdn2d.py
+ index 7bc5a1e..5465d1a 100644
+ --- a/models/stylegan2/op/upfirdn2d.py
+ +++ b/models/stylegan2/op/upfirdn2d.py
+ @@ -1,17 +1,18 @@
+ import os
+
+ import torch
+ +from torch.nn import functional as F
+ from torch.autograd import Function
+ from torch.utils.cpp_extension import load
+
+ -module_path = os.path.dirname(__file__)
+ -upfirdn2d_op = load(
+ -    'upfirdn2d',
+ -    sources=[
+ -        os.path.join(module_path, 'upfirdn2d.cpp'),
+ -        os.path.join(module_path, 'upfirdn2d_kernel.cu'),
+ -    ],
+ -)
+ +#module_path = os.path.dirname(__file__)
+ +#upfirdn2d_op = load(
+ +#    'upfirdn2d',
+ +#    sources=[
+ +#        os.path.join(module_path, 'upfirdn2d.cpp'),
+ +#        os.path.join(module_path, 'upfirdn2d_kernel.cu'),
+ +#    ],
+ +#)
+
+
+ class UpFirDn2dBackward(Function):
+ @@ -97,8 +98,8 @@ class UpFirDn2d(Function):
+
+         ctx.save_for_backward(kernel, torch.flip(kernel, [0, 1]))
+
+ -        out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1
+ -        out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1
+ +        out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h + down_y) // down_y
+ +        out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w + down_x) // down_x
+         ctx.out_size = (out_h, out_w)
+
+         ctx.up = (up_x, up_y)
+ @@ -140,9 +141,13 @@ class UpFirDn2d(Function):
+
+
+ def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)):
+ -    out = UpFirDn2d.apply(
+ -        input, kernel, (up, up), (down, down), (pad[0], pad[1], pad[0], pad[1])
+ -    )
+ +    if input.device.type == "cpu":
+ +        out = upfirdn2d_native(input, kernel, up, up, down, down, pad[0], pad[1], pad[0], pad[1])
+ +
+ +    else:
+ +        out = UpFirDn2d.apply(
+ +            input, kernel, (up, up), (down, down), (pad[0], pad[1], pad[0], pad[1])
+ +        )
+
+     return out
+
+ @@ -150,6 +155,9 @@ def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)):
+ def upfirdn2d_native(
+     input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1
+ ):
+ +    _, channel, in_h, in_w = input.shape
+ +    input = input.reshape(-1, in_h, in_w, 1)
+ +
+     _, in_h, in_w, minor = input.shape
+     kernel_h, kernel_w = kernel.shape
+
+ @@ -180,5 +188,9 @@ def upfirdn2d_native(
+         in_w * up_x + pad_x0 + pad_x1 - kernel_w + 1,
+     )
+     out = out.permute(0, 2, 3, 1)
+ +    out = out[:, ::down_y, ::down_x, :]
+ +
+ +    out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h + down_y) // down_y
+ +    out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w + down_x) // down_x
+
+ -    return out[:, ::down_y, ::down_x, :]
+ +    return out.view(-1, channel, out_h, out_w)
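
patch.e4e exists so that encoder4editing can run on CPU-only Spaces hardware: the torch.utils.cpp_extension.load(...) calls that would compile the StyleGAN2 CUDA kernels are commented out, and fused_leaky_relu / upfirdn2d fall back to pure-PyTorch code whenever the input tensor lives on the CPU. A compact restatement of the fused_leaky_relu fallback, as a sketch rather than the patched file itself:

    import torch
    import torch.nn.functional as F

    def fused_leaky_relu_cpu(x, bias=None, negative_slope=0.2, scale=2 ** 0.5):
        # Mirrors the patched CPU branch: add the per-channel bias (broadcast
        # over the remaining dims), apply leaky ReLU, then rescale by sqrt(2).
        if bias is not None:
            rest_dim = [1] * (x.ndim - bias.ndim - 1)
            return F.leaky_relu(x + bias.view(1, bias.shape[0], *rest_dim),
                                negative_slope=0.2) * scale
        return F.leaky_relu(x, negative_slope=0.2) * scale

    out = fused_leaky_relu_cpu(torch.randn(1, 8, 4, 4), torch.zeros(8))
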
patch.hairclip ADDED
@@ -0,0 +1,61 @@
+ diff --git a/mapper/latent_mappers.py b/mapper/latent_mappers.py
+ index 56b9c55..f0dd005 100644
+ --- a/mapper/latent_mappers.py
+ +++ b/mapper/latent_mappers.py
+ @@ -19,7 +19,7 @@ class ModulationModule(Module):
+
+     def forward(self, x, embedding, cut_flag):
+         x = self.fc(x)
+ -        x = self.norm(x)
+ +        x = self.norm(x)
+         if cut_flag == 1:
+             return x
+         gamma = self.gamma_function(embedding.float())
+ @@ -39,20 +39,20 @@ class SubHairMapper(Module):
+     def forward(self, x, embedding, cut_flag=0):
+         x = self.pixelnorm(x)
+         for modulation_module in self.modulation_module_list:
+ -            x = modulation_module(x, embedding, cut_flag)
+ +            x = modulation_module(x, embedding, cut_flag)
+         return x
+
+ -class HairMapper(Module):
+ +class HairMapper(Module):
+     def __init__(self, opts):
+         super(HairMapper, self).__init__()
+         self.opts = opts
+ -        self.clip_model, self.preprocess = clip.load("ViT-B/32", device="cuda")
+ +        self.clip_model, self.preprocess = clip.load("ViT-B/32", device=opts.device)
+         self.transform = transforms.Compose([transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711))])
+         self.face_pool = torch.nn.AdaptiveAvgPool2d((224, 224))
+         self.hairstyle_cut_flag = 0
+         self.color_cut_flag = 0
+
+ -        if not opts.no_coarse_mapper:
+ +        if not opts.no_coarse_mapper:
+             self.course_mapping = SubHairMapper(opts, 4)
+         if not opts.no_medium_mapper:
+             self.medium_mapping = SubHairMapper(opts, 4)
+ @@ -70,13 +70,13 @@ class HairMapper(Module):
+         elif hairstyle_tensor.shape[1] != 1:
+             hairstyle_embedding = self.gen_image_embedding(hairstyle_tensor, self.clip_model, self.preprocess).unsqueeze(1).repeat(1, 18, 1).detach()
+         else:
+ -            hairstyle_embedding = torch.ones(x.shape[0], 18, 512).cuda()
+ +            hairstyle_embedding = torch.ones(x.shape[0], 18, 512).to(self.opts.device)
+         if color_text_inputs.shape[1] != 1:
+             color_embedding = self.clip_model.encode_text(color_text_inputs).unsqueeze(1).repeat(1, 18, 1).detach()
+         elif color_tensor.shape[1] != 1:
+             color_embedding = self.gen_image_embedding(color_tensor, self.clip_model, self.preprocess).unsqueeze(1).repeat(1, 18, 1).detach()
+         else:
+ -            color_embedding = torch.ones(x.shape[0], 18, 512).cuda()
+ +            color_embedding = torch.ones(x.shape[0], 18, 512).to(self.opts.device)
+
+
+         if (hairstyle_text_inputs.shape[1] == 1) and (hairstyle_tensor.shape[1] == 1):
+ @@ -106,4 +106,4 @@ class HairMapper(Module):
+             x_fine = torch.zeros_like(x_fine)
+
+         out = torch.cat([x_coarse, x_medium, x_fine], dim=1)
+ -        return out
+
+ +        return out
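
The functional edits in patch.hairclip are device-related: clip.load(..., device="cuda") becomes clip.load(..., device=opts.device) and the hard-coded .cuda() placeholder embeddings become .to(self.opts.device), so HairMapper also runs on the CPU hardware used by this Space; the remaining +/- pairs appear to differ only in whitespace. A minimal sketch of that pattern with hypothetical names (not part of the commit):

    import torch

    def make_placeholder_embedding(batch_size, device):
        # Device-agnostic replacement for torch.ones(batch_size, 18, 512).cuda():
        # the tensor is created on whatever device the caller configured.
        return torch.ones(batch_size, 18, 512).to(device)

    emb = make_placeholder_embedding(1, 'cpu')  # works without a GPU
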
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ dlib==19.23.0
+ numpy==1.22.3
+ opencv-python-headless==4.5.5.64
+ Pillow==9.1.0
+ scipy==1.8.0
+ torch==1.11.0
+ torchvision==0.12.0
+ git+https://github.com/openai/CLIP.git