ethanNeuralImage committed · Commit 07ecdd9 · Parent: 7deca27
initial commit -- gradio

Files changed:
- app.py +215 -0
- gradio_wrapper/demo.py +549 -0
- gradio_wrapper/gradio_options.py +53 -0
app.py
ADDED
@@ -0,0 +1,215 @@
import sys
import os
import torch

sys.path.append(".")

from gradio_wrapper.gradio_options import GradioTestOptions
from models.hyperstyle.utils.model_utils import load_model
from models.hyperstyle.utils.common import tensor2im
from models.hyperstyle.utils.inference_utils import run_inversion

from hyperstyle_global_directions.edit import load_direction_calculator, edit_image

from torchvision import transforms

import gradio as gr

from utils.alignment import align_face
import dlib

from argparse import Namespace

from mapper.styleclip_mapper import StyleCLIPMapper

from PIL import Image

opts_args = ['--no_fine_mapper']
opts = GradioTestOptions().parse(opts_args)

# Checkpoints for the per-hairstyle StyleCLIP mappers.
mapper_dict = {
    'afro': './pretrained_models/styleCLIP_mappers/afro_hairstyle.pt',
    'bob': './pretrained_models/styleCLIP_mappers/bob_hairstyle.pt',
    'bowl': './pretrained_models/styleCLIP_mappers/bowl_hairstyle.pt',
    'buzz': './pretrained_models/styleCLIP_mappers/buzz_hairstyle.pt',
    'caesar': './pretrained_models/styleCLIP_mappers/caesar_hairstyle.pt',
    'crew': './pretrained_models/styleCLIP_mappers/crew_hairstyle.pt',
    'pixie': './pretrained_models/styleCLIP_mappers/pixie_hairstyle.pt',
    'straight': './pretrained_models/styleCLIP_mappers/straight_hairstyle.pt',
    'undercut': './pretrained_models/styleCLIP_mappers/undercut_hairstyle.pt',
    'wavy': './pretrained_models/styleCLIP_mappers/wavy_hairstyle.pt',
}

predictor = dlib.shape_predictor("./pretrained_models/hyperstyle/shape_predictor_68_face_landmarks.dat")
hyperstyle, hyperstyle_args = load_model(opts.hyperstyle_checkpoint_path, update_opts=opts)
resize_amount = (256, 256) if hyperstyle_args.resize_outputs else (hyperstyle_args.output_size, hyperstyle_args.output_size)
im2tensor_transforms = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])
direction_calculator = load_direction_calculator(opts)

# Load the default ('afro') StyleCLIP mapper at startup.
ckpt = torch.load(mapper_dict['afro'], map_location='cpu')
opts.checkpoint_path = mapper_dict['afro']
mapper_args = ckpt['opts']
mapper_args.update(vars(opts))
mapper_args = Namespace(**mapper_args)
mapper = StyleCLIPMapper(mapper_args)
mapper.eval()
mapper.cuda()


def change_mapper(desc):
    # Swap in the StyleCLIP mapper for the selected hairstyle, freeing the old one first.
    global mapper
    global mapper_args
    mapper = None
    ckpt = None
    mapper_args = None
    torch.cuda.empty_cache()
    opts.checkpoint_path = mapper_dict[desc]
    ckpt = torch.load(mapper_dict[desc], map_location='cpu')
    mapper_args = ckpt['opts']
    mapper_args.update(vars(opts))
    mapper_args = Namespace(**mapper_args)
    mapper = StyleCLIPMapper(mapper_args)
    mapper.eval()
    mapper.cuda()


with gr.Blocks() as demo:
    with gr.Row() as row:
        with gr.Column() as inputs:
            source = gr.Image(label="Image to Map", type='filepath')
            align = gr.Checkbox(True, label='Align Image')
            inverter_bools = gr.CheckboxGroup(["Hyperstyle", "E4E"], value=['Hyperstyle'], label='Inverter Choices')
            n_hyperstyle_iterations = gr.Number(3, label='Number of Iterations For Hyperstyle', precision=0)
            with gr.Box():
                mapper_bool = gr.Checkbox(True, label='Output Mapper Result')
                with gr.Box() as mapper_opts:
                    mapper_choice = gr.Dropdown(['afro', 'bob', 'bowl', 'buzz', 'caesar', 'crew', 'pixie', 'straight', 'undercut', 'wavy'], value='afro', label='Which Hairstyle Mapper to Use?')
                    mapper_alpha = gr.Slider(minimum=-0.5, maximum=0.5, value=0.01, step=0.1, label='Strength of Mapper Alpha')
            with gr.Box():
                gd_bool = gr.Checkbox(False, label='Output Global Direction Result')
                with gr.Box(visible=False) as gd_opts:
                    neutral_text = gr.Text(value='A face with hair', label='Neutral Text')
                    target_text = gr.Text(value=mapper_args.description, label='Target Text')
                    alpha = gr.Slider(minimum=-10.0, maximum=10.0, value=4.1, step=0.1, label="Alpha for Global Direction")
                    beta = gr.Slider(minimum=0.0, maximum=0.30, value=0.15, step=0.01, label="Beta for Global Direction")
            submit_button = gr.Button("Edit Image")
        with gr.Column() as outputs:
            with gr.Row() as hyperstyle_images:
                output_hyperstyle_mapper = gr.Image(type='pil', label="Hyperstyle Mapper")
                output_hyperstyle_gd = gr.Image(type='pil', label="Hyperstyle Global Directions", visible=False)
            with gr.Row(visible=False) as e4e_images:
                output_e4e_mapper = gr.Image(type='pil', label="E4E Mapper")
                output_e4e_gd = gr.Image(type='pil', label="E4E Global Directions", visible=False)

    def mapper_change(new_mapper):
        change_mapper(new_mapper)
        return mapper_args.description

    def inverter_toggles(bools):
        e4e_bool = 'E4E' in bools
        hyperstyle_bool = 'Hyperstyle' in bools
        return {
            hyperstyle_images: gr.update(visible=hyperstyle_bool),
            e4e_images: gr.update(visible=e4e_bool),
            n_hyperstyle_iterations: gr.update(visible=hyperstyle_bool),
        }

    def mapper_toggles(bool):
        return {
            mapper_opts: gr.update(visible=bool),
            output_hyperstyle_mapper: gr.update(visible=bool),
            output_e4e_mapper: gr.update(visible=bool),
        }

    def gd_toggles(bool):
        return {
            gd_opts: gr.update(visible=bool),
            output_hyperstyle_gd: gr.update(visible=bool),
            output_e4e_gd: gr.update(visible=bool),
        }

    mapper_choice.change(mapper_change, mapper_choice, [target_text])
    inverter_bools.change(inverter_toggles, inverter_bools, [hyperstyle_images, e4e_images, n_hyperstyle_iterations])
    mapper_bool.change(mapper_toggles, mapper_bool, [mapper_opts, output_hyperstyle_mapper, output_e4e_mapper])
    gd_bool.change(gd_toggles, gd_bool, [gd_opts, output_hyperstyle_gd, output_e4e_gd])

    def map_latent(inputs, stylespace=False, weight_deltas=None, strength=0.1):
        w = inputs.cuda()
        with torch.no_grad():
            if stylespace:
                delta = mapper.mapper(w)
                w_hat = [c + strength * delta_c for (c, delta_c) in zip(w, delta)]
                x_hat, _, w_hat = mapper.decoder([w_hat], input_is_latent=True, return_latents=True,
                                                 randomize_noise=False, truncation=1, input_is_stylespace=True,
                                                 weights_deltas=weight_deltas)
            else:
                delta = mapper.mapper(w)
                w_hat = w + strength * delta
                x_hat, w_hat, _ = mapper.decoder([w_hat], input_is_latent=True, return_latents=True,
                                                 randomize_noise=False, truncation=1, weights_deltas=weight_deltas)
            result_batch = (x_hat, w_hat)
        return result_batch

    def submit(
        src, align_img, inverter_bools, n_iterations,
        mapper_bool, mapper_choice, mapper_alpha,
        gd_bool, neutral_text, target_text, alpha, beta,
    ):
        torch.cuda.empty_cache()
        with torch.no_grad():
            output_imgs = []
            if align_img:
                input_img = align_face(src, predictor)
            else:
                input_img = Image.open(src).convert('RGB')
            input_img = im2tensor_transforms(input_img).cuda()

            if gd_bool:
                opts.neutral_text = neutral_text
                opts.target_text = target_text
                opts.alpha = alpha
                opts.beta = beta

            if 'Hyperstyle' in inverter_bools:
                # Honor the iteration count chosen in the UI before running the inversion.
                hyperstyle_args.n_iters_per_batch = int(n_iterations)
                hyperstyle_batch, hyperstyle_latents, hyperstyle_deltas, _ = run_inversion(
                    input_img.unsqueeze(0), hyperstyle, hyperstyle_args, return_intermediate_results=False)
                if mapper_bool:
                    mapped_hyperstyle, _ = map_latent(hyperstyle_latents, stylespace=False,
                                                      weight_deltas=hyperstyle_deltas, strength=mapper_alpha)
                    mapped_hyperstyle = tensor2im(mapped_hyperstyle[0])
                else:
                    mapped_hyperstyle = None

                if gd_bool:
                    gd_hyperstyle = edit_image(_, hyperstyle_latents[0], hyperstyle.decoder,
                                               direction_calculator, opts, hyperstyle_deltas)[0]
                    gd_hyperstyle = tensor2im(gd_hyperstyle)
                else:
                    gd_hyperstyle = None

                hyperstyle_output = [mapped_hyperstyle, gd_hyperstyle]
            else:
                hyperstyle_output = [None, None]
            output_imgs.extend(hyperstyle_output)

            if 'E4E' in inverter_bools:
                e4e_batch, e4e_latents = hyperstyle.w_invert(input_img.unsqueeze(0))
                e4e_deltas = None
                if mapper_bool:
                    mapped_e4e, _ = map_latent(e4e_latents, stylespace=False, weight_deltas=e4e_deltas, strength=mapper_alpha)
                    mapped_e4e = tensor2im(mapped_e4e[0])
                else:
                    mapped_e4e = None

                if gd_bool:
                    gd_e4e = edit_image(_, e4e_latents[0], hyperstyle.decoder, direction_calculator, opts, e4e_deltas)[0]
                    gd_e4e = tensor2im(gd_e4e)
                else:
                    gd_e4e = None

                e4e_output = [mapped_e4e, gd_e4e]
            else:
                e4e_output = [None, None]
            output_imgs.extend(e4e_output)
        return output_imgs

    submit_button.click(
        submit,
        [
            source, align, inverter_bools, n_hyperstyle_iterations,
            mapper_bool, mapper_choice, mapper_alpha,
            gd_bool, neutral_text, target_text, alpha, beta,
        ],
        [output_hyperstyle_mapper, output_hyperstyle_gd, output_e4e_mapper, output_e4e_gd],
    )

demo.launch()
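Note on the UI callbacks above: the visibility toggles rely on Gradio's pattern of returning a dict that maps output components to gr.update(...) calls, so only the listed components change. A minimal, self-contained sketch of that pattern (the component names here are illustrative, not part of this Space):

import gradio as gr

with gr.Blocks() as toy:
    show = gr.Checkbox(True, label="Show advanced options")
    with gr.Box(visible=True) as advanced:
        strength = gr.Slider(0.0, 1.0, value=0.5, label="Strength")

    def toggle(flag):
        # Return a dict keyed by components; only these components get updated.
        return {advanced: gr.update(visible=flag)}

    show.change(toggle, show, [advanced])

# toy.launch()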
gradio_wrapper/demo.py
ADDED
@@ -0,0 +1,549 @@
import gradio as gr
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
cudnn.benchmark = True
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
from tqdm import tqdm as tqdm
import pickle
import warnings
warnings.filterwarnings("ignore")
from spherical_kmeans import MiniBatchSphericalKMeans as sKmeans
from argparse import Namespace
from torch.utils.data import Dataset, DataLoader
from torchvision.models import vgg19
import glob
from pathlib import Path
import lpips
import argparse
import gc
import cv2
import dlib
from PIL import Image

from e4e_projection import projection


from model import *
from util import *
from e4e.models.psp import pSp
import torchvision.transforms as transforms

from torch.nn import DataParallel
import torchvision.transforms.functional as TF
from FaceQualityMetrics.utils import FaceMetric
from hyperstyle.utils.model_utils import load_model
from configs.paths_config import model_paths


from manipulator import Manipulator
from wrapper import Generator_wrapper


def run_inversion(inputs, net, n_iters_per_batch, return_intermediate_results=False, resize_outputs=False, weights_deltas=None):
    y_hat, latent, weights_deltas, codes = None, None, weights_deltas, None

    if return_intermediate_results:
        results_batch = {idx: [] for idx in range(inputs.shape[0])}
        results_latent = {idx: [] for idx in range(inputs.shape[0])}
        results_deltas = {idx: [] for idx in range(inputs.shape[0])}
    else:
        results_batch, results_latent, results_deltas = None, None, None

    if weights_deltas is None:
        # Let the hypernetwork refine the weight deltas over several passes.
        for iter in range(n_iters_per_batch):
            y_hat, latent, weights_deltas, codes, _ = net.forward(inputs,
                                                                  y_hat=y_hat,
                                                                  codes=codes,
                                                                  weights_deltas=weights_deltas,
                                                                  return_latents=True,
                                                                  resize=resize_outputs,
                                                                  randomize_noise=False,
                                                                  return_weight_deltas_and_codes=True)
            # weights_deltas[14] = None
            # weights_deltas[20] = None
            # weights_deltas[21] = None
            # weights_deltas[23] = None
            # weights_deltas[24] = None

            if return_intermediate_results:
                store_intermediate_results(results_batch, results_latent, results_deltas, y_hat, latent, weights_deltas)

            # resize input to 256 before feeding into next iteration
            y_hat = net.face_pool(y_hat)

    else:
        # Weight deltas were supplied by the caller, so keep them fixed across iterations.
        for iter in range(n_iters_per_batch):
            y_hat, latent, _, codes, _ = net.forward(inputs,
                                                     y_hat=y_hat,
                                                     codes=codes,
                                                     weights_deltas=weights_deltas,
                                                     return_latents=True,
                                                     resize=resize_outputs,
                                                     randomize_noise=False,
                                                     return_weight_deltas_and_codes=True)

            if return_intermediate_results:
                store_intermediate_results(results_batch, results_latent, results_deltas, y_hat, latent, weights_deltas)

            # resize input to 256 before feeding into next iteration
            y_hat = net.face_pool(y_hat)

    if return_intermediate_results:
        return results_batch, results_latent, results_deltas
    return y_hat, latent, weights_deltas, codes


def store_intermediate_results(results_batch, results_latent, results_deltas, y_hat, latent, weights_deltas):
    for idx in range(y_hat.shape[0]):
        results_batch[idx].append(y_hat[idx])
        results_latent[idx].append(latent[idx].cpu().numpy())
        results_deltas[idx].append([w[idx].cpu().numpy() if w is not None else None for w in weights_deltas])


# compute M given a style code.
@torch.no_grad()
def compute_M(w, weights_deltas=None, device='cuda'):
    M = []

    # get segmentation
    # _, outputs = generator(w, is_cluster=1)
    _, outputs = generator(w, weights_deltas=weights_deltas)
    cluster_layer = outputs[stop_idx][0]
    activation = flatten_act(cluster_layer)
    seg_mask = clusterer.predict(activation)
    b, c, h, w = cluster_layer.size()  # note: w is reused here as the spatial width

    # create masks for each feature
    all_seg_mask = []
    seg_mask = torch.from_numpy(seg_mask).view(b, 1, h, w, 1).to(device)

    for key in range(n_class):
        # combine masks for all indices for a particular segmentation class
        indices = labels_map[key].view(1, 1, 1, 1, -1)
        key_mask = (seg_mask == indices.to(device)).any(-1)  # [b,1,h,w]
        all_seg_mask.append(key_mask)

    all_seg_mask = torch.stack(all_seg_mask, 1)

    # go through each activation layer and compute M
    for layer_idx in range(len(outputs)):
        layer = outputs[layer_idx][1].to(device)
        b, c, h, w = layer.size()
        layer = F.instance_norm(layer)
        layer = layer.pow(2)

        # resize the segmentation masks to the current activations' resolution
        layer_seg_mask = F.interpolate(all_seg_mask.flatten(0, 1).float(), align_corners=False,
                                       size=(h, w), mode='bilinear').view(b, -1, 1, h, w)

        masked_layer = layer.unsqueeze(1) * layer_seg_mask  # [b,k,c,h,w]
        masked_layer = masked_layer.sum([3, 4]) / (h * w)  # [b,k,c]

        M.append(masked_layer.to(device))

    M = torch.cat(M, -1)  # [b, k, c]

    # softmax to assign each channel to a particular segmentation class
    M = F.softmax(M / .1, 1)
    # simple thresholding
    M = (M > .8).float()

    # zero out torgb transfers, from https://arxiv.org/abs/2011.12799
    for i in range(n_class):
        part_M = style2list(M[:, i])
        for j in range(len(part_M)):
            if j in rgb_layer_idx:
                part_M[j].zero_()
        part_M = list2style(part_M)
        M[:, i] = part_M

    return M


# ====
# for i in range(len(blend_deltas)):
#     if blend_deltas[i] is not None:
#         print(f'{i}: {part_M_mask[i].sum()}/{sum(part_M_mask[i].shape)}')
#         if part_M_mask[i].sum() >= sum(part_M_mask[i].shape)/2:
#             print(i)
#             blend_deltas[i] = ref_deltas[i]


def tensor2img(tensor):
    tensor = tensor.cpu().clamp(-1, 1)
    img = topil(tensor.squeeze())
    return img


def hair_transfer_hyperstyle(source_img_path, ref_img_path):
    with torch.no_grad():
        source_img = align_face(source_img_path, predictor=predictor)
        ref_img = align_face(ref_img_path, predictor=predictor)
        source_img = Image.fromarray(np.uint8(source_img))
        ref_img = Image.fromarray(np.uint8(ref_img))

        source_tensor = transform(source_img).unsqueeze(0).to(device)
        ref_tensor = transform(ref_img).unsqueeze(0).to(device)

        source_batch, source_latent, source_deltas, source_codes = run_inversion(source_tensor, net, n_iters_per_batch=5, return_intermediate_results=False)
        ref_batch, ref_latent, ref_deltas, ref_codes = run_inversion(ref_tensor, net, n_iters_per_batch=5, return_intermediate_results=False)

        source = generator.get_latent(source_latent[0].unsqueeze(0), truncation=1, is_latent=True)
        ref = generator.get_latent(ref_latent[0].unsqueeze(0), truncation=1, is_latent=True)

        source_out, _ = generator(source, weights_deltas=source_deltas, randomize_noise=False)
        ref_out, _ = generator(ref, weights_deltas=ref_deltas, randomize_noise=False)

        source_M = compute_M(source, weights_deltas=source_deltas, device='cpu')
        ref_M = compute_M(ref, weights_deltas=ref_deltas, device='cpu')

        blend_deltas = source_deltas

        max_M = torch.max(source_M.expand_as(ref_M), ref_M)
        max_M = add_pose(max_M, labels2idx)
        idx = labels2idx['hair']
        part_M = max_M[:, idx].to(device)
        part_M_mask = style2list(part_M)
        blend = style2list(add_direction(source, ref, part_M, 1.3))
        blend_out, _ = generator(blend, weights_deltas=blend_deltas)

        source_out = tensor2img(source_out)
        ref_out = tensor2img(ref_out)
        blend_out = tensor2img(blend_out)

        lpips_face, _ = lpips(blend_out, source_out)
        ssim_face, _ = ssim(blend_out, source_out)
        id_face, _ = id_score(blend_out, source_out)
        _, lpips_hair = lpips(blend_out, ref_out)
        _, ssim_hair = ssim(blend_out, ref_out)
        _, clip_hair = clip(blend_out, source_out)
        out_str = f'lpips_face: {lpips_face}\nlpips_hair: {lpips_hair}\nssim_face: {ssim_face}\nssim_hair: {ssim_hair}\nid_face: {id_face}\nclip_hair: {clip_hair}'

        e4e_blend_out, _ = generator(blend)
        e4e_blend_out = tensor2img(e4e_blend_out)
        _, _, e4e_blend_hair_mask = lpips.parser(e4e_blend_out)
        source_out_np = np.array(source_out)
        blend_np = np.array(e4e_blend_out).astype(np.uint8)

        e4e_blend_hair_mask = e4e_blend_hair_mask.cpu().numpy().astype(np.uint8) * 255
        mask_dilate = cv2.dilate(e4e_blend_hair_mask,
                                 kernel=np.ones((50, 50), np.uint8))
        mask_dilate_blur = cv2.blur(mask_dilate, ksize=(30, 30))
        mask_dilate_blur = (e4e_blend_hair_mask + (255 - e4e_blend_hair_mask) / 255 * mask_dilate_blur).astype(np.uint8)
        face_mask = 255 - mask_dilate_blur

        index = np.where(face_mask > 0)
        cy = (np.min(index[0]) + np.max(index[0])) // 2
        cx = (np.min(index[1]) + np.max(index[1])) // 2
        center = (cx, cy)

        clone_out = cv2.seamlessClone(source_out_np, blend_np, face_mask, center, cv2.NORMAL_CLONE)

        return source_out, ref_out, blend_out, out_str, clone_out


def hair_transfer_e4e(source_img_path, ref_img_path):
    with torch.no_grad():
        source_img = align_face(source_img_path, predictor=predictor)
        ref_img = align_face(ref_img_path, predictor=predictor)
        source_img = Image.fromarray(np.uint8(source_img))
        ref_img = Image.fromarray(np.uint8(ref_img))

        source_tensor = transform(source_img).unsqueeze(0).to(device)
        ref_tensor = transform(ref_img).unsqueeze(0).to(device)

        source_batch, source_latent, source_deltas, source_codes = run_inversion(source_tensor, net, n_iters_per_batch=5, return_intermediate_results=False)
        ref_batch, ref_latent, ref_deltas, ref_codes = run_inversion(ref_tensor, net, n_iters_per_batch=5, return_intermediate_results=False)

        source = generator.get_latent(source_latent[0].unsqueeze(0), truncation=1, is_latent=True)
        ref = generator.get_latent(ref_latent[0].unsqueeze(0), truncation=1, is_latent=True)

        e4e_source_out, _ = generator(source, randomize_noise=False)
        e4e_ref_out, _ = generator(ref, randomize_noise=False)

        e4e_source_M = compute_M(source, device='cpu')
        e4e_ref_M = compute_M(ref, device='cpu')

        e4e_max_M = torch.max(e4e_source_M.expand_as(e4e_ref_M), e4e_ref_M)
        e4e_max_M = add_pose(e4e_max_M, labels2idx)
        e4e_idx = labels2idx['hair']

        e4e_part_M = e4e_max_M[:, e4e_idx].to(device)
        e4e_part_M_mask = style2list(e4e_part_M)

        e4e_blend = style2list(add_direction(source, ref, e4e_part_M, 1.3))
        e4e_blend_out, _ = generator(e4e_blend)

        e4e_source_out = tensor2img(e4e_source_out)
        e4e_ref_out = tensor2img(e4e_ref_out)
        e4e_blend_out = tensor2img(e4e_blend_out)

        e4e_lpips_face, _ = lpips(e4e_blend_out, e4e_source_out)
        e4e_ssim_face, _ = ssim(e4e_blend_out, e4e_source_out)
        e4e_id_face, _ = id_score(e4e_blend_out, e4e_source_out)
        _, e4e_lpips_hair = lpips(e4e_blend_out, e4e_ref_out)
        _, e4e_ssim_hair = ssim(e4e_blend_out, e4e_ref_out)
        _, e4e_clip_hair = clip(e4e_blend_out, e4e_source_out)

        e4e_out_str = f'e4e_lpips_face: {e4e_lpips_face}\ne4e_lpips_hair: {e4e_lpips_hair}\ne4e_ssim_face: {e4e_ssim_face}\ne4e_ssim_hair: {e4e_ssim_hair}\ne4e_id_face: {e4e_id_face}\ne4e_clip_hair: {e4e_clip_hair}'

        return e4e_source_out, e4e_ref_out, e4e_blend_out, e4e_out_str


def hair_transfer_PTI(source_img_path, ref_img_path):
    ckpt = 'pretrained/ffhq.pkl'
    G = Generator_wrapper(ckpt, device)
    manipulator = Manipulator(G, device)
    manipulator.set_real_img_projection(source_img_path, inv_mode='w+', pti_mode='s')

    with torch.no_grad():
        source_img = align_face(source_img_path, predictor=predictor)
        ref_img = align_face(ref_img_path, predictor=predictor)
        source_img = Image.fromarray(np.uint8(source_img))

        projection(source_img, 'source', generator, device)
        projection(ref_img, 'ref', generator, device)
        source = load_source('source', generator, device)
        ref = load_source('ref', generator, device)

        e4e_source_out, _ = generator(source, randomize_noise=False)
        e4e_ref_out, _ = generator(ref, randomize_noise=False)

        e4e_source_M = compute_M(source, device='cpu')
        e4e_ref_M = compute_M(ref, device='cpu')

        e4e_max_M = torch.max(e4e_source_M.expand_as(e4e_ref_M), e4e_ref_M)
        e4e_max_M = add_pose(e4e_max_M, labels2idx)
        e4e_idx = labels2idx['hair']

        e4e_part_M = e4e_max_M[:, e4e_idx].to(device)
        e4e_part_M_mask = style2list(e4e_part_M)

        e4e_blend = style2list(add_direction(source, ref, e4e_part_M, 1.3))

        e4e_source_out = tensor2img(e4e_source_out)
        e4e_ref_out = tensor2img(e4e_ref_out)
        # e4e_blend_out = tensor2img(e4e_blend_out)

        # e4e_lpips_face, _ = lpips(e4e_blend_out, e4e_source_out)
        # e4e_ssim_face, _ = ssim(e4e_blend_out, e4e_source_out)
        # e4e_id_face, _ = id_score(e4e_blend_out, e4e_source_out)
        # _, e4e_lpips_hair = lpips(e4e_blend_out, e4e_ref_out)
        # _, e4e_ssim_hair = ssim(e4e_blend_out, e4e_ref_out)
        # _, e4e_clip_hair = clip(e4e_blend_out, e4e_source_out)

        keys = (['G.synthesis.b4.conv1.affine', 'G.synthesis.b4.torgb.affine', 'G.synthesis.b8.conv0.affine', 'G.synthesis.b8.conv1.affine', 'G.synthesis.b8.torgb.affine', 'G.synthesis.b16.conv0.affine', 'G.synthesis.b16.conv1.affine', 'G.synthesis.b16.torgb.affine', 'G.synthesis.b32.conv0.affine', 'G.synthesis.b32.conv1.affine', 'G.synthesis.b32.torgb.affine', 'G.synthesis.b64.conv0.affine', 'G.synthesis.b64.conv1.affine', 'G.synthesis.b64.torgb.affine', 'G.synthesis.b128.conv0.affine', 'G.synthesis.b128.conv1.affine', 'G.synthesis.b128.torgb.affine', 'G.synthesis.b256.conv0.affine', 'G.synthesis.b256.conv1.affine', 'G.synthesis.b256.torgb.affine', 'G.synthesis.b512.conv0.affine', 'G.synthesis.b512.conv1.affine', 'G.synthesis.b512.torgb.affine', 'G.synthesis.b1024.conv0.affine', 'G.synthesis.b1024.conv1.affine', 'G.synthesis.b1024.torgb.affine'])
        test_dict = dict(zip(keys, e4e_blend))
        manipulator_list = []
        manipulator_list.append(test_dict)
        all_imgs = manipulator.synthesis_from_styles(manipulator_list, 0, 1)
        PTI_outstr = 'PTI_outstr'
        blend_out = tensor2img(all_imgs[0])
        return e4e_source_out, e4e_ref_out, blend_out, PTI_outstr


# _, _, e4e_blend_hair_mask = lpips.parser(e4e_blend_out)
# blend_out_np = np.array(blend_out)
# blend_np = np.array(e4e_blend_out).astype(np.uint8)

# e4e_blend_hair_mask = e4e_blend_hair_mask.cpu().numpy().astype(np.uint8)*255
# mask_dilate = cv2.dilate(e4e_blend_hair_mask,
#                          kernel=np.ones((50, 50), np.uint8))
# mask_dilate_blur = cv2.blur(mask_dilate, ksize=(30, 30))
# mask_dilate_blur = (e4e_blend_hair_mask + (255 - e4e_blend_hair_mask) / 255 * mask_dilate_blur).astype(np.uint8)
# face_mask = 255 - mask_dilate_blur

# index = np.where(face_mask > 0)
# cy = (np.min(index[0]) + np.max(index[0])) // 2
# cx = (np.min(index[1]) + np.max(index[1])) // 2
# center = (cx, cy)

# clone_out = cv2.seamlessClone(blend_out_np, blend_np, face_mask, center, cv2.NORMAL_CLONE)

# out_str = f'lpips_face: {lpips_face}\nlpips_hair: {lpips_hair}\nssim_face: {ssim_face}\nssim_hair: {ssim_hair}\nid_face: {id_face}\nclip_hair: {clip_hair}'

# seg_out = torch.tensor(face_mask).float().unsqueeze(-1).repeat(1,1,3)
# seg_out = seg_out.cpu().numpy().astype(np.uint8)
# # seg_out*=255

# seg_out = Image.fromarray(seg_out)

# # return source_out, ref_out, blend_out, out_str, e4e_source_out, e4e_ref_out, e4e_blend_out, e4e_out_str, clone_out, seg_out

# ## Set source_tensor requires_grad=True
# source_tensor.requires_grad = True
# ref_tensor.requires_grad = True

# ckpt = 'pretrained/ffhq.pkl'
# G = Generator_wrapper(ckpt, device)
# manipulator = Manipulator(G, device)
# manipulator.set_real_img_projection(source_img_path, inv_mode='w+', pti_mode='s')
# blend = style2list((add_direction(source_tensor, ref_tensor, part_M, 1.3)))
# keys = (['G.synthesis.b4.conv1.affine', 'G.synthesis.b4.torgb.affine', 'G.synthesis.b8.conv0.affine', 'G.synthesis.b8.conv1.affine', 'G.synthesis.b8.torgb.affine', 'G.synthesis.b16.conv0.affine', 'G.synthesis.b16.conv1.affine', 'G.synthesis.b16.torgb.affine', 'G.synthesis.b32.conv0.affine', 'G.synthesis.b32.conv1.affine', 'G.synthesis.b32.torgb.affine', 'G.synthesis.b64.conv0.affine', 'G.synthesis.b64.conv1.affine', 'G.synthesis.b64.torgb.affine', 'G.synthesis.b128.conv0.affine', 'G.synthesis.b128.conv1.affine', 'G.synthesis.b128.torgb.affine', 'G.synthesis.b256.conv0.affine', 'G.synthesis.b256.conv1.affine', 'G.synthesis.b256.torgb.affine', 'G.synthesis.b512.conv0.affine', 'G.synthesis.b512.conv1.affine', 'G.synthesis.b512.torgb.affine', 'G.synthesis.b1024.conv0.affine', 'G.synthesis.b1024.conv1.affine', 'G.synthesis.b1024.torgb.affine'])
# test_dict = dict(zip(keys, blend))
# manipulator_list = []
# manipulator_list.append(test_dict)
# all_imgs = manipulator.synthesis_from_styles(manipulator_list, 0, 1)

# return source_out, ref_out, blend_out, out_str, e4e_source_out, e4e_ref_out, e4e_blend_out, e4e_out_str, clone_out, all_imgs


## argument for choosing encoder between e4e and hyperstyle
args = argparse.ArgumentParser()
args.add_argument('--encoder', type=str, default='hyperstyle')

opt = args.parse_args()


device = 'cuda' if torch.cuda.is_available() else 'cpu'

lpips = FaceMetric(metric_type='lpips', device=device)
ssim = FaceMetric(metric_type='ms-ssim', device=device)
id_score = FaceMetric(metric_type='id', device=device)
clip = FaceMetric(metric_type='cliphair', device=device)

# generator = Generator(1024, 512, 8, channel_multiplier=2).to(device).eval()
# ckpt = torch.load('stylegan2-ffhq-config-f.pt', map_location=lambda storage, loc: storage)
# generator.load_state_dict(ckpt['g_ema'], strict=False)

generator = Generator(1024, 512, 8, channel_multiplier=2).to(device).eval()
ckpt = torch.load('stylegan2-ffhq-config-f.pt', map_location=lambda storage, loc: storage)
generator.load_state_dict(ckpt['g_ema'], strict=False)


ckpt = 'pretrained/ffhq.pkl'
G = Generator_wrapper(ckpt, device)
manipulator = Manipulator(G, device)


if opt.encoder == 'e4e':
    from util import align_face
    model_path = 'e4e_ffhq_encode.pt'
    ensure_checkpoint_exists(model_path)
    ckpt = torch.load(model_path, map_location='cpu')
    opts = ckpt['opts']
    opts['checkpoint_path'] = model_path
    opts = Namespace(**opts)
    net = pSp(opts, device).eval().to(device)

elif opt.encoder == 'hyperstyle':
    from hyperstyle.scripts.align_faces_parallel import align_face
    model_path = 'pretrained_models/hyperstyle_ffhq.pt'
    predictor = dlib.shape_predictor('pretrained_models/shape_predictor_68_face_landmarks.dat')
    net, _ = load_model(model_path)

else:
    raise ValueError('invalid encoder')


truncation = 0.5
stop_idx = 11
n_clusters = 18

clusterer = pickle.load(open('catalog.pkl', 'rb'))

labels2idx = {
    'nose': 0,
    'eyes': 1,
    'mouth': 2,
    'hair': 3,
    'background': 4,
    'cheek': 5,
    'neck': 6,
    'clothes': 7,
}

labels_map = {
    0: torch.tensor([7]),
    1: torch.tensor([1, 6]),
    2: torch.tensor([4]),
    3: torch.tensor([0, 3, 5, 8, 10, 15, 16]),
    4: torch.tensor([11, 13, 14]),
    5: torch.tensor([9]),
    6: torch.tensor([17]),
    7: torch.tensor([2, 12]),
}

idx2labels = dict((v, k) for k, v in labels2idx.items())
n_class = len(idx2labels)

segid_map = dict.fromkeys(labels_map[0].tolist(), 0)
segid_map.update(dict.fromkeys(labels_map[1].tolist(), 1))
segid_map.update(dict.fromkeys(labels_map[2].tolist(), 2))
segid_map.update(dict.fromkeys(labels_map[3].tolist(), 3))
segid_map.update(dict.fromkeys(labels_map[4].tolist(), 4))
segid_map.update(dict.fromkeys(labels_map[5].tolist(), 5))
segid_map.update(dict.fromkeys(labels_map[6].tolist(), 6))
segid_map.update(dict.fromkeys(labels_map[7].tolist(), 7))

torch.manual_seed(0)


transform = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(256),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    ]
)

topil = transforms.Compose(
    [
        transforms.Normalize([-1, -1, -1], [2, 2, 2]),
        transforms.ToPILImage(),
        transforms.Resize(1024),
    ]
)


e4e_ris_demo = gr.Interface(hair_transfer_e4e, inputs=[gr.Image(type='filepath'), gr.Image(type='filepath')], outputs=["image", "image", "image", "text"])
hyperstyle_ris_demo = gr.Interface(hair_transfer_hyperstyle, inputs=[gr.Image(type='filepath'), gr.Image(type='filepath')], outputs=["image", "image", "image", "text", "image"])
PTI_ris_demo = gr.Interface(hair_transfer_PTI, inputs=[gr.Image(type='filepath'), gr.Image(type='filepath')], outputs=["image", "image", "image", "text"])

ris_demo = gr.TabbedInterface(interface_list=[hyperstyle_ris_demo, e4e_ris_demo, PTI_ris_demo], tab_names=["hyperstyle", "e4e", "PTI"])

ris_demo.launch(share=True)
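Note on compute_M above: each style channel is assigned to a segmentation class by taking the per-class mean squared activation, softmaxing sharply over the class axis, and thresholding. A small standalone illustration of just that assignment step (shapes, temperature, and threshold mirror the code above; the tensor values are made up):

import torch
import torch.nn.functional as F

b, k, c = 1, 8, 6                      # batch, segmentation classes, style channels
energies = torch.rand(b, k, c)          # per-class mean squared activations, as built in compute_M
M = F.softmax(energies / 0.1, dim=1)    # sharp softmax over classes for each channel
M = (M > 0.8).float()                   # keep only channels dominated by a single class
print(M.sum(dim=1))                     # each channel is now assigned to at most one class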
gradio_wrapper/gradio_options.py
ADDED
@@ -0,0 +1,53 @@
import sys
import os
sys.path.append(".")
sys.path.append("..")
from argparse import ArgumentParser


class GradioTestOptions:

    def __init__(self):
        self.parser = ArgumentParser()
        self.initialize()

    def initialize(self):
        # arguments for inference script
        self.parser.add_argument('--checkpoint_path', default=None, type=str, help='Path to model checkpoint')

        self.parser.add_argument('--mapper_type', default='LevelsMapper', type=str, help='Which mapper to use')
        self.parser.add_argument('--no_coarse_mapper', default=False, action="store_true")
        self.parser.add_argument('--no_medium_mapper', default=False, action="store_true")
        self.parser.add_argument('--no_fine_mapper', default=False, action="store_true")
        self.parser.add_argument('--use_weight_delta_mapper', default=False, action="store_true")
        self.parser.add_argument('--stylegan_size', default=1024, type=int)

        self.parser.add_argument('--alpha', default=4.1, type=float, help='Alpha to use for global direction editing')
        self.parser.add_argument('--beta', default=0.14, type=float, help='Beta to use for global direction editing')
        self.parser.add_argument('--edit_weight_delta', default=False, action='store_true', help='Also edit the weight deltas')
        self.parser.add_argument('--weight_delta_alpha', default=4.1, type=float, help='Alpha to use for weight delta')
        self.parser.add_argument('--weight_delta_beta', default=0.14, type=float, help='Beta to use for weight delta')
        self.parser.add_argument("--delta_i_c", type=str, default='./hyperstyle_global_directions/global_directions/ffhq/fs3.npy', help="path to file containing delta_i_c")
        self.parser.add_argument("--s_statistics", type=str, default='./hyperstyle_global_directions/global_directions/ffhq/S_mean_std', help="path to file containing s statistics")
        self.parser.add_argument("--text_prompt_templates", default='./hyperstyle_global_directions/global_directions/templates.txt')

        self.parser.add_argument("--neutral_text", type=str, default="A face with hair")
        self.parser.add_argument("--target_text", type=str, default=None)

        # arguments for hyperstyle
        self.parser.add_argument('--hyperstyle_checkpoint_path', default='./pretrained_models/hyperstyle/hyperstyle_ffhq.pt', type=str, help='Path to HyperStyle model checkpoint')
        self.parser.add_argument('--resize_outputs', action='store_true', help='Whether to resize outputs to 256x256 or keep at original output resolution')

        # arguments for loading pre-trained encoder
        self.parser.add_argument('--load_w_encoder', action='store_true', help='Whether to load the w e4e encoder.')
        self.parser.add_argument('--w_encoder_checkpoint_path', default='./pretrained_models/hyperstyle/faces_w_encoder.pt', type=str, help='Path to pre-trained W-encoder.')
        self.parser.add_argument('--w_encoder_type', default='WEncoder', help='Encoder type for the encoder used to get the initial inversion')

        # arguments for iterative inference
        self.parser.add_argument('--n_iters_per_batch', default=5, type=int, help='Number of forward passes per batch during inference.')

        # arguments for the test dataset
        self.parser.add_argument('--work_in_stylespace', default=False, action='store_true')

    def parse(self, args=None):
        opts = self.parser.parse_args(args)
        return opts
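Note on using these options programmatically: because parse() forwards its argument list to argparse, the options can be built from a Python list instead of the command line, which is how app.py invokes them. A small usage sketch (not part of the commit; flag values chosen for illustration):

from gradio_wrapper.gradio_options import GradioTestOptions

# Build options without touching sys.argv, overriding a couple of defaults.
opts = GradioTestOptions().parse(['--no_fine_mapper', '--alpha', '3.0', '--beta', '0.1'])
print(opts.alpha, opts.beta, opts.no_fine_mapper)  # 3.0 0.1 True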