Spaces:

PKUWilliamYang
/

VToonify

Running on T4

983684c over 1 year ago

No virus

7.6 kB

	import numpy as np
	import matplotlib.pyplot as plt
	from PIL import Image
	import cv2
	import random
	import math
	import argparse
	import torch
	from torch.utils import data
	from torch.nn import functional as F
	from torch import autograd
	from torch.nn import init
	import torchvision.transforms as transforms
	from model.stylegan.op import conv2d_gradfix
	from model.encoder.encoders.psp_encoders import GradualStyleEncoder
	from model.encoder.align_all_parallel import get_landmark

	def visualize(img_arr, dpi):
	    """Show a channel-first image tensor in a 10x10-inch matplotlib figure.

	    The tensor is detached, copied to the CPU, transposed to
	    channel-last order and mapped with ``(x + 1) * 127.5`` before being
	    cast to ``uint8``. Axes are hidden and ``plt.show()`` is called.

	    Args:
	        img_arr: Tensor indexed as (channel, row, column).
	        dpi: Resolution passed to ``plt.figure``.
	    """
	    plt.figure(figsize=(10, 10), dpi=dpi)
	    channel_last = img_arr.detach().cpu().numpy().transpose(1, 2, 0)
	    pixels = ((channel_last + 1.0) * 127.5).astype(np.uint8)
	    plt.imshow(pixels)
	    plt.axis('off')
	    plt.show()

	def save_image(img, filename):
	    """Write a channel-first RGB image tensor to ``filename`` via OpenCV.

	    The tensor is detached, copied to the CPU, transposed to
	    channel-last order, mapped with ``(x + 1) * 127.5`` and cast to
	    ``uint8``. The channels are then swapped from RGB to BGR because
	    that is the order ``cv2.imwrite`` expects.
	    """
	    channel_last = img.detach().cpu().numpy().transpose(1, 2, 0)
	    rgb = ((channel_last + 1.0) * 127.5).astype(np.uint8)
	    bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
	    cv2.imwrite(filename, bgr)

	def load_image(filename):
	    """Load an image file as a normalised tensor with a leading batch axis.

	    The image is converted to RGB, turned into a tensor with
	    ``ToTensor`` and normalised per channel with mean 0.5 and std 0.5.

	    Args:
	        filename: Anything accepted by ``PIL.Image.open``.

	    Returns:
	        The transformed image with an extra dimension inserted at index 0.
	    """
	    transform = transforms.Compose([
	        transforms.ToTensor(),
	        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
	    ])

	    # The three-element mean/std above only fits three-channel input, so
	    # force RGB; grayscale, palette and RGBA files would otherwise fail
	    # in Normalize. The context manager releases the file handle, which
	    # a bare Image.open() leaves open.
	    with Image.open(filename) as img:
	        tensor = transform(img.convert('RGB'))
	    return tensor.unsqueeze(dim=0)

	def data_sampler(dataset, shuffle, distributed):
	    """Pick the sampler for ``dataset``.

	    Returns a ``DistributedSampler`` (forwarding ``shuffle``) when
	    ``distributed`` is truthy; otherwise a ``RandomSampler`` when
	    ``shuffle`` is truthy and a ``SequentialSampler`` when it is not.
	    """
	    if distributed:
	        return data.distributed.DistributedSampler(dataset, shuffle=shuffle)

	    sampler_cls = data.RandomSampler if shuffle else data.SequentialSampler
	    return sampler_cls(dataset)


	def requires_grad(model, flag=True):
	    """Set ``requires_grad`` to ``flag`` on every parameter of ``model``."""
	    for param in model.parameters():
	        param.requires_grad = flag


	def accumulate(model1, model2, decay=0.999):
	    """Blend ``model2``'s parameters into ``model1`` in place.

	    Each parameter of ``model1`` becomes
	    ``decay * model1_param + (1 - decay) * model2_param``, matched by
	    parameter name. ``model2`` is left unchanged.
	    """
	    source = dict(model2.named_parameters())

	    for name, target in dict(model1.named_parameters()).items():
	        target.data.mul_(decay).add_(source[name].data, alpha=1 - decay)


	def sample_data(loader):
	    """Yield batches from ``loader`` forever, restarting it when exhausted."""
	    while True:
	        yield from loader


	def d_logistic_loss(real_pred, fake_pred):
	    """Return ``mean(softplus(-real_pred)) + mean(softplus(fake_pred))``."""
	    loss_on_real = F.softplus(-real_pred).mean()
	    loss_on_fake = F.softplus(fake_pred).mean()
	    return loss_on_real + loss_on_fake


	def d_r1_loss(real_pred, real_img):
	    """Return the batch mean of the squared gradient norm of ``real_pred``.

	    The gradient of ``real_pred.sum()`` with respect to ``real_img`` is
	    taken inside ``conv2d_gradfix.no_weight_gradients()`` with
	    ``create_graph=True``, so the result can itself be backpropagated.
	    Squared gradients are summed per sample and averaged over the batch.
	    """
	    with conv2d_gradfix.no_weight_gradients():
	        (grad_real,) = autograd.grad(
	            outputs=real_pred.sum(), inputs=real_img, create_graph=True
	        )

	    batch_size = grad_real.shape[0]
	    per_sample = grad_real.pow(2).reshape(batch_size, -1).sum(1)
	    return per_sample.mean()


	def g_nonsaturating_loss(fake_pred):
	    """Return ``mean(softplus(-fake_pred))``."""
	    return F.softplus(-fake_pred).mean()


	def g_path_regularize(fake_img, latents, mean_path_length, decay=0.01):
	    """Compute the path-length penalty and the updated running mean.

	    Random noise shaped like ``fake_img`` is scaled by
	    ``1 / sqrt(shape[2] * shape[3])``. The gradient of
	    ``(fake_img * noise).sum()`` with respect to ``latents`` gives one
	    path length per sample. The running mean moves toward the batch
	    mean by a factor of ``decay``.

	    Returns:
	        ``(path_penalty, path_mean.detach(), path_lengths)``.
	    """
	    dim2, dim3 = fake_img.shape[2], fake_img.shape[3]
	    noise = torch.randn_like(fake_img) / math.sqrt(dim2 * dim3)

	    (grad,) = autograd.grad(
	        outputs=(fake_img * noise).sum(), inputs=latents, create_graph=True
	    )
	    # Sum squares over the last axis, average over axis 1, then take the root.
	    path_lengths = torch.sqrt(grad.pow(2).sum(2).mean(1))

	    drift = path_lengths.mean() - mean_path_length
	    path_mean = mean_path_length + decay * drift

	    deviation = path_lengths - path_mean
	    path_penalty = deviation.pow(2).mean()

	    return path_penalty, path_mean.detach(), path_lengths


	def make_noise(batch, latent_dim, n_noise, device):
	    """Draw standard-normal latent noise on ``device``.

	    Returns a single ``(batch, latent_dim)`` tensor when ``n_noise`` is
	    1; otherwise a tuple of ``n_noise`` such tensors.
	    """
	    if n_noise != 1:
	        stacked = torch.randn(n_noise, batch, latent_dim, device=device)
	        return stacked.unbind(0)

	    return torch.randn(batch, latent_dim, device=device)


	def mixing_noise(batch, latent_dim, prob, device):
	    """Return two noise tensors with probability ``prob``, otherwise one.

	    When ``prob`` is positive and a uniform draw falls below it, the
	    result is the pair produced by ``make_noise(..., 2, ...)``.
	    Otherwise it is a one-element list holding a single noise tensor.
	    """
	    # random.random() is only consulted when prob is positive.
	    use_mixing = prob > 0 and random.random() < prob
	    if not use_mixing:
	        return [make_noise(batch, latent_dim, 1, device)]

	    return make_noise(batch, latent_dim, 2, device)


	def set_grad_none(model, targets):
	    """Set ``.grad`` to ``None`` on each parameter whose name is in ``targets``."""
	    for name, param in model.named_parameters():
	        if name not in targets:
	            continue
	        param.grad = None


	def weights_init(m):
	    """Initialise module ``m`` according to its class name.

	    * Class names containing ``BatchNorm2d``: weight ~ N(1.0, 0.02) when
	      a non-None weight exists, bias set to 0 when a non-None bias exists.
	    * Class names containing ``Conv`` or ``Linear`` that have a
	      ``weight`` attribute: Kaiming-normal weight (``a=0``, ``fan_in``),
	      bias set to 0 when a non-None bias exists.
	    * Every other module is left untouched.
	    """
	    classname = m.__class__.__name__

	    def _zero_bias():
	        # Shared by both branches: only touch a bias that actually exists.
	        if hasattr(m, 'bias') and m.bias is not None:
	            init.constant_(m.bias.data, 0.0)

	    if 'BatchNorm2d' in classname:
	        if hasattr(m, 'weight') and m.weight is not None:
	            init.normal_(m.weight.data, 1.0, 0.02)
	        _zero_bias()
	        return

	    if hasattr(m, 'weight') and ('Conv' in classname or 'Linear' in classname):
	        init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
	        _zero_bias()


	def load_psp_standalone(checkpoint_path, device='cuda'):
	    """Build a ``GradualStyleEncoder`` from a checkpoint file.

	    The checkpoint is expected to provide ``'opts'``, ``'state_dict'``
	    and ``'latent_avg'`` entries. The encoder is put in eval mode, moved
	    to ``device`` and given a forward hook that adds ``latent_avg`` to
	    every output.

	    Args:
	        checkpoint_path: Path handed to ``torch.load``.
	        device: Device for the encoder and the latent average.

	    Returns:
	        The encoder with the forward hook registered.
	    """
	    # NOTE(review): torch.load may unpickle arbitrary objects depending on
	    # the torch version and defaults; only load trusted checkpoints.
	    ckpt = torch.load(checkpoint_path, map_location='cpu')

	    opts = ckpt['opts']
	    opts.setdefault('output_size', 1024)
	    opts['n_styles'] = int(math.log(opts['output_size'], 2)) * 2 - 2
	    opts = argparse.Namespace(**opts)

	    psp = GradualStyleEncoder(50, 'ir_se', opts)
	    # Keep only the encoder weights and strip the 'encoder.' marker.
	    encoder_weights = {
	        key.replace('encoder.', ''): value
	        for key, value in ckpt['state_dict'].items()
	        if key.startswith('encoder.')
	    }
	    psp.load_state_dict(encoder_weights)
	    psp.eval()
	    psp = psp.to(device)

	    latent_avg = ckpt['latent_avg'].to(device)

	    def add_latent_avg(model, inputs, outputs):
	        # Tile the average along the first axis to match the batch size.
	        return outputs + latent_avg.repeat(outputs.shape[0], 1, 1)

	    psp.register_forward_hook(add_latent_avg)
	    return psp

	def get_video_crop_parameter(filepath, predictor, padding=[200,200,200,200]):
	    """Compute resize and crop parameters centred on the detected eyes.

	    The scale is chosen so that the horizontal distance between the mean
	    x-coordinates of the two eye landmark groups becomes 64 pixels. The
	    crop box is placed around the scaled midpoint of the eyes, clamped
	    to the scaled image bounds and floored to multiples of 8.

	    Args:
	        filepath: An image path (``str``) loaded with dlib, or an
	            already-loaded image array exposing ``.shape``.
	        predictor: Landmark predictor forwarded to ``get_landmark``.
	        padding: Margins around the eye centre in scaled pixels, ordered
	            ``[left, right, top, bottom]``. The default list is only
	            read, never mutated.

	    Returns:
	        ``(h, w, top, bottom, left, right, scale)``, or ``None`` when
	        ``get_landmark`` returns ``None``.
	    """
	    if isinstance(filepath, str):
	        # dlib is not imported at module level in this file, so bring it
	        # into scope here; otherwise a string path raises NameError.
	        import dlib
	        img = dlib.load_rgb_image(filepath)
	    else:
	        img = filepath

	    lm = get_landmark(img, predictor)
	    if lm is None:
	        return None

	    # Only the two eye groups are needed: indices 36-41 and 42-47.
	    lm_eye_left = lm[36:42]
	    lm_eye_right = lm[42:48]

	    scale = 64. / (np.mean(lm_eye_right[:, 0]) - np.mean(lm_eye_left[:, 0]))
	    center = ((np.mean(lm_eye_right, axis=0) + np.mean(lm_eye_left, axis=0)) / 2) * scale
	    h, w = round(img.shape[0] * scale), round(img.shape[1] * scale)

	    # Clamp to the scaled image, then floor each edge to a multiple of 8.
	    left = max(round(center[0] - padding[0]), 0) // 8 * 8
	    right = min(round(center[0] + padding[1]), w) // 8 * 8
	    top = max(round(center[1] - padding[2]), 0) // 8 * 8
	    bottom = min(round(center[1] + padding[3]), h) // 8 * 8
	    return h, w, top, bottom, left, right, scale

	def tensor2cv2(img):
	    """Convert a channel-first RGB image tensor to a ``uint8`` BGR array.

	    The tensor is detached, copied to the CPU, transposed to
	    channel-last order, mapped with ``(x + 1) * 127.5`` and cast to
	    ``uint8`` before the RGB-to-BGR channel swap.
	    """
	    # detach() matches save_image/visualize: without it, .numpy() raises
	    # for any tensor that still requires grad.
	    channel_last = img.detach().cpu().numpy().transpose(1, 2, 0)
	    tmp = ((channel_last + 1.0) * 127.5).astype(np.uint8)
	    return cv2.cvtColor(tmp, cv2.COLOR_RGB2BGR)

	# get parameters from the stylegan and mark them with their layers
	def gather_params(G):
	    """Group the buffers and parameters of ``G`` by layer index.

	    Returns a dict with integer keys 0..17 plus ``"others"``, each
	    mapping entry name to its value. Names are bucketed as follows:

	    * ``convs.<i>...``   -> layer ``i + 1``
	    * ``to_rgbs.<i>...`` -> layer ``2 * i + 3``
	    * ``conv1...``       -> layer 0
	    * ``to_rgb1...``     -> layer 1
	    * anything else      -> ``"others"``

	    Entries are visited in sorted order, so each bucket is filled in
	    that order.
	    """
	    params = {res: {} for res in range(18)}
	    params["others"] = {}

	    named_entries = sorted(list(G.named_buffers()) + list(G.named_parameters()))
	    for name, value in named_entries:
	        if name.startswith("convs"):
	            bucket = int(name.split(".")[1]) + 1
	        elif name.startswith("to_rgbs"):
	            bucket = int(name.split(".")[1]) * 2 + 3
	        elif name.startswith("conv1"):
	            bucket = 0
	        elif name.startswith("to_rgb1"):
	            bucket = 1
	        else:
	            bucket = "others"
	        params[bucket][name] = value
	    return params

	# blend the ffhq stylegan model and the finetuned model for toonify
	# see ``Resolution Dependent GAN Interpolation for Controllable Image Synthesis Between Domains''
	def blend_models(G_low, G_high, weight=[1]*7+[0]*11):
	    """Blend two generators layer by layer into a single state dict.

	    For each of the 18 layer groups returned by ``gather_params``, every
	    entry becomes ``high * (1 - weight[res]) + low * weight[res]``. With
	    the default weights the first 7 groups come entirely from ``G_low``
	    and the remaining 11 entirely from ``G_high``. Entries in the
	    ``"others"`` group are taken from ``G_high`` unchanged.

	    Args:
	        G_low: Model whose entries are weighted by ``weight[res]``.
	        G_high: Model whose entries are weighted by ``1 - weight[res]``.
	        weight: 18 per-layer blend factors. The default list is only
	            read, never mutated.

	    Returns:
	        A flat dict mapping entry name to blended value.
	    """
	    params_low = gather_params(G_low)
	    params_high = gather_params(G_high)

	    for res in range(18):
	        layer_weight = weight[res]
	        low_layer = params_low[res]
	        high_layer = params_high[res]
	        # Only existing keys are reassigned, so the dict keeps its size
	        # while it is being iterated.
	        for n in high_layer:
	            high_layer[n] = high_layer[n] * (1 - layer_weight) + low_layer[n] * layer_weight

	    state_dict = {}
	    for layer_params in params_high.values():
	        state_dict.update(layer_params)

	    return state_dict