import gradio as gr
from PIL import Image
import os
import random
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from models.modules.stylegan2.model import StyledConv, ToRGB, EqualLinear, ResBlock, ConvLayer, PixelNorm
from utils.util import *
from utils.data_utils import Transforms
from data import CustomDataLoader
from data.super_dataset import SuperDataset
from configs import parse_config
from utils.augmentation import ImagePathToImage
import clip
from torchvision.transforms import Compose, Resize, ToTensor, Normalize, InterpolationMode
from models.style_based_pix2pixII_model import CLIPFeats2Wplus
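
# Stylizer: a StyleGAN2-style image-to-image generator.
# A convolutional encoder downsamples the 512x512 input to a 32x32 feature map,
# an 8-layer MLP maps a latent z (or, in phase 3, CLIP features) to style codes,
# and a style-modulated decoder upsamples back to 512x512 with skip-to-RGB outputs.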
class Stylizer(nn.Module):
def __init__(self, ngf=64, phase=2, model_weights=None):
super(Stylizer, self).__init__()
# encoder
self.encoder = nn.Sequential(
ConvLayer(3, ngf, 3), # 512
ResBlock(ngf * 1, ngf * 1), # 256
ResBlock(ngf * 1, ngf * 2), # 128
ResBlock(ngf * 2, ngf * 4), # 64
ResBlock(ngf * 4, ngf * 8), # 32
ConvLayer(ngf * 8, ngf * 8, 3) # 32
)
# mapping network
self.mapping_z = nn.Sequential(*([ PixelNorm() ] + [ EqualLinear(512, 512, activation='fused_lrelu', lr_mul=0.01) for _ in range(8) ]))
# style-based decoder
channels = {
32 : ngf * 8,
64 : ngf * 8,
128: ngf * 4,
256: ngf * 2,
512: ngf * 1
}
self.decoder0 = StyledConv(channels[32], channels[32], 3, 512)
self.to_rgb0 = ToRGB(channels[32], 512, upsample=False)
for i in range(4):
ichan = channels[2 ** (i + 5)]
ochan = channels[2 ** (i + 6)]
setattr(self, f'decoder{i + 1}a', StyledConv(ichan, ochan, 3, 512, upsample=True))
setattr(self, f'decoder{i + 1}b', StyledConv(ochan, ochan, 3, 512))
setattr(self, f'to_rgb{i + 1}', ToRGB(ochan, 512))
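        # forward() consumes 10 style codes in total (indices 0-9): decoder0/to_rgb0 plus the four upsampling stages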
self.n_latent = 10
# random style for testing
self.test_z = torch.randn(1, 512)
# load pretrained model weights
if phase == 2:
# load pretrained encoder and stylegan2 decoder
self.load_state_dict(model_weights)
if phase == 3:
self.clip_mapper = CLIPFeats2Wplus(n_tokens=self.n_latent)
            # load pretrained base model and freeze all params except clip mapper
self.load_state_dict(model_weights, strict=False)
params = dict(self.named_parameters())
for k in params.keys():
if 'clip_mapper' in k:
                    print(f'{k} not frozen!')
continue
params[k].requires_grad = False
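
    # Style-code routing:
    #   no kwargs            -> fixed test latent (deterministic output)
    #   mixing=True          -> style mixing between two random latents
    #   z=<tensor>           -> user-provided latent
    #   clip_feats=<tensor>  -> phase-3 CLIP-to-style mapper
    #   anything else        -> a freshly sampled random latent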
def get_styles(self, x, **kwargs):
if len(kwargs) == 0:
return self.mapping_z(self.test_z.to(x.device).repeat(x.shape[0], 1)).repeat(self.n_latent, 1, 1)
elif 'mixing' in kwargs and kwargs['mixing']:
w0 = self.mapping_z(torch.randn(x.shape[0], 512, device=x.device))
w1 = self.mapping_z(torch.randn(x.shape[0], 512, device=x.device))
inject_index = random.randint(1, self.n_latent - 1)
return torch.cat([
w0.repeat(inject_index, 1, 1),
w1.repeat(self.n_latent - inject_index, 1, 1)
])
elif 'z' in kwargs:
return self.mapping_z(kwargs['z']).repeat(self.n_latent, 1, 1)
elif 'clip_feats' in kwargs:
return self.clip_mapper(kwargs['clip_feats'])
else:
z = torch.randn(x.shape[0], 512, device=x.device)
return self.mapping_z(z).repeat(self.n_latent, 1, 1)
def forward(self, x, **kwargs):
# encode
feat = self.encoder(x)
# get style code
styles = self.get_styles(x, **kwargs)
# style-based generate
feat = self.decoder0(feat, styles[0])
out = self.to_rgb0(feat, styles[1])
for i in range(4):
feat = getattr(self, f'decoder{i + 1}a')(feat, styles[i * 2 + 1])
feat = getattr(self, f'decoder{i + 1}b')(feat, styles[i * 2 + 2])
out = getattr(self, f'to_rgb{i + 1}')(feat, styles[i * 2 + 3], out)
return F.hardtanh(out)
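
# Convert a generator output tensor (NCHW, values in [-1, 1]) or a numpy array to a PIL image.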
def tensor2file(input_image):
if not isinstance(input_image, np.ndarray):
if isinstance(input_image, torch.Tensor): # get the data from a variable
image_tensor = input_image.data
else:
return input_image
image_numpy = image_tensor[0].cpu().float().numpy() # convert it into a numpy array
if image_numpy.shape[0] == 1: # grayscale to RGB
image_numpy = np.tile(image_numpy, (3, 1, 1))
        image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0  # post-processing: transpose and scaling
else: # if it is a numpy array, do nothing
image_numpy = input_image
    if image_numpy.shape[2] > 3:  # drop any extra channels (e.g. alpha)
        image_numpy = image_numpy[:, :, :3]
    image_numpy = image_numpy.astype(np.uint8)
    image_pil = Image.fromarray(image_numpy)
    return image_pil
device = "cuda"
def generate_multi_model(input_img):
# parse config
config = parse_config("./exp/sp2pII-phase2.yaml")
# hard-code some parameters for test
config['common']['phase'] = "test"
config['dataset']['n_threads'] = 0 # test code only supports num_threads = 0
config['dataset']['batch_size'] = 1 # test code only supports batch_size = 1
config['dataset']['serial_batches'] = True # disable data shuffling; comment this line if results on randomly chosen images are needed.
config['dataset']['no_flip'] = True # no flip; comment this line if results on flipped images are needed.
# override data augmentation
config['dataset']['load_size'] = config['testing']['load_size']
config['dataset']['crop_size'] = config['testing']['crop_size']
config['dataset']['preprocess'] = config['testing']['preprocess']
config['training']['pretrained_model'] = "./pretrained_models/phase2_pretrain_90000.pth"
# add testing path
config['testing']['test_img'] = input_img
config['testing']['test_video'] = None
config['testing']['test_folder'] = None
dataset = SuperDataset(config)
dataloader = CustomDataLoader(config, dataset)
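    # load the EMA generator weights ('G_ema_model') from the phase-2 checkpoint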
model_dict = torch.load("./pretrained_models/phase2_pretrain_90000.pth", map_location='cpu')
    # init netG
    model = Stylizer(ngf=config['model']['ngf'], phase=2, model_weights=model_dict['G_ema_model']).to(device)
    model.eval()
    # the test dataset holds a single image, so return after the first batch
    with torch.no_grad():
        for data in dataloader:
            real_A = data['test_A'].to(device)
            fake_B = model(real_A, mixing=False)
            output_img = tensor2file(fake_B)  # get image results
            return output_img
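
# One-shot tab: the selected reference name (e.g. 'ref01') picks both a per-style checkpoint
# and a reference image; its CLIP image embedding drives the phase-3 CLIP mapper.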
def generate_one_shot(src_img, img_prompt):
# init model
state_dict = torch.load(f"./checkpoints/{img_prompt[-2:]}/epoch_latest.pth", map_location='cpu')
model = Stylizer(ngf=64, phase=3, model_weights=state_dict['G_ema_model'])
model.to(device)
model.eval()
model.requires_grad_(False)
clip_model, img_preprocess = clip.load('ViT-B/32', device=device)
clip_model.eval()
clip_model.requires_grad_(False)
# image transform for stylizer
img_transform = Compose([
Resize((512, 512), interpolation=InterpolationMode.LANCZOS),
ToTensor(),
Normalize([0.5], [0.5])
])
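    # resize to the stylizer's 512x512 input resolution and map pixel values to [-1, 1]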
# get clip features
with torch.no_grad():
img = img_preprocess(Image.open(f"./example/reference/{img_prompt[-2:]}.png")).unsqueeze(0).to(device)
clip_feats = clip_model.encode_image(img)
clip_feats /= clip_feats.norm(dim=1, keepdim=True)
# load image & to tensor
img = Image.open(src_img)
    if img.mode != 'RGB':
img = img.convert('RGB')
img = img_transform(img).unsqueeze(0).to(device)
# stylize it !
with torch.no_grad():
res = model(img, clip_feats=clip_feats)
output_img = tensor2file(res) # get image results
return output_img
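
# Zero-shot tab: same as one-shot, but the style is described by a text prompt; the prompt
# (with spaces replaced by underscores) names the checkpoint directory, and its CLIP text
# embedding drives the phase-3 CLIP mapper.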
def generate_zero_shot(src_img, txt_prompt):
# init model
state_dict = torch.load(f"./checkpoints/{txt_prompt.replace(' ', '_')}/epoch_latest.pth", map_location='cpu')
model = Stylizer(ngf=64, phase=3, model_weights=state_dict['G_ema_model'])
model.to(device)
model.eval()
model.requires_grad_(False)
clip_model, img_preprocess = clip.load('ViT-B/32', device=device)
clip_model.eval()
clip_model.requires_grad_(False)
# image transform for stylizer
img_transform = Compose([
Resize((512, 512), interpolation=InterpolationMode.LANCZOS),
ToTensor(),
Normalize([0.5], [0.5])
])
# get clip features
with torch.no_grad():
text = clip.tokenize(txt_prompt).to(device)
clip_feats = clip_model.encode_text(text)
clip_feats /= clip_feats.norm(dim=1, keepdim=True)
# load image & to tensor
img = Image.open(src_img)
    if img.mode != 'RGB':
img = img.convert('RGB')
img = img_transform(img).unsqueeze(0).to(device)
# stylize it !
with torch.no_grad():
res = model(img, clip_feats=clip_feats)
output_img = tensor2file(res) # get image results
return output_img
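
# Gradio UI: three tabs (Multi-Model, One-Shot, Zero-Shot), each wiring its inputs to the
# corresponding generate_* function above.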
with gr.Blocks() as demo:
    # header text
gr.Markdown("# MMFS")
    # multiple tabs
with gr.Tabs():
with gr.TabItem("Multi-Model"):
multi_input_img = gr.Image(label="Upload Input Face Image", type='filepath', height=400)
gr.Examples(examples=["./example/source/01.png", "./example/source/02.png", "./example/source/03.png", "./example/source/04.png"], inputs=multi_input_img)
multi_model_button = gr.Button("Random Stylize")
multi_output_img = gr.Image(label="Output Image", height=400)
with gr.TabItem("One-Shot"):
one_shot_src_img = gr.Image(label="Upload Input Face Image", type='filepath', height=400)
gr.Examples(examples=["./example/source/01.png", "./example/source/02.png", "./example/source/03.png", "./example/source/04.png"], inputs=one_shot_src_img)
with gr.Row():
                gr.Image(shape=(100, 100), value=Image.open("example/reference/01.png"), type='pil', label="ref01")
                gr.Image(shape=(100, 100), value=Image.open("example/reference/02.png"), type='pil', label="ref02")
                gr.Image(shape=(100, 100), value=Image.open("example/reference/03.png"), type='pil', label="ref03")
                gr.Image(shape=(100, 100), value=Image.open("example/reference/04.png"), type='pil', label="ref04")
one_shot_ref_img = gr.Radio(['ref01','ref02','ref03','ref04'],value="ref01", label="Select a reference style image")
one_shot_test_button = gr.Button("Stylize Image")
one_shot_output_img = gr.Image(label="Output Image", height=400)
with gr.TabItem("Zero-Shot"):
zero_shot_src_img = gr.Image(label="Upload Input Face Image", type='filepath', height=400)
gr.Examples(examples=["./example/source/01.png", "./example/source/02.png", "./example/source/03.png", "./example/source/04.png"], inputs=zero_shot_src_img)
zero_shot_ref_prompt = gr.Dropdown(
label="Txt Prompt",
info="Select a reference style prompt",
choices=[
"pop art",
"watercolor painting",
],
max_choices=1,
value="pop art",
)
zero_shot_test_button = gr.Button("Stylize Image")
zero_shot_output_img = gr.Image(label="Output Image", height=400)
multi_model_button.click(fn=generate_multi_model, inputs=multi_input_img, outputs=multi_output_img)
one_shot_test_button.click(fn=generate_one_shot, inputs=[one_shot_src_img, one_shot_ref_img], outputs=one_shot_output_img)
zero_shot_test_button.click(fn=generate_zero_shot, inputs=[zero_shot_src_img, zero_shot_ref_prompt], outputs=zero_shot_output_img)
demo.queue(max_size=20)
demo.launch()