Spaces:

PKUWilliamYang
/

VToonify

Starting on T4

VToonify / vtoonify /model /encoder /encoders /psp_encoders.py

983684c about 2 years ago

7.57 kB

	import numpy as np
	import torch
	import torch.nn.functional as F
	from torch import nn
	from torch.nn import Linear, Conv2d, BatchNorm2d, PReLU, Sequential, Module

	from model.encoder.encoders.helpers import get_blocks, Flatten, bottleneck_IR, bottleneck_IR_SE
	from model.stylegan.model import EqualLinear


	class GradualStyleBlock(Module):
	    """Turn a feature map into one ``out_c``-dimensional style vector.

	    The block stacks stride-2 3x3 convolutions (padding 1), each followed by
	    a LeakyReLU, and finishes with an ``EqualLinear`` projection.  One
	    convolution is always created and ``int(log2(spatial)) - 1`` more are
	    appended, so a ``spatial`` x ``spatial`` input with ``spatial`` a power
	    of two is halved down to 1 x 1 before it is flattened.

	    Args:
	        in_c: channel count of the incoming feature map.
	        out_c: channel count produced by every convolution; also the size
	            of the returned style vector.
	        spatial: side length the block is sized for.  It is stored and only
	            used to decide how many convolutions are stacked.
	    """

	    def __init__(self, in_c, out_c, spatial):
	        super().__init__()
	        self.out_c = out_c
	        self.spatial = spatial

	        halvings = int(np.log2(spatial))
	        # Input width of each convolution: the first one adapts ``in_c``,
	        # every following one maps ``out_c`` to ``out_c``.
	        conv_inputs = [in_c] + [out_c] * max(halvings - 1, 0)

	        layers = []
	        for channels in conv_inputs:
	            layers.append(Conv2d(channels, out_c, kernel_size=3, stride=2, padding=1))
	            layers.append(nn.LeakyReLU())
	        self.convs = nn.Sequential(*layers)
	        self.linear = EqualLinear(out_c, out_c, lr_mul=1)

	    def forward(self, x):
	        """Run the conv stack, reshape to ``(-1, out_c)`` and project."""
	        features = self.convs(x)
	        # NOTE(review): the view only yields one row per sample when the
	        # conv stack has reduced the map to 1x1; otherwise the remaining
	        # elements are folded into the first dimension.
	        return self.linear(features.view(-1, self.out_c))


	class GradualStyleEncoder(Module):
	    """Backbone plus a three-level feature pyramid with one style head per latent.

	    The input goes through ``input_layer`` and then through ``body``, a
	    ``Sequential`` of ``bottleneck_IR`` / ``bottleneck_IR_SE`` units laid out
	    by ``get_blocks(num_layers)``.  Three intermediate outputs of ``body``
	    are tapped (``c1``, ``c2``, ``c3``, from shallow to deep):

	    * style heads ``[0, coarse_ind)`` read ``c3``;
	    * style heads ``[coarse_ind, middle_ind)`` read ``c3`` upsampled and
	      added to ``latlayer1(c2)``;
	    * style heads ``[middle_ind, style_count)`` read that result upsampled
	      and added to ``latlayer2(c1)``.

	    Args:
	        num_layers: backbone depth, one of 50, 100 or 152.
	        mode: ``'ir'`` or ``'ir_se'``; selects the bottleneck unit class.
	        opts: options object.  Despite the ``None`` default it is required:
	            ``opts.input_nc`` (input channels) and ``opts.n_styles`` (number
	            of style heads) are read unconditionally.
	    """

	    # Tap positions that used to be hard-coded in ``forward``.  ``__init__``
	    # overrides them per instance; the class-level values only serve
	    # instances restored without running ``__init__`` (e.g. objects pickled
	    # before these attributes existed).
	    # NOTE(review): presumably these are the stage ends of the 50-layer
	    # layout -- confirm against ``get_blocks``.
	    c1_ind = 6
	    c2_ind = 20
	    c3_ind = 23

	    def __init__(self, num_layers, mode='ir', opts=None):
	        super(GradualStyleEncoder, self).__init__()
	        assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152'
	        assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se'
	        blocks = get_blocks(num_layers)
	        if mode == 'ir':
	            unit_module = bottleneck_IR
	        elif mode == 'ir_se':
	            unit_module = bottleneck_IR_SE
	        self.input_layer = Sequential(Conv2d(opts.input_nc, 64, (3, 3), 1, 1, bias=False),
	                                      BatchNorm2d(64),
	                                      PReLU(64))
	        modules = []
	        stage_ends = []  # index in ``modules`` of the last unit of each stage
	        for block in blocks:
	            for bottleneck in block:
	                modules.append(unit_module(bottleneck.in_channel,
	                                           bottleneck.depth,
	                                           bottleneck.stride))
	            stage_ends.append(len(modules) - 1)
	        self.body = Sequential(*modules)

	        # Tap the outputs of the last three stages instead of fixed indices,
	        # so every accepted ``num_layers`` taps features at stage boundaries.
	        # NOTE(review): assumes ``get_blocks`` yields at least three stages
	        # and that the last three emit 128, 256 and 512 channels, which is
	        # what ``latlayer2``, ``latlayer1`` and the style heads expect.
	        self.c1_ind, self.c2_ind, self.c3_ind = stage_ends[-3:]

	        self.styles = nn.ModuleList()
	        self.style_count = opts.n_styles
	        self.coarse_ind = 3
	        self.middle_ind = 7
	        for i in range(self.style_count):
	            if i < self.coarse_ind:
	                style = GradualStyleBlock(512, 512, 16)
	            elif i < self.middle_ind:
	                style = GradualStyleBlock(512, 512, 32)
	            else:
	                style = GradualStyleBlock(512, 512, 64)
	            self.styles.append(style)
	        self.latlayer1 = nn.Conv2d(256, 512, kernel_size=1, stride=1, padding=0)
	        self.latlayer2 = nn.Conv2d(128, 512, kernel_size=1, stride=1, padding=0)

	    def _upsample_add(self, x, y):
	        """Resize ``x`` to the spatial size of ``y`` and add the two maps.

	        Args:
	            x: top (deeper) feature map to be resized.
	            y: lateral feature map; its height and width are the target.

	        Returns:
	            ``x`` bilinearly interpolated (``align_corners=True``) to
	            ``y``'s height and width, plus ``y``.

	        An explicit target size is used rather than ``scale_factor=2``
	        because doubling does not always reproduce the lateral size: a
	        15x15 input gives an 8x8 map after a stride-2 conv, and doubling
	        that yields 16x16.
	        """
	        _, _, H, W = y.size()
	        return F.interpolate(x, size=(H, W), mode='bilinear', align_corners=True) + y

	    def forward(self, x):
	        """Encode ``x`` into ``style_count`` latents stacked along dim 1.

	        Returns:
	            ``torch.stack`` of the style-head outputs along ``dim=1``.
	        """
	        x = self.input_layer(x)

	        # Collect the three pyramid levels while running the backbone.
	        wanted = (self.c1_ind, self.c2_ind, self.c3_ind)
	        taps = {}
	        for i, layer in enumerate(self.body):
	            x = layer(x)
	            if i in wanted:
	                taps[i] = x
	        c1, c2, c3 = (taps[i] for i in wanted)

	        # Clamp the group boundaries so that fewer than ``middle_ind`` (or
	        # ``coarse_ind``) style heads do not index past ``self.styles``.
	        coarse_end = min(self.coarse_ind, self.style_count)
	        middle_end = min(self.middle_ind, self.style_count)

	        latents = [self.styles[j](c3) for j in range(coarse_end)]

	        p2 = self._upsample_add(c3, self.latlayer1(c2))
	        latents += [self.styles[j](p2) for j in range(coarse_end, middle_end)]

	        p1 = self._upsample_add(p2, self.latlayer2(c1))
	        latents += [self.styles[j](p1) for j in range(middle_end, self.style_count)]

	        return torch.stack(latents, dim=1)


	class BackboneEncoderUsingLastLayerIntoW(Module):
	    """Backbone encoder that emits a single 512-dimensional latent per sample.

	    The final backbone feature map is averaged to 1x1 by an adaptive average
	    pool, reshaped to ``(-1, 512)`` and passed through an ``EqualLinear``
	    layer.

	    Args:
	        num_layers: backbone depth, one of 50, 100 or 152.
	        mode: ``'ir'`` or ``'ir_se'``; selects the bottleneck unit class.
	        opts: options object; ``opts.input_nc`` (input channels) is read
	            unconditionally, so ``None`` is not usable despite the default.
	    """

	    def __init__(self, num_layers, mode='ir', opts=None):
	        super().__init__()
	        print('Using BackboneEncoderUsingLastLayerIntoW')
	        assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152'
	        assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se'
	        stages = get_blocks(num_layers)
	        unit_cls = bottleneck_IR if mode == 'ir' else bottleneck_IR_SE

	        # Submodules are created in the same order as before so parameter
	        # registration and random initialisation are unaffected.
	        stem = [
	            Conv2d(opts.input_nc, 64, (3, 3), 1, 1, bias=False),
	            BatchNorm2d(64),
	            PReLU(64),
	        ]
	        self.input_layer = Sequential(*stem)
	        self.output_pool = torch.nn.AdaptiveAvgPool2d((1, 1))
	        self.linear = EqualLinear(512, 512, lr_mul=1)
	        self.body = Sequential(*[
	            unit_cls(spec.in_channel, spec.depth, spec.stride)
	            for stage in stages
	            for spec in stage
	        ])

	    def forward(self, x):
	        """Return the ``(-1, 512)`` latent computed from ``x``."""
	        pooled = self.output_pool(self.body(self.input_layer(x)))
	        return self.linear(pooled.view(-1, 512))


	class BackboneEncoderUsingLastLayerIntoWPlus(Module):
	    """Backbone encoder that emits ``n_styles`` 512-dimensional latents per sample.

	    All latents come from the final backbone feature map: batch norm,
	    adaptive average pool to 7x7, flatten, a ``Linear`` down to 512, then an
	    ``EqualLinear`` up to ``512 * n_styles`` which is reshaped to
	    ``(-1, n_styles, 512)``.

	    Args:
	        num_layers: backbone depth, one of 50, 100 or 152.
	        mode: ``'ir'`` or ``'ir_se'``; selects the bottleneck unit class.
	        opts: options object; ``opts.n_styles`` and ``opts.input_nc`` are
	            read unconditionally, so ``None`` is not usable despite the
	            default.
	    """

	    def __init__(self, num_layers, mode='ir', opts=None):
	        super().__init__()
	        print('Using BackboneEncoderUsingLastLayerIntoWPlus')
	        assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152'
	        assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se'
	        stages = get_blocks(num_layers)
	        unit_cls = bottleneck_IR if mode == 'ir' else bottleneck_IR_SE
	        self.n_styles = opts.n_styles

	        # Submodules are created in the same order as before so parameter
	        # registration and random initialisation are unaffected.
	        stem = [
	            Conv2d(opts.input_nc, 64, (3, 3), 1, 1, bias=False),
	            BatchNorm2d(64),
	            PReLU(64),
	        ]
	        self.input_layer = Sequential(*stem)
	        head = [
	            BatchNorm2d(512),
	            torch.nn.AdaptiveAvgPool2d((7, 7)),
	            Flatten(),
	            Linear(512 * 7 * 7, 512),
	        ]
	        self.output_layer_2 = Sequential(*head)
	        self.linear = EqualLinear(512, 512 * self.n_styles, lr_mul=1)
	        self.body = Sequential(*[
	            unit_cls(spec.in_channel, spec.depth, spec.stride)
	            for stage in stages
	            for spec in stage
	        ])

	    def forward(self, x):
	        """Return the latents for ``x`` shaped ``(-1, n_styles, 512)``."""
	        features = self.body(self.input_layer(x))
	        codes = self.linear(self.output_layer_2(features))
	        return codes.view(-1, self.n_styles, 512)