BlendGAN / psp_encoder /psp_encoders.py
Ahsen Khaliq
Update psp_encoder/psp_encoders.py
4a4961c
raw
history blame
5.41 kB
import numpy as np
import torch
import torch.nn.functional as F
from torch import nn
from torch.nn import Conv2d, BatchNorm2d, PReLU, Sequential, Module
import math
from .helpers import get_blocks, bottleneck_IR, bottleneck_IR_SE
import sys, os
sys.path.append(os.path.dirname(__file__) + os.sep + '../')
from model import EqualLinear
"""
Modified from [pSp](https://github.com/eladrich/pixel2style2pixel)
"""
class GradualStyleBlock(Module):
    """Reduce a feature map to one style vector per sample.

    The block chains ``max(int(log2(spatial)), 1)`` stride-2 3x3 convolutions
    (each followed by a LeakyReLU). Every such convolution halves an even
    spatial size, so a ``spatial x spatial`` input whose side is a power of
    two ends up 1x1. The result is flattened to ``(-1, out_c)`` and passed
    through an ``EqualLinear`` layer.

    Args:
        in_c: Number of channels of the incoming feature map.
        out_c: Number of channels produced by every convolution and by the
            final linear layer.
        spatial: Side length of the incoming feature map; only used to decide
            how many down-sampling convolutions to stack.
    """

    def __init__(self, in_c, out_c, spatial):
        super().__init__()
        self.out_c = out_c
        self.spatial = spatial

        n_halvings = int(np.log2(spatial))
        # The first convolution maps in_c -> out_c; all later ones keep out_c.
        input_widths = [in_c] + [out_c] * (n_halvings - 1)

        layers = []
        for width in input_widths:
            layers.append(Conv2d(width, out_c, kernel_size=3, stride=2, padding=1))
            layers.append(nn.LeakyReLU())

        self.convs = nn.Sequential(*layers)
        self.linear = EqualLinear(out_c, out_c, lr_mul=1)

    def forward(self, x):
        """Return the style vectors for feature map ``x``.

        The convolution output is reshaped to ``(-1, out_c)`` before the
        linear layer, so the returned tensor has ``out_c`` columns.
        """
        features = self.convs(x)
        flattened = features.view(-1, self.out_c)
        return self.linear(flattened)
class GradualStyleEncoder(Module):
    """Feature-pyramid encoder that emits ``n_styles`` style vectors per image.

    A residual backbone (units built from ``get_blocks(num_layers)``) is tapped
    at three depths, named c1, c2 and c3 from shallowest to deepest. The taps
    are merged top-down (c3 -> p2 -> p1) through 1x1 lateral convolutions, and
    each pyramid level feeds a group of ``GradualStyleBlock`` heads:

    * styles ``[0, coarse_ind)`` read c3,
    * styles ``[coarse_ind, middle_ind)`` read p2,
    * styles ``[middle_ind, n_styles)`` read p1.

    Args:
        num_layers: Backbone depth; one of 50, 100 or 152.
        mode: ``'ir'`` for ``bottleneck_IR`` units, ``'ir_se'`` for
            ``bottleneck_IR_SE`` units.
        n_styles: Number of style vectors (and style heads) to produce.

    Attributes:
        feature_taps: ``(c1, c2, c3)`` indices into ``self.body`` after which
            the pyramid features are captured. They are the indices of the
            last unit of the final three backbone stages, which for the
            50-layer backbone is ``(6, 20, 23)``.
    """

    def __init__(self, num_layers, mode='ir', n_styles=18):
        super(GradualStyleEncoder, self).__init__()
        assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152'
        assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se'
        blocks = get_blocks(num_layers)
        if mode == 'ir':
            unit_module = bottleneck_IR
        elif mode == 'ir_se':
            unit_module = bottleneck_IR_SE
        self.input_layer = Sequential(Conv2d(3, 64, (3, 3), 1, 1, bias=False),
                                      BatchNorm2d(64),
                                      PReLU(64))

        modules = []
        stage_ends = []  # index in `modules` of the last unit of each stage
        for block in blocks:
            for bottleneck in block:
                modules.append(unit_module(bottleneck.in_channel,
                                           bottleneck.depth,
                                           bottleneck.stride))
            stage_ends.append(len(modules) - 1)
        if len(stage_ends) < 3:
            raise ValueError('backbone must have at least 3 stages to build the feature pyramid')
        # Derive the taps from the stage layout instead of hard-coding the
        # 50-layer indices, so the 100/152-layer backbones are tapped at the
        # end of a stage too.
        # NOTE(review): assumes get_blocks returns the stages in network
        # order, one inner sequence per stage -- confirm against helpers.
        self.feature_taps = tuple(stage_ends[-3:])
        self.body = Sequential(*modules)

        self.styles = nn.ModuleList()
        self.style_count = n_styles
        self.coarse_ind = 3
        self.middle_ind = 7
        for i in range(self.style_count):
            if i < self.coarse_ind:
                style = GradualStyleBlock(512, 512, 16)
            elif i < self.middle_ind:
                style = GradualStyleBlock(512, 512, 32)
            else:
                style = GradualStyleBlock(512, 512, 64)
            self.styles.append(style)
        # 1x1 lateral convolutions lifting c2 (256 ch) and c1 (128 ch) to the
        # 512 channels used by the style heads.
        self.latlayer1 = nn.Conv2d(256, 512, kernel_size=1, stride=1, padding=0)
        self.latlayer2 = nn.Conv2d(128, 512, kernel_size=1, stride=1, padding=0)

    def _upsample_add(self, x, y):
        """Resize ``x`` to the spatial size of ``y`` and add the two maps.

        Args:
            x: Top (coarser) feature map to be upsampled.
            y: Lateral feature map that fixes the output height and width.

        Returns:
            ``y`` plus ``x`` bilinearly resized to ``y``'s ``(H, W)``.

        Resizing to an explicit target size (rather than by a fixed scale
        factor of 2) keeps the shapes aligned when the lateral map has an odd
        side length, e.g. a 15x15 lateral map whose coarser level is 8x8 would
        otherwise be matched against a 16x16 upsampled map.
        """
        _, _, H, W = y.size()
        return F.interpolate(x, size=(H, W), mode='bilinear', align_corners=True) + y

    def forward(self, x):
        """Encode ``x`` into a tensor of style vectors.

        Returns:
            The per-style outputs stacked along dimension 1, i.e. one entry
            per style head in index order.

        Raises:
            RuntimeError: If ``self.body`` is shorter than the configured
                feature taps, so a pyramid feature was never captured.
        """
        x = self.input_layer(x)

        # Instances that predate `feature_taps` (e.g. whole modules restored
        # from an older pickle) fall back to the 50-layer indices.
        c1_idx, c2_idx, c3_idx = getattr(self, 'feature_taps', (6, 20, 23))
        c1 = c2 = c3 = None
        for i, layer in enumerate(self.body):
            x = layer(x)
            if i == c1_idx:
                c1 = x
            if i == c2_idx:
                c2 = x
            if i == c3_idx:
                c3 = x
        if c1 is None or c2 is None or c3 is None:
            raise RuntimeError('backbone is shorter than the configured feature taps')

        # Clamp the group boundaries so fewer than `middle_ind` styles does
        # not index past the end of `self.styles`.
        coarse_end = min(self.coarse_ind, self.style_count)
        middle_end = min(self.middle_ind, self.style_count)

        latents = [self.styles[j](c3) for j in range(coarse_end)]

        p2 = self._upsample_add(c3, self.latlayer1(c2))
        latents += [self.styles[j](p2) for j in range(coarse_end, middle_end)]

        p1 = self._upsample_add(p2, self.latlayer2(c1))
        latents += [self.styles[j](p1) for j in range(middle_end, self.style_count)]

        return torch.stack(latents, dim=1)
def get_keys(d, name):
    """Extract the sub-state-dict stored under the ``name.`` prefix.

    Args:
        d: A mapping of parameter names to values, or a checkpoint mapping
            that holds such a mapping under the ``'state_dict'`` key (in which
            case that inner mapping is used).
        name: Prefix of the wanted entries, without the trailing dot
            (e.g. ``'encoder'``).

    Returns:
        A new dict with every entry whose key starts with ``name + '.'``,
        keyed by the remainder of the original key.
    """
    if 'state_dict' in d:
        d = d['state_dict']
    # Match on the dotted prefix so that siblings sharing the same leading
    # characters (e.g. 'encoder_ema.w' when name == 'encoder') are not pulled
    # in and mis-stripped.
    prefix = name + '.'
    return {k[len(prefix):]: v for k, v in d.items() if k.startswith(prefix)}
class PSPEncoder(Module):
    """Pre-trained IR-SE-50 ``GradualStyleEncoder`` with an average-latent offset.

    Args:
        encoder_ckpt_path: Path to a checkpoint readable by ``torch.load``. Its
            entries prefixed with ``encoder.`` are loaded (strictly) into the
            encoder, and its ``'latent_avg'`` entry is added to every output.
        output_size: Resolution used to derive the number of style vectors as
            ``2 * log2(output_size) - 2`` (18 for the default 1024).
    """

    def __init__(self, encoder_ckpt_path, output_size=1024):
        super(PSPEncoder, self).__init__()
        # math.log2 is documented as usually more accurate than log(x, 2),
        # which matters because the result is truncated with int().
        n_styles = int(math.log2(output_size)) * 2 - 2
        self.encoder = GradualStyleEncoder(50, 'ir_se', n_styles)
        print('Loading psp encoders weights from irse50!')
        # NOTE(security): torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        encoder_ckpt = torch.load(encoder_ckpt_path, map_location='cpu')
        self.encoder.load_state_dict(get_keys(encoder_ckpt, 'encoder'), strict=True)
        self.latent_avg = encoder_ckpt['latent_avg']
        self.face_pool = torch.nn.AdaptiveAvgPool2d((256, 256))

    def forward(self, x):
        """Encode images ``x`` into style codes offset by the average latent.

        The input is first adaptively average-pooled to 256x256, encoded, and
        the checkpoint's ``latent_avg`` is added to every sample's codes.
        """
        x = self.face_pool(x)
        codes = self.encoder(x)
        # latent_avg is a plain tensor attribute (not a registered buffer), so
        # Module.to()/.cuda() does not move it; align it with the codes here.
        latent_avg = self.latent_avg.to(codes.device)
        return codes + latent_avg.repeat(codes.shape[0], 1, 1)