# Copyright (C) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, check out LICENSE.md
import warnings

from torch import nn
from torch.nn import Upsample as NearestUpsample

from imaginaire.layers import Conv2dBlock, Res2dBlock


class Generator(nn.Module):
r"""Improved UNIT generator.
Args:
gen_cfg (obj): Generator definition part of the yaml config file.
data_cfg (obj): Data definition part of the yaml config file.
"""
def __init__(self, gen_cfg, data_cfg):
super().__init__()
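        # One autoencoder per image domain. Both are built from the same
        # config, so they share an architecture but not their weights.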
self.autoencoder_a = AutoEncoder(**vars(gen_cfg))
        self.autoencoder_b = AutoEncoder(**vars(gen_cfg))

    def forward(self, data, image_recon=True, cycle_recon=True):
        r"""UNIT forward function.

        Args:
            data (dict): Training data at the current iteration.
                - images_a (tensor): Images from domain A.
                - images_b (tensor): Images from domain B.
            image_recon (bool): If ``True``, also computes the within-domain
                reconstructions ``images_aa`` and ``images_bb``.
            cycle_recon (bool): If ``True``, also computes the cycle
                reconstructions ``images_aba`` and ``images_bab``.

        Returns:
            (dict): Content codes, cross-domain translations ``images_ab`` and
            ``images_ba``, and the optional reconstructions above.
        """
images_a = data['images_a']
images_b = data['images_b']
net_G_output = dict()
# encode input images into latent code
content_a = self.autoencoder_a.content_encoder(images_a)
content_b = self.autoencoder_b.content_encoder(images_b)
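        # The content codes are treated as a domain-shared representation:
        # either decoder can consume codes from either encoder, which the
        # cross-domain decoding below relies on.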
# decode (within domain)
if image_recon:
images_aa = self.autoencoder_a.decoder(content_a)
images_bb = self.autoencoder_b.decoder(content_b)
net_G_output.update(dict(images_aa=images_aa, images_bb=images_bb))
# decode (cross domain)
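        # Naming convention: images_xy is an image originally from domain x,
        # rendered in domain y (so images_ba translates domain-B inputs into
        # domain A).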
images_ba = self.autoencoder_a.decoder(content_b)
images_ab = self.autoencoder_b.decoder(content_a)
# cycle reconstruction
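        # images_aba and images_bab are the round trips a->b->a and b->a->b,
        # which the trainer can compare against the inputs for cycle
        # consistency.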
if cycle_recon:
content_ba = self.autoencoder_a.content_encoder(images_ba)
content_ab = self.autoencoder_b.content_encoder(images_ab)
images_aba = self.autoencoder_a.decoder(content_ab)
images_bab = self.autoencoder_b.decoder(content_ba)
net_G_output.update(
dict(content_ba=content_ba, content_ab=content_ab,
images_aba=images_aba, images_bab=images_bab))
# required outputs
net_G_output.update(dict(content_a=content_a, content_b=content_b,
images_ba=images_ba, images_ab=images_ab))
        return net_G_output

    def inference(self, data, a2b=True):
        r"""UNIT inference.

        Args:
            data (dict): Training data at the current iteration.
                - images_a (tensor): Images from domain A.
                - images_b (tensor): Images from domain B.
            a2b (bool): If ``True``, translates images from domain A to B,
                otherwise from B to A.

        Returns:
            (tuple):
              - output_images (tensor): Translated images.
              - filenames (list of str): Output filenames built from the
                dataloader metadata in ``data['key']``.
        """
if a2b:
input_key = 'images_a'
content_encode = self.autoencoder_a.content_encoder
decode = self.autoencoder_b.decoder
else:
input_key = 'images_b'
content_encode = self.autoencoder_b.content_encoder
decode = self.autoencoder_a.decoder
content_images = data[input_key]
content = content_encode(content_images)
output_images = decode(content)
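        # Build the output filename from the dataloader metadata; data['key']
        # is expected to carry the sequence name and original filename.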
filename = '%s/%s' % (
data['key'][input_key]['sequence_name'][0],
data['key'][input_key]['filename'][0])
filenames = [filename]
        return output_images, filenames


class AutoEncoder(nn.Module):
r"""Improved UNIT autoencoder.
Args:
num_filters (int): Base filter numbers.
max_num_filters (int): Maximum number of filters in the encoder.
num_res_blocks (int): Number of residual blocks at the end of the
content encoder.
num_downsamples_content (int): Number of times we reduce
resolution by 2x2 for the content image.
num_image_channels (int): Number of input image channels.
content_norm_type (str): Type of activation normalization in the
content encoder.
decoder_norm_type (str): Type of activation normalization in the
decoder.
weight_norm_type (str): Type of weight normalization.
output_nonlinearity (str): Type of nonlinearity before final output,
``'tanh'`` or ``'none'``.
pre_act (bool): If ``True``, uses pre-activation residual blocks.
apply_noise (bool): If ``True``, injects Gaussian noise in the decoder.
"""
def __init__(self,
num_filters=64,
max_num_filters=256,
num_res_blocks=4,
num_downsamples_content=2,
num_image_channels=3,
content_norm_type='instance',
decoder_norm_type='instance',
weight_norm_type='',
output_nonlinearity='',
pre_act=False,
apply_noise=False,
**kwargs):
super().__init__()
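        # The config may carry extra fields (e.g. 'type', which the framework
        # uses to select this generator); warn about anything else unused.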
for key in kwargs:
if key != 'type':
warnings.warn(
"Generator argument '{}' is not used.".format(key))
self.content_encoder = ContentEncoder(num_downsamples_content,
num_res_blocks,
num_image_channels,
num_filters,
max_num_filters,
'reflect',
content_norm_type,
weight_norm_type,
'relu',
pre_act)
self.decoder = Decoder(num_downsamples_content,
num_res_blocks,
self.content_encoder.output_dim,
num_image_channels,
'reflect',
decoder_norm_type,
weight_norm_type,
'relu',
output_nonlinearity,
pre_act,
apply_noise)
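        # The decoder mirrors the encoder: it upsamples num_downsamples_content
        # times, so reconstructions keep the input resolution.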

    def forward(self, images):
        r"""Reconstruct an image.

        Args:
            images (Tensor): Input images.

        Returns:
            images_recon (Tensor): Reconstructed images.
        """
content = self.content_encoder(images)
images_recon = self.decoder(content)
        return images_recon


class ContentEncoder(nn.Module):
r"""Improved UNIT encoder. The network consists of:
- input layers
- $(num_downsamples) convolutional blocks
- $(num_res_blocks) residual blocks.
- output layer.
Args:
num_downsamples (int): Number of times we reduce
resolution by 2x2.
num_res_blocks (int): Number of residual blocks at the end of the
content encoder.
num_image_channels (int): Number of input image channels.
num_filters (int): Base filter numbers.
max_num_filters (int): Maximum number of filters in the encoder.
padding_mode (string): Type of padding.
activation_norm_type (str): Type of activation normalization.
weight_norm_type (str): Type of weight normalization.
nonlinearity (str): Type of nonlinear activation function.
pre_act (bool): If ``True``, uses pre-activation residual blocks.
"""
def __init__(self,
num_downsamples,
num_res_blocks,
num_image_channels,
num_filters,
max_num_filters,
padding_mode,
activation_norm_type,
weight_norm_type,
nonlinearity,
pre_act=False):
super().__init__()
conv_params = dict(padding_mode=padding_mode,
activation_norm_type=activation_norm_type,
weight_norm_type=weight_norm_type,
nonlinearity=nonlinearity)
        # In-place nonlinearities are safe only when the activation does not
        # write over a tensor that is reused elsewhere. Post-activation blocks
        # apply a convolution first, and a normalization layer always produces
        # a fresh tensor, so either condition makes in-place activation safe.
        if not pre_act or (activation_norm_type != '' and
                           activation_norm_type != 'none'):
            conv_params['inplace_nonlinearity'] = True
# The order of operations in residual blocks.
order = 'pre_act' if pre_act else 'CNACNA'
model = []
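        # Input block: a 7x7 convolution with stride 1 and padding 3, which
        # preserves the spatial resolution.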
model += [Conv2dBlock(num_image_channels, num_filters, 7, 1, 3,
**conv_params)]
# Downsampling blocks.
for i in range(num_downsamples):
num_filters_prev = num_filters
num_filters = min(num_filters * 2, max_num_filters)
model += [Conv2dBlock(num_filters_prev, num_filters, 4, 2, 1,
**conv_params)]
# Residual blocks.
for _ in range(num_res_blocks):
model += [Res2dBlock(num_filters, num_filters,
**conv_params,
order=order)]
self.model = nn.Sequential(*model)
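        # Record the channel count of the content code; the decoder input
        # width is sized from this.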
self.output_dim = num_filters

    def forward(self, x):
        r"""
        Args:
            x (tensor): Input image.

        Returns:
            (tensor): Content code with ``output_dim`` channels and spatial
            resolution reduced by a factor of ``2 ** num_downsamples``.
        """
        return self.model(x)


class Decoder(nn.Module):
r"""Improved UNIT decoder. The network consists of:
- $(num_res_blocks) residual blocks.
- $(num_upsamples) residual blocks or convolutional blocks
- output layer.
Args:
num_upsamples (int): Number of times we increase resolution by 2x2.
num_res_blocks (int): Number of residual blocks.
num_filters (int): Base filter numbers.
num_image_channels (int): Number of input image channels.
padding_mode (string): Type of padding.
activation_norm_type (str): Type of activation normalization.
weight_norm_type (str): Type of weight normalization.
nonlinearity (str): Type of nonlinear activation function.
output_nonlinearity (str): Type of nonlinearity before final output,
``'tanh'`` or ``'none'``.
pre_act (bool): If ``True``, uses pre-activation residual blocks.
apply_noise (bool): If ``True``, injects Gaussian noise.
"""
def __init__(self,
num_upsamples,
num_res_blocks,
num_filters,
num_image_channels,
padding_mode,
activation_norm_type,
weight_norm_type,
nonlinearity,
output_nonlinearity,
pre_act=False,
apply_noise=False):
super().__init__()
conv_params = dict(padding_mode=padding_mode,
nonlinearity=nonlinearity,
inplace_nonlinearity=True,
apply_noise=apply_noise,
weight_norm_type=weight_norm_type,
activation_norm_type=activation_norm_type)
# The order of operations in residual blocks.
order = 'pre_act' if pre_act else 'CNACNA'
# Residual blocks.
self.decoder = nn.ModuleList()
for _ in range(num_res_blocks):
self.decoder += [Res2dBlock(num_filters, num_filters,
**conv_params,
order=order)]
# Convolutional blocks with upsampling.
for i in range(num_upsamples):
self.decoder += [NearestUpsample(scale_factor=2)]
self.decoder += [Conv2dBlock(num_filters, num_filters // 2,
5, 1, 2, **conv_params)]
num_filters //= 2
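        # Output layer: a 7x7 projection to image channels. No activation or
        # weight normalization is requested here, so the output range is set
        # entirely by output_nonlinearity.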
self.decoder += [Conv2dBlock(num_filters, num_image_channels, 7, 1, 3,
nonlinearity=output_nonlinearity,
padding_mode=padding_mode)]

    def forward(self, x):
        r"""
        Args:
            x (tensor): Content code produced by the content encoder.

        Returns:
            (tensor): Decoded output image.
        """
for block in self.decoder:
x = block(x)
return x
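

# Minimal smoke-test sketch (illustrative; assumes torch and imaginaire are
# importable). With the defaults, the content encoder halves the resolution
# twice and the decoder upsamples it back, so reconstructions keep the input
# shape.
if __name__ == '__main__':
    import torch

    net = AutoEncoder()
    images = torch.randn(2, 3, 64, 64)
    images_recon = net(images)
    assert images_recon.shape == images.shape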