Rasm / dnnlib /tflib /ops /upfirdn_2d.py

First commit

baa26ad over 2 years ago

No virus

19.2 kB

	# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
	#
	# NVIDIA CORPORATION and its licensors retain all intellectual property
	# and proprietary rights in and to this software, related documentation
	# and any modifications thereto. Any use, reproduction, disclosure or
	# distribution of this software and related documentation without an express
	# license agreement from NVIDIA CORPORATION is strictly prohibited.

	"""Custom TensorFlow ops for efficient resampling of 2D images."""

	import os
	import numpy as np
	import tensorflow as tf
	from .. import custom_ops

	def _get_plugin():
	return custom_ops.get_plugin(os.path.splitext(__file__)[0] + '.cu')

	#----------------------------------------------------------------------------

	def upfirdn_2d(x, k, upx=1, upy=1, downx=1, downy=1, padx0=0, padx1=0, pady0=0, pady1=0, impl='cuda'):
	r"""Pad, upsample, FIR filter, and downsample a batch of 2D images.

	Accepts a batch of 2D images of the shape `[majorDim, inH, inW, minorDim]`
	and performs the following operations for each image, batched across
	`majorDim` and `minorDim`:

	1. Upsample the image by inserting the zeros after each pixel (`upx`, `upy`).

	2. Pad the image with zeros by the specified number of pixels on each side
	(`padx0`, `padx1`, `pady0`, `pady1`). Specifying a negative value
	corresponds to cropping the image.

	3. Convolve the image with the specified 2D FIR filter (`k`), shrinking the
	image so that the footprint of all output pixels lies within the input image.

	4. Downsample the image by throwing away pixels (`downx`, `downy`).

	This sequence of operations bears close resemblance to scipy.signal.upfirdn().
	The fused op is considerably more efficient than performing the same calculation
	using standard TensorFlow ops. It supports gradients of arbitrary order.

	Args:
	x: Input tensor of the shape `[majorDim, inH, inW, minorDim]`.
	k: 2D FIR filter of the shape `[firH, firW]`.
	upx: Integer upsampling factor along the X-axis (default: 1).
	upy: Integer upsampling factor along the Y-axis (default: 1).
	downx: Integer downsampling factor along the X-axis (default: 1).
	downy: Integer downsampling factor along the Y-axis (default: 1).
	padx0: Number of pixels to pad on the left side (default: 0).
	padx1: Number of pixels to pad on the right side (default: 0).
	pady0: Number of pixels to pad on the top side (default: 0).
	pady1: Number of pixels to pad on the bottom side (default: 0).
	impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).

	Returns:
	Tensor of the shape `[majorDim, outH, outW, minorDim]`, and same datatype as `x`.
	"""

	impl_dict = {
	'ref': _upfirdn_2d_ref,
	'cuda': _upfirdn_2d_cuda,
	}
	return impl_dict[impl](x=x, k=k, upx=upx, upy=upy, downx=downx, downy=downy, padx0=padx0, padx1=padx1, pady0=pady0, pady1=pady1)

	#----------------------------------------------------------------------------

	def _upfirdn_2d_ref(x, k, upx, upy, downx, downy, padx0, padx1, pady0, pady1):
	"""Slow reference implementation of `upfirdn_2d()` using standard TensorFlow ops."""

	x = tf.convert_to_tensor(x)
	k = np.asarray(k, dtype=np.float32)
	assert x.shape.rank == 4
	inH = x.shape[1].value
	inW = x.shape[2].value
	minorDim = _shape(x, 3)
	kernelH, kernelW = k.shape
	assert inW >= 1 and inH >= 1
	assert kernelW >= 1 and kernelH >= 1
	assert isinstance(upx, int) and isinstance(upy, int)
	assert isinstance(downx, int) and isinstance(downy, int)
	assert isinstance(padx0, int) and isinstance(padx1, int)
	assert isinstance(pady0, int) and isinstance(pady1, int)

	# Upsample (insert zeros).
	x = tf.reshape(x, [-1, inH, 1, inW, 1, minorDim])
	x = tf.pad(x, [[0, 0], [0, 0], [0, upy - 1], [0, 0], [0, upx - 1], [0, 0]])
	x = tf.reshape(x, [-1, inH * upy, inW * upx, minorDim])

	# Pad (crop if negative).
	x = tf.pad(x, [[0, 0], [max(pady0, 0), max(pady1, 0)], [max(padx0, 0), max(padx1, 0)], [0, 0]])
	x = x[:, max(-pady0, 0) : x.shape[1].value - max(-pady1, 0), max(-padx0, 0) : x.shape[2].value - max(-padx1, 0), :]

	# Convolve with filter.
	x = tf.transpose(x, [0, 3, 1, 2])
	x = tf.reshape(x, [-1, 1, inH * upy + pady0 + pady1, inW * upx + padx0 + padx1])
	w = tf.constant(k[::-1, ::-1, np.newaxis, np.newaxis], dtype=x.dtype)
	x = tf.nn.conv2d(x, w, strides=[1,1,1,1], padding='VALID', data_format='NCHW')
	x = tf.reshape(x, [-1, minorDim, inH * upy + pady0 + pady1 - kernelH + 1, inW * upx + padx0 + padx1 - kernelW + 1])
	x = tf.transpose(x, [0, 2, 3, 1])

	# Downsample (throw away pixels).
	return x[:, ::downy, ::downx, :]

	#----------------------------------------------------------------------------

	def _upfirdn_2d_cuda(x, k, upx, upy, downx, downy, padx0, padx1, pady0, pady1):
	"""Fast CUDA implementation of `upfirdn_2d()` using custom ops."""

	x = tf.convert_to_tensor(x)
	k = np.asarray(k, dtype=np.float32)
	majorDim, inH, inW, minorDim = x.shape.as_list()
	kernelH, kernelW = k.shape
	assert inW >= 1 and inH >= 1
	assert kernelW >= 1 and kernelH >= 1
	assert isinstance(upx, int) and isinstance(upy, int)
	assert isinstance(downx, int) and isinstance(downy, int)
	assert isinstance(padx0, int) and isinstance(padx1, int)
	assert isinstance(pady0, int) and isinstance(pady1, int)

	outW = (inW * upx + padx0 + padx1 - kernelW) // downx + 1
	outH = (inH * upy + pady0 + pady1 - kernelH) // downy + 1
	assert outW >= 1 and outH >= 1

	cuda_op = _get_plugin().up_fir_dn2d
	kc = tf.constant(k, dtype=x.dtype)
	gkc = tf.constant(k[::-1, ::-1], dtype=x.dtype)
	gpadx0 = kernelW - padx0 - 1
	gpady0 = kernelH - pady0 - 1
	gpadx1 = inW * upx - outW * downx + padx0 - upx + 1
	gpady1 = inH * upy - outH * downy + pady0 - upy + 1

	@tf.custom_gradient
	def func(x):
	y = cuda_op(x=x, k=kc, upx=int(upx), upy=int(upy), downx=int(downx), downy=int(downy), padx0=int(padx0), padx1=int(padx1), pady0=int(pady0), pady1=int(pady1))
	y.set_shape([majorDim, outH, outW, minorDim])
	@tf.custom_gradient
	def grad(dy):
	dx = cuda_op(x=dy, k=gkc, upx=int(downx), upy=int(downy), downx=int(upx), downy=int(upy), padx0=int(gpadx0), padx1=int(gpadx1), pady0=int(gpady0), pady1=int(gpady1))
	dx.set_shape([majorDim, inH, inW, minorDim])
	return dx, func
	return y, grad
	return func(x)

	#----------------------------------------------------------------------------

	def filter_2d(x, k, gain=1, padding=0, data_format='NCHW', impl='cuda'):
	r"""Filter a batch of 2D images with the given FIR filter.

	Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]`
	and filters each image with the given filter. The filter is normalized so that
	if the input pixels are constant, they will be scaled by the specified `gain`.
	Pixels outside the image are assumed to be zero.

	Args:
	x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`.
	k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable).
	gain: Scaling factor for signal magnitude (default: 1.0).
	padding: Number of pixels to pad or crop the output on each side (default: 0).
	data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`).
	impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).

	Returns:
	Tensor of the same shape and datatype as `x`.
	"""

	assert isinstance(padding, int)
	k = _FilterKernel(k=k, gain=gain)
	assert k.w == k.h
	pad0 = k.w // 2 + padding
	pad1 = (k.w - 1) // 2 + padding
	return _simple_upfirdn_2d(x, k, pad0=pad0, pad1=pad1, data_format=data_format, impl=impl)

	#----------------------------------------------------------------------------

	def upsample_2d(x, k=None, factor=2, gain=1, padding=0, data_format='NCHW', impl='cuda'):
	r"""Upsample a batch of 2D images with the given filter.

	Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]`
	and upsamples each image with the given filter. The filter is normalized so that
	if the input pixels are constant, they will be scaled by the specified `gain`.
	Pixels outside the image are assumed to be zero, and the filter is padded with
	zeros so that its shape is a multiple of the upsampling factor.

	Args:
	x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`.
	k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable).
	The default is `[1] * factor`, which corresponds to nearest-neighbor
	upsampling.
	factor: Integer upsampling factor (default: 2).
	gain: Scaling factor for signal magnitude (default: 1.0).
	padding: Number of pixels to pad or crop the output on each side (default: 0).
	data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`).
	impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).

	Returns:
	Tensor of the shape `[N, C, H * factor, W * factor]` or
	`[N, H * factor, W * factor, C]`, and same datatype as `x`.
	"""

	assert isinstance(factor, int) and factor >= 1
	assert isinstance(padding, int)
	k = _FilterKernel(k if k is not None else [1] * factor, gain * (factor ** 2))
	assert k.w == k.h
	pad0 = (k.w + factor - 1) // 2 + padding
	pad1 = (k.w - factor) // 2 + padding
	return _simple_upfirdn_2d(x, k, up=factor, pad0=pad0, pad1=pad1, data_format=data_format, impl=impl)

	#----------------------------------------------------------------------------

	def downsample_2d(x, k=None, factor=2, gain=1, padding=0, data_format='NCHW', impl='cuda'):
	r"""Downsample a batch of 2D images with the given filter.

	Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]`
	and downsamples each image with the given filter. The filter is normalized so that
	if the input pixels are constant, they will be scaled by the specified `gain`.
	Pixels outside the image are assumed to be zero, and the filter is padded with
	zeros so that its shape is a multiple of the downsampling factor.

	Args:
	x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`.
	k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable).
	The default is `[1] * factor`, which corresponds to average pooling.
	factor: Integer downsampling factor (default: 2).
	gain: Scaling factor for signal magnitude (default: 1.0).
	padding: Number of pixels to pad or crop the output on each side (default: 0).
	data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`).
	impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).

	Returns:
	Tensor of the shape `[N, C, H // factor, W // factor]` or
	`[N, H // factor, W // factor, C]`, and same datatype as `x`.
	"""

	assert isinstance(factor, int) and factor >= 1
	assert isinstance(padding, int)
	k = _FilterKernel(k if k is not None else [1] * factor, gain)
	assert k.w == k.h
	pad0 = (k.w - factor + 1) // 2 + padding * factor
	pad1 = (k.w - factor) // 2 + padding * factor
	return _simple_upfirdn_2d(x, k, down=factor, pad0=pad0, pad1=pad1, data_format=data_format, impl=impl)

	#----------------------------------------------------------------------------

	def upsample_conv_2d(x, w, k=None, factor=2, gain=1, padding=0, data_format='NCHW', impl='cuda'):
	r"""Fused `upsample_2d()` followed by `tf.nn.conv2d()`.

	Padding is performed only once at the beginning, not between the operations.
	The fused op is considerably more efficient than performing the same calculation
	using standard TensorFlow ops. It supports gradients of arbitrary order.

	Args:
	x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`.
	w: Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`.
	Grouped convolution can be performed by `inChannels = x.shape[0] // numGroups`.
	k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable).
	The default is `[1] * factor`, which corresponds to nearest-neighbor
	upsampling.
	factor: Integer upsampling factor (default: 2).
	gain: Scaling factor for signal magnitude (default: 1.0).
	padding: Number of pixels to pad or crop the output on each side (default: 0).
	data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`).
	impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).

	Returns:
	Tensor of the shape `[N, C, H * factor, W * factor]` or
	`[N, H * factor, W * factor, C]`, and same datatype as `x`.
	"""

	assert isinstance(factor, int) and factor >= 1
	assert isinstance(padding, int)

	# Check weight shape.
	w = tf.convert_to_tensor(w)
	ch, cw, _inC, _outC = w.shape.as_list()
	inC = _shape(w, 2)
	outC = _shape(w, 3)
	assert cw == ch

	# Fast path for 1x1 convolution.
	if cw == 1 and ch == 1:
	x = tf.nn.conv2d(x, w, data_format=data_format, strides=[1,1,1,1], padding='VALID')
	x = upsample_2d(x, k, factor=factor, gain=gain, padding=padding, data_format=data_format, impl=impl)
	return x

	# Setup filter kernel.
	k = _FilterKernel(k if k is not None else [1] * factor, gain * (factor ** 2))
	assert k.w == k.h

	# Determine data dimensions.
	if data_format == 'NCHW':
	stride = [1, 1, factor, factor]
	output_shape = [_shape(x, 0), outC, (_shape(x, 2) - 1) * factor + ch, (_shape(x, 3) - 1) * factor + cw]
	num_groups = _shape(x, 1) // inC
	else:
	stride = [1, factor, factor, 1]
	output_shape = [_shape(x, 0), (_shape(x, 1) - 1) * factor + ch, (_shape(x, 2) - 1) * factor + cw, outC]
	num_groups = _shape(x, 3) // inC

	# Transpose weights.
	w = tf.reshape(w, [ch, cw, inC, num_groups, -1])
	w = tf.transpose(w[::-1, ::-1], [0, 1, 4, 3, 2])
	w = tf.reshape(w, [ch, cw, -1, num_groups * inC])

	# Execute.
	x = tf.nn.conv2d_transpose(x, w, output_shape=output_shape, strides=stride, padding='VALID', data_format=data_format)
	pad0 = (k.w + factor - cw) // 2 + padding
	pad1 = (k.w - factor - cw + 3) // 2 + padding
	return _simple_upfirdn_2d(x, k, pad0=pad0, pad1=pad1, data_format=data_format, impl=impl)

	#----------------------------------------------------------------------------

	def conv_downsample_2d(x, w, k=None, factor=2, gain=1, padding=0, data_format='NCHW', impl='cuda'):
	r"""Fused `tf.nn.conv2d()` followed by `downsample_2d()`.

	Padding is performed only once at the beginning, not between the operations.
	The fused op is considerably more efficient than performing the same calculation
	using standard TensorFlow ops. It supports gradients of arbitrary order.

	Args:
	x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`.
	w: Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`.
	Grouped convolution can be performed by `inChannels = x.shape[0] // numGroups`.
	k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable).
	The default is `[1] * factor`, which corresponds to average pooling.
	factor: Integer downsampling factor (default: 2).
	gain: Scaling factor for signal magnitude (default: 1.0).
	padding: Number of pixels to pad or crop the output on each side (default: 0).
	data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`).
	impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).

	Returns:
	Tensor of the shape `[N, C, H // factor, W // factor]` or
	`[N, H // factor, W // factor, C]`, and same datatype as `x`.
	"""

	assert isinstance(factor, int) and factor >= 1
	assert isinstance(padding, int)

	# Check weight shape.
	w = tf.convert_to_tensor(w)
	ch, cw, _inC, _outC = w.shape.as_list()
	assert cw == ch

	# Fast path for 1x1 convolution.
	if cw == 1 and ch == 1:
	x = downsample_2d(x, k, factor=factor, gain=gain, padding=padding, data_format=data_format, impl=impl)
	x = tf.nn.conv2d(x, w, data_format=data_format, strides=[1,1,1,1], padding='VALID')
	return x

	# Setup filter kernel.
	k = _FilterKernel(k if k is not None else [1] * factor, gain)
	assert k.w == k.h

	# Determine stride.
	if data_format == 'NCHW':
	s = [1, 1, factor, factor]
	else:
	s = [1, factor, factor, 1]

	# Execute.
	pad0 = (k.w - factor + cw) // 2 + padding * factor
	pad1 = (k.w - factor + cw - 1) // 2 + padding * factor
	x = _simple_upfirdn_2d(x, k, pad0=pad0, pad1=pad1, data_format=data_format, impl=impl)
	return tf.nn.conv2d(x, w, strides=s, padding='VALID', data_format=data_format)

	#----------------------------------------------------------------------------
	# Internal helpers.

	class _FilterKernel:
	def __init__(self, k, gain=1):
	k = np.asarray(k, dtype=np.float32)
	k /= np.sum(k)

	# Separable.
	if k.ndim == 1 and k.size >= 8:
	self.w = k.size
	self.h = k.size
	self.kx = k[np.newaxis, :]
	self.ky = k[:, np.newaxis] * gain
	self.kxy = None

	# Non-separable.
	else:
	if k.ndim == 1:
	k = np.outer(k, k)
	assert k.ndim == 2
	self.w = k.shape[1]
	self.h = k.shape[0]
	self.kx = None
	self.ky = None
	self.kxy = k * gain

	def _simple_upfirdn_2d(x, k, up=1, down=1, pad0=0, pad1=0, data_format='NCHW', impl='cuda'):
	assert isinstance(k, _FilterKernel)
	assert data_format in ['NCHW', 'NHWC']
	assert x.shape.rank == 4
	y = x
	if data_format == 'NCHW':
	y = tf.reshape(y, [-1, _shape(y, 2), _shape(y, 3), 1])
	if k.kx is not None:
	y = upfirdn_2d(y, k.kx, upx=up, downx=down, padx0=pad0, padx1=pad1, impl=impl)
	if k.ky is not None:
	y = upfirdn_2d(y, k.ky, upy=up, downy=down, pady0=pad0, pady1=pad1, impl=impl)
	if k.kxy is not None:
	y = upfirdn_2d(y, k.kxy, upx=up, upy=up, downx=down, downy=down, padx0=pad0, padx1=pad1, pady0=pad0, pady1=pad1, impl=impl)
	if data_format == 'NCHW':
	y = tf.reshape(y, [-1, _shape(x, 1), _shape(y, 1), _shape(y, 2)])
	return y

	def _shape(tf_expr, dim_idx):
	if tf_expr.shape.rank is not None:
	dim = tf_expr.shape[dim_idx].value
	if dim is not None:
	return dim
	return tf.shape(tf_expr)[dim_idx]

	#----------------------------------------------------------------------------