# Lint as: python2, python3
# Copyright 2019 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Augment slim.conv2d with optional Weight Standardization (WS). | |
WS is a normalization method to accelerate micro-batch training. When used with | |
Group Normalization and trained with 1 image/GPU, WS is able to match or | |
outperform the performances of BN trained with large batch sizes. | |
[1] Siyuan Qiao, Huiyu Wang, Chenxi Liu, Wei Shen, Alan Yuille | |
Weight Standardization. arXiv:1903.10520 | |
[2] Lei Huang, Xianglong Liu, Yang Liu, Bo Lang, Dacheng Tao | |
Centered Weight Normalization in Accelerating Training of Deep Neural | |
Networks. ICCV 2017 | |
""" | |

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow.contrib import framework as contrib_framework
from tensorflow.contrib import layers as contrib_layers
from tensorflow.contrib.layers.python.layers import layers
from tensorflow.contrib.layers.python.layers import utils


class Conv2D(tf.keras.layers.Conv2D, tf.layers.Layer):
  """2D convolution layer (e.g. spatial convolution over images).

  This layer creates a convolution kernel that is convolved
  (actually cross-correlated) with the layer input to produce a tensor of
  outputs. If `use_bias` is True (and a `bias_initializer` is provided),
  a bias vector is created and added to the outputs. Finally, if
  `activation` is not `None`, it is applied to the outputs as well.

  A usage sketch follows the class definition.
  """

  def __init__(self,
               filters,
               kernel_size,
               strides=(1, 1),
               padding='valid',
               data_format='channels_last',
               dilation_rate=(1, 1),
               activation=None,
               use_bias=True,
               kernel_initializer=None,
               bias_initializer=tf.zeros_initializer(),
               kernel_regularizer=None,
               bias_regularizer=None,
               use_weight_standardization=False,
               activity_regularizer=None,
               kernel_constraint=None,
               bias_constraint=None,
               trainable=True,
               name=None,
               **kwargs):
    """Constructs the 2D convolution layer.

    Args:
      filters: Integer, the dimensionality of the output space (i.e. the number
        of filters in the convolution).
      kernel_size: An integer or tuple/list of 2 integers, specifying the
        height and width of the 2D convolution window. Can be a single integer
        to specify the same value for all spatial dimensions.
      strides: An integer or tuple/list of 2 integers, specifying the strides
        of the convolution along the height and width. Can be a single integer
        to specify the same value for all spatial dimensions. Specifying any
        stride value != 1 is incompatible with specifying any `dilation_rate`
        value != 1.
      padding: One of `"valid"` or `"same"` (case-insensitive).
      data_format: A string, one of `channels_last` (default) or
        `channels_first`. The ordering of the dimensions in the inputs.
        `channels_last` corresponds to inputs with shape `(batch, height,
        width, channels)` while `channels_first` corresponds to inputs with
        shape `(batch, channels, height, width)`.
      dilation_rate: An integer or tuple/list of 2 integers, specifying the
        dilation rate to use for dilated convolution. Can be a single integer
        to specify the same value for all spatial dimensions. Currently,
        specifying any `dilation_rate` value != 1 is incompatible with
        specifying any stride value != 1.
      activation: Activation function. Set it to None to maintain a linear
        activation.
      use_bias: Boolean, whether the layer uses a bias.
      kernel_initializer: An initializer for the convolution kernel.
      bias_initializer: An initializer for the bias vector. If None, the
        default initializer will be used.
      kernel_regularizer: Optional regularizer for the convolution kernel.
      bias_regularizer: Optional regularizer for the bias vector.
      use_weight_standardization: Boolean, whether the layer uses weight
        standardization.
      activity_regularizer: Optional regularizer function for the output.
      kernel_constraint: Optional projection function to be applied to the
        kernel after being updated by an `Optimizer` (e.g. used to implement
        norm constraints or value constraints for layer weights). The function
        must take as input the unprojected variable and must return the
        projected variable (which must have the same shape). Constraints are
        not safe to use when doing asynchronous distributed training.
      bias_constraint: Optional projection function to be applied to the bias
        after being updated by an `Optimizer`.
      trainable: Boolean, if `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
      name: A string, the name of the layer.
      **kwargs: Arbitrary keyword arguments passed to tf.keras.layers.Conv2D.
    """
    super(Conv2D, self).__init__(
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilation_rate=dilation_rate,
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
        activity_regularizer=activity_regularizer,
        kernel_constraint=kernel_constraint,
        bias_constraint=bias_constraint,
        trainable=trainable,
        name=name,
        **kwargs)
    self.use_weight_standardization = use_weight_standardization

  def call(self, inputs):
    if self.use_weight_standardization:
      # Standardize the kernel over its spatial and input-channel dimensions
      # so that each output filter has zero mean and unit variance.
      mean, var = tf.nn.moments(self.kernel, [0, 1, 2], keep_dims=True)
      kernel = (self.kernel - mean) / tf.sqrt(var + 1e-5)
      outputs = self._convolution_op(inputs, kernel)
    else:
      outputs = self._convolution_op(inputs, self.kernel)

    if self.use_bias:
      if self.data_format == 'channels_first':
        if self.rank == 1:
          # tf.nn.bias_add does not accept a 1D input tensor.
          bias = tf.reshape(self.bias, (1, self.filters, 1))
          outputs += bias
        else:
          outputs = tf.nn.bias_add(outputs, self.bias, data_format='NCHW')
      else:
        outputs = tf.nn.bias_add(outputs, self.bias, data_format='NHWC')

    if self.activation is not None:
      return self.activation(outputs)
    return outputs
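

# Illustrative usage sketch (not part of the original module): builds the
# weight-standardized layer above. The filter count, kernel size, and padding
# are arbitrary example values.
def _example_conv2d_ws_layer(images):
  """Applies a 3x3 weight-standardized convolution with 64 filters.

  Args:
    images: A 4-D float Tensor of shape [batch, height, width, channels].

  Returns:
    A 4-D Tensor with 64 output channels.
  """
  layer = Conv2D(
      filters=64,
      kernel_size=3,
      padding='same',
      use_weight_standardization=True)
  return layer(images)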


def conv2d(inputs,
           num_outputs,
           kernel_size,
           stride=1,
           padding='SAME',
           data_format=None,
           rate=1,
           activation_fn=tf.nn.relu,
           normalizer_fn=None,
           normalizer_params=None,
           weights_initializer=contrib_layers.xavier_initializer(),
           weights_regularizer=None,
           biases_initializer=tf.zeros_initializer(),
           biases_regularizer=None,
           use_weight_standardization=False,
           reuse=None,
           variables_collections=None,
           outputs_collections=None,
           trainable=True,
           scope=None):
  """Adds a 2D convolution followed by an optional batch_norm layer.

  `convolution` creates a variable called `weights`, representing the
  convolutional kernel, that is convolved (actually cross-correlated) with the
  `inputs` to produce a `Tensor` of activations. If a `normalizer_fn` is
  provided (such as `batch_norm`), it is then applied. Otherwise, if
  `normalizer_fn` is None and a `biases_initializer` is provided then a
  `biases` variable is created and added to the activations. Finally, if
  `activation_fn` is not `None`, it is applied to the activations as well.

  Performs atrous convolution with input stride/dilation rate equal to `rate`
  if a value > 1 for any dimension of `rate` is specified. In this case
  `stride` values != 1 are not supported.

  A usage sketch follows this function.

  Args:
    inputs: A Tensor of rank N+2 of shape `[batch_size] + input_spatial_shape +
      [in_channels]` if data_format does not start with "NC" (default), or
      `[batch_size, in_channels] + input_spatial_shape` if data_format starts
      with "NC".
    num_outputs: Integer, the number of output filters.
    kernel_size: A sequence of N positive integers specifying the spatial
      dimensions of the filters. Can be a single integer to specify the same
      value for all spatial dimensions.
    stride: A sequence of N positive integers specifying the stride at which to
      compute output. Can be a single integer to specify the same value for all
      spatial dimensions. Specifying any `stride` value != 1 is incompatible
      with specifying any `rate` value != 1.
    padding: One of `"VALID"` or `"SAME"`.
    data_format: A string or None. Specifies whether the channel dimension of
      the `input` and output is the last dimension (default, or if
      `data_format` does not start with "NC"), or the second dimension (if
      `data_format` starts with "NC"). For N=1, the valid values are "NWC"
      (default) and "NCW". For N=2, the valid values are "NHWC" (default) and
      "NCHW". For N=3, the valid values are "NDHWC" (default) and "NCDHW".
    rate: A sequence of N positive integers specifying the dilation rate to use
      for atrous convolution. Can be a single integer to specify the same value
      for all spatial dimensions. Specifying any `rate` value != 1 is
      incompatible with specifying any `stride` value != 1.
    activation_fn: Activation function. The default value is a ReLU function.
      Explicitly set it to None to skip it and maintain a linear activation.
    normalizer_fn: Normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are not created nor added.
      Defaults to None, i.e. no normalizer function.
    normalizer_params: Normalization function parameters.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None, biases are
      skipped.
    biases_regularizer: Optional regularizer for the biases.
    use_weight_standardization: Boolean, whether the layer uses weight
      standardization.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer, the scope must be given.
    variables_collections: Optional list of collections for all the variables
      or a dictionary containing a different list of collections per variable.
    outputs_collections: Collection to add the outputs to.
    trainable: If `True`, also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for `variable_scope`.

  Returns:
    A tensor representing the output of the operation.

  Raises:
    ValueError: If `data_format` is invalid.
    ValueError: If both `rate` and `stride` are not uniformly 1.
  """
  if data_format not in [None, 'NWC', 'NCW', 'NHWC', 'NCHW', 'NDHWC', 'NCDHW']:
    raise ValueError('Invalid data_format: %r' % (data_format,))

  # pylint: disable=protected-access
  layer_variable_getter = layers._build_variable_getter({
      'bias': 'biases',
      'kernel': 'weights'
  })
  # pylint: enable=protected-access
  with tf.variable_scope(
      scope, 'Conv', [inputs], reuse=reuse,
      custom_getter=layer_variable_getter) as sc:
    inputs = tf.convert_to_tensor(inputs)
    input_rank = inputs.get_shape().ndims
    if input_rank != 4:
      raise ValueError('Convolution expects input with rank %d, got %d' %
                       (4, input_rank))

    data_format = ('channels_first' if data_format and
                   data_format.startswith('NC') else 'channels_last')
    layer = Conv2D(
        filters=num_outputs,
        kernel_size=kernel_size,
        strides=stride,
        padding=padding,
        data_format=data_format,
        dilation_rate=rate,
        activation=None,
        use_bias=not normalizer_fn and biases_initializer,
        kernel_initializer=weights_initializer,
        bias_initializer=biases_initializer,
        kernel_regularizer=weights_regularizer,
        bias_regularizer=biases_regularizer,
        use_weight_standardization=use_weight_standardization,
        activity_regularizer=None,
        trainable=trainable,
        name=sc.name,
        dtype=inputs.dtype.base_dtype,
        _scope=sc,
        _reuse=reuse)
    outputs = layer.apply(inputs)

    # Add variables to collections.
    # pylint: disable=protected-access
    layers._add_variable_to_collections(layer.kernel, variables_collections,
                                        'weights')
    if layer.use_bias:
      layers._add_variable_to_collections(layer.bias, variables_collections,
                                          'biases')
    # pylint: enable=protected-access
    if normalizer_fn is not None:
      normalizer_params = normalizer_params or {}
      outputs = normalizer_fn(outputs, **normalizer_params)

    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
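

# Illustrative usage sketch (not part of the original module): combines weight
# standardization with Group Normalization, the pairing described in the module
# docstring above. The filter count, kernel size, and group count are arbitrary
# example values.
def _example_conv2d_with_ws_and_gn(images):
  """Applies a weight-standardized 3x3 convolution followed by GroupNorm.

  Args:
    images: A 4-D float Tensor of shape [batch, height, width, channels].

  Returns:
    A 4-D Tensor with 64 output channels.
  """
  return conv2d(
      images,
      num_outputs=64,
      kernel_size=3,
      use_weight_standardization=True,
      normalizer_fn=contrib_layers.group_norm,
      normalizer_params={'groups': 32})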


def conv2d_same(inputs, num_outputs, kernel_size, stride, rate=1, scope=None):
  """Strided 2-D convolution with 'SAME' padding.

  When stride > 1, then we do explicit zero-padding, followed by conv2d with
  'VALID' padding.

  Note that

     net = conv2d_same(inputs, num_outputs, 3, stride=stride)

  is equivalent to

     net = conv2d(inputs, num_outputs, 3, stride=1, padding='SAME')
     net = subsample(net, factor=stride)

  whereas

     net = conv2d(inputs, num_outputs, 3, stride=stride, padding='SAME')

  is different when the input's height or width is even, which is why we add
  the current function. For more details, see
  ResnetUtilsTest.testConv2DSameEven(). A worked example follows this function.

  Args:
    inputs: A 4-D tensor of size [batch, height_in, width_in, channels].
    num_outputs: An integer, the number of output filters.
    kernel_size: An integer with the kernel_size of the filters.
    stride: An integer, the output stride.
    rate: An integer, rate for atrous convolution.
    scope: Scope.

  Returns:
    output: A 4-D tensor of size [batch, height_out, width_out, channels] with
      the convolution output.
  """
  if stride == 1:
    return conv2d(
        inputs,
        num_outputs,
        kernel_size,
        stride=1,
        rate=rate,
        padding='SAME',
        scope=scope)
  else:
    # Pad explicitly (rather than relying on 'SAME' padding) so the result
    # matches conv2d + subsample even when the input height or width is even.
    kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
    pad_total = kernel_size_effective - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg
    inputs = tf.pad(inputs,
                    [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
    return conv2d(
        inputs,
        num_outputs,
        kernel_size,
        stride=stride,
        rate=rate,
        padding='VALID',
        scope=scope)
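

# Worked example (not part of the original module): with kernel_size=3 and
# rate=2, the effective kernel size is 3 + (3 - 1) * (2 - 1) = 5, so for
# stride > 1 conv2d_same pads height and width by pad_beg=2 and pad_end=2
# before applying the VALID convolution. The filter count below is an
# arbitrary example value.
def _example_conv2d_same(images):
  """Applies a stride-2, rate-2, 3x3 'SAME' convolution with 64 filters.

  Args:
    images: A 4-D float Tensor of shape [batch, height, width, channels].

  Returns:
    A 4-D Tensor with 64 output channels and roughly halved spatial size.
  """
  return conv2d_same(images, num_outputs=64, kernel_size=3, stride=2, rate=2)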