# Lint as: python2, python3
# Copyright 2019 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Augment slim.conv2d with optional Weight Standardization (WS). | |
WS is a normalization method to accelerate micro-batch training. When used with | |
Group Normalization and trained with 1 image/GPU, WS is able to match or | |
outperform the performances of BN trained with large batch sizes. | |
[1] Siyuan Qiao, Huiyu Wang, Chenxi Liu, Wei Shen, Alan Yuille | |
Weight Standardization. arXiv:1903.10520 | |
[2] Lei Huang, Xianglong Liu, Yang Liu, Bo Lang, Dacheng Tao | |
Centered Weight Normalization in Accelerating Training of Deep Neural | |
Networks. ICCV 2017 | |
""" | |

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow.contrib import framework as contrib_framework
from tensorflow.contrib import layers as contrib_layers
from tensorflow.contrib.layers.python.layers import layers
from tensorflow.contrib.layers.python.layers import utils


class Conv2D(tf.keras.layers.Conv2D, tf.layers.Layer):
  """2D convolution layer (e.g. spatial convolution over images).

  This layer creates a convolution kernel that is convolved
  (actually cross-correlated) with the layer input to produce a tensor of
  outputs. If `use_bias` is True (and a `bias_initializer` is provided),
  a bias vector is created and added to the outputs. Finally, if
  `activation` is not `None`, it is applied to the outputs as well.

  A usage sketch follows the class definition.
  """

  def __init__(self,
               filters,
               kernel_size,
               strides=(1, 1),
               padding='valid',
               data_format='channels_last',
               dilation_rate=(1, 1),
               activation=None,
               use_bias=True,
               kernel_initializer=None,
               bias_initializer=tf.zeros_initializer(),
               kernel_regularizer=None,
               bias_regularizer=None,
               use_weight_standardization=False,
               activity_regularizer=None,
               kernel_constraint=None,
               bias_constraint=None,
               trainable=True,
               name=None,
               **kwargs):
    """Constructs the 2D convolution layer.

    Args:
      filters: Integer, the dimensionality of the output space (i.e. the number
        of filters in the convolution).
      kernel_size: An integer or tuple/list of 2 integers, specifying the
        height and width of the 2D convolution window. Can be a single integer
        to specify the same value for all spatial dimensions.
      strides: An integer or tuple/list of 2 integers, specifying the strides
        of the convolution along the height and width. Can be a single integer
        to specify the same value for all spatial dimensions. Specifying any
        stride value != 1 is incompatible with specifying any `dilation_rate`
        value != 1.
      padding: One of `"valid"` or `"same"` (case-insensitive).
      data_format: A string, one of `channels_last` (default) or
        `channels_first`. The ordering of the dimensions in the inputs.
        `channels_last` corresponds to inputs with shape `(batch, height,
        width, channels)` while `channels_first` corresponds to inputs with
        shape `(batch, channels, height, width)`.
      dilation_rate: An integer or tuple/list of 2 integers, specifying the
        dilation rate to use for dilated convolution. Can be a single integer
        to specify the same value for all spatial dimensions. Currently,
        specifying any `dilation_rate` value != 1 is incompatible with
        specifying any stride value != 1.
      activation: Activation function. Set it to None to maintain a linear
        activation.
      use_bias: Boolean, whether the layer uses a bias.
      kernel_initializer: An initializer for the convolution kernel.
      bias_initializer: An initializer for the bias vector. If None, the
        default initializer will be used.
      kernel_regularizer: Optional regularizer for the convolution kernel.
      bias_regularizer: Optional regularizer for the bias vector.
      use_weight_standardization: Boolean, whether the layer uses weight
        standardization.
      activity_regularizer: Optional regularizer function for the output.
      kernel_constraint: Optional projection function to be applied to the
        kernel after being updated by an `Optimizer` (e.g. used to implement
        norm constraints or value constraints for layer weights). The function
        must take as input the unprojected variable and must return the
        projected variable (which must have the same shape). Constraints are
        not safe to use when doing asynchronous distributed training.
      bias_constraint: Optional projection function to be applied to the bias
        after being updated by an `Optimizer`.
      trainable: Boolean, if `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
      name: A string, the name of the layer.
      **kwargs: Arbitrary keyword arguments passed to tf.keras.layers.Conv2D.
    """
    super(Conv2D, self).__init__(
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilation_rate=dilation_rate,
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
        activity_regularizer=activity_regularizer,
        kernel_constraint=kernel_constraint,
        bias_constraint=bias_constraint,
        trainable=trainable,
        name=name,
        **kwargs)
    self.use_weight_standardization = use_weight_standardization

  def call(self, inputs):
    if self.use_weight_standardization:
      # Standardize the kernel over its spatial and input-channel dimensions
      # so that each output filter has zero mean and unit variance.
      mean, var = tf.nn.moments(self.kernel, [0, 1, 2], keep_dims=True)
      kernel = (self.kernel - mean) / tf.sqrt(var + 1e-5)
      outputs = self._convolution_op(inputs, kernel)
    else:
      outputs = self._convolution_op(inputs, self.kernel)

    if self.use_bias:
      if self.data_format == 'channels_first':
        if self.rank == 1:
          # tf.nn.bias_add does not accept a 1D input tensor.
          bias = tf.reshape(self.bias, (1, self.filters, 1))
          outputs += bias
        else:
          outputs = tf.nn.bias_add(outputs, self.bias, data_format='NCHW')
      else:
        outputs = tf.nn.bias_add(outputs, self.bias, data_format='NHWC')

    if self.activation is not None:
      return self.activation(outputs)
    return outputs
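

# Illustrative usage sketch (not part of the original module): builds the
# weight-standardized layer above. The filter count, kernel size, and padding
# are arbitrary example values.
def _example_conv2d_ws_layer(images):
  """Applies a 3x3 weight-standardized convolution with 64 filters.

  Args:
    images: A 4-D float Tensor of shape [batch, height, width, channels].

  Returns:
    A 4-D Tensor with 64 output channels.
  """
  layer = Conv2D(
      filters=64,
      kernel_size=3,
      padding='same',
      use_weight_standardization=True)
  return layer(images)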


def conv2d(inputs,
           num_outputs,
           kernel_size,
           stride=1,
           padding='SAME',
           data_format=None,
           rate=1,
           activation_fn=tf.nn.relu,
           normalizer_fn=None,
           normalizer_params=None,
           weights_initializer=contrib_layers.xavier_initializer(),
           weights_regularizer=None,
           biases_initializer=tf.zeros_initializer(),
           biases_regularizer=None,
           use_weight_standardization=False,
           reuse=None,
           variables_collections=None,
           outputs_collections=None,
           trainable=True,
           scope=None):
  """Adds a 2D convolution followed by an optional batch_norm layer.

  `convolution` creates a variable called `weights`, representing the
  convolutional kernel, that is convolved (actually cross-correlated) with the
  `inputs` to produce a `Tensor` of activations. If a `normalizer_fn` is
  provided (such as `batch_norm`), it is then applied. Otherwise, if
  `normalizer_fn` is None and a `biases_initializer` is provided then a
  `biases` variable is created and added to the activations. Finally, if
  `activation_fn` is not `None`, it is applied to the activations as well.

  Performs atrous convolution with input stride/dilation rate equal to `rate`
  if a value > 1 for any dimension of `rate` is specified. In this case
  `stride` values != 1 are not supported.

  A usage sketch follows this function.

  Args:
    inputs: A Tensor of rank N+2 of shape `[batch_size] + input_spatial_shape +
      [in_channels]` if data_format does not start with "NC" (default), or
      `[batch_size, in_channels] + input_spatial_shape` if data_format starts
      with "NC".
    num_outputs: Integer, the number of output filters.
    kernel_size: A sequence of N positive integers specifying the spatial
      dimensions of the filters. Can be a single integer to specify the same
      value for all spatial dimensions.
    stride: A sequence of N positive integers specifying the stride at which to
      compute output. Can be a single integer to specify the same value for all
      spatial dimensions. Specifying any `stride` value != 1 is incompatible
      with specifying any `rate` value != 1.
    padding: One of `"VALID"` or `"SAME"`.
    data_format: A string or None. Specifies whether the channel dimension of
      the `input` and output is the last dimension (default, or if
      `data_format` does not start with "NC"), or the second dimension (if
      `data_format` starts with "NC"). For N=1, the valid values are "NWC"
      (default) and "NCW". For N=2, the valid values are "NHWC" (default) and
      "NCHW". For N=3, the valid values are "NDHWC" (default) and "NCDHW".
    rate: A sequence of N positive integers specifying the dilation rate to use
      for atrous convolution. Can be a single integer to specify the same value
      for all spatial dimensions. Specifying any `rate` value != 1 is
      incompatible with specifying any `stride` value != 1.
    activation_fn: Activation function. The default value is a ReLU function.
      Explicitly set it to None to skip it and maintain a linear activation.
    normalizer_fn: Normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are not created nor added.
      Defaults to None, i.e. no normalizer function.
    normalizer_params: Normalization function parameters.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None, biases are
      skipped.
    biases_regularizer: Optional regularizer for the biases.
    use_weight_standardization: Boolean, whether the layer uses weight
      standardization.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer, the scope must be given.
    variables_collections: Optional list of collections for all the variables
      or a dictionary containing a different list of collections per variable.
    outputs_collections: Collection to add the outputs to.
    trainable: If `True`, also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for `variable_scope`.

  Returns:
    A tensor representing the output of the operation.

  Raises:
    ValueError: If `data_format` is invalid.
    ValueError: If both `rate` and `stride` are not uniformly 1.
  """
  if data_format not in [None, 'NWC', 'NCW', 'NHWC', 'NCHW', 'NDHWC', 'NCDHW']:
    raise ValueError('Invalid data_format: %r' % (data_format,))

  # pylint: disable=protected-access
  layer_variable_getter = layers._build_variable_getter({
      'bias': 'biases',
      'kernel': 'weights'
  })
  # pylint: enable=protected-access
  with tf.variable_scope(
      scope, 'Conv', [inputs], reuse=reuse,
      custom_getter=layer_variable_getter) as sc:
    inputs = tf.convert_to_tensor(inputs)
    input_rank = inputs.get_shape().ndims
    if input_rank != 4:
      raise ValueError('Convolution expects input with rank %d, got %d' %
                       (4, input_rank))

    data_format = ('channels_first' if data_format and
                   data_format.startswith('NC') else 'channels_last')
    layer = Conv2D(
        filters=num_outputs,
        kernel_size=kernel_size,
        strides=stride,
        padding=padding,
        data_format=data_format,
        dilation_rate=rate,
        activation=None,
        use_bias=not normalizer_fn and biases_initializer,
        kernel_initializer=weights_initializer,
        bias_initializer=biases_initializer,
        kernel_regularizer=weights_regularizer,
        bias_regularizer=biases_regularizer,
        use_weight_standardization=use_weight_standardization,
        activity_regularizer=None,
        trainable=trainable,
        name=sc.name,
        dtype=inputs.dtype.base_dtype,
        _scope=sc,
        _reuse=reuse)
    outputs = layer.apply(inputs)

    # Add variables to collections.
    # pylint: disable=protected-access
    layers._add_variable_to_collections(layer.kernel, variables_collections,
                                        'weights')
    if layer.use_bias:
      layers._add_variable_to_collections(layer.bias, variables_collections,
                                          'biases')
    # pylint: enable=protected-access
    if normalizer_fn is not None:
      normalizer_params = normalizer_params or {}
      outputs = normalizer_fn(outputs, **normalizer_params)

    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
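

# Illustrative usage sketch (not part of the original module): combines weight
# standardization with Group Normalization, the pairing described in the module
# docstring above. The filter count, kernel size, and group count are arbitrary
# example values.
def _example_conv2d_with_ws_and_gn(images):
  """Applies a weight-standardized 3x3 convolution followed by GroupNorm.

  Args:
    images: A 4-D float Tensor of shape [batch, height, width, channels].

  Returns:
    A 4-D Tensor with 64 output channels.
  """
  return conv2d(
      images,
      num_outputs=64,
      kernel_size=3,
      use_weight_standardization=True,
      normalizer_fn=contrib_layers.group_norm,
      normalizer_params={'groups': 32})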


def conv2d_same(inputs, num_outputs, kernel_size, stride, rate=1, scope=None):
  """Strided 2-D convolution with 'SAME' padding.

  When stride > 1, then we do explicit zero-padding, followed by conv2d with
  'VALID' padding.

  Note that

     net = conv2d_same(inputs, num_outputs, 3, stride=stride)

  is equivalent to

     net = conv2d(inputs, num_outputs, 3, stride=1, padding='SAME')
     net = subsample(net, factor=stride)

  whereas

     net = conv2d(inputs, num_outputs, 3, stride=stride, padding='SAME')

  is different when the input's height or width is even, which is why we add
  the current function. For more details, see
  ResnetUtilsTest.testConv2DSameEven(). A worked example follows this function.

  Args:
    inputs: A 4-D tensor of size [batch, height_in, width_in, channels].
    num_outputs: An integer, the number of output filters.
    kernel_size: An integer with the kernel_size of the filters.
    stride: An integer, the output stride.
    rate: An integer, rate for atrous convolution.
    scope: Scope.

  Returns:
    output: A 4-D tensor of size [batch, height_out, width_out, channels] with
      the convolution output.
  """
  if stride == 1:
    return conv2d(
        inputs,
        num_outputs,
        kernel_size,
        stride=1,
        rate=rate,
        padding='SAME',
        scope=scope)
  else:
    # Pad explicitly (rather than relying on 'SAME' padding) so the result
    # matches conv2d + subsample even when the input height or width is even.
    kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
    pad_total = kernel_size_effective - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg
    inputs = tf.pad(inputs,
                    [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
    return conv2d(
        inputs,
        num_outputs,
        kernel_size,
        stride=stride,
        rate=rate,
        padding='VALID',
        scope=scope)
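

# Worked example (not part of the original module): with kernel_size=3 and
# rate=2, the effective kernel size is 3 + (3 - 1) * (2 - 1) = 5, so for
# stride > 1 conv2d_same pads height and width by pad_beg=2 and pad_end=2
# before applying the VALID convolution. The filter count below is an
# arbitrary example value.
def _example_conv2d_same(images):
  """Applies a stride-2, rate-2, 3x3 'SAME' convolution with 64 filters.

  Args:
    images: A 4-D float Tensor of shape [batch, height, width, channels].

  Returns:
    A 4-D Tensor with 64 output channels and roughly halved spatial size.
  """
  return conv2d_same(images, num_outputs=64, kernel_size=3, stride=2, rate=2)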