# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Builder function to construct tf-slim arg_scope for convolution, fc ops."""
import tensorflow.compat.v1 as tf
import tf_slim as slim

from object_detection.core import freezable_batch_norm
from object_detection.protos import hyperparams_pb2
from object_detection.utils import context_manager


class KerasLayerHyperparams(object):
  """A hyperparameter configuration object for Keras layers used in
  Object Detection models.
  """

  def __init__(self, hyperparams_config):
    """Builds keras hyperparameter config for layers based on the proto config.

    It automatically converts from Slim layer hyperparameter configs to
    Keras layer hyperparameters. Namely, it:
    - Builds Keras initializers/regularizers instead of Slim ones
    - sets weights_regularizer/initializer to kernel_regularizer/initializer
    - converts batchnorm decay to momentum
    - converts Slim l2 regularizer weights to the equivalent Keras l2 weights

    Contains a hyperparameter configuration for ops that specifies kernel
    initializer, kernel regularizer, activation. Also contains parameters for
    batch norm operators based on the configuration.

    Note that if the batch_norm parameters are not specified in the config
    (i.e. left to default) then batch norm is excluded from the config.

    Args:
      hyperparams_config: hyperparams.proto object containing
        hyperparameters.

    Raises:
      ValueError: if hyperparams_config is not of type hyperparams.Hyperparams.
    """
    if not isinstance(hyperparams_config,
                      hyperparams_pb2.Hyperparams):
      raise ValueError('hyperparams_config not of type '
                       'hyperparams_pb.Hyperparams.')

    self._batch_norm_params = None
    if hyperparams_config.HasField('batch_norm'):
      self._batch_norm_params = _build_keras_batch_norm_params(
          hyperparams_config.batch_norm)

    self._activation_fn = _build_activation_fn(hyperparams_config.activation)
    # TODO(kaftan): Unclear if these kwargs apply to separable & depthwise conv
    # (those might use depthwise_* instead of kernel_*).
    # We should probably switch to using build_conv2d_layer and
    # build_depthwise_conv2d_layer methods instead.
    self._op_params = {
        'kernel_regularizer': _build_keras_regularizer(
            hyperparams_config.regularizer),
        'kernel_initializer': _build_initializer(
            hyperparams_config.initializer, build_for_keras=True),
        'activation': _build_activation_fn(hyperparams_config.activation)
    }

  def use_batch_norm(self):
    return self._batch_norm_params is not None

  def batch_norm_params(self, **overrides):
    """Returns a dict containing batchnorm layer construction hyperparameters.

    Optionally overrides values in the batchnorm hyperparam dict. Overrides
    only apply to individual calls of this method, and do not affect
    future calls.

    Args:
      **overrides: keyword arguments to override in the hyperparams dictionary.

    Returns:
      dict containing the layer construction keyword arguments, with
      values overridden by the `overrides` keyword arguments.
    """
    if self._batch_norm_params is None:
      new_batch_norm_params = dict()
    else:
      new_batch_norm_params = self._batch_norm_params.copy()
    new_batch_norm_params.update(overrides)
    return new_batch_norm_params

  def build_batch_norm(self, training=None, **overrides):
    """Returns a Batch Normalization layer with the appropriate hyperparams.

    If the hyperparams are configured to not use batch normalization,
    this will return a Keras Lambda layer that only applies tf.identity,
    without doing any normalization.

    Optionally overrides values in the batch_norm hyperparam dict. Overrides
    only apply to individual calls of this method, and do not affect
    future calls.

    Args:
      training: if True, the normalization layer will normalize using the
        batch statistics. If False, the normalization layer will be frozen and
        will act as if it is being used for inference. If None, the layer
        will look up the Keras learning phase at `call` time to decide what to
        do.
      **overrides: batch normalization construction args to override from the
        batch_norm hyperparams dictionary.

    Returns:
      Either a FreezableBatchNorm layer (if use_batch_norm() is True), or a
      Keras Lambda layer that applies the identity (if use_batch_norm() is
      False).
    """
    if self.use_batch_norm():
      return freezable_batch_norm.FreezableBatchNorm(
          training=training,
          **self.batch_norm_params(**overrides)
      )
    else:
      return tf.keras.layers.Lambda(tf.identity)

  def build_activation_layer(self, name='activation'):
    """Returns a Keras layer that applies the desired activation function.

    Args:
      name: The name to assign the Keras layer.

    Returns:
      A Keras Lambda layer that applies the activation function specified in
      the hyperparam config, or applies the identity if the activation
      function is None.
    """
    if self._activation_fn:
      return tf.keras.layers.Lambda(self._activation_fn, name=name)
    else:
      return tf.keras.layers.Lambda(tf.identity, name=name)

  def params(self, include_activation=False, **overrides):
    """Returns a dict containing the layer construction hyperparameters to use.

    Optionally overrides values in the returned dict. Overrides
    only apply to individual calls of this method, and do not affect
    future calls.

    Args:
      include_activation: If False, activation in the returned dictionary will
        be set to `None`, and the activation must be applied via a separate
        layer created by `build_activation_layer`. If True, `activation` in
        the output param dictionary will be set to the activation function
        specified in the hyperparams config.
      **overrides: keyword arguments to override in the hyperparams dictionary.

    Returns:
      dict containing the layer construction keyword arguments, with
      values overridden by the `overrides` keyword arguments.
    """
    new_params = self._op_params.copy()
    new_params['activation'] = None
    if include_activation:
      new_params['activation'] = self._activation_fn
    # When batch norm applies a learned center (beta), the preceding conv/fc
    # layer does not need its own bias term.
    if self.use_batch_norm() and self.batch_norm_params()['center']:
      new_params['use_bias'] = False
    else:
      new_params['use_bias'] = True
    new_params.update(**overrides)
    return new_params
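

# Illustrative usage sketch (not part of the original module API): the helper
# below shows how a KerasLayerHyperparams object is typically built from a
# text-format Hyperparams proto and then used to parameterize Keras layers.
# The proto text and the helper name `_example_keras_hyperparams_usage` are
# assumptions chosen for illustration only.
def _example_keras_hyperparams_usage():  # pragma: no cover
  """Sketch of typical KerasLayerHyperparams usage."""
  from google.protobuf import text_format  # pylint: disable=g-import-not-at-top
  config = text_format.Parse(
      """
      regularizer { l2_regularizer { weight: 0.0004 } }
      initializer { truncated_normal_initializer { mean: 0.0 stddev: 0.03 } }
      activation: RELU_6
      batch_norm { decay: 0.997 center: true scale: true epsilon: 0.001 }
      """, hyperparams_pb2.Hyperparams())
  hyperparams = KerasLayerHyperparams(config)
  # Build a conv layer with the configured kernel initializer/regularizer and
  # no bias (batch norm provides the centering term), followed by separate
  # batch norm and activation layers.
  conv = tf.keras.layers.Conv2D(
      32, kernel_size=3, padding='same',
      **hyperparams.params(include_activation=False))
  batch_norm = hyperparams.build_batch_norm(training=True)
  activation = hyperparams.build_activation_layer(name='relu6')
  return conv, batch_norm, activation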


def build(hyperparams_config, is_training):
  """Builds tf-slim arg_scope for convolution ops based on the config.

  Returns an arg_scope to use for convolution ops containing weights
  initializer, weights regularizer, activation function, batch norm function
  and batch norm parameters based on the configuration.

  Note that if no normalization parameters are specified in the config
  (i.e. left to default) then both batch norm and group norm are excluded
  from the arg_scope.

  The batch norm parameters are set for updates based on `is_training` argument
  and hyperparams_config.batch_norm.train parameter. During training, they are
  updated only if batch_norm.train parameter is true. However, during eval, no
  updates are made to the batch norm variables. In both cases, their current
  values are used during forward pass.

  Args:
    hyperparams_config: hyperparams.proto object containing
      hyperparameters.
    is_training: Whether the network is in training mode.

  Returns:
    arg_scope_fn: A function to construct tf-slim arg_scope containing
      hyperparameters for ops.

  Raises:
    ValueError: if hyperparams_config is not of type hyperparams.Hyperparams.
  """
  if not isinstance(hyperparams_config,
                    hyperparams_pb2.Hyperparams):
    raise ValueError('hyperparams_config not of type '
                     'hyperparams_pb.Hyperparams.')

  normalizer_fn = None
  batch_norm_params = None
  if hyperparams_config.HasField('batch_norm'):
    normalizer_fn = slim.batch_norm
    batch_norm_params = _build_batch_norm_params(
        hyperparams_config.batch_norm, is_training)
  if hyperparams_config.HasField('group_norm'):
    normalizer_fn = slim.group_norm

  affected_ops = [slim.conv2d, slim.separable_conv2d, slim.conv2d_transpose]
  if hyperparams_config.HasField('op') and (
      hyperparams_config.op == hyperparams_pb2.Hyperparams.FC):
    affected_ops = [slim.fully_connected]

  def scope_fn():
    with (slim.arg_scope([slim.batch_norm], **batch_norm_params)
          if batch_norm_params is not None else
          context_manager.IdentityContextManager()):
      with slim.arg_scope(
          affected_ops,
          weights_regularizer=_build_slim_regularizer(
              hyperparams_config.regularizer),
          weights_initializer=_build_initializer(
              hyperparams_config.initializer),
          activation_fn=_build_activation_fn(hyperparams_config.activation),
          normalizer_fn=normalizer_fn) as sc:
        return sc

  return scope_fn
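

# Illustrative usage sketch (not part of the original module API): `build`
# returns a zero-argument function whose result is entered via slim.arg_scope
# so that convolution ops pick up the configured hyperparameters. The proto
# text and the helper name `_example_slim_arg_scope_usage` are assumptions for
# illustration, and the sketch presumes a TF1-style graph context.
def _example_slim_arg_scope_usage():  # pragma: no cover
  """Sketch of typical `build` usage with tf-slim convolution ops."""
  from google.protobuf import text_format  # pylint: disable=g-import-not-at-top
  config = text_format.Parse(
      """
      regularizer { l2_regularizer { weight: 0.0004 } }
      initializer { truncated_normal_initializer { stddev: 0.03 } }
      activation: RELU_6
      batch_norm { decay: 0.997 epsilon: 0.001 train: true }
      """, hyperparams_pb2.Hyperparams())
  scope_fn = build(config, is_training=True)
  images = tf.zeros([1, 64, 64, 3], dtype=tf.float32)
  # Ops created inside the arg_scope use the configured initializer,
  # regularizer, activation and batch norm settings.
  with slim.arg_scope(scope_fn()):
    net = slim.conv2d(images, 32, [3, 3])
  return net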


def _build_activation_fn(activation_fn):
  """Builds a callable activation from config.

  Args:
    activation_fn: hyperparams_pb2.Hyperparams.activation

  Returns:
    Callable activation function.

  Raises:
    ValueError: On unknown activation function.
  """
  if activation_fn == hyperparams_pb2.Hyperparams.NONE:
    return None
  if activation_fn == hyperparams_pb2.Hyperparams.RELU:
    return tf.nn.relu
  if activation_fn == hyperparams_pb2.Hyperparams.RELU_6:
    return tf.nn.relu6
  if activation_fn == hyperparams_pb2.Hyperparams.SWISH:
    return tf.nn.swish
  raise ValueError('Unknown activation function: {}'.format(activation_fn))


def _build_slim_regularizer(regularizer):
  """Builds a tf-slim regularizer from config.

  Args:
    regularizer: hyperparams_pb2.Hyperparams.regularizer proto.

  Returns:
    tf-slim regularizer.

  Raises:
    ValueError: On unknown regularizer.
  """
  regularizer_oneof = regularizer.WhichOneof('regularizer_oneof')
  if regularizer_oneof == 'l1_regularizer':
    return slim.l1_regularizer(scale=float(regularizer.l1_regularizer.weight))
  if regularizer_oneof == 'l2_regularizer':
    return slim.l2_regularizer(scale=float(regularizer.l2_regularizer.weight))
  if regularizer_oneof is None:
    return None
  raise ValueError('Unknown regularizer function: {}'.format(regularizer_oneof))


def _build_keras_regularizer(regularizer):
  """Builds a keras regularizer from config.

  Args:
    regularizer: hyperparams_pb2.Hyperparams.regularizer proto.

  Returns:
    Keras regularizer.

  Raises:
    ValueError: On unknown regularizer.
  """
  regularizer_oneof = regularizer.WhichOneof('regularizer_oneof')
  if regularizer_oneof == 'l1_regularizer':
    return tf.keras.regularizers.l1(float(regularizer.l1_regularizer.weight))
  if regularizer_oneof == 'l2_regularizer':
    # The Keras L2 regularizer weight differs from the Slim L2 regularizer
    # weight by a factor of 2.
    return tf.keras.regularizers.l2(
        float(regularizer.l2_regularizer.weight * 0.5))
  if regularizer_oneof is None:
    return None
  raise ValueError('Unknown regularizer function: {}'.format(regularizer_oneof))
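

# Note on the factor of 2 in _build_keras_regularizer (illustrative
# arithmetic): slim.l2_regularizer is built on tf.nn.l2_loss and penalizes
#   weight * sum(w ** 2) / 2,
# whereas tf.keras.regularizers.l2(l) penalizes l * sum(w ** 2). Passing
# l = weight / 2 therefore reproduces the Slim penalty exactly; e.g. a config
# weight of 0.0004 becomes a Keras l2 factor of 0.0002.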


def _build_initializer(initializer, build_for_keras=False):
  """Build a tf initializer from config.

  Args:
    initializer: hyperparams_pb2.Hyperparams.initializer proto.
    build_for_keras: Whether the initializers should be built for Keras
      operators. If false builds for Slim.

  Returns:
    tf initializer.

  Raises:
    ValueError: On unknown initializer.
  """
  initializer_oneof = initializer.WhichOneof('initializer_oneof')
  if initializer_oneof == 'truncated_normal_initializer':
    return tf.truncated_normal_initializer(
        mean=initializer.truncated_normal_initializer.mean,
        stddev=initializer.truncated_normal_initializer.stddev)
  if initializer_oneof == 'random_normal_initializer':
    return tf.random_normal_initializer(
        mean=initializer.random_normal_initializer.mean,
        stddev=initializer.random_normal_initializer.stddev)
  if initializer_oneof == 'variance_scaling_initializer':
    enum_descriptor = (hyperparams_pb2.VarianceScalingInitializer.
                       DESCRIPTOR.enum_types_by_name['Mode'])
    mode = enum_descriptor.values_by_number[initializer.
                                            variance_scaling_initializer.
                                            mode].name
    if build_for_keras:
      if initializer.variance_scaling_initializer.uniform:
        return tf.variance_scaling_initializer(
            scale=initializer.variance_scaling_initializer.factor,
            mode=mode.lower(),
            distribution='uniform')
      else:
        # In TF 1.9 release and earlier, the truncated_normal distribution was
        # not supported correctly. So, in these earlier versions of tensorflow,
        # the ValueError will be raised, and we manually truncate the
        # distribution scale.
        #
        # It is insufficient to just set distribution to `normal` from the
        # start, because the `normal` distribution in newer Tensorflow versions
        # creates a truncated distribution, whereas it created untruncated
        # distributions in older versions.
        try:
          return tf.variance_scaling_initializer(
              scale=initializer.variance_scaling_initializer.factor,
              mode=mode.lower(),
              distribution='truncated_normal')
        except ValueError:
          truncate_constant = 0.87962566103423978
          truncated_scale = initializer.variance_scaling_initializer.factor / (
              truncate_constant * truncate_constant
          )
          return tf.variance_scaling_initializer(
              scale=truncated_scale,
              mode=mode.lower(),
              distribution='normal')
    else:
      return slim.variance_scaling_initializer(
          factor=initializer.variance_scaling_initializer.factor,
          mode=mode,
          uniform=initializer.variance_scaling_initializer.uniform)
  if initializer_oneof is None:
    return None
  raise ValueError('Unknown initializer function: {}'.format(
      initializer_oneof))
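

# Notes on _build_initializer (illustrative): the proto's FAN_IN / FAN_OUT /
# FAN_AVG mode names are passed verbatim to slim.variance_scaling_initializer
# and lower-cased ('fan_in', ...) for tf.variance_scaling_initializer. The
# constant 0.87962566103423978 is the standard deviation of a unit normal
# truncated to [-2, 2]; dividing the requested scale by its square compensates
# for the variance lost to truncation in older TF releases, where the 'normal'
# distribution drew truncated samples.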


def _build_batch_norm_params(batch_norm, is_training):
  """Build a dictionary of batch_norm params from config.

  Args:
    batch_norm: hyperparams_pb2.ConvHyperparams.batch_norm proto.
    is_training: Whether the model is in training mode.

  Returns:
    A dictionary containing batch_norm parameters.
  """
  batch_norm_params = {
      'decay': batch_norm.decay,
      'center': batch_norm.center,
      'scale': batch_norm.scale,
      'epsilon': batch_norm.epsilon,
      # Remove is_training parameter from here and deprecate it in the proto
      # once we refactor Faster RCNN models to set is_training through an outer
      # arg_scope in the meta architecture.
      'is_training': is_training and batch_norm.train,
  }
  return batch_norm_params


def _build_keras_batch_norm_params(batch_norm):
  """Build a dictionary of Keras BatchNormalization params from config.

  Args:
    batch_norm: hyperparams_pb2.ConvHyperparams.batch_norm proto.

  Returns:
    A dictionary containing Keras BatchNormalization parameters.
  """
  # Note: Although decay is defined to be 1 - momentum in batch_norm,
  # decay in the slim batch_norm layers was erroneously defined and is
  # actually the same as momentum in the Keras batch_norm layers.
  # For context, see: github.com/keras-team/keras/issues/6839
  batch_norm_params = {
      'momentum': batch_norm.decay,
      'center': batch_norm.center,
      'scale': batch_norm.scale,
      'epsilon': batch_norm.epsilon,
  }
  return batch_norm_params
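

# Note on the decay -> momentum mapping above (illustrative): both Slim
# batch_norm `decay` and Keras BatchNormalization `momentum` update the moving
# statistics as
#   moving_mean = coef * moving_mean + (1 - coef) * batch_mean,
# so a config value such as decay: 0.997 is passed through unchanged as
# momentum=0.997.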