|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""A wrapper around the Keras MobilenetV1 models for object detection.""" |
|
|
|
from __future__ import absolute_import |
|
from __future__ import division |
|
from __future__ import print_function |
|
|
|
import tensorflow as tf |
|
|
|
from object_detection.core import freezable_batch_norm |
|
|
|
|
|
def _fixed_padding(inputs, kernel_size, rate=1): |
|
"""Pads the input along the spatial dimensions independently of input size. |
|
|
|
Pads the input such that if it was used in a convolution with 'VALID' padding, |
|
the output would have the same dimensions as if the unpadded input was used |
|
in a convolution with 'SAME' padding. |
|
|
|
Args: |
|
inputs: A tensor of size [batch, height_in, width_in, channels]. |
|
kernel_size: The kernel to be used in the conv2d or max_pool2d operation. |
|
rate: An integer, rate for atrous convolution. |
|
|
|
Returns: |
|
output: A tensor of size [batch, height_out, width_out, channels] with the |
|
input, either intact (if kernel_size == 1) or padded (if kernel_size > 1). |
|
""" |
|
kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1), |
|
kernel_size[0] + (kernel_size[0] - 1) * (rate - 1)] |
|
pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1] |
|
pad_beg = [pad_total[0] // 2, pad_total[1] // 2] |
|
pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]] |
|
padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg[0], pad_end[0]], |
|
[pad_beg[1], pad_end[1]], [0, 0]]) |
|
return padded_inputs |
|
|
|
|
|
class _LayersOverride(object): |
|
"""Alternative Keras layers interface for the Keras MobileNetV1.""" |
|
|
|
def __init__(self, |
|
batchnorm_training, |
|
default_batchnorm_momentum=0.999, |
|
conv_hyperparams=None, |
|
use_explicit_padding=False, |
|
alpha=1.0, |
|
min_depth=None): |
|
"""Alternative tf.keras.layers interface, for use by the Keras MobileNetV1. |
|
|
|
It is used by the Keras applications kwargs injection API to |
|
modify the MobilenetV1 Keras application with changes required by |
|
the Object Detection API. |
|
|
|
These injected interfaces make the following changes to the network: |
|
|
|
- Applies the Object Detection hyperparameter configuration |
|
- Supports FreezableBatchNorms |
|
- Adds support for a min number of filters for each layer |
|
- Makes the `alpha` parameter affect the final convolution block even if it |
|
is less than 1.0 |
|
- Adds support for explicit padding of convolutions |
|
|
|
Args: |
|
batchnorm_training: Bool. Assigned to Batch norm layer `training` param |
|
when constructing `freezable_batch_norm.FreezableBatchNorm` layers. |
|
default_batchnorm_momentum: Float. When 'conv_hyperparams' is None, |
|
batch norm layers will be constructed using this value as the momentum. |
|
conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object |
|
containing hyperparameters for convolution ops. Optionally set to `None` |
|
to use default mobilenet_v1 layer builders. |
|
use_explicit_padding: If True, use 'valid' padding for convolutions, |
|
but explicitly pre-pads inputs so that the output dimensions are the |
|
same as if 'same' padding were used. Off by default. |
|
alpha: The width multiplier referenced in the MobileNetV1 paper. It |
|
modifies the number of filters in each convolutional layer. It's called |
|
depth multiplier in Keras application MobilenetV1. |
|
min_depth: Minimum number of filters in the convolutional layers. |
|
""" |
|
self._alpha = alpha |
|
self._batchnorm_training = batchnorm_training |
|
self._default_batchnorm_momentum = default_batchnorm_momentum |
|
self._conv_hyperparams = conv_hyperparams |
|
self._use_explicit_padding = use_explicit_padding |
|
self._min_depth = min_depth |
|
self.regularizer = tf.keras.regularizers.l2(0.00004 * 0.5) |
|
self.initializer = tf.truncated_normal_initializer(stddev=0.09) |
|
|
|
def _FixedPaddingLayer(self, kernel_size, rate=1): |
|
return tf.keras.layers.Lambda( |
|
lambda x: _fixed_padding(x, kernel_size, rate)) |
|
|
|
def Conv2D(self, filters, kernel_size, **kwargs): |
|
"""Builds a Conv2D layer according to the current Object Detection config. |
|
|
|
Overrides the Keras MobileNetV1 application's convolutions with ones that |
|
follow the spec specified by the Object Detection hyperparameters. |
|
|
|
Args: |
|
filters: The number of filters to use for the convolution. |
|
kernel_size: The kernel size to specify the height and width of the 2D |
|
convolution window. |
|
**kwargs: Keyword args specified by the Keras application for |
|
constructing the convolution. |
|
|
|
Returns: |
|
A one-arg callable that will either directly apply a Keras Conv2D layer to |
|
the input argument, or that will first pad the input then apply a Conv2D |
|
layer. |
|
""" |
|
|
|
filters = int(filters * self._alpha) |
|
if self._min_depth and filters < self._min_depth: |
|
filters = self._min_depth |
|
|
|
if self._conv_hyperparams: |
|
kwargs = self._conv_hyperparams.params(**kwargs) |
|
else: |
|
kwargs['kernel_regularizer'] = self.regularizer |
|
kwargs['kernel_initializer'] = self.initializer |
|
|
|
kwargs['padding'] = 'same' |
|
if self._use_explicit_padding and kernel_size > 1: |
|
kwargs['padding'] = 'valid' |
|
def padded_conv(features): |
|
padded_features = self._FixedPaddingLayer(kernel_size)(features) |
|
return tf.keras.layers.Conv2D( |
|
filters, kernel_size, **kwargs)(padded_features) |
|
return padded_conv |
|
else: |
|
return tf.keras.layers.Conv2D(filters, kernel_size, **kwargs) |
|
|
|
def DepthwiseConv2D(self, kernel_size, **kwargs): |
|
"""Builds a DepthwiseConv2D according to the Object Detection config. |
|
|
|
Overrides the Keras MobileNetV2 application's convolutions with ones that |
|
follow the spec specified by the Object Detection hyperparameters. |
|
|
|
Args: |
|
kernel_size: The kernel size to specify the height and width of the 2D |
|
convolution window. |
|
**kwargs: Keyword args specified by the Keras application for |
|
constructing the convolution. |
|
|
|
Returns: |
|
A one-arg callable that will either directly apply a Keras DepthwiseConv2D |
|
layer to the input argument, or that will first pad the input then apply |
|
the depthwise convolution. |
|
""" |
|
if self._conv_hyperparams: |
|
kwargs = self._conv_hyperparams.params(**kwargs) |
|
else: |
|
kwargs['depthwise_initializer'] = self.initializer |
|
|
|
kwargs['padding'] = 'same' |
|
if self._use_explicit_padding: |
|
kwargs['padding'] = 'valid' |
|
def padded_depthwise_conv(features): |
|
padded_features = self._FixedPaddingLayer(kernel_size)(features) |
|
return tf.keras.layers.DepthwiseConv2D( |
|
kernel_size, **kwargs)(padded_features) |
|
return padded_depthwise_conv |
|
else: |
|
return tf.keras.layers.DepthwiseConv2D(kernel_size, **kwargs) |
|
|
|
def BatchNormalization(self, **kwargs): |
|
"""Builds a normalization layer. |
|
|
|
Overrides the Keras application batch norm with the norm specified by the |
|
Object Detection configuration. |
|
|
|
Args: |
|
**kwargs: Only the name is used, all other params ignored. |
|
Required for matching `layers.BatchNormalization` calls in the Keras |
|
application. |
|
|
|
Returns: |
|
A normalization layer specified by the Object Detection hyperparameter |
|
configurations. |
|
""" |
|
name = kwargs.get('name') |
|
if self._conv_hyperparams: |
|
return self._conv_hyperparams.build_batch_norm( |
|
training=self._batchnorm_training, |
|
name=name) |
|
else: |
|
return freezable_batch_norm.FreezableBatchNorm( |
|
training=self._batchnorm_training, |
|
epsilon=1e-3, |
|
momentum=self._default_batchnorm_momentum, |
|
name=name) |
|
|
|
def Input(self, shape): |
|
"""Builds an Input layer. |
|
|
|
Overrides the Keras application Input layer with one that uses a |
|
tf.placeholder_with_default instead of a tf.placeholder. This is necessary |
|
to ensure the application works when run on a TPU. |
|
|
|
Args: |
|
shape: The shape for the input layer to use. (Does not include a dimension |
|
for the batch size). |
|
Returns: |
|
An input layer for the specified shape that internally uses a |
|
placeholder_with_default. |
|
""" |
|
default_size = 224 |
|
default_batch_size = 1 |
|
shape = list(shape) |
|
default_shape = [default_size if dim is None else dim for dim in shape] |
|
|
|
input_tensor = tf.constant(0.0, shape=[default_batch_size] + default_shape) |
|
|
|
placeholder_with_default = tf.placeholder_with_default( |
|
input=input_tensor, shape=[None] + shape) |
|
return tf.keras.layers.Input(tensor=placeholder_with_default) |
|
|
|
|
|
def ReLU(self, *args, **kwargs): |
|
"""Builds an activation layer. |
|
|
|
Overrides the Keras application ReLU with the activation specified by the |
|
Object Detection configuration. |
|
|
|
Args: |
|
*args: Ignored, required to match the `tf.keras.ReLU` interface |
|
**kwargs: Only the name is used, |
|
required to match `tf.keras.ReLU` interface |
|
|
|
Returns: |
|
An activation layer specified by the Object Detection hyperparameter |
|
configurations. |
|
""" |
|
name = kwargs.get('name') |
|
if self._conv_hyperparams: |
|
return self._conv_hyperparams.build_activation_layer(name=name) |
|
else: |
|
return tf.keras.layers.Lambda(tf.nn.relu6, name=name) |
|
|
|
|
|
|
|
def ZeroPadding2D(self, padding, **kwargs): |
|
"""Replaces explicit padding in the Keras application with a no-op. |
|
|
|
Args: |
|
padding: The padding values for image height and width. |
|
**kwargs: Ignored, required to match the Keras applications usage. |
|
|
|
Returns: |
|
A no-op identity lambda. |
|
""" |
|
return lambda x: x |
|
|
|
|
|
|
|
def __getattr__(self, item): |
|
return getattr(tf.keras.layers, item) |
|
|
|
|
|
|
|
def mobilenet_v1(batchnorm_training, |
|
default_batchnorm_momentum=0.9997, |
|
conv_hyperparams=None, |
|
use_explicit_padding=False, |
|
alpha=1.0, |
|
min_depth=None, |
|
**kwargs): |
|
"""Instantiates the MobileNetV1 architecture, modified for object detection. |
|
|
|
This wraps the MobileNetV1 tensorflow Keras application, but uses the |
|
Keras application's kwargs-based monkey-patching API to override the Keras |
|
architecture with the following changes: |
|
|
|
- Changes the default batchnorm momentum to 0.9997 |
|
- Applies the Object Detection hyperparameter configuration |
|
- Supports FreezableBatchNorms |
|
- Adds support for a min number of filters for each layer |
|
- Makes the `alpha` parameter affect the final convolution block even if it |
|
is less than 1.0 |
|
- Adds support for explicit padding of convolutions |
|
- Makes the Input layer use a tf.placeholder_with_default instead of a |
|
tf.placeholder, to work on TPUs. |
|
|
|
Args: |
|
batchnorm_training: Bool. Assigned to Batch norm layer `training` param |
|
when constructing `freezable_batch_norm.FreezableBatchNorm` layers. |
|
default_batchnorm_momentum: Float. When 'conv_hyperparams' is None, |
|
batch norm layers will be constructed using this value as the momentum. |
|
conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object |
|
containing hyperparameters for convolution ops. Optionally set to `None` |
|
to use default mobilenet_v1 layer builders. |
|
use_explicit_padding: If True, use 'valid' padding for convolutions, |
|
but explicitly pre-pads inputs so that the output dimensions are the |
|
same as if 'same' padding were used. Off by default. |
|
alpha: The width multiplier referenced in the MobileNetV1 paper. It |
|
modifies the number of filters in each convolutional layer. |
|
min_depth: Minimum number of filters in the convolutional layers. |
|
**kwargs: Keyword arguments forwarded directly to the |
|
`tf.keras.applications.Mobilenet` method that constructs the Keras |
|
model. |
|
|
|
Returns: |
|
A Keras model instance. |
|
""" |
|
layers_override = _LayersOverride( |
|
batchnorm_training, |
|
default_batchnorm_momentum=default_batchnorm_momentum, |
|
conv_hyperparams=conv_hyperparams, |
|
use_explicit_padding=use_explicit_padding, |
|
min_depth=min_depth, |
|
alpha=alpha) |
|
return tf.keras.applications.MobileNet( |
|
alpha=alpha, layers=layers_override, **kwargs) |
|
|
|
|