"""Convolutional Box Predictors with and without weight sharing.""" |
import collections |
import tensorflow as tf |
from object_detection.core import box_predictor |
from object_detection.utils import static_shape |
keras = tf.keras.layers |
class _NoopVariableScope(object): |
"""A dummy class that does not push any scope.""" |
def __enter__(self): |
return None |
def __exit__(self, exc_type, exc_value, traceback): |
return False |
class ConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
  """Convolutional Keras Box Predictor.

  Optionally add an intermediate 1x1 convolutional layer after features and
  predict in parallel branches box_encodings and
  class_predictions_with_background.

  Currently this box predictor assumes that predictions are "shared" across
  classes --- that is each anchor makes box predictions which do not depend
  on class.
  """

  def __init__(self,
               is_training,
               num_classes,
               box_prediction_heads,
               class_prediction_heads,
               other_heads,
               conv_hyperparams,
               num_layers_before_predictor,
               min_depth,
               max_depth,
               freeze_batchnorm,
               inplace_batchnorm_update,
               name=None):
    """Constructor.

    Args:
      is_training: Indicates whether the BoxPredictor is in training mode.
      num_classes: number of classes.  Note that num_classes *does not*
        include the background category, so if groundtruth labels take values
        in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
        assigned classification targets can range from {0,... K}).
      box_prediction_heads: A list of heads that predict the boxes.
      class_prediction_heads: A list of heads that predict the classes.
      other_heads: A dictionary mapping head names to lists of convolutional
        heads. `None` or an empty dictionary means there are no extra heads.
      conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
        containing hyperparameters for convolution ops.
      num_layers_before_predictor: Number of the additional conv layers before
        the predictor.
      min_depth: Minimum feature depth prior to predicting box encodings
        and class predictions.
      max_depth: Maximum feature depth prior to predicting box encodings
        and class predictions. If max_depth is set to 0, no additional
        feature map will be inserted before location and class predictions.
      freeze_batchnorm: Whether to freeze batch norm parameters during
        training or not. When training with a small batch size (e.g. 1), it is
        desirable to freeze batch norm update and use pretrained batch norm
        params.
      inplace_batchnorm_update: Whether to update batch norm moving average
        values inplace. When this is false train op must add a control
        dependency on tf.graphkeys.UPDATE_OPS collection in order to update
        batch norm statistics.
      name: A string name scope to assign to the model. If `None`, Keras
        will auto-generate one from the class name.

    Raises:
      ValueError: if min_depth > max_depth, or if the lists of box, class and
        other heads do not all have the same length.
    """
    super(ConvolutionalBoxPredictor, self).__init__(
        is_training, num_classes, freeze_batchnorm=freeze_batchnorm,
        inplace_batchnorm_update=inplace_batchnorm_update,
        name=name)
    if min_depth > max_depth:
      raise ValueError('min_depth should be less than or equal to max_depth')

    # Normalize a missing `other_heads` up front so that the length validation
    # below cannot fail with an AttributeError on `None`.
    other_heads = other_heads or {}

    if len(box_prediction_heads) != len(class_prediction_heads):
      raise ValueError('All lists of heads must be the same length.')
    for other_head_list in other_heads.values():
      if len(box_prediction_heads) != len(other_head_list):
        raise ValueError('All lists of heads must be the same length.')

    # The key constants are read from the imported `box_predictor` module so
    # that this class does not depend on module-level aliases being defined.
    self._prediction_heads = {
        box_predictor.BOX_ENCODINGS: box_prediction_heads,
        box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND:
            class_prediction_heads,
    }
    self._prediction_heads.update(other_heads)

    # Heads are always applied in sorted-name order.
    self._sorted_head_names = sorted(self._prediction_heads.keys())

    self._conv_hyperparams = conv_hyperparams
    self._min_depth = min_depth
    self._max_depth = max_depth
    self._num_layers_before_predictor = num_layers_before_predictor

    # One list of layers per input feature map; populated in `build`.
    self._shared_nets = []

  def build(self, input_shapes):
    """Creates the variables of the layer.

    For every input feature map, builds the (possibly empty) stack of
    1x1 convolution / batch norm / activation layers that is applied before
    the prediction heads.

    Args:
      input_shapes: A list with the shape of each input feature map. There
        must be exactly one entry per box prediction head.

    Raises:
      ValueError: if the number of inputs differs from the number of heads
        this predictor was constructed with.
    """
    num_heads = len(self._prediction_heads[box_predictor.BOX_ENCODINGS])
    if len(input_shapes) != num_heads:
      raise ValueError('This box predictor was constructed with %d heads, '
                       'but there are %d inputs.' %
                       (num_heads, len(input_shapes)))
    for stack_index, input_shape in enumerate(input_shapes):
      net = []

      # Clamp the input depth into [min_depth, max_depth] to get the depth of
      # the additional layers.
      features_depth = static_shape.get_depth(input_shape)
      depth = max(min(features_depth, self._max_depth), self._min_depth)
      tf.logging.info(
          'depth of additional conv before box predictor: {}'.format(depth))

      if depth > 0 and self._num_layers_before_predictor > 0:
        for i in range(self._num_layers_before_predictor):
          layer_name = ('SharedConvolutions_%d/Conv2d_%d_1x1_%d'
                        % (stack_index, i, depth))
          net.append(keras.Conv2D(depth, [1, 1],
                                  name=layer_name,
                                  padding='SAME',
                                  **self._conv_hyperparams.params()))
          net.append(self._conv_hyperparams.build_batch_norm(
              training=(self._is_training and not self._freeze_batchnorm),
              name=layer_name + '_norm'))
          net.append(self._conv_hyperparams.build_activation_layer(
              name=layer_name + '_activation',
          ))
      # An empty list is stored when no additional layers are requested, so
      # `_shared_nets` stays index-aligned with the inputs.
      self._shared_nets.append(net)
    self.built = True

  def _predict(self, image_features):
    """Computes the output of every prediction head for every feature map.

    Args:
      image_features: A list of float tensors of shape [batch_size, height_i,
        width_i, channels_i] containing features for a batch of images.

    Returns:
      A dictionary (a `collections.defaultdict` of lists) mapping each head
      name to a list of prediction tensors, with one entry per feature map in
      the input `image_features` list. It always holds the keys
      `box_predictor.BOX_ENCODINGS` and
      `box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND`, plus one key per
      entry of the `other_heads` passed to the constructor. The shape of each
      tensor is determined by the corresponding head; box encodings are
      expected to be [batch_size, num_anchors_i, q, code_size] and class
      predictions [batch_size, num_anchors_i, num_classes + 1].
    """
    predictions = collections.defaultdict(list)

    for (index, net) in enumerate(image_features):
      # Apply the shared layers built for this feature map, if any.
      for layer in self._shared_nets[index]:
        net = layer(net)

      for head_name in self._sorted_head_names:
        head_obj = self._prediction_heads[head_name][index]
        prediction = head_obj(net)
        predictions[head_name].append(prediction)

    return predictions