|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Keras Mask Heads. |
|
|
|
Contains Mask prediction head classes for different meta architectures. |
|
All the mask prediction heads have a predict function that receives the |
|
`features` as the first argument and returns `mask_predictions`. |
|
""" |
|
import tensorflow as tf |
|
|
|
from object_detection.predictors.heads import head |
|
|
|
|
|
class ConvolutionalMaskHead(head.KerasHead):
  """Convolutional mask prediction head.

  Builds a small stack of Keras layers (optional dropout followed by either a
  single convolution or a depthwise convolution + batch norm + activation +
  1x1 convolution) and reshapes the stack's output into per-location masks.
  """

  def __init__(self,
               is_training,
               num_classes,
               use_dropout,
               dropout_keep_prob,
               kernel_size,
               num_predictions_per_location,
               conv_hyperparams,
               freeze_batchnorm,
               use_depthwise=False,
               mask_height=7,
               mask_width=7,
               masks_are_class_agnostic=False,
               name=None):
    """Creates the layers used for mask prediction.

    Args:
      is_training: Whether the head is built for training. Combined with
        `freeze_batchnorm` to decide the `training` flag of the batch norm
        layer in the depthwise path.
      num_classes: Number of classes. Determines the number of masks per
        prediction unless `masks_are_class_agnostic` is True.
      use_dropout: Whether to put a dropout layer in front of the
        convolution(s).
      dropout_keep_prob: Keep probability for dropout; the dropout rate is
        `1.0 - dropout_keep_prob`. Only used if `use_dropout` is True.
      kernel_size: Side length of the square kernel of the spatial
        convolution (the depthwise convolution when `use_depthwise` is True,
        otherwise the final convolution).
      num_predictions_per_location: Int, number of mask predictions made per
        spatial location.
      conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
        supplying convolution keyword arguments as well as the batch norm
        and activation layers.
      freeze_batchnorm: Bool. When True the batch norm layer is built with
        `training=False` even if `is_training` is True.
      use_depthwise: Whether to use a depthwise convolution followed by a 1x1
        convolution instead of a single regular convolution. Default is
        False.
      mask_height: Desired output mask height. The default value is 7.
      mask_width: Desired output mask width. The default value is 7.
      masks_are_class_agnostic: Boolean. If True a single mask is predicted
        per location instead of one mask per class.
      name: A string name scope to assign to the model. If `None`, Keras
        will auto-generate one from the class name.
    """
    super(ConvolutionalMaskHead, self).__init__(name=name)

    # Keep the configuration on the instance.
    self._is_training = is_training
    self._num_classes = num_classes
    self._use_dropout = use_dropout
    self._dropout_keep_prob = dropout_keep_prob
    self._kernel_size = kernel_size
    self._num_predictions_per_location = num_predictions_per_location
    self._use_depthwise = use_depthwise
    self._mask_height = mask_height
    self._mask_width = mask_width
    self._masks_are_class_agnostic = masks_are_class_agnostic

    # One shared mask when class agnostic, otherwise one mask per class.
    self._num_masks = 1 if masks_are_class_agnostic else num_classes

    # Every prediction at a location is a flattened stack of
    # `num_masks` masks of size `mask_height` x `mask_width`.
    channels_per_prediction = self._num_masks * mask_height * mask_width
    output_depth = num_predictions_per_location * channels_per_prediction

    self._mask_predictor_layers = []
    add_layer = self._mask_predictor_layers.append
    square_kernel = [kernel_size, kernel_size]

    if use_dropout:
      add_layer(tf.keras.layers.Dropout(rate=1.0 - dropout_keep_prob))

    if not use_depthwise:
      # A single spatial convolution produces the mask logits directly.
      add_layer(
          tf.keras.layers.Conv2D(
              output_depth,
              square_kernel,
              padding='SAME',
              name='MaskPredictor',
              **conv_hyperparams.params(use_bias=True)))
    else:
      # Depthwise spatial convolution -> batch norm -> activation, followed
      # by a 1x1 convolution that produces the mask logits.
      add_layer(
          tf.keras.layers.DepthwiseConv2D(
              square_kernel,
              padding='SAME',
              depth_multiplier=1,
              strides=1,
              dilation_rate=1,
              name='MaskPredictor_depthwise',
              **conv_hyperparams.params()))
      add_layer(
          conv_hyperparams.build_batch_norm(
              training=(is_training and not freeze_batchnorm),
              name='MaskPredictor_depthwise_batchnorm'))
      add_layer(
          conv_hyperparams.build_activation_layer(
              name='MaskPredictor_depthwise_activation'))
      add_layer(
          tf.keras.layers.Conv2D(
              output_depth, [1, 1],
              name='MaskPredictor',
              **conv_hyperparams.params(use_bias=True)))

  def _predict(self, features):
    """Predicts masks.

    Args:
      features: A float tensor of shape [batch_size, height, width, channels]
        containing image features.

    Returns:
      mask_predictions: A float tensor of shape
        [batch_size, num_anchors, num_masks, mask_height, mask_width]
        representing the mask predictions; the `num_anchors` dimension is
        inferred by the reshape.
    """
    net = features
    for predictor_layer in self._mask_predictor_layers:
      net = predictor_layer(net)

    # Use the static batch size when it is known, otherwise the dynamic one.
    static_batch_size = features.get_shape().as_list()[0]
    batch_size = (
        tf.shape(features)[0]
        if static_batch_size is None else static_batch_size)

    output_shape = [
        batch_size, -1, self._num_masks, self._mask_height, self._mask_width
    ]
    return tf.reshape(net, output_shape)
|
|