pat229988's picture
Upload 653 files
9a393e2
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Keras Mask Heads.
Contains Mask prediction head classes for different meta architectures.
All the mask prediction heads have a predict function that receives the
`features` as the first argument and returns `mask_predictions`.
"""
import tensorflow as tf
from object_detection.predictors.heads import head
class ConvolutionalMaskHead(head.KerasHead):
"""Convolutional class prediction head."""
def __init__(self,
is_training,
num_classes,
use_dropout,
dropout_keep_prob,
kernel_size,
num_predictions_per_location,
conv_hyperparams,
freeze_batchnorm,
use_depthwise=False,
mask_height=7,
mask_width=7,
masks_are_class_agnostic=False,
name=None):
"""Constructor.
Args:
is_training: Indicates whether the BoxPredictor is in training mode.
num_classes: Number of classes.
use_dropout: Option to use dropout or not. Note that a single dropout
op is applied here prior to both box and class predictions, which stands
in contrast to the ConvolutionalBoxPredictor below.
dropout_keep_prob: Keep probability for dropout.
This is only used if use_dropout is True.
kernel_size: Size of final convolution kernel. If the
spatial resolution of the feature map is smaller than the kernel size,
then the kernel size is automatically set to be
min(feature_width, feature_height).
num_predictions_per_location: Number of box predictions to be made per
spatial location. Int specifying number of boxes per location.
conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
containing hyperparameters for convolution ops.
freeze_batchnorm: Bool. Whether to freeze batch norm parameters during
training or not. When training with a small batch size (e.g. 1), it is
desirable to freeze batch norm update and use pretrained batch norm
params.
use_depthwise: Whether to use depthwise convolutions for prediction
steps. Default is False.
mask_height: Desired output mask height. The default value is 7.
mask_width: Desired output mask width. The default value is 7.
masks_are_class_agnostic: Boolean determining if the mask-head is
class-agnostic or not.
name: A string name scope to assign to the model. If `None`, Keras
will auto-generate one from the class name.
Raises:
ValueError: if min_depth > max_depth.
"""
super(ConvolutionalMaskHead, self).__init__(name=name)
self._is_training = is_training
self._num_classes = num_classes
self._use_dropout = use_dropout
self._dropout_keep_prob = dropout_keep_prob
self._kernel_size = kernel_size
self._num_predictions_per_location = num_predictions_per_location
self._use_depthwise = use_depthwise
self._mask_height = mask_height
self._mask_width = mask_width
self._masks_are_class_agnostic = masks_are_class_agnostic
self._mask_predictor_layers = []
# Add a slot for the background class.
if self._masks_are_class_agnostic:
self._num_masks = 1
else:
self._num_masks = self._num_classes
num_mask_channels = self._num_masks * self._mask_height * self._mask_width
if self._use_dropout:
self._mask_predictor_layers.append(
# The Dropout layer's `training` parameter for the call method must
# be set implicitly by the Keras set_learning_phase. The object
# detection training code takes care of this.
tf.keras.layers.Dropout(rate=1.0 - self._dropout_keep_prob))
if self._use_depthwise:
self._mask_predictor_layers.append(
tf.keras.layers.DepthwiseConv2D(
[self._kernel_size, self._kernel_size],
padding='SAME',
depth_multiplier=1,
strides=1,
dilation_rate=1,
name='MaskPredictor_depthwise',
**conv_hyperparams.params()))
self._mask_predictor_layers.append(
conv_hyperparams.build_batch_norm(
training=(is_training and not freeze_batchnorm),
name='MaskPredictor_depthwise_batchnorm'))
self._mask_predictor_layers.append(
conv_hyperparams.build_activation_layer(
name='MaskPredictor_depthwise_activation'))
self._mask_predictor_layers.append(
tf.keras.layers.Conv2D(
num_predictions_per_location * num_mask_channels, [1, 1],
name='MaskPredictor',
**conv_hyperparams.params(use_bias=True)))
else:
self._mask_predictor_layers.append(
tf.keras.layers.Conv2D(
num_predictions_per_location * num_mask_channels,
[self._kernel_size, self._kernel_size],
padding='SAME',
name='MaskPredictor',
**conv_hyperparams.params(use_bias=True)))
def _predict(self, features):
"""Predicts boxes.
Args:
features: A float tensor of shape [batch_size, height, width, channels]
containing image features.
Returns:
mask_predictions: A float tensors of shape
[batch_size, num_anchors, num_masks, mask_height, mask_width]
representing the mask predictions for the proposals.
"""
mask_predictions = features
for layer in self._mask_predictor_layers:
mask_predictions = layer(mask_predictions)
batch_size = features.get_shape().as_list()[0]
if batch_size is None:
batch_size = tf.shape(features)[0]
mask_predictions = tf.reshape(
mask_predictions,
[batch_size, -1, self._num_masks, self._mask_height, self._mask_width])
return mask_predictions