DR-App / object_detection /core /box_predictor.py
pat229988's picture
Upload 653 files
9a393e2
raw
history blame contribute delete
No virus
10.2 kB
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Box predictor for object detectors.
Box predictors are classes that take a high level
image feature map as input and produce two predictions,
(1) a tensor encoding box locations, and
(2) a tensor encoding classes for each box.
These components are passed directly to loss functions
in our detection models.
These modules are separated from the main model since the same
few box predictor architectures are shared across many models.
"""
from abc import abstractmethod
import tensorflow as tf
BOX_ENCODINGS = 'box_encodings'
CLASS_PREDICTIONS_WITH_BACKGROUND = 'class_predictions_with_background'
MASK_PREDICTIONS = 'mask_predictions'
class BoxPredictor(object):
"""BoxPredictor."""
def __init__(self, is_training, num_classes):
"""Constructor.
Args:
is_training: Indicates whether the BoxPredictor is in training mode.
num_classes: number of classes. Note that num_classes *does not*
include the background category, so if groundtruth labels take values
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
assigned classification targets can range from {0,... K}).
"""
self._is_training = is_training
self._num_classes = num_classes
@property
def is_keras_model(self):
return False
@property
def num_classes(self):
return self._num_classes
def predict(self, image_features, num_predictions_per_location,
scope=None, **params):
"""Computes encoded object locations and corresponding confidences.
Takes a list of high level image feature maps as input and produces a list
of box encodings and a list of class scores where each element in the output
lists correspond to the feature maps in the input list.
Args:
image_features: A list of float tensors of shape [batch_size, height_i,
width_i, channels_i] containing features for a batch of images.
num_predictions_per_location: A list of integers representing the number
of box predictions to be made per spatial location for each feature map.
scope: Variable and Op scope name.
**params: Additional keyword arguments for specific implementations of
BoxPredictor.
Returns:
A dictionary containing at least the following tensors.
box_encodings: A list of float tensors. Each entry in the list
corresponds to a feature map in the input `image_features` list. All
tensors in the list have one of the two following shapes:
a. [batch_size, num_anchors_i, q, code_size] representing the location
of the objects, where q is 1 or the number of classes.
b. [batch_size, num_anchors_i, code_size].
class_predictions_with_background: A list of float tensors of shape
[batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. Each entry in the list corresponds to a
feature map in the input `image_features` list.
Raises:
ValueError: If length of `image_features` is not equal to length of
`num_predictions_per_location`.
"""
if len(image_features) != len(num_predictions_per_location):
raise ValueError('image_feature and num_predictions_per_location must '
'be of same length, found: {} vs {}'.
format(len(image_features),
len(num_predictions_per_location)))
if scope is not None:
with tf.variable_scope(scope):
return self._predict(image_features, num_predictions_per_location,
**params)
return self._predict(image_features, num_predictions_per_location,
**params)
# TODO(rathodv): num_predictions_per_location could be moved to constructor.
# This is currently only used by ConvolutionalBoxPredictor.
@abstractmethod
def _predict(self, image_features, num_predictions_per_location, **params):
"""Implementations must override this method.
Args:
image_features: A list of float tensors of shape [batch_size, height_i,
width_i, channels_i] containing features for a batch of images.
num_predictions_per_location: A list of integers representing the number
of box predictions to be made per spatial location for each feature map.
**params: Additional keyword arguments for specific implementations of
BoxPredictor.
Returns:
A dictionary containing at least the following tensors.
box_encodings: A list of float tensors. Each entry in the list
corresponds to a feature map in the input `image_features` list. All
tensors in the list have one of the two following shapes:
a. [batch_size, num_anchors_i, q, code_size] representing the location
of the objects, where q is 1 or the number of classes.
b. [batch_size, num_anchors_i, code_size].
class_predictions_with_background: A list of float tensors of shape
[batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. Each entry in the list corresponds to a
feature map in the input `image_features` list.
"""
pass
class KerasBoxPredictor(tf.keras.Model):
"""Keras-based BoxPredictor."""
def __init__(self, is_training, num_classes, freeze_batchnorm,
inplace_batchnorm_update, name=None):
"""Constructor.
Args:
is_training: Indicates whether the BoxPredictor is in training mode.
num_classes: number of classes. Note that num_classes *does not*
include the background category, so if groundtruth labels take values
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
assigned classification targets can range from {0,... K}).
freeze_batchnorm: Whether to freeze batch norm parameters during
training or not. When training with a small batch size (e.g. 1), it is
desirable to freeze batch norm update and use pretrained batch norm
params.
inplace_batchnorm_update: Whether to update batch norm moving average
values inplace. When this is false train op must add a control
dependency on tf.graphkeys.UPDATE_OPS collection in order to update
batch norm statistics.
name: A string name scope to assign to the model. If `None`, Keras
will auto-generate one from the class name.
"""
super(KerasBoxPredictor, self).__init__(name=name)
self._is_training = is_training
self._num_classes = num_classes
self._freeze_batchnorm = freeze_batchnorm
self._inplace_batchnorm_update = inplace_batchnorm_update
@property
def is_keras_model(self):
return True
@property
def num_classes(self):
return self._num_classes
def call(self, image_features, **kwargs):
"""Computes encoded object locations and corresponding confidences.
Takes a list of high level image feature maps as input and produces a list
of box encodings and a list of class scores where each element in the output
lists correspond to the feature maps in the input list.
Args:
image_features: A list of float tensors of shape [batch_size, height_i,
width_i, channels_i] containing features for a batch of images.
**kwargs: Additional keyword arguments for specific implementations of
BoxPredictor.
Returns:
A dictionary containing at least the following tensors.
box_encodings: A list of float tensors. Each entry in the list
corresponds to a feature map in the input `image_features` list. All
tensors in the list have one of the two following shapes:
a. [batch_size, num_anchors_i, q, code_size] representing the location
of the objects, where q is 1 or the number of classes.
b. [batch_size, num_anchors_i, code_size].
class_predictions_with_background: A list of float tensors of shape
[batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. Each entry in the list corresponds to a
feature map in the input `image_features` list.
"""
return self._predict(image_features, **kwargs)
@abstractmethod
def _predict(self, image_features, **kwargs):
"""Implementations must override this method.
Args:
image_features: A list of float tensors of shape [batch_size, height_i,
width_i, channels_i] containing features for a batch of images.
**kwargs: Additional keyword arguments for specific implementations of
BoxPredictor.
Returns:
A dictionary containing at least the following tensors.
box_encodings: A list of float tensors. Each entry in the list
corresponds to a feature map in the input `image_features` list. All
tensors in the list have one of the two following shapes:
a. [batch_size, num_anchors_i, q, code_size] representing the location
of the objects, where q is 1 or the number of classes.
b. [batch_size, num_anchors_i, code_size].
class_predictions_with_background: A list of float tensors of shape
[batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. Each entry in the list corresponds to a
feature map in the input `image_features` list.
"""
raise NotImplementedError