Spaces:

pat229988
/

DR-App

Sleeping

App Files Files Community

DR-App / object_detection /core /box_predictor.py

pat229988

Upload 653 files

9a393e2 over 1 year ago

raw

history blame contribute delete

No virus

10.2 kB

	# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# ==============================================================================

	"""Box predictor for object detectors.

	Box predictors are classes that take a high level
	image feature map as input and produce two predictions,
	(1) a tensor encoding box locations, and
	(2) a tensor encoding classes for each box.

	These components are passed directly to loss functions
	in our detection models.

	These modules are separated from the main model since the same
	few box predictor architectures are shared across many models.
	"""
	from abc import abstractmethod
	import tensorflow as tf

	# String keys for predictor outputs. The first two values match the entry
	# names listed in the "Returns" section of `BoxPredictor.predict`:
	# the per-anchor box encodings and the per-anchor class scores (which include
	# one extra slot for the background class).
	BOX_ENCODINGS = 'box_encodings'
	CLASS_PREDICTIONS_WITH_BACKGROUND = 'class_predictions_with_background'
	# NOTE(review): presumably the key under which predictors that also emit
	# instance masks store them; nothing in this file uses it -- confirm against
	# the concrete predictor implementations.
	MASK_PREDICTIONS = 'mask_predictions'


	class BoxPredictor(object):
	  """Base class for non-Keras box predictors.

	  A box predictor turns a list of image feature maps into box encodings and
	  class scores. Callers use `predict`, which validates its arguments and
	  optionally opens a variable scope; subclasses provide the actual
	  computation by overriding `_predict`.
	  """

	  def __init__(self, is_training, num_classes):
	    """Stores the configuration shared by all box predictors.

	    Args:
	      is_training: Indicates whether the BoxPredictor is in training mode.
	      num_classes: number of classes. Note that num_classes *does not*
	        include the background category, so if groundtruth labels take values
	        in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
	        assigned classification targets can range from {0,... K}).
	    """
	    self._is_training = is_training
	    self._num_classes = num_classes

	  @property
	  def is_keras_model(self):
	    """Always False: this hierarchy is the non-Keras one."""
	    return False

	  @property
	  def num_classes(self):
	    """Number of classes (excluding background) given to the constructor."""
	    return self._num_classes

	  def predict(self, image_features, num_predictions_per_location,
	              scope=None, **params):
	    """Computes encoded object locations and corresponding confidences.

	    Takes a list of high level image feature maps as input and produces a list
	    of box encodings and a list of class scores where each element in the
	    output lists corresponds to the feature map at the same position in the
	    input list.

	    Args:
	      image_features: A list of float tensors of shape [batch_size, height_i,
	        width_i, channels_i] containing features for a batch of images.
	      num_predictions_per_location: A list of integers representing the number
	        of box predictions to be made per spatial location for each feature
	        map.
	      scope: Variable and Op scope name. When None, `_predict` is called
	        without opening a new variable scope.
	      **params: Additional keyword arguments for specific implementations of
	        BoxPredictor; forwarded unchanged to `_predict`.

	    Returns:
	      A dictionary containing at least the following tensors.
	        box_encodings: A list of float tensors. Each entry in the list
	          corresponds to a feature map in the input `image_features` list. All
	          tensors in the list have one of the two following shapes:
	          a. [batch_size, num_anchors_i, q, code_size] representing the
	            location of the objects, where q is 1 or the number of classes.
	          b. [batch_size, num_anchors_i, code_size].
	        class_predictions_with_background: A list of float tensors of shape
	          [batch_size, num_anchors_i, num_classes + 1] representing the class
	          predictions for the proposals. Each entry in the list corresponds to
	          a feature map in the input `image_features` list.

	    Raises:
	      ValueError: If length of `image_features` is not equal to length of
	        `num_predictions_per_location`.
	      NotImplementedError: If the concrete class does not override `_predict`.
	    """
	    if len(image_features) != len(num_predictions_per_location):
	      raise ValueError('image_feature and num_predictions_per_location must '
	                       'be of same length, found: {} vs {}'.
	                       format(len(image_features),
	                              len(num_predictions_per_location)))
	    if scope is None:
	      return self._predict(image_features, num_predictions_per_location,
	                           **params)
	    # Only open a variable scope when the caller asked for one.
	    with tf.variable_scope(scope):
	      return self._predict(image_features, num_predictions_per_location,
	                           **params)

	  # TODO(rathodv): num_predictions_per_location could be moved to constructor.
	  # This is currently only used by ConvolutionalBoxPredictor.
	  @abstractmethod
	  def _predict(self, image_features, num_predictions_per_location, **params):
	    """Implementations must override this method.

	    Args:
	      image_features: A list of float tensors of shape [batch_size, height_i,
	        width_i, channels_i] containing features for a batch of images.
	      num_predictions_per_location: A list of integers representing the number
	        of box predictions to be made per spatial location for each feature
	        map.
	      **params: Additional keyword arguments for specific implementations of
	        BoxPredictor.

	    Returns:
	      A dictionary containing at least the following tensors.
	        box_encodings: A list of float tensors. Each entry in the list
	          corresponds to a feature map in the input `image_features` list. All
	          tensors in the list have one of the two following shapes:
	          a. [batch_size, num_anchors_i, q, code_size] representing the
	            location of the objects, where q is 1 or the number of classes.
	          b. [batch_size, num_anchors_i, code_size].
	        class_predictions_with_background: A list of float tensors of shape
	          [batch_size, num_anchors_i, num_classes + 1] representing the class
	          predictions for the proposals. Each entry in the list corresponds to
	          a feature map in the input `image_features` list.

	    Raises:
	      NotImplementedError: Always, in this base class.
	    """
	    # `abstractmethod` is only enforced for classes whose metaclass is
	    # ABCMeta, which this class does not set. Raise explicitly so a missing
	    # override fails loudly instead of making `predict` return None.
	    raise NotImplementedError


	class KerasBoxPredictor(tf.keras.Model):
	  """Base class for box predictors implemented as Keras models.

	  Invoking the model runs `call`, which simply delegates to `_predict`;
	  concrete predictors override `_predict` to build their outputs.
	  """

	  def __init__(self, is_training, num_classes, freeze_batchnorm,
	               inplace_batchnorm_update, name=None):
	    """Records the predictor configuration on the Keras model.

	    Args:
	      is_training: Indicates whether the BoxPredictor is in training mode.
	      num_classes: Number of classes, *excluding* the background category.
	        If groundtruth labels take values in {0, 1, .., K-1} then
	        num_classes=K (not K+1), even though the assigned classification
	        targets can range over {0,... K}.
	      freeze_batchnorm: Whether to freeze batch norm parameters during
	        training or not. When training with a small batch size (e.g. 1), it
	        is desirable to freeze batch norm update and use pretrained batch
	        norm params.
	      inplace_batchnorm_update: Whether to update batch norm moving average
	        values inplace. When this is false the train op must add a control
	        dependency on the tf.GraphKeys.UPDATE_OPS collection in order to
	        update batch norm statistics.
	      name: A string name scope to assign to the model. If `None`, Keras
	        will auto-generate one from the class name.
	    """
	    super(KerasBoxPredictor, self).__init__(name=name)

	    # This base class only stores these settings; apart from `num_classes`
	    # nothing defined here reads them back.
	    self._inplace_batchnorm_update = inplace_batchnorm_update
	    self._freeze_batchnorm = freeze_batchnorm
	    self._num_classes = num_classes
	    self._is_training = is_training

	  @property
	  def is_keras_model(self):
	    """Always True: this hierarchy is the Keras one."""
	    return True

	  @property
	  def num_classes(self):
	    """Number of classes (excluding background) given to the constructor."""
	    return self._num_classes

	  def call(self, image_features, **kwargs):
	    """Computes encoded object locations and corresponding confidences.

	    Given a list of high level image feature maps, produces a list of box
	    encodings and a list of class scores; the i-th element of each output
	    list corresponds to the i-th input feature map.

	    Args:
	      image_features: A list of float tensors of shape [batch_size, height_i,
	        width_i, channels_i] containing features for a batch of images.
	      **kwargs: Additional keyword arguments for specific implementations of
	        BoxPredictor; forwarded unchanged to `_predict`.

	    Returns:
	      A dictionary containing at least the following tensors.
	        box_encodings: A list of float tensors. Each entry in the list
	          corresponds to a feature map in the input `image_features` list. All
	          tensors in the list have one of the two following shapes:
	          a. [batch_size, num_anchors_i, q, code_size] representing the
	            location of the objects, where q is 1 or the number of classes.
	          b. [batch_size, num_anchors_i, code_size].
	        class_predictions_with_background: A list of float tensors of shape
	          [batch_size, num_anchors_i, num_classes + 1] representing the class
	          predictions for the proposals. Each entry in the list corresponds to
	          a feature map in the input `image_features` list.
	    """
	    predictions = self._predict(image_features, **kwargs)
	    return predictions

	  @abstractmethod
	  def _predict(self, image_features, **kwargs):
	    """Hook that concrete predictors must override.

	    Args:
	      image_features: A list of float tensors of shape [batch_size, height_i,
	        width_i, channels_i] containing features for a batch of images.
	      **kwargs: Additional keyword arguments for specific implementations of
	        BoxPredictor.

	    Returns:
	      A dictionary containing at least the following tensors.
	        box_encodings: A list of float tensors. Each entry in the list
	          corresponds to a feature map in the input `image_features` list. All
	          tensors in the list have one of the two following shapes:
	          a. [batch_size, num_anchors_i, q, code_size] representing the
	            location of the objects, where q is 1 or the number of classes.
	          b. [batch_size, num_anchors_i, code_size].
	        class_predictions_with_background: A list of float tensors of shape
	          [batch_size, num_anchors_i, num_classes + 1] representing the class
	          predictions for the proposals. Each entry in the list corresponds to
	          a feature map in the input `image_features` list.

	    Raises:
	      NotImplementedError: Always, in this base class.
	    """
	    raise NotImplementedError