Spaces:

pat229988
/

DR-App

Sleeping

App Files Files Community

DR-App / object_detection /box_coders /faster_rcnn_box_coder.py

pat229988

Upload 653 files

9a393e2 almost 2 years ago

raw

history blame contribute delete

No virus

3.84 kB

	# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# ==============================================================================

	"""Faster RCNN box coder.

	Faster RCNN box coder follows the coding schema described below:
	ty = (y - ya) / ha
	tx = (x - xa) / wa
	th = log(h / ha)
	tw = log(w / wa)
	where x, y, w, h denote the box's center coordinates, width and height
	respectively. Similarly, xa, ya, wa, ha denote the anchor's center
	coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
	center, width and height respectively.

	See http://arxiv.org/abs/1506.01497 for details.
	"""

	import tensorflow as tf

	from object_detection.core import box_coder
	from object_detection.core import box_list

	EPSILON = 1e-8


	class FasterRcnnBoxCoder(box_coder.BoxCoder):
	"""Faster RCNN box coder."""

	def __init__(self, scale_factors=None):
	"""Constructor for FasterRcnnBoxCoder.

	Args:
	scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
	If set to None, does not perform scaling. For Faster RCNN,
	the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0].
	"""
	if scale_factors:
	assert len(scale_factors) == 4
	for scalar in scale_factors:
	assert scalar > 0
	self._scale_factors = scale_factors

	@property
	def code_size(self):
	return 4

	def _encode(self, boxes, anchors):
	"""Encode a box collection with respect to anchor collection.

	Args:
	boxes: BoxList holding N boxes to be encoded.
	anchors: BoxList of anchors.

	Returns:
	a tensor representing N anchor-encoded boxes of the format
	[ty, tx, th, tw].
	"""
	# Convert anchors to the center coordinate representation.
	ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
	ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
	# Avoid NaN in division and log below.
	ha += EPSILON
	wa += EPSILON
	h += EPSILON
	w += EPSILON

	tx = (xcenter - xcenter_a) / wa
	ty = (ycenter - ycenter_a) / ha
	tw = tf.log(w / wa)
	th = tf.log(h / ha)
	# Scales location targets as used in paper for joint training.
	if self._scale_factors:
	ty *= self._scale_factors[0]
	tx *= self._scale_factors[1]
	th *= self._scale_factors[2]
	tw *= self._scale_factors[3]
	return tf.transpose(tf.stack([ty, tx, th, tw]))

	def _decode(self, rel_codes, anchors):
	"""Decode relative codes to boxes.

	Args:
	rel_codes: a tensor representing N anchor-encoded boxes.
	anchors: BoxList of anchors.

	Returns:
	boxes: BoxList holding N bounding boxes.
	"""
	ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()

	ty, tx, th, tw = tf.unstack(tf.transpose(rel_codes))
	if self._scale_factors:
	ty /= self._scale_factors[0]
	tx /= self._scale_factors[1]
	th /= self._scale_factors[2]
	tw /= self._scale_factors[3]
	w = tf.exp(tw) * wa
	h = tf.exp(th) * ha
	ycenter = ty * ha + ycenter_a
	xcenter = tx * wa + xcenter_a
	ymin = ycenter - h / 2.
	xmin = xcenter - w / 2.
	ymax = ycenter + h / 2.
	xmax = xcenter + w / 2.
	return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))