Spaces:

pat229988
/

DR-App

Sleeping

App Files Files Community

DR-App / object_detection /core /keypoint_ops.py

pat229988

Upload 653 files

9a393e2 almost 2 years ago

raw

history blame

No virus

11.2 kB

	# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# ==============================================================================

	"""Keypoint operations.

	Keypoints are represented as tensors of shape [num_instances, num_keypoints, 2],
	where the last dimension holds rank 2 tensors of the form [y, x] representing
	the coordinates of the keypoint.
	"""
	import numpy as np
	import tensorflow as tf


	def scale(keypoints, y_scale, x_scale, scope=None):
	"""Scales keypoint coordinates in x and y dimensions.

	Args:
	keypoints: a tensor of shape [num_instances, num_keypoints, 2]
	y_scale: (float) scalar tensor
	x_scale: (float) scalar tensor
	scope: name scope.

	Returns:
	new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
	"""
	with tf.name_scope(scope, 'Scale'):
	y_scale = tf.cast(y_scale, tf.float32)
	x_scale = tf.cast(x_scale, tf.float32)
	new_keypoints = keypoints * [[[y_scale, x_scale]]]
	return new_keypoints


	def clip_to_window(keypoints, window, scope=None):
	"""Clips keypoints to a window.

	This op clips any input keypoints to a window.

	Args:
	keypoints: a tensor of shape [num_instances, num_keypoints, 2]
	window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
	window to which the op should clip the keypoints.
	scope: name scope.

	Returns:
	new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
	"""
	with tf.name_scope(scope, 'ClipToWindow'):
	y, x = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
	win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
	y = tf.maximum(tf.minimum(y, win_y_max), win_y_min)
	x = tf.maximum(tf.minimum(x, win_x_max), win_x_min)
	new_keypoints = tf.concat([y, x], 2)
	return new_keypoints


	def prune_outside_window(keypoints, window, scope=None):
	"""Prunes keypoints that fall outside a given window.

	This function replaces keypoints that fall outside the given window with nan.
	See also clip_to_window which clips any keypoints that fall outside the given
	window.

	Args:
	keypoints: a tensor of shape [num_instances, num_keypoints, 2]
	window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
	window outside of which the op should prune the keypoints.
	scope: name scope.

	Returns:
	new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
	"""
	with tf.name_scope(scope, 'PruneOutsideWindow'):
	y, x = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
	win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)

	valid_indices = tf.logical_and(
	tf.logical_and(y >= win_y_min, y <= win_y_max),
	tf.logical_and(x >= win_x_min, x <= win_x_max))

	new_y = tf.where(valid_indices, y, np.nan * tf.ones_like(y))
	new_x = tf.where(valid_indices, x, np.nan * tf.ones_like(x))
	new_keypoints = tf.concat([new_y, new_x], 2)

	return new_keypoints


	def change_coordinate_frame(keypoints, window, scope=None):
	"""Changes coordinate frame of the keypoints to be relative to window's frame.

	Given a window of the form [y_min, x_min, y_max, x_max], changes keypoint
	coordinates from keypoints of shape [num_instances, num_keypoints, 2]
	to be relative to this window.

	An example use case is data augmentation: where we are given groundtruth
	keypoints and would like to randomly crop the image to some window. In this
	case we need to change the coordinate frame of each groundtruth keypoint to be
	relative to this new window.

	Args:
	keypoints: a tensor of shape [num_instances, num_keypoints, 2]
	window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
	window we should change the coordinate frame to.
	scope: name scope.

	Returns:
	new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
	"""
	with tf.name_scope(scope, 'ChangeCoordinateFrame'):
	win_height = window[2] - window[0]
	win_width = window[3] - window[1]
	new_keypoints = scale(keypoints - [window[0], window[1]], 1.0 / win_height,
	1.0 / win_width)
	return new_keypoints


	def to_normalized_coordinates(keypoints, height, width,
	check_range=True, scope=None):
	"""Converts absolute keypoint coordinates to normalized coordinates in [0, 1].

	Usually one uses the dynamic shape of the image or conv-layer tensor:
	keypoints = keypoint_ops.to_normalized_coordinates(keypoints,
	tf.shape(images)[1],
	tf.shape(images)[2]),

	This function raises an assertion failed error at graph execution time when
	the maximum coordinate is smaller than 1.01 (which means that coordinates are
	already normalized). The value 1.01 is to deal with small rounding errors.

	Args:
	keypoints: A tensor of shape [num_instances, num_keypoints, 2].
	height: Maximum value for y coordinate of absolute keypoint coordinates.
	width: Maximum value for x coordinate of absolute keypoint coordinates.
	check_range: If True, checks if the coordinates are normalized.
	scope: name scope.

	Returns:
	tensor of shape [num_instances, num_keypoints, 2] with normalized
	coordinates in [0, 1].
	"""
	with tf.name_scope(scope, 'ToNormalizedCoordinates'):
	height = tf.cast(height, tf.float32)
	width = tf.cast(width, tf.float32)

	if check_range:
	max_val = tf.reduce_max(keypoints)
	max_assert = tf.Assert(tf.greater(max_val, 1.01),
	['max value is lower than 1.01: ', max_val])
	with tf.control_dependencies([max_assert]):
	width = tf.identity(width)

	return scale(keypoints, 1.0 / height, 1.0 / width)


	def to_absolute_coordinates(keypoints, height, width,
	check_range=True, scope=None):
	"""Converts normalized keypoint coordinates to absolute pixel coordinates.

	This function raises an assertion failed error when the maximum keypoint
	coordinate value is larger than 1.01 (in which case coordinates are already
	absolute).

	Args:
	keypoints: A tensor of shape [num_instances, num_keypoints, 2]
	height: Maximum value for y coordinate of absolute keypoint coordinates.
	width: Maximum value for x coordinate of absolute keypoint coordinates.
	check_range: If True, checks if the coordinates are normalized or not.
	scope: name scope.

	Returns:
	tensor of shape [num_instances, num_keypoints, 2] with absolute coordinates
	in terms of the image size.

	"""
	with tf.name_scope(scope, 'ToAbsoluteCoordinates'):
	height = tf.cast(height, tf.float32)
	width = tf.cast(width, tf.float32)

	# Ensure range of input keypoints is correct.
	if check_range:
	max_val = tf.reduce_max(keypoints)
	max_assert = tf.Assert(tf.greater_equal(1.01, max_val),
	['maximum keypoint coordinate value is larger '
	'than 1.01: ', max_val])
	with tf.control_dependencies([max_assert]):
	width = tf.identity(width)

	return scale(keypoints, height, width)


	def flip_horizontal(keypoints, flip_point, flip_permutation, scope=None):
	"""Flips the keypoints horizontally around the flip_point.

	This operation flips the x coordinate for each keypoint around the flip_point
	and also permutes the keypoints in a manner specified by flip_permutation.

	Args:
	keypoints: a tensor of shape [num_instances, num_keypoints, 2]
	flip_point: (float) scalar tensor representing the x coordinate to flip the
	keypoints around.
	flip_permutation: rank 1 int32 tensor containing the keypoint flip
	permutation. This specifies the mapping from original keypoint indices
	to the flipped keypoint indices. This is used primarily for keypoints
	that are not reflection invariant. E.g. Suppose there are 3 keypoints
	representing ['head', 'right_eye', 'left_eye'], then a logical choice for
	flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye'
	and 'right_eye' after a horizontal flip.
	scope: name scope.

	Returns:
	new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
	"""
	with tf.name_scope(scope, 'FlipHorizontal'):
	keypoints = tf.transpose(keypoints, [1, 0, 2])
	keypoints = tf.gather(keypoints, flip_permutation)
	v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
	u = flip_point * 2.0 - u
	new_keypoints = tf.concat([v, u], 2)
	new_keypoints = tf.transpose(new_keypoints, [1, 0, 2])
	return new_keypoints


	def flip_vertical(keypoints, flip_point, flip_permutation, scope=None):
	"""Flips the keypoints vertically around the flip_point.

	This operation flips the y coordinate for each keypoint around the flip_point
	and also permutes the keypoints in a manner specified by flip_permutation.

	Args:
	keypoints: a tensor of shape [num_instances, num_keypoints, 2]
	flip_point: (float) scalar tensor representing the y coordinate to flip the
	keypoints around.
	flip_permutation: rank 1 int32 tensor containing the keypoint flip
	permutation. This specifies the mapping from original keypoint indices
	to the flipped keypoint indices. This is used primarily for keypoints
	that are not reflection invariant. E.g. Suppose there are 3 keypoints
	representing ['head', 'right_eye', 'left_eye'], then a logical choice for
	flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye'
	and 'right_eye' after a horizontal flip.
	scope: name scope.

	Returns:
	new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
	"""
	with tf.name_scope(scope, 'FlipVertical'):
	keypoints = tf.transpose(keypoints, [1, 0, 2])
	keypoints = tf.gather(keypoints, flip_permutation)
	v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
	v = flip_point * 2.0 - v
	new_keypoints = tf.concat([v, u], 2)
	new_keypoints = tf.transpose(new_keypoints, [1, 0, 2])
	return new_keypoints


	def rot90(keypoints, scope=None):
	"""Rotates the keypoints counter-clockwise by 90 degrees.

	Args:
	keypoints: a tensor of shape [num_instances, num_keypoints, 2]
	scope: name scope.

	Returns:
	new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
	"""
	with tf.name_scope(scope, 'Rot90'):
	keypoints = tf.transpose(keypoints, [1, 0, 2])
	v, u = tf.split(value=keypoints[:, :, ::-1], num_or_size_splits=2, axis=2)
	v = 1.0 - v
	new_keypoints = tf.concat([v, u], 2)
	new_keypoints = tf.transpose(new_keypoints, [1, 0, 2])
	return new_keypoints