|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Utils used to manipulate tensor shapes.""" |
|
|
|
import tensorflow as tf |
|
|
|
from object_detection.utils import static_shape |
|
|
|
|
|
def _is_tensor(t):
  """Tells whether `t` is one of the TensorFlow tensor-like types.

  Args:
    t: the object to inspect.

  Returns:
    True when `t` is an instance of tf.Tensor, tf.SparseTensor or tf.Variable,
    False otherwise.
  """
  tensor_types = (tf.Tensor, tf.SparseTensor, tf.Variable)
  return isinstance(t, tensor_types)
|
|
|
|
|
def _set_dim_0(t, d0):
  """Records `d0` as the static size of the leading dimension of `t`.

  Args:
    t: the input tensor. Its static shape is read as a list and entry 0 is
      overwritten, so the rank is assumed to be known and at least 1.
    d0: the value to store as the static size of dimension 0.

  Returns:
    The same tensor `t`, after `set_shape` has been applied to it.
  """
  new_static_shape = t.get_shape().as_list()
  # Only the leading entry changes; trailing dimensions are passed back as is.
  new_static_shape[0] = d0
  t.set_shape(new_static_shape)
  return t
|
|
|
|
|
def pad_tensor(t, length):
  """Appends zeros along the first dimension of `t` up to `length`.

  Args:
    t: the input tensor, assuming the rank is at least 1.
    length: an integer or a tensor giving the first dimension of `t` after
      padding. The number of appended rows is computed as
      `length - t.shape[0]`, so `length` is expected to be >= t.shape[0].
      NOTE(review): the arithmetic below looks like it expects a scalar when
      `length` is a tensor -- confirm against callers.

  Returns:
    padded_t: `t` concatenated along axis 0 with a block of zeros of the same
      dtype. If `length` is not a tensor, the first dimension of padded_t is
      also set to `length` statically.
  """
  rank = tf.rank(t)
  dynamic_shape = tf.shape(t)
  current_rows = dynamic_shape[0]
  rows_to_add = tf.expand_dims(length - current_rows, 0)

  def _zeros_shape_with_trailing_dims():
    # The zero block keeps every trailing dimension of `t`.
    return tf.concat([rows_to_add, dynamic_shape[1:]], 0)

  def _zeros_shape_for_rank_one():
    # No trailing dimensions: the zero block is just a vector.
    return tf.expand_dims(length - current_rows, 0)

  zeros_shape = tf.cond(
      tf.greater(rank, 1),
      _zeros_shape_with_trailing_dims,
      _zeros_shape_for_rank_one)
  zero_block = tf.zeros(zeros_shape, dtype=t.dtype)
  padded_t = tf.concat([t, zero_block], 0)

  if _is_tensor(length):
    # A tensor-valued length is only known at run time; nothing to pin.
    return padded_t
  return _set_dim_0(padded_t, length)
|
|
|
|
|
def clip_tensor(t, length):
  """Keeps only the first `length` entries of `t` along the first dimension.

  Args:
    t: the input tensor, assuming the rank is at least 1.
    length: an integer or a tensor giving the first dimension of `t` after
      clipping, assuming length <= t.shape[0]. It is passed to `tf.range` to
      build the indices that are gathered.

  Returns:
    clipped_t: the gathered tensor, whose first dimension is `length`. If
      `length` is not a tensor, the first dimension of clipped_t is also set
      to `length` statically.
  """
  kept_indices = tf.range(length)
  clipped_t = tf.gather(t, kept_indices)

  if _is_tensor(length):
    # A tensor-valued length is only known at run time; nothing to pin.
    return clipped_t
  return _set_dim_0(clipped_t, length)
|
|
|
|
|
def pad_or_clip_tensor(t, length):
  """Pads or clips `t` along the first dimension only.

  The work is delegated to `pad_or_clip_nd`, with a target shape made of
  `length` followed by the static sizes of the remaining dimensions of `t`.

  Args:
    t: the input tensor, assuming the rank is at least 1.
    length: a tensor or an integer giving the first dimension of `t` after
      processing.

  Returns:
    processed_t: the result of `pad_or_clip_nd` for that target shape.
  """
  trailing_static_dims = t.shape.as_list()[1:]
  target_shape = [length] + trailing_static_dims
  return pad_or_clip_nd(t, target_shape)
|
|
|
|
|
def pad_or_clip_nd(tensor, output_shape):
  """Clips, then zero-pads, `tensor` so that it matches `output_shape`.

  Each dimension is handled independently: it is first sliced down when it is
  larger than the requested size, then padded at its end when it is smaller.
  A requested size of None leaves that dimension untouched.

  Args:
    tensor: Input tensor to pad or clip.
    output_shape: A list of integers / scalar tensors (or None for dynamic dim)
      representing the size to pad or clip each dimension of the input tensor.

  Returns:
    Input tensor padded and clipped to the output shape. Its static shape is
    set from `output_shape`, with tf.Tensor entries recorded as unknown.
  """
  input_shape = tf.shape(tensor)

  # Slice size per dimension; -1 asks tf.slice to keep the whole dimension.
  slice_sizes = []
  for axis, target in enumerate(output_shape):
    if target is None:
      slice_sizes.append(-1)
    else:
      slice_sizes.append(
          tf.where(input_shape[axis] - target > 0, target, -1))
  clipped = tf.slice(
      tensor,
      begin=tf.zeros(len(slice_sizes), dtype=tf.int32),
      size=slice_sizes)

  # After clipping no dimension exceeds its target, so only trailing padding
  # is ever needed.
  clipped_shape = tf.shape(clipped)
  pad_after = []
  for axis, target in enumerate(output_shape):
    if target is None:
      pad_after.append(0)
    else:
      pad_after.append(target - clipped_shape[axis])
  pad_before = tf.zeros(len(pad_after), dtype=tf.int32)
  paddings = tf.stack([pad_before, pad_after], axis=1)
  padded = tf.pad(clipped, paddings=paddings)

  # Only sizes that are not tf.Tensor objects can be recorded statically.
  static_dims = [
      None if isinstance(dim, tf.Tensor) else dim for dim in output_shape
  ]
  padded.set_shape(static_dims)
  return padded
|
|
|
|
|
def combined_static_and_dynamic_shape(tensor):
  """Builds a per-dimension shape list, preferring static sizes.

  For every dimension the statically known size is used when there is one;
  otherwise the matching entry of `tf.shape(tensor)` is used. This is useful
  to preserve static shapes when available in reshape operations.

  Args:
    tensor: A tensor of any type.

  Returns:
    A list with one entry per dimension of `tensor`, each entry being either
    an integer or a scalar tensor.
  """
  static_dims = tensor.shape.as_list()
  dynamic_dims = tf.shape(tensor)
  return [
      dynamic_dims[axis] if size is None else size
      for axis, size in enumerate(static_dims)
  ]
|
|
|
|
|
def static_or_dynamic_map_fn(fn, elems, dtype=None,
                             parallel_iterations=32, back_prop=True):
  """Runs map_fn as a (static) for loop when possible.

  When the leading dimension of the inputs is statically known, the mapping
  is unrolled: inputs are unstacked, `fn` is called once per slice in a plain
  Python loop, and the results are stacked again. This allows graphs to be
  acyclic when the batch size is static. Otherwise the call is forwarded to
  tf.map_fn.
  For comparison, see https://www.tensorflow.org/api_docs/python/tf/map_fn.

  Note that `static_or_dynamic_map_fn` currently is not *fully* interchangeable
  with the default tf.map_fn function as it does not accept nested inputs (only
  Tensors or lists of Tensors). Likewise, the output of `fn` can only be a
  Tensor or list of Tensors.

  TODO(jonathanhuang): make this function fully interchangeable with tf.map_fn.

  Args:
    fn: The callable to be performed. It receives one argument per step: a
      single slice when `elems` is a Tensor, or a tuple holding one slice per
      input when `elems` is a list.
    elems: A tensor or list of tensors, each of which will be unpacked along
      their first dimension. The sequence of the resulting slices will be
      applied to fn.
    dtype: (optional) The output type(s) of fn. Only forwarded to tf.map_fn;
      it is not used on the unrolled path.
    parallel_iterations: (optional) number of batch items to process in
      parallel. This flag is only used if the native tf.map_fn is used
      and defaults to 32 instead of 10 (unlike the standard tf.map_fn default).
    back_prop: (optional) True enables support for back propagation.
      This flag is only used if the native tf.map_fn is used.

  Returns:
    A tensor or list of tensors. Each tensor packs the results of applying fn
    to tensors unpacked from elems along the first dimension, from first to
    last.

  Raises:
    ValueError: if `elems` is not a Tensor or a list of Tensors.
    ValueError: if `fn` does not return a Tensor or list of Tensors.
  """
  elems_error = '`elems` must be a Tensor or list of Tensors.'

  if isinstance(elems, list):
    if any(not isinstance(elem, tf.Tensor) for elem in elems):
      raise ValueError(elems_error)

    all_dims = [elem.shape.as_list() for elem in elems]
    # Unrolling needs every input to have a known, non-zero leading size that
    # matches the first input's; anything else is left to tf.map_fn.
    must_use_map_fn = any(
        not dims or not dims[0] or dims[0] != all_dims[0][0]
        for dims in all_dims)
    if must_use_map_fn:
      return tf.map_fn(fn, elems, dtype, parallel_iterations, back_prop)

    unstacked_inputs = [tf.unstack(elem) for elem in elems]
    outputs = [fn(step_args) for step_args in zip(*unstacked_inputs)]
  else:
    if not isinstance(elems, tf.Tensor):
      raise ValueError(elems_error)

    elems_dims = elems.shape.as_list()
    if not (elems_dims and elems_dims[0]):
      return tf.map_fn(fn, elems, dtype, parallel_iterations, back_prop)

    outputs = [fn(step_arg) for step_arg in tf.unstack(elems)]

  # Single-tensor outputs are stacked directly.
  if all(isinstance(output, tf.Tensor) for output in outputs):
    return tf.stack(outputs)

  # List outputs are transposed so that each position is stacked separately.
  outputs_are_tensor_lists = all(
      isinstance(output, list) and
      all(isinstance(entry, tf.Tensor) for entry in output)
      for output in outputs)
  if outputs_are_tensor_lists:
    return [tf.stack(column) for column in zip(*outputs)]

  raise ValueError('`fn` should return a Tensor or a list of Tensors.')
|
|
|
|
|
def check_min_image_dim(min_dim, image_tensor):
  """Checks that the image height and width are at least `min_dim`.

  When both the height and width reported by `static_shape` are known, the
  check is performed immediately at graph construction time. Otherwise an
  Assert op on the dynamic shape is attached as a control dependency.

  Args:
    min_dim: The minimum number of pixels along the width and height of the
      image.
    image_tensor: The image tensor to check size for. The dynamic check reads
      indices 1 and 2 of its shape as height and width (presumably a
      [batch, height, width, channels] layout -- confirm against callers).

  Returns:
    If `image_tensor` has dynamic size, returns an identity of `image_tensor`
    that carries the Assert control dependency. Otherwise returns
    `image_tensor` itself.

  Raises:
    ValueError: if the static height or width of `image_tensor` is smaller
      than `min_dim`.
  """
  static_dims = image_tensor.get_shape()
  height = static_shape.get_height(static_dims)
  width = static_shape.get_width(static_dims)

  if height is not None and width is not None:
    # Fully static size: fail fast while the graph is being built.
    if height < min_dim or width < min_dim:
      raise ValueError(
          'image size must be >= %d in both height and width; image dim = %d,%d' %
          (min_dim, height, width))
    return image_tensor

  # At least one of the two sizes is unknown: defer the check to run time.
  height_ok = tf.greater_equal(tf.shape(image_tensor)[1], min_dim)
  width_ok = tf.greater_equal(tf.shape(image_tensor)[2], min_dim)
  shape_assert = tf.Assert(
      tf.logical_and(height_ok, width_ok),
      ['image size must be >= {} in both height and width.'.format(min_dim)])
  with tf.control_dependencies([shape_assert]):
    return tf.identity(image_tensor)
|
|
|
|
|
def assert_shape_equal(shape_a, shape_b):
  """Asserts that shape_a and shape_b are equal.

  If every entry of both shapes is a Python int, the comparison happens
  immediately and a ValueError is raised on mismatch.

  Otherwise the comparison is deferred to a tf.assert_equal op, which fails
  when it is executed and the shapes mismatch.

  Args:
    shape_a: a sequence (list or tuple) containing the shape of the first
      tensor.
    shape_b: a sequence (list or tuple) containing the shape of the second
      tensor.

  Returns:
    A tf.no_op() when both shapes are fully static and equal, or a
    tf.assert_equal() op when at least one entry is not a Python int.

  Raises:
    ValueError: When shapes are both static and unequal.
  """
  fully_static = (all(isinstance(dim, int) for dim in shape_a) and
                  all(isinstance(dim, int) for dim in shape_b))
  if not fully_static:
    return tf.assert_equal(shape_a, shape_b)

  # Compare as lists so that a list and a tuple holding the same dimensions
  # are not reported as a mismatch (a list never compares equal to a tuple).
  if list(shape_a) != list(shape_b):
    raise ValueError('Unequal shapes {}, {}'.format(shape_a, shape_b))
  return tf.no_op()
|
|
|
|
|
def assert_shape_equal_along_first_dimension(shape_a, shape_b):
  """Asserts that shape_a and shape_b agree on their 0th dimension.

  If the first entry of both shapes is a Python int, the comparison happens
  immediately and a ValueError is raised on mismatch.

  Otherwise the comparison is deferred to a tf.assert_equal op on the two
  first entries, which fails when it is executed and they mismatch.

  Args:
    shape_a: a list containing shape of the first tensor.
    shape_b: a list containing shape of the second tensor.

  Returns:
    A tf.no_op() when both first dimensions are static and equal, or a
    tf.assert_equal() op when at least one of them is not a Python int.

  Raises:
    ValueError: When the first dimensions are both static and unequal.
  """
  first_a = shape_a[0]
  first_b = shape_b[0]

  both_static = isinstance(first_a, int) and isinstance(first_b, int)
  if not both_static:
    return tf.assert_equal(first_a, first_b)

  if first_a != first_b:
    raise ValueError('Unequal first dimension {}, {}'.format(
        first_a, first_b))
  return tf.no_op()
|
|
|
|
|
def assert_box_normalized(boxes, maximum_normalized_coordinate=1.1):
  """Builds an assertion that every box coordinate is normalized.

  The smallest coordinate over the whole tensor must be >= 0 and the largest
  must be <= `maximum_normalized_coordinate`.

  Args:
    boxes: a tensor of shape [N, 4] where N is the number of boxes.
    maximum_normalized_coordinate: Maximum coordinate value to be considered
      as normalized, default to 1.1.

  Returns:
    a tf.Assert op which fails when the input box tensor is not normalized.
    The `boxes` tensor is attached as the data reported on failure. Nothing is
    checked by this function itself; the check happens when the returned op
    runs.
  """
  smallest_coordinate = tf.reduce_min(boxes)
  largest_coordinate = tf.reduce_max(boxes)

  below_upper_bound = tf.less_equal(
      largest_coordinate, maximum_normalized_coordinate)
  above_lower_bound = tf.greater_equal(smallest_coordinate, 0)

  return tf.Assert(
      tf.logical_and(below_upper_bound, above_lower_bound), [boxes])
|
|