Spaces:
Running
Running
# coding=utf-8
# Copyright 2023 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Transform functions for preprocessing.""" | |
from typing import Any, Optional, Tuple | |
import tensorflow as tf | |
# Type aliases used in annotations.
# A (height, width) pair of tensors.
SizeTuple = Tuple[tf.Tensor, tf.Tensor]
Self = Any

# Fill values that mark padded entries (numeric and bytes/string variants).
PADDING_VALUE = -1
PADDING_VALUE_STR = b""

# All-zero "no-track" placeholders used for padding:
# a 4-value bounding box and a 5-value rotated bounding box (rbox).
NOTRACK_BOX = (0.0, 0.0, 0.0, 0.0)
NOTRACK_RBOX = (0.0, 0.0, 0.0, 0.0, 0.0)
def crop_or_pad_boxes(boxes, top, left, height,
                      width, h_orig, w_orig,
                      min_cropped_area=None):
  """Re-expresses relative boxes in the coordinate frame of a crop window.

  Each box is read as two (y, x) points given relative to the original image.
  The points are scaled to pixels with (h_orig, w_orig), shifted by the crop
  origin (top, left), divided by the crop size (height, width) and finally
  clipped to [0, 1]. A crop window that is larger than the original image
  therefore behaves like padding.

  Args:
    boxes: Tensor of bounding boxes, either rank 2 with shape (N, 4) for image
      boxes or rank 3 with shape (N, T, 4) for video track boxes.
    top: Top of crop box in absolute pixel coordinates.
    left: Left of crop box in absolute pixel coordinates.
    height: Height of crop box in absolute pixel coordinates.
    width: Width of crop box in absolute pixel coordinates.
    h_orig: Original image height in absolute pixel coordinates.
    w_orig: Original image width in absolute pixel coordinates.
    min_cropped_area: Optional threshold, only applied to rank-3 (track)
      inputs. When set, a box is kept only if the largest cropped area along
      the first axis exceeds `min_cropped_area` times the box's uncropped area
      and the smallest cropped area along the first axis is below 1 (i.e. the
      box does not always cover the entire image). Rejected boxes become
      NOTRACK_BOX.

  Returns:
    Boxes tensor with the same shape as the input but updated values. For
    rank-3 inputs, boxes with no positive extent after clipping are replaced
    by NOTRACK_BOX. For rank-2 inputs, rows whose four input values all equal
    PADDING_VALUE are returned as PADDING_VALUE.
  """
  assert boxes.shape[-1] == 4
  seq_len = tf.shape(boxes)[0]
  # Must be derived from the raw input, before coordinates are rewritten.
  is_real_box = tf.reduce_any(tf.not_equal(boxes, PADDING_VALUE), axis=-1)

  rank = len(boxes.shape)
  has_tracks = rank == 3
  if has_tracks:
    num_tracks = tf.shape(boxes)[1]
  else:
    assert rank == 2
    num_tracks = 1

  def _box_area(b):
    # Area of [min, min, max, max] boxes; negative extents count as zero.
    extent = tf.maximum(b[..., 2:] - b[..., :2], 0)
    return tf.reduce_prod(extent, axis=-1)

  # Map the two (y, x) points of every box into the crop window's frame.
  orig_size = tf.cast(tf.stack([h_orig, w_orig]), tf.float32)
  crop_origin = tf.cast(tf.stack([top, left]), tf.float32)
  crop_size = tf.cast(tf.stack([height, width]), tf.float32)
  points = tf.reshape(boxes, (seq_len, num_tracks, 2, 2))
  points = (points * orig_size - crop_origin) / crop_size
  boxes = tf.reshape(points, (seq_len, num_tracks, len(NOTRACK_BOX)))

  # Areas are measured before clipping so they can serve as the reference
  # for the `min_cropped_area` filter below.
  areas_uncropped = _box_area(boxes)
  boxes = tf.minimum(tf.maximum(boxes, 0.0), 1.0)

  if not has_tracks:
    boxes = tf.reshape(boxes, (seq_len, 4))
    # Image ops use `-1`, whereas the video path below uses `NOTRACK_BOX`.
    return tf.where(is_real_box[..., tf.newaxis], boxes, PADDING_VALUE)

  # Video tracks: drop boxes that have no positive extent after clipping.
  extent = boxes[:, :, 2:] - boxes[:, :, :2]
  has_extent = tf.reduce_all(extent > 0.0, axis=-1)
  boxes = tf.where(has_extent[:, :, tf.newaxis], boxes, NOTRACK_BOX)
  if min_cropped_area is None:
    return boxes

  areas_cropped = _box_area(boxes)
  largest_area = tf.reduce_max(areas_cropped, axis=0, keepdims=True)
  smallest_area = tf.reduce_min(areas_cropped, axis=0, keepdims=True)
  keep = tf.logical_and(
      largest_area > min_cropped_area * areas_uncropped,
      smallest_area < 1,
  )
  no_track = tf.constant(NOTRACK_BOX)[tf.newaxis, tf.newaxis]
  return tf.where(keep[..., tf.newaxis], boxes, no_track)
def cxcywha_to_corners(cxcywha):
  """Convert [cx, cy, w, h, a] to four corners of [x, y].

  TF version of cxcywha_to_corners in
  third_party/py/scenic/model_lib/base_models/box_utils.py.

  Before rotation the corners are offset from the center by
  (+w/2, -h/2), (+w/2, +h/2), (-w/2, +h/2), (-w/2, -h/2), in that order. Each
  offset [x, y] is then mapped to
  [x * cos(a) + y * sin(a), -x * sin(a) + y * cos(a)] and shifted by [cx, cy].

  The rotation matrix and the center offset are assembled with stack/concat
  instead of reshaping to the static batch shape, so leading dimensions may be
  unknown at graph-construction time.

  Args:
    cxcywha: [..., 5]-tf.Tensor of [center-x, center-y, width, height, angle]
      representation of rotated boxes. Angle is in radians and center of
      rotation is defined by [center-x, center-y] point.

  Returns:
    [..., 4, 2]-tf.Tensor of four corners of the rotated box as [x, y] points.
  """
  assert cxcywha.shape[-1] == 5, "Expected [..., [cx, cy, w, h, a] input."
  # Each split keeps a trailing axis of size 1, which broadcasts against the
  # four per-corner factors below.
  cx, cy, w, h, a = tf.split(cxcywha, num_or_size_splits=5, axis=-1)
  xs = tf.constant([.5, .5, -.5, -.5]) * w
  ys = tf.constant([-.5, .5, .5, -.5]) * h
  pts = tf.stack([xs, ys], axis=-1)  # [..., 4, 2]
  sin = tf.sin(a)
  cos = tf.cos(a)
  # [..., 2, 2] matrix [[cos, -sin], [sin, cos]], applied to row vectors.
  rot = tf.stack(
      [tf.concat([cos, -sin], axis=-1), tf.concat([sin, cos], axis=-1)],
      axis=-2,
  )
  # [..., 1, 2] so the center broadcasts over the four corners.
  offset = tf.concat([cx, cy], axis=-1)[..., tf.newaxis, :]
  corners = pts @ rot + offset
  return corners
def corners_to_cxcywha(corners):
  """Convert four corners of [x, y] to [cx, cy, w, h, a].

  The formulas assume the corner ordering produced by `cxcywha_to_corners`:
  corners 0 and 1 lie on the +w side, corners 1 and 2 on the +h side.

  Width and height are recovered by projecting the summed edge vectors onto
  the rotated box axes rather than dividing by cos(a). For rectangular input
  the result is algebraically the same, but it stays well-defined for boxes
  rotated by (close to) +/-90 degrees, where cos(a) is (close to) zero.

  Args:
    corners: [..., 4, 2]-tf.Tensor of four corners of the rotated box as [x, y]
      points.

  Returns:
    [..., 5]-tf.Tensor of [center-x, center-y, width, height, angle]
    representation of rotated boxes. Angle is in radians and center of rotation
    is defined by [center-x, center-y] point.
  """
  assert corners.shape[-2] == 4 and corners.shape[-1] == 2, (
      "Expected [..., 4, 2] corners input.")
  cornersx, cornersy = tf.unstack(corners, axis=-1)
  cx = tf.reduce_mean(cornersx, axis=-1)
  cy = tf.reduce_mean(cornersy, axis=-1)
  x0, x1, x2, x3 = tf.unstack(cornersx, axis=-1)
  y0, y1, y2, y3 = tf.unstack(cornersy, axis=-1)
  # Twice the box's width vector: (corner0 + corner1) - (corner2 + corner3).
  wcornersx = x0 + x1 - x2 - x3
  wcornersy = y0 + y1 - y2 - y3
  # Twice the box's height vector: (corner1 + corner2) - (corner0 + corner3).
  hcornersx = -x0 + x1 + x2 - x3
  hcornersy = -y0 + y1 + y2 - y3
  a = -tf.atan2(wcornersy, wcornersx)
  cos = tf.cos(a)
  sin = tf.sin(a)
  # Project onto the rotated axes; equals dividing the x (resp. y) component
  # by 2 * cos(a) for a rectangle, without the 0 / 0 when cos(a) vanishes.
  w = (wcornersx * cos - wcornersy * sin) / 2
  h = (hcornersx * sin + hcornersy * cos) / 2
  cxcywha = tf.stack([cx, cy, w, h, a], axis=-1)
  return cxcywha