# coding=utf-8
# Copyright 2023 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Transform functions for preprocessing."""
from typing import Any, Optional, Tuple

import tensorflow as tf


SizeTuple = Tuple[tf.Tensor, tf.Tensor]  # (height, width).
Self = Any

PADDING_VALUE = -1
PADDING_VALUE_STR = b""

NOTRACK_BOX = (0., 0., 0., 0.)  # No-track bounding box for padding.
NOTRACK_RBOX = (0., 0., 0., 0., 0.)  # No-track rotated box for padding.


def crop_or_pad_boxes(boxes: tf.Tensor, top: int, left: int, height: int,
                      width: int, h_orig: int, w_orig: int,
                      min_cropped_area: Optional[float] = None) -> tf.Tensor:
  """Transforms the relative box coordinates according to the frame crop.

  Note that, if height/width are larger than h_orig/w_orig, this function
  implements the equivalent of padding.

  Args:
    boxes: Tensor of bounding boxes with shape (..., 4).
    top: Top of crop box in absolute pixel coordinates.
    left: Left of crop box in absolute pixel coordinates.
    height: Height of crop box in absolute pixel coordinates.
    width: Width of crop box in absolute pixel coordinates.
    h_orig: Original image height in absolute pixel coordinates.
    w_orig: Original image width in absolute pixel coordinates.
    min_cropped_area: If set, remove cropped boxes whose area relative to the
      original box is less than min_cropped_area or that cover the entire
      image.

  Returns:
    Boxes tensor with same shape as input boxes but updated values.
  """
  # Video track bounding boxes: [seq_len, num_tracks, 4]
  # Image bounding boxes: [num_instances, 4]
  assert boxes.shape[-1] == 4
  seq_len = tf.shape(boxes)[0]
  not_padding = tf.reduce_any(tf.not_equal(boxes, PADDING_VALUE), axis=-1)
  has_tracks = len(boxes.shape) == 3
  if has_tracks:
    num_tracks = tf.shape(boxes)[1]
  else:
    assert len(boxes.shape) == 2
    num_tracks = 1

  # Transform the box coordinates.
  a = tf.cast(tf.stack([h_orig, w_orig]), tf.float32)
  b = tf.cast(tf.stack([top, left]), tf.float32)
  c = tf.cast(tf.stack([height, width]), tf.float32)
  boxes = tf.reshape(
      (tf.reshape(boxes, (seq_len, num_tracks, 2, 2)) * a - b) / c,
      (seq_len, num_tracks, len(NOTRACK_BOX)),
  )

  # Filter the valid boxes.
  areas_uncropped = tf.reduce_prod(
      tf.maximum(boxes[..., 2:] - boxes[..., :2], 0), axis=-1
  )
  boxes = tf.minimum(tf.maximum(boxes, 0.0), 1.0)
  if has_tracks:
    cond = tf.reduce_all((boxes[:, :, 2:] - boxes[:, :, :2]) > 0.0, axis=-1)
    boxes = tf.where(cond[:, :, tf.newaxis], boxes, NOTRACK_BOX)
    if min_cropped_area is not None:
      areas_cropped = tf.reduce_prod(
          tf.maximum(boxes[..., 2:] - boxes[..., :2], 0), axis=-1
      )
      boxes = tf.where(
          tf.logical_and(
              tf.reduce_max(areas_cropped, axis=0, keepdims=True)
              > min_cropped_area * areas_uncropped,
              tf.reduce_min(areas_cropped, axis=0, keepdims=True) < 1,
          )[..., tf.newaxis],
          boxes,
          tf.constant(NOTRACK_BOX)[tf.newaxis, tf.newaxis],
      )
  else:
    boxes = tf.reshape(boxes, (seq_len, 4))
    # Image ops use `-1`, whereas video ops above use `NOTRACK_BOX`.
    boxes = tf.where(not_padding[..., tf.newaxis], boxes, PADDING_VALUE)

  return boxes
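

# Illustrative usage sketch (not part of the original module). The box, crop
# window, and image size below are assumed values chosen so the arithmetic is
# easy to follow.
def _example_crop_or_pad_boxes():
  # A single relative box spanning the center of a 100x100 image.
  boxes = tf.constant([[0.25, 0.25, 0.75, 0.75]])  # [num_instances, 4].
  # Crop the top-left 50x50 quadrant: coordinates are rescaled to the crop
  # window and clipped to [0, 1], giving [[0.5, 0.5, 1.0, 1.0]].
  return crop_or_pad_boxes(
      boxes, top=0, left=0, height=50, width=50, h_orig=100, w_orig=100)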


def cxcywha_to_corners(cxcywha: tf.Tensor) -> tf.Tensor:
  """Convert [cx, cy, w, h, a] to four corners of [x, y].

  TF version of cxcywha_to_corners in
  third_party/py/scenic/model_lib/base_models/box_utils.py.

  Args:
    cxcywha: [..., 5]-tf.Tensor of [center-x, center-y, width, height, angle]
      representation of rotated boxes. Angle is in radians and the center of
      rotation is defined by the [center-x, center-y] point.

  Returns:
    [..., 4, 2]-tf.Tensor of four corners of the rotated box as [x, y] points.
  """
  assert cxcywha.shape[-1] == 5, "Expected [..., [cx, cy, w, h, a]] input."
  bs = cxcywha.shape[:-1]
  cx, cy, w, h, a = tf.split(cxcywha, num_or_size_splits=5, axis=-1)
  xs = tf.constant([.5, .5, -.5, -.5]) * w
  ys = tf.constant([-.5, .5, .5, -.5]) * h
  pts = tf.stack([xs, ys], axis=-1)
  sin = tf.sin(a)
  cos = tf.cos(a)
  rot = tf.reshape(tf.concat([cos, -sin, sin, cos], axis=-1), (*bs, 2, 2))
  offset = tf.reshape(tf.concat([cx, cy], -1), (*bs, 1, 2))
  corners = pts @ rot + offset
  return corners
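

# Illustrative sketch (not part of the original module). With angle zero the
# rotation matrix is the identity, so the result is simply the axis-aligned
# corners; the box values are assumed for demonstration.
def _example_cxcywha_to_corners():
  box = tf.constant([0.5, 0.5, 0.4, 0.2, 0.0])  # [cx, cy, w, h, a].
  # Returns the four [x, y] corners:
  # [[0.7, 0.4], [0.7, 0.6], [0.3, 0.6], [0.3, 0.4]].
  return cxcywha_to_corners(box)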


def corners_to_cxcywha(corners: tf.Tensor) -> tf.Tensor:
  """Convert four corners of [x, y] to [cx, cy, w, h, a].

  Args:
    corners: [..., 4, 2]-tf.Tensor of four corners of the rotated box as [x, y]
      points.

  Returns:
    [..., 5]-tf.Tensor of [center-x, center-y, width, height, angle]
    representation of rotated boxes. Angle is in radians and the center of
    rotation is defined by the [center-x, center-y] point.
  """
  assert corners.shape[-2] == 4 and corners.shape[-1] == 2, (
      "Expected [..., 4, 2] corners input.")

  cornersx, cornersy = tf.unstack(corners, axis=-1)
  cx = tf.reduce_mean(cornersx, axis=-1)
  cy = tf.reduce_mean(cornersy, axis=-1)
  wcornersx = (cornersx[..., 0] + cornersx[..., 1]
               - cornersx[..., 2] - cornersx[..., 3])
  wcornersy = (cornersy[..., 0] + cornersy[..., 1]
               - cornersy[..., 2] - cornersy[..., 3])
  hcornersy = (-cornersy[..., 0] + cornersy[..., 1]
               + cornersy[..., 2] - cornersy[..., 3])
  a = -tf.atan2(wcornersy, wcornersx)
  cos = tf.cos(a)
  w = wcornersx / (2 * cos)
  h = hcornersy / (2 * cos)
  cxcywha = tf.stack([cx, cy, w, h, a], axis=-1)

  return cxcywha
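

# Illustrative round-trip sketch (not part of the original module): converting
# an assumed [cx, cy, w, h, a] box to corners and back recovers the original
# values up to floating-point error.
def _example_corners_roundtrip():
  box = tf.constant([0.5, 0.5, 0.4, 0.2, 0.0])  # [cx, cy, w, h, a].
  corners = cxcywha_to_corners(box)  # Four [x, y] points, shape (4, 2).
  # Recovers approximately [0.5, 0.5, 0.4, 0.2, 0.0].
  return corners_to_cxcywha(corners)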