Spaces:

ondrejbiza
/

isa

Running

App Files Files Community

isa / invariant_slot_attention /lib /transforms.py

ondrejbiza

Working on isa demo.

a560c26 about 1 year ago

raw history blame contribute delete

No virus

6.01 kB

	# coding=utf-8
	# Copyright 2023 The Google Research Authors.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	"""Transform functions for preprocessing."""
	from typing import Any, Optional, Tuple

	import tensorflow as tf


	# A 2D size expressed as a pair of tensors, ordered (height, width).
	SizeTuple = Tuple[tf.Tensor, tf.Tensor]
	# Loose alias, presumably for annotating an instance's own type; it is not
	# referenced in the visible part of this file.
	Self = Any

	# Numeric padding sentinel. `crop_or_pad_boxes` treats a box whose
	# coordinates all equal this value as padding.
	PADDING_VALUE = -1
	# Presumably the padding sentinel for string features (unused in the visible
	# part of this file).
	PADDING_VALUE_STR = b""

	# Placeholder box written in place of a missing/invalid track box.
	NOTRACK_BOX = (0.0, 0.0, 0.0, 0.0)
	# Five-element counterpart of NOTRACK_BOX for rotated boxes.
	NOTRACK_RBOX = (0.0, 0.0, 0.0, 0.0, 0.0)


	def crop_or_pad_boxes(boxes, top, left, height,
	                      width, h_orig, w_orig,
	                      min_cropped_area=None):
	  """Re-expresses relative box coordinates in the frame of a crop window.

	  Each box is viewed as two 2-vectors that are scaled by `(h_orig, w_orig)`,
	  shifted by `(top, left)` and divided by `(height, width)`, then clipped to
	  [0, 1]. A crop window larger than the original image is equivalent to
	  padding.

	  Args:
	    boxes: Tensor of bounding boxes with shape (N, 4) for images or
	      (N, T, 4) for video tracks.
	    top: Top of the crop window in absolute pixel coordinates.
	    left: Left of the crop window in absolute pixel coordinates.
	    height: Height of the crop window in absolute pixel coordinates.
	    width: Width of the crop window in absolute pixel coordinates.
	    h_orig: Height of the original image in absolute pixel coordinates.
	    w_orig: Width of the original image in absolute pixel coordinates.
	    min_cropped_area: Optional threshold, only used for rank-3 (track) input.
	      Boxes are kept only where the largest cropped area along the first axis
	      exceeds `min_cropped_area` times the box's area before clipping, and
	      the smallest cropped area along the first axis is below 1 (i.e. the
	      box does not always cover the entire image). Everything else is
	      replaced by `NOTRACK_BOX`.

	  Returns:
	    Tensor with the same shape as `boxes` holding the updated coordinates.
	  """
	  assert boxes.shape[-1] == 4
	  first_dim = tf.shape(boxes)[0]
	  # Record which boxes are real (not all PADDING_VALUE) before the
	  # coordinates are rewritten below.
	  is_real_box = tf.reduce_any(tf.not_equal(boxes, PADDING_VALUE), axis=-1)

	  # Rank 3 means video tracks (N, T, 4); rank 2 means image boxes (N, 4).
	  is_video = len(boxes.shape) == 3
	  if is_video:
	    track_dim = tf.shape(boxes)[1]
	  else:
	    assert len(boxes.shape) == 2
	    track_dim = 1

	  # Map coordinates from the original image frame into the crop frame.
	  orig_size = tf.cast(tf.stack([h_orig, w_orig]), tf.float32)
	  crop_origin = tf.cast(tf.stack([top, left]), tf.float32)
	  crop_size = tf.cast(tf.stack([height, width]), tf.float32)
	  corner_pairs = tf.reshape(boxes, (first_dim, track_dim, 2, 2))
	  corner_pairs = (corner_pairs * orig_size - crop_origin) / crop_size
	  boxes = tf.reshape(corner_pairs, (first_dim, track_dim, len(NOTRACK_BOX)))

	  # Box areas in the crop frame, measured before clipping to [0, 1].
	  extent = tf.maximum(boxes[..., 2:] - boxes[..., :2], 0)
	  areas_uncropped = tf.reduce_prod(extent, axis=-1)
	  boxes = tf.minimum(tf.maximum(boxes, 0.0), 1.0)

	  if not is_video:
	    boxes = tf.reshape(boxes, (first_dim, 4))
	    # Image ops use `-1`, whereas the video path below uses `NOTRACK_BOX`.
	    return tf.where(is_real_box[..., tf.newaxis], boxes, PADDING_VALUE)

	  # Boxes that collapsed to zero extent in either dimension become no-track.
	  has_extent = tf.reduce_all(
	      (boxes[:, :, 2:] - boxes[:, :, :2]) > 0.0, axis=-1)
	  boxes = tf.where(has_extent[:, :, tf.newaxis], boxes, NOTRACK_BOX)
	  if min_cropped_area is None:
	    return boxes

	  clipped_extent = tf.maximum(boxes[..., 2:] - boxes[..., :2], 0)
	  areas_cropped = tf.reduce_prod(clipped_extent, axis=-1)
	  large_enough = (
	      tf.reduce_max(areas_cropped, axis=0, keepdims=True)
	      > min_cropped_area * areas_uncropped)
	  not_full_image = tf.reduce_min(areas_cropped, axis=0, keepdims=True) < 1
	  keep = tf.logical_and(large_enough, not_full_image)
	  return tf.where(
	      keep[..., tf.newaxis],
	      boxes,
	      tf.constant(NOTRACK_BOX)[tf.newaxis, tf.newaxis],
	  )


	def cxcywha_to_corners(cxcywha):
	  """Convert [cx, cy, w, h, a] to four corners of [x, y].

	  TF version of cxcywha_to_corners in
	  third_party/py/scenic/model_lib/base_models/box_utils.py.

	  The rotation matrix and center offset are assembled with stack/newaxis
	  instead of reshaping to the static batch shape, so the leading dimensions
	  do not need to be statically known (only the last dimension must be 5).

	  Args:
	    cxcywha: [..., 5]-tf.Tensor of [center-x, center-y, width, height, angle]
	      representation of rotated boxes. Angle is in radians and center of
	      rotation is defined by [center-x, center-y] point.

	  Returns:
	    [..., 4, 2]-tf.Tensor of four corners of the rotated box as [x, y] points.
	  """
	  assert cxcywha.shape[-1] == 5, "Expected [..., [cx, cy, w, h, a] input."
	  # Each component keeps a trailing singleton axis: shape [..., 1].
	  cx, cy, w, h, a = tf.split(cxcywha, num_or_size_splits=5, axis=-1)

	  # Corner offsets from the center in the box's own (unrotated) frame,
	  # broadcast against the trailing singleton axis to shape [..., 4].
	  xs = tf.constant([.5, .5, -.5, -.5]) * w
	  ys = tf.constant([-.5, .5, .5, -.5]) * h
	  pts = tf.stack([xs, ys], axis=-1)  # [..., 4, 2]

	  sin = tf.sin(a)
	  cos = tf.cos(a)
	  # [..., 2, 2] matrix [[cos, -sin], [sin, cos]], applied as `pts @ rot`.
	  rot = tf.stack(
	      [tf.concat([cos, -sin], axis=-1), tf.concat([sin, cos], axis=-1)],
	      axis=-2)
	  # [..., 1, 2] so the center broadcasts over the four corners.
	  offset = tf.concat([cx, cy], axis=-1)[..., tf.newaxis, :]
	  corners = pts @ rot + offset
	  return corners


	def corners_to_cxcywha(corners):
	  """Convert four corners of [x, y] to [cx, cy, w, h, a].

	  The corner order is expected to match the output of `cxcywha_to_corners`,
	  i.e. box-frame offsets (+w/2, -h/2), (+w/2, +h/2), (-w/2, +h/2),
	  (-w/2, -h/2) rotated about the center.

	  Width and height are recovered by projecting the summed edge vectors onto
	  the rotated box axes. For rotated rectangles this equals dividing the
	  x/y components by cos(angle), but it stays well-conditioned when the
	  angle is at or near +-90 degrees, where cos(angle) is (close to) zero.

	  Args:
	    corners: [..., 4, 2]-tf.Tensor of four corners of the rotated box as
	      [x, y] points.

	  Returns:
	    [..., 5]-tf.Tensor of [center-x, center-y, width, height, angle]
	    representation of rotated boxes. Angle is in radians and center of rotation
	    is defined by [center-x, center-y] point.
	  """
	  assert corners.shape[-2] == 4 and corners.shape[-1] == 2, (
	      "Expected [..., 4, 2] input.")

	  cornersx, cornersy = tf.unstack(corners, axis=-1)
	  cx = tf.reduce_mean(cornersx, axis=-1)
	  cy = tf.reduce_mean(cornersy, axis=-1)

	  # Twice the box's width edge vector (corners 0, 1 minus corners 2, 3).
	  wcornersx = (
	      cornersx[..., 0] + cornersx[..., 1] - cornersx[..., 2] - cornersx[..., 3])
	  wcornersy = (
	      cornersy[..., 0] + cornersy[..., 1] - cornersy[..., 2] - cornersy[..., 3])
	  # Twice the box's height edge vector (corners 1, 2 minus corners 0, 3).
	  hcornersx = (
	      -cornersx[..., 0] + cornersx[..., 1] + cornersx[..., 2] - cornersx[..., 3])
	  hcornersy = (
	      -cornersy[..., 0] + cornersy[..., 1] + cornersy[..., 2] - cornersy[..., 3])

	  a = -tf.atan2(wcornersy, wcornersx)
	  cos = tf.cos(a)
	  sin = tf.sin(a)
	  # Project onto the rotated axes (cos, -sin) and (sin, cos) rather than
	  # dividing by cos, which degenerates when the box is rotated by 90 degrees.
	  w = (wcornersx * cos - wcornersy * sin) / 2
	  h = (hcornersx * sin + hcornersy * cos) / 2
	  cxcywha = tf.stack([cx, cy, w, h, a], axis=-1)

	  return cxcywha