Spaces:

PAIR
/

Text2Video-Zero

Runtime error

App Files Files Community

Text2Video-Zero / annotator /uniformer /mmcv /image /geometric.py

lev1

auto anotators

f7ac35e over 1 year ago

raw

history blame

25.2 kB

	# Copyright (c) OpenMMLab. All rights reserved.
	import numbers

	import cv2
	import numpy as np

	from ..utils import to_2tuple
	from .io import imread_backend

	try:
	from PIL import Image
	except ImportError:
	Image = None


	def _scale_size(size, scale):
	"""Rescale a size by a ratio.

	Args:
	size (tuple[int]): (w, h).
	scale (float \| tuple(float)): Scaling factor.

	Returns:
	tuple[int]: scaled size.
	"""
	if isinstance(scale, (float, int)):
	scale = (scale, scale)
	w, h = size
	return int(w * float(scale[0]) + 0.5), int(h * float(scale[1]) + 0.5)


	cv2_interp_codes = {
	'nearest': cv2.INTER_NEAREST,
	'bilinear': cv2.INTER_LINEAR,
	'bicubic': cv2.INTER_CUBIC,
	'area': cv2.INTER_AREA,
	'lanczos': cv2.INTER_LANCZOS4
	}

	if Image is not None:
	pillow_interp_codes = {
	'nearest': Image.NEAREST,
	'bilinear': Image.BILINEAR,
	'bicubic': Image.BICUBIC,
	'box': Image.BOX,
	'lanczos': Image.LANCZOS,
	'hamming': Image.HAMMING
	}


	def imresize(img,
	size,
	return_scale=False,
	interpolation='bilinear',
	out=None,
	backend=None):
	"""Resize image to a given size.

	Args:
	img (ndarray): The input image.
	size (tuple[int]): Target size (w, h).
	return_scale (bool): Whether to return `w_scale` and `h_scale`.
	interpolation (str): Interpolation method, accepted values are
	"nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
	backend, "nearest", "bilinear" for 'pillow' backend.
	out (ndarray): The output destination.
	backend (str \| None): The image resize backend type. Options are `cv2`,
	`pillow`, `None`. If backend is None, the global imread_backend
	specified by ``mmcv.use_backend()`` will be used. Default: None.

	Returns:
	tuple \| ndarray: (`resized_img`, `w_scale`, `h_scale`) or
	`resized_img`.
	"""
	h, w = img.shape[:2]
	if backend is None:
	backend = imread_backend
	if backend not in ['cv2', 'pillow']:
	raise ValueError(f'backend: {backend} is not supported for resize.'
	f"Supported backends are 'cv2', 'pillow'")

	if backend == 'pillow':
	assert img.dtype == np.uint8, 'Pillow backend only support uint8 type'
	pil_image = Image.fromarray(img)
	pil_image = pil_image.resize(size, pillow_interp_codes[interpolation])
	resized_img = np.array(pil_image)
	else:
	resized_img = cv2.resize(
	img, size, dst=out, interpolation=cv2_interp_codes[interpolation])
	if not return_scale:
	return resized_img
	else:
	w_scale = size[0] / w
	h_scale = size[1] / h
	return resized_img, w_scale, h_scale


	def imresize_to_multiple(img,
	divisor,
	size=None,
	scale_factor=None,
	keep_ratio=False,
	return_scale=False,
	interpolation='bilinear',
	out=None,
	backend=None):
	"""Resize image according to a given size or scale factor and then rounds
	up the the resized or rescaled image size to the nearest value that can be
	divided by the divisor.

	Args:
	img (ndarray): The input image.
	divisor (int \| tuple): Resized image size will be a multiple of
	divisor. If divisor is a tuple, divisor should be
	(w_divisor, h_divisor).
	size (None \| int \| tuple[int]): Target size (w, h). Default: None.
	scale_factor (None \| float \| tuple[float]): Multiplier for spatial
	size. Should match input size if it is a tuple and the 2D style is
	(w_scale_factor, h_scale_factor). Default: None.
	keep_ratio (bool): Whether to keep the aspect ratio when resizing the
	image. Default: False.
	return_scale (bool): Whether to return `w_scale` and `h_scale`.
	interpolation (str): Interpolation method, accepted values are
	"nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
	backend, "nearest", "bilinear" for 'pillow' backend.
	out (ndarray): The output destination.
	backend (str \| None): The image resize backend type. Options are `cv2`,
	`pillow`, `None`. If backend is None, the global imread_backend
	specified by ``mmcv.use_backend()`` will be used. Default: None.

	Returns:
	tuple \| ndarray: (`resized_img`, `w_scale`, `h_scale`) or
	`resized_img`.
	"""
	h, w = img.shape[:2]
	if size is not None and scale_factor is not None:
	raise ValueError('only one of size or scale_factor should be defined')
	elif size is None and scale_factor is None:
	raise ValueError('one of size or scale_factor should be defined')
	elif size is not None:
	size = to_2tuple(size)
	if keep_ratio:
	size = rescale_size((w, h), size, return_scale=False)
	else:
	size = _scale_size((w, h), scale_factor)

	divisor = to_2tuple(divisor)
	size = tuple([int(np.ceil(s / d)) * d for s, d in zip(size, divisor)])
	resized_img, w_scale, h_scale = imresize(
	img,
	size,
	return_scale=True,
	interpolation=interpolation,
	out=out,
	backend=backend)
	if return_scale:
	return resized_img, w_scale, h_scale
	else:
	return resized_img


	def imresize_like(img,
	dst_img,
	return_scale=False,
	interpolation='bilinear',
	backend=None):
	"""Resize image to the same size of a given image.

	Args:
	img (ndarray): The input image.
	dst_img (ndarray): The target image.
	return_scale (bool): Whether to return `w_scale` and `h_scale`.
	interpolation (str): Same as :func:`resize`.
	backend (str \| None): Same as :func:`resize`.

	Returns:
	tuple or ndarray: (`resized_img`, `w_scale`, `h_scale`) or
	`resized_img`.
	"""
	h, w = dst_img.shape[:2]
	return imresize(img, (w, h), return_scale, interpolation, backend=backend)


	def rescale_size(old_size, scale, return_scale=False):
	"""Calculate the new size to be rescaled to.

	Args:
	old_size (tuple[int]): The old size (w, h) of image.
	scale (float \| tuple[int]): The scaling factor or maximum size.
	If it is a float number, then the image will be rescaled by this
	factor, else if it is a tuple of 2 integers, then the image will
	be rescaled as large as possible within the scale.
	return_scale (bool): Whether to return the scaling factor besides the
	rescaled image size.

	Returns:
	tuple[int]: The new rescaled image size.
	"""
	w, h = old_size
	if isinstance(scale, (float, int)):
	if scale <= 0:
	raise ValueError(f'Invalid scale {scale}, must be positive.')
	scale_factor = scale
	elif isinstance(scale, tuple):
	max_long_edge = max(scale)
	max_short_edge = min(scale)
	scale_factor = min(max_long_edge / max(h, w),
	max_short_edge / min(h, w))
	else:
	raise TypeError(
	f'Scale must be a number or tuple of int, but got {type(scale)}')

	new_size = _scale_size((w, h), scale_factor)

	if return_scale:
	return new_size, scale_factor
	else:
	return new_size


	def imrescale(img,
	scale,
	return_scale=False,
	interpolation='bilinear',
	backend=None):
	"""Resize image while keeping the aspect ratio.

	Args:
	img (ndarray): The input image.
	scale (float \| tuple[int]): The scaling factor or maximum size.
	If it is a float number, then the image will be rescaled by this
	factor, else if it is a tuple of 2 integers, then the image will
	be rescaled as large as possible within the scale.
	return_scale (bool): Whether to return the scaling factor besides the
	rescaled image.
	interpolation (str): Same as :func:`resize`.
	backend (str \| None): Same as :func:`resize`.

	Returns:
	ndarray: The rescaled image.
	"""
	h, w = img.shape[:2]
	new_size, scale_factor = rescale_size((w, h), scale, return_scale=True)
	rescaled_img = imresize(
	img, new_size, interpolation=interpolation, backend=backend)
	if return_scale:
	return rescaled_img, scale_factor
	else:
	return rescaled_img


	def imflip(img, direction='horizontal'):
	"""Flip an image horizontally or vertically.

	Args:
	img (ndarray): Image to be flipped.
	direction (str): The flip direction, either "horizontal" or
	"vertical" or "diagonal".

	Returns:
	ndarray: The flipped image.
	"""
	assert direction in ['horizontal', 'vertical', 'diagonal']
	if direction == 'horizontal':
	return np.flip(img, axis=1)
	elif direction == 'vertical':
	return np.flip(img, axis=0)
	else:
	return np.flip(img, axis=(0, 1))


	def imflip_(img, direction='horizontal'):
	"""Inplace flip an image horizontally or vertically.

	Args:
	img (ndarray): Image to be flipped.
	direction (str): The flip direction, either "horizontal" or
	"vertical" or "diagonal".

	Returns:
	ndarray: The flipped image (inplace).
	"""
	assert direction in ['horizontal', 'vertical', 'diagonal']
	if direction == 'horizontal':
	return cv2.flip(img, 1, img)
	elif direction == 'vertical':
	return cv2.flip(img, 0, img)
	else:
	return cv2.flip(img, -1, img)


	def imrotate(img,
	angle,
	center=None,
	scale=1.0,
	border_value=0,
	interpolation='bilinear',
	auto_bound=False):
	"""Rotate an image.

	Args:
	img (ndarray): Image to be rotated.
	angle (float): Rotation angle in degrees, positive values mean
	clockwise rotation.
	center (tuple[float], optional): Center point (w, h) of the rotation in
	the source image. If not specified, the center of the image will be
	used.
	scale (float): Isotropic scale factor.
	border_value (int): Border value.
	interpolation (str): Same as :func:`resize`.
	auto_bound (bool): Whether to adjust the image size to cover the whole
	rotated image.

	Returns:
	ndarray: The rotated image.
	"""
	if center is not None and auto_bound:
	raise ValueError('`auto_bound` conflicts with `center`')
	h, w = img.shape[:2]
	if center is None:
	center = ((w - 1) * 0.5, (h - 1) * 0.5)
	assert isinstance(center, tuple)

	matrix = cv2.getRotationMatrix2D(center, -angle, scale)
	if auto_bound:
	cos = np.abs(matrix[0, 0])
	sin = np.abs(matrix[0, 1])
	new_w = h * sin + w * cos
	new_h = h * cos + w * sin
	matrix[0, 2] += (new_w - w) * 0.5
	matrix[1, 2] += (new_h - h) * 0.5
	w = int(np.round(new_w))
	h = int(np.round(new_h))
	rotated = cv2.warpAffine(
	img,
	matrix, (w, h),
	flags=cv2_interp_codes[interpolation],
	borderValue=border_value)
	return rotated


	def bbox_clip(bboxes, img_shape):
	"""Clip bboxes to fit the image shape.

	Args:
	bboxes (ndarray): Shape (..., 4*k)
	img_shape (tuple[int]): (height, width) of the image.

	Returns:
	ndarray: Clipped bboxes.
	"""
	assert bboxes.shape[-1] % 4 == 0
	cmin = np.empty(bboxes.shape[-1], dtype=bboxes.dtype)
	cmin[0::2] = img_shape[1] - 1
	cmin[1::2] = img_shape[0] - 1
	clipped_bboxes = np.maximum(np.minimum(bboxes, cmin), 0)
	return clipped_bboxes


	def bbox_scaling(bboxes, scale, clip_shape=None):
	"""Scaling bboxes w.r.t the box center.

	Args:
	bboxes (ndarray): Shape(..., 4).
	scale (float): Scaling factor.
	clip_shape (tuple[int], optional): If specified, bboxes that exceed the
	boundary will be clipped according to the given shape (h, w).

	Returns:
	ndarray: Scaled bboxes.
	"""
	if float(scale) == 1.0:
	scaled_bboxes = bboxes.copy()
	else:
	w = bboxes[..., 2] - bboxes[..., 0] + 1
	h = bboxes[..., 3] - bboxes[..., 1] + 1
	dw = (w * (scale - 1)) * 0.5
	dh = (h * (scale - 1)) * 0.5
	scaled_bboxes = bboxes + np.stack((-dw, -dh, dw, dh), axis=-1)
	if clip_shape is not None:
	return bbox_clip(scaled_bboxes, clip_shape)
	else:
	return scaled_bboxes


	def imcrop(img, bboxes, scale=1.0, pad_fill=None):
	"""Crop image patches.

	3 steps: scale the bboxes -> clip bboxes -> crop and pad.

	Args:
	img (ndarray): Image to be cropped.
	bboxes (ndarray): Shape (k, 4) or (4, ), location of cropped bboxes.
	scale (float, optional): Scale ratio of bboxes, the default value
	1.0 means no padding.
	pad_fill (Number \| list[Number]): Value to be filled for padding.
	Default: None, which means no padding.

	Returns:
	list[ndarray] \| ndarray: The cropped image patches.
	"""
	chn = 1 if img.ndim == 2 else img.shape[2]
	if pad_fill is not None:
	if isinstance(pad_fill, (int, float)):
	pad_fill = [pad_fill for _ in range(chn)]
	assert len(pad_fill) == chn

	_bboxes = bboxes[None, ...] if bboxes.ndim == 1 else bboxes
	scaled_bboxes = bbox_scaling(_bboxes, scale).astype(np.int32)
	clipped_bbox = bbox_clip(scaled_bboxes, img.shape)

	patches = []
	for i in range(clipped_bbox.shape[0]):
	x1, y1, x2, y2 = tuple(clipped_bbox[i, :])
	if pad_fill is None:
	patch = img[y1:y2 + 1, x1:x2 + 1, ...]
	else:
	_x1, _y1, _x2, _y2 = tuple(scaled_bboxes[i, :])
	if chn == 1:
	patch_shape = (_y2 - _y1 + 1, _x2 - _x1 + 1)
	else:
	patch_shape = (_y2 - _y1 + 1, _x2 - _x1 + 1, chn)
	patch = np.array(
	pad_fill, dtype=img.dtype) * np.ones(
	patch_shape, dtype=img.dtype)
	x_start = 0 if _x1 >= 0 else -_x1
	y_start = 0 if _y1 >= 0 else -_y1
	w = x2 - x1 + 1
	h = y2 - y1 + 1
	patch[y_start:y_start + h, x_start:x_start + w,
	...] = img[y1:y1 + h, x1:x1 + w, ...]
	patches.append(patch)

	if bboxes.ndim == 1:
	return patches[0]
	else:
	return patches


	def impad(img,
	*,
	shape=None,
	padding=None,
	pad_val=0,
	padding_mode='constant'):
	"""Pad the given image to a certain shape or pad on all sides with
	specified padding mode and padding value.

	Args:
	img (ndarray): Image to be padded.
	shape (tuple[int]): Expected padding shape (h, w). Default: None.
	padding (int or tuple[int]): Padding on each border. If a single int is
	provided this is used to pad all borders. If tuple of length 2 is
	provided this is the padding on left/right and top/bottom
	respectively. If a tuple of length 4 is provided this is the
	padding for the left, top, right and bottom borders respectively.
	Default: None. Note that `shape` and `padding` can not be both
	set.
	pad_val (Number \| Sequence[Number]): Values to be filled in padding
	areas when padding_mode is 'constant'. Default: 0.
	padding_mode (str): Type of padding. Should be: constant, edge,
	reflect or symmetric. Default: constant.

	- constant: pads with a constant value, this value is specified
	with pad_val.
	- edge: pads with the last value at the edge of the image.
	- reflect: pads with reflection of image without repeating the
	last value on the edge. For example, padding [1, 2, 3, 4]
	with 2 elements on both sides in reflect mode will result
	in [3, 2, 1, 2, 3, 4, 3, 2].
	- symmetric: pads with reflection of image repeating the last
	value on the edge. For example, padding [1, 2, 3, 4] with
	2 elements on both sides in symmetric mode will result in
	[2, 1, 1, 2, 3, 4, 4, 3]

	Returns:
	ndarray: The padded image.
	"""

	assert (shape is not None) ^ (padding is not None)
	if shape is not None:
	padding = (0, 0, shape[1] - img.shape[1], shape[0] - img.shape[0])

	# check pad_val
	if isinstance(pad_val, tuple):
	assert len(pad_val) == img.shape[-1]
	elif not isinstance(pad_val, numbers.Number):
	raise TypeError('pad_val must be a int or a tuple. '
	f'But received {type(pad_val)}')

	# check padding
	if isinstance(padding, tuple) and len(padding) in [2, 4]:
	if len(padding) == 2:
	padding = (padding[0], padding[1], padding[0], padding[1])
	elif isinstance(padding, numbers.Number):
	padding = (padding, padding, padding, padding)
	else:
	raise ValueError('Padding must be a int or a 2, or 4 element tuple.'
	f'But received {padding}')

	# check padding mode
	assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric']

	border_type = {
	'constant': cv2.BORDER_CONSTANT,
	'edge': cv2.BORDER_REPLICATE,
	'reflect': cv2.BORDER_REFLECT_101,
	'symmetric': cv2.BORDER_REFLECT
	}
	img = cv2.copyMakeBorder(
	img,
	padding[1],
	padding[3],
	padding[0],
	padding[2],
	border_type[padding_mode],
	value=pad_val)

	return img


	def impad_to_multiple(img, divisor, pad_val=0):
	"""Pad an image to ensure each edge to be multiple to some number.

	Args:
	img (ndarray): Image to be padded.
	divisor (int): Padded image edges will be multiple to divisor.
	pad_val (Number \| Sequence[Number]): Same as :func:`impad`.

	Returns:
	ndarray: The padded image.
	"""
	pad_h = int(np.ceil(img.shape[0] / divisor)) * divisor
	pad_w = int(np.ceil(img.shape[1] / divisor)) * divisor
	return impad(img, shape=(pad_h, pad_w), pad_val=pad_val)


	def cutout(img, shape, pad_val=0):
	"""Randomly cut out a rectangle from the original img.

	Args:
	img (ndarray): Image to be cutout.
	shape (int \| tuple[int]): Expected cutout shape (h, w). If given as a
	int, the value will be used for both h and w.
	pad_val (int \| float \| tuple[int \| float]): Values to be filled in the
	cut area. Defaults to 0.

	Returns:
	ndarray: The cutout image.
	"""

	channels = 1 if img.ndim == 2 else img.shape[2]
	if isinstance(shape, int):
	cut_h, cut_w = shape, shape
	else:
	assert isinstance(shape, tuple) and len(shape) == 2, \
	f'shape must be a int or a tuple with length 2, but got type ' \
	f'{type(shape)} instead.'
	cut_h, cut_w = shape
	if isinstance(pad_val, (int, float)):
	pad_val = tuple([pad_val] * channels)
	elif isinstance(pad_val, tuple):
	assert len(pad_val) == channels, \
	'Expected the num of elements in tuple equals the channels' \
	'of input image. Found {} vs {}'.format(
	len(pad_val), channels)
	else:
	raise TypeError(f'Invalid type {type(pad_val)} for `pad_val`')

	img_h, img_w = img.shape[:2]
	y0 = np.random.uniform(img_h)
	x0 = np.random.uniform(img_w)

	y1 = int(max(0, y0 - cut_h / 2.))
	x1 = int(max(0, x0 - cut_w / 2.))
	y2 = min(img_h, y1 + cut_h)
	x2 = min(img_w, x1 + cut_w)

	if img.ndim == 2:
	patch_shape = (y2 - y1, x2 - x1)
	else:
	patch_shape = (y2 - y1, x2 - x1, channels)

	img_cutout = img.copy()
	patch = np.array(
	pad_val, dtype=img.dtype) * np.ones(
	patch_shape, dtype=img.dtype)
	img_cutout[y1:y2, x1:x2, ...] = patch

	return img_cutout


	def _get_shear_matrix(magnitude, direction='horizontal'):
	"""Generate the shear matrix for transformation.

	Args:
	magnitude (int \| float): The magnitude used for shear.
	direction (str): The flip direction, either "horizontal"
	or "vertical".

	Returns:
	ndarray: The shear matrix with dtype float32.
	"""
	if direction == 'horizontal':
	shear_matrix = np.float32([[1, magnitude, 0], [0, 1, 0]])
	elif direction == 'vertical':
	shear_matrix = np.float32([[1, 0, 0], [magnitude, 1, 0]])
	return shear_matrix


	def imshear(img,
	magnitude,
	direction='horizontal',
	border_value=0,
	interpolation='bilinear'):
	"""Shear an image.

	Args:
	img (ndarray): Image to be sheared with format (h, w)
	or (h, w, c).
	magnitude (int \| float): The magnitude used for shear.
	direction (str): The flip direction, either "horizontal"
	or "vertical".
	border_value (int \| tuple[int]): Value used in case of a
	constant border.
	interpolation (str): Same as :func:`resize`.

	Returns:
	ndarray: The sheared image.
	"""
	assert direction in ['horizontal',
	'vertical'], f'Invalid direction: {direction}'
	height, width = img.shape[:2]
	if img.ndim == 2:
	channels = 1
	elif img.ndim == 3:
	channels = img.shape[-1]
	if isinstance(border_value, int):
	border_value = tuple([border_value] * channels)
	elif isinstance(border_value, tuple):
	assert len(border_value) == channels, \
	'Expected the num of elements in tuple equals the channels' \
	'of input image. Found {} vs {}'.format(
	len(border_value), channels)
	else:
	raise ValueError(
	f'Invalid type {type(border_value)} for `border_value`')
	shear_matrix = _get_shear_matrix(magnitude, direction)
	sheared = cv2.warpAffine(
	img,
	shear_matrix,
	(width, height),
	# Note case when the number elements in `border_value`
	# greater than 3 (e.g. shearing masks whose channels large
	# than 3) will raise TypeError in `cv2.warpAffine`.
	# Here simply slice the first 3 values in `border_value`.
	borderValue=border_value[:3],
	flags=cv2_interp_codes[interpolation])
	return sheared


	def _get_translate_matrix(offset, direction='horizontal'):
	"""Generate the translate matrix.

	Args:
	offset (int \| float): The offset used for translate.
	direction (str): The translate direction, either
	"horizontal" or "vertical".

	Returns:
	ndarray: The translate matrix with dtype float32.
	"""
	if direction == 'horizontal':
	translate_matrix = np.float32([[1, 0, offset], [0, 1, 0]])
	elif direction == 'vertical':
	translate_matrix = np.float32([[1, 0, 0], [0, 1, offset]])
	return translate_matrix


	def imtranslate(img,
	offset,
	direction='horizontal',
	border_value=0,
	interpolation='bilinear'):
	"""Translate an image.

	Args:
	img (ndarray): Image to be translated with format
	(h, w) or (h, w, c).
	offset (int \| float): The offset used for translate.
	direction (str): The translate direction, either "horizontal"
	or "vertical".
	border_value (int \| tuple[int]): Value used in case of a
	constant border.
	interpolation (str): Same as :func:`resize`.

	Returns:
	ndarray: The translated image.
	"""
	assert direction in ['horizontal',
	'vertical'], f'Invalid direction: {direction}'
	height, width = img.shape[:2]
	if img.ndim == 2:
	channels = 1
	elif img.ndim == 3:
	channels = img.shape[-1]
	if isinstance(border_value, int):
	border_value = tuple([border_value] * channels)
	elif isinstance(border_value, tuple):
	assert len(border_value) == channels, \
	'Expected the num of elements in tuple equals the channels' \
	'of input image. Found {} vs {}'.format(
	len(border_value), channels)
	else:
	raise ValueError(
	f'Invalid type {type(border_value)} for `border_value`.')
	translate_matrix = _get_translate_matrix(offset, direction)
	translated = cv2.warpAffine(
	img,
	translate_matrix,
	(width, height),
	# Note case when the number elements in `border_value`
	# greater than 3 (e.g. translating masks whose channels
	# large than 3) will raise TypeError in `cv2.warpAffine`.
	# Here simply slice the first 3 values in `border_value`.
	borderValue=border_value[:3],
	flags=cv2_interp_codes[interpolation])
	return translated