Spaces:

naver-iv
/

ZIM_demo_with_SAM

Sleeping

App Files Files Community

ZIM_demo_with_SAM / zim /utils /utils.py

scyonggg

Initial commit

9860a06 about 1 year ago

raw

history blame contribute delete

5.35 kB

	"""
	Copyright (c) 2024-present Naver Cloud Corp.

	This source code is licensed under the license found in the
	LICENSE file in the root directory of this source tree.
	"""

	import numpy as np
	import torch
	from torch.nn import functional as F
	from torchvision.transforms.functional import resize, to_pil_image, InterpolationMode
	from copy import deepcopy
	from typing import Optional, Tuple, List

	class ResizeLongestSide:
	"""
	Resizes images to the longest side 'target_length', as well as provides
	methods for resizing coordinates and boxes. Provides methods for
	transforming both numpy array and batched torch tensors.
	"""

	def __init__(self, target_length: int) -> None:
	self.target_length = target_length

	def apply_image(self, image: np.ndarray) -> np.ndarray:
	"""
	Expects a numpy array with shape HxWxC in uint8 format.
	"""
	target_size = self.get_preprocess_shape(image.shape[0], image.shape[1], self.target_length)
	return np.array(resize(to_pil_image(image), target_size))

	def apply_coords(self, coords: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray:
	"""
	Expects a numpy array of length 2 in the final dimension. Requires the
	original image size in (H, W) format.
	"""
	old_h, old_w = original_size
	new_h, new_w = self.get_preprocess_shape(
	original_size[0], original_size[1], self.target_length
	)
	coords = deepcopy(coords).astype(float)
	coords[..., 0] = coords[..., 0] * (new_w / old_w)
	coords[..., 1] = coords[..., 1] * (new_h / old_h)
	return coords

	def apply_boxes(self, boxes: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray:
	"""
	Expects a numpy array shape Bx4. Requires the original image size
	in (H, W) format.
	"""
	boxes = self.apply_coords(boxes.reshape(-1, 2, 2), original_size)
	return boxes.reshape(-1, 4)

	def apply_image_torch(self, image: torch.Tensor) -> torch.Tensor:
	"""
	Expects batched images with shape BxCxHxW and float format. This
	transformation may not exactly match apply_image. apply_image is
	the transformation expected by the model.
	"""
	# Expects an image in BCHW format. May not exactly match apply_image.
	target_size = self.get_preprocess_shape(image.shape[2], image.shape[3], self.target_length)
	return F.interpolate(
	image, target_size, mode="bilinear", align_corners=False, antialias=True
	)

	def apply_coords_torch(
	self, coords: torch.Tensor, original_size: Tuple[int, ...]
	) -> torch.Tensor:
	"""
	Expects a torch tensor with length 2 in the last dimension. Requires the
	original image size in (H, W) format.
	"""
	old_h, old_w = original_size
	new_h, new_w = self.get_preprocess_shape(
	original_size[0], original_size[1], self.target_length
	)
	coords = deepcopy(coords).to(torch.float)
	coords[..., 0] = coords[..., 0] * (new_w / old_w)
	coords[..., 1] = coords[..., 1] * (new_h / old_h)
	return coords

	def apply_boxes_torch(
	self, boxes: torch.Tensor, original_size: Tuple[int, ...]
	) -> torch.Tensor:
	"""
	Expects a torch tensor with shape Bx4. Requires the original image
	size in (H, W) format.
	"""
	boxes = self.apply_coords_torch(boxes.reshape(-1, 2, 2), original_size)
	return boxes.reshape(-1, 4)

	def apply_mask(self, image: np.ndarray) -> np.ndarray:
	"""
	Expects a numpy array with shape HxWxC in uint8 format.
	"""
	target_size = self.get_preprocess_shape(image.shape[0], image.shape[1], self.target_length)
	return np.array(resize(to_pil_image(image), target_size, interpolation=InterpolationMode.NEAREST))

	@staticmethod
	def get_preprocess_shape(oldh: int, oldw: int, long_side_length: int) -> Tuple[int, int]:
	"""
	Compute the output size given input size and target long side length.
	"""
	scale = long_side_length * 1.0 / max(oldh, oldw)
	newh, neww = oldh * scale, oldw * scale
	neww = int(neww + 0.5)
	newh = int(newh + 0.5)
	return (newh, neww)


	def remove_prefix(text, prefix):
	if text.startswith(prefix):
	return text[len(prefix) :]
	return text

	class AverageMeter(object):
	"""Computes and stores the average and current value"""

	def __init__(self, is_ddp):
	self.is_ddp = is_ddp
	self.reset()

	def reset(self):
	self.val = 0.0
	self.avg = 0.0
	self.sum = 0.0
	self.count = 0.0

	def update(self, val, n=1):
	self.val = val
	self.sum += val * n
	self.count += n
	self.avg = self.sum / (self.count + 1e-5)

	def synch(self, device):
	if self.is_ddp is False:
	return

	_sum = torch.tensor(self.sum).to(device)
	_count = torch.tensor(self.count).to(device)

	torch.distributed.reduce(_sum, dst=0)
	torch.distributed.reduce(_count, dst=0)

	if torch.distributed.get_rank() == 0:
	self.sum = _sum.item()
	self.count = _count.item()
	self.avg = self.sum / (self.count + 1e-5)