Spaces:

CVPR
/

regionclip-demo

Runtime error

regionclip-demo / detectron2 /structures /keypoints.py

jwyang

first commit

4121bec almost 2 years ago

No virus

8.57 kB

	# Copyright (c) Facebook, Inc. and its affiliates.
	import numpy as np
	from typing import Any, List, Tuple, Union
	import torch
	from torch.nn import functional as F

	from detectron2.utils.env import TORCH_VERSION

	if TORCH_VERSION < (1, 8):

	def script_if_tracing(fn):
	return fn


	else:
	script_if_tracing = torch.jit.script_if_tracing


	class Keypoints:
	"""
	Stores keypoint annotation data. GT Instances have a `gt_keypoints` property
	containing the x,y location and visibility flag of each keypoint. This tensor has shape
	(N, K, 3) where N is the number of instances and K is the number of keypoints per instance.

	The visibility flag follows the COCO format and must be one of three integers:

	* v=0: not labeled (in which case x=y=0)
	* v=1: labeled but not visible
	* v=2: labeled and visible
	"""

	def __init__(self, keypoints: Union[torch.Tensor, np.ndarray, List[List[float]]]):
	"""
	Arguments:
	keypoints: A Tensor, numpy array, or list of the x, y, and visibility of each keypoint.
	The shape should be (N, K, 3) where N is the number of
	instances, and K is the number of keypoints per instance.
	"""
	device = keypoints.device if isinstance(keypoints, torch.Tensor) else torch.device("cpu")
	keypoints = torch.as_tensor(keypoints, dtype=torch.float32, device=device)
	assert keypoints.dim() == 3 and keypoints.shape[2] == 3, keypoints.shape
	self.tensor = keypoints

	def __len__(self) -> int:
	return self.tensor.size(0)

	def to(self, args: Any, *kwargs: Any) -> "Keypoints":
	return type(self)(self.tensor.to(args, *kwargs))

	@property
	def device(self) -> torch.device:
	return self.tensor.device

	def to_heatmap(self, boxes: torch.Tensor, heatmap_size: int) -> torch.Tensor:
	"""
	Convert keypoint annotations to a heatmap of one-hot labels for training,
	as described in :paper:`Mask R-CNN`.

	Arguments:
	boxes: Nx4 tensor, the boxes to draw the keypoints to

	Returns:
	heatmaps:
	A tensor of shape (N, K), each element is integer spatial label
	in the range [0, heatmap_size**2 - 1] for each keypoint in the input.
	valid:
	A tensor of shape (N, K) containing whether each keypoint is in the roi or not.
	"""
	return _keypoints_to_heatmap(self.tensor, boxes, heatmap_size)

	def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Keypoints":
	"""
	Create a new `Keypoints` by indexing on this `Keypoints`.

	The following usage are allowed:

	1. `new_kpts = kpts[3]`: return a `Keypoints` which contains only one instance.
	2. `new_kpts = kpts[2:10]`: return a slice of key points.
	3. `new_kpts = kpts[vector]`, where vector is a torch.ByteTensor
	with `length = len(kpts)`. Nonzero elements in the vector will be selected.

	Note that the returned Keypoints might share storage with this Keypoints,
	subject to Pytorch's indexing semantics.
	"""
	if isinstance(item, int):
	return Keypoints([self.tensor[item]])
	return Keypoints(self.tensor[item])

	def __repr__(self) -> str:
	s = self.__class__.__name__ + "("
	s += "num_instances={})".format(len(self.tensor))
	return s


	# TODO make this nicer, this is a direct translation from C2 (but removing the inner loop)
	def _keypoints_to_heatmap(
	keypoints: torch.Tensor, rois: torch.Tensor, heatmap_size: int
	) -> Tuple[torch.Tensor, torch.Tensor]:
	"""
	Encode keypoint locations into a target heatmap for use in SoftmaxWithLoss across space.

	Maps keypoints from the half-open interval [x1, x2) on continuous image coordinates to the
	closed interval [0, heatmap_size - 1] on discrete image coordinates. We use the
	continuous-discrete conversion from Heckbert 1990 ("What is the coordinate of a pixel?"):
	d = floor(c) and c = d + 0.5, where d is a discrete coordinate and c is a continuous coordinate.

	Arguments:
	keypoints: tensor of keypoint locations in of shape (N, K, 3).
	rois: Nx4 tensor of rois in xyxy format
	heatmap_size: integer side length of square heatmap.

	Returns:
	heatmaps: A tensor of shape (N, K) containing an integer spatial label
	in the range [0, heatmap_size**2 - 1] for each keypoint in the input.
	valid: A tensor of shape (N, K) containing whether each keypoint is in
	the roi or not.
	"""

	if rois.numel() == 0:
	return rois.new().long(), rois.new().long()
	offset_x = rois[:, 0]
	offset_y = rois[:, 1]
	scale_x = heatmap_size / (rois[:, 2] - rois[:, 0])
	scale_y = heatmap_size / (rois[:, 3] - rois[:, 1])

	offset_x = offset_x[:, None]
	offset_y = offset_y[:, None]
	scale_x = scale_x[:, None]
	scale_y = scale_y[:, None]

	x = keypoints[..., 0]
	y = keypoints[..., 1]

	x_boundary_inds = x == rois[:, 2][:, None]
	y_boundary_inds = y == rois[:, 3][:, None]

	x = (x - offset_x) * scale_x
	x = x.floor().long()
	y = (y - offset_y) * scale_y
	y = y.floor().long()

	x[x_boundary_inds] = heatmap_size - 1
	y[y_boundary_inds] = heatmap_size - 1

	valid_loc = (x >= 0) & (y >= 0) & (x < heatmap_size) & (y < heatmap_size)
	vis = keypoints[..., 2] > 0
	valid = (valid_loc & vis).long()

	lin_ind = y * heatmap_size + x
	heatmaps = lin_ind * valid

	return heatmaps, valid


	@script_if_tracing
	def heatmaps_to_keypoints(maps: torch.Tensor, rois: torch.Tensor) -> torch.Tensor:
	"""
	Extract predicted keypoint locations from heatmaps.

	Args:
	maps (Tensor): (#ROIs, #keypoints, POOL_H, POOL_W). The predicted heatmap of logits for
	each ROI and each keypoint.
	rois (Tensor): (#ROIs, 4). The box of each ROI.

	Returns:
	Tensor of shape (#ROIs, #keypoints, 4) with the last dimension corresponding to
	(x, y, logit, score) for each keypoint.

	When converting discrete pixel indices in an NxN image to a continuous keypoint coordinate,
	we maintain consistency with :meth:`Keypoints.to_heatmap` by using the conversion from
	Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a continuous coordinate.
	"""
	# The decorator use of torch.no_grad() was not supported by torchscript.
	# https://github.com/pytorch/pytorch/issues/44768
	maps = maps.detach()
	rois = rois.detach()

	offset_x = rois[:, 0]
	offset_y = rois[:, 1]

	widths = (rois[:, 2] - rois[:, 0]).clamp(min=1)
	heights = (rois[:, 3] - rois[:, 1]).clamp(min=1)
	widths_ceil = widths.ceil()
	heights_ceil = heights.ceil()

	num_rois, num_keypoints = maps.shape[:2]
	xy_preds = maps.new_zeros(rois.shape[0], num_keypoints, 4)

	width_corrections = widths / widths_ceil
	height_corrections = heights / heights_ceil

	keypoints_idx = torch.arange(num_keypoints, device=maps.device)

	for i in range(num_rois):
	outsize = (int(heights_ceil[i]), int(widths_ceil[i]))
	roi_map = F.interpolate(
	maps[[i]], size=outsize, mode="bicubic", align_corners=False
	).squeeze(
	0
	) # #keypoints x H x W

	# softmax over the spatial region
	max_score, _ = roi_map.view(num_keypoints, -1).max(1)
	max_score = max_score.view(num_keypoints, 1, 1)
	tmp_full_resolution = (roi_map - max_score).exp_()
	tmp_pool_resolution = (maps[i] - max_score).exp_()
	# Produce scores over the region H x W, but normalize with POOL_H x POOL_W,
	# so that the scores of objects of different absolute sizes will be more comparable
	roi_map_scores = tmp_full_resolution / tmp_pool_resolution.sum((1, 2), keepdim=True)

	w = roi_map.shape[2]
	pos = roi_map.view(num_keypoints, -1).argmax(1)

	x_int = pos % w
	y_int = (pos - x_int) // w

	assert (
	roi_map_scores[keypoints_idx, y_int, x_int]
	== roi_map_scores.view(num_keypoints, -1).max(1)[0]
	).all()

	x = (x_int.float() + 0.5) * width_corrections[i]
	y = (y_int.float() + 0.5) * height_corrections[i]

	xy_preds[i, :, 0] = x + offset_x[i]
	xy_preds[i, :, 1] = y + offset_y[i]
	xy_preds[i, :, 2] = roi_map[keypoints_idx, y_int, x_int]
	xy_preds[i, :, 3] = roi_map_scores[keypoints_idx, y_int, x_int]

	return xy_preds