Spaces:

teemosliang
/

SDPose

Running on Zero

App Files Files Community

SDPose / mmpose /codecs /image_pose_lifting.py

teemosliang

release: publish the huggingface space demo of SDPose

d5532b9 verified about 1 month ago

raw

history blame contribute delete

8.77 kB

	# Copyright (c) Meta Platforms, Inc. and affiliates.
	# All rights reserved.
	#
	# This source code is licensed under the license found in the
	# LICENSE file in the root directory of this source tree.

	from typing import Optional, Tuple

	import numpy as np

	from mmpose.registry import KEYPOINT_CODECS
	from .base import BaseKeypointCodec


	@KEYPOINT_CODECS.register_module()
	class ImagePoseLifting(BaseKeypointCodec):
	    r"""Generate keypoint coordinates for pose lifter.

	    Note:

	        - instance number: N
	        - keypoint number: K
	        - keypoint dimension: D
	        - pose-lifting target dimension: C

	    Args:
	        num_keypoints (int): The number of keypoints in the dataset.
	        root_index (int): Root keypoint index in the pose.
	        remove_root (bool): If true, remove the root keypoint from the pose.
	            Default: ``False``.
	        save_index (bool): If true (and ``remove_root`` is also true), store
	            the root index in the encoded result so that the global pose can
	            be restored later. Default: ``False``.
	        keypoints_mean (np.ndarray, optional): Mean values of keypoints
	            coordinates in shape (K, D).
	        keypoints_std (np.ndarray, optional): Std values of keypoints
	            coordinates in shape (K, D).
	        target_mean (np.ndarray, optional): Mean values of pose-lifting target
	            coordinates in shape (K, C).
	        target_std (np.ndarray, optional): Std values of pose-lifting target
	            coordinates in shape (K, C).
	    """

	    auxiliary_encode_keys = {'lifting_target', 'lifting_target_visible'}

	    def __init__(self,
	                 num_keypoints: int,
	                 root_index: int,
	                 remove_root: bool = False,
	                 save_index: bool = False,
	                 keypoints_mean: Optional[np.ndarray] = None,
	                 keypoints_std: Optional[np.ndarray] = None,
	                 target_mean: Optional[np.ndarray] = None,
	                 target_std: Optional[np.ndarray] = None):
	        super().__init__()

	        self.num_keypoints = num_keypoints
	        self.root_index = root_index
	        self.remove_root = remove_root
	        self.save_index = save_index

	        # Normalization is only applied when BOTH the mean and the std of a
	        # pair are given, so the shapes are only cross-checked in that case.
	        if keypoints_mean is not None and keypoints_std is not None:
	            assert keypoints_mean.shape == keypoints_std.shape
	        if target_mean is not None and target_std is not None:
	            assert target_mean.shape == target_std.shape
	        self.keypoints_mean = keypoints_mean
	        self.keypoints_std = keypoints_std
	        self.target_mean = target_mean
	        self.target_std = target_std

	    def encode(self,
	               keypoints: np.ndarray,
	               keypoints_visible: Optional[np.ndarray] = None,
	               lifting_target: Optional[np.ndarray] = None,
	               lifting_target_visible: Optional[np.ndarray] = None) -> dict:
	        """Encoding keypoints from input image space to normalized space.

	        Args:
	            keypoints (np.ndarray): Keypoint coordinates in shape (N, K, D).
	            keypoints_visible (np.ndarray, optional): Keypoint visibilities in
	                shape (N, K).
	            lifting_target (np.ndarray, optional): 3d target coordinate in
	                shape (K, C). Defaults to ``keypoints[0]`` when omitted.
	            lifting_target_visible (np.ndarray, optional): Target visibility
	                in shape (K, ).

	        Returns:
	            encoded (dict): Contains the following items:

	                - keypoint_labels (np.ndarray): The processed keypoints in
	                  shape (N, K, D). A 2-dimensional result gets a leading
	                  axis of length 1 prepended.
	                - lifting_target_label: The processed target coordinate in
	                  shape (K, C) or (K-1, C).
	                - lifting_target_weights (np.ndarray): The target weights in
	                  shape (K, ) or (K-1, ).
	                - trajectory_weights (np.ndarray): The trajectory weights in
	                  shape (K, ).
	                - target_root (np.ndarray): The root coordinate of target in
	                  shape (C, ).

	            In addition, there are some optional items it may contain:

	                - target_root_removed (bool): Indicate whether the root of
	                  pose lifting target is removed. Added if ``self.remove_root``
	                  is ``True``.
	                - target_root_index (int): An integer indicating the index of
	                  root. Added if ``self.remove_root`` and ``self.save_index``
	                  are ``True``.
	        """
	        if keypoints_visible is None:
	            keypoints_visible = np.ones(keypoints.shape[:2], dtype=np.float32)

	        if lifting_target is None:
	            lifting_target = keypoints[0]

	        # set initial value for `lifting_target_weights`
	        # and `trajectory_weights`
	        if lifting_target_visible is None:
	            lifting_target_visible = np.ones(
	                lifting_target.shape[:-1], dtype=np.float32)
	            lifting_target_weights = lifting_target_visible
	            # NOTE: this reads coordinate index 2 of every target keypoint, so
	            # on this path the target needs at least three coordinates.
	            trajectory_weights = (1 / lifting_target[:, 2])
	        else:
	            valid = lifting_target_visible > 0.5
	            lifting_target_weights = np.where(valid, 1., 0.).astype(np.float32)
	            trajectory_weights = lifting_target_weights

	        encoded = dict()

	        # Zero-center the target pose around a given root keypoint
	        assert (lifting_target.ndim >= 2 and
	                lifting_target.shape[-2] > self.root_index), \
	            f'Got invalid joint shape {lifting_target.shape}'

	        root = lifting_target[..., self.root_index, :]
	        lifting_target_label = lifting_target - root

	        if self.remove_root:
	            lifting_target_label = np.delete(
	                lifting_target_label, self.root_index, axis=-2)
	            assert lifting_target_weights.ndim in {1, 2}
	            axis_to_remove = -2 if lifting_target_weights.ndim == 2 else -1
	            # `np.delete` returns a new array, so `trajectory_weights` (which
	            # may alias the weights above) keeps its root entry.
	            lifting_target_weights = np.delete(
	                lifting_target_weights, self.root_index, axis=axis_to_remove)
	            # Add a flag to avoid latter transforms that rely on the root
	            # joint or the original joint index
	            encoded['target_root_removed'] = True

	            # Save the root index which is necessary to restore the global pose
	            if self.save_index:
	                encoded['target_root_index'] = self.root_index

	        # Normalize the 2D keypoint coordinate with mean and std
	        keypoint_labels = keypoints.copy()
	        if self.keypoints_mean is not None and self.keypoints_std is not None:
	            keypoints_shape = keypoints.shape
	            assert self.keypoints_mean.shape == keypoints_shape[1:]

	            keypoint_labels = (keypoint_labels -
	                               self.keypoints_mean) / self.keypoints_std
	        if self.target_mean is not None and self.target_std is not None:
	            target_shape = lifting_target_label.shape
	            assert self.target_mean.shape == target_shape

	            lifting_target_label = (lifting_target_label -
	                                    self.target_mean) / self.target_std

	        # Make sure the keypoint labels carry a leading instance axis
	        assert keypoint_labels.ndim in {2, 3}
	        if keypoint_labels.ndim == 2:
	            keypoint_labels = keypoint_labels[None, ...]

	        encoded['keypoint_labels'] = keypoint_labels
	        encoded['lifting_target_label'] = lifting_target_label
	        encoded['lifting_target_weights'] = lifting_target_weights
	        encoded['trajectory_weights'] = trajectory_weights
	        encoded['target_root'] = root

	        return encoded

	    def decode(self,
	               encoded: np.ndarray,
	               target_root: Optional[np.ndarray] = None
	               ) -> Tuple[np.ndarray, np.ndarray]:
	        """Decode keypoint coordinates from normalized space to input image
	        space.

	        Args:
	            encoded (np.ndarray): Coordinates in shape (N, K, C).
	            target_root (np.ndarray, optional): The target root coordinate.
	                When ``None`` or empty, the root offset is not added back and
	                no root keypoint is re-inserted. Default: ``None``.

	        Returns:
	            tuple:
	            - keypoints (np.ndarray): Decoded coordinates in shape (N, K, C).
	            - scores (np.ndarray): The keypoint scores in shape (N, K).
	        """
	        keypoints = encoded.copy()

	        if self.target_mean is not None and self.target_std is not None:
	            assert self.target_mean.shape == keypoints.shape[1:]
	            keypoints = keypoints * self.target_std + self.target_mean

	        # `target_root` defaults to ``None``; guard before touching `.size`
	        # so that calling `decode(encoded)` alone does not raise.
	        if target_root is not None and target_root.size > 0:
	            keypoints = keypoints + np.expand_dims(target_root, axis=0)
	            if self.remove_root:
	                # Put the root keypoint back at its original index.
	                keypoints = np.insert(
	                    keypoints, self.root_index, target_root, axis=1)
	        scores = np.ones(keypoints.shape[:-1], dtype=np.float32)

	        return keypoints, scores