Spaces:
Sleeping
Sleeping
import numpy as np | |
import math | |
class FullBodyPoseEmbedding(object): | |
"""Converts 3D pose landmarks into 3D embedding.""" | |
def __init__(self, torso_size_multiplier=2.5): | |
# Multiplier to apply to the torso to get minimal body size. | |
self._torso_size_multiplier = torso_size_multiplier | |
# Names of the landmarks as they appear in the prediction. | |
self._landmark_names = [ | |
"nose", | |
"left_eye_inner", | |
"left_eye", | |
"left_eye_outer", | |
"right_eye_inner", | |
"right_eye", | |
"right_eye_outer", | |
"left_ear", | |
"right_ear", | |
"mouth_left", | |
"mouth_right", | |
"left_shoulder", | |
"right_shoulder", | |
"left_elbow", | |
"right_elbow", | |
"left_wrist", | |
"right_wrist", | |
"left_pinky_1", | |
"right_pinky_1", | |
"left_index_1", | |
"right_index_1", | |
"left_thumb_2", | |
"right_thumb_2", | |
"left_hip", | |
"right_hip", | |
"left_knee", | |
"right_knee", | |
"left_ankle", | |
"right_ankle", | |
"left_heel", | |
"right_heel", | |
"left_foot_index", | |
"right_foot_index", | |
] | |
def __call__(self, landmarks): | |
"""Normalizes pose landmarks and converts to embedding | |
Args: | |
landmarks - NumPy array with 3D landmarks of shape (N, 3). | |
Result: | |
Numpy array with pose embedding of shape (M, 3) where `M` is the number of | |
pairwise distances defined in `_get_pose_distance_embedding`. | |
""" | |
assert landmarks.shape[0] == len( | |
self._landmark_names | |
), "Unexpected number of landmarks: {}".format(landmarks.shape[0]) | |
# Get pose landmarks. | |
landmarks = np.copy(landmarks) | |
# Normalize landmarks. | |
landmarks = self._normalize_pose_landmarks(landmarks) | |
# Get embedding. | |
embedding = self._get_pose_distance_embedding(landmarks) | |
return embedding | |
def _normalize_pose_landmarks(self, landmarks): | |
"""Normalizes landmarks translation and scale.""" | |
landmarks = np.copy(landmarks) | |
# Normalize translation. | |
pose_center = self._get_pose_center(landmarks) | |
landmarks -= pose_center | |
# Normalize scale. | |
pose_size = self._get_pose_size(landmarks, self._torso_size_multiplier) | |
landmarks /= pose_size | |
# Multiplication by 100 is not required, but makes it eaasier to debug. | |
landmarks *= 100 | |
return landmarks | |
def _get_pose_center(self, landmarks): | |
"""Calculates pose center as point between hips.""" | |
left_hip = landmarks[self._landmark_names.index("left_hip")] | |
right_hip = landmarks[self._landmark_names.index("right_hip")] | |
center = (left_hip + right_hip) * 0.5 | |
return center | |
def _get_pose_size(self, landmarks, torso_size_multiplier): | |
"""Calculates pose size. | |
It is the maximum of two values: | |
* Torso size multiplied by `torso_size_multiplier` | |
* Maximum distance from pose center to any pose landmark | |
""" | |
# This approach uses only 2D landmarks to compute pose size. | |
landmarks = landmarks[:, :2] | |
# Hips center. | |
left_hip = landmarks[self._landmark_names.index("left_hip")] | |
right_hip = landmarks[self._landmark_names.index("right_hip")] | |
hips = (left_hip + right_hip) * 0.5 | |
# Shoulders center. | |
left_shoulder = landmarks[self._landmark_names.index("left_shoulder")] | |
right_shoulder = landmarks[self._landmark_names.index("right_shoulder")] | |
shoulders = (left_shoulder + right_shoulder) * 0.5 | |
# Torso size as the minimum body size. | |
torso_size = np.linalg.norm(shoulders - hips) | |
# Max dist to pose center. | |
pose_center = self._get_pose_center(landmarks) | |
max_dist = np.max(np.linalg.norm(landmarks - pose_center, axis=1)) | |
return max(torso_size * torso_size_multiplier, max_dist) | |
def _get_pose_distance_embedding(self, landmarks): | |
"""Converts pose landmarks into 3D embedding. | |
We use several pairwise 3D distances to form pose embedding. All distances | |
include X and Y components with sign. We differnt types of pairs to cover | |
different pose classes. Feel free to remove some or add new. | |
Args: | |
landmarks - NumPy array with 3D landmarks of shape (N, 3). | |
Result: | |
Numpy array with pose embedding of shape (M, 3) where `M` is the number of | |
pairwise distances. | |
""" | |
embedding = np.array( | |
[ | |
# One joint. | |
self._get_distance( | |
self._get_average_by_names(landmarks, "left_hip", "right_hip"), | |
self._get_average_by_names( | |
landmarks, "left_shoulder", "right_shoulder" | |
), | |
), | |
self._get_distance_by_names(landmarks, "left_shoulder", "left_elbow"), | |
self._get_distance_by_names(landmarks, "right_shoulder", "right_elbow"), | |
self._get_distance_by_names(landmarks, "left_elbow", "left_wrist"), | |
self._get_distance_by_names(landmarks, "right_elbow", "right_wrist"), | |
self._get_distance_by_names(landmarks, "left_hip", "left_knee"), | |
self._get_distance_by_names(landmarks, "right_hip", "right_knee"), | |
self._get_distance_by_names(landmarks, "left_knee", "left_ankle"), | |
self._get_distance_by_names(landmarks, "right_knee", "right_ankle"), | |
# Two joints. | |
self._get_distance_by_names(landmarks, "left_shoulder", "left_wrist"), | |
self._get_distance_by_names(landmarks, "right_shoulder", "right_wrist"), | |
self._get_distance_by_names(landmarks, "left_hip", "left_ankle"), | |
self._get_distance_by_names(landmarks, "right_hip", "right_ankle"), | |
# Four joints. | |
self._get_distance_by_names(landmarks, "left_hip", "left_wrist"), | |
self._get_distance_by_names(landmarks, "right_hip", "right_wrist"), | |
# Five joints. | |
self._get_distance_by_names(landmarks, "left_shoulder", "left_ankle"), | |
self._get_distance_by_names(landmarks, "right_shoulder", "right_ankle"), | |
self._get_distance_by_names(landmarks, "left_hip", "left_wrist"), | |
self._get_distance_by_names(landmarks, "right_hip", "right_wrist"), | |
# Cross body. | |
self._get_distance_by_names(landmarks, "left_elbow", "right_elbow"), | |
self._get_distance_by_names(landmarks, "left_knee", "right_knee"), | |
self._get_distance_by_names(landmarks, "left_wrist", "right_wrist"), | |
self._get_distance_by_names(landmarks, "left_ankle", "right_ankle"), | |
# Body bent direction. | |
self._get_distance( | |
self._get_average_by_names(landmarks, 'left_wrist', 'left_ankle'), | |
landmarks[self._landmark_names.index('left_hip')]), | |
self._get_distance( | |
self._get_average_by_names(landmarks, 'right_wrist', 'right_ankle'), | |
landmarks[self._landmark_names.index('right_hip')]), | |
# Angle between landmarks - cf https://www.kaggle.com/code/venkatkumar001/yoga-pose-recognition-mediapipe | |
# self._calculateAngle(landmarks, "left_hip", "left_knee", "left_ankle"), | |
# self._calculateAngle(landmarks, "right_hip", "right_knee", "right_ankle"), | |
# self._calculateAngle(landmarks, "left_shoulder", "left_elbow", "left_wrist"), | |
# self._calculateAngle(landmarks, "right_shoulder", "right_elbow", "right_wrist") | |
] | |
) | |
# print(embedding) | |
# print(embbeding.shape) | |
# print(type(embedding)) | |
# print(type(landmarks[self._landmark_names.index('right_hip')])) | |
# print(landmarks[self._landmark_names.index('right_hip')]) | |
return embedding | |
def _get_average_by_names(self, landmarks, name_from, name_to): | |
lmk_from = landmarks[self._landmark_names.index(name_from)] | |
lmk_to = landmarks[self._landmark_names.index(name_to)] | |
return (lmk_from + lmk_to) * 0.5 | |
def _get_distance_by_names(self, landmarks, name_from, name_to): | |
lmk_from = landmarks[self._landmark_names.index(name_from)] | |
lmk_to = landmarks[self._landmark_names.index(name_to)] | |
return self._get_distance(lmk_from, lmk_to) | |
def _get_distance(self, lmk_from, lmk_to): | |
return lmk_to - lmk_from | |
def _calculateAngle(self, landmarks, name1, name2, name3): | |
''' | |
This function calculates angle between three different landmarks. | |
Args: | |
landmark1: The first landmark containing the x,y and z coordinates. | |
landmark2: The second landmark containing the x,y and z coordinates. | |
landmark3: The third landmark containing the x,y and z coordinates. | |
Returns: | |
angle: The calculated angle between the three landmarks. | |
cf https://www.kaggle.com/code/venkatkumar001/yoga-pose-recognition-mediapipe | |
''' | |
# Get the required landmarks coordinates. | |
x1, y1, _ = landmarks[self._landmark_names.index(name1)] | |
x2, y2, _ = landmarks[self._landmark_names.index(name2)] | |
x3, y3, _ = landmarks[self._landmark_names.index(name3)] | |
# Calculate the angle between the three points | |
angle = math.degrees(math.atan2(y3 - y2, x3 - x2) - math.atan2(y1 - y2, x1 - x2)) | |
# Check if the angle is less than zero. | |
if angle < 0: | |
# Add 360 to the found angle. | |
angle += 360 | |
# Return the calculated angle. | |
return angle |