YOGAI / PoseClassification /pose_embedding.py
1mpreccable's picture
Upload 35 files
0ccc9b6 verified
raw
history blame
10 kB
import numpy as np
import math
class FullBodyPoseEmbedding(object):
"""Converts 3D pose landmarks into 3D embedding."""
def __init__(self, torso_size_multiplier=2.5):
# Multiplier to apply to the torso to get minimal body size.
self._torso_size_multiplier = torso_size_multiplier
# Names of the landmarks as they appear in the prediction.
self._landmark_names = [
"nose",
"left_eye_inner",
"left_eye",
"left_eye_outer",
"right_eye_inner",
"right_eye",
"right_eye_outer",
"left_ear",
"right_ear",
"mouth_left",
"mouth_right",
"left_shoulder",
"right_shoulder",
"left_elbow",
"right_elbow",
"left_wrist",
"right_wrist",
"left_pinky_1",
"right_pinky_1",
"left_index_1",
"right_index_1",
"left_thumb_2",
"right_thumb_2",
"left_hip",
"right_hip",
"left_knee",
"right_knee",
"left_ankle",
"right_ankle",
"left_heel",
"right_heel",
"left_foot_index",
"right_foot_index",
]
def __call__(self, landmarks):
"""Normalizes pose landmarks and converts to embedding
Args:
landmarks - NumPy array with 3D landmarks of shape (N, 3).
Result:
Numpy array with pose embedding of shape (M, 3) where `M` is the number of
pairwise distances defined in `_get_pose_distance_embedding`.
"""
assert landmarks.shape[0] == len(
self._landmark_names
), "Unexpected number of landmarks: {}".format(landmarks.shape[0])
# Get pose landmarks.
landmarks = np.copy(landmarks)
# Normalize landmarks.
landmarks = self._normalize_pose_landmarks(landmarks)
# Get embedding.
embedding = self._get_pose_distance_embedding(landmarks)
return embedding
def _normalize_pose_landmarks(self, landmarks):
"""Normalizes landmarks translation and scale."""
landmarks = np.copy(landmarks)
# Normalize translation.
pose_center = self._get_pose_center(landmarks)
landmarks -= pose_center
# Normalize scale.
pose_size = self._get_pose_size(landmarks, self._torso_size_multiplier)
landmarks /= pose_size
# Multiplication by 100 is not required, but makes it eaasier to debug.
landmarks *= 100
return landmarks
def _get_pose_center(self, landmarks):
"""Calculates pose center as point between hips."""
left_hip = landmarks[self._landmark_names.index("left_hip")]
right_hip = landmarks[self._landmark_names.index("right_hip")]
center = (left_hip + right_hip) * 0.5
return center
def _get_pose_size(self, landmarks, torso_size_multiplier):
"""Calculates pose size.
It is the maximum of two values:
* Torso size multiplied by `torso_size_multiplier`
* Maximum distance from pose center to any pose landmark
"""
# This approach uses only 2D landmarks to compute pose size.
landmarks = landmarks[:, :2]
# Hips center.
left_hip = landmarks[self._landmark_names.index("left_hip")]
right_hip = landmarks[self._landmark_names.index("right_hip")]
hips = (left_hip + right_hip) * 0.5
# Shoulders center.
left_shoulder = landmarks[self._landmark_names.index("left_shoulder")]
right_shoulder = landmarks[self._landmark_names.index("right_shoulder")]
shoulders = (left_shoulder + right_shoulder) * 0.5
# Torso size as the minimum body size.
torso_size = np.linalg.norm(shoulders - hips)
# Max dist to pose center.
pose_center = self._get_pose_center(landmarks)
max_dist = np.max(np.linalg.norm(landmarks - pose_center, axis=1))
return max(torso_size * torso_size_multiplier, max_dist)
def _get_pose_distance_embedding(self, landmarks):
"""Converts pose landmarks into 3D embedding.
We use several pairwise 3D distances to form pose embedding. All distances
include X and Y components with sign. We differnt types of pairs to cover
different pose classes. Feel free to remove some or add new.
Args:
landmarks - NumPy array with 3D landmarks of shape (N, 3).
Result:
Numpy array with pose embedding of shape (M, 3) where `M` is the number of
pairwise distances.
"""
embedding = np.array(
[
# One joint.
self._get_distance(
self._get_average_by_names(landmarks, "left_hip", "right_hip"),
self._get_average_by_names(
landmarks, "left_shoulder", "right_shoulder"
),
),
self._get_distance_by_names(landmarks, "left_shoulder", "left_elbow"),
self._get_distance_by_names(landmarks, "right_shoulder", "right_elbow"),
self._get_distance_by_names(landmarks, "left_elbow", "left_wrist"),
self._get_distance_by_names(landmarks, "right_elbow", "right_wrist"),
self._get_distance_by_names(landmarks, "left_hip", "left_knee"),
self._get_distance_by_names(landmarks, "right_hip", "right_knee"),
self._get_distance_by_names(landmarks, "left_knee", "left_ankle"),
self._get_distance_by_names(landmarks, "right_knee", "right_ankle"),
# Two joints.
self._get_distance_by_names(landmarks, "left_shoulder", "left_wrist"),
self._get_distance_by_names(landmarks, "right_shoulder", "right_wrist"),
self._get_distance_by_names(landmarks, "left_hip", "left_ankle"),
self._get_distance_by_names(landmarks, "right_hip", "right_ankle"),
# Four joints.
self._get_distance_by_names(landmarks, "left_hip", "left_wrist"),
self._get_distance_by_names(landmarks, "right_hip", "right_wrist"),
# Five joints.
self._get_distance_by_names(landmarks, "left_shoulder", "left_ankle"),
self._get_distance_by_names(landmarks, "right_shoulder", "right_ankle"),
self._get_distance_by_names(landmarks, "left_hip", "left_wrist"),
self._get_distance_by_names(landmarks, "right_hip", "right_wrist"),
# Cross body.
self._get_distance_by_names(landmarks, "left_elbow", "right_elbow"),
self._get_distance_by_names(landmarks, "left_knee", "right_knee"),
self._get_distance_by_names(landmarks, "left_wrist", "right_wrist"),
self._get_distance_by_names(landmarks, "left_ankle", "right_ankle"),
# Body bent direction.
self._get_distance(
self._get_average_by_names(landmarks, 'left_wrist', 'left_ankle'),
landmarks[self._landmark_names.index('left_hip')]),
self._get_distance(
self._get_average_by_names(landmarks, 'right_wrist', 'right_ankle'),
landmarks[self._landmark_names.index('right_hip')]),
# Angle between landmarks - cf https://www.kaggle.com/code/venkatkumar001/yoga-pose-recognition-mediapipe
# self._calculateAngle(landmarks, "left_hip", "left_knee", "left_ankle"),
# self._calculateAngle(landmarks, "right_hip", "right_knee", "right_ankle"),
# self._calculateAngle(landmarks, "left_shoulder", "left_elbow", "left_wrist"),
# self._calculateAngle(landmarks, "right_shoulder", "right_elbow", "right_wrist")
]
)
# print(embedding)
# print(embbeding.shape)
# print(type(embedding))
# print(type(landmarks[self._landmark_names.index('right_hip')]))
# print(landmarks[self._landmark_names.index('right_hip')])
return embedding
def _get_average_by_names(self, landmarks, name_from, name_to):
lmk_from = landmarks[self._landmark_names.index(name_from)]
lmk_to = landmarks[self._landmark_names.index(name_to)]
return (lmk_from + lmk_to) * 0.5
def _get_distance_by_names(self, landmarks, name_from, name_to):
lmk_from = landmarks[self._landmark_names.index(name_from)]
lmk_to = landmarks[self._landmark_names.index(name_to)]
return self._get_distance(lmk_from, lmk_to)
def _get_distance(self, lmk_from, lmk_to):
return lmk_to - lmk_from
def _calculateAngle(self, landmarks, name1, name2, name3):
'''
This function calculates angle between three different landmarks.
Args:
landmark1: The first landmark containing the x,y and z coordinates.
landmark2: The second landmark containing the x,y and z coordinates.
landmark3: The third landmark containing the x,y and z coordinates.
Returns:
angle: The calculated angle between the three landmarks.
cf https://www.kaggle.com/code/venkatkumar001/yoga-pose-recognition-mediapipe
'''
# Get the required landmarks coordinates.
x1, y1, _ = landmarks[self._landmark_names.index(name1)]
x2, y2, _ = landmarks[self._landmark_names.index(name2)]
x3, y3, _ = landmarks[self._landmark_names.index(name3)]
# Calculate the angle between the three points
angle = math.degrees(math.atan2(y3 - y2, x3 - x2) - math.atan2(y1 - y2, x1 - x2))
# Check if the angle is less than zero.
if angle < 0:
# Add 360 to the found angle.
angle += 360
# Return the calculated angle.
return angle