Spaces:
Sleeping
Sleeping
File size: 10,401 Bytes
0ccc9b6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 |
import numpy as np
import math
class FullBodyPoseEmbedding(object):
"""Converts 3D pose landmarks into 3D embedding."""
def __init__(self, torso_size_multiplier=2.5):
# Multiplier to apply to the torso to get minimal body size.
self._torso_size_multiplier = torso_size_multiplier
# Names of the landmarks as they appear in the prediction.
self._landmark_names = [
"nose",
"left_eye_inner",
"left_eye",
"left_eye_outer",
"right_eye_inner",
"right_eye",
"right_eye_outer",
"left_ear",
"right_ear",
"mouth_left",
"mouth_right",
"left_shoulder",
"right_shoulder",
"left_elbow",
"right_elbow",
"left_wrist",
"right_wrist",
"left_pinky_1",
"right_pinky_1",
"left_index_1",
"right_index_1",
"left_thumb_2",
"right_thumb_2",
"left_hip",
"right_hip",
"left_knee",
"right_knee",
"left_ankle",
"right_ankle",
"left_heel",
"right_heel",
"left_foot_index",
"right_foot_index",
]
def __call__(self, landmarks):
"""Normalizes pose landmarks and converts to embedding
Args:
landmarks - NumPy array with 3D landmarks of shape (N, 3).
Result:
Numpy array with pose embedding of shape (M, 3) where `M` is the number of
pairwise distances defined in `_get_pose_distance_embedding`.
"""
assert landmarks.shape[0] == len(
self._landmark_names
), "Unexpected number of landmarks: {}".format(landmarks.shape[0])
# Get pose landmarks.
landmarks = np.copy(landmarks)
# Normalize landmarks.
landmarks = self._normalize_pose_landmarks(landmarks)
# Get embedding.
embedding = self._get_pose_distance_embedding(landmarks)
# Add angle embedding
embedding_angle = self._get_pose_angle_embedding(landmarks)
assert embedding.shape == embedding_angle.shape, f"Error in embeddings shape : distance embed {embedding.shape} and angle {embedding_angle.shape}"
return embedding
def _normalize_pose_landmarks(self, landmarks):
"""Normalizes landmarks translation and scale."""
landmarks = np.copy(landmarks)
# Normalize translation.
pose_center = self._get_pose_center(landmarks)
landmarks -= pose_center
# Normalize scale.
pose_size = self._get_pose_size(landmarks, self._torso_size_multiplier)
landmarks /= pose_size
# Multiplication by 100 is not required, but makes it eaasier to debug.
landmarks *= 100
return landmarks
def _get_pose_center(self, landmarks):
"""Calculates pose center as point between hips."""
left_hip = landmarks[self._landmark_names.index("left_hip")]
right_hip = landmarks[self._landmark_names.index("right_hip")]
center = (left_hip + right_hip) * 0.5
return center
def _get_pose_size(self, landmarks, torso_size_multiplier):
"""Calculates pose size.
It is the maximum of two values:
* Torso size multiplied by `torso_size_multiplier`
* Maximum distance from pose center to any pose landmark
"""
# This approach uses only 2D landmarks to compute pose size.
landmarks = landmarks[:, :2]
# Hips center.
left_hip = landmarks[self._landmark_names.index("left_hip")]
right_hip = landmarks[self._landmark_names.index("right_hip")]
hips = (left_hip + right_hip) * 0.5
# Shoulders center.
left_shoulder = landmarks[self._landmark_names.index("left_shoulder")]
right_shoulder = landmarks[self._landmark_names.index("right_shoulder")]
shoulders = (left_shoulder + right_shoulder) * 0.5
# Torso size as the minimum body size.
torso_size = np.linalg.norm(shoulders - hips)
# Max dist to pose center.
pose_center = self._get_pose_center(landmarks)
max_dist = np.max(np.linalg.norm(landmarks - pose_center, axis=1))
return max(torso_size * torso_size_multiplier, max_dist)
def _get_pose_distance_embedding(self, landmarks):
"""Converts pose landmarks into 3D embedding.
We use several pairwise 3D distances to form pose embedding. All distances
include X and Y components with sign. We differnt types of pairs to cover
different pose classes. Feel free to remove some or add new.
Args:
landmarks - NumPy array with 3D landmarks of shape (N, 3).
Result:
Numpy array with pose embedding of shape (M, 3) where `M` is the number of
pairwise distances.
"""
embedding = np.array(
[
# One joint.
self._get_distance(
self._get_average_by_names(landmarks, "left_hip", "right_hip"),
self._get_average_by_names(
landmarks, "left_shoulder", "right_shoulder"
),
),
self._get_distance_by_names(landmarks, "left_shoulder", "left_elbow"),
self._get_distance_by_names(landmarks, "right_shoulder", "right_elbow"),
self._get_distance_by_names(landmarks, "left_elbow", "left_wrist"),
self._get_distance_by_names(landmarks, "right_elbow", "right_wrist"),
self._get_distance_by_names(landmarks, "left_hip", "left_knee"),
self._get_distance_by_names(landmarks, "right_hip", "right_knee"),
self._get_distance_by_names(landmarks, "left_knee", "left_ankle"),
self._get_distance_by_names(landmarks, "right_knee", "right_ankle"),
# Two joints.
self._get_distance_by_names(landmarks, "left_shoulder", "left_wrist"),
self._get_distance_by_names(landmarks, "right_shoulder", "right_wrist"),
self._get_distance_by_names(landmarks, "left_hip", "left_ankle"),
self._get_distance_by_names(landmarks, "right_hip", "right_ankle"),
# Four joints.
self._get_distance_by_names(landmarks, "left_hip", "left_wrist"),
self._get_distance_by_names(landmarks, "right_hip", "right_wrist"),
# Five joints.
self._get_distance_by_names(landmarks, "left_shoulder", "left_ankle"),
self._get_distance_by_names(landmarks, "right_shoulder", "right_ankle"),
self._get_distance_by_names(landmarks, "left_hip", "left_wrist"),
self._get_distance_by_names(landmarks, "right_hip", "right_wrist"),
# Cross body.
self._get_distance_by_names(landmarks, "left_elbow", "right_elbow"),
self._get_distance_by_names(landmarks, "left_knee", "right_knee"),
self._get_distance_by_names(landmarks, "left_wrist", "right_wrist"),
self._get_distance_by_names(landmarks, "left_ankle", "right_ankle"),
# Body bent direction.
self._get_distance(
self._get_average_by_names(landmarks, 'left_wrist', 'left_ankle'),
landmarks[self._landmark_names.index('left_hip')]),
self._get_distance(
self._get_average_by_names(landmarks, 'right_wrist', 'right_ankle'),
landmarks[self._landmark_names.index('right_hip')])
]
)
# print(embedding)
# print(embbeding.shape)
# print(type(embedding))
# print(type(landmarks[self._landmark_names.index('right_hip')]))
# print(landmarks[self._landmark_names.index('right_hip')])
return embedding
def _get_average_by_names(self, landmarks, name_from, name_to):
lmk_from = landmarks[self._landmark_names.index(name_from)]
lmk_to = landmarks[self._landmark_names.index(name_to)]
return (lmk_from + lmk_to) * 0.5
def _get_distance_by_names(self, landmarks, name_from, name_to):
lmk_from = landmarks[self._landmark_names.index(name_from)]
lmk_to = landmarks[self._landmark_names.index(name_to)]
return self._get_distance(lmk_from, lmk_to)
def _get_distance(self, lmk_from, lmk_to):
return lmk_to - lmk_from
def _get_pose_angle_embedding(self, landmarks):
embedding = [
# Angle between landmarks - cf https://www.kaggle.com/code/venkatkumar001/yoga-pose-recognition-mediapipe
self._calculateAngle(landmarks, "left_hip", "left_knee", "left_ankle"),
self._calculateAngle(landmarks, "right_hip", "right_knee", "right_ankle"),
self._calculateAngle(landmarks, "left_shoulder", "left_elbow", "left_wrist"),
self._calculateAngle(landmarks, "right_shoulder", "right_elbow", "right_wrist")
]
return embedding
def _calculateAngle(self, landmarks, name1, name2, name3):
'''
This function calculates angle between three different landmarks.
Args:
landmark1: The first landmark containing the x,y and z coordinates.
landmark2: The second landmark containing the x,y and z coordinates.
landmark3: The third landmark containing the x,y and z coordinates.
Returns:
angle: The calculated angle between the three landmarks.
cf https://www.kaggle.com/code/venkatkumar001/yoga-pose-recognition-mediapipe
'''
# Get the required landmarks coordinates.
x1, y1, _ = landmarks[self._landmark_names.index(name1)]
x2, y2, _ = landmarks[self._landmark_names.index(name2)]
x3, y3, _ = landmarks[self._landmark_names.index(name3)]
# Calculate the angle between the three points
angle = math.degrees(math.atan2(y3 - y2, x3 - x2) - math.atan2(y1 - y2, x1 - x2))
# Check if the angle is less than zero.
if angle < 0:
# Add 360 to the found angle.
angle += 360
# Return the calculated angle.
return angle |