# SPDX-FileCopyrightText: 2025 Idiap Research Institute
# SPDX-FileContributor: Anjith George
# SPDX-License-Identifier: BSD-3-Clause

import cv2
import numpy as np
from numpy.linalg import inv, norm, lstsq, matrix_rank
import mediapipe as mp

# =============================================================================
# Constants
# =============================================================================
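# The five reference points below match the widely used ArcFace-style layout for a
# 112x112 crop: two eye centers, the nose tip, and the two mouth corners, given as
# (x, y) coordinates in the output crop.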
REFERENCE_FACIAL_POINTS = np.array([
    [38.2946, 51.6963],
    [73.5318, 51.5014],
    [56.0252, 71.7366],
    [41.5493, 92.3655],
    [70.7299, 92.2041]
], dtype=np.float32)

# =============================================================================
# Landmark Extraction
# =============================================================================

mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=True,
    refine_landmarks=True,
    min_detection_confidence=0.5,
)
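# Note: static_image_mode=True runs face detection on every call (appropriate for
# independent still images), and refine_landmarks=True enables the refined eye/iris
# landmarks on top of the base 468-point mesh.
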
# =============================================================================
# Custom Exceptions
# =============================================================================

class MatlabCp2tormException(Exception):
    def __str__(self):
        return f"In File {__file__}: {super().__str__()}"


class FaceWarpException(Exception):
    def __str__(self):
        return f"In File {__file__}: {super().__str__()}"

# =============================================================================
# Similarity Transform Utilities
# =============================================================================

def tformfwd(trans: np.ndarray, uv: np.ndarray) -> np.ndarray:
    """Apply forward affine transform."""
    uv_h = np.hstack((uv, np.ones((uv.shape[0], 1))))
    xy = uv_h @ trans
    return xy[:, :-1]


def tforminv(trans: np.ndarray, uv: np.ndarray) -> np.ndarray:
    """Apply inverse affine transform."""
    return tformfwd(inv(trans), uv)

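# Illustrative sketch (not part of the pipeline): in the row-vector convention used
# here, [x, y, 1] = [u, v, 1] @ trans, so a pure translation by (2, 3) looks like:
#
#     _T = np.array([[1, 0, 0],
#                    [0, 1, 0],
#                    [2, 3, 1]], dtype=np.float64)
#     tformfwd(_T, np.array([[0.0, 0.0]]))   # -> [[2., 3.]]
#     tforminv(_T, np.array([[2.0, 3.0]]))   # -> [[0., 0.]]
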
def findNonreflectiveSimilarity(uv: np.ndarray, xy: np.ndarray, options: dict = None):
    """Find non-reflective similarity transform between uv and xy."""
    K = options.get('K', 2) if options else 2
    M = xy.shape[0]
    x, y = xy[:, 0:1], xy[:, 1:2]
    u, v = uv[:, 0:1], uv[:, 1:2]

    X = np.vstack((
        np.hstack((x, y, np.ones((M, 1)), np.zeros((M, 1)))),
        np.hstack((y, -x, np.zeros((M, 1)), np.ones((M, 1))))
    ))
    U = np.vstack((u, v))

    if matrix_rank(X) >= 2 * K:
        r, _, _, _ = lstsq(X, U, rcond=None)
    else:
        raise ValueError("cp2tform:twoUniquePointsReq")

    sc, ss, tx, ty = r.flatten()
    Tinv = np.array([[sc, -ss, 0], [ss, sc, 0], [tx, ty, 1]])
    T = inv(Tinv)
    T[:, 2] = [0, 0, 1]
    return T, Tinv

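# The least-squares system above solves r = [sc, ss, tx, ty] for the mapping
#     u = sc * x + ss * y + tx
#     v = -ss * x + sc * y + ty
# i.e. Tinv takes the xy points to uv; T = inv(Tinv) is the forward uv -> xy map
# returned to callers.
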
def findSimilarity(uv: np.ndarray, xy: np.ndarray, options: dict = None):
    """Find similarity transform with optional reflection."""
    trans1, trans1_inv = findNonreflectiveSimilarity(uv, xy, options)

    xyR = xy.copy()
    xyR[:, 0] *= -1
    trans2r, _ = findNonreflectiveSimilarity(uv, xyR, options)
    TreflectY = np.array([[-1, 0, 0], [0, 1, 0], [0, 0, 1]])
    trans2 = trans2r @ TreflectY

    norm1 = norm(tformfwd(trans1, uv) - xy)
    norm2 = norm(tformfwd(trans2, uv) - xy)
    return (trans1, trans1_inv) if norm1 <= norm2 else (trans2, inv(trans2))

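# findSimilarity fits two candidates: the plain non-reflective solution and one
# obtained by mirroring xy about the y-axis and un-mirroring the result, then keeps
# whichever reproduces xy from uv with the smaller residual.
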
def get_similarity_transform(src_pts, dst_pts, reflective=True):
    """Get similarity transform between source and destination points."""
    return findSimilarity(src_pts, dst_pts) if reflective else findNonreflectiveSimilarity(src_pts, dst_pts)


def cvt_tform_mat_for_cv2(trans: np.ndarray) -> np.ndarray:
    """Convert transformation matrix to OpenCV-compatible format."""
    return trans[:, :2].T


def get_similarity_transform_for_cv2(src_pts, dst_pts, reflective=True) -> np.ndarray:
    """Get cv2-compatible affine transform matrix."""
    trans, _ = get_similarity_transform(src_pts, dst_pts, reflective)
    return cvt_tform_mat_for_cv2(trans)

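# cv2.warpAffine expects a 2x3 matrix acting on column vectors ([x', y'] = M @ [x, y, 1]),
# so the first two columns of the 3x3 row-vector transform are transposed to obtain it.
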
# =============================================================================
# Face Warping
# =============================================================================

def warp_and_crop_face(src_img, facial_pts, reference_pts=REFERENCE_FACIAL_POINTS, crop_size=(112, 112), scale=1):
    """Warp and crop face using similarity transform."""
    ref_pts = reference_pts * scale
    ref_pts += np.mean(reference_pts, axis=0) - np.mean(ref_pts, axis=0)

    src_pts = np.array(facial_pts, dtype=np.float32)
    if src_pts.shape != ref_pts.shape:
        raise FaceWarpException("facial_pts and reference_pts must have the same shape")

    tfm = get_similarity_transform_for_cv2(src_pts, ref_pts)
    return cv2.warpAffine(src_img, tfm, crop_size)

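# Note: a scale other than 1 rescales the reference layout about its own centroid
# (the re-centering above keeps the mean fixed): scale > 1 spreads the reference
# landmarks apart so the face fills more of the crop, scale < 1 does the opposite.
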
def extract_landmarks(image) -> dict:
    """Extract key facial landmarks using MediaPipe."""
    img_h, img_w, _ = image.shape
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_rgb.flags.writeable = False
    results = face_mesh.process(image_rgb)

    fr_landmarks = {}
    if results.multi_face_landmarks:
        key_mapping = {
            1: 'nose',
            287: 'mouthright',
            57: 'mouthleft',
            362: 'righteye_left',
            263: 'righteye_right',
            33: 'lefteye_left',
            243: 'lefteye_right'
        }
        for face_landmarks in results.multi_face_landmarks:
            for idx, lm in enumerate(face_landmarks.landmark):
                if idx in key_mapping:
                    x, y = int(lm.x * img_w), int(lm.y * img_h)
                    fr_landmarks[key_mapping[idx]] = (x, y)

        if 'righteye_left' in fr_landmarks and 'righteye_right' in fr_landmarks:
            fr_landmarks['reye'] = (
                (fr_landmarks['righteye_left'][0] + fr_landmarks['righteye_right'][0]) // 2,
                (fr_landmarks['righteye_left'][1] + fr_landmarks['righteye_right'][1]) // 2
            )
        if 'lefteye_left' in fr_landmarks and 'lefteye_right' in fr_landmarks:
            fr_landmarks['leye'] = (
                (fr_landmarks['lefteye_left'][0] + fr_landmarks['lefteye_right'][0]) // 2,
                (fr_landmarks['lefteye_left'][1] + fr_landmarks['lefteye_right'][1]) // 2
            )
        for key in ['righteye_left', 'righteye_right', 'lefteye_left', 'lefteye_right']:
            fr_landmarks.pop(key, None)

    return fr_landmarks

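# The returned dict holds the five keys expected by align_face below: 'reye' and
# 'leye' (eye centers averaged from the eye-corner landmarks), plus 'nose',
# 'mouthright', and 'mouthleft', each as (x, y) pixel coordinates.
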
# =============================================================================
# Face Alignment Pipeline
# =============================================================================

def align_face(frame, annotations: dict, scale=1, convention="yx"):
    """Align face based on 5 landmarks."""
    required_landmarks = ["reye", "leye", "nose", "mouthright", "mouthleft"]
    if not set(required_landmarks).issubset(annotations):
        raise ValueError("Annotations must contain required landmarks.")

    facial5points = [
        annotations[lm][::-1] if convention == "yx" else annotations[lm]
        for lm in required_landmarks
    ]
    return warp_and_crop_face(frame, facial5points, scale=scale)

def align_crop(image):
    """Extract and align face crop from an image."""
    landmarks = extract_landmarks(image)
    if not landmarks:
        return None

    try:
        crop_img = align_face(image, landmarks, scale=1, convention="xy")
    except Exception as e:
        print(f"Error during face alignment: {e}")
        return None
    return crop_img

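# Minimal usage sketch (assumption: a BGR image exists at the hypothetical path
# "example.jpg"; the output filename is likewise illustrative).
if __name__ == "__main__":
    img = cv2.imread("example.jpg")  # hypothetical input path
    if img is None:
        raise SystemExit("Could not read example.jpg")
    aligned = align_crop(img)
    if aligned is None:
        print("No face detected or alignment failed.")
    else:
        cv2.imwrite("example_aligned_112x112.jpg", aligned)  # hypothetical output path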