# SPDX-FileCopyrightText: 2025 Idiap Research Institute
# SPDX-FileContributor: Anjith George
# SPDX-License-Identifier: BSD-3-Clause
"""Face alignment utilities.

Landmarks are extracted with MediaPipe Face Mesh, a (optionally reflective)
similarity transform is fitted between five facial points and a fixed set of
reference points, and the image is warped with OpenCV to a fixed-size crop.
"""

from typing import Optional

import cv2
import numpy as np
from numpy.linalg import inv, norm, lstsq, matrix_rank
import mediapipe as mp

# =============================================================================
# Constants
# =============================================================================

# Target (x, y) positions of the five facial points inside the output crop.
# Rows are matched, in order, against the five points handed to
# `warp_and_crop_face`.
REFERENCE_FACIAL_POINTS = np.array([
    [38.2946, 51.6963],
    [73.5318, 51.5014],
    [56.0252, 71.7366],
    [41.5493, 92.3655],
    [70.7299, 92.2041],
], dtype=np.float32)

# Face Mesh landmark index -> name used in the landmark dictionaries.
_LANDMARK_NAMES = {
    1: 'nose',
    287: 'mouthright',
    57: 'mouthleft',
    362: 'righteye_left',
    263: 'righteye_right',
    33: 'lefteye_left',
    243: 'lefteye_right',
}

# Eye-corner entries that are only needed to derive the eye centres.
_EYE_CORNER_KEYS = ('righteye_left', 'righteye_right', 'lefteye_left', 'lefteye_right')

# =============================================================================
# Landmark Extraction
# =============================================================================

mp_face_mesh = mp.solutions.face_mesh

# A single module-level detector instance, shared by every call to
# `extract_landmarks`.
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=True,
    refine_landmarks=True,
    min_detection_confidence=0.5,
)

# =============================================================================
# Custom Exceptions
# =============================================================================


class MatlabCp2tormException(Exception):
    """Error type for the cp2tform-style transform code; prefixes the file name."""

    def __str__(self):
        return f"In File {__file__}: {super().__str__()}"


class FaceWarpException(Exception):
    """Raised by `warp_and_crop_face` on invalid input; prefixes the file name."""

    def __str__(self):
        return f"In File {__file__}: {super().__str__()}"


# =============================================================================
# Similarity Transform Utilities
# =============================================================================


def tformfwd(trans: np.ndarray, uv: np.ndarray) -> np.ndarray:
    """Apply a forward affine transform to a set of 2-D points.

    Args:
        trans: 3x3 transform matrix in row-vector convention, i.e. points are
            multiplied from the left (``[u, v, 1] @ trans``).
        uv: Array of shape (N, 2) holding the points to transform.

    Returns:
        Array of shape (N, 2) with the transformed points.
    """
    # Append a column of ones to work in homogeneous coordinates.
    uv_h = np.hstack((uv, np.ones((uv.shape[0], 1))))
    xy = uv_h @ trans
    # Drop the homogeneous coordinate again.
    return xy[:, :-1]


def tforminv(trans: np.ndarray, uv: np.ndarray) -> np.ndarray:
    """Apply the inverse of an affine transform to a set of 2-D points.

    Args:
        trans: 3x3 transform matrix (row-vector convention, see `tformfwd`).
        uv: Array of shape (N, 2) holding the points to transform.

    Returns:
        Array of shape (N, 2): ``uv`` mapped through ``inv(trans)``.
    """
    return tformfwd(inv(trans), uv)


def findNonreflectiveSimilarity(uv: np.ndarray, xy: np.ndarray, options: Optional[dict] = None):
    """Fit a non-reflective similarity transform between two point sets.

    A least-squares problem is solved for the four parameters
    ``(sc, ss, tx, ty)`` such that ``u = sc*x + ss*y + tx`` and
    ``v = -ss*x + sc*y + ty``.

    Args:
        uv: Array of shape (M, 2) with the source points.
        xy: Array of shape (M, 2) with the destination points.
        options: Optional dict; only the key ``'K'`` is read (default 2). The
            design matrix must have rank of at least ``2 * K``.

    Returns:
        Tuple ``(T, Tinv)`` of 3x3 matrices in row-vector convention, where
        ``T`` maps ``uv`` to ``xy`` and ``Tinv`` maps ``xy`` to ``uv``.

    Raises:
        ValueError: If the design matrix is rank deficient (the points do not
            determine a similarity transform).
    """
    K = options.get('K', 2) if options else 2
    M = xy.shape[0]

    # Column vectors of the individual coordinates.
    x, y = xy[:, 0:1], xy[:, 1:2]
    u, v = uv[:, 0:1], uv[:, 1:2]

    ones = np.ones((M, 1))
    zeros = np.zeros((M, 1))

    # First M rows encode the equations for u, the last M rows those for v.
    X = np.vstack((
        np.hstack((x, y, ones, zeros)),
        np.hstack((y, -x, zeros, ones)),
    ))
    U = np.vstack((u, v))

    if matrix_rank(X) < 2 * K:
        raise ValueError("cp2tform:twoUniquePointsReq")
    r, _, _, _ = lstsq(X, U, rcond=None)

    sc, ss, tx, ty = r.flatten()

    Tinv = np.array([
        [sc, -ss, 0],
        [ss, sc, 0],
        [tx, ty, 1],
    ])
    T = inv(Tinv)
    # Overwrite the last column so it is exactly [0, 0, 1] despite round-off
    # in the numerical inverse.
    T[:, 2] = [0, 0, 1]
    return T, Tinv


def findSimilarity(uv: np.ndarray, xy: np.ndarray, options: Optional[dict] = None):
    """Fit a similarity transform, allowing for a reflection.

    Two candidates are fitted: a plain non-reflective similarity, and one
    obtained by fitting against ``xy`` mirrored along the x axis and then
    undoing the mirroring. The candidate with the smaller residual on the
    given points is returned (ties go to the non-reflective one).

    Args:
        uv: Array of shape (M, 2) with the source points.
        xy: Array of shape (M, 2) with the destination points.
        options: Forwarded to `findNonreflectiveSimilarity`.

    Returns:
        Tuple ``(trans, trans_inv)`` of 3x3 matrices (row-vector convention);
        ``trans`` maps ``uv`` to ``xy``.

    Raises:
        ValueError: Propagated from `findNonreflectiveSimilarity`.
    """
    # Candidate 1: no reflection.
    trans1, trans1_inv = findNonreflectiveSimilarity(uv, xy, options)

    # Candidate 2: fit against the x-mirrored targets (on a copy, so the
    # caller's array is untouched), then mirror back.
    xyR = xy.copy()
    xyR[:, 0] *= -1
    trans2r, _ = findNonreflectiveSimilarity(uv, xyR, options)
    TreflectY = np.array([
        [-1, 0, 0],
        [0, 1, 0],
        [0, 0, 1],
    ])
    trans2 = trans2r @ TreflectY

    # Keep whichever candidate reproduces the destination points best.
    norm1 = norm(tformfwd(trans1, uv) - xy)
    norm2 = norm(tformfwd(trans2, uv) - xy)
    if norm1 <= norm2:
        return trans1, trans1_inv
    return trans2, inv(trans2)


def get_similarity_transform(src_pts, dst_pts, reflective=True):
    """Fit a similarity transform from ``src_pts`` to ``dst_pts``.

    Args:
        src_pts: Array of shape (M, 2) with the source points.
        dst_pts: Array of shape (M, 2) with the destination points.
        reflective: If true, a reflection is allowed (`findSimilarity`);
            otherwise `findNonreflectiveSimilarity` is used.

    Returns:
        Tuple ``(trans, trans_inv)`` of 3x3 matrices (row-vector convention).
    """
    if reflective:
        return findSimilarity(src_pts, dst_pts)
    return findNonreflectiveSimilarity(src_pts, dst_pts)


def cvt_tform_mat_for_cv2(trans: np.ndarray) -> np.ndarray:
    """Convert a 3x3 row-vector transform into a 2x3 matrix.

    Args:
        trans: 3x3 transform matrix as returned by `get_similarity_transform`.

    Returns:
        The transpose of the first two columns of ``trans`` (shape 2x3), which
        is the layout passed to ``cv2.warpAffine`` in this module.
    """
    return trans[:, :2].T


def get_similarity_transform_for_cv2(src_pts, dst_pts, reflective=True) -> np.ndarray:
    """Fit a similarity transform and return it as a 2x3 matrix.

    Args:
        src_pts: Array of shape (M, 2) with the source points.
        dst_pts: Array of shape (M, 2) with the destination points.
        reflective: Whether a reflection is allowed.

    Returns:
        2x3 matrix mapping ``src_pts`` onto ``dst_pts``.
    """
    trans, _ = get_similarity_transform(src_pts, dst_pts, reflective)
    return cvt_tform_mat_for_cv2(trans)


# =============================================================================
# Face Warping
# =============================================================================


def warp_and_crop_face(src_img, facial_pts, reference_pts=REFERENCE_FACIAL_POINTS,
                       crop_size=(112, 112), scale=1):
    """Warp an image so its facial points land on the reference points.

    Args:
        src_img: Image passed to ``cv2.warpAffine``.
        facial_pts: Sequence of (x, y) points detected in ``src_img``; it must
            have the same shape as ``reference_pts``.
        reference_pts: Target (x, y) positions in the output crop. Any
            array-like is accepted; the argument itself is never modified.
        crop_size: Output size handed to ``cv2.warpAffine``.
        scale: Factor applied to the reference points around their centroid.

    Returns:
        The warped image produced by ``cv2.warpAffine``.

    Raises:
        FaceWarpException: If ``facial_pts`` and ``reference_pts`` differ in
            shape.
    """
    # Coerce once so lists/tuples work too; for ndarray input this is a no-op
    # and all arithmetic below is out-of-place, so the (shared) default
    # reference array cannot be mutated.
    base_pts = np.asarray(reference_pts)

    # Scale the reference points, then shift them back so that their centroid
    # matches the centroid of the unscaled points.
    ref_pts = base_pts * scale
    ref_pts = ref_pts + (np.mean(base_pts, axis=0) - np.mean(ref_pts, axis=0))

    src_pts = np.array(facial_pts, dtype=np.float32)
    if src_pts.shape != ref_pts.shape:
        raise FaceWarpException("facial_pts and reference_pts must have the same shape")

    tfm = get_similarity_transform_for_cv2(src_pts, ref_pts)
    return cv2.warpAffine(src_img, tfm, crop_size)


def _midpoint(first, second):
    """Return the integer (floor) midpoint of two (x, y) pixel positions."""
    return (first[0] + second[0]) // 2, (first[1] + second[1]) // 2


def extract_landmarks(image) -> dict:
    """Extract the key facial landmarks of an image with MediaPipe Face Mesh.

    Args:
        image: Three-dimensional image array (height, width, channels); it is
            converted with ``cv2.COLOR_BGR2RGB`` before detection.

    Returns:
        Dict mapping ``'nose'``, ``'mouthright'``, ``'mouthleft'``, ``'reye'``
        and ``'leye'`` to integer ``(x, y)`` pixel positions. The dict is
        empty when no face is detected. If several faces are reported, the
        values of the last one win.
    """
    # NOTE(review): please confirm that the left/right naming of the Face Mesh
    # indices used here agrees with the point order `align_face` feeds to
    # REFERENCE_FACIAL_POINTS. Because the fitted transform may be reflective,
    # a swapped order would not fail but yield a mirrored crop.
    img_h, img_w, _ = image.shape

    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_rgb.flags.writeable = False
    results = face_mesh.process(image_rgb)

    fr_landmarks = {}
    if results.multi_face_landmarks:
        for face_landmarks in results.multi_face_landmarks:
            for idx, lm in enumerate(face_landmarks.landmark):
                if idx in _LANDMARK_NAMES:
                    # Landmark coordinates are multiplied by the image size and
                    # truncated to integer pixel positions.
                    fr_landmarks[_LANDMARK_NAMES[idx]] = (int(lm.x * img_w), int(lm.y * img_h))

    # Each eye centre is the midpoint between the two corners of that eye.
    if 'righteye_left' in fr_landmarks and 'righteye_right' in fr_landmarks:
        fr_landmarks['reye'] = _midpoint(
            fr_landmarks['righteye_left'], fr_landmarks['righteye_right']
        )
    if 'lefteye_left' in fr_landmarks and 'lefteye_right' in fr_landmarks:
        fr_landmarks['leye'] = _midpoint(
            fr_landmarks['lefteye_left'], fr_landmarks['lefteye_right']
        )

    # The corner points are intermediate values only and are not returned.
    for key in _EYE_CORNER_KEYS:
        fr_landmarks.pop(key, None)

    return fr_landmarks


# =============================================================================
# Face Alignment Pipeline
# =============================================================================


def align_face(frame, annotations: dict, scale=1, convention="yx"):
    """Align a face based on five landmarks.

    Args:
        frame: Image to warp.
        annotations: Dict that must contain the keys ``'reye'``, ``'leye'``,
            ``'nose'``, ``'mouthright'`` and ``'mouthleft'``, each mapping to
            a coordinate pair.
        scale: Forwarded to `warp_and_crop_face`.
        convention: ``"yx"`` if the annotation pairs are stored as (y, x), in
            which case they are reversed; any other value leaves them as is.

    Returns:
        The aligned crop returned by `warp_and_crop_face`.

    Raises:
        ValueError: If a required landmark is missing from ``annotations``.
    """
    required_landmarks = ["reye", "leye", "nose", "mouthright", "mouthleft"]
    if not set(required_landmarks).issubset(annotations):
        raise ValueError("Annotations must contain required landmarks.")

    # The warp expects (x, y) pairs, so (y, x) annotations are reversed.
    flip = convention == "yx"
    facial5points = [
        annotations[lm][::-1] if flip else annotations[lm]
        for lm in required_landmarks
    ]
    return warp_and_crop_face(frame, facial5points, scale=scale)


def align_crop(image):
    """Detect landmarks in an image and return the aligned face crop.

    Args:
        image: Image passed to `extract_landmarks` and `align_face`.

    Returns:
        The aligned crop, or ``None`` when no landmarks were found or the
        alignment failed (the error is printed, not raised).
    """
    landmarks = extract_landmarks(image)
    if not landmarks:
        return None

    # Deliberate best-effort boundary: any alignment failure is reported and
    # turned into a `None` result instead of propagating.
    try:
        crop_img = align_face(image, landmarks, scale=1, convention="xy")
    except Exception as e:
        print(f"Error during face alignment: {e}")
        return None
    return crop_img