import cv2
import math
import random
import numpy as np
import mediapipe as mp
from scipy.spatial.distance import euclidean as dist
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

# feature definitions
DIMS = (224, 224, 3)  # dimensions of the image
RIGHT = [[33, 133], [160, 144], [159, 145], [158, 153]]  # right eye landmark index pairs
LEFT = [[263, 362], [387, 373], [386, 374], [385, 380]]  # left eye landmark index pairs
MOUTH = [[61, 291], [39, 181], [0, 17], [269, 405]]      # mouth landmark index pairs

EYE_AR_THRESH = 0.45
PROB_THRESH = 0.3
EYE_AR_CONSEC_FRAMES = 15
MOUTH_AR_THRESH = 0.33
MOUTH_AR_CONSEC_FRAMES = 20

MP_FACE_DETECTION = mp.solutions.face_detection
MP_DRAWING = mp.solutions.drawing_utils
MP_DRAWING_STYLES = mp.solutions.drawing_styles
MP_FACE_MESH = mp.solutions.face_mesh
DRAWING_SPEC = MP_DRAWING.DrawingSpec(thickness=1, circle_radius=1)


def get_ear(landmarks, eye):
    '''
    Calculate the eye aspect ratio: the average vertical eye opening
    divided by the horizontal eye width.
    :param landmarks: Face landmarks returned by the MediaPipe FaceMesh model
    :param eye: List of landmark index pairs that outline the eye
    :return: Eye aspect ratio value
    '''
    N1 = dist(landmarks[eye[1][0]], landmarks[eye[1][1]])
    N2 = dist(landmarks[eye[2][0]], landmarks[eye[2][1]])
    N3 = dist(landmarks[eye[3][0]], landmarks[eye[3][1]])
    D = dist(landmarks[eye[0][0]], landmarks[eye[0][1]])
    return (N1 + N2 + N3) / (3 * D)


def get_eye_feature(landmarks):
    '''
    Calculate the eye feature as the sum of the eye aspect ratios
    of the two eyes.
    :param landmarks: Face landmarks returned by the MediaPipe FaceMesh model
    :return: Eye feature value
    '''
    return get_ear(landmarks, LEFT) + get_ear(landmarks, RIGHT)


def get_mouth_feature(landmarks):
    '''
    Calculate the mouth feature as the ratio of the average vertical mouth
    opening to the horizontal mouth width.
    :param landmarks: Face landmarks returned by the MediaPipe FaceMesh model
    :return: Mouth feature value
    '''
    n_1 = dist(landmarks[MOUTH[1][0]], landmarks[MOUTH[1][1]])
    n_2 = dist(landmarks[MOUTH[2][0]], landmarks[MOUTH[2][1]])
    n_3 = dist(landmarks[MOUTH[3][0]], landmarks[MOUTH[3][1]])
    dst = dist(landmarks[MOUTH[0][0]], landmarks[MOUTH[0][1]])
    return (n_1 + n_2 + n_3) / (3 * dst)


# image processing
def process_mp_img(frame):
    """
    Run MediaPipe FaceMesh on a single BGR frame and return the annotated
    image together with the eye feature, mouth feature, and drowsiness flag.
    Returns (frame, None, None, None) when no face is detected.
    """
    with MP_FACE_MESH.FaceMesh(
            min_detection_confidence=0.3,
            min_tracking_confidence=0.8) as face_mesh:
        # convert the image to RGB and process it with MediaPipe FaceMesh
        results = face_mesh.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        if results.multi_face_landmarks is None:
            # no face detected: return the unmodified frame and empty features
            return frame, None, None, None

        # collect the (x, y, z) positions of the first detected face
        landmark_pos = np.array([[data.x, data.y, data.z]
                                 for data in results.multi_face_landmarks[0].landmark])

        # calculate eye and mouth features from the first face
        eye_feature = get_eye_feature(landmark_pos)
        mouth_feature = get_mouth_feature(landmark_pos)
        # binary classification: drowsy (1) or non-drowsy (0)
        drowsy = int(eye_feature <= EYE_AR_THRESH or mouth_feature > MOUTH_AR_THRESH)

        # draw the detections of each face
        annotated_img = frame.copy()
        for face_landmarks in results.multi_face_landmarks:
            # face mesh
            MP_DRAWING.draw_landmarks(
                image=annotated_img,
                landmark_list=face_landmarks,
                connections=MP_FACE_MESH.FACEMESH_TESSELATION,
                landmark_drawing_spec=None,
                connection_drawing_spec=MP_DRAWING_STYLES
                .get_default_face_mesh_tesselation_style())
            # eye and mouth regions
            MP_DRAWING.draw_landmarks(
                image=annotated_img,
                landmark_list=face_landmarks,
                connections=MP_FACE_MESH.FACEMESH_CONTOURS,
                landmark_drawing_spec=None,
                connection_drawing_spec=MP_DRAWING_STYLES
                .get_default_face_mesh_contours_style())

        return annotated_img, eye_feature, mouth_feature, drowsy


def mediapipe_process(frames):
    """
    Process all frames with MediaPipe and return a dictionary with the eye
    and mouth features in the format
    {frame_number: {"eye_feature": 0, "mouth_feature": 0, "drowsy": 0}}
    """
    mp_features = {}
    eye_features_all = []
    mouth_features_all = []

    # extract eye and mouth features for every frame
    for frame_number, frame in enumerate(frames):
        _, eye_feature, mouth_feature, drowsy = process_mp_img(frame)
        if eye_feature is None:
            # skip frames in which no face was detected
            continue
        mp_features[frame_number] = {"eye_feature": eye_feature,
                                     "mouth_feature": mouth_feature,
                                     "drowsy": drowsy}
        eye_features_all.append(eye_feature)
        mouth_features_all.append(mouth_feature)

    # calculate mean and standard deviation for z-score normalization
    eye_mean, eye_std = np.mean(eye_features_all), np.std(eye_features_all)
    mouth_mean, mouth_std = np.mean(mouth_features_all), np.std(mouth_features_all)

    # normalize the eye and mouth features of every frame
    for features in mp_features.values():
        features["eye_feature"] = (features["eye_feature"] - eye_mean) / eye_std
        features["mouth_feature"] = (features["mouth_feature"] - mouth_mean) / mouth_std

    return mp_features
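

# --------------------------------------------------------------------------
# Usage sketch (not part of the original pipeline): a minimal example of
# driving process_mp_img on a live stream. It assumes camera index 0 is
# available and that a display exists for cv2.imshow; swap in a video file
# path if you are processing recordings instead.
if __name__ == "__main__":
    cap = cv2.VideoCapture(0)  # hypothetical source: the default webcam
    while cap.isOpened():
        ok, frame = cap.read()
        if not ok:
            break
        annotated, eye_feature, mouth_feature, drowsy = process_mp_img(frame)
        if eye_feature is not None:
            # overlay the per-frame features and the binary drowsiness label
            label = "DROWSY" if drowsy else "ALERT"
            text = f"{label}  EAR={eye_feature:.2f}  MAR={mouth_feature:.2f}"
            cv2.putText(annotated, text, (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
        cv2.imshow("drowsiness", annotated)
        if cv2.waitKey(1) & 0xFF == ord("q"):  # press 'q' to quit
            break
    cap.release()
    cv2.destroyAllWindows()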