# antisomnus/model/data/mp_process.py
import cv2
import numpy as np
import mediapipe as mp
from scipy.spatial.distance import euclidean as dist

# feature definitions
DIMS = (224, 224, 3)  # expected image dimensions (H, W, C)
RIGHT = [[33, 133], [160, 144], [159, 145], [158, 153]]  # right eye landmark pairs (horizontal pair first, then vertical pairs)
LEFT = [[263, 362], [387, 373], [386, 374], [385, 380]]  # left eye landmark pairs (horizontal pair first, then vertical pairs)
MOUTH = [[61, 291], [39, 181], [0, 17], [269, 405]]  # mouth landmark pairs (horizontal pair first, then vertical pairs)

EYE_AR_THRESH = 0.45  # threshold on the summed left+right EAR; values at or below it indicate closed eyes
PROB_THRESH = 0.3
EYE_AR_CONSEC_FRAMES = 15  # consecutive frames below the eye threshold before flagging drowsiness
MOUTH_AR_THRESH = 0.33  # threshold on the mouth aspect ratio; values above it indicate yawning
MOUTH_AR_CONSEC_FRAMES = 20  # consecutive frames above the mouth threshold before flagging drowsiness

MP_FACE_DETECTION = mp.solutions.face_detection
MP_DRAWING = mp.solutions.drawing_utils
MP_DRAWING_STYLES = mp.solutions.drawing_styles
MP_FACE_MESH = mp.solutions.face_mesh
DRAWING_SPEC = MP_DRAWING.DrawingSpec(thickness=1, circle_radius=1)


def get_ear(landmarks, eye):
    '''Calculate the eye aspect ratio (EAR): the average vertical eye opening
    divided by the horizontal eye width.

    :param landmarks: face landmarks returned by the MediaPipe FaceMesh model
    :param eye: list of landmark index pairs corresponding to one eye
    :return: eye aspect ratio value
    '''
    N1 = dist(landmarks[eye[1][0]], landmarks[eye[1][1]])
    N2 = dist(landmarks[eye[2][0]], landmarks[eye[2][1]])
    N3 = dist(landmarks[eye[3][0]], landmarks[eye[3][1]])
    D = dist(landmarks[eye[0][0]], landmarks[eye[0][1]])
    return (N1 + N2 + N3) / (3 * D)


def get_eye_feature(landmarks):
    '''Calculate the eye feature as the sum of the eye aspect ratios of the
    two eyes.

    Note: this is a sum, not an average; EYE_AR_THRESH is calibrated against
    the summed value.

    :param landmarks: face landmarks returned by the MediaPipe FaceMesh model
    :return: eye feature value
    '''
    return get_ear(landmarks, LEFT) + get_ear(landmarks, RIGHT)


def get_mouth_feature(landmarks):
    '''Calculate the mouth feature (mouth aspect ratio): the average vertical
    mouth opening divided by the horizontal mouth width.

    :param landmarks: face landmarks returned by the MediaPipe FaceMesh model
    :return: mouth feature value
    '''
    n_1 = dist(landmarks[MOUTH[1][0]], landmarks[MOUTH[1][1]])
    n_2 = dist(landmarks[MOUTH[2][0]], landmarks[MOUTH[2][1]])
    n_3 = dist(landmarks[MOUTH[3][0]], landmarks[MOUTH[3][1]])
    dst = dist(landmarks[MOUTH[0][0]], landmarks[MOUTH[0][1]])
    return (n_1 + n_2 + n_3) / (3 * dst)
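

# A minimal smoke-test sketch (not part of the original pipeline): it feeds a
# seeded random landmark array through the aspect-ratio helpers to confirm
# they run end to end. The landmark values are illustrative assumptions, not
# real FaceMesh output, so the printed ratios carry no physical meaning.
def _smoke_test_features():
    rng = np.random.default_rng(0)
    landmarks = rng.random((468, 3))  # FaceMesh yields 468 normalized (x, y, z) points by default
    print(f"eye feature: {get_eye_feature(landmarks):.3f}")
    print(f"mouth feature: {get_mouth_feature(landmarks):.3f}")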


# image processing
def process_mp_img(frame):
    """Run MediaPipe FaceMesh on a single BGR frame.

    Returns a tuple (annotated_img, eye_feature, mouth_feature, drowsy).
    If no face is detected, returns (frame, None, None, None).
    """
    with MP_FACE_MESH.FaceMesh(
            min_detection_confidence=0.3,
            min_tracking_confidence=0.8) as face_mesh:
        # convert the image to RGB and process it with MediaPipe FaceMesh
        results = face_mesh.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    if results.multi_face_landmarks is None:
        # no face detected: return the unmodified frame and empty features
        return frame, None, None, None

    # collect normalized (x, y, z) positions for the first detected face
    landmark_pos = np.array(
        [[data.x, data.y, data.z] for data in results.multi_face_landmarks[0].landmark]
    )

    # calculate eye and mouth features once, from the first face
    eye_feature = get_eye_feature(landmark_pos)
    mouth_feature = get_mouth_feature(landmark_pos)
    # binary classification: drowsy (True) or non-drowsy (False)
    drowsy = (eye_feature <= EYE_AR_THRESH) or (mouth_feature > MOUTH_AR_THRESH)

    # draw the face detections for each face
    annotated_img = frame.copy()
    for face_landmarks in results.multi_face_landmarks:
        # face mesh tesselation
        MP_DRAWING.draw_landmarks(
            image=annotated_img,
            landmark_list=face_landmarks,
            connections=MP_FACE_MESH.FACEMESH_TESSELATION,
            landmark_drawing_spec=None,
            connection_drawing_spec=MP_DRAWING_STYLES
            .get_default_face_mesh_tesselation_style())
        # eye and mouth contours
        MP_DRAWING.draw_landmarks(
            image=annotated_img,
            landmark_list=face_landmarks,
            connections=MP_FACE_MESH.FACEMESH_CONTOURS,
            landmark_drawing_spec=None,
            connection_drawing_spec=MP_DRAWING_STYLES
            .get_default_face_mesh_contours_style())
    return annotated_img, eye_feature, mouth_feature, drowsy
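

# Hypothetical single-image usage sketch (not part of the original module).
# The image path is an illustrative assumption; swap in a real file to try it.
def _demo_single_image(path="driver.jpg"):
    img = cv2.imread(path)  # hypothetical test image on disk
    if img is None:
        raise FileNotFoundError(path)
    _, ear, mar, drowsy = process_mp_img(img)
    if ear is None:
        print("no face detected")
    else:
        print(f"eye feature: {ear:.3f}, mouth feature: {mar:.3f}, drowsy: {drowsy}")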


def mediapipe_process(frames):
    """Process a sequence of frames with MediaPipe and return a dictionary
    mapping frame index to eye and mouth features, in the format
    {frame_number: {"eye_feature": 0, "mouth_feature": 0, "drowsy": 0}}.

    Entries are keyed by frame index because numpy frames are not hashable.
    """
    mp_features = {}
    eye_features_all = []
    mouth_features_all = []
    # extract eye and mouth features for every frame
    for i, frame in enumerate(frames):
        _, eye_feature, mouth_feature, drowsy = process_mp_img(frame)
        if eye_feature is None:
            # no face detected: keep zeroed defaults and skip the stats
            mp_features[i] = {"eye_feature": 0, "mouth_feature": 0, "drowsy": 0}
            continue
        mp_features[i] = {
            "eye_feature": eye_feature,
            "mouth_feature": mouth_feature,
            "drowsy": drowsy,
        }
        eye_features_all.append(eye_feature)
        mouth_features_all.append(mouth_feature)
    if not eye_features_all:
        # no faces detected anywhere: nothing to normalize
        return mp_features
    # calculate mean and standard deviation for normalization
    eye_mean, eye_std = np.mean(eye_features_all), np.std(eye_features_all)
    mouth_mean, mouth_std = np.mean(mouth_features_all), np.std(mouth_features_all)
    eye_std = eye_std or 1.0  # guard against division by zero
    mouth_std = mouth_std or 1.0
    # normalize eye and mouth features for all frames
    for features in mp_features.values():
        features["eye_feature"] = (features["eye_feature"] - eye_mean) / eye_std
        features["mouth_feature"] = (features["mouth_feature"] - mouth_mean) / mouth_std
    return mp_features
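

# End-to-end usage sketch, assuming frames come from a video on disk. The
# video path and the every-10th-frame sampling step are illustrative
# assumptions, not project defaults.
if __name__ == "__main__":
    cap = cv2.VideoCapture("drive.mp4")  # hypothetical input video
    frames = []
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        frames.append(frame)
    cap.release()
    features = mediapipe_process(frames[::10])  # subsample to cut per-frame FaceMesh cost
    for idx, feats in list(features.items())[:5]:
        print(idx, feats)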