k22056537 committed on
Commit
76adc7f
·
1 Parent(s): d582dbd

feat: stage 2 head pose, eye behaviour, MAR/yawn, tighter focus; add torch deps

Browse files
models/eye_behaviour/eye_attention_model.py CHANGED
@@ -1 +1,48 @@
1
- # stub
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MobileNetV2 eye attention classifier (attentive vs inattentive)
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torchvision.models as models
6
+
7
+
8
class EyeAttentionModel(nn.Module):
    """Binary eye-attention classifier (attentive vs inattentive).

    A MobileNetV2 feature extractor followed by a small dropout/linear
    head that emits 2-class logits. `predict_score` maps logits to the
    probability of the "attentive" class (index 1).
    """

    def __init__(
        self,
        pretrained: bool = True,
        dropout1: float = 0.3,
        dropout2: float = 0.2,
    ):
        super().__init__()

        # Optionally start from ImageNet weights.
        backbone = models.mobilenet_v2(
            weights=models.MobileNet_V2_Weights.DEFAULT if pretrained else None
        )

        self.features = backbone.features
        self.pool = nn.AdaptiveAvgPool2d(1)
        # 1280 is MobileNetV2's final feature width.
        self.classifier = nn.Sequential(
            nn.Dropout(dropout1),
            nn.Linear(1280, 256),
            nn.ReLU(),
            nn.Dropout(dropout2),
            nn.Linear(256, 2),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return raw 2-class logits for a batch of eye crops."""
        feats = self.pool(self.features(x)).flatten(1)
        return self.classifier(feats)

    def predict_score(self, x: torch.Tensor) -> torch.Tensor:
        """Return the softmax probability of class 1 (attentive), per sample."""
        return torch.softmax(self.forward(x), dim=1)[:, 1]

    def freeze_backbone(self):
        """Stop gradients through every backbone parameter."""
        for p in self.features.parameters():
            p.requires_grad = False

    def unfreeze_last_blocks(self, n: int = 4):
        """Re-enable gradients for the last *n* backbone blocks."""
        total = len(self.features)
        for block in self.features[max(0, total - n):]:
            for p in block.parameters():
                p.requires_grad = True
models/eye_behaviour/eye_crop.py CHANGED
@@ -1 +1,70 @@
1
- # stub
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Eye region extraction from Face Mesh landmarks
2
+
3
+ import cv2
4
+ import numpy as np
5
+
6
+ LEFT_EYE_CONTOUR = [33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161, 246]
7
+ RIGHT_EYE_CONTOUR = [362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385, 384, 398]
8
+
9
+ IMAGENET_MEAN = (0.485, 0.456, 0.406)
10
+ IMAGENET_STD = (0.229, 0.224, 0.225)
11
+
12
+ CROP_SIZE = 96
13
+
14
+
15
+ def _bbox_from_landmarks(
16
+ landmarks: np.ndarray,
17
+ indices: list[int],
18
+ frame_w: int,
19
+ frame_h: int,
20
+ expand: float = 0.4,
21
+ ) -> tuple[int, int, int, int]:
22
+ pts = landmarks[indices, :2]
23
+ px = pts[:, 0] * frame_w
24
+ py = pts[:, 1] * frame_h
25
+
26
+ x_min, x_max = px.min(), px.max()
27
+ y_min, y_max = py.min(), py.max()
28
+ w = x_max - x_min
29
+ h = y_max - y_min
30
+ cx = (x_min + x_max) / 2
31
+ cy = (y_min + y_max) / 2
32
+
33
+ size = max(w, h) * (1 + expand)
34
+ half = size / 2
35
+
36
+ x1 = int(max(cx - half, 0))
37
+ y1 = int(max(cy - half, 0))
38
+ x2 = int(min(cx + half, frame_w))
39
+ y2 = int(min(cy + half, frame_h))
40
+
41
+ return x1, y1, x2, y2
42
+
43
+
44
def extract_eye_crops(
    frame: np.ndarray,
    landmarks: np.ndarray,
    expand: float = 0.4,
    crop_size: int = CROP_SIZE,
) -> tuple[np.ndarray, np.ndarray, tuple, tuple]:
    """Cut out and resize both eye regions from a BGR frame.

    Returns (left_crop, right_crop, left_bbox, right_bbox); each crop is
    a BGR square of side *crop_size*, each bbox is (x1, y1, x2, y2).
    """
    frame_h, frame_w = frame.shape[:2]

    crops = []
    bboxes = []
    for contour in (LEFT_EYE_CONTOUR, RIGHT_EYE_CONTOUR):
        x1, y1, x2, y2 = _bbox_from_landmarks(landmarks, contour, frame_w, frame_h, expand)
        region = frame[y1:y2, x1:x2]
        # INTER_AREA is the preferred interpolation when shrinking.
        crops.append(cv2.resize(region, (crop_size, crop_size), interpolation=cv2.INTER_AREA))
        bboxes.append((x1, y1, x2, y2))

    return crops[0], crops[1], bboxes[0], bboxes[1]
62
+
63
+
64
def crop_to_tensor(crop_bgr: np.ndarray):
    """Convert a BGR uint8 eye crop into a normalized CHW float32 torch tensor.

    Applies ImageNet mean/std normalization channel-wise and returns a
    (3, H, W) tensor ready to feed the eye-attention model.
    """
    # Local import keeps torch optional for landmark-only pipelines.
    import torch

    rgb = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
    # Vectorized normalization via broadcasting (replaces the per-channel loop).
    mean = np.asarray(IMAGENET_MEAN, dtype=np.float32)
    std = np.asarray(IMAGENET_STD, dtype=np.float32)
    rgb = (rgb - mean) / std
    # .copy() makes the CHW layout contiguous; from_numpy on a transpose
    # would otherwise yield a non-contiguous tensor.
    return torch.from_numpy(rgb.transpose(2, 0, 1).copy())
models/eye_behaviour/eye_scorer.py CHANGED
@@ -1 +1,167 @@
1
- # stub
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # EAR + gaze from landmarks -> S_eye (no model)
2
+
3
+ import math
4
+
5
+ import numpy as np
6
+
7
+ _LEFT_EYE_EAR = [33, 160, 158, 133, 153, 145]
8
+ _RIGHT_EYE_EAR = [362, 385, 387, 263, 373, 380]
9
+
10
+ _LEFT_IRIS_CENTER = 468
11
+ _RIGHT_IRIS_CENTER = 473
12
+
13
+ _LEFT_EYE_INNER = 133
14
+ _LEFT_EYE_OUTER = 33
15
+ _RIGHT_EYE_INNER = 362
16
+ _RIGHT_EYE_OUTER = 263
17
+
18
+ _LEFT_EYE_TOP = 159
19
+ _LEFT_EYE_BOTTOM = 145
20
+ _RIGHT_EYE_TOP = 386
21
+ _RIGHT_EYE_BOTTOM = 374
22
+
23
+ # Mouth (MAR) — inner lip landmarks
24
+ _MOUTH_TOP = 13
25
+ _MOUTH_BOTTOM = 14
26
+ _MOUTH_LEFT = 78
27
+ _MOUTH_RIGHT = 308
28
+ _MOUTH_UPPER_1 = 82
29
+ _MOUTH_UPPER_2 = 312
30
+ _MOUTH_LOWER_1 = 87
31
+ _MOUTH_LOWER_2 = 317
32
+
33
+ MAR_YAWN_THRESHOLD = 0.55 # MAR above this = mouth open (e.g. yawning / sleepy)
34
+
35
+
36
+ def _distance(p1: np.ndarray, p2: np.ndarray) -> float:
37
+ return float(np.linalg.norm(p1 - p2))
38
+
39
+
40
+ def compute_ear(landmarks: np.ndarray, eye_indices: list[int]) -> float:
41
+ p1 = landmarks[eye_indices[0], :2]
42
+ p2 = landmarks[eye_indices[1], :2]
43
+ p3 = landmarks[eye_indices[2], :2]
44
+ p4 = landmarks[eye_indices[3], :2]
45
+ p5 = landmarks[eye_indices[4], :2]
46
+ p6 = landmarks[eye_indices[5], :2]
47
+
48
+ vertical1 = _distance(p2, p6)
49
+ vertical2 = _distance(p3, p5)
50
+ horizontal = _distance(p1, p4)
51
+
52
+ if horizontal < 1e-6:
53
+ return 0.0
54
+
55
+ return (vertical1 + vertical2) / (2.0 * horizontal)
56
+
57
+
58
def compute_avg_ear(landmarks: np.ndarray) -> float:
    """Mean EAR over both eyes (averaging tolerates slight head roll)."""
    return 0.5 * (
        compute_ear(landmarks, _LEFT_EYE_EAR) + compute_ear(landmarks, _RIGHT_EYE_EAR)
    )
62
+
63
+
64
def compute_gaze_ratio(landmarks: np.ndarray) -> tuple[float, float]:
    """Iris position inside the eye opening, averaged over both eyes.

    Returns (h_ratio, v_ratio), each clipped to [0, 1]; (0.5, 0.5) means
    the iris is centred (looking straight ahead). Any axis whose eye
    extent is degenerate falls back to 0.5 for that axis.
    """
    left_iris = landmarks[_LEFT_IRIS_CENTER, :2]
    left_inner = landmarks[_LEFT_EYE_INNER, :2]
    left_outer = landmarks[_LEFT_EYE_OUTER, :2]
    left_top = landmarks[_LEFT_EYE_TOP, :2]
    left_bottom = landmarks[_LEFT_EYE_BOTTOM, :2]

    right_iris = landmarks[_RIGHT_IRIS_CENTER, :2]
    right_inner = landmarks[_RIGHT_EYE_INNER, :2]
    right_outer = landmarks[_RIGHT_EYE_OUTER, :2]
    right_top = landmarks[_RIGHT_EYE_TOP, :2]
    right_bottom = landmarks[_RIGHT_EYE_BOTTOM, :2]

    widths = (_distance(left_inner, left_outer), _distance(right_inner, right_outer))
    if min(widths) < 1e-6:
        # Eye width collapsed: report "centred" for both axes.
        return 0.5, 0.5

    h_ratio = (
        _distance(left_outer, left_iris) / widths[0]
        + _distance(right_outer, right_iris) / widths[1]
    ) / 2.0

    heights = (_distance(left_top, left_bottom), _distance(right_top, right_bottom))
    if min(heights) < 1e-6:
        # Eye height collapsed (blink): vertical gaze is meaningless.
        return h_ratio, 0.5

    v_ratio = (
        _distance(left_top, left_iris) / heights[0]
        + _distance(right_top, right_iris) / heights[1]
    ) / 2.0

    return float(np.clip(h_ratio, 0, 1)), float(np.clip(v_ratio, 0, 1))
98
+
99
+
100
def compute_mar(landmarks: np.ndarray) -> float:
    """Mouth Aspect Ratio from the inner-lip landmarks.

    High MAR = wide-open mouth (yawning / sleepy); near zero = closed.
    Averages three vertical lip gaps against the mouth width.
    """
    width = _distance(landmarks[_MOUTH_LEFT, :2], landmarks[_MOUTH_RIGHT, :2])
    # Degenerate mouth width -> treat as closed.
    if width < 1e-6:
        return 0.0

    openings = (
        _distance(landmarks[_MOUTH_UPPER_1, :2], landmarks[_MOUTH_LOWER_1, :2]),
        _distance(landmarks[_MOUTH_TOP, :2], landmarks[_MOUTH_BOTTOM, :2]),
        _distance(landmarks[_MOUTH_UPPER_2, :2], landmarks[_MOUTH_LOWER_2, :2]),
    )
    return sum(openings) / (2.0 * width)
118
+
119
+
120
class EyeBehaviourScorer:
    """Heuristic eye-behaviour score S_eye in [0, 1] from landmarks alone.

    Combines eyelid openness (EAR) with how centred the gaze is; no
    neural model involved.
    """

    def __init__(
        self,
        ear_open: float = 0.30,
        ear_closed: float = 0.16,
        gaze_max_offset: float = 0.28,
    ):
        # EAR >= ear_open counts as fully open; <= ear_closed as shut.
        self.ear_open = ear_open
        self.ear_closed = ear_closed
        # Radial gaze offset (from 0.5/0.5) at which the gaze score reaches 0.
        self.gaze_max_offset = gaze_max_offset

    def _ear_score(self, ear: float) -> float:
        """Linear ramp: 0 at ear_closed, 1 at ear_open."""
        if ear <= self.ear_closed:
            return 0.0
        if ear >= self.ear_open:
            return 1.0
        return (ear - self.ear_closed) / (self.ear_open - self.ear_closed)

    def _gaze_score(self, h_ratio: float, v_ratio: float) -> float:
        """Cosine falloff of the radial gaze offset from centre (0.5, 0.5)."""
        offset = math.sqrt((h_ratio - 0.5) ** 2 + (v_ratio - 0.5) ** 2)
        t = min(offset / self.gaze_max_offset, 1.0)
        # Smooth 1 -> 0 as t goes 0 -> 1.
        return 0.5 * (1.0 + math.cos(math.pi * t))

    def score(self, landmarks: np.ndarray) -> float:
        """S_eye: EAR score gated by gaze; near-closed eyes short-circuit."""
        ear_s = self._ear_score(compute_avg_ear(landmarks))
        if ear_s < 0.3:
            # Eyes (almost) closed: gaze is meaningless, return EAR alone.
            return ear_s
        h_ratio, v_ratio = compute_gaze_ratio(landmarks)
        return ear_s * self._gaze_score(h_ratio, v_ratio)

    def detailed_score(self, landmarks: np.ndarray) -> dict:
        """Like score() but exposes every intermediate value, rounded to 4 dp."""
        ear = compute_avg_ear(landmarks)
        ear_s = self._ear_score(ear)
        h_ratio, v_ratio = compute_gaze_ratio(landmarks)
        gaze_s = self._gaze_score(h_ratio, v_ratio)
        s_eye = ear_s if ear_s < 0.3 else ear_s * gaze_s
        return {
            "ear": round(ear, 4),
            "ear_score": round(ear_s, 4),
            "h_gaze": round(h_ratio, 4),
            "v_gaze": round(v_ratio, 4),
            "gaze_score": round(gaze_s, 4),
            "s_eye": round(s_eye, 4),
        }
models/face_orientation/head_pose.py CHANGED
@@ -1 +1,114 @@
1
- # stub
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Head pose from 6 Face Mesh landmarks (solvePnP) -> yaw/pitch/roll, S_face
2
+
3
+ import math
4
+
5
+ import cv2
6
+ import numpy as np
7
+
8
+ _LANDMARK_INDICES = [1, 152, 33, 263, 61, 291]
9
+
10
+ _MODEL_POINTS = np.array(
11
+ [
12
+ [0.0, 0.0, 0.0],
13
+ [0.0, -330.0, -65.0],
14
+ [-225.0, 170.0, -135.0],
15
+ [225.0, 170.0, -135.0],
16
+ [-150.0, -150.0, -125.0],
17
+ [150.0, -150.0, -125.0],
18
+ ],
19
+ dtype=np.float64,
20
+ )
21
+
22
+
23
class HeadPoseEstimator:
    """Head pose (yaw/pitch/roll, degrees) via solvePnP on 6 Face Mesh landmarks.

    Also maps the angular deviation from frontal into a focus score
    S_face in [0, 1].
    """

    def __init__(self, max_angle: float = 30.0, roll_weight: float = 0.5):
        # Deviation (deg) at which S_face reaches 0.
        self.max_angle = max_angle
        # Roll matters less than yaw/pitch for attention; down-weight it.
        self.roll_weight = roll_weight
        self._camera_matrix = None
        self._frame_size = None
        # Assume an undistorted pinhole camera.
        self._dist_coeffs = np.zeros((4, 1), dtype=np.float64)

    def _get_camera_matrix(self, frame_w: int, frame_h: int) -> np.ndarray:
        """Pinhole intrinsics approximated from the frame size (cached)."""
        if self._camera_matrix is None or self._frame_size != (frame_w, frame_h):
            # Focal length ~ frame width is a common webcam approximation.
            f = float(frame_w)
            self._camera_matrix = np.array(
                [[f, 0, frame_w / 2.0], [0, f, frame_h / 2.0], [0, 0, 1]],
                dtype=np.float64,
            )
            self._frame_size = (frame_w, frame_h)
        return self._camera_matrix

    def _solve(self, landmarks: np.ndarray, frame_w: int, frame_h: int):
        """Run solvePnP; returns (success, rvec, tvec, image_points)."""
        image_points = np.array(
            [
                (landmarks[i, 0] * frame_w, landmarks[i, 1] * frame_h)
                for i in _LANDMARK_INDICES
            ],
            dtype=np.float64,
        )
        success, rvec, tvec = cv2.solvePnP(
            _MODEL_POINTS,
            image_points,
            self._get_camera_matrix(frame_w, frame_h),
            self._dist_coeffs,
            flags=cv2.SOLVEPNP_ITERATIVE,
        )
        return success, rvec, tvec, image_points

    def estimate(
        self, landmarks: np.ndarray, frame_w: int, frame_h: int
    ) -> tuple[float, float, float] | None:
        """Yaw/pitch/roll in degrees, or None when solvePnP fails."""
        success, rvec, _tvec, _pts = self._solve(landmarks, frame_w, frame_h)
        if not success:
            return None

        rotation, _ = cv2.Rodrigues(rvec)
        # Rotate the canonical forward/up axes into camera space.
        nose_dir = rotation @ np.array([0.0, 0.0, 1.0])
        face_up = rotation @ np.array([0.0, 1.0, 0.0])

        yaw = math.degrees(math.atan2(nose_dir[0], -nose_dir[2]))
        pitch = math.degrees(math.asin(np.clip(-nose_dir[1], -1.0, 1.0)))
        roll = math.degrees(math.atan2(face_up[0], -face_up[1]))
        return (yaw, pitch, roll)

    def score(self, landmarks: np.ndarray, frame_w: int, frame_h: int) -> float:
        """S_face in [0, 1]: cosine falloff of weighted angular deviation."""
        angles = self.estimate(landmarks, frame_w, frame_h)
        if angles is None:
            return 0.0

        yaw, pitch, roll = angles
        deviation = math.sqrt(yaw**2 + pitch**2 + (self.roll_weight * roll) ** 2)
        t = min(deviation / self.max_angle, 1.0)
        return 0.5 * (1.0 + math.cos(math.pi * t))

    def draw_axes(
        self,
        frame: np.ndarray,
        landmarks: np.ndarray,
        axis_length: float = 50.0,
    ) -> np.ndarray:
        """Overlay the three pose axes at the nose tip (in-place on *frame*)."""
        h, w = frame.shape[:2]
        success, rvec, tvec, image_points = self._solve(landmarks, w, h)
        if not success:
            return frame

        # image_points[0] is the nose-tip landmark.
        origin = tuple(image_points[0].astype(int))
        axes_3d = np.array(
            [[axis_length, 0, 0], [0, axis_length, 0], [0, 0, axis_length]],
            dtype=np.float64,
        )
        projected, _ = cv2.projectPoints(
            axes_3d, rvec, tvec, self._get_camera_matrix(w, h), self._dist_coeffs
        )

        # BGR colors: x axis red, y axis green, z axis blue.
        for end_3d, color in zip(projected, [(0, 0, 255), (0, 255, 0), (255, 0, 0)]):
            cv2.line(frame, origin, tuple(end_3d.ravel().astype(int)), color, 2)

        return frame
requirements.txt CHANGED
@@ -1,4 +1,6 @@
1
- # Stage 1: face mesh + test UI (no torch)
2
  mediapipe>=0.10.14
3
  opencv-python>=4.8.0
4
  numpy>=1.24.0
 
 
 
1
+ # Face mesh + head pose + eye behaviour (Stage 2); eye CNN needs torch
2
  mediapipe>=0.10.14
3
  opencv-python>=4.8.0
4
  numpy>=1.24.0
5
+ torch>=2.0.0
6
+ torchvision>=0.15.0
ui/live_demo.py CHANGED
@@ -119,12 +119,16 @@ def draw_eyes_and_irises(frame, landmarks, w, h):
119
 
120
 
121
  def main():
122
- parser = argparse.ArgumentParser(description="FocusGuard — Face mesh (Stage 1)")
123
  parser.add_argument("--camera", type=int, default=0, help="Camera index")
 
 
 
 
124
  args = parser.parse_args()
125
 
126
- print("[DEMO] Face mesh only (Stage 1)")
127
- pipeline = FaceMeshPipeline()
128
 
129
  cap = cv2.VideoCapture(args.camera)
130
  if not cap.isOpened():
@@ -156,10 +160,21 @@ def main():
156
  elif mesh_mode == MESH_CONTOURS:
157
  draw_contours(frame, lm, w, h)
158
  draw_eyes_and_irises(frame, lm, w, h)
159
-
160
- cv2.rectangle(frame, (0, 0), (w, 28), (0, 0, 0), -1)
161
- cv2.putText(frame, f"{_MESH_NAMES[mesh_mode]} FPS: {fps:.0f}", (10, 20), FONT, 0.5, (255, 255, 255), 1, cv2.LINE_AA)
162
- cv2.putText(frame, "q:quit m:mesh", (w - 140, 20), FONT, 0.4, (180, 180, 180), 1, cv2.LINE_AA)
 
 
 
 
 
 
 
 
 
 
 
163
 
164
  cv2.imshow("FocusGuard", frame)
165
 
 
119
 
120
 
121
  def main():
122
+ parser = argparse.ArgumentParser(description="FocusGuard — Face mesh + focus (Stage 2)")
123
  parser.add_argument("--camera", type=int, default=0, help="Camera index")
124
+ parser.add_argument("--max-angle", type=float, default=22.0, help="Max head angle for S_face (deg), smaller = tighter")
125
+ parser.add_argument("--alpha", type=float, default=0.4, help="S_face weight")
126
+ parser.add_argument("--beta", type=float, default=0.6, help="S_eye weight")
127
+ parser.add_argument("--threshold", type=float, default=0.55, help="Score >= this = FOCUSED (higher = stricter)")
128
  args = parser.parse_args()
129
 
130
+ print("[DEMO] Face mesh + head pose + eye behaviour (Stage 2)")
131
+ pipeline = FaceMeshPipeline(max_angle=args.max_angle, alpha=args.alpha, beta=args.beta, threshold=args.threshold)
132
 
133
  cap = cv2.VideoCapture(args.camera)
134
  if not cap.isOpened():
 
160
  elif mesh_mode == MESH_CONTOURS:
161
  draw_contours(frame, lm, w, h)
162
  draw_eyes_and_irises(frame, lm, w, h)
163
+ pipeline.head_pose.draw_axes(frame, lm)
164
+
165
+ # Status bar: FOCUSED / NOT FOCUSED; YAWN when mouth open (sleepy)
166
+ status = "FOCUSED" if result["is_focused"] else "NOT FOCUSED"
167
+ status_color = GREEN if result["is_focused"] else RED
168
+ cv2.rectangle(frame, (0, 0), (w, 55), (0, 0, 0), -1)
169
+ cv2.putText(frame, status, (10, 28), FONT, 0.8, status_color, 2, cv2.LINE_AA)
170
+ mar_str = f" MAR:{result['mar']:.2f}" if result.get("mar") is not None else ""
171
+ cv2.putText(frame, f"S_face:{result['s_face']:.2f} S_eye:{result['s_eye']:.2f}{mar_str} score:{result['raw_score']:.2f}", (10, 48), FONT, 0.45, WHITE, 1, cv2.LINE_AA)
172
+ if result.get("is_yawning"):
173
+ cv2.putText(frame, "YAWN", (10, 75), FONT, 0.7, ORANGE, 2, cv2.LINE_AA)
174
+ if result["yaw"] is not None:
175
+ cv2.putText(frame, f"yaw:{result['yaw']:+.0f} pitch:{result['pitch']:+.0f} roll:{result['roll']:+.0f}", (w - 280, 48), FONT, 0.4, (180, 180, 180), 1, cv2.LINE_AA)
176
+ cv2.putText(frame, f"{_MESH_NAMES[mesh_mode]} FPS: {fps:.0f}", (w - 200, 28), FONT, 0.45, WHITE, 1, cv2.LINE_AA)
177
+ cv2.putText(frame, "q:quit m:mesh", (w - 140, 48), FONT, 0.4, (180, 180, 180), 1, cv2.LINE_AA)
178
 
179
  cv2.imshow("FocusGuard", frame)
180
 
ui/pipeline.py CHANGED
@@ -1,4 +1,4 @@
1
- # Stage 1: face mesh only (no head pose / eye model / fusion)
2
 
3
  import os
4
  import sys
@@ -10,17 +10,59 @@ if _PROJECT_ROOT not in sys.path:
10
  sys.path.insert(0, _PROJECT_ROOT)
11
 
12
  from models.face_mesh.face_mesh import FaceMeshDetector
 
 
13
 
14
 
15
  class FaceMeshPipeline:
16
- # frame -> face mesh -> 478 landmarks
17
 
18
- def __init__(self):
19
  self.detector = FaceMeshDetector()
 
 
 
 
 
20
 
21
  def process_frame(self, bgr_frame: np.ndarray) -> dict:
22
  landmarks = self.detector.process(bgr_frame)
23
- return {"landmarks": landmarks}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  def close(self):
26
  self.detector.close()
 
1
+ # Stage 2: face mesh + head pose (S_face) + eye behaviour (S_eye) -> focus
2
 
3
  import os
4
  import sys
 
10
  sys.path.insert(0, _PROJECT_ROOT)
11
 
12
  from models.face_mesh.face_mesh import FaceMeshDetector
13
+ from models.face_orientation.head_pose import HeadPoseEstimator
14
+ from models.eye_behaviour.eye_scorer import EyeBehaviourScorer, compute_mar, MAR_YAWN_THRESHOLD
15
 
16
 
17
  class FaceMeshPipeline:
18
+ # frame -> face mesh -> S_face + S_eye -> focused / not focused
19
 
20
+ def __init__(self, max_angle: float = 22.0, alpha: float = 0.4, beta: float = 0.6, threshold: float = 0.55):
21
  self.detector = FaceMeshDetector()
22
+ self.head_pose = HeadPoseEstimator(max_angle=max_angle)
23
+ self.eye_scorer = EyeBehaviourScorer()
24
+ self.alpha = alpha
25
+ self.beta = beta
26
+ self.threshold = threshold
27
 
28
  def process_frame(self, bgr_frame: np.ndarray) -> dict:
29
  landmarks = self.detector.process(bgr_frame)
30
+ h, w = bgr_frame.shape[:2]
31
+
32
+ out = {
33
+ "landmarks": landmarks,
34
+ "s_face": 0.0,
35
+ "s_eye": 0.0,
36
+ "raw_score": 0.0,
37
+ "is_focused": False,
38
+ "yaw": None,
39
+ "pitch": None,
40
+ "roll": None,
41
+ "mar": None,
42
+ "is_yawning": False,
43
+ }
44
+
45
+ if landmarks is None:
46
+ return out
47
+
48
+ # Head pose -> S_face, yaw/pitch/roll
49
+ angles = self.head_pose.estimate(landmarks, w, h)
50
+ if angles is not None:
51
+ out["yaw"], out["pitch"], out["roll"] = angles
52
+ out["s_face"] = self.head_pose.score(landmarks, w, h)
53
+
54
+ # Eye behaviour (EAR + gaze) -> S_eye
55
+ out["s_eye"] = self.eye_scorer.score(landmarks)
56
+
57
+ # Mouth open (MAR) -> yawn / sleepy: force NOT FOCUSED when mouth open
58
+ out["mar"] = compute_mar(landmarks)
59
+ out["is_yawning"] = out["mar"] > MAR_YAWN_THRESHOLD
60
+
61
+ # Fusion: alpha*S_face + beta*S_eye; if yawning (mouth open) -> not focused
62
+ out["raw_score"] = self.alpha * out["s_face"] + self.beta * out["s_eye"]
63
+ out["is_focused"] = out["raw_score"] >= self.threshold and not out["is_yawning"]
64
+
65
+ return out
66
 
67
  def close(self):
68
  self.detector.close()