Abdelrahman Almatrooshi committed on
Commit
82d2ab7
·
1 Parent(s): 2eaf50d

integrated open/closed eye cnn in script

Browse files
best_eye_cnn.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3c3d85de013387e8583fe7218daabb83a8a6f46ca5bcacbf6fbf3619b688da8
3
+ size 2103809
models/cnn/CNN_MODEL/.claude/settings.local.json CHANGED
@@ -1,7 +1,7 @@
1
- {
2
- "permissions": {
3
- "allow": [
4
- "Bash(# Check Dataset_subset counts echo \"\"=== Dataset_subset/train/open ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/train/open/ | wc -l && echo \"\"=== Dataset_subset/train/closed ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/train/closed/ | wc -l && echo \"\"=== Dataset_subset/val/open ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/val/open/ | wc -l && echo \"\"=== Dataset_subset/val/closed ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/val/closed/)"
5
- ]
6
- }
7
- }
 
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(# Check Dataset_subset counts echo \"\"=== Dataset_subset/train/open ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/train/open/ | wc -l && echo \"\"=== Dataset_subset/train/closed ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/train/closed/ | wc -l && echo \"\"=== Dataset_subset/val/open ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/val/open/ | wc -l && echo \"\"=== Dataset_subset/val/closed ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/val/closed/)"
5
+ ]
6
+ }
7
+ }
models/cnn/CNN_MODEL/.gitattributes CHANGED
@@ -1 +1 @@
1
- DATA/** filter=lfs diff=lfs merge=lfs -text
 
1
+ DATA/** filter=lfs diff=lfs merge=lfs -text
models/cnn/CNN_MODEL/.gitignore CHANGED
@@ -1,4 +1,4 @@
1
- Dataset/train/
2
- Dataset/val/
3
- Dataset/test/
4
- .DS_Store
 
1
+ Dataset/train/
2
+ Dataset/val/
3
+ Dataset/test/
4
+ .DS_Store
models/cnn/CNN_MODEL/README.md CHANGED
@@ -1,74 +1,74 @@
1
- # Eye Open / Closed Classifier (YOLOv11-CLS)
2
-
3
-
4
- Binary classifier: **open** vs **closed** eyes.
5
- Used as a baseline for eye-tracking, drowsiness, or focus detection.
6
-
7
- ---
8
-
9
- ## Model team task
10
-
11
- - **Train** the YOLOv11s-cls eye classifier in a **separate notebook** (data split, epochs, GPU, export `best.pt`).
12
- - Provide **trained weights** (`best.pt`) for this repo’s evaluation and inference scripts.
13
-
14
-
15
-
16
- ---
17
-
18
- ## Repo contents
19
-
20
- - **notebooks/eye_classifier_colab.ipynb** — Data download (Kaggle), clean, split, undersample, **evaluate** (needs `best.pt` from model team), export.
21
- - **scripts/predict_image.py** — Run classifier on single images (needs `best.pt`).
22
- - **scripts/webcam_live.py** — Live webcam open/closed (needs `best.pt` + optional `weights/face_landmarker.task`).
23
- - **scripts/video_infer.py** — Run on video files.
24
- - **scripts/focus_infer.py** — Focus/attention inference.
25
- - **weights/** — Put `best.pt` here; `face_landmarker.task` is downloaded on first webcam run if missing.
26
- - **docs/** — Extra docs (e.g. UNNECESSARY_FILES.md if present).
27
-
28
- ---
29
-
30
- ## Dataset
31
-
32
- - **Source:** [Kaggle — open/closed eyes](https://www.kaggle.com/datasets/sehriyarmemmedli/open-closed-eyes-dataset)
33
- - The Colab notebook downloads it via `kagglehub`; no local copy in repo.
34
-
35
- ---
36
-
37
- ## Weights
38
-
39
- - Put **best.pt** from the model team in **weights/best.pt** (or `runs/classify/runs_cls/eye_open_closed_cpu/weights/best.pt`).
40
- - For webcam: **face_landmarker.task** is downloaded into **weights/** on first run if missing.
41
-
42
- ---
43
-
44
- ## Local setup
45
-
46
- ```bash
47
- pip install ultralytics opencv-python mediapipe "numpy<2"
48
- ```
49
-
50
- Optional: use a venv. From repo root:
51
- - `python scripts/predict_image.py <image.png>`
52
- - `python scripts/webcam_live.py`
53
- - `python scripts/video_infer.py` (expects 1.mp4 / 2.mp4 in repo root or set `VIDEOS` env)
54
- - `python scripts/focus_infer.py`
55
-
56
- ---
57
-
58
- ## Project structure
59
-
60
- ```
61
- ├── notebooks/
62
- │ └── eye_classifier_colab.ipynb # Data + eval (no training)
63
- ├── scripts/
64
- │ ├── predict_image.py
65
- │ ├── webcam_live.py
66
- │ ├── video_infer.py
67
- │ └── focus_infer.py
68
- ├── weights/ # best.pt, face_landmarker.task
69
- ├── docs/ # extra docs
70
- ├── README.md
71
- └── venv/ # optional
72
- ```
73
-
74
- Training and weight generation: **model team, separate notebook.**
 
1
+ # Eye Open / Closed Classifier (YOLOv11-CLS)
2
+
3
+
4
+ Binary classifier: **open** vs **closed** eyes.
5
+ Used as a baseline for eye-tracking, drowsiness, or focus detection.
6
+
7
+ ---
8
+
9
+ ## Model team task
10
+
11
+ - **Train** the YOLOv11s-cls eye classifier in a **separate notebook** (data split, epochs, GPU, export `best.pt`).
12
+ - Provide **trained weights** (`best.pt`) for this repo’s evaluation and inference scripts.
13
+
14
+
15
+
16
+ ---
17
+
18
+ ## Repo contents
19
+
20
+ - **notebooks/eye_classifier_colab.ipynb** — Data download (Kaggle), clean, split, undersample, **evaluate** (needs `best.pt` from model team), export.
21
+ - **scripts/predict_image.py** — Run classifier on single images (needs `best.pt`).
22
+ - **scripts/webcam_live.py** — Live webcam open/closed (needs `best.pt` + optional `weights/face_landmarker.task`).
23
+ - **scripts/video_infer.py** — Run on video files.
24
+ - **scripts/focus_infer.py** — Focus/attention inference.
25
+ - **weights/** — Put `best.pt` here; `face_landmarker.task` is downloaded on first webcam run if missing.
26
+ - **docs/** — Extra docs (e.g. UNNECESSARY_FILES.md if present).
27
+
28
+ ---
29
+
30
+ ## Dataset
31
+
32
+ - **Source:** [Kaggle — open/closed eyes](https://www.kaggle.com/datasets/sehriyarmemmedli/open-closed-eyes-dataset)
33
+ - The Colab notebook downloads it via `kagglehub`; no local copy in repo.
34
+
35
+ ---
36
+
37
+ ## Weights
38
+
39
+ - Put **best.pt** from the model team in **weights/best.pt** (or `runs/classify/runs_cls/eye_open_closed_cpu/weights/best.pt`).
40
+ - For webcam: **face_landmarker.task** is downloaded into **weights/** on first run if missing.
41
+
42
+ ---
43
+
44
+ ## Local setup
45
+
46
+ ```bash
47
+ pip install ultralytics opencv-python mediapipe "numpy<2"
48
+ ```
49
+
50
+ Optional: use a venv. From repo root:
51
+ - `python scripts/predict_image.py <image.png>`
52
+ - `python scripts/webcam_live.py`
53
+ - `python scripts/video_infer.py` (expects 1.mp4 / 2.mp4 in repo root or set `VIDEOS` env)
54
+ - `python scripts/focus_infer.py`
55
+
56
+ ---
57
+
58
+ ## Project structure
59
+
60
+ ```
61
+ ├── notebooks/
62
+ │ └── eye_classifier_colab.ipynb # Data + eval (no training)
63
+ ├── scripts/
64
+ │ ├── predict_image.py
65
+ │ ├── webcam_live.py
66
+ │ ├── video_infer.py
67
+ │ └── focus_infer.py
68
+ ├── weights/ # best.pt, face_landmarker.task
69
+ ├── docs/ # extra docs
70
+ ├── README.md
71
+ └── venv/ # optional
72
+ ```
73
+
74
+ Training and weight generation: **model team, separate notebook.**
models/cnn/CNN_MODEL/notebooks/eye_classifier_colab.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
models/cnn/CNN_MODEL/scripts/focus_infer.py CHANGED
@@ -1,199 +1,199 @@
1
- from __future__ import annotations
2
-
3
- from pathlib import Path
4
- import os
5
-
6
- import cv2
7
- import numpy as np
8
- from ultralytics import YOLO
9
-
10
-
11
- def list_images(folder: Path):
12
- exts = {".png", ".jpg", ".jpeg", ".bmp", ".webp"}
13
- return sorted([p for p in folder.iterdir() if p.suffix.lower() in exts])
14
-
15
-
16
- def find_weights(project_root: Path) -> Path | None:
17
- candidates = [
18
- project_root / "weights" / "best.pt",
19
- project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
20
- project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
21
- project_root / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
22
- project_root / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
23
- ]
24
- return next((p for p in candidates if p.is_file()), None)
25
-
26
-
27
- def detect_eyelid_boundary(gray: np.ndarray) -> np.ndarray | None:
28
- """
29
- Returns an ellipse fit to the largest contour near the eye boundary.
30
- Output format: (center(x,y), (axis1, axis2), angle) or None.
31
- """
32
- blur = cv2.GaussianBlur(gray, (5, 5), 0)
33
- edges = cv2.Canny(blur, 40, 120)
34
- edges = cv2.dilate(edges, np.ones((3, 3), np.uint8), iterations=1)
35
- contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
36
- if not contours:
37
- return None
38
- contours = sorted(contours, key=cv2.contourArea, reverse=True)
39
- for c in contours:
40
- if len(c) >= 5 and cv2.contourArea(c) > 50:
41
- return cv2.fitEllipse(c)
42
- return None
43
-
44
-
45
- def detect_pupil_center(gray: np.ndarray) -> tuple[int, int] | None:
46
- """
47
- More robust pupil detection:
48
- - enhance contrast (CLAHE)
49
- - find dark blobs
50
- - score by circularity and proximity to center
51
- """
52
- h, w = gray.shape
53
- clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
54
- eq = clahe.apply(gray)
55
- blur = cv2.GaussianBlur(eq, (7, 7), 0)
56
-
57
- # Focus on the central region to avoid eyelashes/edges
58
- cx, cy = w // 2, h // 2
59
- rx, ry = int(w * 0.3), int(h * 0.3)
60
- x0, x1 = max(cx - rx, 0), min(cx + rx, w)
61
- y0, y1 = max(cy - ry, 0), min(cy + ry, h)
62
- roi = blur[y0:y1, x0:x1]
63
-
64
- # Inverted threshold to capture dark pupil
65
- _, thresh = cv2.threshold(roi, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
66
- thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, np.ones((3, 3), np.uint8), iterations=2)
67
- thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8), iterations=1)
68
-
69
- contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
70
- if not contours:
71
- return None
72
-
73
- best = None
74
- best_score = -1.0
75
- for c in contours:
76
- area = cv2.contourArea(c)
77
- if area < 15:
78
- continue
79
- perimeter = cv2.arcLength(c, True)
80
- if perimeter <= 0:
81
- continue
82
- circularity = 4 * np.pi * (area / (perimeter * perimeter))
83
- if circularity < 0.3:
84
- continue
85
- m = cv2.moments(c)
86
- if m["m00"] == 0:
87
- continue
88
- px = int(m["m10"] / m["m00"]) + x0
89
- py = int(m["m01"] / m["m00"]) + y0
90
-
91
- # Score by circularity and distance to center
92
- dist = np.hypot(px - cx, py - cy) / max(w, h)
93
- score = circularity - dist
94
- if score > best_score:
95
- best_score = score
96
- best = (px, py)
97
-
98
- return best
99
-
100
-
101
- def is_focused(pupil_center: tuple[int, int], img_shape: tuple[int, int]) -> bool:
102
- """
103
- Decide focus based on pupil offset from image center.
104
- """
105
- h, w = img_shape
106
- cx, cy = w // 2, h // 2
107
- px, py = pupil_center
108
- dx = abs(px - cx) / max(w, 1)
109
- dy = abs(py - cy) / max(h, 1)
110
- return (dx < 0.12) and (dy < 0.12)
111
-
112
-
113
- def annotate(img_bgr: np.ndarray, ellipse, pupil_center, focused: bool, cls_label: str, conf: float):
114
- out = img_bgr.copy()
115
- if ellipse is not None:
116
- cv2.ellipse(out, ellipse, (0, 255, 255), 2)
117
- if pupil_center is not None:
118
- cv2.circle(out, pupil_center, 4, (0, 0, 255), -1)
119
- label = f"{cls_label} ({conf:.2f}) | focused={int(focused)}"
120
- cv2.putText(out, label, (8, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
121
- return out
122
-
123
-
124
- def main():
125
- project_root = Path(__file__).resolve().parent.parent
126
- data_dir = project_root / "Dataset"
127
- alt_data_dir = project_root / "DATA"
128
- out_dir = project_root / "runs_focus"
129
- out_dir.mkdir(parents=True, exist_ok=True)
130
-
131
- weights = find_weights(project_root)
132
- if weights is None:
133
- print("Weights not found. Train first.")
134
- return
135
-
136
- # Support both Dataset/test/{open,closed} and Dataset/{open,closed}
137
- def resolve_test_dirs(root: Path):
138
- test_open = root / "test" / "open"
139
- test_closed = root / "test" / "closed"
140
- if test_open.exists() and test_closed.exists():
141
- return test_open, test_closed
142
- test_open = root / "open"
143
- test_closed = root / "closed"
144
- if test_open.exists() and test_closed.exists():
145
- return test_open, test_closed
146
- alt_closed = root / "close"
147
- if test_open.exists() and alt_closed.exists():
148
- return test_open, alt_closed
149
- return None, None
150
-
151
- test_open, test_closed = resolve_test_dirs(data_dir)
152
- if (test_open is None or test_closed is None) and alt_data_dir.exists():
153
- test_open, test_closed = resolve_test_dirs(alt_data_dir)
154
-
155
- if not test_open.exists() or not test_closed.exists():
156
- print("Test folders missing. Expected:")
157
- print(test_open)
158
- print(test_closed)
159
- return
160
-
161
- test_files = list_images(test_open) + list_images(test_closed)
162
- print("Total test images:", len(test_files))
163
- max_images = int(os.getenv("MAX_IMAGES", "0"))
164
- if max_images > 0:
165
- test_files = test_files[:max_images]
166
- print("Limiting to MAX_IMAGES:", max_images)
167
-
168
- model = YOLO(str(weights))
169
- results = model.predict(test_files, imgsz=224, device="cpu", verbose=False)
170
-
171
- names = model.names
172
- for r in results:
173
- probs = r.probs
174
- top_idx = int(probs.top1)
175
- top_conf = float(probs.top1conf)
176
- pred_label = names[top_idx]
177
-
178
- img = cv2.imread(r.path)
179
- if img is None:
180
- continue
181
- gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
182
-
183
- ellipse = detect_eyelid_boundary(gray)
184
- pupil_center = detect_pupil_center(gray)
185
- focused = False
186
- if pred_label.lower() == "open" and pupil_center is not None:
187
- focused = is_focused(pupil_center, gray.shape)
188
-
189
- annotated = annotate(img, ellipse, pupil_center, focused, pred_label, top_conf)
190
- out_path = out_dir / (Path(r.path).stem + "_annotated.jpg")
191
- cv2.imwrite(str(out_path), annotated)
192
-
193
- print(f"{Path(r.path).name}: pred={pred_label} conf={top_conf:.3f} focused={focused}")
194
-
195
- print(f"\nAnnotated outputs saved to: {out_dir}")
196
-
197
-
198
- if __name__ == "__main__":
199
- main()
 
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ import os
5
+
6
+ import cv2
7
+ import numpy as np
8
+ from ultralytics import YOLO
9
+
10
+
11
+ def list_images(folder: Path):
12
+ exts = {".png", ".jpg", ".jpeg", ".bmp", ".webp"}
13
+ return sorted([p for p in folder.iterdir() if p.suffix.lower() in exts])
14
+
15
+
16
+ def find_weights(project_root: Path) -> Path | None:
17
+ candidates = [
18
+ project_root / "weights" / "best.pt",
19
+ project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
20
+ project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
21
+ project_root / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
22
+ project_root / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
23
+ ]
24
+ return next((p for p in candidates if p.is_file()), None)
25
+
26
+
27
+ def detect_eyelid_boundary(gray: np.ndarray) -> np.ndarray | None:
28
+ """
29
+ Returns an ellipse fit to the largest contour near the eye boundary.
30
+ Output format: (center(x,y), (axis1, axis2), angle) or None.
31
+ """
32
+ blur = cv2.GaussianBlur(gray, (5, 5), 0)
33
+ edges = cv2.Canny(blur, 40, 120)
34
+ edges = cv2.dilate(edges, np.ones((3, 3), np.uint8), iterations=1)
35
+ contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
36
+ if not contours:
37
+ return None
38
+ contours = sorted(contours, key=cv2.contourArea, reverse=True)
39
+ for c in contours:
40
+ if len(c) >= 5 and cv2.contourArea(c) > 50:
41
+ return cv2.fitEllipse(c)
42
+ return None
43
+
44
+
45
+ def detect_pupil_center(gray: np.ndarray) -> tuple[int, int] | None:
46
+ """
47
+ More robust pupil detection:
48
+ - enhance contrast (CLAHE)
49
+ - find dark blobs
50
+ - score by circularity and proximity to center
51
+ """
52
+ h, w = gray.shape
53
+ clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
54
+ eq = clahe.apply(gray)
55
+ blur = cv2.GaussianBlur(eq, (7, 7), 0)
56
+
57
+ # Focus on the central region to avoid eyelashes/edges
58
+ cx, cy = w // 2, h // 2
59
+ rx, ry = int(w * 0.3), int(h * 0.3)
60
+ x0, x1 = max(cx - rx, 0), min(cx + rx, w)
61
+ y0, y1 = max(cy - ry, 0), min(cy + ry, h)
62
+ roi = blur[y0:y1, x0:x1]
63
+
64
+ # Inverted threshold to capture dark pupil
65
+ _, thresh = cv2.threshold(roi, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
66
+ thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, np.ones((3, 3), np.uint8), iterations=2)
67
+ thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8), iterations=1)
68
+
69
+ contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
70
+ if not contours:
71
+ return None
72
+
73
+ best = None
74
+ best_score = -1.0
75
+ for c in contours:
76
+ area = cv2.contourArea(c)
77
+ if area < 15:
78
+ continue
79
+ perimeter = cv2.arcLength(c, True)
80
+ if perimeter <= 0:
81
+ continue
82
+ circularity = 4 * np.pi * (area / (perimeter * perimeter))
83
+ if circularity < 0.3:
84
+ continue
85
+ m = cv2.moments(c)
86
+ if m["m00"] == 0:
87
+ continue
88
+ px = int(m["m10"] / m["m00"]) + x0
89
+ py = int(m["m01"] / m["m00"]) + y0
90
+
91
+ # Score by circularity and distance to center
92
+ dist = np.hypot(px - cx, py - cy) / max(w, h)
93
+ score = circularity - dist
94
+ if score > best_score:
95
+ best_score = score
96
+ best = (px, py)
97
+
98
+ return best
99
+
100
+
101
+ def is_focused(pupil_center: tuple[int, int], img_shape: tuple[int, int]) -> bool:
102
+ """
103
+ Decide focus based on pupil offset from image center.
104
+ """
105
+ h, w = img_shape
106
+ cx, cy = w // 2, h // 2
107
+ px, py = pupil_center
108
+ dx = abs(px - cx) / max(w, 1)
109
+ dy = abs(py - cy) / max(h, 1)
110
+ return (dx < 0.12) and (dy < 0.12)
111
+
112
+
113
+ def annotate(img_bgr: np.ndarray, ellipse, pupil_center, focused: bool, cls_label: str, conf: float):
114
+ out = img_bgr.copy()
115
+ if ellipse is not None:
116
+ cv2.ellipse(out, ellipse, (0, 255, 255), 2)
117
+ if pupil_center is not None:
118
+ cv2.circle(out, pupil_center, 4, (0, 0, 255), -1)
119
+ label = f"{cls_label} ({conf:.2f}) | focused={int(focused)}"
120
+ cv2.putText(out, label, (8, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
121
+ return out
122
+
123
+
124
+ def main():
125
+ project_root = Path(__file__).resolve().parent.parent
126
+ data_dir = project_root / "Dataset"
127
+ alt_data_dir = project_root / "DATA"
128
+ out_dir = project_root / "runs_focus"
129
+ out_dir.mkdir(parents=True, exist_ok=True)
130
+
131
+ weights = find_weights(project_root)
132
+ if weights is None:
133
+ print("Weights not found. Train first.")
134
+ return
135
+
136
+ # Support both Dataset/test/{open,closed} and Dataset/{open,closed}
137
+ def resolve_test_dirs(root: Path):
138
+ test_open = root / "test" / "open"
139
+ test_closed = root / "test" / "closed"
140
+ if test_open.exists() and test_closed.exists():
141
+ return test_open, test_closed
142
+ test_open = root / "open"
143
+ test_closed = root / "closed"
144
+ if test_open.exists() and test_closed.exists():
145
+ return test_open, test_closed
146
+ alt_closed = root / "close"
147
+ if test_open.exists() and alt_closed.exists():
148
+ return test_open, alt_closed
149
+ return None, None
150
+
151
+ test_open, test_closed = resolve_test_dirs(data_dir)
152
+ if (test_open is None or test_closed is None) and alt_data_dir.exists():
153
+ test_open, test_closed = resolve_test_dirs(alt_data_dir)
154
+
155
+ if not test_open.exists() or not test_closed.exists():
156
+ print("Test folders missing. Expected:")
157
+ print(test_open)
158
+ print(test_closed)
159
+ return
160
+
161
+ test_files = list_images(test_open) + list_images(test_closed)
162
+ print("Total test images:", len(test_files))
163
+ max_images = int(os.getenv("MAX_IMAGES", "0"))
164
+ if max_images > 0:
165
+ test_files = test_files[:max_images]
166
+ print("Limiting to MAX_IMAGES:", max_images)
167
+
168
+ model = YOLO(str(weights))
169
+ results = model.predict(test_files, imgsz=224, device="cpu", verbose=False)
170
+
171
+ names = model.names
172
+ for r in results:
173
+ probs = r.probs
174
+ top_idx = int(probs.top1)
175
+ top_conf = float(probs.top1conf)
176
+ pred_label = names[top_idx]
177
+
178
+ img = cv2.imread(r.path)
179
+ if img is None:
180
+ continue
181
+ gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
182
+
183
+ ellipse = detect_eyelid_boundary(gray)
184
+ pupil_center = detect_pupil_center(gray)
185
+ focused = False
186
+ if pred_label.lower() == "open" and pupil_center is not None:
187
+ focused = is_focused(pupil_center, gray.shape)
188
+
189
+ annotated = annotate(img, ellipse, pupil_center, focused, pred_label, top_conf)
190
+ out_path = out_dir / (Path(r.path).stem + "_annotated.jpg")
191
+ cv2.imwrite(str(out_path), annotated)
192
+
193
+ print(f"{Path(r.path).name}: pred={pred_label} conf={top_conf:.3f} focused={focused}")
194
+
195
+ print(f"\nAnnotated outputs saved to: {out_dir}")
196
+
197
+
198
+ if __name__ == "__main__":
199
+ main()
models/cnn/CNN_MODEL/scripts/predict_image.py CHANGED
@@ -1,49 +1,49 @@
1
- """Run the eye open/closed model on one or more images."""
2
- import sys
3
- from pathlib import Path
4
-
5
- from ultralytics import YOLO
6
-
7
-
8
- def main():
9
- project_root = Path(__file__).resolve().parent.parent
10
- weight_candidates = [
11
- project_root / "weights" / "best.pt",
12
- project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
13
- project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
14
- ]
15
- weights = next((p for p in weight_candidates if p.is_file()), None)
16
- if weights is None:
17
- print("Weights not found. Put best.pt in weights/ or runs/.../weights/ (from model team).")
18
- sys.exit(1)
19
-
20
- if len(sys.argv) < 2:
21
- print("Usage: python scripts/predict_image.py <image1> [image2 ...]")
22
- print("Example: python scripts/predict_image.py path/to/image.png")
23
- sys.exit(0)
24
-
25
- model = YOLO(str(weights))
26
- names = model.names
27
-
28
- for path in sys.argv[1:]:
29
- p = Path(path)
30
- if not p.is_file():
31
- print(p, "- file not found")
32
- continue
33
- try:
34
- results = model.predict(str(p), imgsz=224, device="cpu", verbose=False)
35
- except Exception as e:
36
- print(p, "- error:", e)
37
- continue
38
- if not results:
39
- print(p, "- no result")
40
- continue
41
- r = results[0]
42
- top_idx = int(r.probs.top1)
43
- conf = float(r.probs.top1conf)
44
- label = names[top_idx]
45
- print(f"{p.name}: {label} ({conf:.2%})")
46
-
47
-
48
- if __name__ == "__main__":
49
- main()
 
1
+ """Run the eye open/closed model on one or more images."""
2
+ import sys
3
+ from pathlib import Path
4
+
5
+ from ultralytics import YOLO
6
+
7
+
8
+ def main():
9
+ project_root = Path(__file__).resolve().parent.parent
10
+ weight_candidates = [
11
+ project_root / "weights" / "best.pt",
12
+ project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
13
+ project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
14
+ ]
15
+ weights = next((p for p in weight_candidates if p.is_file()), None)
16
+ if weights is None:
17
+ print("Weights not found. Put best.pt in weights/ or runs/.../weights/ (from model team).")
18
+ sys.exit(1)
19
+
20
+ if len(sys.argv) < 2:
21
+ print("Usage: python scripts/predict_image.py <image1> [image2 ...]")
22
+ print("Example: python scripts/predict_image.py path/to/image.png")
23
+ sys.exit(0)
24
+
25
+ model = YOLO(str(weights))
26
+ names = model.names
27
+
28
+ for path in sys.argv[1:]:
29
+ p = Path(path)
30
+ if not p.is_file():
31
+ print(p, "- file not found")
32
+ continue
33
+ try:
34
+ results = model.predict(str(p), imgsz=224, device="cpu", verbose=False)
35
+ except Exception as e:
36
+ print(p, "- error:", e)
37
+ continue
38
+ if not results:
39
+ print(p, "- no result")
40
+ continue
41
+ r = results[0]
42
+ top_idx = int(r.probs.top1)
43
+ conf = float(r.probs.top1conf)
44
+ label = names[top_idx]
45
+ print(f"{p.name}: {label} ({conf:.2%})")
46
+
47
+
48
+ if __name__ == "__main__":
49
+ main()
models/cnn/CNN_MODEL/scripts/video_infer.py CHANGED
@@ -1,281 +1,281 @@
1
- from __future__ import annotations
2
-
3
- import os
4
- from pathlib import Path
5
-
6
- import cv2
7
- import numpy as np
8
- from ultralytics import YOLO
9
-
10
- try:
11
- import mediapipe as mp
12
- except Exception: # pragma: no cover
13
- mp = None
14
-
15
-
16
- def find_weights(project_root: Path) -> Path | None:
17
- candidates = [
18
- project_root / "weights" / "best.pt",
19
- project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
20
- project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
21
- project_root / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
22
- project_root / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
23
- ]
24
- return next((p for p in candidates if p.is_file()), None)
25
-
26
-
27
- def detect_pupil_center(gray: np.ndarray) -> tuple[int, int] | None:
28
- h, w = gray.shape
29
- clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
30
- eq = clahe.apply(gray)
31
- blur = cv2.GaussianBlur(eq, (7, 7), 0)
32
-
33
- cx, cy = w // 2, h // 2
34
- rx, ry = int(w * 0.3), int(h * 0.3)
35
- x0, x1 = max(cx - rx, 0), min(cx + rx, w)
36
- y0, y1 = max(cy - ry, 0), min(cy + ry, h)
37
- roi = blur[y0:y1, x0:x1]
38
-
39
- _, thresh = cv2.threshold(roi, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
40
- thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, np.ones((3, 3), np.uint8), iterations=2)
41
- thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8), iterations=1)
42
-
43
- contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
44
- if not contours:
45
- return None
46
-
47
- best = None
48
- best_score = -1.0
49
- for c in contours:
50
- area = cv2.contourArea(c)
51
- if area < 15:
52
- continue
53
- perimeter = cv2.arcLength(c, True)
54
- if perimeter <= 0:
55
- continue
56
- circularity = 4 * np.pi * (area / (perimeter * perimeter))
57
- if circularity < 0.3:
58
- continue
59
- m = cv2.moments(c)
60
- if m["m00"] == 0:
61
- continue
62
- px = int(m["m10"] / m["m00"]) + x0
63
- py = int(m["m01"] / m["m00"]) + y0
64
-
65
- dist = np.hypot(px - cx, py - cy) / max(w, h)
66
- score = circularity - dist
67
- if score > best_score:
68
- best_score = score
69
- best = (px, py)
70
-
71
- return best
72
-
73
-
74
- def is_focused(pupil_center: tuple[int, int], img_shape: tuple[int, int]) -> bool:
75
- h, w = img_shape
76
- cx = w // 2
77
- px, _ = pupil_center
78
- dx = abs(px - cx) / max(w, 1)
79
- return dx < 0.12
80
-
81
-
82
- def classify_frame(model: YOLO, frame: np.ndarray) -> tuple[str, float]:
83
- # Use classifier directly on frame (assumes frame is eye crop)
84
- results = model.predict(frame, imgsz=224, device="cpu", verbose=False)
85
- r = results[0]
86
- probs = r.probs
87
- top_idx = int(probs.top1)
88
- top_conf = float(probs.top1conf)
89
- pred_label = model.names[top_idx]
90
- return pred_label, top_conf
91
-
92
-
93
- def annotate_frame(frame: np.ndarray, label: str, focused: bool, conf: float, time_sec: float):
94
- out = frame.copy()
95
- text = f"{label} | focused={int(focused)} | conf={conf:.2f} | t={time_sec:.2f}s"
96
- cv2.putText(out, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
97
- return out
98
-
99
-
100
- def write_segments(path: Path, segments: list[tuple[float, float, str]]):
101
- with path.open("w") as f:
102
- for start, end, label in segments:
103
- f.write(f"{start:.2f},{end:.2f},{label}\n")
104
-
105
-
106
- def process_video(video_path: Path, model: YOLO | None):
107
- cap = cv2.VideoCapture(str(video_path))
108
- if not cap.isOpened():
109
- print(f"Failed to open {video_path}")
110
- return
111
-
112
- fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
113
- width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
114
- height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
115
-
116
- out_path = video_path.with_name(video_path.stem + "_pred.mp4")
117
- fourcc = cv2.VideoWriter_fourcc(*"mp4v")
118
- writer = cv2.VideoWriter(str(out_path), fourcc, fps, (width, height))
119
-
120
- csv_path = video_path.with_name(video_path.stem + "_predictions.csv")
121
- seg_path = video_path.with_name(video_path.stem + "_segments.txt")
122
-
123
- frame_idx = 0
124
- last_label = None
125
- seg_start = 0.0
126
- segments: list[tuple[float, float, str]] = []
127
-
128
- with csv_path.open("w") as fcsv:
129
- fcsv.write("time_sec,label,focused,conf\n")
130
- if mp is None:
131
- print("mediapipe is not installed. Falling back to classifier-only mode.")
132
- use_mp = mp is not None
133
- if use_mp:
134
- mp_face_mesh = mp.solutions.face_mesh
135
- face_mesh = mp_face_mesh.FaceMesh(
136
- static_image_mode=False,
137
- max_num_faces=1,
138
- refine_landmarks=True,
139
- min_detection_confidence=0.5,
140
- min_tracking_confidence=0.5,
141
- )
142
-
143
- while True:
144
- ret, frame = cap.read()
145
- if not ret:
146
- break
147
- time_sec = frame_idx / fps
148
- conf = 0.0
149
- pred_label = "open"
150
- focused = False
151
-
152
- if use_mp:
153
- rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
154
- res = face_mesh.process(rgb)
155
- if res.multi_face_landmarks:
156
- lm = res.multi_face_landmarks[0].landmark
157
- h, w = frame.shape[:2]
158
-
159
- # Eye landmarks (MediaPipe FaceMesh)
160
- left_eye = [33, 160, 158, 133, 153, 144]
161
- right_eye = [362, 385, 387, 263, 373, 380]
162
- left_iris = [468, 469, 470, 471]
163
- right_iris = [473, 474, 475, 476]
164
-
165
- def pts(idxs):
166
- return np.array([(int(lm[i].x * w), int(lm[i].y * h)) for i in idxs])
167
-
168
- def ear(eye_pts):
169
- # EAR using 6 points
170
- p1, p2, p3, p4, p5, p6 = eye_pts
171
- v1 = np.linalg.norm(p2 - p6)
172
- v2 = np.linalg.norm(p3 - p5)
173
- h1 = np.linalg.norm(p1 - p4)
174
- return (v1 + v2) / (2.0 * h1 + 1e-6)
175
-
176
- le = pts(left_eye)
177
- re = pts(right_eye)
178
- le_ear = ear(le)
179
- re_ear = ear(re)
180
- ear_avg = (le_ear + re_ear) / 2.0
181
-
182
- # openness threshold
183
- pred_label = "open" if ear_avg > 0.22 else "closed"
184
-
185
- # iris centers
186
- li = pts(left_iris)
187
- ri = pts(right_iris)
188
- li_c = li.mean(axis=0).astype(int)
189
- ri_c = ri.mean(axis=0).astype(int)
190
-
191
- # eye centers (midpoint of corners)
192
- le_c = ((le[0] + le[3]) / 2).astype(int)
193
- re_c = ((re[0] + re[3]) / 2).astype(int)
194
-
195
- # focus = iris close to eye center horizontally for both eyes
196
- le_dx = abs(li_c[0] - le_c[0]) / max(np.linalg.norm(le[0] - le[3]), 1)
197
- re_dx = abs(ri_c[0] - re_c[0]) / max(np.linalg.norm(re[0] - re[3]), 1)
198
- focused = (pred_label == "open") and (le_dx < 0.18) and (re_dx < 0.18)
199
-
200
- # draw eye boundaries
201
- cv2.polylines(frame, [le], True, (0, 255, 255), 1)
202
- cv2.polylines(frame, [re], True, (0, 255, 255), 1)
203
- # draw iris centers
204
- cv2.circle(frame, tuple(li_c), 3, (0, 0, 255), -1)
205
- cv2.circle(frame, tuple(ri_c), 3, (0, 0, 255), -1)
206
- else:
207
- pred_label = "closed"
208
- focused = False
209
- else:
210
- if model is not None:
211
- pred_label, conf = classify_frame(model, frame)
212
- gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
213
- pupil_center = detect_pupil_center(gray) if pred_label.lower() == "open" else None
214
- focused = False
215
- if pred_label.lower() == "open" and pupil_center is not None:
216
- focused = is_focused(pupil_center, gray.shape)
217
-
218
- if pred_label.lower() != "open":
219
- focused = False
220
-
221
- label = "open_focused" if (pred_label.lower() == "open" and focused) else "open_not_focused"
222
- if pred_label.lower() != "open":
223
- label = "closed_not_focused"
224
-
225
- fcsv.write(f"{time_sec:.2f},{label},{int(focused)},{conf:.4f}\n")
226
-
227
- if last_label is None:
228
- last_label = label
229
- seg_start = time_sec
230
- elif label != last_label:
231
- segments.append((seg_start, time_sec, last_label))
232
- seg_start = time_sec
233
- last_label = label
234
-
235
- annotated = annotate_frame(frame, label, focused, conf, time_sec)
236
- writer.write(annotated)
237
- frame_idx += 1
238
-
239
- if last_label is not None:
240
- end_time = frame_idx / fps
241
- segments.append((seg_start, end_time, last_label))
242
- write_segments(seg_path, segments)
243
-
244
- cap.release()
245
- writer.release()
246
- print(f"Saved: {out_path}")
247
- print(f"CSV: {csv_path}")
248
- print(f"Segments: {seg_path}")
249
-
250
-
251
- def main():
252
- project_root = Path(__file__).resolve().parent.parent
253
- weights = find_weights(project_root)
254
- model = YOLO(str(weights)) if weights is not None else None
255
-
256
- # Default to 1.mp4 and 2.mp4 in project root
257
- videos = []
258
- for name in ["1.mp4", "2.mp4"]:
259
- p = project_root / name
260
- if p.exists():
261
- videos.append(p)
262
-
263
- # Also allow passing paths via env var
264
- extra = os.getenv("VIDEOS", "")
265
- for v in [x.strip() for x in extra.split(",") if x.strip()]:
266
- vp = Path(v)
267
- if not vp.is_absolute():
268
- vp = project_root / vp
269
- if vp.exists():
270
- videos.append(vp)
271
-
272
- if not videos:
273
- print("No videos found. Expected 1.mp4 / 2.mp4 in project root.")
274
- return
275
-
276
- for v in videos:
277
- process_video(v, model)
278
-
279
-
280
- if __name__ == "__main__":
281
- main()
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from pathlib import Path
5
+
6
+ import cv2
7
+ import numpy as np
8
+ from ultralytics import YOLO
9
+
10
+ try:
11
+ import mediapipe as mp
12
+ except Exception: # pragma: no cover
13
+ mp = None
14
+
15
+
16
+ def find_weights(project_root: Path) -> Path | None:
17
+ candidates = [
18
+ project_root / "weights" / "best.pt",
19
+ project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
20
+ project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
21
+ project_root / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
22
+ project_root / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
23
+ ]
24
+ return next((p for p in candidates if p.is_file()), None)
25
+
26
+
27
+ def detect_pupil_center(gray: np.ndarray) -> tuple[int, int] | None:
28
+ h, w = gray.shape
29
+ clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
30
+ eq = clahe.apply(gray)
31
+ blur = cv2.GaussianBlur(eq, (7, 7), 0)
32
+
33
+ cx, cy = w // 2, h // 2
34
+ rx, ry = int(w * 0.3), int(h * 0.3)
35
+ x0, x1 = max(cx - rx, 0), min(cx + rx, w)
36
+ y0, y1 = max(cy - ry, 0), min(cy + ry, h)
37
+ roi = blur[y0:y1, x0:x1]
38
+
39
+ _, thresh = cv2.threshold(roi, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
40
+ thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, np.ones((3, 3), np.uint8), iterations=2)
41
+ thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8), iterations=1)
42
+
43
+ contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
44
+ if not contours:
45
+ return None
46
+
47
+ best = None
48
+ best_score = -1.0
49
+ for c in contours:
50
+ area = cv2.contourArea(c)
51
+ if area < 15:
52
+ continue
53
+ perimeter = cv2.arcLength(c, True)
54
+ if perimeter <= 0:
55
+ continue
56
+ circularity = 4 * np.pi * (area / (perimeter * perimeter))
57
+ if circularity < 0.3:
58
+ continue
59
+ m = cv2.moments(c)
60
+ if m["m00"] == 0:
61
+ continue
62
+ px = int(m["m10"] / m["m00"]) + x0
63
+ py = int(m["m01"] / m["m00"]) + y0
64
+
65
+ dist = np.hypot(px - cx, py - cy) / max(w, h)
66
+ score = circularity - dist
67
+ if score > best_score:
68
+ best_score = score
69
+ best = (px, py)
70
+
71
+ return best
72
+
73
+
74
+ def is_focused(pupil_center: tuple[int, int], img_shape: tuple[int, int]) -> bool:
75
+ h, w = img_shape
76
+ cx = w // 2
77
+ px, _ = pupil_center
78
+ dx = abs(px - cx) / max(w, 1)
79
+ return dx < 0.12
80
+
81
+
82
+ def classify_frame(model: YOLO, frame: np.ndarray) -> tuple[str, float]:
83
+ # Use classifier directly on frame (assumes frame is eye crop)
84
+ results = model.predict(frame, imgsz=224, device="cpu", verbose=False)
85
+ r = results[0]
86
+ probs = r.probs
87
+ top_idx = int(probs.top1)
88
+ top_conf = float(probs.top1conf)
89
+ pred_label = model.names[top_idx]
90
+ return pred_label, top_conf
91
+
92
+
93
+ def annotate_frame(frame: np.ndarray, label: str, focused: bool, conf: float, time_sec: float):
94
+ out = frame.copy()
95
+ text = f"{label} | focused={int(focused)} | conf={conf:.2f} | t={time_sec:.2f}s"
96
+ cv2.putText(out, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
97
+ return out
98
+
99
+
100
+ def write_segments(path: Path, segments: list[tuple[float, float, str]]):
101
+ with path.open("w") as f:
102
+ for start, end, label in segments:
103
+ f.write(f"{start:.2f},{end:.2f},{label}\n")
104
+
105
+
106
+ def process_video(video_path: Path, model: YOLO | None):
107
+ cap = cv2.VideoCapture(str(video_path))
108
+ if not cap.isOpened():
109
+ print(f"Failed to open {video_path}")
110
+ return
111
+
112
+ fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
113
+ width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
114
+ height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
115
+
116
+ out_path = video_path.with_name(video_path.stem + "_pred.mp4")
117
+ fourcc = cv2.VideoWriter_fourcc(*"mp4v")
118
+ writer = cv2.VideoWriter(str(out_path), fourcc, fps, (width, height))
119
+
120
+ csv_path = video_path.with_name(video_path.stem + "_predictions.csv")
121
+ seg_path = video_path.with_name(video_path.stem + "_segments.txt")
122
+
123
+ frame_idx = 0
124
+ last_label = None
125
+ seg_start = 0.0
126
+ segments: list[tuple[float, float, str]] = []
127
+
128
+ with csv_path.open("w") as fcsv:
129
+ fcsv.write("time_sec,label,focused,conf\n")
130
+ if mp is None:
131
+ print("mediapipe is not installed. Falling back to classifier-only mode.")
132
+ use_mp = mp is not None
133
+ if use_mp:
134
+ mp_face_mesh = mp.solutions.face_mesh
135
+ face_mesh = mp_face_mesh.FaceMesh(
136
+ static_image_mode=False,
137
+ max_num_faces=1,
138
+ refine_landmarks=True,
139
+ min_detection_confidence=0.5,
140
+ min_tracking_confidence=0.5,
141
+ )
142
+
143
+ while True:
144
+ ret, frame = cap.read()
145
+ if not ret:
146
+ break
147
+ time_sec = frame_idx / fps
148
+ conf = 0.0
149
+ pred_label = "open"
150
+ focused = False
151
+
152
+ if use_mp:
153
+ rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
154
+ res = face_mesh.process(rgb)
155
+ if res.multi_face_landmarks:
156
+ lm = res.multi_face_landmarks[0].landmark
157
+ h, w = frame.shape[:2]
158
+
159
+ # Eye landmarks (MediaPipe FaceMesh)
160
+ left_eye = [33, 160, 158, 133, 153, 144]
161
+ right_eye = [362, 385, 387, 263, 373, 380]
162
+ left_iris = [468, 469, 470, 471]
163
+ right_iris = [473, 474, 475, 476]
164
+
165
+ def pts(idxs):
166
+ return np.array([(int(lm[i].x * w), int(lm[i].y * h)) for i in idxs])
167
+
168
+ def ear(eye_pts):
169
+ # EAR using 6 points
170
+ p1, p2, p3, p4, p5, p6 = eye_pts
171
+ v1 = np.linalg.norm(p2 - p6)
172
+ v2 = np.linalg.norm(p3 - p5)
173
+ h1 = np.linalg.norm(p1 - p4)
174
+ return (v1 + v2) / (2.0 * h1 + 1e-6)
175
+
176
+ le = pts(left_eye)
177
+ re = pts(right_eye)
178
+ le_ear = ear(le)
179
+ re_ear = ear(re)
180
+ ear_avg = (le_ear + re_ear) / 2.0
181
+
182
+ # openness threshold
183
+ pred_label = "open" if ear_avg > 0.22 else "closed"
184
+
185
+ # iris centers
186
+ li = pts(left_iris)
187
+ ri = pts(right_iris)
188
+ li_c = li.mean(axis=0).astype(int)
189
+ ri_c = ri.mean(axis=0).astype(int)
190
+
191
+ # eye centers (midpoint of corners)
192
+ le_c = ((le[0] + le[3]) / 2).astype(int)
193
+ re_c = ((re[0] + re[3]) / 2).astype(int)
194
+
195
+ # focus = iris close to eye center horizontally for both eyes
196
+ le_dx = abs(li_c[0] - le_c[0]) / max(np.linalg.norm(le[0] - le[3]), 1)
197
+ re_dx = abs(ri_c[0] - re_c[0]) / max(np.linalg.norm(re[0] - re[3]), 1)
198
+ focused = (pred_label == "open") and (le_dx < 0.18) and (re_dx < 0.18)
199
+
200
+ # draw eye boundaries
201
+ cv2.polylines(frame, [le], True, (0, 255, 255), 1)
202
+ cv2.polylines(frame, [re], True, (0, 255, 255), 1)
203
+ # draw iris centers
204
+ cv2.circle(frame, tuple(li_c), 3, (0, 0, 255), -1)
205
+ cv2.circle(frame, tuple(ri_c), 3, (0, 0, 255), -1)
206
+ else:
207
+ pred_label = "closed"
208
+ focused = False
209
+ else:
210
+ if model is not None:
211
+ pred_label, conf = classify_frame(model, frame)
212
+ gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
213
+ pupil_center = detect_pupil_center(gray) if pred_label.lower() == "open" else None
214
+ focused = False
215
+ if pred_label.lower() == "open" and pupil_center is not None:
216
+ focused = is_focused(pupil_center, gray.shape)
217
+
218
+ if pred_label.lower() != "open":
219
+ focused = False
220
+
221
+ label = "open_focused" if (pred_label.lower() == "open" and focused) else "open_not_focused"
222
+ if pred_label.lower() != "open":
223
+ label = "closed_not_focused"
224
+
225
+ fcsv.write(f"{time_sec:.2f},{label},{int(focused)},{conf:.4f}\n")
226
+
227
+ if last_label is None:
228
+ last_label = label
229
+ seg_start = time_sec
230
+ elif label != last_label:
231
+ segments.append((seg_start, time_sec, last_label))
232
+ seg_start = time_sec
233
+ last_label = label
234
+
235
+ annotated = annotate_frame(frame, label, focused, conf, time_sec)
236
+ writer.write(annotated)
237
+ frame_idx += 1
238
+
239
+ if last_label is not None:
240
+ end_time = frame_idx / fps
241
+ segments.append((seg_start, end_time, last_label))
242
+ write_segments(seg_path, segments)
243
+
244
+ cap.release()
245
+ writer.release()
246
+ print(f"Saved: {out_path}")
247
+ print(f"CSV: {csv_path}")
248
+ print(f"Segments: {seg_path}")
249
+
250
+
251
+ def main():
252
+ project_root = Path(__file__).resolve().parent.parent
253
+ weights = find_weights(project_root)
254
+ model = YOLO(str(weights)) if weights is not None else None
255
+
256
+ # Default to 1.mp4 and 2.mp4 in project root
257
+ videos = []
258
+ for name in ["1.mp4", "2.mp4"]:
259
+ p = project_root / name
260
+ if p.exists():
261
+ videos.append(p)
262
+
263
+ # Also allow passing paths via env var
264
+ extra = os.getenv("VIDEOS", "")
265
+ for v in [x.strip() for x in extra.split(",") if x.strip()]:
266
+ vp = Path(v)
267
+ if not vp.is_absolute():
268
+ vp = project_root / vp
269
+ if vp.exists():
270
+ videos.append(vp)
271
+
272
+ if not videos:
273
+ print("No videos found. Expected 1.mp4 / 2.mp4 in project root.")
274
+ return
275
+
276
+ for v in videos:
277
+ process_video(v, model)
278
+
279
+
280
+ if __name__ == "__main__":
281
+ main()
models/cnn/CNN_MODEL/scripts/webcam_live.py CHANGED
@@ -1,184 +1,184 @@
1
- """
2
- Live webcam: detect face, crop each eye, run open/closed classifier, show on screen.
3
- Requires: opencv-python, ultralytics, mediapipe (pip install mediapipe).
4
- Press 'q' to quit.
5
- """
6
- import urllib.request
7
- from pathlib import Path
8
-
9
- import cv2
10
- import numpy as np
11
- from ultralytics import YOLO
12
-
13
- try:
14
- import mediapipe as mp
15
- _mp_has_solutions = hasattr(mp, "solutions")
16
- except ImportError:
17
- mp = None
18
- _mp_has_solutions = False
19
-
20
- # New MediaPipe Tasks API (Face Landmarker) eye indices
21
- LEFT_EYE_INDICES_NEW = [263, 249, 390, 373, 374, 380, 381, 382, 362, 466, 388, 387, 386, 385, 384, 398]
22
- RIGHT_EYE_INDICES_NEW = [33, 7, 163, 144, 145, 153, 154, 155, 133, 246, 161, 160, 159, 158, 157, 173]
23
- # Old Face Mesh (solutions) indices
24
- LEFT_EYE_INDICES_OLD = [33, 160, 158, 133, 153, 144]
25
- RIGHT_EYE_INDICES_OLD = [362, 385, 387, 263, 373, 380]
26
- EYE_PADDING = 0.35
27
-
28
-
29
- def find_weights(project_root: Path) -> Path | None:
30
- candidates = [
31
- project_root / "weights" / "best.pt",
32
- project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
33
- project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
34
- ]
35
- return next((p for p in candidates if p.is_file()), None)
36
-
37
-
38
- def get_eye_roi(frame: np.ndarray, landmarks, indices: list[int]) -> np.ndarray | None:
39
- h, w = frame.shape[:2]
40
- pts = np.array([(int(landmarks[i].x * w), int(landmarks[i].y * h)) for i in indices])
41
- x_min, y_min = pts.min(axis=0)
42
- x_max, y_max = pts.max(axis=0)
43
- dx = max(int((x_max - x_min) * EYE_PADDING), 8)
44
- dy = max(int((y_max - y_min) * EYE_PADDING), 8)
45
- x0 = max(0, x_min - dx)
46
- y0 = max(0, y_min - dy)
47
- x1 = min(w, x_max + dx)
48
- y1 = min(h, y_max + dy)
49
- if x1 <= x0 or y1 <= y0:
50
- return None
51
- return frame[y0:y1, x0:x1].copy()
52
-
53
-
54
- def _run_with_solutions(mp, model, cap):
55
- face_mesh = mp.solutions.face_mesh.FaceMesh(
56
- static_image_mode=False,
57
- max_num_faces=1,
58
- refine_landmarks=True,
59
- min_detection_confidence=0.5,
60
- min_tracking_confidence=0.5,
61
- )
62
- while True:
63
- ret, frame = cap.read()
64
- if not ret:
65
- break
66
- rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
67
- results = face_mesh.process(rgb)
68
- left_label, left_conf = "—", 0.0
69
- right_label, right_conf = "—", 0.0
70
- if results.multi_face_landmarks:
71
- lm = results.multi_face_landmarks[0].landmark
72
- for roi, indices, side in [
73
- (get_eye_roi(frame, lm, LEFT_EYE_INDICES_OLD), LEFT_EYE_INDICES_OLD, "left"),
74
- (get_eye_roi(frame, lm, RIGHT_EYE_INDICES_OLD), RIGHT_EYE_INDICES_OLD, "right"),
75
- ]:
76
- if roi is not None and roi.size > 0:
77
- try:
78
- pred = model.predict(roi, imgsz=224, device="cpu", verbose=False)
79
- if pred:
80
- r = pred[0]
81
- label = model.names[int(r.probs.top1)]
82
- conf = float(r.probs.top1conf)
83
- if side == "left":
84
- left_label, left_conf = label, conf
85
- else:
86
- right_label, right_conf = label, conf
87
- except Exception:
88
- pass
89
- cv2.putText(frame, f"L: {left_label} ({left_conf:.0%})", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
90
- cv2.putText(frame, f"R: {right_label} ({right_conf:.0%})", (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
91
- cv2.imshow("Eye open/closed (q to quit)", frame)
92
- if cv2.waitKey(1) & 0xFF == ord("q"):
93
- break
94
-
95
-
96
- def _run_with_tasks(project_root: Path, model, cap):
97
- from mediapipe.tasks.python import BaseOptions
98
- from mediapipe.tasks.python.vision import FaceLandmarker, FaceLandmarkerOptions
99
- from mediapipe.tasks.python.vision.core import vision_task_running_mode as running_mode
100
- from mediapipe.tasks.python.vision.core import image as image_lib
101
-
102
- model_path = project_root / "weights" / "face_landmarker.task"
103
- if not model_path.is_file():
104
- print("Downloading face_landmarker.task ...")
105
- url = "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task"
106
- urllib.request.urlretrieve(url, model_path)
107
- print("Done.")
108
-
109
- options = FaceLandmarkerOptions(
110
- base_options=BaseOptions(model_asset_path=str(model_path)),
111
- running_mode=running_mode.VisionTaskRunningMode.IMAGE,
112
- num_faces=1,
113
- )
114
- face_landmarker = FaceLandmarker.create_from_options(options)
115
- ImageFormat = image_lib.ImageFormat
116
-
117
- while True:
118
- ret, frame = cap.read()
119
- if not ret:
120
- break
121
- left_label, left_conf = "—", 0.0
122
- right_label, right_conf = "—", 0.0
123
-
124
- rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
125
- rgb_contiguous = np.ascontiguousarray(rgb)
126
- mp_image = image_lib.Image(ImageFormat.SRGB, rgb_contiguous)
127
- result = face_landmarker.detect(mp_image)
128
-
129
- if result.face_landmarks:
130
- lm = result.face_landmarks[0]
131
- for roi, side in [
132
- (get_eye_roi(frame, lm, LEFT_EYE_INDICES_NEW), "left"),
133
- (get_eye_roi(frame, lm, RIGHT_EYE_INDICES_NEW), "right"),
134
- ]:
135
- if roi is not None and roi.size > 0:
136
- try:
137
- pred = model.predict(roi, imgsz=224, device="cpu", verbose=False)
138
- if pred:
139
- r = pred[0]
140
- label = model.names[int(r.probs.top1)]
141
- conf = float(r.probs.top1conf)
142
- if side == "left":
143
- left_label, left_conf = label, conf
144
- else:
145
- right_label, right_conf = label, conf
146
- except Exception:
147
- pass
148
-
149
- cv2.putText(frame, f"L: {left_label} ({left_conf:.0%})", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
150
- cv2.putText(frame, f"R: {right_label} ({right_conf:.0%})", (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
151
- cv2.imshow("Eye open/closed (q to quit)", frame)
152
- if cv2.waitKey(1) & 0xFF == ord("q"):
153
- break
154
-
155
-
156
- def main():
157
- project_root = Path(__file__).resolve().parent.parent
158
- weights = find_weights(project_root)
159
- if weights is None:
160
- print("Weights not found. Put best.pt in weights/ or runs/.../weights/ (from model team).")
161
- return
162
- if mp is None:
163
- print("MediaPipe required. Install: pip install mediapipe")
164
- return
165
-
166
- model = YOLO(str(weights))
167
- cap = cv2.VideoCapture(0)
168
- if not cap.isOpened():
169
- print("Could not open webcam.")
170
- return
171
-
172
- print("Live eye open/closed on your face. Press 'q' to quit.")
173
- try:
174
- if _mp_has_solutions:
175
- _run_with_solutions(mp, model, cap)
176
- else:
177
- _run_with_tasks(project_root, model, cap)
178
- finally:
179
- cap.release()
180
- cv2.destroyAllWindows()
181
-
182
-
183
- if __name__ == "__main__":
184
- main()
 
1
+ """
2
+ Live webcam: detect face, crop each eye, run open/closed classifier, show on screen.
3
+ Requires: opencv-python, ultralytics, mediapipe (pip install mediapipe).
4
+ Press 'q' to quit.
5
+ """
6
+ import urllib.request
7
+ from pathlib import Path
8
+
9
+ import cv2
10
+ import numpy as np
11
+ from ultralytics import YOLO
12
+
13
+ try:
14
+ import mediapipe as mp
15
+ _mp_has_solutions = hasattr(mp, "solutions")
16
+ except ImportError:
17
+ mp = None
18
+ _mp_has_solutions = False
19
+
20
+ # New MediaPipe Tasks API (Face Landmarker) eye indices
21
+ LEFT_EYE_INDICES_NEW = [263, 249, 390, 373, 374, 380, 381, 382, 362, 466, 388, 387, 386, 385, 384, 398]
22
+ RIGHT_EYE_INDICES_NEW = [33, 7, 163, 144, 145, 153, 154, 155, 133, 246, 161, 160, 159, 158, 157, 173]
23
+ # Old Face Mesh (solutions) indices
24
+ LEFT_EYE_INDICES_OLD = [33, 160, 158, 133, 153, 144]
25
+ RIGHT_EYE_INDICES_OLD = [362, 385, 387, 263, 373, 380]
26
+ EYE_PADDING = 0.35
27
+
28
+
29
+ def find_weights(project_root: Path) -> Path | None:
30
+ candidates = [
31
+ project_root / "weights" / "best.pt",
32
+ project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
33
+ project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
34
+ ]
35
+ return next((p for p in candidates if p.is_file()), None)
36
+
37
+
38
+ def get_eye_roi(frame: np.ndarray, landmarks, indices: list[int]) -> np.ndarray | None:
39
+ h, w = frame.shape[:2]
40
+ pts = np.array([(int(landmarks[i].x * w), int(landmarks[i].y * h)) for i in indices])
41
+ x_min, y_min = pts.min(axis=0)
42
+ x_max, y_max = pts.max(axis=0)
43
+ dx = max(int((x_max - x_min) * EYE_PADDING), 8)
44
+ dy = max(int((y_max - y_min) * EYE_PADDING), 8)
45
+ x0 = max(0, x_min - dx)
46
+ y0 = max(0, y_min - dy)
47
+ x1 = min(w, x_max + dx)
48
+ y1 = min(h, y_max + dy)
49
+ if x1 <= x0 or y1 <= y0:
50
+ return None
51
+ return frame[y0:y1, x0:x1].copy()
52
+
53
+
54
+ def _run_with_solutions(mp, model, cap):
55
+ face_mesh = mp.solutions.face_mesh.FaceMesh(
56
+ static_image_mode=False,
57
+ max_num_faces=1,
58
+ refine_landmarks=True,
59
+ min_detection_confidence=0.5,
60
+ min_tracking_confidence=0.5,
61
+ )
62
+ while True:
63
+ ret, frame = cap.read()
64
+ if not ret:
65
+ break
66
+ rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
67
+ results = face_mesh.process(rgb)
68
+ left_label, left_conf = "—", 0.0
69
+ right_label, right_conf = "—", 0.0
70
+ if results.multi_face_landmarks:
71
+ lm = results.multi_face_landmarks[0].landmark
72
+ for roi, indices, side in [
73
+ (get_eye_roi(frame, lm, LEFT_EYE_INDICES_OLD), LEFT_EYE_INDICES_OLD, "left"),
74
+ (get_eye_roi(frame, lm, RIGHT_EYE_INDICES_OLD), RIGHT_EYE_INDICES_OLD, "right"),
75
+ ]:
76
+ if roi is not None and roi.size > 0:
77
+ try:
78
+ pred = model.predict(roi, imgsz=224, device="cpu", verbose=False)
79
+ if pred:
80
+ r = pred[0]
81
+ label = model.names[int(r.probs.top1)]
82
+ conf = float(r.probs.top1conf)
83
+ if side == "left":
84
+ left_label, left_conf = label, conf
85
+ else:
86
+ right_label, right_conf = label, conf
87
+ except Exception:
88
+ pass
89
+ cv2.putText(frame, f"L: {left_label} ({left_conf:.0%})", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
90
+ cv2.putText(frame, f"R: {right_label} ({right_conf:.0%})", (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
91
+ cv2.imshow("Eye open/closed (q to quit)", frame)
92
+ if cv2.waitKey(1) & 0xFF == ord("q"):
93
+ break
94
+
95
+
96
+ def _run_with_tasks(project_root: Path, model, cap):
97
+ from mediapipe.tasks.python import BaseOptions
98
+ from mediapipe.tasks.python.vision import FaceLandmarker, FaceLandmarkerOptions
99
+ from mediapipe.tasks.python.vision.core import vision_task_running_mode as running_mode
100
+ from mediapipe.tasks.python.vision.core import image as image_lib
101
+
102
+ model_path = project_root / "weights" / "face_landmarker.task"
103
+ if not model_path.is_file():
104
+ print("Downloading face_landmarker.task ...")
105
+ url = "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task"
106
+ urllib.request.urlretrieve(url, model_path)
107
+ print("Done.")
108
+
109
+ options = FaceLandmarkerOptions(
110
+ base_options=BaseOptions(model_asset_path=str(model_path)),
111
+ running_mode=running_mode.VisionTaskRunningMode.IMAGE,
112
+ num_faces=1,
113
+ )
114
+ face_landmarker = FaceLandmarker.create_from_options(options)
115
+ ImageFormat = image_lib.ImageFormat
116
+
117
+ while True:
118
+ ret, frame = cap.read()
119
+ if not ret:
120
+ break
121
+ left_label, left_conf = "—", 0.0
122
+ right_label, right_conf = "—", 0.0
123
+
124
+ rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
125
+ rgb_contiguous = np.ascontiguousarray(rgb)
126
+ mp_image = image_lib.Image(ImageFormat.SRGB, rgb_contiguous)
127
+ result = face_landmarker.detect(mp_image)
128
+
129
+ if result.face_landmarks:
130
+ lm = result.face_landmarks[0]
131
+ for roi, side in [
132
+ (get_eye_roi(frame, lm, LEFT_EYE_INDICES_NEW), "left"),
133
+ (get_eye_roi(frame, lm, RIGHT_EYE_INDICES_NEW), "right"),
134
+ ]:
135
+ if roi is not None and roi.size > 0:
136
+ try:
137
+ pred = model.predict(roi, imgsz=224, device="cpu", verbose=False)
138
+ if pred:
139
+ r = pred[0]
140
+ label = model.names[int(r.probs.top1)]
141
+ conf = float(r.probs.top1conf)
142
+ if side == "left":
143
+ left_label, left_conf = label, conf
144
+ else:
145
+ right_label, right_conf = label, conf
146
+ except Exception:
147
+ pass
148
+
149
+ cv2.putText(frame, f"L: {left_label} ({left_conf:.0%})", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
150
+ cv2.putText(frame, f"R: {right_label} ({right_conf:.0%})", (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
151
+ cv2.imshow("Eye open/closed (q to quit)", frame)
152
+ if cv2.waitKey(1) & 0xFF == ord("q"):
153
+ break
154
+
155
+
156
+ def main():
157
+ project_root = Path(__file__).resolve().parent.parent
158
+ weights = find_weights(project_root)
159
+ if weights is None:
160
+ print("Weights not found. Put best.pt in weights/ or runs/.../weights/ (from model team).")
161
+ return
162
+ if mp is None:
163
+ print("MediaPipe required. Install: pip install mediapipe")
164
+ return
165
+
166
+ model = YOLO(str(weights))
167
+ cap = cv2.VideoCapture(0)
168
+ if not cap.isOpened():
169
+ print("Could not open webcam.")
170
+ return
171
+
172
+ print("Live eye open/closed on your face. Press 'q' to quit.")
173
+ try:
174
+ if _mp_has_solutions:
175
+ _run_with_solutions(mp, model, cap)
176
+ else:
177
+ _run_with_tasks(project_root, model, cap)
178
+ finally:
179
+ cap.release()
180
+ cv2.destroyAllWindows()
181
+
182
+
183
+ if __name__ == "__main__":
184
+ main()
models/cnn/eye_attention/__init__.py CHANGED
@@ -1 +1 @@
1
-
 
1
+
models/cnn/eye_attention/classifier.py CHANGED
@@ -1,5 +1,6 @@
1
  from __future__ import annotations
2
 
 
3
  from abc import ABC, abstractmethod
4
 
5
  import numpy as np
@@ -54,16 +55,115 @@ class YOLOv11Classifier(EyeClassifier):
54
  return sum(scores) / len(scores) if scores else 1.0
55
 
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  def load_eye_classifier(
58
  path: str | None = None,
59
  backend: str = "yolo",
60
  device: str = "cpu",
61
  ) -> EyeClassifier:
62
- if path is None or backend == "geometric":
 
 
 
 
63
  return GeometricOnlyClassifier()
64
 
65
- try:
66
- return YOLOv11Classifier(path, device=device)
67
- except ImportError:
68
- print("[CLASSIFIER] ultralytics required for YOLO. pip install ultralytics")
69
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from __future__ import annotations
2
 
3
+ import os
4
  from abc import ABC, abstractmethod
5
 
6
  import numpy as np
 
55
  return sum(scores) / len(scores) if scores else 1.0
56
 
57
 
58
+ class EyeCNNClassifier(EyeClassifier):
59
+ """Loader for the custom PyTorch EyeCNN (trained on Kaggle eye crops)."""
60
+
61
+ def __init__(self, checkpoint_path: str, device: str = "cpu"):
62
+ import torch
63
+ import torch.nn as nn
64
+
65
+ class EyeCNN(nn.Module):
66
+ def __init__(self, num_classes=2, dropout_rate=0.3):
67
+ super().__init__()
68
+ self.conv_layers = nn.Sequential(
69
+ nn.Conv2d(3, 32, 3, 1, 1), nn.BatchNorm2d(32), nn.ReLU(), nn.MaxPool2d(2, 2),
70
+ nn.Conv2d(32, 64, 3, 1, 1), nn.BatchNorm2d(64), nn.ReLU(), nn.MaxPool2d(2, 2),
71
+ nn.Conv2d(64, 128, 3, 1, 1), nn.BatchNorm2d(128), nn.ReLU(), nn.MaxPool2d(2, 2),
72
+ nn.Conv2d(128, 256, 3, 1, 1), nn.BatchNorm2d(256), nn.ReLU(), nn.MaxPool2d(2, 2),
73
+ )
74
+ self.fc_layers = nn.Sequential(
75
+ nn.AdaptiveAvgPool2d((1, 1)), nn.Flatten(),
76
+ nn.Linear(256, 512), nn.ReLU(), nn.Dropout(dropout_rate),
77
+ nn.Linear(512, num_classes),
78
+ )
79
+
80
+ def forward(self, x):
81
+ return self.fc_layers(self.conv_layers(x))
82
+
83
+ self._device = torch.device(device)
84
+ checkpoint = torch.load(checkpoint_path, map_location=self._device, weights_only=False)
85
+ dropout_rate = checkpoint.get("config", {}).get("dropout_rate", 0.35)
86
+ self._model = EyeCNN(num_classes=2, dropout_rate=dropout_rate)
87
+ self._model.load_state_dict(checkpoint["model_state_dict"])
88
+ self._model.to(self._device)
89
+ self._model.eval()
90
+
91
+ self._transform = None # built lazily
92
+
93
+ def _get_transform(self):
94
+ if self._transform is None:
95
+ from torchvision import transforms
96
+ self._transform = transforms.Compose([
97
+ transforms.ToPILImage(),
98
+ transforms.Resize((96, 96)),
99
+ transforms.ToTensor(),
100
+ transforms.Normalize(
101
+ mean=[0.485, 0.456, 0.406],
102
+ std=[0.229, 0.224, 0.225],
103
+ ),
104
+ ])
105
+ return self._transform
106
+
107
+ @property
108
+ def name(self) -> str:
109
+ return "eye_cnn"
110
+
111
+ def predict_score(self, crops_bgr: list[np.ndarray]) -> float:
112
+ if not crops_bgr:
113
+ return 1.0
114
+
115
+ import torch
116
+ import cv2
117
+
118
+ transform = self._get_transform()
119
+ scores = []
120
+ for crop in crops_bgr:
121
+ if crop is None or crop.size == 0:
122
+ scores.append(1.0)
123
+ continue
124
+ rgb = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)
125
+ tensor = transform(rgb).unsqueeze(0).to(self._device)
126
+ with torch.no_grad():
127
+ output = self._model(tensor)
128
+ prob = torch.softmax(output, dim=1)[0, 1].item() # prob of "open"
129
+ scores.append(prob)
130
+ return sum(scores) / len(scores)
131
+
132
+
133
+ _EXT_TO_BACKEND = {".pth": "cnn", ".pt": "yolo"}
134
+
135
+
136
  def load_eye_classifier(
137
  path: str | None = None,
138
  backend: str = "yolo",
139
  device: str = "cpu",
140
  ) -> EyeClassifier:
141
+ if backend == "geometric":
142
+ return GeometricOnlyClassifier()
143
+
144
+ if path is None:
145
+ print(f"[CLASSIFIER] No model path for backend {backend!r}, falling back to geometric")
146
  return GeometricOnlyClassifier()
147
 
148
+ ext = os.path.splitext(path)[1].lower()
149
+ inferred = _EXT_TO_BACKEND.get(ext)
150
+ if inferred and inferred != backend:
151
+ print(f"[CLASSIFIER] File extension {ext!r} implies backend {inferred!r}, "
152
+ f"overriding requested {backend!r}")
153
+ backend = inferred
154
+
155
+ print(f"[CLASSIFIER] backend={backend!r}, path={path!r}")
156
+
157
+ if backend == "cnn":
158
+ return EyeCNNClassifier(path, device=device)
159
+
160
+ if backend == "yolo":
161
+ try:
162
+ return YOLOv11Classifier(path, device=device)
163
+ except ImportError:
164
+ print("[CLASSIFIER] ultralytics required for YOLO. pip install ultralytics")
165
+ raise
166
+
167
+ raise ValueError(
168
+ f"Unknown eye backend {backend!r}. Choose from: yolo, cnn, geometric"
169
+ )
models/cnn/eye_attention/crop.py CHANGED
@@ -1,70 +1,70 @@
1
- import cv2
2
- import numpy as np
3
-
4
- from models.pretrained.face_mesh.face_mesh import FaceMeshDetector
5
-
6
- LEFT_EYE_CONTOUR = FaceMeshDetector.LEFT_EYE_INDICES
7
- RIGHT_EYE_CONTOUR = FaceMeshDetector.RIGHT_EYE_INDICES
8
-
9
- IMAGENET_MEAN = (0.485, 0.456, 0.406)
10
- IMAGENET_STD = (0.229, 0.224, 0.225)
11
-
12
- CROP_SIZE = 96
13
-
14
-
15
- def _bbox_from_landmarks(
16
- landmarks: np.ndarray,
17
- indices: list[int],
18
- frame_w: int,
19
- frame_h: int,
20
- expand: float = 0.4,
21
- ) -> tuple[int, int, int, int]:
22
- pts = landmarks[indices, :2]
23
- px = pts[:, 0] * frame_w
24
- py = pts[:, 1] * frame_h
25
-
26
- x_min, x_max = px.min(), px.max()
27
- y_min, y_max = py.min(), py.max()
28
- w = x_max - x_min
29
- h = y_max - y_min
30
- cx = (x_min + x_max) / 2
31
- cy = (y_min + y_max) / 2
32
-
33
- size = max(w, h) * (1 + expand)
34
- half = size / 2
35
-
36
- x1 = int(max(cx - half, 0))
37
- y1 = int(max(cy - half, 0))
38
- x2 = int(min(cx + half, frame_w))
39
- y2 = int(min(cy + half, frame_h))
40
-
41
- return x1, y1, x2, y2
42
-
43
-
44
- def extract_eye_crops(
45
- frame: np.ndarray,
46
- landmarks: np.ndarray,
47
- expand: float = 0.4,
48
- crop_size: int = CROP_SIZE,
49
- ) -> tuple[np.ndarray, np.ndarray, tuple, tuple]:
50
- h, w = frame.shape[:2]
51
-
52
- left_bbox = _bbox_from_landmarks(landmarks, LEFT_EYE_CONTOUR, w, h, expand)
53
- right_bbox = _bbox_from_landmarks(landmarks, RIGHT_EYE_CONTOUR, w, h, expand)
54
-
55
- left_crop = frame[left_bbox[1] : left_bbox[3], left_bbox[0] : left_bbox[2]]
56
- right_crop = frame[right_bbox[1] : right_bbox[3], right_bbox[0] : right_bbox[2]]
57
-
58
- left_crop = cv2.resize(left_crop, (crop_size, crop_size), interpolation=cv2.INTER_AREA)
59
- right_crop = cv2.resize(right_crop, (crop_size, crop_size), interpolation=cv2.INTER_AREA)
60
-
61
- return left_crop, right_crop, left_bbox, right_bbox
62
-
63
-
64
- def crop_to_tensor(crop_bgr: np.ndarray):
65
- import torch
66
-
67
- rgb = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
68
- for c in range(3):
69
- rgb[:, :, c] = (rgb[:, :, c] - IMAGENET_MEAN[c]) / IMAGENET_STD[c]
70
- return torch.from_numpy(rgb.transpose(2, 0, 1))
 
1
+ import cv2
2
+ import numpy as np
3
+
4
+ from models.pretrained.face_mesh.face_mesh import FaceMeshDetector
5
+
6
# Eye-contour landmark index sets, reused from the face-mesh detector.
LEFT_EYE_CONTOUR = FaceMeshDetector.LEFT_EYE_INDICES
RIGHT_EYE_CONTOUR = FaceMeshDetector.RIGHT_EYE_INDICES

# Standard ImageNet channel statistics used to normalize crops for the CNN.
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

# Side length (pixels) of the square eye crop fed to the classifier.
CROP_SIZE = 96
13
+
14
+
15
+ def _bbox_from_landmarks(
16
+ landmarks: np.ndarray,
17
+ indices: list[int],
18
+ frame_w: int,
19
+ frame_h: int,
20
+ expand: float = 0.4,
21
+ ) -> tuple[int, int, int, int]:
22
+ pts = landmarks[indices, :2]
23
+ px = pts[:, 0] * frame_w
24
+ py = pts[:, 1] * frame_h
25
+
26
+ x_min, x_max = px.min(), px.max()
27
+ y_min, y_max = py.min(), py.max()
28
+ w = x_max - x_min
29
+ h = y_max - y_min
30
+ cx = (x_min + x_max) / 2
31
+ cy = (y_min + y_max) / 2
32
+
33
+ size = max(w, h) * (1 + expand)
34
+ half = size / 2
35
+
36
+ x1 = int(max(cx - half, 0))
37
+ y1 = int(max(cy - half, 0))
38
+ x2 = int(min(cx + half, frame_w))
39
+ y2 = int(min(cy + half, frame_h))
40
+
41
+ return x1, y1, x2, y2
42
+
43
+
44
def extract_eye_crops(
    frame: np.ndarray,
    landmarks: np.ndarray,
    expand: float = 0.4,
    crop_size: int = CROP_SIZE,
) -> tuple[np.ndarray, np.ndarray, tuple, tuple]:
    """Cut out and resize both eye regions from a BGR frame.

    Returns:
        (left_crop, right_crop, left_bbox, right_bbox) where each crop is
        resized to ``crop_size`` x ``crop_size`` pixels and each bbox is
        the (x1, y1, x2, y2) pixel rectangle it was taken from.
    """
    frame_h, frame_w = frame.shape[:2]

    boxes = [
        _bbox_from_landmarks(landmarks, contour, frame_w, frame_h, expand)
        for contour in (LEFT_EYE_CONTOUR, RIGHT_EYE_CONTOUR)
    ]
    crops = [
        cv2.resize(
            frame[y1:y2, x1:x2],
            (crop_size, crop_size),
            interpolation=cv2.INTER_AREA,
        )
        for x1, y1, x2, y2 in boxes
    ]
    return crops[0], crops[1], boxes[0], boxes[1]
62
+
63
+
64
def crop_to_tensor(crop_bgr: np.ndarray):
    """Convert a BGR uint8 eye crop into a normalized CHW float tensor.

    Applies ImageNet mean/std normalization and returns a torch.Tensor of
    shape (3, H, W), dtype float32.
    """
    import torch

    rgb = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
    # Vectorized per-channel normalization via broadcasting (replaces the
    # explicit Python loop over channels); float32 arrays keep dtype stable.
    mean = np.asarray(IMAGENET_MEAN, dtype=np.float32)
    std = np.asarray(IMAGENET_STD, dtype=np.float32)
    rgb = (rgb - mean) / std
    return torch.from_numpy(rgb.transpose(2, 0, 1))
ui/README.md CHANGED
@@ -1,20 +1,20 @@
1
- # ui
2
-
3
- - **pipeline.py** — `FaceMeshPipeline` (head + eye geo ± YOLO → focus) and `MLPPipeline` (loads latest MLP from `MLP/models/`, 10 features → focus)
4
- - **live_demo.py** — webcam window, mesh overlay, FOCUSED / NOT FOCUSED
5
-
6
- From repo root:
7
-
8
- ```bash
9
- python ui/live_demo.py
10
- ```
11
-
12
- MLP only (no head/eye fusion, just your trained MLP):
13
-
14
- ```bash
15
- python ui/live_demo.py --mlp
16
- ```
17
-
18
- With YOLO eye model: `python ui/live_demo.py --eye-model path/to/yolo.pt`
19
-
20
- `q` quit, `m` cycle mesh (full / contours / off).
 
1
+ # ui
2
+
3
+ - **pipeline.py** — `FaceMeshPipeline` (head + eye geo ± YOLO → focus) and `MLPPipeline` (loads latest MLP from `MLP/models/`, 10 features → focus)
4
+ - **live_demo.py** — webcam window, mesh overlay, FOCUSED / NOT FOCUSED
5
+
6
+ From repo root:
7
+
8
+ ```bash
9
+ python ui/live_demo.py
10
+ ```
11
+
12
+ MLP only (no head/eye fusion, just your trained MLP):
13
+
14
+ ```bash
15
+ python ui/live_demo.py --mlp
16
+ ```
17
+
18
+ With YOLO eye model: `python ui/live_demo.py --eye-model path/to/yolo.pt`
19
+
20
+ `q` quit, `m` cycle mesh (full / contours / off).
ui/live_demo.py CHANGED
@@ -1,224 +1,224 @@
1
- import argparse
2
- import os
3
- import sys
4
- import time
5
-
6
- import cv2
7
- import numpy as np
8
- from mediapipe.tasks.python.vision import FaceLandmarksConnections
9
-
10
- _PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
11
- if _PROJECT_ROOT not in sys.path:
12
- sys.path.insert(0, _PROJECT_ROOT)
13
-
14
- from ui.pipeline import FaceMeshPipeline, MLPPipeline
15
- from models.pretrained.face_mesh.face_mesh import FaceMeshDetector
16
-
17
- FONT = cv2.FONT_HERSHEY_SIMPLEX
18
- CYAN = (255, 255, 0)
19
- GREEN = (0, 255, 0)
20
- MAGENTA = (255, 0, 255)
21
- ORANGE = (0, 165, 255)
22
- RED = (0, 0, 255)
23
- WHITE = (255, 255, 255)
24
- YELLOW = (0, 255, 255)
25
- LIGHT_GREEN = (144, 238, 144)
26
-
27
- _TESSELATION = [(c.start, c.end) for c in FaceLandmarksConnections.FACE_LANDMARKS_TESSELATION]
28
- _CONTOURS = [(c.start, c.end) for c in FaceLandmarksConnections.FACE_LANDMARKS_CONTOURS]
29
- _LEFT_EYEBROW = [70, 63, 105, 66, 107, 55, 65, 52, 53, 46]
30
- _RIGHT_EYEBROW = [300, 293, 334, 296, 336, 285, 295, 282, 283, 276]
31
- _NOSE_BRIDGE = [6, 197, 195, 5, 4, 1, 19, 94, 2]
32
- _LIPS_OUTER = [61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291, 409, 270, 269, 267, 0, 37, 39, 40, 185, 61]
33
- _LIPS_INNER = [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308, 415, 310, 311, 312, 13, 82, 81, 80, 191, 78]
34
- _LEFT_EAR_POINTS = [33, 160, 158, 133, 153, 145]
35
- _RIGHT_EAR_POINTS = [362, 385, 387, 263, 373, 380]
36
-
37
- MESH_FULL = 0
38
- MESH_CONTOURS = 1
39
- MESH_OFF = 2
40
- _MESH_NAMES = ["FULL MESH", "CONTOURS", "MESH OFF"]
41
-
42
-
43
- def _lm_to_px(landmarks, idx, w, h):
44
- return (int(landmarks[idx, 0] * w), int(landmarks[idx, 1] * h))
45
-
46
-
47
- def draw_tessellation(frame, landmarks, w, h):
48
- overlay = frame.copy()
49
- for conn in _TESSELATION:
50
- pt1 = _lm_to_px(landmarks, conn[0], w, h)
51
- pt2 = _lm_to_px(landmarks, conn[1], w, h)
52
- cv2.line(overlay, pt1, pt2, (200, 200, 200), 1, cv2.LINE_AA)
53
- cv2.addWeighted(overlay, 0.3, frame, 0.7, 0, frame)
54
-
55
-
56
- def draw_contours(frame, landmarks, w, h):
57
- for conn in _CONTOURS:
58
- pt1 = _lm_to_px(landmarks, conn[0], w, h)
59
- pt2 = _lm_to_px(landmarks, conn[1], w, h)
60
- cv2.line(frame, pt1, pt2, CYAN, 1, cv2.LINE_AA)
61
- for indices in [_LEFT_EYEBROW, _RIGHT_EYEBROW]:
62
- for i in range(len(indices) - 1):
63
- pt1 = _lm_to_px(landmarks, indices[i], w, h)
64
- pt2 = _lm_to_px(landmarks, indices[i + 1], w, h)
65
- cv2.line(frame, pt1, pt2, LIGHT_GREEN, 2, cv2.LINE_AA)
66
- for i in range(len(_NOSE_BRIDGE) - 1):
67
- pt1 = _lm_to_px(landmarks, _NOSE_BRIDGE[i], w, h)
68
- pt2 = _lm_to_px(landmarks, _NOSE_BRIDGE[i + 1], w, h)
69
- cv2.line(frame, pt1, pt2, ORANGE, 1, cv2.LINE_AA)
70
- for i in range(len(_LIPS_OUTER) - 1):
71
- pt1 = _lm_to_px(landmarks, _LIPS_OUTER[i], w, h)
72
- pt2 = _lm_to_px(landmarks, _LIPS_OUTER[i + 1], w, h)
73
- cv2.line(frame, pt1, pt2, MAGENTA, 1, cv2.LINE_AA)
74
- for i in range(len(_LIPS_INNER) - 1):
75
- pt1 = _lm_to_px(landmarks, _LIPS_INNER[i], w, h)
76
- pt2 = _lm_to_px(landmarks, _LIPS_INNER[i + 1], w, h)
77
- cv2.line(frame, pt1, pt2, (200, 0, 200), 1, cv2.LINE_AA)
78
-
79
-
80
- def draw_eyes_and_irises(frame, landmarks, w, h):
81
- left_pts = np.array(
82
- [_lm_to_px(landmarks, i, w, h) for i in FaceMeshDetector.LEFT_EYE_INDICES],
83
- dtype=np.int32,
84
- )
85
- cv2.polylines(frame, [left_pts], True, GREEN, 2, cv2.LINE_AA)
86
- right_pts = np.array(
87
- [_lm_to_px(landmarks, i, w, h) for i in FaceMeshDetector.RIGHT_EYE_INDICES],
88
- dtype=np.int32,
89
- )
90
- cv2.polylines(frame, [right_pts], True, GREEN, 2, cv2.LINE_AA)
91
- for indices in [_LEFT_EAR_POINTS, _RIGHT_EAR_POINTS]:
92
- for idx in indices:
93
- pt = _lm_to_px(landmarks, idx, w, h)
94
- cv2.circle(frame, pt, 3, YELLOW, -1, cv2.LINE_AA)
95
- for iris_indices, eye_inner, eye_outer in [
96
- (FaceMeshDetector.LEFT_IRIS_INDICES, 133, 33),
97
- (FaceMeshDetector.RIGHT_IRIS_INDICES, 362, 263),
98
- ]:
99
- iris_pts = np.array(
100
- [_lm_to_px(landmarks, i, w, h) for i in iris_indices],
101
- dtype=np.int32,
102
- )
103
- center = iris_pts[0]
104
- if len(iris_pts) >= 5:
105
- radii = [np.linalg.norm(iris_pts[j] - center) for j in range(1, 5)]
106
- radius = max(int(np.mean(radii)), 2)
107
- cv2.circle(frame, tuple(center), radius, MAGENTA, 2, cv2.LINE_AA)
108
- cv2.circle(frame, tuple(center), 2, WHITE, -1, cv2.LINE_AA)
109
- eye_center_x = (landmarks[eye_inner, 0] + landmarks[eye_outer, 0]) / 2.0
110
- eye_center_y = (landmarks[eye_inner, 1] + landmarks[eye_outer, 1]) / 2.0
111
- eye_center = (int(eye_center_x * w), int(eye_center_y * h))
112
- dx = center[0] - eye_center[0]
113
- dy = center[1] - eye_center[1]
114
- gaze_end = (int(center[0] + dx * 3), int(center[1] + dy * 3))
115
- cv2.line(frame, tuple(center), gaze_end, RED, 1, cv2.LINE_AA)
116
-
117
-
118
- def main():
119
- parser = argparse.ArgumentParser()
120
- parser.add_argument("--camera", type=int, default=0)
121
- parser.add_argument("--mlp", action="store_true", help="Use MLP model only (load latest from MLP/models/)")
122
- parser.add_argument("--mlp-dir", type=str, default=None, help="MLP models dir (default: shared/MLP/models)")
123
- parser.add_argument("--max-angle", type=float, default=22.0)
124
- parser.add_argument("--alpha", type=float, default=0.4)
125
- parser.add_argument("--beta", type=float, default=0.6)
126
- parser.add_argument("--threshold", type=float, default=0.55)
127
- parser.add_argument("--eye-model", type=str, default=None)
128
- parser.add_argument("--eye-backend", type=str, default="yolo", choices=["yolo", "geometric"])
129
- parser.add_argument("--eye-blend", type=float, default=0.5)
130
- args = parser.parse_args()
131
-
132
- use_mlp_only = args.mlp
133
-
134
- if use_mlp_only:
135
- print("[DEMO] MLP only — loading latest from MLP/models/")
136
- pipeline = MLPPipeline(model_dir=args.mlp_dir)
137
- else:
138
- eye_mode = " + model" if args.eye_model else " only"
139
- print("[DEMO] Face mesh + head pose + eye (geometry" + eye_mode + ")")
140
- pipeline = FaceMeshPipeline(
141
- max_angle=args.max_angle,
142
- alpha=args.alpha,
143
- beta=args.beta,
144
- threshold=args.threshold,
145
- eye_model_path=args.eye_model,
146
- eye_backend=args.eye_backend,
147
- eye_blend=args.eye_blend,
148
- )
149
-
150
- cap = cv2.VideoCapture(args.camera)
151
- if not cap.isOpened():
152
- print("[DEMO] ERROR: Cannot open camera")
153
- return
154
-
155
- print("[DEMO] q = quit, m = cycle mesh (full/contours/off)" if not use_mlp_only else "[DEMO] q = quit, m = mesh")
156
- prev_time = time.time()
157
- fps = 0.0
158
- mesh_mode = MESH_FULL
159
-
160
- try:
161
- while True:
162
- ret, frame = cap.read()
163
- if not ret:
164
- break
165
-
166
- result = pipeline.process_frame(frame)
167
- now = time.time()
168
- fps = 0.9 * fps + 0.1 * (1.0 / max(now - prev_time, 1e-6))
169
- prev_time = now
170
-
171
- h, w = frame.shape[:2]
172
- if result["landmarks"] is not None:
173
- lm = result["landmarks"]
174
- if mesh_mode == MESH_FULL:
175
- draw_tessellation(frame, lm, w, h)
176
- draw_contours(frame, lm, w, h)
177
- elif mesh_mode == MESH_CONTOURS:
178
- draw_contours(frame, lm, w, h)
179
- draw_eyes_and_irises(frame, lm, w, h)
180
- if not use_mlp_only:
181
- pipeline.head_pose.draw_axes(frame, lm)
182
- if result.get("left_bbox") and result.get("right_bbox"):
183
- lx1, ly1, lx2, ly2 = result["left_bbox"]
184
- rx1, ry1, rx2, ry2 = result["right_bbox"]
185
- cv2.rectangle(frame, (lx1, ly1), (lx2, ly2), YELLOW, 1)
186
- cv2.rectangle(frame, (rx1, ry1), (rx2, ry2), YELLOW, 1)
187
-
188
- status = "FOCUSED" if result["is_focused"] else "NOT FOCUSED"
189
- status_color = GREEN if result["is_focused"] else RED
190
- cv2.rectangle(frame, (0, 0), (w, 55), (0, 0, 0), -1)
191
- cv2.putText(frame, status, (10, 28), FONT, 0.8, status_color, 2, cv2.LINE_AA)
192
- if use_mlp_only:
193
- cv2.putText(frame, "MLP", (10, 48), FONT, 0.45, WHITE, 1, cv2.LINE_AA)
194
- cv2.putText(frame, f"FPS: {fps:.0f}", (w - 80, 28), FONT, 0.45, WHITE, 1, cv2.LINE_AA)
195
- cv2.putText(frame, "q:quit m:mesh", (w - 120, 48), FONT, 0.4, (180, 180, 180), 1, cv2.LINE_AA)
196
- else:
197
- mar_str = f" MAR:{result['mar']:.2f}" if result.get("mar") is not None else ""
198
- cv2.putText(frame, f"S_face:{result['s_face']:.2f} S_eye:{result['s_eye']:.2f}{mar_str} score:{result['raw_score']:.2f}", (10, 48), FONT, 0.45, WHITE, 1, cv2.LINE_AA)
199
- if result.get("is_yawning"):
200
- cv2.putText(frame, "YAWN", (10, 75), FONT, 0.7, ORANGE, 2, cv2.LINE_AA)
201
- if result["yaw"] is not None:
202
- cv2.putText(frame, f"yaw:{result['yaw']:+.0f} pitch:{result['pitch']:+.0f} roll:{result['roll']:+.0f}", (w - 280, 48), FONT, 0.4, (180, 180, 180), 1, cv2.LINE_AA)
203
- eye_label = f"eye:{pipeline.eye_classifier.name}" if pipeline.has_eye_model else "eye:geo"
204
- cv2.putText(frame, f"{_MESH_NAMES[mesh_mode]} {eye_label} FPS: {fps:.0f}", (w - 320, 28), FONT, 0.45, WHITE, 1, cv2.LINE_AA)
205
- cv2.putText(frame, "q:quit m:mesh", (w - 140, 48), FONT, 0.4, (180, 180, 180), 1, cv2.LINE_AA)
206
-
207
- cv2.imshow("FocusGuard", frame)
208
-
209
- key = cv2.waitKey(1) & 0xFF
210
- if key == ord("q"):
211
- break
212
- elif key == ord("m"):
213
- mesh_mode = (mesh_mode + 1) % 3
214
- print(f"[DEMO] Mesh: {_MESH_NAMES[mesh_mode]}")
215
-
216
- finally:
217
- cap.release()
218
- cv2.destroyAllWindows()
219
- pipeline.close()
220
- print("[DEMO] Done")
221
-
222
-
223
- if __name__ == "__main__":
224
- main()
 
1
+ import argparse
2
+ import os
3
+ import sys
4
+ import time
5
+
6
+ import cv2
7
+ import numpy as np
8
+ from mediapipe.tasks.python.vision import FaceLandmarksConnections
9
+
10
+ _PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
11
+ if _PROJECT_ROOT not in sys.path:
12
+ sys.path.insert(0, _PROJECT_ROOT)
13
+
14
+ from ui.pipeline import FaceMeshPipeline, MLPPipeline
15
+ from models.pretrained.face_mesh.face_mesh import FaceMeshDetector
16
+
17
# Overlay font and BGR color palette.
FONT = cv2.FONT_HERSHEY_SIMPLEX
CYAN = (255, 255, 0)
GREEN = (0, 255, 0)
MAGENTA = (255, 0, 255)
ORANGE = (0, 165, 255)
RED = (0, 0, 255)
WHITE = (255, 255, 255)
YELLOW = (0, 255, 255)
LIGHT_GREEN = (144, 238, 144)

# Edge lists (landmark index pairs) taken from MediaPipe's face-mesh topology.
_TESSELATION = [(c.start, c.end) for c in FaceLandmarksConnections.FACE_LANDMARKS_TESSELATION]
_CONTOURS = [(c.start, c.end) for c in FaceLandmarksConnections.FACE_LANDMARKS_CONTOURS]
# Hand-picked landmark index chains for individual facial features.
_LEFT_EYEBROW = [70, 63, 105, 66, 107, 55, 65, 52, 53, 46]
_RIGHT_EYEBROW = [300, 293, 334, 296, 336, 285, 295, 282, 283, 276]
_NOSE_BRIDGE = [6, 197, 195, 5, 4, 1, 19, 94, 2]
_LIPS_OUTER = [61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291, 409, 270, 269, 267, 0, 37, 39, 40, 185, 61]
_LIPS_INNER = [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308, 415, 310, 311, 312, 13, 82, 81, 80, 191, 78]
# Per-eye landmark points drawn as dots in draw_eyes_and_irises
# (presumably the eye-aspect-ratio landmark set, per the name — TODO confirm).
_LEFT_EAR_POINTS = [33, 160, 158, 133, 153, 145]
_RIGHT_EAR_POINTS = [362, 385, 387, 263, 373, 380]

# Mesh overlay display modes, cycled with the 'm' key.
MESH_FULL = 0
MESH_CONTOURS = 1
MESH_OFF = 2
_MESH_NAMES = ["FULL MESH", "CONTOURS", "MESH OFF"]
41
+
42
+
43
+ def _lm_to_px(landmarks, idx, w, h):
44
+ return (int(landmarks[idx, 0] * w), int(landmarks[idx, 1] * h))
45
+
46
+
47
def draw_tessellation(frame, landmarks, w, h):
    """Blend the full face-mesh tessellation onto ``frame`` at 30% opacity."""
    mesh_layer = frame.copy()
    for a, b in _TESSELATION:
        cv2.line(
            mesh_layer,
            _lm_to_px(landmarks, a, w, h),
            _lm_to_px(landmarks, b, w, h),
            (200, 200, 200),
            1,
            cv2.LINE_AA,
        )
    # Composite the tessellation layer back into the frame in place.
    cv2.addWeighted(mesh_layer, 0.3, frame, 0.7, 0, frame)
54
+
55
+
56
def draw_contours(frame, landmarks, w, h):
    """Draw face contours plus eyebrow/nose/lip feature polylines on ``frame``."""
    for a, b in _CONTOURS:
        cv2.line(
            frame,
            _lm_to_px(landmarks, a, w, h),
            _lm_to_px(landmarks, b, w, h),
            CYAN,
            1,
            cv2.LINE_AA,
        )

    # Each feature is an open polyline: (landmark chain, color, thickness).
    # Order matches the previous explicit loops.
    features = [
        (_LEFT_EYEBROW, LIGHT_GREEN, 2),
        (_RIGHT_EYEBROW, LIGHT_GREEN, 2),
        (_NOSE_BRIDGE, ORANGE, 1),
        (_LIPS_OUTER, MAGENTA, 1),
        (_LIPS_INNER, (200, 0, 200), 1),
    ]
    for chain, color, thickness in features:
        for start, end in zip(chain, chain[1:]):
            cv2.line(
                frame,
                _lm_to_px(landmarks, start, w, h),
                _lm_to_px(landmarks, end, w, h),
                color,
                thickness,
                cv2.LINE_AA,
            )
78
+
79
+
80
def draw_eyes_and_irises(frame, landmarks, w, h):
    """Draw eye outlines, per-eye keypoint dots, iris circles and gaze rays.

    All drawing happens in place on ``frame``; ``landmarks`` holds
    normalized [0, 1] coordinates scaled by (w, h).
    """
    # Closed polyline around each eye contour.
    left_pts = np.array(
        [_lm_to_px(landmarks, i, w, h) for i in FaceMeshDetector.LEFT_EYE_INDICES],
        dtype=np.int32,
    )
    cv2.polylines(frame, [left_pts], True, GREEN, 2, cv2.LINE_AA)
    right_pts = np.array(
        [_lm_to_px(landmarks, i, w, h) for i in FaceMeshDetector.RIGHT_EYE_INDICES],
        dtype=np.int32,
    )
    cv2.polylines(frame, [right_pts], True, GREEN, 2, cv2.LINE_AA)
    # Yellow dots on the per-eye keypoint landmarks.
    for indices in [_LEFT_EAR_POINTS, _RIGHT_EAR_POINTS]:
        for idx in indices:
            pt = _lm_to_px(landmarks, idx, w, h)
            cv2.circle(frame, pt, 3, YELLOW, -1, cv2.LINE_AA)
    # For each iris: first index is treated as center, next four as rim points.
    for iris_indices, eye_inner, eye_outer in [
        (FaceMeshDetector.LEFT_IRIS_INDICES, 133, 33),
        (FaceMeshDetector.RIGHT_IRIS_INDICES, 362, 263),
    ]:
        iris_pts = np.array(
            [_lm_to_px(landmarks, i, w, h) for i in iris_indices],
            dtype=np.int32,
        )
        center = iris_pts[0]
        if len(iris_pts) >= 5:
            # Iris radius = mean distance from center to the 4 rim landmarks.
            radii = [np.linalg.norm(iris_pts[j] - center) for j in range(1, 5)]
            radius = max(int(np.mean(radii)), 2)
            cv2.circle(frame, tuple(center), radius, MAGENTA, 2, cv2.LINE_AA)
        cv2.circle(frame, tuple(center), 2, WHITE, -1, cv2.LINE_AA)
        # Gaze ray: iris offset from the eye's geometric center, extended 3x.
        eye_center_x = (landmarks[eye_inner, 0] + landmarks[eye_outer, 0]) / 2.0
        eye_center_y = (landmarks[eye_inner, 1] + landmarks[eye_outer, 1]) / 2.0
        eye_center = (int(eye_center_x * w), int(eye_center_y * h))
        dx = center[0] - eye_center[0]
        dy = center[1] - eye_center[1]
        gaze_end = (int(center[0] + dx * 3), int(center[1] + dy * 3))
        cv2.line(frame, tuple(center), gaze_end, RED, 1, cv2.LINE_AA)
116
+
117
+
118
def main():
    """Run the webcam focus demo: parse args, build a pipeline, loop frames."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--camera", type=int, default=0)
    parser.add_argument("--mlp", action="store_true", help="Use MLP model only (load latest from MLP/models/)")
    parser.add_argument("--mlp-dir", type=str, default=None, help="MLP models dir (default: shared/MLP/models)")
    # Fusion hyperparameters for the geometric (non-MLP) pipeline.
    parser.add_argument("--max-angle", type=float, default=22.0)
    parser.add_argument("--alpha", type=float, default=0.4)
    parser.add_argument("--beta", type=float, default=0.6)
    parser.add_argument("--threshold", type=float, default=0.55)
    parser.add_argument("--eye-model", type=str, default=None)
    parser.add_argument("--eye-backend", type=str, default="yolo", choices=["yolo", "geometric", "cnn"])
    parser.add_argument("--eye-blend", type=float, default=0.5)
    args = parser.parse_args()

    use_mlp_only = args.mlp

    # Choose pipeline: MLP-only, or face-mesh + head pose + eye fusion.
    if use_mlp_only:
        print("[DEMO] MLP only — loading latest from MLP/models/")
        pipeline = MLPPipeline(model_dir=args.mlp_dir)
    else:
        eye_mode = " + model" if args.eye_model else " only"
        print("[DEMO] Face mesh + head pose + eye (geometry" + eye_mode + ")")
        pipeline = FaceMeshPipeline(
            max_angle=args.max_angle,
            alpha=args.alpha,
            beta=args.beta,
            threshold=args.threshold,
            eye_model_path=args.eye_model,
            eye_backend=args.eye_backend,
            eye_blend=args.eye_blend,
        )

    cap = cv2.VideoCapture(args.camera)
    if not cap.isOpened():
        print("[DEMO] ERROR: Cannot open camera")
        return

    print("[DEMO] q = quit, m = cycle mesh (full/contours/off)" if not use_mlp_only else "[DEMO] q = quit, m = mesh")
    prev_time = time.time()
    fps = 0.0
    mesh_mode = MESH_FULL

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            result = pipeline.process_frame(frame)
            now = time.time()
            # Exponentially smoothed FPS estimate.
            fps = 0.9 * fps + 0.1 * (1.0 / max(now - prev_time, 1e-6))
            prev_time = now

            h, w = frame.shape[:2]
            if result["landmarks"] is not None:
                lm = result["landmarks"]
                if mesh_mode == MESH_FULL:
                    draw_tessellation(frame, lm, w, h)
                    draw_contours(frame, lm, w, h)
                elif mesh_mode == MESH_CONTOURS:
                    draw_contours(frame, lm, w, h)
                draw_eyes_and_irises(frame, lm, w, h)
                if not use_mlp_only:
                    pipeline.head_pose.draw_axes(frame, lm)
                    # Eye-crop bounding boxes from the eye classifier, if present.
                    if result.get("left_bbox") and result.get("right_bbox"):
                        lx1, ly1, lx2, ly2 = result["left_bbox"]
                        rx1, ry1, rx2, ry2 = result["right_bbox"]
                        cv2.rectangle(frame, (lx1, ly1), (lx2, ly2), YELLOW, 1)
                        cv2.rectangle(frame, (rx1, ry1), (rx2, ry2), YELLOW, 1)

            # HUD: status banner plus per-pipeline diagnostics.
            status = "FOCUSED" if result["is_focused"] else "NOT FOCUSED"
            status_color = GREEN if result["is_focused"] else RED
            cv2.rectangle(frame, (0, 0), (w, 55), (0, 0, 0), -1)
            cv2.putText(frame, status, (10, 28), FONT, 0.8, status_color, 2, cv2.LINE_AA)
            if use_mlp_only:
                cv2.putText(frame, "MLP", (10, 48), FONT, 0.45, WHITE, 1, cv2.LINE_AA)
                cv2.putText(frame, f"FPS: {fps:.0f}", (w - 80, 28), FONT, 0.45, WHITE, 1, cv2.LINE_AA)
                cv2.putText(frame, "q:quit m:mesh", (w - 120, 48), FONT, 0.4, (180, 180, 180), 1, cv2.LINE_AA)
            else:
                mar_str = f" MAR:{result['mar']:.2f}" if result.get("mar") is not None else ""
                cv2.putText(frame, f"S_face:{result['s_face']:.2f} S_eye:{result['s_eye']:.2f}{mar_str} score:{result['raw_score']:.2f}", (10, 48), FONT, 0.45, WHITE, 1, cv2.LINE_AA)
                if result.get("is_yawning"):
                    cv2.putText(frame, "YAWN", (10, 75), FONT, 0.7, ORANGE, 2, cv2.LINE_AA)
                if result["yaw"] is not None:
                    cv2.putText(frame, f"yaw:{result['yaw']:+.0f} pitch:{result['pitch']:+.0f} roll:{result['roll']:+.0f}", (w - 280, 48), FONT, 0.4, (180, 180, 180), 1, cv2.LINE_AA)
                eye_label = f"eye:{pipeline.eye_classifier.name}" if pipeline.has_eye_model else "eye:geo"
                cv2.putText(frame, f"{_MESH_NAMES[mesh_mode]} {eye_label} FPS: {fps:.0f}", (w - 320, 28), FONT, 0.45, WHITE, 1, cv2.LINE_AA)
                cv2.putText(frame, "q:quit m:mesh", (w - 140, 48), FONT, 0.4, (180, 180, 180), 1, cv2.LINE_AA)

            cv2.imshow("FocusGuard", frame)

            # Keyboard: q quits, m cycles mesh display mode.
            key = cv2.waitKey(1) & 0xFF
            if key == ord("q"):
                break
            elif key == ord("m"):
                mesh_mode = (mesh_mode + 1) % 3
                print(f"[DEMO] Mesh: {_MESH_NAMES[mesh_mode]}")

    finally:
        cap.release()
        cv2.destroyAllWindows()
        pipeline.close()
        print("[DEMO] Done")
221
+
222
+
223
+ if __name__ == "__main__":
224
+ main()
ui/pipeline.py CHANGED
@@ -36,6 +36,9 @@ class FaceMeshPipeline:
36
  self.threshold = threshold
37
  self.eye_blend = eye_blend
38
 
 
 
 
39
  self.eye_classifier = load_eye_classifier(
40
  path=eye_model_path if eye_model_path and os.path.exists(eye_model_path) else None,
41
  backend=eye_backend,
 
36
  self.threshold = threshold
37
  self.eye_blend = eye_blend
38
 
39
+ if eye_model_path and not os.path.exists(eye_model_path):
40
+ print(f"[PIPELINE] WARNING: eye model file not found: {eye_model_path}")
41
+
42
  self.eye_classifier = load_eye_classifier(
43
  path=eye_model_path if eye_model_path and os.path.exists(eye_model_path) else None,
44
  backend=eye_backend,