Spaces:
Sleeping
Sleeping
k22056537 committed on
Commit ·
ea3fe1b
1
Parent(s): df9f1dd
reorg folders, add MLP demo
Browse files- .gitignore +0 -16
- MLP/models/meta_20260224_024200.npz +3 -0
- MLP/models/mlp_20260224_024200.joblib +3 -0
- MLP/models/scaler_20260224_024200.joblib +3 -0
- README.md +7 -7
- data_preparation/CNN/eye_crops/val/open/.gitkeep +1 -0
- data_preparation/MLP/explore_collected_data.ipynb +0 -0
- data_preparation/MLP/train_mlp.ipynb +0 -0
- data_preparation/README.md +40 -2
- data_preparation/collected_Mohamed/session_20260224_010131.npz +3 -0
- evaluation/README.md +1 -1
- models/README.md +7 -5
- models/attention/__init__.py +1 -0
- models/attention/classifier.py +0 -0
- models/attention/collect_features.py +349 -0
- models/attention/fusion.py +0 -0
- models/attention/train.py +0 -0
- models/cnn/__init__.py +0 -0
- models/cnn/eye_attention/__init__.py +1 -0
- models/cnn/eye_attention/classifier.py +69 -0
- models/cnn/eye_attention/crop.py +70 -0
- models/cnn/eye_attention/train.py +0 -0
- models/geometric/__init__.py +0 -0
- models/geometric/eye_behaviour/__init__.py +0 -0
- models/geometric/eye_behaviour/eye_scorer.py +164 -0
- models/geometric/face_orientation/__init__.py +1 -0
- models/geometric/face_orientation/head_pose.py +112 -0
- models/mlp/__init__.py +0 -0
- models/mlp/train.py +184 -0
- models/pretrained/__init__.py +0 -0
- models/pretrained/face_mesh/.gitkeep +0 -0
- models/pretrained/face_mesh/__init__.py +0 -0
- models/pretrained/face_mesh/face_mesh.py +91 -0
- requirements.txt +2 -2
- ui/README.md +10 -12
- ui/live_demo.py +47 -37
- ui/pipeline.py +73 -15
.gitignore
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
# Python
|
| 2 |
__pycache__/
|
| 3 |
*.py[cod]
|
| 4 |
*$py.class
|
|
@@ -12,25 +11,10 @@ env/
|
|
| 12 |
.eggs/
|
| 13 |
dist/
|
| 14 |
build/
|
| 15 |
-
|
| 16 |
-
# IDE
|
| 17 |
.idea/
|
| 18 |
.vscode/
|
| 19 |
*.swp
|
| 20 |
*.swo
|
| 21 |
-
|
| 22 |
-
# Data and outputs (optional: uncomment if you don’t want to track large files)
|
| 23 |
-
# data_preparation/raw/
|
| 24 |
-
# data_preparation/processed/*.npy
|
| 25 |
-
# evaluation/logs/
|
| 26 |
-
# evaluation/results/
|
| 27 |
-
|
| 28 |
-
# Model checkpoints (uncomment to ignore .pt files)
|
| 29 |
-
# *.pt
|
| 30 |
-
|
| 31 |
-
# Project
|
| 32 |
docs/
|
| 33 |
-
|
| 34 |
-
# OS
|
| 35 |
.DS_Store
|
| 36 |
Thumbs.db
|
|
|
|
|
|
|
| 1 |
__pycache__/
|
| 2 |
*.py[cod]
|
| 3 |
*$py.class
|
|
|
|
| 11 |
.eggs/
|
| 12 |
dist/
|
| 13 |
build/
|
|
|
|
|
|
|
| 14 |
.idea/
|
| 15 |
.vscode/
|
| 16 |
*.swp
|
| 17 |
*.swo
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
docs/
|
|
|
|
|
|
|
| 19 |
.DS_Store
|
| 20 |
Thumbs.db
|
MLP/models/meta_20260224_024200.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:769bb62c7bf04aafd808e9b2623e795c2d92bcb933313ebf553d6fce5ebe7143
|
| 3 |
+
size 1616
|
MLP/models/mlp_20260224_024200.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a72933fcf2d0aed998c6303ea4298c04618d937c7f17bf492e76efcf3b4b54d7
|
| 3 |
+
size 50484
|
MLP/models/scaler_20260224_024200.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f9ef3721cee28f1472886556e001d0f6ed0abe09011d979a70ca9bf447d453e
|
| 3 |
+
size 823
|
README.md
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
-
#
|
| 2 |
|
| 3 |
-
|
| 4 |
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
-
|
| 8 |
-
- **models/** — Face orientation, eye behaviour, fusion, landmarks. Training entry: `models/train.py`
|
| 9 |
-
- **evaluation/** — Metrics, runs, results
|
| 10 |
-
- **ui/** — Live demo + session view
|
|
|
|
| 1 |
+
# FocusGuard
|
| 2 |
|
| 3 |
+
Webcam-based focus detection: face mesh, head pose, eye (geometry or YOLO), plus an MLP trained on collected features.
|
| 4 |
|
| 5 |
+
- **data_preparation/** — collect data, notebooks, processed/collected files
|
| 6 |
+
- **models/** — face mesh, head pose, eye scorer, YOLO classifier, MLP training, attention feature collection
|
| 7 |
+
- **evaluation/** — metrics and run logs
|
| 8 |
+
- **ui/** — live demo (geometry+YOLO or MLP-only)
|
| 9 |
|
| 10 |
+
Run from here: `pip install -r requirements.txt` then `python ui/live_demo.py` or `python ui/live_demo.py --mlp`.
|
|
|
|
|
|
|
|
|
data_preparation/CNN/eye_crops/val/open/.gitkeep
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
data_preparation/MLP/explore_collected_data.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data_preparation/MLP/train_mlp.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data_preparation/README.md
CHANGED
|
@@ -1,3 +1,41 @@
|
|
| 1 |
-
#
|
| 2 |
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Data Preparation
|
| 2 |
|
| 3 |
+
## Folder Structure
|
| 4 |
+
|
| 5 |
+
### collected/
|
| 6 |
+
Contains raw session files in `.npz` format.
|
| 7 |
+
Generated using:
|
| 8 |
+
|
| 9 |
+
python -m models.attention.collect_features
|
| 10 |
+
|
| 11 |
+
Each session includes:
|
| 12 |
+
- 17-dimensional feature vectors
|
| 13 |
+
- Corresponding labels
|
| 14 |
+
|
| 15 |
+
---
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
### MLP/
|
| 19 |
+
Contains notebooks for:
|
| 20 |
+
- Exploring collected data
|
| 21 |
+
- Training the sklearn MLP model (10 features)
|
| 22 |
+
|
| 23 |
+
Trained models are saved to:
|
| 24 |
+
../MLP/models/
|
| 25 |
+
|
| 26 |
+
---
|
| 27 |
+
|
| 28 |
+
### CNN/
|
| 29 |
+
Eye crop directory structure for CNN training (YOLO).
|
| 30 |
+
|
| 31 |
+
---
|
| 32 |
+
|
| 33 |
+
## Collecting Data
|
| 34 |
+
|
| 35 |
+
**Step-by-step**
|
| 36 |
+
|
| 37 |
+
1. From the repo root, install deps: `pip install -r requirements.txt`.
|
| 38 |
+
2. Run: `python -m models.attention.collect_features --name yourname`.
|
| 39 |
+
3. Webcam opens. Look at the camera; press **1** when focused, **0** when unfocused. Switch every 10–30 sec so you get both labels.
|
| 40 |
+
4. Press **p** to pause/resume.
|
| 41 |
+
5. Press **q** when done. One `.npz` is saved to `data_preparation/collected/` (17 features + labels).
|
data_preparation/collected_Mohamed/session_20260224_010131.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0a784f703c13b83911f47ec507d32c25942a07572314b8a77cbf40ca8cdff16f
|
| 3 |
+
size 1006428
|
evaluation/README.md
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
# evaluation
|
| 2 |
|
| 3 |
-
|
|
|
|
| 1 |
# evaluation
|
| 2 |
|
| 3 |
+
Place metrics scripts, run configs, and results here. Logs dir is used by `models.mlp.train` for training logs.
|
models/README.md
CHANGED
|
@@ -1,8 +1,10 @@
|
|
| 1 |
# models
|
| 2 |
|
| 3 |
-
-
|
| 4 |
-
-
|
| 5 |
-
-
|
| 6 |
-
-
|
|
|
|
|
|
|
| 7 |
|
| 8 |
-
`
|
|
|
|
| 1 |
# models
|
| 2 |
|
| 3 |
+
- **cnn/eye_attention/** — YOLO open/closed eye classifier, crop helper, train stub
|
| 4 |
+
- **mlp/** — PyTorch MLP on feature vectors (face_orientation / eye_behaviour); checkpoints under `mlp/face_orientation_model/`, `mlp/eye_behaviour_model/`
|
| 5 |
+
- **geometric/face_orientation/** — head pose (solvePnP). **geometric/eye_behaviour/** — EAR, gaze, MAR
|
| 6 |
+
- **pretrained/face_mesh/** — MediaPipe face landmarks (no training)
|
| 7 |
+
- **attention/** — webcam feature collection (17-d), stubs for train/classifier/fusion
|
| 8 |
+
- **prepare_dataset.py** — loads from `data_preparation/processed/` or synthetic; used by `mlp/train.py`
|
| 9 |
|
| 10 |
+
Run legacy MLP training: `python -m models.mlp.train`. The sklearn MLP used in the live demo is trained in `data_preparation/MLP/train_mlp.ipynb` and saved under `../MLP/models/`.
|
models/attention/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
models/attention/classifier.py
ADDED
|
File without changes
|
models/attention/collect_features.py
ADDED
|
@@ -0,0 +1,349 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Usage: python -m models.attention.collect_features [--name alice] [--duration 600]
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import collections
|
| 5 |
+
import math
|
| 6 |
+
import os
|
| 7 |
+
import sys
|
| 8 |
+
import time
|
| 9 |
+
|
| 10 |
+
import cv2
|
| 11 |
+
import numpy as np
|
| 12 |
+
|
| 13 |
+
_PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 14 |
+
if _PROJECT_ROOT not in sys.path:
|
| 15 |
+
sys.path.insert(0, _PROJECT_ROOT)
|
| 16 |
+
|
| 17 |
+
from models.pretrained.face_mesh.face_mesh import FaceMeshDetector
|
| 18 |
+
from models.geometric.face_orientation.head_pose import HeadPoseEstimator
|
| 19 |
+
from models.geometric.eye_behaviour.eye_scorer import EyeBehaviourScorer, compute_gaze_ratio, compute_mar
|
| 20 |
+
|
| 21 |
+
FONT = cv2.FONT_HERSHEY_SIMPLEX
|
| 22 |
+
GREEN = (0, 255, 0)
|
| 23 |
+
RED = (0, 0, 255)
|
| 24 |
+
WHITE = (255, 255, 255)
|
| 25 |
+
YELLOW = (0, 255, 255)
|
| 26 |
+
ORANGE = (0, 165, 255)
|
| 27 |
+
GRAY = (120, 120, 120)
|
| 28 |
+
|
| 29 |
+
FEATURE_NAMES = [
|
| 30 |
+
"ear_left", "ear_right", "ear_avg", "h_gaze", "v_gaze", "mar",
|
| 31 |
+
"yaw", "pitch", "roll", "s_face", "s_eye", "gaze_offset", "head_deviation",
|
| 32 |
+
"perclos", "blink_rate", "closure_duration", "yawn_duration",
|
| 33 |
+
]
|
| 34 |
+
|
| 35 |
+
NUM_FEATURES = len(FEATURE_NAMES)
|
| 36 |
+
assert NUM_FEATURES == 17
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class TemporalTracker:
    """Accumulates temporal eye/mouth statistics across frames.

    Produces PERCLOS (fraction of recent frames with closed eyes), a
    blinks-per-minute estimate, and the durations of the current eye
    closure and yawn.
    """

    EAR_BLINK_THRESH = 0.21   # EAR below this counts as "eyes closed"
    MAR_YAWN_THRESH = 0.04    # MAR above this counts as "yawning"
    PERCLOS_WINDOW = 60       # frames kept for the PERCLOS rolling average
    BLINK_WINDOW_SEC = 30.0   # sliding window (seconds) for the blink rate

    def __init__(self):
        # Rolling 0/1 closed-flags for PERCLOS.
        self.ear_history = collections.deque(maxlen=self.PERCLOS_WINDOW)
        # Timestamps of completed blinks inside the sliding window.
        self.blink_timestamps = collections.deque()
        self._eyes_closed = False
        self._closure_start = None
        self._yawn_start = None

    def update(self, ear_avg, mar, now=None):
        """Feed one frame's EAR/MAR.

        Returns (perclos, blink_rate, closure_dur, yawn_dur). `now` may be
        supplied for deterministic testing; defaults to time.time().
        """
        now = time.time() if now is None else now

        is_closed = ear_avg < self.EAR_BLINK_THRESH

        # PERCLOS: mean of the recent closed-flags.
        self.ear_history.append(1.0 if is_closed else 0.0)
        perclos = (sum(self.ear_history) / len(self.ear_history)) if self.ear_history else 0.0

        # A blink completes on the closed -> open edge.
        if self._eyes_closed and not is_closed:
            self.blink_timestamps.append(now)
        self._eyes_closed = is_closed

        # Drop blinks that fell out of the window, then scale to per-minute.
        horizon = now - self.BLINK_WINDOW_SEC
        while self.blink_timestamps and self.blink_timestamps[0] < horizon:
            self.blink_timestamps.popleft()
        blink_rate = len(self.blink_timestamps) * (60.0 / self.BLINK_WINDOW_SEC)

        # Duration of the current continuous closure (0 while eyes are open).
        if not is_closed:
            self._closure_start = None
            closure_dur = 0.0
        else:
            if self._closure_start is None:
                self._closure_start = now
            closure_dur = now - self._closure_start

        # Duration of the current yawn (0 while the mouth is not wide open).
        if mar <= self.MAR_YAWN_THRESH:
            self._yawn_start = None
            yawn_dur = 0.0
        else:
            if self._yawn_start is None:
                self._yawn_start = now
            yawn_dur = now - self._yawn_start

        return perclos, blink_rate, closure_dur, yawn_dur
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def extract_features(landmarks, w, h, head_pose, eye_scorer, temporal):
    """Build the 17-d feature vector for one frame (order = FEATURE_NAMES).

    `landmarks` are face-mesh points, `w`/`h` the frame size; `temporal`
    accumulates PERCLOS/blink statistics across calls.
    """
    # Imported inside the function (mirrors the private-name import pattern).
    from models.geometric.eye_behaviour.eye_scorer import _LEFT_EYE_EAR, _RIGHT_EYE_EAR, compute_ear

    # Instantaneous eye/mouth geometry.
    ear_l = compute_ear(landmarks, _LEFT_EYE_EAR)
    ear_r = compute_ear(landmarks, _RIGHT_EYE_EAR)
    ear_mean = (ear_l + ear_r) / 2.0
    gaze_h, gaze_v = compute_gaze_ratio(landmarks)
    mouth_ratio = compute_mar(landmarks)

    # Head pose angles; zeros when the estimator returns nothing.
    angles = head_pose.estimate(landmarks, w, h)
    if angles:
        yaw, pitch, roll = angles[0], angles[1], angles[2]
    else:
        yaw = pitch = roll = 0.0

    # Higher-level attention scores from the geometric models.
    face_score = head_pose.score(landmarks, w, h)
    eye_score = eye_scorer.score(landmarks)

    # Derived deviations: gaze distance from center, head from frontal.
    gaze_off = math.sqrt((gaze_h - 0.5) ** 2 + (gaze_v - 0.5) ** 2)
    head_dev = math.sqrt(yaw ** 2 + pitch ** 2)

    # Temporal statistics (PERCLOS, blink rate, closure/yawn durations).
    perclos, blink_rate, closure_dur, yawn_dur = temporal.update(ear_mean, mouth_ratio)

    return np.array([
        ear_l, ear_r, ear_mean,
        gaze_h, gaze_v,
        mouth_ratio,
        yaw, pitch, roll,
        face_score, eye_score,
        gaze_off,
        head_dev,
        perclos, blink_rate, closure_dur, yawn_dur,
    ], dtype=np.float32)
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def quality_report(labels):
    """Print a quality summary for one labelling session.

    `labels` is a 1-D array of 0/1 per-frame labels. Prints counts,
    duration (assuming ~30 fps), transition statistics, and any warnings;
    returns True when all checks pass, False otherwise.
    """
    total = len(labels)
    focused = int((labels == 1).sum())
    unfocused = total - focused
    switches = int(np.sum(np.diff(labels) != 0))
    seconds = total / 30.0  # approximate at 30fps

    problems = []

    print(f"\n{'='*50}")
    print(f" DATA QUALITY REPORT")
    print(f"{'='*50}")
    print(f" Total samples : {total}")
    print(f" Focused : {focused} ({focused/max(total,1)*100:.1f}%)")
    print(f" Unfocused : {unfocused} ({unfocused/max(total,1)*100:.1f}%)")
    print(f" Duration : {seconds:.0f}s ({seconds/60:.1f} min)")
    print(f" Transitions : {switches}")
    if switches > 0:
        print(f" Avg segment : {total/switches:.0f} frames ({total/switches/30:.1f}s)")

    # Sanity checks — each failure appends a human-readable warning.
    if seconds < 120:
        problems.append(f"TOO SHORT: {seconds:.0f}s — aim for 5-10 minutes (300-600s)")

    if total < 3000:
        problems.append(f"LOW SAMPLE COUNT: {total} frames — aim for 9000+ (5 min at 30fps)")

    share = focused / max(total, 1)
    if not 0.3 <= share <= 0.7:
        problems.append(f"IMBALANCED: {share:.0%} focused — aim for 35-65% focused")

    if switches < 10:
        problems.append(f"TOO FEW TRANSITIONS: {switches} — switch every 10-30s, aim for 20+")

    if switches == 1:
        problems.append("SINGLE BLOCK: you recorded one unfocused + one focused block — "
                        "model will learn temporal position, not focus patterns")

    if problems:
        print(f"\n ⚠️ WARNINGS ({len(problems)}):")
        for msg in problems:
            print(f" • {msg}")
        print(f"\n Consider re-recording this session.")
    else:
        print(f"\n ✅ All checks passed!")

    print(f"{'='*50}\n")
    return len(problems) == 0
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
# ---------------------------------------------------------------------------
|
| 174 |
+
# Main
|
| 175 |
+
def main():
    """Interactive webcam collection loop.

    Press 1/0 to label frames focused/unfocused, p to pause, q to save the
    session as a .npz (features + labels + feature_names) and quit.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--name", type=str, default="session",
                        help="Your name or session ID")
    parser.add_argument("--camera", type=int, default=0,
                        help="Camera index")
    parser.add_argument("--duration", type=int, default=600,
                        help="Max recording time (seconds, default 10 min)")
    parser.add_argument("--output-dir", type=str,
                        default=os.path.join(_PROJECT_ROOT, "data_preparation", "collected"),
                        help="Where to save .npz files")
    args = parser.parse_args()

    os.makedirs(args.output_dir, exist_ok=True)

    # Detection / scoring pipeline components.
    detector = FaceMeshDetector()
    head_pose = HeadPoseEstimator()
    eye_scorer = EyeBehaviourScorer()
    temporal = TemporalTracker()

    cap = cv2.VideoCapture(args.camera)
    if not cap.isOpened():
        print("[COLLECT] ERROR: can't open camera")
        return

    print("[COLLECT] Data Collection Tool")
    print(f"[COLLECT] Session: {args.name}, max {args.duration}s")
    print(f"[COLLECT] Features per frame: {NUM_FEATURES}")
    print("[COLLECT] Controls:")
    print(" 1 = FOCUSED (looking at screen normally)")
    print(" 0 = NOT FOCUSED (phone, away, eyes closed, yawning)")
    print(" p = pause")
    print(" q = save & quit")
    print()
    print("[COLLECT] TIPS for good data:")
    print(" • Switch between 1 and 0 every 10-30 seconds")
    print(" • Aim for 20+ transitions total")
    print(" • Act out varied scenarios: reading, phone, talking, drowsy")
    print(" • Record at least 5 minutes")
    print()

    feat_rows = []
    label_rows = []
    current_label = None      # None = paused (nothing recorded)
    switch_count = 0          # number of label changes while recording
    last_label = None
    banner = "PAUSED -- press 1 (focused) or 0 (not focused)"
    started = time.time()
    last_tick = time.time()
    fps_ema = 0.0

    try:
        while True:
            elapsed = time.time() - started
            if elapsed > args.duration:
                print(f"[COLLECT] Time limit ({args.duration}s)")
                break

            ret, frame = cap.read()
            if not ret:
                break

            h, w = frame.shape[:2]
            landmarks = detector.process(frame)
            face_ok = landmarks is not None

            # Only record while actively labelling AND a face is detected.
            if face_ok and current_label is not None:
                feat_rows.append(extract_features(landmarks, w, h, head_pose, eye_scorer, temporal))
                label_rows.append(current_label)

                # Count label switches (only while recording).
                if last_label is not None and current_label != last_label:
                    switch_count += 1
                last_label = current_label

            # Exponentially-smoothed FPS estimate.
            now = time.time()
            fps_ema = 0.9 * fps_ema + 0.1 * (1.0 / max(now - last_tick, 1e-6))
            last_tick = now

            # --- overlay ---
            n = len(label_rows)
            n1 = sum(1 for x in label_rows if x == 1)
            n0 = n - n1
            remaining = max(0, args.duration - elapsed)

            bar_color = GREEN if current_label == 1 else (RED if current_label == 0 else (80, 80, 80))
            cv2.rectangle(frame, (0, 0), (w, 70), (0, 0, 0), -1)
            cv2.putText(frame, banner, (10, 22), FONT, 0.55, bar_color, 2, cv2.LINE_AA)
            cv2.putText(frame, f"Samples: {n} (F:{n1} U:{n0}) Switches: {switch_count}",
                        (10, 48), FONT, 0.42, WHITE, 1, cv2.LINE_AA)
            cv2.putText(frame, f"FPS:{fps_ema:.0f}", (w - 80, 22), FONT, 0.45, WHITE, 1, cv2.LINE_AA)
            cv2.putText(frame, f"{int(remaining)}s left", (w - 80, 48), FONT, 0.42, YELLOW, 1, cv2.LINE_AA)

            if n > 0:
                # Focused/unfocused balance bar.
                bar_w = min(w - 20, 300)
                bar_x = w - bar_w - 10
                bar_y = 58
                frac = n1 / n
                cv2.rectangle(frame, (bar_x, bar_y), (bar_x + bar_w, bar_y + 8), (40, 40, 40), -1)
                cv2.rectangle(frame, (bar_x, bar_y), (bar_x + int(bar_w * frac), bar_y + 8), GREEN, -1)
                cv2.putText(frame, f"{frac:.0%}F", (bar_x + bar_w + 4, bar_y + 8),
                            FONT, 0.3, GRAY, 1, cv2.LINE_AA)

            if not face_ok:
                cv2.putText(frame, "NO FACE", (w // 2 - 60, h // 2), FONT, 0.7, RED, 2, cv2.LINE_AA)

            # Red dot while actually recording.
            if current_label is not None and face_ok:
                cv2.circle(frame, (w - 20, 80), 8, RED, -1)

            # Live data-quality hints.
            warn_y = h - 35
            if n > 100 and switch_count < 3:
                cv2.putText(frame, "! Switch more often (aim for 20+ transitions)",
                            (10, warn_y), FONT, 0.38, ORANGE, 1, cv2.LINE_AA)
                warn_y -= 18
            if elapsed > 30 and n > 0:
                bal = n1 / n
                if bal < 0.25 or bal > 0.75:
                    cv2.putText(frame, f"! Imbalanced ({bal:.0%} focused) - record more of the other",
                                (10, warn_y), FONT, 0.38, ORANGE, 1, cv2.LINE_AA)
                    warn_y -= 18

            cv2.putText(frame, "1:focused 0:unfocused p:pause q:save+quit",
                        (10, h - 10), FONT, 0.38, GRAY, 1, cv2.LINE_AA)

            cv2.imshow("FocusGuard -- Data Collection", frame)

            key = cv2.waitKey(1) & 0xFF
            if key == ord("1"):
                current_label = 1
                banner = "Recording: FOCUSED"
                print(f"[COLLECT] -> FOCUSED (n={n}, transitions={switch_count})")
            elif key == ord("0"):
                current_label = 0
                banner = "Recording: NOT FOCUSED"
                print(f"[COLLECT] -> NOT FOCUSED (n={n}, transitions={switch_count})")
            elif key == ord("p"):
                current_label = None
                banner = "PAUSED"
                print(f"[COLLECT] paused (n={n})")
            elif key == ord("q"):
                break

    finally:
        # Always release the camera/window and close the detector.
        cap.release()
        cv2.destroyAllWindows()
        detector.close()

    if feat_rows:
        feats = np.stack(feat_rows)
        labs = np.array(label_rows, dtype=np.int64)

        stamp = time.strftime("%Y%m%d_%H%M%S")
        out_path = os.path.join(args.output_dir, f"{args.name}_{stamp}.npz")
        np.savez(out_path,
                 features=feats,
                 labels=labs,
                 feature_names=np.array(FEATURE_NAMES))

        print(f"\n[COLLECT] Saved {len(labs)} samples -> {out_path}")
        print(f" Shape: {feats.shape} ({NUM_FEATURES} features)")

        quality_report(labs)
    else:
        print("\n[COLLECT] No data collected")

    print("[COLLECT] Done")


if __name__ == "__main__":
    main()
|
models/attention/fusion.py
ADDED
|
File without changes
|
models/attention/train.py
ADDED
|
File without changes
|
models/cnn/__init__.py
ADDED
|
File without changes
|
models/cnn/eye_attention/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
models/cnn/eye_attention/classifier.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from abc import ABC, abstractmethod
|
| 4 |
+
|
| 5 |
+
import numpy as np
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class EyeClassifier(ABC):
    """Interface for eye-state backends used by the attention pipeline.

    Implementations receive BGR eye crops and produce a single
    attentiveness score (higher = more attentive).
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Short backend identifier."""

    @abstractmethod
    def predict_score(self, crops_bgr: list[np.ndarray]) -> float:
        """Score the given eye crops."""
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class GeometricOnlyClassifier(EyeClassifier):
    """No-op backend used when no CNN checkpoint is available.

    Always returns a neutral full score, leaving the geometric features to
    carry the eye signal alone.
    """

    @property
    def name(self) -> str:
        return "geometric"

    def predict_score(self, crops_bgr: list[np.ndarray]) -> float:
        # Neutral score: defer entirely to the geometric scoring.
        return 1.0
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class YOLOv11Classifier(EyeClassifier):
    """Eye classifier backed by an ultralytics YOLO classification checkpoint."""

    def __init__(self, checkpoint_path: str, device: str = "cpu"):
        # Imported here so the module stays importable without ultralytics.
        from ultralytics import YOLO

        self._model = YOLO(checkpoint_path)
        self._device = device

        # Locate the class index meaning "open"/"attentive"; fall back to
        # the highest index when the checkpoint uses other class names.
        class_names = self._model.names
        self._attentive_idx = None
        for idx, cls_name in class_names.items():
            if cls_name in ("open", "attentive"):
                self._attentive_idx = idx
                break
        if self._attentive_idx is None:
            self._attentive_idx = max(class_names.keys())
        print(f"[YOLO] Classes: {class_names}, attentive_idx={self._attentive_idx}")

    @property
    def name(self) -> str:
        return "yolo"

    def predict_score(self, crops_bgr: list[np.ndarray]) -> float:
        # No crops (e.g. face lost) -> neutral score.
        if not crops_bgr:
            return 1.0
        predictions = self._model.predict(crops_bgr, device=self._device, verbose=False)
        probs = [float(p.probs.data[self._attentive_idx]) for p in predictions]
        return sum(probs) / len(probs) if probs else 1.0
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def load_eye_classifier(
    path: str | None = None,
    backend: str = "yolo",
    device: str = "cpu",
) -> EyeClassifier:
    """Build an eye classifier backend.

    Returns the geometric no-op backend when no checkpoint path is given or
    `backend == "geometric"`. Re-raises ImportError (after a hint) when the
    YOLO backend is requested but ultralytics is not installed.
    """
    wants_geometric = path is None or backend == "geometric"
    if wants_geometric:
        return GeometricOnlyClassifier()

    try:
        return YOLOv11Classifier(path, device=device)
    except ImportError:
        print("[CLASSIFIER] ultralytics required for YOLO. pip install ultralytics")
        raise
|
models/cnn/eye_attention/crop.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cv2
|
| 2 |
+
import numpy as np
|
| 3 |
+
|
| 4 |
+
from models.pretrained.face_mesh.face_mesh import FaceMeshDetector
|
| 5 |
+
|
| 6 |
+
LEFT_EYE_CONTOUR = FaceMeshDetector.LEFT_EYE_INDICES
|
| 7 |
+
RIGHT_EYE_CONTOUR = FaceMeshDetector.RIGHT_EYE_INDICES
|
| 8 |
+
|
| 9 |
+
IMAGENET_MEAN = (0.485, 0.456, 0.406)
|
| 10 |
+
IMAGENET_STD = (0.229, 0.224, 0.225)
|
| 11 |
+
|
| 12 |
+
CROP_SIZE = 96
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def _bbox_from_landmarks(
|
| 16 |
+
landmarks: np.ndarray,
|
| 17 |
+
indices: list[int],
|
| 18 |
+
frame_w: int,
|
| 19 |
+
frame_h: int,
|
| 20 |
+
expand: float = 0.4,
|
| 21 |
+
) -> tuple[int, int, int, int]:
|
| 22 |
+
pts = landmarks[indices, :2]
|
| 23 |
+
px = pts[:, 0] * frame_w
|
| 24 |
+
py = pts[:, 1] * frame_h
|
| 25 |
+
|
| 26 |
+
x_min, x_max = px.min(), px.max()
|
| 27 |
+
y_min, y_max = py.min(), py.max()
|
| 28 |
+
w = x_max - x_min
|
| 29 |
+
h = y_max - y_min
|
| 30 |
+
cx = (x_min + x_max) / 2
|
| 31 |
+
cy = (y_min + y_max) / 2
|
| 32 |
+
|
| 33 |
+
size = max(w, h) * (1 + expand)
|
| 34 |
+
half = size / 2
|
| 35 |
+
|
| 36 |
+
x1 = int(max(cx - half, 0))
|
| 37 |
+
y1 = int(max(cy - half, 0))
|
| 38 |
+
x2 = int(min(cx + half, frame_w))
|
| 39 |
+
y2 = int(min(cy + half, frame_h))
|
| 40 |
+
|
| 41 |
+
return x1, y1, x2, y2
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def extract_eye_crops(
    frame: np.ndarray,
    landmarks: np.ndarray,
    expand: float = 0.4,
    crop_size: int = CROP_SIZE,
) -> tuple[np.ndarray, np.ndarray, tuple, tuple]:
    """Cut both eye regions out of `frame` and resize to `crop_size` squares.

    Returns (left_crop, right_crop, left_bbox, right_bbox); each bbox is a
    pixel (x1, y1, x2, y2) tuple in frame coordinates.
    """
    frame_h, frame_w = frame.shape[:2]

    left_bbox = _bbox_from_landmarks(landmarks, LEFT_EYE_CONTOUR, frame_w, frame_h, expand)
    right_bbox = _bbox_from_landmarks(landmarks, RIGHT_EYE_CONTOUR, frame_w, frame_h, expand)

    lx1, ly1, lx2, ly2 = left_bbox
    rx1, ry1, rx2, ry2 = right_bbox
    left_crop = frame[ly1:ly2, lx1:lx2]
    right_crop = frame[ry1:ry2, rx1:rx2]

    # INTER_AREA is the preferred interpolation when shrinking.
    left_crop = cv2.resize(left_crop, (crop_size, crop_size), interpolation=cv2.INTER_AREA)
    right_crop = cv2.resize(right_crop, (crop_size, crop_size), interpolation=cv2.INTER_AREA)

    return left_crop, right_crop, left_bbox, right_bbox
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def crop_to_tensor(crop_bgr: np.ndarray):
    """Convert a BGR uint8 crop into a CHW float32 torch tensor.

    Pixels are scaled to [0, 1] and normalized with the ImageNet
    per-channel mean/std.
    """
    import torch  # local import keeps torch optional for non-CNN code paths

    rgb = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
    for channel in range(3):
        rgb[:, :, channel] = (rgb[:, :, channel] - IMAGENET_MEAN[channel]) / IMAGENET_STD[channel]
    return torch.from_numpy(rgb.transpose(2, 0, 1))
|
models/cnn/eye_attention/train.py
ADDED
|
File without changes
|
models/geometric/__init__.py
ADDED
|
File without changes
|
models/geometric/eye_behaviour/__init__.py
ADDED
|
File without changes
|
models/geometric/eye_behaviour/eye_scorer.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import math
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
_LEFT_EYE_EAR = [33, 160, 158, 133, 153, 145]
|
| 6 |
+
_RIGHT_EYE_EAR = [362, 385, 387, 263, 373, 380]
|
| 7 |
+
|
| 8 |
+
_LEFT_IRIS_CENTER = 468
|
| 9 |
+
_RIGHT_IRIS_CENTER = 473
|
| 10 |
+
|
| 11 |
+
_LEFT_EYE_INNER = 133
|
| 12 |
+
_LEFT_EYE_OUTER = 33
|
| 13 |
+
_RIGHT_EYE_INNER = 362
|
| 14 |
+
_RIGHT_EYE_OUTER = 263
|
| 15 |
+
|
| 16 |
+
_LEFT_EYE_TOP = 159
|
| 17 |
+
_LEFT_EYE_BOTTOM = 145
|
| 18 |
+
_RIGHT_EYE_TOP = 386
|
| 19 |
+
_RIGHT_EYE_BOTTOM = 374
|
| 20 |
+
|
| 21 |
+
_MOUTH_TOP = 13
|
| 22 |
+
_MOUTH_BOTTOM = 14
|
| 23 |
+
_MOUTH_LEFT = 78
|
| 24 |
+
_MOUTH_RIGHT = 308
|
| 25 |
+
_MOUTH_UPPER_1 = 82
|
| 26 |
+
_MOUTH_UPPER_2 = 312
|
| 27 |
+
_MOUTH_LOWER_1 = 87
|
| 28 |
+
_MOUTH_LOWER_2 = 317
|
| 29 |
+
|
| 30 |
+
MAR_YAWN_THRESHOLD = 0.55
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def _distance(p1: np.ndarray, p2: np.ndarray) -> float:
|
| 34 |
+
return float(np.linalg.norm(p1 - p2))
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def compute_ear(landmarks: np.ndarray, eye_indices: list[int]) -> float:
    """Eye Aspect Ratio: (|p2-p6| + |p3-p5|) / (2 * |p1-p4|).

    Expects six landmark indices ordered (outer corner, upper 1, upper 2,
    inner corner, lower 2, lower 1). High values mean an open eye; values
    near zero a closed one. Returns 0.0 when the horizontal span is
    degenerate to avoid division by zero.
    """
    outer, top1, top2, inner, bot2, bot1 = (landmarks[i, :2] for i in eye_indices)

    horizontal = float(np.linalg.norm(outer - inner))
    if horizontal < 1e-6:
        return 0.0

    vertical = float(np.linalg.norm(top1 - bot1)) + float(np.linalg.norm(top2 - bot2))
    return vertical / (2.0 * horizontal)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def compute_avg_ear(landmarks: np.ndarray) -> float:
    """Mean Eye Aspect Ratio over the left and right eyes."""
    ears = (
        compute_ear(landmarks, _LEFT_EYE_EAR),
        compute_ear(landmarks, _RIGHT_EYE_EAR),
    )
    return sum(ears) / 2.0
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def compute_gaze_ratio(landmarks: np.ndarray) -> tuple[float, float]:
    """Estimate the normalized iris position inside each eye, averaged.

    Returns (h_ratio, v_ratio), each clipped to [0, 1]; 0.5 means the iris
    is centered on that axis. Any axis whose eye span is degenerate falls
    back to the neutral value 0.5.
    """
    def pt(idx: int) -> np.ndarray:
        # 2-D (x, y) view of one landmark.
        return landmarks[idx, :2]

    left_span_h = _distance(pt(_LEFT_EYE_INNER), pt(_LEFT_EYE_OUTER))
    right_span_h = _distance(pt(_RIGHT_EYE_INNER), pt(_RIGHT_EYE_OUTER))
    if min(left_span_h, right_span_h) < 1e-6:
        return 0.5, 0.5

    # Horizontal ratio: distance from the outer corner to the iris,
    # normalized by the eye width, averaged over both eyes.
    h_ratio = (
        _distance(pt(_LEFT_EYE_OUTER), pt(_LEFT_IRIS_CENTER)) / left_span_h
        + _distance(pt(_RIGHT_EYE_OUTER), pt(_RIGHT_IRIS_CENTER)) / right_span_h
    ) / 2.0

    left_span_v = _distance(pt(_LEFT_EYE_TOP), pt(_LEFT_EYE_BOTTOM))
    right_span_v = _distance(pt(_RIGHT_EYE_TOP), pt(_RIGHT_EYE_BOTTOM))
    if min(left_span_v, right_span_v) < 1e-6:
        return h_ratio, 0.5

    # Vertical ratio: distance from the upper lid to the iris, normalized
    # by the eye opening, averaged over both eyes.
    v_ratio = (
        _distance(pt(_LEFT_EYE_TOP), pt(_LEFT_IRIS_CENTER)) / left_span_v
        + _distance(pt(_RIGHT_EYE_TOP), pt(_RIGHT_IRIS_CENTER)) / right_span_v
    ) / 2.0

    return float(np.clip(h_ratio, 0, 1)), float(np.clip(v_ratio, 0, 1))
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def compute_mar(landmarks: np.ndarray) -> float:
    """Mouth Aspect Ratio: high values indicate an open mouth (yawn cue).

    Averages three vertical lip gaps against the mouth-corner width:
    (v1 + v2 + v3) / (2 * width). Returns 0.0 for a degenerate width.
    """
    width = _distance(landmarks[_MOUTH_LEFT, :2], landmarks[_MOUTH_RIGHT, :2])
    if width < 1e-6:
        return 0.0

    gaps = (
        _distance(landmarks[_MOUTH_UPPER_1, :2], landmarks[_MOUTH_LOWER_1, :2]),
        _distance(landmarks[_MOUTH_TOP, :2], landmarks[_MOUTH_BOTTOM, :2]),
        _distance(landmarks[_MOUTH_UPPER_2, :2], landmarks[_MOUTH_LOWER_2, :2]),
    )
    return sum(gaps) / (2.0 * width)
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
class EyeBehaviourScorer:
    """Fuse eye openness (EAR) and gaze centering into one [0, 1] focus score.

    Fusion rule: when the eyes are mostly closed (ear_score below the cutoff)
    the gaze estimate is unreliable, so the EAR score is returned alone;
    otherwise the result is ear_score * gaze_score. The rule lives in one
    place (`_combine`) so `score` and `detailed_score` cannot drift apart.
    """

    # Below this ear_score the gaze term is ignored (lids occlude the iris).
    # Previously this 0.3 was duplicated as a magic number in two methods.
    _GAZE_CUTOFF = 0.3

    def __init__(
        self,
        ear_open: float = 0.30,
        ear_closed: float = 0.16,
        gaze_max_offset: float = 0.28,
    ):
        """
        Args:
            ear_open: EAR at or above which the eye counts as fully open.
            ear_closed: EAR at or below which the eye counts as fully closed.
            gaze_max_offset: radial iris offset from center (ratio units) at
                which the gaze score reaches 0.
        """
        self.ear_open = ear_open
        self.ear_closed = ear_closed
        self.gaze_max_offset = gaze_max_offset

    def _ear_score(self, ear: float) -> float:
        """Linearly map EAR onto [0, 1] between the closed and open thresholds."""
        if ear >= self.ear_open:
            return 1.0
        if ear <= self.ear_closed:
            return 0.0
        return (ear - self.ear_closed) / (self.ear_open - self.ear_closed)

    def _gaze_score(self, h_ratio: float, v_ratio: float) -> float:
        """Raised-cosine falloff: 1.0 when the iris is centered, 0.0 at max offset."""
        h_offset = abs(h_ratio - 0.5)
        v_offset = abs(v_ratio - 0.5)
        offset = math.sqrt(h_offset**2 + v_offset**2)
        t = min(offset / self.gaze_max_offset, 1.0)
        return 0.5 * (1.0 + math.cos(math.pi * t))

    def _combine(self, ear_s: float, gaze_s: float) -> float:
        """Single fusion rule shared by score() and detailed_score()."""
        if ear_s < self._GAZE_CUTOFF:
            return ear_s
        return ear_s * gaze_s

    def score(self, landmarks: np.ndarray) -> float:
        """Return the fused eye score in [0, 1] for one set of face landmarks."""
        ear_s = self._ear_score(compute_avg_ear(landmarks))
        # Fast path: skip the gaze computation entirely when the eyes are
        # nearly closed (matches _combine's cutoff branch).
        if ear_s < self._GAZE_CUTOFF:
            return ear_s
        h_ratio, v_ratio = compute_gaze_ratio(landmarks)
        return self._combine(ear_s, self._gaze_score(h_ratio, v_ratio))

    def detailed_score(self, landmarks: np.ndarray) -> dict:
        """Return all intermediate values plus the fused score, rounded for logging."""
        ear = compute_avg_ear(landmarks)
        ear_s = self._ear_score(ear)
        h_ratio, v_ratio = compute_gaze_ratio(landmarks)
        gaze_s = self._gaze_score(h_ratio, v_ratio)
        return {
            "ear": round(ear, 4),
            "ear_score": round(ear_s, 4),
            "h_gaze": round(h_ratio, 4),
            "v_gaze": round(v_ratio, 4),
            "gaze_score": round(gaze_s, 4),
            "s_eye": round(self._combine(ear_s, gaze_s), 4),
        }
|
models/geometric/face_orientation/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
models/geometric/face_orientation/head_pose.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import math
|
| 2 |
+
|
| 3 |
+
import cv2
|
| 4 |
+
import numpy as np
|
| 5 |
+
|
| 6 |
+
_LANDMARK_INDICES = [1, 152, 33, 263, 61, 291]
|
| 7 |
+
|
| 8 |
+
_MODEL_POINTS = np.array(
|
| 9 |
+
[
|
| 10 |
+
[0.0, 0.0, 0.0],
|
| 11 |
+
[0.0, -330.0, -65.0],
|
| 12 |
+
[-225.0, 170.0, -135.0],
|
| 13 |
+
[225.0, 170.0, -135.0],
|
| 14 |
+
[-150.0, -150.0, -125.0],
|
| 15 |
+
[150.0, -150.0, -125.0],
|
| 16 |
+
],
|
| 17 |
+
dtype=np.float64,
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class HeadPoseEstimator:
    """Estimate head orientation (yaw/pitch/roll) from face-mesh landmarks.

    Solves a 6-point Perspective-n-Point problem (nose tip, chin, eye
    corners, mouth corners; see `_LANDMARK_INDICES` / `_MODEL_POINTS`)
    against a generic 3D face model, then converts the rotation into
    Euler-like angles and a smooth [0, 1] "facing the camera" score.
    """

    def __init__(self, max_angle: float = 30.0, roll_weight: float = 0.5):
        # max_angle: combined angular deviation (degrees) at which score() reaches 0.
        # roll_weight: contribution of roll to the deviation, relative to yaw/pitch.
        self.max_angle = max_angle
        self.roll_weight = roll_weight
        # Pinhole intrinsics are approximated from the frame size and cached per size.
        self._camera_matrix = None
        self._frame_size = None
        # Lens distortion assumed negligible.
        self._dist_coeffs = np.zeros((4, 1), dtype=np.float64)

    def _get_camera_matrix(self, frame_w: int, frame_h: int) -> np.ndarray:
        """Return (and cache) an approximate pinhole camera matrix for this frame size."""
        if self._camera_matrix is not None and self._frame_size == (frame_w, frame_h):
            return self._camera_matrix
        # Common heuristic: focal length ~ frame width, principal point at the center.
        focal_length = float(frame_w)
        cx, cy = frame_w / 2.0, frame_h / 2.0
        self._camera_matrix = np.array(
            [[focal_length, 0, cx], [0, focal_length, cy], [0, 0, 1]],
            dtype=np.float64,
        )
        self._frame_size = (frame_w, frame_h)
        return self._camera_matrix

    def _solve(self, landmarks: np.ndarray, frame_w: int, frame_h: int):
        """Run solvePnP for the 6 reference landmarks.

        `landmarks` holds normalized [0, 1] coordinates, so x/y are scaled to
        pixels first. Returns (success, rvec, tvec, image_points).
        """
        image_points = np.array(
            [
                [landmarks[i, 0] * frame_w, landmarks[i, 1] * frame_h]
                for i in _LANDMARK_INDICES
            ],
            dtype=np.float64,
        )
        camera_matrix = self._get_camera_matrix(frame_w, frame_h)
        success, rvec, tvec = cv2.solvePnP(
            _MODEL_POINTS,
            image_points,
            camera_matrix,
            self._dist_coeffs,
            flags=cv2.SOLVEPNP_ITERATIVE,
        )
        return success, rvec, tvec, image_points

    def estimate(
        self, landmarks: np.ndarray, frame_w: int, frame_h: int
    ) -> tuple[float, float, float] | None:
        """Return (yaw, pitch, roll) in degrees, or None if the PnP fit fails."""
        success, rvec, tvec, _ = self._solve(landmarks, frame_w, frame_h)
        if not success:
            return None

        # Axis-angle -> rotation matrix; rotate the model's canonical axes:
        # +Z is treated as the nose direction, +Y as the face's up vector.
        rmat, _ = cv2.Rodrigues(rvec)
        nose_dir = rmat @ np.array([0.0, 0.0, 1.0])
        face_up = rmat @ np.array([0.0, 1.0, 0.0])

        # NOTE(review): the angle sign conventions below depend on the
        # model/camera axis choices above — confirm against the landmark
        # provider's coordinate frame.
        yaw = math.degrees(math.atan2(nose_dir[0], -nose_dir[2]))
        pitch = math.degrees(math.asin(np.clip(-nose_dir[1], -1.0, 1.0)))
        roll = math.degrees(math.atan2(face_up[0], -face_up[1]))

        return (yaw, pitch, roll)

    def score(self, landmarks: np.ndarray, frame_w: int, frame_h: int) -> float:
        """Map head deviation to [0, 1]: 1 facing the camera, 0 at/after max_angle."""
        angles = self.estimate(landmarks, frame_w, frame_h)
        if angles is None:
            return 0.0

        yaw, pitch, roll = angles
        # Combined deviation with roll down-weighted; raised-cosine falloff.
        deviation = math.sqrt(yaw**2 + pitch**2 + (self.roll_weight * roll) ** 2)
        t = min(deviation / self.max_angle, 1.0)
        return 0.5 * (1.0 + math.cos(math.pi * t))

    def draw_axes(
        self,
        frame: np.ndarray,
        landmarks: np.ndarray,
        axis_length: float = 50.0,
    ) -> np.ndarray:
        """Draw the head's X/Y/Z axes from the nose tip onto `frame` (in place)."""
        h, w = frame.shape[:2]
        success, rvec, tvec, image_points = self._solve(landmarks, w, h)
        if not success:
            return frame

        camera_matrix = self._get_camera_matrix(w, h)
        # image_points[0] is the nose tip (first entry of _LANDMARK_INDICES).
        nose = tuple(image_points[0].astype(int))

        axes_3d = np.float64(
            [[axis_length, 0, 0], [0, axis_length, 0], [0, 0, axis_length]]
        )
        projected, _ = cv2.projectPoints(
            axes_3d, rvec, tvec, camera_matrix, self._dist_coeffs
        )

        # Line colors in BGR order for the X, Y, Z axes respectively.
        colors = [(0, 0, 255), (0, 255, 0), (255, 0, 0)]
        for i, color in enumerate(colors):
            pt = tuple(projected[i].ravel().astype(int))
            cv2.line(frame, nose, pt, color, 2)

        return frame
|
models/mlp/__init__.py
ADDED
|
File without changes
|
models/mlp/train.py
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
import random
|
| 4 |
+
|
| 5 |
+
import numpy as np
|
| 6 |
+
import torch
|
| 7 |
+
import torch.nn as nn
|
| 8 |
+
import torch.optim as optim
|
| 9 |
+
|
| 10 |
+
from models.prepare_dataset import get_dataloaders
|
| 11 |
+
|
| 12 |
+
CFG = {
|
| 13 |
+
"model_name": "face_orientation",
|
| 14 |
+
"epochs": 30,
|
| 15 |
+
"batch_size": 32,
|
| 16 |
+
"lr": 1e-3,
|
| 17 |
+
"seed": 42,
|
| 18 |
+
"split_ratios": (0.7, 0.15, 0.15),
|
| 19 |
+
"checkpoints_dir": {
|
| 20 |
+
"face_orientation": os.path.join(os.path.dirname(__file__), "face_orientation_model"),
|
| 21 |
+
"eye_behaviour": os.path.join(os.path.dirname(__file__), "eye_behaviour_model"),
|
| 22 |
+
},
|
| 23 |
+
"logs_dir": os.path.join(os.path.dirname(__file__), "..", "..", "evaluation", "logs"),
|
| 24 |
+
}
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def set_seed(seed: int):
    """Seed python, numpy and torch RNGs (incl. all CUDA devices) for reproducibility."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
class BaseModel(nn.Module):
    """Small MLP classifier with built-in train/eval epoch helpers.

    Architecture: Linear(num_features, 64) -> ReLU -> Linear(64, 32) -> ReLU
    -> Linear(32, num_classes). Outputs raw logits; pair with
    CrossEntropyLoss. The three epoch loops (train/val/test) used to be
    three near-identical copies; they now share `_run_epoch`.
    """

    def __init__(self, num_features: int, num_classes: int):
        super().__init__()
        self.network = nn.Sequential(
            nn.Linear(num_features, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, num_classes),
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes)."""
        return self.network(x)

    def _run_epoch(self, loader, criterion, device, optimizer=None):
        """One pass over `loader`; trains when `optimizer` is given, else evaluates.

        Returns (mean_loss, accuracy), both weighted by batch size.
        """
        training = optimizer is not None
        self.train(training)

        total_loss = 0.0
        correct = 0
        total = 0

        for features, labels in loader:
            features, labels = features.to(device), labels.to(device)

            if training:
                optimizer.zero_grad()
            outputs = self(features)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            total_loss += loss.item() * features.size(0)
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            total += features.size(0)

        return total_loss / total, correct / total

    def training_step(self, loader, optimizer, criterion, device):
        """Train for one epoch; returns (mean_loss, accuracy)."""
        return self._run_epoch(loader, criterion, device, optimizer=optimizer)

    @torch.no_grad()
    def validation_step(self, loader, criterion, device):
        """Evaluate one epoch without gradients; returns (mean_loss, accuracy)."""
        return self._run_epoch(loader, criterion, device)

    @torch.no_grad()
    def test_step(self, loader, criterion, device):
        """Identical to validation_step; kept as a separate name for log clarity."""
        return self._run_epoch(loader, criterion, device)
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def main():
    """Train the feature-MLP classifier end to end.

    Builds dataloaders for CFG["model_name"], trains with Adam +
    cross-entropy for CFG["epochs"] epochs, checkpoints the best
    validation accuracy, re-evaluates that checkpoint on the test split,
    and writes the whole history as JSON into the evaluation logs dir.
    """
    set_seed(CFG["seed"])

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"[TRAIN] Device: {device}")
    # NOTE(review): CFG["model_name"] defaults to "face_orientation" even
    # though this file lives under models/mlp — confirm that is intended.
    print(f"[TRAIN] Model: {CFG['model_name']}")

    train_loader, val_loader, test_loader, num_features, num_classes = get_dataloaders(
        model_name=CFG["model_name"],
        batch_size=CFG["batch_size"],
        split_ratios=CFG["split_ratios"],
        seed=CFG["seed"],
    )

    model = BaseModel(num_features, num_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=CFG["lr"])

    print(f"[TRAIN] Parameters: {sum(p.numel() for p in model.parameters()):,}")

    # Checkpoint directory is selected per model name from CFG.
    ckpt_dir = CFG["checkpoints_dir"][CFG["model_name"]]
    os.makedirs(ckpt_dir, exist_ok=True)
    best_ckpt_path = os.path.join(ckpt_dir, "best_model.pt")

    # Per-epoch metrics, later dumped to JSON for the evaluation logs.
    history = {
        "model_name": CFG["model_name"],
        "epochs": [],
        "train_loss": [],
        "train_acc": [],
        "val_loss": [],
        "val_acc": [],
    }

    best_val_acc = 0.0

    print(f"\n{'Epoch':>6} | {'Train Loss':>10} | {'Train Acc':>9} | {'Val Loss':>10} | {'Val Acc':>9}")
    print("-" * 60)

    for epoch in range(1, CFG["epochs"] + 1):
        train_loss, train_acc = model.training_step(train_loader, optimizer, criterion, device)
        val_loss, val_acc = model.validation_step(val_loader, criterion, device)

        history["epochs"].append(epoch)
        history["train_loss"].append(round(train_loss, 4))
        history["train_acc"].append(round(train_acc, 4))
        history["val_loss"].append(round(val_loss, 4))
        history["val_acc"].append(round(val_acc, 4))

        # Keep only the best-by-validation-accuracy checkpoint; "*" marks
        # the epochs where it was updated.
        marker = ""
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), best_ckpt_path)
            marker = " *"

        print(f"{epoch:>6} | {train_loss:>10.4f} | {train_acc:>8.2%} | {val_loss:>10.4f} | {val_acc:>8.2%}{marker}")

    print(f"\nBest validation accuracy: {best_val_acc:.2%}")
    print(f"Checkpoint saved to: {best_ckpt_path}")

    # Reload the best checkpoint before the final test evaluation.
    model.load_state_dict(torch.load(best_ckpt_path, weights_only=True))
    test_loss, test_acc = model.test_step(test_loader, criterion, device)
    print(f"\n[TEST] Loss: {test_loss:.4f} | Accuracy: {test_acc:.2%}")

    history["test_loss"] = round(test_loss, 4)
    history["test_acc"] = round(test_acc, 4)

    logs_dir = CFG["logs_dir"]
    os.makedirs(logs_dir, exist_ok=True)
    log_path = os.path.join(logs_dir, f"{CFG['model_name']}_training_log.json")

    with open(log_path, "w") as f:
        json.dump(history, f, indent=2)

    print(f"[LOG] Training history saved to: {log_path}")


if __name__ == "__main__":
    main()
|
models/pretrained/__init__.py
ADDED
|
File without changes
|
models/pretrained/face_mesh/.gitkeep
ADDED
|
File without changes
|
models/pretrained/face_mesh/__init__.py
ADDED
|
File without changes
|
models/pretrained/face_mesh/face_mesh.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from urllib.request import urlretrieve
|
| 4 |
+
|
| 5 |
+
import cv2
|
| 6 |
+
import numpy as np
|
| 7 |
+
import mediapipe as mp
|
| 8 |
+
from mediapipe.tasks.python.vision import FaceLandmarkerOptions, FaceLandmarker, RunningMode
|
| 9 |
+
from mediapipe.tasks import python as mp_tasks
|
| 10 |
+
|
| 11 |
+
_MODEL_URL = (
|
| 12 |
+
"https://storage.googleapis.com/mediapipe-models/face_landmarker/"
|
| 13 |
+
"face_landmarker/float16/latest/face_landmarker.task"
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def _ensure_model() -> str:
    """Return the local path to the face-landmarker model, downloading on first use.

    The model is cached under $FOCUSGUARD_CACHE_DIR (default
    ~/.cache/focusguard). The download is written to a temporary ".part"
    file and atomically renamed into place, so an interrupted download can
    never leave a truncated file that would be treated as a valid cache hit
    on the next run (the original wrote directly to the final path).
    """
    cache_dir = Path(os.environ.get(
        "FOCUSGUARD_CACHE_DIR",
        Path.home() / ".cache" / "focusguard",
    ))
    model_path = cache_dir / "face_landmarker.task"
    if model_path.exists():
        return str(model_path)
    cache_dir.mkdir(parents=True, exist_ok=True)
    print(f"[FACE_MESH] Downloading model to {model_path}...")
    tmp_path = model_path.with_suffix(".task.part")
    try:
        urlretrieve(_MODEL_URL, tmp_path)
        os.replace(tmp_path, model_path)  # atomic on the same filesystem
    finally:
        # Remove a partial file if the download or rename failed.
        if tmp_path.exists() and not model_path.exists():
            tmp_path.unlink()
    print("[FACE_MESH] Download complete.")
    return str(model_path)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class FaceMeshDetector:
|
| 33 |
+
LEFT_EYE_INDICES = [33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161, 246]
|
| 34 |
+
RIGHT_EYE_INDICES = [362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385, 384, 398]
|
| 35 |
+
LEFT_IRIS_INDICES = [468, 469, 470, 471, 472]
|
| 36 |
+
RIGHT_IRIS_INDICES = [473, 474, 475, 476, 477]
|
| 37 |
+
|
| 38 |
+
def __init__(
|
| 39 |
+
self,
|
| 40 |
+
max_num_faces: int = 1,
|
| 41 |
+
min_detection_confidence: float = 0.5,
|
| 42 |
+
min_tracking_confidence: float = 0.5,
|
| 43 |
+
):
|
| 44 |
+
model_path = _ensure_model()
|
| 45 |
+
options = FaceLandmarkerOptions(
|
| 46 |
+
base_options=mp_tasks.BaseOptions(model_asset_path=model_path),
|
| 47 |
+
num_faces=max_num_faces,
|
| 48 |
+
min_face_detection_confidence=min_detection_confidence,
|
| 49 |
+
min_face_presence_confidence=min_detection_confidence,
|
| 50 |
+
min_tracking_confidence=min_tracking_confidence,
|
| 51 |
+
running_mode=RunningMode.VIDEO,
|
| 52 |
+
)
|
| 53 |
+
self._landmarker = FaceLandmarker.create_from_options(options)
|
| 54 |
+
self._frame_ts = 0 # ms, for video API
|
| 55 |
+
|
| 56 |
+
def process(self, bgr_frame: np.ndarray) -> np.ndarray | None:
|
| 57 |
+
# BGR in -> (478,3) norm x,y,z or None
|
| 58 |
+
rgb = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
|
| 59 |
+
mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)
|
| 60 |
+
self._frame_ts += 33 # ~30fps
|
| 61 |
+
result = self._landmarker.detect_for_video(mp_image, self._frame_ts)
|
| 62 |
+
|
| 63 |
+
if not result.face_landmarks:
|
| 64 |
+
return None
|
| 65 |
+
|
| 66 |
+
face = result.face_landmarks[0]
|
| 67 |
+
return np.array([(lm.x, lm.y, lm.z) for lm in face], dtype=np.float32)
|
| 68 |
+
|
| 69 |
+
def get_pixel_landmarks(self, landmarks: np.ndarray, frame_w: int, frame_h: int) -> np.ndarray:
|
| 70 |
+
# norm -> pixel (x,y)
|
| 71 |
+
pixel = np.zeros((landmarks.shape[0], 2), dtype=np.int32)
|
| 72 |
+
pixel[:, 0] = (landmarks[:, 0] * frame_w).astype(np.int32)
|
| 73 |
+
pixel[:, 1] = (landmarks[:, 1] * frame_h).astype(np.int32)
|
| 74 |
+
return pixel
|
| 75 |
+
|
| 76 |
+
def get_3d_landmarks(self, landmarks: np.ndarray, frame_w: int, frame_h: int) -> np.ndarray:
|
| 77 |
+
# norm -> pixel-scale x,y,z (z scaled by width)
|
| 78 |
+
pts = np.zeros_like(landmarks)
|
| 79 |
+
pts[:, 0] = landmarks[:, 0] * frame_w
|
| 80 |
+
pts[:, 1] = landmarks[:, 1] * frame_h
|
| 81 |
+
pts[:, 2] = landmarks[:, 2] * frame_w
|
| 82 |
+
return pts
|
| 83 |
+
|
| 84 |
+
def close(self):
|
| 85 |
+
self._landmarker.close()
|
| 86 |
+
|
| 87 |
+
def __enter__(self):
|
| 88 |
+
return self
|
| 89 |
+
|
| 90 |
+
def __exit__(self, *args):
|
| 91 |
+
self.close()
|
requirements.txt
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
-
# Face mesh + head pose + eye behaviour (Stage 2); eye CNN needs torch
|
| 2 |
mediapipe>=0.10.14
|
| 3 |
opencv-python>=4.8.0
|
| 4 |
numpy>=1.24.0
|
| 5 |
torch>=2.0.0
|
| 6 |
torchvision>=0.15.0
|
| 7 |
-
|
|
|
|
|
|
|
|
|
| 1 |
mediapipe>=0.10.14
|
| 2 |
opencv-python>=4.8.0
|
| 3 |
numpy>=1.24.0
|
| 4 |
torch>=2.0.0
|
| 5 |
torchvision>=0.15.0
|
| 6 |
+
scikit-learn>=1.2.0
|
| 7 |
+
joblib>=1.2.0
|
ui/README.md
CHANGED
|
@@ -1,22 +1,20 @@
|
|
| 1 |
# ui
|
| 2 |
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
## Stage 2 (face mesh + head pose + eye)
|
| 6 |
-
|
| 7 |
-
- **pipeline.py** — face mesh → S_face (head pose) + S_eye (geometry + optional YOLO/MobileNet) + MAR/yawn → focus.
|
| 8 |
-
- **live_demo.py** — webcam + mesh, FOCUSED/NOT FOCUSED, MAR, YAWN, optional eye model.
|
| 9 |
|
| 10 |
From repo root:
|
|
|
|
| 11 |
```bash
|
| 12 |
-
pip install -r requirements.txt
|
| 13 |
python ui/live_demo.py
|
| 14 |
```
|
| 15 |
-
|
|
|
|
|
|
|
| 16 |
```bash
|
| 17 |
-
|
| 18 |
-
python ui/live_demo.py --eye-model path/to/yolo.pt --eye-backend yolo
|
| 19 |
```
|
| 20 |
-
With MobileNetV2 (96×96 crops): `--eye-model path/to/best_model.pt --eye-backend mobilenet`.
|
| 21 |
|
| 22 |
-
|
|
|
|
|
|
|
|
|
| 1 |
# ui
|
| 2 |
|
| 3 |
+
- **pipeline.py** — `FaceMeshPipeline` (head + eye geo ± YOLO → focus) and `MLPPipeline` (loads latest MLP from `MLP/models/`, 10 features → focus)
|
| 4 |
+
- **live_demo.py** — webcam window, mesh overlay, FOCUSED / NOT FOCUSED
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
From repo root:
|
| 7 |
+
|
| 8 |
```bash
|
|
|
|
| 9 |
python ui/live_demo.py
|
| 10 |
```
|
| 11 |
+
|
| 12 |
+
MLP only (no head/eye fusion, just your trained MLP):
|
| 13 |
+
|
| 14 |
```bash
|
| 15 |
+
python ui/live_demo.py --mlp
|
|
|
|
| 16 |
```
|
|
|
|
| 17 |
|
| 18 |
+
With YOLO eye model: `python ui/live_demo.py --eye-model path/to/yolo.pt`
|
| 19 |
+
|
| 20 |
+
`q` quit, `m` cycle mesh (full / contours / off).
|
ui/live_demo.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
| 1 |
-
# Stage 1 demo — webcam + face mesh overlay
|
| 2 |
-
|
| 3 |
import argparse
|
| 4 |
import os
|
| 5 |
import sys
|
|
@@ -13,10 +11,9 @@ _PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
|
| 13 |
if _PROJECT_ROOT not in sys.path:
|
| 14 |
sys.path.insert(0, _PROJECT_ROOT)
|
| 15 |
|
| 16 |
-
from ui.pipeline import FaceMeshPipeline
|
| 17 |
-
from models.face_mesh.face_mesh import FaceMeshDetector
|
| 18 |
|
| 19 |
-
# Drawing
|
| 20 |
FONT = cv2.FONT_HERSHEY_SIMPLEX
|
| 21 |
CYAN = (255, 255, 0)
|
| 22 |
GREEN = (0, 255, 0)
|
|
@@ -119,35 +116,43 @@ def draw_eyes_and_irises(frame, landmarks, w, h):
|
|
| 119 |
|
| 120 |
|
| 121 |
def main():
|
| 122 |
-
parser = argparse.ArgumentParser(
|
| 123 |
-
parser.add_argument("--camera", type=int, default=0
|
| 124 |
-
parser.add_argument("--
|
| 125 |
-
parser.add_argument("--
|
| 126 |
-
parser.add_argument("--
|
| 127 |
-
parser.add_argument("--
|
| 128 |
-
parser.add_argument("--
|
| 129 |
-
parser.add_argument("--
|
| 130 |
-
parser.add_argument("--eye-
|
|
|
|
|
|
|
| 131 |
args = parser.parse_args()
|
| 132 |
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
|
| 145 |
cap = cv2.VideoCapture(args.camera)
|
| 146 |
if not cap.isOpened():
|
| 147 |
print("[DEMO] ERROR: Cannot open camera")
|
| 148 |
return
|
| 149 |
|
| 150 |
-
print("[DEMO] q = quit, m = cycle mesh
|
| 151 |
prev_time = time.time()
|
| 152 |
fps = 0.0
|
| 153 |
mesh_mode = MESH_FULL
|
|
@@ -172,27 +177,32 @@ def main():
|
|
| 172 |
elif mesh_mode == MESH_CONTOURS:
|
| 173 |
draw_contours(frame, lm, w, h)
|
| 174 |
draw_eyes_and_irises(frame, lm, w, h)
|
| 175 |
-
|
|
|
|
| 176 |
if result.get("left_bbox") and result.get("right_bbox"):
|
| 177 |
lx1, ly1, lx2, ly2 = result["left_bbox"]
|
| 178 |
rx1, ry1, rx2, ry2 = result["right_bbox"]
|
| 179 |
cv2.rectangle(frame, (lx1, ly1), (lx2, ly2), YELLOW, 1)
|
| 180 |
cv2.rectangle(frame, (rx1, ry1), (rx2, ry2), YELLOW, 1)
|
| 181 |
|
| 182 |
-
# Status bar: FOCUSED / NOT FOCUSED; YAWN when mouth open (sleepy)
|
| 183 |
status = "FOCUSED" if result["is_focused"] else "NOT FOCUSED"
|
| 184 |
status_color = GREEN if result["is_focused"] else RED
|
| 185 |
cv2.rectangle(frame, (0, 0), (w, 55), (0, 0, 0), -1)
|
| 186 |
cv2.putText(frame, status, (10, 28), FONT, 0.8, status_color, 2, cv2.LINE_AA)
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
cv2.putText(frame, "
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
|
| 197 |
cv2.imshow("FocusGuard", frame)
|
| 198 |
|
|
|
|
|
|
|
|
|
|
| 1 |
import argparse
|
| 2 |
import os
|
| 3 |
import sys
|
|
|
|
| 11 |
if _PROJECT_ROOT not in sys.path:
|
| 12 |
sys.path.insert(0, _PROJECT_ROOT)
|
| 13 |
|
| 14 |
+
from ui.pipeline import FaceMeshPipeline, MLPPipeline
|
| 15 |
+
from models.pretrained.face_mesh.face_mesh import FaceMeshDetector
|
| 16 |
|
|
|
|
| 17 |
FONT = cv2.FONT_HERSHEY_SIMPLEX
|
| 18 |
CYAN = (255, 255, 0)
|
| 19 |
GREEN = (0, 255, 0)
|
|
|
|
| 116 |
|
| 117 |
|
| 118 |
def main():
|
| 119 |
+
parser = argparse.ArgumentParser()
|
| 120 |
+
parser.add_argument("--camera", type=int, default=0)
|
| 121 |
+
parser.add_argument("--mlp", action="store_true", help="Use MLP model only (load latest from MLP/models/)")
|
| 122 |
+
parser.add_argument("--mlp-dir", type=str, default=None, help="MLP models dir (default: shared/MLP/models)")
|
| 123 |
+
parser.add_argument("--max-angle", type=float, default=22.0)
|
| 124 |
+
parser.add_argument("--alpha", type=float, default=0.4)
|
| 125 |
+
parser.add_argument("--beta", type=float, default=0.6)
|
| 126 |
+
parser.add_argument("--threshold", type=float, default=0.55)
|
| 127 |
+
parser.add_argument("--eye-model", type=str, default=None)
|
| 128 |
+
parser.add_argument("--eye-backend", type=str, default="yolo", choices=["yolo", "geometric"])
|
| 129 |
+
parser.add_argument("--eye-blend", type=float, default=0.5)
|
| 130 |
args = parser.parse_args()
|
| 131 |
|
| 132 |
+
use_mlp_only = args.mlp
|
| 133 |
+
|
| 134 |
+
if use_mlp_only:
|
| 135 |
+
print("[DEMO] MLP only — loading latest from MLP/models/")
|
| 136 |
+
pipeline = MLPPipeline(model_dir=args.mlp_dir)
|
| 137 |
+
else:
|
| 138 |
+
eye_mode = " + model" if args.eye_model else " only"
|
| 139 |
+
print("[DEMO] Face mesh + head pose + eye (geometry" + eye_mode + ")")
|
| 140 |
+
pipeline = FaceMeshPipeline(
|
| 141 |
+
max_angle=args.max_angle,
|
| 142 |
+
alpha=args.alpha,
|
| 143 |
+
beta=args.beta,
|
| 144 |
+
threshold=args.threshold,
|
| 145 |
+
eye_model_path=args.eye_model,
|
| 146 |
+
eye_backend=args.eye_backend,
|
| 147 |
+
eye_blend=args.eye_blend,
|
| 148 |
+
)
|
| 149 |
|
| 150 |
cap = cv2.VideoCapture(args.camera)
|
| 151 |
if not cap.isOpened():
|
| 152 |
print("[DEMO] ERROR: Cannot open camera")
|
| 153 |
return
|
| 154 |
|
| 155 |
+
print("[DEMO] q = quit, m = cycle mesh (full/contours/off)" if not use_mlp_only else "[DEMO] q = quit, m = mesh")
|
| 156 |
prev_time = time.time()
|
| 157 |
fps = 0.0
|
| 158 |
mesh_mode = MESH_FULL
|
|
|
|
| 177 |
elif mesh_mode == MESH_CONTOURS:
|
| 178 |
draw_contours(frame, lm, w, h)
|
| 179 |
draw_eyes_and_irises(frame, lm, w, h)
|
| 180 |
+
if not use_mlp_only:
|
| 181 |
+
pipeline.head_pose.draw_axes(frame, lm)
|
| 182 |
if result.get("left_bbox") and result.get("right_bbox"):
|
| 183 |
lx1, ly1, lx2, ly2 = result["left_bbox"]
|
| 184 |
rx1, ry1, rx2, ry2 = result["right_bbox"]
|
| 185 |
cv2.rectangle(frame, (lx1, ly1), (lx2, ly2), YELLOW, 1)
|
| 186 |
cv2.rectangle(frame, (rx1, ry1), (rx2, ry2), YELLOW, 1)
|
| 187 |
|
|
|
|
| 188 |
status = "FOCUSED" if result["is_focused"] else "NOT FOCUSED"
|
| 189 |
status_color = GREEN if result["is_focused"] else RED
|
| 190 |
cv2.rectangle(frame, (0, 0), (w, 55), (0, 0, 0), -1)
|
| 191 |
cv2.putText(frame, status, (10, 28), FONT, 0.8, status_color, 2, cv2.LINE_AA)
|
| 192 |
+
if use_mlp_only:
|
| 193 |
+
cv2.putText(frame, "MLP", (10, 48), FONT, 0.45, WHITE, 1, cv2.LINE_AA)
|
| 194 |
+
cv2.putText(frame, f"FPS: {fps:.0f}", (w - 80, 28), FONT, 0.45, WHITE, 1, cv2.LINE_AA)
|
| 195 |
+
cv2.putText(frame, "q:quit m:mesh", (w - 120, 48), FONT, 0.4, (180, 180, 180), 1, cv2.LINE_AA)
|
| 196 |
+
else:
|
| 197 |
+
mar_str = f" MAR:{result['mar']:.2f}" if result.get("mar") is not None else ""
|
| 198 |
+
cv2.putText(frame, f"S_face:{result['s_face']:.2f} S_eye:{result['s_eye']:.2f}{mar_str} score:{result['raw_score']:.2f}", (10, 48), FONT, 0.45, WHITE, 1, cv2.LINE_AA)
|
| 199 |
+
if result.get("is_yawning"):
|
| 200 |
+
cv2.putText(frame, "YAWN", (10, 75), FONT, 0.7, ORANGE, 2, cv2.LINE_AA)
|
| 201 |
+
if result["yaw"] is not None:
|
| 202 |
+
cv2.putText(frame, f"yaw:{result['yaw']:+.0f} pitch:{result['pitch']:+.0f} roll:{result['roll']:+.0f}", (w - 280, 48), FONT, 0.4, (180, 180, 180), 1, cv2.LINE_AA)
|
| 203 |
+
eye_label = f"eye:{pipeline.eye_classifier.name}" if pipeline.has_eye_model else "eye:geo"
|
| 204 |
+
cv2.putText(frame, f"{_MESH_NAMES[mesh_mode]} {eye_label} FPS: {fps:.0f}", (w - 320, 28), FONT, 0.45, WHITE, 1, cv2.LINE_AA)
|
| 205 |
+
cv2.putText(frame, "q:quit m:mesh", (w - 140, 48), FONT, 0.4, (180, 180, 180), 1, cv2.LINE_AA)
|
| 206 |
|
| 207 |
cv2.imshow("FocusGuard", frame)
|
| 208 |
|
ui/pipeline.py
CHANGED
|
@@ -1,24 +1,23 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
import os
|
| 4 |
import sys
|
| 5 |
|
| 6 |
import numpy as np
|
|
|
|
| 7 |
|
| 8 |
_PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
| 9 |
if _PROJECT_ROOT not in sys.path:
|
| 10 |
sys.path.insert(0, _PROJECT_ROOT)
|
| 11 |
|
| 12 |
-
from models.face_mesh.face_mesh import FaceMeshDetector
|
| 13 |
-
from models.face_orientation.head_pose import HeadPoseEstimator
|
| 14 |
-
from models.eye_behaviour.eye_scorer import EyeBehaviourScorer, compute_mar, MAR_YAWN_THRESHOLD
|
| 15 |
-
from models.
|
| 16 |
-
from models.
|
|
|
|
| 17 |
|
| 18 |
|
| 19 |
class FaceMeshPipeline:
|
| 20 |
-
# frame -> face mesh -> S_face + S_eye (geo + optional YOLO/MobileNet) -> focused / not focused
|
| 21 |
-
|
| 22 |
def __init__(
|
| 23 |
self,
|
| 24 |
max_angle: float = 22.0,
|
|
@@ -26,7 +25,7 @@ class FaceMeshPipeline:
|
|
| 26 |
beta: float = 0.6,
|
| 27 |
threshold: float = 0.55,
|
| 28 |
eye_model_path: str | None = None,
|
| 29 |
-
eye_backend: str = "
|
| 30 |
eye_blend: float = 0.5,
|
| 31 |
):
|
| 32 |
self.detector = FaceMeshDetector()
|
|
@@ -35,7 +34,7 @@ class FaceMeshPipeline:
|
|
| 35 |
self.alpha = alpha
|
| 36 |
self.beta = beta
|
| 37 |
self.threshold = threshold
|
| 38 |
-
self.eye_blend = eye_blend
|
| 39 |
|
| 40 |
self.eye_classifier = load_eye_classifier(
|
| 41 |
path=eye_model_path if eye_model_path and os.path.exists(eye_model_path) else None,
|
|
@@ -68,13 +67,11 @@ class FaceMeshPipeline:
|
|
| 68 |
if landmarks is None:
|
| 69 |
return out
|
| 70 |
|
| 71 |
-
# Head pose -> S_face, yaw/pitch/roll
|
| 72 |
angles = self.head_pose.estimate(landmarks, w, h)
|
| 73 |
if angles is not None:
|
| 74 |
out["yaw"], out["pitch"], out["roll"] = angles
|
| 75 |
out["s_face"] = self.head_pose.score(landmarks, w, h)
|
| 76 |
|
| 77 |
-
# Eye: geometry (EAR + gaze) always; optional model (YOLO/MobileNet) on cropped eyes
|
| 78 |
s_eye_geo = self.eye_scorer.score(landmarks)
|
| 79 |
if self._has_eye_model:
|
| 80 |
left_crop, right_crop, left_bbox, right_bbox = extract_eye_crops(bgr_frame, landmarks)
|
|
@@ -85,11 +82,9 @@ class FaceMeshPipeline:
|
|
| 85 |
else:
|
| 86 |
out["s_eye"] = s_eye_geo
|
| 87 |
|
| 88 |
-
# Mouth open (MAR) -> yawn: force NOT FOCUSED when mouth open
|
| 89 |
out["mar"] = compute_mar(landmarks)
|
| 90 |
out["is_yawning"] = out["mar"] > MAR_YAWN_THRESHOLD
|
| 91 |
|
| 92 |
-
# Fusion; yawn overrides
|
| 93 |
out["raw_score"] = self.alpha * out["s_face"] + self.beta * out["s_eye"]
|
| 94 |
out["is_focused"] = out["raw_score"] >= self.threshold and not out["is_yawning"]
|
| 95 |
|
|
@@ -107,3 +102,66 @@ class FaceMeshPipeline:
|
|
| 107 |
|
| 108 |
def __exit__(self, *args):
|
| 109 |
self.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import glob
|
|
|
|
| 2 |
import os
|
| 3 |
import sys
|
| 4 |
|
| 5 |
import numpy as np
|
| 6 |
+
import joblib
|
| 7 |
|
| 8 |
_PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
| 9 |
if _PROJECT_ROOT not in sys.path:
|
| 10 |
sys.path.insert(0, _PROJECT_ROOT)
|
| 11 |
|
| 12 |
+
from models.pretrained.face_mesh.face_mesh import FaceMeshDetector
|
| 13 |
+
from models.geometric.face_orientation.head_pose import HeadPoseEstimator
|
| 14 |
+
from models.geometric.eye_behaviour.eye_scorer import EyeBehaviourScorer, compute_mar, MAR_YAWN_THRESHOLD
|
| 15 |
+
from models.cnn.eye_attention.crop import extract_eye_crops
|
| 16 |
+
from models.cnn.eye_attention.classifier import load_eye_classifier, GeometricOnlyClassifier
|
| 17 |
+
from models.attention.collect_features import FEATURE_NAMES, TemporalTracker, extract_features
|
| 18 |
|
| 19 |
|
| 20 |
class FaceMeshPipeline:
|
|
|
|
|
|
|
| 21 |
def __init__(
|
| 22 |
self,
|
| 23 |
max_angle: float = 22.0,
|
|
|
|
| 25 |
beta: float = 0.6,
|
| 26 |
threshold: float = 0.55,
|
| 27 |
eye_model_path: str | None = None,
|
| 28 |
+
eye_backend: str = "yolo",
|
| 29 |
eye_blend: float = 0.5,
|
| 30 |
):
|
| 31 |
self.detector = FaceMeshDetector()
|
|
|
|
| 34 |
self.alpha = alpha
|
| 35 |
self.beta = beta
|
| 36 |
self.threshold = threshold
|
| 37 |
+
self.eye_blend = eye_blend
|
| 38 |
|
| 39 |
self.eye_classifier = load_eye_classifier(
|
| 40 |
path=eye_model_path if eye_model_path and os.path.exists(eye_model_path) else None,
|
|
|
|
| 67 |
if landmarks is None:
|
| 68 |
return out
|
| 69 |
|
|
|
|
| 70 |
angles = self.head_pose.estimate(landmarks, w, h)
|
| 71 |
if angles is not None:
|
| 72 |
out["yaw"], out["pitch"], out["roll"] = angles
|
| 73 |
out["s_face"] = self.head_pose.score(landmarks, w, h)
|
| 74 |
|
|
|
|
| 75 |
s_eye_geo = self.eye_scorer.score(landmarks)
|
| 76 |
if self._has_eye_model:
|
| 77 |
left_crop, right_crop, left_bbox, right_bbox = extract_eye_crops(bgr_frame, landmarks)
|
|
|
|
| 82 |
else:
|
| 83 |
out["s_eye"] = s_eye_geo
|
| 84 |
|
|
|
|
| 85 |
out["mar"] = compute_mar(landmarks)
|
| 86 |
out["is_yawning"] = out["mar"] > MAR_YAWN_THRESHOLD
|
| 87 |
|
|
|
|
| 88 |
out["raw_score"] = self.alpha * out["s_face"] + self.beta * out["s_eye"]
|
| 89 |
out["is_focused"] = out["raw_score"] >= self.threshold and not out["is_yawning"]
|
| 90 |
|
|
|
|
| 102 |
|
| 103 |
def __exit__(self, *args):
|
| 104 |
self.close()
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def _latest_mlp_artifacts(model_dir):
|
| 108 |
+
mlp_files = sorted(glob.glob(os.path.join(model_dir, "mlp_*.joblib")))
|
| 109 |
+
if not mlp_files:
|
| 110 |
+
return None, None, None
|
| 111 |
+
base = os.path.basename(mlp_files[-1]).replace("mlp_", "").replace(".joblib", "")
|
| 112 |
+
scaler_path = os.path.join(model_dir, f"scaler_{base}.joblib")
|
| 113 |
+
meta_path = os.path.join(model_dir, f"meta_{base}.npz")
|
| 114 |
+
if not os.path.isfile(scaler_path) or not os.path.isfile(meta_path):
|
| 115 |
+
return None, None, None
|
| 116 |
+
return mlp_files[-1], scaler_path, meta_path
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
class MLPPipeline:
|
| 120 |
+
def __init__(self, model_dir=None):
|
| 121 |
+
if model_dir is None:
|
| 122 |
+
model_dir = os.path.join(_PROJECT_ROOT, "MLP", "models")
|
| 123 |
+
mlp_path, scaler_path, meta_path = _latest_mlp_artifacts(model_dir)
|
| 124 |
+
if mlp_path is None:
|
| 125 |
+
raise FileNotFoundError(f"No MLP artifacts in {model_dir}")
|
| 126 |
+
self._mlp = joblib.load(mlp_path)
|
| 127 |
+
self._scaler = joblib.load(scaler_path)
|
| 128 |
+
meta = np.load(meta_path, allow_pickle=True)
|
| 129 |
+
self._feature_names = list(meta["feature_names"])
|
| 130 |
+
self._detector = FaceMeshDetector()
|
| 131 |
+
self._head_pose = HeadPoseEstimator()
|
| 132 |
+
self._eye_scorer = EyeBehaviourScorer()
|
| 133 |
+
self._temporal = TemporalTracker()
|
| 134 |
+
self._indices = [FEATURE_NAMES.index(n) for n in self._feature_names]
|
| 135 |
+
print(f"[MLP] Loaded {mlp_path} | {len(self._feature_names)} features")
|
| 136 |
+
|
| 137 |
+
def process_frame(self, bgr_frame):
|
| 138 |
+
landmarks = self._detector.process(bgr_frame)
|
| 139 |
+
h, w = bgr_frame.shape[:2]
|
| 140 |
+
out = {
|
| 141 |
+
"landmarks": landmarks,
|
| 142 |
+
"is_focused": False,
|
| 143 |
+
"s_face": 0.0,
|
| 144 |
+
"s_eye": 0.0,
|
| 145 |
+
"raw_score": 0.0,
|
| 146 |
+
"mar": None,
|
| 147 |
+
"yaw": None,
|
| 148 |
+
"pitch": None,
|
| 149 |
+
"roll": None,
|
| 150 |
+
}
|
| 151 |
+
if landmarks is None:
|
| 152 |
+
return out
|
| 153 |
+
vec = extract_features(landmarks, w, h, self._head_pose, self._eye_scorer, self._temporal)
|
| 154 |
+
X = vec[self._indices].reshape(1, -1).astype(np.float64)
|
| 155 |
+
X_sc = self._scaler.transform(X)
|
| 156 |
+
pred = self._mlp.predict(X_sc)
|
| 157 |
+
out["is_focused"] = bool(pred[0] == 1)
|
| 158 |
+
return out
|
| 159 |
+
|
| 160 |
+
def close(self):
|
| 161 |
+
self._detector.close()
|
| 162 |
+
|
| 163 |
+
def __enter__(self):
|
| 164 |
+
return self
|
| 165 |
+
|
| 166 |
+
def __exit__(self, *args):
|
| 167 |
+
self.close()
|