Spaces:
Sleeping
Sleeping
k22056537 commited on
Commit ·
6114098
1
Parent(s): 633f159
chore: integration cleanup — remove eye CNN, add threshold justification, fix pipeline
Browse files- .gitignore +1 -0
- FOCUS_SCORE_EQUATIONS.md +147 -0
- checkpoints/hybrid_focus_config.json +5 -5
- data/CNN/eye_crops/val/open/.gitkeep +0 -1
- evaluation/README.md +34 -8
- evaluation/THRESHOLD_JUSTIFICATION.md +89 -0
- evaluation/justify_thresholds.py +463 -0
- evaluation/plots/ear_distribution.png +0 -0
- evaluation/plots/geo_weight_search.png +0 -0
- evaluation/plots/hybrid_weight_search.png +0 -0
- evaluation/plots/mar_distribution.png +0 -0
- evaluation/plots/roc_mlp.png +0 -0
- evaluation/plots/roc_xgb.png +0 -0
- models/README.md +0 -2
- models/cnn/CNN_MODEL/.claude/settings.local.json +0 -7
- models/cnn/CNN_MODEL/.gitattributes +0 -1
- models/cnn/CNN_MODEL/.gitignore +0 -4
- models/cnn/CNN_MODEL/README.md +0 -74
- models/cnn/CNN_MODEL/notebooks/eye_classifier_colab.ipynb +0 -0
- models/cnn/CNN_MODEL/scripts/focus_infer.py +0 -199
- models/cnn/CNN_MODEL/scripts/predict_image.py +0 -49
- models/cnn/CNN_MODEL/scripts/video_infer.py +0 -281
- models/cnn/CNN_MODEL/scripts/webcam_live.py +0 -184
- models/cnn/CNN_MODEL/weights/yolo11s-cls.pt +0 -3
- models/cnn/__init__.py +0 -0
- models/cnn/eye_attention/__init__.py +0 -1
- models/cnn/eye_attention/classifier.py +0 -169
- models/cnn/eye_attention/crop.py +0 -70
- models/cnn/eye_attention/train.py +0 -0
- models/cnn/notebooks/EyeCNN.ipynb +0 -107
- models/cnn/notebooks/EyeCNN_Train_Evaluate_new.ipynb +0 -0
- models/cnn/notebooks/EyeCNN_Training_Evaluate.ipynb +0 -0
- models/cnn/notebooks/README.md +0 -1
- models/eye_classifier.py +0 -69
- models/eye_crop.py +0 -77
- models/xgboost/checkpoints/face_orientation_best.json +0 -0
- public/assets/111.jpg +0 -0
- src/assets/react.svg +0 -1
- ui/live_demo.py +0 -14
- ui/pipeline.py +20 -87
- yolov8n.pt +0 -3
.gitignore
CHANGED
|
@@ -37,6 +37,7 @@ ignore/
|
|
| 37 |
|
| 38 |
# Project specific
|
| 39 |
focus_guard.db
|
|
|
|
| 40 |
static/
|
| 41 |
__pycache__/
|
| 42 |
docs/
|
|
|
|
| 37 |
|
| 38 |
# Project specific
|
| 39 |
focus_guard.db
|
| 40 |
+
test_focus_guard.db
|
| 41 |
static/
|
| 42 |
__pycache__/
|
| 43 |
docs/
|
FOCUS_SCORE_EQUATIONS.md
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# How the focused/unfocused score is computed
|
| 2 |
+
|
| 3 |
+
The system outputs a **focus score** in `[0, 1]` and a binary **focused/unfocused** label. The label is derived from the score and a threshold. The exact equation depends on which pipeline (model) you use.
|
| 4 |
+
|
| 5 |
+
---
|
| 6 |
+
|
| 7 |
+
## 1. Final output (all pipelines)
|
| 8 |
+
|
| 9 |
+
- **`raw_score`** (or **`focus_score`** in Hybrid): value in `[0, 1]` after optional smoothing.
|
| 10 |
+
- **`is_focused`**: binary label.
|
| 11 |
+
|
| 12 |
+
**Equation:**
|
| 13 |
+
|
| 14 |
+
```text
|
| 15 |
+
is_focused = (smoothed_score >= threshold)
|
| 16 |
+
```
|
| 17 |
+
|
| 18 |
+
- **Smoothed score:** the pipeline may apply an exponential moving average (EMA) to the raw score; that smoothed value is what you see as `raw_score` / `focus_score` in the API.
|
| 19 |
+
- **Threshold:** set in the UI (sensitivity) or in pipeline config; the tuned hybrid config uses **0.35** (see `evaluation/THRESHOLD_JUSTIFICATION.md`).
|
| 20 |
+
|
| 21 |
+
So: **focus score** is the continuous value; **focused vs unfocused** is **score ≥ threshold** vs **score < threshold**.
|
| 22 |
+
|
| 23 |
+
---
|
| 24 |
+
|
| 25 |
+
## 2. Geometric pipeline (rule-based, no ML)
|
| 26 |
+
|
| 27 |
+
**Raw score (before smoothing):**
|
| 28 |
+
|
| 29 |
+
```text
|
| 30 |
+
raw = α · s_face + β · s_eye
|
| 31 |
+
```
|
| 32 |
+
|
| 33 |
+
- Default: **α = 0.7**, **β = 0.3** (face weight 70%, eye weight 30%; tuned via LOPO grid search — see `evaluation/THRESHOLD_JUSTIFICATION.md`).
|
| 34 |
+
- If **yawning** (MAR > 0.55): **raw = 0**.
|
| 35 |
+
|
| 36 |
+
**Face score `s_face`** (head pose, from `HeadPoseEstimator`):
|
| 37 |
+
|
| 38 |
+
- **deviation** = √( yaw² + pitch² + (0.5·roll)² )
|
| 39 |
+
- **t** = min( deviation / max_angle , 1 ), with **max_angle = 22°** (default).
|
| 40 |
+
- **s_face** = 0.5 · (1 + cos(π · t))
|
| 41 |
+
→ 1 when head is straight, 0 when deviation ≥ max_angle.
|
| 42 |
+
|
| 43 |
+
**Eye score `s_eye`** (from `EyeBehaviourScorer`):
|
| 44 |
+
|
| 45 |
+
- **EAR** = Eye Aspect Ratio (from landmarks); use **min(left_ear, right_ear)**.
|
| 46 |
+
- **ear_s** = linear map of EAR to [0,1] between `ear_closed=0.16` and `ear_open=0.30`.
|
| 47 |
+
- **Gaze:** horizontal/vertical gaze ratios from iris position; **offset** = distance from center (0.5, 0.5).
|
| 48 |
+
- **gaze_s** = 0.5 · (1 + cos(π · t)), with **t** = min( offset / gaze_max_offset , 1 ), **gaze_max_offset = 0.28**.
|
| 49 |
+
- **s_eye** = ear_s · gaze_s (or just ear_s if ear_s < 0.3).
|
| 50 |
+
|
| 51 |
+
Then:
|
| 52 |
+
|
| 53 |
+
```text
|
| 54 |
+
smoothed_score = EMA(raw)
|
| 55 |
+
is_focused = (smoothed_score >= threshold)
|
| 56 |
+
```
|
| 57 |
+
|
| 58 |
+
---
|
| 59 |
+
|
| 60 |
+
## 3. MLP pipeline
|
| 61 |
+
|
| 62 |
+
- Features are extracted (same 17-d feature vector as in training), clipped, then optionally extended (magnitudes, velocities, variances) and scaled with the **training-time scaler**.
|
| 63 |
+
- The MLP outputs either:
|
| 64 |
+
- **Probability of class 1 (focused):** `mlp_prob = predict_proba(X_sc)[0, 1]`, or
|
| 65 |
+
- If no `predict_proba`: **mlp_prob = 1 if predict(X_sc) == 1 else 0**.
|
| 66 |
+
|
| 67 |
+
**Equations:**
|
| 68 |
+
|
| 69 |
+
```text
|
| 70 |
+
raw_score = mlp_prob (clipped to [0, 1])
|
| 71 |
+
smoothed_score = EMA(raw_score)
|
| 72 |
+
is_focused = (smoothed_score >= threshold)
|
| 73 |
+
```
|
| 74 |
+
|
| 75 |
+
So the **focus score** is the **MLP’s estimated probability of being focused** (after optional smoothing).
|
| 76 |
+
|
| 77 |
+
---
|
| 78 |
+
|
| 79 |
+
## 4. XGBoost pipeline
|
| 80 |
+
|
| 81 |
+
- Same feature extraction and clipping; uses the **same feature subset** as in XGBoost training (no runtime magnitude/velocity extension).
|
| 82 |
+
- **prob** = `predict_proba(X)[0]` → **[P(unfocused), P(focused)]**.
|
| 83 |
+
|
| 84 |
+
**Equations:**
|
| 85 |
+
|
| 86 |
+
```text
|
| 87 |
+
raw_score = prob[1] (probability of focused class)
|
| 88 |
+
smoothed_score = EMA(raw_score)
|
| 89 |
+
is_focused = (smoothed_score >= threshold)
|
| 90 |
+
```
|
| 91 |
+
|
| 92 |
+
So the **focus score** is the **XGBoost probability of the focused class**.
|
| 93 |
+
|
| 94 |
+
---
|
| 95 |
+
|
| 96 |
+
## 5. Hybrid pipeline (MLP + geometric)
|
| 97 |
+
|
| 98 |
+
Combines the MLP’s probability with a geometric score, then applies a single threshold.
|
| 99 |
+
|
| 100 |
+
**Geometric part:**
|
| 101 |
+
|
| 102 |
+
```text
|
| 103 |
+
geo_score = geo_face_weight · s_face + geo_eye_weight · s_eye
|
| 104 |
+
```
|
| 105 |
+
|
| 106 |
+
- Default: **geo_face_weight = 0.7**, **geo_eye_weight = 0.3** (matches the tuned values in `checkpoints/hybrid_focus_config.json`).
|
| 107 |
+
- **s_face** and **s_eye** as in the Geometric pipeline (with optional yawn veto: if yawning, **geo_score = 0**).
|
| 108 |
+
- **geo_score** is clipped to [0, 1].
|
| 109 |
+
|
| 110 |
+
**MLP part:** same as MLP pipeline → **mlp_prob** in [0, 1].
|
| 111 |
+
|
| 112 |
+
**Combined focus score (default weights):**
|
| 113 |
+
|
| 114 |
+
```text
|
| 115 |
+
focus_score = w_mlp · mlp_prob + w_geo · geo_score
|
| 116 |
+
```
|
| 117 |
+
|
| 118 |
+
- Default: **w_mlp = 0.3**, **w_geo = 0.7** (after normalising so weights sum to 1; tuned via LOPO grid search).
|
| 119 |
+
- **focus_score** is clipped to [0, 1], then smoothed.
|
| 120 |
+
|
| 121 |
+
**Equations:**
|
| 122 |
+
|
| 123 |
+
```text
|
| 124 |
+
focus_score = clip( w_mlp · mlp_prob + w_geo · geo_score , 0 , 1 )
|
| 125 |
+
smoothed_score = EMA(focus_score)
|
| 126 |
+
is_focused = (smoothed_score >= threshold)
|
| 127 |
+
```
|
| 128 |
+
|
| 129 |
+
Default **threshold** in hybrid config is **0.35** (see `checkpoints/hybrid_focus_config.json`).
|
| 130 |
+
|
| 131 |
+
---
|
| 132 |
+
|
| 133 |
+
## 6. Summary table
|
| 134 |
+
|
| 135 |
+
| Pipeline | Raw score formula | Focused condition |
|
| 136 |
+
|-----------|--------------------------------------|-----------------------------|
|
| 137 |
+
| Geometric | α·s_face + β·s_eye (0 if yawn) | smoothed ≥ threshold |
|
| 138 |
+
| MLP | MLP P(focused) | smoothed ≥ threshold |
|
| 139 |
+
| XGBoost | XGB P(focused) | smoothed ≥ threshold |
|
| 140 |
+
| Hybrid | w_mlp·mlp_prob + w_geo·geo_score | smoothed ≥ threshold |
|
| 141 |
+
|
| 142 |
+
**s_face** = head-pose score (cosine of normalised deviation).
|
| 143 |
+
**s_eye** = eye score (EAR × gaze score, or blend with CNN).
|
| 144 |
+
**geo_score** = geo_face_weight·s_face + geo_eye_weight·s_eye (with optional yawn veto).
|
| 145 |
+
**EMA** = exponential moving average (e.g. α=0.3) for temporal smoothing.
|
| 146 |
+
|
| 147 |
+
So: **focus score** is always a number in [0, 1]; **focused vs unfocused** is **score ≥ threshold** in all pipelines.
|
checkpoints/hybrid_focus_config.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"w_mlp": 0.
|
| 3 |
-
"w_geo": 0.
|
| 4 |
"threshold": 0.35,
|
| 5 |
"use_yawn_veto": true,
|
| 6 |
-
"geo_face_weight": 0.
|
| 7 |
-
"geo_eye_weight": 0.
|
| 8 |
"mar_yawn_threshold": 0.55,
|
| 9 |
"metric": "f1"
|
| 10 |
-
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"w_mlp": 0.3,
|
| 3 |
+
"w_geo": 0.7,
|
| 4 |
"threshold": 0.35,
|
| 5 |
"use_yawn_veto": true,
|
| 6 |
+
"geo_face_weight": 0.7,
|
| 7 |
+
"geo_eye_weight": 0.3,
|
| 8 |
"mar_yawn_threshold": 0.55,
|
| 9 |
"metric": "f1"
|
| 10 |
+
}
|
data/CNN/eye_crops/val/open/.gitkeep
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
|
|
|
|
|
|
evaluation/README.md
CHANGED
|
@@ -1,14 +1,22 @@
|
|
| 1 |
# evaluation/
|
| 2 |
|
| 3 |
-
Training logs and performance metrics.
|
| 4 |
|
| 5 |
## 1. Contents
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
```
|
| 8 |
logs/
|
| 9 |
-
├── face_orientation_training_log.json
|
| 10 |
-
├── mlp_face_orientation_training_log.json
|
| 11 |
-
└── xgboost_face_orientation_training_log.json
|
| 12 |
```
|
| 13 |
|
| 14 |
## 2. Log Format
|
|
@@ -39,8 +47,26 @@ Each JSON file records the full training history:
|
|
| 39 |
}
|
| 40 |
```
|
| 41 |
|
| 42 |
-
## 3.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
-
- `python -m models.mlp.train` →
|
| 45 |
-
- `python -m models.xgboost.train` →
|
| 46 |
-
-
|
|
|
|
|
|
| 1 |
# evaluation/
|
| 2 |
|
| 3 |
+
Training logs, threshold analysis, and performance metrics.
|
| 4 |
|
| 5 |
## 1. Contents
|
| 6 |
|
| 7 |
+
```
|
| 8 |
+
logs/ # training run logs (JSON)
|
| 9 |
+
plots/ # threshold justification figures (ROC, weight search, EAR/MAR)
|
| 10 |
+
justify_thresholds.py # LOPO analysis script
|
| 11 |
+
THRESHOLD_JUSTIFICATION.md # report (auto-generated by script)
|
| 12 |
+
```
|
| 13 |
+
|
| 14 |
+
**Logs (when present):**
|
| 15 |
```
|
| 16 |
logs/
|
| 17 |
+
├── face_orientation_training_log.json
|
| 18 |
+
├── mlp_face_orientation_training_log.json
|
| 19 |
+
└── xgboost_face_orientation_training_log.json
|
| 20 |
```
|
| 21 |
|
| 22 |
## 2. Log Format
|
|
|
|
| 47 |
}
|
| 48 |
```
|
| 49 |
|
| 50 |
+
## 3. Threshold justification
|
| 51 |
+
|
| 52 |
+
Thresholds and weights used in the app (geometric, MLP, XGBoost, hybrid) are justified in **THRESHOLD_JUSTIFICATION.md**. The report is generated by:
|
| 53 |
+
|
| 54 |
+
```bash
|
| 55 |
+
python -m evaluation.justify_thresholds
|
| 56 |
+
```
|
| 57 |
+
|
| 58 |
+
From repo root, with venv active. The script runs LOPO over 9 participants (~145k samples), computes ROC + Youden's J for MLP/XGBoost thresholds, grid-searches geometric and hybrid weights, and plots EAR/MAR distributions. It writes:
|
| 59 |
+
|
| 60 |
+
- `plots/roc_mlp.png`, `plots/roc_xgb.png`
|
| 61 |
+
- `plots/geo_weight_search.png`, `plots/hybrid_weight_search.png`
|
| 62 |
+
- `plots/ear_distribution.png`, `plots/mar_distribution.png`
|
| 63 |
+
- `THRESHOLD_JUSTIFICATION.md`
|
| 64 |
+
|
| 65 |
+
Takes ~10–15 minutes. Re-run after changing data or pipeline weights (e.g. geometric face/eye); hybrid optimal w_mlp depends on the geometric sub-score weights.
|
| 66 |
+
|
| 67 |
+
## 4. Generated by
|
| 68 |
|
| 69 |
+
- `python -m models.mlp.train` → MLP log in `logs/`
|
| 70 |
+
- `python -m models.xgboost.train` → XGBoost log in `logs/`
|
| 71 |
+
- `python -m evaluation.justify_thresholds` → plots + THRESHOLD_JUSTIFICATION.md
|
| 72 |
+
- Notebooks in `notebooks/` can also write logs here
|
evaluation/THRESHOLD_JUSTIFICATION.md
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Threshold Justification Report
|
| 2 |
+
|
| 3 |
+
Auto-generated by `evaluation/justify_thresholds.py` using LOPO cross-validation over 9 participants (~145k samples).
|
| 4 |
+
|
| 5 |
+
## 1. ML Model Decision Thresholds
|
| 6 |
+
|
| 7 |
+
Thresholds selected via **Youden's J statistic** (J = sensitivity + specificity - 1) on pooled LOPO held-out predictions.
|
| 8 |
+
|
| 9 |
+
| Model | LOPO AUC | Optimal Threshold (Youden's J) | F1 @ Optimal | F1 @ 0.50 |
|
| 10 |
+
|-------|----------|-------------------------------|--------------|-----------|
|
| 11 |
+
| MLP | 0.8624 | **0.228** | 0.8578 | 0.8149 |
|
| 12 |
+
| XGBoost | 0.8804 | **0.377** | 0.8585 | 0.8424 |
|
| 13 |
+
|
| 14 |
+

|
| 15 |
+
|
| 16 |
+

|
| 17 |
+
|
| 18 |
+
## 2. Geometric Pipeline Weights (s_face vs s_eye)
|
| 19 |
+
|
| 20 |
+
Grid search over face weight alpha in {0.2 ... 0.8}. Eye weight = 1 - alpha. Threshold per fold via Youden's J.
|
| 21 |
+
|
| 22 |
+
| Face Weight (alpha) | Mean LOPO F1 |
|
| 23 |
+
|--------------------:|-------------:|
|
| 24 |
+
| 0.2 | 0.7926 |
|
| 25 |
+
| 0.3 | 0.8002 |
|
| 26 |
+
| 0.4 | 0.7719 |
|
| 27 |
+
| 0.5 | 0.7868 |
|
| 28 |
+
| 0.6 | 0.8184 |
|
| 29 |
+
| 0.7 | 0.8195 **<-- selected** |
|
| 30 |
+
| 0.8 | 0.8126 |
|
| 31 |
+
|
| 32 |
+
**Best:** alpha = 0.7 (face 70%, eye 30%)
|
| 33 |
+
|
| 34 |
+

|
| 35 |
+
|
| 36 |
+
## 3. Hybrid Pipeline Weights (MLP vs Geometric)
|
| 37 |
+
|
| 38 |
+
Grid search over w_mlp in {0.3 ... 0.8}. w_geo = 1 - w_mlp. Geometric sub-score uses same weights as geometric pipeline (face=0.7, eye=0.3). If you change geometric weights, re-run this script — optimal w_mlp can shift.
|
| 39 |
+
|
| 40 |
+
| MLP Weight (w_mlp) | Mean LOPO F1 |
|
| 41 |
+
|-------------------:|-------------:|
|
| 42 |
+
| 0.3 | 0.8409 **<-- selected** |
|
| 43 |
+
| 0.4 | 0.8246 |
|
| 44 |
+
| 0.5 | 0.8164 |
|
| 45 |
+
| 0.6 | 0.8106 |
|
| 46 |
+
| 0.7 | 0.8039 |
|
| 47 |
+
| 0.8 | 0.8016 |
|
| 48 |
+
|
| 49 |
+
**Best:** w_mlp = 0.3 (MLP 30%, geometric 70%)
|
| 50 |
+
|
| 51 |
+

|
| 52 |
+
|
| 53 |
+
## 4. Eye and Mouth Aspect Ratio Thresholds
|
| 54 |
+
|
| 55 |
+
### EAR (Eye Aspect Ratio)
|
| 56 |
+
|
| 57 |
+
Reference: Soukupova & Cech, "Real-Time Eye Blink Detection Using Facial Landmarks" (2016) established EAR ~ 0.2 as a blink threshold.
|
| 58 |
+
|
| 59 |
+
Our thresholds define a linear interpolation zone around this established value:
|
| 60 |
+
|
| 61 |
+
| Constant | Value | Justification |
|
| 62 |
+
|----------|------:|---------------|
|
| 63 |
+
| `ear_closed` | 0.16 | Below this, eyes are fully shut. 16.3% of samples fall here. |
|
| 64 |
+
| `EAR_BLINK_THRESH` | 0.21 | Blink detection point; close to the 0.2 reference. 21.2% of samples below. |
|
| 65 |
+
| `ear_open` | 0.30 | Above this, eyes are fully open. 70.4% of samples here. |
|
| 66 |
+
|
| 67 |
+
Between 0.16 and 0.30 the `_ear_score` function linearly interpolates from 0 to 1, providing a smooth transition rather than a hard binary cutoff.
|
| 68 |
+
|
| 69 |
+

|
| 70 |
+
|
| 71 |
+
### MAR (Mouth Aspect Ratio)
|
| 72 |
+
|
| 73 |
+
| Constant | Value | Justification |
|
| 74 |
+
|----------|------:|---------------|
|
| 75 |
+
| `MAR_YAWN_THRESHOLD` | 0.55 | Only 1.7% of samples exceed this, confirming it captures genuine yawns without false positives. |
|
| 76 |
+
|
| 77 |
+

|
| 78 |
+
|
| 79 |
+
## 5. Other Constants
|
| 80 |
+
|
| 81 |
+
| Constant | Value | Rationale |
|
| 82 |
+
|----------|------:|-----------|
|
| 83 |
+
| `gaze_max_offset` | 0.28 | Max iris displacement (normalised) before gaze score drops to zero. Corresponds to ~56% of the eye width; beyond this the iris is at the extreme edge. |
|
| 84 |
+
| `max_angle` | 22.0 deg | Head deviation beyond which face score = 0. Based on typical monitor-viewing cone: at 60 cm distance and a 24" monitor, the viewing angle is ~20-25 degrees. |
|
| 85 |
+
| `roll_weight` | 0.5 | Roll is less indicative of inattention than yaw/pitch (tilting head doesn't mean looking away), so it's down-weighted by 50%. |
|
| 86 |
+
| `EMA alpha` | 0.3 | Smoothing factor for focus score. Gives ~3-4 frame effective window; balances responsiveness vs flicker. |
|
| 87 |
+
| `grace_frames` | 15 | ~0.5 s at 30 fps before penalising no-face. Allows brief occlusions (e.g. hand gesture) without dropping score. |
|
| 88 |
+
| `PERCLOS_WINDOW` | 60 frames | 2 s at 30 fps; standard PERCLOS measurement window (Dinges & Grace, 1998). |
|
| 89 |
+
| `BLINK_WINDOW_SEC` | 30 s | Blink rate measured over 30 s; typical spontaneous blink rate is 15-20/min (Bentivoglio et al., 1997). |
|
evaluation/justify_thresholds.py
ADDED
|
@@ -0,0 +1,463 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# LOPO threshold/weight analysis. Run: python -m evaluation.justify_thresholds
|
| 2 |
+
|
| 3 |
+
import glob
|
| 4 |
+
import os
|
| 5 |
+
import sys
|
| 6 |
+
|
| 7 |
+
import numpy as np
|
| 8 |
+
import matplotlib
|
| 9 |
+
matplotlib.use("Agg")
|
| 10 |
+
import matplotlib.pyplot as plt
|
| 11 |
+
from sklearn.neural_network import MLPClassifier
|
| 12 |
+
from sklearn.preprocessing import StandardScaler
|
| 13 |
+
from sklearn.metrics import roc_curve, roc_auc_score, f1_score
|
| 14 |
+
from xgboost import XGBClassifier
|
| 15 |
+
|
| 16 |
+
_PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
| 17 |
+
sys.path.insert(0, _PROJECT_ROOT)
|
| 18 |
+
|
| 19 |
+
from data_preparation.prepare_dataset import load_per_person, SELECTED_FEATURES
|
| 20 |
+
|
| 21 |
+
PLOTS_DIR = os.path.join(os.path.dirname(__file__), "plots")
|
| 22 |
+
REPORT_PATH = os.path.join(os.path.dirname(__file__), "THRESHOLD_JUSTIFICATION.md")
|
| 23 |
+
SEED = 42
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def _youdens_j(y_true, y_prob):
    """Return (optimal_threshold, fpr, tpr, thresholds, auc).

    The optimal threshold maximises Youden's J statistic
    (J = TPR - FPR) over the ROC curve of *y_prob* against *y_true*.
    """
    fpr, tpr, thresholds = roc_curve(y_true, y_prob)
    best = (tpr - fpr).argmax()
    return (
        float(thresholds[best]),
        fpr,
        tpr,
        thresholds,
        float(roc_auc_score(y_true, y_prob)),
    )
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def _f1_at_threshold(y_true, y_prob, threshold):
    """F1 score of the binary prediction obtained by cutting *y_prob* at *threshold*."""
    y_pred = (y_prob >= threshold).astype(int)
    return f1_score(y_true, y_pred, zero_division=0)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def _plot_roc(fpr, tpr, auc, opt_thresh, opt_idx, title, path):
    """Save a ROC curve PNG with the Youden's J optimum highlighted.

    Args:
        fpr, tpr: ROC coordinates.
        auc: area under the curve (shown in the legend).
        opt_thresh: threshold at the Youden's J optimum.
        opt_idx: index of that optimum into fpr/tpr.
        title: figure title.
        path: output file path.
    """
    figure, axes = plt.subplots(figsize=(6, 5))
    axes.plot(fpr, tpr, lw=2, label=f"ROC (AUC = {auc:.4f})")
    axes.plot(
        fpr[opt_idx], tpr[opt_idx], "ro", markersize=10,
        label=f"Youden's J optimum (t = {opt_thresh:.3f})",
    )
    # Chance diagonal for reference.
    axes.plot([0, 1], [0, 1], "k--", lw=1, alpha=0.5)
    axes.set_xlabel("False Positive Rate")
    axes.set_ylabel("True Positive Rate")
    axes.set_title(title)
    axes.legend(loc="lower right")
    figure.tight_layout()
    figure.savefig(path, dpi=150)
    plt.close(figure)
    print(f" saved {path}")
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def run_lopo_models():
    """Leave-one-person-out evaluation of the MLP and XGBoost classifiers.

    Each participant is held out in turn; both models are trained on the
    remaining participants (features standardised on the training folds
    only) and held-out P(focused) probabilities are collected.

    Returns:
        dict: {"mlp": {"y": labels, "p": probs}, "xgb": {...}} with the
        per-fold arrays pooled (concatenated in sorted-person order —
        downstream code relies on this ordering).
    """
    print("\n=== LOPO: MLP and XGBoost ===")
    by_person, _, _ = load_per_person("face_orientation")
    persons = sorted(by_person.keys())

    results = {"mlp": {"y": [], "p": []}, "xgb": {"y": [], "p": []}}

    for i, held_out in enumerate(persons):
        X_test, y_test = by_person[held_out]

        train_folds = [by_person[p] for p in persons if p != held_out]
        train_X = np.concatenate([fold[0] for fold in train_folds])
        train_y = np.concatenate([fold[1] for fold in train_folds])

        # Fit the scaler on the training folds only (no test leakage).
        scaler = StandardScaler().fit(train_X)
        X_tr_sc = scaler.transform(train_X)
        X_te_sc = scaler.transform(X_test)

        mlp = MLPClassifier(
            hidden_layer_sizes=(64, 32),
            activation="relu",
            max_iter=200,
            early_stopping=True,
            validation_fraction=0.15,
            random_state=SEED,
            verbose=False,
        )
        mlp.fit(X_tr_sc, train_y)
        results["mlp"]["y"].append(y_test)
        results["mlp"]["p"].append(mlp.predict_proba(X_te_sc)[:, 1])

        # NOTE(review): use_label_encoder is deprecated/removed in recent
        # xgboost releases — confirm the pinned version still accepts it.
        xgb = XGBClassifier(
            n_estimators=600,
            max_depth=8,
            learning_rate=0.05,
            subsample=0.8,
            colsample_bytree=0.8,
            reg_alpha=0.1,
            reg_lambda=1.0,
            use_label_encoder=False,
            eval_metric="logloss",
            random_state=SEED,
            verbosity=0,
        )
        xgb.fit(X_tr_sc, train_y)
        results["xgb"]["y"].append(y_test)
        results["xgb"]["p"].append(xgb.predict_proba(X_te_sc)[:, 1])

        print(f" fold {i+1}/{len(persons)}: held out {held_out} "
              f"({X_test.shape[0]} samples)")

    # Pool the folds into single arrays per model.
    for stats in results.values():
        stats["y"] = np.concatenate(stats["y"])
        stats["p"] = np.concatenate(stats["p"])

    return results
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def analyse_model_thresholds(results):
    """Derive optimal decision thresholds from pooled LOPO predictions.

    For each model, finds the Youden's J threshold, compares F1 at that
    threshold against the naive 0.50 cut-off, and saves a ROC plot.

    Args:
        results: output of run_lopo_models().

    Returns:
        dict: per-model stats (label, auc, opt_threshold, f1_opt, f1_50).
    """
    print("\n=== Model threshold analysis ===")
    model_stats = {}

    for name, label in (("mlp", "MLP"), ("xgb", "XGBoost")):
        y = results[name]["y"]
        p = results[name]["p"]
        opt_t, fpr, tpr, thresholds, auc = _youdens_j(y, p)
        # Recover the index of the Youden optimum for plotting.
        opt_idx = (tpr - fpr).argmax()

        stats = {
            "label": label,
            "auc": auc,
            "opt_threshold": opt_t,
            "f1_opt": _f1_at_threshold(y, p, opt_t),
            "f1_50": _f1_at_threshold(y, p, 0.50),
        }
        model_stats[name] = stats

        _plot_roc(
            fpr, tpr, auc, opt_t, opt_idx,
            f"LOPO ROC — {label} (9 folds, 144k samples)",
            os.path.join(PLOTS_DIR, f"roc_{name}.png"),
        )

        print(f" {label}: AUC={auc:.4f}, optimal threshold={opt_t:.3f} "
              f"(F1={stats['f1_opt']:.4f}), F1@0.50={stats['f1_50']:.4f}")

    return model_stats
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
def run_geo_weight_search():
    """Grid-search the geometric pipeline's face/eye weighting via LOPO.

    For each candidate face weight alpha in {0.2, ..., 0.8} (eye weight
    = 1 - alpha) the decision threshold is picked on the training folds
    with Youden's J, then F1 is scored on the held-out person.

    Returns:
        tuple: ({alpha: mean LOPO F1}, best_alpha).
    """
    print("\n=== Geometric weight grid search ===")

    by_person, _, _ = load_per_person("face_orientation")
    persons = sorted(by_person.keys())
    features = SELECTED_FEATURES["face_orientation"]
    sf_idx = features.index("s_face")
    se_idx = features.index("s_eye")

    alphas = np.arange(0.2, 0.85, 0.1).round(1)
    alpha_f1 = {a: [] for a in alphas}

    for held_out in persons:
        X_test, y_test = by_person[held_out]

        others = [p for p in persons if p != held_out]
        train_X = np.concatenate([by_person[p][0] for p in others])
        train_y = np.concatenate([by_person[p][1] for p in others])

        sf_te, se_te = X_test[:, sf_idx], X_test[:, se_idx]
        sf_tr, se_tr = train_X[:, sf_idx], train_X[:, se_idx]

        for a in alphas:
            # Pick the threshold on training data only, score on held-out.
            opt_t, *_ = _youdens_j(train_y, a * sf_tr + (1.0 - a) * se_tr)
            alpha_f1[a].append(
                _f1_at_threshold(y_test, a * sf_te + (1.0 - a) * se_te, opt_t)
            )

    mean_f1 = {a: np.mean(scores) for a, scores in alpha_f1.items()}
    best_alpha = max(mean_f1, key=mean_f1.get)

    # Bar chart of mean LOPO F1 per candidate alpha.
    fig, ax = plt.subplots(figsize=(7, 4))
    bar_labels = [f"{a:.1f}" for a in alphas]
    bar_heights = [mean_f1[a] for a in alphas]
    ax.bar(bar_labels, bar_heights, color="steelblue")
    ax.set_xlabel("Face weight (alpha); eye weight = 1 - alpha")
    ax.set_ylabel("Mean LOPO F1")
    ax.set_title("Geometric Pipeline: Face vs Eye Weight Search")
    ax.set_ylim(bottom=max(0, min(bar_heights) - 0.05))
    for pos, a in enumerate(alphas):
        ax.text(pos, mean_f1[a] + 0.003, f"{mean_f1[a]:.3f}",
                ha="center", va="bottom", fontsize=8)
    fig.tight_layout()
    path = os.path.join(PLOTS_DIR, "geo_weight_search.png")
    fig.savefig(path, dpi=150)
    plt.close(fig)
    print(f" saved {path}")

    print(f" Best alpha (face weight) = {best_alpha:.1f}, "
          f"mean LOPO F1 = {mean_f1[best_alpha]:.4f}")
    return dict(mean_f1), best_alpha
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def run_hybrid_weight_search(lopo_results):
    """Grid-search the hybrid pipeline's MLP-vs-geometric blend weight.

    Reuses the pooled MLP probabilities from run_lopo_models() for the
    held-out folds, re-trains a fresh MLP per fold only to pick the
    decision threshold on training data, then scores F1 on the held-out
    person for each candidate w_mlp.

    Args:
        lopo_results: output of run_lopo_models(). lopo_results["mlp"]["p"]
            MUST be the folds concatenated in sorted-person order — the
            offset-slicing below depends on exactly that ordering.

    Returns:
        tuple: ({w_mlp: mean LOPO F1}, best_w_mlp).
    """
    print("\n=== Hybrid weight grid search ===")

    by_person, _, _ = load_per_person("face_orientation")
    persons = sorted(by_person.keys())
    features = SELECTED_FEATURES["face_orientation"]
    sf_idx = features.index("s_face")
    se_idx = features.index("s_eye")

    # Geometric sub-score weights fixed to the geometric pipeline's tuned
    # values; if those change, this search must be re-run.
    GEO_FACE_W = 0.7
    GEO_EYE_W = 0.3

    w_mlps = np.arange(0.3, 0.85, 0.1).round(1)
    wmf1 = {w: [] for w in w_mlps}
    mlp_p = lopo_results["mlp"]["p"]
    # Walk the pooled probability array fold by fold; persons are iterated
    # in the same sorted order used when the folds were concatenated.
    offset = 0
    for held_out in persons:
        X_test, y_test = by_person[held_out]
        n = X_test.shape[0]
        mlp_prob_fold = mlp_p[offset:offset + n]
        offset += n

        sf = X_test[:, sf_idx]
        se = X_test[:, se_idx]
        geo_score = np.clip(GEO_FACE_W * sf + GEO_EYE_W * se, 0, 1)

        train_X = np.concatenate([by_person[p][0] for p in persons if p != held_out])
        train_y = np.concatenate([by_person[p][1] for p in persons if p != held_out])
        sf_tr = train_X[:, sf_idx]
        se_tr = train_X[:, se_idx]
        geo_tr = np.clip(GEO_FACE_W * sf_tr + GEO_EYE_W * se_tr, 0, 1)

        # Fresh per-fold MLP: used only to derive training-set combo scores
        # (and hence the Youden threshold) without touching held-out data.
        scaler = StandardScaler().fit(train_X)
        mlp_tr = MLPClassifier(
            hidden_layer_sizes=(64, 32), activation="relu",
            max_iter=200, early_stopping=True, validation_fraction=0.15,
            random_state=SEED, verbose=False,
        )
        mlp_tr.fit(scaler.transform(train_X), train_y)
        mlp_prob_tr = mlp_tr.predict_proba(scaler.transform(train_X))[:, 1]

        for w in w_mlps:
            # Threshold chosen on the training-fold combined score...
            combo_tr = w * mlp_prob_tr + (1.0 - w) * geo_tr
            opt_t, *_ = _youdens_j(train_y, combo_tr)

            # ...then applied to the held-out combined score.
            combo_te = w * mlp_prob_fold + (1.0 - w) * geo_score
            f1 = _f1_at_threshold(y_test, combo_te, opt_t)
            wmf1[w].append(f1)

    mean_f1 = {w: np.mean(f1s) for w, f1s in wmf1.items()}
    best_w = max(mean_f1, key=mean_f1.get)

    # Bar chart of mean LOPO F1 per candidate w_mlp.
    fig, ax = plt.subplots(figsize=(7, 4))
    ax.bar([f"{w:.1f}" for w in w_mlps],
           [mean_f1[w] for w in w_mlps], color="darkorange")
    ax.set_xlabel("MLP weight (w_mlp); geo weight = 1 - w_mlp")
    ax.set_ylabel("Mean LOPO F1")
    ax.set_title("Hybrid Pipeline: MLP vs Geometric Weight Search")
    ax.set_ylim(bottom=max(0, min(mean_f1.values()) - 0.05))
    for i, w in enumerate(w_mlps):
        ax.text(i, mean_f1[w] + 0.003, f"{mean_f1[w]:.3f}",
                ha="center", va="bottom", fontsize=8)
    fig.tight_layout()
    path = os.path.join(PLOTS_DIR, "hybrid_weight_search.png")
    fig.savefig(path, dpi=150)
    plt.close(fig)
    print(f" saved {path}")

    print(f" Best w_mlp = {best_w:.1f}, mean LOPO F1 = {mean_f1[best_w]:.4f}")
    return dict(mean_f1), best_w
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
def plot_distributions():
|
| 256 |
+
print("\n=== EAR / MAR distributions ===")
|
| 257 |
+
npz_files = sorted(glob.glob(os.path.join(_PROJECT_ROOT, "data", "collected_*", "*.npz")))
|
| 258 |
+
|
| 259 |
+
all_ear_l, all_ear_r, all_mar, all_labels = [], [], [], []
|
| 260 |
+
for f in npz_files:
|
| 261 |
+
d = np.load(f, allow_pickle=True)
|
| 262 |
+
names = list(d["feature_names"])
|
| 263 |
+
feat = d["features"].astype(np.float32)
|
| 264 |
+
lab = d["labels"].astype(np.int64)
|
| 265 |
+
all_ear_l.append(feat[:, names.index("ear_left")])
|
| 266 |
+
all_ear_r.append(feat[:, names.index("ear_right")])
|
| 267 |
+
all_mar.append(feat[:, names.index("mar")])
|
| 268 |
+
all_labels.append(lab)
|
| 269 |
+
|
| 270 |
+
ear_l = np.concatenate(all_ear_l)
|
| 271 |
+
ear_r = np.concatenate(all_ear_r)
|
| 272 |
+
mar = np.concatenate(all_mar)
|
| 273 |
+
labels = np.concatenate(all_labels)
|
| 274 |
+
ear_min = np.minimum(ear_l, ear_r)
|
| 275 |
+
ear_plot = np.clip(ear_min, 0, 0.85)
|
| 276 |
+
mar_plot = np.clip(mar, 0, 1.5)
|
| 277 |
+
|
| 278 |
+
fig, ax = plt.subplots(figsize=(7, 4))
|
| 279 |
+
ax.hist(ear_plot[labels == 1], bins=100, alpha=0.6, label="Focused (1)", density=True)
|
| 280 |
+
ax.hist(ear_plot[labels == 0], bins=100, alpha=0.6, label="Unfocused (0)", density=True)
|
| 281 |
+
for val, lbl, c in [
|
| 282 |
+
(0.16, "ear_closed = 0.16", "red"),
|
| 283 |
+
(0.21, "EAR_BLINK = 0.21", "orange"),
|
| 284 |
+
(0.30, "ear_open = 0.30", "green"),
|
| 285 |
+
]:
|
| 286 |
+
ax.axvline(val, color=c, ls="--", lw=1.5, label=lbl)
|
| 287 |
+
ax.set_xlabel("min(left_EAR, right_EAR)")
|
| 288 |
+
ax.set_ylabel("Density")
|
| 289 |
+
ax.set_title("EAR Distribution by Class (144k samples)")
|
| 290 |
+
ax.legend(fontsize=8)
|
| 291 |
+
fig.tight_layout()
|
| 292 |
+
path = os.path.join(PLOTS_DIR, "ear_distribution.png")
|
| 293 |
+
fig.savefig(path, dpi=150)
|
| 294 |
+
plt.close(fig)
|
| 295 |
+
print(f" saved {path}")
|
| 296 |
+
|
| 297 |
+
fig, ax = plt.subplots(figsize=(7, 4))
|
| 298 |
+
ax.hist(mar_plot[labels == 1], bins=100, alpha=0.6, label="Focused (1)", density=True)
|
| 299 |
+
ax.hist(mar_plot[labels == 0], bins=100, alpha=0.6, label="Unfocused (0)", density=True)
|
| 300 |
+
ax.axvline(0.55, color="red", ls="--", lw=1.5, label="MAR_YAWN = 0.55")
|
| 301 |
+
ax.set_xlabel("Mouth Aspect Ratio (MAR)")
|
| 302 |
+
ax.set_ylabel("Density")
|
| 303 |
+
ax.set_title("MAR Distribution by Class (144k samples)")
|
| 304 |
+
ax.legend(fontsize=8)
|
| 305 |
+
fig.tight_layout()
|
| 306 |
+
path = os.path.join(PLOTS_DIR, "mar_distribution.png")
|
| 307 |
+
fig.savefig(path, dpi=150)
|
| 308 |
+
plt.close(fig)
|
| 309 |
+
print(f" saved {path}")
|
| 310 |
+
|
| 311 |
+
closed_pct = np.mean(ear_min < 0.16) * 100
|
| 312 |
+
blink_pct = np.mean(ear_min < 0.21) * 100
|
| 313 |
+
open_pct = np.mean(ear_min >= 0.30) * 100
|
| 314 |
+
yawn_pct = np.mean(mar > 0.55) * 100
|
| 315 |
+
|
| 316 |
+
stats = {
|
| 317 |
+
"ear_below_016": closed_pct,
|
| 318 |
+
"ear_below_021": blink_pct,
|
| 319 |
+
"ear_above_030": open_pct,
|
| 320 |
+
"mar_above_055": yawn_pct,
|
| 321 |
+
"n_samples": len(ear_min),
|
| 322 |
+
}
|
| 323 |
+
print(f" EAR<0.16 (closed): {closed_pct:.1f}% | EAR<0.21 (blink): {blink_pct:.1f}% | "
|
| 324 |
+
f"EAR>=0.30 (open): {open_pct:.1f}%")
|
| 325 |
+
print(f" MAR>0.55 (yawn): {yawn_pct:.1f}%")
|
| 326 |
+
return stats
|
| 327 |
+
|
| 328 |
+
|
| 329 |
+
def write_report(model_stats, geo_f1, best_alpha, hybrid_f1, best_w, dist_stats):
|
| 330 |
+
lines = []
|
| 331 |
+
lines.append("# Threshold Justification Report")
|
| 332 |
+
lines.append("")
|
| 333 |
+
lines.append("Auto-generated by `evaluation/justify_thresholds.py` using LOPO cross-validation "
|
| 334 |
+
"over 9 participants (~145k samples).")
|
| 335 |
+
lines.append("")
|
| 336 |
+
|
| 337 |
+
lines.append("## 1. ML Model Decision Thresholds")
|
| 338 |
+
lines.append("")
|
| 339 |
+
lines.append("Thresholds selected via **Youden's J statistic** (J = sensitivity + specificity - 1) "
|
| 340 |
+
"on pooled LOPO held-out predictions.")
|
| 341 |
+
lines.append("")
|
| 342 |
+
lines.append("| Model | LOPO AUC | Optimal Threshold (Youden's J) | F1 @ Optimal | F1 @ 0.50 |")
|
| 343 |
+
lines.append("|-------|----------|-------------------------------|--------------|-----------|")
|
| 344 |
+
for key in ("mlp", "xgb"):
|
| 345 |
+
s = model_stats[key]
|
| 346 |
+
lines.append(f"| {s['label']} | {s['auc']:.4f} | **{s['opt_threshold']:.3f}** | "
|
| 347 |
+
f"{s['f1_opt']:.4f} | {s['f1_50']:.4f} |")
|
| 348 |
+
lines.append("")
|
| 349 |
+
lines.append("")
|
| 350 |
+
lines.append("")
|
| 351 |
+
lines.append("")
|
| 352 |
+
lines.append("")
|
| 353 |
+
|
| 354 |
+
lines.append("## 2. Geometric Pipeline Weights (s_face vs s_eye)")
|
| 355 |
+
lines.append("")
|
| 356 |
+
lines.append("Grid search over face weight alpha in {0.2 ... 0.8}. "
|
| 357 |
+
"Eye weight = 1 - alpha. Threshold per fold via Youden's J.")
|
| 358 |
+
lines.append("")
|
| 359 |
+
lines.append("| Face Weight (alpha) | Mean LOPO F1 |")
|
| 360 |
+
lines.append("|--------------------:|-------------:|")
|
| 361 |
+
for a in sorted(geo_f1.keys()):
|
| 362 |
+
marker = " **<-- selected**" if a == best_alpha else ""
|
| 363 |
+
lines.append(f"| {a:.1f} | {geo_f1[a]:.4f}{marker} |")
|
| 364 |
+
lines.append("")
|
| 365 |
+
lines.append(f"**Best:** alpha = {best_alpha:.1f} (face {best_alpha*100:.0f}%, "
|
| 366 |
+
f"eye {(1-best_alpha)*100:.0f}%)")
|
| 367 |
+
lines.append("")
|
| 368 |
+
lines.append("")
|
| 369 |
+
lines.append("")
|
| 370 |
+
|
| 371 |
+
lines.append("## 3. Hybrid Pipeline Weights (MLP vs Geometric)")
|
| 372 |
+
lines.append("")
|
| 373 |
+
lines.append("Grid search over w_mlp in {0.3 ... 0.8}. w_geo = 1 - w_mlp. "
|
| 374 |
+
"Geometric sub-score uses same weights as geometric pipeline (face=0.7, eye=0.3). "
|
| 375 |
+
"If you change geometric weights, re-run this script — optimal w_mlp can shift.")
|
| 376 |
+
lines.append("")
|
| 377 |
+
lines.append("| MLP Weight (w_mlp) | Mean LOPO F1 |")
|
| 378 |
+
lines.append("|-------------------:|-------------:|")
|
| 379 |
+
for w in sorted(hybrid_f1.keys()):
|
| 380 |
+
marker = " **<-- selected**" if w == best_w else ""
|
| 381 |
+
lines.append(f"| {w:.1f} | {hybrid_f1[w]:.4f}{marker} |")
|
| 382 |
+
lines.append("")
|
| 383 |
+
lines.append(f"**Best:** w_mlp = {best_w:.1f} (MLP {best_w*100:.0f}%, "
|
| 384 |
+
f"geometric {(1-best_w)*100:.0f}%)")
|
| 385 |
+
lines.append("")
|
| 386 |
+
lines.append("")
|
| 387 |
+
lines.append("")
|
| 388 |
+
|
| 389 |
+
lines.append("## 4. Eye and Mouth Aspect Ratio Thresholds")
|
| 390 |
+
lines.append("")
|
| 391 |
+
lines.append("### EAR (Eye Aspect Ratio)")
|
| 392 |
+
lines.append("")
|
| 393 |
+
lines.append("Reference: Soukupova & Cech, \"Real-Time Eye Blink Detection Using Facial "
|
| 394 |
+
"Landmarks\" (2016) established EAR ~ 0.2 as a blink threshold.")
|
| 395 |
+
lines.append("")
|
| 396 |
+
lines.append("Our thresholds define a linear interpolation zone around this established value:")
|
| 397 |
+
lines.append("")
|
| 398 |
+
lines.append("| Constant | Value | Justification |")
|
| 399 |
+
lines.append("|----------|------:|---------------|")
|
| 400 |
+
lines.append(f"| `ear_closed` | 0.16 | Below this, eyes are fully shut. "
|
| 401 |
+
f"{dist_stats['ear_below_016']:.1f}% of samples fall here. |")
|
| 402 |
+
lines.append(f"| `EAR_BLINK_THRESH` | 0.21 | Blink detection point; close to the 0.2 reference. "
|
| 403 |
+
f"{dist_stats['ear_below_021']:.1f}% of samples below. |")
|
| 404 |
+
lines.append(f"| `ear_open` | 0.30 | Above this, eyes are fully open. "
|
| 405 |
+
f"{dist_stats['ear_above_030']:.1f}% of samples here. |")
|
| 406 |
+
lines.append("")
|
| 407 |
+
lines.append("Between 0.16 and 0.30 the `_ear_score` function linearly interpolates from 0 to 1, "
|
| 408 |
+
"providing a smooth transition rather than a hard binary cutoff.")
|
| 409 |
+
lines.append("")
|
| 410 |
+
lines.append("")
|
| 411 |
+
lines.append("")
|
| 412 |
+
lines.append("### MAR (Mouth Aspect Ratio)")
|
| 413 |
+
lines.append("")
|
| 414 |
+
lines.append(f"| Constant | Value | Justification |")
|
| 415 |
+
lines.append("|----------|------:|---------------|")
|
| 416 |
+
lines.append(f"| `MAR_YAWN_THRESHOLD` | 0.55 | Only {dist_stats['mar_above_055']:.1f}% of "
|
| 417 |
+
f"samples exceed this, confirming it captures genuine yawns without false positives. |")
|
| 418 |
+
lines.append("")
|
| 419 |
+
lines.append("")
|
| 420 |
+
lines.append("")
|
| 421 |
+
|
| 422 |
+
lines.append("## 5. Other Constants")
|
| 423 |
+
lines.append("")
|
| 424 |
+
lines.append("| Constant | Value | Rationale |")
|
| 425 |
+
lines.append("|----------|------:|-----------|")
|
| 426 |
+
lines.append("| `gaze_max_offset` | 0.28 | Max iris displacement (normalised) before gaze score "
|
| 427 |
+
"drops to zero. Corresponds to ~56% of the eye width; beyond this the iris is at "
|
| 428 |
+
"the extreme edge. |")
|
| 429 |
+
lines.append("| `max_angle` | 22.0 deg | Head deviation beyond which face score = 0. Based on "
|
| 430 |
+
"typical monitor-viewing cone: at 60 cm distance and a 24\" monitor, the viewing "
|
| 431 |
+
"angle is ~20-25 degrees. |")
|
| 432 |
+
lines.append("| `roll_weight` | 0.5 | Roll is less indicative of inattention than yaw/pitch "
|
| 433 |
+
"(tilting head doesn't mean looking away), so it's down-weighted by 50%. |")
|
| 434 |
+
lines.append("| `EMA alpha` | 0.3 | Smoothing factor for focus score. "
|
| 435 |
+
"Gives ~3-4 frame effective window; balances responsiveness vs flicker. |")
|
| 436 |
+
lines.append("| `grace_frames` | 15 | ~0.5 s at 30 fps before penalising no-face. Allows brief "
|
| 437 |
+
"occlusions (e.g. hand gesture) without dropping score. |")
|
| 438 |
+
lines.append("| `PERCLOS_WINDOW` | 60 frames | 2 s at 30 fps; standard PERCLOS measurement "
|
| 439 |
+
"window (Dinges & Grace, 1998). |")
|
| 440 |
+
lines.append("| `BLINK_WINDOW_SEC` | 30 s | Blink rate measured over 30 s; typical spontaneous "
|
| 441 |
+
"blink rate is 15-20/min (Bentivoglio et al., 1997). |")
|
| 442 |
+
lines.append("")
|
| 443 |
+
|
| 444 |
+
with open(REPORT_PATH, "w", encoding="utf-8") as f:
|
| 445 |
+
f.write("\n".join(lines))
|
| 446 |
+
print(f"\nReport written to {REPORT_PATH}")
|
| 447 |
+
|
| 448 |
+
|
| 449 |
+
def main():
|
| 450 |
+
os.makedirs(PLOTS_DIR, exist_ok=True)
|
| 451 |
+
|
| 452 |
+
lopo_results = run_lopo_models()
|
| 453 |
+
model_stats = analyse_model_thresholds(lopo_results)
|
| 454 |
+
geo_f1, best_alpha = run_geo_weight_search()
|
| 455 |
+
hybrid_f1, best_w = run_hybrid_weight_search(lopo_results)
|
| 456 |
+
dist_stats = plot_distributions()
|
| 457 |
+
|
| 458 |
+
write_report(model_stats, geo_f1, best_alpha, hybrid_f1, best_w, dist_stats)
|
| 459 |
+
print("\nDone.")
|
| 460 |
+
|
| 461 |
+
|
| 462 |
+
if __name__ == "__main__":
|
| 463 |
+
main()
|
evaluation/plots/ear_distribution.png
ADDED
|
evaluation/plots/geo_weight_search.png
ADDED
|
evaluation/plots/hybrid_weight_search.png
ADDED
|
evaluation/plots/mar_distribution.png
ADDED
|
evaluation/plots/roc_mlp.png
ADDED
|
evaluation/plots/roc_xgb.png
ADDED
|
models/README.md
CHANGED
|
@@ -11,8 +11,6 @@ Root-level modules form the real-time inference pipeline:
|
|
| 11 |
| `face_mesh.py` | BGR frame | 478 MediaPipe landmarks |
|
| 12 |
| `head_pose.py` | Landmarks, frame size | yaw, pitch, roll, face/eye score, gaze offset, head deviation |
|
| 13 |
| `eye_scorer.py` | Landmarks | EAR (left/right/avg), gaze ratio (h/v), MAR |
|
| 14 |
-
| `eye_crop.py` | Landmarks, frame | Cropped eye region images |
|
| 15 |
-
| `eye_classifier.py` | Eye crops or landmarks | Eye open/closed prediction (geometric fallback) |
|
| 16 |
| `collect_features.py` | BGR frame | 17-d feature vector + temporal features (PERCLOS, blink rate, etc.) |
|
| 17 |
|
| 18 |
## 2. Training Scripts
|
|
|
|
| 11 |
| `face_mesh.py` | BGR frame | 478 MediaPipe landmarks |
|
| 12 |
| `head_pose.py` | Landmarks, frame size | yaw, pitch, roll, face/eye score, gaze offset, head deviation |
|
| 13 |
| `eye_scorer.py` | Landmarks | EAR (left/right/avg), gaze ratio (h/v), MAR |
|
|
|
|
|
|
|
| 14 |
| `collect_features.py` | BGR frame | 17-d feature vector + temporal features (PERCLOS, blink rate, etc.) |
|
| 15 |
|
| 16 |
## 2. Training Scripts
|
models/cnn/CNN_MODEL/.claude/settings.local.json
DELETED
|
@@ -1,7 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"permissions": {
|
| 3 |
-
"allow": [
|
| 4 |
-
"Bash(# Check Dataset_subset counts echo \"\"=== Dataset_subset/train/open ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/train/open/ | wc -l && echo \"\"=== Dataset_subset/train/closed ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/train/closed/ | wc -l && echo \"\"=== Dataset_subset/val/open ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/val/open/ | wc -l && echo \"\"=== Dataset_subset/val/closed ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/val/closed/)"
|
| 5 |
-
]
|
| 6 |
-
}
|
| 7 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
models/cnn/CNN_MODEL/.gitattributes
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
DATA/** filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
models/cnn/CNN_MODEL/.gitignore
DELETED
|
@@ -1,4 +0,0 @@
|
|
| 1 |
-
Dataset/train/
|
| 2 |
-
Dataset/val/
|
| 3 |
-
Dataset/test/
|
| 4 |
-
.DS_Store
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
models/cnn/CNN_MODEL/README.md
DELETED
|
@@ -1,74 +0,0 @@
|
|
| 1 |
-
# Eye Open / Closed Classifier (YOLOv11-CLS)
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
Binary classifier: **open** vs **closed** eyes.
|
| 5 |
-
Used as a baseline for eye-tracking, drowsiness, or focus detection.
|
| 6 |
-
|
| 7 |
-
---
|
| 8 |
-
|
| 9 |
-
## Model team task
|
| 10 |
-
|
| 11 |
-
- **Train** the YOLOv11s-cls eye classifier in a **separate notebook** (data split, epochs, GPU, export `best.pt`).
|
| 12 |
-
- Provide **trained weights** (`best.pt`) for this repo’s evaluation and inference scripts.
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
---
|
| 17 |
-
|
| 18 |
-
## Repo contents
|
| 19 |
-
|
| 20 |
-
- **notebooks/eye_classifier_colab.ipynb** — Data download (Kaggle), clean, split, undersample, **evaluate** (needs `best.pt` from model team), export.
|
| 21 |
-
- **scripts/predict_image.py** — Run classifier on single images (needs `best.pt`).
|
| 22 |
-
- **scripts/webcam_live.py** — Live webcam open/closed (needs `best.pt` + optional `weights/face_landmarker.task`).
|
| 23 |
-
- **scripts/video_infer.py** — Run on video files.
|
| 24 |
-
- **scripts/focus_infer.py** — Focus/attention inference.
|
| 25 |
-
- **weights/** — Put `best.pt` here; `face_landmarker.task` is downloaded on first webcam run if missing.
|
| 26 |
-
- **docs/** — Extra docs (e.g. UNNECESSARY_FILES.md if present).
|
| 27 |
-
|
| 28 |
-
---
|
| 29 |
-
|
| 30 |
-
## Dataset
|
| 31 |
-
|
| 32 |
-
- **Source:** [Kaggle — open/closed eyes](https://www.kaggle.com/datasets/sehriyarmemmedli/open-closed-eyes-dataset)
|
| 33 |
-
- The Colab notebook downloads it via `kagglehub`; no local copy in repo.
|
| 34 |
-
|
| 35 |
-
---
|
| 36 |
-
|
| 37 |
-
## Weights
|
| 38 |
-
|
| 39 |
-
- Put **best.pt** from the model team in **weights/best.pt** (or `runs/classify/runs_cls/eye_open_closed_cpu/weights/best.pt`).
|
| 40 |
-
- For webcam: **face_landmarker.task** is downloaded into **weights/** on first run if missing.
|
| 41 |
-
|
| 42 |
-
---
|
| 43 |
-
|
| 44 |
-
## Local setup
|
| 45 |
-
|
| 46 |
-
```bash
|
| 47 |
-
pip install ultralytics opencv-python mediapipe "numpy<2"
|
| 48 |
-
```
|
| 49 |
-
|
| 50 |
-
Optional: use a venv. From repo root:
|
| 51 |
-
- `python scripts/predict_image.py <image.png>`
|
| 52 |
-
- `python scripts/webcam_live.py`
|
| 53 |
-
- `python scripts/video_infer.py` (expects 1.mp4 / 2.mp4 in repo root or set `VIDEOS` env)
|
| 54 |
-
- `python scripts/focus_infer.py`
|
| 55 |
-
|
| 56 |
-
---
|
| 57 |
-
|
| 58 |
-
## Project structure
|
| 59 |
-
|
| 60 |
-
```
|
| 61 |
-
├── notebooks/
|
| 62 |
-
│ └── eye_classifier_colab.ipynb # Data + eval (no training)
|
| 63 |
-
├── scripts/
|
| 64 |
-
│ ├── predict_image.py
|
| 65 |
-
│ ├── webcam_live.py
|
| 66 |
-
│ ├── video_infer.py
|
| 67 |
-
│ └── focus_infer.py
|
| 68 |
-
├── weights/ # best.pt, face_landmarker.task
|
| 69 |
-
├── docs/ # extra docs
|
| 70 |
-
├── README.md
|
| 71 |
-
└── venv/ # optional
|
| 72 |
-
```
|
| 73 |
-
|
| 74 |
-
Training and weight generation: **model team, separate notebook.**
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
models/cnn/CNN_MODEL/notebooks/eye_classifier_colab.ipynb
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
models/cnn/CNN_MODEL/scripts/focus_infer.py
DELETED
|
@@ -1,199 +0,0 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
from pathlib import Path
|
| 4 |
-
import os
|
| 5 |
-
|
| 6 |
-
import cv2
|
| 7 |
-
import numpy as np
|
| 8 |
-
from ultralytics import YOLO
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
def list_images(folder: Path):
|
| 12 |
-
exts = {".png", ".jpg", ".jpeg", ".bmp", ".webp"}
|
| 13 |
-
return sorted([p for p in folder.iterdir() if p.suffix.lower() in exts])
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
def find_weights(project_root: Path) -> Path | None:
|
| 17 |
-
candidates = [
|
| 18 |
-
project_root / "weights" / "best.pt",
|
| 19 |
-
project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
|
| 20 |
-
project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
|
| 21 |
-
project_root / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
|
| 22 |
-
project_root / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
|
| 23 |
-
]
|
| 24 |
-
return next((p for p in candidates if p.is_file()), None)
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
def detect_eyelid_boundary(gray: np.ndarray) -> np.ndarray | None:
|
| 28 |
-
"""
|
| 29 |
-
Returns an ellipse fit to the largest contour near the eye boundary.
|
| 30 |
-
Output format: (center(x,y), (axis1, axis2), angle) or None.
|
| 31 |
-
"""
|
| 32 |
-
blur = cv2.GaussianBlur(gray, (5, 5), 0)
|
| 33 |
-
edges = cv2.Canny(blur, 40, 120)
|
| 34 |
-
edges = cv2.dilate(edges, np.ones((3, 3), np.uint8), iterations=1)
|
| 35 |
-
contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
| 36 |
-
if not contours:
|
| 37 |
-
return None
|
| 38 |
-
contours = sorted(contours, key=cv2.contourArea, reverse=True)
|
| 39 |
-
for c in contours:
|
| 40 |
-
if len(c) >= 5 and cv2.contourArea(c) > 50:
|
| 41 |
-
return cv2.fitEllipse(c)
|
| 42 |
-
return None
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
def detect_pupil_center(gray: np.ndarray) -> tuple[int, int] | None:
|
| 46 |
-
"""
|
| 47 |
-
More robust pupil detection:
|
| 48 |
-
- enhance contrast (CLAHE)
|
| 49 |
-
- find dark blobs
|
| 50 |
-
- score by circularity and proximity to center
|
| 51 |
-
"""
|
| 52 |
-
h, w = gray.shape
|
| 53 |
-
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
|
| 54 |
-
eq = clahe.apply(gray)
|
| 55 |
-
blur = cv2.GaussianBlur(eq, (7, 7), 0)
|
| 56 |
-
|
| 57 |
-
# Focus on the central region to avoid eyelashes/edges
|
| 58 |
-
cx, cy = w // 2, h // 2
|
| 59 |
-
rx, ry = int(w * 0.3), int(h * 0.3)
|
| 60 |
-
x0, x1 = max(cx - rx, 0), min(cx + rx, w)
|
| 61 |
-
y0, y1 = max(cy - ry, 0), min(cy + ry, h)
|
| 62 |
-
roi = blur[y0:y1, x0:x1]
|
| 63 |
-
|
| 64 |
-
# Inverted threshold to capture dark pupil
|
| 65 |
-
_, thresh = cv2.threshold(roi, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
|
| 66 |
-
thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, np.ones((3, 3), np.uint8), iterations=2)
|
| 67 |
-
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8), iterations=1)
|
| 68 |
-
|
| 69 |
-
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
| 70 |
-
if not contours:
|
| 71 |
-
return None
|
| 72 |
-
|
| 73 |
-
best = None
|
| 74 |
-
best_score = -1.0
|
| 75 |
-
for c in contours:
|
| 76 |
-
area = cv2.contourArea(c)
|
| 77 |
-
if area < 15:
|
| 78 |
-
continue
|
| 79 |
-
perimeter = cv2.arcLength(c, True)
|
| 80 |
-
if perimeter <= 0:
|
| 81 |
-
continue
|
| 82 |
-
circularity = 4 * np.pi * (area / (perimeter * perimeter))
|
| 83 |
-
if circularity < 0.3:
|
| 84 |
-
continue
|
| 85 |
-
m = cv2.moments(c)
|
| 86 |
-
if m["m00"] == 0:
|
| 87 |
-
continue
|
| 88 |
-
px = int(m["m10"] / m["m00"]) + x0
|
| 89 |
-
py = int(m["m01"] / m["m00"]) + y0
|
| 90 |
-
|
| 91 |
-
# Score by circularity and distance to center
|
| 92 |
-
dist = np.hypot(px - cx, py - cy) / max(w, h)
|
| 93 |
-
score = circularity - dist
|
| 94 |
-
if score > best_score:
|
| 95 |
-
best_score = score
|
| 96 |
-
best = (px, py)
|
| 97 |
-
|
| 98 |
-
return best
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
def is_focused(pupil_center: tuple[int, int], img_shape: tuple[int, int]) -> bool:
|
| 102 |
-
"""
|
| 103 |
-
Decide focus based on pupil offset from image center.
|
| 104 |
-
"""
|
| 105 |
-
h, w = img_shape
|
| 106 |
-
cx, cy = w // 2, h // 2
|
| 107 |
-
px, py = pupil_center
|
| 108 |
-
dx = abs(px - cx) / max(w, 1)
|
| 109 |
-
dy = abs(py - cy) / max(h, 1)
|
| 110 |
-
return (dx < 0.12) and (dy < 0.12)
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
def annotate(img_bgr: np.ndarray, ellipse, pupil_center, focused: bool, cls_label: str, conf: float):
|
| 114 |
-
out = img_bgr.copy()
|
| 115 |
-
if ellipse is not None:
|
| 116 |
-
cv2.ellipse(out, ellipse, (0, 255, 255), 2)
|
| 117 |
-
if pupil_center is not None:
|
| 118 |
-
cv2.circle(out, pupil_center, 4, (0, 0, 255), -1)
|
| 119 |
-
label = f"{cls_label} ({conf:.2f}) | focused={int(focused)}"
|
| 120 |
-
cv2.putText(out, label, (8, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
|
| 121 |
-
return out
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
def main():
|
| 125 |
-
project_root = Path(__file__).resolve().parent.parent
|
| 126 |
-
data_dir = project_root / "Dataset"
|
| 127 |
-
alt_data_dir = project_root / "DATA"
|
| 128 |
-
out_dir = project_root / "runs_focus"
|
| 129 |
-
out_dir.mkdir(parents=True, exist_ok=True)
|
| 130 |
-
|
| 131 |
-
weights = find_weights(project_root)
|
| 132 |
-
if weights is None:
|
| 133 |
-
print("Weights not found. Train first.")
|
| 134 |
-
return
|
| 135 |
-
|
| 136 |
-
# Support both Dataset/test/{open,closed} and Dataset/{open,closed}
|
| 137 |
-
def resolve_test_dirs(root: Path):
|
| 138 |
-
test_open = root / "test" / "open"
|
| 139 |
-
test_closed = root / "test" / "closed"
|
| 140 |
-
if test_open.exists() and test_closed.exists():
|
| 141 |
-
return test_open, test_closed
|
| 142 |
-
test_open = root / "open"
|
| 143 |
-
test_closed = root / "closed"
|
| 144 |
-
if test_open.exists() and test_closed.exists():
|
| 145 |
-
return test_open, test_closed
|
| 146 |
-
alt_closed = root / "close"
|
| 147 |
-
if test_open.exists() and alt_closed.exists():
|
| 148 |
-
return test_open, alt_closed
|
| 149 |
-
return None, None
|
| 150 |
-
|
| 151 |
-
test_open, test_closed = resolve_test_dirs(data_dir)
|
| 152 |
-
if (test_open is None or test_closed is None) and alt_data_dir.exists():
|
| 153 |
-
test_open, test_closed = resolve_test_dirs(alt_data_dir)
|
| 154 |
-
|
| 155 |
-
if not test_open.exists() or not test_closed.exists():
|
| 156 |
-
print("Test folders missing. Expected:")
|
| 157 |
-
print(test_open)
|
| 158 |
-
print(test_closed)
|
| 159 |
-
return
|
| 160 |
-
|
| 161 |
-
test_files = list_images(test_open) + list_images(test_closed)
|
| 162 |
-
print("Total test images:", len(test_files))
|
| 163 |
-
max_images = int(os.getenv("MAX_IMAGES", "0"))
|
| 164 |
-
if max_images > 0:
|
| 165 |
-
test_files = test_files[:max_images]
|
| 166 |
-
print("Limiting to MAX_IMAGES:", max_images)
|
| 167 |
-
|
| 168 |
-
model = YOLO(str(weights))
|
| 169 |
-
results = model.predict(test_files, imgsz=224, device="cpu", verbose=False)
|
| 170 |
-
|
| 171 |
-
names = model.names
|
| 172 |
-
for r in results:
|
| 173 |
-
probs = r.probs
|
| 174 |
-
top_idx = int(probs.top1)
|
| 175 |
-
top_conf = float(probs.top1conf)
|
| 176 |
-
pred_label = names[top_idx]
|
| 177 |
-
|
| 178 |
-
img = cv2.imread(r.path)
|
| 179 |
-
if img is None:
|
| 180 |
-
continue
|
| 181 |
-
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
| 182 |
-
|
| 183 |
-
ellipse = detect_eyelid_boundary(gray)
|
| 184 |
-
pupil_center = detect_pupil_center(gray)
|
| 185 |
-
focused = False
|
| 186 |
-
if pred_label.lower() == "open" and pupil_center is not None:
|
| 187 |
-
focused = is_focused(pupil_center, gray.shape)
|
| 188 |
-
|
| 189 |
-
annotated = annotate(img, ellipse, pupil_center, focused, pred_label, top_conf)
|
| 190 |
-
out_path = out_dir / (Path(r.path).stem + "_annotated.jpg")
|
| 191 |
-
cv2.imwrite(str(out_path), annotated)
|
| 192 |
-
|
| 193 |
-
print(f"{Path(r.path).name}: pred={pred_label} conf={top_conf:.3f} focused={focused}")
|
| 194 |
-
|
| 195 |
-
print(f"\nAnnotated outputs saved to: {out_dir}")
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
if __name__ == "__main__":
|
| 199 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
models/cnn/CNN_MODEL/scripts/predict_image.py
DELETED
|
@@ -1,49 +0,0 @@
|
|
| 1 |
-
"""Run the eye open/closed model on one or more images."""
|
| 2 |
-
import sys
|
| 3 |
-
from pathlib import Path
|
| 4 |
-
|
| 5 |
-
from ultralytics import YOLO
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
def main():
|
| 9 |
-
project_root = Path(__file__).resolve().parent.parent
|
| 10 |
-
weight_candidates = [
|
| 11 |
-
project_root / "weights" / "best.pt",
|
| 12 |
-
project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
|
| 13 |
-
project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
|
| 14 |
-
]
|
| 15 |
-
weights = next((p for p in weight_candidates if p.is_file()), None)
|
| 16 |
-
if weights is None:
|
| 17 |
-
print("Weights not found. Put best.pt in weights/ or runs/.../weights/ (from model team).")
|
| 18 |
-
sys.exit(1)
|
| 19 |
-
|
| 20 |
-
if len(sys.argv) < 2:
|
| 21 |
-
print("Usage: python scripts/predict_image.py <image1> [image2 ...]")
|
| 22 |
-
print("Example: python scripts/predict_image.py path/to/image.png")
|
| 23 |
-
sys.exit(0)
|
| 24 |
-
|
| 25 |
-
model = YOLO(str(weights))
|
| 26 |
-
names = model.names
|
| 27 |
-
|
| 28 |
-
for path in sys.argv[1:]:
|
| 29 |
-
p = Path(path)
|
| 30 |
-
if not p.is_file():
|
| 31 |
-
print(p, "- file not found")
|
| 32 |
-
continue
|
| 33 |
-
try:
|
| 34 |
-
results = model.predict(str(p), imgsz=224, device="cpu", verbose=False)
|
| 35 |
-
except Exception as e:
|
| 36 |
-
print(p, "- error:", e)
|
| 37 |
-
continue
|
| 38 |
-
if not results:
|
| 39 |
-
print(p, "- no result")
|
| 40 |
-
continue
|
| 41 |
-
r = results[0]
|
| 42 |
-
top_idx = int(r.probs.top1)
|
| 43 |
-
conf = float(r.probs.top1conf)
|
| 44 |
-
label = names[top_idx]
|
| 45 |
-
print(f"{p.name}: {label} ({conf:.2%})")
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
if __name__ == "__main__":
|
| 49 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
models/cnn/CNN_MODEL/scripts/video_infer.py
DELETED
|
@@ -1,281 +0,0 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
import os
|
| 4 |
-
from pathlib import Path
|
| 5 |
-
|
| 6 |
-
import cv2
|
| 7 |
-
import numpy as np
|
| 8 |
-
from ultralytics import YOLO
|
| 9 |
-
|
| 10 |
-
try:
|
| 11 |
-
import mediapipe as mp
|
| 12 |
-
except Exception: # pragma: no cover
|
| 13 |
-
mp = None
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
def find_weights(project_root: Path) -> Path | None:
|
| 17 |
-
candidates = [
|
| 18 |
-
project_root / "weights" / "best.pt",
|
| 19 |
-
project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
|
| 20 |
-
project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
|
| 21 |
-
project_root / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
|
| 22 |
-
project_root / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
|
| 23 |
-
]
|
| 24 |
-
return next((p for p in candidates if p.is_file()), None)
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
def detect_pupil_center(gray: np.ndarray) -> tuple[int, int] | None:
|
| 28 |
-
h, w = gray.shape
|
| 29 |
-
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
|
| 30 |
-
eq = clahe.apply(gray)
|
| 31 |
-
blur = cv2.GaussianBlur(eq, (7, 7), 0)
|
| 32 |
-
|
| 33 |
-
cx, cy = w // 2, h // 2
|
| 34 |
-
rx, ry = int(w * 0.3), int(h * 0.3)
|
| 35 |
-
x0, x1 = max(cx - rx, 0), min(cx + rx, w)
|
| 36 |
-
y0, y1 = max(cy - ry, 0), min(cy + ry, h)
|
| 37 |
-
roi = blur[y0:y1, x0:x1]
|
| 38 |
-
|
| 39 |
-
_, thresh = cv2.threshold(roi, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
|
| 40 |
-
thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, np.ones((3, 3), np.uint8), iterations=2)
|
| 41 |
-
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8), iterations=1)
|
| 42 |
-
|
| 43 |
-
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
| 44 |
-
if not contours:
|
| 45 |
-
return None
|
| 46 |
-
|
| 47 |
-
best = None
|
| 48 |
-
best_score = -1.0
|
| 49 |
-
for c in contours:
|
| 50 |
-
area = cv2.contourArea(c)
|
| 51 |
-
if area < 15:
|
| 52 |
-
continue
|
| 53 |
-
perimeter = cv2.arcLength(c, True)
|
| 54 |
-
if perimeter <= 0:
|
| 55 |
-
continue
|
| 56 |
-
circularity = 4 * np.pi * (area / (perimeter * perimeter))
|
| 57 |
-
if circularity < 0.3:
|
| 58 |
-
continue
|
| 59 |
-
m = cv2.moments(c)
|
| 60 |
-
if m["m00"] == 0:
|
| 61 |
-
continue
|
| 62 |
-
px = int(m["m10"] / m["m00"]) + x0
|
| 63 |
-
py = int(m["m01"] / m["m00"]) + y0
|
| 64 |
-
|
| 65 |
-
dist = np.hypot(px - cx, py - cy) / max(w, h)
|
| 66 |
-
score = circularity - dist
|
| 67 |
-
if score > best_score:
|
| 68 |
-
best_score = score
|
| 69 |
-
best = (px, py)
|
| 70 |
-
|
| 71 |
-
return best
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
def is_focused(pupil_center: tuple[int, int], img_shape: tuple[int, int]) -> bool:
|
| 75 |
-
h, w = img_shape
|
| 76 |
-
cx = w // 2
|
| 77 |
-
px, _ = pupil_center
|
| 78 |
-
dx = abs(px - cx) / max(w, 1)
|
| 79 |
-
return dx < 0.12
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
def classify_frame(model: YOLO, frame: np.ndarray) -> tuple[str, float]:
|
| 83 |
-
# Use classifier directly on frame (assumes frame is eye crop)
|
| 84 |
-
results = model.predict(frame, imgsz=224, device="cpu", verbose=False)
|
| 85 |
-
r = results[0]
|
| 86 |
-
probs = r.probs
|
| 87 |
-
top_idx = int(probs.top1)
|
| 88 |
-
top_conf = float(probs.top1conf)
|
| 89 |
-
pred_label = model.names[top_idx]
|
| 90 |
-
return pred_label, top_conf
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
def annotate_frame(frame: np.ndarray, label: str, focused: bool, conf: float, time_sec: float):
|
| 94 |
-
out = frame.copy()
|
| 95 |
-
text = f"{label} | focused={int(focused)} | conf={conf:.2f} | t={time_sec:.2f}s"
|
| 96 |
-
cv2.putText(out, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
|
| 97 |
-
return out
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
def write_segments(path: Path, segments: list[tuple[float, float, str]]):
|
| 101 |
-
with path.open("w") as f:
|
| 102 |
-
for start, end, label in segments:
|
| 103 |
-
f.write(f"{start:.2f},{end:.2f},{label}\n")
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
def process_video(video_path: Path, model: YOLO | None):
|
| 107 |
-
cap = cv2.VideoCapture(str(video_path))
|
| 108 |
-
if not cap.isOpened():
|
| 109 |
-
print(f"Failed to open {video_path}")
|
| 110 |
-
return
|
| 111 |
-
|
| 112 |
-
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
|
| 113 |
-
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
| 114 |
-
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
| 115 |
-
|
| 116 |
-
out_path = video_path.with_name(video_path.stem + "_pred.mp4")
|
| 117 |
-
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
|
| 118 |
-
writer = cv2.VideoWriter(str(out_path), fourcc, fps, (width, height))
|
| 119 |
-
|
| 120 |
-
csv_path = video_path.with_name(video_path.stem + "_predictions.csv")
|
| 121 |
-
seg_path = video_path.with_name(video_path.stem + "_segments.txt")
|
| 122 |
-
|
| 123 |
-
frame_idx = 0
|
| 124 |
-
last_label = None
|
| 125 |
-
seg_start = 0.0
|
| 126 |
-
segments: list[tuple[float, float, str]] = []
|
| 127 |
-
|
| 128 |
-
with csv_path.open("w") as fcsv:
|
| 129 |
-
fcsv.write("time_sec,label,focused,conf\n")
|
| 130 |
-
if mp is None:
|
| 131 |
-
print("mediapipe is not installed. Falling back to classifier-only mode.")
|
| 132 |
-
use_mp = mp is not None
|
| 133 |
-
if use_mp:
|
| 134 |
-
mp_face_mesh = mp.solutions.face_mesh
|
| 135 |
-
face_mesh = mp_face_mesh.FaceMesh(
|
| 136 |
-
static_image_mode=False,
|
| 137 |
-
max_num_faces=1,
|
| 138 |
-
refine_landmarks=True,
|
| 139 |
-
min_detection_confidence=0.5,
|
| 140 |
-
min_tracking_confidence=0.5,
|
| 141 |
-
)
|
| 142 |
-
|
| 143 |
-
while True:
|
| 144 |
-
ret, frame = cap.read()
|
| 145 |
-
if not ret:
|
| 146 |
-
break
|
| 147 |
-
time_sec = frame_idx / fps
|
| 148 |
-
conf = 0.0
|
| 149 |
-
pred_label = "open"
|
| 150 |
-
focused = False
|
| 151 |
-
|
| 152 |
-
if use_mp:
|
| 153 |
-
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
| 154 |
-
res = face_mesh.process(rgb)
|
| 155 |
-
if res.multi_face_landmarks:
|
| 156 |
-
lm = res.multi_face_landmarks[0].landmark
|
| 157 |
-
h, w = frame.shape[:2]
|
| 158 |
-
|
| 159 |
-
# Eye landmarks (MediaPipe FaceMesh)
|
| 160 |
-
left_eye = [33, 160, 158, 133, 153, 144]
|
| 161 |
-
right_eye = [362, 385, 387, 263, 373, 380]
|
| 162 |
-
left_iris = [468, 469, 470, 471]
|
| 163 |
-
right_iris = [473, 474, 475, 476]
|
| 164 |
-
|
| 165 |
-
def pts(idxs):
|
| 166 |
-
return np.array([(int(lm[i].x * w), int(lm[i].y * h)) for i in idxs])
|
| 167 |
-
|
| 168 |
-
def ear(eye_pts):
|
| 169 |
-
# EAR using 6 points
|
| 170 |
-
p1, p2, p3, p4, p5, p6 = eye_pts
|
| 171 |
-
v1 = np.linalg.norm(p2 - p6)
|
| 172 |
-
v2 = np.linalg.norm(p3 - p5)
|
| 173 |
-
h1 = np.linalg.norm(p1 - p4)
|
| 174 |
-
return (v1 + v2) / (2.0 * h1 + 1e-6)
|
| 175 |
-
|
| 176 |
-
le = pts(left_eye)
|
| 177 |
-
re = pts(right_eye)
|
| 178 |
-
le_ear = ear(le)
|
| 179 |
-
re_ear = ear(re)
|
| 180 |
-
ear_avg = (le_ear + re_ear) / 2.0
|
| 181 |
-
|
| 182 |
-
# openness threshold
|
| 183 |
-
pred_label = "open" if ear_avg > 0.22 else "closed"
|
| 184 |
-
|
| 185 |
-
# iris centers
|
| 186 |
-
li = pts(left_iris)
|
| 187 |
-
ri = pts(right_iris)
|
| 188 |
-
li_c = li.mean(axis=0).astype(int)
|
| 189 |
-
ri_c = ri.mean(axis=0).astype(int)
|
| 190 |
-
|
| 191 |
-
# eye centers (midpoint of corners)
|
| 192 |
-
le_c = ((le[0] + le[3]) / 2).astype(int)
|
| 193 |
-
re_c = ((re[0] + re[3]) / 2).astype(int)
|
| 194 |
-
|
| 195 |
-
# focus = iris close to eye center horizontally for both eyes
|
| 196 |
-
le_dx = abs(li_c[0] - le_c[0]) / max(np.linalg.norm(le[0] - le[3]), 1)
|
| 197 |
-
re_dx = abs(ri_c[0] - re_c[0]) / max(np.linalg.norm(re[0] - re[3]), 1)
|
| 198 |
-
focused = (pred_label == "open") and (le_dx < 0.18) and (re_dx < 0.18)
|
| 199 |
-
|
| 200 |
-
# draw eye boundaries
|
| 201 |
-
cv2.polylines(frame, [le], True, (0, 255, 255), 1)
|
| 202 |
-
cv2.polylines(frame, [re], True, (0, 255, 255), 1)
|
| 203 |
-
# draw iris centers
|
| 204 |
-
cv2.circle(frame, tuple(li_c), 3, (0, 0, 255), -1)
|
| 205 |
-
cv2.circle(frame, tuple(ri_c), 3, (0, 0, 255), -1)
|
| 206 |
-
else:
|
| 207 |
-
pred_label = "closed"
|
| 208 |
-
focused = False
|
| 209 |
-
else:
|
| 210 |
-
if model is not None:
|
| 211 |
-
pred_label, conf = classify_frame(model, frame)
|
| 212 |
-
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
| 213 |
-
pupil_center = detect_pupil_center(gray) if pred_label.lower() == "open" else None
|
| 214 |
-
focused = False
|
| 215 |
-
if pred_label.lower() == "open" and pupil_center is not None:
|
| 216 |
-
focused = is_focused(pupil_center, gray.shape)
|
| 217 |
-
|
| 218 |
-
if pred_label.lower() != "open":
|
| 219 |
-
focused = False
|
| 220 |
-
|
| 221 |
-
label = "open_focused" if (pred_label.lower() == "open" and focused) else "open_not_focused"
|
| 222 |
-
if pred_label.lower() != "open":
|
| 223 |
-
label = "closed_not_focused"
|
| 224 |
-
|
| 225 |
-
fcsv.write(f"{time_sec:.2f},{label},{int(focused)},{conf:.4f}\n")
|
| 226 |
-
|
| 227 |
-
if last_label is None:
|
| 228 |
-
last_label = label
|
| 229 |
-
seg_start = time_sec
|
| 230 |
-
elif label != last_label:
|
| 231 |
-
segments.append((seg_start, time_sec, last_label))
|
| 232 |
-
seg_start = time_sec
|
| 233 |
-
last_label = label
|
| 234 |
-
|
| 235 |
-
annotated = annotate_frame(frame, label, focused, conf, time_sec)
|
| 236 |
-
writer.write(annotated)
|
| 237 |
-
frame_idx += 1
|
| 238 |
-
|
| 239 |
-
if last_label is not None:
|
| 240 |
-
end_time = frame_idx / fps
|
| 241 |
-
segments.append((seg_start, end_time, last_label))
|
| 242 |
-
write_segments(seg_path, segments)
|
| 243 |
-
|
| 244 |
-
cap.release()
|
| 245 |
-
writer.release()
|
| 246 |
-
print(f"Saved: {out_path}")
|
| 247 |
-
print(f"CSV: {csv_path}")
|
| 248 |
-
print(f"Segments: {seg_path}")
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
def main():
|
| 252 |
-
project_root = Path(__file__).resolve().parent.parent
|
| 253 |
-
weights = find_weights(project_root)
|
| 254 |
-
model = YOLO(str(weights)) if weights is not None else None
|
| 255 |
-
|
| 256 |
-
# Default to 1.mp4 and 2.mp4 in project root
|
| 257 |
-
videos = []
|
| 258 |
-
for name in ["1.mp4", "2.mp4"]:
|
| 259 |
-
p = project_root / name
|
| 260 |
-
if p.exists():
|
| 261 |
-
videos.append(p)
|
| 262 |
-
|
| 263 |
-
# Also allow passing paths via env var
|
| 264 |
-
extra = os.getenv("VIDEOS", "")
|
| 265 |
-
for v in [x.strip() for x in extra.split(",") if x.strip()]:
|
| 266 |
-
vp = Path(v)
|
| 267 |
-
if not vp.is_absolute():
|
| 268 |
-
vp = project_root / vp
|
| 269 |
-
if vp.exists():
|
| 270 |
-
videos.append(vp)
|
| 271 |
-
|
| 272 |
-
if not videos:
|
| 273 |
-
print("No videos found. Expected 1.mp4 / 2.mp4 in project root.")
|
| 274 |
-
return
|
| 275 |
-
|
| 276 |
-
for v in videos:
|
| 277 |
-
process_video(v, model)
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
if __name__ == "__main__":
|
| 281 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
models/cnn/CNN_MODEL/scripts/webcam_live.py
DELETED
|
@@ -1,184 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Live webcam: detect face, crop each eye, run open/closed classifier, show on screen.
|
| 3 |
-
Requires: opencv-python, ultralytics, mediapipe (pip install mediapipe).
|
| 4 |
-
Press 'q' to quit.
|
| 5 |
-
"""
|
| 6 |
-
import urllib.request
|
| 7 |
-
from pathlib import Path
|
| 8 |
-
|
| 9 |
-
import cv2
|
| 10 |
-
import numpy as np
|
| 11 |
-
from ultralytics import YOLO
|
| 12 |
-
|
| 13 |
-
try:
|
| 14 |
-
import mediapipe as mp
|
| 15 |
-
_mp_has_solutions = hasattr(mp, "solutions")
|
| 16 |
-
except ImportError:
|
| 17 |
-
mp = None
|
| 18 |
-
_mp_has_solutions = False
|
| 19 |
-
|
| 20 |
-
# New MediaPipe Tasks API (Face Landmarker) eye indices
|
| 21 |
-
LEFT_EYE_INDICES_NEW = [263, 249, 390, 373, 374, 380, 381, 382, 362, 466, 388, 387, 386, 385, 384, 398]
|
| 22 |
-
RIGHT_EYE_INDICES_NEW = [33, 7, 163, 144, 145, 153, 154, 155, 133, 246, 161, 160, 159, 158, 157, 173]
|
| 23 |
-
# Old Face Mesh (solutions) indices
|
| 24 |
-
LEFT_EYE_INDICES_OLD = [33, 160, 158, 133, 153, 144]
|
| 25 |
-
RIGHT_EYE_INDICES_OLD = [362, 385, 387, 263, 373, 380]
|
| 26 |
-
EYE_PADDING = 0.35
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
def find_weights(project_root: Path) -> Path | None:
|
| 30 |
-
candidates = [
|
| 31 |
-
project_root / "weights" / "best.pt",
|
| 32 |
-
project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
|
| 33 |
-
project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
|
| 34 |
-
]
|
| 35 |
-
return next((p for p in candidates if p.is_file()), None)
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
def get_eye_roi(frame: np.ndarray, landmarks, indices: list[int]) -> np.ndarray | None:
|
| 39 |
-
h, w = frame.shape[:2]
|
| 40 |
-
pts = np.array([(int(landmarks[i].x * w), int(landmarks[i].y * h)) for i in indices])
|
| 41 |
-
x_min, y_min = pts.min(axis=0)
|
| 42 |
-
x_max, y_max = pts.max(axis=0)
|
| 43 |
-
dx = max(int((x_max - x_min) * EYE_PADDING), 8)
|
| 44 |
-
dy = max(int((y_max - y_min) * EYE_PADDING), 8)
|
| 45 |
-
x0 = max(0, x_min - dx)
|
| 46 |
-
y0 = max(0, y_min - dy)
|
| 47 |
-
x1 = min(w, x_max + dx)
|
| 48 |
-
y1 = min(h, y_max + dy)
|
| 49 |
-
if x1 <= x0 or y1 <= y0:
|
| 50 |
-
return None
|
| 51 |
-
return frame[y0:y1, x0:x1].copy()
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
def _run_with_solutions(mp, model, cap):
|
| 55 |
-
face_mesh = mp.solutions.face_mesh.FaceMesh(
|
| 56 |
-
static_image_mode=False,
|
| 57 |
-
max_num_faces=1,
|
| 58 |
-
refine_landmarks=True,
|
| 59 |
-
min_detection_confidence=0.5,
|
| 60 |
-
min_tracking_confidence=0.5,
|
| 61 |
-
)
|
| 62 |
-
while True:
|
| 63 |
-
ret, frame = cap.read()
|
| 64 |
-
if not ret:
|
| 65 |
-
break
|
| 66 |
-
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
| 67 |
-
results = face_mesh.process(rgb)
|
| 68 |
-
left_label, left_conf = "—", 0.0
|
| 69 |
-
right_label, right_conf = "—", 0.0
|
| 70 |
-
if results.multi_face_landmarks:
|
| 71 |
-
lm = results.multi_face_landmarks[0].landmark
|
| 72 |
-
for roi, indices, side in [
|
| 73 |
-
(get_eye_roi(frame, lm, LEFT_EYE_INDICES_OLD), LEFT_EYE_INDICES_OLD, "left"),
|
| 74 |
-
(get_eye_roi(frame, lm, RIGHT_EYE_INDICES_OLD), RIGHT_EYE_INDICES_OLD, "right"),
|
| 75 |
-
]:
|
| 76 |
-
if roi is not None and roi.size > 0:
|
| 77 |
-
try:
|
| 78 |
-
pred = model.predict(roi, imgsz=224, device="cpu", verbose=False)
|
| 79 |
-
if pred:
|
| 80 |
-
r = pred[0]
|
| 81 |
-
label = model.names[int(r.probs.top1)]
|
| 82 |
-
conf = float(r.probs.top1conf)
|
| 83 |
-
if side == "left":
|
| 84 |
-
left_label, left_conf = label, conf
|
| 85 |
-
else:
|
| 86 |
-
right_label, right_conf = label, conf
|
| 87 |
-
except Exception:
|
| 88 |
-
pass
|
| 89 |
-
cv2.putText(frame, f"L: {left_label} ({left_conf:.0%})", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
|
| 90 |
-
cv2.putText(frame, f"R: {right_label} ({right_conf:.0%})", (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
|
| 91 |
-
cv2.imshow("Eye open/closed (q to quit)", frame)
|
| 92 |
-
if cv2.waitKey(1) & 0xFF == ord("q"):
|
| 93 |
-
break
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
def _run_with_tasks(project_root: Path, model, cap):
|
| 97 |
-
from mediapipe.tasks.python import BaseOptions
|
| 98 |
-
from mediapipe.tasks.python.vision import FaceLandmarker, FaceLandmarkerOptions
|
| 99 |
-
from mediapipe.tasks.python.vision.core import vision_task_running_mode as running_mode
|
| 100 |
-
from mediapipe.tasks.python.vision.core import image as image_lib
|
| 101 |
-
|
| 102 |
-
model_path = project_root / "weights" / "face_landmarker.task"
|
| 103 |
-
if not model_path.is_file():
|
| 104 |
-
print("Downloading face_landmarker.task ...")
|
| 105 |
-
url = "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task"
|
| 106 |
-
urllib.request.urlretrieve(url, model_path)
|
| 107 |
-
print("Done.")
|
| 108 |
-
|
| 109 |
-
options = FaceLandmarkerOptions(
|
| 110 |
-
base_options=BaseOptions(model_asset_path=str(model_path)),
|
| 111 |
-
running_mode=running_mode.VisionTaskRunningMode.IMAGE,
|
| 112 |
-
num_faces=1,
|
| 113 |
-
)
|
| 114 |
-
face_landmarker = FaceLandmarker.create_from_options(options)
|
| 115 |
-
ImageFormat = image_lib.ImageFormat
|
| 116 |
-
|
| 117 |
-
while True:
|
| 118 |
-
ret, frame = cap.read()
|
| 119 |
-
if not ret:
|
| 120 |
-
break
|
| 121 |
-
left_label, left_conf = "—", 0.0
|
| 122 |
-
right_label, right_conf = "—", 0.0
|
| 123 |
-
|
| 124 |
-
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
| 125 |
-
rgb_contiguous = np.ascontiguousarray(rgb)
|
| 126 |
-
mp_image = image_lib.Image(ImageFormat.SRGB, rgb_contiguous)
|
| 127 |
-
result = face_landmarker.detect(mp_image)
|
| 128 |
-
|
| 129 |
-
if result.face_landmarks:
|
| 130 |
-
lm = result.face_landmarks[0]
|
| 131 |
-
for roi, side in [
|
| 132 |
-
(get_eye_roi(frame, lm, LEFT_EYE_INDICES_NEW), "left"),
|
| 133 |
-
(get_eye_roi(frame, lm, RIGHT_EYE_INDICES_NEW), "right"),
|
| 134 |
-
]:
|
| 135 |
-
if roi is not None and roi.size > 0:
|
| 136 |
-
try:
|
| 137 |
-
pred = model.predict(roi, imgsz=224, device="cpu", verbose=False)
|
| 138 |
-
if pred:
|
| 139 |
-
r = pred[0]
|
| 140 |
-
label = model.names[int(r.probs.top1)]
|
| 141 |
-
conf = float(r.probs.top1conf)
|
| 142 |
-
if side == "left":
|
| 143 |
-
left_label, left_conf = label, conf
|
| 144 |
-
else:
|
| 145 |
-
right_label, right_conf = label, conf
|
| 146 |
-
except Exception:
|
| 147 |
-
pass
|
| 148 |
-
|
| 149 |
-
cv2.putText(frame, f"L: {left_label} ({left_conf:.0%})", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
|
| 150 |
-
cv2.putText(frame, f"R: {right_label} ({right_conf:.0%})", (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
|
| 151 |
-
cv2.imshow("Eye open/closed (q to quit)", frame)
|
| 152 |
-
if cv2.waitKey(1) & 0xFF == ord("q"):
|
| 153 |
-
break
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
def main():
|
| 157 |
-
project_root = Path(__file__).resolve().parent.parent
|
| 158 |
-
weights = find_weights(project_root)
|
| 159 |
-
if weights is None:
|
| 160 |
-
print("Weights not found. Put best.pt in weights/ or runs/.../weights/ (from model team).")
|
| 161 |
-
return
|
| 162 |
-
if mp is None:
|
| 163 |
-
print("MediaPipe required. Install: pip install mediapipe")
|
| 164 |
-
return
|
| 165 |
-
|
| 166 |
-
model = YOLO(str(weights))
|
| 167 |
-
cap = cv2.VideoCapture(0)
|
| 168 |
-
if not cap.isOpened():
|
| 169 |
-
print("Could not open webcam.")
|
| 170 |
-
return
|
| 171 |
-
|
| 172 |
-
print("Live eye open/closed on your face. Press 'q' to quit.")
|
| 173 |
-
try:
|
| 174 |
-
if _mp_has_solutions:
|
| 175 |
-
_run_with_solutions(mp, model, cap)
|
| 176 |
-
else:
|
| 177 |
-
_run_with_tasks(project_root, model, cap)
|
| 178 |
-
finally:
|
| 179 |
-
cap.release()
|
| 180 |
-
cv2.destroyAllWindows()
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
if __name__ == "__main__":
|
| 184 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
models/cnn/CNN_MODEL/weights/yolo11s-cls.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:e2b605d1c8c212b434a75a32759a6f7adf1d2b29c35f76bdccd4c794cb653cf2
|
| 3 |
-
size 13630112
|
|
|
|
|
|
|
|
|
|
|
|
models/cnn/__init__.py
DELETED
|
File without changes
|
models/cnn/eye_attention/__init__.py
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
|
|
|
|
|
|
models/cnn/eye_attention/classifier.py
DELETED
|
@@ -1,169 +0,0 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
import os
|
| 4 |
-
from abc import ABC, abstractmethod
|
| 5 |
-
|
| 6 |
-
import numpy as np
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
class EyeClassifier(ABC):
|
| 10 |
-
@property
|
| 11 |
-
@abstractmethod
|
| 12 |
-
def name(self) -> str:
|
| 13 |
-
pass
|
| 14 |
-
|
| 15 |
-
@abstractmethod
|
| 16 |
-
def predict_score(self, crops_bgr: list[np.ndarray]) -> float:
|
| 17 |
-
pass
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
class GeometricOnlyClassifier(EyeClassifier):
|
| 21 |
-
@property
|
| 22 |
-
def name(self) -> str:
|
| 23 |
-
return "geometric"
|
| 24 |
-
|
| 25 |
-
def predict_score(self, crops_bgr: list[np.ndarray]) -> float:
|
| 26 |
-
return 1.0
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
class YOLOv11Classifier(EyeClassifier):
|
| 30 |
-
def __init__(self, checkpoint_path: str, device: str = "cpu"):
|
| 31 |
-
from ultralytics import YOLO
|
| 32 |
-
|
| 33 |
-
self._model = YOLO(checkpoint_path)
|
| 34 |
-
self._device = device
|
| 35 |
-
|
| 36 |
-
names = self._model.names
|
| 37 |
-
self._attentive_idx = None
|
| 38 |
-
for idx, cls_name in names.items():
|
| 39 |
-
if cls_name in ("open", "attentive"):
|
| 40 |
-
self._attentive_idx = idx
|
| 41 |
-
break
|
| 42 |
-
if self._attentive_idx is None:
|
| 43 |
-
self._attentive_idx = max(names.keys())
|
| 44 |
-
print(f"[YOLO] Classes: {names}, attentive_idx={self._attentive_idx}")
|
| 45 |
-
|
| 46 |
-
@property
|
| 47 |
-
def name(self) -> str:
|
| 48 |
-
return "yolo"
|
| 49 |
-
|
| 50 |
-
def predict_score(self, crops_bgr: list[np.ndarray]) -> float:
|
| 51 |
-
if not crops_bgr:
|
| 52 |
-
return 1.0
|
| 53 |
-
results = self._model.predict(crops_bgr, device=self._device, verbose=False)
|
| 54 |
-
scores = [float(r.probs.data[self._attentive_idx]) for r in results]
|
| 55 |
-
return sum(scores) / len(scores) if scores else 1.0
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
class EyeCNNClassifier(EyeClassifier):
|
| 59 |
-
"""Loader for the custom PyTorch EyeCNN (trained on Kaggle eye crops)."""
|
| 60 |
-
|
| 61 |
-
def __init__(self, checkpoint_path: str, device: str = "cpu"):
|
| 62 |
-
import torch
|
| 63 |
-
import torch.nn as nn
|
| 64 |
-
|
| 65 |
-
class EyeCNN(nn.Module):
|
| 66 |
-
def __init__(self, num_classes=2, dropout_rate=0.3):
|
| 67 |
-
super().__init__()
|
| 68 |
-
self.conv_layers = nn.Sequential(
|
| 69 |
-
nn.Conv2d(3, 32, 3, 1, 1), nn.BatchNorm2d(32), nn.ReLU(), nn.MaxPool2d(2, 2),
|
| 70 |
-
nn.Conv2d(32, 64, 3, 1, 1), nn.BatchNorm2d(64), nn.ReLU(), nn.MaxPool2d(2, 2),
|
| 71 |
-
nn.Conv2d(64, 128, 3, 1, 1), nn.BatchNorm2d(128), nn.ReLU(), nn.MaxPool2d(2, 2),
|
| 72 |
-
nn.Conv2d(128, 256, 3, 1, 1), nn.BatchNorm2d(256), nn.ReLU(), nn.MaxPool2d(2, 2),
|
| 73 |
-
)
|
| 74 |
-
self.fc_layers = nn.Sequential(
|
| 75 |
-
nn.AdaptiveAvgPool2d((1, 1)), nn.Flatten(),
|
| 76 |
-
nn.Linear(256, 512), nn.ReLU(), nn.Dropout(dropout_rate),
|
| 77 |
-
nn.Linear(512, num_classes),
|
| 78 |
-
)
|
| 79 |
-
|
| 80 |
-
def forward(self, x):
|
| 81 |
-
return self.fc_layers(self.conv_layers(x))
|
| 82 |
-
|
| 83 |
-
self._device = torch.device(device)
|
| 84 |
-
checkpoint = torch.load(checkpoint_path, map_location=self._device, weights_only=False)
|
| 85 |
-
dropout_rate = checkpoint.get("config", {}).get("dropout_rate", 0.35)
|
| 86 |
-
self._model = EyeCNN(num_classes=2, dropout_rate=dropout_rate)
|
| 87 |
-
self._model.load_state_dict(checkpoint["model_state_dict"])
|
| 88 |
-
self._model.to(self._device)
|
| 89 |
-
self._model.eval()
|
| 90 |
-
|
| 91 |
-
self._transform = None # built lazily
|
| 92 |
-
|
| 93 |
-
def _get_transform(self):
|
| 94 |
-
if self._transform is None:
|
| 95 |
-
from torchvision import transforms
|
| 96 |
-
self._transform = transforms.Compose([
|
| 97 |
-
transforms.ToPILImage(),
|
| 98 |
-
transforms.Resize((96, 96)),
|
| 99 |
-
transforms.ToTensor(),
|
| 100 |
-
transforms.Normalize(
|
| 101 |
-
mean=[0.485, 0.456, 0.406],
|
| 102 |
-
std=[0.229, 0.224, 0.225],
|
| 103 |
-
),
|
| 104 |
-
])
|
| 105 |
-
return self._transform
|
| 106 |
-
|
| 107 |
-
@property
|
| 108 |
-
def name(self) -> str:
|
| 109 |
-
return "eye_cnn"
|
| 110 |
-
|
| 111 |
-
def predict_score(self, crops_bgr: list[np.ndarray]) -> float:
|
| 112 |
-
if not crops_bgr:
|
| 113 |
-
return 1.0
|
| 114 |
-
|
| 115 |
-
import torch
|
| 116 |
-
import cv2
|
| 117 |
-
|
| 118 |
-
transform = self._get_transform()
|
| 119 |
-
scores = []
|
| 120 |
-
for crop in crops_bgr:
|
| 121 |
-
if crop is None or crop.size == 0:
|
| 122 |
-
scores.append(1.0)
|
| 123 |
-
continue
|
| 124 |
-
rgb = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)
|
| 125 |
-
tensor = transform(rgb).unsqueeze(0).to(self._device)
|
| 126 |
-
with torch.no_grad():
|
| 127 |
-
output = self._model(tensor)
|
| 128 |
-
prob = torch.softmax(output, dim=1)[0, 1].item() # prob of "open"
|
| 129 |
-
scores.append(prob)
|
| 130 |
-
return sum(scores) / len(scores)
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
_EXT_TO_BACKEND = {".pth": "cnn", ".pt": "yolo"}
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
def load_eye_classifier(
|
| 137 |
-
path: str | None = None,
|
| 138 |
-
backend: str = "yolo",
|
| 139 |
-
device: str = "cpu",
|
| 140 |
-
) -> EyeClassifier:
|
| 141 |
-
if backend == "geometric":
|
| 142 |
-
return GeometricOnlyClassifier()
|
| 143 |
-
|
| 144 |
-
if path is None:
|
| 145 |
-
print(f"[CLASSIFIER] No model path for backend {backend!r}, falling back to geometric")
|
| 146 |
-
return GeometricOnlyClassifier()
|
| 147 |
-
|
| 148 |
-
ext = os.path.splitext(path)[1].lower()
|
| 149 |
-
inferred = _EXT_TO_BACKEND.get(ext)
|
| 150 |
-
if inferred and inferred != backend:
|
| 151 |
-
print(f"[CLASSIFIER] File extension {ext!r} implies backend {inferred!r}, "
|
| 152 |
-
f"overriding requested {backend!r}")
|
| 153 |
-
backend = inferred
|
| 154 |
-
|
| 155 |
-
print(f"[CLASSIFIER] backend={backend!r}, path={path!r}")
|
| 156 |
-
|
| 157 |
-
if backend == "cnn":
|
| 158 |
-
return EyeCNNClassifier(path, device=device)
|
| 159 |
-
|
| 160 |
-
if backend == "yolo":
|
| 161 |
-
try:
|
| 162 |
-
return YOLOv11Classifier(path, device=device)
|
| 163 |
-
except ImportError:
|
| 164 |
-
print("[CLASSIFIER] ultralytics required for YOLO. pip install ultralytics")
|
| 165 |
-
raise
|
| 166 |
-
|
| 167 |
-
raise ValueError(
|
| 168 |
-
f"Unknown eye backend {backend!r}. Choose from: yolo, cnn, geometric"
|
| 169 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
models/cnn/eye_attention/crop.py
DELETED
|
@@ -1,70 +0,0 @@
|
|
| 1 |
-
import cv2
|
| 2 |
-
import numpy as np
|
| 3 |
-
|
| 4 |
-
from models.pretrained.face_mesh.face_mesh import FaceMeshDetector
|
| 5 |
-
|
| 6 |
-
LEFT_EYE_CONTOUR = FaceMeshDetector.LEFT_EYE_INDICES
|
| 7 |
-
RIGHT_EYE_CONTOUR = FaceMeshDetector.RIGHT_EYE_INDICES
|
| 8 |
-
|
| 9 |
-
IMAGENET_MEAN = (0.485, 0.456, 0.406)
|
| 10 |
-
IMAGENET_STD = (0.229, 0.224, 0.225)
|
| 11 |
-
|
| 12 |
-
CROP_SIZE = 96
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
def _bbox_from_landmarks(
|
| 16 |
-
landmarks: np.ndarray,
|
| 17 |
-
indices: list[int],
|
| 18 |
-
frame_w: int,
|
| 19 |
-
frame_h: int,
|
| 20 |
-
expand: float = 0.4,
|
| 21 |
-
) -> tuple[int, int, int, int]:
|
| 22 |
-
pts = landmarks[indices, :2]
|
| 23 |
-
px = pts[:, 0] * frame_w
|
| 24 |
-
py = pts[:, 1] * frame_h
|
| 25 |
-
|
| 26 |
-
x_min, x_max = px.min(), px.max()
|
| 27 |
-
y_min, y_max = py.min(), py.max()
|
| 28 |
-
w = x_max - x_min
|
| 29 |
-
h = y_max - y_min
|
| 30 |
-
cx = (x_min + x_max) / 2
|
| 31 |
-
cy = (y_min + y_max) / 2
|
| 32 |
-
|
| 33 |
-
size = max(w, h) * (1 + expand)
|
| 34 |
-
half = size / 2
|
| 35 |
-
|
| 36 |
-
x1 = int(max(cx - half, 0))
|
| 37 |
-
y1 = int(max(cy - half, 0))
|
| 38 |
-
x2 = int(min(cx + half, frame_w))
|
| 39 |
-
y2 = int(min(cy + half, frame_h))
|
| 40 |
-
|
| 41 |
-
return x1, y1, x2, y2
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
def extract_eye_crops(
|
| 45 |
-
frame: np.ndarray,
|
| 46 |
-
landmarks: np.ndarray,
|
| 47 |
-
expand: float = 0.4,
|
| 48 |
-
crop_size: int = CROP_SIZE,
|
| 49 |
-
) -> tuple[np.ndarray, np.ndarray, tuple, tuple]:
|
| 50 |
-
h, w = frame.shape[:2]
|
| 51 |
-
|
| 52 |
-
left_bbox = _bbox_from_landmarks(landmarks, LEFT_EYE_CONTOUR, w, h, expand)
|
| 53 |
-
right_bbox = _bbox_from_landmarks(landmarks, RIGHT_EYE_CONTOUR, w, h, expand)
|
| 54 |
-
|
| 55 |
-
left_crop = frame[left_bbox[1] : left_bbox[3], left_bbox[0] : left_bbox[2]]
|
| 56 |
-
right_crop = frame[right_bbox[1] : right_bbox[3], right_bbox[0] : right_bbox[2]]
|
| 57 |
-
|
| 58 |
-
left_crop = cv2.resize(left_crop, (crop_size, crop_size), interpolation=cv2.INTER_AREA)
|
| 59 |
-
right_crop = cv2.resize(right_crop, (crop_size, crop_size), interpolation=cv2.INTER_AREA)
|
| 60 |
-
|
| 61 |
-
return left_crop, right_crop, left_bbox, right_bbox
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
def crop_to_tensor(crop_bgr: np.ndarray):
|
| 65 |
-
import torch
|
| 66 |
-
|
| 67 |
-
rgb = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
|
| 68 |
-
for c in range(3):
|
| 69 |
-
rgb[:, :, c] = (rgb[:, :, c] - IMAGENET_MEAN[c]) / IMAGENET_STD[c]
|
| 70 |
-
return torch.from_numpy(rgb.transpose(2, 0, 1))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
models/cnn/eye_attention/train.py
DELETED
|
File without changes
|
models/cnn/notebooks/EyeCNN.ipynb
DELETED
|
@@ -1,107 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"nbformat": 4,
|
| 3 |
-
"nbformat_minor": 0,
|
| 4 |
-
"metadata": {
|
| 5 |
-
"colab": {
|
| 6 |
-
"provenance": [],
|
| 7 |
-
"gpuType": "T4"
|
| 8 |
-
},
|
| 9 |
-
"kernelspec": {
|
| 10 |
-
"name": "python3",
|
| 11 |
-
"display_name": "Python 3"
|
| 12 |
-
},
|
| 13 |
-
"language_info": {
|
| 14 |
-
"name": "python"
|
| 15 |
-
},
|
| 16 |
-
"accelerator": "GPU"
|
| 17 |
-
},
|
| 18 |
-
"cells": [
|
| 19 |
-
{
|
| 20 |
-
"cell_type": "code",
|
| 21 |
-
"source": [
|
| 22 |
-
"import os\n",
|
| 23 |
-
"import torch\n",
|
| 24 |
-
"import torch.nn as nn\n",
|
| 25 |
-
"import torch.optim as optim\n",
|
| 26 |
-
"from torch.utils.data import DataLoader\n",
|
| 27 |
-
"from torchvision import datasets, transforms\n",
|
| 28 |
-
"\n",
|
| 29 |
-
"from google.colab import drive\n",
|
| 30 |
-
"drive.mount('/content/drive')\n",
|
| 31 |
-
"!cp -r /content/drive/MyDrive/Dataset_clean /content/\n",
|
| 32 |
-
"\n",
|
| 33 |
-
"#Verify structure\n",
|
| 34 |
-
"for split in ['train', 'val', 'test']:\n",
|
| 35 |
-
" path = f'/content/Dataset_clean/{split}'\n",
|
| 36 |
-
" classes = os.listdir(path)\n",
|
| 37 |
-
" total = sum(len(os.listdir(os.path.join(path, c))) for c in classes)\n",
|
| 38 |
-
" print(f'{split}: {total} images | classes: {classes}')"
|
| 39 |
-
],
|
| 40 |
-
"metadata": {
|
| 41 |
-
"colab": {
|
| 42 |
-
"base_uri": "https://localhost:8080/"
|
| 43 |
-
},
|
| 44 |
-
"id": "sE1F3em-V5go",
|
| 45 |
-
"outputId": "2c73a9a6-a198-468c-a2cc-253b2de7cc3f"
|
| 46 |
-
},
|
| 47 |
-
"execution_count": null,
|
| 48 |
-
"outputs": [
|
| 49 |
-
{
|
| 50 |
-
"output_type": "stream",
|
| 51 |
-
"name": "stdout",
|
| 52 |
-
"text": [
|
| 53 |
-
"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
|
| 54 |
-
]
|
| 55 |
-
}
|
| 56 |
-
]
|
| 57 |
-
},
|
| 58 |
-
{
|
| 59 |
-
"cell_type": "code",
|
| 60 |
-
"execution_count": null,
|
| 61 |
-
"metadata": {
|
| 62 |
-
"id": "nG2bh66rQ56G"
|
| 63 |
-
},
|
| 64 |
-
"outputs": [],
|
| 65 |
-
"source": [
|
| 66 |
-
"class EyeCNN(nn.Module):\n",
|
| 67 |
-
" def __init__(self, num_classes=2):\n",
|
| 68 |
-
" super(EyeCNN, self).__init__()\n",
|
| 69 |
-
" self.conv_layers = nn.Sequential(\n",
|
| 70 |
-
" nn.Conv2d(3, 32, 3, 1, 1),\n",
|
| 71 |
-
" nn.BatchNorm2d(32),\n",
|
| 72 |
-
" nn.ReLU(),\n",
|
| 73 |
-
" nn.MaxPool2d(2, 2),\n",
|
| 74 |
-
"\n",
|
| 75 |
-
" nn.Conv2d(32, 64, 3, 1, 1),\n",
|
| 76 |
-
" nn.BatchNorm2d(64),\n",
|
| 77 |
-
" nn.ReLU(),\n",
|
| 78 |
-
" nn.MaxPool2d(2, 2),\n",
|
| 79 |
-
"\n",
|
| 80 |
-
" nn.Conv2d(64, 128, 3, 1, 1),\n",
|
| 81 |
-
" nn.BatchNorm2d(128),\n",
|
| 82 |
-
" nn.ReLU(),\n",
|
| 83 |
-
" nn.MaxPool2d(2, 2),\n",
|
| 84 |
-
"\n",
|
| 85 |
-
" nn.Conv2d(128, 256, 3, 1, 1),\n",
|
| 86 |
-
" nn.BatchNorm2d(256),\n",
|
| 87 |
-
" nn.ReLU(),\n",
|
| 88 |
-
" nn.MaxPool2d(2, 2)\n",
|
| 89 |
-
" )\n",
|
| 90 |
-
"\n",
|
| 91 |
-
" self.fc_layers = nn.Sequential(\n",
|
| 92 |
-
" nn.AdaptiveAvgPool2d((1, 1)),\n",
|
| 93 |
-
" nn.Flatten(),\n",
|
| 94 |
-
" nn.Linear(256, 512),\n",
|
| 95 |
-
" nn.ReLU(),\n",
|
| 96 |
-
" nn.Dropout(0.35),\n",
|
| 97 |
-
" nn.Linear(512, num_classes)\n",
|
| 98 |
-
" )\n",
|
| 99 |
-
"\n",
|
| 100 |
-
" def forward(self, x):\n",
|
| 101 |
-
" x = self.conv_layers(x)\n",
|
| 102 |
-
" x = self.fc_layers(x)\n",
|
| 103 |
-
" return x"
|
| 104 |
-
]
|
| 105 |
-
}
|
| 106 |
-
]
|
| 107 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
models/cnn/notebooks/EyeCNN_Train_Evaluate_new.ipynb
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
models/cnn/notebooks/EyeCNN_Training_Evaluate.ipynb
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
models/cnn/notebooks/README.md
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
# GAP Large Project
|
|
|
|
|
|
models/eye_classifier.py
DELETED
|
@@ -1,69 +0,0 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
from abc import ABC, abstractmethod
|
| 4 |
-
|
| 5 |
-
import numpy as np
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
class EyeClassifier(ABC):
|
| 9 |
-
@property
|
| 10 |
-
@abstractmethod
|
| 11 |
-
def name(self) -> str:
|
| 12 |
-
pass
|
| 13 |
-
|
| 14 |
-
@abstractmethod
|
| 15 |
-
def predict_score(self, crops_bgr: list[np.ndarray]) -> float:
|
| 16 |
-
pass
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
class GeometricOnlyClassifier(EyeClassifier):
|
| 20 |
-
@property
|
| 21 |
-
def name(self) -> str:
|
| 22 |
-
return "geometric"
|
| 23 |
-
|
| 24 |
-
def predict_score(self, crops_bgr: list[np.ndarray]) -> float:
|
| 25 |
-
return 1.0
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
class YOLOv11Classifier(EyeClassifier):
|
| 29 |
-
def __init__(self, checkpoint_path: str, device: str = "cpu"):
|
| 30 |
-
from ultralytics import YOLO
|
| 31 |
-
|
| 32 |
-
self._model = YOLO(checkpoint_path)
|
| 33 |
-
self._device = device
|
| 34 |
-
|
| 35 |
-
names = self._model.names
|
| 36 |
-
self._attentive_idx = None
|
| 37 |
-
for idx, cls_name in names.items():
|
| 38 |
-
if cls_name in ("open", "attentive"):
|
| 39 |
-
self._attentive_idx = idx
|
| 40 |
-
break
|
| 41 |
-
if self._attentive_idx is None:
|
| 42 |
-
self._attentive_idx = max(names.keys())
|
| 43 |
-
print(f"[YOLO] Classes: {names}, attentive_idx={self._attentive_idx}")
|
| 44 |
-
|
| 45 |
-
@property
|
| 46 |
-
def name(self) -> str:
|
| 47 |
-
return "yolo"
|
| 48 |
-
|
| 49 |
-
def predict_score(self, crops_bgr: list[np.ndarray]) -> float:
|
| 50 |
-
if not crops_bgr:
|
| 51 |
-
return 1.0
|
| 52 |
-
results = self._model.predict(crops_bgr, device=self._device, verbose=False)
|
| 53 |
-
scores = [float(r.probs.data[self._attentive_idx]) for r in results]
|
| 54 |
-
return sum(scores) / len(scores) if scores else 1.0
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
def load_eye_classifier(
|
| 58 |
-
path: str | None = None,
|
| 59 |
-
backend: str = "yolo",
|
| 60 |
-
device: str = "cpu",
|
| 61 |
-
) -> EyeClassifier:
|
| 62 |
-
if path is None or backend == "geometric":
|
| 63 |
-
return GeometricOnlyClassifier()
|
| 64 |
-
|
| 65 |
-
try:
|
| 66 |
-
return YOLOv11Classifier(path, device=device)
|
| 67 |
-
except ImportError:
|
| 68 |
-
print("[CLASSIFIER] ultralytics required for YOLO. pip install ultralytics")
|
| 69 |
-
raise
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
models/eye_crop.py
DELETED
|
@@ -1,77 +0,0 @@
|
|
| 1 |
-
import cv2
|
| 2 |
-
import numpy as np
|
| 3 |
-
|
| 4 |
-
from models.face_mesh import FaceMeshDetector
|
| 5 |
-
|
| 6 |
-
LEFT_EYE_CONTOUR = FaceMeshDetector.LEFT_EYE_INDICES
|
| 7 |
-
RIGHT_EYE_CONTOUR = FaceMeshDetector.RIGHT_EYE_INDICES
|
| 8 |
-
|
| 9 |
-
IMAGENET_MEAN = (0.485, 0.456, 0.406)
|
| 10 |
-
IMAGENET_STD = (0.229, 0.224, 0.225)
|
| 11 |
-
|
| 12 |
-
CROP_SIZE = 96
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
def _bbox_from_landmarks(
|
| 16 |
-
landmarks: np.ndarray,
|
| 17 |
-
indices: list[int],
|
| 18 |
-
frame_w: int,
|
| 19 |
-
frame_h: int,
|
| 20 |
-
expand: float = 0.4,
|
| 21 |
-
) -> tuple[int, int, int, int]:
|
| 22 |
-
pts = landmarks[indices, :2]
|
| 23 |
-
px = pts[:, 0] * frame_w
|
| 24 |
-
py = pts[:, 1] * frame_h
|
| 25 |
-
|
| 26 |
-
x_min, x_max = px.min(), px.max()
|
| 27 |
-
y_min, y_max = py.min(), py.max()
|
| 28 |
-
w = x_max - x_min
|
| 29 |
-
h = y_max - y_min
|
| 30 |
-
cx = (x_min + x_max) / 2
|
| 31 |
-
cy = (y_min + y_max) / 2
|
| 32 |
-
|
| 33 |
-
size = max(w, h) * (1 + expand)
|
| 34 |
-
half = size / 2
|
| 35 |
-
|
| 36 |
-
x1 = int(max(cx - half, 0))
|
| 37 |
-
y1 = int(max(cy - half, 0))
|
| 38 |
-
x2 = int(min(cx + half, frame_w))
|
| 39 |
-
y2 = int(min(cy + half, frame_h))
|
| 40 |
-
|
| 41 |
-
return x1, y1, x2, y2
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
def extract_eye_crops(
|
| 45 |
-
frame: np.ndarray,
|
| 46 |
-
landmarks: np.ndarray,
|
| 47 |
-
expand: float = 0.4,
|
| 48 |
-
crop_size: int = CROP_SIZE,
|
| 49 |
-
) -> tuple[np.ndarray, np.ndarray, tuple, tuple]:
|
| 50 |
-
h, w = frame.shape[:2]
|
| 51 |
-
|
| 52 |
-
left_bbox = _bbox_from_landmarks(landmarks, LEFT_EYE_CONTOUR, w, h, expand)
|
| 53 |
-
right_bbox = _bbox_from_landmarks(landmarks, RIGHT_EYE_CONTOUR, w, h, expand)
|
| 54 |
-
|
| 55 |
-
left_crop = frame[left_bbox[1] : left_bbox[3], left_bbox[0] : left_bbox[2]]
|
| 56 |
-
right_crop = frame[right_bbox[1] : right_bbox[3], right_bbox[0] : right_bbox[2]]
|
| 57 |
-
|
| 58 |
-
if left_crop.size == 0:
|
| 59 |
-
left_crop = np.zeros((crop_size, crop_size, 3), dtype=np.uint8)
|
| 60 |
-
else:
|
| 61 |
-
left_crop = cv2.resize(left_crop, (crop_size, crop_size), interpolation=cv2.INTER_AREA)
|
| 62 |
-
|
| 63 |
-
if right_crop.size == 0:
|
| 64 |
-
right_crop = np.zeros((crop_size, crop_size, 3), dtype=np.uint8)
|
| 65 |
-
else:
|
| 66 |
-
right_crop = cv2.resize(right_crop, (crop_size, crop_size), interpolation=cv2.INTER_AREA)
|
| 67 |
-
|
| 68 |
-
return left_crop, right_crop, left_bbox, right_bbox
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
def crop_to_tensor(crop_bgr: np.ndarray):
|
| 72 |
-
import torch
|
| 73 |
-
|
| 74 |
-
rgb = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
|
| 75 |
-
for c in range(3):
|
| 76 |
-
rgb[:, :, c] = (rgb[:, :, c] - IMAGENET_MEAN[c]) / IMAGENET_STD[c]
|
| 77 |
-
return torch.from_numpy(rgb.transpose(2, 0, 1))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
models/xgboost/checkpoints/face_orientation_best.json
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
public/assets/111.jpg
DELETED
|
Binary file (73.4 kB)
|
|
|
src/assets/react.svg
DELETED
ui/live_demo.py
CHANGED
|
@@ -130,9 +130,6 @@ def main():
|
|
| 130 |
parser.add_argument("--camera", type=int, default=0)
|
| 131 |
parser.add_argument("--mlp-dir", type=str, default=None)
|
| 132 |
parser.add_argument("--max-angle", type=float, default=22.0)
|
| 133 |
-
parser.add_argument("--eye-model", type=str, default=None)
|
| 134 |
-
parser.add_argument("--eye-backend", type=str, default="yolo", choices=["yolo", "geometric"])
|
| 135 |
-
parser.add_argument("--eye-blend", type=float, default=0.5)
|
| 136 |
parser.add_argument("--xgb-path", type=str, default=None)
|
| 137 |
parser.add_argument("--xgb", action="store_true", help="Start in XGBoost mode")
|
| 138 |
args = parser.parse_args()
|
|
@@ -148,9 +145,6 @@ def main():
|
|
| 148 |
# 1. Geometric
|
| 149 |
pipelines[MODE_GEO] = FaceMeshPipeline(
|
| 150 |
max_angle=args.max_angle,
|
| 151 |
-
eye_model_path=args.eye_model,
|
| 152 |
-
eye_backend=args.eye_backend,
|
| 153 |
-
eye_blend=args.eye_blend,
|
| 154 |
detector=detector,
|
| 155 |
)
|
| 156 |
available_modes.append(MODE_GEO)
|
|
@@ -174,9 +168,6 @@ def main():
|
|
| 174 |
try:
|
| 175 |
pipelines[MODE_HYBRID] = HybridFocusPipeline(
|
| 176 |
model_dir=model_dir,
|
| 177 |
-
eye_model_path=args.eye_model,
|
| 178 |
-
eye_backend=args.eye_backend,
|
| 179 |
-
eye_blend=args.eye_blend,
|
| 180 |
max_angle=args.max_angle,
|
| 181 |
detector=detector,
|
| 182 |
)
|
|
@@ -235,11 +226,6 @@ def main():
|
|
| 235 |
|
| 236 |
if hasattr(pipeline, "head_pose"):
|
| 237 |
pipeline.head_pose.draw_axes(frame, lm)
|
| 238 |
-
if result.get("left_bbox") and result.get("right_bbox"):
|
| 239 |
-
lx1, ly1, lx2, ly2 = result["left_bbox"]
|
| 240 |
-
rx1, ry1, rx2, ry2 = result["right_bbox"]
|
| 241 |
-
cv2.rectangle(frame, (lx1, ly1), (lx2, ly2), YELLOW, 1)
|
| 242 |
-
cv2.rectangle(frame, (rx1, ry1), (rx2, ry2), YELLOW, 1)
|
| 243 |
|
| 244 |
# --- HUD ---
|
| 245 |
status = "FOCUSED" if result["is_focused"] else "NOT FOCUSED"
|
|
|
|
| 130 |
parser.add_argument("--camera", type=int, default=0)
|
| 131 |
parser.add_argument("--mlp-dir", type=str, default=None)
|
| 132 |
parser.add_argument("--max-angle", type=float, default=22.0)
|
|
|
|
|
|
|
|
|
|
| 133 |
parser.add_argument("--xgb-path", type=str, default=None)
|
| 134 |
parser.add_argument("--xgb", action="store_true", help="Start in XGBoost mode")
|
| 135 |
args = parser.parse_args()
|
|
|
|
| 145 |
# 1. Geometric
|
| 146 |
pipelines[MODE_GEO] = FaceMeshPipeline(
|
| 147 |
max_angle=args.max_angle,
|
|
|
|
|
|
|
|
|
|
| 148 |
detector=detector,
|
| 149 |
)
|
| 150 |
available_modes.append(MODE_GEO)
|
|
|
|
| 168 |
try:
|
| 169 |
pipelines[MODE_HYBRID] = HybridFocusPipeline(
|
| 170 |
model_dir=model_dir,
|
|
|
|
|
|
|
|
|
|
| 171 |
max_angle=args.max_angle,
|
| 172 |
detector=detector,
|
| 173 |
)
|
|
|
|
| 226 |
|
| 227 |
if hasattr(pipeline, "head_pose"):
|
| 228 |
pipeline.head_pose.draw_axes(frame, lm)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
|
| 230 |
# --- HUD ---
|
| 231 |
status = "FOCUSED" if result["is_focused"] else "NOT FOCUSED"
|
ui/pipeline.py
CHANGED
|
@@ -12,18 +12,19 @@ _PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
|
| 12 |
if _PROJECT_ROOT not in sys.path:
|
| 13 |
sys.path.insert(0, _PROJECT_ROOT)
|
| 14 |
|
|
|
|
| 15 |
from models.face_mesh import FaceMeshDetector
|
| 16 |
from models.head_pose import HeadPoseEstimator
|
| 17 |
from models.eye_scorer import EyeBehaviourScorer, compute_mar, MAR_YAWN_THRESHOLD
|
| 18 |
-
from models.eye_crop import extract_eye_crops
|
| 19 |
-
from models.eye_classifier import load_eye_classifier, GeometricOnlyClassifier
|
| 20 |
from models.collect_features import FEATURE_NAMES, TemporalTracker, extract_features
|
| 21 |
|
|
|
|
|
|
|
|
|
|
| 22 |
_FEAT_IDX = {name: i for i, name in enumerate(FEATURE_NAMES)}
|
| 23 |
|
| 24 |
|
| 25 |
def _clip_features(vec):
|
| 26 |
-
"""Clip raw features to the same ranges used during training."""
|
| 27 |
out = vec.copy()
|
| 28 |
_i = _FEAT_IDX
|
| 29 |
|
|
@@ -49,8 +50,6 @@ def _clip_features(vec):
|
|
| 49 |
|
| 50 |
|
| 51 |
class _OutputSmoother:
|
| 52 |
-
"""EMA smoothing on focus score with no-face grace period."""
|
| 53 |
-
|
| 54 |
def __init__(self, alpha: float = 0.3, grace_frames: int = 15):
|
| 55 |
self._alpha = alpha
|
| 56 |
self._grace = grace_frames
|
|
@@ -73,19 +72,17 @@ class _OutputSmoother:
|
|
| 73 |
|
| 74 |
|
| 75 |
DEFAULT_HYBRID_CONFIG = {
|
| 76 |
-
"w_mlp": 0.
|
| 77 |
-
"w_geo": 0.
|
| 78 |
-
"threshold": 0.
|
| 79 |
"use_yawn_veto": True,
|
| 80 |
-
"geo_face_weight": 0.
|
| 81 |
-
"geo_eye_weight": 0.
|
| 82 |
"mar_yawn_threshold": float(MAR_YAWN_THRESHOLD),
|
| 83 |
}
|
| 84 |
|
| 85 |
|
| 86 |
class _RuntimeFeatureEngine:
|
| 87 |
-
"""Runtime feature engineering (magnitudes, velocities, variances) with EMA baselines."""
|
| 88 |
-
|
| 89 |
_MAG_FEATURES = ["pitch", "yaw", "head_deviation", "gaze_offset", "v_gaze", "h_gaze"]
|
| 90 |
_VEL_FEATURES = ["pitch", "yaw", "h_gaze", "v_gaze", "head_deviation", "gaze_offset"]
|
| 91 |
_VAR_FEATURES = ["h_gaze", "v_gaze", "pitch"]
|
|
@@ -171,12 +168,9 @@ class FaceMeshPipeline:
|
|
| 171 |
def __init__(
|
| 172 |
self,
|
| 173 |
max_angle: float = 22.0,
|
| 174 |
-
alpha: float = 0.
|
| 175 |
-
beta: float = 0.
|
| 176 |
threshold: float = 0.55,
|
| 177 |
-
eye_model_path: str | None = None,
|
| 178 |
-
eye_backend: str = "yolo",
|
| 179 |
-
eye_blend: float = 0.5,
|
| 180 |
detector=None,
|
| 181 |
):
|
| 182 |
self.detector = detector or FaceMeshDetector()
|
|
@@ -186,16 +180,6 @@ class FaceMeshPipeline:
|
|
| 186 |
self.alpha = alpha
|
| 187 |
self.beta = beta
|
| 188 |
self.threshold = threshold
|
| 189 |
-
self.eye_blend = eye_blend
|
| 190 |
-
|
| 191 |
-
self.eye_classifier = load_eye_classifier(
|
| 192 |
-
path=eye_model_path if eye_model_path and os.path.exists(eye_model_path) else None,
|
| 193 |
-
backend=eye_backend,
|
| 194 |
-
device="cpu",
|
| 195 |
-
)
|
| 196 |
-
self._has_eye_model = not isinstance(self.eye_classifier, GeometricOnlyClassifier)
|
| 197 |
-
if self._has_eye_model:
|
| 198 |
-
print(f"[PIPELINE] Eye model: {self.eye_classifier.name}")
|
| 199 |
self._smoother = _OutputSmoother()
|
| 200 |
|
| 201 |
def process_frame(self, bgr_frame: np.ndarray) -> dict:
|
|
@@ -227,17 +211,7 @@ class FaceMeshPipeline:
|
|
| 227 |
if angles is not None:
|
| 228 |
out["yaw"], out["pitch"], out["roll"] = angles
|
| 229 |
out["s_face"] = self.head_pose.score(landmarks, w, h)
|
| 230 |
-
|
| 231 |
-
s_eye_geo = self.eye_scorer.score(landmarks)
|
| 232 |
-
if self._has_eye_model:
|
| 233 |
-
left_crop, right_crop, left_bbox, right_bbox = extract_eye_crops(bgr_frame, landmarks)
|
| 234 |
-
out["left_bbox"] = left_bbox
|
| 235 |
-
out["right_bbox"] = right_bbox
|
| 236 |
-
s_eye_model = self.eye_classifier.predict_score([left_crop, right_crop])
|
| 237 |
-
out["s_eye"] = (1.0 - self.eye_blend) * s_eye_geo + self.eye_blend * s_eye_model
|
| 238 |
-
else:
|
| 239 |
-
out["s_eye"] = s_eye_geo
|
| 240 |
-
|
| 241 |
out["mar"] = compute_mar(landmarks)
|
| 242 |
out["is_yawning"] = out["mar"] > MAR_YAWN_THRESHOLD
|
| 243 |
|
|
@@ -249,10 +223,6 @@ class FaceMeshPipeline:
|
|
| 249 |
|
| 250 |
return out
|
| 251 |
|
| 252 |
-
@property
|
| 253 |
-
def has_eye_model(self) -> bool:
|
| 254 |
-
return self._has_eye_model
|
| 255 |
-
|
| 256 |
def reset_session(self):
|
| 257 |
self._smoother.reset()
|
| 258 |
|
|
@@ -318,7 +288,7 @@ def _load_hybrid_config(model_dir: str, config_path: str | None = None):
|
|
| 318 |
|
| 319 |
|
| 320 |
class MLPPipeline:
|
| 321 |
-
def __init__(self, model_dir=None, detector=None, threshold=0.
|
| 322 |
if model_dir is None:
|
| 323 |
# Check primary location
|
| 324 |
model_dir = os.path.join(_PROJECT_ROOT, "MLP", "models")
|
|
@@ -332,11 +302,7 @@ class MLPPipeline:
|
|
| 332 |
self._scaler = joblib.load(scaler_path)
|
| 333 |
meta = np.load(meta_path, allow_pickle=True)
|
| 334 |
self._feature_names = list(meta["feature_names"])
|
| 335 |
-
|
| 336 |
-
norm_feats = list(meta["norm_features"]) if "norm_features" in meta else []
|
| 337 |
-
self._engine = _RuntimeFeatureEngine(FEATURE_NAMES, norm_features=norm_feats)
|
| 338 |
-
ext_names = self._engine.extended_names
|
| 339 |
-
self._indices = [ext_names.index(n) for n in self._feature_names]
|
| 340 |
|
| 341 |
self._detector = detector or FaceMeshDetector()
|
| 342 |
self._owns_detector = detector is None
|
|
@@ -378,8 +344,7 @@ class MLPPipeline:
|
|
| 378 |
out["s_eye"] = float(vec[_FEAT_IDX["s_eye"]])
|
| 379 |
out["mar"] = float(vec[_FEAT_IDX["mar"]])
|
| 380 |
|
| 381 |
-
|
| 382 |
-
X = ext_vec[self._indices].reshape(1, -1).astype(np.float64)
|
| 383 |
X_sc = self._scaler.transform(X)
|
| 384 |
if hasattr(self._mlp, "predict_proba"):
|
| 385 |
mlp_prob = float(self._mlp.predict_proba(X_sc)[0, 1])
|
|
@@ -410,9 +375,6 @@ class HybridFocusPipeline:
|
|
| 410 |
self,
|
| 411 |
model_dir=None,
|
| 412 |
config_path: str | None = None,
|
| 413 |
-
eye_model_path: str | None = None,
|
| 414 |
-
eye_backend: str = "yolo",
|
| 415 |
-
eye_blend: float = 0.5,
|
| 416 |
max_angle: float = 22.0,
|
| 417 |
detector=None,
|
| 418 |
):
|
|
@@ -426,11 +388,7 @@ class HybridFocusPipeline:
|
|
| 426 |
self._scaler = joblib.load(scaler_path)
|
| 427 |
meta = np.load(meta_path, allow_pickle=True)
|
| 428 |
self._feature_names = list(meta["feature_names"])
|
| 429 |
-
|
| 430 |
-
norm_feats = list(meta["norm_features"]) if "norm_features" in meta else []
|
| 431 |
-
self._engine = _RuntimeFeatureEngine(FEATURE_NAMES, norm_features=norm_feats)
|
| 432 |
-
ext_names = self._engine.extended_names
|
| 433 |
-
self._indices = [ext_names.index(n) for n in self._feature_names]
|
| 434 |
|
| 435 |
self._cfg, self._cfg_path = _load_hybrid_config(model_dir=model_dir, config_path=config_path)
|
| 436 |
|
|
@@ -439,16 +397,6 @@ class HybridFocusPipeline:
|
|
| 439 |
self._head_pose = HeadPoseEstimator(max_angle=max_angle)
|
| 440 |
self._eye_scorer = EyeBehaviourScorer()
|
| 441 |
self._temporal = TemporalTracker()
|
| 442 |
-
self._eye_blend = eye_blend
|
| 443 |
-
self.eye_classifier = load_eye_classifier(
|
| 444 |
-
path=eye_model_path if eye_model_path and os.path.exists(eye_model_path) else None,
|
| 445 |
-
backend=eye_backend,
|
| 446 |
-
device="cpu",
|
| 447 |
-
)
|
| 448 |
-
self._has_eye_model = not isinstance(self.eye_classifier, GeometricOnlyClassifier)
|
| 449 |
-
if self._has_eye_model:
|
| 450 |
-
print(f"[HYBRID] Eye model: {self.eye_classifier.name}")
|
| 451 |
-
|
| 452 |
self.head_pose = self._head_pose
|
| 453 |
self._smoother = _OutputSmoother()
|
| 454 |
|
|
@@ -458,10 +406,6 @@ class HybridFocusPipeline:
|
|
| 458 |
f"threshold={self._cfg['threshold']:.2f}"
|
| 459 |
)
|
| 460 |
|
| 461 |
-
@property
|
| 462 |
-
def has_eye_model(self) -> bool:
|
| 463 |
-
return self._has_eye_model
|
| 464 |
-
|
| 465 |
@property
|
| 466 |
def config(self) -> dict:
|
| 467 |
return dict(self._cfg)
|
|
@@ -498,15 +442,8 @@ class HybridFocusPipeline:
|
|
| 498 |
out["yaw"], out["pitch"], out["roll"] = angles
|
| 499 |
|
| 500 |
out["s_face"] = self._head_pose.score(landmarks, w, h)
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
left_crop, right_crop, left_bbox, right_bbox = extract_eye_crops(bgr_frame, landmarks)
|
| 504 |
-
out["left_bbox"] = left_bbox
|
| 505 |
-
out["right_bbox"] = right_bbox
|
| 506 |
-
s_eye_model = self.eye_classifier.predict_score([left_crop, right_crop])
|
| 507 |
-
out["s_eye"] = (1.0 - self._eye_blend) * s_eye_geo + self._eye_blend * s_eye_model
|
| 508 |
-
else:
|
| 509 |
-
out["s_eye"] = s_eye_geo
|
| 510 |
|
| 511 |
geo_score = (
|
| 512 |
self._cfg["geo_face_weight"] * out["s_face"] +
|
|
@@ -528,8 +465,7 @@ class HybridFocusPipeline:
|
|
| 528 |
}
|
| 529 |
vec = extract_features(landmarks, w, h, self._head_pose, self._eye_scorer, self._temporal, _pre=pre)
|
| 530 |
vec = _clip_features(vec)
|
| 531 |
-
|
| 532 |
-
X = ext_vec[self._indices].reshape(1, -1).astype(np.float64)
|
| 533 |
X_sc = self._scaler.transform(X)
|
| 534 |
if hasattr(self._mlp, "predict_proba"):
|
| 535 |
mlp_prob = float(self._mlp.predict_proba(X_sc)[0, 1])
|
|
@@ -559,15 +495,12 @@ class HybridFocusPipeline:
|
|
| 559 |
|
| 560 |
|
| 561 |
class XGBoostPipeline:
|
| 562 |
-
"""Real-time XGBoost inference pipeline using the same feature extraction as MLPPipeline."""
|
| 563 |
-
|
| 564 |
-
# Same 10 features used during training (data_preparation.prepare_dataset.SELECTED_FEATURES)
|
| 565 |
SELECTED = [
|
| 566 |
'head_deviation', 's_face', 's_eye', 'h_gaze', 'pitch',
|
| 567 |
'ear_left', 'ear_avg', 'ear_right', 'gaze_offset', 'perclos',
|
| 568 |
]
|
| 569 |
|
| 570 |
-
def __init__(self, model_path=None, threshold=0.
|
| 571 |
from xgboost import XGBClassifier
|
| 572 |
|
| 573 |
if model_path is None:
|
|
|
|
| 12 |
if _PROJECT_ROOT not in sys.path:
|
| 13 |
sys.path.insert(0, _PROJECT_ROOT)
|
| 14 |
|
| 15 |
+
from data_preparation.prepare_dataset import SELECTED_FEATURES
|
| 16 |
from models.face_mesh import FaceMeshDetector
|
| 17 |
from models.head_pose import HeadPoseEstimator
|
| 18 |
from models.eye_scorer import EyeBehaviourScorer, compute_mar, MAR_YAWN_THRESHOLD
|
|
|
|
|
|
|
| 19 |
from models.collect_features import FEATURE_NAMES, TemporalTracker, extract_features
|
| 20 |
|
| 21 |
+
# Same 10 features used for MLP training (prepare_dataset) and inference
|
| 22 |
+
MLP_FEATURE_NAMES = SELECTED_FEATURES["face_orientation"]
|
| 23 |
+
|
| 24 |
_FEAT_IDX = {name: i for i, name in enumerate(FEATURE_NAMES)}
|
| 25 |
|
| 26 |
|
| 27 |
def _clip_features(vec):
|
|
|
|
| 28 |
out = vec.copy()
|
| 29 |
_i = _FEAT_IDX
|
| 30 |
|
|
|
|
| 50 |
|
| 51 |
|
| 52 |
class _OutputSmoother:
|
|
|
|
|
|
|
| 53 |
def __init__(self, alpha: float = 0.3, grace_frames: int = 15):
|
| 54 |
self._alpha = alpha
|
| 55 |
self._grace = grace_frames
|
|
|
|
| 72 |
|
| 73 |
|
| 74 |
DEFAULT_HYBRID_CONFIG = {
|
| 75 |
+
"w_mlp": 0.3,
|
| 76 |
+
"w_geo": 0.7,
|
| 77 |
+
"threshold": 0.35,
|
| 78 |
"use_yawn_veto": True,
|
| 79 |
+
"geo_face_weight": 0.7,
|
| 80 |
+
"geo_eye_weight": 0.3,
|
| 81 |
"mar_yawn_threshold": float(MAR_YAWN_THRESHOLD),
|
| 82 |
}
|
| 83 |
|
| 84 |
|
| 85 |
class _RuntimeFeatureEngine:
|
|
|
|
|
|
|
| 86 |
_MAG_FEATURES = ["pitch", "yaw", "head_deviation", "gaze_offset", "v_gaze", "h_gaze"]
|
| 87 |
_VEL_FEATURES = ["pitch", "yaw", "h_gaze", "v_gaze", "head_deviation", "gaze_offset"]
|
| 88 |
_VAR_FEATURES = ["h_gaze", "v_gaze", "pitch"]
|
|
|
|
| 168 |
def __init__(
|
| 169 |
self,
|
| 170 |
max_angle: float = 22.0,
|
| 171 |
+
alpha: float = 0.7,
|
| 172 |
+
beta: float = 0.3,
|
| 173 |
threshold: float = 0.55,
|
|
|
|
|
|
|
|
|
|
| 174 |
detector=None,
|
| 175 |
):
|
| 176 |
self.detector = detector or FaceMeshDetector()
|
|
|
|
| 180 |
self.alpha = alpha
|
| 181 |
self.beta = beta
|
| 182 |
self.threshold = threshold
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
self._smoother = _OutputSmoother()
|
| 184 |
|
| 185 |
def process_frame(self, bgr_frame: np.ndarray) -> dict:
|
|
|
|
| 211 |
if angles is not None:
|
| 212 |
out["yaw"], out["pitch"], out["roll"] = angles
|
| 213 |
out["s_face"] = self.head_pose.score(landmarks, w, h)
|
| 214 |
+
out["s_eye"] = self.eye_scorer.score(landmarks)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
out["mar"] = compute_mar(landmarks)
|
| 216 |
out["is_yawning"] = out["mar"] > MAR_YAWN_THRESHOLD
|
| 217 |
|
|
|
|
| 223 |
|
| 224 |
return out
|
| 225 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
def reset_session(self):
|
| 227 |
self._smoother.reset()
|
| 228 |
|
|
|
|
| 288 |
|
| 289 |
|
| 290 |
class MLPPipeline:
|
| 291 |
+
def __init__(self, model_dir=None, detector=None, threshold=0.23):
|
| 292 |
if model_dir is None:
|
| 293 |
# Check primary location
|
| 294 |
model_dir = os.path.join(_PROJECT_ROOT, "MLP", "models")
|
|
|
|
| 302 |
self._scaler = joblib.load(scaler_path)
|
| 303 |
meta = np.load(meta_path, allow_pickle=True)
|
| 304 |
self._feature_names = list(meta["feature_names"])
|
| 305 |
+
self._indices = [FEATURE_NAMES.index(n) for n in self._feature_names]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 306 |
|
| 307 |
self._detector = detector or FaceMeshDetector()
|
| 308 |
self._owns_detector = detector is None
|
|
|
|
| 344 |
out["s_eye"] = float(vec[_FEAT_IDX["s_eye"]])
|
| 345 |
out["mar"] = float(vec[_FEAT_IDX["mar"]])
|
| 346 |
|
| 347 |
+
X = vec[self._indices].reshape(1, -1).astype(np.float64)
|
|
|
|
| 348 |
X_sc = self._scaler.transform(X)
|
| 349 |
if hasattr(self._mlp, "predict_proba"):
|
| 350 |
mlp_prob = float(self._mlp.predict_proba(X_sc)[0, 1])
|
|
|
|
| 375 |
self,
|
| 376 |
model_dir=None,
|
| 377 |
config_path: str | None = None,
|
|
|
|
|
|
|
|
|
|
| 378 |
max_angle: float = 22.0,
|
| 379 |
detector=None,
|
| 380 |
):
|
|
|
|
| 388 |
self._scaler = joblib.load(scaler_path)
|
| 389 |
meta = np.load(meta_path, allow_pickle=True)
|
| 390 |
self._feature_names = list(meta["feature_names"])
|
| 391 |
+
self._indices = [FEATURE_NAMES.index(n) for n in self._feature_names]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 392 |
|
| 393 |
self._cfg, self._cfg_path = _load_hybrid_config(model_dir=model_dir, config_path=config_path)
|
| 394 |
|
|
|
|
| 397 |
self._head_pose = HeadPoseEstimator(max_angle=max_angle)
|
| 398 |
self._eye_scorer = EyeBehaviourScorer()
|
| 399 |
self._temporal = TemporalTracker()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 400 |
self.head_pose = self._head_pose
|
| 401 |
self._smoother = _OutputSmoother()
|
| 402 |
|
|
|
|
| 406 |
f"threshold={self._cfg['threshold']:.2f}"
|
| 407 |
)
|
| 408 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
@property
|
| 410 |
def config(self) -> dict:
|
| 411 |
return dict(self._cfg)
|
|
|
|
| 442 |
out["yaw"], out["pitch"], out["roll"] = angles
|
| 443 |
|
| 444 |
out["s_face"] = self._head_pose.score(landmarks, w, h)
|
| 445 |
+
out["s_eye"] = self._eye_scorer.score(landmarks)
|
| 446 |
+
s_eye_geo = out["s_eye"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 447 |
|
| 448 |
geo_score = (
|
| 449 |
self._cfg["geo_face_weight"] * out["s_face"] +
|
|
|
|
| 465 |
}
|
| 466 |
vec = extract_features(landmarks, w, h, self._head_pose, self._eye_scorer, self._temporal, _pre=pre)
|
| 467 |
vec = _clip_features(vec)
|
| 468 |
+
X = vec[self._indices].reshape(1, -1).astype(np.float64)
|
|
|
|
| 469 |
X_sc = self._scaler.transform(X)
|
| 470 |
if hasattr(self._mlp, "predict_proba"):
|
| 471 |
mlp_prob = float(self._mlp.predict_proba(X_sc)[0, 1])
|
|
|
|
| 495 |
|
| 496 |
|
| 497 |
class XGBoostPipeline:
|
|
|
|
|
|
|
|
|
|
| 498 |
SELECTED = [
|
| 499 |
'head_deviation', 's_face', 's_eye', 'h_gaze', 'pitch',
|
| 500 |
'ear_left', 'ear_avg', 'ear_right', 'gaze_offset', 'perclos',
|
| 501 |
]
|
| 502 |
|
| 503 |
+
def __init__(self, model_path=None, threshold=0.38):
|
| 504 |
from xgboost import XGBClassifier
|
| 505 |
|
| 506 |
if model_path is None:
|
yolov8n.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:31e20dde3def09e2cf938c7be6fe23d9150bbbe503982af13345706515f2ef95
|
| 3 |
-
size 6534387
|
|
|
|
|
|
|
|
|
|
|
|