Spaces:
Sleeping
Sleeping
Abdelrahman Almatrooshi commited on
Commit ·
82d2ab7
1
Parent(s): 2eaf50d
integrated open/closed eye cnn in script
Browse files- best_eye_cnn.pth +3 -0
- models/cnn/CNN_MODEL/.claude/settings.local.json +7 -7
- models/cnn/CNN_MODEL/.gitattributes +1 -1
- models/cnn/CNN_MODEL/.gitignore +4 -4
- models/cnn/CNN_MODEL/README.md +74 -74
- models/cnn/CNN_MODEL/notebooks/eye_classifier_colab.ipynb +0 -0
- models/cnn/CNN_MODEL/scripts/focus_infer.py +199 -199
- models/cnn/CNN_MODEL/scripts/predict_image.py +49 -49
- models/cnn/CNN_MODEL/scripts/video_infer.py +281 -281
- models/cnn/CNN_MODEL/scripts/webcam_live.py +184 -184
- models/cnn/eye_attention/__init__.py +1 -1
- models/cnn/eye_attention/classifier.py +106 -6
- models/cnn/eye_attention/crop.py +70 -70
- ui/README.md +20 -20
- ui/live_demo.py +224 -224
- ui/pipeline.py +3 -0
best_eye_cnn.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c3c3d85de013387e8583fe7218daabb83a8a6f46ca5bcacbf6fbf3619b688da8
|
| 3 |
+
size 2103809
|
models/cnn/CNN_MODEL/.claude/settings.local.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
-
{
|
| 2 |
-
"permissions": {
|
| 3 |
-
"allow": [
|
| 4 |
-
"Bash(# Check Dataset_subset counts echo \"\"=== Dataset_subset/train/open ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/train/open/ | wc -l && echo \"\"=== Dataset_subset/train/closed ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/train/closed/ | wc -l && echo \"\"=== Dataset_subset/val/open ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/val/open/ | wc -l && echo \"\"=== Dataset_subset/val/closed ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/val/closed/)"
|
| 5 |
-
]
|
| 6 |
-
}
|
| 7 |
-
}
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"permissions": {
|
| 3 |
+
"allow": [
|
| 4 |
+
"Bash(# Check Dataset_subset counts echo \"\"=== Dataset_subset/train/open ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/train/open/ | wc -l && echo \"\"=== Dataset_subset/train/closed ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/train/closed/ | wc -l && echo \"\"=== Dataset_subset/val/open ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/val/open/ | wc -l && echo \"\"=== Dataset_subset/val/closed ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/val/closed/)"
|
| 5 |
+
]
|
| 6 |
+
}
|
| 7 |
+
}
|
models/cnn/CNN_MODEL/.gitattributes
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
DATA/** filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 1 |
+
DATA/** filter=lfs diff=lfs merge=lfs -text
|
models/cnn/CNN_MODEL/.gitignore
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
Dataset/train/
|
| 2 |
-
Dataset/val/
|
| 3 |
-
Dataset/test/
|
| 4 |
-
.DS_Store
|
|
|
|
| 1 |
+
Dataset/train/
|
| 2 |
+
Dataset/val/
|
| 3 |
+
Dataset/test/
|
| 4 |
+
.DS_Store
|
models/cnn/CNN_MODEL/README.md
CHANGED
|
@@ -1,74 +1,74 @@
|
|
| 1 |
-
# Eye Open / Closed Classifier (YOLOv11-CLS)
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
Binary classifier: **open** vs **closed** eyes.
|
| 5 |
-
Used as a baseline for eye-tracking, drowsiness, or focus detection.
|
| 6 |
-
|
| 7 |
-
---
|
| 8 |
-
|
| 9 |
-
## Model team task
|
| 10 |
-
|
| 11 |
-
- **Train** the YOLOv11s-cls eye classifier in a **separate notebook** (data split, epochs, GPU, export `best.pt`).
|
| 12 |
-
- Provide **trained weights** (`best.pt`) for this repo’s evaluation and inference scripts.
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
---
|
| 17 |
-
|
| 18 |
-
## Repo contents
|
| 19 |
-
|
| 20 |
-
- **notebooks/eye_classifier_colab.ipynb** — Data download (Kaggle), clean, split, undersample, **evaluate** (needs `best.pt` from model team), export.
|
| 21 |
-
- **scripts/predict_image.py** — Run classifier on single images (needs `best.pt`).
|
| 22 |
-
- **scripts/webcam_live.py** — Live webcam open/closed (needs `best.pt` + optional `weights/face_landmarker.task`).
|
| 23 |
-
- **scripts/video_infer.py** — Run on video files.
|
| 24 |
-
- **scripts/focus_infer.py** — Focus/attention inference.
|
| 25 |
-
- **weights/** — Put `best.pt` here; `face_landmarker.task` is downloaded on first webcam run if missing.
|
| 26 |
-
- **docs/** — Extra docs (e.g. UNNECESSARY_FILES.md if present).
|
| 27 |
-
|
| 28 |
-
---
|
| 29 |
-
|
| 30 |
-
## Dataset
|
| 31 |
-
|
| 32 |
-
- **Source:** [Kaggle — open/closed eyes](https://www.kaggle.com/datasets/sehriyarmemmedli/open-closed-eyes-dataset)
|
| 33 |
-
- The Colab notebook downloads it via `kagglehub`; no local copy in repo.
|
| 34 |
-
|
| 35 |
-
---
|
| 36 |
-
|
| 37 |
-
## Weights
|
| 38 |
-
|
| 39 |
-
- Put **best.pt** from the model team in **weights/best.pt** (or `runs/classify/runs_cls/eye_open_closed_cpu/weights/best.pt`).
|
| 40 |
-
- For webcam: **face_landmarker.task** is downloaded into **weights/** on first run if missing.
|
| 41 |
-
|
| 42 |
-
---
|
| 43 |
-
|
| 44 |
-
## Local setup
|
| 45 |
-
|
| 46 |
-
```bash
|
| 47 |
-
pip install ultralytics opencv-python mediapipe "numpy<2"
|
| 48 |
-
```
|
| 49 |
-
|
| 50 |
-
Optional: use a venv. From repo root:
|
| 51 |
-
- `python scripts/predict_image.py <image.png>`
|
| 52 |
-
- `python scripts/webcam_live.py`
|
| 53 |
-
- `python scripts/video_infer.py` (expects 1.mp4 / 2.mp4 in repo root or set `VIDEOS` env)
|
| 54 |
-
- `python scripts/focus_infer.py`
|
| 55 |
-
|
| 56 |
-
---
|
| 57 |
-
|
| 58 |
-
## Project structure
|
| 59 |
-
|
| 60 |
-
```
|
| 61 |
-
├── notebooks/
|
| 62 |
-
│ └── eye_classifier_colab.ipynb # Data + eval (no training)
|
| 63 |
-
├── scripts/
|
| 64 |
-
│ ├── predict_image.py
|
| 65 |
-
│ ├── webcam_live.py
|
| 66 |
-
│ ├── video_infer.py
|
| 67 |
-
│ └── focus_infer.py
|
| 68 |
-
├── weights/ # best.pt, face_landmarker.task
|
| 69 |
-
├── docs/ # extra docs
|
| 70 |
-
├── README.md
|
| 71 |
-
└── venv/ # optional
|
| 72 |
-
```
|
| 73 |
-
|
| 74 |
-
Training and weight generation: **model team, separate notebook.**
|
|
|
|
| 1 |
+
# Eye Open / Closed Classifier (YOLOv11-CLS)
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
Binary classifier: **open** vs **closed** eyes.
|
| 5 |
+
Used as a baseline for eye-tracking, drowsiness, or focus detection.
|
| 6 |
+
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
## Model team task
|
| 10 |
+
|
| 11 |
+
- **Train** the YOLOv11s-cls eye classifier in a **separate notebook** (data split, epochs, GPU, export `best.pt`).
|
| 12 |
+
- Provide **trained weights** (`best.pt`) for this repo’s evaluation and inference scripts.
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
---
|
| 17 |
+
|
| 18 |
+
## Repo contents
|
| 19 |
+
|
| 20 |
+
- **notebooks/eye_classifier_colab.ipynb** — Data download (Kaggle), clean, split, undersample, **evaluate** (needs `best.pt` from model team), export.
|
| 21 |
+
- **scripts/predict_image.py** — Run classifier on single images (needs `best.pt`).
|
| 22 |
+
- **scripts/webcam_live.py** — Live webcam open/closed (needs `best.pt` + optional `weights/face_landmarker.task`).
|
| 23 |
+
- **scripts/video_infer.py** — Run on video files.
|
| 24 |
+
- **scripts/focus_infer.py** — Focus/attention inference.
|
| 25 |
+
- **weights/** — Put `best.pt` here; `face_landmarker.task` is downloaded on first webcam run if missing.
|
| 26 |
+
- **docs/** — Extra docs (e.g. UNNECESSARY_FILES.md if present).
|
| 27 |
+
|
| 28 |
+
---
|
| 29 |
+
|
| 30 |
+
## Dataset
|
| 31 |
+
|
| 32 |
+
- **Source:** [Kaggle — open/closed eyes](https://www.kaggle.com/datasets/sehriyarmemmedli/open-closed-eyes-dataset)
|
| 33 |
+
- The Colab notebook downloads it via `kagglehub`; no local copy in repo.
|
| 34 |
+
|
| 35 |
+
---
|
| 36 |
+
|
| 37 |
+
## Weights
|
| 38 |
+
|
| 39 |
+
- Put **best.pt** from the model team in **weights/best.pt** (or `runs/classify/runs_cls/eye_open_closed_cpu/weights/best.pt`).
|
| 40 |
+
- For webcam: **face_landmarker.task** is downloaded into **weights/** on first run if missing.
|
| 41 |
+
|
| 42 |
+
---
|
| 43 |
+
|
| 44 |
+
## Local setup
|
| 45 |
+
|
| 46 |
+
```bash
|
| 47 |
+
pip install ultralytics opencv-python mediapipe "numpy<2"
|
| 48 |
+
```
|
| 49 |
+
|
| 50 |
+
Optional: use a venv. From repo root:
|
| 51 |
+
- `python scripts/predict_image.py <image.png>`
|
| 52 |
+
- `python scripts/webcam_live.py`
|
| 53 |
+
- `python scripts/video_infer.py` (expects 1.mp4 / 2.mp4 in repo root or set `VIDEOS` env)
|
| 54 |
+
- `python scripts/focus_infer.py`
|
| 55 |
+
|
| 56 |
+
---
|
| 57 |
+
|
| 58 |
+
## Project structure
|
| 59 |
+
|
| 60 |
+
```
|
| 61 |
+
├── notebooks/
|
| 62 |
+
│ └── eye_classifier_colab.ipynb # Data + eval (no training)
|
| 63 |
+
├── scripts/
|
| 64 |
+
│ ├── predict_image.py
|
| 65 |
+
│ ├── webcam_live.py
|
| 66 |
+
│ ├── video_infer.py
|
| 67 |
+
│ └── focus_infer.py
|
| 68 |
+
├── weights/ # best.pt, face_landmarker.task
|
| 69 |
+
├── docs/ # extra docs
|
| 70 |
+
├── README.md
|
| 71 |
+
└── venv/ # optional
|
| 72 |
+
```
|
| 73 |
+
|
| 74 |
+
Training and weight generation: **model team, separate notebook.**
|
models/cnn/CNN_MODEL/notebooks/eye_classifier_colab.ipynb
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
models/cnn/CNN_MODEL/scripts/focus_infer.py
CHANGED
|
@@ -1,199 +1,199 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
from pathlib import Path
|
| 4 |
-
import os
|
| 5 |
-
|
| 6 |
-
import cv2
|
| 7 |
-
import numpy as np
|
| 8 |
-
from ultralytics import YOLO
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
def list_images(folder: Path):
|
| 12 |
-
exts = {".png", ".jpg", ".jpeg", ".bmp", ".webp"}
|
| 13 |
-
return sorted([p for p in folder.iterdir() if p.suffix.lower() in exts])
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
def find_weights(project_root: Path) -> Path | None:
|
| 17 |
-
candidates = [
|
| 18 |
-
project_root / "weights" / "best.pt",
|
| 19 |
-
project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
|
| 20 |
-
project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
|
| 21 |
-
project_root / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
|
| 22 |
-
project_root / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
|
| 23 |
-
]
|
| 24 |
-
return next((p for p in candidates if p.is_file()), None)
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
def detect_eyelid_boundary(gray: np.ndarray) -> np.ndarray | None:
|
| 28 |
-
"""
|
| 29 |
-
Returns an ellipse fit to the largest contour near the eye boundary.
|
| 30 |
-
Output format: (center(x,y), (axis1, axis2), angle) or None.
|
| 31 |
-
"""
|
| 32 |
-
blur = cv2.GaussianBlur(gray, (5, 5), 0)
|
| 33 |
-
edges = cv2.Canny(blur, 40, 120)
|
| 34 |
-
edges = cv2.dilate(edges, np.ones((3, 3), np.uint8), iterations=1)
|
| 35 |
-
contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
| 36 |
-
if not contours:
|
| 37 |
-
return None
|
| 38 |
-
contours = sorted(contours, key=cv2.contourArea, reverse=True)
|
| 39 |
-
for c in contours:
|
| 40 |
-
if len(c) >= 5 and cv2.contourArea(c) > 50:
|
| 41 |
-
return cv2.fitEllipse(c)
|
| 42 |
-
return None
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
def detect_pupil_center(gray: np.ndarray) -> tuple[int, int] | None:
|
| 46 |
-
"""
|
| 47 |
-
More robust pupil detection:
|
| 48 |
-
- enhance contrast (CLAHE)
|
| 49 |
-
- find dark blobs
|
| 50 |
-
- score by circularity and proximity to center
|
| 51 |
-
"""
|
| 52 |
-
h, w = gray.shape
|
| 53 |
-
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
|
| 54 |
-
eq = clahe.apply(gray)
|
| 55 |
-
blur = cv2.GaussianBlur(eq, (7, 7), 0)
|
| 56 |
-
|
| 57 |
-
# Focus on the central region to avoid eyelashes/edges
|
| 58 |
-
cx, cy = w // 2, h // 2
|
| 59 |
-
rx, ry = int(w * 0.3), int(h * 0.3)
|
| 60 |
-
x0, x1 = max(cx - rx, 0), min(cx + rx, w)
|
| 61 |
-
y0, y1 = max(cy - ry, 0), min(cy + ry, h)
|
| 62 |
-
roi = blur[y0:y1, x0:x1]
|
| 63 |
-
|
| 64 |
-
# Inverted threshold to capture dark pupil
|
| 65 |
-
_, thresh = cv2.threshold(roi, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
|
| 66 |
-
thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, np.ones((3, 3), np.uint8), iterations=2)
|
| 67 |
-
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8), iterations=1)
|
| 68 |
-
|
| 69 |
-
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
| 70 |
-
if not contours:
|
| 71 |
-
return None
|
| 72 |
-
|
| 73 |
-
best = None
|
| 74 |
-
best_score = -1.0
|
| 75 |
-
for c in contours:
|
| 76 |
-
area = cv2.contourArea(c)
|
| 77 |
-
if area < 15:
|
| 78 |
-
continue
|
| 79 |
-
perimeter = cv2.arcLength(c, True)
|
| 80 |
-
if perimeter <= 0:
|
| 81 |
-
continue
|
| 82 |
-
circularity = 4 * np.pi * (area / (perimeter * perimeter))
|
| 83 |
-
if circularity < 0.3:
|
| 84 |
-
continue
|
| 85 |
-
m = cv2.moments(c)
|
| 86 |
-
if m["m00"] == 0:
|
| 87 |
-
continue
|
| 88 |
-
px = int(m["m10"] / m["m00"]) + x0
|
| 89 |
-
py = int(m["m01"] / m["m00"]) + y0
|
| 90 |
-
|
| 91 |
-
# Score by circularity and distance to center
|
| 92 |
-
dist = np.hypot(px - cx, py - cy) / max(w, h)
|
| 93 |
-
score = circularity - dist
|
| 94 |
-
if score > best_score:
|
| 95 |
-
best_score = score
|
| 96 |
-
best = (px, py)
|
| 97 |
-
|
| 98 |
-
return best
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
def is_focused(pupil_center: tuple[int, int], img_shape: tuple[int, int]) -> bool:
|
| 102 |
-
"""
|
| 103 |
-
Decide focus based on pupil offset from image center.
|
| 104 |
-
"""
|
| 105 |
-
h, w = img_shape
|
| 106 |
-
cx, cy = w // 2, h // 2
|
| 107 |
-
px, py = pupil_center
|
| 108 |
-
dx = abs(px - cx) / max(w, 1)
|
| 109 |
-
dy = abs(py - cy) / max(h, 1)
|
| 110 |
-
return (dx < 0.12) and (dy < 0.12)
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
def annotate(img_bgr: np.ndarray, ellipse, pupil_center, focused: bool, cls_label: str, conf: float):
|
| 114 |
-
out = img_bgr.copy()
|
| 115 |
-
if ellipse is not None:
|
| 116 |
-
cv2.ellipse(out, ellipse, (0, 255, 255), 2)
|
| 117 |
-
if pupil_center is not None:
|
| 118 |
-
cv2.circle(out, pupil_center, 4, (0, 0, 255), -1)
|
| 119 |
-
label = f"{cls_label} ({conf:.2f}) | focused={int(focused)}"
|
| 120 |
-
cv2.putText(out, label, (8, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
|
| 121 |
-
return out
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
def main():
|
| 125 |
-
project_root = Path(__file__).resolve().parent.parent
|
| 126 |
-
data_dir = project_root / "Dataset"
|
| 127 |
-
alt_data_dir = project_root / "DATA"
|
| 128 |
-
out_dir = project_root / "runs_focus"
|
| 129 |
-
out_dir.mkdir(parents=True, exist_ok=True)
|
| 130 |
-
|
| 131 |
-
weights = find_weights(project_root)
|
| 132 |
-
if weights is None:
|
| 133 |
-
print("Weights not found. Train first.")
|
| 134 |
-
return
|
| 135 |
-
|
| 136 |
-
# Support both Dataset/test/{open,closed} and Dataset/{open,closed}
|
| 137 |
-
def resolve_test_dirs(root: Path):
|
| 138 |
-
test_open = root / "test" / "open"
|
| 139 |
-
test_closed = root / "test" / "closed"
|
| 140 |
-
if test_open.exists() and test_closed.exists():
|
| 141 |
-
return test_open, test_closed
|
| 142 |
-
test_open = root / "open"
|
| 143 |
-
test_closed = root / "closed"
|
| 144 |
-
if test_open.exists() and test_closed.exists():
|
| 145 |
-
return test_open, test_closed
|
| 146 |
-
alt_closed = root / "close"
|
| 147 |
-
if test_open.exists() and alt_closed.exists():
|
| 148 |
-
return test_open, alt_closed
|
| 149 |
-
return None, None
|
| 150 |
-
|
| 151 |
-
test_open, test_closed = resolve_test_dirs(data_dir)
|
| 152 |
-
if (test_open is None or test_closed is None) and alt_data_dir.exists():
|
| 153 |
-
test_open, test_closed = resolve_test_dirs(alt_data_dir)
|
| 154 |
-
|
| 155 |
-
if not test_open.exists() or not test_closed.exists():
|
| 156 |
-
print("Test folders missing. Expected:")
|
| 157 |
-
print(test_open)
|
| 158 |
-
print(test_closed)
|
| 159 |
-
return
|
| 160 |
-
|
| 161 |
-
test_files = list_images(test_open) + list_images(test_closed)
|
| 162 |
-
print("Total test images:", len(test_files))
|
| 163 |
-
max_images = int(os.getenv("MAX_IMAGES", "0"))
|
| 164 |
-
if max_images > 0:
|
| 165 |
-
test_files = test_files[:max_images]
|
| 166 |
-
print("Limiting to MAX_IMAGES:", max_images)
|
| 167 |
-
|
| 168 |
-
model = YOLO(str(weights))
|
| 169 |
-
results = model.predict(test_files, imgsz=224, device="cpu", verbose=False)
|
| 170 |
-
|
| 171 |
-
names = model.names
|
| 172 |
-
for r in results:
|
| 173 |
-
probs = r.probs
|
| 174 |
-
top_idx = int(probs.top1)
|
| 175 |
-
top_conf = float(probs.top1conf)
|
| 176 |
-
pred_label = names[top_idx]
|
| 177 |
-
|
| 178 |
-
img = cv2.imread(r.path)
|
| 179 |
-
if img is None:
|
| 180 |
-
continue
|
| 181 |
-
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
| 182 |
-
|
| 183 |
-
ellipse = detect_eyelid_boundary(gray)
|
| 184 |
-
pupil_center = detect_pupil_center(gray)
|
| 185 |
-
focused = False
|
| 186 |
-
if pred_label.lower() == "open" and pupil_center is not None:
|
| 187 |
-
focused = is_focused(pupil_center, gray.shape)
|
| 188 |
-
|
| 189 |
-
annotated = annotate(img, ellipse, pupil_center, focused, pred_label, top_conf)
|
| 190 |
-
out_path = out_dir / (Path(r.path).stem + "_annotated.jpg")
|
| 191 |
-
cv2.imwrite(str(out_path), annotated)
|
| 192 |
-
|
| 193 |
-
print(f"{Path(r.path).name}: pred={pred_label} conf={top_conf:.3f} focused={focused}")
|
| 194 |
-
|
| 195 |
-
print(f"\nAnnotated outputs saved to: {out_dir}")
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
if __name__ == "__main__":
|
| 199 |
-
main()
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
import cv2
|
| 7 |
+
import numpy as np
|
| 8 |
+
from ultralytics import YOLO
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def list_images(folder: Path):
|
| 12 |
+
exts = {".png", ".jpg", ".jpeg", ".bmp", ".webp"}
|
| 13 |
+
return sorted([p for p in folder.iterdir() if p.suffix.lower() in exts])
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def find_weights(project_root: Path) -> Path | None:
|
| 17 |
+
candidates = [
|
| 18 |
+
project_root / "weights" / "best.pt",
|
| 19 |
+
project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
|
| 20 |
+
project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
|
| 21 |
+
project_root / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
|
| 22 |
+
project_root / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
|
| 23 |
+
]
|
| 24 |
+
return next((p for p in candidates if p.is_file()), None)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def detect_eyelid_boundary(gray: np.ndarray) -> np.ndarray | None:
|
| 28 |
+
"""
|
| 29 |
+
Returns an ellipse fit to the largest contour near the eye boundary.
|
| 30 |
+
Output format: (center(x,y), (axis1, axis2), angle) or None.
|
| 31 |
+
"""
|
| 32 |
+
blur = cv2.GaussianBlur(gray, (5, 5), 0)
|
| 33 |
+
edges = cv2.Canny(blur, 40, 120)
|
| 34 |
+
edges = cv2.dilate(edges, np.ones((3, 3), np.uint8), iterations=1)
|
| 35 |
+
contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
| 36 |
+
if not contours:
|
| 37 |
+
return None
|
| 38 |
+
contours = sorted(contours, key=cv2.contourArea, reverse=True)
|
| 39 |
+
for c in contours:
|
| 40 |
+
if len(c) >= 5 and cv2.contourArea(c) > 50:
|
| 41 |
+
return cv2.fitEllipse(c)
|
| 42 |
+
return None
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def detect_pupil_center(gray: np.ndarray) -> tuple[int, int] | None:
|
| 46 |
+
"""
|
| 47 |
+
More robust pupil detection:
|
| 48 |
+
- enhance contrast (CLAHE)
|
| 49 |
+
- find dark blobs
|
| 50 |
+
- score by circularity and proximity to center
|
| 51 |
+
"""
|
| 52 |
+
h, w = gray.shape
|
| 53 |
+
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
|
| 54 |
+
eq = clahe.apply(gray)
|
| 55 |
+
blur = cv2.GaussianBlur(eq, (7, 7), 0)
|
| 56 |
+
|
| 57 |
+
# Focus on the central region to avoid eyelashes/edges
|
| 58 |
+
cx, cy = w // 2, h // 2
|
| 59 |
+
rx, ry = int(w * 0.3), int(h * 0.3)
|
| 60 |
+
x0, x1 = max(cx - rx, 0), min(cx + rx, w)
|
| 61 |
+
y0, y1 = max(cy - ry, 0), min(cy + ry, h)
|
| 62 |
+
roi = blur[y0:y1, x0:x1]
|
| 63 |
+
|
| 64 |
+
# Inverted threshold to capture dark pupil
|
| 65 |
+
_, thresh = cv2.threshold(roi, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
|
| 66 |
+
thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, np.ones((3, 3), np.uint8), iterations=2)
|
| 67 |
+
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8), iterations=1)
|
| 68 |
+
|
| 69 |
+
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
| 70 |
+
if not contours:
|
| 71 |
+
return None
|
| 72 |
+
|
| 73 |
+
best = None
|
| 74 |
+
best_score = -1.0
|
| 75 |
+
for c in contours:
|
| 76 |
+
area = cv2.contourArea(c)
|
| 77 |
+
if area < 15:
|
| 78 |
+
continue
|
| 79 |
+
perimeter = cv2.arcLength(c, True)
|
| 80 |
+
if perimeter <= 0:
|
| 81 |
+
continue
|
| 82 |
+
circularity = 4 * np.pi * (area / (perimeter * perimeter))
|
| 83 |
+
if circularity < 0.3:
|
| 84 |
+
continue
|
| 85 |
+
m = cv2.moments(c)
|
| 86 |
+
if m["m00"] == 0:
|
| 87 |
+
continue
|
| 88 |
+
px = int(m["m10"] / m["m00"]) + x0
|
| 89 |
+
py = int(m["m01"] / m["m00"]) + y0
|
| 90 |
+
|
| 91 |
+
# Score by circularity and distance to center
|
| 92 |
+
dist = np.hypot(px - cx, py - cy) / max(w, h)
|
| 93 |
+
score = circularity - dist
|
| 94 |
+
if score > best_score:
|
| 95 |
+
best_score = score
|
| 96 |
+
best = (px, py)
|
| 97 |
+
|
| 98 |
+
return best
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def is_focused(pupil_center: tuple[int, int], img_shape: tuple[int, int]) -> bool:
|
| 102 |
+
"""
|
| 103 |
+
Decide focus based on pupil offset from image center.
|
| 104 |
+
"""
|
| 105 |
+
h, w = img_shape
|
| 106 |
+
cx, cy = w // 2, h // 2
|
| 107 |
+
px, py = pupil_center
|
| 108 |
+
dx = abs(px - cx) / max(w, 1)
|
| 109 |
+
dy = abs(py - cy) / max(h, 1)
|
| 110 |
+
return (dx < 0.12) and (dy < 0.12)
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def annotate(img_bgr: np.ndarray, ellipse, pupil_center, focused: bool, cls_label: str, conf: float):
|
| 114 |
+
out = img_bgr.copy()
|
| 115 |
+
if ellipse is not None:
|
| 116 |
+
cv2.ellipse(out, ellipse, (0, 255, 255), 2)
|
| 117 |
+
if pupil_center is not None:
|
| 118 |
+
cv2.circle(out, pupil_center, 4, (0, 0, 255), -1)
|
| 119 |
+
label = f"{cls_label} ({conf:.2f}) | focused={int(focused)}"
|
| 120 |
+
cv2.putText(out, label, (8, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
|
| 121 |
+
return out
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def main():
|
| 125 |
+
project_root = Path(__file__).resolve().parent.parent
|
| 126 |
+
data_dir = project_root / "Dataset"
|
| 127 |
+
alt_data_dir = project_root / "DATA"
|
| 128 |
+
out_dir = project_root / "runs_focus"
|
| 129 |
+
out_dir.mkdir(parents=True, exist_ok=True)
|
| 130 |
+
|
| 131 |
+
weights = find_weights(project_root)
|
| 132 |
+
if weights is None:
|
| 133 |
+
print("Weights not found. Train first.")
|
| 134 |
+
return
|
| 135 |
+
|
| 136 |
+
# Support both Dataset/test/{open,closed} and Dataset/{open,closed}
|
| 137 |
+
def resolve_test_dirs(root: Path):
|
| 138 |
+
test_open = root / "test" / "open"
|
| 139 |
+
test_closed = root / "test" / "closed"
|
| 140 |
+
if test_open.exists() and test_closed.exists():
|
| 141 |
+
return test_open, test_closed
|
| 142 |
+
test_open = root / "open"
|
| 143 |
+
test_closed = root / "closed"
|
| 144 |
+
if test_open.exists() and test_closed.exists():
|
| 145 |
+
return test_open, test_closed
|
| 146 |
+
alt_closed = root / "close"
|
| 147 |
+
if test_open.exists() and alt_closed.exists():
|
| 148 |
+
return test_open, alt_closed
|
| 149 |
+
return None, None
|
| 150 |
+
|
| 151 |
+
test_open, test_closed = resolve_test_dirs(data_dir)
|
| 152 |
+
if (test_open is None or test_closed is None) and alt_data_dir.exists():
|
| 153 |
+
test_open, test_closed = resolve_test_dirs(alt_data_dir)
|
| 154 |
+
|
| 155 |
+
if not test_open.exists() or not test_closed.exists():
|
| 156 |
+
print("Test folders missing. Expected:")
|
| 157 |
+
print(test_open)
|
| 158 |
+
print(test_closed)
|
| 159 |
+
return
|
| 160 |
+
|
| 161 |
+
test_files = list_images(test_open) + list_images(test_closed)
|
| 162 |
+
print("Total test images:", len(test_files))
|
| 163 |
+
max_images = int(os.getenv("MAX_IMAGES", "0"))
|
| 164 |
+
if max_images > 0:
|
| 165 |
+
test_files = test_files[:max_images]
|
| 166 |
+
print("Limiting to MAX_IMAGES:", max_images)
|
| 167 |
+
|
| 168 |
+
model = YOLO(str(weights))
|
| 169 |
+
results = model.predict(test_files, imgsz=224, device="cpu", verbose=False)
|
| 170 |
+
|
| 171 |
+
names = model.names
|
| 172 |
+
for r in results:
|
| 173 |
+
probs = r.probs
|
| 174 |
+
top_idx = int(probs.top1)
|
| 175 |
+
top_conf = float(probs.top1conf)
|
| 176 |
+
pred_label = names[top_idx]
|
| 177 |
+
|
| 178 |
+
img = cv2.imread(r.path)
|
| 179 |
+
if img is None:
|
| 180 |
+
continue
|
| 181 |
+
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
| 182 |
+
|
| 183 |
+
ellipse = detect_eyelid_boundary(gray)
|
| 184 |
+
pupil_center = detect_pupil_center(gray)
|
| 185 |
+
focused = False
|
| 186 |
+
if pred_label.lower() == "open" and pupil_center is not None:
|
| 187 |
+
focused = is_focused(pupil_center, gray.shape)
|
| 188 |
+
|
| 189 |
+
annotated = annotate(img, ellipse, pupil_center, focused, pred_label, top_conf)
|
| 190 |
+
out_path = out_dir / (Path(r.path).stem + "_annotated.jpg")
|
| 191 |
+
cv2.imwrite(str(out_path), annotated)
|
| 192 |
+
|
| 193 |
+
print(f"{Path(r.path).name}: pred={pred_label} conf={top_conf:.3f} focused={focused}")
|
| 194 |
+
|
| 195 |
+
print(f"\nAnnotated outputs saved to: {out_dir}")
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
if __name__ == "__main__":
|
| 199 |
+
main()
|
models/cnn/CNN_MODEL/scripts/predict_image.py
CHANGED
|
@@ -1,49 +1,49 @@
|
|
| 1 |
-
"""Run the eye open/closed model on one or more images."""
|
| 2 |
-
import sys
|
| 3 |
-
from pathlib import Path
|
| 4 |
-
|
| 5 |
-
from ultralytics import YOLO
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
def main():
|
| 9 |
-
project_root = Path(__file__).resolve().parent.parent
|
| 10 |
-
weight_candidates = [
|
| 11 |
-
project_root / "weights" / "best.pt",
|
| 12 |
-
project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
|
| 13 |
-
project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
|
| 14 |
-
]
|
| 15 |
-
weights = next((p for p in weight_candidates if p.is_file()), None)
|
| 16 |
-
if weights is None:
|
| 17 |
-
print("Weights not found. Put best.pt in weights/ or runs/.../weights/ (from model team).")
|
| 18 |
-
sys.exit(1)
|
| 19 |
-
|
| 20 |
-
if len(sys.argv) < 2:
|
| 21 |
-
print("Usage: python scripts/predict_image.py <image1> [image2 ...]")
|
| 22 |
-
print("Example: python scripts/predict_image.py path/to/image.png")
|
| 23 |
-
sys.exit(0)
|
| 24 |
-
|
| 25 |
-
model = YOLO(str(weights))
|
| 26 |
-
names = model.names
|
| 27 |
-
|
| 28 |
-
for path in sys.argv[1:]:
|
| 29 |
-
p = Path(path)
|
| 30 |
-
if not p.is_file():
|
| 31 |
-
print(p, "- file not found")
|
| 32 |
-
continue
|
| 33 |
-
try:
|
| 34 |
-
results = model.predict(str(p), imgsz=224, device="cpu", verbose=False)
|
| 35 |
-
except Exception as e:
|
| 36 |
-
print(p, "- error:", e)
|
| 37 |
-
continue
|
| 38 |
-
if not results:
|
| 39 |
-
print(p, "- no result")
|
| 40 |
-
continue
|
| 41 |
-
r = results[0]
|
| 42 |
-
top_idx = int(r.probs.top1)
|
| 43 |
-
conf = float(r.probs.top1conf)
|
| 44 |
-
label = names[top_idx]
|
| 45 |
-
print(f"{p.name}: {label} ({conf:.2%})")
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
if __name__ == "__main__":
|
| 49 |
-
main()
|
|
|
|
| 1 |
+
"""Run the eye open/closed model on one or more images."""
|
| 2 |
+
import sys
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
|
| 5 |
+
from ultralytics import YOLO
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def main():
|
| 9 |
+
project_root = Path(__file__).resolve().parent.parent
|
| 10 |
+
weight_candidates = [
|
| 11 |
+
project_root / "weights" / "best.pt",
|
| 12 |
+
project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
|
| 13 |
+
project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
|
| 14 |
+
]
|
| 15 |
+
weights = next((p for p in weight_candidates if p.is_file()), None)
|
| 16 |
+
if weights is None:
|
| 17 |
+
print("Weights not found. Put best.pt in weights/ or runs/.../weights/ (from model team).")
|
| 18 |
+
sys.exit(1)
|
| 19 |
+
|
| 20 |
+
if len(sys.argv) < 2:
|
| 21 |
+
print("Usage: python scripts/predict_image.py <image1> [image2 ...]")
|
| 22 |
+
print("Example: python scripts/predict_image.py path/to/image.png")
|
| 23 |
+
sys.exit(0)
|
| 24 |
+
|
| 25 |
+
model = YOLO(str(weights))
|
| 26 |
+
names = model.names
|
| 27 |
+
|
| 28 |
+
for path in sys.argv[1:]:
|
| 29 |
+
p = Path(path)
|
| 30 |
+
if not p.is_file():
|
| 31 |
+
print(p, "- file not found")
|
| 32 |
+
continue
|
| 33 |
+
try:
|
| 34 |
+
results = model.predict(str(p), imgsz=224, device="cpu", verbose=False)
|
| 35 |
+
except Exception as e:
|
| 36 |
+
print(p, "- error:", e)
|
| 37 |
+
continue
|
| 38 |
+
if not results:
|
| 39 |
+
print(p, "- no result")
|
| 40 |
+
continue
|
| 41 |
+
r = results[0]
|
| 42 |
+
top_idx = int(r.probs.top1)
|
| 43 |
+
conf = float(r.probs.top1conf)
|
| 44 |
+
label = names[top_idx]
|
| 45 |
+
print(f"{p.name}: {label} ({conf:.2%})")
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
if __name__ == "__main__":
|
| 49 |
+
main()
|
models/cnn/CNN_MODEL/scripts/video_infer.py
CHANGED
|
@@ -1,281 +1,281 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
import os
|
| 4 |
-
from pathlib import Path
|
| 5 |
-
|
| 6 |
-
import cv2
|
| 7 |
-
import numpy as np
|
| 8 |
-
from ultralytics import YOLO
|
| 9 |
-
|
| 10 |
-
try:
|
| 11 |
-
import mediapipe as mp
|
| 12 |
-
except Exception: # pragma: no cover
|
| 13 |
-
mp = None
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
def find_weights(project_root: Path) -> Path | None:
|
| 17 |
-
candidates = [
|
| 18 |
-
project_root / "weights" / "best.pt",
|
| 19 |
-
project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
|
| 20 |
-
project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
|
| 21 |
-
project_root / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
|
| 22 |
-
project_root / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
|
| 23 |
-
]
|
| 24 |
-
return next((p for p in candidates if p.is_file()), None)
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
def detect_pupil_center(gray: np.ndarray) -> tuple[int, int] | None:
|
| 28 |
-
h, w = gray.shape
|
| 29 |
-
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
|
| 30 |
-
eq = clahe.apply(gray)
|
| 31 |
-
blur = cv2.GaussianBlur(eq, (7, 7), 0)
|
| 32 |
-
|
| 33 |
-
cx, cy = w // 2, h // 2
|
| 34 |
-
rx, ry = int(w * 0.3), int(h * 0.3)
|
| 35 |
-
x0, x1 = max(cx - rx, 0), min(cx + rx, w)
|
| 36 |
-
y0, y1 = max(cy - ry, 0), min(cy + ry, h)
|
| 37 |
-
roi = blur[y0:y1, x0:x1]
|
| 38 |
-
|
| 39 |
-
_, thresh = cv2.threshold(roi, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
|
| 40 |
-
thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, np.ones((3, 3), np.uint8), iterations=2)
|
| 41 |
-
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8), iterations=1)
|
| 42 |
-
|
| 43 |
-
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
| 44 |
-
if not contours:
|
| 45 |
-
return None
|
| 46 |
-
|
| 47 |
-
best = None
|
| 48 |
-
best_score = -1.0
|
| 49 |
-
for c in contours:
|
| 50 |
-
area = cv2.contourArea(c)
|
| 51 |
-
if area < 15:
|
| 52 |
-
continue
|
| 53 |
-
perimeter = cv2.arcLength(c, True)
|
| 54 |
-
if perimeter <= 0:
|
| 55 |
-
continue
|
| 56 |
-
circularity = 4 * np.pi * (area / (perimeter * perimeter))
|
| 57 |
-
if circularity < 0.3:
|
| 58 |
-
continue
|
| 59 |
-
m = cv2.moments(c)
|
| 60 |
-
if m["m00"] == 0:
|
| 61 |
-
continue
|
| 62 |
-
px = int(m["m10"] / m["m00"]) + x0
|
| 63 |
-
py = int(m["m01"] / m["m00"]) + y0
|
| 64 |
-
|
| 65 |
-
dist = np.hypot(px - cx, py - cy) / max(w, h)
|
| 66 |
-
score = circularity - dist
|
| 67 |
-
if score > best_score:
|
| 68 |
-
best_score = score
|
| 69 |
-
best = (px, py)
|
| 70 |
-
|
| 71 |
-
return best
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
def is_focused(pupil_center: tuple[int, int], img_shape: tuple[int, int]) -> bool:
|
| 75 |
-
h, w = img_shape
|
| 76 |
-
cx = w // 2
|
| 77 |
-
px, _ = pupil_center
|
| 78 |
-
dx = abs(px - cx) / max(w, 1)
|
| 79 |
-
return dx < 0.12
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
def classify_frame(model: YOLO, frame: np.ndarray) -> tuple[str, float]:
|
| 83 |
-
# Use classifier directly on frame (assumes frame is eye crop)
|
| 84 |
-
results = model.predict(frame, imgsz=224, device="cpu", verbose=False)
|
| 85 |
-
r = results[0]
|
| 86 |
-
probs = r.probs
|
| 87 |
-
top_idx = int(probs.top1)
|
| 88 |
-
top_conf = float(probs.top1conf)
|
| 89 |
-
pred_label = model.names[top_idx]
|
| 90 |
-
return pred_label, top_conf
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
def annotate_frame(frame: np.ndarray, label: str, focused: bool, conf: float, time_sec: float):
|
| 94 |
-
out = frame.copy()
|
| 95 |
-
text = f"{label} | focused={int(focused)} | conf={conf:.2f} | t={time_sec:.2f}s"
|
| 96 |
-
cv2.putText(out, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
|
| 97 |
-
return out
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
def write_segments(path: Path, segments: list[tuple[float, float, str]]):
|
| 101 |
-
with path.open("w") as f:
|
| 102 |
-
for start, end, label in segments:
|
| 103 |
-
f.write(f"{start:.2f},{end:.2f},{label}\n")
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
def process_video(video_path: Path, model: YOLO | None):
|
| 107 |
-
cap = cv2.VideoCapture(str(video_path))
|
| 108 |
-
if not cap.isOpened():
|
| 109 |
-
print(f"Failed to open {video_path}")
|
| 110 |
-
return
|
| 111 |
-
|
| 112 |
-
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
|
| 113 |
-
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
| 114 |
-
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
| 115 |
-
|
| 116 |
-
out_path = video_path.with_name(video_path.stem + "_pred.mp4")
|
| 117 |
-
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
|
| 118 |
-
writer = cv2.VideoWriter(str(out_path), fourcc, fps, (width, height))
|
| 119 |
-
|
| 120 |
-
csv_path = video_path.with_name(video_path.stem + "_predictions.csv")
|
| 121 |
-
seg_path = video_path.with_name(video_path.stem + "_segments.txt")
|
| 122 |
-
|
| 123 |
-
frame_idx = 0
|
| 124 |
-
last_label = None
|
| 125 |
-
seg_start = 0.0
|
| 126 |
-
segments: list[tuple[float, float, str]] = []
|
| 127 |
-
|
| 128 |
-
with csv_path.open("w") as fcsv:
|
| 129 |
-
fcsv.write("time_sec,label,focused,conf\n")
|
| 130 |
-
if mp is None:
|
| 131 |
-
print("mediapipe is not installed. Falling back to classifier-only mode.")
|
| 132 |
-
use_mp = mp is not None
|
| 133 |
-
if use_mp:
|
| 134 |
-
mp_face_mesh = mp.solutions.face_mesh
|
| 135 |
-
face_mesh = mp_face_mesh.FaceMesh(
|
| 136 |
-
static_image_mode=False,
|
| 137 |
-
max_num_faces=1,
|
| 138 |
-
refine_landmarks=True,
|
| 139 |
-
min_detection_confidence=0.5,
|
| 140 |
-
min_tracking_confidence=0.5,
|
| 141 |
-
)
|
| 142 |
-
|
| 143 |
-
while True:
|
| 144 |
-
ret, frame = cap.read()
|
| 145 |
-
if not ret:
|
| 146 |
-
break
|
| 147 |
-
time_sec = frame_idx / fps
|
| 148 |
-
conf = 0.0
|
| 149 |
-
pred_label = "open"
|
| 150 |
-
focused = False
|
| 151 |
-
|
| 152 |
-
if use_mp:
|
| 153 |
-
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
| 154 |
-
res = face_mesh.process(rgb)
|
| 155 |
-
if res.multi_face_landmarks:
|
| 156 |
-
lm = res.multi_face_landmarks[0].landmark
|
| 157 |
-
h, w = frame.shape[:2]
|
| 158 |
-
|
| 159 |
-
# Eye landmarks (MediaPipe FaceMesh)
|
| 160 |
-
left_eye = [33, 160, 158, 133, 153, 144]
|
| 161 |
-
right_eye = [362, 385, 387, 263, 373, 380]
|
| 162 |
-
left_iris = [468, 469, 470, 471]
|
| 163 |
-
right_iris = [473, 474, 475, 476]
|
| 164 |
-
|
| 165 |
-
def pts(idxs):
|
| 166 |
-
return np.array([(int(lm[i].x * w), int(lm[i].y * h)) for i in idxs])
|
| 167 |
-
|
| 168 |
-
def ear(eye_pts):
|
| 169 |
-
# EAR using 6 points
|
| 170 |
-
p1, p2, p3, p4, p5, p6 = eye_pts
|
| 171 |
-
v1 = np.linalg.norm(p2 - p6)
|
| 172 |
-
v2 = np.linalg.norm(p3 - p5)
|
| 173 |
-
h1 = np.linalg.norm(p1 - p4)
|
| 174 |
-
return (v1 + v2) / (2.0 * h1 + 1e-6)
|
| 175 |
-
|
| 176 |
-
le = pts(left_eye)
|
| 177 |
-
re = pts(right_eye)
|
| 178 |
-
le_ear = ear(le)
|
| 179 |
-
re_ear = ear(re)
|
| 180 |
-
ear_avg = (le_ear + re_ear) / 2.0
|
| 181 |
-
|
| 182 |
-
# openness threshold
|
| 183 |
-
pred_label = "open" if ear_avg > 0.22 else "closed"
|
| 184 |
-
|
| 185 |
-
# iris centers
|
| 186 |
-
li = pts(left_iris)
|
| 187 |
-
ri = pts(right_iris)
|
| 188 |
-
li_c = li.mean(axis=0).astype(int)
|
| 189 |
-
ri_c = ri.mean(axis=0).astype(int)
|
| 190 |
-
|
| 191 |
-
# eye centers (midpoint of corners)
|
| 192 |
-
le_c = ((le[0] + le[3]) / 2).astype(int)
|
| 193 |
-
re_c = ((re[0] + re[3]) / 2).astype(int)
|
| 194 |
-
|
| 195 |
-
# focus = iris close to eye center horizontally for both eyes
|
| 196 |
-
le_dx = abs(li_c[0] - le_c[0]) / max(np.linalg.norm(le[0] - le[3]), 1)
|
| 197 |
-
re_dx = abs(ri_c[0] - re_c[0]) / max(np.linalg.norm(re[0] - re[3]), 1)
|
| 198 |
-
focused = (pred_label == "open") and (le_dx < 0.18) and (re_dx < 0.18)
|
| 199 |
-
|
| 200 |
-
# draw eye boundaries
|
| 201 |
-
cv2.polylines(frame, [le], True, (0, 255, 255), 1)
|
| 202 |
-
cv2.polylines(frame, [re], True, (0, 255, 255), 1)
|
| 203 |
-
# draw iris centers
|
| 204 |
-
cv2.circle(frame, tuple(li_c), 3, (0, 0, 255), -1)
|
| 205 |
-
cv2.circle(frame, tuple(ri_c), 3, (0, 0, 255), -1)
|
| 206 |
-
else:
|
| 207 |
-
pred_label = "closed"
|
| 208 |
-
focused = False
|
| 209 |
-
else:
|
| 210 |
-
if model is not None:
|
| 211 |
-
pred_label, conf = classify_frame(model, frame)
|
| 212 |
-
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
| 213 |
-
pupil_center = detect_pupil_center(gray) if pred_label.lower() == "open" else None
|
| 214 |
-
focused = False
|
| 215 |
-
if pred_label.lower() == "open" and pupil_center is not None:
|
| 216 |
-
focused = is_focused(pupil_center, gray.shape)
|
| 217 |
-
|
| 218 |
-
if pred_label.lower() != "open":
|
| 219 |
-
focused = False
|
| 220 |
-
|
| 221 |
-
label = "open_focused" if (pred_label.lower() == "open" and focused) else "open_not_focused"
|
| 222 |
-
if pred_label.lower() != "open":
|
| 223 |
-
label = "closed_not_focused"
|
| 224 |
-
|
| 225 |
-
fcsv.write(f"{time_sec:.2f},{label},{int(focused)},{conf:.4f}\n")
|
| 226 |
-
|
| 227 |
-
if last_label is None:
|
| 228 |
-
last_label = label
|
| 229 |
-
seg_start = time_sec
|
| 230 |
-
elif label != last_label:
|
| 231 |
-
segments.append((seg_start, time_sec, last_label))
|
| 232 |
-
seg_start = time_sec
|
| 233 |
-
last_label = label
|
| 234 |
-
|
| 235 |
-
annotated = annotate_frame(frame, label, focused, conf, time_sec)
|
| 236 |
-
writer.write(annotated)
|
| 237 |
-
frame_idx += 1
|
| 238 |
-
|
| 239 |
-
if last_label is not None:
|
| 240 |
-
end_time = frame_idx / fps
|
| 241 |
-
segments.append((seg_start, end_time, last_label))
|
| 242 |
-
write_segments(seg_path, segments)
|
| 243 |
-
|
| 244 |
-
cap.release()
|
| 245 |
-
writer.release()
|
| 246 |
-
print(f"Saved: {out_path}")
|
| 247 |
-
print(f"CSV: {csv_path}")
|
| 248 |
-
print(f"Segments: {seg_path}")
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
def main():
|
| 252 |
-
project_root = Path(__file__).resolve().parent.parent
|
| 253 |
-
weights = find_weights(project_root)
|
| 254 |
-
model = YOLO(str(weights)) if weights is not None else None
|
| 255 |
-
|
| 256 |
-
# Default to 1.mp4 and 2.mp4 in project root
|
| 257 |
-
videos = []
|
| 258 |
-
for name in ["1.mp4", "2.mp4"]:
|
| 259 |
-
p = project_root / name
|
| 260 |
-
if p.exists():
|
| 261 |
-
videos.append(p)
|
| 262 |
-
|
| 263 |
-
# Also allow passing paths via env var
|
| 264 |
-
extra = os.getenv("VIDEOS", "")
|
| 265 |
-
for v in [x.strip() for x in extra.split(",") if x.strip()]:
|
| 266 |
-
vp = Path(v)
|
| 267 |
-
if not vp.is_absolute():
|
| 268 |
-
vp = project_root / vp
|
| 269 |
-
if vp.exists():
|
| 270 |
-
videos.append(vp)
|
| 271 |
-
|
| 272 |
-
if not videos:
|
| 273 |
-
print("No videos found. Expected 1.mp4 / 2.mp4 in project root.")
|
| 274 |
-
return
|
| 275 |
-
|
| 276 |
-
for v in videos:
|
| 277 |
-
process_video(v, model)
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
if __name__ == "__main__":
|
| 281 |
-
main()
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
import cv2
|
| 7 |
+
import numpy as np
|
| 8 |
+
from ultralytics import YOLO
|
| 9 |
+
|
| 10 |
+
try:
|
| 11 |
+
import mediapipe as mp
|
| 12 |
+
except Exception: # pragma: no cover
|
| 13 |
+
mp = None
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def find_weights(project_root: Path) -> Path | None:
|
| 17 |
+
candidates = [
|
| 18 |
+
project_root / "weights" / "best.pt",
|
| 19 |
+
project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
|
| 20 |
+
project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
|
| 21 |
+
project_root / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
|
| 22 |
+
project_root / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
|
| 23 |
+
]
|
| 24 |
+
return next((p for p in candidates if p.is_file()), None)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def detect_pupil_center(gray: np.ndarray) -> tuple[int, int] | None:
|
| 28 |
+
h, w = gray.shape
|
| 29 |
+
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
|
| 30 |
+
eq = clahe.apply(gray)
|
| 31 |
+
blur = cv2.GaussianBlur(eq, (7, 7), 0)
|
| 32 |
+
|
| 33 |
+
cx, cy = w // 2, h // 2
|
| 34 |
+
rx, ry = int(w * 0.3), int(h * 0.3)
|
| 35 |
+
x0, x1 = max(cx - rx, 0), min(cx + rx, w)
|
| 36 |
+
y0, y1 = max(cy - ry, 0), min(cy + ry, h)
|
| 37 |
+
roi = blur[y0:y1, x0:x1]
|
| 38 |
+
|
| 39 |
+
_, thresh = cv2.threshold(roi, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
|
| 40 |
+
thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, np.ones((3, 3), np.uint8), iterations=2)
|
| 41 |
+
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8), iterations=1)
|
| 42 |
+
|
| 43 |
+
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
| 44 |
+
if not contours:
|
| 45 |
+
return None
|
| 46 |
+
|
| 47 |
+
best = None
|
| 48 |
+
best_score = -1.0
|
| 49 |
+
for c in contours:
|
| 50 |
+
area = cv2.contourArea(c)
|
| 51 |
+
if area < 15:
|
| 52 |
+
continue
|
| 53 |
+
perimeter = cv2.arcLength(c, True)
|
| 54 |
+
if perimeter <= 0:
|
| 55 |
+
continue
|
| 56 |
+
circularity = 4 * np.pi * (area / (perimeter * perimeter))
|
| 57 |
+
if circularity < 0.3:
|
| 58 |
+
continue
|
| 59 |
+
m = cv2.moments(c)
|
| 60 |
+
if m["m00"] == 0:
|
| 61 |
+
continue
|
| 62 |
+
px = int(m["m10"] / m["m00"]) + x0
|
| 63 |
+
py = int(m["m01"] / m["m00"]) + y0
|
| 64 |
+
|
| 65 |
+
dist = np.hypot(px - cx, py - cy) / max(w, h)
|
| 66 |
+
score = circularity - dist
|
| 67 |
+
if score > best_score:
|
| 68 |
+
best_score = score
|
| 69 |
+
best = (px, py)
|
| 70 |
+
|
| 71 |
+
return best
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def is_focused(pupil_center: tuple[int, int], img_shape: tuple[int, int]) -> bool:
|
| 75 |
+
h, w = img_shape
|
| 76 |
+
cx = w // 2
|
| 77 |
+
px, _ = pupil_center
|
| 78 |
+
dx = abs(px - cx) / max(w, 1)
|
| 79 |
+
return dx < 0.12
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def classify_frame(model: YOLO, frame: np.ndarray) -> tuple[str, float]:
|
| 83 |
+
# Use classifier directly on frame (assumes frame is eye crop)
|
| 84 |
+
results = model.predict(frame, imgsz=224, device="cpu", verbose=False)
|
| 85 |
+
r = results[0]
|
| 86 |
+
probs = r.probs
|
| 87 |
+
top_idx = int(probs.top1)
|
| 88 |
+
top_conf = float(probs.top1conf)
|
| 89 |
+
pred_label = model.names[top_idx]
|
| 90 |
+
return pred_label, top_conf
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def annotate_frame(frame: np.ndarray, label: str, focused: bool, conf: float, time_sec: float):
|
| 94 |
+
out = frame.copy()
|
| 95 |
+
text = f"{label} | focused={int(focused)} | conf={conf:.2f} | t={time_sec:.2f}s"
|
| 96 |
+
cv2.putText(out, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
|
| 97 |
+
return out
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def write_segments(path: Path, segments: list[tuple[float, float, str]]):
|
| 101 |
+
with path.open("w") as f:
|
| 102 |
+
for start, end, label in segments:
|
| 103 |
+
f.write(f"{start:.2f},{end:.2f},{label}\n")
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def process_video(video_path: Path, model: YOLO | None):
|
| 107 |
+
cap = cv2.VideoCapture(str(video_path))
|
| 108 |
+
if not cap.isOpened():
|
| 109 |
+
print(f"Failed to open {video_path}")
|
| 110 |
+
return
|
| 111 |
+
|
| 112 |
+
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
|
| 113 |
+
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
| 114 |
+
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
| 115 |
+
|
| 116 |
+
out_path = video_path.with_name(video_path.stem + "_pred.mp4")
|
| 117 |
+
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
|
| 118 |
+
writer = cv2.VideoWriter(str(out_path), fourcc, fps, (width, height))
|
| 119 |
+
|
| 120 |
+
csv_path = video_path.with_name(video_path.stem + "_predictions.csv")
|
| 121 |
+
seg_path = video_path.with_name(video_path.stem + "_segments.txt")
|
| 122 |
+
|
| 123 |
+
frame_idx = 0
|
| 124 |
+
last_label = None
|
| 125 |
+
seg_start = 0.0
|
| 126 |
+
segments: list[tuple[float, float, str]] = []
|
| 127 |
+
|
| 128 |
+
with csv_path.open("w") as fcsv:
|
| 129 |
+
fcsv.write("time_sec,label,focused,conf\n")
|
| 130 |
+
if mp is None:
|
| 131 |
+
print("mediapipe is not installed. Falling back to classifier-only mode.")
|
| 132 |
+
use_mp = mp is not None
|
| 133 |
+
if use_mp:
|
| 134 |
+
mp_face_mesh = mp.solutions.face_mesh
|
| 135 |
+
face_mesh = mp_face_mesh.FaceMesh(
|
| 136 |
+
static_image_mode=False,
|
| 137 |
+
max_num_faces=1,
|
| 138 |
+
refine_landmarks=True,
|
| 139 |
+
min_detection_confidence=0.5,
|
| 140 |
+
min_tracking_confidence=0.5,
|
| 141 |
+
)
|
| 142 |
+
|
| 143 |
+
while True:
|
| 144 |
+
ret, frame = cap.read()
|
| 145 |
+
if not ret:
|
| 146 |
+
break
|
| 147 |
+
time_sec = frame_idx / fps
|
| 148 |
+
conf = 0.0
|
| 149 |
+
pred_label = "open"
|
| 150 |
+
focused = False
|
| 151 |
+
|
| 152 |
+
if use_mp:
|
| 153 |
+
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
| 154 |
+
res = face_mesh.process(rgb)
|
| 155 |
+
if res.multi_face_landmarks:
|
| 156 |
+
lm = res.multi_face_landmarks[0].landmark
|
| 157 |
+
h, w = frame.shape[:2]
|
| 158 |
+
|
| 159 |
+
# Eye landmarks (MediaPipe FaceMesh)
|
| 160 |
+
left_eye = [33, 160, 158, 133, 153, 144]
|
| 161 |
+
right_eye = [362, 385, 387, 263, 373, 380]
|
| 162 |
+
left_iris = [468, 469, 470, 471]
|
| 163 |
+
right_iris = [473, 474, 475, 476]
|
| 164 |
+
|
| 165 |
+
def pts(idxs):
|
| 166 |
+
return np.array([(int(lm[i].x * w), int(lm[i].y * h)) for i in idxs])
|
| 167 |
+
|
| 168 |
+
def ear(eye_pts):
|
| 169 |
+
# EAR using 6 points
|
| 170 |
+
p1, p2, p3, p4, p5, p6 = eye_pts
|
| 171 |
+
v1 = np.linalg.norm(p2 - p6)
|
| 172 |
+
v2 = np.linalg.norm(p3 - p5)
|
| 173 |
+
h1 = np.linalg.norm(p1 - p4)
|
| 174 |
+
return (v1 + v2) / (2.0 * h1 + 1e-6)
|
| 175 |
+
|
| 176 |
+
le = pts(left_eye)
|
| 177 |
+
re = pts(right_eye)
|
| 178 |
+
le_ear = ear(le)
|
| 179 |
+
re_ear = ear(re)
|
| 180 |
+
ear_avg = (le_ear + re_ear) / 2.0
|
| 181 |
+
|
| 182 |
+
# openness threshold
|
| 183 |
+
pred_label = "open" if ear_avg > 0.22 else "closed"
|
| 184 |
+
|
| 185 |
+
# iris centers
|
| 186 |
+
li = pts(left_iris)
|
| 187 |
+
ri = pts(right_iris)
|
| 188 |
+
li_c = li.mean(axis=0).astype(int)
|
| 189 |
+
ri_c = ri.mean(axis=0).astype(int)
|
| 190 |
+
|
| 191 |
+
# eye centers (midpoint of corners)
|
| 192 |
+
le_c = ((le[0] + le[3]) / 2).astype(int)
|
| 193 |
+
re_c = ((re[0] + re[3]) / 2).astype(int)
|
| 194 |
+
|
| 195 |
+
# focus = iris close to eye center horizontally for both eyes
|
| 196 |
+
le_dx = abs(li_c[0] - le_c[0]) / max(np.linalg.norm(le[0] - le[3]), 1)
|
| 197 |
+
re_dx = abs(ri_c[0] - re_c[0]) / max(np.linalg.norm(re[0] - re[3]), 1)
|
| 198 |
+
focused = (pred_label == "open") and (le_dx < 0.18) and (re_dx < 0.18)
|
| 199 |
+
|
| 200 |
+
# draw eye boundaries
|
| 201 |
+
cv2.polylines(frame, [le], True, (0, 255, 255), 1)
|
| 202 |
+
cv2.polylines(frame, [re], True, (0, 255, 255), 1)
|
| 203 |
+
# draw iris centers
|
| 204 |
+
cv2.circle(frame, tuple(li_c), 3, (0, 0, 255), -1)
|
| 205 |
+
cv2.circle(frame, tuple(ri_c), 3, (0, 0, 255), -1)
|
| 206 |
+
else:
|
| 207 |
+
pred_label = "closed"
|
| 208 |
+
focused = False
|
| 209 |
+
else:
|
| 210 |
+
if model is not None:
|
| 211 |
+
pred_label, conf = classify_frame(model, frame)
|
| 212 |
+
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
| 213 |
+
pupil_center = detect_pupil_center(gray) if pred_label.lower() == "open" else None
|
| 214 |
+
focused = False
|
| 215 |
+
if pred_label.lower() == "open" and pupil_center is not None:
|
| 216 |
+
focused = is_focused(pupil_center, gray.shape)
|
| 217 |
+
|
| 218 |
+
if pred_label.lower() != "open":
|
| 219 |
+
focused = False
|
| 220 |
+
|
| 221 |
+
label = "open_focused" if (pred_label.lower() == "open" and focused) else "open_not_focused"
|
| 222 |
+
if pred_label.lower() != "open":
|
| 223 |
+
label = "closed_not_focused"
|
| 224 |
+
|
| 225 |
+
fcsv.write(f"{time_sec:.2f},{label},{int(focused)},{conf:.4f}\n")
|
| 226 |
+
|
| 227 |
+
if last_label is None:
|
| 228 |
+
last_label = label
|
| 229 |
+
seg_start = time_sec
|
| 230 |
+
elif label != last_label:
|
| 231 |
+
segments.append((seg_start, time_sec, last_label))
|
| 232 |
+
seg_start = time_sec
|
| 233 |
+
last_label = label
|
| 234 |
+
|
| 235 |
+
annotated = annotate_frame(frame, label, focused, conf, time_sec)
|
| 236 |
+
writer.write(annotated)
|
| 237 |
+
frame_idx += 1
|
| 238 |
+
|
| 239 |
+
if last_label is not None:
|
| 240 |
+
end_time = frame_idx / fps
|
| 241 |
+
segments.append((seg_start, end_time, last_label))
|
| 242 |
+
write_segments(seg_path, segments)
|
| 243 |
+
|
| 244 |
+
cap.release()
|
| 245 |
+
writer.release()
|
| 246 |
+
print(f"Saved: {out_path}")
|
| 247 |
+
print(f"CSV: {csv_path}")
|
| 248 |
+
print(f"Segments: {seg_path}")
|
| 249 |
+
|
| 250 |
+
|
| 251 |
+
def main():
|
| 252 |
+
project_root = Path(__file__).resolve().parent.parent
|
| 253 |
+
weights = find_weights(project_root)
|
| 254 |
+
model = YOLO(str(weights)) if weights is not None else None
|
| 255 |
+
|
| 256 |
+
# Default to 1.mp4 and 2.mp4 in project root
|
| 257 |
+
videos = []
|
| 258 |
+
for name in ["1.mp4", "2.mp4"]:
|
| 259 |
+
p = project_root / name
|
| 260 |
+
if p.exists():
|
| 261 |
+
videos.append(p)
|
| 262 |
+
|
| 263 |
+
# Also allow passing paths via env var
|
| 264 |
+
extra = os.getenv("VIDEOS", "")
|
| 265 |
+
for v in [x.strip() for x in extra.split(",") if x.strip()]:
|
| 266 |
+
vp = Path(v)
|
| 267 |
+
if not vp.is_absolute():
|
| 268 |
+
vp = project_root / vp
|
| 269 |
+
if vp.exists():
|
| 270 |
+
videos.append(vp)
|
| 271 |
+
|
| 272 |
+
if not videos:
|
| 273 |
+
print("No videos found. Expected 1.mp4 / 2.mp4 in project root.")
|
| 274 |
+
return
|
| 275 |
+
|
| 276 |
+
for v in videos:
|
| 277 |
+
process_video(v, model)
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
if __name__ == "__main__":
|
| 281 |
+
main()
|
models/cnn/CNN_MODEL/scripts/webcam_live.py
CHANGED
|
@@ -1,184 +1,184 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Live webcam: detect face, crop each eye, run open/closed classifier, show on screen.
|
| 3 |
-
Requires: opencv-python, ultralytics, mediapipe (pip install mediapipe).
|
| 4 |
-
Press 'q' to quit.
|
| 5 |
-
"""
|
| 6 |
-
import urllib.request
|
| 7 |
-
from pathlib import Path
|
| 8 |
-
|
| 9 |
-
import cv2
|
| 10 |
-
import numpy as np
|
| 11 |
-
from ultralytics import YOLO
|
| 12 |
-
|
| 13 |
-
try:
|
| 14 |
-
import mediapipe as mp
|
| 15 |
-
_mp_has_solutions = hasattr(mp, "solutions")
|
| 16 |
-
except ImportError:
|
| 17 |
-
mp = None
|
| 18 |
-
_mp_has_solutions = False
|
| 19 |
-
|
| 20 |
-
# New MediaPipe Tasks API (Face Landmarker) eye indices
|
| 21 |
-
LEFT_EYE_INDICES_NEW = [263, 249, 390, 373, 374, 380, 381, 382, 362, 466, 388, 387, 386, 385, 384, 398]
|
| 22 |
-
RIGHT_EYE_INDICES_NEW = [33, 7, 163, 144, 145, 153, 154, 155, 133, 246, 161, 160, 159, 158, 157, 173]
|
| 23 |
-
# Old Face Mesh (solutions) indices
|
| 24 |
-
LEFT_EYE_INDICES_OLD = [33, 160, 158, 133, 153, 144]
|
| 25 |
-
RIGHT_EYE_INDICES_OLD = [362, 385, 387, 263, 373, 380]
|
| 26 |
-
EYE_PADDING = 0.35
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
def find_weights(project_root: Path) -> Path | None:
|
| 30 |
-
candidates = [
|
| 31 |
-
project_root / "weights" / "best.pt",
|
| 32 |
-
project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
|
| 33 |
-
project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
|
| 34 |
-
]
|
| 35 |
-
return next((p for p in candidates if p.is_file()), None)
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
def get_eye_roi(frame: np.ndarray, landmarks, indices: list[int]) -> np.ndarray | None:
|
| 39 |
-
h, w = frame.shape[:2]
|
| 40 |
-
pts = np.array([(int(landmarks[i].x * w), int(landmarks[i].y * h)) for i in indices])
|
| 41 |
-
x_min, y_min = pts.min(axis=0)
|
| 42 |
-
x_max, y_max = pts.max(axis=0)
|
| 43 |
-
dx = max(int((x_max - x_min) * EYE_PADDING), 8)
|
| 44 |
-
dy = max(int((y_max - y_min) * EYE_PADDING), 8)
|
| 45 |
-
x0 = max(0, x_min - dx)
|
| 46 |
-
y0 = max(0, y_min - dy)
|
| 47 |
-
x1 = min(w, x_max + dx)
|
| 48 |
-
y1 = min(h, y_max + dy)
|
| 49 |
-
if x1 <= x0 or y1 <= y0:
|
| 50 |
-
return None
|
| 51 |
-
return frame[y0:y1, x0:x1].copy()
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
def _run_with_solutions(mp, model, cap):
|
| 55 |
-
face_mesh = mp.solutions.face_mesh.FaceMesh(
|
| 56 |
-
static_image_mode=False,
|
| 57 |
-
max_num_faces=1,
|
| 58 |
-
refine_landmarks=True,
|
| 59 |
-
min_detection_confidence=0.5,
|
| 60 |
-
min_tracking_confidence=0.5,
|
| 61 |
-
)
|
| 62 |
-
while True:
|
| 63 |
-
ret, frame = cap.read()
|
| 64 |
-
if not ret:
|
| 65 |
-
break
|
| 66 |
-
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
| 67 |
-
results = face_mesh.process(rgb)
|
| 68 |
-
left_label, left_conf = "—", 0.0
|
| 69 |
-
right_label, right_conf = "—", 0.0
|
| 70 |
-
if results.multi_face_landmarks:
|
| 71 |
-
lm = results.multi_face_landmarks[0].landmark
|
| 72 |
-
for roi, indices, side in [
|
| 73 |
-
(get_eye_roi(frame, lm, LEFT_EYE_INDICES_OLD), LEFT_EYE_INDICES_OLD, "left"),
|
| 74 |
-
(get_eye_roi(frame, lm, RIGHT_EYE_INDICES_OLD), RIGHT_EYE_INDICES_OLD, "right"),
|
| 75 |
-
]:
|
| 76 |
-
if roi is not None and roi.size > 0:
|
| 77 |
-
try:
|
| 78 |
-
pred = model.predict(roi, imgsz=224, device="cpu", verbose=False)
|
| 79 |
-
if pred:
|
| 80 |
-
r = pred[0]
|
| 81 |
-
label = model.names[int(r.probs.top1)]
|
| 82 |
-
conf = float(r.probs.top1conf)
|
| 83 |
-
if side == "left":
|
| 84 |
-
left_label, left_conf = label, conf
|
| 85 |
-
else:
|
| 86 |
-
right_label, right_conf = label, conf
|
| 87 |
-
except Exception:
|
| 88 |
-
pass
|
| 89 |
-
cv2.putText(frame, f"L: {left_label} ({left_conf:.0%})", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
|
| 90 |
-
cv2.putText(frame, f"R: {right_label} ({right_conf:.0%})", (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
|
| 91 |
-
cv2.imshow("Eye open/closed (q to quit)", frame)
|
| 92 |
-
if cv2.waitKey(1) & 0xFF == ord("q"):
|
| 93 |
-
break
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
def _run_with_tasks(project_root: Path, model, cap):
|
| 97 |
-
from mediapipe.tasks.python import BaseOptions
|
| 98 |
-
from mediapipe.tasks.python.vision import FaceLandmarker, FaceLandmarkerOptions
|
| 99 |
-
from mediapipe.tasks.python.vision.core import vision_task_running_mode as running_mode
|
| 100 |
-
from mediapipe.tasks.python.vision.core import image as image_lib
|
| 101 |
-
|
| 102 |
-
model_path = project_root / "weights" / "face_landmarker.task"
|
| 103 |
-
if not model_path.is_file():
|
| 104 |
-
print("Downloading face_landmarker.task ...")
|
| 105 |
-
url = "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task"
|
| 106 |
-
urllib.request.urlretrieve(url, model_path)
|
| 107 |
-
print("Done.")
|
| 108 |
-
|
| 109 |
-
options = FaceLandmarkerOptions(
|
| 110 |
-
base_options=BaseOptions(model_asset_path=str(model_path)),
|
| 111 |
-
running_mode=running_mode.VisionTaskRunningMode.IMAGE,
|
| 112 |
-
num_faces=1,
|
| 113 |
-
)
|
| 114 |
-
face_landmarker = FaceLandmarker.create_from_options(options)
|
| 115 |
-
ImageFormat = image_lib.ImageFormat
|
| 116 |
-
|
| 117 |
-
while True:
|
| 118 |
-
ret, frame = cap.read()
|
| 119 |
-
if not ret:
|
| 120 |
-
break
|
| 121 |
-
left_label, left_conf = "—", 0.0
|
| 122 |
-
right_label, right_conf = "—", 0.0
|
| 123 |
-
|
| 124 |
-
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
| 125 |
-
rgb_contiguous = np.ascontiguousarray(rgb)
|
| 126 |
-
mp_image = image_lib.Image(ImageFormat.SRGB, rgb_contiguous)
|
| 127 |
-
result = face_landmarker.detect(mp_image)
|
| 128 |
-
|
| 129 |
-
if result.face_landmarks:
|
| 130 |
-
lm = result.face_landmarks[0]
|
| 131 |
-
for roi, side in [
|
| 132 |
-
(get_eye_roi(frame, lm, LEFT_EYE_INDICES_NEW), "left"),
|
| 133 |
-
(get_eye_roi(frame, lm, RIGHT_EYE_INDICES_NEW), "right"),
|
| 134 |
-
]:
|
| 135 |
-
if roi is not None and roi.size > 0:
|
| 136 |
-
try:
|
| 137 |
-
pred = model.predict(roi, imgsz=224, device="cpu", verbose=False)
|
| 138 |
-
if pred:
|
| 139 |
-
r = pred[0]
|
| 140 |
-
label = model.names[int(r.probs.top1)]
|
| 141 |
-
conf = float(r.probs.top1conf)
|
| 142 |
-
if side == "left":
|
| 143 |
-
left_label, left_conf = label, conf
|
| 144 |
-
else:
|
| 145 |
-
right_label, right_conf = label, conf
|
| 146 |
-
except Exception:
|
| 147 |
-
pass
|
| 148 |
-
|
| 149 |
-
cv2.putText(frame, f"L: {left_label} ({left_conf:.0%})", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
|
| 150 |
-
cv2.putText(frame, f"R: {right_label} ({right_conf:.0%})", (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
|
| 151 |
-
cv2.imshow("Eye open/closed (q to quit)", frame)
|
| 152 |
-
if cv2.waitKey(1) & 0xFF == ord("q"):
|
| 153 |
-
break
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
def main():
|
| 157 |
-
project_root = Path(__file__).resolve().parent.parent
|
| 158 |
-
weights = find_weights(project_root)
|
| 159 |
-
if weights is None:
|
| 160 |
-
print("Weights not found. Put best.pt in weights/ or runs/.../weights/ (from model team).")
|
| 161 |
-
return
|
| 162 |
-
if mp is None:
|
| 163 |
-
print("MediaPipe required. Install: pip install mediapipe")
|
| 164 |
-
return
|
| 165 |
-
|
| 166 |
-
model = YOLO(str(weights))
|
| 167 |
-
cap = cv2.VideoCapture(0)
|
| 168 |
-
if not cap.isOpened():
|
| 169 |
-
print("Could not open webcam.")
|
| 170 |
-
return
|
| 171 |
-
|
| 172 |
-
print("Live eye open/closed on your face. Press 'q' to quit.")
|
| 173 |
-
try:
|
| 174 |
-
if _mp_has_solutions:
|
| 175 |
-
_run_with_solutions(mp, model, cap)
|
| 176 |
-
else:
|
| 177 |
-
_run_with_tasks(project_root, model, cap)
|
| 178 |
-
finally:
|
| 179 |
-
cap.release()
|
| 180 |
-
cv2.destroyAllWindows()
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
if __name__ == "__main__":
|
| 184 |
-
main()
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Live webcam: detect face, crop each eye, run open/closed classifier, show on screen.
|
| 3 |
+
Requires: opencv-python, ultralytics, mediapipe (pip install mediapipe).
|
| 4 |
+
Press 'q' to quit.
|
| 5 |
+
"""
|
| 6 |
+
import urllib.request
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
|
| 9 |
+
import cv2
|
| 10 |
+
import numpy as np
|
| 11 |
+
from ultralytics import YOLO
|
| 12 |
+
|
| 13 |
+
try:
|
| 14 |
+
import mediapipe as mp
|
| 15 |
+
_mp_has_solutions = hasattr(mp, "solutions")
|
| 16 |
+
except ImportError:
|
| 17 |
+
mp = None
|
| 18 |
+
_mp_has_solutions = False
|
| 19 |
+
|
| 20 |
+
# New MediaPipe Tasks API (Face Landmarker) eye indices
|
| 21 |
+
LEFT_EYE_INDICES_NEW = [263, 249, 390, 373, 374, 380, 381, 382, 362, 466, 388, 387, 386, 385, 384, 398]
|
| 22 |
+
RIGHT_EYE_INDICES_NEW = [33, 7, 163, 144, 145, 153, 154, 155, 133, 246, 161, 160, 159, 158, 157, 173]
|
| 23 |
+
# Old Face Mesh (solutions) indices
|
| 24 |
+
LEFT_EYE_INDICES_OLD = [33, 160, 158, 133, 153, 144]
|
| 25 |
+
RIGHT_EYE_INDICES_OLD = [362, 385, 387, 263, 373, 380]
|
| 26 |
+
EYE_PADDING = 0.35
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def find_weights(project_root: Path) -> Path | None:
|
| 30 |
+
candidates = [
|
| 31 |
+
project_root / "weights" / "best.pt",
|
| 32 |
+
project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
|
| 33 |
+
project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
|
| 34 |
+
]
|
| 35 |
+
return next((p for p in candidates if p.is_file()), None)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def get_eye_roi(frame: np.ndarray, landmarks, indices: list[int]) -> np.ndarray | None:
|
| 39 |
+
h, w = frame.shape[:2]
|
| 40 |
+
pts = np.array([(int(landmarks[i].x * w), int(landmarks[i].y * h)) for i in indices])
|
| 41 |
+
x_min, y_min = pts.min(axis=0)
|
| 42 |
+
x_max, y_max = pts.max(axis=0)
|
| 43 |
+
dx = max(int((x_max - x_min) * EYE_PADDING), 8)
|
| 44 |
+
dy = max(int((y_max - y_min) * EYE_PADDING), 8)
|
| 45 |
+
x0 = max(0, x_min - dx)
|
| 46 |
+
y0 = max(0, y_min - dy)
|
| 47 |
+
x1 = min(w, x_max + dx)
|
| 48 |
+
y1 = min(h, y_max + dy)
|
| 49 |
+
if x1 <= x0 or y1 <= y0:
|
| 50 |
+
return None
|
| 51 |
+
return frame[y0:y1, x0:x1].copy()
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def _run_with_solutions(mp, model, cap):
    """Webcam loop using the legacy ``mediapipe.solutions`` Face Mesh.

    Each frame: detect landmarks, crop both eyes, classify each crop with the
    YOLO model, and overlay the labels. Runs until the stream ends or 'q' is
    pressed. Fixes: the FaceMesh graph is now released on exit, and the
    unused ``indices`` loop variable was dropped.
    """
    face_mesh = mp.solutions.face_mesh.FaceMesh(
        static_image_mode=False,
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5,
    )
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = face_mesh.process(rgb)
            left_label, left_conf = "—", 0.0
            right_label, right_conf = "—", 0.0
            if results.multi_face_landmarks:
                lm = results.multi_face_landmarks[0].landmark
                for roi, side in [
                    (get_eye_roi(frame, lm, LEFT_EYE_INDICES_OLD), "left"),
                    (get_eye_roi(frame, lm, RIGHT_EYE_INDICES_OLD), "right"),
                ]:
                    if roi is not None and roi.size > 0:
                        try:
                            pred = model.predict(roi, imgsz=224, device="cpu", verbose=False)
                            if pred:
                                r = pred[0]
                                label = model.names[int(r.probs.top1)]
                                conf = float(r.probs.top1conf)
                                if side == "left":
                                    left_label, left_conf = label, conf
                                else:
                                    right_label, right_conf = label, conf
                        except Exception:
                            # Best effort: a failed prediction keeps the "—" placeholder.
                            pass
            cv2.putText(frame, f"L: {left_label} ({left_conf:.0%})", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
            cv2.putText(frame, f"R: {right_label} ({right_conf:.0%})", (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
            cv2.imshow("Eye open/closed (q to quit)", frame)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Release the mediapipe graph resources (was leaked before).
        face_mesh.close()
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def _run_with_tasks(project_root: Path, model, cap):
    """Webcam loop using the new MediaPipe Tasks Face Landmarker API.

    Downloads the landmarker model on first use (creating weights/ if it is
    missing — urlretrieve does not create directories), then classifies both
    eye crops per frame with the YOLO model until 'q' is pressed.
    """
    from mediapipe.tasks.python import BaseOptions
    from mediapipe.tasks.python.vision import FaceLandmarker, FaceLandmarkerOptions
    from mediapipe.tasks.python.vision.core import vision_task_running_mode as running_mode
    from mediapipe.tasks.python.vision.core import image as image_lib

    model_path = project_root / "weights" / "face_landmarker.task"
    if not model_path.is_file():
        # FIX: ensure the parent directory exists before downloading.
        model_path.parent.mkdir(parents=True, exist_ok=True)
        print("Downloading face_landmarker.task ...")
        url = "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task"
        urllib.request.urlretrieve(url, model_path)
        print("Done.")

    options = FaceLandmarkerOptions(
        base_options=BaseOptions(model_asset_path=str(model_path)),
        running_mode=running_mode.VisionTaskRunningMode.IMAGE,
        num_faces=1,
    )
    face_landmarker = FaceLandmarker.create_from_options(options)
    ImageFormat = image_lib.ImageFormat

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        left_label, left_conf = "—", 0.0
        right_label, right_conf = "—", 0.0

        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # mp.Image requires contiguous memory.
        rgb_contiguous = np.ascontiguousarray(rgb)
        mp_image = image_lib.Image(ImageFormat.SRGB, rgb_contiguous)
        result = face_landmarker.detect(mp_image)

        if result.face_landmarks:
            lm = result.face_landmarks[0]
            for roi, side in [
                (get_eye_roi(frame, lm, LEFT_EYE_INDICES_NEW), "left"),
                (get_eye_roi(frame, lm, RIGHT_EYE_INDICES_NEW), "right"),
            ]:
                if roi is not None and roi.size > 0:
                    try:
                        pred = model.predict(roi, imgsz=224, device="cpu", verbose=False)
                        if pred:
                            r = pred[0]
                            label = model.names[int(r.probs.top1)]
                            conf = float(r.probs.top1conf)
                            if side == "left":
                                left_label, left_conf = label, conf
                            else:
                                right_label, right_conf = label, conf
                    except Exception:
                        # Best effort: a failed prediction keeps the "—" placeholder.
                        pass

        cv2.putText(frame, f"L: {left_label} ({left_conf:.0%})", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
        cv2.putText(frame, f"R: {right_label} ({right_conf:.0%})", (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
        cv2.imshow("Eye open/closed (q to quit)", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def main():
    """Entry point: find weights, open the default webcam, run the live loop."""
    project_root = Path(__file__).resolve().parent.parent
    weights = find_weights(project_root)
    if weights is None:
        print("Weights not found. Put best.pt in weights/ or runs/.../weights/ (from model team).")
        return
    if mp is None:
        print("MediaPipe required. Install: pip install mediapipe")
        return

    classifier = YOLO(str(weights))
    capture = cv2.VideoCapture(0)
    if not capture.isOpened():
        print("Could not open webcam.")
        return

    print("Live eye open/closed on your face. Press 'q' to quit.")
    try:
        # Prefer the legacy solutions API when this mediapipe build ships it.
        if _mp_has_solutions:
            _run_with_solutions(mp, classifier, capture)
        else:
            _run_with_tasks(project_root, classifier, capture)
    finally:
        capture.release()
        cv2.destroyAllWindows()
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
if __name__ == "__main__":
    # Run the live demo only when executed directly as a script.
    main()
|
models/cnn/eye_attention/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
|
models/cnn/eye_attention/classifier.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
|
|
|
| 3 |
from abc import ABC, abstractmethod
|
| 4 |
|
| 5 |
import numpy as np
|
|
@@ -54,16 +55,115 @@ class YOLOv11Classifier(EyeClassifier):
|
|
| 54 |
return sum(scores) / len(scores) if scores else 1.0
|
| 55 |
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
def load_eye_classifier(
|
| 58 |
path: str | None = None,
|
| 59 |
backend: str = "yolo",
|
| 60 |
device: str = "cpu",
|
| 61 |
) -> EyeClassifier:
|
| 62 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
return GeometricOnlyClassifier()
|
| 64 |
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
print("[CLASSIFIER]
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
+
import os
|
| 4 |
from abc import ABC, abstractmethod
|
| 5 |
|
| 6 |
import numpy as np
|
|
|
|
| 55 |
return sum(scores) / len(scores) if scores else 1.0
|
| 56 |
|
| 57 |
|
| 58 |
+
class EyeCNNClassifier(EyeClassifier):
    """Loader for the custom PyTorch EyeCNN (trained on Kaggle eye crops).

    ``predict_score`` returns the mean probability of the "open" class across
    the supplied BGR eye crops (1.0 when no usable crop is available).
    """

    def __init__(self, checkpoint_path: str, device: str = "cpu"):
        """Load checkpoint weights onto *device* and put the net in eval mode."""
        import torch
        import torch.nn as nn

        class EyeCNN(nn.Module):
            # Must mirror the training-time architecture exactly so the
            # checkpoint's state_dict keys match.
            def __init__(self, num_classes=2, dropout_rate=0.3):
                super().__init__()
                self.conv_layers = nn.Sequential(
                    nn.Conv2d(3, 32, 3, 1, 1), nn.BatchNorm2d(32), nn.ReLU(), nn.MaxPool2d(2, 2),
                    nn.Conv2d(32, 64, 3, 1, 1), nn.BatchNorm2d(64), nn.ReLU(), nn.MaxPool2d(2, 2),
                    nn.Conv2d(64, 128, 3, 1, 1), nn.BatchNorm2d(128), nn.ReLU(), nn.MaxPool2d(2, 2),
                    nn.Conv2d(128, 256, 3, 1, 1), nn.BatchNorm2d(256), nn.ReLU(), nn.MaxPool2d(2, 2),
                )
                self.fc_layers = nn.Sequential(
                    nn.AdaptiveAvgPool2d((1, 1)), nn.Flatten(),
                    nn.Linear(256, 512), nn.ReLU(), nn.Dropout(dropout_rate),
                    nn.Linear(512, num_classes),
                )

            def forward(self, x):
                return self.fc_layers(self.conv_layers(x))

        self._device = torch.device(device)
        # SECURITY: weights_only=False unpickles arbitrary Python objects.
        # Only load checkpoints from trusted sources.
        checkpoint = torch.load(checkpoint_path, map_location=self._device, weights_only=False)
        # Dropout only affects training-mode behavior; read it from the saved
        # config when present so the state_dict shapes stay consistent.
        dropout_rate = checkpoint.get("config", {}).get("dropout_rate", 0.35)
        self._model = EyeCNN(num_classes=2, dropout_rate=dropout_rate)
        self._model.load_state_dict(checkpoint["model_state_dict"])
        self._model.to(self._device)
        self._model.eval()

        self._transform = None  # preprocessing pipeline, built lazily

    def _get_transform(self):
        """Build (once) the 96x96 ImageNet-normalized preprocessing pipeline."""
        if self._transform is None:
            from torchvision import transforms
            self._transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.Resize((96, 96)),
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225],
                ),
            ])
        return self._transform

    @property
    def name(self) -> str:
        """Short identifier shown in the UI overlay."""
        return "eye_cnn"

    def predict_score(self, crops_bgr: list[np.ndarray]) -> float:
        """Average P(open) over *crops_bgr*; unusable crops count as open (1.0)."""
        if not crops_bgr:
            return 1.0

        import torch
        import cv2

        transform = self._get_transform()
        scores = []
        for crop in crops_bgr:
            if crop is None or crop.size == 0:
                scores.append(1.0)
                continue
            rgb = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)
            tensor = transform(rgb).unsqueeze(0).to(self._device)
            with torch.no_grad():
                output = self._model(tensor)
                prob = torch.softmax(output, dim=1)[0, 1].item()  # prob of "open"
            scores.append(prob)
        return sum(scores) / len(scores)
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
# Checkpoint-file extension -> classifier backend.
_EXT_TO_BACKEND = {".pth": "cnn", ".pt": "yolo"}


def load_eye_classifier(
    path: str | None = None,
    backend: str = "yolo",
    device: str = "cpu",
) -> EyeClassifier:
    """Build an EyeClassifier for the requested backend.

    The extension of *path* can override *backend* (.pth -> cnn, .pt -> yolo).
    With no path, or when backend is "geometric", the geometric fallback is
    returned. Raises ValueError for an unknown backend.
    """
    if backend == "geometric":
        return GeometricOnlyClassifier()

    if path is None:
        print(f"[CLASSIFIER] No model path for backend {backend!r}, falling back to geometric")
        return GeometricOnlyClassifier()

    extension = os.path.splitext(path)[1].lower()
    implied = _EXT_TO_BACKEND.get(extension)
    if implied and implied != backend:
        print(f"[CLASSIFIER] File extension {extension!r} implies backend {implied!r}, "
              f"overriding requested {backend!r}")
        backend = implied

    print(f"[CLASSIFIER] backend={backend!r}, path={path!r}")

    if backend == "cnn":
        return EyeCNNClassifier(path, device=device)

    if backend == "yolo":
        try:
            return YOLOv11Classifier(path, device=device)
        except ImportError:
            print("[CLASSIFIER] ultralytics required for YOLO. pip install ultralytics")
            raise

    raise ValueError(
        f"Unknown eye backend {backend!r}. Choose from: yolo, cnn, geometric"
    )
|
models/cnn/eye_attention/crop.py
CHANGED
|
@@ -1,70 +1,70 @@
|
|
| 1 |
-
import cv2
|
| 2 |
-
import numpy as np
|
| 3 |
-
|
| 4 |
-
from models.pretrained.face_mesh.face_mesh import FaceMeshDetector
|
| 5 |
-
|
| 6 |
-
LEFT_EYE_CONTOUR = FaceMeshDetector.LEFT_EYE_INDICES
|
| 7 |
-
RIGHT_EYE_CONTOUR = FaceMeshDetector.RIGHT_EYE_INDICES
|
| 8 |
-
|
| 9 |
-
IMAGENET_MEAN = (0.485, 0.456, 0.406)
|
| 10 |
-
IMAGENET_STD = (0.229, 0.224, 0.225)
|
| 11 |
-
|
| 12 |
-
CROP_SIZE = 96
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
def _bbox_from_landmarks(
|
| 16 |
-
landmarks: np.ndarray,
|
| 17 |
-
indices: list[int],
|
| 18 |
-
frame_w: int,
|
| 19 |
-
frame_h: int,
|
| 20 |
-
expand: float = 0.4,
|
| 21 |
-
) -> tuple[int, int, int, int]:
|
| 22 |
-
pts = landmarks[indices, :2]
|
| 23 |
-
px = pts[:, 0] * frame_w
|
| 24 |
-
py = pts[:, 1] * frame_h
|
| 25 |
-
|
| 26 |
-
x_min, x_max = px.min(), px.max()
|
| 27 |
-
y_min, y_max = py.min(), py.max()
|
| 28 |
-
w = x_max - x_min
|
| 29 |
-
h = y_max - y_min
|
| 30 |
-
cx = (x_min + x_max) / 2
|
| 31 |
-
cy = (y_min + y_max) / 2
|
| 32 |
-
|
| 33 |
-
size = max(w, h) * (1 + expand)
|
| 34 |
-
half = size / 2
|
| 35 |
-
|
| 36 |
-
x1 = int(max(cx - half, 0))
|
| 37 |
-
y1 = int(max(cy - half, 0))
|
| 38 |
-
x2 = int(min(cx + half, frame_w))
|
| 39 |
-
y2 = int(min(cy + half, frame_h))
|
| 40 |
-
|
| 41 |
-
return x1, y1, x2, y2
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
def extract_eye_crops(
|
| 45 |
-
frame: np.ndarray,
|
| 46 |
-
landmarks: np.ndarray,
|
| 47 |
-
expand: float = 0.4,
|
| 48 |
-
crop_size: int = CROP_SIZE,
|
| 49 |
-
) -> tuple[np.ndarray, np.ndarray, tuple, tuple]:
|
| 50 |
-
h, w = frame.shape[:2]
|
| 51 |
-
|
| 52 |
-
left_bbox = _bbox_from_landmarks(landmarks, LEFT_EYE_CONTOUR, w, h, expand)
|
| 53 |
-
right_bbox = _bbox_from_landmarks(landmarks, RIGHT_EYE_CONTOUR, w, h, expand)
|
| 54 |
-
|
| 55 |
-
left_crop = frame[left_bbox[1] : left_bbox[3], left_bbox[0] : left_bbox[2]]
|
| 56 |
-
right_crop = frame[right_bbox[1] : right_bbox[3], right_bbox[0] : right_bbox[2]]
|
| 57 |
-
|
| 58 |
-
left_crop = cv2.resize(left_crop, (crop_size, crop_size), interpolation=cv2.INTER_AREA)
|
| 59 |
-
right_crop = cv2.resize(right_crop, (crop_size, crop_size), interpolation=cv2.INTER_AREA)
|
| 60 |
-
|
| 61 |
-
return left_crop, right_crop, left_bbox, right_bbox
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
def crop_to_tensor(crop_bgr: np.ndarray):
|
| 65 |
-
import torch
|
| 66 |
-
|
| 67 |
-
rgb = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
|
| 68 |
-
for c in range(3):
|
| 69 |
-
rgb[:, :, c] = (rgb[:, :, c] - IMAGENET_MEAN[c]) / IMAGENET_STD[c]
|
| 70 |
-
return torch.from_numpy(rgb.transpose(2, 0, 1))
|
|
|
|
| 1 |
+
import cv2
|
| 2 |
+
import numpy as np
|
| 3 |
+
|
| 4 |
+
from models.pretrained.face_mesh.face_mesh import FaceMeshDetector
|
| 5 |
+
|
| 6 |
+
LEFT_EYE_CONTOUR = FaceMeshDetector.LEFT_EYE_INDICES
|
| 7 |
+
RIGHT_EYE_CONTOUR = FaceMeshDetector.RIGHT_EYE_INDICES
|
| 8 |
+
|
| 9 |
+
IMAGENET_MEAN = (0.485, 0.456, 0.406)
|
| 10 |
+
IMAGENET_STD = (0.229, 0.224, 0.225)
|
| 11 |
+
|
| 12 |
+
CROP_SIZE = 96
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def _bbox_from_landmarks(
|
| 16 |
+
landmarks: np.ndarray,
|
| 17 |
+
indices: list[int],
|
| 18 |
+
frame_w: int,
|
| 19 |
+
frame_h: int,
|
| 20 |
+
expand: float = 0.4,
|
| 21 |
+
) -> tuple[int, int, int, int]:
|
| 22 |
+
pts = landmarks[indices, :2]
|
| 23 |
+
px = pts[:, 0] * frame_w
|
| 24 |
+
py = pts[:, 1] * frame_h
|
| 25 |
+
|
| 26 |
+
x_min, x_max = px.min(), px.max()
|
| 27 |
+
y_min, y_max = py.min(), py.max()
|
| 28 |
+
w = x_max - x_min
|
| 29 |
+
h = y_max - y_min
|
| 30 |
+
cx = (x_min + x_max) / 2
|
| 31 |
+
cy = (y_min + y_max) / 2
|
| 32 |
+
|
| 33 |
+
size = max(w, h) * (1 + expand)
|
| 34 |
+
half = size / 2
|
| 35 |
+
|
| 36 |
+
x1 = int(max(cx - half, 0))
|
| 37 |
+
y1 = int(max(cy - half, 0))
|
| 38 |
+
x2 = int(min(cx + half, frame_w))
|
| 39 |
+
y2 = int(min(cy + half, frame_h))
|
| 40 |
+
|
| 41 |
+
return x1, y1, x2, y2
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def extract_eye_crops(
    frame: np.ndarray,
    landmarks: np.ndarray,
    expand: float = 0.4,
    crop_size: int = CROP_SIZE,
) -> tuple[np.ndarray, np.ndarray, tuple, tuple]:
    """Cut out and resize both eye regions from *frame*.

    Returns (left_crop, right_crop, left_bbox, right_bbox); each crop is a
    BGR square of side *crop_size*, each bbox is (x1, y1, x2, y2) pixels.
    """
    h, w = frame.shape[:2]

    crops = []
    bboxes = []
    for contour in (LEFT_EYE_CONTOUR, RIGHT_EYE_CONTOUR):
        bbox = _bbox_from_landmarks(landmarks, contour, w, h, expand)
        region = frame[bbox[1] : bbox[3], bbox[0] : bbox[2]]
        crops.append(cv2.resize(region, (crop_size, crop_size), interpolation=cv2.INTER_AREA))
        bboxes.append(bbox)

    return crops[0], crops[1], bboxes[0], bboxes[1]
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def crop_to_tensor(crop_bgr: np.ndarray):
    """Convert a BGR crop to a normalized (3, H, W) float32 torch tensor.

    Scales to [0, 1] and applies ImageNet mean/std normalization.
    """
    import torch

    rgb = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
    # Vectorized per-channel normalization (replaces the Python channel loop;
    # float32 arithmetic matches the previous scalar version exactly).
    mean = np.asarray(IMAGENET_MEAN, dtype=np.float32)
    std = np.asarray(IMAGENET_STD, dtype=np.float32)
    rgb = (rgb - mean) / std
    return torch.from_numpy(rgb.transpose(2, 0, 1))
|
ui/README.md
CHANGED
|
@@ -1,20 +1,20 @@
|
|
| 1 |
-
# ui
|
| 2 |
-
|
| 3 |
-
- **pipeline.py** — `FaceMeshPipeline` (head + eye geo ± YOLO → focus) and `MLPPipeline` (loads latest MLP from `MLP/models/`, 10 features → focus)
|
| 4 |
-
- **live_demo.py** — webcam window, mesh overlay, FOCUSED / NOT FOCUSED
|
| 5 |
-
|
| 6 |
-
From repo root:
|
| 7 |
-
|
| 8 |
-
```bash
|
| 9 |
-
python ui/live_demo.py
|
| 10 |
-
```
|
| 11 |
-
|
| 12 |
-
MLP only (no head/eye fusion, just your trained MLP):
|
| 13 |
-
|
| 14 |
-
```bash
|
| 15 |
-
python ui/live_demo.py --mlp
|
| 16 |
-
```
|
| 17 |
-
|
| 18 |
-
With YOLO eye model: `python ui/live_demo.py --eye-model path/to/yolo.pt`
|
| 19 |
-
|
| 20 |
-
`q` quit, `m` cycle mesh (full / contours / off).
|
|
|
|
| 1 |
+
# ui
|
| 2 |
+
|
| 3 |
+
- **pipeline.py** — `FaceMeshPipeline` (head + eye geo ± YOLO → focus) and `MLPPipeline` (loads latest MLP from `MLP/models/`, 10 features → focus)
|
| 4 |
+
- **live_demo.py** — webcam window, mesh overlay, FOCUSED / NOT FOCUSED
|
| 5 |
+
|
| 6 |
+
From repo root:
|
| 7 |
+
|
| 8 |
+
```bash
|
| 9 |
+
python ui/live_demo.py
|
| 10 |
+
```
|
| 11 |
+
|
| 12 |
+
MLP only (no head/eye fusion, just your trained MLP):
|
| 13 |
+
|
| 14 |
+
```bash
|
| 15 |
+
python ui/live_demo.py --mlp
|
| 16 |
+
```
|
| 17 |
+
|
| 18 |
+
With YOLO eye model: `python ui/live_demo.py --eye-model path/to/yolo.pt`
|
| 19 |
+
|
| 20 |
+
`q` quit, `m` cycle mesh (full / contours / off).
|
ui/live_demo.py
CHANGED
|
@@ -1,224 +1,224 @@
|
|
| 1 |
-
import argparse
|
| 2 |
-
import os
|
| 3 |
-
import sys
|
| 4 |
-
import time
|
| 5 |
-
|
| 6 |
-
import cv2
|
| 7 |
-
import numpy as np
|
| 8 |
-
from mediapipe.tasks.python.vision import FaceLandmarksConnections
|
| 9 |
-
|
| 10 |
-
_PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
| 11 |
-
if _PROJECT_ROOT not in sys.path:
|
| 12 |
-
sys.path.insert(0, _PROJECT_ROOT)
|
| 13 |
-
|
| 14 |
-
from ui.pipeline import FaceMeshPipeline, MLPPipeline
|
| 15 |
-
from models.pretrained.face_mesh.face_mesh import FaceMeshDetector
|
| 16 |
-
|
| 17 |
-
FONT = cv2.FONT_HERSHEY_SIMPLEX
|
| 18 |
-
CYAN = (255, 255, 0)
|
| 19 |
-
GREEN = (0, 255, 0)
|
| 20 |
-
MAGENTA = (255, 0, 255)
|
| 21 |
-
ORANGE = (0, 165, 255)
|
| 22 |
-
RED = (0, 0, 255)
|
| 23 |
-
WHITE = (255, 255, 255)
|
| 24 |
-
YELLOW = (0, 255, 255)
|
| 25 |
-
LIGHT_GREEN = (144, 238, 144)
|
| 26 |
-
|
| 27 |
-
_TESSELATION = [(c.start, c.end) for c in FaceLandmarksConnections.FACE_LANDMARKS_TESSELATION]
|
| 28 |
-
_CONTOURS = [(c.start, c.end) for c in FaceLandmarksConnections.FACE_LANDMARKS_CONTOURS]
|
| 29 |
-
_LEFT_EYEBROW = [70, 63, 105, 66, 107, 55, 65, 52, 53, 46]
|
| 30 |
-
_RIGHT_EYEBROW = [300, 293, 334, 296, 336, 285, 295, 282, 283, 276]
|
| 31 |
-
_NOSE_BRIDGE = [6, 197, 195, 5, 4, 1, 19, 94, 2]
|
| 32 |
-
_LIPS_OUTER = [61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291, 409, 270, 269, 267, 0, 37, 39, 40, 185, 61]
|
| 33 |
-
_LIPS_INNER = [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308, 415, 310, 311, 312, 13, 82, 81, 80, 191, 78]
|
| 34 |
-
_LEFT_EAR_POINTS = [33, 160, 158, 133, 153, 145]
|
| 35 |
-
_RIGHT_EAR_POINTS = [362, 385, 387, 263, 373, 380]
|
| 36 |
-
|
| 37 |
-
MESH_FULL = 0
|
| 38 |
-
MESH_CONTOURS = 1
|
| 39 |
-
MESH_OFF = 2
|
| 40 |
-
_MESH_NAMES = ["FULL MESH", "CONTOURS", "MESH OFF"]
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
def _lm_to_px(landmarks, idx, w, h):
|
| 44 |
-
return (int(landmarks[idx, 0] * w), int(landmarks[idx, 1] * h))
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
def draw_tessellation(frame, landmarks, w, h):
    """Blend the full face-mesh tessellation onto *frame* at 30% opacity."""
    mesh_layer = frame.copy()
    for start_idx, end_idx in _TESSELATION:
        p1 = _lm_to_px(landmarks, start_idx, w, h)
        p2 = _lm_to_px(landmarks, end_idx, w, h)
        cv2.line(mesh_layer, p1, p2, (200, 200, 200), 1, cv2.LINE_AA)
    # Composite the mesh layer back into the frame in place.
    cv2.addWeighted(mesh_layer, 0.3, frame, 0.7, 0, frame)
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
def draw_contours(frame, landmarks, w, h):
    """Draw face contours plus eyebrows, nose bridge and lips on *frame*."""
    for start_idx, end_idx in _CONTOURS:
        cv2.line(
            frame,
            _lm_to_px(landmarks, start_idx, w, h),
            _lm_to_px(landmarks, end_idx, w, h),
            CYAN, 1, cv2.LINE_AA,
        )

    def _polyline(indices, color, thickness):
        # Connect consecutive landmark indices with the given color/width.
        for i in range(len(indices) - 1):
            cv2.line(
                frame,
                _lm_to_px(landmarks, indices[i], w, h),
                _lm_to_px(landmarks, indices[i + 1], w, h),
                color, thickness, cv2.LINE_AA,
            )

    _polyline(_LEFT_EYEBROW, LIGHT_GREEN, 2)
    _polyline(_RIGHT_EYEBROW, LIGHT_GREEN, 2)
    _polyline(_NOSE_BRIDGE, ORANGE, 1)
    _polyline(_LIPS_OUTER, MAGENTA, 1)
    _polyline(_LIPS_INNER, (200, 0, 200), 1)
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
def draw_eyes_and_irises(frame, landmarks, w, h):
    """Draw eye outlines, EAR sample points, iris circles and gaze rays.

    Mutates *frame* in place; *landmarks* is the normalized (N, >=2) array.
    """
    # Closed polylines around both eye contours.
    left_pts = np.array(
        [_lm_to_px(landmarks, i, w, h) for i in FaceMeshDetector.LEFT_EYE_INDICES],
        dtype=np.int32,
    )
    cv2.polylines(frame, [left_pts], True, GREEN, 2, cv2.LINE_AA)
    right_pts = np.array(
        [_lm_to_px(landmarks, i, w, h) for i in FaceMeshDetector.RIGHT_EYE_INDICES],
        dtype=np.int32,
    )
    cv2.polylines(frame, [right_pts], True, GREEN, 2, cv2.LINE_AA)
    # Yellow dots at the six-point EAR landmarks of each eye.
    for indices in [_LEFT_EAR_POINTS, _RIGHT_EAR_POINTS]:
        for idx in indices:
            pt = _lm_to_px(landmarks, idx, w, h)
            cv2.circle(frame, pt, 3, YELLOW, -1, cv2.LINE_AA)
    # Iris circle + gaze ray per eye; eye_inner/eye_outer are the corner indices.
    for iris_indices, eye_inner, eye_outer in [
        (FaceMeshDetector.LEFT_IRIS_INDICES, 133, 33),
        (FaceMeshDetector.RIGHT_IRIS_INDICES, 362, 263),
    ]:
        iris_pts = np.array(
            [_lm_to_px(landmarks, i, w, h) for i in iris_indices],
            dtype=np.int32,
        )
        # First iris landmark is treated as the center; the next four as rim points.
        center = iris_pts[0]
        if len(iris_pts) >= 5:
            radii = [np.linalg.norm(iris_pts[j] - center) for j in range(1, 5)]
            radius = max(int(np.mean(radii)), 2)
            cv2.circle(frame, tuple(center), radius, MAGENTA, 2, cv2.LINE_AA)
        cv2.circle(frame, tuple(center), 2, WHITE, -1, cv2.LINE_AA)
        # Gaze ray: iris offset from the eye center, extended 3x.
        eye_center_x = (landmarks[eye_inner, 0] + landmarks[eye_outer, 0]) / 2.0
        eye_center_y = (landmarks[eye_inner, 1] + landmarks[eye_outer, 1]) / 2.0
        eye_center = (int(eye_center_x * w), int(eye_center_y * h))
        dx = center[0] - eye_center[0]
        dy = center[1] - eye_center[1]
        gaze_end = (int(center[0] + dx * 3), int(center[1] + dy * 3))
        cv2.line(frame, tuple(center), gaze_end, RED, 1, cv2.LINE_AA)
| 116 |
-
|
| 117 |
-
|
| 118 |
-
def main():
    """CLI entry point: run the FocusGuard webcam demo.

    Parses CLI flags, builds either the MLP-only pipeline or the fused
    face-mesh pipeline, then loops: read a frame, score it, draw overlays,
    and show FOCUSED / NOT FOCUSED until 'q' is pressed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--camera", type=int, default=0)
    parser.add_argument("--mlp", action="store_true", help="Use MLP model only (load latest from MLP/models/)")
    parser.add_argument("--mlp-dir", type=str, default=None, help="MLP models dir (default: shared/MLP/models)")
    parser.add_argument("--max-angle", type=float, default=22.0)
    parser.add_argument("--alpha", type=float, default=0.4)
    parser.add_argument("--beta", type=float, default=0.6)
    parser.add_argument("--threshold", type=float, default=0.55)
    parser.add_argument("--eye-model", type=str, default=None)
    parser.add_argument("--eye-backend", type=str, default="yolo", choices=["yolo", "geometric"])
    parser.add_argument("--eye-blend", type=float, default=0.5)
    args = parser.parse_args()

    use_mlp_only = args.mlp

    # Pipeline selection: MLP-only vs head-pose + eye fusion.
    if use_mlp_only:
        print("[DEMO] MLP only — loading latest from MLP/models/")
        pipeline = MLPPipeline(model_dir=args.mlp_dir)
    else:
        eye_mode = " + model" if args.eye_model else " only"
        print("[DEMO] Face mesh + head pose + eye (geometry" + eye_mode + ")")
        pipeline = FaceMeshPipeline(
            max_angle=args.max_angle,
            alpha=args.alpha,
            beta=args.beta,
            threshold=args.threshold,
            eye_model_path=args.eye_model,
            eye_backend=args.eye_backend,
            eye_blend=args.eye_blend,
        )

    cap = cv2.VideoCapture(args.camera)
    if not cap.isOpened():
        print("[DEMO] ERROR: Cannot open camera")
        return

    print("[DEMO] q = quit, m = cycle mesh (full/contours/off)" if not use_mlp_only else "[DEMO] q = quit, m = mesh")
    prev_time = time.time()
    fps = 0.0  # exponentially smoothed frames-per-second estimate
    mesh_mode = MESH_FULL

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            result = pipeline.process_frame(frame)
            # EMA-smoothed FPS; max(..., 1e-6) guards against a zero delta.
            now = time.time()
            fps = 0.9 * fps + 0.1 * (1.0 / max(now - prev_time, 1e-6))
            prev_time = now

            h, w = frame.shape[:2]
            if result["landmarks"] is not None:
                lm = result["landmarks"]
                # Mesh overlay according to the current toggle state.
                if mesh_mode == MESH_FULL:
                    draw_tessellation(frame, lm, w, h)
                    draw_contours(frame, lm, w, h)
                elif mesh_mode == MESH_CONTOURS:
                    draw_contours(frame, lm, w, h)
                draw_eyes_and_irises(frame, lm, w, h)
                if not use_mlp_only:
                    pipeline.head_pose.draw_axes(frame, lm)
                    # Eye-crop bounding boxes (only available from the fused pipeline).
                    if result.get("left_bbox") and result.get("right_bbox"):
                        lx1, ly1, lx2, ly2 = result["left_bbox"]
                        rx1, ry1, rx2, ry2 = result["right_bbox"]
                        cv2.rectangle(frame, (lx1, ly1), (lx2, ly2), YELLOW, 1)
                        cv2.rectangle(frame, (rx1, ry1), (rx2, ry2), YELLOW, 1)

            # Status banner across the top of the frame.
            status = "FOCUSED" if result["is_focused"] else "NOT FOCUSED"
            status_color = GREEN if result["is_focused"] else RED
            cv2.rectangle(frame, (0, 0), (w, 55), (0, 0, 0), -1)
            cv2.putText(frame, status, (10, 28), FONT, 0.8, status_color, 2, cv2.LINE_AA)
            if use_mlp_only:
                cv2.putText(frame, "MLP", (10, 48), FONT, 0.45, WHITE, 1, cv2.LINE_AA)
                cv2.putText(frame, f"FPS: {fps:.0f}", (w - 80, 28), FONT, 0.45, WHITE, 1, cv2.LINE_AA)
                cv2.putText(frame, "q:quit m:mesh", (w - 120, 48), FONT, 0.4, (180, 180, 180), 1, cv2.LINE_AA)
            else:
                # Diagnostics line: component scores, optional mouth-aspect ratio.
                mar_str = f" MAR:{result['mar']:.2f}" if result.get("mar") is not None else ""
                cv2.putText(frame, f"S_face:{result['s_face']:.2f} S_eye:{result['s_eye']:.2f}{mar_str} score:{result['raw_score']:.2f}", (10, 48), FONT, 0.45, WHITE, 1, cv2.LINE_AA)
                if result.get("is_yawning"):
                    cv2.putText(frame, "YAWN", (10, 75), FONT, 0.7, ORANGE, 2, cv2.LINE_AA)
                if result["yaw"] is not None:
                    cv2.putText(frame, f"yaw:{result['yaw']:+.0f} pitch:{result['pitch']:+.0f} roll:{result['roll']:+.0f}", (w - 280, 48), FONT, 0.4, (180, 180, 180), 1, cv2.LINE_AA)
                eye_label = f"eye:{pipeline.eye_classifier.name}" if pipeline.has_eye_model else "eye:geo"
                cv2.putText(frame, f"{_MESH_NAMES[mesh_mode]} {eye_label} FPS: {fps:.0f}", (w - 320, 28), FONT, 0.45, WHITE, 1, cv2.LINE_AA)
                cv2.putText(frame, "q:quit m:mesh", (w - 140, 48), FONT, 0.4, (180, 180, 180), 1, cv2.LINE_AA)

            cv2.imshow("FocusGuard", frame)

            key = cv2.waitKey(1) & 0xFF
            if key == ord("q"):
                break
            elif key == ord("m"):
                # Cycle full mesh -> contours -> off.
                mesh_mode = (mesh_mode + 1) % 3
                print(f"[DEMO] Mesh: {_MESH_NAMES[mesh_mode]}")

    finally:
        cap.release()
        cv2.destroyAllWindows()
        pipeline.close()
        print("[DEMO] Done")
| 221 |
-
|
| 222 |
-
|
| 223 |
-
if __name__ == "__main__":
    # Run the demo only when executed directly as a script.
    main()
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import os
|
| 3 |
+
import sys
|
| 4 |
+
import time
|
| 5 |
+
|
| 6 |
+
import cv2
|
| 7 |
+
import numpy as np
|
| 8 |
+
from mediapipe.tasks.python.vision import FaceLandmarksConnections
|
| 9 |
+
|
| 10 |
+
_PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
| 11 |
+
if _PROJECT_ROOT not in sys.path:
|
| 12 |
+
sys.path.insert(0, _PROJECT_ROOT)
|
| 13 |
+
|
| 14 |
+
from ui.pipeline import FaceMeshPipeline, MLPPipeline
|
| 15 |
+
from models.pretrained.face_mesh.face_mesh import FaceMeshDetector
|
| 16 |
+
|
| 17 |
+
FONT = cv2.FONT_HERSHEY_SIMPLEX
|
| 18 |
+
CYAN = (255, 255, 0)
|
| 19 |
+
GREEN = (0, 255, 0)
|
| 20 |
+
MAGENTA = (255, 0, 255)
|
| 21 |
+
ORANGE = (0, 165, 255)
|
| 22 |
+
RED = (0, 0, 255)
|
| 23 |
+
WHITE = (255, 255, 255)
|
| 24 |
+
YELLOW = (0, 255, 255)
|
| 25 |
+
LIGHT_GREEN = (144, 238, 144)
|
| 26 |
+
|
| 27 |
+
_TESSELATION = [(c.start, c.end) for c in FaceLandmarksConnections.FACE_LANDMARKS_TESSELATION]
|
| 28 |
+
_CONTOURS = [(c.start, c.end) for c in FaceLandmarksConnections.FACE_LANDMARKS_CONTOURS]
|
| 29 |
+
_LEFT_EYEBROW = [70, 63, 105, 66, 107, 55, 65, 52, 53, 46]
|
| 30 |
+
_RIGHT_EYEBROW = [300, 293, 334, 296, 336, 285, 295, 282, 283, 276]
|
| 31 |
+
_NOSE_BRIDGE = [6, 197, 195, 5, 4, 1, 19, 94, 2]
|
| 32 |
+
_LIPS_OUTER = [61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291, 409, 270, 269, 267, 0, 37, 39, 40, 185, 61]
|
| 33 |
+
_LIPS_INNER = [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308, 415, 310, 311, 312, 13, 82, 81, 80, 191, 78]
|
| 34 |
+
_LEFT_EAR_POINTS = [33, 160, 158, 133, 153, 145]
|
| 35 |
+
_RIGHT_EAR_POINTS = [362, 385, 387, 263, 373, 380]
|
| 36 |
+
|
| 37 |
+
MESH_FULL = 0
|
| 38 |
+
MESH_CONTOURS = 1
|
| 39 |
+
MESH_OFF = 2
|
| 40 |
+
_MESH_NAMES = ["FULL MESH", "CONTOURS", "MESH OFF"]
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def _lm_to_px(landmarks, idx, w, h):
|
| 44 |
+
return (int(landmarks[idx, 0] * w), int(landmarks[idx, 1] * h))
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def draw_tessellation(frame, landmarks, w, h):
|
| 48 |
+
overlay = frame.copy()
|
| 49 |
+
for conn in _TESSELATION:
|
| 50 |
+
pt1 = _lm_to_px(landmarks, conn[0], w, h)
|
| 51 |
+
pt2 = _lm_to_px(landmarks, conn[1], w, h)
|
| 52 |
+
cv2.line(overlay, pt1, pt2, (200, 200, 200), 1, cv2.LINE_AA)
|
| 53 |
+
cv2.addWeighted(overlay, 0.3, frame, 0.7, 0, frame)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def draw_contours(frame, landmarks, w, h):
    """Draw MediaPipe face contours plus eyebrow, nose and lip polylines.

    All drawing happens in place on *frame*; draw order matches the
    original (contours, eyebrows, nose bridge, outer lips, inner lips).
    """
    def _chain(indices, color, thickness):
        # Draw an open polyline through consecutive landmark indices.
        for a, b in zip(indices, indices[1:]):
            cv2.line(
                frame,
                _lm_to_px(landmarks, a, w, h),
                _lm_to_px(landmarks, b, w, h),
                color,
                thickness,
                cv2.LINE_AA,
            )

    for start_idx, end_idx in _CONTOURS:
        cv2.line(
            frame,
            _lm_to_px(landmarks, start_idx, w, h),
            _lm_to_px(landmarks, end_idx, w, h),
            CYAN,
            1,
            cv2.LINE_AA,
        )
    _chain(_LEFT_EYEBROW, LIGHT_GREEN, 2)
    _chain(_RIGHT_EYEBROW, LIGHT_GREEN, 2)
    _chain(_NOSE_BRIDGE, ORANGE, 1)
    _chain(_LIPS_OUTER, MAGENTA, 1)
    _chain(_LIPS_INNER, (200, 0, 200), 1)
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def draw_eyes_and_irises(frame, landmarks, w, h):
    """Draw eye outlines, EAR sample dots, iris circles, and a gaze ray.

    NOTE(review): the diff rendering stripped indentation; the nesting below
    (all iris drawing guarded by the 5-point check) is reconstructed and
    should be confirmed against the original file.
    """
    # Closed eye-contour polygons from the detector's per-eye index lists.
    left_pts = np.array(
        [_lm_to_px(landmarks, i, w, h) for i in FaceMeshDetector.LEFT_EYE_INDICES],
        dtype=np.int32,
    )
    cv2.polylines(frame, [left_pts], True, GREEN, 2, cv2.LINE_AA)
    right_pts = np.array(
        [_lm_to_px(landmarks, i, w, h) for i in FaceMeshDetector.RIGHT_EYE_INDICES],
        dtype=np.int32,
    )
    cv2.polylines(frame, [right_pts], True, GREEN, 2, cv2.LINE_AA)
    # Yellow dots on the six landmarks per eye used for the EAR computation.
    for indices in [_LEFT_EAR_POINTS, _RIGHT_EAR_POINTS]:
        for idx in indices:
            pt = _lm_to_px(landmarks, idx, w, h)
            cv2.circle(frame, pt, 3, YELLOW, -1, cv2.LINE_AA)
    # Per eye: iris circle plus a gaze ray. eye_inner/eye_outer are the
    # corner landmark indices defining the geometric eye center.
    for iris_indices, eye_inner, eye_outer in [
        (FaceMeshDetector.LEFT_IRIS_INDICES, 133, 33),
        (FaceMeshDetector.RIGHT_IRIS_INDICES, 362, 263),
    ]:
        iris_pts = np.array(
            [_lm_to_px(landmarks, i, w, h) for i in iris_indices],
            dtype=np.int32,
        )
        # Presumably the first iris landmark is the pupil center and the next
        # four lie on the rim (MediaPipe iris convention) — TODO confirm.
        center = iris_pts[0]
        if len(iris_pts) >= 5:
            radii = [np.linalg.norm(iris_pts[j] - center) for j in range(1, 5)]
            # Clamp to >= 2 px so the circle stays visible on small faces.
            radius = max(int(np.mean(radii)), 2)
            cv2.circle(frame, tuple(center), radius, MAGENTA, 2, cv2.LINE_AA)
            cv2.circle(frame, tuple(center), 2, WHITE, -1, cv2.LINE_AA)
            # Gaze ray: iris offset from the eye center, scaled 3x for
            # visibility, drawn from the iris center outward.
            eye_center_x = (landmarks[eye_inner, 0] + landmarks[eye_outer, 0]) / 2.0
            eye_center_y = (landmarks[eye_inner, 1] + landmarks[eye_outer, 1]) / 2.0
            eye_center = (int(eye_center_x * w), int(eye_center_y * h))
            dx = center[0] - eye_center[0]
            dy = center[1] - eye_center[1]
            gaze_end = (int(center[0] + dx * 3), int(center[1] + dy * 3))
            cv2.line(frame, tuple(center), gaze_end, RED, 1, cv2.LINE_AA)
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def main():
    """Run the live FocusGuard webcam demo.

    Opens the requested camera, processes each frame through either the
    MLP-only pipeline or the face-mesh pipeline, and renders mesh / eye /
    head-pose overlays plus a FOCUSED / NOT FOCUSED status HUD.

    NOTE(review): the diff rendering stripped indentation; the nesting below
    is reconstructed and should be confirmed against the original file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--camera", type=int, default=0)
    parser.add_argument("--mlp", action="store_true", help="Use MLP model only (load latest from MLP/models/)")
    parser.add_argument("--mlp-dir", type=str, default=None, help="MLP models dir (default: shared/MLP/models)")
    parser.add_argument("--max-angle", type=float, default=22.0)
    parser.add_argument("--alpha", type=float, default=0.4)
    parser.add_argument("--beta", type=float, default=0.6)
    parser.add_argument("--threshold", type=float, default=0.55)
    parser.add_argument("--eye-model", type=str, default=None)
    parser.add_argument("--eye-backend", type=str, default="yolo", choices=["yolo", "geometric", "cnn"])
    parser.add_argument("--eye-blend", type=float, default=0.5)
    args = parser.parse_args()

    use_mlp_only = args.mlp

    # Select pipeline: MLP-only (auto-loads latest model) or the full
    # face-mesh pipeline with head pose + eye-state scoring.
    if use_mlp_only:
        print("[DEMO] MLP only — loading latest from MLP/models/")
        pipeline = MLPPipeline(model_dir=args.mlp_dir)
    else:
        eye_mode = " + model" if args.eye_model else " only"
        print("[DEMO] Face mesh + head pose + eye (geometry" + eye_mode + ")")
        pipeline = FaceMeshPipeline(
            max_angle=args.max_angle,
            alpha=args.alpha,
            beta=args.beta,
            threshold=args.threshold,
            eye_model_path=args.eye_model,
            eye_backend=args.eye_backend,
            eye_blend=args.eye_blend,
        )

    cap = cv2.VideoCapture(args.camera)
    if not cap.isOpened():
        print("[DEMO] ERROR: Cannot open camera")
        return

    print("[DEMO] q = quit, m = cycle mesh (full/contours/off)" if not use_mlp_only else "[DEMO] q = quit, m = mesh")
    prev_time = time.time()
    fps = 0.0
    mesh_mode = MESH_FULL

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            result = pipeline.process_frame(frame)
            now = time.time()
            # Exponential moving average of instantaneous FPS (smooths jitter).
            fps = 0.9 * fps + 0.1 * (1.0 / max(now - prev_time, 1e-6))
            prev_time = now

            h, w = frame.shape[:2]
            if result["landmarks"] is not None:
                lm = result["landmarks"]
                # Mesh overlay depends on the current render mode.
                if mesh_mode == MESH_FULL:
                    draw_tessellation(frame, lm, w, h)
                    draw_contours(frame, lm, w, h)
                elif mesh_mode == MESH_CONTOURS:
                    draw_contours(frame, lm, w, h)
                # Eyes/irises are drawn in every mesh mode.
                draw_eyes_and_irises(frame, lm, w, h)
                if not use_mlp_only:
                    pipeline.head_pose.draw_axes(frame, lm)
                    # Eye crop bounding boxes, when the pipeline provides them.
                    if result.get("left_bbox") and result.get("right_bbox"):
                        lx1, ly1, lx2, ly2 = result["left_bbox"]
                        rx1, ry1, rx2, ry2 = result["right_bbox"]
                        cv2.rectangle(frame, (lx1, ly1), (lx2, ly2), YELLOW, 1)
                        cv2.rectangle(frame, (rx1, ry1), (rx2, ry2), YELLOW, 1)

            # --- HUD: black banner with focus status and diagnostics. ---
            status = "FOCUSED" if result["is_focused"] else "NOT FOCUSED"
            status_color = GREEN if result["is_focused"] else RED
            cv2.rectangle(frame, (0, 0), (w, 55), (0, 0, 0), -1)
            cv2.putText(frame, status, (10, 28), FONT, 0.8, status_color, 2, cv2.LINE_AA)
            if use_mlp_only:
                cv2.putText(frame, "MLP", (10, 48), FONT, 0.45, WHITE, 1, cv2.LINE_AA)
                cv2.putText(frame, f"FPS: {fps:.0f}", (w - 80, 28), FONT, 0.45, WHITE, 1, cv2.LINE_AA)
                cv2.putText(frame, "q:quit m:mesh", (w - 120, 48), FONT, 0.4, (180, 180, 180), 1, cv2.LINE_AA)
            else:
                # Score breakdown: face score, eye score, optional MAR, blended score.
                mar_str = f" MAR:{result['mar']:.2f}" if result.get("mar") is not None else ""
                cv2.putText(frame, f"S_face:{result['s_face']:.2f} S_eye:{result['s_eye']:.2f}{mar_str} score:{result['raw_score']:.2f}", (10, 48), FONT, 0.45, WHITE, 1, cv2.LINE_AA)
                if result.get("is_yawning"):
                    cv2.putText(frame, "YAWN", (10, 75), FONT, 0.7, ORANGE, 2, cv2.LINE_AA)
                if result["yaw"] is not None:
                    cv2.putText(frame, f"yaw:{result['yaw']:+.0f} pitch:{result['pitch']:+.0f} roll:{result['roll']:+.0f}", (w - 280, 48), FONT, 0.4, (180, 180, 180), 1, cv2.LINE_AA)
                # Show which eye backend is active (model name vs. geometric).
                eye_label = f"eye:{pipeline.eye_classifier.name}" if pipeline.has_eye_model else "eye:geo"
                cv2.putText(frame, f"{_MESH_NAMES[mesh_mode]} {eye_label} FPS: {fps:.0f}", (w - 320, 28), FONT, 0.45, WHITE, 1, cv2.LINE_AA)
                cv2.putText(frame, "q:quit m:mesh", (w - 140, 48), FONT, 0.4, (180, 180, 180), 1, cv2.LINE_AA)

            cv2.imshow("FocusGuard", frame)

            key = cv2.waitKey(1) & 0xFF
            if key == ord("q"):
                break
            elif key == ord("m"):
                # Cycle FULL -> CONTOURS -> OFF.
                mesh_mode = (mesh_mode + 1) % 3
                print(f"[DEMO] Mesh: {_MESH_NAMES[mesh_mode]}")

    finally:
        # Release camera and model resources even on exception/interrupt.
        cap.release()
        cv2.destroyAllWindows()
        pipeline.close()
        print("[DEMO] Done")
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
# Script entry point.
if __name__ == "__main__":
    main()
|
ui/pipeline.py
CHANGED
|
@@ -36,6 +36,9 @@ class FaceMeshPipeline:
|
|
| 36 |
self.threshold = threshold
|
| 37 |
self.eye_blend = eye_blend
|
| 38 |
|
|
|
|
|
|
|
|
|
|
| 39 |
self.eye_classifier = load_eye_classifier(
|
| 40 |
path=eye_model_path if eye_model_path and os.path.exists(eye_model_path) else None,
|
| 41 |
backend=eye_backend,
|
|
|
|
| 36 |
self.threshold = threshold
|
| 37 |
self.eye_blend = eye_blend
|
| 38 |
|
| 39 |
+
if eye_model_path and not os.path.exists(eye_model_path):
|
| 40 |
+
print(f"[PIPELINE] WARNING: eye model file not found: {eye_model_path}")
|
| 41 |
+
|
| 42 |
self.eye_classifier = load_eye_classifier(
|
| 43 |
path=eye_model_path if eye_model_path and os.path.exists(eye_model_path) else None,
|
| 44 |
backend=eye_backend,
|