Yingtao Zheng (k23158987) committed on
Commit
52d831a
·
unverified ·
2 Parent(s): 28d0d9e2ea6266

Merge pull request #6 from k23172173/main

Browse files

Update Dev to the newest version and correct wrong-branch mistakes

This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +0 -16
  2. models/face_orientation_model/best_model.pt → MLP/models/meta_20260224_024200.npz +2 -2
  3. MLP/models/mlp_20260224_024200.joblib +3 -0
  4. MLP/models/scaler_20260224_024200.joblib +3 -0
  5. README.md +7 -7
  6. data_preparation/CNN/eye_crops/val/open/.gitkeep +1 -0
  7. data_preparation/MLP/explore_collected_data.ipynb +0 -0
  8. data_preparation/MLP/train_mlp.ipynb +0 -0
  9. data_preparation/README.md +40 -2
  10. data_preparation/collected_Abdelrahman/abdelrahman_20260306_023035.npz +3 -0
  11. data_preparation/collected_Ayten/ayten_session_1.npz +3 -0
  12. data_preparation/collected_Jarek/Jarek_20260225_012931.npz +3 -0
  13. data_preparation/collected_Junhao/Junhao_20260303_113554.npz +3 -0
  14. data_preparation/collected_Kexin/kexin2_20260305_180229.npz +3 -0
  15. data_preparation/collected_Kexin/kexin_20260224_151043.npz +3 -0
  16. data_preparation/collected_Langyuan/Langyuan_20260303_153145.npz +3 -0
  17. data_preparation/collected_Mohamed/session_20260224_010131.npz +3 -0
  18. data_preparation/collected_Saba/saba_20260306_230710.npz +3 -0
  19. data_preparation/collected_Yingtao/Yingtao_20260306_023937.npz +3 -0
  20. evaluation/README.md +1 -1
  21. {models/attention_score_fusion → evaluation/logs}/.gitkeep +0 -0
  22. models/README.md +7 -5
  23. models/attention/__init__.py +1 -0
  24. models/{eye_behaviour_model/.gitkeep → attention/classifier.py} +0 -0
  25. models/attention/collect_features.py +349 -0
  26. models/{face_landmarks_pretrained/.gitkeep → attention/fusion.py} +0 -0
  27. models/{face_orientation_model/.gitkeep → attention/train.py} +0 -0
  28. models/cnn/CNN_MODEL/.claude/settings.local.json +7 -0
  29. models/cnn/CNN_MODEL/.gitattributes +1 -0
  30. models/cnn/CNN_MODEL/.gitignore +4 -0
  31. models/cnn/CNN_MODEL/README.md +74 -0
  32. models/cnn/CNN_MODEL/notebooks/eye_classifier_colab.ipynb +0 -0
  33. models/cnn/CNN_MODEL/scripts/focus_infer.py +199 -0
  34. models/cnn/CNN_MODEL/scripts/predict_image.py +49 -0
  35. models/cnn/CNN_MODEL/scripts/video_infer.py +281 -0
  36. models/cnn/CNN_MODEL/scripts/webcam_live.py +184 -0
  37. models/cnn/CNN_MODEL/weights/yolo11s-cls.pt +3 -0
  38. models/cnn/__init__.py +0 -0
  39. models/cnn/eye_attention/__init__.py +1 -0
  40. models/cnn/eye_attention/classifier.py +69 -0
  41. models/cnn/eye_attention/crop.py +70 -0
  42. models/cnn/eye_attention/train.py +0 -0
  43. models/geometric/__init__.py +0 -0
  44. models/geometric/eye_behaviour/__init__.py +0 -0
  45. models/geometric/eye_behaviour/eye_scorer.py +164 -0
  46. models/geometric/face_orientation/__init__.py +1 -0
  47. models/geometric/face_orientation/head_pose.py +112 -0
  48. models/mlp/__init__.py +0 -0
  49. models/{train.py → mlp/train.py} +32 -7
  50. models/pretrained/__init__.py +0 -0
.gitignore CHANGED
@@ -1,4 +1,3 @@
1
- # Python
2
  __pycache__/
3
  *.py[cod]
4
  *$py.class
@@ -12,25 +11,10 @@ env/
12
  .eggs/
13
  dist/
14
  build/
15
-
16
- # IDE
17
  .idea/
18
  .vscode/
19
  *.swp
20
  *.swo
21
-
22
- # Data and outputs (optional: uncomment if you don’t want to track large files)
23
- # data_preparation/raw/
24
- # data_preparation/processed/*.npy
25
- # evaluation/logs/
26
- # evaluation/results/
27
-
28
- # Model checkpoints (uncomment to ignore .pt files)
29
- # *.pt
30
-
31
- # Project
32
  docs/
33
-
34
- # OS
35
  .DS_Store
36
  Thumbs.db
 
 
1
  __pycache__/
2
  *.py[cod]
3
  *$py.class
 
11
  .eggs/
12
  dist/
13
  build/
 
 
14
  .idea/
15
  .vscode/
16
  *.swp
17
  *.swo
 
 
 
 
 
 
 
 
 
 
 
18
  docs/
 
 
19
  .DS_Store
20
  Thumbs.db
models/face_orientation_model/best_model.pt → MLP/models/meta_20260224_024200.npz RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18c1f2750c7274e72538b94afcc9f0243287a5b2eb8fcce6be6e4ae18ec59cb0
3
- size 15033
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:769bb62c7bf04aafd808e9b2623e795c2d92bcb933313ebf553d6fce5ebe7143
3
+ size 1616
MLP/models/mlp_20260224_024200.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a72933fcf2d0aed998c6303ea4298c04618d937c7f17bf492e76efcf3b4b54d7
3
+ size 50484
MLP/models/scaler_20260224_024200.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f9ef3721cee28f1472886556e001d0f6ed0abe09011d979a70ca9bf447d453e
3
+ size 823
README.md CHANGED
@@ -1,10 +1,10 @@
1
- # GAP — FocusGuard
2
 
3
- Real-time focus estimation from webcam (head pose + eye behaviour).
4
 
5
- ## Layout
 
 
 
6
 
7
- - **data_preparation/** Dataset team (raw data, processed, scripts)
8
- - **models/** — Face orientation, eye behaviour, fusion, landmarks. Training entry: `models/train.py`
9
- - **evaluation/** — Metrics, runs, results
10
- - **ui/** — Live demo + session view
 
1
+ # FocusGuard
2
 
3
+ Webcam-based focus detection: face mesh, head pose, eye (geometry or YOLO), plus an MLP trained on collected features.
4
 
5
+ - **data_preparation/** — collect data, notebooks, processed/collected files
6
+ - **models/** — face mesh, head pose, eye scorer, YOLO classifier, MLP training, attention feature collection
7
+ - **evaluation/** — metrics and run logs
8
+ - **ui/** — live demo (geometry+YOLO or MLP-only)
9
 
10
+ Run from here: `pip install -r requirements.txt` then `python ui/live_demo.py` or `python ui/live_demo.py --mlp`.
 
 
 
data_preparation/CNN/eye_crops/val/open/.gitkeep ADDED
@@ -0,0 +1 @@
 
 
1
+
data_preparation/MLP/explore_collected_data.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
data_preparation/MLP/train_mlp.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
data_preparation/README.md CHANGED
@@ -1,3 +1,41 @@
1
- # data_preparation
2
 
3
- Dataset team owns layout and scripts here.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Data Preparation
2
 
3
+ ## Folder Structure
4
+
5
+ ### collected/
6
+ Contains raw session files in `.npz` format.
7
+ Generated using:
8
+
9
+ python -m models.attention.collect_features
10
+
11
+ Each session includes:
12
+ - 17-dimensional feature vectors
13
+ - Corresponding labels
14
+
15
+ ---
16
+
17
+
18
+ ### MLP/
19
+ Contains notebooks for:
20
+ - Exploring collected data
21
+ - Training the sklearn MLP model (10 features)
22
+
23
+ Trained models are saved to:
24
+ ../MLP/models/
25
+
26
+ ---
27
+
28
+ ### CNN/
29
+ Eye crop directory structure for CNN training (YOLO).
30
+
31
+ ---
32
+
33
+ ## Collecting Data
34
+
35
+ **Step-by-step**
36
+
37
+ 1. From repo root Install deps: `pip install -r requirements.txt`.
38
+ 3. Run: `python -m models.attention.collect_features --name yourname`.
39
+ 4. Webcam opens. Look at the camera; press **1** when focused, **0** when unfocused. Switch every 10–30 sec so you get both labels.
40
+ 5. Press **p** to pause/resume.
41
+ 6. Press **q** when done. One `.npz` is saved to `data_preparation/collected/` (17 features + labels).
data_preparation/collected_Abdelrahman/abdelrahman_20260306_023035.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2c48532150182c8933d4595e0a0711365645b699647e99976575b7c2adffaf8
3
+ size 1207980
data_preparation/collected_Ayten/ayten_session_1.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbecdbffa1c1b03b3b0fb5f715dcb4ff885ecc67da4aff78e6952b8847a96014
3
+ size 1341056
data_preparation/collected_Jarek/Jarek_20260225_012931.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fa68f4d587eee8d645b23b463a9f1c848b9bacc2adb68603d5fa9cd8cb744c7
3
+ size 1128864
data_preparation/collected_Junhao/Junhao_20260303_113554.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec321ee79800c04fdc0f999690d07970445aeca61f977bf6537880bbc996b5e5
3
+ size 678336
data_preparation/collected_Kexin/kexin2_20260305_180229.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e96fe17571fa1fcccc1b4bd0c8838270498883e4db6a608c4d4d4c3a8ac1d0d
3
+ size 1129700
data_preparation/collected_Kexin/kexin_20260224_151043.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d402ca4e66910a2e174c4f4beec5d7b3db6a04213d29673b227ce6ef04b39c4
3
+ size 1329732
data_preparation/collected_Langyuan/Langyuan_20260303_153145.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c679cdba334b2f3f0953b7e44f7209056277c826e2b7b5cfcf2b8b750898400
3
+ size 1198784
data_preparation/collected_Mohamed/session_20260224_010131.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a784f703c13b83911f47ec507d32c25942a07572314b8a77cbf40ca8cdff16f
3
+ size 1006428
data_preparation/collected_Saba/saba_20260306_230710.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db1cab5ddcf9988856c5bdca1183c8eba4647365e675a1d8a200d12f6b5d2097
3
+ size 663212
data_preparation/collected_Yingtao/Yingtao_20260306_023937.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a75af17e25dca5f06ea9e7443ea5fee9db638f68a5910e014ee7cb8b7ae80fd
3
+ size 1338776
evaluation/README.md CHANGED
@@ -1,3 +1,3 @@
1
  # evaluation
2
 
3
- Metrics, experiment configs, and results live here.
 
1
  # evaluation
2
 
3
+ Place metrics scripts, run configs, and results here. Logs dir is used by `models.mlp.train` for training logs.
{models/attention_score_fusion → evaluation/logs}/.gitkeep RENAMED
File without changes
models/README.md CHANGED
@@ -1,8 +1,10 @@
1
  # models
2
 
3
- - `face_orientation_model/`S_face
4
- - `eye_behaviour_model/`S_eye
5
- - `attention_score_fusion/`fusion + smoothing
6
- - `face_landmarks_pretrained/` — MediaPipe FaceMesh (no training)
 
 
7
 
8
- `train.py` trains the MLP on feature vectors; `prepare_dataset.py` loads from `data_preparation/processed/` or synthetic.
 
1
  # models
2
 
3
+ - **cnn/eye_attention/**YOLO open/closed eye classifier, crop helper, train stub
4
+ - **mlp/**PyTorch MLP on feature vectors (face_orientation / eye_behaviour); checkpoints under `mlp/face_orientation_model/`, `mlp/eye_behaviour_model/`
5
+ - **geometric/face_orientation/**head pose (solvePnP). **geometric/eye_behaviour/** — EAR, gaze, MAR
6
+ - **pretrained/face_mesh/** — MediaPipe face landmarks (no training)
7
+ - **attention/** — webcam feature collection (17-d), stubs for train/classifier/fusion
8
+ - **prepare_dataset.py** — loads from `data_preparation/processed/` or synthetic; used by `mlp/train.py`
9
 
10
+ Run legacy MLP training: `python -m models.mlp.train`. The sklearn MLP used in the live demo is trained in `data_preparation/MLP/train_mlp.ipynb` and saved under `../MLP/models/`.
models/attention/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+
models/{eye_behaviour_model/.gitkeep → attention/classifier.py} RENAMED
File without changes
models/attention/collect_features.py ADDED
@@ -0,0 +1,349 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Usage: python -m models.attention.collect_features [--name alice] [--duration 600]
2
+
3
+ import argparse
4
+ import collections
5
+ import math
6
+ import os
7
+ import sys
8
+ import time
9
+
10
+ import cv2
11
+ import numpy as np
12
+
13
+ _PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
14
+ if _PROJECT_ROOT not in sys.path:
15
+ sys.path.insert(0, _PROJECT_ROOT)
16
+
17
+ from models.pretrained.face_mesh.face_mesh import FaceMeshDetector
18
+ from models.geometric.face_orientation.head_pose import HeadPoseEstimator
19
+ from models.geometric.eye_behaviour.eye_scorer import EyeBehaviourScorer, compute_gaze_ratio, compute_mar
20
+
21
+ FONT = cv2.FONT_HERSHEY_SIMPLEX
22
+ GREEN = (0, 255, 0)
23
+ RED = (0, 0, 255)
24
+ WHITE = (255, 255, 255)
25
+ YELLOW = (0, 255, 255)
26
+ ORANGE = (0, 165, 255)
27
+ GRAY = (120, 120, 120)
28
+
29
+ FEATURE_NAMES = [
30
+ "ear_left", "ear_right", "ear_avg", "h_gaze", "v_gaze", "mar",
31
+ "yaw", "pitch", "roll", "s_face", "s_eye", "gaze_offset", "head_deviation",
32
+ "perclos", "blink_rate", "closure_duration", "yawn_duration",
33
+ ]
34
+
35
+ NUM_FEATURES = len(FEATURE_NAMES)
36
+ assert NUM_FEATURES == 17
37
+
38
+
39
+ class TemporalTracker:
40
+ EAR_BLINK_THRESH = 0.21
41
+ MAR_YAWN_THRESH = 0.04
42
+ PERCLOS_WINDOW = 60
43
+ BLINK_WINDOW_SEC = 30.0
44
+
45
+ def __init__(self):
46
+ self.ear_history = collections.deque(maxlen=self.PERCLOS_WINDOW)
47
+ self.blink_timestamps = collections.deque()
48
+ self._eyes_closed = False
49
+ self._closure_start = None
50
+ self._yawn_start = None
51
+
52
+ def update(self, ear_avg, mar, now=None):
53
+ if now is None:
54
+ now = time.time()
55
+
56
+ closed = ear_avg < self.EAR_BLINK_THRESH
57
+ self.ear_history.append(1.0 if closed else 0.0)
58
+ perclos = sum(self.ear_history) / len(self.ear_history) if self.ear_history else 0.0
59
+
60
+ if self._eyes_closed and not closed:
61
+ self.blink_timestamps.append(now)
62
+ self._eyes_closed = closed
63
+
64
+ cutoff = now - self.BLINK_WINDOW_SEC
65
+ while self.blink_timestamps and self.blink_timestamps[0] < cutoff:
66
+ self.blink_timestamps.popleft()
67
+ blink_rate = len(self.blink_timestamps) * (60.0 / self.BLINK_WINDOW_SEC)
68
+
69
+ if closed:
70
+ if self._closure_start is None:
71
+ self._closure_start = now
72
+ closure_dur = now - self._closure_start
73
+ else:
74
+ self._closure_start = None
75
+ closure_dur = 0.0
76
+
77
+ yawning = mar > self.MAR_YAWN_THRESH
78
+ if yawning:
79
+ if self._yawn_start is None:
80
+ self._yawn_start = now
81
+ yawn_dur = now - self._yawn_start
82
+ else:
83
+ self._yawn_start = None
84
+ yawn_dur = 0.0
85
+
86
+ return perclos, blink_rate, closure_dur, yawn_dur
87
+
88
+
89
+ def extract_features(landmarks, w, h, head_pose, eye_scorer, temporal):
90
+ from models.geometric.eye_behaviour.eye_scorer import _LEFT_EYE_EAR, _RIGHT_EYE_EAR, compute_ear
91
+
92
+ ear_left = compute_ear(landmarks, _LEFT_EYE_EAR)
93
+ ear_right = compute_ear(landmarks, _RIGHT_EYE_EAR)
94
+ ear_avg = (ear_left + ear_right) / 2.0
95
+ h_gaze, v_gaze = compute_gaze_ratio(landmarks)
96
+ mar = compute_mar(landmarks)
97
+
98
+ angles = head_pose.estimate(landmarks, w, h)
99
+ yaw = angles[0] if angles else 0.0
100
+ pitch = angles[1] if angles else 0.0
101
+ roll = angles[2] if angles else 0.0
102
+
103
+ s_face = head_pose.score(landmarks, w, h)
104
+ s_eye = eye_scorer.score(landmarks)
105
+
106
+ gaze_offset = math.sqrt((h_gaze - 0.5) ** 2 + (v_gaze - 0.5) ** 2)
107
+ head_deviation = math.sqrt(yaw ** 2 + pitch ** 2)
108
+
109
+ perclos, blink_rate, closure_dur, yawn_dur = temporal.update(ear_avg, mar)
110
+
111
+ return np.array([
112
+ ear_left, ear_right, ear_avg,
113
+ h_gaze, v_gaze,
114
+ mar,
115
+ yaw, pitch, roll,
116
+ s_face, s_eye,
117
+ gaze_offset,
118
+ head_deviation,
119
+ perclos, blink_rate, closure_dur, yawn_dur,
120
+ ], dtype=np.float32)
121
+
122
+
123
+ def quality_report(labels):
124
+ n = len(labels)
125
+ n1 = int((labels == 1).sum())
126
+ n0 = n - n1
127
+ transitions = int(np.sum(np.diff(labels) != 0))
128
+ duration_sec = n / 30.0 # approximate at 30fps
129
+
130
+ warnings = []
131
+
132
+ print(f"\n{'='*50}")
133
+ print(f" DATA QUALITY REPORT")
134
+ print(f"{'='*50}")
135
+ print(f" Total samples : {n}")
136
+ print(f" Focused : {n1} ({n1/max(n,1)*100:.1f}%)")
137
+ print(f" Unfocused : {n0} ({n0/max(n,1)*100:.1f}%)")
138
+ print(f" Duration : {duration_sec:.0f}s ({duration_sec/60:.1f} min)")
139
+ print(f" Transitions : {transitions}")
140
+ if transitions > 0:
141
+ print(f" Avg segment : {n/transitions:.0f} frames ({n/transitions/30:.1f}s)")
142
+
143
+ # checks
144
+ if duration_sec < 120:
145
+ warnings.append(f"TOO SHORT: {duration_sec:.0f}s — aim for 5-10 minutes (300-600s)")
146
+
147
+ if n < 3000:
148
+ warnings.append(f"LOW SAMPLE COUNT: {n} frames — aim for 9000+ (5 min at 30fps)")
149
+
150
+ balance = n1 / max(n, 1)
151
+ if balance < 0.3 or balance > 0.7:
152
+ warnings.append(f"IMBALANCED: {balance:.0%} focused — aim for 35-65% focused")
153
+
154
+ if transitions < 10:
155
+ warnings.append(f"TOO FEW TRANSITIONS: {transitions} — switch every 10-30s, aim for 20+")
156
+
157
+ if transitions == 1:
158
+ warnings.append("SINGLE BLOCK: you recorded one unfocused + one focused block — "
159
+ "model will learn temporal position, not focus patterns")
160
+
161
+ if warnings:
162
+ print(f"\n ⚠️ WARNINGS ({len(warnings)}):")
163
+ for w in warnings:
164
+ print(f" • {w}")
165
+ print(f"\n Consider re-recording this session.")
166
+ else:
167
+ print(f"\n ✅ All checks passed!")
168
+
169
+ print(f"{'='*50}\n")
170
+ return len(warnings) == 0
171
+
172
+
173
+ # ---------------------------------------------------------------------------
174
+ # Main
175
+ def main():
176
+ parser = argparse.ArgumentParser()
177
+ parser.add_argument("--name", type=str, default="session",
178
+ help="Your name or session ID")
179
+ parser.add_argument("--camera", type=int, default=0,
180
+ help="Camera index")
181
+ parser.add_argument("--duration", type=int, default=600,
182
+ help="Max recording time (seconds, default 10 min)")
183
+ parser.add_argument("--output-dir", type=str,
184
+ default=os.path.join(_PROJECT_ROOT, "data_preparation", "collected"),
185
+ help="Where to save .npz files")
186
+ args = parser.parse_args()
187
+
188
+ os.makedirs(args.output_dir, exist_ok=True)
189
+
190
+ detector = FaceMeshDetector()
191
+ head_pose = HeadPoseEstimator()
192
+ eye_scorer = EyeBehaviourScorer()
193
+ temporal = TemporalTracker()
194
+
195
+ cap = cv2.VideoCapture(args.camera)
196
+ if not cap.isOpened():
197
+ print("[COLLECT] ERROR: can't open camera")
198
+ return
199
+
200
+ print("[COLLECT] Data Collection Tool")
201
+ print(f"[COLLECT] Session: {args.name}, max {args.duration}s")
202
+ print(f"[COLLECT] Features per frame: {NUM_FEATURES}")
203
+ print("[COLLECT] Controls:")
204
+ print(" 1 = FOCUSED (looking at screen normally)")
205
+ print(" 0 = NOT FOCUSED (phone, away, eyes closed, yawning)")
206
+ print(" p = pause")
207
+ print(" q = save & quit")
208
+ print()
209
+ print("[COLLECT] TIPS for good data:")
210
+ print(" • Switch between 1 and 0 every 10-30 seconds")
211
+ print(" • Aim for 20+ transitions total")
212
+ print(" • Act out varied scenarios: reading, phone, talking, drowsy")
213
+ print(" • Record at least 5 minutes")
214
+ print()
215
+
216
+ features_list = []
217
+ labels_list = []
218
+ label = None # None = paused
219
+ transitions = 0 # count label switches
220
+ prev_label = None
221
+ status = "PAUSED -- press 1 (focused) or 0 (not focused)"
222
+ t_start = time.time()
223
+ prev_time = time.time()
224
+ fps = 0.0
225
+
226
+ try:
227
+ while True:
228
+ elapsed = time.time() - t_start
229
+ if elapsed > args.duration:
230
+ print(f"[COLLECT] Time limit ({args.duration}s)")
231
+ break
232
+
233
+ ret, frame = cap.read()
234
+ if not ret:
235
+ break
236
+
237
+ h, w = frame.shape[:2]
238
+ landmarks = detector.process(frame)
239
+ face_ok = landmarks is not None
240
+
241
+ # record if labeling + face visible
242
+ if face_ok and label is not None:
243
+ vec = extract_features(landmarks, w, h, head_pose, eye_scorer, temporal)
244
+ features_list.append(vec)
245
+ labels_list.append(label)
246
+
247
+ # count transitions
248
+ if prev_label is not None and label != prev_label:
249
+ transitions += 1
250
+ prev_label = label
251
+
252
+ now = time.time()
253
+ fps = 0.9 * fps + 0.1 * (1.0 / max(now - prev_time, 1e-6))
254
+ prev_time = now
255
+
256
+ # --- draw UI ---
257
+ n = len(labels_list)
258
+ n1 = sum(1 for x in labels_list if x == 1)
259
+ n0 = n - n1
260
+ remaining = max(0, args.duration - elapsed)
261
+
262
+ bar_color = GREEN if label == 1 else (RED if label == 0 else (80, 80, 80))
263
+ cv2.rectangle(frame, (0, 0), (w, 70), (0, 0, 0), -1)
264
+ cv2.putText(frame, status, (10, 22), FONT, 0.55, bar_color, 2, cv2.LINE_AA)
265
+ cv2.putText(frame, f"Samples: {n} (F:{n1} U:{n0}) Switches: {transitions}",
266
+ (10, 48), FONT, 0.42, WHITE, 1, cv2.LINE_AA)
267
+ cv2.putText(frame, f"FPS:{fps:.0f}", (w - 80, 22), FONT, 0.45, WHITE, 1, cv2.LINE_AA)
268
+ cv2.putText(frame, f"{int(remaining)}s left", (w - 80, 48), FONT, 0.42, YELLOW, 1, cv2.LINE_AA)
269
+
270
+ if n > 0:
271
+ bar_w = min(w - 20, 300)
272
+ bar_x = w - bar_w - 10
273
+ bar_y = 58
274
+ frac = n1 / n
275
+ cv2.rectangle(frame, (bar_x, bar_y), (bar_x + bar_w, bar_y + 8), (40, 40, 40), -1)
276
+ cv2.rectangle(frame, (bar_x, bar_y), (bar_x + int(bar_w * frac), bar_y + 8), GREEN, -1)
277
+ cv2.putText(frame, f"{frac:.0%}F", (bar_x + bar_w + 4, bar_y + 8),
278
+ FONT, 0.3, GRAY, 1, cv2.LINE_AA)
279
+
280
+ if not face_ok:
281
+ cv2.putText(frame, "NO FACE", (w // 2 - 60, h // 2), FONT, 0.7, RED, 2, cv2.LINE_AA)
282
+
283
+ # red dot = recording
284
+ if label is not None and face_ok:
285
+ cv2.circle(frame, (w - 20, 80), 8, RED, -1)
286
+
287
+ # live warnings
288
+ warn_y = h - 35
289
+ if n > 100 and transitions < 3:
290
+ cv2.putText(frame, "! Switch more often (aim for 20+ transitions)",
291
+ (10, warn_y), FONT, 0.38, ORANGE, 1, cv2.LINE_AA)
292
+ warn_y -= 18
293
+ if elapsed > 30 and n > 0:
294
+ bal = n1 / n
295
+ if bal < 0.25 or bal > 0.75:
296
+ cv2.putText(frame, f"! Imbalanced ({bal:.0%} focused) - record more of the other",
297
+ (10, warn_y), FONT, 0.38, ORANGE, 1, cv2.LINE_AA)
298
+ warn_y -= 18
299
+
300
+ cv2.putText(frame, "1:focused 0:unfocused p:pause q:save+quit",
301
+ (10, h - 10), FONT, 0.38, GRAY, 1, cv2.LINE_AA)
302
+
303
+ cv2.imshow("FocusGuard -- Data Collection", frame)
304
+
305
+ key = cv2.waitKey(1) & 0xFF
306
+ if key == ord("1"):
307
+ label = 1
308
+ status = "Recording: FOCUSED"
309
+ print(f"[COLLECT] -> FOCUSED (n={n}, transitions={transitions})")
310
+ elif key == ord("0"):
311
+ label = 0
312
+ status = "Recording: NOT FOCUSED"
313
+ print(f"[COLLECT] -> NOT FOCUSED (n={n}, transitions={transitions})")
314
+ elif key == ord("p"):
315
+ label = None
316
+ status = "PAUSED"
317
+ print(f"[COLLECT] paused (n={n})")
318
+ elif key == ord("q"):
319
+ break
320
+
321
+ finally:
322
+ cap.release()
323
+ cv2.destroyAllWindows()
324
+ detector.close()
325
+
326
+ if len(features_list) > 0:
327
+ feats = np.stack(features_list)
328
+ labs = np.array(labels_list, dtype=np.int64)
329
+
330
+ ts = time.strftime("%Y%m%d_%H%M%S")
331
+ fname = f"{args.name}_{ts}.npz"
332
+ fpath = os.path.join(args.output_dir, fname)
333
+ np.savez(fpath,
334
+ features=feats,
335
+ labels=labs,
336
+ feature_names=np.array(FEATURE_NAMES))
337
+
338
+ print(f"\n[COLLECT] Saved {len(labs)} samples -> {fpath}")
339
+ print(f" Shape: {feats.shape} ({NUM_FEATURES} features)")
340
+
341
+ quality_report(labs)
342
+ else:
343
+ print("\n[COLLECT] No data collected")
344
+
345
+ print("[COLLECT] Done")
346
+
347
+
348
+ if __name__ == "__main__":
349
+ main()
models/{face_landmarks_pretrained/.gitkeep → attention/fusion.py} RENAMED
File without changes
models/{face_orientation_model/.gitkeep → attention/train.py} RENAMED
File without changes
models/cnn/CNN_MODEL/.claude/settings.local.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(# Check Dataset_subset counts echo \"\"=== Dataset_subset/train/open ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/train/open/ | wc -l && echo \"\"=== Dataset_subset/train/closed ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/train/closed/ | wc -l && echo \"\"=== Dataset_subset/val/open ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/val/open/ | wc -l && echo \"\"=== Dataset_subset/val/closed ===\"\" && ls /Users/mohammedalketbi22/Downloads/GAP_Large_project-feature-dataset-model-test-92_30-clean/Dataset_subset/val/closed/)"
5
+ ]
6
+ }
7
+ }
models/cnn/CNN_MODEL/.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ DATA/** filter=lfs diff=lfs merge=lfs -text
models/cnn/CNN_MODEL/.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Dataset/train/
2
+ Dataset/val/
3
+ Dataset/test/
4
+ .DS_Store
models/cnn/CNN_MODEL/README.md ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Eye Open / Closed Classifier (YOLOv11-CLS)
2
+
3
+
4
+ Binary classifier: **open** vs **closed** eyes.
5
+ Used as a baseline for eye-tracking, drowsiness, or focus detection.
6
+
7
+ ---
8
+
9
+ ## Model team task
10
+
11
+ - **Train** the YOLOv11s-cls eye classifier in a **separate notebook** (data split, epochs, GPU, export `best.pt`).
12
+ - Provide **trained weights** (`best.pt`) for this repo’s evaluation and inference scripts.
13
+
14
+
15
+
16
+ ---
17
+
18
+ ## Repo contents
19
+
20
+ - **notebooks/eye_classifier_colab.ipynb** — Data download (Kaggle), clean, split, undersample, **evaluate** (needs `best.pt` from model team), export.
21
+ - **scripts/predict_image.py** — Run classifier on single images (needs `best.pt`).
22
+ - **scripts/webcam_live.py** — Live webcam open/closed (needs `best.pt` + optional `weights/face_landmarker.task`).
23
+ - **scripts/video_infer.py** — Run on video files.
24
+ - **scripts/focus_infer.py** — Focus/attention inference.
25
+ - **weights/** — Put `best.pt` here; `face_landmarker.task` is downloaded on first webcam run if missing.
26
+ - **docs/** — Extra docs (e.g. UNNECESSARY_FILES.md if present).
27
+
28
+ ---
29
+
30
+ ## Dataset
31
+
32
+ - **Source:** [Kaggle — open/closed eyes](https://www.kaggle.com/datasets/sehriyarmemmedli/open-closed-eyes-dataset)
33
+ - The Colab notebook downloads it via `kagglehub`; no local copy in repo.
34
+
35
+ ---
36
+
37
+ ## Weights
38
+
39
+ - Put **best.pt** from the model team in **weights/best.pt** (or `runs/classify/runs_cls/eye_open_closed_cpu/weights/best.pt`).
40
+ - For webcam: **face_landmarker.task** is downloaded into **weights/** on first run if missing.
41
+
42
+ ---
43
+
44
+ ## Local setup
45
+
46
+ ```bash
47
+ pip install ultralytics opencv-python mediapipe "numpy<2"
48
+ ```
49
+
50
+ Optional: use a venv. From repo root:
51
+ - `python scripts/predict_image.py <image.png>`
52
+ - `python scripts/webcam_live.py`
53
+ - `python scripts/video_infer.py` (expects 1.mp4 / 2.mp4 in repo root or set `VIDEOS` env)
54
+ - `python scripts/focus_infer.py`
55
+
56
+ ---
57
+
58
+ ## Project structure
59
+
60
+ ```
61
+ ├── notebooks/
62
+ │ └── eye_classifier_colab.ipynb # Data + eval (no training)
63
+ ├── scripts/
64
+ │ ├── predict_image.py
65
+ │ ├── webcam_live.py
66
+ │ ├── video_infer.py
67
+ │ └── focus_infer.py
68
+ ├── weights/ # best.pt, face_landmarker.task
69
+ ├── docs/ # extra docs
70
+ ├── README.md
71
+ └── venv/ # optional
72
+ ```
73
+
74
+ Training and weight generation: **model team, separate notebook.**
models/cnn/CNN_MODEL/notebooks/eye_classifier_colab.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
models/cnn/CNN_MODEL/scripts/focus_infer.py ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ import os
5
+
6
+ import cv2
7
+ import numpy as np
8
+ from ultralytics import YOLO
9
+
10
+
11
+ def list_images(folder: Path):
12
+ exts = {".png", ".jpg", ".jpeg", ".bmp", ".webp"}
13
+ return sorted([p for p in folder.iterdir() if p.suffix.lower() in exts])
14
+
15
+
16
+ def find_weights(project_root: Path) -> Path | None:
17
+ candidates = [
18
+ project_root / "weights" / "best.pt",
19
+ project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
20
+ project_root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
21
+ project_root / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
22
+ project_root / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
23
+ ]
24
+ return next((p for p in candidates if p.is_file()), None)
25
+
26
+
27
+ def detect_eyelid_boundary(gray: np.ndarray) -> np.ndarray | None:
28
+ """
29
+ Returns an ellipse fit to the largest contour near the eye boundary.
30
+ Output format: (center(x,y), (axis1, axis2), angle) or None.
31
+ """
32
+ blur = cv2.GaussianBlur(gray, (5, 5), 0)
33
+ edges = cv2.Canny(blur, 40, 120)
34
+ edges = cv2.dilate(edges, np.ones((3, 3), np.uint8), iterations=1)
35
+ contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
36
+ if not contours:
37
+ return None
38
+ contours = sorted(contours, key=cv2.contourArea, reverse=True)
39
+ for c in contours:
40
+ if len(c) >= 5 and cv2.contourArea(c) > 50:
41
+ return cv2.fitEllipse(c)
42
+ return None
43
+
44
+
45
+ def detect_pupil_center(gray: np.ndarray) -> tuple[int, int] | None:
46
+ """
47
+ More robust pupil detection:
48
+ - enhance contrast (CLAHE)
49
+ - find dark blobs
50
+ - score by circularity and proximity to center
51
+ """
52
+ h, w = gray.shape
53
+ clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
54
+ eq = clahe.apply(gray)
55
+ blur = cv2.GaussianBlur(eq, (7, 7), 0)
56
+
57
+ # Focus on the central region to avoid eyelashes/edges
58
+ cx, cy = w // 2, h // 2
59
+ rx, ry = int(w * 0.3), int(h * 0.3)
60
+ x0, x1 = max(cx - rx, 0), min(cx + rx, w)
61
+ y0, y1 = max(cy - ry, 0), min(cy + ry, h)
62
+ roi = blur[y0:y1, x0:x1]
63
+
64
+ # Inverted threshold to capture dark pupil
65
+ _, thresh = cv2.threshold(roi, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
66
+ thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, np.ones((3, 3), np.uint8), iterations=2)
67
+ thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8), iterations=1)
68
+
69
+ contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
70
+ if not contours:
71
+ return None
72
+
73
+ best = None
74
+ best_score = -1.0
75
+ for c in contours:
76
+ area = cv2.contourArea(c)
77
+ if area < 15:
78
+ continue
79
+ perimeter = cv2.arcLength(c, True)
80
+ if perimeter <= 0:
81
+ continue
82
+ circularity = 4 * np.pi * (area / (perimeter * perimeter))
83
+ if circularity < 0.3:
84
+ continue
85
+ m = cv2.moments(c)
86
+ if m["m00"] == 0:
87
+ continue
88
+ px = int(m["m10"] / m["m00"]) + x0
89
+ py = int(m["m01"] / m["m00"]) + y0
90
+
91
+ # Score by circularity and distance to center
92
+ dist = np.hypot(px - cx, py - cy) / max(w, h)
93
+ score = circularity - dist
94
+ if score > best_score:
95
+ best_score = score
96
+ best = (px, py)
97
+
98
+ return best
99
+
100
+
101
+ def is_focused(pupil_center: tuple[int, int], img_shape: tuple[int, int]) -> bool:
102
+ """
103
+ Decide focus based on pupil offset from image center.
104
+ """
105
+ h, w = img_shape
106
+ cx, cy = w // 2, h // 2
107
+ px, py = pupil_center
108
+ dx = abs(px - cx) / max(w, 1)
109
+ dy = abs(py - cy) / max(h, 1)
110
+ return (dx < 0.12) and (dy < 0.12)
111
+
112
+
113
+ def annotate(img_bgr: np.ndarray, ellipse, pupil_center, focused: bool, cls_label: str, conf: float):
114
+ out = img_bgr.copy()
115
+ if ellipse is not None:
116
+ cv2.ellipse(out, ellipse, (0, 255, 255), 2)
117
+ if pupil_center is not None:
118
+ cv2.circle(out, pupil_center, 4, (0, 0, 255), -1)
119
+ label = f"{cls_label} ({conf:.2f}) | focused={int(focused)}"
120
+ cv2.putText(out, label, (8, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
121
+ return out
122
+
123
+
124
+ def main():
125
+ project_root = Path(__file__).resolve().parent.parent
126
+ data_dir = project_root / "Dataset"
127
+ alt_data_dir = project_root / "DATA"
128
+ out_dir = project_root / "runs_focus"
129
+ out_dir.mkdir(parents=True, exist_ok=True)
130
+
131
+ weights = find_weights(project_root)
132
+ if weights is None:
133
+ print("Weights not found. Train first.")
134
+ return
135
+
136
+ # Support both Dataset/test/{open,closed} and Dataset/{open,closed}
137
+ def resolve_test_dirs(root: Path):
138
+ test_open = root / "test" / "open"
139
+ test_closed = root / "test" / "closed"
140
+ if test_open.exists() and test_closed.exists():
141
+ return test_open, test_closed
142
+ test_open = root / "open"
143
+ test_closed = root / "closed"
144
+ if test_open.exists() and test_closed.exists():
145
+ return test_open, test_closed
146
+ alt_closed = root / "close"
147
+ if test_open.exists() and alt_closed.exists():
148
+ return test_open, alt_closed
149
+ return None, None
150
+
151
+ test_open, test_closed = resolve_test_dirs(data_dir)
152
+ if (test_open is None or test_closed is None) and alt_data_dir.exists():
153
+ test_open, test_closed = resolve_test_dirs(alt_data_dir)
154
+
155
+ if not test_open.exists() or not test_closed.exists():
156
+ print("Test folders missing. Expected:")
157
+ print(test_open)
158
+ print(test_closed)
159
+ return
160
+
161
+ test_files = list_images(test_open) + list_images(test_closed)
162
+ print("Total test images:", len(test_files))
163
+ max_images = int(os.getenv("MAX_IMAGES", "0"))
164
+ if max_images > 0:
165
+ test_files = test_files[:max_images]
166
+ print("Limiting to MAX_IMAGES:", max_images)
167
+
168
+ model = YOLO(str(weights))
169
+ results = model.predict(test_files, imgsz=224, device="cpu", verbose=False)
170
+
171
+ names = model.names
172
+ for r in results:
173
+ probs = r.probs
174
+ top_idx = int(probs.top1)
175
+ top_conf = float(probs.top1conf)
176
+ pred_label = names[top_idx]
177
+
178
+ img = cv2.imread(r.path)
179
+ if img is None:
180
+ continue
181
+ gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
182
+
183
+ ellipse = detect_eyelid_boundary(gray)
184
+ pupil_center = detect_pupil_center(gray)
185
+ focused = False
186
+ if pred_label.lower() == "open" and pupil_center is not None:
187
+ focused = is_focused(pupil_center, gray.shape)
188
+
189
+ annotated = annotate(img, ellipse, pupil_center, focused, pred_label, top_conf)
190
+ out_path = out_dir / (Path(r.path).stem + "_annotated.jpg")
191
+ cv2.imwrite(str(out_path), annotated)
192
+
193
+ print(f"{Path(r.path).name}: pred={pred_label} conf={top_conf:.3f} focused={focused}")
194
+
195
+ print(f"\nAnnotated outputs saved to: {out_dir}")
196
+
197
+
198
+ if __name__ == "__main__":
199
+ main()
models/cnn/CNN_MODEL/scripts/predict_image.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Run the eye open/closed model on one or more images."""
2
+ import sys
3
+ from pathlib import Path
4
+
5
+ from ultralytics import YOLO
6
+
7
+
8
def main():
    """CLI entry point: classify each image path given on the command line.

    Locates the trained checkpoint, then prints one `name: label (conf)`
    line per readable image argument. Exits early with a message when no
    weights are found or no image paths were supplied.
    """
    root = Path(__file__).resolve().parent.parent
    candidates = (
        root / "weights" / "best.pt",
        root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "best.pt",
        root / "runs" / "classify" / "runs_cls" / "eye_open_closed_cpu" / "weights" / "last.pt",
    )
    ckpt = next((c for c in candidates if c.is_file()), None)
    if ckpt is None:
        print("Weights not found. Put best.pt in weights/ or runs/.../weights/ (from model team).")
        sys.exit(1)

    if len(sys.argv) < 2:
        print("Usage: python scripts/predict_image.py <image1> [image2 ...]")
        print("Example: python scripts/predict_image.py path/to/image.png")
        sys.exit(0)

    model = YOLO(str(ckpt))
    class_names = model.names

    for arg in sys.argv[1:]:
        img_path = Path(arg)
        if not img_path.is_file():
            print(img_path, "- file not found")
            continue
        try:
            preds = model.predict(str(img_path), imgsz=224, device="cpu", verbose=False)
        except Exception as e:
            # Keep going: one unreadable/corrupt image should not stop the batch.
            print(img_path, "- error:", e)
            continue
        if not preds:
            print(img_path, "- no result")
            continue
        first = preds[0]
        top_idx = int(first.probs.top1)
        conf = float(first.probs.top1conf)
        label = class_names[top_idx]
        print(f"{img_path.name}: {label} ({conf:.2%})")
46
+
47
+
48
+ if __name__ == "__main__":
49
+ main()
models/cnn/CNN_MODEL/scripts/video_infer.py ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from pathlib import Path
5
+
6
+ import cv2
7
+ import numpy as np
8
+ from ultralytics import YOLO
9
+
10
+ try:
11
+ import mediapipe as mp
12
+ except Exception: # pragma: no cover
13
+ mp = None
14
+
15
+
16
def find_weights(project_root: Path) -> Path | None:
    """Return the first existing classifier checkpoint under *project_root*.

    Checks weights/best.pt first, then the training run directories
    (best.pt before last.pt). Returns None when nothing is found.
    """
    relative_candidates = (
        ("weights", "best.pt"),
        ("runs", "classify", "runs_cls", "eye_open_closed_cpu", "weights", "best.pt"),
        ("runs", "classify", "runs_cls", "eye_open_closed_cpu", "weights", "last.pt"),
        ("runs_cls", "eye_open_closed_cpu", "weights", "best.pt"),
        ("runs_cls", "eye_open_closed_cpu", "weights", "last.pt"),
    )
    for parts in relative_candidates:
        candidate = project_root.joinpath(*parts)
        if candidate.is_file():
            return candidate
    return None
25
+
26
+
27
def detect_pupil_center(gray: np.ndarray) -> tuple[int, int] | None:
    """Locate the pupil centre in a grayscale eye crop via dark-blob detection.

    Pipeline: CLAHE contrast boost -> Gaussian blur -> Otsu inverse threshold
    inside a central ROI -> morphological cleanup -> pick the most circular,
    most central contour. Returns (x, y) in full-image pixel coordinates, or
    None when no plausible pupil contour is found.

    NOTE(review): assumes *gray* is a tight eye crop so the pupil falls
    within the central 60% window — verify against caller.
    """
    h, w = gray.shape
    # Equalize local contrast so the dark pupil separates from the iris.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    eq = clahe.apply(gray)
    blur = cv2.GaussianBlur(eq, (7, 7), 0)

    # Restrict search to a centred ROI (60% of width/height) to avoid
    # eyelashes and corners.
    cx, cy = w // 2, h // 2
    rx, ry = int(w * 0.3), int(h * 0.3)
    x0, x1 = max(cx - rx, 0), min(cx + rx, w)
    y0, y1 = max(cy - ry, 0), min(cy + ry, h)
    roi = blur[y0:y1, x0:x1]

    # Inverse Otsu: the pupil (darkest region) becomes foreground.
    _, thresh = cv2.threshold(roi, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    # Open removes speckle noise; close fills small holes in the blob.
    thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, np.ones((3, 3), np.uint8), iterations=2)
    thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8), iterations=1)

    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None

    best = None
    best_score = -1.0
    for c in contours:
        area = cv2.contourArea(c)
        # Discard tiny blobs (noise).
        if area < 15:
            continue
        perimeter = cv2.arcLength(c, True)
        if perimeter <= 0:
            continue
        # 1.0 for a perfect circle; reject elongated shapes below 0.3.
        circularity = 4 * np.pi * (area / (perimeter * perimeter))
        if circularity < 0.3:
            continue
        m = cv2.moments(c)
        if m["m00"] == 0:
            continue
        # Centroid in full-image coordinates (offset by ROI origin).
        px = int(m["m10"] / m["m00"]) + x0
        py = int(m["m01"] / m["m00"]) + y0

        # Penalize distance from image centre; prefer round, central blobs.
        dist = np.hypot(px - cx, py - cy) / max(w, h)
        score = circularity - dist
        if score > best_score:
            best_score = score
            best = (px, py)

    return best
72
+
73
+
74
def is_focused(pupil_center: tuple[int, int], img_shape: tuple[int, int]) -> bool:
    """True when the pupil lies within 12% of the image width of the horizontal centre."""
    _, width = img_shape
    pupil_x = pupil_center[0]
    offset = abs(pupil_x - width // 2)
    return offset / max(width, 1) < 0.12
80
+
81
+
82
def classify_frame(model: YOLO, frame: np.ndarray) -> tuple[str, float]:
    """Classify *frame* with the open/closed model.

    Assumes the frame is already an eye crop. Returns the top-1
    (label, confidence) pair.
    """
    result = model.predict(frame, imgsz=224, device="cpu", verbose=False)[0]
    top_idx = int(result.probs.top1)
    return model.names[top_idx], float(result.probs.top1conf)
91
+
92
+
93
def annotate_frame(frame: np.ndarray, label: str, focused: bool, conf: float, time_sec: float):
    """Return a copy of *frame* with a one-line status banner drawn at the top left."""
    canvas = frame.copy()
    banner = f"{label} | focused={int(focused)} | conf={conf:.2f} | t={time_sec:.2f}s"
    cv2.putText(canvas, banner, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    return canvas
98
+
99
+
100
def write_segments(path: Path, segments: list[tuple[float, float, str]]):
    """Write (start, end, label) segments to *path*, one `s,e,label` line each (2 d.p.)."""
    lines = [f"{start:.2f},{end:.2f},{label}\n" for start, end, label in segments]
    with path.open("w") as out:
        out.writelines(lines)
104
+
105
+
106
def process_video(video_path: Path, model: YOLO | None):
    """Run per-frame attention inference on *video_path* and write three outputs.

    Outputs (next to the input file):
      * ``<stem>_pred.mp4``       — annotated copy of the video,
      * ``<stem>_predictions.csv``— per-frame time/label/focused/conf rows,
      * ``<stem>_segments.txt``   — contiguous same-label time segments.

    When mediapipe is available, eyes are analysed geometrically (EAR for
    open/closed, iris offset for focus); otherwise the YOLO classifier plus
    a pupil heuristic is used. Prints paths on completion.
    """
    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        print(f"Failed to open {video_path}")
        return

    # Fall back to 30 fps when the container reports 0/None.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    out_path = video_path.with_name(video_path.stem + "_pred.mp4")
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    writer = cv2.VideoWriter(str(out_path), fourcc, fps, (width, height))

    csv_path = video_path.with_name(video_path.stem + "_predictions.csv")
    seg_path = video_path.with_name(video_path.stem + "_segments.txt")

    frame_idx = 0
    last_label = None          # label of the segment currently being built
    seg_start = 0.0            # start time (s) of that segment
    segments: list[tuple[float, float, str]] = []

    with csv_path.open("w") as fcsv:
        fcsv.write("time_sec,label,focused,conf\n")
        if mp is None:
            print("mediapipe is not installed. Falling back to classifier-only mode.")
        use_mp = mp is not None
        if use_mp:
            mp_face_mesh = mp.solutions.face_mesh
            face_mesh = mp_face_mesh.FaceMesh(
                static_image_mode=False,
                max_num_faces=1,
                refine_landmarks=True,   # needed for iris landmarks 468-476
                min_detection_confidence=0.5,
                min_tracking_confidence=0.5,
            )

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            time_sec = frame_idx / fps
            # Per-frame defaults; conf stays 0.0 on the mediapipe path.
            conf = 0.0
            pred_label = "open"
            focused = False

            if use_mp:
                rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                res = face_mesh.process(rgb)
                if res.multi_face_landmarks:
                    lm = res.multi_face_landmarks[0].landmark
                    h, w = frame.shape[:2]

                    # Eye landmarks (MediaPipe FaceMesh)
                    left_eye = [33, 160, 158, 133, 153, 144]
                    right_eye = [362, 385, 387, 263, 373, 380]
                    left_iris = [468, 469, 470, 471]
                    right_iris = [473, 474, 475, 476]

                    def pts(idxs):
                        # Normalized landmarks -> integer pixel coordinates.
                        return np.array([(int(lm[i].x * w), int(lm[i].y * h)) for i in idxs])

                    def ear(eye_pts):
                        # EAR using 6 points; +1e-6 guards a degenerate eye width.
                        p1, p2, p3, p4, p5, p6 = eye_pts
                        v1 = np.linalg.norm(p2 - p6)
                        v2 = np.linalg.norm(p3 - p5)
                        h1 = np.linalg.norm(p1 - p4)
                        return (v1 + v2) / (2.0 * h1 + 1e-6)

                    le = pts(left_eye)
                    re = pts(right_eye)
                    le_ear = ear(le)
                    re_ear = ear(re)
                    ear_avg = (le_ear + re_ear) / 2.0

                    # openness threshold (0.22 — empirical; TODO confirm on data)
                    pred_label = "open" if ear_avg > 0.22 else "closed"

                    # iris centers (mean of the 4 iris landmarks)
                    li = pts(left_iris)
                    ri = pts(right_iris)
                    li_c = li.mean(axis=0).astype(int)
                    ri_c = ri.mean(axis=0).astype(int)

                    # eye centers (midpoint of corners)
                    le_c = ((le[0] + le[3]) / 2).astype(int)
                    re_c = ((re[0] + re[3]) / 2).astype(int)

                    # focus = iris close to eye center horizontally for both eyes
                    le_dx = abs(li_c[0] - le_c[0]) / max(np.linalg.norm(le[0] - le[3]), 1)
                    re_dx = abs(ri_c[0] - re_c[0]) / max(np.linalg.norm(re[0] - re[3]), 1)
                    focused = (pred_label == "open") and (le_dx < 0.18) and (re_dx < 0.18)

                    # draw eye boundaries
                    cv2.polylines(frame, [le], True, (0, 255, 255), 1)
                    cv2.polylines(frame, [re], True, (0, 255, 255), 1)
                    # draw iris centers
                    cv2.circle(frame, tuple(li_c), 3, (0, 0, 255), -1)
                    cv2.circle(frame, tuple(ri_c), 3, (0, 0, 255), -1)
                else:
                    # No face detected: treat the frame as closed / unfocused.
                    pred_label = "closed"
                    focused = False
            else:
                # Classifier-only fallback: YOLO label + pupil-position heuristic.
                if model is not None:
                    pred_label, conf = classify_frame(model, frame)
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                pupil_center = detect_pupil_center(gray) if pred_label.lower() == "open" else None
                focused = False
                if pred_label.lower() == "open" and pupil_center is not None:
                    focused = is_focused(pupil_center, gray.shape)

            # A closed eye can never be focused.
            if pred_label.lower() != "open":
                focused = False

            label = "open_focused" if (pred_label.lower() == "open" and focused) else "open_not_focused"
            if pred_label.lower() != "open":
                label = "closed_not_focused"

            fcsv.write(f"{time_sec:.2f},{label},{int(focused)},{conf:.4f}\n")

            # Segment bookkeeping: close the previous run when the label changes.
            if last_label is None:
                last_label = label
                seg_start = time_sec
            elif label != last_label:
                segments.append((seg_start, time_sec, last_label))
                seg_start = time_sec
                last_label = label

            annotated = annotate_frame(frame, label, focused, conf, time_sec)
            writer.write(annotated)
            frame_idx += 1

    # Flush the trailing segment (end = total duration).
    if last_label is not None:
        end_time = frame_idx / fps
        segments.append((seg_start, end_time, last_label))
    write_segments(seg_path, segments)

    cap.release()
    writer.release()
    print(f"Saved: {out_path}")
    print(f"CSV: {csv_path}")
    print(f"Segments: {seg_path}")
249
+
250
+
251
def main():
    """Process 1.mp4 / 2.mp4 from the project root plus any paths in $VIDEOS."""
    project_root = Path(__file__).resolve().parent.parent
    ckpt = find_weights(project_root)
    model = YOLO(str(ckpt)) if ckpt is not None else None

    # Default to 1.mp4 and 2.mp4 in project root.
    videos = [project_root / name for name in ("1.mp4", "2.mp4") if (project_root / name).exists()]

    # Extra inputs: comma-separated VIDEOS env var; relative paths resolve
    # against the project root.
    for entry in os.getenv("VIDEOS", "").split(","):
        entry = entry.strip()
        if not entry:
            continue
        candidate = Path(entry)
        if not candidate.is_absolute():
            candidate = project_root / candidate
        if candidate.exists():
            videos.append(candidate)

    if not videos:
        print("No videos found. Expected 1.mp4 / 2.mp4 in project root.")
        return

    for video in videos:
        process_video(video, model)
278
+
279
+
280
+ if __name__ == "__main__":
281
+ main()
models/cnn/CNN_MODEL/scripts/webcam_live.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Live webcam: detect face, crop each eye, run open/closed classifier, show on screen.
3
+ Requires: opencv-python, ultralytics, mediapipe (pip install mediapipe).
4
+ Press 'q' to quit.
5
+ """
6
+ import urllib.request
7
+ from pathlib import Path
8
+
9
+ import cv2
10
+ import numpy as np
11
+ from ultralytics import YOLO
12
+
13
+ try:
14
+ import mediapipe as mp
15
+ _mp_has_solutions = hasattr(mp, "solutions")
16
+ except ImportError:
17
+ mp = None
18
+ _mp_has_solutions = False
19
+
20
+ # New MediaPipe Tasks API (Face Landmarker) eye indices
21
+ LEFT_EYE_INDICES_NEW = [263, 249, 390, 373, 374, 380, 381, 382, 362, 466, 388, 387, 386, 385, 384, 398]
22
+ RIGHT_EYE_INDICES_NEW = [33, 7, 163, 144, 145, 153, 154, 155, 133, 246, 161, 160, 159, 158, 157, 173]
23
+ # Old Face Mesh (solutions) indices
24
+ LEFT_EYE_INDICES_OLD = [33, 160, 158, 133, 153, 144]
25
+ RIGHT_EYE_INDICES_OLD = [362, 385, 387, 263, 373, 380]
26
+ EYE_PADDING = 0.35
27
+
28
+
29
def find_weights(project_root: Path) -> Path | None:
    """Locate the trained eye classifier checkpoint, preferring weights/best.pt."""
    search_order = (
        ("weights", "best.pt"),
        ("runs", "classify", "runs_cls", "eye_open_closed_cpu", "weights", "best.pt"),
        ("runs", "classify", "runs_cls", "eye_open_closed_cpu", "weights", "last.pt"),
    )
    for parts in search_order:
        candidate = project_root.joinpath(*parts)
        if candidate.is_file():
            return candidate
    return None
36
+
37
+
38
def get_eye_roi(frame: np.ndarray, landmarks, indices: list[int]) -> np.ndarray | None:
    """Crop the eye region spanned by *indices* from *frame*, padded by EYE_PADDING.

    *landmarks* holds normalized (x, y) coordinates; the padding is at least
    8 px per side and the box is clamped to the frame. Returns a copy of the
    crop, or None for a degenerate (empty) box.
    """
    h, w = frame.shape[:2]
    coords = np.array([(int(landmarks[i].x * w), int(landmarks[i].y * h)) for i in indices])
    left, top = coords.min(axis=0)
    right, bottom = coords.max(axis=0)

    pad_x = max(int((right - left) * EYE_PADDING), 8)
    pad_y = max(int((bottom - top) * EYE_PADDING), 8)

    x0 = max(0, left - pad_x)
    y0 = max(0, top - pad_y)
    x1 = min(w, right + pad_x)
    y1 = min(h, bottom + pad_y)
    if x1 <= x0 or y1 <= y0:
        return None
    return frame[y0:y1, x0:x1].copy()
52
+
53
+
54
def _run_with_solutions(mp, model, cap):
    """Live loop using the legacy mediapipe `solutions` FaceMesh API.

    Per frame: detect face landmarks, crop both eyes, classify each with the
    YOLO *model*, and overlay the labels. Exits when the stream ends or 'q'
    is pressed. Display is via cv2.imshow; nothing is returned.
    """
    face_mesh = mp.solutions.face_mesh.FaceMesh(
        static_image_mode=False,
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5,
    )
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # FaceMesh expects RGB input.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = face_mesh.process(rgb)
        # Em-dash placeholder shown when an eye could not be classified.
        left_label, left_conf = "—", 0.0
        right_label, right_conf = "—", 0.0
        if results.multi_face_landmarks:
            lm = results.multi_face_landmarks[0].landmark
            for roi, indices, side in [
                (get_eye_roi(frame, lm, LEFT_EYE_INDICES_OLD), LEFT_EYE_INDICES_OLD, "left"),
                (get_eye_roi(frame, lm, RIGHT_EYE_INDICES_OLD), RIGHT_EYE_INDICES_OLD, "right"),
            ]:
                if roi is not None and roi.size > 0:
                    try:
                        pred = model.predict(roi, imgsz=224, device="cpu", verbose=False)
                        if pred:
                            r = pred[0]
                            label = model.names[int(r.probs.top1)]
                            conf = float(r.probs.top1conf)
                            if side == "left":
                                left_label, left_conf = label, conf
                            else:
                                right_label, right_conf = label, conf
                    except Exception:
                        # Best-effort per-eye: a failed prediction keeps the placeholder.
                        pass
        cv2.putText(frame, f"L: {left_label} ({left_conf:.0%})", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
        cv2.putText(frame, f"R: {right_label} ({right_conf:.0%})", (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
        cv2.imshow("Eye open/closed (q to quit)", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
94
+
95
+
96
def _run_with_tasks(project_root: Path, model, cap):
    """Live loop using the newer MediaPipe Tasks FaceLandmarker API.

    Downloads the landmarker model on first use (into weights/), then per
    frame: detect landmarks, crop both eyes, classify with *model*, overlay
    labels. Exits when the stream ends or 'q' is pressed.
    """
    from mediapipe.tasks.python import BaseOptions
    from mediapipe.tasks.python.vision import FaceLandmarker, FaceLandmarkerOptions
    from mediapipe.tasks.python.vision.core import vision_task_running_mode as running_mode
    from mediapipe.tasks.python.vision.core import image as image_lib

    # One-time download of the Google-hosted landmarker asset.
    model_path = project_root / "weights" / "face_landmarker.task"
    if not model_path.is_file():
        print("Downloading face_landmarker.task ...")
        url = "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task"
        urllib.request.urlretrieve(url, model_path)
        print("Done.")

    options = FaceLandmarkerOptions(
        base_options=BaseOptions(model_asset_path=str(model_path)),
        running_mode=running_mode.VisionTaskRunningMode.IMAGE,
        num_faces=1,
    )
    face_landmarker = FaceLandmarker.create_from_options(options)
    ImageFormat = image_lib.ImageFormat

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # Em-dash placeholder shown when an eye could not be classified.
        left_label, left_conf = "—", 0.0
        right_label, right_conf = "—", 0.0

        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # mp.Image requires a contiguous buffer.
        rgb_contiguous = np.ascontiguousarray(rgb)
        mp_image = image_lib.Image(ImageFormat.SRGB, rgb_contiguous)
        result = face_landmarker.detect(mp_image)

        if result.face_landmarks:
            lm = result.face_landmarks[0]
            for roi, side in [
                (get_eye_roi(frame, lm, LEFT_EYE_INDICES_NEW), "left"),
                (get_eye_roi(frame, lm, RIGHT_EYE_INDICES_NEW), "right"),
            ]:
                if roi is not None and roi.size > 0:
                    try:
                        pred = model.predict(roi, imgsz=224, device="cpu", verbose=False)
                        if pred:
                            r = pred[0]
                            label = model.names[int(r.probs.top1)]
                            conf = float(r.probs.top1conf)
                            if side == "left":
                                left_label, left_conf = label, conf
                            else:
                                right_label, right_conf = label, conf
                    except Exception:
                        # Best-effort per-eye: a failed prediction keeps the placeholder.
                        pass

        cv2.putText(frame, f"L: {left_label} ({left_conf:.0%})", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
        cv2.putText(frame, f"R: {right_label} ({right_conf:.0%})", (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
        cv2.imshow("Eye open/closed (q to quit)", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
154
+
155
+
156
def main():
    """Entry point: open the webcam and run the live open/closed overlay."""
    project_root = Path(__file__).resolve().parent.parent
    ckpt = find_weights(project_root)
    if ckpt is None:
        print("Weights not found. Put best.pt in weights/ or runs/.../weights/ (from model team).")
        return
    if mp is None:
        print("MediaPipe required. Install: pip install mediapipe")
        return

    model = YOLO(str(ckpt))
    capture = cv2.VideoCapture(0)
    if not capture.isOpened():
        print("Could not open webcam.")
        return

    print("Live eye open/closed on your face. Press 'q' to quit.")
    try:
        # Prefer the legacy solutions API when this mediapipe build exposes it.
        runner = _run_with_solutions if _mp_has_solutions else None
        if runner is not None:
            runner(mp, model, capture)
        else:
            _run_with_tasks(project_root, model, capture)
    finally:
        capture.release()
        cv2.destroyAllWindows()
181
+
182
+
183
+ if __name__ == "__main__":
184
+ main()
models/cnn/CNN_MODEL/weights/yolo11s-cls.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2b605d1c8c212b434a75a32759a6f7adf1d2b29c35f76bdccd4c794cb653cf2
3
+ size 13630112
models/cnn/__init__.py ADDED
File without changes
models/cnn/eye_attention/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+
models/cnn/eye_attention/classifier.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC, abstractmethod
4
+
5
+ import numpy as np
6
+
7
+
8
class EyeClassifier(ABC):
    """Abstract interface for eye-crop attentiveness backends."""

    @property
    @abstractmethod
    def name(self) -> str:
        """Short identifier for this backend."""
        ...

    @abstractmethod
    def predict_score(self, crops_bgr: list[np.ndarray]) -> float:
        """Return an attentiveness score for a batch of BGR eye crops."""
        ...
17
+
18
+
19
class GeometricOnlyClassifier(EyeClassifier):
    """Fallback backend with no learned model: always reports a neutral score."""

    @property
    def name(self) -> str:
        return "geometric"

    def predict_score(self, crops_bgr: list[np.ndarray]) -> float:
        # Constant 1.0 so downstream fusion relies on geometric cues alone.
        return 1.0
26
+
27
+
28
class YOLOv11Classifier(EyeClassifier):
    """Eye classifier backed by an Ultralytics YOLO classification checkpoint."""

    def __init__(self, checkpoint_path: str, device: str = "cpu"):
        from ultralytics import YOLO

        self._model = YOLO(checkpoint_path)
        self._device = device

        names = self._model.names
        # Prefer the class literally named "open"/"attentive"; otherwise fall
        # back to the highest class index.
        self._attentive_idx = next(
            (idx for idx, cls_name in names.items() if cls_name in ("open", "attentive")),
            None,
        )
        if self._attentive_idx is None:
            self._attentive_idx = max(names.keys())
        print(f"[YOLO] Classes: {names}, attentive_idx={self._attentive_idx}")

    @property
    def name(self) -> str:
        return "yolo"

    def predict_score(self, crops_bgr: list[np.ndarray]) -> float:
        """Mean probability of the attentive class over the given crops (1.0 if none)."""
        if not crops_bgr:
            return 1.0
        outputs = self._model.predict(crops_bgr, device=self._device, verbose=False)
        per_crop = [float(out.probs.data[self._attentive_idx]) for out in outputs]
        return sum(per_crop) / len(per_crop) if per_crop else 1.0
55
+
56
+
57
def load_eye_classifier(
    path: str | None = None,
    backend: str = "yolo",
    device: str = "cpu",
) -> EyeClassifier:
    """Build an eye classifier backend.

    Falls back to the geometric backend when no checkpoint path is given or
    when backend == "geometric"; otherwise loads the YOLO backend and
    re-raises ImportError if ultralytics is missing.
    """
    use_geometric = path is None or backend == "geometric"
    if use_geometric:
        return GeometricOnlyClassifier()

    try:
        return YOLOv11Classifier(path, device=device)
    except ImportError:
        print("[CLASSIFIER] ultralytics required for YOLO. pip install ultralytics")
        raise
models/cnn/eye_attention/crop.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+
4
+ from models.pretrained.face_mesh.face_mesh import FaceMeshDetector
5
+
6
+ LEFT_EYE_CONTOUR = FaceMeshDetector.LEFT_EYE_INDICES
7
+ RIGHT_EYE_CONTOUR = FaceMeshDetector.RIGHT_EYE_INDICES
8
+
9
+ IMAGENET_MEAN = (0.485, 0.456, 0.406)
10
+ IMAGENET_STD = (0.229, 0.224, 0.225)
11
+
12
+ CROP_SIZE = 96
13
+
14
+
15
+ def _bbox_from_landmarks(
16
+ landmarks: np.ndarray,
17
+ indices: list[int],
18
+ frame_w: int,
19
+ frame_h: int,
20
+ expand: float = 0.4,
21
+ ) -> tuple[int, int, int, int]:
22
+ pts = landmarks[indices, :2]
23
+ px = pts[:, 0] * frame_w
24
+ py = pts[:, 1] * frame_h
25
+
26
+ x_min, x_max = px.min(), px.max()
27
+ y_min, y_max = py.min(), py.max()
28
+ w = x_max - x_min
29
+ h = y_max - y_min
30
+ cx = (x_min + x_max) / 2
31
+ cy = (y_min + y_max) / 2
32
+
33
+ size = max(w, h) * (1 + expand)
34
+ half = size / 2
35
+
36
+ x1 = int(max(cx - half, 0))
37
+ y1 = int(max(cy - half, 0))
38
+ x2 = int(min(cx + half, frame_w))
39
+ y2 = int(min(cy + half, frame_h))
40
+
41
+ return x1, y1, x2, y2
42
+
43
+
44
def extract_eye_crops(
    frame: np.ndarray,
    landmarks: np.ndarray,
    expand: float = 0.4,
    crop_size: int = CROP_SIZE,
) -> tuple[np.ndarray, np.ndarray, tuple, tuple]:
    """Extract resized square crops for both eyes.

    Returns (left_crop, right_crop, left_bbox, right_bbox); crops are
    resized to crop_size x crop_size with INTER_AREA.
    """
    frame_h, frame_w = frame.shape[:2]

    crops = []
    boxes = []
    for contour in (LEFT_EYE_CONTOUR, RIGHT_EYE_CONTOUR):
        bbox = _bbox_from_landmarks(landmarks, contour, frame_w, frame_h, expand)
        patch = frame[bbox[1] : bbox[3], bbox[0] : bbox[2]]
        crops.append(cv2.resize(patch, (crop_size, crop_size), interpolation=cv2.INTER_AREA))
        boxes.append(bbox)

    return crops[0], crops[1], boxes[0], boxes[1]
+ return left_crop, right_crop, left_bbox, right_bbox
62
+
63
+
64
def crop_to_tensor(crop_bgr: np.ndarray):
    """Convert a BGR eye crop to an ImageNet-normalized CHW float32 torch tensor."""
    # Local import keeps torch optional for callers that never build tensors.
    import torch

    # BGR -> RGB, scale to [0, 1].
    rgb = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
    # Per-channel ImageNet normalization; done channel-by-channel so the
    # array stays float32.
    for c in range(3):
        rgb[:, :, c] = (rgb[:, :, c] - IMAGENET_MEAN[c]) / IMAGENET_STD[c]
    # HWC -> CHW layout expected by torch models.
    return torch.from_numpy(rgb.transpose(2, 0, 1))
+ return torch.from_numpy(rgb.transpose(2, 0, 1))
models/cnn/eye_attention/train.py ADDED
File without changes
models/geometric/__init__.py ADDED
File without changes
models/geometric/eye_behaviour/__init__.py ADDED
File without changes
models/geometric/eye_behaviour/eye_scorer.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+
3
+ import numpy as np
4
+
5
+ _LEFT_EYE_EAR = [33, 160, 158, 133, 153, 145]
6
+ _RIGHT_EYE_EAR = [362, 385, 387, 263, 373, 380]
7
+
8
+ _LEFT_IRIS_CENTER = 468
9
+ _RIGHT_IRIS_CENTER = 473
10
+
11
+ _LEFT_EYE_INNER = 133
12
+ _LEFT_EYE_OUTER = 33
13
+ _RIGHT_EYE_INNER = 362
14
+ _RIGHT_EYE_OUTER = 263
15
+
16
+ _LEFT_EYE_TOP = 159
17
+ _LEFT_EYE_BOTTOM = 145
18
+ _RIGHT_EYE_TOP = 386
19
+ _RIGHT_EYE_BOTTOM = 374
20
+
21
+ _MOUTH_TOP = 13
22
+ _MOUTH_BOTTOM = 14
23
+ _MOUTH_LEFT = 78
24
+ _MOUTH_RIGHT = 308
25
+ _MOUTH_UPPER_1 = 82
26
+ _MOUTH_UPPER_2 = 312
27
+ _MOUTH_LOWER_1 = 87
28
+ _MOUTH_LOWER_2 = 317
29
+
30
+ MAR_YAWN_THRESHOLD = 0.55
31
+
32
+
33
+ def _distance(p1: np.ndarray, p2: np.ndarray) -> float:
34
+ return float(np.linalg.norm(p1 - p2))
35
+
36
+
37
+ def compute_ear(landmarks: np.ndarray, eye_indices: list[int]) -> float:
38
+ p1 = landmarks[eye_indices[0], :2]
39
+ p2 = landmarks[eye_indices[1], :2]
40
+ p3 = landmarks[eye_indices[2], :2]
41
+ p4 = landmarks[eye_indices[3], :2]
42
+ p5 = landmarks[eye_indices[4], :2]
43
+ p6 = landmarks[eye_indices[5], :2]
44
+
45
+ vertical1 = _distance(p2, p6)
46
+ vertical2 = _distance(p3, p5)
47
+ horizontal = _distance(p1, p4)
48
+
49
+ if horizontal < 1e-6:
50
+ return 0.0
51
+
52
+ return (vertical1 + vertical2) / (2.0 * horizontal)
53
+
54
+
55
+ def compute_avg_ear(landmarks: np.ndarray) -> float:
56
+ left_ear = compute_ear(landmarks, _LEFT_EYE_EAR)
57
+ right_ear = compute_ear(landmarks, _RIGHT_EYE_EAR)
58
+ return (left_ear + right_ear) / 2.0
59
+
60
+
61
+ def compute_gaze_ratio(landmarks: np.ndarray) -> tuple[float, float]:
62
+ left_iris = landmarks[_LEFT_IRIS_CENTER, :2]
63
+ left_inner = landmarks[_LEFT_EYE_INNER, :2]
64
+ left_outer = landmarks[_LEFT_EYE_OUTER, :2]
65
+ left_top = landmarks[_LEFT_EYE_TOP, :2]
66
+ left_bottom = landmarks[_LEFT_EYE_BOTTOM, :2]
67
+
68
+ right_iris = landmarks[_RIGHT_IRIS_CENTER, :2]
69
+ right_inner = landmarks[_RIGHT_EYE_INNER, :2]
70
+ right_outer = landmarks[_RIGHT_EYE_OUTER, :2]
71
+ right_top = landmarks[_RIGHT_EYE_TOP, :2]
72
+ right_bottom = landmarks[_RIGHT_EYE_BOTTOM, :2]
73
+
74
+ left_h_total = _distance(left_inner, left_outer)
75
+ right_h_total = _distance(right_inner, right_outer)
76
+
77
+ if left_h_total < 1e-6 or right_h_total < 1e-6:
78
+ return 0.5, 0.5
79
+
80
+ left_h_ratio = _distance(left_outer, left_iris) / left_h_total
81
+ right_h_ratio = _distance(right_outer, right_iris) / right_h_total
82
+ h_ratio = (left_h_ratio + right_h_ratio) / 2.0
83
+
84
+ left_v_total = _distance(left_top, left_bottom)
85
+ right_v_total = _distance(right_top, right_bottom)
86
+
87
+ if left_v_total < 1e-6 or right_v_total < 1e-6:
88
+ return h_ratio, 0.5
89
+
90
+ left_v_ratio = _distance(left_top, left_iris) / left_v_total
91
+ right_v_ratio = _distance(right_top, right_iris) / right_v_total
92
+ v_ratio = (left_v_ratio + right_v_ratio) / 2.0
93
+
94
+ return float(np.clip(h_ratio, 0, 1)), float(np.clip(v_ratio, 0, 1))
95
+
96
+
97
+ def compute_mar(landmarks: np.ndarray) -> float:
98
+ # Mouth aspect ratio: high = mouth open (yawning / sleepy)
99
+ top = landmarks[_MOUTH_TOP, :2]
100
+ bottom = landmarks[_MOUTH_BOTTOM, :2]
101
+ left = landmarks[_MOUTH_LEFT, :2]
102
+ right = landmarks[_MOUTH_RIGHT, :2]
103
+ upper1 = landmarks[_MOUTH_UPPER_1, :2]
104
+ lower1 = landmarks[_MOUTH_LOWER_1, :2]
105
+ upper2 = landmarks[_MOUTH_UPPER_2, :2]
106
+ lower2 = landmarks[_MOUTH_LOWER_2, :2]
107
+
108
+ horizontal = _distance(left, right)
109
+ if horizontal < 1e-6:
110
+ return 0.0
111
+ v1 = _distance(upper1, lower1)
112
+ v2 = _distance(top, bottom)
113
+ v3 = _distance(upper2, lower2)
114
+ return (v1 + v2 + v3) / (2.0 * horizontal)
115
+
116
+
117
+ class EyeBehaviourScorer:
118
+ def __init__(
119
+ self,
120
+ ear_open: float = 0.30,
121
+ ear_closed: float = 0.16,
122
+ gaze_max_offset: float = 0.28,
123
+ ):
124
+ self.ear_open = ear_open
125
+ self.ear_closed = ear_closed
126
+ self.gaze_max_offset = gaze_max_offset
127
+
128
+ def _ear_score(self, ear: float) -> float:
129
+ if ear >= self.ear_open:
130
+ return 1.0
131
+ if ear <= self.ear_closed:
132
+ return 0.0
133
+ return (ear - self.ear_closed) / (self.ear_open - self.ear_closed)
134
+
135
+ def _gaze_score(self, h_ratio: float, v_ratio: float) -> float:
136
+ h_offset = abs(h_ratio - 0.5)
137
+ v_offset = abs(v_ratio - 0.5)
138
+ offset = math.sqrt(h_offset**2 + v_offset**2)
139
+ t = min(offset / self.gaze_max_offset, 1.0)
140
+ return 0.5 * (1.0 + math.cos(math.pi * t))
141
+
142
+ def score(self, landmarks: np.ndarray) -> float:
143
+ ear = compute_avg_ear(landmarks)
144
+ ear_s = self._ear_score(ear)
145
+ if ear_s < 0.3:
146
+ return ear_s
147
+ h_ratio, v_ratio = compute_gaze_ratio(landmarks)
148
+ gaze_s = self._gaze_score(h_ratio, v_ratio)
149
+ return ear_s * gaze_s
150
+
151
+ def detailed_score(self, landmarks: np.ndarray) -> dict:
152
+ ear = compute_avg_ear(landmarks)
153
+ ear_s = self._ear_score(ear)
154
+ h_ratio, v_ratio = compute_gaze_ratio(landmarks)
155
+ gaze_s = self._gaze_score(h_ratio, v_ratio)
156
+ s_eye = ear_s if ear_s < 0.3 else ear_s * gaze_s
157
+ return {
158
+ "ear": round(ear, 4),
159
+ "ear_score": round(ear_s, 4),
160
+ "h_gaze": round(h_ratio, 4),
161
+ "v_gaze": round(v_ratio, 4),
162
+ "gaze_score": round(gaze_s, 4),
163
+ "s_eye": round(s_eye, 4),
164
+ }
models/geometric/face_orientation/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+
models/geometric/face_orientation/head_pose.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+
3
+ import cv2
4
+ import numpy as np
5
+
6
+ _LANDMARK_INDICES = [1, 152, 33, 263, 61, 291]
7
+
8
+ _MODEL_POINTS = np.array(
9
+ [
10
+ [0.0, 0.0, 0.0],
11
+ [0.0, -330.0, -65.0],
12
+ [-225.0, 170.0, -135.0],
13
+ [225.0, 170.0, -135.0],
14
+ [-150.0, -150.0, -125.0],
15
+ [150.0, -150.0, -125.0],
16
+ ],
17
+ dtype=np.float64,
18
+ )
19
+
20
+
21
class HeadPoseEstimator:
    """Estimate head pose (yaw/pitch/roll) from 2-D face landmarks via PnP.

    Uses six canonical 3-D model points (nose, chin, eye corners, mouth
    corners) matched against the corresponding MediaPipe landmark indices,
    and scores how frontal the head is with a cosine falloff.
    """

    def __init__(self, max_angle: float = 30.0, roll_weight: float = 0.5):
        # Combined angular deviation (degrees) at which the score reaches 0.
        self.max_angle = max_angle
        # Roll contributes less than yaw/pitch to the deviation metric.
        self.roll_weight = roll_weight
        self._camera_matrix = None
        self._frame_size = None
        # No lens distortion assumed.
        self._dist_coeffs = np.zeros((4, 1), dtype=np.float64)

    def _get_camera_matrix(self, frame_w: int, frame_h: int) -> np.ndarray:
        """Build (and cache per frame size) a pinhole intrinsics matrix.

        Approximation: focal length = frame width, principal point = centre.
        """
        if self._camera_matrix is not None and self._frame_size == (frame_w, frame_h):
            return self._camera_matrix
        focal_length = float(frame_w)
        cx, cy = frame_w / 2.0, frame_h / 2.0
        self._camera_matrix = np.array(
            [[focal_length, 0, cx], [0, focal_length, cy], [0, 0, 1]],
            dtype=np.float64,
        )
        self._frame_size = (frame_w, frame_h)
        return self._camera_matrix

    def _solve(self, landmarks: np.ndarray, frame_w: int, frame_h: int):
        """Run solvePnP; returns (success, rvec, tvec, image_points).

        *landmarks* are normalized coordinates, scaled here to pixels.
        """
        image_points = np.array(
            [
                [landmarks[i, 0] * frame_w, landmarks[i, 1] * frame_h]
                for i in _LANDMARK_INDICES
            ],
            dtype=np.float64,
        )
        camera_matrix = self._get_camera_matrix(frame_w, frame_h)
        success, rvec, tvec = cv2.solvePnP(
            _MODEL_POINTS,
            image_points,
            camera_matrix,
            self._dist_coeffs,
            flags=cv2.SOLVEPNP_ITERATIVE,
        )
        return success, rvec, tvec, image_points

    def estimate(
        self, landmarks: np.ndarray, frame_w: int, frame_h: int
    ) -> tuple[float, float, float] | None:
        """Return (yaw, pitch, roll) in degrees, or None when PnP fails."""
        success, rvec, tvec, _ = self._solve(landmarks, frame_w, frame_h)
        if not success:
            return None

        # Convert rotation vector to a matrix, then derive angles from the
        # rotated forward (nose) and up axes of the head.
        rmat, _ = cv2.Rodrigues(rvec)
        nose_dir = rmat @ np.array([0.0, 0.0, 1.0])
        face_up = rmat @ np.array([0.0, 1.0, 0.0])

        yaw = math.degrees(math.atan2(nose_dir[0], -nose_dir[2]))
        pitch = math.degrees(math.asin(np.clip(-nose_dir[1], -1.0, 1.0)))
        roll = math.degrees(math.atan2(face_up[0], -face_up[1]))

        return (yaw, pitch, roll)

    def score(self, landmarks: np.ndarray, frame_w: int, frame_h: int) -> float:
        """Frontality score in [0, 1]: 1 facing the camera, 0 at/after max_angle."""
        angles = self.estimate(landmarks, frame_w, frame_h)
        if angles is None:
            return 0.0

        yaw, pitch, roll = angles
        # Weighted angular deviation; roll is down-weighted by roll_weight.
        deviation = math.sqrt(yaw**2 + pitch**2 + (self.roll_weight * roll) ** 2)
        t = min(deviation / self.max_angle, 1.0)
        # Smooth cosine falloff from 1 to 0.
        return 0.5 * (1.0 + math.cos(math.pi * t))

    def draw_axes(
        self,
        frame: np.ndarray,
        landmarks: np.ndarray,
        axis_length: float = 50.0,
    ) -> np.ndarray:
        """Draw XYZ pose axes anchored at the nose onto *frame* (in place).

        Returns the frame unchanged when PnP fails.
        """
        h, w = frame.shape[:2]
        success, rvec, tvec, image_points = self._solve(landmarks, w, h)
        if not success:
            return frame

        camera_matrix = self._get_camera_matrix(w, h)
        # image_points[0] corresponds to the nose-tip landmark.
        nose = tuple(image_points[0].astype(int))

        axes_3d = np.float64(
            [[axis_length, 0, 0], [0, axis_length, 0], [0, 0, axis_length]]
        )
        projected, _ = cv2.projectPoints(
            axes_3d, rvec, tvec, camera_matrix, self._dist_coeffs
        )

        # X axis red, Y green, Z blue (BGR colours).
        colors = [(0, 0, 255), (0, 255, 0), (255, 0, 0)]
        for i, color in enumerate(colors):
            pt = tuple(projected[i].ravel().astype(int))
            cv2.line(frame, nose, pt, color, 2)

        return frame
models/mlp/__init__.py ADDED
File without changes
models/{train.py → mlp/train.py} RENAMED
@@ -1,18 +1,18 @@
1
- # Run from repo root: python -m models.train (or cd models && python train.py)
2
-
3
  import json
4
- import os
5
  import random
6
 
7
- import numpy as np as np
8
  import torch
9
  import torch.nn as nn
10
  import torch.optim as optim
11
 
12
- from prepare_dataset import get_dataloaders
 
 
13
 
14
  CFG = {
15
- "model_name": "face_orientation", # "face_orientation" or "eye_behaviour"
16
  "epochs": 30,
17
  "batch_size": 32,
18
  "lr": 1e-3,
@@ -22,10 +22,25 @@ CFG = {
22
  "face_orientation": os.path.join(os.path.dirname(__file__), "face_orientation_model"),
23
  "eye_behaviour": os.path.join(os.path.dirname(__file__), "eye_behaviour_model"),
24
  },
25
- "logs_dir": os.path.join(os.path.dirname(__file__), "..", "evaluation", "logs"),
26
  }
27
 
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def set_seed(seed: int):
30
  random.seed(seed)
31
  np.random.seed(seed)
@@ -154,6 +169,16 @@ def main():
154
  history["val_loss"].append(round(val_loss, 4))
155
  history["val_acc"].append(round(val_acc, 4))
156
 
 
 
 
 
 
 
 
 
 
 
157
  marker = ""
158
  if val_acc > best_val_acc:
159
  best_val_acc = val_acc
 
 
 
1
  import json
2
+ import os, sys
3
  import random
4
 
5
+ import numpy as np
6
  import torch
7
  import torch.nn as nn
8
  import torch.optim as optim
9
 
10
+ from clearml import Task
11
+
12
+ from models.prepare_dataset import get_dataloaders
13
 
14
  CFG = {
15
+ "model_name": "face_orientation",
16
  "epochs": 30,
17
  "batch_size": 32,
18
  "lr": 1e-3,
 
22
  "face_orientation": os.path.join(os.path.dirname(__file__), "face_orientation_model"),
23
  "eye_behaviour": os.path.join(os.path.dirname(__file__), "eye_behaviour_model"),
24
  },
25
+ "logs_dir": os.path.join(os.path.dirname(__file__), "..", "..", "evaluation", "logs"),
26
  }
27
 
28
 
29
+ # ==== ClearML Initialisation =============================================
30
+ task = Task.init(
31
+ project_name="Focus Guard",
32
+ task_name=f"MLP Model Training",
33
+ tags=["training", "mlp_model"]
34
+ )
35
+
36
+ prefix = 'checkpoints/'+task.name+'_'+task.id+'/'
37
+ os.makedirs(prefix, exist_ok=True)
38
+
39
+ task.connect(CFG)
40
+
41
+
42
+
43
+ # ==== Model =============================================
44
  def set_seed(seed: int):
45
  random.seed(seed)
46
  np.random.seed(seed)
 
169
  history["val_loss"].append(round(val_loss, 4))
170
  history["val_acc"].append(round(val_acc, 4))
171
 
172
+
173
+ # Log scalars to ClearML
174
+ current_lr = optimizer.param_groups[0]['lr']
175
+ task.logger.report_scalar("Loss", "Train", float(train_loss), iteration=epoch)
176
+ task.logger.report_scalar("Accuracy", "Train", float(train_acc), iteration=epoch)
177
+ task.logger.report_scalar("Loss", "Val", float(val_loss), iteration=epoch)
178
+ task.logger.report_scalar("Accuracy", "Val", float(val_acc), iteration=epoch)
179
+ task.logger.report_scalar("Learning Rate", "LR", float(current_lr), iteration=epoch)
180
+ task.logger.flush()
181
+
182
  marker = ""
183
  if val_acc > best_val_acc:
184
  best_val_acc = val_acc
models/pretrained/__init__.py ADDED
File without changes