Spaces:
Running
Running
Abdelrahman Almatrooshi committed on
Commit ·
e0507e7
1
Parent(s): 87209fb
Add missing eye_crop and eye_classifier modules
Browse files- models/eye_classifier.py +69 -0
- models/eye_crop.py +77 -0
models/eye_classifier.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from abc import ABC, abstractmethod
|
| 4 |
+
|
| 5 |
+
import numpy as np
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class EyeClassifier(ABC):
    """Abstract base for eye-state classifiers.

    Implementations score a batch of BGR eye crops and return a single
    aggregate score (the YOLO backend averages per-crop probabilities;
    the geometric backend returns a constant 1.0).
    """

    @property
    @abstractmethod
    def name(self) -> str:
        # Short string identifier for the backend ("geometric", "yolo", ...).
        pass

    @abstractmethod
    def predict_score(self, crops_bgr: list[np.ndarray]) -> float:
        # Score a list of BGR eye-crop images; higher means more attentive
        # (presumably in [0, 1] — the YOLO backend returns class probabilities).
        pass
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class GeometricOnlyClassifier(EyeClassifier):
    """No-model fallback backend.

    Always reports a neutral score of 1.0 regardless of input, so the
    pipeline can run without a trained checkpoint.
    """

    @property
    def name(self) -> str:
        """Backend identifier."""
        return "geometric"

    def predict_score(self, crops_bgr: list[np.ndarray]) -> float:
        """Return the constant neutral score; crops are intentionally unused."""
        return 1.0
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class YOLOv11Classifier(EyeClassifier):
    """Eye-state backend driven by an Ultralytics YOLO classification model.

    Loads a checkpoint, locates the index of the "open"/"attentive" class in
    the model's label map, and scores crops by averaging that class's
    probability over the batch.
    """

    def __init__(self, checkpoint_path: str, device: str = "cpu"):
        # Imported lazily so the module stays importable without ultralytics;
        # load_eye_classifier catches the resulting ImportError.
        from ultralytics import YOLO

        self._model = YOLO(checkpoint_path)
        self._device = device

        # Find which class index corresponds to the attentive/open state.
        names = self._model.names
        found = next(
            (idx for idx, label in names.items() if label in ("open", "attentive")),
            None,
        )
        # Fallback: if neither label exists, assume the highest class index.
        self._attentive_idx = max(names.keys()) if found is None else found
        print(f"[YOLO] Classes: {names}, attentive_idx={self._attentive_idx}")

    @property
    def name(self) -> str:
        """Backend identifier."""
        return "yolo"

    def predict_score(self, crops_bgr: list[np.ndarray]) -> float:
        """Average attentive-class probability over all crops (1.0 if none)."""
        if not crops_bgr:
            return 1.0
        outputs = self._model.predict(crops_bgr, device=self._device, verbose=False)
        per_crop = [float(out.probs.data[self._attentive_idx]) for out in outputs]
        if not per_crop:
            return 1.0
        return sum(per_crop) / len(per_crop)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def load_eye_classifier(
    path: str | None = None,
    backend: str = "yolo",
    device: str = "cpu",
) -> EyeClassifier:
    """Factory for the configured eye classifier.

    Falls back to the geometric-only backend when no checkpoint path is
    supplied or when the geometric backend is explicitly requested;
    otherwise builds a YOLO classifier, re-raising ImportError (after a
    hint message) if ultralytics is not installed.
    """
    wants_geometric = path is None or backend == "geometric"
    if wants_geometric:
        return GeometricOnlyClassifier()

    try:
        return YOLOv11Classifier(path, device=device)
    except ImportError:
        print("[CLASSIFIER] ultralytics required for YOLO. pip install ultralytics")
        raise
|
models/eye_crop.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cv2
|
| 2 |
+
import numpy as np
|
| 3 |
+
|
| 4 |
+
from models.face_mesh import FaceMeshDetector
|
| 5 |
+
|
| 6 |
+
# Landmark index lists for each eye contour, reused from the shared
# FaceMeshDetector so the crops align with its landmark layout.
LEFT_EYE_CONTOUR = FaceMeshDetector.LEFT_EYE_INDICES
RIGHT_EYE_CONTOUR = FaceMeshDetector.RIGHT_EYE_INDICES

# Standard ImageNet channel statistics (RGB order), applied in crop_to_tensor.
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

# Side length in pixels of the square eye crops produced by extract_eye_crops.
CROP_SIZE = 96
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def _bbox_from_landmarks(
|
| 16 |
+
landmarks: np.ndarray,
|
| 17 |
+
indices: list[int],
|
| 18 |
+
frame_w: int,
|
| 19 |
+
frame_h: int,
|
| 20 |
+
expand: float = 0.4,
|
| 21 |
+
) -> tuple[int, int, int, int]:
|
| 22 |
+
pts = landmarks[indices, :2]
|
| 23 |
+
px = pts[:, 0] * frame_w
|
| 24 |
+
py = pts[:, 1] * frame_h
|
| 25 |
+
|
| 26 |
+
x_min, x_max = px.min(), px.max()
|
| 27 |
+
y_min, y_max = py.min(), py.max()
|
| 28 |
+
w = x_max - x_min
|
| 29 |
+
h = y_max - y_min
|
| 30 |
+
cx = (x_min + x_max) / 2
|
| 31 |
+
cy = (y_min + y_max) / 2
|
| 32 |
+
|
| 33 |
+
size = max(w, h) * (1 + expand)
|
| 34 |
+
half = size / 2
|
| 35 |
+
|
| 36 |
+
x1 = int(max(cx - half, 0))
|
| 37 |
+
y1 = int(max(cy - half, 0))
|
| 38 |
+
x2 = int(min(cx + half, frame_w))
|
| 39 |
+
y2 = int(min(cy + half, frame_h))
|
| 40 |
+
|
| 41 |
+
return x1, y1, x2, y2
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def extract_eye_crops(
    frame: np.ndarray,
    landmarks: np.ndarray,
    expand: float = 0.4,
    crop_size: int = CROP_SIZE,
) -> tuple[np.ndarray, np.ndarray, tuple, tuple]:
    """Extract resized left/right eye crops from a BGR frame.

    Args:
        frame: BGR image, shape (H, W, 3).
        landmarks: normalized face-mesh landmarks, shape (N, >=2).
        expand: fractional margin around each eye's landmark extent.
        crop_size: output side length; each crop is (crop_size, crop_size, 3).

    Returns:
        (left_crop, right_crop, left_bbox, right_bbox); bboxes are
        (x1, y1, x2, y2) pixel coordinates clamped to the frame.
    """
    h, w = frame.shape[:2]

    def _crop_eye(indices: list[int]) -> tuple[np.ndarray, tuple[int, int, int, int]]:
        # Shared per-eye logic (was duplicated for left/right): slice out the
        # bbox and resize; a degenerate (empty) region becomes a black
        # placeholder so callers always receive a crop of the expected shape.
        bbox = _bbox_from_landmarks(landmarks, indices, w, h, expand)
        region = frame[bbox[1] : bbox[3], bbox[0] : bbox[2]]
        if region.size == 0:
            region = np.zeros((crop_size, crop_size, 3), dtype=np.uint8)
        else:
            region = cv2.resize(region, (crop_size, crop_size), interpolation=cv2.INTER_AREA)
        return region, bbox

    left_crop, left_bbox = _crop_eye(LEFT_EYE_CONTOUR)
    right_crop, right_bbox = _crop_eye(RIGHT_EYE_CONTOUR)

    return left_crop, right_crop, left_bbox, right_bbox
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def crop_to_tensor(crop_bgr: np.ndarray):
    """Convert a BGR uint8 eye crop to a normalized CHW float32 torch tensor.

    Args:
        crop_bgr: BGR image, shape (H, W, 3), dtype uint8.

    Returns:
        torch.Tensor of shape (3, H, W), scaled to [0, 1] then normalized
        with the ImageNet per-channel mean/std.
    """
    # Imported lazily so the module stays importable without torch.
    import torch

    rgb = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
    # Vectorized normalization via broadcasting over the channel axis
    # (replaces the original per-channel Python loop; identical float32 math).
    rgb = (rgb - np.asarray(IMAGENET_MEAN, dtype=np.float32)) / np.asarray(
        IMAGENET_STD, dtype=np.float32
    )
    return torch.from_numpy(rgb.transpose(2, 0, 1))
|