Spaces:
Sleeping
Sleeping
fix: improved retrieval using face search
Browse files- src/models.py +100 -25
src/models.py
CHANGED
|
@@ -50,7 +50,7 @@ ADAFACE_WEIGHTS_AVAILABLE = False # controlled by ENABLE_ADAFACE env var
|
|
| 50 |
|
| 51 |
# ── Constants ─────────────────────────────────────────────────────
|
| 52 |
YOLO_PERSON_CLASS_ID = 0
|
| 53 |
-
MIN_FACE_SIZE =
|
| 54 |
MAX_FACES_PER_IMAGE = 12 # slightly higher cap for group photos
|
| 55 |
MAX_CROPS = 6 # max YOLO object crops per image
|
| 56 |
MAX_IMAGE_SIZE = 640 # object lane longest edge
|
|
@@ -58,7 +58,10 @@ DET_SIZE_PRIMARY = (1280, 1280) # V4: 1280 for small-face detection
|
|
| 58 |
DET_SIZE_SECONDARY = (640, 640) # fallback / 2nd scale
|
| 59 |
FACE_CROP_THUMB_SIZE = 112 # face thumbnail for Pinecone metadata
|
| 60 |
FACE_CROP_QUALITY = 80 # JPEG quality for thumbnails
|
| 61 |
-
FACE_QUALITY_GATE = 0.35 #
|
|
|
|
|
|
|
|
|
|
| 62 |
FACE_DIM = 512 # ArcFace embedding dimension
|
| 63 |
ADAFACE_DIM = 512 # AdaFace embedding dimension
|
| 64 |
FUSED_FACE_DIM = 1024 # ArcFace + AdaFace concatenated
|
|
@@ -133,6 +136,42 @@ def _face_crop_for_adaface(
|
|
| 133 |
return arr.transpose(2, 0, 1) # HWC → CHW
|
| 134 |
|
| 135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
# ════════════════════════════════════════════════════════════════
|
| 137 |
# AIModelManager — V4
|
| 138 |
# ════════════════════════════════════════════════════════════════
|
|
@@ -367,10 +406,62 @@ class AIModelManager:
|
|
| 367 |
img_np = (img_np * 255).astype(np.uint8)
|
| 368 |
bgr = img_np[:, :, ::-1].copy() if img_np.shape[2] == 3 else img_np.copy()
|
| 369 |
|
| 370 |
-
|
| 371 |
-
with
|
| 372 |
-
|
| 373 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 374 |
|
| 375 |
results = []
|
| 376 |
accepted = 0
|
|
@@ -493,29 +584,13 @@ class AIModelManager:
|
|
| 493 |
# the original resolution (multi-scale fallback).
|
| 494 |
# ════════════════════════════════════════════════════════
|
| 495 |
if detect_faces and self.face_app is not None:
|
| 496 |
-
#
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
face_results = self._detect_and_encode_faces(detect_np_1280)
|
| 500 |
-
|
| 501 |
-
# Scale 2: if nothing found, try original resolution
|
| 502 |
-
# (sometimes resizing DOWN helps when image is already small)
|
| 503 |
-
if not face_results and max(original_pil.size) < 1280:
|
| 504 |
-
print("🔄 Multi-scale fallback: retrying at original resolution")
|
| 505 |
-
face_results = self._detect_and_encode_faces(img_np)
|
| 506 |
|
| 507 |
if face_results:
|
| 508 |
faces_found = True
|
| 509 |
-
# Scale bboxes back to original-image coordinates
|
| 510 |
-
sx = original_pil.width / detect_pil_1280.width
|
| 511 |
-
sy = original_pil.height / detect_pil_1280.height
|
| 512 |
for fr in face_results:
|
| 513 |
-
if sx != 1.0 or sy != 1.0:
|
| 514 |
-
bx, by, bw, bh = fr["bbox"]
|
| 515 |
-
fr["bbox"] = [
|
| 516 |
-
int(bx * sx), int(by * sy),
|
| 517 |
-
int(bw * sx), int(bh * sy),
|
| 518 |
-
]
|
| 519 |
extracted.append(fr)
|
| 520 |
|
| 521 |
# ════════════════════════════════════════════════════════
|
|
|
|
| 50 |
|
| 51 |
# ── Constants ─────────────────────────────────────────────────────
|
| 52 |
YOLO_PERSON_CLASS_ID = 0
|
| 53 |
+
MIN_FACE_SIZE = 20 # lowered: 40 missed small faces in group photos
|
| 54 |
MAX_FACES_PER_IMAGE = 12 # slightly higher cap for group photos
|
| 55 |
MAX_CROPS = 6 # max YOLO object crops per image
|
| 56 |
MAX_IMAGE_SIZE = 640 # object lane longest edge
|
|
|
|
| 58 |
DET_SIZE_SECONDARY = (640, 640) # fallback / 2nd scale
|
| 59 |
FACE_CROP_THUMB_SIZE = 112 # face thumbnail for Pinecone metadata
|
| 60 |
FACE_CROP_QUALITY = 80 # JPEG quality for thumbnails
|
| 61 |
+
FACE_QUALITY_GATE = 0.35 # lowered from 0.60 — accepts sunglasses, angles, smiles
|
| 62 |
+
# Multi-scale pyramid — tried in order, results merged with IoU dedup
|
| 63 |
+
DET_SCALES = [(1280, 1280), (960, 960), (640, 640)]
|
| 64 |
+
IOU_DEDUP_THRESHOLD = 0.45 # suppress duplicate detections across scales
|
| 65 |
FACE_DIM = 512 # ArcFace embedding dimension
|
| 66 |
ADAFACE_DIM = 512 # AdaFace embedding dimension
|
| 67 |
FUSED_FACE_DIM = 1024 # ArcFace + AdaFace concatenated
|
|
|
|
| 136 |
return arr.transpose(2, 0, 1) # HWC → CHW
|
| 137 |
|
| 138 |
|
| 139 |
+
|
| 140 |
+
def _clahe_enhance(bgr: np.ndarray) -> np.ndarray:
    """Return a contrast-enhanced version of a BGR image.

    The image is converted to LAB, CLAHE (clip limit 2.0, 8x8 tile grid) is
    applied to the lightness channel only, and the result is converted back
    to BGR. The two colour channels pass through unchanged, which is meant to
    help detection on dark, washed-out or low-contrast photos.
    """
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB))
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced_lab = cv2.merge([equalizer.apply(lightness), chan_a, chan_b])
    return cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2BGR)
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
def _iou(box_a: list, box_b: list) -> float:
|
| 150 |
+
"""IoU between two [x1,y1,x2,y2] boxes."""
|
| 151 |
+
xa = max(box_a[0], box_b[0]); ya = max(box_a[1], box_b[1])
|
| 152 |
+
xb = min(box_a[2], box_b[2]); yb = min(box_a[3], box_b[3])
|
| 153 |
+
inter = max(0, xb - xa) * max(0, yb - ya)
|
| 154 |
+
if inter == 0:
|
| 155 |
+
return 0.0
|
| 156 |
+
area_a = (box_a[2]-box_a[0]) * (box_a[3]-box_a[1])
|
| 157 |
+
area_b = (box_b[2]-box_b[0]) * (box_b[3]-box_b[1])
|
| 158 |
+
return inter / (area_a + area_b - inter)
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
def _dedup_faces(faces_list: list, iou_thresh: float = IOU_DEDUP_THRESHOLD) -> list:
    """Drop overlapping duplicate detections, keeping the highest-scoring one.

    Faces are visited in descending ``det_score`` order. A face is kept only
    if its integer-truncated bbox does not overlap any already-kept face by
    more than ``iou_thresh``.

    Args:
        faces_list: Detected face objects exposing ``det_score`` and a
            ``bbox`` (``[x1, y1, x2, y2]``) that supports ``astype``.
        iou_thresh: IoU above which two detections count as the same face.

    Returns:
        The surviving faces, ordered by descending ``det_score``.
    """
    if not faces_list:
        return []

    ranked = sorted(faces_list, key=lambda f: float(f.det_score), reverse=True)
    kept = []
    # Integer boxes of the kept faces, converted once each rather than being
    # re-derived from ``bbox`` on every pairwise comparison.
    kept_boxes = []
    for face in ranked:
        box = face.bbox.astype(int).tolist()[:4]
        if any(_iou(box, other) > iou_thresh for other in kept_boxes):
            continue
        kept.append(face)
        kept_boxes.append(box)
    return kept
|
| 174 |
+
|
| 175 |
# ════════════════════════════════════════════════════════════════
|
| 176 |
# AIModelManager — V4
|
| 177 |
# ════════════════════════════════════════════════════════════════
|
|
|
|
| 406 |
img_np = (img_np * 255).astype(np.uint8)
|
| 407 |
bgr = img_np[:, :, ::-1].copy() if img_np.shape[2] == 3 else img_np.copy()
|
| 408 |
|
| 409 |
+
# ── Preprocessing: CLAHE contrast enhancement ─────────
|
| 410 |
+
# Helps with dark/overexposed/low-contrast photos
|
| 411 |
+
bgr_enhanced = _clahe_enhance(bgr)
|
| 412 |
+
|
| 413 |
+
# ── Multi-scale + flip detection ──────────────────────
|
| 414 |
+
# Run SCRFD at multiple resolutions AND on horizontally
|
| 415 |
+
# flipped image. Catches faces that one scale/orientation misses.
|
| 416 |
+
# Results are merged and deduplicated by IoU.
|
| 417 |
+
all_raw_faces = []
|
| 418 |
+
H, W = bgr.shape[:2]
|
| 419 |
+
|
| 420 |
+
for scale in DET_SCALES:
|
| 421 |
+
# Resize to this scale for detection
|
| 422 |
+
scale_w = min(W, scale[0])
|
| 423 |
+
scale_h = min(H, scale[1])
|
| 424 |
+
if scale_w == W and scale_h == H:
|
| 425 |
+
bgr_scaled = bgr_enhanced
|
| 426 |
+
else:
|
| 427 |
+
bgr_scaled = cv2.resize(bgr_enhanced, (scale_w, scale_h))
|
| 428 |
+
|
| 429 |
+
print(f"🔍 SCRFD detection at {scale_w}×{scale_h}...")
|
| 430 |
+
# Temporarily set det_size for this scale
|
| 431 |
+
try:
|
| 432 |
+
self.face_app.det_model.input_size = scale
|
| 433 |
+
with self._face_lock:
|
| 434 |
+
faces_at_scale = self.face_app.get(bgr_scaled)
|
| 435 |
+
# Scale bboxes back to original dimensions
|
| 436 |
+
sx = W / scale_w; sy = H / scale_h
|
| 437 |
+
for f in faces_at_scale:
|
| 438 |
+
if sx != 1.0 or sy != 1.0:
|
| 439 |
+
f.bbox[0] *= sx; f.bbox[1] *= sy
|
| 440 |
+
f.bbox[2] *= sx; f.bbox[3] *= sy
|
| 441 |
+
all_raw_faces.extend(faces_at_scale)
|
| 442 |
+
except Exception:
|
| 443 |
+
pass # scale failed, continue
|
| 444 |
+
|
| 445 |
+
# Horizontal flip pass — catches profile/turned faces
|
| 446 |
+
bgr_flip = cv2.flip(bgr_enhanced, 1)
|
| 447 |
+
try:
|
| 448 |
+
self.face_app.det_model.input_size = DET_SIZE_PRIMARY
|
| 449 |
+
with self._face_lock:
|
| 450 |
+
faces_flip = self.face_app.get(bgr_flip)
|
| 451 |
+
# Mirror bboxes back to original orientation
|
| 452 |
+
for f in faces_flip:
|
| 453 |
+
x1, y1, x2, y2 = f.bbox
|
| 454 |
+
f.bbox[0] = W - x2; f.bbox[2] = W - x1
|
| 455 |
+
all_raw_faces.extend(faces_flip)
|
| 456 |
+
except Exception:
|
| 457 |
+
pass
|
| 458 |
+
|
| 459 |
+
# Restore primary det_size
|
| 460 |
+
self.face_app.det_model.input_size = DET_SIZE_PRIMARY
|
| 461 |
+
|
| 462 |
+
# Deduplicate across scales and flip
|
| 463 |
+
faces = _dedup_faces(all_raw_faces)
|
| 464 |
+
print(f" Raw detections: {len(all_raw_faces)} → after dedup: {len(faces)}")
|
| 465 |
|
| 466 |
results = []
|
| 467 |
accepted = 0
|
|
|
|
| 584 |
# the original resolution (multi-scale fallback).
|
| 585 |
# ════════════════════════════════════════════════════════
|
| 586 |
if detect_faces and self.face_app is not None:
|
| 587 |
+
# Multi-scale + CLAHE + flip all handled inside _detect_and_encode_faces
|
| 588 |
+
# Pass the full-resolution image — internal scaling handles the rest
|
| 589 |
+
face_results = self._detect_and_encode_faces(img_np)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 590 |
|
| 591 |
if face_results:
|
| 592 |
faces_found = True
|
|
|
|
|
|
|
|
|
|
| 593 |
for fr in face_results:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 594 |
extracted.append(fr)
|
| 595 |
|
| 596 |
# ════════════════════════════════════════════════════════
|