AdarshDRC committed on
Commit
572243e
·
1 Parent(s): b87f832

fix: improved retrieval using face search

Browse files
Files changed (1) hide show
  1. src/models.py +100 -25
src/models.py CHANGED
@@ -50,7 +50,7 @@ ADAFACE_WEIGHTS_AVAILABLE = False # controlled by ENABLE_ADAFACE env var
50
 
51
  # ── Constants ─────────────────────────────────────────────────────
52
  YOLO_PERSON_CLASS_ID = 0
53
- MIN_FACE_SIZE = 40 # V4: stricter tiny faces embed poorly
54
  MAX_FACES_PER_IMAGE = 12 # slightly higher cap for group photos
55
  MAX_CROPS = 6 # max YOLO object crops per image
56
  MAX_IMAGE_SIZE = 640 # object lane longest edge
@@ -58,7 +58,10 @@ DET_SIZE_PRIMARY = (1280, 1280) # V4: 1280 for small-face detection
58
  DET_SIZE_SECONDARY = (640, 640) # fallback / 2nd scale
59
  FACE_CROP_THUMB_SIZE = 112 # face thumbnail for Pinecone metadata
60
  FACE_CROP_QUALITY = 80 # JPEG quality for thumbnails
61
- FACE_QUALITY_GATE = 0.35 # minimum det_score to accept a face
 
 
 
62
  FACE_DIM = 512 # ArcFace embedding dimension
63
  ADAFACE_DIM = 512 # AdaFace embedding dimension
64
  FUSED_FACE_DIM = 1024 # ArcFace + AdaFace concatenated
@@ -133,6 +136,42 @@ def _face_crop_for_adaface(
133
  return arr.transpose(2, 0, 1) # HWC → CHW
134
 
135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  # ════════════════════════════════════════════════════════════════
137
  # AIModelManager — V4
138
  # ════════════════════════════════════════════════════════════════
@@ -367,10 +406,62 @@ class AIModelManager:
367
  img_np = (img_np * 255).astype(np.uint8)
368
  bgr = img_np[:, :, ::-1].copy() if img_np.shape[2] == 3 else img_np.copy()
369
 
370
- print(f"🔍 SCRFD detection on {bgr.shape[1]}×{bgr.shape[0]} image...")
371
- with self._face_lock:
372
- faces = self.face_app.get(bgr)
373
- print(f" Raw detections: {len(faces)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
 
375
  results = []
376
  accepted = 0
@@ -493,29 +584,13 @@ class AIModelManager:
493
  # the original resolution (multi-scale fallback).
494
  # ════════════════════════════════════════════════════════
495
  if detect_faces and self.face_app is not None:
496
- # Scale 1: resize longest edge to 1280 for detection
497
- detect_pil_1280 = _resize_pil(original_pil, 1280)
498
- detect_np_1280 = np.array(detect_pil_1280)
499
- face_results = self._detect_and_encode_faces(detect_np_1280)
500
-
501
- # Scale 2: if nothing found, try original resolution
502
- # (sometimes resizing DOWN helps when image is already small)
503
- if not face_results and max(original_pil.size) < 1280:
504
- print("🔄 Multi-scale fallback: retrying at original resolution")
505
- face_results = self._detect_and_encode_faces(img_np)
506
 
507
  if face_results:
508
  faces_found = True
509
- # Scale bboxes back to original-image coordinates
510
- sx = original_pil.width / detect_pil_1280.width
511
- sy = original_pil.height / detect_pil_1280.height
512
  for fr in face_results:
513
- if sx != 1.0 or sy != 1.0:
514
- bx, by, bw, bh = fr["bbox"]
515
- fr["bbox"] = [
516
- int(bx * sx), int(by * sy),
517
- int(bw * sx), int(bh * sy),
518
- ]
519
  extracted.append(fr)
520
 
521
  # ════════════════════════════════════════════════════════
 
50
 
51
  # ── Constants ─────────────────────────────────────────────────────
52
  YOLO_PERSON_CLASS_ID = 0
53
+ MIN_FACE_SIZE = 20 # lowered: 40 missed small faces in group photos
54
  MAX_FACES_PER_IMAGE = 12 # slightly higher cap for group photos
55
  MAX_CROPS = 6 # max YOLO object crops per image
56
  MAX_IMAGE_SIZE = 640 # object lane longest edge
 
58
  DET_SIZE_SECONDARY = (640, 640) # fallback / 2nd scale
59
  FACE_CROP_THUMB_SIZE = 112 # face thumbnail for Pinecone metadata
60
  FACE_CROP_QUALITY = 80 # JPEG quality for thumbnails
61
+ FACE_QUALITY_GATE = 0.35 # lowered from 0.60 accepts sunglasses, angles, smiles
62
+ # Multi-scale pyramid — tried in order, results merged with IoU dedup
63
+ DET_SCALES = [(1280, 1280), (960, 960), (640, 640)]
64
+ IOU_DEDUP_THRESHOLD = 0.45 # suppress duplicate detections across scales
65
  FACE_DIM = 512 # ArcFace embedding dimension
66
  ADAFACE_DIM = 512 # AdaFace embedding dimension
67
  FUSED_FACE_DIM = 1024 # ArcFace + AdaFace concatenated
 
136
  return arr.transpose(2, 0, 1) # HWC → CHW
137
 
138
 
139
+
140
def _clahe_enhance(bgr: np.ndarray) -> np.ndarray:
    """Apply CLAHE to the luminance channel of a BGR image.

    Equalizes local contrast on the L plane of LAB space only, which helps
    the face detector on dark, overexposed, or low-contrast photos without
    shifting the image's colors.
    """
    luminance, chroma_a, chroma_b = cv2.split(
        cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB)
    )
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced_lab = cv2.merge([equalizer.apply(luminance), chroma_a, chroma_b])
    return cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2BGR)
147
+
148
+
149
+ def _iou(box_a: list, box_b: list) -> float:
150
+ """IoU between two [x1,y1,x2,y2] boxes."""
151
+ xa = max(box_a[0], box_b[0]); ya = max(box_a[1], box_b[1])
152
+ xb = min(box_a[2], box_b[2]); yb = min(box_a[3], box_b[3])
153
+ inter = max(0, xb - xa) * max(0, yb - ya)
154
+ if inter == 0:
155
+ return 0.0
156
+ area_a = (box_a[2]-box_a[0]) * (box_a[3]-box_a[1])
157
+ area_b = (box_b[2]-box_b[0]) * (box_b[3]-box_b[1])
158
+ return inter / (area_a + area_b - inter)
159
+
160
+
161
def _dedup_faces(faces_list: list, iou_thresh: float = IOU_DEDUP_THRESHOLD) -> list:
    """Greedy IoU suppression of duplicate face detections across scales/flips.

    Faces are visited in descending ``det_score`` order; a candidate is
    dropped when its bbox overlaps an already-kept face by more than
    *iou_thresh*, so the highest-scoring detection of each face survives.

    Args:
        faces_list: face objects exposing ``det_score`` and ``bbox``
            (a numpy ``[x1, y1, x2, y2]`` array) attributes.
        iou_thresh: overlap ratio above which two boxes count as the
            same face.

    Returns:
        The surviving face objects, sorted by descending ``det_score``.
    """
    if not faces_list:
        return []
    ranked = sorted(faces_list, key=lambda f: float(f.det_score), reverse=True)
    kept = []
    kept_boxes = []  # int boxes cached once per kept face — the original
                     # re-ran k.bbox.astype(int) for every comparison
    for face in ranked:
        box = [int(v) for v in face.bbox]  # int() truncates like astype(int)
        if any(_iou(box, kb) > iou_thresh for kb in kept_boxes):
            continue  # duplicate of a higher-scoring detection
        kept.append(face)
        kept_boxes.append(box)
    return kept
174
+
175
  # ════════════════════════════════════════════════════════════════
176
  # AIModelManager — V4
177
  # ════════════════════════════════════════════════════════════════
 
406
  img_np = (img_np * 255).astype(np.uint8)
407
  bgr = img_np[:, :, ::-1].copy() if img_np.shape[2] == 3 else img_np.copy()
408
 
409
+ # ── Preprocessing: CLAHE contrast enhancement ─────────
410
+ # Helps with dark/overexposed/low-contrast photos
411
+ bgr_enhanced = _clahe_enhance(bgr)
412
+
413
+ # ── Multi-scale + flip detection ──────────────────────
414
+ # Run SCRFD at multiple resolutions AND on horizontally
415
+ # flipped image. Catches faces that one scale/orientation misses.
416
+ # Results are merged and deduplicated by IoU.
417
+ all_raw_faces = []
418
+ H, W = bgr.shape[:2]
419
+
420
+ for scale in DET_SCALES:
421
+ # Resize to this scale for detection
422
+ scale_w = min(W, scale[0])
423
+ scale_h = min(H, scale[1])
424
+ if scale_w == W and scale_h == H:
425
+ bgr_scaled = bgr_enhanced
426
+ else:
427
+ bgr_scaled = cv2.resize(bgr_enhanced, (scale_w, scale_h))
428
+
429
+ print(f"🔍 SCRFD detection at {scale_w}×{scale_h}...")
430
+ # Temporarily set det_size for this scale
431
+ try:
432
+ self.face_app.det_model.input_size = scale
433
+ with self._face_lock:
434
+ faces_at_scale = self.face_app.get(bgr_scaled)
435
+ # Scale bboxes back to original dimensions
436
+ sx = W / scale_w; sy = H / scale_h
437
+ for f in faces_at_scale:
438
+ if sx != 1.0 or sy != 1.0:
439
+ f.bbox[0] *= sx; f.bbox[1] *= sy
440
+ f.bbox[2] *= sx; f.bbox[3] *= sy
441
+ all_raw_faces.extend(faces_at_scale)
442
+ except Exception:
443
+ pass # scale failed, continue
444
+
445
+ # Horizontal flip pass — catches profile/turned faces
446
+ bgr_flip = cv2.flip(bgr_enhanced, 1)
447
+ try:
448
+ self.face_app.det_model.input_size = DET_SIZE_PRIMARY
449
+ with self._face_lock:
450
+ faces_flip = self.face_app.get(bgr_flip)
451
+ # Mirror bboxes back to original orientation
452
+ for f in faces_flip:
453
+ x1, y1, x2, y2 = f.bbox
454
+ f.bbox[0] = W - x2; f.bbox[2] = W - x1
455
+ all_raw_faces.extend(faces_flip)
456
+ except Exception:
457
+ pass
458
+
459
+ # Restore primary det_size
460
+ self.face_app.det_model.input_size = DET_SIZE_PRIMARY
461
+
462
+ # Deduplicate across scales and flip
463
+ faces = _dedup_faces(all_raw_faces)
464
+ print(f" Raw detections: {len(all_raw_faces)} → after dedup: {len(faces)}")
465
 
466
  results = []
467
  accepted = 0
 
584
  # the original resolution (multi-scale fallback).
585
  # ════════════════════════════════════════════════════════
586
  if detect_faces and self.face_app is not None:
587
+ # Multi-scale + CLAHE + flip all handled inside _detect_and_encode_faces
588
+ # Pass the full-resolution image — internal scaling handles the rest
589
+ face_results = self._detect_and_encode_faces(img_np)
 
 
 
 
 
 
 
590
 
591
  if face_results:
592
  faces_found = True
 
 
 
593
  for fr in face_results:
 
 
 
 
 
 
594
  extracted.append(fr)
595
 
596
  # ════════════════════════════════════════════════════════