visual-search-api2

Sleeping

App Files Files Community

AdarshDRC committed 29 days ago

Commit

572243e

1 Parent(s): b87f832

fix: improved retrieval using face search

Browse files

Files changed (1) hide show

src/models.py +100 -25

src/models.py CHANGED Viewed

@@ -50,7 +50,7 @@ ADAFACE_WEIGHTS_AVAILABLE = False  # controlled by ENABLE_ADAFACE env var
 # ── Constants ─────────────────────────────────────────────────────
 YOLO_PERSON_CLASS_ID  = 0
-MIN_FACE_SIZE         = 40      # V4: stricter — tiny faces embed poorly
 MAX_FACES_PER_IMAGE   = 12      # slightly higher cap for group photos
 MAX_CROPS             = 6       # max YOLO object crops per image
 MAX_IMAGE_SIZE        = 640     # object lane longest edge
@@ -58,7 +58,10 @@ DET_SIZE_PRIMARY      = (1280, 1280)  # V4: 1280 for small-face detection
 DET_SIZE_SECONDARY    = (640, 640)    # fallback / 2nd scale
 FACE_CROP_THUMB_SIZE  = 112     # face thumbnail for Pinecone metadata
 FACE_CROP_QUALITY     = 80      # JPEG quality for thumbnails
-FACE_QUALITY_GATE     = 0.35    # minimum det_score to accept a face
 FACE_DIM              = 512     # ArcFace embedding dimension
 ADAFACE_DIM           = 512     # AdaFace embedding dimension
 FUSED_FACE_DIM        = 1024    # ArcFace + AdaFace concatenated
@@ -133,6 +136,42 @@ def _face_crop_for_adaface(
     return arr.transpose(2, 0, 1)                    # HWC → CHW
 # ════════════════════════════════════════════════════════════════
 #  AIModelManager — V4
 # ════════════════════════════════════════════════════════════════
@@ -367,10 +406,62 @@ class AIModelManager:
                 img_np = (img_np * 255).astype(np.uint8)
             bgr = img_np[:, :, ::-1].copy() if img_np.shape[2] == 3 else img_np.copy()
-            print(f"🔍 SCRFD detection on {bgr.shape[1]}×{bgr.shape[0]} image...")
-            with self._face_lock:
-                faces = self.face_app.get(bgr)
-            print(f"   Raw detections: {len(faces)}")
             results  = []
             accepted = 0
@@ -493,29 +584,13 @@ class AIModelManager:
         #     the original resolution (multi-scale fallback).
         # ════════════════════════════════════════════════════════
         if detect_faces and self.face_app is not None:
-            # Scale 1: resize longest edge to 1280 for detection
-            detect_pil_1280 = _resize_pil(original_pil, 1280)
-            detect_np_1280  = np.array(detect_pil_1280)
-            face_results    = self._detect_and_encode_faces(detect_np_1280)
-            # Scale 2: if nothing found, try original resolution
-            # (sometimes resizing DOWN helps when image is already small)
-            if not face_results and max(original_pil.size) < 1280:
-                print("🔄 Multi-scale fallback: retrying at original resolution")
-                face_results = self._detect_and_encode_faces(img_np)
             if face_results:
                 faces_found = True
-                # Scale bboxes back to original-image coordinates
-                sx = original_pil.width  / detect_pil_1280.width
-                sy = original_pil.height / detect_pil_1280.height
                 for fr in face_results:
-                    if sx != 1.0 or sy != 1.0:
-                        bx, by, bw, bh = fr["bbox"]
-                        fr["bbox"] = [
-                            int(bx * sx), int(by * sy),
-                            int(bw * sx), int(bh * sy),
-                        ]
                     extracted.append(fr)
         # ════════════════════════════════════════════════════════

 # ── Constants ─────────────────────────────────────────────────────
 YOLO_PERSON_CLASS_ID  = 0
+MIN_FACE_SIZE         = 20      # lowered: 40 missed small faces in group photos
 MAX_FACES_PER_IMAGE   = 12      # slightly higher cap for group photos
 MAX_CROPS             = 6       # max YOLO object crops per image
 MAX_IMAGE_SIZE        = 640     # object lane longest edge
 DET_SIZE_SECONDARY    = (640, 640)    # fallback / 2nd scale
 FACE_CROP_THUMB_SIZE  = 112     # face thumbnail for Pinecone metadata
 FACE_CROP_QUALITY     = 80      # JPEG quality for thumbnails
+FACE_QUALITY_GATE     = 0.35    # minimum det_score to accept a face — accepts sunglasses, angles, smiles (NOTE(review): previous revision was already 0.35; "lowered from 0.60" presumably refers to an older value — confirm)
+# Multi-scale pyramid of (width, height) detection sizes — tried in order, results merged with IoU dedup
+DET_SCALES            = [(1280, 1280), (960, 960), (640, 640)]
+IOU_DEDUP_THRESHOLD   = 0.45    # IoU above which detections from different scales/flip count as duplicates (highest det_score kept)
 FACE_DIM              = 512     # ArcFace embedding dimension
 ADAFACE_DIM           = 512     # AdaFace embedding dimension
 FUSED_FACE_DIM        = 1024    # ArcFace + AdaFace concatenated
     return arr.transpose(2, 0, 1)                    # HWC → CHW
+def _clahe_enhance(bgr: np.ndarray) -> np.ndarray:
+    """Return a contrast-equalised copy of a BGR image.
+
+    The image is converted to LAB, CLAHE (clip limit 2.0, 8x8 tile grid) is
+    applied to the lightness channel only, and the result is converted back
+    to BGR. Meant to help detection on dark, washed-out or low-contrast photos.
+    """
+    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB))
+    equaliser = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
+    # Only the lightness channel is equalised; the two colour channels pass through.
+    merged = cv2.merge([equaliser.apply(lightness), chan_a, chan_b])
+    return cv2.cvtColor(merged, cv2.COLOR_LAB2BGR)
+def _iou(box_a: list, box_b: list) -> float:
+    """Return the intersection-over-union of two [x1, y1, x2, y2] boxes.
+
+    Boxes that do not overlap, or that only share an edge or corner,
+    yield 0.0.
+    """
+    left, top = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
+    right, bottom = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
+    # Clamp each extent at zero so disjoint boxes give an empty intersection.
+    overlap = max(0, right - left) * max(0, bottom - top)
+    if overlap == 0:
+        return 0.0
+    union = (
+        (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
+        + (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
+        - overlap
+    )
+    return overlap / union
+def _dedup_faces(faces_list: list, iou_thresh: float = IOU_DEDUP_THRESHOLD) -> list:
+    """Remove duplicate detections across scales/flips, keeping the best score.
+
+    Faces are visited in descending ``det_score`` order. A face is dropped
+    when the IoU of its integer-truncated bbox with any already-kept face
+    exceeds ``iou_thresh``.
+
+    Args:
+        faces_list: detections exposing ``det_score`` and an array-like
+            ``bbox`` whose first four values are x1, y1, x2, y2.
+        iou_thresh: IoU above which two detections are treated as the same face.
+
+    Returns:
+        The surviving faces, ordered by descending ``det_score``. An empty
+        or falsy input yields ``[]``. The input list is not modified.
+    """
+    if not faces_list:
+        return []
+    kept = []
+    # Integer boxes of the kept faces, converted once and reused for every
+    # later comparison instead of re-running astype() per coordinate.
+    kept_boxes = []
+    for face in sorted(faces_list, key=lambda f: float(f.det_score), reverse=True):
+        box = face.bbox.astype(int)[:4].tolist()
+        if any(_iou(box, other) > iou_thresh for other in kept_boxes):
+            continue
+        kept.append(face)
+        kept_boxes.append(box)
+    return kept
 # ════════════════════════════════════════════════════════════════
 #  AIModelManager — V4
 # ════════════════════════════════════════════════════════════════
                 img_np = (img_np * 255).astype(np.uint8)
             bgr = img_np[:, :, ::-1].copy() if img_np.shape[2] == 3 else img_np.copy()
+            # ── Preprocessing: CLAHE contrast enhancement ─────────
+            # Helps with dark/overexposed/low-contrast photos
+            bgr_enhanced = _clahe_enhance(bgr)
+            # ── Multi-scale + flip detection ──────────────────────
+            # Run SCRFD at multiple resolutions AND on horizontally
+            # flipped image. Catches faces that one scale/orientation misses.
+            # Results are merged and deduplicated by IoU.
+            all_raw_faces = []
+            H, W = bgr.shape[:2]
+            for scale in DET_SCALES:
+                # Resize to this scale for detection
+                scale_w = min(W, scale[0])
+                scale_h = min(H, scale[1])
+                if scale_w == W and scale_h == H:
+                    bgr_scaled = bgr_enhanced
+                else:
+                    bgr_scaled = cv2.resize(bgr_enhanced, (scale_w, scale_h))
+                print(f"🔍 SCRFD detection at {scale_w}×{scale_h}...")
+                # Temporarily set det_size for this scale
+                try:
+                    self.face_app.det_model.input_size = scale
+                    with self._face_lock:
+                        faces_at_scale = self.face_app.get(bgr_scaled)
+                    # Scale bboxes back to original dimensions
+                    sx = W / scale_w; sy = H / scale_h
+                    for f in faces_at_scale:
+                        if sx != 1.0 or sy != 1.0:
+                            f.bbox[0] *= sx; f.bbox[1] *= sy
+                            f.bbox[2] *= sx; f.bbox[3] *= sy
+                    all_raw_faces.extend(faces_at_scale)
+                except Exception:
+                    pass  # scale failed, continue
+            # Horizontal flip pass — catches profile/turned faces
+            bgr_flip = cv2.flip(bgr_enhanced, 1)
+            try:
+                self.face_app.det_model.input_size = DET_SIZE_PRIMARY
+                with self._face_lock:
+                    faces_flip = self.face_app.get(bgr_flip)
+                # Mirror bboxes back to original orientation
+                for f in faces_flip:
+                    x1, y1, x2, y2 = f.bbox
+                    f.bbox[0] = W - x2; f.bbox[2] = W - x1
+                all_raw_faces.extend(faces_flip)
+            except Exception:
+                pass
+            # Restore primary det_size
+            self.face_app.det_model.input_size = DET_SIZE_PRIMARY
+            # Deduplicate across scales and flip
+            faces = _dedup_faces(all_raw_faces)
+            print(f"   Raw detections: {len(all_raw_faces)} → after dedup: {len(faces)}")
             results  = []
             accepted = 0
         #     the original resolution (multi-scale fallback).
         # ════════════════════════════════════════════════════════
         if detect_faces and self.face_app is not None:
+            # Multi-scale + CLAHE + flip all handled inside _detect_and_encode_faces
+            # Pass the full-resolution image — internal scaling handles the rest
+            face_results = self._detect_and_encode_faces(img_np)
             if face_results:
                 faces_found = True
                 for fr in face_results:
                     extracted.append(fr)
         # ════════════════════════════════════════════════════════