fix : search engine
Files changed:
- Dockerfile +111 -37
- requirements.txt +56 -0
- src/cloud_db.py +222 -44
- src/models.py +122 -130
Dockerfile
CHANGED
@@ -1,69 +1,143 @@
-# Dockerfile — Enterprise Lens
-#
-#

FROM python:3.10-slim

WORKDIR /app

-# ── System deps ──────────────────────────────────────────────────
RUN apt-get update && apt-get install -y --no-install-recommends \
-    libgl1
    && rm -rf /var/lib/apt/lists/*

-# ──
-RUN pip install --no-cache-dir \
-    "numpy<2.0" \
-    "setuptools>=65" \
-    wheel \
-    cython \
-    scikit-build \
-    cmake
-
-# ── Step 2: onnxruntime (MUST be before insightface) ─────────────
-RUN pip install --no-cache-dir onnxruntime
-
-# ── Step 3: insightface ───────────────────────────────────────────
-RUN pip install --no-cache-dir --prefer-binary "insightface>=0.7.3"
-
-# ── Step 4: Remaining requirements ───────────────────────────────
COPY requirements.txt .
-RUN pip install --no-cache-dir --

-# ── Copy
COPY . .
RUN mkdir -p temp_uploads saved_images && chmod -R 777 temp_uploads saved_images

-# ── Pre-download
-#
RUN python - <<'EOF'
-import os
-os.environ["TRANSFORMERS_VERBOSITY"] = "error"

from transformers import AutoProcessor, AutoModel
AutoProcessor.from_pretrained("google/siglip-base-patch16-224", use_fast=True)
AutoModel.from_pretrained("google/siglip-base-patch16-224")
-print("SigLIP done")

from transformers import AutoImageProcessor
AutoImageProcessor.from_pretrained("facebook/dinov2-base")
AutoModel.from_pretrained("facebook/dinov2-base")
-print("DINOv2 done")

from ultralytics import YOLO
YOLO("yolo11n-seg.pt")
-print("YOLO done")

EOF

EXPOSE 7860
ENV WEB_CONCURRENCY=1

CMD uvicorn main:app \
+# Dockerfile — Enterprise Lens V4
+#
+# Changes vs V3:
+#   • Removed deepface / GhostFaceNet / RetinaFace entirely
+#   • Added insightface + onnxruntime (SCRFD + ArcFace-R100)
+#   • Added huggingface_hub for AdaFace weight download
+#   • Pre-downloads AdaFace IR-50 MS1MV2 weights at build time
+#   • Pre-downloads InsightFace buffalo_l pack at build time
+#   • Single worker (InsightFace ONNX is NOT thread-safe)
+#   • index dimensions: enterprise-faces=1024, enterprise-objects=1536

FROM python:3.10-slim

WORKDIR /app

+# ── System deps ───────────────────────────────────────────────────
+#   libGL + libGLib : OpenCV headless
+#   libgomp1        : OpenMP (used by ONNX runtime + numpy)
+#   git             : needed by some HF hub downloads
+#   curl            : useful for health checks / debug
RUN apt-get update && apt-get install -y --no-install-recommends \
+    libgl1 \
+    libglib2.0-0 \
+    libgomp1 \
+    git \
+    curl \
    && rm -rf /var/lib/apt/lists/*

+# ── Python deps ───────────────────────────────────────────────────
COPY requirements.txt .
+RUN pip install --no-cache-dir --compile -r requirements.txt

+# ── Copy application code ────────────────────────────────────────
COPY . .
+
RUN mkdir -p temp_uploads saved_images && chmod -R 777 temp_uploads saved_images

+# ── Pre-download ALL AI models at BUILD time ─────────────────────
+# Bakes weights into image layer → cold start ~10s instead of ~5min
+#
+# Model sizes (approximate):
+#   SigLIP base                                        ~380 MB
+#   DINOv2 base                                        ~330 MB
+#   YOLO11n-seg                                          ~6 MB
+#   InsightFace buffalo_l (SCRFD-10GF + ArcFace-R100)  ~280 MB
+#   AdaFace IR-50 MS1MV2                               ~170 MB
+#   Total image delta: ~1.2 GB
RUN python - <<'EOF'
+import os, sys

+# ── SigLIP ────────────────────────────────────────────────────────
+print("📦 Pre-downloading SigLIP...")
from transformers import AutoProcessor, AutoModel
AutoProcessor.from_pretrained("google/siglip-base-patch16-224", use_fast=True)
AutoModel.from_pretrained("google/siglip-base-patch16-224")
+print("   ✅ SigLIP done")

+# ── DINOv2 ───────────────────────────────────────────────────────
+print("📦 Pre-downloading DINOv2...")
from transformers import AutoImageProcessor
AutoImageProcessor.from_pretrained("facebook/dinov2-base")
AutoModel.from_pretrained("facebook/dinov2-base")
+print("   ✅ DINOv2 done")

+# ── YOLO11n-seg ───────────────────────────────────────────────────
+print("📦 Pre-downloading YOLO11n-seg...")
from ultralytics import YOLO
YOLO("yolo11n-seg.pt")
+print("   ✅ YOLO done")
+
+# ── InsightFace buffalo_l ─────────────────────────────────────────
+# buffalo_l = SCRFD-10GF (detector) + ArcFace-R100 (encoder)
+# Handles small faces in group photos (det_size up to 1280x1280)
+print("📦 Pre-downloading InsightFace buffalo_l...")
+import numpy as np
+from insightface.app import FaceAnalysis
+face_app = FaceAnalysis(
+    name="buffalo_l",
+    providers=["CPUExecutionProvider"],
+)
+face_app.prepare(ctx_id=-1, det_size=(640, 640))
+# Warmup inference to confirm weights loaded
+test = np.zeros((112, 112, 3), dtype=np.uint8)
+face_app.get(test)
+print("   ✅ InsightFace buffalo_l done")
+
+# ── AdaFace IR-50 MS1MV2 ─────────────────────────────────────────
+# Repo: minchul/cvlface_adaface_ir50_ms1mv2
+# Loaded via AutoModel + trust_remote_code=True
+# Requires HF_TOKEN build arg (set in HF Space secrets)
+print("📦 Pre-downloading AdaFace IR-50 MS1MV2...")
+import os, sys
+from huggingface_hub import hf_hub_download
+from transformers import AutoModel
+
+HF_TOKEN = os.getenv("HF_TOKEN", None)
+REPO_ID = "minchul/cvlface_adaface_ir50_ms1mv2"
+CACHE_PATH = os.path.expanduser("~/.cvlface_cache/minchul/cvlface_adaface_ir50_ms1mv2")
+os.makedirs(CACHE_PATH, exist_ok=True)

+# Download files.txt manifest
+hf_hub_download(repo_id=REPO_ID, filename="files.txt",
+                token=HF_TOKEN, local_dir=CACHE_PATH, local_dir_use_symlinks=False)
+
+with open(os.path.join(CACHE_PATH, "files.txt")) as f:
+    extra = [x.strip() for x in f.read().split("\n") if x.strip()]
+
+for fname in extra + ["config.json", "wrapper.py", "model.safetensors"]:
+    fpath = os.path.join(CACHE_PATH, fname)
+    if not os.path.exists(fpath):
+        hf_hub_download(repo_id=REPO_ID, filename=fname,
+                        token=HF_TOKEN, local_dir=CACHE_PATH, local_dir_use_symlinks=False)
+
+# Load and verify
+cwd = os.getcwd(); os.chdir(CACHE_PATH); sys.path.insert(0, CACHE_PATH)
+try:
+    model = AutoModel.from_pretrained(CACHE_PATH, trust_remote_code=True, token=HF_TOKEN)
+finally:
+    os.chdir(cwd)
+    if CACHE_PATH in sys.path: sys.path.remove(CACHE_PATH)
+
+import torch
+with torch.no_grad():
+    out = model(torch.zeros(1, 3, 112, 112))
+emb = out if isinstance(out, torch.Tensor) else out.embedding
+print(f"   ✅ AdaFace loaded — output dim={emb.shape[-1]}")
+
+print("")
+print("✅ All V4 models pre-downloaded and verified!")
+print("   enterprise-faces   index dim : 1024 (ArcFace-512 + AdaFace-512)")
+print("   enterprise-objects index dim : 1536 (SigLIP-768 + DINOv2-768)")
EOF

EXPOSE 7860
+
+# ── Single worker — InsightFace ONNX is NOT thread-safe ──────────
+# Each request acquires _face_lock before ONNX inference.
+# Multiple workers would each load their own model copy into RAM
+# (~1.5 GB each) which OOMs free HF Spaces (16 GB limit).
+# If you have a paid GPU Space with >32 GB RAM, set WEB_CONCURRENCY=2.
ENV WEB_CONCURRENCY=1

CMD uvicorn main:app \
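The single-worker setting assumes main.py serialises every InsightFace call behind a lock, as the comment above describes. A minimal sketch of that pattern, assuming a FastAPI handler and a module-level lock (the route name, handler body, and variable names are illustrative, not main.py's actual API):

    # Sketch only: guard the shared ONNX session with one lock per process.
    import threading
    import cv2
    import numpy as np
    from fastapi import FastAPI, UploadFile
    from insightface.app import FaceAnalysis

    app = FastAPI()
    _face_lock = threading.Lock()                      # one ONNX session, one lock

    face_app = FaceAnalysis(name="buffalo_l", providers=["CPUExecutionProvider"])
    face_app.prepare(ctx_id=-1, det_size=(640, 640))   # loaded once per worker

    @app.post("/detect")
    async def detect(file: UploadFile):
        raw = np.frombuffer(await file.read(), dtype=np.uint8)
        img = cv2.imdecode(raw, cv2.IMREAD_COLOR)
        with _face_lock:                               # serialise inference; the session is not thread-safe
            faces = face_app.get(img)
        return {"num_faces": len(faces)}

Run with uvicorn and WEB_CONCURRENCY=1 as above; with more workers, each process would hold its own ~1.5 GB copy of the models.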
requirements.txt
CHANGED
@@ -76,3 +76,59 @@ onnxruntime>=1.16.0
insightface>=0.7.3


+# requirements.txt — Enterprise Lens V4
+# ════════════════════════════════════════════════════════════════
+# Face Lane  : insightface (SCRFD-10GF + ArcFace-R100)
+#              + AdaFace IR-50 (custom PyTorch backbone)
+#              + huggingface_hub (AdaFace weight download)
+# Object Lane: transformers (SigLIP + DINOv2) + ultralytics (YOLO)
+# API        : fastapi + uvicorn + python-multipart
+# Storage    : pinecone + cloudinary
+# Utilities  : loguru + inflect + aiohttp + python-dotenv
+# ════════════════════════════════════════════════════════════════
+
+# ── Web framework ────────────────────────────────────────────────
+fastapi==0.115.6
+uvicorn[standard]==0.32.1
+python-multipart==0.0.20
+
+# ── AI / ML core ────────────────────────────────────────────────
+# CPU-only torch — swap index URL for CUDA build on GPU spaces
+torch==2.4.1+cpu
+torchvision==0.19.1+cpu
+--extra-index-url https://download.pytorch.org/whl/cpu
+
+# ── HuggingFace — SigLIP, DINOv2, AdaFace weight download ───────
+transformers==4.46.3
+huggingface_hub==0.26.2
+safetensors==0.4.5
+tokenizers==0.20.3
+accelerate==1.1.1
+
+# ── InsightFace — SCRFD detection + ArcFace-R100 encoding ────────
+insightface==0.7.3
+onnxruntime==1.19.2        # CPU ONNX runtime for InsightFace models
+
+# ── YOLO — object segmentation crops ────────────────────────────
+ultralytics==8.3.27
+
+# ── Computer vision utilities ────────────────────────────────────
+opencv-python-headless==4.10.0.84
+Pillow==11.0.0
+numpy==1.26.4
+
+# ── Vector database ──────────────────────────────────────────────
+pinecone==5.4.1
+
+# ── Image CDN ────────────────────────────────────────────────────
+cloudinary==1.41.0
+
+# ── Async HTTP (Supabase logging) ────────────────────────────────
+aiohttp==3.11.9
+
+# ── Logging + text utils ─────────────────────────────────────────
+loguru==0.7.2
+inflect==7.4.0
+
+# ── Config ───────────────────────────────────────────────────────
+python-dotenv==1.0.1
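Because the --extra-index-url line sits inside requirements.txt, a plain `pip install -r requirements.txt` resolves the +cpu wheels. For a GPU Space the torch pins would change roughly as follows (untested sketch; the cu121 index is an assumption about which CUDA build you want):

    torch==2.4.1+cu121
    torchvision==0.19.1+cu121
    --extra-index-url https://download.pytorch.org/whl/cu121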
src/cloud_db.py
CHANGED
@@ -1,68 +1,246 @@
import os
import cloudinary
import cloudinary.uploader
-from pinecone import Pinecone
from dotenv import load_dotenv

load_dotenv()

class CloudDB:
    def __init__(self):
        cloudinary.config(
-            cloud_name=os.getenv("CLOUDINARY_CLOUD_NAME"),
-            api_key=os.getenv("CLOUDINARY_API_KEY"),
-            api_secret=os.getenv("CLOUDINARY_API_SECRET")
        )
        self.pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
-        self.index_faces
-        self.index_objects = self.pc.Index(

        response = cloudinary.uploader.upload(file_path, folder=folder_name)
-        return response[

        if data_dict["type"] == "face":
            self.index_faces.upsert(vectors=payload)
        else:
            self.index_objects.upsert(vectors=payload)

        if query_dict["type"] == "face":
        else:
        return results
+# src/cloud_db.py — Enterprise Lens V4
+# ════════════════════════════════════════════════════════════════
+# NOTE: In the production FastAPI app (main.py), ALL Pinecone and
+# Cloudinary operations are performed directly — this class is NOT
+# called by main.py. It exists as a standalone utility / SDK wrapper
+# for scripts, notebooks, or future use outside the API.
+#
+# If you use this class, ensure your Pinecone indexes match V4 dims:
+#   enterprise-faces   → 1024-D (ArcFace-512 + AdaFace-512, fused)
+#   enterprise-objects → 1536-D (SigLIP-768 + DINOv2-768, fused)
+# ════════════════════════════════════════════════════════════════
+
import os
+import uuid
import cloudinary
import cloudinary.uploader
+from pinecone import Pinecone, ServerlessSpec
from dotenv import load_dotenv

load_dotenv()

+# ── V4 Index constants — MUST match main.py and models.py ────────
+IDX_FACES = "enterprise-faces"
+IDX_OBJECTS = "enterprise-objects"
+IDX_FACES_DIM = 1024      # ArcFace(512) + AdaFace(512) fused, always 1024
+IDX_OBJECTS_DIM = 1536    # SigLIP(768) + DINOv2(768) fused, always 1536
+
+# V4 face similarity thresholds (fused 1024-D cosine space)
+# These MUST stay in sync with main.py FACE_THRESHOLD_* constants
+FACE_THRESHOLD_HIGH = 0.40   # high-quality face (det_score >= 0.85)
+FACE_THRESHOLD_LOW = 0.32    # lower-quality face (det_score < 0.85)
+OBJECT_THRESHOLD = 0.45      # object/scene similarity threshold
+
+
class CloudDB:
+    """
+    Utility wrapper around Pinecone + Cloudinary for Enterprise Lens V4.
+
+    Index dimensions:
+        enterprise-faces   : 1024-D cosine
+        enterprise-objects : 1536-D cosine
+
+    Face vectors:   ArcFace(512) + AdaFace(512) concatenated + L2-normalised
+    Object vectors: SigLIP(768) + DINOv2(768) concatenated + L2-normalised
+    """
+
    def __init__(self):
+        # ── Cloudinary ────────────────────────────────────────────
        cloudinary.config(
+            cloud_name = os.getenv("CLOUDINARY_CLOUD_NAME"),
+            api_key    = os.getenv("CLOUDINARY_API_KEY"),
+            api_secret = os.getenv("CLOUDINARY_API_SECRET"),
        )
+
+        # ── Pinecone ──────────────────────────────────────────────
        self.pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
+        self._ensure_indexes()
+        self.index_faces = self.pc.Index(IDX_FACES)
+        self.index_objects = self.pc.Index(IDX_OBJECTS)
+
+    def _ensure_indexes(self):
+        """
+        Create Pinecone indexes at correct V4 dimensions if they don't exist.
+        Safe to call multiple times — skips existing indexes.
+        """
+        existing = {idx.name for idx in self.pc.list_indexes()}
+
+        if IDX_FACES not in existing:
+            print(f"📦 Creating {IDX_FACES} at {IDX_FACES_DIM}-D...")
+            self.pc.create_index(
+                name      = IDX_FACES,
+                dimension = IDX_FACES_DIM,   # 1024-D — ArcFace+AdaFace
+                metric    = "cosine",
+                spec      = ServerlessSpec(cloud="aws", region="us-east-1"),
+            )
+            print(f"   ✅ {IDX_FACES} created at {IDX_FACES_DIM}-D")
+        else:
+            # Validate existing index has correct dimension
+            desc = self.pc.describe_index(IDX_FACES)
+            actual_dim = desc.dimension
+            if actual_dim != IDX_FACES_DIM:
+                raise ValueError(
+                    f"❌ {IDX_FACES} exists at {actual_dim}-D but V4 needs "
+                    f"{IDX_FACES_DIM}-D. Go to Settings → Danger Zone → "
+                    f"Reset Database to recreate at correct dimensions."
+                )
+
+        if IDX_OBJECTS not in existing:
+            print(f"📦 Creating {IDX_OBJECTS} at {IDX_OBJECTS_DIM}-D...")
+            self.pc.create_index(
+                name      = IDX_OBJECTS,
+                dimension = IDX_OBJECTS_DIM,   # 1536-D — SigLIP+DINOv2
+                metric    = "cosine",
+                spec      = ServerlessSpec(cloud="aws", region="us-east-1"),
+            )
+            print(f"   ✅ {IDX_OBJECTS} created at {IDX_OBJECTS_DIM}-D")
+        else:
+            desc = self.pc.describe_index(IDX_OBJECTS)
+            actual_dim = desc.dimension
+            if actual_dim != IDX_OBJECTS_DIM:
+                raise ValueError(
+                    f"❌ {IDX_OBJECTS} exists at {actual_dim}-D but V4 needs "
+                    f"{IDX_OBJECTS_DIM}-D. Go to Settings → Danger Zone → "
+                    f"Reset Database to recreate at correct dimensions."
+                )

+    # ── Upload image to Cloudinary ────────────────────────────────
+    def upload_image(self, file_path: str, folder_name: str = "visual_search") -> str:
+        """Upload image to Cloudinary, return secure_url."""
        response = cloudinary.uploader.upload(file_path, folder=folder_name)
+        return response["secure_url"]

+    # ── Store vector in correct Pinecone index ────────────────────
+    def add_vector(self, data_dict: dict, image_url: str, image_id: str = None):
+        """
+        Upsert one vector into the correct Pinecone index.
+
+        data_dict keys:
+            type          : "face" or "object"
+            vector        : np.ndarray or list — must match index dimension
+            face_crop     : str (base64 JPEG thumbnail, face only)
+            det_score     : float (InsightFace detection confidence, face only)
+            face_quality  : float (alias for det_score)
+            face_width_px : int (face bounding box width in pixels)
+            face_idx      : int (face index within the source image)
+            bbox          : list [x, y, w, h]
+            folder        : str (Cloudinary folder / category name)
+        """
+        vec_id = image_id or str(uuid.uuid4())
+        vec_list = (data_dict["vector"].tolist()
+                    if hasattr(data_dict["vector"], "tolist")
+                    else list(data_dict["vector"]))

        if data_dict["type"] == "face":
+            # ── V4 face metadata — full set required for UI ───────
+            payload = [{
+                "id": vec_id,
+                "values": vec_list,
+                "metadata": {
+                    "image_url": image_url,
+                    "url": image_url,                # alias for compatibility
+                    "folder": data_dict.get("folder", ""),
+                    "face_idx": data_dict.get("face_idx", 0),
+                    "bbox": str(data_dict.get("bbox", [])),
+                    "face_crop": data_dict.get("face_crop", ""),   # base64 thumb
+                    "det_score": data_dict.get("det_score", 1.0),
+                    "face_quality": data_dict.get("face_quality",
+                                                  data_dict.get("det_score", 1.0)),
+                    "face_width_px": data_dict.get("face_width_px", 0),
+                },
+            }]
            self.index_faces.upsert(vectors=payload)
+
        else:
+            # ── V4 object metadata ────────────────────────────────
+            payload = [{
+                "id": vec_id,
+                "values": vec_list,
+                "metadata": {
+                    "image_url": image_url,
+                    "url": image_url,
+                    "folder": data_dict.get("folder", ""),
+                },
+            }]
            self.index_objects.upsert(vectors=payload)

+    # ── Search ────────────────────────────────────────────────────
+    def search(self, query_dict: dict, top_k: int = 10,
+               min_score: float = None) -> list:
+        """
+        Search the correct Pinecone index for one query vector.
+
+        For face vectors:   uses adaptive threshold based on det_score.
+        For object vectors: uses OBJECT_THRESHOLD (default 0.45).
+
+        Returns list of dicts: {url, score, caption, [face_crop, folder]}
+        """
+        vec_list = (query_dict["vector"].tolist()
+                    if hasattr(query_dict["vector"], "tolist")
+                    else list(query_dict["vector"]))
+        results = []
+
        if query_dict["type"] == "face":
+            # ── V4 face search ────────────────────────────────────
+            # Adaptive threshold: high-quality faces are stricter
+            det_score = query_dict.get("det_score", 1.0)
+            threshold = (FACE_THRESHOLD_HIGH if det_score >= 0.85
+                         else FACE_THRESHOLD_LOW)
+            if min_score is not None:
+                threshold = min_score
+
+            response = self.index_faces.query(
+                vector=vec_list, top_k=top_k * 3,   # over-fetch, filter below
+                include_metadata=True,
+            )
+
+            # Deduplicate by image_url — keep best score per image
+            image_map = {}
+            for match in response.get("matches", []):
+                raw = match["score"]
+                if raw < threshold:
+                    continue
+                url = (match["metadata"].get("url") or
+                       match["metadata"].get("image_url", ""))
+                if not url:
+                    continue
+                if url not in image_map or raw > image_map[url]["raw"]:
+                    image_map[url] = {
+                        "raw": raw,
+                        "face_crop": match["metadata"].get("face_crop", ""),
+                        "folder": match["metadata"].get("folder", ""),
+                    }
+
+            # Remap raw cosine → UI percentage (75%–99%)
+            for url, d in image_map.items():
+                lo = FACE_THRESHOLD_LOW
+                ui = round(min(0.99, 0.75 + ((d["raw"] - lo) / (1.0 - lo)) * 0.24), 4)
+                results.append({
+                    "url": url,
+                    "score": ui,
+                    "raw_score": round(d["raw"], 4),
+                    "face_crop": d["face_crop"],
+                    "folder": d["folder"],
+                    "caption": "👤 Verified Identity Match",
+                })
+
+            results = sorted(results, key=lambda x: x["score"], reverse=True)[:top_k]
+
        else:
+            # ── V4 object search ──────────────────────────────────
+            threshold = min_score if min_score is not None else OBJECT_THRESHOLD
+            response = self.index_objects.query(
+                vector=vec_list, top_k=top_k, include_metadata=True)
+
+            for match in response.get("matches", []):
+                if match["score"] < threshold:
+                    continue
+                results.append({
+                    "url": (match["metadata"].get("url") or
+                            match["metadata"].get("image_url", "")),
+                    "score": round(match["score"], 4),
+                    "folder": match["metadata"].get("folder", ""),
+                    "caption": "🎯 Visual & Semantic Match",
+                })
+
        return results
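For scale, the remap in the face branch turns a raw cosine of 0.60 into 0.75 + ((0.60 - 0.32) / 0.68) * 0.24 ≈ 0.85, so a hit at the low threshold displays as 75% and a perfect match caps at 99%. A minimal usage sketch of the class (file name, folder name, and the random vector are made up; as the header notes, main.py does not go through this wrapper):

    import numpy as np
    from src.cloud_db import CloudDB   # hypothetical import path for a local script

    db = CloudDB()                     # creates or validates both indexes

    url = db.upload_image("group_photo.jpg", folder_name="demo")

    # Fused 1024-D face vector (ArcFace 512 + AdaFace 512), L2-normalised upstream
    face_vec = np.random.rand(1024).astype(np.float32)
    face_vec /= np.linalg.norm(face_vec)

    db.add_vector(
        {"type": "face", "vector": face_vec, "folder": "demo",
         "det_score": 0.91, "face_idx": 0, "bbox": [40, 60, 112, 112]},
        image_url=url,
    )

    hits = db.search({"type": "face", "vector": face_vec, "det_score": 0.91}, top_k=5)
    for h in hits:
        print(h["url"], h["score"], h["caption"])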
src/models.py
CHANGED
@@ -44,15 +44,18 @@ except ImportError:
    print("   pip install insightface onnxruntime (linux/win)")

# ── AdaFace ──────────────────────────────────────────────────────
-# AdaFace IR-50
-#
-#
+# AdaFace IR-50 MS1MV2 (CVPR 2022) — quality-adaptive margin loss
+# Repo   : minchul/cvlface_adaface_ir50_ms1mv2 (HuggingFace)
+# Loaded : AutoModel + trust_remote_code=True (custom_code repo)
+# Needs  : HF_TOKEN env var set in HF Space secrets
try:
+    import shutil as _shutil
    from huggingface_hub import hf_hub_download
+    from transformers import AutoModel as _HF_AutoModel
    ADAFACE_WEIGHTS_AVAILABLE = True
except ImportError:
    ADAFACE_WEIGHTS_AVAILABLE = False
-    print("⚠️ huggingface_hub not installed — AdaFace fusion disabled")
+    print("⚠️ huggingface_hub / transformers not installed — AdaFace fusion disabled")

# ── Constants ─────────────────────────────────────────────────────
YOLO_PERSON_CLASS_ID = 0

@@ -70,95 +73,6 @@ ADAFACE_DIM = 512 # AdaFace embedding dimension
FUSED_FACE_DIM = 1024   # ArcFace + AdaFace concatenated


-# ════════════════════════════════════════════════════════════════
-# AdaFace IR-50 Backbone
-# Lightweight reimplementation of the IR-50 network head used
-# to load pretrained AdaFace weights (WebFace4M checkpoint).
-# Only the feature-extraction layers are used — no classifier.
-# ════════════════════════════════════════════════════════════════
-
-def _conv_bn(inp, oup, k, s, p, groups=1):
-    return nn.Sequential(
-        nn.Conv2d(inp, oup, k, s, p, groups=groups, bias=False),
-        nn.BatchNorm2d(oup),
-    )
-
-class _IBasicBlock(nn.Module):
-    """Basic residual block used in IR-50."""
-    expansion = 1
-    def __init__(self, inplanes, planes, stride=1, downsample=None):
-        super().__init__()
-        self.bn1 = nn.BatchNorm2d(inplanes)
-        self.conv1 = nn.Conv2d(inplanes, planes, 3, 1, 1, bias=False)
-        self.bn2 = nn.BatchNorm2d(planes)
-        self.prelu = nn.PReLU(planes)
-        self.conv2 = nn.Conv2d(planes, planes, 3, stride, 1, bias=False)
-        self.bn3 = nn.BatchNorm2d(planes)
-        self.downsample = downsample
-        self.stride = stride
-
-    def forward(self, x):
-        identity = x
-        out = self.bn1(x)
-        out = self.conv1(out)
-        out = self.bn2(out)
-        out = self.prelu(out)
-        out = self.conv2(out)
-        out = self.bn3(out)
-        if self.downsample is not None:
-            identity = self.downsample(x)
-        out += identity
-        return out
-
-class AdaFaceIR50(nn.Module):
-    """
-    IR-50 backbone for AdaFace.
-    Produces a 512-D L2-normalised face embedding.
-    Input:  (N, 3, 112, 112) normalised face crop (mean 0.5, std 0.5)
-    Output: (N, 512) L2-normalised embedding
-    """
-    def __init__(self):
-        super().__init__()
-        self.input_layer = nn.Sequential(
-            nn.Conv2d(3, 64, 3, 1, 1, bias=False),
-            nn.BatchNorm2d(64),
-            nn.PReLU(64),
-        )
-        self.layer1 = self._make_layer(64, 64, 3, stride=2)
-        self.layer2 = self._make_layer(64, 128, 4, stride=2)
-        self.layer3 = self._make_layer(128, 256, 14, stride=2)
-        self.layer4 = self._make_layer(256, 512, 3, stride=2)
-        self.bn2 = nn.BatchNorm2d(512)
-        self.dropout = nn.Dropout(p=0.4)
-        self.fc = nn.Linear(512 * 7 * 7, 512)
-        self.features = nn.BatchNorm1d(512)
-
-    def _make_layer(self, inplanes, planes, blocks, stride=1):
-        downsample = None
-        if stride != 1 or inplanes != planes:
-            downsample = nn.Sequential(
-                nn.Conv2d(inplanes, planes, 1, stride, bias=False),
-                nn.BatchNorm2d(planes),
-            )
-        layers = [_IBasicBlock(inplanes, planes, stride, downsample)]
-        for _ in range(1, blocks):
-            layers.append(_IBasicBlock(planes, planes))
-        return nn.Sequential(*layers)
-
-    def forward(self, x):
-        x = self.input_layer(x)
-        x = self.layer1(x)
-        x = self.layer2(x)
-        x = self.layer3(x)
-        x = self.layer4(x)
-        x = self.bn2(x)
-        x = self.dropout(x)
-        x = x.flatten(1)
-        x = self.fc(x)
-        x = self.features(x)
-        return F.normalize(x, p=2, dim=1)
-
-
# ════════════════════════════════════════════════════════════════
# Utility functions
# ════════════════════════════════════════════════════════════════

@@ -302,43 +216,108 @@ class AIModelManager:
        self._face_lock = threading.Lock()
        self._cache = {}
        self._cache_maxsize = 128
-
-        print(
-        print(
+        adaface_status = "FULL FUSION ✅" if self.adaface_model else "ZERO-PADDED ⚠️ (AdaFace weights missing)"
+        print("")
+        print("✅ Enterprise Lens V4 — Models Ready")
+        print(f"   Device            : {self.device.upper()}")
+        print(f"   InsightFace       : buffalo_l (SCRFD-10GF + ArcFace-R100)")
+        print(f"   AdaFace           : {adaface_status}")
+        print(f"   Face vector dim   : {FUSED_FACE_DIM} <- enterprise-faces MUST be {FUSED_FACE_DIM}-D")
+        print(f"   Object vector dim : 1536 <- enterprise-objects MUST be 1536-D")
+        print(f"   Quality gate      : det_score >= {FACE_QUALITY_GATE}, face_px >= {MIN_FACE_SIZE}")
+        print(f"   Detection size    : {DET_SIZE_PRIMARY}")
+        print("")

    def _load_adaface(self):
-        """
+        """
+        Load AdaFace IR-50 MS1MV2 from HuggingFace.
+        Repo   : minchul/cvlface_adaface_ir50_ms1mv2
+        Method : AutoModel + trust_remote_code (repo has custom_code)
+        Token  : HF_TOKEN env var (required for custom_code repos)
+        Output : 512-D L2-normalised embedding per face crop
+        """
        if not ADAFACE_WEIGHTS_AVAILABLE:
-            print("⚠️ AdaFace skipped — huggingface_hub not installed")
+            print("⚠️ AdaFace skipped — huggingface_hub / transformers not installed")
            return
+
+        import os, sys
+
+        REPO_ID = "minchul/cvlface_adaface_ir50_ms1mv2"
+        HF_TOKEN = os.getenv("HF_TOKEN", None)
+        CACHE_PATH = os.path.expanduser("~/.cvlface_cache/minchul/cvlface_adaface_ir50_ms1mv2")
+
        try:
-            print("📦 Loading AdaFace IR-50
+            print("📦 Loading AdaFace IR-50 MS1MV2 from HuggingFace...")
+            if HF_TOKEN:
+                print("   HF_TOKEN found ✅")
+            else:
+                print("   ⚠️ HF_TOKEN not set — may fail on gated/custom_code repos")
+
+            # ── Step 1: Download all repo files ──────────────────
+            os.makedirs(CACHE_PATH, exist_ok=True)
+
+            # Download files.txt manifest first
+            files_txt = os.path.join(CACHE_PATH, "files.txt")
+            if not os.path.exists(files_txt):
+                hf_hub_download(
+                    repo_id=REPO_ID, filename="files.txt",
+                    token=HF_TOKEN, local_dir=CACHE_PATH,
+                    local_dir_use_symlinks=False,
+                )
+
+            # Read manifest and download each listed file
+            with open(files_txt, "r") as f:
+                extra_files = [x.strip() for x in f.read().split("\n") if x.strip()]
+
+            for fname in extra_files + ["config.json", "wrapper.py", "model.safetensors"]:
+                fpath = os.path.join(CACHE_PATH, fname)
+                if not os.path.exists(fpath):
+                    print(f"   Downloading {fname}...")
+                    hf_hub_download(
+                        repo_id=REPO_ID, filename=fname,
+                        token=HF_TOKEN, local_dir=CACHE_PATH,
+                        local_dir_use_symlinks=False,
+                    )
+
+            # ── Step 2: Load model from local cache ──────────────
+            # Must chdir + add to sys.path because the repo uses
+            # trust_remote_code with relative imports in wrapper.py
+            cwd = os.getcwd()
+            os.chdir(CACHE_PATH)
+            sys.path.insert(0, CACHE_PATH)
+            try:
+                model = _HF_AutoModel.from_pretrained(
+                    CACHE_PATH,
+                    trust_remote_code=True,
+                    token=HF_TOKEN,
+                )
+            finally:
+                os.chdir(cwd)
+                if CACHE_PATH in sys.path:
+                    sys.path.remove(CACHE_PATH)
+
            model = model.to(self.device).eval()
            if self.device == "cuda":
                model = model.half()
+
+            # ── Step 3: Verify output shape ───────────────────────
+            with torch.no_grad():
+                dummy = torch.zeros(1, 3, 112, 112).to(self.device)
+                out = model(dummy)
+            # Model may return tensor directly or an object with .embedding
+            out_vec = out if isinstance(out, torch.Tensor) else out.embedding
+            out_dim = out_vec.shape[-1]
+            if out_dim != ADAFACE_DIM:
+                raise ValueError(
+                    f"AdaFace output dim={out_dim}, expected {ADAFACE_DIM}")
+
            self.adaface_model = model
-            print("✅ AdaFace IR-50 loaded — 1024-D
+            print(f"✅ AdaFace IR-50 MS1MV2 loaded — output dim={out_dim} — 1024-D fusion ACTIVE")
+
        except Exception as e:
-            print(f"⚠️ AdaFace load failed: {e}
-            print(f"   Detail: {traceback.format_exc()[-
+            print(f"⚠️ AdaFace load failed: {e}")
+            print(f"   Detail: {traceback.format_exc()[-500:]}")
+            print("   Falling back to ArcFace-only (zero-padded to 1024-D)")
            self.adaface_model = None

    # ── Object Lane: batched SigLIP + DINOv2 embedding ───────────

@@ -374,8 +353,12 @@ class AIModelManager:
    # ── AdaFace embedding for a single face crop ─────────────────
    def _adaface_embed(self, face_arr_chw: np.ndarray) -> np.ndarray:
        """
-        Run AdaFace IR-50 on a preprocessed (3,112,112) float32 array.
-
+        Run AdaFace IR-50 MS1MV2 on a preprocessed (3,112,112) float32 array.
+        Input : CHW float32, normalised to [-1, 1]
+        Output: 512-D L2-normalised numpy embedding, or None on failure.
+
+        The cvlface model may return a tensor directly or an object
+        with an .embedding attribute — both cases handled.
        """
        if self.adaface_model is None or face_arr_chw is None:
            return None

@@ -385,8 +368,11 @@ class AIModelManager:
            if self.device == "cuda":
                t = t.half()
            with torch.no_grad():
+                out = self.adaface_model(t)
+                # Handle both raw tensor and object-with-embedding outputs
+                emb = out if isinstance(out, torch.Tensor) else out.embedding
+                emb = F.normalize(emb.float(), p=2, dim=1)
+                return emb[0].cpu().numpy()
        except Exception as e:
            print(f"⚠️ AdaFace inference error: {e}")
            return None

@@ -461,15 +447,21 @@ class AIModelManager:
            adaface_vec = self._adaface_embed(face_chw)

            # ── Fuse: ArcFace + AdaFace → 1024-D ─────────────
+            # ALWAYS output FUSED_FACE_DIM (1024) so Pinecone index
+            # dimension never mismatches, regardless of AdaFace status.
            if adaface_vec is not None:
+                # Full fusion: ArcFace(512) + AdaFace(512) → 1024-D
                fused_raw = np.concatenate([arcface_vec, adaface_vec])
-                n2 = np.linalg.norm(fused_raw)
-                final_vec = (fused_raw / n2) if n2 > 0 else fused_raw
-                vec_dim = FUSED_FACE_DIM
            else:
-                # AdaFace unavailable —
+                # AdaFace unavailable — pad with zeros to maintain 1024-D
+                # The ArcFace half still carries full identity signal;
+                # zero padding is neutral and doesn't corrupt similarity.
+                print("   ⚠️ AdaFace unavailable — padding to 1024-D")
+                fused_raw = np.concatenate([arcface_vec,
+                                            np.zeros(ADAFACE_DIM, dtype=np.float32)])
+            n2 = np.linalg.norm(fused_raw)
+            final_vec = (fused_raw / n2) if n2 > 0 else fused_raw
+            vec_dim = FUSED_FACE_DIM   # always 1024

            # ── Face crop thumbnail for UI ─────────────────────
            face_crop_b64 = _crop_to_b64(bgr, x1, y1, x2, y2)
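A quick numerical check of what this fusion does to similarity (standalone sketch, not part of models.py): concatenating two unit vectors and re-normalising scales each half by 1/sqrt(2), so the cosine of two fused vectors is the average of the ArcFace and AdaFace cosines, while two zero-padded vectors fall back to the plain ArcFace cosine.

    import numpy as np

    def l2(v):
        n = np.linalg.norm(v)
        return v / n if n > 0 else v

    rng = np.random.default_rng(0)
    arc_a, arc_b = l2(rng.normal(size=512)), l2(rng.normal(size=512))
    ada_a, ada_b = l2(rng.normal(size=512)), l2(rng.normal(size=512))

    fused_a = l2(np.concatenate([arc_a, ada_a]))
    fused_b = l2(np.concatenate([arc_b, ada_b]))
    print(np.dot(fused_a, fused_b))                         # equals (arc.arc + ada.ada) / 2
    print((np.dot(arc_a, arc_b) + np.dot(ada_a, ada_b)) / 2)

    # Zero-padded fallback: only the ArcFace half carries signal
    pad_a = l2(np.concatenate([arc_a, np.zeros(512)]))
    pad_b = l2(np.concatenate([arc_b, np.zeros(512)]))
    print(np.dot(pad_a, pad_b), np.dot(arc_a, arc_b))       # identical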
|