AdarshDRC commited on
Commit
29bfc1f
·
0 Parent(s):

fix: Resolving backend

Browse files
.gitattributes ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *.onnx filter=lfs diff=lfs merge=lfs -text
2
+ onnx_models/*.onnx filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ /myenv
.gitlab-ci.yml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ stages:
2
+ - ping
3
+
4
+ keep_hf_space_warm:
5
+ stage: ping
6
+ script:
7
+ - echo "Ping health endpoint"
8
+ - curl -sf --max-time 90 https://adarshdrc-visual-search-api.hf.space/api/health || echo "Ping failed (may be cold-starting)"
9
+
10
+ - echo "Waiting for warmup..."
11
+ - sleep 60
12
+
13
+ - echo "Second warmup ping"
14
+ - curl -sf --max-time 30 https://adarshdrc-visual-search-api.hf.space/api/health
Dockerfile ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Dockerfile — Enterprise Lens V3
# InsightFace models download on first run (not at build time)
# This avoids build timeout and network issues during Docker build
#
# Fix in this revision: the file previously contained TWO `CMD` instructions.
# Only the last CMD in a Dockerfile takes effect, so the first
# `CMD uvicorn main:app \ ...` was dead code — and its final line ended in a
# dangling `\`, which makes the parser treat the following lines as part of
# the same (broken) instruction. The duplicate is removed; the single exec-form
# CMD at the end remains.

FROM python:3.10-slim

WORKDIR /app

# ── System deps ──────────────────────────────────────────────────
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 libglib2.0-0 libgomp1 git \
    build-essential cmake g++ \
    wget ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# ── Step 1: Build tools (MUST be before insightface) ─────────────
RUN pip install --no-cache-dir \
    "numpy<2.0" \
    "setuptools>=65" \
    wheel \
    cython \
    scikit-build \
    cmake

# ── Step 2: onnxruntime (MUST be before insightface) ─────────────
RUN pip install --no-cache-dir onnxruntime

# ── Step 3: insightface ───────────────────────────────────────────
RUN pip install --no-cache-dir --prefer-binary "insightface>=0.7.3"

# ── Step 4: Remaining requirements ───────────────────────────────
COPY requirements.txt .
RUN pip install --no-cache-dir --prefer-binary -r requirements.txt

# ── Copy app code ─────────────────────────────────────────────────
COPY . .
RUN mkdir -p temp_uploads saved_images && chmod -R 777 temp_uploads saved_images

# ── Hugging Face Auth Token ──────────────────────────────────────
# Define the argument so Docker accepts it during build
ARG HF_TOKEN
# Set it as an environment variable so Python/HuggingFace can see it.
# SECURITY NOTE(review): this bakes the token into the image's layers and
# runtime environment — prefer a BuildKit secret mount
# (RUN --mount=type=secret,...) if the image is ever shared or pushed.
ENV HF_TOKEN=$HF_TOKEN

# ── Pre-download ONLY transformers + YOLO at build time ──────────
# InsightFace models download on first startup (cached after that)
# (Heredoc RUN syntax requires BuildKit.)
RUN python - <<'EOF'
import os
os.environ["TRANSFORMERS_VERBOSITY"] = "error"

print("Pre-downloading SigLIP...")
from transformers import AutoProcessor, AutoModel
AutoProcessor.from_pretrained("google/siglip-base-patch16-224", use_fast=True)
AutoModel.from_pretrained("google/siglip-base-patch16-224")
print("SigLIP done")

print("Pre-downloading DINOv2...")
from transformers import AutoImageProcessor
AutoImageProcessor.from_pretrained("facebook/dinov2-base")
AutoModel.from_pretrained("facebook/dinov2-base")
print("DINOv2 done")

print("Pre-downloading YOLO seg...")
from ultralytics import YOLO
YOLO("yolo11n-seg.pt")
print("YOLO done")

print("Build complete! InsightFace models download on first startup.")
EOF

EXPOSE 7860
ENV WEB_CONCURRENCY=1

# These significantly improve CPU inference throughput on HF free tier (2 vCPU).
ENV OMP_NUM_THREADS=2
ENV MKL_NUM_THREADS=2
ENV OPENBLAS_NUM_THREADS=2
ENV NUMEXPR_NUM_THREADS=2
ENV TOKENIZERS_PARALLELISM=false

# Tell ONNX Runtime to use CPU optimizations aggressively
ENV ORT_DISABLE_ALL_OPTIMIZATIONS=0

# COPY the pre-converted ONNX models into the image.
# Run scripts/convert_to_onnx.py locally first, then commit onnx_models/
# to your Space repo.
COPY onnx_models/ /app/onnx_models/
ENV ONNX_MODELS_DIR=/app/onnx_models

# Single CMD — exec form so uvicorn runs as PID 1 and receives signals directly.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Visual Search Api
3
+ emoji: ⚡
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ ---
main.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""FastAPI entry point: model loading, CORS, routers, async job worker."""
import asyncio
import os
from contextlib import asynccontextmanager

# CPU thread tuning — must happen BEFORE torch/onnxruntime import.
# HF free tier = 2 vCPU; we want to use both but not oversubscribe.
os.environ.setdefault("OMP_NUM_THREADS", "2")
os.environ.setdefault("MKL_NUM_THREADS", "2")
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from src.core.config import MAX_CONCURRENT_INFERENCES, USE_ASYNC_UPLOADS
from src.core.logging import log, init_logging_session, close_logging_session
from src.api import danger, explorer, search, system, upload
from src.api import people  # Phase 3
from src.api import jobs as jobs_api  # explicit alias


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load AI models off the event loop before serving; stop the job worker
    and close the logging session on shutdown."""
    await init_logging_session()
    log("INFO", "server.startup", message="Loading AI models...")

    from src.services.ai_manager import AIModelManager

    # Fix: asyncio.to_thread() replaces get_event_loop()/run_in_executor —
    # asyncio.get_event_loop() is deprecated inside coroutines (Python 3.10+);
    # to_thread is the supported way to run blocking setup off the loop.
    app.state.ai = await asyncio.to_thread(AIModelManager)

    # Split semaphores: face detection and object embedding can overlap
    # without fighting for the same CPU cores.
    app.state.ai_semaphore = asyncio.Semaphore(MAX_CONCURRENT_INFERENCES)
    app.state.face_semaphore = asyncio.Semaphore(MAX_CONCURRENT_INFERENCES)
    app.state.object_semaphore = asyncio.Semaphore(MAX_CONCURRENT_INFERENCES)

    # Phase 3: start background job worker if async uploads are enabled
    worker_task = None
    if USE_ASYNC_UPLOADS:
        from src.services.jobs import run_worker
        worker_task = asyncio.create_task(run_worker(app.state))
        log("INFO", "server.worker_started", message="Async upload worker running")

    log("INFO", "server.ready", message="All models loaded. API ready.")
    yield

    # Graceful shutdown: cancel the worker and swallow only its cancellation.
    if worker_task:
        worker_task.cancel()
        try:
            await worker_task
        except asyncio.CancelledError:
            pass

    log("INFO", "server.shutdown", message="API shutting down.")
    await close_logging_session()


app = FastAPI(lifespan=lifespan)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["https://photofinderv2.vercel.app"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

os.makedirs("temp_uploads", exist_ok=True)

# Existing routers
app.include_router(system.router)
app.include_router(upload.router)
app.include_router(search.router)
app.include_router(explorer.router)
app.include_router(danger.router)

# Phase 3 routers
app.include_router(people.router)
app.include_router(jobs_api.router)
onnx_models/dinov2_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2293e7b07a81c14018f5904c5bdd1936928d477cdeb716ce5864ef5e8d50e7d9
3
+ size 90566150
onnx_models/siglip_vision_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9fee3232e40845a34de9c6340584c360d1d572fc4942a194d366bac7c86f690
3
+ size 99109472
requirements.txt ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # requirements.txt — Enterprise Lens V5 (Phase 3)
2
+
3
+ --extra-index-url https://download.pytorch.org/whl/cpu
4
+
5
+ # ── Web framework ────────────────────────────────────────────────
6
+ fastapi==0.115.6
7
+ uvicorn[standard]==0.32.1
8
+ python-multipart==0.0.20
9
+
10
+ # ── PyTorch CPU-only ─────────────────────────────────────────────
11
+ torch==2.3.1+cpu
12
+ torchvision==0.18.1+cpu
13
+ scikit-learn
14
+
15
+ # ── HuggingFace stack ─────────────────────────────────────────────
16
+ # Pin transformers to 4.x — AdaFace CVLFaceRecognitionModel (trust_remote_code)
17
+ # uses _tied_weights_keys (old API). transformers 5.0+ renamed it to
18
+ # all_tied_weights_keys → crash on load.
19
+ transformers>=4.40.0,<5.0.0
20
+ huggingface_hub>=0.26.0
21
+ safetensors>=0.4.0
22
+ tokenizers>=0.20.0
23
+ sentencepiece
24
+ accelerate>=1.1.0
25
+ omegaconf
26
+
27
+ # ── InsightFace — SCRFD + ArcFace-R100 ───────────────────────────
28
+ insightface==0.7.3
29
+ onnxruntime>=1.20.0
30
+ huggingface-hub>=0.22.0
31
+
32
+ # ── YOLO — object segmentation ───────────────────────────────────
33
+ ultralytics==8.3.27
34
+
35
+ # ── Computer vision ───────────────────────────────────────────────
36
+ opencv-python-headless==4.10.0.84
37
+ Pillow==11.0.0
38
+ # Phase 3: pin numpy <2.0 — hdbscan 0.8.33 requires numpy <2.0
39
+ numpy>=1.26.4,<2.0
40
+
41
+ # ── Vector DB + CDN ──────────────────────────────────────────────
42
+ pinecone==5.4.1
43
+ cloudinary==1.41.0
44
+
45
+ # ── Async HTTP ───────────────────────────────────────────────────
46
+ aiohttp==3.11.9
47
+
48
+ # ── Phase 3: Clustering + job queue ──────────────────────────────
49
+ # hdbscan: HDBSCAN clustering for People View (face identity albums)
50
+ hdbscan>=0.8.33
51
+ # redis: Upstash Redis REST client uses aiohttp (already present).
52
+ # No redis-py socket library needed — Upstash exposes a pure HTTP API.
53
+ # redis package only needed if you switch to raw TCP Upstash endpoint.
54
+ # redis>=5.0.0 ← uncomment only if switching to non-REST Upstash
55
+
56
+ # ── Utilities ────────────────────────────────────────────────────
57
+ loguru==0.7.2
58
+ inflect==7.4.0
59
+ python-dotenv==1.0.1
60
+
61
+ fvcore
scripts/calibrate_threshold.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Threshold calibration tool.
3
+
4
+ Use this to find the FACE_MATCH_THRESHOLD that gives you the best
5
+ precision/recall tradeoff for YOUR specific data. Default 0.28 is an
6
+ industry-average — your data may differ.
7
+
8
+ Usage:
9
+ 1. Build two test sets:
10
+ - POSITIVE_PAIRS: pairs of (query_image, gallery_image) of the SAME person
11
+ - NEGATIVE_PAIRS: pairs of DIFFERENT people (hard negatives help most)
12
+
13
+ 2. Populate TEST_PAIRS below with local image paths
14
+
15
+ 3. Run: python scripts/calibrate_threshold.py
16
+
17
+ Output: table of thresholds with TP/FP/FN/precision/recall/F1.
18
+ """
19
+ import sys
20
+ import os
21
+ from pathlib import Path
22
+
23
+ # Add project root to path so `src.*` imports work when running from scripts/
24
+ sys.path.insert(0, str(Path(__file__).parent.parent))
25
+
26
+ import numpy as np
27
+ from PIL import Image
28
+
29
+
30
+ # ── EDIT THESE ──────────────────────────────────────────────
31
+ # Each tuple: (path_to_query_image, path_to_gallery_image, is_same_person)
32
+ TEST_PAIRS = [
33
+ # Example positives (same person, different photos)
34
+ # ("test_data/alice_1.jpg", "test_data/alice_2.jpg", True),
35
+ # ("test_data/alice_1.jpg", "test_data/alice_3.jpg", True),
36
+ # ("test_data/bob_1.jpg", "test_data/bob_2.jpg", True),
37
+
38
+ # Example hard negatives (different people, similar looking)
39
+ # ("test_data/alice_1.jpg", "test_data/carol_1.jpg", False),
40
+ # ("test_data/bob_1.jpg", "test_data/dave_1.jpg", False),
41
+ ]
42
+ # ────────────────────────────────────────────────────────────
43
+
44
+
45
def cosine(a: np.ndarray, b: np.ndarray) -> float:
    """Cosine similarity of two vectors; epsilon guards against zero norms."""
    denom = np.linalg.norm(a) * np.linalg.norm(b) + 1e-9
    return float(np.dot(a, b) / denom)
47
+
48
+
49
def compute_pair_scores():
    """Returns list of (fused_score, arcface_score, adaface_score, is_positive).

    Loads the full AIModelManager, embeds both images of each TEST_PAIRS
    entry, takes the largest detected face on each side, and scores the
    pair with ArcFace cosine, AdaFace cosine, and a 0.6/0.4 weighted fusion.
    Entries with a missing file or no detected face are skipped with a message.
    """
    from src.services.ai_manager import AIModelManager
    print("Loading models...")
    ai = AIModelManager()

    results = []
    for query_path, gallery_path, is_positive in TEST_PAIRS:
        # Skip broken entries instead of crashing mid-run.
        if not (os.path.exists(query_path) and os.path.exists(gallery_path)):
            print(f" Skipping missing: {query_path} or {gallery_path}")
            continue

        with open(query_path, "rb") as f:
            q_vectors = ai.process_image_bytes(f.read(), detect_faces=True)
        with open(gallery_path, "rb") as f:
            g_vectors = ai.process_image_bytes(f.read(), detect_faces=True)

        # Keep only face vectors; the manager may also emit object vectors.
        q_faces = [v for v in q_vectors if v["type"] == "face"]
        g_faces = [v for v in g_vectors if v["type"] == "face"]

        if not q_faces or not g_faces:
            print(f" No face in: {query_path} or {gallery_path}")
            continue

        # Take largest face from each
        qf = max(q_faces, key=lambda f: f.get("face_width_px", 0))
        gf = max(g_faces, key=lambda f: f.get("face_width_px", 0))

        arc_score = cosine(qf["arcface_vector"], gf["arcface_vector"])
        if qf.get("has_adaface") and gf.get("has_adaface"):
            ada_score = cosine(qf["adaface_vector"], gf["adaface_vector"])
        else:
            # NOTE(review): 0.15 appears to be a conservative stand-in score
            # used when AdaFace embeddings are unavailable — confirm it matches
            # the fusion logic used at serving time.
            ada_score = 0.15

        # Weighted fusion: ArcFace dominates (0.6), AdaFace refines (0.4).
        fused = 0.6 * arc_score + 0.4 * ada_score

        results.append({
            "query": query_path,
            "gallery": gallery_path,
            "is_positive": is_positive,
            "arcface": arc_score,
            "adaface": ada_score,
            "fused": fused,
        })

        tag = "SAME" if is_positive else "DIFF"
        print(f" [{tag}] arc={arc_score:.3f} ada={ada_score:.3f} fused={fused:.3f}")

    return results
98
+
99
+
100
def evaluate_thresholds(results):
    """Sweep (arcface_thr, fused_thr) combinations and print P/R/F1 for each.

    Args:
        results: list of dicts from compute_pair_scores(); each must carry
            "arcface" and "fused" float scores plus an "is_positive" bool.

    Returns:
        None — all output is printed, ending with the best-F1 combination.
    """
    if not results:
        print("\nNo results to evaluate. Add pairs to TEST_PAIRS above.")
        return

    print("\n" + "=" * 78)
    print(f"{'arcface_thr':<14}{'fused_thr':<14}{'TP':>6}{'FP':>6}{'FN':>6}"
          f"{'Precision':>12}{'Recall':>10}{'F1':>8}")
    print("=" * 78)

    # Fix: seed `best` with every key the summary block below reads. The
    # original only seeded {"f1", "arc_thr", "fused_thr"}, so when no
    # combination beat F1 = 0 (e.g. all pairs below every threshold) the
    # final print raised KeyError on "precision"/"recall".
    best = {"f1": 0, "arc_thr": 0, "fused_thr": 0,
            "tp": 0, "fp": 0, "fn": 0, "precision": 0, "recall": 0}

    for arc_thr in [0.20, 0.24, 0.28, 0.32, 0.36, 0.40, 0.45]:
        for fused_thr in [0.22, 0.26, 0.30, 0.34, 0.38]:
            tp = fp = fn = 0
            for r in results:
                # A match must pass BOTH thresholds.
                predicted_match = (r["arcface"] >= arc_thr and r["fused"] >= fused_thr)
                if r["is_positive"]:
                    if predicted_match:
                        tp += 1
                    else:
                        fn += 1
                else:
                    if predicted_match:
                        fp += 1
            # Guard every ratio against empty denominators.
            precision = tp / (tp + fp) if (tp + fp) else 0
            recall = tp / (tp + fn) if (tp + fn) else 0
            f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0

            if f1 > best["f1"]:
                best = {"f1": f1, "arc_thr": arc_thr, "fused_thr": fused_thr,
                        "tp": tp, "fp": fp, "fn": fn,
                        "precision": precision, "recall": recall}

            print(f"{arc_thr:<14.2f}{fused_thr:<14.2f}{tp:>6}{fp:>6}{fn:>6}"
                  f"{precision:>12.3f}{recall:>10.3f}{f1:>8.3f}")

    print("=" * 78)
    print(f"\nBest F1: {best['f1']:.3f}")
    print(f"  FACE_MATCH_THRESHOLD  = {best['arc_thr']}")
    print(f"  FUSED_MATCH_THRESHOLD = {best['fused_thr']}")
    print(f"  Precision = {best['precision']:.3f}, Recall = {best['recall']:.3f}")
    print("\nUpdate these in your HF Space env vars.")
147
+
148
+
149
if __name__ == "__main__":
    # Refuse to run with an empty test set — the sweep would be meaningless.
    if not TEST_PAIRS:
        print("Edit scripts/calibrate_threshold.py and populate TEST_PAIRS with")
        print("10-30 positive pairs and 10-30 hard-negative pairs, then re-run.")
        print("\nTip: export ~50 face photos from your own gallery, hand-label")
        print("the same-person pairs, and use those for calibration.")
        sys.exit(1)

    # Score every pair once, then sweep thresholds over the cached scores.
    results = compute_pair_scores()
    evaluate_thresholds(results)
scripts/convert_to_onnx.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ One-time ONNX conversion + dynamic INT8 quantization.
3
+
4
+ Run locally:
5
+ python scripts/convert_to_onnx.py
6
+
7
+ Produces:
8
+ onnx_models/siglip_vision_int8.onnx
9
+ onnx_models/dinov2_int8.onnx
10
+
11
+ Fix: attn_implementation="eager" disables scaled_dot_product_attention,
12
+ which the legacy PyTorch ONNX exporter cannot trace (TypeError on Sqrt/scale).
13
+ """
14
+ import os
15
+ import torch
16
+ import torch.nn as nn
17
+ from pathlib import Path
18
+ from onnxruntime.quantization import quantize_dynamic, QuantType
19
+
20
+ OUT_DIR = Path("onnx_models")
21
+ OUT_DIR.mkdir(exist_ok=True)
22
+
23
+
24
+ def export_siglip():
25
+ print("Exporting SigLIP vision encoder...")
26
+ from transformers import SiglipVisionModel
27
+
28
+ model = SiglipVisionModel.from_pretrained(
29
+ "google/siglip-base-patch16-224",
30
+ attn_implementation="eager", # disables SDPA — required for ONNX export
31
+ ).eval()
32
+
33
+ class SigLIPWrapper(nn.Module):
34
+ def __init__(self, m):
35
+ super().__init__()
36
+ self.m = m
37
+
38
+ def forward(self, pixel_values):
39
+ return self.m(pixel_values=pixel_values).pooler_output
40
+
41
+ wrapper = SigLIPWrapper(model).eval()
42
+ dummy = torch.randn(1, 3, 224, 224)
43
+
44
+ with torch.no_grad():
45
+ test = wrapper(dummy)
46
+ print(f" Forward pass OK — output shape: {test.shape}")
47
+
48
+ fp32_path = OUT_DIR / "siglip_vision.onnx"
49
+ with torch.no_grad():
50
+ torch.onnx.export(
51
+ wrapper, dummy, fp32_path,
52
+ input_names=["pixel_values"],
53
+ output_names=["image_embeds"],
54
+ dynamic_axes={"pixel_values": {0: "batch"}, "image_embeds": {0: "batch"}},
55
+ opset_version=14,
56
+ do_constant_folding=True,
57
+ )
58
+ print(f" fp32 saved ({fp32_path.stat().st_size // 1024 // 1024} MB)")
59
+
60
+ int8_path = OUT_DIR / "siglip_vision_int8.onnx"
61
+ quantize_dynamic(str(fp32_path), str(int8_path), weight_type=QuantType.QInt8)
62
+ print(f" INT8 saved ({int8_path.stat().st_size // 1024 // 1024} MB)")
63
+ os.remove(fp32_path)
64
+
65
+
66
+ def export_dinov2():
67
+ print("\nExporting DINOv2...")
68
+ from transformers import AutoModel
69
+
70
+ model = AutoModel.from_pretrained(
71
+ "facebook/dinov2-base",
72
+ attn_implementation="eager", # same fix
73
+ ).eval()
74
+
75
+ class DINOv2Wrapper(nn.Module):
76
+ def __init__(self, m):
77
+ super().__init__()
78
+ self.m = m
79
+
80
+ def forward(self, pixel_values):
81
+ return self.m(pixel_values=pixel_values).last_hidden_state[:, 0, :]
82
+
83
+ wrapper = DINOv2Wrapper(model).eval()
84
+ dummy = torch.randn(1, 3, 224, 224)
85
+
86
+ with torch.no_grad():
87
+ test = wrapper(dummy)
88
+ print(f" Forward pass OK — output shape: {test.shape}")
89
+
90
+ fp32_path = OUT_DIR / "dinov2.onnx"
91
+ with torch.no_grad():
92
+ torch.onnx.export(
93
+ wrapper, dummy, fp32_path,
94
+ input_names=["pixel_values"],
95
+ output_names=["cls_features"],
96
+ dynamic_axes={"pixel_values": {0: "batch"}, "cls_features": {0: "batch"}},
97
+ opset_version=14,
98
+ do_constant_folding=True,
99
+ )
100
+ print(f" fp32 saved ({fp32_path.stat().st_size // 1024 // 1024} MB)")
101
+
102
+ int8_path = OUT_DIR / "dinov2_int8.onnx"
103
+ quantize_dynamic(str(fp32_path), str(int8_path), weight_type=QuantType.QInt8)
104
+ print(f" INT8 saved ({int8_path.stat().st_size // 1024 // 1024} MB)")
105
+ os.remove(fp32_path)
106
+
107
+
108
+ if __name__ == "__main__":
109
+ print(f"PyTorch {torch.__version__}")
110
+ export_siglip()
111
+ export_dinov2()
112
+ print("\nDone. Commit onnx_models/*.onnx to your Space repo.")
113
+ for f in sorted(OUT_DIR.glob("*.onnx")):
114
+ print(f" {f.name} ({f.stat().st_size // 1024 // 1024} MB)")
scripts/verify_and_cleanup_old_index.py ADDED
File without changes
src/api/danger.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import time
3
+
4
+ from fastapi import APIRouter, Form, HTTPException, Request, Depends
5
+
6
+ from src.core.config import (
7
+ DEFAULT_CLOUDINARY_URL, DEFAULT_PINECONE_KEY,
8
+ IDX_FACES, IDX_OBJECTS,
9
+ IDX_FACES_ARCFACE, IDX_FACES_ADAFACE,
10
+ USE_SPLIT_FACE_INDEXES,
11
+ )
12
+ from src.core.security import get_verified_keys
13
+ from src.services.db_client import (
14
+ cld_delete_all_paginated, cld_remove_folder, cld_root_folders,
15
+ delete_and_recreate_indexes, pinecone_pool,
16
+ )
17
+ from src.core.logging import log, warn
18
+ from src.common.utils import get_ip, is_default_key
19
+
20
+ router = APIRouter()
21
+
22
+
23
def _all_index_names() -> list[str]:
    """Every index name either mode may have created — for exhaustive cleanup."""
    names = [IDX_FACES, IDX_OBJECTS]
    if USE_SPLIT_FACE_INDEXES:
        # Split mode adds the per-model face indexes on top of the legacy pair.
        names += [IDX_FACES_ARCFACE, IDX_FACES_ADAFACE]
    return names
28
+
29
+
30
@router.post("/api/reset-database")
async def reset_database(
    request: Request,
    user_id: str = Form(""),
    keys: dict = Depends(get_verified_keys),
):
    """Wipe all user data: every Cloudinary asset and folder, then drop and
    recreate the Pinecone indexes.

    Refused (403) when the caller is using the shared demo credentials.
    Cloudinary steps are best-effort (logged and skipped on failure);
    a Pinecone failure is fatal and returns 500.
    """
    ip = get_ip(request)
    start = time.perf_counter()
    log("WARNING", "danger.reset_database.attempt",
        user_id=user_id or "anonymous", ip=ip)

    # Guard: never allow destructive operations against the shared demo keys.
    if is_default_key(keys["pinecone_key"], DEFAULT_PINECONE_KEY) or \
       is_default_key(keys["cloudinary_url"], DEFAULT_CLOUDINARY_URL):
        log("WARNING", "danger.reset_database.blocked",
            user_id=user_id or "anonymous", ip=ip)
        raise HTTPException(403, "Reset is not allowed on the shared demo database.")

    # Step 1: delete all Cloudinary resources (paginated) — best-effort.
    try:
        deleted = await asyncio.to_thread(
            cld_delete_all_paginated, keys["cloudinary_creds"]
        )
        log("INFO", "danger.reset_database.cloudinary_wiped", deleted=deleted)
    except Exception as e:
        warn(f"Cloudinary wipe error: {e}")

    # Step 2: remove the now-empty root folders, in parallel — best-effort.
    try:
        folders_res = await asyncio.to_thread(
            cld_root_folders, keys["cloudinary_creds"]
        )
        folder_tasks = [
            asyncio.to_thread(
                cld_remove_folder, f["name"], keys["cloudinary_creds"]
            )
            for f in folders_res.get("folders", [])
        ]
        if folder_tasks:
            await asyncio.gather(*folder_tasks, return_exceptions=True)
    except Exception as e:
        warn(f"Cloudinary folder cleanup error: {e}")

    # Step 3: drop + recreate the vector indexes. Unlike the Cloudinary
    # steps this one is fatal — without fresh indexes the app is unusable.
    try:
        pc = pinecone_pool.get(keys["pinecone_key"])
        await asyncio.to_thread(delete_and_recreate_indexes, pc)
    except Exception as e:
        log("ERROR", "danger.reset_database.pinecone_error",
            user_id=user_id or "anonymous", ip=ip, error=str(e))
        raise HTTPException(500, f"Pinecone reset error: {e}")

    log("WARNING", "danger.reset_database.complete",
        user_id=user_id or "anonymous", ip=ip,
        duration_ms=round((time.perf_counter() - start) * 1000))
    return {"message": "Database reset complete. All data wiped and indexes recreated."}
82
+
83
+
84
@router.post("/api/delete-account")
async def delete_account(
    request: Request,
    user_id: str = Form(""),
    keys: dict = Depends(get_verified_keys),
):
    """Permanently delete a user's data: all Cloudinary assets and folders,
    plus every Pinecone index across both modes (split + legacy).

    Refused (403) on the shared demo credentials. Unlike reset_database,
    ALL steps here are best-effort — failures are logged and the flow
    continues so a partial failure still deletes as much as possible.
    """
    ip = get_ip(request)
    start = time.perf_counter()
    log("WARNING", "danger.delete_account.attempt",
        user_id=user_id or "anonymous", ip=ip)

    # Guard: never allow destructive operations against the shared demo keys.
    if is_default_key(keys["pinecone_key"], DEFAULT_PINECONE_KEY) or \
       is_default_key(keys["cloudinary_url"], DEFAULT_CLOUDINARY_URL):
        log("WARNING", "danger.delete_account.blocked",
            user_id=user_id or "anonymous", ip=ip)
        raise HTTPException(403, "Account deletion is not allowed on the shared demo database.")

    # Step 1: delete all Cloudinary resources (paginated).
    try:
        deleted = await asyncio.to_thread(
            cld_delete_all_paginated, keys["cloudinary_creds"]
        )
        log("INFO", "danger.delete_account.cloudinary_wiped", deleted=deleted)
    except Exception as e:
        warn(f"Account delete Cloudinary error: {e}")

    # Step 2: remove the now-empty root folders, in parallel.
    try:
        folders_res = await asyncio.to_thread(
            cld_root_folders, keys["cloudinary_creds"]
        )
        folder_tasks = [
            asyncio.to_thread(
                cld_remove_folder, f["name"], keys["cloudinary_creds"]
            )
            for f in folders_res.get("folders", [])
        ]
        if folder_tasks:
            await asyncio.gather(*folder_tasks, return_exceptions=True)
    except Exception as e:
        warn(f"Account delete folders error: {e}")

    # Step 3: delete every index that exists (indexes are NOT recreated —
    # this is account deletion, not a reset).
    try:
        pc = pinecone_pool.get(keys["pinecone_key"])

        def _delete_all_indexes():
            # Only delete indexes that actually exist to avoid API errors.
            existing = {idx.name for idx in pc.list_indexes()}
            for name in _all_index_names():
                if name in existing:
                    pc.delete_index(name)

        await asyncio.to_thread(_delete_all_indexes)
    except Exception as e:
        warn(f"Account delete Pinecone error: {e}")

    log("WARNING", "danger.delete_account.complete",
        user_id=user_id or "anonymous", ip=ip,
        duration_ms=round((time.perf_counter() - start) * 1000))
    return {"message": "Account data deleted. Sign out initiated."}
src/api/explorer.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+
3
+ from fastapi import APIRouter, Form, HTTPException, Request, Depends
4
+
5
+ from src.core.config import (
6
+ DEFAULT_PINECONE_KEY, IDX_FACES, IDX_OBJECTS,
7
+ IDX_FACES_ARCFACE, IDX_FACES_ADAFACE,
8
+ USE_SPLIT_FACE_INDEXES,
9
+ )
10
+ from src.core.security import get_verified_keys
11
+ from src.services.db_client import (
12
+ cld_delete_folder_resources, cld_delete_resource, cld_list_folder_images,
13
+ cld_remove_folder, cld_root_folders, pinecone_pool,
14
+ )
15
+ from src.core.logging import log, warn
16
+ from src.common.utils import cld_thumb_url, get_ip, url_to_public_id
17
+
18
+ router = APIRouter()
19
+
20
+
21
def _get_face_index_names() -> list[str]:
    """Face index names to operate on for the active mode.

    In split mode the legacy index is included as well, so deletes cover
    data that may live in either generation of indexes.
    """
    if not USE_SPLIT_FACE_INDEXES:
        return [IDX_FACES]
    return [IDX_FACES_ARCFACE, IDX_FACES_ADAFACE, IDX_FACES]
27
+
28
+
29
@router.post("/api/categories")
async def get_categories(
    request: Request,
    user_id: str = Form(""),
    keys: dict = Depends(get_verified_keys),
):
    """List root Cloudinary folders as category names; empty list on failure."""
    ip = get_ip(request)
    who = user_id or "anonymous"
    try:
        listing = await asyncio.to_thread(cld_root_folders, keys["cloudinary_creds"])
        names = [entry["name"] for entry in listing.get("folders", [])]
        log("INFO", "categories.fetched",
            user_id=who, ip=ip, count=len(names))
        return {"categories": names}
    except Exception as exc:
        # Degrade gracefully — the UI treats an empty list as "no categories".
        log("ERROR", "categories.error",
            user_id=who, ip=ip, error=str(exc))
        return {"categories": []}
46
+
47
+
48
@router.post("/api/cloudinary/folder-images")
async def list_folder_images(
    request: Request,
    folder_name: str = Form(...),
    user_id: str = Form(""),
    next_cursor: str = Form(""),
    page_size: int = Form(100),
    keys: dict = Depends(get_verified_keys),
):
    """Page through the images of one Cloudinary folder.

    Returns full + thumbnail URLs plus `next_cursor` for cursor-based
    pagination ("" signals the last page).
    """
    ip = get_ip(request)
    # Blocking Cloudinary SDK call — pushed off the event loop.
    result = await asyncio.to_thread(
        cld_list_folder_images,
        folder_name, keys["cloudinary_creds"], next_cursor or None, page_size,
    )
    images = [
        {
            "url": r["secure_url"],
            "thumb_url": cld_thumb_url(r["secure_url"]),
            "public_id": r["public_id"],
        }
        for r in result.get("resources", [])
    ]
    # Normalize a missing cursor to "" so the client gets a stable type.
    next_cur = result.get("next_cursor") or ""
    log("INFO", "explorer.folder_opened",
        user_id=user_id or "anonymous", ip=ip,
        folder=folder_name, count=len(images), has_more=bool(next_cur))
    return {"images": images, "count": len(images), "next_cursor": next_cur}
+
76
+
77
+ @router.post("/api/delete-image")
78
+ async def delete_image(
79
+ request: Request,
80
+ image_url: str = Form(""),
81
+ public_id: str = Form(""),
82
+ user_id: str = Form(""),
83
+ keys: dict = Depends(get_verified_keys),
84
+ ):
85
+ ip = get_ip(request)
86
+ pid = public_id or url_to_public_id(image_url)
87
+ if not pid:
88
+ raise HTTPException(400, "Could not determine public_id.")
89
+
90
+ # Delete from Cloudinary
91
+ await asyncio.to_thread(cld_delete_resource, pid, keys["cloudinary_creds"])
92
+
93
+ # Delete from ALL vector indexes (split + legacy + objects)
94
+ try:
95
+ pc = pinecone_pool.get(keys["pinecone_key"])
96
+ existing = {idx.name for idx in pc.list_indexes()}
97
+
98
+ all_indexes = [IDX_OBJECTS] + _get_face_index_names()
99
+
100
+ for idx_name in all_indexes:
101
+ if idx_name not in existing:
102
+ continue
103
+ try:
104
+ await asyncio.to_thread(
105
+ pc.Index(idx_name).delete,
106
+ filter={"url": {"$eq": image_url}},
107
+ )
108
+ except Exception as e:
109
+ warn(f"Pinecone delete warning on {idx_name}: {e}")
110
+ except Exception as e:
111
+ warn(f"Pinecone delete outer warning: {e}")
112
+
113
+ log("INFO", "explorer.image_deleted",
114
+ user_id=user_id or "anonymous", ip=ip,
115
+ image_url=image_url, public_id=pid)
116
+ return {"message": "Image deleted successfully."}
117
+
118
+
119
+ @router.post("/api/delete-folder")
120
+ async def delete_folder(
121
+ request: Request,
122
+ folder_name: str = Form(...),
123
+ user_id: str = Form(""),
124
+ keys: dict = Depends(get_verified_keys),
125
+ ):
126
+ ip = get_ip(request)
127
+ all_images, cursor = [], None
128
+ while True:
129
+ result = await asyncio.to_thread(
130
+ cld_list_folder_images, folder_name, keys["cloudinary_creds"], cursor
131
+ )
132
+ all_images.extend(result.get("resources", []))
133
+ cursor = result.get("next_cursor")
134
+ if not cursor:
135
+ break
136
+
137
+ await asyncio.to_thread(
138
+ cld_delete_folder_resources, folder_name, keys["cloudinary_creds"]
139
+ )
140
+ await asyncio.to_thread(
141
+ cld_remove_folder, folder_name, keys["cloudinary_creds"]
142
+ )
143
+
144
+ # Delete from ALL vector indexes
145
+ try:
146
+ pc = pinecone_pool.get(keys["pinecone_key"])
147
+ existing = {idx.name for idx in pc.list_indexes()}
148
+
149
+ all_indexes = [IDX_OBJECTS] + _get_face_index_names()
150
+
151
+ for idx_name in all_indexes:
152
+ if idx_name not in existing:
153
+ continue
154
+ idx = pc.Index(idx_name)
155
+ try:
156
+ # Try metadata filter first (fast)
157
+ await asyncio.to_thread(
158
+ idx.delete, filter={"folder": {"$eq": folder_name}}
159
+ )
160
+ except Exception:
161
+ # Fallback: delete by URL one-by-one
162
+ for img in all_images:
163
+ url = img.get("secure_url", "")
164
+ if url:
165
+ try:
166
+ await asyncio.to_thread(
167
+ idx.delete, filter={"url": {"$eq": url}}
168
+ )
169
+ except Exception:
170
+ pass
171
+ except Exception as e:
172
+ warn(f"Pinecone folder delete warning: {e}")
173
+
174
+ log("INFO", "explorer.folder_deleted",
175
+ user_id=user_id or "anonymous", ip=ip,
176
+ folder=folder_name, deleted_count=len(all_images))
177
+ return {
178
+ "message": f"Folder '{folder_name}' and contents deleted.",
179
+ "deleted_count": len(all_images),
180
+ }
src/api/jobs.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ src/api/jobs.py — Phase 3: Async upload job status endpoints
3
+
4
+ GET /api/jobs/{job_id} → poll job status + progress
5
+
6
+ This is the API router. The job queue worker lives in src/services/jobs.py.
7
+ """
8
+
9
+ from fastapi import APIRouter, Depends, HTTPException, Request
10
+
11
+ from src.core.security import get_verified_keys
12
+ from src.core.logging import log
13
+ from src.services.jobs import get_job_status
14
+ from src.common.utils import get_ip
15
+
16
+ router = APIRouter()
17
+
18
+
19
@router.get("/api/jobs/{job_id}")
async def poll_job(
    job_id: str,
    request: Request,
    keys: dict = Depends(get_verified_keys),
):
    """Report status and completion percentage for one async upload job."""
    ip = get_ip(request)

    job = await get_job_status(job_id)
    if not job:
        raise HTTPException(404, f"Job {job_id} not found")

    total_files = job.get("total_files", 0)
    processed_files = job.get("processed_files", 0)
    # Guard the division: a freshly-created job may report zero total files.
    progress = round(processed_files / total_files * 100) if total_files else 0

    status = job.get("status", "unknown")
    payload = {
        "job_id": job_id,
        "status": status,
        "total_files": total_files,
        "processed_files": processed_files,
        "progress_pct": progress,
        "status_url": f"/api/jobs/{job_id}",
    }
    if status == "completed":
        payload["result"] = job.get("result", {})
    if status == "failed":
        payload["error"] = job.get("error", "unknown error")

    log("INFO", "jobs.poll", ip=ip, job_id=job_id, status=payload["status"])
    return payload
src/api/people.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ src/api/people.py — Phase 3: People View endpoints
3
+
4
+ GET /api/people → list all identity clusters
5
+ GET /api/people/{cluster_id} → all images in that cluster
6
+ PATCH /api/people/{cluster_id} → rename a cluster
7
+ POST /api/reindex-clusters → trigger full re-cluster
8
+
9
+ All endpoints require the standard pinecone/cloudinary auth headers
10
+ (via get_verified_keys). user_id is derived from the Pinecone key hash
11
+ so different users don't see each other's clusters even though they share
12
+ the same Supabase table.
13
+ """
14
+
15
+ import hashlib
16
+
17
+ from fastapi import APIRouter, Body, Depends, HTTPException, Request
18
+
19
+ from src.core.config import USE_CLUSTER_AWARE_SEARCH
20
+ from src.core.security import get_verified_keys
21
+ from src.core.logging import log
22
+ from src.services.clustering import (
23
+ get_people,
24
+ get_person_images,
25
+ rename_cluster,
26
+ run_clustering,
27
+ )
28
+ from src.services.db_client import pinecone_pool, ensure_indexes
29
+ from src.common.utils import get_ip
30
+
31
+ import asyncio
32
+
33
+ router = APIRouter()
34
+
35
+
36
+ def _user_id_from_key(pinecone_key: str) -> str:
37
+ """
38
+ Derives a stable, opaque user_id from the Pinecone API key.
39
+ Users bring their own key, so this is the closest we have to an identity.
40
+ Short SHA256 prefix is enough for row isolation — not a security measure.
41
+ """
42
+ return hashlib.sha256(pinecone_key.encode()).hexdigest()[:16]
43
+
44
+
45
+ @router.get("/api/people")
46
+ async def list_people(
47
+ request: Request,
48
+ keys: dict = Depends(get_verified_keys),
49
+ ):
50
+ """
51
+ Returns all identity clusters for the authenticated user, ordered by
52
+ face_count descending (most-seen people first).
53
+
54
+ Response shape:
55
+ [
56
+ {
57
+ "cluster_id": "uuid",
58
+ "name": "Mom" | null,
59
+ "face_count": 42,
60
+ "representative_face_crop": "<base64 jpg>"
61
+ },
62
+ ...
63
+ ]
64
+ """
65
+ ip = get_ip(request)
66
+ user_id = _user_id_from_key(keys["pinecone_key"])
67
+
68
+ try:
69
+ people = await get_people(user_id)
70
+ log("INFO", "people.list", ip=ip, user_id=user_id, count=len(people))
71
+ return {"people": people, "total": len(people)}
72
+ except Exception as e:
73
+ log("ERROR", "people.list.error", ip=ip, user_id=user_id, error=str(e))
74
+ raise HTTPException(500, f"Failed to fetch people: {e}")
75
+
76
+
77
+ @router.get("/api/people/{cluster_id}")
78
+ async def get_cluster_images(
79
+ cluster_id: str,
80
+ request: Request,
81
+ keys: dict = Depends(get_verified_keys),
82
+ ):
83
+ """
84
+ Returns all images belonging to a specific identity cluster.
85
+
86
+ Response shape:
87
+ {
88
+ "cluster_id": "uuid",
89
+ "images": [
90
+ {"url": "...", "folder": "...", "face_crop": "<base64>"},
91
+ ...
92
+ ],
93
+ "total": 12
94
+ }
95
+ """
96
+ ip = get_ip(request)
97
+ user_id = _user_id_from_key(keys["pinecone_key"])
98
+
99
+ try:
100
+ images = await get_person_images(cluster_id, user_id)
101
+ log("INFO", "people.cluster_images",
102
+ ip=ip, user_id=user_id, cluster_id=cluster_id, count=len(images))
103
+ return {
104
+ "cluster_id": cluster_id,
105
+ "images": images,
106
+ "total": len(images),
107
+ }
108
+ except Exception as e:
109
+ log("ERROR", "people.cluster_images.error",
110
+ ip=ip, user_id=user_id, cluster_id=cluster_id, error=str(e))
111
+ raise HTTPException(500, f"Failed to fetch cluster images: {e}")
112
+
113
+
114
+ @router.patch("/api/people/{cluster_id}")
115
+ async def update_cluster_name(
116
+ cluster_id: str,
117
+ request: Request,
118
+ name: str = Body(..., embed=True),
119
+ keys: dict = Depends(get_verified_keys),
120
+ ):
121
+ """
122
+ Assigns a human-readable name to a cluster.
123
+
124
+ Request body (JSON): {"name": "Mom"}
125
+ Response: {"cluster_id": "uuid", "name": "Mom", "ok": true}
126
+ """
127
+ ip = get_ip(request)
128
+ user_id = _user_id_from_key(keys["pinecone_key"])
129
+
130
+ if not name or len(name.strip()) == 0:
131
+ raise HTTPException(400, "name must be a non-empty string")
132
+ if len(name) > 100:
133
+ raise HTTPException(400, "name must be 100 characters or fewer")
134
+
135
+ try:
136
+ await rename_cluster(cluster_id, name.strip(), user_id)
137
+ log("INFO", "people.rename",
138
+ ip=ip, user_id=user_id, cluster_id=cluster_id, name=name)
139
+ return {"cluster_id": cluster_id, "name": name.strip(), "ok": True}
140
+ except Exception as e:
141
+ log("ERROR", "people.rename.error",
142
+ ip=ip, user_id=user_id, cluster_id=cluster_id, error=str(e))
143
+ raise HTTPException(500, f"Failed to rename cluster: {e}")
144
+
145
+
146
+ @router.post("/api/reindex-clusters")
147
+ async def reindex_clusters(
148
+ request: Request,
149
+ keys: dict = Depends(get_verified_keys),
150
+ ):
151
+ """
152
+ Triggers a full HDBSCAN re-cluster of the user's face vectors.
153
+
154
+ This is a synchronous (blocking) endpoint — clustering typically takes
155
+ 5-30 seconds depending on library size. For large libraries, consider
156
+ running this in a background task (Phase 4).
157
+
158
+ Response:
159
+ {
160
+ "status": "ok",
161
+ "total_vectors": 3200,
162
+ "clusters_found": 14,
163
+ "noise_vectors": 80
164
+ }
165
+ """
166
+ ip = get_ip(request)
167
+ user_id = _user_id_from_key(keys["pinecone_key"])
168
+
169
+ log("INFO", "people.reindex_start", ip=ip, user_id=user_id)
170
+
171
+ try:
172
+ pc = pinecone_pool.get(keys["pinecone_key"])
173
+
174
+ # Ensure indexes exist before fetching vectors
175
+ await asyncio.to_thread(ensure_indexes, pc)
176
+
177
+ result = await run_clustering(pc, user_id)
178
+ log("INFO", "people.reindex_done", ip=ip, user_id=user_id, **result)
179
+ return result
180
+
181
+ except RuntimeError as e:
182
+ # e.g. hdbscan not installed
183
+ raise HTTPException(503, str(e))
184
+ except Exception as e:
185
+ log("ERROR", "people.reindex_error", ip=ip, user_id=user_id, error=str(e))
186
+ raise HTTPException(500, f"Clustering failed: {e}")
src/api/search.py ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import hashlib
3
+ import time
4
+ import traceback
5
+
6
+ from fastapi import APIRouter, File, Form, HTTPException, Request, UploadFile, Depends
7
+
8
+ from src.core.config import (
9
+ DEFAULT_PINECONE_KEY, IDX_FACES, IDX_OBJECTS,
10
+ IDX_FACES_ARCFACE, IDX_FACES_ADAFACE,
11
+ USE_SPLIT_FACE_INDEXES, USE_CLUSTER_AWARE_SEARCH,
12
+ )
13
+ from src.core.security import get_verified_keys
14
+ from src.services.db_client import (
15
+ merge_face_results, merge_object_results,
16
+ pinecone_pool, search_faces, search_faces_split, search_objects,
17
+ ensure_indexes,
18
+ )
19
+ from src.core.logging import log
20
+ from src.common.utils import face_ui_score, get_ip, is_default_key, to_list
21
+
22
+ router = APIRouter()
23
+
24
+
25
@router.post("/api/search")
async def search_database(
    request: Request,
    file: UploadFile = File(...),
    detect_faces: bool = Form(True),
    user_id: str = Form(""),
    keys: dict = Depends(get_verified_keys),
):
    """Embed the uploaded query image, then fan out to the face and/or
    object Pinecone indexes and return merged results."""
    ip = get_ip(request)
    t0 = time.perf_counter()
    mode = "guest" if is_default_key(keys["pinecone_key"], DEFAULT_PINECONE_KEY) else "personal"

    log("INFO", "search.start",
        user_id=user_id or "anonymous", ip=ip, mode=mode,
        filename=file.filename, detect_faces=detect_faces)

    try:
        payload = await file.read()
        ai_manager = request.app.state.ai
        sem = request.app.state.ai_semaphore

        # Embed the query image; the semaphore caps concurrent inference.
        async with sem:
            query_vectors = await ai_manager.process_image_bytes_async(
                payload, detect_faces=detect_faces
            )

        inference_ms = round((time.perf_counter() - t0) * 1000)
        face_vecs = [v for v in query_vectors if v["type"] == "face"]
        object_vecs = [v for v in query_vectors if v["type"] == "object"]

        log("INFO", "search.inference_done",
            user_id=user_id or "anonymous", ip=ip, mode=mode,
            face_vecs=len(face_vecs), obj_vecs=len(object_vecs),
            inference_ms=inference_ms)

        pc = pinecone_pool.get(keys["pinecone_key"])
        # Opaque per-user identity derived from the Pinecone key — must match
        # what clustering.py writes so cluster lookups line up.
        cluster_uid = hashlib.sha256(keys["pinecone_key"].encode()).hexdigest()[:16]

        # Self-heal missing indexes (user may never have run verify-keys).
        try:
            created = await asyncio.to_thread(ensure_indexes, pc)
            if created:
                log("INFO", "search.indexes_auto_created",
                    user_id=user_id or "anonymous", ip=ip, created=created)
                await asyncio.sleep(8)
        except Exception as e:
            log("ERROR", "search.ensure_indexes_failed",
                user_id=user_id or "anonymous", ip=ip, error=str(e))

        idx_obj = pc.Index(IDX_OBJECTS)
        idx_arcface = idx_adaface = idx_face_legacy = None
        if USE_SPLIT_FACE_INDEXES:
            idx_arcface = pc.Index(IDX_FACES_ARCFACE)
            idx_adaface = pc.Index(IDX_FACES_ADAFACE)
        else:
            idx_face_legacy = pc.Index(IDX_FACES)

        if detect_faces and face_vecs:
            return await _run_face_search(
                face_vecs, object_vecs,
                idx_arcface, idx_adaface, idx_face_legacy, idx_obj,
                t0, user_id, ip, mode,
                pc=pc, cluster_uid=cluster_uid,
            )
        return await _run_object_search(
            object_vecs, idx_obj, t0, user_id, ip, mode
        )

    except HTTPException:
        raise
    except Exception as e:
        log("ERROR", "search.error",
            user_id=user_id or "anonymous", ip=ip, mode=mode,
            error=str(e), traceback=traceback.format_exc()[-800:])
        raise HTTPException(500, str(e))
107
+
108
+
109
+ async def _query_face_split(fv, idx_arcface, idx_adaface, pc=None, cluster_uid=None):
110
+ """Parallel query to ArcFace + AdaFace indexes, then fuse.
111
+ When USE_CLUSTER_AWARE_SEARCH is on, expands results to include every
112
+ image in the matched person clusters for near-100% recall."""
113
+ arcface_vec = to_list(fv["arcface_vector"])
114
+ adaface_vec = to_list(fv.get("adaface_vector")) if fv.get("has_adaface") else None
115
+
116
+ try:
117
+ image_map = await asyncio.to_thread(
118
+ search_faces_split,
119
+ idx_arcface, idx_adaface,
120
+ arcface_vec, adaface_vec,
121
+ )
122
+ except Exception as e:
123
+ if "404" in str(e):
124
+ raise HTTPException(
125
+ 404,
126
+ "Face indexes not found. Go to Settings → Verify & Save to create them."
127
+ )
128
+ raise
129
+
130
+ # Only expand clusters for high-confidence matches (fused_score >= 0.50).
131
+ # A borderline match at 0.40 could be a different person; cluster expansion
132
+ # would then pull in an entire wrong identity — exactly what we want to avoid.
133
+ CLUSTER_EXPAND_MIN_SCORE = 0.50
134
+ high_confidence = {
135
+ url: d for url, d in image_map.items()
136
+ if d.get("fused_score", 0.0) >= CLUSTER_EXPAND_MIN_SCORE
137
+ }
138
+ if USE_CLUSTER_AWARE_SEARCH and high_confidence and pc is not None and cluster_uid:
139
+ from src.services.clustering import search_cluster_aware
140
+ image_map = await search_cluster_aware(pc, high_confidence, cluster_uid)
141
+
142
+ return _format_face_group(fv, image_map, scoring="fused")
143
+
144
+
145
+ async def _query_face_legacy(fv, idx_face):
146
+ """Legacy single-index query for pre-Phase-2 data."""
147
+ vec = to_list(fv["vector"])
148
+ det_score = fv.get("det_score", 1.0)
149
+ try:
150
+ image_map = await asyncio.to_thread(search_faces, idx_face, vec, det_score)
151
+ except Exception as e:
152
+ if "404" in str(e):
153
+ raise HTTPException(404, "Pinecone index not found.")
154
+ raise
155
+ return _format_face_group(fv, image_map, scoring="legacy")
156
+
157
+
158
+ def _format_face_group(fv, image_map, scoring: str):
159
+ """Shape the response the same way regardless of scoring backend."""
160
+ matches = []
161
+ for url, d in image_map.items():
162
+ if scoring == "fused":
163
+ display_score = face_ui_score(d["fused_score"], mode="fused")
164
+ raw_score = round(d["fused_score"], 4)
165
+ else:
166
+ display_score = face_ui_score(d["raw_score"], mode="legacy")
167
+ raw_score = round(d["raw_score"], 4)
168
+
169
+ matches.append({
170
+ "url": url,
171
+ "score": display_score,
172
+ "raw_score": raw_score,
173
+ "arcface_score": round(d.get("arcface_score", 0), 4),
174
+ "adaface_score": round(d.get("adaface_score", 0), 4),
175
+ "face_crop": d["face_crop"],
176
+ "folder": d["folder"],
177
+ "caption": "👤 Verified Identity",
178
+ })
179
+
180
+ matches.sort(key=lambda x: x["score"], reverse=True)
181
+
182
+ return {
183
+ "query_face_idx": fv.get("face_idx", 0),
184
+ "query_face_crop": fv.get("face_crop", ""),
185
+ "query_bbox": fv.get("bbox", []),
186
+ "det_score": fv.get("det_score", 1.0),
187
+ "face_width_px": fv.get("face_width_px", 0),
188
+ "matches": matches,
189
+ }
190
+
191
+
192
+ async def _run_face_search(
193
+ face_vectors, object_vectors,
194
+ idx_arcface, idx_adaface, idx_face_legacy, idx_obj,
195
+ start, user_id, ip, mode,
196
+ pc=None, cluster_uid=None,
197
+ ) -> dict:
198
+ # Build face query tasks
199
+ if USE_SPLIT_FACE_INDEXES:
200
+ face_tasks = [
201
+ _query_face_split(fv, idx_arcface, idx_adaface, pc=pc, cluster_uid=cluster_uid)
202
+ for fv in face_vectors
203
+ ]
204
+ else:
205
+ face_tasks = [_query_face_legacy(fv, idx_face_legacy) for fv in face_vectors]
206
+
207
+ # Object queries run in parallel with face queries
208
+ async def _query_obj_single(ov):
209
+ vec = to_list(ov["vector"])
210
+ try:
211
+ return await asyncio.to_thread(search_objects, idx_obj, vec)
212
+ except Exception as e:
213
+ if "404" in str(e):
214
+ raise HTTPException(404, "Pinecone index not found.")
215
+ raise
216
+
217
+ obj_tasks = [_query_obj_single(ov) for ov in object_vectors]
218
+ all_results = await asyncio.gather(*face_tasks, *obj_tasks)
219
+
220
+ raw_groups = list(all_results[:len(face_tasks)])
221
+ obj_nested = list(all_results[len(face_tasks):])
222
+
223
+ merged_face = merge_face_results(raw_groups)
224
+ merged_objects = merge_object_results(obj_nested)
225
+ face_groups = [g for g in raw_groups if g.get("matches")]
226
+
227
+ duration_ms = round((time.perf_counter() - start) * 1000)
228
+ log("INFO", "search.complete",
229
+ user_id=user_id or "anonymous", ip=ip, mode=mode,
230
+ lanes=["face", "object"],
231
+ face_groups=len(face_groups),
232
+ face_results=len(merged_face),
233
+ object_results=len(merged_objects),
234
+ duration_ms=duration_ms,
235
+ index_mode="split" if USE_SPLIT_FACE_INDEXES else "legacy")
236
+
237
+ return {
238
+ "mode": "face",
239
+ "face_groups": face_groups,
240
+ "results": merged_face,
241
+ "object_results": merged_objects,
242
+ }
243
+
244
+
245
+ async def _run_object_search(object_vectors, idx_obj, start, user_id, ip, mode) -> dict:
246
+ if not object_vectors:
247
+ return {"mode": "object", "results": [], "face_groups": []}
248
+
249
+ async def _query_obj(ov):
250
+ vec = to_list(ov["vector"])
251
+ try:
252
+ return await asyncio.to_thread(search_objects, idx_obj, vec)
253
+ except Exception as e:
254
+ if "404" in str(e):
255
+ raise HTTPException(404, "Pinecone index not found.")
256
+ raise
257
+
258
+ nested = await asyncio.gather(*[_query_obj(ov) for ov in object_vectors])
259
+ final = merge_object_results(nested)
260
+
261
+ duration_ms = round((time.perf_counter() - start) * 1000)
262
+ log("INFO", "search.complete",
263
+ user_id=user_id or "anonymous", ip=ip, mode=mode,
264
+ lanes=["object"], results=len(final), duration_ms=duration_ms)
265
+
266
+ return {"mode": "object", "results": final, "face_groups": []}
src/api/system.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import time
3
+ from datetime import datetime, timezone
4
+
5
+ from fastapi import APIRouter, Form, HTTPException, Request, Depends
6
+
7
+ from src.core.config import DEFAULT_PINECONE_KEY
8
+ from src.core.security import get_verified_keys
9
+ from src.services.db_client import cld_ping, ensure_indexes, pinecone_pool
10
+ from src.core.logging import log
11
+ from src.common.utils import get_ip, is_default_key
12
+
13
+ router = APIRouter()
14
+
15
+ @router.get("/")
16
+ async def root():
17
+ return {"status": "ok"}
18
+
19
@router.get("/api/health")
async def health():
    """Health probe returning an ISO-8601 UTC timestamp."""
    now = datetime.now(timezone.utc)
    return {"status": "ok", "timestamp": now.isoformat()}
22
+
23
@router.post("/api/log")
async def frontend_log(
    request: Request,
    event: str = Form(...),
    user_id: str = Form(""),
    page: str = Form(""),
    metadata: str = Form("{}"),
):
    """Relay a frontend telemetry event into the structured backend log."""
    import json
    ip = get_ip(request)
    try:
        extra = json.loads(metadata) if metadata else {}
    except Exception:
        # Malformed client JSON — still record the event itself.
        extra = {}
    log(
        "INFO", f"frontend.{event}",
        user_id=user_id or "anonymous",
        page=page, ip=ip,
        ua=request.headers.get("User-Agent", "")[:120],
        **extra,
    )
    return {"ok": True}
45
+
46
@router.post("/api/verify-keys")
async def verify_keys(
    request: Request,
    user_id: str = Form(""),
    keys: dict = Depends(get_verified_keys)
):
    """Validate the caller's Cloudinary and Pinecone credentials, creating
    any missing Pinecone indexes along the way."""
    ip = get_ip(request)
    is_guest = is_default_key(keys["pinecone_key"], DEFAULT_PINECONE_KEY)
    mode = "guest" if is_guest else "personal"
    started = time.perf_counter()
    log("INFO", "settings.verify_keys.start", user_id=user_id or "anonymous", mode=mode, ip=ip)

    # Cloudinary first: a cheap ping proves the environment URL works.
    try:
        await asyncio.to_thread(cld_ping, keys["cloudinary_creds"])
    except Exception as e:
        log("ERROR", "settings.verify_keys.cloudinary_fail", user_id=user_id or "anonymous", ip=ip, error=str(e))
        raise HTTPException(400, "Invalid Cloudinary Environment URL.")

    # Pinecone: connect and ensure all required indexes exist.
    indexes_created: list[str] = []
    try:
        pc = pinecone_pool.get(keys["pinecone_key"])
        indexes_created = await asyncio.to_thread(ensure_indexes, pc)
    except Exception as e:
        err = str(e)
        if "401" in err or "unauthorized" in err.lower():
            clean = "Invalid Pinecone API Key."
        else:
            clean = f"Pinecone Error: {err}"
        log("ERROR", "settings.verify_keys.pinecone_fail", user_id=user_id or "anonymous", ip=ip, error=clean)
        raise HTTPException(400, clean)

    log("INFO", "settings.verify_keys.success", user_id=user_id or "anonymous", mode=mode, ip=ip,
        indexes_created=indexes_created, duration_ms=round((time.perf_counter() - started) * 1000))
    return {"message": "Keys verified and indexes ready!"}
src/api/upload.py ADDED
@@ -0,0 +1,327 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import io
3
+ import time
4
+ import uuid
5
+ from typing import List
6
+
7
+ from fastapi import APIRouter, File, Form, HTTPException, Query, Request, UploadFile, Depends
8
+
9
+ from src.core.config import (
10
+ IDX_FACES, IDX_OBJECTS,
11
+ IDX_FACES_ARCFACE, IDX_FACES_ADAFACE,
12
+ MAX_FILES_PER_UPLOAD, USE_SPLIT_FACE_INDEXES,
13
+ USE_ASYNC_UPLOADS, CLUSTER_AUTO_TRIGGER_EVERY,
14
+ )
15
+ from src.core.security import get_verified_keys
16
+ from src.services.db_client import cld_upload, pinecone_pool, ensure_indexes
17
+ from src.core.logging import log
18
+ from src.common.utils import get_ip, standardize_category_name, to_list
19
+
20
+ router = APIRouter()
21
+
22
+
23
def chunker(seq, size):
    """Yield successive slices of *seq*, each at most *size* elements long."""
    return (seq[start:start + size] for start in range(0, len(seq), size))
25
+
26
+
27
+ # ──────────────────────────────────────────────────────────────
28
+ # Per-file processor — Cloudinary upload + AI inference only.
29
+ # Vectors are RETURNED, not upserted here. Caller batches all
30
+ # files' vectors into single Pinecone upserts (same as Phase 2).
31
+ # ──────────────────────────────────────────────────────────────
32
async def _process_one_file(
    *,
    file_bytes: bytes,
    folder: str,
    detect_faces: bool,
    keys: dict,
    ai,
    sem,
) -> tuple[str, str, list]:
    """Upload one image to Cloudinary while running AI inference on it in
    parallel. Returns (file_id, image_url, vectors); the caller batches the
    vectors into Pinecone itself."""
    file_id = uuid.uuid4().hex

    async def _infer():
        # The semaphore bounds how many images are in the model at once.
        async with sem:
            return await ai.process_image_bytes_async(file_bytes, detect_faces=detect_faces)

    upload_task = asyncio.to_thread(
        cld_upload, io.BytesIO(file_bytes), folder, keys["cloudinary_creds"]
    )
    cld_res, vectors = await asyncio.gather(upload_task, _infer())
    return file_id, cld_res["secure_url"], vectors
54
+
55
+
56
+ # ──────────────────────────────────────────────────────────────
57
+ # Shared batch-upsert logic — used by sync upload AND job worker
58
+ # ──────────────────────────────────────────────────────────────
59
async def _batch_upsert_all(
    *, results: list, folder: str, pc,
) -> dict:
    """
    Group all files' vectors by destination index and push them with one
    batched upsert path per index (chunks of 200) instead of per-file calls.

    *results* is [(file_id, image_url, vectors), ...] as produced by
    _process_one_file.
    """
    buckets = {"arcface": [], "adaface": [], "legacy": [], "object": []}
    uploaded_urls = []

    for file_id, image_url, vectors in results:
        uploaded_urls.append(image_url)
        for i, vec in enumerate(vectors):
            vector_id = f"{file_id}_{i}"

            if vec["type"] == "face":
                face_meta = {
                    "url": image_url,
                    "folder": folder,
                    "face_crop": vec.get("face_crop", ""),
                    "det_score": float(vec.get("det_score", 1.0)),
                    "face_width_px": int(vec.get("face_width_px", 0)),
                    "blur_score": float(vec.get("blur_score", 100.0)),
                }
                if USE_SPLIT_FACE_INDEXES:
                    buckets["arcface"].append({
                        "id": vector_id,
                        "values": to_list(vec["arcface_vector"]),
                        "metadata": face_meta,
                    })
                    if vec.get("has_adaface"):
                        buckets["adaface"].append({
                            "id": vector_id,
                            "values": to_list(vec["adaface_vector"]),
                            "metadata": face_meta,
                        })
                else:
                    buckets["legacy"].append({
                        "id": vector_id,
                        "values": to_list(vec["vector"]),
                        "metadata": face_meta,
                    })
            else:
                buckets["object"].append({
                    "id": vector_id,
                    "values": to_list(vec["vector"]),
                    "metadata": {"url": image_url, "folder": folder},
                })

    def _chunked_upsert(index, items):
        # Keep each Pinecone call to 200 vectors to stay under payload limits.
        for chunk in chunker(items, 200):
            index.upsert(vectors=chunk)

    pending = []
    if USE_SPLIT_FACE_INDEXES:
        if buckets["arcface"]:
            pending.append(asyncio.to_thread(
                _chunked_upsert, pc.Index(IDX_FACES_ARCFACE), buckets["arcface"]))
        if buckets["adaface"]:
            pending.append(asyncio.to_thread(
                _chunked_upsert, pc.Index(IDX_FACES_ADAFACE), buckets["adaface"]))
    elif buckets["legacy"]:
        pending.append(asyncio.to_thread(
            _chunked_upsert, pc.Index(IDX_FACES), buckets["legacy"]))
    if buckets["object"]:
        pending.append(asyncio.to_thread(
            _chunked_upsert, pc.Index(IDX_OBJECTS), buckets["object"]))

    if pending:
        await asyncio.gather(*pending)

    return {
        "uploaded_urls": uploaded_urls,
        "arcface_vecs": len(buckets["arcface"]),
        "adaface_vecs": len(buckets["adaface"]),
        "legacy_face_vecs": len(buckets["legacy"]),
        "object_vecs": len(buckets["object"]),
    }
145
+
146
+
147
+ # ──────────────────────────────────────────────────────────────
148
+ # Upload endpoint
149
+ # ──────────────────────────────────────────────────────────────
150
+ @router.post("/api/upload")
151
+ async def upload_images(
152
+ request: Request,
153
+ files: List[UploadFile] = File(...),
154
+ folder_name: str = Form(...),
155
+ detect_faces: bool = Form(True),
156
+ user_id: str = Form(""),
157
+ async_mode: bool = Query(False, alias="async"),
158
+ keys: dict = Depends(get_verified_keys),
159
+ ):
160
+ ip = get_ip(request)
161
+ start = time.perf_counter()
162
+
163
+ if len(files) > MAX_FILES_PER_UPLOAD:
164
+ raise HTTPException(400, f"Too many files. Max {MAX_FILES_PER_UPLOAD} per request.")
165
+
166
+ folder = standardize_category_name(folder_name)
167
+ pc = pinecone_pool.get(keys["pinecone_key"])
168
+
169
+ # Auto-create indexes if missing. Idempotent.
170
+ try:
171
+ created = await asyncio.to_thread(ensure_indexes, pc)
172
+ if created:
173
+ log("INFO", "upload.indexes_auto_created",
174
+ user_id=user_id or "anonymous", ip=ip, created=created)
175
+ await asyncio.sleep(8)
176
+ except Exception as e:
177
+ log("ERROR", "upload.ensure_indexes_failed",
178
+ user_id=user_id or "anonymous", ip=ip, error=str(e))
179
+ raise HTTPException(500, f"Failed to initialize indexes: {e}")
180
+
181
+ # ── Async mode: enqueue job, return immediately ──────────────
182
+ if async_mode and USE_ASYNC_UPLOADS:
183
+ from src.services.jobs import create_job
184
+
185
+ files_data = []
186
+ for f in files:
187
+ b = await f.read()
188
+ files_data.append({"bytes": list(b), "filename": f.filename})
189
+
190
+ job_payload = {
191
+ "files_data": files_data,
192
+ "folder": folder,
193
+ "detect_faces": detect_faces,
194
+ "user_id": user_id or "anonymous",
195
+ "keys": {
196
+ "pinecone_key": keys["pinecone_key"],
197
+ "cloudinary_creds": keys["cloudinary_creds"],
198
+ },
199
+ }
200
+
201
+ job_id = await create_job(
202
+ user_id=user_id or "anonymous",
203
+ folder=folder,
204
+ total_files=len(files),
205
+ job_payload=job_payload,
206
+ )
207
+
208
+ log("INFO", "upload.async_enqueued",
209
+ user_id=user_id or "anonymous", ip=ip,
210
+ job_id=job_id, files=len(files), folder=folder)
211
+
212
+ return {
213
+ "message": "Upload queued",
214
+ "job_id": job_id,
215
+ "status_url": f"/api/jobs/{job_id}",
216
+ "total_files": len(files),
217
+ }
218
+
219
+ # ── Synchronous mode (default, matches original Phase 2 perf) ─
220
+ ai = request.app.state.ai
221
+ sem = request.app.state.ai_semaphore
222
+
223
+ # Read all files in parallel first, THEN fan out to _process_one_file.
224
+ # Doing `await f.read()` inside the list-comp would serialize reads.
225
+ file_bytes_list = await asyncio.gather(*[f.read() for f in files])
226
+
227
+ results = await asyncio.gather(*[
228
+ _process_one_file(
229
+ file_bytes=fb,
230
+ folder=folder,
231
+ detect_faces=detect_faces,
232
+ keys=keys,
233
+ ai=ai,
234
+ sem=sem,
235
+ )
236
+ for fb in file_bytes_list
237
+ ])
238
+
239
+ summary = await _batch_upsert_all(results=results, folder=folder, pc=pc)
240
+
241
+ duration_ms = round((time.perf_counter() - start) * 1000)
242
+ log(
243
+ "INFO", "upload.complete",
244
+ user_id=user_id or "anonymous", ip=ip,
245
+ files=len(files), folder=folder, duration_ms=duration_ms,
246
+ mode="split" if USE_SPLIT_FACE_INDEXES else "legacy",
247
+ arcface_vecs=summary["arcface_vecs"],
248
+ adaface_vecs=summary["adaface_vecs"],
249
+ legacy_face_vecs=summary["legacy_face_vecs"],
250
+ object_vecs=summary["object_vecs"],
251
+ )
252
+
253
+ # Log this sync upload to upload_jobs so the table isn't empty.
254
+ # Sync uploads bypass the job queue entirely; this fire-and-forget task
255
+ # writes a completed row for visibility without changing the upload flow.
256
+ asyncio.create_task(
257
+ _log_sync_upload(user_id=user_id or "anonymous", folder=folder, summary=summary)
258
+ )
259
+
260
+ # Auto-trigger clustering if threshold crossed (fire and forget)
261
+ if CLUSTER_AUTO_TRIGGER_EVERY > 0 and summary["arcface_vecs"] > 0:
262
+ asyncio.create_task(
263
+ _maybe_trigger_clustering(pc, user_id, keys["pinecone_key"])
264
+ )
265
+
266
+ return {
267
+ "message": "Done!",
268
+ "urls": summary["uploaded_urls"],
269
+ "summary": {
270
+ "files": len(files),
271
+ "face_vectors": summary["arcface_vecs"] or summary["legacy_face_vecs"],
272
+ "adaface_vectors": summary["adaface_vecs"],
273
+ "object_vectors": summary["object_vecs"],
274
+ "index_mode": "split" if USE_SPLIT_FACE_INDEXES else "legacy",
275
+ },
276
+ }
277
+
278
+
279
async def _log_sync_upload(user_id: str, folder: str, summary: dict) -> None:
    """Record a completed synchronous upload as a row in upload_jobs.

    Sync uploads never touch the job queue; without this row the table stays
    empty and gives no audit trail of what was indexed.
    """
    import json
    from src.services.jobs import _supa_insert

    n_files = len(summary["uploaded_urls"])
    row = {
        "job_id": uuid.uuid4().hex,
        "user_id": user_id,
        "folder": folder,
        "status": "completed",
        "total_files": n_files,
        "processed_files": n_files,
        "result": json.dumps({
            "face_vectors": summary["arcface_vecs"] or summary["legacy_face_vecs"],
            "adaface_vectors": summary["adaface_vecs"],
            "object_vectors": summary["object_vecs"],
        }),
    }
    try:
        await _supa_insert("upload_jobs", row)
    except Exception:
        # Supabase may be unconfigured — never let bookkeeping fail an upload.
        pass
302
+
303
+
304
async def _maybe_trigger_clustering(pc, user_id: str, pinecone_key: str) -> None:
    """Background task: bump the per-user upload counter and run clustering
    once it crosses CLUSTER_AUTO_TRIGGER_EVERY."""
    try:
        from src.services.cache import cache
        from src.services.clustering import run_clustering
        import hashlib

        uid = hashlib.sha256(pinecone_key.encode()).hexdigest()[:16]
        counter_key = f"upload_count:{uid}"
        count = await cache.incr(counter_key)

        if count >= CLUSTER_AUTO_TRIGGER_EVERY:
            # Reset the counter before the (slow) clustering run starts.
            await cache.delete(counter_key)
            log("INFO", "upload.auto_cluster_triggered",
                user_id=user_id or "anonymous", trigger_count=count)
            await run_clustering(pc, uid)
    except Exception as e:
        log("ERROR", "upload.auto_cluster_error", error=str(e))
322
+
323
+
324
+ # ──────────────────────────────────────────────────────────────
325
+ # Exported for jobs.py worker — same batched upsert path
326
+ # ──────────────────────────────────────────────────────────────
327
+ __all__ = ["upload_images", "_process_one_file", "_batch_upsert_all"]
src/common/utils.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import math
3
+ from fastapi import Request
4
+
5
+
6
def get_ip(request: Request) -> str:
    """Best-effort client IP: first X-Forwarded-For hop, else the socket peer.

    NOTE(review): X-Forwarded-For is client-controllable unless a trusted
    proxy strips it — fine for logging, don't use for auth decisions.
    """
    xff = request.headers.get("X-Forwarded-For")
    if xff:
        return xff.split(",")[0].strip()
    client = request.client
    return client.host if client else "unknown"
11
+
12
+
13
def is_default_key(key: str, default: str) -> bool:
    """True when *key* matches *default* ignoring surrounding whitespace.

    Empty/None on either side is never a match — an unset default must not
    make every empty user key look "default".
    """
    return bool(key) and bool(default) and key.strip() == default.strip()
17
+
18
+
19
def get_cloudinary_creds(url: str) -> dict:
    """Parse a ``cloudinary://api_key:api_secret@cloud_name`` URL.

    Returns ``{"cloud_name", "api_key", "api_secret"}`` on success and an
    empty dict for anything malformed, so callers can uniformly treat
    "no creds" without try/except.

    Fix over the original: ``split("@")`` / ``split(":")`` raised ValueError
    (silently swallowed into ``{}``) whenever the secret contained an extra
    ``:`` or ``@``. We now split on the LAST ``@`` and the FIRST ``:`` so
    such credentials parse correctly.
    """
    if not url or not url.startswith("cloudinary://"):
        return {}
    try:
        body = url[len("cloudinary://"):]
        # Last '@' separates auth from cloud name; first ':' separates
        # key from secret — robust to those characters inside the secret.
        auth, cloud_name = body.rsplit("@", 1)
        api_key, api_secret = auth.split(":", 1)
        return {
            "cloud_name": cloud_name,
            "api_key": api_key,
            "api_secret": api_secret,
        }
    except ValueError:
        # Missing '@' or ':' entirely — not a usable Cloudinary URL.
        return {}
33
+
34
+
35
def sanitize_filename(filename: str) -> str:
    """Replace every character outside [A-Za-z0-9_-.] with '_'.

    Empty/None input yields the placeholder "unnamed_file".
    """
    if not filename:
        return "unnamed_file"
    return re.sub(r'[^a-zA-Z0-9_\-\.]', '_', filename)
39
+
40
+
41
def standardize_category_name(name: str) -> str:
    """Lowercase *name* and map any char outside [a-z0-9_-] to '_'.

    Empty/None input yields the fallback category "uncategorized".
    Note: unlike sanitize_filename, dots are NOT preserved here.
    """
    return re.sub(r'[^a-zA-Z0-9_\-]', '_', name.lower()) if name else "uncategorized"
45
+
46
+
47
def to_list(vector) -> list[float]:
    """Coerce an iterable of numerics to a plain list of floats.

    Returns [] for None, non-iterables, or elements that cannot be cast to
    float — callers feed the result straight into Pinecone upserts and
    prefer an empty vector over an exception.

    Fix over the original: only TypeError was caught, so a vector containing
    a non-numeric string raised ValueError and crashed the caller; both are
    now handled the same way.
    """
    if vector is None:
        return []
    try:
        return [float(x) for x in vector]
    except (TypeError, ValueError):
        return []
54
+
55
+
56
def url_to_public_id(url: str) -> str:
    """Extract the Cloudinary public_id from a delivery URL.

    Drops everything through "/upload/", the version segment that follows
    (e.g. "v12345/"), and the file extension. Returns "" when the URL is
    empty or has no "/upload/" marker.
    """
    if not url:
        return ""
    try:
        segments = url.split("/upload/")
        if len(segments) < 2:
            return ""
        # First path component after /upload/ is the version tag — skip it.
        remainder = segments[1].split("/", 1)[-1]
        return remainder.rsplit(".", 1)[0]
    except Exception:
        return ""
67
+
68
+
69
def cld_thumb_url(url: str) -> str:
    """Inject a width-500 limit transform into a Cloudinary delivery URL.

    Empty input yields "". Non-Cloudinary URLs pass through unchanged
    (no "/upload/" segment means nothing is replaced).
    """
    if not url:
        return ""
    return url.replace("/upload/", "/upload/c_limit,w_500/")
73
+
74
+
75
def face_ui_score(raw_score: float, mode: str = "fused") -> float:
    """Map a raw cosine similarity onto a calibrated [0, 1] UI probability.

    Platt scaling: a sigmoid centred on the decision boundary of whichever
    backend produced the score.

    mode="fused"  — split-index fused score (0.6*arcface + 0.4*adaface);
                    boundary ~0.30 with a steep imposter drop-off.
    mode="legacy" — old 1024-d concatenated vector cosine; boundary 0.50
                    (original calibration).
    """
    # (decision boundary, sigmoid steepness) per calibration mode.
    boundary, steepness = (0.30, 20.0) if mode == "fused" else (0.50, 18.0)
    prob = 1.0 / (1.0 + math.exp(-steepness * (raw_score - boundary)))
    return min(1.0, max(0.0, round(prob, 4)))
src/core/config.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Environment-driven configuration for the visual-search backend.

Every tunable reads from os.environ with a working default, so the service
boots with zero configuration and is tuned via HF Space secrets / .env.
"""
import os
from dotenv import load_dotenv

# Populate os.environ from a local .env file (no-op when absent, e.g. in
# production containers where secrets arrive as real env vars).
load_dotenv()

# ──────────────────────────────────────────────────────────────
# Credentials & core
# ──────────────────────────────────────────────────────────────
DEFAULT_PINECONE_KEY = os.getenv("DEFAULT_PINECONE_KEY", "")
DEFAULT_CLOUDINARY_URL = os.getenv("DEFAULT_CLOUDINARY_URL", "")

# Legacy index (kept for read-only backward compat during Phase 2 rollout)
IDX_FACES = os.getenv("IDX_FACES", "enterprise-faces")
IDX_OBJECTS = os.getenv("IDX_OBJECTS", "enterprise-objects")

# Phase 2: split face indexes (512-d each)
IDX_FACES_ARCFACE = os.getenv("IDX_FACES_ARCFACE", "faces-arcface")
IDX_FACES_ADAFACE = os.getenv("IDX_FACES_ADAFACE", "faces-adaface")

# ──────────────────────────────────────────────────────────────
# Concurrency / limits
# ──────────────────────────────────────────────────────────────
MAX_CONCURRENT_INFERENCES = int(os.getenv("MAX_CONCURRENT_INFERENCES", "2"))
MAX_FILES_PER_UPLOAD = int(os.getenv("MAX_FILES_PER_UPLOAD", "50"))
INFERENCE_CACHE_SIZE = int(os.getenv("INFERENCE_CACHE_SIZE", "128"))

# ──────────────────────────────────────────────────────────────
# Logging
# ──────────────────────────────────────────────────────────────
# Both must be set for remote log/job persistence; otherwise the logging
# module falls back to console-only.
SUPABASE_URL = os.getenv("SUPABASE_URL", "")
SUPABASE_SERVICE_KEY = os.getenv("SUPABASE_SERVICE_KEY", "")

# ──────────────────────────────────────────────────────────────
# Image / detection
# ──────────────────────────────────────────────────────────────
MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "1024"))
MAX_CROPS = int(os.getenv("MAX_CROPS", "10"))
# COCO class id 0 = "person" in YOLO models.
YOLO_PERSON_CLASS_ID = 0
YOLO_MIN_CROP_PX = int(os.getenv("YOLO_MIN_CROP_PX", "50"))
YOLO_CONF_THRESHOLD = float(os.getenv("YOLO_CONF_THRESHOLD", "0.25"))

# Primary InsightFace detector input size; DET_SCALES are the multi-scale
# fallback attempts (largest first) when the primary pass finds no faces.
DET_SIZE_PRIMARY = (640, 640)
DET_SCALES = [(1280, 1280), (960, 960), (640, 640)]
IOU_DEDUP_THRESHOLD = float(os.getenv("IOU_DEDUP_THRESHOLD", "0.4"))
MIN_FACE_SIZE = int(os.getenv("MIN_FACE_SIZE", "30"))
MAX_FACES_PER_IMAGE = int(os.getenv("MAX_FACES_PER_IMAGE", "20"))

# Phase 2: relaxed from 0.5 → 0.3 to index more faces (filter at query time)
FACE_QUALITY_GATE = float(os.getenv("FACE_QUALITY_GATE", "0.3"))

# Laplacian variance blur threshold for face crops.
# Faces below this score are excluded from search results.
# Typical values: >100 = sharp, 50-100 = acceptable, <50 = blurry.
FACE_BLUR_THRESHOLD = float(os.getenv("FACE_BLUR_THRESHOLD", "50.0"))

# ──────────────────────────────────────────────────────────────
# Embedding dimensions
# ──────────────────────────────────────────────────────────────
FACE_DIM = 512
ADAFACE_DIM = 512
FUSED_FACE_DIM = 1024  # old concatenated — kept for legacy index reads

FACE_CROP_THUMB_SIZE = int(os.getenv("FACE_CROP_THUMB_SIZE", "112"))
FACE_CROP_QUALITY = int(os.getenv("FACE_CROP_QUALITY", "85"))
FACE_CROP_PADDING = float(os.getenv("FACE_CROP_PADDING", "0.2"))
ADAFACE_CROP_PADDING = float(os.getenv("ADAFACE_CROP_PADDING", "0.1"))

ENABLE_ADAFACE = int(os.getenv("ENABLE_ADAFACE", "1"))
HF_TOKEN = os.getenv("HF_TOKEN", "")

# ──────────────────────────────────────────────────────────────
# Phase 1: Speed flags (unchanged, leaving on)
# ──────────────────────────────────────────────────────────────
USE_ONNX_VISION = int(os.getenv("USE_ONNX_VISION", "0"))
ONNX_MODELS_DIR = os.getenv("ONNX_MODELS_DIR", "onnx_models")
ONNX_USE_INT8 = int(os.getenv("ONNX_USE_INT8", "1"))
ENABLE_MULTI_SCALE_FALLBACK = int(os.getenv("ENABLE_MULTI_SCALE_FALLBACK", "1"))
ENABLE_HORIZONTAL_FLIP = int(os.getenv("ENABLE_HORIZONTAL_FLIP", "0"))
# NOTE(review): reading OMP/MKL thread counts here does not set them for the
# native libraries — they must also be exported as env vars before import.
OMP_NUM_THREADS = int(os.getenv("OMP_NUM_THREADS", "2"))
MKL_NUM_THREADS = int(os.getenv("MKL_NUM_THREADS", "2"))

# ──────────────────────────────────────────────────────────────
# Phase 2: Recall flags — DEFAULT ON
# ──────────────────────────────────────────────────────────────

# Split-index mode: write ArcFace + AdaFace to separate indexes, score-fuse at query
USE_SPLIT_FACE_INDEXES = int(os.getenv("USE_SPLIT_FACE_INDEXES", "1"))

# Score fusion weights. ArcFace is more discriminative for generic faces;
# AdaFace helps with low-quality/extreme-angle cases. 0.6/0.4 is NIST-FRVT standard.
ARCFACE_WEIGHT = float(os.getenv("ARCFACE_WEIGHT", "0.6"))
ADAFACE_WEIGHT = float(os.getenv("ADAFACE_WEIGHT", "0.4"))

# ArcFace-R100 same-person mean ~0.55, std ~0.12.
# 0.30 is a balanced arc floor: catches side-profile/distance shots while
# staying above the impostor tail (different-person mean ~0.05, std ~0.08).
FACE_MATCH_THRESHOLD = float(os.getenv("FACE_MATCH_THRESHOLD", "0.30"))

# With both models agreeing, 0.33 fused ≈ arc 0.30 + ada 0.38 together.
# Slightly raised above the arc floor because fusion adds confidence.
FUSED_MATCH_THRESHOLD = float(os.getenv("FUSED_MATCH_THRESHOLD", "0.33"))

# ArcFace-only floor (no AdaFace confirmation available).
# Stricter than FACE_MATCH_THRESHOLD since there is no second model to cross-check.
ARCFACE_SOLO_THRESHOLD = float(os.getenv("ARCFACE_SOLO_THRESHOLD", "0.38"))

# Query-time augmentation: OFF by default, enabled via deep_search form flag
ENABLE_QUERY_TIME_AUG = int(os.getenv("ENABLE_QUERY_TIME_AUG", "0"))

# Larger top_k: was 50, now 500 so large galleries aren't truncated
FACE_SEARCH_TOP_K = int(os.getenv("FACE_SEARCH_TOP_K", "500"))
OBJECT_SEARCH_TOP_K = int(os.getenv("OBJECT_SEARCH_TOP_K", "100"))

# Final API returns at most this many per-face matches (after dedup)
FACE_RESULTS_PER_QUERY_CAP = int(os.getenv("FACE_RESULTS_PER_QUERY_CAP", "200"))

# ──────────────────────────────────────────────────────────────
# Phase 3: People View + Job Queue — DEFAULT OFF (opt-in via env)
# ──────────────────────────────────────────────────────────────

# Redis-backed inference cache + job queue (requires Upstash)
# Set UPSTASH_REDIS_URL + UPSTASH_REDIS_TOKEN in HF Space secrets.
UPSTASH_REDIS_URL = os.getenv("UPSTASH_REDIS_URL", "")
UPSTASH_REDIS_TOKEN = os.getenv("UPSTASH_REDIS_TOKEN", "")

# Master toggle: enable the persistent Redis cache (replaces in-memory dict).
# Falls back to in-memory if UPSTASH_REDIS_URL is not set, so this is safe to
# leave True even before Upstash is wired up.
USE_REDIS_CACHE = int(os.getenv("USE_REDIS_CACHE", "0"))

# Async upload mode: when True, POST /api/upload?async=true returns a job_id
# immediately and processes in the background worker.
# Synchronous uploads (no ?async param) always work regardless of this flag.
USE_ASYNC_UPLOADS = int(os.getenv("USE_ASYNC_UPLOADS", "1"))

# Cluster-aware search expansion: after the initial face search, expand results
# to include ALL images in the matched identity clusters.
# Near-100% recall for well-indexed people. Disable if Supabase is slow.
USE_CLUSTER_AWARE_SEARCH = int(os.getenv("USE_CLUSTER_AWARE_SEARCH", "1"))

# HDBSCAN parameters — tuned for typical 1k–10k image libraries
CLUSTER_MIN_SAMPLES = int(os.getenv("CLUSTER_MIN_SAMPLES", "3"))
CLUSTER_MIN_CLUSTER_SIZE = int(os.getenv("CLUSTER_MIN_CLUSTER_SIZE", "3"))
CLUSTER_EPSILON = float(os.getenv("CLUSTER_EPSILON", "0.35"))

# Auto re-cluster after every N new face uploads (0 = disabled, manual only)
CLUSTER_AUTO_TRIGGER_EVERY = int(os.getenv("CLUSTER_AUTO_TRIGGER_EVERY", "0"))
src/core/logging.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+
4
+ from src.core.config import SUPABASE_URL, SUPABASE_SERVICE_KEY
5
+
6
+ _http_session = None
7
+
8
+ async def init_logging_session():
9
+ global _http_session
10
+ if SUPABASE_URL and SUPABASE_SERVICE_KEY:
11
+ import aiohttp
12
+ _http_session = aiohttp.ClientSession(
13
+ headers={
14
+ "Content-Type": "application/json",
15
+ "apikey": SUPABASE_SERVICE_KEY,
16
+ "Authorization": f"Bearer {SUPABASE_SERVICE_KEY}",
17
+ "Prefer": "return=minimal",
18
+ }
19
+ )
20
+
21
+ async def close_logging_session():
22
+ global _http_session
23
+ if _http_session:
24
+ await _http_session.close()
25
+
26
+ try:
27
+ from loguru import logger as _loguru
28
+ _loguru.remove()
29
+ _loguru.add(
30
+ lambda msg: print(msg, end=""),
31
+ format="<green>{time:HH:mm:ss}</green> | <level>{level:<8}</level> | {message}",
32
+ level="DEBUG",
33
+ colorize=True,
34
+ )
35
+ _log_fn = _loguru.log
36
+ except ImportError:
37
+ import logging as _logging
38
+ _logging.basicConfig(level=_logging.INFO)
39
+ _stdlib = _logging.getLogger("vsl")
40
+
41
+ def _log_fn(level: str, msg: str):
42
+ _stdlib.log(getattr(_logging, level, 20), msg)
43
+
44
+ async def _supabase_log(level: str, event: str, data: dict) -> None:
45
+ if not _http_session:
46
+ return
47
+ try:
48
+ import aiohttp
49
+ row = {
50
+ "level": level.upper(),
51
+ "event": event,
52
+ "user_id": str(data.get("user_id", "anonymous")),
53
+ "ip": str(data.get("ip", "")),
54
+ "mode": str(data.get("mode", "")),
55
+ "page": str(data.get("page", "")),
56
+ "duration_ms": int(data["duration_ms"]) if "duration_ms" in data else None,
57
+ "error": str(data["error"]) if "error" in data else None,
58
+ "data": data,
59
+ }
60
+ async with _http_session.post(
61
+ f"{SUPABASE_URL}/rest/v1/app_logs",
62
+ json=row,
63
+ timeout=aiohttp.ClientTimeout(total=5),
64
+ ) as r:
65
+ if r.status not in (200, 201):
66
+ body = await r.text()
67
+ _log_fn("WARNING", f"Supabase log failed {r.status}: {body[:200]}")
68
+ except Exception as exc:
69
+ _log_fn("DEBUG", f"Supabase log push skipped: {exc}")
70
+
71
+ def log(level: str, event: str, **data) -> None:
72
+ _log_fn(level.upper(), f"[{event}] {json.dumps(data, default=str)}")
73
+ try:
74
+ loop = asyncio.get_running_loop()
75
+ loop.create_task(_supabase_log(level, event, data))
76
+ except RuntimeError:
77
+ pass
78
+
79
+ def warn(msg: str) -> None:
80
+ _log_fn("WARNING", msg)
src/core/security.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import Form, HTTPException
2
+ from src.core.config import DEFAULT_PINECONE_KEY, DEFAULT_CLOUDINARY_URL
3
+ from src.common.utils import get_cloudinary_creds
4
+
5
def get_verified_keys(
    user_pinecone_key: str = Form(""),
    user_cloudinary_url: str = Form("")
) -> dict:
    """FastAPI dependency: resolve and validate Pinecone/Cloudinary keys.

    User-supplied form values take precedence over the configured defaults.
    Raises HTTPException(400) when the Cloudinary URL cannot be parsed or
    no Pinecone key is available from either source.
    """
    pc_key = user_pinecone_key or DEFAULT_PINECONE_KEY
    cld_url = user_cloudinary_url or DEFAULT_CLOUDINARY_URL

    cld_creds = get_cloudinary_creds(cld_url)
    if not cld_creds.get("cloud_name"):
        raise HTTPException(400, "Invalid Cloudinary URL.")
    if not pc_key:
        raise HTTPException(400, "Pinecone key is missing.")

    return {
        "pinecone_key": pc_key,
        "cloudinary_url": cld_url,
        "cloudinary_creds": cld_creds,
    }
src/services/ai_manager.py ADDED
@@ -0,0 +1,620 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import base64
3
+ import concurrent.futures
4
+ import functools
5
+ import io
6
+ import os
7
+ import threading
8
+ import hashlib
9
+ import warnings
10
+
11
+ # InsightFace uses np.linalg.lstsq without rcond — suppress the FutureWarning.
12
+ warnings.filterwarnings("ignore", category=FutureWarning, module="insightface")
13
+ # Suppress PyTorch meta-tensor copy warnings from AdaFace model loading.
14
+ warnings.filterwarnings("ignore", category=UserWarning, module="torch.nn.modules.module")
15
+
16
+ import cv2
17
+ import numpy as np
18
+ import torch
19
+ import torch.nn.functional as F
20
+ from PIL import Image, ImageOps
21
+ from transformers import AutoImageProcessor, AutoModel, AutoProcessor
22
+ from ultralytics import YOLO
23
+ import insightface # noqa: F401
24
+ from insightface.app import FaceAnalysis
25
+
26
+ from src.core.config import (
27
+ MAX_IMAGE_SIZE, MAX_CROPS, YOLO_PERSON_CLASS_ID,
28
+ YOLO_MIN_CROP_PX, YOLO_CONF_THRESHOLD,
29
+ DET_SIZE_PRIMARY, IOU_DEDUP_THRESHOLD,
30
+ MIN_FACE_SIZE, MAX_FACES_PER_IMAGE, FACE_QUALITY_GATE,
31
+ FACE_DIM, ADAFACE_DIM,
32
+ FACE_CROP_THUMB_SIZE, FACE_CROP_QUALITY,
33
+ FACE_CROP_PADDING, ADAFACE_CROP_PADDING,
34
+ INFERENCE_CACHE_SIZE, ENABLE_ADAFACE, HF_TOKEN,
35
+ USE_ONNX_VISION, ONNX_MODELS_DIR, ONNX_USE_INT8,
36
+ ENABLE_MULTI_SCALE_FALLBACK, ENABLE_HORIZONTAL_FLIP,
37
+ USE_SPLIT_FACE_INDEXES, FACE_BLUR_THRESHOLD,
38
+ )
39
+
40
# ── ArcFace 5-point reference landmarks (fixed template) ──────────────────────
# Canonical destination positions, in 112x112 crop coordinates, of the two
# eyes, nose tip, and mouth corners used by ArcFace alignment. Precomputed —
# eliminates np.linalg.lstsq call per face (10x faster alignment).
_ARCFACE_SRC = np.array([
    [38.2946, 51.6963],   # left eye
    [73.5318, 51.5014],   # right eye
    [56.0252, 71.7366],   # nose tip
    [41.5493, 92.3655],   # left mouth corner
    [70.7299, 92.2041],   # right mouth corner
], dtype=np.float32)
49
+
50
+
51
def _estimate_norm_fast(lmk: np.ndarray, image_size: int = 112) -> np.ndarray:
    """
    Fast similarity-transform estimation from 5 facial landmarks to the
    ArcFace template, using cv2.estimateAffinePartial2D instead of
    np.linalg.lstsq (~10x faster on CPU). Returns a 2x3 affine matrix.

    Raises AssertionError if lmk is not shaped (5, 2).
    """
    assert lmk.shape == (5, 2), f"Expected (5,2) landmarks, got {lmk.shape}"
    src = _ARCFACE_SRC * (image_size / 112.0)
    # BUGFIX: the original passed method=cv2.LSQR_EXACT, a constant that
    # does not exist in OpenCV — every call raised AttributeError.
    # estimateAffinePartial2D accepts cv2.RANSAC or cv2.LMEDS; LMEDS is the
    # robust least-median estimator appropriate for 5 exact correspondences.
    tform, _ = cv2.estimateAffinePartial2D(lmk, src, method=cv2.LMEDS)
    if tform is None:
        # Degenerate landmarks: fall back to identity so the caller still
        # gets a crop instead of a crash.
        tform = np.array([[1, 0, 0], [0, 1, 0]], dtype=np.float32)
    return tform
65
+
66
+
67
def _align_face_fast(bgr: np.ndarray, kps: np.ndarray, size: int = 112) -> np.ndarray:
    """Warp a face to the ArcFace template via the fast affine estimate
    (replaces InsightFace's lstsq alignment path)."""
    matrix = _estimate_norm_fast(kps, size)
    return cv2.warpAffine(bgr, matrix, (size, size), flags=cv2.INTER_LINEAR)
72
+
73
+
74
def _resize_pil(img: Image.Image, max_side: int = MAX_IMAGE_SIZE) -> Image.Image:
    """Downscale *img* so its longest side is at most *max_side*.

    Images already within the limit are returned untouched (never upscaled).
    """
    w, h = img.size
    longest = max(w, h)
    if longest <= max_side:
        return img
    ratio = max_side / longest
    return img.resize((int(w * ratio), int(h * ratio)), Image.LANCZOS)
80
+
81
+
82
def _blur_score(bgr: np.ndarray, x1: int, y1: int, x2: int, y2: int) -> float:
    """Laplacian-variance sharpness of the (x1,y1)-(x2,y2) crop.

    Higher = sharper; 0.0 for an empty crop.
    """
    region = bgr[y1:y2, x1:x2]
    if region.size == 0:
        return 0.0
    # Normalize to a 64x64 grayscale patch so the metric is comparable
    # across crop sizes.
    small = cv2.resize(cv2.cvtColor(region, cv2.COLOR_BGR2GRAY), (64, 64))
    return float(cv2.Laplacian(small, cv2.CV_64F).var())
90
+
91
+
92
def _crop_to_b64(img_bgr: np.ndarray, x1: int, y1: int, x2: int, y2: int) -> str:
    """Padded face crop → square JPEG thumbnail → base64 string ("" if empty)."""
    H, W = img_bgr.shape[:2]
    pad_x = int((x2 - x1) * FACE_CROP_PADDING)
    pad_y = int((y2 - y1) * FACE_CROP_PADDING)
    # Expand the bbox by the padding, clamped to image bounds.
    left, top = max(0, x1 - pad_x), max(0, y1 - pad_y)
    right, bottom = min(W, x2 + pad_x), min(H, y2 + pad_y)
    crop = img_bgr[top:bottom, left:right]
    if crop.size == 0:
        return ""
    # BGR → RGB via channel reversal, then a fixed-size square thumbnail.
    thumb = Image.fromarray(crop[:, :, ::-1]).resize(
        (FACE_CROP_THUMB_SIZE, FACE_CROP_THUMB_SIZE), Image.LANCZOS
    )
    out = io.BytesIO()
    thumb.save(out, format="JPEG", quality=FACE_CROP_QUALITY)
    return base64.b64encode(out.getvalue()).decode()
108
+
109
+
110
def _face_crop_for_adaface(
    img_bgr: np.ndarray, x1: int, y1: int, x2: int, y2: int
) -> np.ndarray | None:
    """Padded 112x112 CHW float32 crop, normalized to [-1, 1], for AdaFace.

    Returns None when the clamped crop is empty.
    """
    H, W = img_bgr.shape[:2]
    pad_x = int((x2 - x1) * ADAFACE_CROP_PADDING)
    pad_y = int((y2 - y1) * ADAFACE_CROP_PADDING)
    left, top = max(0, x1 - pad_x), max(0, y1 - pad_y)
    right, bottom = min(W, x2 + pad_x), min(H, y2 + pad_y)
    crop = img_bgr[top:bottom, left:right]
    if crop.size == 0:
        return None
    # BGR → RGB; .copy() forces a contiguous array for PIL.
    rgb = crop[:, :, ::-1].copy()
    resized = Image.fromarray(rgb).resize((112, 112), Image.LANCZOS)
    scaled = np.array(resized, dtype=np.float32) / 255.0
    normalized = (scaled - 0.5) / 0.5  # [0,1] → [-1,1]
    return normalized.transpose(2, 0, 1)  # HWC → CHW
127
+
128
+
129
def _clahe_enhance(bgr: np.ndarray) -> np.ndarray:
    """CLAHE contrast boost on the luminance channel in LAB space.

    Equalizes only L so colors are preserved; helps detection on dim faces.
    """
    lab = cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB)
    lum, chan_a, chan_b = cv2.split(lab)
    equalized = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(lum)
    return cv2.cvtColor(cv2.merge([equalized, chan_a, chan_b]), cv2.COLOR_LAB2BGR)
135
+
136
+
137
+ def _iou(box_a: list, box_b: list) -> float:
138
+ xa, ya = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
139
+ xb, yb = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
140
+ inter = max(0, xb - xa) * max(0, yb - ya)
141
+ if inter == 0:
142
+ return 0.0
143
+ area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
144
+ area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
145
+ return inter / (area_a + area_b - inter)
146
+
147
+
148
def _dedup_faces(faces_list: list, iou_thresh: float = IOU_DEDUP_THRESHOLD) -> list:
    """Greedy NMS over detected faces.

    Sorts by descending det_score and keeps a face only when it does not
    overlap (IoU > iou_thresh) any already-kept face — the highest-scoring
    detection per overlap group survives.
    """
    if not faces_list:
        return []
    ranked = sorted(faces_list, key=lambda f: float(f.det_score), reverse=True)
    survivors = []
    for candidate in ranked:
        cand = candidate.bbox.astype(int)
        cand_box = [cand[0], cand[1], cand[2], cand[3]]
        overlaps = any(
            _iou(cand_box, [kept.bbox.astype(int)[i] for i in range(4)]) > iou_thresh
            for kept in survivors
        )
        if not overlaps:
            survivors.append(candidate)
    return survivors
162
+
163
+
164
+ # ── Face crop embedding cache (LRU by crop hash) ──────────────────────────────
165
+ # Avoids recomputing ArcFace embeddings for the same face across multiple images
166
+ # (e.g. same person appears in 20 photos — only 1 inference call needed)
167
+ _FACE_EMBED_CACHE: dict[str, np.ndarray] = {}
168
+ _FACE_EMBED_CACHE_MAX = 512
169
+ _FACE_EMBED_CACHE_LOCK = threading.Lock()
170
+
171
+
172
+ def _face_cache_get(key: str) -> np.ndarray | None:
173
+ with _FACE_EMBED_CACHE_LOCK:
174
+ return _FACE_EMBED_CACHE.get(key)
175
+
176
+
177
+ def _face_cache_set(key: str, vec: np.ndarray) -> None:
178
+ with _FACE_EMBED_CACHE_LOCK:
179
+ if len(_FACE_EMBED_CACHE) >= _FACE_EMBED_CACHE_MAX:
180
+ # Evict oldest entry
181
+ oldest = next(iter(_FACE_EMBED_CACHE))
182
+ del _FACE_EMBED_CACHE[oldest]
183
+ _FACE_EMBED_CACHE[key] = vec
184
+
185
+
186
+ def _crop_hash(crop_bgr: np.ndarray) -> str:
187
+ """Fast hash of face crop pixels for cache lookup."""
188
+ return hashlib.md5(crop_bgr.tobytes()).hexdigest()
189
+
190
+
191
+ class AIModelManager:
192
    def __init__(self):
        """Load the full vision stack once at process start.

        Load order: optional ONNX stack → PyTorch SigLIP+DINOv2 fallback →
        YOLO → InsightFace (detection + ArcFace) → optional AdaFace.
        All models are shared across requests; the inference methods handle
        their own locking.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # Vision stack: prefer the quantized ONNX path when enabled; fall
        # back to the PyTorch pair on any load failure.
        self.onnx_vision = None
        if USE_ONNX_VISION:
            try:
                from src.services.onnx_models import ONNXVisionStack
                self.onnx_vision = ONNXVisionStack(
                    ONNX_MODELS_DIR, use_int8=bool(ONNX_USE_INT8)
                )
                print(f"[AIModelManager] ONNX vision loaded (INT8={ONNX_USE_INT8})")
            except Exception as e:
                print(f"[AIModelManager] ONNX failed ({e}), using PyTorch fallback")
                self.onnx_vision = None

        if self.onnx_vision is None:
            self.siglip_processor = AutoProcessor.from_pretrained(
                "google/siglip-base-patch16-224", use_fast=True
            )
            self.siglip_model = AutoModel.from_pretrained(
                "google/siglip-base-patch16-224"
            ).to(self.device).eval()
            self.dinov2_processor = AutoImageProcessor.from_pretrained(
                "facebook/dinov2-base", use_fast=True
            )
            self.dinov2_model = AutoModel.from_pretrained(
                "facebook/dinov2-base"
            ).to(self.device).eval()
            if self.device == "cuda":
                # fp16 on GPU halves memory and speeds inference.
                self.siglip_model = self.siglip_model.half()
                self.dinov2_model = self.dinov2_model.half()

        # YOLO person/object detector (nano segmentation variant).
        self.yolo = YOLO("yolo11n-seg.pt")

        # Face detection + ArcFace embeddings (InsightFace buffalo_l pack).
        self.face_app = FaceAnalysis(
            name="buffalo_l",
            providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
            if self.device == "cuda" else ["CPUExecutionProvider"],
        )
        self.face_app.prepare(
            ctx_id=0 if self.device == "cuda" else -1, det_size=DET_SIZE_PRIMARY
        )
        # Warm-up call so the first real request doesn't pay init cost.
        self.face_app.get(np.zeros((112, 112, 3), dtype=np.uint8))

        # AdaFace (optional second face model; stays None when disabled
        # or when loading fails — see _load_adaface).
        self.adaface_model = None
        self._load_adaface()

        # _face_lock serialises access to the shared InsightFace detector;
        # _cache_lock guards the per-image inference cache below.
        self._face_lock = threading.Lock()
        self._cache_lock = threading.Lock()
        self._cache: dict[str, list] = {}

        # Thread pool for parallel ArcFace + AdaFace inference
        # 2 workers = one per model, matches 2 vCPU on HF free tier
        self._embed_pool = concurrent.futures.ThreadPoolExecutor(
            max_workers=2, thread_name_prefix="embed"
        )
252
+
253
    def _load_adaface(self) -> None:
        """Download and load the CVLFace AdaFace IR-50 model from HF Hub.

        Uses a fixed local cache dir, and temporarily chdir's into it while
        adding it to sys.path, because the repo's remote code resolves files
        relative to the CWD. Any failure leaves self.adaface_model as None
        so the pipeline degrades to ArcFace-only instead of crashing.
        """
        if not ENABLE_ADAFACE:
            return
        import sys
        REPO_ID = "minchul/cvlface_adaface_ir50_ms1mv2"
        CACHE_PATH = os.path.expanduser(
            "~/.cvlface_cache/minchul/cvlface_adaface_ir50_ms1mv2"
        )
        try:
            from huggingface_hub import hf_hub_download
            from transformers import AutoModel as _HFAutoModel
            os.makedirs(CACHE_PATH, exist_ok=True)
            # files.txt lists the extra artifacts the repo needs beyond the
            # standard config/wrapper/weights trio.
            hf_hub_download(
                repo_id=REPO_ID, filename="files.txt", token=HF_TOKEN,
                local_dir=CACHE_PATH, local_dir_use_symlinks=False,
            )
            with open(os.path.join(CACHE_PATH, "files.txt")) as f:
                extra = [x.strip() for x in f.read().split("\n") if x.strip()]
            # Only fetch files not already cached from a previous boot.
            for fname in extra + ["config.json", "wrapper.py", "model.safetensors"]:
                if not os.path.exists(os.path.join(CACHE_PATH, fname)):
                    hf_hub_download(
                        repo_id=REPO_ID, filename=fname, token=HF_TOKEN,
                        local_dir=CACHE_PATH, local_dir_use_symlinks=False,
                    )
            cwd = os.getcwd()
            os.chdir(CACHE_PATH)
            sys.path.insert(0, CACHE_PATH)
            try:
                model = _HFAutoModel.from_pretrained(
                    CACHE_PATH, trust_remote_code=True, token=HF_TOKEN,
                    low_cpu_mem_usage=False,
                )
            finally:
                # Always restore CWD and sys.path, even if loading fails.
                os.chdir(cwd)
                if CACHE_PATH in sys.path:
                    sys.path.remove(CACHE_PATH)
            self.adaface_model = model.to(self.device).eval()
        except Exception as _ada_err:
            import traceback as _tb
            print(f"[CRITICAL] AdaFace failed to load — system will run at degraded recall: {_ada_err}")
            _tb.print_exc()
            self.adaface_model = None
295
+
296
    # ── FIX 1: AdaFace batch embed (unchanged — already correct) ──────────────
    def _adaface_embed_batch(
        self, face_arrs_chw: list[np.ndarray | None]
    ) -> list[np.ndarray | None]:
        """Batch-embed pre-normalised 112x112 CHW face crops with AdaFace.

        Accepts None placeholders (failed crops) and preserves positions:
        the returned list has one entry per input, with None wherever the
        input was None, AdaFace is unavailable, or inference failed.
        Embeddings are L2-normalised float32 vectors.
        """
        if self.adaface_model is None:
            return [None] * len(face_arrs_chw)
        valid_idx = [i for i, a in enumerate(face_arrs_chw) if a is not None]
        if not valid_idx:
            return [None] * len(face_arrs_chw)
        batch = np.stack([face_arrs_chw[i] for i in valid_idx], axis=0)
        batch = np.ascontiguousarray(batch)
        try:
            t = torch.from_numpy(batch).contiguous().to(self.device)
            if self.device == "cuda":
                t = t.half()
            with torch.no_grad():
                out = self.adaface_model(t)
            # Some wrappers return a tensor directly, others an output
            # object carrying an .embedding attribute.
            emb = out if isinstance(out, torch.Tensor) else out.embedding
            emb = F.normalize(emb.float(), p=2, dim=1).cpu().numpy()
        except Exception as e:
            import traceback
            print(f"[AdaFace ERROR] {e}")
            traceback.print_exc()
            return [None] * len(face_arrs_chw)
        # Scatter embeddings back to their original positions.
        result = [None] * len(face_arrs_chw)
        for out_i, in_i in enumerate(valid_idx):
            result[in_i] = emb[out_i]
        return result
324
+
325
    # ── FIX 2: ArcFace batch embed with crop-hash caching ─────────────────────
    def _arcface_embed_batch(
        self, faces: list, bgr: np.ndarray
    ) -> list[np.ndarray]:
        """
        Return one L2-normalised ArcFace embedding per detected face.

        The embeddings themselves come from InsightFace's get() call
        (face.embedding, already computed during detection); this method
        normalises them and adds a crop-hash LRU cache so the same face
        crop appearing across many photos costs a single computation —
        same person in 20 photos = 1 cache miss.

        Faces with no embedding fall back to a FACE_DIM zero vector so the
        output list always aligns 1:1 with *faces*.
        """
        results = []

        for face in faces:
            # Clamp the bbox to image bounds before hashing the raw crop.
            bbox = face.bbox.astype(int)
            x1, y1, x2, y2 = bbox
            x1, y1 = max(0, x1), max(0, y1)
            x2, y2 = min(bgr.shape[1], x2), min(bgr.shape[0], y2)
            raw_crop = bgr[y1:y2, x1:x2]
            ch = _crop_hash(raw_crop) if raw_crop.size > 0 else ""

            if ch:
                cached_vec = _face_cache_get(ch)
                if cached_vec is not None:
                    results.append(cached_vec)
                    continue

            vec = face.embedding.astype(np.float32) if face.embedding is not None \
                else np.zeros(FACE_DIM, dtype=np.float32)
            # L2-normalise so downstream cosine similarity reduces to a
            # dot product; guard against the zero-vector fallback.
            n = np.linalg.norm(vec)
            vec = vec / n if n > 0 else vec
            if ch:
                _face_cache_set(ch, vec)
            results.append(vec)

        return results
366
+
367
    def _embed_crops_batch(self, crops: list[Image.Image]) -> list[np.ndarray]:
        """Embed object/person crops with SigLIP + DINOv2 (or the ONNX stack).

        Returns one L2-normalised fused vector per crop — SigLIP image
        features concatenated with the DINOv2 CLS token, then renormalised.
        Empty input yields an empty list.
        """
        if not crops:
            return []
        if self.onnx_vision is not None:
            return self.onnx_vision.encode(crops)
        with torch.no_grad():
            sig_in = self.siglip_processor(images=crops, return_tensors="pt", padding=True)
            sig_in = {k: v.to(self.device) for k, v in sig_in.items()}
            if self.device == "cuda":
                # Match the half-precision models loaded in __init__.
                sig_in = {k: v.half() if v.dtype == torch.float32 else v for k, v in sig_in.items()}
            sig_out = self.siglip_model.get_image_features(**sig_in)
            # get_image_features' return shape varies across transformers
            # versions — unwrap whichever attribute is present.
            if hasattr(sig_out, "image_embeds"):
                sig_out = sig_out.image_embeds
            elif hasattr(sig_out, "pooler_output"):
                sig_out = sig_out.pooler_output
            elif hasattr(sig_out, "last_hidden_state"):
                sig_out = sig_out.last_hidden_state[:, 0, :]
            elif isinstance(sig_out, tuple):
                sig_out = sig_out[0]
            sig_vecs = F.normalize(sig_out.float(), p=2, dim=1).cpu()

            dino_in = self.dinov2_processor(images=crops, return_tensors="pt")
            dino_in = {k: v.to(self.device) for k, v in dino_in.items()}
            if self.device == "cuda":
                dino_in = {k: v.half() if v.dtype == torch.float32 else v for k, v in dino_in.items()}
            dino_out = self.dinov2_model(**dino_in)
            # DINOv2 CLS token ([:, 0, :]) as the global descriptor.
            dino_vecs = F.normalize(dino_out.last_hidden_state[:, 0, :].float(), p=2, dim=1).cpu()
            fused = F.normalize(torch.cat([sig_vecs, dino_vecs], dim=1), p=2, dim=1)
            return [fused[i].numpy() for i in range(len(crops))]
396
+
397
    def _run_detection_at_scale(
        self, bgr_enhanced: np.ndarray, scale: tuple
    ) -> list:
        """Run InsightFace detection at *scale*, mapping bboxes back to
        original-image coordinates.

        The image is only ever downscaled (never upscaled). Returns [] on
        any detection failure so multi-scale callers can simply try the
        next scale.
        """
        H, W = bgr_enhanced.shape[:2]
        scale_w, scale_h = min(W, scale[0]), min(H, scale[1])
        if scale_w == W and scale_h == H:
            bgr_scaled = bgr_enhanced
        else:
            bgr_scaled = cv2.resize(bgr_enhanced, (scale_w, scale_h))
        try:
            with self._face_lock:
                # input_size must be set inside the lock — setting it outside
                # is a race condition when two inference threads run concurrently,
                # causing the wrong scale to be used and faces to be missed.
                self.face_app.det_model.input_size = scale
                faces_at_scale = self.face_app.get(bgr_scaled)
                # Rescale detected bboxes back into full-image coordinates.
                sx, sy = W / scale_w, H / scale_h
                for f in faces_at_scale:
                    if sx != 1.0 or sy != 1.0:
                        f.bbox[0] *= sx; f.bbox[1] *= sy
                        f.bbox[2] *= sx; f.bbox[3] *= sy
                return faces_at_scale
        except Exception:
            return []
421
+
422
+ def _detect_and_encode_faces(self, img_np: np.ndarray) -> list[dict]:
423
+ """
424
+ Returns face records with BOTH arcface_vector and adaface_vector.
425
+
426
+ FIX 3 — ArcFace + AdaFace run in PARALLEL using the thread pool.
427
+ Previously they ran sequentially. On 2 vCPU this gives ~1.5x speedup
428
+ since each model can use a separate core simultaneously.
429
+ """
430
+ if self.face_app is None:
431
+ return []
432
+ try:
433
+ if img_np.dtype != np.uint8:
434
+ img_np = (img_np * 255).astype(np.uint8)
435
+ bgr = img_np[:, :, ::-1].copy() if img_np.shape[2] == 3 else img_np.copy()
436
+ bgr_enhanced = _clahe_enhance(bgr)
437
+ H, W = bgr.shape[:2]
438
+
439
+ all_raw_faces = self._run_detection_at_scale(bgr_enhanced, DET_SIZE_PRIMARY)
440
+
441
+ if not all_raw_faces and ENABLE_MULTI_SCALE_FALLBACK:
442
+ for scale in [(1280, 1280), (960, 960)]:
443
+ more = self._run_detection_at_scale(bgr_enhanced, scale)
444
+ all_raw_faces.extend(more)
445
+ if more:
446
+ break
447
+
448
+ if ENABLE_HORIZONTAL_FLIP:
449
+ bgr_flip = cv2.flip(bgr_enhanced, 1)
450
+ try:
451
+ with self._face_lock:
452
+ self.face_app.det_model.input_size = DET_SIZE_PRIMARY
453
+ faces_flip = self.face_app.get(bgr_flip)
454
+ for f in faces_flip:
455
+ x1, y1, x2, y2 = f.bbox
456
+ f.bbox[0], f.bbox[2] = W - x2, W - x1
457
+ all_raw_faces.extend(faces_flip)
458
+ except Exception:
459
+ pass
460
+
461
+ self.face_app.det_model.input_size = DET_SIZE_PRIMARY
462
+ faces = _dedup_faces(all_raw_faces)
463
+
464
+ filtered_faces = []
465
+ adaface_crops: list[np.ndarray | None] = []
466
+
467
+ for face in faces:
468
+ if len(filtered_faces) >= MAX_FACES_PER_IMAGE:
469
+ break
470
+ bbox_raw = face.bbox.astype(int)
471
+ x1, y1, x2, y2 = bbox_raw
472
+ x1, y1 = max(0, x1), max(0, y1)
473
+ x2, y2 = min(bgr.shape[1], x2), min(bgr.shape[0], y2)
474
+ w, h = x2 - x1, y2 - y1
475
+ if w < MIN_FACE_SIZE or h < MIN_FACE_SIZE:
476
+ continue
477
+ det_score = float(face.det_score) if hasattr(face, "det_score") else 1.0
478
+ if det_score < FACE_QUALITY_GATE or face.embedding is None:
479
+ continue
480
+ blur = _blur_score(bgr, x1, y1, x2, y2)
481
+ filtered_faces.append((face, x1, y1, x2, y2, w, h, det_score, blur))
482
+ adaface_crops.append(_face_crop_for_adaface(bgr, x1, y1, x2, y2))
483
+
484
+ if not filtered_faces:
485
+ return []
486
+
487
+ # ── FIX 3: Run ArcFace + AdaFace in PARALLEL ──────────────────────
488
+ # Submit both to the thread pool simultaneously.
489
+ # On 2 vCPU: total time ≈ max(arcface_time, adaface_time)
490
+ # instead of arcface_time + adaface_time.
491
+ face_objs = [f[0] for f in filtered_faces]
492
+
493
+ arc_future = self._embed_pool.submit(
494
+ self._arcface_embed_batch, face_objs, bgr
495
+ )
496
+ ada_future = self._embed_pool.submit(
497
+ self._adaface_embed_batch, adaface_crops
498
+ )
499
+
500
+ # Wait for both — concurrent.futures blocks until done
501
+ arcface_vecs = arc_future.result()
502
+ adaface_vecs = ada_future.result()
503
+
504
+ results = []
505
+ for accepted, (face_tuple, arcface_vec, adaface_vec) in enumerate(
506
+ zip(filtered_faces, arcface_vecs, adaface_vecs)
507
+ ):
508
+ face, x1, y1, x2, y2, w, h, det_score, blur_score = face_tuple
509
+
510
+ out = {
511
+ "type": "face",
512
+ "face_idx": accepted,
513
+ "bbox": [int(x1), int(y1), int(w), int(h)],
514
+ "face_crop": _crop_to_b64(bgr, x1, y1, x2, y2),
515
+ "det_score": det_score,
516
+ "face_width_px": int(w),
517
+ "blur_score": blur_score,
518
+ "arcface_vector": arcface_vec,
519
+ "adaface_vector": adaface_vec if adaface_vec is not None
520
+ else np.zeros(ADAFACE_DIM, dtype=np.float32),
521
+ "has_adaface": adaface_vec is not None,
522
+ }
523
+
524
+ if not USE_SPLIT_FACE_INDEXES:
525
+ if adaface_vec is not None:
526
+ fused_raw = np.concatenate([arcface_vec, adaface_vec])
527
+ else:
528
+ fused_raw = np.concatenate(
529
+ [arcface_vec, np.zeros(ADAFACE_DIM, dtype=np.float32)]
530
+ )
531
+ n2 = np.linalg.norm(fused_raw)
532
+ out["vector"] = (fused_raw / n2) if n2 > 0 else fused_raw
533
+ else:
534
+ out["vector"] = arcface_vec
535
+
536
+ results.append(out)
537
+ return results
538
+ except Exception as _det_err:
539
+ import traceback as _tb
540
+ print(f"[_detect_and_encode_faces ERROR] shape={getattr(img_np, 'shape', 'N/A')}: {_det_err}")
541
+ _tb.print_exc()
542
+ return []
543
+
544
    # ── Main inference entry point ────────────────────────────────────────────
    def process_image_bytes(
        self, image_bytes: bytes, detect_faces: bool = True
    ) -> list[dict]:
        """
        Full inference pipeline for one image: face detection/encoding plus
        YOLO object crops embedded in one batch.

        Results are memoised in a bounded, lock-guarded cache keyed on
        md5(image_bytes) + detect_faces.  Returns a list of record dicts of
        type "face" or "object".
        """
        file_hash = hashlib.md5(image_bytes).hexdigest()
        cache_key = f"{file_hash}_{detect_faces}"

        # Cache hit: return a shallow copy so callers can't mutate the cache.
        with self._cache_lock:
            if cache_key in self._cache:
                return list(self._cache[cache_key])

        extracted = []
        original_pil = Image.open(io.BytesIO(image_bytes))
        # Apply EXIF orientation before anything else. Pillow does NOT do this
        # automatically — a portrait phone shot stored as landscape with a
        # rotation tag would feed sideways pixels to the face detector.
        original_pil = ImageOps.exif_transpose(original_pil)
        original_pil = original_pil.convert("RGB")
        img_np = np.array(original_pil)
        faces_found = False

        if detect_faces and self.face_app is not None:
            face_results = self._detect_and_encode_faces(img_np)
            if face_results:
                faces_found = True
                extracted.extend(face_results)

        # YOLO proposals: prefer segmentation masks when available, else plain
        # boxes.  Person detections are skipped once faces were found.
        crops: list[Image.Image] = []
        yolo_results = self.yolo(original_pil, conf=YOLO_CONF_THRESHOLD, verbose=False)

        for r in yolo_results:
            if r.masks is not None:
                for seg_idx, mask_xy in enumerate(r.masks.xy):
                    cls_id = int(r.boxes.cls[seg_idx].item())
                    if faces_found and cls_id == YOLO_PERSON_CLASS_ID:
                        continue
                    polygon = np.array(mask_xy, dtype=np.int32)
                    # boundingRect needs at least a triangle.
                    if len(polygon) < 3:
                        continue
                    x, y, w, h = cv2.boundingRect(polygon)
                    if w < YOLO_MIN_CROP_PX or h < YOLO_MIN_CROP_PX:
                        continue
                    crops.append(original_pil.crop((x, y, x + w, y + h)))
                    if len(crops) >= MAX_CROPS:
                        break
            elif r.boxes is not None:
                for box in r.boxes:
                    cls_id = int(box.cls.item())
                    if faces_found and cls_id == YOLO_PERSON_CLASS_ID:
                        continue
                    x1, y1, x2, y2 = box.xyxy[0].tolist()
                    if (x2 - x1) < YOLO_MIN_CROP_PX or (y2 - y1) < YOLO_MIN_CROP_PX:
                        continue
                    crops.append(original_pil.crop((x1, y1, x2, y2)))
                    if len(crops) >= MAX_CROPS:
                        break

        # Embed the full frame plus every accepted crop in one batch.
        all_crops = [_resize_pil(c, MAX_IMAGE_SIZE) for c in [original_pil] + crops]
        obj_vecs = self._embed_crops_batch(all_crops)
        extracted.extend({"type": "object", "vector": v} for v in obj_vecs)

        # FIFO eviction (oldest insertion, not true LRU) keeps cache bounded.
        with self._cache_lock:
            if len(self._cache) >= INFERENCE_CACHE_SIZE:
                oldest = next(iter(self._cache))
                del self._cache[oldest]
            self._cache[cache_key] = list(extracted)

        return extracted
612
+
613
+ async def process_image_bytes_async(
614
+ self, image_bytes: bytes, detect_faces: bool = True
615
+ ) -> list[dict]:
616
+ loop = asyncio.get_event_loop()
617
+ return await loop.run_in_executor(
618
+ None,
619
+ functools.partial(self.process_image_bytes, image_bytes, detect_faces),
620
+ )
src/services/cache.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ src/services/cache.py — Phase 3: Upstash Redis wrapper
3
+
4
+ Provides a thin async layer over Upstash Redis (REST API, so no socket
5
+ connection required — works fine on HF free tier which blocks raw TCP to
6
+ external hosts).
7
+
8
+ Falls back gracefully to a local in-memory dict if UPSTASH_REDIS_URL is not
9
+ set, so the rest of the codebase can import and call CacheService without
10
+ any conditional guards.
11
+
12
+ Usage:
13
+ from src.services.cache import cache
14
+ await cache.set("key", "value", ttl=3600)
15
+ val = await cache.get("key") # returns str | None
16
+ await cache.delete("key")
17
+ await cache.lpush("list_key", "item")
18
+ items = await cache.lrange("list_key", 0, -1)
19
+ """
20
+
21
+ import json
22
+ import os
23
+ import time
24
+ from typing import Any, Optional
25
+
26
+ import aiohttp
27
+
28
+ UPSTASH_REDIS_URL = os.getenv("UPSTASH_REDIS_URL", "")
29
+ UPSTASH_REDIS_TOKEN = os.getenv("UPSTASH_REDIS_TOKEN", "")
30
+
31
+ # Fallback in-memory store used when Upstash is not configured.
32
+ _mem_store: dict[str, tuple[Any, float]] = {} # key → (value, expires_at or 0)
33
+
34
+
35
class CacheService:
    """
    Async Redis cache backed by the Upstash REST API.

    Falls back to the module-level in-memory ``_mem_store`` dict when
    UPSTASH_REDIS_URL / UPSTASH_REDIS_TOKEN are not configured, so callers
    never need conditional guards.
    """

    def __init__(self):
        # Enabled only when both the REST URL and the token are present.
        self._enabled = bool(UPSTASH_REDIS_URL and UPSTASH_REDIS_TOKEN)
        self._base_url = UPSTASH_REDIS_URL.rstrip("/") if self._enabled else ""
        self._headers = (
            {"Authorization": f"Bearer {UPSTASH_REDIS_TOKEN}"}
            if self._enabled
            else {}
        )
        if not self._enabled:
            print("[Cache] Upstash not configured — using in-memory fallback")

    # ── Internal REST call ────────────────────────────────────────────
    async def _cmd(self, *args) -> Any:
        """
        Execute a Redis command via the Upstash REST API.

        BUGFIX: every command argument is now URL-encoded before being
        placed in the path.  Values routinely contain '/', spaces, '?' or
        '#' (any JSON payload does), which previously corrupted the request
        path or leaked into the query string.
        """
        from urllib.parse import quote  # local import: only needed on the REST path
        url = f"{self._base_url}/{'/'.join(quote(str(a), safe='') for a in args)}"
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=self._headers) as resp:
                data = await resp.json()
        if "error" in data:
            raise RuntimeError(f"Upstash error: {data['error']}")
        return data.get("result")

    # ── Public API ────────────────────────────────────────────────────
    async def get(self, key: str) -> Optional[str]:
        """Return the string value for key, or None if missing/expired."""
        if not self._enabled:
            entry = _mem_store.get(key)
            if entry is None:
                return None
            val, exp = entry
            # Lazy expiry: drop stale entries on read.
            if exp and time.time() > exp:
                _mem_store.pop(key, None)
                return None
            return val

        result = await self._cmd("GET", key)
        return result  # str or None

    async def set(self, key: str, value: Any, ttl: int = 0) -> bool:
        """
        Store value under key. If ttl > 0, key expires after that many seconds.
        Value is JSON-serialised if not already a str.
        """
        if not isinstance(value, str):
            value = json.dumps(value)

        if not self._enabled:
            exp = time.time() + ttl if ttl else 0
            _mem_store[key] = (value, exp)
            return True

        if ttl:
            await self._cmd("SET", key, value, "EX", ttl)
        else:
            await self._cmd("SET", key, value)
        return True

    async def get_json(self, key: str) -> Optional[Any]:
        """Like get(), but JSON-decodes the value; returns the raw string if undecodable."""
        raw = await self.get(key)
        if raw is None:
            return None
        try:
            return json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            return raw

    async def set_json(self, key: str, value: Any, ttl: int = 0) -> bool:
        """JSON-encode value and store it under key."""
        return await self.set(key, json.dumps(value), ttl=ttl)

    async def delete(self, key: str) -> bool:
        """Remove key (no-op if absent)."""
        if not self._enabled:
            _mem_store.pop(key, None)
            return True
        await self._cmd("DEL", key)
        return True

    async def exists(self, key: str) -> bool:
        """True if key is present (and, in fallback mode, unexpired)."""
        if not self._enabled:
            return await self.get(key) is not None
        result = await self._cmd("EXISTS", key)
        return bool(result)

    async def incr(self, key: str) -> int:
        """Atomically increment an integer counter; returns the new value."""
        if not self._enabled:
            # Fallback keeps any existing expiry on the counter.
            entry = _mem_store.get(key, ("0", 0))
            new_val = int(entry[0]) + 1
            _mem_store[key] = (str(new_val), entry[1])
            return new_val
        result = await self._cmd("INCR", key)
        return int(result)

    async def expire(self, key: str, ttl: int) -> bool:
        """Set/refresh a TTL (seconds) on an existing key."""
        if not self._enabled:
            if key in _mem_store:
                val, _ = _mem_store[key]
                _mem_store[key] = (val, time.time() + ttl)
            return True
        await self._cmd("EXPIRE", key, ttl)
        return True

    # ── List ops (used for job queue) ─────────────────────────────────
    async def lpush(self, key: str, *values: str) -> int:
        """Push values to the LEFT of a list (queue head)."""
        if not self._enabled:
            lst = json.loads(_mem_store.get(key, ("[]", 0))[0])
            for v in values:
                lst.insert(0, v)
            _mem_store[key] = (json.dumps(lst), 0)
            return len(lst)
        for v in values:
            await self._cmd("LPUSH", key, v)
        return 0  # Upstash REST returns the new length; we don't need it here

    async def rpop(self, key: str) -> Optional[str]:
        """Pop one value from the RIGHT of a list (queue tail = oldest item)."""
        if not self._enabled:
            lst = json.loads(_mem_store.get(key, ("[]", 0))[0])
            if not lst:
                return None
            val = lst.pop()
            _mem_store[key] = (json.dumps(lst), 0)
            return val
        return await self._cmd("RPOP", key)

    async def llen(self, key: str) -> int:
        """Length of the list at key (0 when absent)."""
        if not self._enabled:
            lst = json.loads(_mem_store.get(key, ("[]", 0))[0])
            return len(lst)
        result = await self._cmd("LLEN", key)
        return int(result or 0)

    async def lrange(self, key: str, start: int, stop: int) -> list[str]:
        """Inclusive slice of the list at key; stop=-1 means 'to the end' (Redis semantics)."""
        if not self._enabled:
            lst = json.loads(_mem_store.get(key, ("[]", 0))[0])
            end = None if stop == -1 else stop + 1
            return lst[start:end]
        result = await self._cmd("LRANGE", key, start, stop)
        return result or []

    # ── Rate limiting helper ──────────────────────────────────────────
    async def rate_limit_check(self, key: str, max_calls: int, window_secs: int) -> bool:
        """
        Returns True if the caller is within the rate limit, False if exceeded.
        Uses a simple counter with TTL (fixed-window algorithm).
        """
        count = await self.incr(key)
        # First hit in the window starts the expiry clock.
        if count == 1:
            await self.expire(key, window_secs)
        return count <= max_calls
189
+
190
+
191
+ # Module-level singleton — import this everywhere
192
+ cache = CacheService()
src/services/clustering.py ADDED
@@ -0,0 +1,365 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ src/services/clustering.py — Phase 3: HDBSCAN face clustering (People View)
3
+
4
+ Clusters all face vectors in the faces-arcface Pinecone index using HDBSCAN,
5
+ then stores cluster assignments in Supabase (face_clusters table).
6
+
7
+ Algorithm choice:
8
+ - HDBSCAN on ArcFace 512-d vectors (euclidean after L2 normalisation)
9
+ - min_cluster_size=3, min_samples=3, cluster_selection_epsilon=0.35
10
+ - Noise points (label=-1) are left unclustered — not forced into clusters
11
+ - Representative face = the vector closest to the cluster centroid
12
+
13
+ Pinecone fetch strategy:
14
+ - Pinecone free tier has no "list all vectors" endpoint
15
+ - We use a dummy query with random vectors + large top_k to page through
16
+ vectors. This is imperfect but works within free-tier constraints.
17
+ - Production alternative: store vector_ids in Supabase on upload (Phase 4)
18
+
19
+ Entry points:
20
+ run_clustering(pc, user_id) — full re-cluster, called by API endpoint
21
+ get_people(user_id) — read cluster list from Supabase
22
+ get_person_images(cluster_id, user_id) — images for one cluster
23
+ rename_cluster(cluster_id, name, user_id) — label "Mom", "John", etc.
24
+ """
25
+
26
+ import asyncio
27
+ import uuid
28
+ from datetime import datetime, timezone
29
+ from typing import Optional
30
+
31
+ import aiohttp
32
+ import numpy as np
33
+
34
+ from src.core.config import (
35
+ IDX_FACES_ARCFACE,
36
+ SUPABASE_URL, SUPABASE_SERVICE_KEY,
37
+ CLUSTER_MIN_SAMPLES, CLUSTER_MIN_CLUSTER_SIZE, CLUSTER_EPSILON,
38
+ FACE_SEARCH_TOP_K,
39
+ )
40
+
41
+
42
+ # ──────────────────────────────────────────────────────────────
43
+ # Supabase helpers
44
+ # ──────────────────────────────────────────────────────────────
45
def _hdr() -> dict:
    """Standard Supabase REST headers: service-role auth + JSON content type."""
    bearer = f"Bearer {SUPABASE_SERVICE_KEY}"
    return {
        "apikey": SUPABASE_SERVICE_KEY,
        "Authorization": bearer,
        "Content-Type": "application/json",
        "Prefer": "return=representation",
    }
+ }
52
+
53
+
54
+ async def _supa_upsert(table: str, rows: list[dict]) -> None:
55
+ if not SUPABASE_URL or not rows:
56
+ return
57
+ url = f"{SUPABASE_URL}/rest/v1/{table}"
58
+ headers = {**_hdr(), "Prefer": "resolution=merge-duplicates,return=minimal"}
59
+ async with aiohttp.ClientSession() as s:
60
+ await s.post(url, headers=headers, json=rows)
61
+
62
+
63
+ async def _supa_select(table: str, filters: str = "") -> list[dict]:
64
+ if not SUPABASE_URL:
65
+ return []
66
+ url = f"{SUPABASE_URL}/rest/v1/{table}?{filters}"
67
+ async with aiohttp.ClientSession() as s:
68
+ async with s.get(url, headers=_hdr()) as r:
69
+ if r.status == 200:
70
+ return await r.json()
71
+ return []
72
+
73
+
74
+ async def _supa_patch(table: str, filters: str, patch: dict) -> None:
75
+ if not SUPABASE_URL:
76
+ return
77
+ url = f"{SUPABASE_URL}/rest/v1/{table}?{filters}"
78
+ async with aiohttp.ClientSession() as s:
79
+ await s.patch(url, headers=_hdr(), json=patch)
80
+
81
+
82
+ async def _supa_delete(table: str, filters: str) -> None:
83
+ if not SUPABASE_URL:
84
+ return
85
+ url = f"{SUPABASE_URL}/rest/v1/{table}?{filters}"
86
+ async with aiohttp.ClientSession() as s:
87
+ await s.delete(url, headers=_hdr())
88
+
89
+
90
+ # ──────────────────────────────────────────────────────────────
91
+ # Pinecone vector fetch helpers
92
+ # ──────────────────────────────────────────────────────────────
93
+ def _fetch_all_vectors(idx, dim: int = 512, max_vectors: int = 10000) -> list[dict]:
94
+ """
95
+ Fetches as many vectors as possible from a Pinecone index using
96
+ random-probe queries. Free-tier Pinecone has no scan endpoint, so
97
+ we use diverse random probes to discover vectors.
98
+
99
+ Returns list of dicts: {id, values, metadata}
100
+ """
101
+ seen_ids: set = set()
102
+ collected: list[dict] = []
103
+ rng = np.random.default_rng(seed=42)
104
+
105
+ # 20 random probes — covers most of the index for typical gallery sizes
106
+ for _ in range(20):
107
+ probe = rng.standard_normal(dim).astype(np.float32)
108
+ probe /= np.linalg.norm(probe)
109
+
110
+ res = idx.query(
111
+ vector=probe.tolist(),
112
+ top_k=min(FACE_SEARCH_TOP_K, 1000),
113
+ include_metadata=True,
114
+ include_values=True,
115
+ )
116
+ for match in res.get("matches", []):
117
+ vid = match["id"]
118
+ if vid in seen_ids:
119
+ continue
120
+ seen_ids.add(vid)
121
+ values = match.get("values")
122
+ if values:
123
+ collected.append({
124
+ "id": vid,
125
+ "values": values,
126
+ "metadata": match.get("metadata", {}),
127
+ })
128
+ if len(collected) >= max_vectors:
129
+ break
130
+ if len(collected) >= max_vectors:
131
+ break
132
+
133
+ return collected
134
+
135
+
136
+ # ──────────────────────────────────────────────────────────────
137
+ # Core clustering logic
138
+ # ──────────────────────────────────────────────────────────────
139
+ def _run_hdbscan(vectors: np.ndarray) -> np.ndarray:
140
+ """
141
+ Runs HDBSCAN on the provided L2-normalised 512-d face vectors.
142
+ Returns integer label array (−1 = noise / unclustered).
143
+ """
144
+ try:
145
+ import hdbscan
146
+ except ImportError:
147
+ raise RuntimeError(
148
+ "hdbscan not installed. Add hdbscan>=0.8.33 to requirements.txt"
149
+ )
150
+
151
+ clusterer = hdbscan.HDBSCAN(
152
+ min_cluster_size=CLUSTER_MIN_CLUSTER_SIZE,
153
+ min_samples=CLUSTER_MIN_SAMPLES,
154
+ cluster_selection_epsilon=CLUSTER_EPSILON,
155
+ metric="euclidean",
156
+ core_dist_n_jobs=1, # HF CPU — avoid multiprocessing overhead
157
+ )
158
+ clusterer.fit(vectors)
159
+ return clusterer.labels_
160
+
161
+
162
+ def _pick_representative(cluster_vecs: np.ndarray, cluster_meta: list[dict]) -> dict:
163
+ """
164
+ Picks the face closest to the cluster centroid as the representative.
165
+ Returns the metadata dict for that face.
166
+ """
167
+ centroid = cluster_vecs.mean(axis=0)
168
+ centroid /= np.linalg.norm(centroid) + 1e-8
169
+ sims = cluster_vecs @ centroid
170
+ best_idx = int(np.argmax(sims))
171
+ return cluster_meta[best_idx]
172
+
173
+
174
+ # ──────────────────────────────────────────────────────────────
175
+ # Public entry points
176
+ # ──────────────────────────────────────────────────────────────
177
+ async def run_clustering(pc, user_id: str) -> dict:
178
+ """
179
+ Full re-cluster pipeline:
180
+ 1. Fetch all ArcFace vectors from Pinecone
181
+ 2. Run HDBSCAN
182
+ 3. Write cluster assignments to Supabase face_clusters table
183
+ 4. Write per-vector assignments to face_vector_clusters table
184
+
185
+ Returns a summary dict.
186
+ """
187
+ idx = pc.Index(IDX_FACES_ARCFACE)
188
+
189
+ # 1. Fetch vectors (blocking — run in thread pool)
190
+ raw = await asyncio.to_thread(_fetch_all_vectors, idx)
191
+ if len(raw) < CLUSTER_MIN_CLUSTER_SIZE:
192
+ return {"status": "skipped", "reason": "not enough vectors", "vectors": len(raw)}
193
+
194
+ ids = [r["id"] for r in raw]
195
+ metas = [r["metadata"] for r in raw]
196
+ matrix = np.array([r["values"] for r in raw], dtype=np.float32)
197
+
198
+ # L2-normalise before euclidean HDBSCAN (equivalent to angular distance)
199
+ norms = np.linalg.norm(matrix, axis=1, keepdims=True)
200
+ matrix = matrix / (norms + 1e-8)
201
+
202
+ # 2. Cluster (blocking)
203
+ labels = await asyncio.to_thread(_run_hdbscan, matrix)
204
+
205
+ unique_labels = set(labels) - {-1}
206
+ now_iso = datetime.now(timezone.utc).isoformat()
207
+
208
+ # 3. Delete existing clusters for this user (full re-cluster)
209
+ await _supa_delete("face_clusters", f"user_id=eq.{user_id}")
210
+ await _supa_delete("face_vector_clusters", f"user_id=eq.{user_id}")
211
+
212
+ cluster_rows = []
213
+ vector_rows = []
214
+
215
+ for label in sorted(unique_labels):
216
+ cluster_id = str(uuid.uuid4())
217
+ mask = labels == label
218
+ c_indices = np.where(mask)[0]
219
+ c_vecs = matrix[c_indices]
220
+ c_meta = [metas[i] for i in c_indices]
221
+ c_ids = [ids[i] for i in c_indices]
222
+
223
+ rep_meta = _pick_representative(c_vecs, c_meta)
224
+
225
+ cluster_rows.append({
226
+ "cluster_id": cluster_id,
227
+ "user_id": user_id,
228
+ "representative_face_crop": rep_meta.get("face_crop", ""),
229
+ "representative_vector_id": c_ids[0],
230
+ "face_count": int(len(c_indices)),
231
+ "name": None,
232
+ "created_at": now_iso,
233
+ "updated_at": now_iso,
234
+ })
235
+
236
+ for vid, meta in zip(c_ids, c_meta):
237
+ vector_rows.append({
238
+ "vector_id": vid,
239
+ "cluster_id": cluster_id,
240
+ "user_id": user_id,
241
+ "image_url": meta.get("url", ""),
242
+ "folder": meta.get("folder", ""),
243
+ "face_crop": meta.get("face_crop", ""),
244
+ "updated_at": now_iso,
245
+ })
246
+
247
+ # 4. Batch write to Supabase (200 rows per request)
248
+ for i in range(0, len(cluster_rows), 200):
249
+ await _supa_upsert("face_clusters", cluster_rows[i:i + 200])
250
+ for i in range(0, len(vector_rows), 200):
251
+ await _supa_upsert("face_vector_clusters", vector_rows[i:i + 200])
252
+
253
+ return {
254
+ "status": "ok",
255
+ "total_vectors": len(ids),
256
+ "clusters_found": len(unique_labels),
257
+ "noise_vectors": int(np.sum(labels == -1)),
258
+ }
259
+
260
+
261
+ async def get_people(user_id: str) -> list[dict]:
262
+ """Returns all identity clusters for a user, ordered by face_count desc."""
263
+ rows = await _supa_select(
264
+ "face_clusters",
265
+ f"user_id=eq.{user_id}&order=face_count.desc",
266
+ )
267
+ return [
268
+ {
269
+ "cluster_id": r["cluster_id"],
270
+ "name": r.get("name"),
271
+ "face_count": r.get("face_count", 0),
272
+ "representative_face_crop": r.get("representative_face_crop", ""),
273
+ }
274
+ for r in rows
275
+ ]
276
+
277
+
278
+ async def get_person_images(cluster_id: str, user_id: str) -> list[dict]:
279
+ """Returns all images belonging to a cluster."""
280
+ rows = await _supa_select(
281
+ "face_vector_clusters",
282
+ f"cluster_id=eq.{cluster_id}&user_id=eq.{user_id}",
283
+ )
284
+ # Dedupe by image_url (multiple face vectors can come from the same image)
285
+ seen: set = set()
286
+ out = []
287
+ for r in rows:
288
+ url = r.get("image_url", "")
289
+ if url and url not in seen:
290
+ seen.add(url)
291
+ out.append({
292
+ "url": url,
293
+ "folder": r.get("folder", ""),
294
+ "face_crop": r.get("face_crop", ""),
295
+ })
296
+ return out
297
+
298
+
299
+ async def rename_cluster(cluster_id: str, name: str, user_id: str) -> bool:
300
+ """Assigns a human-readable name to a cluster ('Mom', 'John', etc.)."""
301
+ await _supa_patch(
302
+ "face_clusters",
303
+ f"cluster_id=eq.{cluster_id}&user_id=eq.{user_id}",
304
+ {"name": name, "updated_at": datetime.now(timezone.utc).isoformat()},
305
+ )
306
+ return True
307
+
308
+
309
+ async def search_cluster_aware(
310
+ pc, image_map: dict, user_id: str
311
+ ) -> dict:
312
+ """
313
+ Cluster-aware search expansion (Phase 3 recall win).
314
+
315
+ Given an initial image_map from search_faces_split, look up which
316
+ clusters the matched faces belong to, then return ALL images in those
317
+ clusters. This achieves near-100% recall for well-indexed people.
318
+
319
+ Returns an expanded image_map in the same format as search_faces_split.
320
+ """
321
+ if not image_map:
322
+ return image_map
323
+
324
+ # Find which vector_ids were returned in the initial search
325
+ matched_vids = {v.get("vector_id") for v in image_map.values() if v.get("vector_id")}
326
+ if not matched_vids:
327
+ return image_map
328
+
329
+ # Look up cluster membership for those vector_ids
330
+ vid_list = ",".join(f'"{v}"' for v in matched_vids)
331
+ rows = await _supa_select(
332
+ "face_vector_clusters",
333
+ f"vector_id=in.({vid_list})&user_id=eq.{user_id}",
334
+ )
335
+
336
+ if not rows:
337
+ return image_map
338
+
339
+ # Collect all cluster_ids matched
340
+ cluster_ids = {r["cluster_id"] for r in rows}
341
+
342
+ # Fetch all images in those clusters
343
+ expanded = dict(image_map)
344
+ for cluster_id in cluster_ids:
345
+ cluster_images = await get_person_images(cluster_id, user_id)
346
+ for img in cluster_images:
347
+ url = img["url"]
348
+ if url not in expanded:
349
+ # Add with a slightly lower score than the worst match
350
+ # so cluster-expanded results sort after direct hits
351
+ min_score = min(
352
+ (v["fused_score"] for v in image_map.values()), default=0.3
353
+ )
354
+ expanded[url] = {
355
+ "fused_score": max(min_score - 0.01, 0.01),
356
+ "arcface_score": 0.0,
357
+ "adaface_score": 0.0,
358
+ "raw_score": 0.0,
359
+ "face_crop": img.get("face_crop", ""),
360
+ "folder": img.get("folder", "uncategorized"),
361
+ "vector_id": None,
362
+ "cluster_expanded": True,
363
+ }
364
+
365
+ return expanded
src/services/db_client.py ADDED
@@ -0,0 +1,327 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ from typing import Any, Dict, List
3
+
4
+ import cloudinary
5
+ import cloudinary.uploader
6
+ import cloudinary.api
7
+ from pinecone import Pinecone, ServerlessSpec
8
+
9
+ from src.core.config import (
10
+ IDX_FACES, IDX_OBJECTS,
11
+ IDX_FACES_ARCFACE, IDX_FACES_ADAFACE,
12
+ USE_SPLIT_FACE_INDEXES,
13
+ ARCFACE_WEIGHT, ADAFACE_WEIGHT,
14
+ FACE_MATCH_THRESHOLD, FUSED_MATCH_THRESHOLD, ARCFACE_SOLO_THRESHOLD,
15
+ FACE_SEARCH_TOP_K, OBJECT_SEARCH_TOP_K,
16
+ FACE_RESULTS_PER_QUERY_CAP,
17
+ FACE_DIM, ADAFACE_DIM, FUSED_FACE_DIM,
18
+ FACE_BLUR_THRESHOLD,
19
+ )
20
+
21
+
22
+ # ──────────────────────────────────────────────────────────────
23
+ # Pinecone client pool
24
+ # ──────────────────────────────────────────────────────────────
25
class PineconePool:
    """Caches one Pinecone client per API key so repeated calls reuse it."""

    def __init__(self):
        # api_key → Pinecone client
        self._clients = {}

    def get(self, api_key: str) -> Pinecone:
        """Return the cached client for `api_key`, creating it on first use."""
        client = self._clients.get(api_key)
        if client is None:
            client = Pinecone(api_key=api_key)
            self._clients[api_key] = client
        return client
+ return self._clients[api_key]
33
+
34
+
35
+ pinecone_pool = PineconePool()
36
+
37
+
38
+ # ──────────────────────────────────────────────────────────────
39
+ # Cloudinary helpers (unchanged from Phase 1)
40
+ # ──────────────────────────────────────────────────────────────
41
def _set_cld_config(creds: dict):
    """Point the global Cloudinary config at the given account credentials.

    NOTE(review): cloudinary.config() is process-global state — concurrent
    requests using different credentials could interleave; confirm the
    service is effectively single-account per process.
    """
    cloudinary.config(
        cloud_name=creds.get("cloud_name"),
        api_key=creds.get("api_key"),
        api_secret=creds.get("api_secret"),
        secure=True,
    )
+ )
48
+
49
+
50
def cld_ping(creds: dict):
    """Credential check — raises if the Cloudinary account is unreachable."""
    _set_cld_config(creds)
    cloudinary.api.ping()
+ cloudinary.api.ping()
53
+
54
+
55
def cld_upload(file_obj, folder: str, creds: dict) -> dict:
    """Upload a file-like object into `folder`; returns Cloudinary's response dict."""
    _set_cld_config(creds)
    return cloudinary.uploader.upload(file_obj, folder=folder)
+ return cloudinary.uploader.upload(file_obj, folder=folder)
58
+
59
+
60
def cld_root_folders(creds: dict) -> dict:
    """List the account's top-level folders."""
    _set_cld_config(creds)
    return cloudinary.api.root_folders()
+ return cloudinary.api.root_folders()
63
+
64
+
65
def cld_list_folder_images(
    folder: str, creds: dict, cursor: str | None = None, page_size: int = 100
) -> dict:
    """
    Fetch one page of uploaded resources under `folder` (prefix match).

    Pass the returned `next_cursor` back as `cursor` to get the next page.
    FIX: `cursor` is annotated `str | None` — the old `cursor: str = None`
    default contradicted its own annotation.
    """
    _set_cld_config(creds)
    kwargs = {"type": "upload", "prefix": f"{folder}/", "max_results": page_size}
    if cursor:
        kwargs["next_cursor"] = cursor
    return cloudinary.api.resources(**kwargs)
+ return cloudinary.api.resources(**kwargs)
71
+
72
+
73
def cld_delete_resource(public_id: str, creds: dict):
    """Delete a single uploaded resource by its public id."""
    _set_cld_config(creds)
    cloudinary.uploader.destroy(public_id)
+ cloudinary.uploader.destroy(public_id)
76
+
77
+
78
def cld_delete_folder_resources(folder: str, creds: dict):
    """Delete every resource whose public id starts with `folder`/."""
    _set_cld_config(creds)
    cloudinary.api.delete_resources_by_prefix(f"{folder}/")
+ cloudinary.api.delete_resources_by_prefix(f"{folder}/")
81
+
82
+
83
def cld_remove_folder(folder: str, creds: dict):
    """Best-effort removal of an (empty) folder — failures are ignored
    because Cloudinary refuses to delete non-empty folders."""
    _set_cld_config(creds)
    try:
        cloudinary.api.delete_folder(folder)
    except Exception:
        pass
+ pass
89
+
90
+
91
def cld_delete_all_paginated(creds: dict) -> int:
    """
    Delete every uploaded resource in the account, one 500-item page at a
    time.  Returns the total number of resources deleted.
    """
    _set_cld_config(creds)
    total = 0
    cursor = None
    while True:
        params = {"type": "upload", "max_results": 500}
        if cursor:
            params["next_cursor"] = cursor
        page = cloudinary.api.resources(**params)
        batch = page.get("resources", [])
        if not batch:
            return total
        public_ids = [item["public_id"] for item in batch]
        cloudinary.api.delete_resources(public_ids)
        total += len(public_ids)
        cursor = page.get("next_cursor")
        if not cursor:
            return total
+ return deleted
110
+
111
+
112
+ # ──────────────────────────────────────────────────────────────
113
+ # Index management
114
+ # ──────────────────────────────────────────────────────────────
115
def ensure_indexes(pc: Pinecone) -> List[str]:
    """
    Ensures all required indexes exist.
    - Objects index: 1536d (unchanged)
    - Legacy faces index: 1024d (kept for backward compat)
    - New split indexes: 512d each (ArcFace + AdaFace separately)

    Returns the list of index names that were newly created (existing
    indexes are left untouched).
    """
    created = []
    existing = {idx.name for idx in pc.list_indexes()}

    index_specs = [
        (IDX_OBJECTS, 1536),
        (IDX_FACES, FUSED_FACE_DIM),  # legacy — only created on first run if missing
    ]

    if USE_SPLIT_FACE_INDEXES:
        index_specs.extend([
            (IDX_FACES_ARCFACE, FACE_DIM),
            (IDX_FACES_ADAFACE, ADAFACE_DIM),
        ])

    # Create any index that is missing; cosine metric on AWS us-east-1.
    for name, dim in index_specs:
        if name not in existing:
            pc.create_index(
                name=name,
                dimension=dim,
                metric="cosine",
                spec=ServerlessSpec(cloud="aws", region="us-east-1"),
            )
            created.append(name)
    return created
+ return created
146
+
147
+
148
def delete_and_recreate_indexes(pc: Pinecone):
    """Drop all managed indexes (including split ones) and recreate them.

    Backs /api/reset-database.
    """
    present = {idx.name for idx in pc.list_indexes()}
    to_drop = [IDX_FACES, IDX_OBJECTS]
    if USE_SPLIT_FACE_INDEXES:
        to_drop += [IDX_FACES_ARCFACE, IDX_FACES_ADAFACE]
    for idx_name in to_drop:
        if idx_name not in present:
            continue
        pc.delete_index(idx_name)
        # Give Pinecone time to release the name before recreation.
        time.sleep(5)
    ensure_indexes(pc)
159
+
160
+
161
+ # ──────────────────────────────────────────────────────────────
162
+ # LEGACY face search (for backward compat / fallback)
163
+ # ──────────────────────────────────────────────────────────────
164
def search_faces(idx, vec: List[float], det_score: float, filter_dict: dict = None) -> Dict[str, Any]:
    """Legacy face query against the old fused 1024-d index.

    Returns ``{url: {raw_score, face_crop, folder}}`` keeping only the
    best-scoring match per image URL. ``det_score`` is accepted for
    interface compatibility but not used by this implementation.
    """
    legacy_floor = 0.45  # cosine floor on the old fused embedding

    params = {"vector": vec, "top_k": FACE_SEARCH_TOP_K, "include_metadata": True}
    if filter_dict:
        params["filter"] = filter_dict

    best_per_url: Dict[str, Any] = {}
    for m in idx.query(**params).get("matches", []):
        score = m.get("score", 0)
        if score < legacy_floor:
            continue
        meta = m.get("metadata", {})
        url = meta.get("url")
        if not url:
            continue
        prev = best_per_url.get(url)
        if prev is None or prev["raw_score"] < score:
            best_per_url[url] = {
                "raw_score": score,
                "face_crop": meta.get("face_crop", ""),
                "folder": meta.get("folder", "uncategorized"),
            }
    return best_per_url
187
+
188
+
189
+ # ──────────────────────────────────────────────────────────────
190
+ # PHASE 2: Split-index face search with score fusion
191
+ # ──────────────────────────────────────────────────────────────
192
def search_faces_split(
    idx_arcface, idx_adaface,
    arcface_vec: List[float], adaface_vec: List[float],
    filter_dict: dict = None,
) -> Dict[str, Any]:
    """
    Queries BOTH face indexes and fuses scores per vector_id; returns a map
    keyed by url with the best fused score across all query augmentations.

    Acceptance rules, in order:
      * ArcFace score below FACE_MATCH_THRESHOLD -> rejected outright
        (ArcFace is the primary signal and acts as a hard floor).
      * Hit present only in ArcFace (no AdaFace result for that vector_id)
        -> must clear the stricter ARCFACE_SOLO_THRESHOLD; the ArcFace
        score alone becomes the fused score.
      * Hit present in both -> fused_score = ARCFACE_WEIGHT * arcface_cos
        + ADAFACE_WEIGHT * adaface_cos, and must clear FUSED_MATCH_THRESHOLD.
      * Matches whose stored blur_score is under FACE_BLUR_THRESHOLD are dropped.

    The result is capped at FACE_RESULTS_PER_QUERY_CAP entries, keeping the
    highest fused scores.
    """
    query_kwargs_base = {"top_k": FACE_SEARCH_TOP_K, "include_metadata": True}
    if filter_dict:
        query_kwargs_base["filter"] = filter_dict

    # NOTE(review): the two index queries run sequentially in this function;
    # any parallelism must come from the caller.
    arc_res = idx_arcface.query(vector=arcface_vec, **query_kwargs_base)

    # Only query AdaFace if we have a valid vector — an (almost) all-zero
    # vector signals that the AdaFace embedding failed upstream.
    has_ada = adaface_vec is not None and any(abs(x) > 1e-6 for x in adaface_vec)
    if has_ada:
        ada_res = idx_adaface.query(vector=adaface_vec, **query_kwargs_base)
    else:
        ada_res = {"matches": []}

    # AdaFace scores keyed by vector_id, for O(1) fusion lookups below.
    ada_by_id = {
        m["id"]: m.get("score", 0.0)
        for m in ada_res.get("matches", [])
    }

    # AdaFace metadata keyed by vector_id.
    # NOTE(review): ada_meta_by_id and seen_vector_ids are populated but never
    # read below — presumably intended for a second pass over AdaFace-only
    # matches that was not written. Confirm intent or remove.
    ada_meta_by_id = {
        m["id"]: m.get("metadata", {})
        for m in ada_res.get("matches", [])
    }

    image_map: Dict[str, Any] = {}
    seen_vector_ids = set()

    # ── Pass 1: ArcFace matches (the primary signal) ─────────────
    for match in arc_res.get("matches", []):
        vid = match["id"]
        seen_vector_ids.add(vid)
        arc_score = match.get("score", 0.0)

        # Hard floor: if ArcFace says no, it's no. This kills imposters.
        if arc_score < FACE_MATCH_THRESHOLD:
            continue

        ada_score = ada_by_id.get(vid, None)
        if ada_score is None:
            # No AdaFace confirmation — apply stricter solo threshold.
            if arc_score < ARCFACE_SOLO_THRESHOLD:
                continue
            fused = arc_score
        else:
            fused = ARCFACE_WEIGHT * arc_score + ADAFACE_WEIGHT * ada_score
            if fused < FUSED_MATCH_THRESHOLD:
                continue

        meta = match.get("metadata", {})
        url = meta.get("url")
        if not url:
            continue
        # Missing blur_score defaults to 100.0, i.e. treated as sharp.
        if meta.get("blur_score", 100.0) < FACE_BLUR_THRESHOLD:
            continue

        # Keep only the best fused score per image URL.
        existing = image_map.get(url)
        if not existing or existing["fused_score"] < fused:
            image_map[url] = {
                "fused_score": fused,
                "arcface_score": arc_score,
                "adaface_score": ada_score if ada_score is not None else 0.0,
                "raw_score": arc_score,  # for UI back-compat
                "face_crop": meta.get("face_crop", ""),
                "folder": meta.get("folder", "uncategorized"),
                "vector_id": vid,
            }

    # Cap at most N results per query face
    if len(image_map) > FACE_RESULTS_PER_QUERY_CAP:
        top = sorted(
            image_map.items(),
            key=lambda kv: kv[1]["fused_score"],
            reverse=True,
        )[:FACE_RESULTS_PER_QUERY_CAP]
        image_map = dict(top)

    return image_map
287
+
288
+
289
+ # ──────────────────────────────────────────────────────────────
290
+ # Object search (unchanged)
291
+ # ──────────────────────────────────────────────────────────────
292
def search_objects(idx, vec: List[float]) -> List[Dict[str, Any]]:
    """Query the object index with *vec*; returns url/score/folder dicts.

    No score threshold is applied here — all top-k matches are returned.
    """
    response = idx.query(vector=vec, top_k=OBJECT_SEARCH_TOP_K, include_metadata=True)
    results: List[Dict[str, Any]] = []
    for m in response.get("matches", []):
        meta = m.get("metadata", {})
        score = m.get("score", 0)
        results.append({
            "url": meta.get("url", ""),
            "score": round(score, 4),
            "raw_score": score,
            "folder": meta.get("folder", "uncategorized"),
        })
    return results
304
+
305
+
306
+ # ──────────────────────────────────────────────────────────────
307
+ # Result merging
308
+ # ──────────────────────────────────────────────────────────────
309
def merge_face_results(groups: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Dedupe matches across query faces/augmentations; best score wins per URL.

    Returns the surviving matches sorted by score, highest first.
    """
    best: Dict[str, Dict[str, Any]] = {}
    for grp in groups:
        for m in grp.get("matches", []):
            key = m["url"]
            current = best.get(key)
            if current is None or current["score"] < m["score"]:
                best[key] = m
    return sorted(best.values(), key=lambda m: m["score"], reverse=True)
318
+
319
+
320
def merge_object_results(nested_results: List[List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
    """Flatten per-crop object result lists, dedupe by URL keeping the top score."""
    winners: Dict[str, Dict[str, Any]] = {}
    for batch in nested_results:
        for m in batch:
            key = m["url"]
            held = winners.get(key)
            if held is None or held["score"] < m["score"]:
                winners[key] = m
    return sorted(winners.values(), key=lambda m: m["score"], reverse=True)
src/services/jobs.py ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ src/services/jobs.py — Phase 3: Async upload job queue
3
+ """
4
+
5
+ import asyncio
6
+ import json
7
+ import uuid
8
+ from typing import Any, Optional
9
+
10
+ import aiohttp
11
+
12
+ from src.core.config import (
13
+ SUPABASE_URL, SUPABASE_SERVICE_KEY,
14
+ USE_ASYNC_UPLOADS,
15
+ )
16
+ from src.services.cache import cache
17
+
18
+ QUEUE_KEY = "upload_jobs_queue"
19
+ JOB_TTL = 86400 # 24 h
20
+
21
+
22
+ # ──────────────────────────────────────────────────────────────
23
+ # Supabase helpers
24
+ # ──────────────────────────────────────────────────────────────
25
def _supa_headers() -> dict:
    """Common headers for Supabase REST calls (service-role auth)."""
    headers = {
        "apikey": SUPABASE_SERVICE_KEY,
        "Authorization": f"Bearer {SUPABASE_SERVICE_KEY}",
        "Content-Type": "application/json",
    }
    # "return=minimal" keeps writes cheap — no row representation echoed back.
    headers["Prefer"] = "return=minimal"
    return headers
32
+
33
+
34
+ def _clean_row(row: dict) -> dict:
35
+ """
36
+ Remove None values before sending to Supabase.
37
+ Supabase REST rejects Python None in JSON — omit the key entirely
38
+ and let Postgres use the column default instead.
39
+ """
40
+ return {k: v for k, v in row.items() if v is not None}
41
+
42
+
43
async def _supa_insert(table: str, row: dict) -> bool:
    """POST one row to a Supabase table.

    Returns True on HTTP 200/201; logs the failure and returns False for
    everything else (missing config, non-2xx status, transport error).
    """
    if not SUPABASE_URL or not SUPABASE_SERVICE_KEY:
        print("[Supabase] SUPABASE_URL or SUPABASE_SERVICE_KEY not set — skipping insert")
        return False

    endpoint = f"{SUPABASE_URL}/rest/v1/{table}"
    payload = _clean_row(row)

    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(endpoint, headers=_supa_headers(), json=payload) as resp:
                if resp.status in (200, 201):
                    return True
                body = await resp.text()
                print(f"[Supabase INSERT ERROR] table={table} status={resp.status} body={body[:500]}")
                print(f"[Supabase INSERT ERROR] row_keys={list(payload.keys())}")
                return False
    except Exception as e:
        print(f"[Supabase INSERT EXCEPTION] table={table} error={e}")
        return False
65
+
66
+
67
async def _supa_patch(table: str, job_id: str, patch: dict) -> bool:
    """PATCH the row(s) where job_id matches; True unless config/HTTP/transport failure."""
    if not SUPABASE_URL or not SUPABASE_SERVICE_KEY:
        return False
    endpoint = f"{SUPABASE_URL}/rest/v1/{table}?job_id=eq.{job_id}"
    payload = _clean_row(patch)
    try:
        async with aiohttp.ClientSession() as session:
            async with session.patch(endpoint, headers=_supa_headers(), json=payload) as resp:
                if resp.status in (200, 201, 204):
                    return True
                body = await resp.text()
                print(f"[Supabase PATCH ERROR] job_id={job_id} status={resp.status} body={body[:300]}")
                return False
    except Exception as e:
        print(f"[Supabase PATCH EXCEPTION] job_id={job_id} error={e}")
        return False
83
+
84
+
85
async def _supa_get(table: str, job_id: str) -> Optional[dict]:
    """Fetch a single row by job_id; None when missing, unconfigured, or errored."""
    if not SUPABASE_URL or not SUPABASE_SERVICE_KEY:
        return None
    # select=* keeps the projection explicit and avoids column-name ambiguity.
    endpoint = f"{SUPABASE_URL}/rest/v1/{table}?select=*&job_id=eq.{job_id}&limit=1"
    # A GET needs the representation back, overriding the minimal-write default.
    headers = {**_supa_headers(), "Prefer": "return=representation"}
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(endpoint, headers=headers) as resp:
                if resp.status == 200:
                    rows = await resp.json()
                    if rows:
                        return rows[0]
                    return None
    except Exception as e:
        print(f"[Supabase GET EXCEPTION] job_id={job_id} error={e}")
    return None
100
+
101
+
102
+ # ──────────────────────────────────────────────────────────────
103
+ # Public API
104
+ # ──────────────────────────────────────────────────────────────
105
async def create_job(
    user_id: str,
    folder: str,
    total_files: int,
    job_payload: dict,
) -> str:
    """Register a new upload job in Supabase + Redis and enqueue it.

    Redis is the source of truth for status polling, so the job proceeds
    even when the Supabase insert fails (the failure is logged, not raised).
    Only columns guaranteed to exist are sent to Supabase; omitted columns
    (result/error/timestamps) fall back to their Postgres defaults.
    """
    job_id = str(uuid.uuid4())

    row = {
        "job_id": job_id,
        "user_id": user_id or "anonymous",
        "folder": folder,
        "status": "pending",
        "total_files": total_files,
        "processed_files": 0,
    }

    if await _supa_insert("upload_jobs", row):
        print(f"[Jobs] Created job {job_id} in Supabase ✓")
    else:
        print(f"[Jobs] Supabase insert FAILED for job {job_id} — job will still work via Redis")

    # Mirror into Redis (with the raw payload) and push onto the work queue.
    redis_doc = {**row, "payload": job_payload, "status": "pending"}
    await cache.set_json(f"job:{job_id}", redis_doc, ttl=JOB_TTL)
    await cache.lpush(QUEUE_KEY, job_id)

    return job_id
140
+
141
+
142
async def get_job_status(job_id: str) -> Optional[dict]:
    """Return job status from Redis (fast path) or Supabase as fallback."""
    doc = await cache.get_json(f"job:{job_id}")
    if doc:
        # The raw upload payload is never exposed to pollers.
        doc.pop("payload", None)
        return doc
    return await _supa_get("upload_jobs", job_id)
150
+
151
+
152
async def update_job_progress(job_id: str, processed: int, total: int) -> None:
    """Mark the job as 'processing' with *processed* files done.

    Writes the patch to Supabase (best-effort) and mirrors it into the
    Redis job document. ``total`` is currently unused — total_files is
    written once at job creation — and is kept only for caller symmetry.
    """
    patch = {
        "status": "processing",
        "processed_files": processed,
    }
    await _supa_patch("upload_jobs", job_id, patch)

    # Keep the Redis mirror in sync; refresh the TTL on every update.
    cached = await cache.get_json(f"job:{job_id}") or {}
    cached.update(patch)
    await cache.set_json(f"job:{job_id}", cached, ttl=JOB_TTL)
162
+
163
+
164
async def complete_job(job_id: str, result: dict) -> None:
    """Finalize a job as 'completed' and persist its result summary.

    Supabase receives the result serialized (JSONB column); the Redis
    mirror keeps the native dict so pollers get structured data.
    """
    patch = {
        "status": "completed",
        "processed_files": result.get("files", 0),
        "result": json.dumps(result),  # JSONB column — serialize explicitly
    }
    await _supa_patch("upload_jobs", job_id, patch)

    doc = await cache.get_json(f"job:{job_id}") or {}
    doc.update(patch)
    doc["result"] = result  # keep as dict in Redis
    doc.pop("payload", None)
    await cache.set_json(f"job:{job_id}", doc, ttl=JOB_TTL)
176
+
177
+
178
async def fail_job(job_id: str, error: str) -> None:
    """Mark the job 'failed', recording a truncated error message in both stores."""
    patch = {
        "status": "failed",
        "error": str(error)[:500],  # stay well under any DB column limit
    }
    await _supa_patch("upload_jobs", job_id, patch)

    doc = await cache.get_json(f"job:{job_id}") or {}
    doc.update(patch)
    doc.pop("payload", None)
    await cache.set_json(f"job:{job_id}", doc, ttl=JOB_TTL)
189
+
190
+
191
+ # ──────────────────────────────────────────────────────────────
192
+ # Background worker
193
+ # ──────────────────────────────────────────────────────────────
194
async def run_worker(app_state) -> None:
    """Long-running background consumer for the upload-job queue.

    Pops job ids from the Redis list (RPOP against create_job's LPUSH,
    i.e. FIFO), loads each job's payload from the Redis job document, and
    executes it. Runs until the task is cancelled; any unexpected error is
    logged and the loop backs off 5 s before retrying.
    """
    print("[JobWorker] started")
    while True:
        try:
            job_id = await cache.rpop(QUEUE_KEY)
            if not job_id:
                # Queue empty — poll again shortly.
                await asyncio.sleep(2)
                continue

            print(f"[JobWorker] picked up job {job_id}")
            cached = await cache.get_json(f"job:{job_id}")
            if not cached:
                # Job record expired (JOB_TTL) or was never written.
                print(f"[JobWorker] job {job_id} not found in Redis — skipping")
                continue

            payload = cached.get("payload", {})
            await _execute_upload_job(job_id, payload, app_state)

        except asyncio.CancelledError:
            # Propagated on shutdown — exit the loop cleanly.
            print("[JobWorker] cancelled — shutting down")
            break
        except Exception as e:
            print(f"[JobWorker] unhandled error: {e}")
            await asyncio.sleep(5)
218
+
219
+
220
async def _execute_upload_job(job_id: str, payload: dict, app_state) -> None:
    """Run one queued upload job end to end.

    Processes the payload's files in chunks of 10 concurrent tasks,
    reports progress after each chunk, batch-upserts all vectors, and
    records completion or failure via complete_job/fail_job.
    """
    # Imported at call time rather than module level — presumably to avoid
    # circular imports with src.api.upload; confirm before moving to the top.
    from src.services.db_client import pinecone_pool, ensure_indexes
    from src.api.upload import _process_one_file, _batch_upsert_all

    files_data: list[dict] = payload.get("files_data", [])
    folder: str = payload.get("folder", "uncategorized")
    detect_faces: bool = payload.get("detect_faces", True)
    # NOTE(review): user_id is extracted but never used below.
    user_id: str = payload.get("user_id", "anonymous")
    keys: dict = payload.get("keys", {})
    total = len(files_data)

    print(f"[JobWorker] executing job {job_id}: {total} files in '{folder}'")

    try:
        pc = pinecone_pool.get(keys["pinecone_key"])
        # ensure_indexes is blocking — run it off the event loop.
        created = await asyncio.to_thread(ensure_indexes, pc)
        if created:
            # Freshly created indexes need a moment before accepting upserts.
            await asyncio.sleep(8)

        CHUNK = 10  # max concurrent file-processing tasks per batch
        all_results = []
        processed = 0

        for chunk_start in range(0, total, CHUNK):
            chunk = files_data[chunk_start:chunk_start + CHUNK]
            chunk_results = await asyncio.gather(*[
                _process_one_file(
                    file_bytes=bytes(f["bytes"]),
                    folder=folder,
                    detect_faces=detect_faces,
                    keys=keys,
                    ai=app_state.ai,
                    sem=app_state.ai_semaphore,
                )
                for f in chunk
            ])
            all_results.extend(chunk_results)
            processed += len(chunk)
            await update_job_progress(job_id, processed, total)
            print(f"[JobWorker] job {job_id}: {processed}/{total} processed")

        # Single batched upsert of every vector produced above.
        summary = await _batch_upsert_all(results=all_results, folder=folder, pc=pc)

        await complete_job(job_id, {
            "files": len(summary["uploaded_urls"]),
            "urls": summary["uploaded_urls"],
            "summary": {
                "arcface_vecs": summary["arcface_vecs"],
                "adaface_vecs": summary["adaface_vecs"],
                "object_vecs": summary["object_vecs"],
            },
        })
        print(f"[JobWorker] job {job_id} COMPLETED ✓")

    except Exception as e:
        # Any failure marks the whole job failed; the traceback goes to logs.
        print(f"[JobWorker] job {job_id} FAILED: {e}")
        import traceback
        traceback.print_exc()
        await fail_job(job_id, str(e))
279
+
280
+
281
+ # ──────────────────────────────────────────────────────────────
282
+ # Utility
283
+ # ──────────────────────────────────────────────────────────────
284
+ def _iso_now() -> str:
285
+ from datetime import datetime, timezone
286
+ return datetime.now(timezone.utc).isoformat()
src/services/onnx_models.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ONNX runtime wrappers. Drop-in replacement for the PyTorch SigLIP + DINOv2
3
+ models inside AIModelManager._embed_crops_batch.
4
+
5
+ Import pattern in ai_manager.py:
6
+
7
+ from src.services.onnx_models import ONNXVisionStack
8
+ if USE_ONNX_VISION:
9
+ self.vision_stack = ONNXVisionStack(ONNX_MODELS_DIR, ONNX_USE_INT8)
10
+ # use self.vision_stack.encode(crops) instead of torch models
11
+ """
12
+ import os
13
+ import numpy as np
14
+ from PIL import Image
15
+ import onnxruntime as ort
16
+
17
+
18
+ # SigLIP normalization (ImageNet-style mean/std for siglip-base-patch16-224)
19
+ _SIGLIP_MEAN = np.array([0.5, 0.5, 0.5], dtype=np.float32)
20
+ _SIGLIP_STD = np.array([0.5, 0.5, 0.5], dtype=np.float32)
21
+
22
+ # DINOv2 uses ImageNet stats
23
+ _DINO_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
24
+ _DINO_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)
25
+
26
+
27
def _preprocess_batch(
    pil_images: list[Image.Image], size: int, mean: np.ndarray, std: np.ndarray
) -> np.ndarray:
    """Resize + normalize a batch of PIL images to a (B, 3, size, size) fp32 array."""
    tensors = []
    for img in pil_images:
        rgb = img if img.mode == "RGB" else img.convert("RGB")
        rgb = rgb.resize((size, size), Image.BILINEAR)
        pixels = np.asarray(rgb, dtype=np.float32) / 255.0
        pixels = (pixels - mean) / std
        tensors.append(pixels.transpose(2, 0, 1))  # HWC -> CHW
    return np.stack(tensors, axis=0)
41
+
42
+
43
+ def _l2_normalize(x: np.ndarray, axis: int = 1) -> np.ndarray:
44
+ n = np.linalg.norm(x, axis=axis, keepdims=True)
45
+ n = np.where(n == 0, 1.0, n)
46
+ return x / n
47
+
48
+
49
class ONNXVisionStack:
    """SigLIP + DINOv2 fused embeddings via ONNX Runtime (CPU)."""

    def __init__(self, models_dir: str, use_int8: bool = True):
        """Load both vision models (int8-quantized by default) and warm them up.

        Raises FileNotFoundError with guidance when either .onnx file is missing.
        """
        suffix = "_int8.onnx" if use_int8 else ".onnx"
        siglip_path = os.path.join(models_dir, f"siglip_vision{suffix}")
        dino_path = os.path.join(models_dir, f"dinov2{suffix}")

        if not os.path.exists(siglip_path):
            raise FileNotFoundError(
                f"ONNX model not found: {siglip_path}. "
                "Run scripts/convert_to_onnx.py and upload outputs to the Space."
            )
        if not os.path.exists(dino_path):
            raise FileNotFoundError(f"ONNX model not found: {dino_path}")

        opts = ort.SessionOptions()
        opts.intra_op_num_threads = int(os.getenv("OMP_NUM_THREADS", "2"))
        opts.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

        self.siglip = ort.InferenceSession(
            siglip_path, sess_options=opts, providers=["CPUExecutionProvider"]
        )
        self.dino = ort.InferenceSession(
            dino_path, sess_options=opts, providers=["CPUExecutionProvider"]
        )

        # Warm up once per session: the first run pays kernel-compilation cost.
        warm = np.zeros((1, 3, 224, 224), dtype=np.float32)
        for session in (self.siglip, self.dino):
            session.run(None, {"pixel_values": warm})

    def encode(self, pil_crops: list[Image.Image]) -> list[np.ndarray]:
        """Return one 1536-d L2-normalized fused vector per input crop.

        Matches the output shape of the old PyTorch path: a Python list of
        per-crop numpy vectors; empty input yields an empty list.
        """
        if not pil_crops:
            return []

        siglip_in = _preprocess_batch(pil_crops, 224, _SIGLIP_MEAN, _SIGLIP_STD)
        dino_in = _preprocess_batch(pil_crops, 224, _DINO_MEAN, _DINO_STD)

        siglip_emb = self.siglip.run(None, {"pixel_values": siglip_in})[0]
        dino_emb = self.dino.run(None, {"pixel_values": dino_in})[0]

        # Normalize each half, concatenate, then normalize the fused vector.
        fused = np.concatenate(
            [_l2_normalize(siglip_emb), _l2_normalize(dino_emb)], axis=1
        )
        fused = _l2_normalize(fused)
        return list(fused)