AdarshDRC commited on
Commit
29bfc1f
·
0 Parent(s):

fix: Resolving backend

Browse files
.gitattributes ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *.onnx filter=lfs diff=lfs merge=lfs -text
2
+ onnx_models/*.onnx filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ /myenv
.gitlab-ci.yml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ stages:
2
+ - ping
3
+
4
+ keep_hf_space_warm:
5
+ stage: ping
6
+ script:
7
+ - echo "Ping health endpoint"
8
+ - curl -sf --max-time 90 https://adarshdrc-visual-search-api.hf.space/api/health || echo "Ping failed (may be cold-starting)"
9
+
10
+ - echo "Waiting for warmup..."
11
+ - sleep 60
12
+
13
+ - echo "Second warmup ping"
14
+ - curl -sf --max-time 30 https://adarshdrc-visual-search-api.hf.space/api/health
Dockerfile ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Dockerfile — Enterprise Lens V3
# InsightFace models download on first run (not at build time)
# This avoids build timeout and network issues during Docker build
#
# Fix in this revision: the file previously contained TWO `CMD` instructions.
# Only the last CMD in a Dockerfile takes effect, so the first
# `CMD uvicorn main:app \ ...` was dead code — and its final line ended in a
# dangling `\`, which makes the parser treat the following lines as part of
# the same (broken) instruction. The duplicate is removed; the single exec-form
# CMD at the end remains.

FROM python:3.10-slim

WORKDIR /app

# ── System deps ──────────────────────────────────────────────────
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 libglib2.0-0 libgomp1 git \
    build-essential cmake g++ \
    wget ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# ── Step 1: Build tools (MUST be before insightface) ─────────────
RUN pip install --no-cache-dir \
    "numpy<2.0" \
    "setuptools>=65" \
    wheel \
    cython \
    scikit-build \
    cmake

# ── Step 2: onnxruntime (MUST be before insightface) ─────────────
RUN pip install --no-cache-dir onnxruntime

# ── Step 3: insightface ───────────────────────────────────────────
RUN pip install --no-cache-dir --prefer-binary "insightface>=0.7.3"

# ── Step 4: Remaining requirements ───────────────────────────────
COPY requirements.txt .
RUN pip install --no-cache-dir --prefer-binary -r requirements.txt

# ── Copy app code ─────────────────────────────────────────────────
COPY . .
RUN mkdir -p temp_uploads saved_images && chmod -R 777 temp_uploads saved_images

# ── Hugging Face Auth Token ──────────────────────────────────────
# Define the argument so Docker accepts it during build
ARG HF_TOKEN
# Set it as an environment variable so Python/HuggingFace can see it.
# SECURITY NOTE(review): this bakes the token into the image's layers and
# runtime environment — prefer a BuildKit secret mount
# (RUN --mount=type=secret,...) if the image is ever shared or pushed.
ENV HF_TOKEN=$HF_TOKEN

# ── Pre-download ONLY transformers + YOLO at build time ──────────
# InsightFace models download on first startup (cached after that)
# (Heredoc RUN syntax requires BuildKit.)
RUN python - <<'EOF'
import os
os.environ["TRANSFORMERS_VERBOSITY"] = "error"

print("Pre-downloading SigLIP...")
from transformers import AutoProcessor, AutoModel
AutoProcessor.from_pretrained("google/siglip-base-patch16-224", use_fast=True)
AutoModel.from_pretrained("google/siglip-base-patch16-224")
print("SigLIP done")

print("Pre-downloading DINOv2...")
from transformers import AutoImageProcessor
AutoImageProcessor.from_pretrained("facebook/dinov2-base")
AutoModel.from_pretrained("facebook/dinov2-base")
print("DINOv2 done")

print("Pre-downloading YOLO seg...")
from ultralytics import YOLO
YOLO("yolo11n-seg.pt")
print("YOLO done")

print("Build complete! InsightFace models download on first startup.")
EOF

EXPOSE 7860
ENV WEB_CONCURRENCY=1

# These significantly improve CPU inference throughput on HF free tier (2 vCPU).
ENV OMP_NUM_THREADS=2
ENV MKL_NUM_THREADS=2
ENV OPENBLAS_NUM_THREADS=2
ENV NUMEXPR_NUM_THREADS=2
ENV TOKENIZERS_PARALLELISM=false

# Tell ONNX Runtime to use CPU optimizations aggressively
ENV ORT_DISABLE_ALL_OPTIMIZATIONS=0

# COPY the pre-converted ONNX models into the image.
# Run scripts/convert_to_onnx.py locally first, then commit onnx_models/
# to your Space repo.
COPY onnx_models/ /app/onnx_models/
ENV ONNX_MODELS_DIR=/app/onnx_models

# Single CMD — exec form so uvicorn runs as PID 1 and receives signals directly.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Visual Search Api
3
+ emoji: ⚡
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ ---
main.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""FastAPI entry point: model loading, CORS, routers, async job worker."""
import asyncio
import os
from contextlib import asynccontextmanager

# CPU thread tuning — must happen BEFORE torch/onnxruntime import.
# HF free tier = 2 vCPU; we want to use both but not oversubscribe.
os.environ.setdefault("OMP_NUM_THREADS", "2")
os.environ.setdefault("MKL_NUM_THREADS", "2")
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from src.core.config import MAX_CONCURRENT_INFERENCES, USE_ASYNC_UPLOADS
from src.core.logging import log, init_logging_session, close_logging_session
from src.api import danger, explorer, search, system, upload
from src.api import people  # Phase 3
from src.api import jobs as jobs_api  # explicit alias


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load AI models off the event loop before serving; stop the job worker
    and close the logging session on shutdown."""
    await init_logging_session()
    log("INFO", "server.startup", message="Loading AI models...")

    from src.services.ai_manager import AIModelManager

    # Fix: asyncio.to_thread() replaces get_event_loop()/run_in_executor —
    # asyncio.get_event_loop() is deprecated inside coroutines (Python 3.10+);
    # to_thread is the supported way to run blocking setup off the loop.
    app.state.ai = await asyncio.to_thread(AIModelManager)

    # Split semaphores: face detection and object embedding can overlap
    # without fighting for the same CPU cores.
    app.state.ai_semaphore = asyncio.Semaphore(MAX_CONCURRENT_INFERENCES)
    app.state.face_semaphore = asyncio.Semaphore(MAX_CONCURRENT_INFERENCES)
    app.state.object_semaphore = asyncio.Semaphore(MAX_CONCURRENT_INFERENCES)

    # Phase 3: start background job worker if async uploads are enabled
    worker_task = None
    if USE_ASYNC_UPLOADS:
        from src.services.jobs import run_worker
        worker_task = asyncio.create_task(run_worker(app.state))
        log("INFO", "server.worker_started", message="Async upload worker running")

    log("INFO", "server.ready", message="All models loaded. API ready.")
    yield

    # Graceful shutdown: cancel the worker and swallow only its cancellation.
    if worker_task:
        worker_task.cancel()
        try:
            await worker_task
        except asyncio.CancelledError:
            pass

    log("INFO", "server.shutdown", message="API shutting down.")
    await close_logging_session()


app = FastAPI(lifespan=lifespan)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["https://photofinderv2.vercel.app"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

os.makedirs("temp_uploads", exist_ok=True)

# Existing routers
app.include_router(system.router)
app.include_router(upload.router)
app.include_router(search.router)
app.include_router(explorer.router)
app.include_router(danger.router)

# Phase 3 routers
app.include_router(people.router)
app.include_router(jobs_api.router)
onnx_models/dinov2_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2293e7b07a81c14018f5904c5bdd1936928d477cdeb716ce5864ef5e8d50e7d9
3
+ size 90566150
onnx_models/siglip_vision_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9fee3232e40845a34de9c6340584c360d1d572fc4942a194d366bac7c86f690
3
+ size 99109472
requirements.txt ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # requirements.txt — Enterprise Lens V5 (Phase 3)
2
+
3
+ --extra-index-url https://download.pytorch.org/whl/cpu
4
+
5
+ # ── Web framework ────────────────────────────────────────────────
6
+ fastapi==0.115.6
7
+ uvicorn[standard]==0.32.1
8
+ python-multipart==0.0.20
9
+
10
+ # ── PyTorch CPU-only ─────────────────────────────────────────────
11
+ torch==2.3.1+cpu
12
+ torchvision==0.18.1+cpu
13
+ scikit-learn
14
+
15
+ # ── HuggingFace stack ─────────────────────────────────────────────
16
+ # Pin transformers to 4.x — AdaFace CVLFaceRecognitionModel (trust_remote_code)
17
+ # uses _tied_weights_keys (old API). transformers 5.0+ renamed it to
18
+ # all_tied_weights_keys → crash on load.
19
+ transformers>=4.40.0,<5.0.0
20
+ huggingface_hub>=0.26.0
21
+ safetensors>=0.4.0
22
+ tokenizers>=0.20.0
23
+ sentencepiece
24
+ accelerate>=1.1.0
25
+ omegaconf
26
+
27
+ # ── InsightFace — SCRFD + ArcFace-R100 ───────────────────────────
28
+ insightface==0.7.3
29
+ onnxruntime>=1.20.0
30
+ huggingface-hub>=0.22.0
31
+
32
+ # ── YOLO — object segmentation ───────────────────────────────────
33
+ ultralytics==8.3.27
34
+
35
+ # ── Computer vision ───────────────────────────────────────────────
36
+ opencv-python-headless==4.10.0.84
37
+ Pillow==11.0.0
38
+ # Phase 3: pin numpy <2.0 — hdbscan 0.8.33 requires numpy <2.0
39
+ numpy>=1.26.4,<2.0
40
+
41
+ # ── Vector DB + CDN ──────────────────────────────────────────────
42
+ pinecone==5.4.1
43
+ cloudinary==1.41.0
44
+
45
+ # ── Async HTTP ───────────────────────────────────────────────────
46
+ aiohttp==3.11.9
47
+
48
+ # ── Phase 3: Clustering + job queue ──────────────────────────────
49
+ # hdbscan: HDBSCAN clustering for People View (face identity albums)
50
+ hdbscan>=0.8.33
51
+ # redis: Upstash Redis REST client uses aiohttp (already present).
52
+ # No redis-py socket library needed — Upstash exposes a pure HTTP API.
53
+ # redis package only needed if you switch to raw TCP Upstash endpoint.
54
+ # redis>=5.0.0 ← uncomment only if switching to non-REST Upstash
55
+
56
+ # ── Utilities ────────────────────────────────────────────────────
57
+ loguru==0.7.2
58
+ inflect==7.4.0
59
+ python-dotenv==1.0.1
60
+
61
+ fvcore
scripts/calibrate_threshold.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Threshold calibration tool.
3
+
4
+ Use this to find the FACE_MATCH_THRESHOLD that gives you the best
5
+ precision/recall tradeoff for YOUR specific data. Default 0.28 is an
6
+ industry-average — your data may differ.
7
+
8
+ Usage:
9
+ 1. Build two test sets:
10
+ - POSITIVE_PAIRS: pairs of (query_image, gallery_image) of the SAME person
11
+ - NEGATIVE_PAIRS: pairs of DIFFERENT people (hard negatives help most)
12
+
13
+ 2. Populate TEST_PAIRS below with local image paths
14
+
15
+ 3. Run: python scripts/calibrate_threshold.py
16
+
17
+ Output: table of thresholds with TP/FP/FN/precision/recall/F1.
18
+ """
19
+ import sys
20
+ import os
21
+ from pathlib import Path
22
+
23
+ # Add project root to path so `src.*` imports work when running from scripts/
24
+ sys.path.insert(0, str(Path(__file__).parent.parent))
25
+
26
+ import numpy as np
27
+ from PIL import Image
28
+
29
+
30
+ # ── EDIT THESE ──────────────────────────────────────────────
31
+ # Each tuple: (path_to_query_image, path_to_gallery_image, is_same_person)
32
+ TEST_PAIRS = [
33
+ # Example positives (same person, different photos)
34
+ # ("test_data/alice_1.jpg", "test_data/alice_2.jpg", True),
35
+ # ("test_data/alice_1.jpg", "test_data/alice_3.jpg", True),
36
+ # ("test_data/bob_1.jpg", "test_data/bob_2.jpg", True),
37
+
38
+ # Example hard negatives (different people, similar looking)
39
+ # ("test_data/alice_1.jpg", "test_data/carol_1.jpg", False),
40
+ # ("test_data/bob_1.jpg", "test_data/dave_1.jpg", False),
41
+ ]
42
+ # ────────────────────────────────────────────────────────────
43
+
44
+
45
def cosine(a: np.ndarray, b: np.ndarray) -> float:
    """Cosine similarity of two vectors; epsilon guards against zero norms."""
    denom = np.linalg.norm(a) * np.linalg.norm(b) + 1e-9
    return float(np.dot(a, b) / denom)
47
+
48
+
49
def compute_pair_scores():
    """Returns list of (fused_score, arcface_score, adaface_score, is_positive).

    Loads the full AIModelManager, embeds both images of each TEST_PAIRS
    entry, takes the largest detected face on each side, and scores the
    pair with ArcFace cosine, AdaFace cosine, and a 0.6/0.4 weighted fusion.
    Entries with a missing file or no detected face are skipped with a message.
    """
    from src.services.ai_manager import AIModelManager
    print("Loading models...")
    ai = AIModelManager()

    results = []
    for query_path, gallery_path, is_positive in TEST_PAIRS:
        # Skip broken entries instead of crashing mid-run.
        if not (os.path.exists(query_path) and os.path.exists(gallery_path)):
            print(f" Skipping missing: {query_path} or {gallery_path}")
            continue

        with open(query_path, "rb") as f:
            q_vectors = ai.process_image_bytes(f.read(), detect_faces=True)
        with open(gallery_path, "rb") as f:
            g_vectors = ai.process_image_bytes(f.read(), detect_faces=True)

        # Keep only face vectors; the manager may also emit object vectors.
        q_faces = [v for v in q_vectors if v["type"] == "face"]
        g_faces = [v for v in g_vectors if v["type"] == "face"]

        if not q_faces or not g_faces:
            print(f" No face in: {query_path} or {gallery_path}")
            continue

        # Take largest face from each
        qf = max(q_faces, key=lambda f: f.get("face_width_px", 0))
        gf = max(g_faces, key=lambda f: f.get("face_width_px", 0))

        arc_score = cosine(qf["arcface_vector"], gf["arcface_vector"])
        if qf.get("has_adaface") and gf.get("has_adaface"):
            ada_score = cosine(qf["adaface_vector"], gf["adaface_vector"])
        else:
            # NOTE(review): 0.15 appears to be a conservative stand-in score
            # used when AdaFace embeddings are unavailable — confirm it matches
            # the fusion logic used at serving time.
            ada_score = 0.15

        # Weighted fusion: ArcFace dominates (0.6), AdaFace refines (0.4).
        fused = 0.6 * arc_score + 0.4 * ada_score

        results.append({
            "query": query_path,
            "gallery": gallery_path,
            "is_positive": is_positive,
            "arcface": arc_score,
            "adaface": ada_score,
            "fused": fused,
        })

        tag = "SAME" if is_positive else "DIFF"
        print(f" [{tag}] arc={arc_score:.3f} ada={ada_score:.3f} fused={fused:.3f}")

    return results
98
+
99
+
100
def evaluate_thresholds(results):
    """Sweep (arcface_thr, fused_thr) combinations and print P/R/F1 for each.

    Args:
        results: list of dicts from compute_pair_scores(); each must carry
            "arcface" and "fused" float scores plus an "is_positive" bool.

    Returns:
        None — all output is printed, ending with the best-F1 combination.
    """
    if not results:
        print("\nNo results to evaluate. Add pairs to TEST_PAIRS above.")
        return

    print("\n" + "=" * 78)
    print(f"{'arcface_thr':<14}{'fused_thr':<14}{'TP':>6}{'FP':>6}{'FN':>6}"
          f"{'Precision':>12}{'Recall':>10}{'F1':>8}")
    print("=" * 78)

    # Fix: seed `best` with every key the summary block below reads. The
    # original only seeded {"f1", "arc_thr", "fused_thr"}, so when no
    # combination beat F1 = 0 (e.g. all pairs below every threshold) the
    # final print raised KeyError on "precision"/"recall".
    best = {"f1": 0, "arc_thr": 0, "fused_thr": 0,
            "tp": 0, "fp": 0, "fn": 0, "precision": 0, "recall": 0}

    for arc_thr in [0.20, 0.24, 0.28, 0.32, 0.36, 0.40, 0.45]:
        for fused_thr in [0.22, 0.26, 0.30, 0.34, 0.38]:
            tp = fp = fn = 0
            for r in results:
                # A match must pass BOTH thresholds.
                predicted_match = (r["arcface"] >= arc_thr and r["fused"] >= fused_thr)
                if r["is_positive"]:
                    if predicted_match:
                        tp += 1
                    else:
                        fn += 1
                else:
                    if predicted_match:
                        fp += 1
            # Guard every ratio against empty denominators.
            precision = tp / (tp + fp) if (tp + fp) else 0
            recall = tp / (tp + fn) if (tp + fn) else 0
            f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0

            if f1 > best["f1"]:
                best = {"f1": f1, "arc_thr": arc_thr, "fused_thr": fused_thr,
                        "tp": tp, "fp": fp, "fn": fn,
                        "precision": precision, "recall": recall}

            print(f"{arc_thr:<14.2f}{fused_thr:<14.2f}{tp:>6}{fp:>6}{fn:>6}"
                  f"{precision:>12.3f}{recall:>10.3f}{f1:>8.3f}")

    print("=" * 78)
    print(f"\nBest F1: {best['f1']:.3f}")
    print(f"  FACE_MATCH_THRESHOLD  = {best['arc_thr']}")
    print(f"  FUSED_MATCH_THRESHOLD = {best['fused_thr']}")
    print(f"  Precision = {best['precision']:.3f}, Recall = {best['recall']:.3f}")
    print("\nUpdate these in your HF Space env vars.")
147
+
148
+
149
if __name__ == "__main__":
    # Refuse to run with an empty test set — the sweep would be meaningless.
    if not TEST_PAIRS:
        print("Edit scripts/calibrate_threshold.py and populate TEST_PAIRS with")
        print("10-30 positive pairs and 10-30 hard-negative pairs, then re-run.")
        print("\nTip: export ~50 face photos from your own gallery, hand-label")
        print("the same-person pairs, and use those for calibration.")
        sys.exit(1)

    # Score every pair once, then sweep thresholds over the cached scores.
    results = compute_pair_scores()
    evaluate_thresholds(results)
scripts/convert_to_onnx.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ One-time ONNX conversion + dynamic INT8 quantization.
3
+
4
+ Run locally:
5
+ python scripts/convert_to_onnx.py
6
+
7
+ Produces:
8
+ onnx_models/siglip_vision_int8.onnx
9
+ onnx_models/dinov2_int8.onnx
10
+
11
+ Fix: attn_implementation="eager" disables scaled_dot_product_attention,
12
+ which the legacy PyTorch ONNX exporter cannot trace (TypeError on Sqrt/scale).
13
+ """
14
+ import os
15
+ import torch
16
+ import torch.nn as nn
17
+ from pathlib import Path
18
+ from onnxruntime.quantization import quantize_dynamic, QuantType
19
+
20
+ OUT_DIR = Path("onnx_models")
21
+ OUT_DIR.mkdir(exist_ok=True)
22
+
23
+
24
+ def export_siglip():
25
+ print("Exporting SigLIP vision encoder...")
26
+ from transformers import SiglipVisionModel
27
+
28
+ model = SiglipVisionModel.from_pretrained(
29
+ "google/siglip-base-patch16-224",
30
+ attn_implementation="eager", # disables SDPA — required for ONNX export
31
+ ).eval()
32
+
33
+ class SigLIPWrapper(nn.Module):
34
+ def __init__(self, m):
35
+ super().__init__()
36
+ self.m = m
37
+
38
+ def forward(self, pixel_values):
39
+ return self.m(pixel_values=pixel_values).pooler_output
40
+
41
+ wrapper = SigLIPWrapper(model).eval()
42
+ dummy = torch.randn(1, 3, 224, 224)
43
+
44
+ with torch.no_grad():
45
+ test = wrapper(dummy)
46
+ print(f" Forward pass OK — output shape: {test.shape}")
47
+
48
+ fp32_path = OUT_DIR / "siglip_vision.onnx"
49
+ with torch.no_grad():
50
+ torch.onnx.export(
51
+ wrapper, dummy, fp32_path,
52
+ input_names=["pixel_values"],
53
+ output_names=["image_embeds"],
54
+ dynamic_axes={"pixel_values": {0: "batch"}, "image_embeds": {0: "batch"}},
55
+ opset_version=14,
56
+ do_constant_folding=True,
57
+ )
58
+ print(f" fp32 saved ({fp32_path.stat().st_size // 1024 // 1024} MB)")
59
+
60
+ int8_path = OUT_DIR / "siglip_vision_int8.onnx"
61
+ quantize_dynamic(str(fp32_path), str(int8_path), weight_type=QuantType.QInt8)
62
+ print(f" INT8 saved ({int8_path.stat().st_size // 1024 // 1024} MB)")
63
+ os.remove(fp32_path)
64
+
65
+
66
+ def export_dinov2():
67
+ print("\nExporting DINOv2...")
68
+ from transformers import AutoModel
69
+
70
+ model = AutoModel.from_pretrained(
71
+ "facebook/dinov2-base",
72
+ attn_implementation="eager", # same fix
73
+ ).eval()
74
+
75
+ class DINOv2Wrapper(nn.Module):
76
+ def __init__(self, m):
77
+ super().__init__()
78
+ self.m = m
79
+
80
+ def forward(self, pixel_values):
81
+ return self.m(pixel_values=pixel_values).last_hidden_state[:, 0, :]
82
+
83
+ wrapper = DINOv2Wrapper(model).eval()
84
+ dummy = torch.randn(1, 3, 224, 224)
85
+
86
+ with torch.no_grad():
87
+ test = wrapper(dummy)
88
+ print(f" Forward pass OK — output shape: {test.shape}")
89
+
90
+ fp32_path = OUT_DIR / "dinov2.onnx"
91
+ with torch.no_grad():
92
+ torch.onnx.export(
93
+ wrapper, dummy, fp32_path,
94
+ input_names=["pixel_values"],
95
+ output_names=["cls_features"],
96
+ dynamic_axes={"pixel_values": {0: "batch"}, "cls_features": {0: "batch"}},
97
+ opset_version=14,
98
+ do_constant_folding=True,
99
+ )
100
+ print(f" fp32 saved ({fp32_path.stat().st_size // 1024 // 1024} MB)")
101
+
102
+ int8_path = OUT_DIR / "dinov2_int8.onnx"
103
+ quantize_dynamic(str(fp32_path), str(int8_path), weight_type=QuantType.QInt8)
104
+ print(f" INT8 saved ({int8_path.stat().st_size // 1024 // 1024} MB)")
105
+ os.remove(fp32_path)
106
+
107
+
108
+ if __name__ == "__main__":
109
+ print(f"PyTorch {torch.__version__}")
110
+ export_siglip()
111
+ export_dinov2()
112
+ print("\nDone. Commit onnx_models/*.onnx to your Space repo.")
113
+ for f in sorted(OUT_DIR.glob("*.onnx")):
114
+ print(f" {f.name} ({f.stat().st_size // 1024 // 1024} MB)")
scripts/verify_and_cleanup_old_index.py ADDED
File without changes
src/api/danger.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import time
3
+
4
+ from fastapi import APIRouter, Form, HTTPException, Request, Depends
5
+
6
+ from src.core.config import (
7
+ DEFAULT_CLOUDINARY_URL, DEFAULT_PINECONE_KEY,
8
+ IDX_FACES, IDX_OBJECTS,
9
+ IDX_FACES_ARCFACE, IDX_FACES_ADAFACE,
10
+ USE_SPLIT_FACE_INDEXES,
11
+ )
12
+ from src.core.security import get_verified_keys
13
+ from src.services.db_client import (
14
+ cld_delete_all_paginated, cld_remove_folder, cld_root_folders,
15
+ delete_and_recreate_indexes, pinecone_pool,
16
+ )
17
+ from src.core.logging import log, warn
18
+ from src.common.utils import get_ip, is_default_key
19
+
20
+ router = APIRouter()
21
+
22
+
23
def _all_index_names() -> list[str]:
    """Every index name either mode may have created — for exhaustive cleanup."""
    names = [IDX_FACES, IDX_OBJECTS]
    if USE_SPLIT_FACE_INDEXES:
        # Split mode adds the per-model face indexes on top of the legacy pair.
        names += [IDX_FACES_ARCFACE, IDX_FACES_ADAFACE]
    return names
28
+
29
+
30
@router.post("/api/reset-database")
async def reset_database(
    request: Request,
    user_id: str = Form(""),
    keys: dict = Depends(get_verified_keys),
):
    """Wipe all user data: every Cloudinary asset and folder, then drop and
    recreate the Pinecone indexes.

    Refused (403) when the caller is using the shared demo credentials.
    Cloudinary steps are best-effort (logged and skipped on failure);
    a Pinecone failure is fatal and returns 500.
    """
    ip = get_ip(request)
    start = time.perf_counter()
    log("WARNING", "danger.reset_database.attempt",
        user_id=user_id or "anonymous", ip=ip)

    # Guard: never allow destructive operations against the shared demo keys.
    if is_default_key(keys["pinecone_key"], DEFAULT_PINECONE_KEY) or \
       is_default_key(keys["cloudinary_url"], DEFAULT_CLOUDINARY_URL):
        log("WARNING", "danger.reset_database.blocked",
            user_id=user_id or "anonymous", ip=ip)
        raise HTTPException(403, "Reset is not allowed on the shared demo database.")

    # Step 1: delete all Cloudinary resources (paginated) — best-effort.
    try:
        deleted = await asyncio.to_thread(
            cld_delete_all_paginated, keys["cloudinary_creds"]
        )
        log("INFO", "danger.reset_database.cloudinary_wiped", deleted=deleted)
    except Exception as e:
        warn(f"Cloudinary wipe error: {e}")

    # Step 2: remove the now-empty root folders, in parallel — best-effort.
    try:
        folders_res = await asyncio.to_thread(
            cld_root_folders, keys["cloudinary_creds"]
        )
        folder_tasks = [
            asyncio.to_thread(
                cld_remove_folder, f["name"], keys["cloudinary_creds"]
            )
            for f in folders_res.get("folders", [])
        ]
        if folder_tasks:
            await asyncio.gather(*folder_tasks, return_exceptions=True)
    except Exception as e:
        warn(f"Cloudinary folder cleanup error: {e}")

    # Step 3: drop + recreate the vector indexes. Unlike the Cloudinary
    # steps this one is fatal — without fresh indexes the app is unusable.
    try:
        pc = pinecone_pool.get(keys["pinecone_key"])
        await asyncio.to_thread(delete_and_recreate_indexes, pc)
    except Exception as e:
        log("ERROR", "danger.reset_database.pinecone_error",
            user_id=user_id or "anonymous", ip=ip, error=str(e))
        raise HTTPException(500, f"Pinecone reset error: {e}")

    log("WARNING", "danger.reset_database.complete",
        user_id=user_id or "anonymous", ip=ip,
        duration_ms=round((time.perf_counter() - start) * 1000))
    return {"message": "Database reset complete. All data wiped and indexes recreated."}
82
+
83
+
84
@router.post("/api/delete-account")
async def delete_account(
    request: Request,
    user_id: str = Form(""),
    keys: dict = Depends(get_verified_keys),
):
    """Permanently delete a user's data: all Cloudinary assets and folders,
    plus every Pinecone index across both modes (split + legacy).

    Refused (403) on the shared demo credentials. Unlike reset_database,
    ALL steps here are best-effort — failures are logged and the flow
    continues so a partial failure still deletes as much as possible.
    """
    ip = get_ip(request)
    start = time.perf_counter()
    log("WARNING", "danger.delete_account.attempt",
        user_id=user_id or "anonymous", ip=ip)

    # Guard: never allow destructive operations against the shared demo keys.
    if is_default_key(keys["pinecone_key"], DEFAULT_PINECONE_KEY) or \
       is_default_key(keys["cloudinary_url"], DEFAULT_CLOUDINARY_URL):
        log("WARNING", "danger.delete_account.blocked",
            user_id=user_id or "anonymous", ip=ip)
        raise HTTPException(403, "Account deletion is not allowed on the shared demo database.")

    # Step 1: delete all Cloudinary resources (paginated).
    try:
        deleted = await asyncio.to_thread(
            cld_delete_all_paginated, keys["cloudinary_creds"]
        )
        log("INFO", "danger.delete_account.cloudinary_wiped", deleted=deleted)
    except Exception as e:
        warn(f"Account delete Cloudinary error: {e}")

    # Step 2: remove the now-empty root folders, in parallel.
    try:
        folders_res = await asyncio.to_thread(
            cld_root_folders, keys["cloudinary_creds"]
        )
        folder_tasks = [
            asyncio.to_thread(
                cld_remove_folder, f["name"], keys["cloudinary_creds"]
            )
            for f in folders_res.get("folders", [])
        ]
        if folder_tasks:
            await asyncio.gather(*folder_tasks, return_exceptions=True)
    except Exception as e:
        warn(f"Account delete folders error: {e}")

    # Step 3: delete every index that exists (indexes are NOT recreated —
    # this is account deletion, not a reset).
    try:
        pc = pinecone_pool.get(keys["pinecone_key"])

        def _delete_all_indexes():
            # Only delete indexes that actually exist to avoid API errors.
            existing = {idx.name for idx in pc.list_indexes()}
            for name in _all_index_names():
                if name in existing:
                    pc.delete_index(name)

        await asyncio.to_thread(_delete_all_indexes)
    except Exception as e:
        warn(f"Account delete Pinecone error: {e}")

    log("WARNING", "danger.delete_account.complete",
        user_id=user_id or "anonymous", ip=ip,
        duration_ms=round((time.perf_counter() - start) * 1000))
    return {"message": "Account data deleted. Sign out initiated."}
src/api/explorer.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+
3
+ from fastapi import APIRouter, Form, HTTPException, Request, Depends
4
+
5
+ from src.core.config import (
6
+ DEFAULT_PINECONE_KEY, IDX_FACES, IDX_OBJECTS,
7
+ IDX_FACES_ARCFACE, IDX_FACES_ADAFACE,
8
+ USE_SPLIT_FACE_INDEXES,
9
+ )
10
+ from src.core.security import get_verified_keys
11
+ from src.services.db_client import (
12
+ cld_delete_folder_resources, cld_delete_resource, cld_list_folder_images,
13
+ cld_remove_folder, cld_root_folders, pinecone_pool,
14
+ )
15
+ from src.core.logging import log, warn
16
+ from src.common.utils import cld_thumb_url, get_ip, url_to_public_id
17
+
18
+ router = APIRouter()
19
+
20
+
21
def _get_face_index_names() -> list[str]:
    """Face index names to operate on for the active mode.

    In split mode the legacy index is included as well, so deletes cover
    data that may live in either generation of indexes.
    """
    if not USE_SPLIT_FACE_INDEXES:
        return [IDX_FACES]
    return [IDX_FACES_ARCFACE, IDX_FACES_ADAFACE, IDX_FACES]
27
+
28
+
29
@router.post("/api/categories")
async def get_categories(
    request: Request,
    user_id: str = Form(""),
    keys: dict = Depends(get_verified_keys),
):
    """List root Cloudinary folders as category names; empty list on failure."""
    ip = get_ip(request)
    who = user_id or "anonymous"
    try:
        listing = await asyncio.to_thread(cld_root_folders, keys["cloudinary_creds"])
        names = [entry["name"] for entry in listing.get("folders", [])]
        log("INFO", "categories.fetched",
            user_id=who, ip=ip, count=len(names))
        return {"categories": names}
    except Exception as exc:
        # Degrade gracefully — the UI treats an empty list as "no categories".
        log("ERROR", "categories.error",
            user_id=who, ip=ip, error=str(exc))
        return {"categories": []}
46
+
47
+
48
@router.post("/api/cloudinary/folder-images")
async def list_folder_images(
    request: Request,
    folder_name: str = Form(...),
    user_id: str = Form(""),
    next_cursor: str = Form(""),
    page_size: int = Form(100),
    keys: dict = Depends(get_verified_keys),
):
    """Page through the images of one Cloudinary folder.

    Returns full + thumbnail URLs plus `next_cursor` for cursor-based
    pagination ("" signals the last page).
    """
    ip = get_ip(request)
    # Blocking Cloudinary SDK call — pushed off the event loop.
    result = await asyncio.to_thread(
        cld_list_folder_images,
        folder_name, keys["cloudinary_creds"], next_cursor or None, page_size,
    )
    images = [
        {
            "url": r["secure_url"],
            "thumb_url": cld_thumb_url(r["secure_url"]),
            "public_id": r["public_id"],
        }
        for r in result.get("resources", [])
    ]
    # Normalize a missing cursor to "" so the client gets a stable type.
    next_cur = result.get("next_cursor") or ""
    log("INFO", "explorer.folder_opened",
        user_id=user_id or "anonymous", ip=ip,
        folder=folder_name, count=len(images), has_more=bool(next_cur))
    return {"images": images, "count": len(images), "next_cursor": next_cur}
+
76
+
77
+ @router.post("/api/delete-image")
78
+ async def delete_image(
79
+ request: Request,
80
+ image_url: str = Form(""),
81
+ public_id: str = Form(""),
82
+ user_id: str = Form(""),
83
+ keys: dict = Depends(get_verified_keys),
84
+ ):
85
+ ip = get_ip(request)
86
+ pid = public_id or url_to_public_id(image_url)
87
+ if not pid:
88
+ raise HTTPException(400, "Could not determine public_id.")
89
+
90
+ # Delete from Cloudinary
91
+ await asyncio.to_thread(cld_delete_resource, pid, keys["cloudinary_creds"])
92
+
93
+ # Delete from ALL vector indexes (split + legacy + objects)
94
+ try:
95
+ pc = pinecone_pool.get(keys["pinecone_key"])
96
+ existing = {idx.name for idx in pc.list_indexes()}
97
+
98
+ all_indexes = [IDX_OBJECTS] + _get_face_index_names()
99
+
100
+ for idx_name in all_indexes:
101
+ if idx_name not in existing:
102
+ continue
103
+ try:
104
+ await asyncio.to_thread(
105
+ pc.Index(idx_name).delete,
106
+ filter={"url": {"$eq": image_url}},
107
+ )
108
+ except Exception as e:
109
+ warn(f"Pinecone delete warning on {idx_name}: {e}")
110
+ except Exception as e:
111
+ warn(f"Pinecone delete outer warning: {e}")
112
+
113
+ log("INFO", "explorer.image_deleted",
114
+ user_id=user_id or "anonymous", ip=ip,
115
+ image_url=image_url, public_id=pid)
116
+ return {"message": "Image deleted successfully."}
117
+
118
+
119
+ @router.post("/api/delete-folder")
120
+ async def delete_folder(
121
+ request: Request,
122
+ folder_name: str = Form(...),
123
+ user_id: str = Form(""),
124
+ keys: dict = Depends(get_verified_keys),
125
+ ):
126
+ ip = get_ip(request)
127
+ all_images, cursor = [], None
128
+ while True:
129
+ result = await asyncio.to_thread(
130
+ cld_list_folder_images, folder_name, keys["cloudinary_creds"], cursor
131
+ )
132
+ all_images.extend(result.get("resources", []))
133
+ cursor = result.get("next_cursor")
134
+ if not cursor:
135
+ break
136
+
137
+ await asyncio.to_thread(
138
+ cld_delete_folder_resources, folder_name, keys["cloudinary_creds"]
139
+ )
140
+ await asyncio.to_thread(
141
+ cld_remove_folder, folder_name, keys["cloudinary_creds"]
142
+ )
143
+
144
+ # Delete from ALL vector indexes
145
+ try:
146
+ pc = pinecone_pool.get(keys["pinecone_key"])
147
+ existing = {idx.name for idx in pc.list_indexes()}
148
+
149
+ all_indexes = [IDX_OBJECTS] + _get_face_index_names()
150
+
151
+ for idx_name in all_indexes:
152
+ if idx_name not in existing:
153
+ continue
154
+ idx = pc.Index(idx_name)
155
+ try:
156
+ # Try metadata filter first (fast)
157
+ await asyncio.to_thread(
158
+ idx.delete, filter={"folder": {"$eq": folder_name}}
159
+ )
160
+ except Exception:
161
+ # Fallback: delete by URL one-by-one
162
+ for img in all_images:
163
+ url = img.get("secure_url", "")
164
+ if url:
165
+ try:
166
+ await asyncio.to_thread(
167
+ idx.delete, filter={"url": {"$eq": url}}
168
+ )
169
+ except Exception:
170
+ pass
171
+ except Exception as e:
172
+ warn(f"Pinecone folder delete warning: {e}")
173
+
174
+ log("INFO", "explorer.folder_deleted",
175
+ user_id=user_id or "anonymous", ip=ip,
176
+ folder=folder_name, deleted_count=len(all_images))
177
+ return {
178
+ "message": f"Folder '{folder_name}' and contents deleted.",
179
+ "deleted_count": len(all_images),
180
+ }
src/api/jobs.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ src/api/jobs.py — Phase 3: Async upload job status endpoints
3
+
4
+ GET /api/jobs/{job_id} → poll job status + progress
5
+
6
+ This is the API router. The job queue worker lives in src/services/jobs.py.
7
+ """
8
+
9
+ from fastapi import APIRouter, Depends, HTTPException, Request
10
+
11
+ from src.core.security import get_verified_keys
12
+ from src.core.logging import log
13
+ from src.services.jobs import get_job_status
14
+ from src.common.utils import get_ip
15
+
16
+ router = APIRouter()
17
+
18
+
19
@router.get("/api/jobs/{job_id}")
async def poll_job(
    job_id: str,
    request: Request,
    keys: dict = Depends(get_verified_keys),
):
    """Report status and completion percentage for one async upload job."""
    ip = get_ip(request)

    job = await get_job_status(job_id)
    if not job:
        raise HTTPException(404, f"Job {job_id} not found")

    total_files = job.get("total_files", 0)
    processed_files = job.get("processed_files", 0)
    # Guard the division: a freshly-created job may report zero total files.
    progress = round(processed_files / total_files * 100) if total_files else 0

    status = job.get("status", "unknown")
    payload = {
        "job_id": job_id,
        "status": status,
        "total_files": total_files,
        "processed_files": processed_files,
        "progress_pct": progress,
        "status_url": f"/api/jobs/{job_id}",
    }
    if status == "completed":
        payload["result"] = job.get("result", {})
    if status == "failed":
        payload["error"] = job.get("error", "unknown error")

    log("INFO", "jobs.poll", ip=ip, job_id=job_id, status=payload["status"])
    return payload
src/api/people.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ src/api/people.py — Phase 3: People View endpoints
3
+
4
+ GET /api/people → list all identity clusters
5
+ GET /api/people/{cluster_id} → all images in that cluster
6
+ PATCH /api/people/{cluster_id} → rename a cluster
7
+ POST /api/reindex-clusters → trigger full re-cluster
8
+
9
+ All endpoints require the standard pinecone/cloudinary auth headers
10
+ (via get_verified_keys). user_id is derived from the Pinecone key hash
11
+ so different users don't see each other's clusters even though they share
12
+ the same Supabase table.
13
+ """
14
+
15
+ import hashlib
16
+
17
+ from fastapi import APIRouter, Body, Depends, HTTPException, Request
18
+
19
+ from src.core.config import USE_CLUSTER_AWARE_SEARCH
20
+ from src.core.security import get_verified_keys
21
+ from src.core.logging import log
22
+ from src.services.clustering import (
23
+ get_people,
24
+ get_person_images,
25
+ rename_cluster,
26
+ run_clustering,
27
+ )
28
+ from src.services.db_client import pinecone_pool, ensure_indexes
29
+ from src.common.utils import get_ip
30
+
31
+ import asyncio
32
+
33
+ router = APIRouter()
34
+
35
+
36
+ def _user_id_from_key(pinecone_key: str) -> str:
37
+ """
38
+ Derives a stable, opaque user_id from the Pinecone API key.
39
+ Users bring their own key, so this is the closest we have to an identity.
40
+ Short SHA256 prefix is enough for row isolation — not a security measure.
41
+ """
42
+ return hashlib.sha256(pinecone_key.encode()).hexdigest()[:16]
43
+
44
+
45
+ @router.get("/api/people")
46
+ async def list_people(
47
+ request: Request,
48
+ keys: dict = Depends(get_verified_keys),
49
+ ):
50
+ """
51
+ Returns all identity clusters for the authenticated user, ordered by
52
+ face_count descending (most-seen people first).
53
+
54
+ Response shape:
55
+ [
56
+ {
57
+ "cluster_id": "uuid",
58
+ "name": "Mom" | null,
59
+ "face_count": 42,
60
+ "representative_face_crop": "<base64 jpg>"
61
+ },
62
+ ...
63
+ ]
64
+ """
65
+ ip = get_ip(request)
66
+ user_id = _user_id_from_key(keys["pinecone_key"])
67
+
68
+ try:
69
+ people = await get_people(user_id)
70
+ log("INFO", "people.list", ip=ip, user_id=user_id, count=len(people))
71
+ return {"people": people, "total": len(people)}
72
+ except Exception as e:
73
+ log("ERROR", "people.list.error", ip=ip, user_id=user_id, error=str(e))
74
+ raise HTTPException(500, f"Failed to fetch people: {e}")
75
+
76
+
77
+ @router.get("/api/people/{cluster_id}")
78
+ async def get_cluster_images(
79
+ cluster_id: str,
80
+ request: Request,
81
+ keys: dict = Depends(get_verified_keys),
82
+ ):
83
+ """
84
+ Returns all images belonging to a specific identity cluster.
85
+
86
+ Response shape:
87
+ {
88
+ "cluster_id": "uuid",
89
+ "images": [
90
+ {"url": "...", "folder": "...", "face_crop": "<base64>"},
91
+ ...
92
+ ],
93
+ "total": 12
94
+ }
95
+ """
96
+ ip = get_ip(request)
97
+ user_id = _user_id_from_key(keys["pinecone_key"])
98
+
99
+ try:
100
+ images = await get_person_images(cluster_id, user_id)
101
+ log("INFO", "people.cluster_images",
102
+ ip=ip, user_id=user_id, cluster_id=cluster_id, count=len(images))
103
+ return {
104
+ "cluster_id": cluster_id,
105
+ "images": images,
106
+ "total": len(images),
107
+ }
108
+ except Exception as e:
109
+ log("ERROR", "people.cluster_images.error",
110
+ ip=ip, user_id=user_id, cluster_id=cluster_id, error=str(e))
111
+ raise HTTPException(500, f"Failed to fetch cluster images: {e}")
112
+
113
+
114
+ @router.patch("/api/people/{cluster_id}")
115
+ async def update_cluster_name(
116
+ cluster_id: str,
117
+ request: Request,
118
+ name: str = Body(..., embed=True),
119
+ keys: dict = Depends(get_verified_keys),
120
+ ):
121
+ """
122
+ Assigns a human-readable name to a cluster.
123
+
124
+ Request body (JSON): {"name": "Mom"}
125
+ Response: {"cluster_id": "uuid", "name": "Mom", "ok": true}
126
+ """
127
+ ip = get_ip(request)
128
+ user_id = _user_id_from_key(keys["pinecone_key"])
129
+
130
+ if not name or len(name.strip()) == 0:
131
+ raise HTTPException(400, "name must be a non-empty string")
132
+ if len(name) > 100:
133
+ raise HTTPException(400, "name must be 100 characters or fewer")
134
+
135
+ try:
136
+ await rename_cluster(cluster_id, name.strip(), user_id)
137
+ log("INFO", "people.rename",
138
+ ip=ip, user_id=user_id, cluster_id=cluster_id, name=name)
139
+ return {"cluster_id": cluster_id, "name": name.strip(), "ok": True}
140
+ except Exception as e:
141
+ log("ERROR", "people.rename.error",
142
+ ip=ip, user_id=user_id, cluster_id=cluster_id, error=str(e))
143
+ raise HTTPException(500, f"Failed to rename cluster: {e}")
144
+
145
+
146
+ @router.post("/api/reindex-clusters")
147
+ async def reindex_clusters(
148
+ request: Request,
149
+ keys: dict = Depends(get_verified_keys),
150
+ ):
151
+ """
152
+ Triggers a full HDBSCAN re-cluster of the user's face vectors.
153
+
154
+ This is a synchronous (blocking) endpoint — clustering typically takes
155
+ 5-30 seconds depending on library size. For large libraries, consider
156
+ running this in a background task (Phase 4).
157
+
158
+ Response:
159
+ {
160
+ "status": "ok",
161
+ "total_vectors": 3200,
162
+ "clusters_found": 14,
163
+ "noise_vectors": 80
164
+ }
165
+ """
166
+ ip = get_ip(request)
167
+ user_id = _user_id_from_key(keys["pinecone_key"])
168
+
169
+ log("INFO", "people.reindex_start", ip=ip, user_id=user_id)
170
+
171
+ try:
172
+ pc = pinecone_pool.get(keys["pinecone_key"])
173
+
174
+ # Ensure indexes exist before fetching vectors
175
+ await asyncio.to_thread(ensure_indexes, pc)
176
+
177
+ result = await run_clustering(pc, user_id)
178
+ log("INFO", "people.reindex_done", ip=ip, user_id=user_id, **result)
179
+ return result
180
+
181
+ except RuntimeError as e:
182
+ # e.g. hdbscan not installed
183
+ raise HTTPException(503, str(e))
184
+ except Exception as e:
185
+ log("ERROR", "people.reindex_error", ip=ip, user_id=user_id, error=str(e))
186
+ raise HTTPException(500, f"Clustering failed: {e}")
src/api/search.py ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import hashlib
3
+ import time
4
+ import traceback
5
+
6
+ from fastapi import APIRouter, File, Form, HTTPException, Request, UploadFile, Depends
7
+
8
+ from src.core.config import (
9
+ DEFAULT_PINECONE_KEY, IDX_FACES, IDX_OBJECTS,
10
+ IDX_FACES_ARCFACE, IDX_FACES_ADAFACE,
11
+ USE_SPLIT_FACE_INDEXES, USE_CLUSTER_AWARE_SEARCH,
12
+ )
13
+ from src.core.security import get_verified_keys
14
+ from src.services.db_client import (
15
+ merge_face_results, merge_object_results,
16
+ pinecone_pool, search_faces, search_faces_split, search_objects,
17
+ ensure_indexes,
18
+ )
19
+ from src.core.logging import log
20
+ from src.common.utils import face_ui_score, get_ip, is_default_key, to_list
21
+
22
+ router = APIRouter()
23
+
24
+
25
@router.post("/api/search")
async def search_database(
    request: Request,
    file: UploadFile = File(...),
    detect_faces: bool = Form(True),
    user_id: str = Form(""),
    keys: dict = Depends(get_verified_keys),
):
    """Embed the uploaded query image, then fan out to the face and/or
    object Pinecone indexes and return merged results."""
    ip = get_ip(request)
    t0 = time.perf_counter()
    mode = "guest" if is_default_key(keys["pinecone_key"], DEFAULT_PINECONE_KEY) else "personal"

    log("INFO", "search.start",
        user_id=user_id or "anonymous", ip=ip, mode=mode,
        filename=file.filename, detect_faces=detect_faces)

    try:
        payload = await file.read()
        ai_manager = request.app.state.ai
        sem = request.app.state.ai_semaphore

        # Embed the query image; the semaphore caps concurrent inference.
        async with sem:
            query_vectors = await ai_manager.process_image_bytes_async(
                payload, detect_faces=detect_faces
            )

        inference_ms = round((time.perf_counter() - t0) * 1000)
        face_vecs = [v for v in query_vectors if v["type"] == "face"]
        object_vecs = [v for v in query_vectors if v["type"] == "object"]

        log("INFO", "search.inference_done",
            user_id=user_id or "anonymous", ip=ip, mode=mode,
            face_vecs=len(face_vecs), obj_vecs=len(object_vecs),
            inference_ms=inference_ms)

        pc = pinecone_pool.get(keys["pinecone_key"])
        # Opaque per-user identity derived from the Pinecone key — must match
        # what clustering.py writes so cluster lookups line up.
        cluster_uid = hashlib.sha256(keys["pinecone_key"].encode()).hexdigest()[:16]

        # Self-heal missing indexes (user may never have run verify-keys).
        try:
            created = await asyncio.to_thread(ensure_indexes, pc)
            if created:
                log("INFO", "search.indexes_auto_created",
                    user_id=user_id or "anonymous", ip=ip, created=created)
                await asyncio.sleep(8)
        except Exception as e:
            log("ERROR", "search.ensure_indexes_failed",
                user_id=user_id or "anonymous", ip=ip, error=str(e))

        idx_obj = pc.Index(IDX_OBJECTS)
        idx_arcface = idx_adaface = idx_face_legacy = None
        if USE_SPLIT_FACE_INDEXES:
            idx_arcface = pc.Index(IDX_FACES_ARCFACE)
            idx_adaface = pc.Index(IDX_FACES_ADAFACE)
        else:
            idx_face_legacy = pc.Index(IDX_FACES)

        if detect_faces and face_vecs:
            return await _run_face_search(
                face_vecs, object_vecs,
                idx_arcface, idx_adaface, idx_face_legacy, idx_obj,
                t0, user_id, ip, mode,
                pc=pc, cluster_uid=cluster_uid,
            )
        return await _run_object_search(
            object_vecs, idx_obj, t0, user_id, ip, mode
        )

    except HTTPException:
        raise
    except Exception as e:
        log("ERROR", "search.error",
            user_id=user_id or "anonymous", ip=ip, mode=mode,
            error=str(e), traceback=traceback.format_exc()[-800:])
        raise HTTPException(500, str(e))
107
+
108
+
109
+ async def _query_face_split(fv, idx_arcface, idx_adaface, pc=None, cluster_uid=None):
110
+ """Parallel query to ArcFace + AdaFace indexes, then fuse.
111
+ When USE_CLUSTER_AWARE_SEARCH is on, expands results to include every
112
+ image in the matched person clusters for near-100% recall."""
113
+ arcface_vec = to_list(fv["arcface_vector"])
114
+ adaface_vec = to_list(fv.get("adaface_vector")) if fv.get("has_adaface") else None
115
+
116
+ try:
117
+ image_map = await asyncio.to_thread(
118
+ search_faces_split,
119
+ idx_arcface, idx_adaface,
120
+ arcface_vec, adaface_vec,
121
+ )
122
+ except Exception as e:
123
+ if "404" in str(e):
124
+ raise HTTPException(
125
+ 404,
126
+ "Face indexes not found. Go to Settings → Verify & Save to create them."
127
+ )
128
+ raise
129
+
130
+ # Only expand clusters for high-confidence matches (fused_score >= 0.50).
131
+ # A borderline match at 0.40 could be a different person; cluster expansion
132
+ # would then pull in an entire wrong identity — exactly what we want to avoid.
133
+ CLUSTER_EXPAND_MIN_SCORE = 0.50
134
+ high_confidence = {
135
+ url: d for url, d in image_map.items()
136
+ if d.get("fused_score", 0.0) >= CLUSTER_EXPAND_MIN_SCORE
137
+ }
138
+ if USE_CLUSTER_AWARE_SEARCH and high_confidence and pc is not None and cluster_uid:
139
+ from src.services.clustering import search_cluster_aware
140
+ image_map = await search_cluster_aware(pc, high_confidence, cluster_uid)
141
+
142
+ return _format_face_group(fv, image_map, scoring="fused")
143
+
144
+
145
+ async def _query_face_legacy(fv, idx_face):
146
+ """Legacy single-index query for pre-Phase-2 data."""
147
+ vec = to_list(fv["vector"])
148
+ det_score = fv.get("det_score", 1.0)
149
+ try:
150
+ image_map = await asyncio.to_thread(search_faces, idx_face, vec, det_score)
151
+ except Exception as e:
152
+ if "404" in str(e):
153
+ raise HTTPException(404, "Pinecone index not found.")
154
+ raise
155
+ return _format_face_group(fv, image_map, scoring="legacy")
156
+
157
+
158
+ def _format_face_group(fv, image_map, scoring: str):
159
+ """Shape the response the same way regardless of scoring backend."""
160
+ matches = []
161
+ for url, d in image_map.items():
162
+ if scoring == "fused":
163
+ display_score = face_ui_score(d["fused_score"], mode="fused")
164
+ raw_score = round(d["fused_score"], 4)
165
+ else:
166
+ display_score = face_ui_score(d["raw_score"], mode="legacy")
167
+ raw_score = round(d["raw_score"], 4)
168
+
169
+ matches.append({
170
+ "url": url,
171
+ "score": display_score,
172
+ "raw_score": raw_score,
173
+ "arcface_score": round(d.get("arcface_score", 0), 4),
174
+ "adaface_score": round(d.get("adaface_score", 0), 4),
175
+ "face_crop": d["face_crop"],
176
+ "folder": d["folder"],
177
+ "caption": "👤 Verified Identity",
178
+ })
179
+
180
+ matches.sort(key=lambda x: x["score"], reverse=True)
181
+
182
+ return {
183
+ "query_face_idx": fv.get("face_idx", 0),
184
+ "query_face_crop": fv.get("face_crop", ""),
185
+ "query_bbox": fv.get("bbox", []),
186
+ "det_score": fv.get("det_score", 1.0),
187
+ "face_width_px": fv.get("face_width_px", 0),
188
+ "matches": matches,
189
+ }
190
+
191
+
192
+ async def _run_face_search(
193
+ face_vectors, object_vectors,
194
+ idx_arcface, idx_adaface, idx_face_legacy, idx_obj,
195
+ start, user_id, ip, mode,
196
+ pc=None, cluster_uid=None,
197
+ ) -> dict:
198
+ # Build face query tasks
199
+ if USE_SPLIT_FACE_INDEXES:
200
+ face_tasks = [
201
+ _query_face_split(fv, idx_arcface, idx_adaface, pc=pc, cluster_uid=cluster_uid)
202
+ for fv in face_vectors
203
+ ]
204
+ else:
205
+ face_tasks = [_query_face_legacy(fv, idx_face_legacy) for fv in face_vectors]
206
+
207
+ # Object queries run in parallel with face queries
208
+ async def _query_obj_single(ov):
209
+ vec = to_list(ov["vector"])
210
+ try:
211
+ return await asyncio.to_thread(search_objects, idx_obj, vec)
212
+ except Exception as e:
213
+ if "404" in str(e):
214
+ raise HTTPException(404, "Pinecone index not found.")
215
+ raise
216
+
217
+ obj_tasks = [_query_obj_single(ov) for ov in object_vectors]
218
+ all_results = await asyncio.gather(*face_tasks, *obj_tasks)
219
+
220
+ raw_groups = list(all_results[:len(face_tasks)])
221
+ obj_nested = list(all_results[len(face_tasks):])
222
+
223
+ merged_face = merge_face_results(raw_groups)
224
+ merged_objects = merge_object_results(obj_nested)
225
+ face_groups = [g for g in raw_groups if g.get("matches")]
226
+
227
+ duration_ms = round((time.perf_counter() - start) * 1000)
228
+ log("INFO", "search.complete",
229
+ user_id=user_id or "anonymous", ip=ip, mode=mode,
230
+ lanes=["face", "object"],
231
+ face_groups=len(face_groups),
232
+ face_results=len(merged_face),
233
+ object_results=len(merged_objects),
234
+ duration_ms=duration_ms,
235
+ index_mode="split" if USE_SPLIT_FACE_INDEXES else "legacy")
236
+
237
+ return {
238
+ "mode": "face",
239
+ "face_groups": face_groups,
240
+ "results": merged_face,
241
+ "object_results": merged_objects,
242
+ }
243
+
244
+
245
+ async def _run_object_search(object_vectors, idx_obj, start, user_id, ip, mode) -> dict:
246
+ if not object_vectors:
247
+ return {"mode": "object", "results": [], "face_groups": []}
248
+
249
+ async def _query_obj(ov):
250
+ vec = to_list(ov["vector"])
251
+ try:
252
+ return await asyncio.to_thread(search_objects, idx_obj, vec)
253
+ except Exception as e:
254
+ if "404" in str(e):
255
+ raise HTTPException(404, "Pinecone index not found.")
256
+ raise
257
+
258
+ nested = await asyncio.gather(*[_query_obj(ov) for ov in object_vectors])
259
+ final = merge_object_results(nested)
260
+
261
+ duration_ms = round((time.perf_counter() - start) * 1000)
262
+ log("INFO", "search.complete",
263
+ user_id=user_id or "anonymous", ip=ip, mode=mode,
264
+ lanes=["object"], results=len(final), duration_ms=duration_ms)
265
+
266
+ return {"mode": "object", "results": final, "face_groups": []}
src/api/system.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import time
3
+ from datetime import datetime, timezone
4
+
5
+ from fastapi import APIRouter, Form, HTTPException, Request, Depends
6
+
7
+ from src.core.config import DEFAULT_PINECONE_KEY
8
+ from src.core.security import get_verified_keys
9
+ from src.services.db_client import cld_ping, ensure_indexes, pinecone_pool
10
+ from src.core.logging import log
11
+ from src.common.utils import get_ip, is_default_key
12
+
13
+ router = APIRouter()
14
+
15
+ @router.get("/")
16
+ async def root():
17
+ return {"status": "ok"}
18
+
19
@router.get("/api/health")
async def health():
    """Health probe returning an ISO-8601 UTC timestamp."""
    now = datetime.now(timezone.utc)
    return {"status": "ok", "timestamp": now.isoformat()}
22
+
23
@router.post("/api/log")
async def frontend_log(
    request: Request,
    event: str = Form(...),
    user_id: str = Form(""),
    page: str = Form(""),
    metadata: str = Form("{}"),
):
    """Relay a frontend telemetry event into the structured backend log."""
    import json
    ip = get_ip(request)
    try:
        extra = json.loads(metadata) if metadata else {}
    except Exception:
        # Malformed client JSON — still record the event itself.
        extra = {}
    log(
        "INFO", f"frontend.{event}",
        user_id=user_id or "anonymous",
        page=page, ip=ip,
        ua=request.headers.get("User-Agent", "")[:120],
        **extra,
    )
    return {"ok": True}
45
+
46
@router.post("/api/verify-keys")
async def verify_keys(
    request: Request,
    user_id: str = Form(""),
    keys: dict = Depends(get_verified_keys)
):
    """Validate the caller's Cloudinary and Pinecone credentials, creating
    any missing Pinecone indexes along the way."""
    ip = get_ip(request)
    is_guest = is_default_key(keys["pinecone_key"], DEFAULT_PINECONE_KEY)
    mode = "guest" if is_guest else "personal"
    started = time.perf_counter()
    log("INFO", "settings.verify_keys.start", user_id=user_id or "anonymous", mode=mode, ip=ip)

    # Cloudinary first: a cheap ping proves the environment URL works.
    try:
        await asyncio.to_thread(cld_ping, keys["cloudinary_creds"])
    except Exception as e:
        log("ERROR", "settings.verify_keys.cloudinary_fail", user_id=user_id or "anonymous", ip=ip, error=str(e))
        raise HTTPException(400, "Invalid Cloudinary Environment URL.")

    # Pinecone: connect and ensure all required indexes exist.
    indexes_created: list[str] = []
    try:
        pc = pinecone_pool.get(keys["pinecone_key"])
        indexes_created = await asyncio.to_thread(ensure_indexes, pc)
    except Exception as e:
        err = str(e)
        if "401" in err or "unauthorized" in err.lower():
            clean = "Invalid Pinecone API Key."
        else:
            clean = f"Pinecone Error: {err}"
        log("ERROR", "settings.verify_keys.pinecone_fail", user_id=user_id or "anonymous", ip=ip, error=clean)
        raise HTTPException(400, clean)

    log("INFO", "settings.verify_keys.success", user_id=user_id or "anonymous", mode=mode, ip=ip,
        indexes_created=indexes_created, duration_ms=round((time.perf_counter() - started) * 1000))
    return {"message": "Keys verified and indexes ready!"}
src/api/upload.py ADDED
@@ -0,0 +1,327 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import io
3
+ import time
4
+ import uuid
5
+ from typing import List
6
+
7
+ from fastapi import APIRouter, File, Form, HTTPException, Query, Request, UploadFile, Depends
8
+
9
+ from src.core.config import (
10
+ IDX_FACES, IDX_OBJECTS,
11
+ IDX_FACES_ARCFACE, IDX_FACES_ADAFACE,
12
+ MAX_FILES_PER_UPLOAD, USE_SPLIT_FACE_INDEXES,
13
+ USE_ASYNC_UPLOADS, CLUSTER_AUTO_TRIGGER_EVERY,
14
+ )
15
+ from src.core.security import get_verified_keys
16
+ from src.services.db_client import cld_upload, pinecone_pool, ensure_indexes
17
+ from src.core.logging import log
18
+ from src.common.utils import get_ip, standardize_category_name, to_list
19
+
20
+ router = APIRouter()
21
+
22
+
23
def chunker(seq, size):
    """Yield successive slices of *seq*, each at most *size* elements long."""
    return (seq[start:start + size] for start in range(0, len(seq), size))
25
+
26
+
27
+ # ──────────────────────────────────────────────────────────────
28
+ # Per-file processor — Cloudinary upload + AI inference only.
29
+ # Vectors are RETURNED, not upserted here. Caller batches all
30
+ # files' vectors into single Pinecone upserts (same as Phase 2).
31
+ # ──────────────────────────────────────────────────────────────
32
async def _process_one_file(
    *,
    file_bytes: bytes,
    folder: str,
    detect_faces: bool,
    keys: dict,
    ai,
    sem,
) -> tuple[str, str, list]:
    """Upload one image to Cloudinary while running AI inference on it in
    parallel. Returns (file_id, image_url, vectors); the caller batches the
    vectors into Pinecone itself."""
    file_id = uuid.uuid4().hex

    async def _infer():
        # The semaphore bounds how many images are in the model at once.
        async with sem:
            return await ai.process_image_bytes_async(file_bytes, detect_faces=detect_faces)

    upload_task = asyncio.to_thread(
        cld_upload, io.BytesIO(file_bytes), folder, keys["cloudinary_creds"]
    )
    cld_res, vectors = await asyncio.gather(upload_task, _infer())
    return file_id, cld_res["secure_url"], vectors
54
+
55
+
56
+ # ──────────────────────────────────────────────────────────────
57
+ # Shared batch-upsert logic — used by sync upload AND job worker
58
+ # ──────────────────────────────────────────────────────────────
59
async def _batch_upsert_all(
    *, results: list, folder: str, pc,
) -> dict:
    """
    Group all files' vectors by destination index and push them with one
    batched upsert path per index (chunks of 200) instead of per-file calls.

    *results* is [(file_id, image_url, vectors), ...] as produced by
    _process_one_file.
    """
    buckets = {"arcface": [], "adaface": [], "legacy": [], "object": []}
    uploaded_urls = []

    for file_id, image_url, vectors in results:
        uploaded_urls.append(image_url)
        for i, vec in enumerate(vectors):
            vector_id = f"{file_id}_{i}"

            if vec["type"] == "face":
                face_meta = {
                    "url": image_url,
                    "folder": folder,
                    "face_crop": vec.get("face_crop", ""),
                    "det_score": float(vec.get("det_score", 1.0)),
                    "face_width_px": int(vec.get("face_width_px", 0)),
                    "blur_score": float(vec.get("blur_score", 100.0)),
                }
                if USE_SPLIT_FACE_INDEXES:
                    buckets["arcface"].append({
                        "id": vector_id,
                        "values": to_list(vec["arcface_vector"]),
                        "metadata": face_meta,
                    })
                    if vec.get("has_adaface"):
                        buckets["adaface"].append({
                            "id": vector_id,
                            "values": to_list(vec["adaface_vector"]),
                            "metadata": face_meta,
                        })
                else:
                    buckets["legacy"].append({
                        "id": vector_id,
                        "values": to_list(vec["vector"]),
                        "metadata": face_meta,
                    })
            else:
                buckets["object"].append({
                    "id": vector_id,
                    "values": to_list(vec["vector"]),
                    "metadata": {"url": image_url, "folder": folder},
                })

    def _chunked_upsert(index, items):
        # Keep each Pinecone call to 200 vectors to stay under payload limits.
        for chunk in chunker(items, 200):
            index.upsert(vectors=chunk)

    pending = []
    if USE_SPLIT_FACE_INDEXES:
        if buckets["arcface"]:
            pending.append(asyncio.to_thread(
                _chunked_upsert, pc.Index(IDX_FACES_ARCFACE), buckets["arcface"]))
        if buckets["adaface"]:
            pending.append(asyncio.to_thread(
                _chunked_upsert, pc.Index(IDX_FACES_ADAFACE), buckets["adaface"]))
    elif buckets["legacy"]:
        pending.append(asyncio.to_thread(
            _chunked_upsert, pc.Index(IDX_FACES), buckets["legacy"]))
    if buckets["object"]:
        pending.append(asyncio.to_thread(
            _chunked_upsert, pc.Index(IDX_OBJECTS), buckets["object"]))

    if pending:
        await asyncio.gather(*pending)

    return {
        "uploaded_urls": uploaded_urls,
        "arcface_vecs": len(buckets["arcface"]),
        "adaface_vecs": len(buckets["adaface"]),
        "legacy_face_vecs": len(buckets["legacy"]),
        "object_vecs": len(buckets["object"]),
    }
145
+
146
+
147
+ # ──────────────────────────────────────────────────────────────
148
+ # Upload endpoint
149
+ # ──────────────────────────────────────────────────────────────
150
+ @router.post("/api/upload")
151
+ async def upload_images(
152
+ request: Request,
153
+ files: List[UploadFile] = File(...),
154
+ folder_name: str = Form(...),
155
+ detect_faces: bool = Form(True),
156
+ user_id: str = Form(""),
157
+ async_mode: bool = Query(False, alias="async"),
158
+ keys: dict = Depends(get_verified_keys),
159
+ ):
160
+ ip = get_ip(request)
161
+ start = time.perf_counter()
162
+
163
+ if len(files) > MAX_FILES_PER_UPLOAD:
164
+ raise HTTPException(400, f"Too many files. Max {MAX_FILES_PER_UPLOAD} per request.")
165
+
166
+ folder = standardize_category_name(folder_name)
167
+ pc = pinecone_pool.get(keys["pinecone_key"])
168
+
169
+ # Auto-create indexes if missing. Idempotent.
170
+ try:
171
+ created = await asyncio.to_thread(ensure_indexes, pc)
172
+ if created:
173
+ log("INFO", "upload.indexes_auto_created",
174
+ user_id=user_id or "anonymous", ip=ip, created=created)
175
+ await asyncio.sleep(8)
176
+ except Exception as e:
177
+ log("ERROR", "upload.ensure_indexes_failed",
178
+ user_id=user_id or "anonymous", ip=ip, error=str(e))
179
+ raise HTTPException(500, f"Failed to initialize indexes: {e}")
180
+
181
+ # ── Async mode: enqueue job, return immediately ──────────────
182
+ if async_mode and USE_ASYNC_UPLOADS:
183
+ from src.services.jobs import create_job
184
+
185
+ files_data = []
186
+ for f in files:
187
+ b = await f.read()
188
+ files_data.append({"bytes": list(b), "filename": f.filename})
189
+
190
+ job_payload = {
191
+ "files_data": files_data,
192
+ "folder": folder,
193
+ "detect_faces": detect_faces,
194
+ "user_id": user_id or "anonymous",
195
+ "keys": {
196
+ "pinecone_key": keys["pinecone_key"],
197
+ "cloudinary_creds": keys["cloudinary_creds"],
198
+ },
199
+ }
200
+
201
+ job_id = await create_job(
202
+ user_id=user_id or "anonymous",
203
+ folder=folder,
204
+ total_files=len(files),
205
+ job_payload=job_payload,
206
+ )
207
+
208
+ log("INFO", "upload.async_enqueued",
209
+ user_id=user_id or "anonymous", ip=ip,
210
+ job_id=job_id, files=len(files), folder=folder)
211
+
212
+ return {
213
+ "message": "Upload queued",
214
+ "job_id": job_id,
215
+ "status_url": f"/api/jobs/{job_id}",
216
+ "total_files": len(files),
217
+ }
218
+
219
+ # ── Synchronous mode (default, matches original Phase 2 perf) ─
220
+ ai = request.app.state.ai
221
+ sem = request.app.state.ai_semaphore
222
+
223
+ # Read all files in parallel first, THEN fan out to _process_one_file.
224
+ # Doing `await f.read()` inside the list-comp would serialize reads.
225
+ file_bytes_list = await asyncio.gather(*[f.read() for f in files])
226
+
227
+ results = await asyncio.gather(*[
228
+ _process_one_file(
229
+ file_bytes=fb,
230
+ folder=folder,
231
+ detect_faces=detect_faces,
232
+ keys=keys,
233
+ ai=ai,
234
+ sem=sem,
235
+ )
236
+ for fb in file_bytes_list
237
+ ])
238
+
239
+ summary = await _batch_upsert_all(results=results, folder=folder, pc=pc)
240
+
241
+ duration_ms = round((time.perf_counter() - start) * 1000)
242
+ log(
243
+ "INFO", "upload.complete",
244
+ user_id=user_id or "anonymous", ip=ip,
245
+ files=len(files), folder=folder, duration_ms=duration_ms,
246
+ mode="split" if USE_SPLIT_FACE_INDEXES else "legacy",
247
+ arcface_vecs=summary["arcface_vecs"],
248
+ adaface_vecs=summary["adaface_vecs"],
249
+ legacy_face_vecs=summary["legacy_face_vecs"],
250
+ object_vecs=summary["object_vecs"],
251
+ )
252
+
253
+ # Log this sync upload to upload_jobs so the table isn't empty.
254
+ # Sync uploads bypass the job queue entirely; this fire-and-forget task
255
+ # writes a completed row for visibility without changing the upload flow.
256
+ asyncio.create_task(
257
+ _log_sync_upload(user_id=user_id or "anonymous", folder=folder, summary=summary)
258
+ )
259
+
260
+ # Auto-trigger clustering if threshold crossed (fire and forget)
261
+ if CLUSTER_AUTO_TRIGGER_EVERY > 0 and summary["arcface_vecs"] > 0:
262
+ asyncio.create_task(
263
+ _maybe_trigger_clustering(pc, user_id, keys["pinecone_key"])
264
+ )
265
+
266
+ return {
267
+ "message": "Done!",
268
+ "urls": summary["uploaded_urls"],
269
+ "summary": {
270
+ "files": len(files),
271
+ "face_vectors": summary["arcface_vecs"] or summary["legacy_face_vecs"],
272
+ "adaface_vectors": summary["adaface_vecs"],
273
+ "object_vectors": summary["object_vecs"],
274
+ "index_mode": "split" if USE_SPLIT_FACE_INDEXES else "legacy",
275
+ },
276
+ }
277
+
278
+
279
async def _log_sync_upload(user_id: str, folder: str, summary: dict) -> None:
    """Record a completed synchronous upload as a row in upload_jobs.

    Sync uploads never touch the job queue; without this row the table stays
    empty and gives no audit trail of what was indexed.
    """
    import json
    from src.services.jobs import _supa_insert

    n_files = len(summary["uploaded_urls"])
    row = {
        "job_id": uuid.uuid4().hex,
        "user_id": user_id,
        "folder": folder,
        "status": "completed",
        "total_files": n_files,
        "processed_files": n_files,
        "result": json.dumps({
            "face_vectors": summary["arcface_vecs"] or summary["legacy_face_vecs"],
            "adaface_vectors": summary["adaface_vecs"],
            "object_vectors": summary["object_vecs"],
        }),
    }
    try:
        await _supa_insert("upload_jobs", row)
    except Exception:
        # Supabase may be unconfigured — never let bookkeeping fail an upload.
        pass
302
+
303
+
304
async def _maybe_trigger_clustering(pc, user_id: str, pinecone_key: str) -> None:
    """Background task: bump the per-user upload counter and run clustering
    once it crosses CLUSTER_AUTO_TRIGGER_EVERY."""
    try:
        from src.services.cache import cache
        from src.services.clustering import run_clustering
        import hashlib

        uid = hashlib.sha256(pinecone_key.encode()).hexdigest()[:16]
        counter_key = f"upload_count:{uid}"
        count = await cache.incr(counter_key)

        if count >= CLUSTER_AUTO_TRIGGER_EVERY:
            # Reset the counter before the (slow) clustering run starts.
            await cache.delete(counter_key)
            log("INFO", "upload.auto_cluster_triggered",
                user_id=user_id or "anonymous", trigger_count=count)
            await run_clustering(pc, uid)
    except Exception as e:
        log("ERROR", "upload.auto_cluster_error", error=str(e))
322
+
323
+
324
+ # ──────────────────────────────────────────────────────────────
325
+ # Exported for jobs.py worker — same batched upsert path
326
+ # ──────────────────────────────────────────────────────────────
327
+ __all__ = ["upload_images", "_process_one_file", "_batch_upsert_all"]
src/common/utils.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import math
3
+ from fastapi import Request
4
+
5
+
6
def get_ip(request: Request) -> str:
    """Best-effort client IP: first X-Forwarded-For hop, else the socket peer.

    NOTE(review): X-Forwarded-For is client-controllable unless a trusted
    proxy strips it — fine for logging, don't use for auth decisions.
    """
    xff = request.headers.get("X-Forwarded-For")
    if xff:
        return xff.split(",")[0].strip()
    client = request.client
    return client.host if client else "unknown"
11
+
12
+
13
def is_default_key(key: str, default: str) -> bool:
    """True when *key* matches *default* ignoring surrounding whitespace.

    Empty/None on either side is never a match — an unset default must not
    make every empty user key look "default".
    """
    return bool(key) and bool(default) and key.strip() == default.strip()
17
+
18
+
19
def get_cloudinary_creds(url: str) -> dict:
    """Parse a ``cloudinary://api_key:api_secret@cloud_name`` URL.

    Returns ``{"cloud_name", "api_key", "api_secret"}`` on success and an
    empty dict for anything malformed, so callers can uniformly treat
    "no creds" without try/except.

    Fix over the original: ``split("@")`` / ``split(":")`` raised ValueError
    (silently swallowed into ``{}``) whenever the secret contained an extra
    ``:`` or ``@``. We now split on the LAST ``@`` and the FIRST ``:`` so
    such credentials parse correctly.
    """
    if not url or not url.startswith("cloudinary://"):
        return {}
    try:
        body = url[len("cloudinary://"):]
        # Last '@' separates auth from cloud name; first ':' separates
        # key from secret — robust to those characters inside the secret.
        auth, cloud_name = body.rsplit("@", 1)
        api_key, api_secret = auth.split(":", 1)
        return {
            "cloud_name": cloud_name,
            "api_key": api_key,
            "api_secret": api_secret,
        }
    except ValueError:
        # Missing '@' or ':' entirely — not a usable Cloudinary URL.
        return {}
33
+
34
+
35
def sanitize_filename(filename: str) -> str:
    """Replace every character outside [A-Za-z0-9_-.] with '_'.

    Empty/None input yields the placeholder "unnamed_file".
    """
    if not filename:
        return "unnamed_file"
    return re.sub(r'[^a-zA-Z0-9_\-\.]', '_', filename)
39
+
40
+
41
def standardize_category_name(name: str) -> str:
    """Lowercase *name* and map any char outside [a-z0-9_-] to '_'.

    Empty/None input yields the fallback category "uncategorized".
    Note: unlike sanitize_filename, dots are NOT preserved here.
    """
    return re.sub(r'[^a-zA-Z0-9_\-]', '_', name.lower()) if name else "uncategorized"
45
+
46
+
47
def to_list(vector) -> list[float]:
    """Coerce an iterable of numerics to a plain list of floats.

    Returns [] for None, non-iterables, or elements that cannot be cast to
    float — callers feed the result straight into Pinecone upserts and
    prefer an empty vector over an exception.

    Fix over the original: only TypeError was caught, so a vector containing
    a non-numeric string raised ValueError and crashed the caller; both are
    now handled the same way.
    """
    if vector is None:
        return []
    try:
        return [float(x) for x in vector]
    except (TypeError, ValueError):
        return []
54
+
55
+
56
def url_to_public_id(url: str) -> str:
    """Extract the Cloudinary public_id from a delivery URL.

    Drops everything through "/upload/", the version segment that follows
    (e.g. "v12345/"), and the file extension. Returns "" when the URL is
    empty or has no "/upload/" marker.
    """
    if not url:
        return ""
    try:
        segments = url.split("/upload/")
        if len(segments) < 2:
            return ""
        # First path component after /upload/ is the version tag — skip it.
        remainder = segments[1].split("/", 1)[-1]
        return remainder.rsplit(".", 1)[0]
    except Exception:
        return ""
67
+
68
+
69
def cld_thumb_url(url: str) -> str:
    """Inject a width-500 limit transform into a Cloudinary delivery URL.

    Empty input yields "". Non-Cloudinary URLs pass through unchanged
    (no "/upload/" segment means nothing is replaced).
    """
    if not url:
        return ""
    return url.replace("/upload/", "/upload/c_limit,w_500/")
73
+
74
+
75
def face_ui_score(raw_score: float, mode: str = "fused") -> float:
    """Map a raw cosine similarity onto a calibrated [0, 1] UI probability.

    Platt scaling: a sigmoid centred on the decision boundary of whichever
    backend produced the score.

    mode="fused"  — split-index fused score (0.6*arcface + 0.4*adaface);
                    boundary ~0.30 with a steep imposter drop-off.
    mode="legacy" — old 1024-d concatenated vector cosine; boundary 0.50
                    (original calibration).
    """
    # (decision boundary, sigmoid steepness) per calibration mode.
    boundary, steepness = (0.30, 20.0) if mode == "fused" else (0.50, 18.0)
    prob = 1.0 / (1.0 + math.exp(-steepness * (raw_score - boundary)))
    return min(1.0, max(0.0, round(prob, 4)))
src/core/config.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Environment-driven configuration for the visual-search backend.

Every tunable reads from os.environ with a working default, so the service
boots with zero configuration and is tuned via HF Space secrets / .env.
"""
import os
from dotenv import load_dotenv

# Populate os.environ from a local .env file (no-op when absent, e.g. in
# production containers where secrets arrive as real env vars).
load_dotenv()

# ──────────────────────────────────────────────────────────────
# Credentials & core
# ──────────────────────────────────────────────────────────────
DEFAULT_PINECONE_KEY = os.getenv("DEFAULT_PINECONE_KEY", "")
DEFAULT_CLOUDINARY_URL = os.getenv("DEFAULT_CLOUDINARY_URL", "")

# Legacy index (kept for read-only backward compat during Phase 2 rollout)
IDX_FACES = os.getenv("IDX_FACES", "enterprise-faces")
IDX_OBJECTS = os.getenv("IDX_OBJECTS", "enterprise-objects")

# Phase 2: split face indexes (512-d each)
IDX_FACES_ARCFACE = os.getenv("IDX_FACES_ARCFACE", "faces-arcface")
IDX_FACES_ADAFACE = os.getenv("IDX_FACES_ADAFACE", "faces-adaface")

# ──────────────────────────────────────────────────────────────
# Concurrency / limits
# ──────────────────────────────────────────────────────────────
MAX_CONCURRENT_INFERENCES = int(os.getenv("MAX_CONCURRENT_INFERENCES", "2"))
MAX_FILES_PER_UPLOAD = int(os.getenv("MAX_FILES_PER_UPLOAD", "50"))
INFERENCE_CACHE_SIZE = int(os.getenv("INFERENCE_CACHE_SIZE", "128"))

# ──────────────────────────────────────────────────────────────
# Logging
# ──────────────────────────────────────────────────────────────
# Both must be set for remote log/job persistence; otherwise the logging
# module falls back to console-only.
SUPABASE_URL = os.getenv("SUPABASE_URL", "")
SUPABASE_SERVICE_KEY = os.getenv("SUPABASE_SERVICE_KEY", "")

# ──────────────────────────────────────────────────────────────
# Image / detection
# ──────────────────────────────────────────────────────────────
MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "1024"))
MAX_CROPS = int(os.getenv("MAX_CROPS", "10"))
# COCO class id 0 = "person" in YOLO models.
YOLO_PERSON_CLASS_ID = 0
YOLO_MIN_CROP_PX = int(os.getenv("YOLO_MIN_CROP_PX", "50"))
YOLO_CONF_THRESHOLD = float(os.getenv("YOLO_CONF_THRESHOLD", "0.25"))

# Primary InsightFace detector input size; DET_SCALES are the multi-scale
# fallback attempts (largest first) when the primary pass finds no faces.
DET_SIZE_PRIMARY = (640, 640)
DET_SCALES = [(1280, 1280), (960, 960), (640, 640)]
IOU_DEDUP_THRESHOLD = float(os.getenv("IOU_DEDUP_THRESHOLD", "0.4"))
MIN_FACE_SIZE = int(os.getenv("MIN_FACE_SIZE", "30"))
MAX_FACES_PER_IMAGE = int(os.getenv("MAX_FACES_PER_IMAGE", "20"))

# Phase 2: relaxed from 0.5 → 0.3 to index more faces (filter at query time)
FACE_QUALITY_GATE = float(os.getenv("FACE_QUALITY_GATE", "0.3"))

# Laplacian variance blur threshold for face crops.
# Faces below this score are excluded from search results.
# Typical values: >100 = sharp, 50-100 = acceptable, <50 = blurry.
FACE_BLUR_THRESHOLD = float(os.getenv("FACE_BLUR_THRESHOLD", "50.0"))

# ──────────────────────────────────────────────────────────────
# Embedding dimensions
# ──────────────────────────────────────────────────────────────
FACE_DIM = 512
ADAFACE_DIM = 512
FUSED_FACE_DIM = 1024  # old concatenated — kept for legacy index reads

FACE_CROP_THUMB_SIZE = int(os.getenv("FACE_CROP_THUMB_SIZE", "112"))
FACE_CROP_QUALITY = int(os.getenv("FACE_CROP_QUALITY", "85"))
FACE_CROP_PADDING = float(os.getenv("FACE_CROP_PADDING", "0.2"))
ADAFACE_CROP_PADDING = float(os.getenv("ADAFACE_CROP_PADDING", "0.1"))

ENABLE_ADAFACE = int(os.getenv("ENABLE_ADAFACE", "1"))
HF_TOKEN = os.getenv("HF_TOKEN", "")

# ──────────────────────────────────────────────────────────────
# Phase 1: Speed flags (unchanged, leaving on)
# ──────────────────────────────────────────────────────────────
USE_ONNX_VISION = int(os.getenv("USE_ONNX_VISION", "0"))
ONNX_MODELS_DIR = os.getenv("ONNX_MODELS_DIR", "onnx_models")
ONNX_USE_INT8 = int(os.getenv("ONNX_USE_INT8", "1"))
ENABLE_MULTI_SCALE_FALLBACK = int(os.getenv("ENABLE_MULTI_SCALE_FALLBACK", "1"))
ENABLE_HORIZONTAL_FLIP = int(os.getenv("ENABLE_HORIZONTAL_FLIP", "0"))
# NOTE(review): reading OMP/MKL thread counts here does not set them for the
# native libraries — they must also be exported as env vars before import.
OMP_NUM_THREADS = int(os.getenv("OMP_NUM_THREADS", "2"))
MKL_NUM_THREADS = int(os.getenv("MKL_NUM_THREADS", "2"))

# ──────────────────────────────────────────────────────────────
# Phase 2: Recall flags — DEFAULT ON
# ──────────────────────────────────────────────────────────────

# Split-index mode: write ArcFace + AdaFace to separate indexes, score-fuse at query
USE_SPLIT_FACE_INDEXES = int(os.getenv("USE_SPLIT_FACE_INDEXES", "1"))

# Score fusion weights. ArcFace is more discriminative for generic faces;
# AdaFace helps with low-quality/extreme-angle cases. 0.6/0.4 is NIST-FRVT standard.
ARCFACE_WEIGHT = float(os.getenv("ARCFACE_WEIGHT", "0.6"))
ADAFACE_WEIGHT = float(os.getenv("ADAFACE_WEIGHT", "0.4"))

# ArcFace-R100 same-person mean ~0.55, std ~0.12.
# 0.30 is a balanced arc floor: catches side-profile/distance shots while
# staying above the impostor tail (different-person mean ~0.05, std ~0.08).
FACE_MATCH_THRESHOLD = float(os.getenv("FACE_MATCH_THRESHOLD", "0.30"))

# With both models agreeing, 0.33 fused ≈ arc 0.30 + ada 0.38 together.
# Slightly raised above the arc floor because fusion adds confidence.
FUSED_MATCH_THRESHOLD = float(os.getenv("FUSED_MATCH_THRESHOLD", "0.33"))

# ArcFace-only floor (no AdaFace confirmation available).
# Stricter than FACE_MATCH_THRESHOLD since there is no second model to cross-check.
ARCFACE_SOLO_THRESHOLD = float(os.getenv("ARCFACE_SOLO_THRESHOLD", "0.38"))

# Query-time augmentation: OFF by default, enabled via deep_search form flag
ENABLE_QUERY_TIME_AUG = int(os.getenv("ENABLE_QUERY_TIME_AUG", "0"))

# Larger top_k: was 50, now 500 so large galleries aren't truncated
FACE_SEARCH_TOP_K = int(os.getenv("FACE_SEARCH_TOP_K", "500"))
OBJECT_SEARCH_TOP_K = int(os.getenv("OBJECT_SEARCH_TOP_K", "100"))

# Final API returns at most this many per-face matches (after dedup)
FACE_RESULTS_PER_QUERY_CAP = int(os.getenv("FACE_RESULTS_PER_QUERY_CAP", "200"))

# ──────────────────────────────────────────────────────────────
# Phase 3: People View + Job Queue — DEFAULT OFF (opt-in via env)
# ──────────────────────────────────────────────────────────────

# Redis-backed inference cache + job queue (requires Upstash)
# Set UPSTASH_REDIS_URL + UPSTASH_REDIS_TOKEN in HF Space secrets.
UPSTASH_REDIS_URL = os.getenv("UPSTASH_REDIS_URL", "")
UPSTASH_REDIS_TOKEN = os.getenv("UPSTASH_REDIS_TOKEN", "")

# Master toggle: enable the persistent Redis cache (replaces in-memory dict).
# Falls back to in-memory if UPSTASH_REDIS_URL is not set, so this is safe to
# leave True even before Upstash is wired up.
USE_REDIS_CACHE = int(os.getenv("USE_REDIS_CACHE", "0"))

# Async upload mode: when True, POST /api/upload?async=true returns a job_id
# immediately and processes in the background worker.
# Synchronous uploads (no ?async param) always work regardless of this flag.
USE_ASYNC_UPLOADS = int(os.getenv("USE_ASYNC_UPLOADS", "1"))

# Cluster-aware search expansion: after the initial face search, expand results
# to include ALL images in the matched identity clusters.
# Near-100% recall for well-indexed people. Disable if Supabase is slow.
USE_CLUSTER_AWARE_SEARCH = int(os.getenv("USE_CLUSTER_AWARE_SEARCH", "1"))

# HDBSCAN parameters — tuned for typical 1k–10k image libraries
CLUSTER_MIN_SAMPLES = int(os.getenv("CLUSTER_MIN_SAMPLES", "3"))
CLUSTER_MIN_CLUSTER_SIZE = int(os.getenv("CLUSTER_MIN_CLUSTER_SIZE", "3"))
CLUSTER_EPSILON = float(os.getenv("CLUSTER_EPSILON", "0.35"))

# Auto re-cluster after every N new face uploads (0 = disabled, manual only)
CLUSTER_AUTO_TRIGGER_EVERY = int(os.getenv("CLUSTER_AUTO_TRIGGER_EVERY", "0"))
src/core/logging.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+
4
+ from src.core.config import SUPABASE_URL, SUPABASE_SERVICE_KEY
5
+
6
+ _http_session = None
7
+
8
+ async def init_logging_session():
9
+ global _http_session
10
+ if SUPABASE_URL and SUPABASE_SERVICE_KEY:
11
+ import aiohttp
12
+ _http_session = aiohttp.ClientSession(
13
+ headers={
14
+ "Content-Type": "application/json",
15
+ "apikey": SUPABASE_SERVICE_KEY,
16
+ "Authorization": f"Bearer {SUPABASE_SERVICE_KEY}",
17
+ "Prefer": "return=minimal",
18
+ }
19
+ )
20
+
21
+ async def close_logging_session():
22
+ global _http_session
23
+ if _http_session:
24
+ await _http_session.close()
25
+
26
+ try:
27
+ from loguru import logger as _loguru
28
+ _loguru.remove()
29
+ _loguru.add(
30
+ lambda msg: print(msg, end=""),
31
+ format="<green>{time:HH:mm:ss}</green> | <level>{level:<8}</level> | {message}",
32
+ level="DEBUG",
33
+ colorize=True,
34
+ )
35
+ _log_fn = _loguru.log
36
+ except ImportError:
37
+ import logging as _logging
38
+ _logging.basicConfig(level=_logging.INFO)
39
+ _stdlib = _logging.getLogger("vsl")
40
+
41
+ def _log_fn(level: str, msg: str):
42
+ _stdlib.log(getattr(_logging, level, 20), msg)
43
+
44
+ async def _supabase_log(level: str, event: str, data: dict) -> None:
45
+ if not _http_session:
46
+ return
47
+ try:
48
+ import aiohttp
49
+ row = {
50
+ "level": level.upper(),
51
+ "event": event,
52
+ "user_id": str(data.get("user_id", "anonymous")),
53
+ "ip": str(data.get("ip", "")),
54
+ "mode": str(data.get("mode", "")),
55
+ "page": str(data.get("page", "")),
56
+ "duration_ms": int(data["duration_ms"]) if "duration_ms" in data else None,
57
+ "error": str(data["error"]) if "error" in data else None,
58
+ "data": data,
59
+ }
60
+ async with _http_session.post(
61
+ f"{SUPABASE_URL}/rest/v1/app_logs",
62
+ json=row,
63
+ timeout=aiohttp.ClientTimeout(total=5),
64
+ ) as r:
65
+ if r.status not in (200, 201):
66
+ body = await r.text()
67
+ _log_fn("WARNING", f"Supabase log failed {r.status}: {body[:200]}")
68
+ except Exception as exc:
69
+ _log_fn("DEBUG", f"Supabase log push skipped: {exc}")
70
+
71
+ def log(level: str, event: str, **data) -> None:
72
+ _log_fn(level.upper(), f"[{event}] {json.dumps(data, default=str)}")
73
+ try:
74
+ loop = asyncio.get_running_loop()
75
+ loop.create_task(_supabase_log(level, event, data))
76
+ except RuntimeError:
77
+ pass
78
+
79
+ def warn(msg: str) -> None:
80
+ _log_fn("WARNING", msg)
src/core/security.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import Form, HTTPException
2
+ from src.core.config import DEFAULT_PINECONE_KEY, DEFAULT_CLOUDINARY_URL
3
+ from src.common.utils import get_cloudinary_creds
4
+
5
def get_verified_keys(
    user_pinecone_key: str = Form(""),
    user_cloudinary_url: str = Form("")
) -> dict:
    """FastAPI dependency: resolve and validate Pinecone/Cloudinary keys.

    User-supplied form values take precedence over the configured defaults.
    Raises HTTPException(400) when the Cloudinary URL cannot be parsed or
    no Pinecone key is available from either source.
    """
    pc_key = user_pinecone_key or DEFAULT_PINECONE_KEY
    cld_url = user_cloudinary_url or DEFAULT_CLOUDINARY_URL

    cld_creds = get_cloudinary_creds(cld_url)
    if not cld_creds.get("cloud_name"):
        raise HTTPException(400, "Invalid Cloudinary URL.")
    if not pc_key:
        raise HTTPException(400, "Pinecone key is missing.")

    return {
        "pinecone_key": pc_key,
        "cloudinary_url": cld_url,
        "cloudinary_creds": cld_creds,
    }
src/services/ai_manager.py ADDED
@@ -0,0 +1,620 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import base64
3
+ import concurrent.futures
4
+ import functools
5
+ import io
6
+ import os
7
+ import threading
8
+ import hashlib
9
+ import warnings
10
+
11
+ # InsightFace uses np.linalg.lstsq without rcond — suppress the FutureWarning.
12
+ warnings.filterwarnings("ignore", category=FutureWarning, module="insightface")
13
+ # Suppress PyTorch meta-tensor copy warnings from AdaFace model loading.
14
+ warnings.filterwarnings("ignore", category=UserWarning, module="torch.nn.modules.module")
15
+
16
+ import cv2
17
+ import numpy as np
18
+ import torch
19
+ import torch.nn.functional as F
20
+ from PIL import Image, ImageOps
21
+ from transformers import AutoImageProcessor, AutoModel, AutoProcessor
22
+ from ultralytics import YOLO
23
+ import insightface # noqa: F401
24
+ from insightface.app import FaceAnalysis
25
+
26
+ from src.core.config import (
27
+ MAX_IMAGE_SIZE, MAX_CROPS, YOLO_PERSON_CLASS_ID,
28
+ YOLO_MIN_CROP_PX, YOLO_CONF_THRESHOLD,
29
+ DET_SIZE_PRIMARY, IOU_DEDUP_THRESHOLD,
30
+ MIN_FACE_SIZE, MAX_FACES_PER_IMAGE, FACE_QUALITY_GATE,
31
+ FACE_DIM, ADAFACE_DIM,
32
+ FACE_CROP_THUMB_SIZE, FACE_CROP_QUALITY,
33
+ FACE_CROP_PADDING, ADAFACE_CROP_PADDING,
34
+ INFERENCE_CACHE_SIZE, ENABLE_ADAFACE, HF_TOKEN,
35
+ USE_ONNX_VISION, ONNX_MODELS_DIR, ONNX_USE_INT8,
36
+ ENABLE_MULTI_SCALE_FALLBACK, ENABLE_HORIZONTAL_FLIP,
37
+ USE_SPLIT_FACE_INDEXES, FACE_BLUR_THRESHOLD,
38
+ )
39
+
40
# ── ArcFace 5-point reference landmarks (fixed template) ──────────────────────
# Canonical destination positions, in 112x112 crop coordinates, of the two
# eyes, nose tip, and mouth corners used by ArcFace alignment. Precomputed —
# eliminates np.linalg.lstsq call per face (10x faster alignment).
_ARCFACE_SRC = np.array([
    [38.2946, 51.6963],   # left eye
    [73.5318, 51.5014],   # right eye
    [56.0252, 71.7366],   # nose tip
    [41.5493, 92.3655],   # left mouth corner
    [70.7299, 92.2041],   # right mouth corner
], dtype=np.float32)
49
+
50
+
51
def _estimate_norm_fast(lmk: np.ndarray, image_size: int = 112) -> np.ndarray:
    """
    Fast similarity-transform estimation from 5 facial landmarks to the
    ArcFace template, using cv2.estimateAffinePartial2D instead of
    np.linalg.lstsq (~10x faster on CPU). Returns a 2x3 affine matrix.

    Raises AssertionError if lmk is not shaped (5, 2).
    """
    assert lmk.shape == (5, 2), f"Expected (5,2) landmarks, got {lmk.shape}"
    src = _ARCFACE_SRC * (image_size / 112.0)
    # BUGFIX: the original passed method=cv2.LSQR_EXACT, a constant that
    # does not exist in OpenCV — every call raised AttributeError.
    # estimateAffinePartial2D accepts cv2.RANSAC or cv2.LMEDS; LMEDS is the
    # robust least-median estimator appropriate for 5 exact correspondences.
    tform, _ = cv2.estimateAffinePartial2D(lmk, src, method=cv2.LMEDS)
    if tform is None:
        # Degenerate landmarks: fall back to identity so the caller still
        # gets a crop instead of a crash.
        tform = np.array([[1, 0, 0], [0, 1, 0]], dtype=np.float32)
    return tform
65
+
66
+
67
def _align_face_fast(bgr: np.ndarray, kps: np.ndarray, size: int = 112) -> np.ndarray:
    """Warp a face to the ArcFace template via the fast affine estimate
    (replaces InsightFace's lstsq alignment path)."""
    matrix = _estimate_norm_fast(kps, size)
    return cv2.warpAffine(bgr, matrix, (size, size), flags=cv2.INTER_LINEAR)
72
+
73
+
74
def _resize_pil(img: Image.Image, max_side: int = MAX_IMAGE_SIZE) -> Image.Image:
    """Downscale *img* so its longest side is at most *max_side*.

    Images already within the limit are returned untouched (never upscaled).
    """
    w, h = img.size
    longest = max(w, h)
    if longest <= max_side:
        return img
    ratio = max_side / longest
    return img.resize((int(w * ratio), int(h * ratio)), Image.LANCZOS)
80
+
81
+
82
def _blur_score(bgr: np.ndarray, x1: int, y1: int, x2: int, y2: int) -> float:
    """Laplacian-variance sharpness of the (x1,y1)-(x2,y2) crop.

    Higher = sharper; 0.0 for an empty crop.
    """
    region = bgr[y1:y2, x1:x2]
    if region.size == 0:
        return 0.0
    # Normalize to a 64x64 grayscale patch so the metric is comparable
    # across crop sizes.
    small = cv2.resize(cv2.cvtColor(region, cv2.COLOR_BGR2GRAY), (64, 64))
    return float(cv2.Laplacian(small, cv2.CV_64F).var())
90
+
91
+
92
def _crop_to_b64(img_bgr: np.ndarray, x1: int, y1: int, x2: int, y2: int) -> str:
    """Padded face crop → square JPEG thumbnail → base64 string ("" if empty)."""
    H, W = img_bgr.shape[:2]
    pad_x = int((x2 - x1) * FACE_CROP_PADDING)
    pad_y = int((y2 - y1) * FACE_CROP_PADDING)
    # Expand the bbox by the padding, clamped to image bounds.
    left, top = max(0, x1 - pad_x), max(0, y1 - pad_y)
    right, bottom = min(W, x2 + pad_x), min(H, y2 + pad_y)
    crop = img_bgr[top:bottom, left:right]
    if crop.size == 0:
        return ""
    # BGR → RGB via channel reversal, then a fixed-size square thumbnail.
    thumb = Image.fromarray(crop[:, :, ::-1]).resize(
        (FACE_CROP_THUMB_SIZE, FACE_CROP_THUMB_SIZE), Image.LANCZOS
    )
    out = io.BytesIO()
    thumb.save(out, format="JPEG", quality=FACE_CROP_QUALITY)
    return base64.b64encode(out.getvalue()).decode()
108
+
109
+
110
def _face_crop_for_adaface(
    img_bgr: np.ndarray, x1: int, y1: int, x2: int, y2: int
) -> np.ndarray | None:
    """Padded 112x112 CHW float32 crop, normalized to [-1, 1], for AdaFace.

    Returns None when the clamped crop is empty.
    """
    H, W = img_bgr.shape[:2]
    pad_x = int((x2 - x1) * ADAFACE_CROP_PADDING)
    pad_y = int((y2 - y1) * ADAFACE_CROP_PADDING)
    left, top = max(0, x1 - pad_x), max(0, y1 - pad_y)
    right, bottom = min(W, x2 + pad_x), min(H, y2 + pad_y)
    crop = img_bgr[top:bottom, left:right]
    if crop.size == 0:
        return None
    # BGR → RGB; .copy() forces a contiguous array for PIL.
    rgb = crop[:, :, ::-1].copy()
    resized = Image.fromarray(rgb).resize((112, 112), Image.LANCZOS)
    scaled = np.array(resized, dtype=np.float32) / 255.0
    normalized = (scaled - 0.5) / 0.5  # [0,1] → [-1,1]
    return normalized.transpose(2, 0, 1)  # HWC → CHW
127
+
128
+
129
def _clahe_enhance(bgr: np.ndarray) -> np.ndarray:
    """CLAHE contrast boost on the luminance channel in LAB space.

    Equalizes only L so colors are preserved; helps detection on dim faces.
    """
    lab = cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB)
    lum, chan_a, chan_b = cv2.split(lab)
    equalized = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(lum)
    return cv2.cvtColor(cv2.merge([equalized, chan_a, chan_b]), cv2.COLOR_LAB2BGR)
135
+
136
+
137
+ def _iou(box_a: list, box_b: list) -> float:
138
+ xa, ya = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
139
+ xb, yb = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
140
+ inter = max(0, xb - xa) * max(0, yb - ya)
141
+ if inter == 0:
142
+ return 0.0
143
+ area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
144
+ area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
145
+ return inter / (area_a + area_b - inter)
146
+
147
+
148
def _dedup_faces(faces_list: list, iou_thresh: float = IOU_DEDUP_THRESHOLD) -> list:
    """Greedy NMS over detected faces.

    Sorts by descending det_score and keeps a face only when it does not
    overlap (IoU > iou_thresh) any already-kept face — the highest-scoring
    detection per overlap group survives.
    """
    if not faces_list:
        return []
    ranked = sorted(faces_list, key=lambda f: float(f.det_score), reverse=True)
    survivors = []
    for candidate in ranked:
        cand = candidate.bbox.astype(int)
        cand_box = [cand[0], cand[1], cand[2], cand[3]]
        overlaps = any(
            _iou(cand_box, [kept.bbox.astype(int)[i] for i in range(4)]) > iou_thresh
            for kept in survivors
        )
        if not overlaps:
            survivors.append(candidate)
    return survivors
162
+
163
+
164
+ # ── Face crop embedding cache (LRU by crop hash) ──────────────────────────────
165
+ # Avoids recomputing ArcFace embeddings for the same face across multiple images
166
+ # (e.g. same person appears in 20 photos — only 1 inference call needed)
167
+ _FACE_EMBED_CACHE: dict[str, np.ndarray] = {}
168
+ _FACE_EMBED_CACHE_MAX = 512
169
+ _FACE_EMBED_CACHE_LOCK = threading.Lock()
170
+
171
+
172
+ def _face_cache_get(key: str) -> np.ndarray | None:
173
+ with _FACE_EMBED_CACHE_LOCK:
174
+ return _FACE_EMBED_CACHE.get(key)
175
+
176
+
177
+ def _face_cache_set(key: str, vec: np.ndarray) -> None:
178
+ with _FACE_EMBED_CACHE_LOCK:
179
+ if len(_FACE_EMBED_CACHE) >= _FACE_EMBED_CACHE_MAX:
180
+ # Evict oldest entry
181
+ oldest = next(iter(_FACE_EMBED_CACHE))
182
+ del _FACE_EMBED_CACHE[oldest]
183
+ _FACE_EMBED_CACHE[key] = vec
184
+
185
+
186
+ def _crop_hash(crop_bgr: np.ndarray) -> str:
187
+ """Fast hash of face crop pixels for cache lookup."""
188
+ return hashlib.md5(crop_bgr.tobytes()).hexdigest()
189
+
190
+
191
+ class AIModelManager:
192
    def __init__(self):
        """Load the full vision stack once at process start.

        Load order: optional ONNX stack → PyTorch SigLIP+DINOv2 fallback →
        YOLO → InsightFace (detection + ArcFace) → optional AdaFace.
        All models are shared across requests; the inference methods handle
        their own locking.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # Vision stack: prefer the quantized ONNX path when enabled; fall
        # back to the PyTorch pair on any load failure.
        self.onnx_vision = None
        if USE_ONNX_VISION:
            try:
                from src.services.onnx_models import ONNXVisionStack
                self.onnx_vision = ONNXVisionStack(
                    ONNX_MODELS_DIR, use_int8=bool(ONNX_USE_INT8)
                )
                print(f"[AIModelManager] ONNX vision loaded (INT8={ONNX_USE_INT8})")
            except Exception as e:
                print(f"[AIModelManager] ONNX failed ({e}), using PyTorch fallback")
                self.onnx_vision = None

        if self.onnx_vision is None:
            self.siglip_processor = AutoProcessor.from_pretrained(
                "google/siglip-base-patch16-224", use_fast=True
            )
            self.siglip_model = AutoModel.from_pretrained(
                "google/siglip-base-patch16-224"
            ).to(self.device).eval()
            self.dinov2_processor = AutoImageProcessor.from_pretrained(
                "facebook/dinov2-base", use_fast=True
            )
            self.dinov2_model = AutoModel.from_pretrained(
                "facebook/dinov2-base"
            ).to(self.device).eval()
            if self.device == "cuda":
                # fp16 on GPU halves memory and speeds inference.
                self.siglip_model = self.siglip_model.half()
                self.dinov2_model = self.dinov2_model.half()

        # YOLO person/object detector (nano segmentation variant).
        self.yolo = YOLO("yolo11n-seg.pt")

        # Face detection + ArcFace embeddings (InsightFace buffalo_l pack).
        self.face_app = FaceAnalysis(
            name="buffalo_l",
            providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
            if self.device == "cuda" else ["CPUExecutionProvider"],
        )
        self.face_app.prepare(
            ctx_id=0 if self.device == "cuda" else -1, det_size=DET_SIZE_PRIMARY
        )
        # Warm-up call so the first real request doesn't pay init cost.
        self.face_app.get(np.zeros((112, 112, 3), dtype=np.uint8))

        # AdaFace (optional second face model; stays None when disabled
        # or when loading fails — see _load_adaface).
        self.adaface_model = None
        self._load_adaface()

        # _face_lock serialises access to the shared InsightFace detector;
        # _cache_lock guards the per-image inference cache below.
        self._face_lock = threading.Lock()
        self._cache_lock = threading.Lock()
        self._cache: dict[str, list] = {}

        # Thread pool for parallel ArcFace + AdaFace inference
        # 2 workers = one per model, matches 2 vCPU on HF free tier
        self._embed_pool = concurrent.futures.ThreadPoolExecutor(
            max_workers=2, thread_name_prefix="embed"
        )
252
+
253
    def _load_adaface(self) -> None:
        """Download and load the CVLFace AdaFace IR-50 model from HF Hub.

        Uses a fixed local cache dir, and temporarily chdir's into it while
        adding it to sys.path, because the repo's remote code resolves files
        relative to the CWD. Any failure leaves self.adaface_model as None
        so the pipeline degrades to ArcFace-only instead of crashing.
        """
        if not ENABLE_ADAFACE:
            return
        import sys
        REPO_ID = "minchul/cvlface_adaface_ir50_ms1mv2"
        CACHE_PATH = os.path.expanduser(
            "~/.cvlface_cache/minchul/cvlface_adaface_ir50_ms1mv2"
        )
        try:
            from huggingface_hub import hf_hub_download
            from transformers import AutoModel as _HFAutoModel
            os.makedirs(CACHE_PATH, exist_ok=True)
            # files.txt lists the extra artifacts the repo needs beyond the
            # standard config/wrapper/weights trio.
            hf_hub_download(
                repo_id=REPO_ID, filename="files.txt", token=HF_TOKEN,
                local_dir=CACHE_PATH, local_dir_use_symlinks=False,
            )
            with open(os.path.join(CACHE_PATH, "files.txt")) as f:
                extra = [x.strip() for x in f.read().split("\n") if x.strip()]
            # Only fetch files not already cached from a previous boot.
            for fname in extra + ["config.json", "wrapper.py", "model.safetensors"]:
                if not os.path.exists(os.path.join(CACHE_PATH, fname)):
                    hf_hub_download(
                        repo_id=REPO_ID, filename=fname, token=HF_TOKEN,
                        local_dir=CACHE_PATH, local_dir_use_symlinks=False,
                    )
            cwd = os.getcwd()
            os.chdir(CACHE_PATH)
            sys.path.insert(0, CACHE_PATH)
            try:
                model = _HFAutoModel.from_pretrained(
                    CACHE_PATH, trust_remote_code=True, token=HF_TOKEN,
                    low_cpu_mem_usage=False,
                )
            finally:
                # Always restore CWD and sys.path, even if loading fails.
                os.chdir(cwd)
                if CACHE_PATH in sys.path:
                    sys.path.remove(CACHE_PATH)
            self.adaface_model = model.to(self.device).eval()
        except Exception as _ada_err:
            import traceback as _tb
            print(f"[CRITICAL] AdaFace failed to load — system will run at degraded recall: {_ada_err}")
            _tb.print_exc()
            self.adaface_model = None
295
+
296
    # ── FIX 1: AdaFace batch embed (unchanged — already correct) ──────────────
    def _adaface_embed_batch(
        self, face_arrs_chw: list[np.ndarray | None]
    ) -> list[np.ndarray | None]:
        """Batch-embed pre-normalised 112x112 CHW face crops with AdaFace.

        Accepts None placeholders (failed crops) and preserves positions:
        the returned list has one entry per input, with None wherever the
        input was None, AdaFace is unavailable, or inference failed.
        Embeddings are L2-normalised float32 vectors.
        """
        if self.adaface_model is None:
            return [None] * len(face_arrs_chw)
        valid_idx = [i for i, a in enumerate(face_arrs_chw) if a is not None]
        if not valid_idx:
            return [None] * len(face_arrs_chw)
        batch = np.stack([face_arrs_chw[i] for i in valid_idx], axis=0)
        batch = np.ascontiguousarray(batch)
        try:
            t = torch.from_numpy(batch).contiguous().to(self.device)
            if self.device == "cuda":
                t = t.half()
            with torch.no_grad():
                out = self.adaface_model(t)
            # Some wrappers return a tensor directly, others an output
            # object carrying an .embedding attribute.
            emb = out if isinstance(out, torch.Tensor) else out.embedding
            emb = F.normalize(emb.float(), p=2, dim=1).cpu().numpy()
        except Exception as e:
            import traceback
            print(f"[AdaFace ERROR] {e}")
            traceback.print_exc()
            return [None] * len(face_arrs_chw)
        # Scatter embeddings back to their original positions.
        result = [None] * len(face_arrs_chw)
        for out_i, in_i in enumerate(valid_idx):
            result[in_i] = emb[out_i]
        return result
324
+
325
    # ── FIX 2: ArcFace batch embed with crop-hash caching ─────────────────────
    def _arcface_embed_batch(
        self, faces: list, bgr: np.ndarray
    ) -> list[np.ndarray]:
        """
        Return one L2-normalised ArcFace embedding per detected face.

        The embeddings themselves come from InsightFace's get() call
        (face.embedding, already computed during detection); this method
        normalises them and adds a crop-hash LRU cache so the same face
        crop appearing across many photos costs a single computation —
        same person in 20 photos = 1 cache miss.

        Faces with no embedding fall back to a FACE_DIM zero vector so the
        output list always aligns 1:1 with *faces*.
        """
        results = []

        for face in faces:
            # Clamp the bbox to image bounds before hashing the raw crop.
            bbox = face.bbox.astype(int)
            x1, y1, x2, y2 = bbox
            x1, y1 = max(0, x1), max(0, y1)
            x2, y2 = min(bgr.shape[1], x2), min(bgr.shape[0], y2)
            raw_crop = bgr[y1:y2, x1:x2]
            ch = _crop_hash(raw_crop) if raw_crop.size > 0 else ""

            if ch:
                cached_vec = _face_cache_get(ch)
                if cached_vec is not None:
                    results.append(cached_vec)
                    continue

            vec = face.embedding.astype(np.float32) if face.embedding is not None \
                else np.zeros(FACE_DIM, dtype=np.float32)
            # L2-normalise so downstream cosine similarity reduces to a
            # dot product; guard against the zero-vector fallback.
            n = np.linalg.norm(vec)
            vec = vec / n if n > 0 else vec
            if ch:
                _face_cache_set(ch, vec)
            results.append(vec)

        return results
366
+
367
    def _embed_crops_batch(self, crops: list[Image.Image]) -> list[np.ndarray]:
        """Embed object/person crops with SigLIP + DINOv2 (or the ONNX stack).

        Returns one L2-normalised fused vector per crop — SigLIP image
        features concatenated with the DINOv2 CLS token, then renormalised.
        Empty input yields an empty list.
        """
        if not crops:
            return []
        if self.onnx_vision is not None:
            return self.onnx_vision.encode(crops)
        with torch.no_grad():
            sig_in = self.siglip_processor(images=crops, return_tensors="pt", padding=True)
            sig_in = {k: v.to(self.device) for k, v in sig_in.items()}
            if self.device == "cuda":
                # Match the half-precision models loaded in __init__.
                sig_in = {k: v.half() if v.dtype == torch.float32 else v for k, v in sig_in.items()}
            sig_out = self.siglip_model.get_image_features(**sig_in)
            # get_image_features' return shape varies across transformers
            # versions — unwrap whichever attribute is present.
            if hasattr(sig_out, "image_embeds"):
                sig_out = sig_out.image_embeds
            elif hasattr(sig_out, "pooler_output"):
                sig_out = sig_out.pooler_output
            elif hasattr(sig_out, "last_hidden_state"):
                sig_out = sig_out.last_hidden_state[:, 0, :]
            elif isinstance(sig_out, tuple):
                sig_out = sig_out[0]
            sig_vecs = F.normalize(sig_out.float(), p=2, dim=1).cpu()

            dino_in = self.dinov2_processor(images=crops, return_tensors="pt")
            dino_in = {k: v.to(self.device) for k, v in dino_in.items()}
            if self.device == "cuda":
                dino_in = {k: v.half() if v.dtype == torch.float32 else v for k, v in dino_in.items()}
            dino_out = self.dinov2_model(**dino_in)
            # DINOv2 CLS token ([:, 0, :]) as the global descriptor.
            dino_vecs = F.normalize(dino_out.last_hidden_state[:, 0, :].float(), p=2, dim=1).cpu()
            fused = F.normalize(torch.cat([sig_vecs, dino_vecs], dim=1), p=2, dim=1)
            return [fused[i].numpy() for i in range(len(crops))]
396
+
397
    def _run_detection_at_scale(
        self, bgr_enhanced: np.ndarray, scale: tuple
    ) -> list:
        """Run InsightFace detection at *scale*, mapping bboxes back to
        original-image coordinates.

        The image is only ever downscaled (never upscaled). Returns [] on
        any detection failure so multi-scale callers can simply try the
        next scale.
        """
        H, W = bgr_enhanced.shape[:2]
        scale_w, scale_h = min(W, scale[0]), min(H, scale[1])
        if scale_w == W and scale_h == H:
            bgr_scaled = bgr_enhanced
        else:
            bgr_scaled = cv2.resize(bgr_enhanced, (scale_w, scale_h))
        try:
            with self._face_lock:
                # input_size must be set inside the lock — setting it outside
                # is a race condition when two inference threads run concurrently,
                # causing the wrong scale to be used and faces to be missed.
                self.face_app.det_model.input_size = scale
                faces_at_scale = self.face_app.get(bgr_scaled)
                # Rescale detected bboxes back into full-image coordinates.
                sx, sy = W / scale_w, H / scale_h
                for f in faces_at_scale:
                    if sx != 1.0 or sy != 1.0:
                        f.bbox[0] *= sx; f.bbox[1] *= sy
                        f.bbox[2] *= sx; f.bbox[3] *= sy
                return faces_at_scale
        except Exception:
            return []
421
+
422
+ def _detect_and_encode_faces(self, img_np: np.ndarray) -> list[dict]:
423
+ """
424
+ Returns face records with BOTH arcface_vector and adaface_vector.
425
+
426
+ FIX 3 — ArcFace + AdaFace run in PARALLEL using the thread pool.
427
+ Previously they ran sequentially. On 2 vCPU this gives ~1.5x speedup
428
+ since each model can use a separate core simultaneously.
429
+ """
430
+ if self.face_app is None:
431
+ return []
432
+ try:
433
+ if img_np.dtype != np.uint8:
434
+ img_np = (img_np * 255).astype(np.uint8)
435
+ bgr = img_np[:, :, ::-1].copy() if img_np.shape[2] == 3 else img_np.copy()
436
+ bgr_enhanced = _clahe_enhance(bgr)
437
+ H, W = bgr.shape[:2]
438
+
439
+ all_raw_faces = self._run_detection_at_scale(bgr_enhanced, DET_SIZE_PRIMARY)
440
+
441
+ if not all_raw_faces and ENABLE_MULTI_SCALE_FALLBACK:
442
+ for scale in [(1280, 1280), (960, 960)]:
443
+ more = self._run_detection_at_scale(bgr_enhanced, scale)
444
+ all_raw_faces.extend(more)
445
+ if more:
446
+ break
447
+
448
+ if ENABLE_HORIZONTAL_FLIP:
449
+ bgr_flip = cv2.flip(bgr_enhanced, 1)
450
+ try:
451
+ with self._face_lock:
452
+ self.face_app.det_model.input_size = DET_SIZE_PRIMARY
453
+ faces_flip = self.face_app.get(bgr_flip)
454
+ for f in faces_flip:
455
+ x1, y1, x2, y2 = f.bbox
456
+ f.bbox[0], f.bbox[2] = W - x2, W - x1
457
+ all_raw_faces.extend(faces_flip)
458
+ except Exception:
459
+ pass
460
+
461
+ self.face_app.det_model.input_size = DET_SIZE_PRIMARY
462
+ faces = _dedup_faces(all_raw_faces)
463
+
464
+ filtered_faces = []
465
+ adaface_crops: list[np.ndarray | None] = []
466
+
467
+ for face in faces:
468
+ if len(filtered_faces) >= MAX_FACES_PER_IMAGE:
469
+ break
470
+ bbox_raw = face.bbox.astype(int)
471
+ x1, y1, x2, y2 = bbox_raw
472
+ x1, y1 = max(0, x1), max(0, y1)
473
+ x2, y2 = min(bgr.shape[1], x2), min(bgr.shape[0], y2)
474
+ w, h = x2 - x1, y2 - y1
475
+ if w < MIN_FACE_SIZE or h < MIN_FACE_SIZE:
476
+ continue
477
+ det_score = float(face.det_score) if hasattr(face, "det_score") else 1.0
478
+ if det_score < FACE_QUALITY_GATE or face.embedding is None:
479
+ continue
480
+ blur = _blur_score(bgr, x1, y1, x2, y2)
481
+ filtered_faces.append((face, x1, y1, x2, y2, w, h, det_score, blur))
482
+ adaface_crops.append(_face_crop_for_adaface(bgr, x1, y1, x2, y2))
483
+
484
+ if not filtered_faces:
485
+ return []
486
+
487
+ # ── FIX 3: Run ArcFace + AdaFace in PARALLEL ──────────────────────
488
+ # Submit both to the thread pool simultaneously.
489
+ # On 2 vCPU: total time ≈ max(arcface_time, adaface_time)
490
+ # instead of arcface_time + adaface_time.
491
+ face_objs = [f[0] for f in filtered_faces]
492
+
493
+ arc_future = self._embed_pool.submit(
494
+ self._arcface_embed_batch, face_objs, bgr
495
+ )
496
+ ada_future = self._embed_pool.submit(
497
+ self._adaface_embed_batch, adaface_crops
498
+ )
499
+
500
+ # Wait for both — concurrent.futures blocks until done
501
+ arcface_vecs = arc_future.result()
502
+ adaface_vecs = ada_future.result()
503
+
504
+ results = []
505
+ for accepted, (face_tuple, arcface_vec, adaface_vec) in enumerate(
506
+ zip(filtered_faces, arcface_vecs, adaface_vecs)
507
+ ):
508
+ face, x1, y1, x2, y2, w, h, det_score, blur_score = face_tuple
509
+
510
+ out = {
511
+ "type": "face",
512
+ "face_idx": accepted,
513
+ "bbox": [int(x1), int(y1), int(w), int(h)],
514
+ "face_crop": _crop_to_b64(bgr, x1, y1, x2, y2),
515
+ "det_score": det_score,
516
+ "face_width_px": int(w),
517
+ "blur_score": blur_score,
518
+ "arcface_vector": arcface_vec,
519
+ "adaface_vector": adaface_vec if adaface_vec is not None
520
+ else np.zeros(ADAFACE_DIM, dtype=np.float32),
521
+ "has_adaface": adaface_vec is not None,
522
+ }
523
+
524
+ if not USE_SPLIT_FACE_INDEXES:
525
+ if adaface_vec is not None:
526
+ fused_raw = np.concatenate([arcface_vec, adaface_vec])
527
+ else:
528
+ fused_raw = np.concatenate(
529
+ [arcface_vec, np.zeros(ADAFACE_DIM, dtype=np.float32)]
530
+ )
531
+ n2 = np.linalg.norm(fused_raw)
532
+ out["vector"] = (fused_raw / n2) if n2 > 0 else fused_raw
533
+ else:
534
+ out["vector"] = arcface_vec
535
+
536
+ results.append(out)
537
+ return results
538
+ except Exception as _det_err:
539
+ import traceback as _tb
540
+ print(f"[_detect_and_encode_faces ERROR] shape={getattr(img_np, 'shape', 'N/A')}: {_det_err}")
541
+ _tb.print_exc()
542
+ return []
543
+
544
    # ── Main inference entry point ────────────────────────────────────────────
    def process_image_bytes(
        self, image_bytes: bytes, detect_faces: bool = True
    ) -> list[dict]:
        """
        Full inference pipeline for one image: face detection/encoding plus
        YOLO object crops embedded in one batch.

        Results are memoised in a bounded, lock-guarded cache keyed on
        md5(image_bytes) + detect_faces.  Returns a list of record dicts of
        type "face" or "object".
        """
        file_hash = hashlib.md5(image_bytes).hexdigest()
        cache_key = f"{file_hash}_{detect_faces}"

        # Cache hit: return a shallow copy so callers can't mutate the cache.
        with self._cache_lock:
            if cache_key in self._cache:
                return list(self._cache[cache_key])

        extracted = []
        original_pil = Image.open(io.BytesIO(image_bytes))
        # Apply EXIF orientation before anything else. Pillow does NOT do this
        # automatically — a portrait phone shot stored as landscape with a
        # rotation tag would feed sideways pixels to the face detector.
        original_pil = ImageOps.exif_transpose(original_pil)
        original_pil = original_pil.convert("RGB")
        img_np = np.array(original_pil)
        faces_found = False

        if detect_faces and self.face_app is not None:
            face_results = self._detect_and_encode_faces(img_np)
            if face_results:
                faces_found = True
                extracted.extend(face_results)

        # YOLO proposals: prefer segmentation masks when available, else plain
        # boxes.  Person detections are skipped once faces were found.
        crops: list[Image.Image] = []
        yolo_results = self.yolo(original_pil, conf=YOLO_CONF_THRESHOLD, verbose=False)

        for r in yolo_results:
            if r.masks is not None:
                for seg_idx, mask_xy in enumerate(r.masks.xy):
                    cls_id = int(r.boxes.cls[seg_idx].item())
                    if faces_found and cls_id == YOLO_PERSON_CLASS_ID:
                        continue
                    polygon = np.array(mask_xy, dtype=np.int32)
                    # boundingRect needs at least a triangle.
                    if len(polygon) < 3:
                        continue
                    x, y, w, h = cv2.boundingRect(polygon)
                    if w < YOLO_MIN_CROP_PX or h < YOLO_MIN_CROP_PX:
                        continue
                    crops.append(original_pil.crop((x, y, x + w, y + h)))
                    if len(crops) >= MAX_CROPS:
                        break
            elif r.boxes is not None:
                for box in r.boxes:
                    cls_id = int(box.cls.item())
                    if faces_found and cls_id == YOLO_PERSON_CLASS_ID:
                        continue
                    x1, y1, x2, y2 = box.xyxy[0].tolist()
                    if (x2 - x1) < YOLO_MIN_CROP_PX or (y2 - y1) < YOLO_MIN_CROP_PX:
                        continue
                    crops.append(original_pil.crop((x1, y1, x2, y2)))
                    if len(crops) >= MAX_CROPS:
                        break

        # Embed the full frame plus every accepted crop in one batch.
        all_crops = [_resize_pil(c, MAX_IMAGE_SIZE) for c in [original_pil] + crops]
        obj_vecs = self._embed_crops_batch(all_crops)
        extracted.extend({"type": "object", "vector": v} for v in obj_vecs)

        # FIFO eviction (oldest insertion, not true LRU) keeps cache bounded.
        with self._cache_lock:
            if len(self._cache) >= INFERENCE_CACHE_SIZE:
                oldest = next(iter(self._cache))
                del self._cache[oldest]
            self._cache[cache_key] = list(extracted)

        return extracted
612
+
613
+ async def process_image_bytes_async(
614
+ self, image_bytes: bytes, detect_faces: bool = True
615
+ ) -> list[dict]:
616
+ loop = asyncio.get_event_loop()
617
+ return await loop.run_in_executor(
618
+ None,
619
+ functools.partial(self.process_image_bytes, image_bytes, detect_faces),
620
+ )
src/services/cache.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ src/services/cache.py — Phase 3: Upstash Redis wrapper
3
+
4
+ Provides a thin async layer over Upstash Redis (REST API, so no socket
5
+ connection required — works fine on HF free tier which blocks raw TCP to
6
+ external hosts).
7
+
8
+ Falls back gracefully to a local in-memory dict if UPSTASH_REDIS_URL is not
9
+ set, so the rest of the codebase can import and call CacheService without
10
+ any conditional guards.
11
+
12
+ Usage:
13
+ from src.services.cache import cache
14
+ await cache.set("key", "value", ttl=3600)
15
+ val = await cache.get("key") # returns str | None
16
+ await cache.delete("key")
17
+ await cache.lpush("list_key", "item")
18
+ items = await cache.lrange("list_key", 0, -1)
19
+ """
20
+
21
+ import json
22
+ import os
23
+ import time
24
+ from typing import Any, Optional
25
+
26
+ import aiohttp
27
+
28
+ UPSTASH_REDIS_URL = os.getenv("UPSTASH_REDIS_URL", "")
29
+ UPSTASH_REDIS_TOKEN = os.getenv("UPSTASH_REDIS_TOKEN", "")
30
+
31
+ # Fallback in-memory store used when Upstash is not configured.
32
+ _mem_store: dict[str, tuple[Any, float]] = {} # key → (value, expires_at or 0)
33
+
34
+
35
class CacheService:
    """
    Async Redis cache backed by the Upstash REST API.

    Falls back to the module-level in-memory ``_mem_store`` dict when
    UPSTASH_REDIS_URL / UPSTASH_REDIS_TOKEN are not configured, so callers
    never need conditional guards.
    """

    def __init__(self):
        # Enabled only when both the REST URL and the token are present.
        self._enabled = bool(UPSTASH_REDIS_URL and UPSTASH_REDIS_TOKEN)
        self._base_url = UPSTASH_REDIS_URL.rstrip("/") if self._enabled else ""
        self._headers = (
            {"Authorization": f"Bearer {UPSTASH_REDIS_TOKEN}"}
            if self._enabled
            else {}
        )
        if not self._enabled:
            print("[Cache] Upstash not configured — using in-memory fallback")

    # ── Internal REST call ────────────────────────────────────────────
    async def _cmd(self, *args) -> Any:
        """
        Execute a Redis command via the Upstash REST API.

        BUGFIX: every command argument is now URL-encoded before being
        placed in the path.  Values routinely contain '/', spaces, '?' or
        '#' (any JSON payload does), which previously corrupted the request
        path or leaked into the query string.
        """
        from urllib.parse import quote  # local import: only needed on the REST path
        url = f"{self._base_url}/{'/'.join(quote(str(a), safe='') for a in args)}"
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=self._headers) as resp:
                data = await resp.json()
        if "error" in data:
            raise RuntimeError(f"Upstash error: {data['error']}")
        return data.get("result")

    # ── Public API ────────────────────────────────────────────────────
    async def get(self, key: str) -> Optional[str]:
        """Return the string value for key, or None if missing/expired."""
        if not self._enabled:
            entry = _mem_store.get(key)
            if entry is None:
                return None
            val, exp = entry
            # Lazy expiry: drop stale entries on read.
            if exp and time.time() > exp:
                _mem_store.pop(key, None)
                return None
            return val

        result = await self._cmd("GET", key)
        return result  # str or None

    async def set(self, key: str, value: Any, ttl: int = 0) -> bool:
        """
        Store value under key. If ttl > 0, key expires after that many seconds.
        Value is JSON-serialised if not already a str.
        """
        if not isinstance(value, str):
            value = json.dumps(value)

        if not self._enabled:
            exp = time.time() + ttl if ttl else 0
            _mem_store[key] = (value, exp)
            return True

        if ttl:
            await self._cmd("SET", key, value, "EX", ttl)
        else:
            await self._cmd("SET", key, value)
        return True

    async def get_json(self, key: str) -> Optional[Any]:
        """Like get(), but JSON-decodes the value; returns the raw string if undecodable."""
        raw = await self.get(key)
        if raw is None:
            return None
        try:
            return json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            return raw

    async def set_json(self, key: str, value: Any, ttl: int = 0) -> bool:
        """JSON-encode value and store it under key."""
        return await self.set(key, json.dumps(value), ttl=ttl)

    async def delete(self, key: str) -> bool:
        """Remove key (no-op if absent)."""
        if not self._enabled:
            _mem_store.pop(key, None)
            return True
        await self._cmd("DEL", key)
        return True

    async def exists(self, key: str) -> bool:
        """True if key is present (and, in fallback mode, unexpired)."""
        if not self._enabled:
            return await self.get(key) is not None
        result = await self._cmd("EXISTS", key)
        return bool(result)

    async def incr(self, key: str) -> int:
        """Atomically increment an integer counter; returns the new value."""
        if not self._enabled:
            # Fallback keeps any existing expiry on the counter.
            entry = _mem_store.get(key, ("0", 0))
            new_val = int(entry[0]) + 1
            _mem_store[key] = (str(new_val), entry[1])
            return new_val
        result = await self._cmd("INCR", key)
        return int(result)

    async def expire(self, key: str, ttl: int) -> bool:
        """Set/refresh a TTL (seconds) on an existing key."""
        if not self._enabled:
            if key in _mem_store:
                val, _ = _mem_store[key]
                _mem_store[key] = (val, time.time() + ttl)
            return True
        await self._cmd("EXPIRE", key, ttl)
        return True

    # ── List ops (used for job queue) ─────────────────────────────────
    async def lpush(self, key: str, *values: str) -> int:
        """Push values to the LEFT of a list (queue head)."""
        if not self._enabled:
            lst = json.loads(_mem_store.get(key, ("[]", 0))[0])
            for v in values:
                lst.insert(0, v)
            _mem_store[key] = (json.dumps(lst), 0)
            return len(lst)
        for v in values:
            await self._cmd("LPUSH", key, v)
        return 0  # Upstash REST returns the new length; we don't need it here

    async def rpop(self, key: str) -> Optional[str]:
        """Pop one value from the RIGHT of a list (queue tail = oldest item)."""
        if not self._enabled:
            lst = json.loads(_mem_store.get(key, ("[]", 0))[0])
            if not lst:
                return None
            val = lst.pop()
            _mem_store[key] = (json.dumps(lst), 0)
            return val
        return await self._cmd("RPOP", key)

    async def llen(self, key: str) -> int:
        """Length of the list at key (0 when absent)."""
        if not self._enabled:
            lst = json.loads(_mem_store.get(key, ("[]", 0))[0])
            return len(lst)
        result = await self._cmd("LLEN", key)
        return int(result or 0)

    async def lrange(self, key: str, start: int, stop: int) -> list[str]:
        """Inclusive slice of the list at key; stop=-1 means 'to the end' (Redis semantics)."""
        if not self._enabled:
            lst = json.loads(_mem_store.get(key, ("[]", 0))[0])
            end = None if stop == -1 else stop + 1
            return lst[start:end]
        result = await self._cmd("LRANGE", key, start, stop)
        return result or []

    # ── Rate limiting helper ──────────────────────────────────────────
    async def rate_limit_check(self, key: str, max_calls: int, window_secs: int) -> bool:
        """
        Returns True if the caller is within the rate limit, False if exceeded.
        Uses a simple counter with TTL (fixed-window algorithm).
        """
        count = await self.incr(key)
        # First hit in the window starts the expiry clock.
        if count == 1:
            await self.expire(key, window_secs)
        return count <= max_calls
189
+
190
+
191
+ # Module-level singleton — import this everywhere
192
+ cache = CacheService()
src/services/clustering.py ADDED
@@ -0,0 +1,365 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ src/services/clustering.py — Phase 3: HDBSCAN face clustering (People View)
3
+
4
+ Clusters all face vectors in the faces-arcface Pinecone index using HDBSCAN,
5
+ then stores cluster assignments in Supabase (face_clusters table).
6
+
7
+ Algorithm choice:
8
+ - HDBSCAN on ArcFace 512-d vectors (euclidean after L2 normalisation)
9
+ - min_cluster_size=3, min_samples=3, cluster_selection_epsilon=0.35
10
+ - Noise points (label=-1) are left unclustered — not forced into clusters
11
+ - Representative face = the vector closest to the cluster centroid
12
+
13
+ Pinecone fetch strategy:
14
+ - Pinecone free tier has no "list all vectors" endpoint
15
+ - We use a dummy query with random vectors + large top_k to page through
16
+ vectors. This is imperfect but works within free-tier constraints.
17
+ - Production alternative: store vector_ids in Supabase on upload (Phase 4)
18
+
19
+ Entry points:
20
+ run_clustering(pc, user_id) — full re-cluster, called by API endpoint
21
+ get_people(user_id) — read cluster list from Supabase
22
+ get_person_images(cluster_id, user_id) — images for one cluster
23
+ rename_cluster(cluster_id, name, user_id) — label "Mom", "John", etc.
24
+ """
25
+
26
+ import asyncio
27
+ import uuid
28
+ from datetime import datetime, timezone
29
+ from typing import Optional
30
+
31
+ import aiohttp
32
+ import numpy as np
33
+
34
+ from src.core.config import (
35
+ IDX_FACES_ARCFACE,
36
+ SUPABASE_URL, SUPABASE_SERVICE_KEY,
37
+ CLUSTER_MIN_SAMPLES, CLUSTER_MIN_CLUSTER_SIZE, CLUSTER_EPSILON,
38
+ FACE_SEARCH_TOP_K,
39
+ )
40
+
41
+
42
+ # ──────────────────────────────────────────────────────────────
43
+ # Supabase helpers
44
+ # ──────────────────────────────────────────────────────────────
45
def _hdr() -> dict:
    """Standard Supabase REST headers: service-role auth + JSON content type."""
    bearer = f"Bearer {SUPABASE_SERVICE_KEY}"
    return {
        "apikey": SUPABASE_SERVICE_KEY,
        "Authorization": bearer,
        "Content-Type": "application/json",
        "Prefer": "return=representation",
    }
+ }
52
+
53
+
54
+ async def _supa_upsert(table: str, rows: list[dict]) -> None:
55
+ if not SUPABASE_URL or not rows:
56
+ return
57
+ url = f"{SUPABASE_URL}/rest/v1/{table}"
58
+ headers = {**_hdr(), "Prefer": "resolution=merge-duplicates,return=minimal"}
59
+ async with aiohttp.ClientSession() as s:
60
+ await s.post(url, headers=headers, json=rows)
61
+
62
+
63
+ async def _supa_select(table: str, filters: str = "") -> list[dict]:
64
+ if not SUPABASE_URL:
65
+ return []
66
+ url = f"{SUPABASE_URL}/rest/v1/{table}?{filters}"
67
+ async with aiohttp.ClientSession() as s:
68
+ async with s.get(url, headers=_hdr()) as r:
69
+ if r.status == 200:
70
+ return await r.json()
71
+ return []
72
+
73
+
74
+ async def _supa_patch(table: str, filters: str, patch: dict) -> None:
75
+ if not SUPABASE_URL:
76
+ return
77
+ url = f"{SUPABASE_URL}/rest/v1/{table}?{filters}"
78
+ async with aiohttp.ClientSession() as s:
79
+ await s.patch(url, headers=_hdr(), json=patch)
80
+
81
+
82
+ async def _supa_delete(table: str, filters: str) -> None:
83
+ if not SUPABASE_URL:
84
+ return
85
+ url = f"{SUPABASE_URL}/rest/v1/{table}?{filters}"
86
+ async with aiohttp.ClientSession() as s:
87
+ await s.delete(url, headers=_hdr())
88
+
89
+
90
+ # ──────────────────────────────────────────────────────────────
91
+ # Pinecone vector fetch helpers
92
+ # ──────────────────────────────────────────────────────────────
93
+ def _fetch_all_vectors(idx, dim: int = 512, max_vectors: int = 10000) -> list[dict]:
94
+ """
95
+ Fetches as many vectors as possible from a Pinecone index using
96
+ random-probe queries. Free-tier Pinecone has no scan endpoint, so
97
+ we use diverse random probes to discover vectors.
98
+
99
+ Returns list of dicts: {id, values, metadata}
100
+ """
101
+ seen_ids: set = set()
102
+ collected: list[dict] = []
103
+ rng = np.random.default_rng(seed=42)
104
+
105
+ # 20 random probes — covers most of the index for typical gallery sizes
106
+ for _ in range(20):
107
+ probe = rng.standard_normal(dim).astype(np.float32)
108
+ probe /= np.linalg.norm(probe)
109
+
110
+ res = idx.query(
111
+ vector=probe.tolist(),
112
+ top_k=min(FACE_SEARCH_TOP_K, 1000),
113
+ include_metadata=True,
114
+ include_values=True,
115
+ )
116
+ for match in res.get("matches", []):
117
+ vid = match["id"]
118
+ if vid in seen_ids:
119
+ continue
120
+ seen_ids.add(vid)
121
+ values = match.get("values")
122
+ if values:
123
+ collected.append({
124
+ "id": vid,
125
+ "values": values,
126
+ "metadata": match.get("metadata", {}),
127
+ })
128
+ if len(collected) >= max_vectors:
129
+ break
130
+ if len(collected) >= max_vectors:
131
+ break
132
+
133
+ return collected
134
+
135
+
136
+ # ──────────────────────────────────────────────────────────────
137
+ # Core clustering logic
138
+ # ──────────────────────────────────────────────────────────────
139
+ def _run_hdbscan(vectors: np.ndarray) -> np.ndarray:
140
+ """
141
+ Runs HDBSCAN on the provided L2-normalised 512-d face vectors.
142
+ Returns integer label array (−1 = noise / unclustered).
143
+ """
144
+ try:
145
+ import hdbscan
146
+ except ImportError:
147
+ raise RuntimeError(
148
+ "hdbscan not installed. Add hdbscan>=0.8.33 to requirements.txt"
149
+ )
150
+
151
+ clusterer = hdbscan.HDBSCAN(
152
+ min_cluster_size=CLUSTER_MIN_CLUSTER_SIZE,
153
+ min_samples=CLUSTER_MIN_SAMPLES,
154
+ cluster_selection_epsilon=CLUSTER_EPSILON,
155
+ metric="euclidean",
156
+ core_dist_n_jobs=1, # HF CPU — avoid multiprocessing overhead
157
+ )
158
+ clusterer.fit(vectors)
159
+ return clusterer.labels_
160
+
161
+
162
+ def _pick_representative(cluster_vecs: np.ndarray, cluster_meta: list[dict]) -> dict:
163
+ """
164
+ Picks the face closest to the cluster centroid as the representative.
165
+ Returns the metadata dict for that face.
166
+ """
167
+ centroid = cluster_vecs.mean(axis=0)
168
+ centroid /= np.linalg.norm(centroid) + 1e-8
169
+ sims = cluster_vecs @ centroid
170
+ best_idx = int(np.argmax(sims))
171
+ return cluster_meta[best_idx]
172
+
173
+
174
+ # ──────────────────────────────────────────────────────────────
175
+ # Public entry points
176
+ # ──────────────────────────────────────────────────────────────
177
+ async def run_clustering(pc, user_id: str) -> dict:
178
+ """
179
+ Full re-cluster pipeline:
180
+ 1. Fetch all ArcFace vectors from Pinecone
181
+ 2. Run HDBSCAN
182
+ 3. Write cluster assignments to Supabase face_clusters table
183
+ 4. Write per-vector assignments to face_vector_clusters table
184
+
185
+ Returns a summary dict.
186
+ """
187
+ idx = pc.Index(IDX_FACES_ARCFACE)
188
+
189
+ # 1. Fetch vectors (blocking — run in thread pool)
190
+ raw = await asyncio.to_thread(_fetch_all_vectors, idx)
191
+ if len(raw) < CLUSTER_MIN_CLUSTER_SIZE:
192
+ return {"status": "skipped", "reason": "not enough vectors", "vectors": len(raw)}
193
+
194
+ ids = [r["id"] for r in raw]
195
+ metas = [r["metadata"] for r in raw]
196
+ matrix = np.array([r["values"] for r in raw], dtype=np.float32)
197
+
198
+ # L2-normalise before euclidean HDBSCAN (equivalent to angular distance)
199
+ norms = np.linalg.norm(matrix, axis=1, keepdims=True)
200
+ matrix = matrix / (norms + 1e-8)
201
+
202
+ # 2. Cluster (blocking)
203
+ labels = await asyncio.to_thread(_run_hdbscan, matrix)
204
+
205
+ unique_labels = set(labels) - {-1}
206
+ now_iso = datetime.now(timezone.utc).isoformat()
207
+
208
+ # 3. Delete existing clusters for this user (full re-cluster)
209
+ await _supa_delete("face_clusters", f"user_id=eq.{user_id}")
210
+ await _supa_delete("face_vector_clusters", f"user_id=eq.{user_id}")
211
+
212
+ cluster_rows = []
213
+ vector_rows = []
214
+
215
+ for label in sorted(unique_labels):
216
+ cluster_id = str(uuid.uuid4())
217
+ mask = labels == label
218
+ c_indices = np.where(mask)[0]
219
+ c_vecs = matrix[c_indices]
220
+ c_meta = [metas[i] for i in c_indices]
221
+ c_ids = [ids[i] for i in c_indices]
222
+
223
+ rep_meta = _pick_representative(c_vecs, c_meta)
224
+
225
+ cluster_rows.append({
226
+ "cluster_id": cluster_id,
227
+ "user_id": user_id,
228
+ "representative_face_crop": rep_meta.get("face_crop", ""),
229
+ "representative_vector_id": c_ids[0],
230
+ "face_count": int(len(c_indices)),
231
+ "name": None,
232
+ "created_at": now_iso,
233
+ "updated_at": now_iso,
234
+ })
235
+
236
+ for vid, meta in zip(c_ids, c_meta):
237
+ vector_rows.append({
238
+ "vector_id": vid,
239
+ "cluster_id": cluster_id,
240
+ "user_id": user_id,
241
+ "image_url": meta.get("url", ""),
242
+ "folder": meta.get("folder", ""),
243
+ "face_crop": meta.get("face_crop", ""),
244
+ "updated_at": now_iso,
245
+ })
246
+
247
+ # 4. Batch write to Supabase (200 rows per request)
248
+ for i in range(0, len(cluster_rows), 200):
249
+ await _supa_upsert("face_clusters", cluster_rows[i:i + 200])
250
+ for i in range(0, len(vector_rows), 200):
251
+ await _supa_upsert("face_vector_clusters", vector_rows[i:i + 200])
252
+
253
+ return {
254
+ "status": "ok",
255
+ "total_vectors": len(ids),
256
+ "clusters_found": len(unique_labels),
257
+ "noise_vectors": int(np.sum(labels == -1)),
258
+ }
259
+
260
+
261
+ async def get_people(user_id: str) -> list[dict]:
262
+ """Returns all identity clusters for a user, ordered by face_count desc."""
263
+ rows = await _supa_select(
264
+ "face_clusters",
265
+ f"user_id=eq.{user_id}&order=face_count.desc",
266
+ )
267
+ return [
268
+ {
269
+ "cluster_id": r["cluster_id"],
270
+ "name": r.get("name"),
271
+ "face_count": r.get("face_count", 0),
272
+ "representative_face_crop": r.get("representative_face_crop", ""),
273
+ }
274
+ for r in rows
275
+ ]
276
+
277
+
278
+ async def get_person_images(cluster_id: str, user_id: str) -> list[dict]:
279
+ """Returns all images belonging to a cluster."""
280
+ rows = await _supa_select(
281
+ "face_vector_clusters",
282
+ f"cluster_id=eq.{cluster_id}&user_id=eq.{user_id}",
283
+ )
284
+ # Dedupe by image_url (multiple face vectors can come from the same image)
285
+ seen: set = set()
286
+ out = []
287
+ for r in rows:
288
+ url = r.get("image_url", "")
289
+ if url and url not in seen:
290
+ seen.add(url)
291
+ out.append({
292
+ "url": url,
293
+ "folder": r.get("folder", ""),
294
+ "face_crop": r.get("face_crop", ""),
295
+ })
296
+ return out
297
+
298
+
299
+ async def rename_cluster(cluster_id: str, name: str, user_id: str) -> bool:
300
+ """Assigns a human-readable name to a cluster ('Mom', 'John', etc.)."""
301
+ await _supa_patch(
302
+ "face_clusters",
303
+ f"cluster_id=eq.{cluster_id}&user_id=eq.{user_id}",
304
+ {"name": name, "updated_at": datetime.now(timezone.utc).isoformat()},
305
+ )
306
+ return True
307
+
308
+
309
+ async def search_cluster_aware(
310
+ pc, image_map: dict, user_id: str
311
+ ) -> dict:
312
+ """
313
+ Cluster-aware search expansion (Phase 3 recall win).
314
+
315
+ Given an initial image_map from search_faces_split, look up which
316
+ clusters the matched faces belong to, then return ALL images in those
317
+ clusters. This achieves near-100% recall for well-indexed people.
318
+
319
+ Returns an expanded image_map in the same format as search_faces_split.
320
+ """
321
+ if not image_map:
322
+ return image_map
323
+
324
+ # Find which vector_ids were returned in the initial search
325
+ matched_vids = {v.get("vector_id") for v in image_map.values() if v.get("vector_id")}
326
+ if not matched_vids:
327
+ return image_map
328
+
329
+ # Look up cluster membership for those vector_ids
330
+ vid_list = ",".join(f'"{v}"' for v in matched_vids)
331
+ rows = await _supa_select(
332
+ "face_vector_clusters",
333
+ f"vector_id=in.({vid_list})&user_id=eq.{user_id}",
334
+ )
335
+
336
+ if not rows:
337
+ return image_map
338
+
339
+ # Collect all cluster_ids matched
340
+ cluster_ids = {r["cluster_id"] for r in rows}
341
+
342
+ # Fetch all images in those clusters
343
+ expanded = dict(image_map)
344
+ for cluster_id in cluster_ids:
345
+ cluster_images = await get_person_images(cluster_id, user_id)
346
+ for img in cluster_images:
347
+ url = img["url"]
348
+ if url not in expanded:
349
+ # Add with a slightly lower score than the worst match
350
+ # so cluster-expanded results sort after direct hits
351
+ min_score = min(
352
+ (v["fused_score"] for v in image_map.values()), default=0.3
353
+ )
354
+ expanded[url] = {
355
+ "fused_score": max(min_score - 0.01, 0.01),
356
+ "arcface_score": 0.0,
357
+ "adaface_score": 0.0,
358
+ "raw_score": 0.0,
359
+ "face_crop": img.get("face_crop", ""),
360
+ "folder": img.get("folder", "uncategorized"),
361
+ "vector_id": None,
362
+ "cluster_expanded": True,
363
+ }
364
+
365
+ return expanded
src/services/db_client.py ADDED
@@ -0,0 +1,327 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ from typing import Any, Dict, List
3
+
4
+ import cloudinary
5
+ import cloudinary.uploader
6
+ import cloudinary.api
7
+ from pinecone import Pinecone, ServerlessSpec
8
+
9
+ from src.core.config import (
10
+ IDX_FACES, IDX_OBJECTS,
11
+ IDX_FACES_ARCFACE, IDX_FACES_ADAFACE,
12
+ USE_SPLIT_FACE_INDEXES,
13
+ ARCFACE_WEIGHT, ADAFACE_WEIGHT,
14
+ FACE_MATCH_THRESHOLD, FUSED_MATCH_THRESHOLD, ARCFACE_SOLO_THRESHOLD,
15
+ FACE_SEARCH_TOP_K, OBJECT_SEARCH_TOP_K,
16
+ FACE_RESULTS_PER_QUERY_CAP,
17
+ FACE_DIM, ADAFACE_DIM, FUSED_FACE_DIM,
18
+ FACE_BLUR_THRESHOLD,
19
+ )
20
+
21
+
22
+ # ──────────────────────────────────────────────────────────────
23
+ # Pinecone client pool
24
+ # ──────────────────────────────────────────────────────────────
25
class PineconePool:
    """Caches one Pinecone client per API key so repeated calls reuse it."""

    def __init__(self):
        # api_key → Pinecone client
        self._clients = {}

    def get(self, api_key: str) -> Pinecone:
        """Return the cached client for `api_key`, creating it on first use."""
        client = self._clients.get(api_key)
        if client is None:
            client = Pinecone(api_key=api_key)
            self._clients[api_key] = client
        return client
+ return self._clients[api_key]
33
+
34
+
35
+ pinecone_pool = PineconePool()
36
+
37
+
38
+ # ──────────────────────────────────────────────────────────────
39
+ # Cloudinary helpers (unchanged from Phase 1)
40
+ # ──────────────────────────────────────────────────────────────
41
def _set_cld_config(creds: dict):
    """Point the global Cloudinary config at the given account credentials.

    NOTE(review): cloudinary.config() is process-global state — concurrent
    requests using different credentials could interleave; confirm the
    service is effectively single-account per process.
    """
    cloudinary.config(
        cloud_name=creds.get("cloud_name"),
        api_key=creds.get("api_key"),
        api_secret=creds.get("api_secret"),
        secure=True,
    )
+ )
48
+
49
+
50
def cld_ping(creds: dict):
    """Credential check — raises if the Cloudinary account is unreachable."""
    _set_cld_config(creds)
    cloudinary.api.ping()
+ cloudinary.api.ping()
53
+
54
+
55
def cld_upload(file_obj, folder: str, creds: dict) -> dict:
    """Upload a file-like object into `folder`; returns Cloudinary's response dict."""
    _set_cld_config(creds)
    return cloudinary.uploader.upload(file_obj, folder=folder)
+ return cloudinary.uploader.upload(file_obj, folder=folder)
58
+
59
+
60
def cld_root_folders(creds: dict) -> dict:
    """List the account's top-level folders."""
    _set_cld_config(creds)
    return cloudinary.api.root_folders()
+ return cloudinary.api.root_folders()
63
+
64
+
65
def cld_list_folder_images(
    folder: str, creds: dict, cursor: str | None = None, page_size: int = 100
) -> dict:
    """
    Fetch one page of uploaded resources under `folder` (prefix match).

    Pass the returned `next_cursor` back as `cursor` to get the next page.
    FIX: `cursor` is annotated `str | None` — the old `cursor: str = None`
    default contradicted its own annotation.
    """
    _set_cld_config(creds)
    kwargs = {"type": "upload", "prefix": f"{folder}/", "max_results": page_size}
    if cursor:
        kwargs["next_cursor"] = cursor
    return cloudinary.api.resources(**kwargs)
+ return cloudinary.api.resources(**kwargs)
71
+
72
+
73
def cld_delete_resource(public_id: str, creds: dict):
    """Delete a single uploaded resource by its public id."""
    _set_cld_config(creds)
    cloudinary.uploader.destroy(public_id)
+ cloudinary.uploader.destroy(public_id)
76
+
77
+
78
def cld_delete_folder_resources(folder: str, creds: dict):
    """Delete every resource whose public id starts with `folder`/."""
    _set_cld_config(creds)
    cloudinary.api.delete_resources_by_prefix(f"{folder}/")
+ cloudinary.api.delete_resources_by_prefix(f"{folder}/")
81
+
82
+
83
def cld_remove_folder(folder: str, creds: dict):
    """Best-effort removal of an (empty) folder — failures are ignored
    because Cloudinary refuses to delete non-empty folders."""
    _set_cld_config(creds)
    try:
        cloudinary.api.delete_folder(folder)
    except Exception:
        pass
+ pass
89
+
90
+
91
def cld_delete_all_paginated(creds: dict) -> int:
    """
    Delete every uploaded resource in the account, one 500-item page at a
    time.  Returns the total number of resources deleted.
    """
    _set_cld_config(creds)
    total = 0
    cursor = None
    while True:
        params = {"type": "upload", "max_results": 500}
        if cursor:
            params["next_cursor"] = cursor
        page = cloudinary.api.resources(**params)
        batch = page.get("resources", [])
        if not batch:
            return total
        public_ids = [item["public_id"] for item in batch]
        cloudinary.api.delete_resources(public_ids)
        total += len(public_ids)
        cursor = page.get("next_cursor")
        if not cursor:
            return total
+ return deleted
110
+
111
+
112
+ # ──────────────────────────────────────────────────────────────
113
+ # Index management
114
+ # ──────────────────────────────────────────────────────────────
115
def ensure_indexes(pc: Pinecone) -> List[str]:
    """
    Ensures all required indexes exist.
    - Objects index: 1536d (unchanged)
    - Legacy faces index: 1024d (kept for backward compat)
    - New split indexes: 512d each (ArcFace + AdaFace separately)

    Returns the list of index names that were newly created (existing
    indexes are left untouched).
    """
    created = []
    existing = {idx.name for idx in pc.list_indexes()}

    index_specs = [
        (IDX_OBJECTS, 1536),
        (IDX_FACES, FUSED_FACE_DIM),  # legacy — only created on first run if missing
    ]

    if USE_SPLIT_FACE_INDEXES:
        index_specs.extend([
            (IDX_FACES_ARCFACE, FACE_DIM),
            (IDX_FACES_ADAFACE, ADAFACE_DIM),
        ])

    # Create any index that is missing; cosine metric on AWS us-east-1.
    for name, dim in index_specs:
        if name not in existing:
            pc.create_index(
                name=name,
                dimension=dim,
                metric="cosine",
                spec=ServerlessSpec(cloud="aws", region="us-east-1"),
            )
            created.append(name)
    return created
+ return created
146
+
147
+
148
def delete_and_recreate_indexes(pc: Pinecone):
    """Drop all managed indexes (including split ones) and recreate them.

    Backs /api/reset-database.
    """
    present = {idx.name for idx in pc.list_indexes()}
    to_drop = [IDX_FACES, IDX_OBJECTS]
    if USE_SPLIT_FACE_INDEXES:
        to_drop += [IDX_FACES_ARCFACE, IDX_FACES_ADAFACE]
    for idx_name in to_drop:
        if idx_name not in present:
            continue
        pc.delete_index(idx_name)
        # Give Pinecone time to release the name before recreation.
        time.sleep(5)
    ensure_indexes(pc)
159
+
160
+
161
+ # ──────────────────────────────────────────────────────────────
162
+ # LEGACY face search (for backward compat / fallback)
163
+ # ──────────────────────────────────────────────────────────────
164
def search_faces(idx, vec: List[float], det_score: float, filter_dict: dict = None) -> Dict[str, Any]:
    """Legacy face query against the old fused 1024-d index.

    Returns ``{url: {raw_score, face_crop, folder}}`` keeping only the
    best-scoring match per image URL. ``det_score`` is accepted for
    interface compatibility but not used by this implementation.
    """
    legacy_floor = 0.45  # cosine floor on the old fused embedding

    params = {"vector": vec, "top_k": FACE_SEARCH_TOP_K, "include_metadata": True}
    if filter_dict:
        params["filter"] = filter_dict

    best_per_url: Dict[str, Any] = {}
    for m in idx.query(**params).get("matches", []):
        score = m.get("score", 0)
        if score < legacy_floor:
            continue
        meta = m.get("metadata", {})
        url = meta.get("url")
        if not url:
            continue
        prev = best_per_url.get(url)
        if prev is None or prev["raw_score"] < score:
            best_per_url[url] = {
                "raw_score": score,
                "face_crop": meta.get("face_crop", ""),
                "folder": meta.get("folder", "uncategorized"),
            }
    return best_per_url
187
+
188
+
189
+ # ──────────────────────────────────────────────────────────────
190
+ # PHASE 2: Split-index face search with score fusion
191
+ # ──────────────────────────────────────────────────────────────
192
def search_faces_split(
    idx_arcface, idx_adaface,
    arcface_vec: List[float], adaface_vec: List[float],
    filter_dict: dict = None,
) -> Dict[str, Any]:
    """
    Queries BOTH face indexes and fuses scores per vector_id; returns a map
    keyed by url with the best fused score across all query augmentations.

    Acceptance rules, in order:
      * ArcFace score below FACE_MATCH_THRESHOLD -> rejected outright
        (ArcFace is the primary signal and acts as a hard floor).
      * Hit present only in ArcFace (no AdaFace result for that vector_id)
        -> must clear the stricter ARCFACE_SOLO_THRESHOLD; the ArcFace
        score alone becomes the fused score.
      * Hit present in both -> fused_score = ARCFACE_WEIGHT * arcface_cos
        + ADAFACE_WEIGHT * adaface_cos, and must clear FUSED_MATCH_THRESHOLD.
      * Matches whose stored blur_score is under FACE_BLUR_THRESHOLD are dropped.

    The result is capped at FACE_RESULTS_PER_QUERY_CAP entries, keeping the
    highest fused scores.
    """
    query_kwargs_base = {"top_k": FACE_SEARCH_TOP_K, "include_metadata": True}
    if filter_dict:
        query_kwargs_base["filter"] = filter_dict

    # NOTE(review): the two index queries run sequentially in this function;
    # any parallelism must come from the caller.
    arc_res = idx_arcface.query(vector=arcface_vec, **query_kwargs_base)

    # Only query AdaFace if we have a valid vector — an (almost) all-zero
    # vector signals that the AdaFace embedding failed upstream.
    has_ada = adaface_vec is not None and any(abs(x) > 1e-6 for x in adaface_vec)
    if has_ada:
        ada_res = idx_adaface.query(vector=adaface_vec, **query_kwargs_base)
    else:
        ada_res = {"matches": []}

    # AdaFace scores keyed by vector_id, for O(1) fusion lookups below.
    ada_by_id = {
        m["id"]: m.get("score", 0.0)
        for m in ada_res.get("matches", [])
    }

    # AdaFace metadata keyed by vector_id.
    # NOTE(review): ada_meta_by_id and seen_vector_ids are populated but never
    # read below — presumably intended for a second pass over AdaFace-only
    # matches that was not written. Confirm intent or remove.
    ada_meta_by_id = {
        m["id"]: m.get("metadata", {})
        for m in ada_res.get("matches", [])
    }

    image_map: Dict[str, Any] = {}
    seen_vector_ids = set()

    # ── Pass 1: ArcFace matches (the primary signal) ─────────────
    for match in arc_res.get("matches", []):
        vid = match["id"]
        seen_vector_ids.add(vid)
        arc_score = match.get("score", 0.0)

        # Hard floor: if ArcFace says no, it's no. This kills imposters.
        if arc_score < FACE_MATCH_THRESHOLD:
            continue

        ada_score = ada_by_id.get(vid, None)
        if ada_score is None:
            # No AdaFace confirmation — apply stricter solo threshold.
            if arc_score < ARCFACE_SOLO_THRESHOLD:
                continue
            fused = arc_score
        else:
            fused = ARCFACE_WEIGHT * arc_score + ADAFACE_WEIGHT * ada_score
            if fused < FUSED_MATCH_THRESHOLD:
                continue

        meta = match.get("metadata", {})
        url = meta.get("url")
        if not url:
            continue
        # Missing blur_score defaults to 100.0, i.e. treated as sharp.
        if meta.get("blur_score", 100.0) < FACE_BLUR_THRESHOLD:
            continue

        # Keep only the best fused score per image URL.
        existing = image_map.get(url)
        if not existing or existing["fused_score"] < fused:
            image_map[url] = {
                "fused_score": fused,
                "arcface_score": arc_score,
                "adaface_score": ada_score if ada_score is not None else 0.0,
                "raw_score": arc_score,  # for UI back-compat
                "face_crop": meta.get("face_crop", ""),
                "folder": meta.get("folder", "uncategorized"),
                "vector_id": vid,
            }

    # Cap at most N results per query face
    if len(image_map) > FACE_RESULTS_PER_QUERY_CAP:
        top = sorted(
            image_map.items(),
            key=lambda kv: kv[1]["fused_score"],
            reverse=True,
        )[:FACE_RESULTS_PER_QUERY_CAP]
        image_map = dict(top)

    return image_map
287
+
288
+
289
+ # ──────────────────────────────────────────────────────────────
290
+ # Object search (unchanged)
291
+ # ──────────────────────────────────────────────────────────────
292
def search_objects(idx, vec: List[float]) -> List[Dict[str, Any]]:
    """Query the object index with *vec*; returns url/score/folder dicts.

    No score threshold is applied here — all top-k matches are returned.
    """
    response = idx.query(vector=vec, top_k=OBJECT_SEARCH_TOP_K, include_metadata=True)
    results: List[Dict[str, Any]] = []
    for m in response.get("matches", []):
        meta = m.get("metadata", {})
        score = m.get("score", 0)
        results.append({
            "url": meta.get("url", ""),
            "score": round(score, 4),
            "raw_score": score,
            "folder": meta.get("folder", "uncategorized"),
        })
    return results
304
+
305
+
306
+ # ──────────────────────────────────────────────────────────────
307
+ # Result merging
308
+ # ──────────────────────────────────────────────────────────────
309
def merge_face_results(groups: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Dedupe matches across query faces/augmentations; best score wins per URL.

    Returns the surviving matches sorted by score, highest first.
    """
    best: Dict[str, Dict[str, Any]] = {}
    for grp in groups:
        for m in grp.get("matches", []):
            key = m["url"]
            current = best.get(key)
            if current is None or current["score"] < m["score"]:
                best[key] = m
    return sorted(best.values(), key=lambda m: m["score"], reverse=True)
318
+
319
+
320
def merge_object_results(nested_results: List[List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
    """Flatten per-crop object result lists, dedupe by URL keeping the top score."""
    winners: Dict[str, Dict[str, Any]] = {}
    for batch in nested_results:
        for m in batch:
            key = m["url"]
            held = winners.get(key)
            if held is None or held["score"] < m["score"]:
                winners[key] = m
    return sorted(winners.values(), key=lambda m: m["score"], reverse=True)
src/services/jobs.py ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ src/services/jobs.py — Phase 3: Async upload job queue
3
+ """
4
+
5
+ import asyncio
6
+ import json
7
+ import uuid
8
+ from typing import Any, Optional
9
+
10
+ import aiohttp
11
+
12
+ from src.core.config import (
13
+ SUPABASE_URL, SUPABASE_SERVICE_KEY,
14
+ USE_ASYNC_UPLOADS,
15
+ )
16
+ from src.services.cache import cache
17
+
18
+ QUEUE_KEY = "upload_jobs_queue"
19
+ JOB_TTL = 86400 # 24 h
20
+
21
+
22
+ # ──────────────────────────────────────────────────────────────
23
+ # Supabase helpers
24
+ # ──────────────────────────────────────────────────────────────
25
def _supa_headers() -> dict:
    """Common headers for Supabase REST calls (service-role auth)."""
    headers = {
        "apikey": SUPABASE_SERVICE_KEY,
        "Authorization": f"Bearer {SUPABASE_SERVICE_KEY}",
        "Content-Type": "application/json",
    }
    # "return=minimal" keeps writes cheap — no row representation echoed back.
    headers["Prefer"] = "return=minimal"
    return headers
32
+
33
+
34
+ def _clean_row(row: dict) -> dict:
35
+ """
36
+ Remove None values before sending to Supabase.
37
+ Supabase REST rejects Python None in JSON — omit the key entirely
38
+ and let Postgres use the column default instead.
39
+ """
40
+ return {k: v for k, v in row.items() if v is not None}
41
+
42
+
43
async def _supa_insert(table: str, row: dict) -> bool:
    """POST one row to a Supabase table.

    Returns True on HTTP 200/201; logs the failure and returns False for
    everything else (missing config, non-2xx status, transport error).
    """
    if not SUPABASE_URL or not SUPABASE_SERVICE_KEY:
        print("[Supabase] SUPABASE_URL or SUPABASE_SERVICE_KEY not set — skipping insert")
        return False

    endpoint = f"{SUPABASE_URL}/rest/v1/{table}"
    payload = _clean_row(row)

    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(endpoint, headers=_supa_headers(), json=payload) as resp:
                if resp.status in (200, 201):
                    return True
                body = await resp.text()
                print(f"[Supabase INSERT ERROR] table={table} status={resp.status} body={body[:500]}")
                print(f"[Supabase INSERT ERROR] row_keys={list(payload.keys())}")
                return False
    except Exception as e:
        print(f"[Supabase INSERT EXCEPTION] table={table} error={e}")
        return False
65
+
66
+
67
async def _supa_patch(table: str, job_id: str, patch: dict) -> bool:
    """PATCH the row(s) where job_id matches; True unless config/HTTP/transport failure."""
    if not SUPABASE_URL or not SUPABASE_SERVICE_KEY:
        return False
    endpoint = f"{SUPABASE_URL}/rest/v1/{table}?job_id=eq.{job_id}"
    payload = _clean_row(patch)
    try:
        async with aiohttp.ClientSession() as session:
            async with session.patch(endpoint, headers=_supa_headers(), json=payload) as resp:
                if resp.status in (200, 201, 204):
                    return True
                body = await resp.text()
                print(f"[Supabase PATCH ERROR] job_id={job_id} status={resp.status} body={body[:300]}")
                return False
    except Exception as e:
        print(f"[Supabase PATCH EXCEPTION] job_id={job_id} error={e}")
        return False
83
+
84
+
85
async def _supa_get(table: str, job_id: str) -> Optional[dict]:
    """Fetch a single row by job_id; None when missing, unconfigured, or errored."""
    if not SUPABASE_URL or not SUPABASE_SERVICE_KEY:
        return None
    # select=* keeps the projection explicit and avoids column-name ambiguity.
    endpoint = f"{SUPABASE_URL}/rest/v1/{table}?select=*&job_id=eq.{job_id}&limit=1"
    # A GET needs the representation back, overriding the minimal-write default.
    headers = {**_supa_headers(), "Prefer": "return=representation"}
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(endpoint, headers=headers) as resp:
                if resp.status == 200:
                    rows = await resp.json()
                    if rows:
                        return rows[0]
                    return None
    except Exception as e:
        print(f"[Supabase GET EXCEPTION] job_id={job_id} error={e}")
    return None
100
+
101
+
102
+ # ──────────────────────────────────────────────────────────────
103
+ # Public API
104
+ # ──────────────────────────────────────────────────────────────
105
async def create_job(
    user_id: str,
    folder: str,
    total_files: int,
    job_payload: dict,
) -> str:
    """Register a new upload job in Supabase + Redis and enqueue it.

    Redis is the source of truth for status polling, so the job proceeds
    even when the Supabase insert fails (the failure is logged, not raised).
    Only columns guaranteed to exist are sent to Supabase; omitted columns
    (result/error/timestamps) fall back to their Postgres defaults.
    """
    job_id = str(uuid.uuid4())

    row = {
        "job_id": job_id,
        "user_id": user_id or "anonymous",
        "folder": folder,
        "status": "pending",
        "total_files": total_files,
        "processed_files": 0,
    }

    if await _supa_insert("upload_jobs", row):
        print(f"[Jobs] Created job {job_id} in Supabase ✓")
    else:
        print(f"[Jobs] Supabase insert FAILED for job {job_id} — job will still work via Redis")

    # Mirror into Redis (with the raw payload) and push onto the work queue.
    redis_doc = {**row, "payload": job_payload, "status": "pending"}
    await cache.set_json(f"job:{job_id}", redis_doc, ttl=JOB_TTL)
    await cache.lpush(QUEUE_KEY, job_id)

    return job_id
140
+
141
+
142
async def get_job_status(job_id: str) -> Optional[dict]:
    """Return job status from Redis (fast path) or Supabase as fallback."""
    doc = await cache.get_json(f"job:{job_id}")
    if doc:
        # The raw upload payload is never exposed to pollers.
        doc.pop("payload", None)
        return doc
    return await _supa_get("upload_jobs", job_id)
150
+
151
+
152
async def update_job_progress(job_id: str, processed: int, total: int) -> None:
    """Mark the job as 'processing' with *processed* files done.

    Writes the patch to Supabase (best-effort) and mirrors it into the
    Redis job document. ``total`` is currently unused — total_files is
    written once at job creation — and is kept only for caller symmetry.
    """
    patch = {
        "status": "processing",
        "processed_files": processed,
    }
    await _supa_patch("upload_jobs", job_id, patch)

    # Keep the Redis mirror in sync; refresh the TTL on every update.
    cached = await cache.get_json(f"job:{job_id}") or {}
    cached.update(patch)
    await cache.set_json(f"job:{job_id}", cached, ttl=JOB_TTL)
162
+
163
+
164
async def complete_job(job_id: str, result: dict) -> None:
    """Finalize a job as 'completed' and persist its result summary.

    Supabase receives the result serialized (JSONB column); the Redis
    mirror keeps the native dict so pollers get structured data.
    """
    patch = {
        "status": "completed",
        "processed_files": result.get("files", 0),
        "result": json.dumps(result),  # JSONB column — serialize explicitly
    }
    await _supa_patch("upload_jobs", job_id, patch)

    doc = await cache.get_json(f"job:{job_id}") or {}
    doc.update(patch)
    doc["result"] = result  # keep as dict in Redis
    doc.pop("payload", None)
    await cache.set_json(f"job:{job_id}", doc, ttl=JOB_TTL)
176
+
177
+
178
async def fail_job(job_id: str, error: str) -> None:
    """Mark the job 'failed', recording a truncated error message in both stores."""
    patch = {
        "status": "failed",
        "error": str(error)[:500],  # stay well under any DB column limit
    }
    await _supa_patch("upload_jobs", job_id, patch)

    doc = await cache.get_json(f"job:{job_id}") or {}
    doc.update(patch)
    doc.pop("payload", None)
    await cache.set_json(f"job:{job_id}", doc, ttl=JOB_TTL)
189
+
190
+
191
+ # ──────────────────────────────────────────────────────────────
192
+ # Background worker
193
+ # ──────────────────────────────────────────────────────────────
194
async def run_worker(app_state) -> None:
    """Long-running background consumer for the upload-job queue.

    Pops job ids from the Redis list (RPOP against create_job's LPUSH,
    i.e. FIFO), loads each job's payload from the Redis job document, and
    executes it. Runs until the task is cancelled; any unexpected error is
    logged and the loop backs off 5 s before retrying.
    """
    print("[JobWorker] started")
    while True:
        try:
            job_id = await cache.rpop(QUEUE_KEY)
            if not job_id:
                # Queue empty — poll again shortly.
                await asyncio.sleep(2)
                continue

            print(f"[JobWorker] picked up job {job_id}")
            cached = await cache.get_json(f"job:{job_id}")
            if not cached:
                # Job record expired (JOB_TTL) or was never written.
                print(f"[JobWorker] job {job_id} not found in Redis — skipping")
                continue

            payload = cached.get("payload", {})
            await _execute_upload_job(job_id, payload, app_state)

        except asyncio.CancelledError:
            # Propagated on shutdown — exit the loop cleanly.
            print("[JobWorker] cancelled — shutting down")
            break
        except Exception as e:
            print(f"[JobWorker] unhandled error: {e}")
            await asyncio.sleep(5)
218
+
219
+
220
async def _execute_upload_job(job_id: str, payload: dict, app_state) -> None:
    """Run one queued upload job end to end.

    Processes the payload's files in chunks of 10 concurrent tasks,
    reports progress after each chunk, batch-upserts all vectors, and
    records completion or failure via complete_job/fail_job.
    """
    # Imported at call time rather than module level — presumably to avoid
    # circular imports with src.api.upload; confirm before moving to the top.
    from src.services.db_client import pinecone_pool, ensure_indexes
    from src.api.upload import _process_one_file, _batch_upsert_all

    files_data: list[dict] = payload.get("files_data", [])
    folder: str = payload.get("folder", "uncategorized")
    detect_faces: bool = payload.get("detect_faces", True)
    # NOTE(review): user_id is extracted but never used below.
    user_id: str = payload.get("user_id", "anonymous")
    keys: dict = payload.get("keys", {})
    total = len(files_data)

    print(f"[JobWorker] executing job {job_id}: {total} files in '{folder}'")

    try:
        pc = pinecone_pool.get(keys["pinecone_key"])
        # ensure_indexes is blocking — run it off the event loop.
        created = await asyncio.to_thread(ensure_indexes, pc)
        if created:
            # Freshly created indexes need a moment before accepting upserts.
            await asyncio.sleep(8)

        CHUNK = 10  # max concurrent file-processing tasks per batch
        all_results = []
        processed = 0

        for chunk_start in range(0, total, CHUNK):
            chunk = files_data[chunk_start:chunk_start + CHUNK]
            chunk_results = await asyncio.gather(*[
                _process_one_file(
                    file_bytes=bytes(f["bytes"]),
                    folder=folder,
                    detect_faces=detect_faces,
                    keys=keys,
                    ai=app_state.ai,
                    sem=app_state.ai_semaphore,
                )
                for f in chunk
            ])
            all_results.extend(chunk_results)
            processed += len(chunk)
            await update_job_progress(job_id, processed, total)
            print(f"[JobWorker] job {job_id}: {processed}/{total} processed")

        # Single batched upsert of every vector produced above.
        summary = await _batch_upsert_all(results=all_results, folder=folder, pc=pc)

        await complete_job(job_id, {
            "files": len(summary["uploaded_urls"]),
            "urls": summary["uploaded_urls"],
            "summary": {
                "arcface_vecs": summary["arcface_vecs"],
                "adaface_vecs": summary["adaface_vecs"],
                "object_vecs": summary["object_vecs"],
            },
        })
        print(f"[JobWorker] job {job_id} COMPLETED ✓")

    except Exception as e:
        # Any failure marks the whole job failed; the traceback goes to logs.
        print(f"[JobWorker] job {job_id} FAILED: {e}")
        import traceback
        traceback.print_exc()
        await fail_job(job_id, str(e))
279
+
280
+
281
+ # ──────────────────────────────────────────────────────────────
282
+ # Utility
283
+ # ──────────────────────────────────────────────────────────────
284
+ def _iso_now() -> str:
285
+ from datetime import datetime, timezone
286
+ return datetime.now(timezone.utc).isoformat()
src/services/onnx_models.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ONNX runtime wrappers. Drop-in replacement for the PyTorch SigLIP + DINOv2
3
+ models inside AIModelManager._embed_crops_batch.
4
+
5
+ Import pattern in ai_manager.py:
6
+
7
+ from src.services.onnx_models import ONNXVisionStack
8
+ if USE_ONNX_VISION:
9
+ self.vision_stack = ONNXVisionStack(ONNX_MODELS_DIR, ONNX_USE_INT8)
10
+ # use self.vision_stack.encode(crops) instead of torch models
11
+ """
12
+ import os
13
+ import numpy as np
14
+ from PIL import Image
15
+ import onnxruntime as ort
16
+
17
+
18
+ # SigLIP normalization (ImageNet-style mean/std for siglip-base-patch16-224)
19
+ _SIGLIP_MEAN = np.array([0.5, 0.5, 0.5], dtype=np.float32)
20
+ _SIGLIP_STD = np.array([0.5, 0.5, 0.5], dtype=np.float32)
21
+
22
+ # DINOv2 uses ImageNet stats
23
+ _DINO_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
24
+ _DINO_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)
25
+
26
+
27
def _preprocess_batch(
    pil_images: list[Image.Image], size: int, mean: np.ndarray, std: np.ndarray
) -> np.ndarray:
    """Resize + normalize a batch of PIL images to a (B, 3, size, size) fp32 array."""
    tensors = []
    for img in pil_images:
        rgb = img if img.mode == "RGB" else img.convert("RGB")
        rgb = rgb.resize((size, size), Image.BILINEAR)
        pixels = np.asarray(rgb, dtype=np.float32) / 255.0
        pixels = (pixels - mean) / std
        tensors.append(pixels.transpose(2, 0, 1))  # HWC -> CHW
    return np.stack(tensors, axis=0)
41
+
42
+
43
+ def _l2_normalize(x: np.ndarray, axis: int = 1) -> np.ndarray:
44
+ n = np.linalg.norm(x, axis=axis, keepdims=True)
45
+ n = np.where(n == 0, 1.0, n)
46
+ return x / n
47
+
48
+
49
class ONNXVisionStack:
    """SigLIP + DINOv2 fused embeddings via ONNX Runtime (CPU)."""

    def __init__(self, models_dir: str, use_int8: bool = True):
        """Load both vision models (int8-quantized by default) and warm them up.

        Raises FileNotFoundError with guidance when either .onnx file is missing.
        """
        suffix = "_int8.onnx" if use_int8 else ".onnx"
        siglip_path = os.path.join(models_dir, f"siglip_vision{suffix}")
        dino_path = os.path.join(models_dir, f"dinov2{suffix}")

        if not os.path.exists(siglip_path):
            raise FileNotFoundError(
                f"ONNX model not found: {siglip_path}. "
                "Run scripts/convert_to_onnx.py and upload outputs to the Space."
            )
        if not os.path.exists(dino_path):
            raise FileNotFoundError(f"ONNX model not found: {dino_path}")

        opts = ort.SessionOptions()
        opts.intra_op_num_threads = int(os.getenv("OMP_NUM_THREADS", "2"))
        opts.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

        self.siglip = ort.InferenceSession(
            siglip_path, sess_options=opts, providers=["CPUExecutionProvider"]
        )
        self.dino = ort.InferenceSession(
            dino_path, sess_options=opts, providers=["CPUExecutionProvider"]
        )

        # Warm up once per session: the first run pays kernel-compilation cost.
        warm = np.zeros((1, 3, 224, 224), dtype=np.float32)
        for session in (self.siglip, self.dino):
            session.run(None, {"pixel_values": warm})

    def encode(self, pil_crops: list[Image.Image]) -> list[np.ndarray]:
        """Return one 1536-d L2-normalized fused vector per input crop.

        Matches the output shape of the old PyTorch path: a Python list of
        per-crop numpy vectors; empty input yields an empty list.
        """
        if not pil_crops:
            return []

        siglip_in = _preprocess_batch(pil_crops, 224, _SIGLIP_MEAN, _SIGLIP_STD)
        dino_in = _preprocess_batch(pil_crops, 224, _DINO_MEAN, _DINO_STD)

        siglip_emb = self.siglip.run(None, {"pixel_values": siglip_in})[0]
        dino_emb = self.dino.run(None, {"pixel_values": dino_in})[0]

        # Normalize each half, concatenate, then normalize the fused vector.
        fused = np.concatenate(
            [_l2_normalize(siglip_emb), _l2_normalize(dino_emb)], axis=1
        )
        fused = _l2_normalize(fused)
        return list(fused)