Spaces:

chouchouvs
/

DeepIndex

Running

App Files Files Community

chouchouvs committed on Sep 7

Commit

dd055bb

verified ·

1 Parent(s): 13ebb90

Update main.py

Browse files

Files changed (1) hide show

main.py +106 -27

main.py CHANGED Viewed

@@ -11,23 +11,37 @@ from qdrant_client import QdrantClient
 from qdrant_client.http.models import VectorParams, Distance, PointStruct
 # ---------- logging ----------
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(levelname)s:%(name)s:%(message)s"
-)
 LOG = logging.getLogger("remote_indexer")
 # ---------- ENV ----------
-AUTH_TOKEN = os.getenv("REMOTE_INDEX_TOKEN", "").strip()   # simple header auth
 HF_TOKEN   = os.getenv("HF_API_TOKEN", "").strip()
 HF_MODEL   = os.getenv("HF_EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
-HF_URL     = os.getenv("HF_API_URL", "").strip() or f"https://api-inference.huggingface.co/pipeline/feature-extraction/{HF_MODEL}"
 QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
 QDRANT_API = os.getenv("QDRANT_API_KEY", "").strip()
-if not HF_TOKEN:
-    LOG.warning("HF_API_TOKEN manquant — le service refusera /index et /query.")
 # ---------- Clients ----------
 try:
@@ -61,24 +75,74 @@ def _auth(x_auth: Optional[str]):
     if AUTH_TOKEN and (x_auth or "") != AUTH_TOKEN:
         raise HTTPException(status_code=401, detail="Unauthorized")
-def _post_embeddings(batch: List[str]) -> Tuple[np.ndarray, int]:
     if not HF_TOKEN:
-        raise RuntimeError("HF_API_TOKEN manquant (server).")
     headers = {"Authorization": f"Bearer {HF_TOKEN}"}
-    r = requests.post(HF_URL, headers=headers, json=batch, timeout=120)
-    size = int(r.headers.get("Content-Length", "0"))
-    r.raise_for_status()
-    data = r.json()
     arr = np.array(data, dtype=np.float32)
     # [batch, dim] (sentence-transformers) ou [batch, tokens, dim] -> mean-pooling
     if arr.ndim == 3:
         arr = arr.mean(axis=1)
     if arr.ndim != 2:
-        raise RuntimeError(f"Unexpected embeddings shape: {arr.shape}")
     norms = np.linalg.norm(arr, axis=1, keepdims=True) + 1e-12
     arr = arr / norms
     return arr.astype(np.float32), size
 def _ensure_collection(name: str, dim: int):
     try:
         qdr.get_collection(name)
@@ -119,9 +183,9 @@ def run_index_job(job_id: str, req: IndexRequest):
         _set_status(job_id, "running")
         total_chunks = 0
         LOG.info(f"[{job_id}] Index start project={req.project_id} files={len(req.files)}")
-        _append_log(job_id, f"Start project={req.project_id} files={len(req.files)}")
-        # warmup pour dimension
         warmup = []
         for f in req.files[:1]:
             warmup.append(next(_chunk_with_spans(f.text, req.chunk_size, req.overlap))[2])
@@ -178,17 +242,30 @@ app = FastAPI()
 @app.get("/")
 def root():
-    return {"ok": True, "service": "remote-indexer", "docs": "/health, /index, /status/{job_id}, /query, /wipe"}
 @app.get("/health")
 def health():
     return {"ok": True}
 @app.post("/index")
 def start_index(req: IndexRequest, background_tasks: BackgroundTasks, x_auth_token: Optional[str] = Header(default=None)):
-    _auth(x_auth_token)
-    if not HF_TOKEN:
-        raise HTTPException(400, "HF_API_TOKEN manquant côté serveur.")
     job_id = uuid.uuid4().hex[:12]
     JOBS[job_id] = {"status": "queued", "logs": [], "created": time.time()}
     background_tasks.add_task(run_index_job, job_id, req)
@@ -196,7 +273,8 @@ def start_index(req: IndexRequest, background_tasks: BackgroundTasks, x_auth_tok
 @app.get("/status/{job_id}")
 def status(job_id: str, x_auth_token: Optional[str] = Header(default=None)):
-    _auth(x_auth_token)
     j = JOBS.get(job_id)
     if not j:
         raise HTTPException(404, "job inconnu")
@@ -204,9 +282,9 @@ def status(job_id: str, x_auth_token: Optional[str] = Header(default=None)):
 @app.post("/query")
 def query(req: QueryRequest, x_auth_token: Optional[str] = Header(default=None)):
-    _auth(x_auth_token)
-    if not HF_TOKEN:
-        raise HTTPException(400, "HF_API_TOKEN manquant côté serveur.")
     vec, _ = _post_embeddings([req.query])
     vec = vec[0].tolist()
     col = f"proj_{req.project_id}"
@@ -225,7 +303,8 @@ def query(req: QueryRequest, x_auth_token: Optional[str] = Header(default=None))
 @app.post("/wipe")
 def wipe_collection(project_id: str, x_auth_token: Optional[str] = Header(default=None)):
-    _auth(x_auth_token)
     col = f"proj_{project_id}"
     try:
         qdr.delete_collection(col)
@@ -233,7 +312,7 @@ def wipe_collection(project_id: str, x_auth_token: Optional[str] = Header(defaul
     except Exception as e:
         raise HTTPException(400, f"wipe failed: {e}")
-# ---------- Entrypoint (respecte $PORT des Spaces) ----------
 if __name__ == "__main__":
     import uvicorn
     port = int(os.getenv("PORT", "7860"))

 from qdrant_client.http.models import VectorParams, Distance, PointStruct
 # ---------- logging ----------
logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(name)s:%(message)s")
LOG = logging.getLogger("remote_indexer")

# ---------- ENV ----------
# Embedding provider: "hf" (default) or "deepinfra". An empty or
# whitespace-only value falls back to the default instead of becoming "",
# which would otherwise be rejected later as an unknown backend.
EMB_BACKEND = os.getenv("EMB_BACKEND", "").strip().lower() or "hf"

# Hugging Face
HF_TOKEN   = os.getenv("HF_API_TOKEN", "").strip()
HF_MODEL   = os.getenv("HF_EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
# HF_API_URL overrides the endpoint, e.g. for a private Inference Endpoint
# or to use the "models/..." route:
# https://api-inference.huggingface.co/models/sentence-transformers/all-MiniLM-L6-v2
HF_URL     = (os.getenv("HF_API_URL", "").strip()
              or f"https://api-inference.huggingface.co/pipeline/feature-extraction/{HF_MODEL}")

# DeepInfra
DI_TOKEN   = os.getenv("DEEPINFRA_API_KEY", "").strip()
DI_MODEL   = os.getenv("DEEPINFRA_EMBED_MODEL", "thenlper/gte-small").strip()
DI_URL     = os.getenv("DEEPINFRA_EMBED_URL", "https://api.deepinfra.com/v1/embeddings").strip()

# Qdrant
QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
QDRANT_API = os.getenv("QDRANT_API_KEY", "").strip()

# Auth token for this service's own API (compared against a request header)
AUTH_TOKEN = os.getenv("REMOTE_INDEX_TOKEN", "").strip()

LOG.info("Embeddings backend = %s", EMB_BACKEND)
# Surface configuration problems at startup rather than on the first request.
if EMB_BACKEND == "hf":
    if not HF_TOKEN:
        LOG.warning("HF_API_TOKEN manquant — HF /index et /query échoueront.")
elif EMB_BACKEND == "deepinfra":
    if not DI_TOKEN:
        LOG.warning("DEEPINFRA_API_KEY manquant — DeepInfra embeddings échoueront.")
else:
    LOG.warning("EMB_BACKEND inconnu: %s — /index et /query échoueront.", EMB_BACKEND)
 # ---------- Clients ----------
 try:
     if AUTH_TOKEN and (x_auth or "") != AUTH_TOKEN:
         raise HTTPException(status_code=401, detail="Unauthorized")
def _hf_post_embeddings(batch: List[str]) -> Tuple[np.ndarray, int]:
    """Embed ``batch`` through the Hugging Face endpoint at ``HF_URL``.

    Args:
        batch: Texts to embed; sent as the JSON body of a single POST.

    Returns:
        ``(embeddings, size)`` where ``embeddings`` is a 2-D float32 array
        with one L2-normalised row per returned item, and ``size`` is the
        response's ``Content-Length`` header as an int (0 when absent).

    Raises:
        RuntimeError: if ``HF_TOKEN`` is empty, the request or JSON decoding
            fails, or the payload cannot be reduced to a 2-D array.
    """
    if not HF_TOKEN:
        raise RuntimeError("HF_API_TOKEN manquant (backend=hf).")
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    try:
        r = requests.post(HF_URL, headers=headers, json=batch, timeout=120)
        size = int(r.headers.get("Content-Length", "0"))
        if r.status_code >= 400:
            # Log the body so 403/4xx responses can be diagnosed.
            LOG.error("HF error %s: %s", r.status_code, r.text)
            r.raise_for_status()
        data = r.json()
    except Exception as e:
        # Service boundary: callers only ever see RuntimeError; keep the cause.
        raise RuntimeError(f"HF POST failed: {e}") from e

    try:
        arr = np.array(data, dtype=np.float32)
    except (ValueError, TypeError):
        # Ragged payload: token-level output whose items have different
        # token counts cannot form one rectangular array, so mean-pool each
        # item on its own before stacking.
        try:
            arr = np.stack(
                [np.asarray(item, dtype=np.float32).mean(axis=0) for item in data]
            )
        except (ValueError, TypeError) as e:
            raise RuntimeError(f"HF: unexpected embeddings payload: {e}") from e

    # [batch, dim] (sentence-transformers) or [batch, tokens, dim] -> mean-pooling
    if arr.ndim == 3:
        arr = arr.mean(axis=1)
    if arr.ndim != 2:
        raise RuntimeError(f"HF: unexpected embeddings shape: {arr.shape}")
    # L2 normalisation; the epsilon avoids division by zero on all-zero rows.
    norms = np.linalg.norm(arr, axis=1, keepdims=True) + 1e-12
    arr = arr / norms
    return arr.astype(np.float32), size
def _di_post_embeddings(batch: List[str]) -> Tuple[np.ndarray, int]:
    """Embed ``batch`` through the DeepInfra endpoint at ``DI_URL``.

    Args:
        batch: Texts to embed; sent as ``input`` together with ``DI_MODEL``.

    Returns:
        ``(embeddings, size)`` where ``embeddings`` is a 2-D float32 array
        with one L2-normalised row per input text, and ``size`` is the
        response's ``Content-Length`` header as an int (0 when absent).

    Raises:
        RuntimeError: if ``DI_TOKEN`` is empty, the request or JSON decoding
            fails, or the response is malformed or has the wrong item count.
    """
    if not DI_TOKEN:
        raise RuntimeError("DEEPINFRA_API_KEY manquant (backend=deepinfra).")
    headers = {"Authorization": f"Bearer {DI_TOKEN}", "Content-Type": "application/json"}
    payload = {"model": DI_MODEL, "input": batch}
    try:
        r = requests.post(DI_URL, headers=headers, json=payload, timeout=120)
        size = int(r.headers.get("Content-Length", "0"))
        if r.status_code >= 400:
            LOG.error("DeepInfra error %s: %s", r.status_code, r.text)
            r.raise_for_status()
        js = r.json()
    except Exception as e:
        # Service boundary: callers only ever see RuntimeError; keep the cause.
        raise RuntimeError(f"DeepInfra POST failed: {e}") from e

    # OpenAI-like: {"data":[{"embedding":[...],"index":0}, ...]}
    data = js.get("data") if isinstance(js, dict) else None
    if not isinstance(data, list) or not data:
        raise RuntimeError(f"DeepInfra embeddings: réponse invalide {js}")
    if len(data) != len(batch):
        # A short or long response would silently pair vectors with the wrong texts.
        raise RuntimeError(
            f"DeepInfra: expected {len(batch)} embeddings, got {len(data)}"
        )
    try:
        # Order rows by the reported input index; fall back to arrival
        # position for items that carry no "index" field.
        ordered = sorted(enumerate(data), key=lambda p: p[1].get("index", p[0]))
        embs = [item["embedding"] for _, item in ordered]
        arr = np.asarray(embs, dtype=np.float32)
    except (AttributeError, KeyError, TypeError, ValueError) as e:
        raise RuntimeError(f"DeepInfra embeddings: réponse invalide {js}") from e
    if arr.ndim != 2:
        raise RuntimeError(f"DeepInfra: unexpected embeddings shape: {arr.shape}")
    # L2 normalisation; the epsilon avoids division by zero on all-zero rows.
    norms = np.linalg.norm(arr, axis=1, keepdims=True) + 1e-12
    arr = arr / norms
    return arr.astype(np.float32), size
def _post_embeddings(batch: List[str]) -> Tuple[np.ndarray, int]:
    """Route ``batch`` to the embedding provider selected by ``EMB_BACKEND``.

    Returns whatever the provider-specific function returns; raises
    ``RuntimeError`` when ``EMB_BACKEND`` names no known provider.
    """
    providers = {
        "hf": _hf_post_embeddings,
        "deepinfra": _di_post_embeddings,
    }
    embed = providers.get(EMB_BACKEND)
    if embed is None:
        raise RuntimeError(f"EMB_BACKEND inconnu: {EMB_BACKEND}")
    return embed(batch)
 def _ensure_collection(name: str, dim: int):
     try:
         qdr.get_collection(name)
         _set_status(job_id, "running")
         total_chunks = 0
         LOG.info(f"[{job_id}] Index start project={req.project_id} files={len(req.files)}")
+        _append_log(job_id, f"Start project={req.project_id} files={len(req.files)} | backend={EMB_BACKEND}")
+        # warmup -> dimension
         warmup = []
         for f in req.files[:1]:
             warmup.append(next(_chunk_with_spans(f.text, req.chunk_size, req.overlap))[2])
 @app.get("/")
 def root():
+    return {
+        "ok": True,
+        "service": "remote-indexer",
+        "backend": EMB_BACKEND,
+        "hf_url": HF_URL if EMB_BACKEND == "hf" else None,
+        "di_model": DI_MODEL if EMB_BACKEND == "deepinfra" else None,
+        "docs": "/health, /index, /status/{job_id}, /query, /wipe"
+    }
 @app.get("/health")
 def health():
     # Returns a static {"ok": True} payload; performs no dependency checks.
     return {"ok": True}
def _check_backend_ready(for_query=False):
    """Reject the request with HTTP 400 when the embedding backend is unusable.

    Args:
        for_query: Accepted for call-site compatibility; the checks are the
            same for indexing and querying, so it is not consulted.

    Raises:
        HTTPException: 400 if the selected backend's API key is missing, or
            if ``EMB_BACKEND`` names no supported backend.
    """
    if EMB_BACKEND == "hf":
        if not HF_TOKEN:
            raise HTTPException(400, "HF_API_TOKEN manquant côté serveur (backend=hf).")
    elif EMB_BACKEND == "deepinfra":
        if not DI_TOKEN:
            raise HTTPException(400, "DEEPINFRA_API_KEY manquant côté serveur (backend=deepinfra).")
    else:
        # Fail fast: otherwise /index queues a job that cannot succeed and
        # /query fails later with an unhandled RuntimeError.
        raise HTTPException(400, f"EMB_BACKEND inconnu: {EMB_BACKEND}")
 @app.post("/index")
 def start_index(req: IndexRequest, background_tasks: BackgroundTasks, x_auth_token: Optional[str] = Header(default=None)):
+    if AUTH_TOKEN and (x_auth_token or "") != AUTH_TOKEN:
+        raise HTTPException(401, "Unauthorized")
+    _check_backend_ready()
     job_id = uuid.uuid4().hex[:12]
     JOBS[job_id] = {"status": "queued", "logs": [], "created": time.time()}
     background_tasks.add_task(run_index_job, job_id, req)
 @app.get("/status/{job_id}")
 def status(job_id: str, x_auth_token: Optional[str] = Header(default=None)):
+    if AUTH_TOKEN and (x_auth_token or "") != AUTH_TOKEN:
+        raise HTTPException(401, "Unauthorized")
     j = JOBS.get(job_id)
     if not j:
         raise HTTPException(404, "job inconnu")
 @app.post("/query")
 def query(req: QueryRequest, x_auth_token: Optional[str] = Header(default=None)):
+    if AUTH_TOKEN and (x_auth_token or "") != AUTH_TOKEN:
+        raise HTTPException(401, "Unauthorized")
+    _check_backend_ready(for_query=True)
     vec, _ = _post_embeddings([req.query])
     vec = vec[0].tolist()
     col = f"proj_{req.project_id}"
 @app.post("/wipe")
 def wipe_collection(project_id: str, x_auth_token: Optional[str] = Header(default=None)):
+    if AUTH_TOKEN and (x_auth_token or "") != AUTH_TOKEN:
+        raise HTTPException(401, "Unauthorized")
     col = f"proj_{project_id}"
     try:
         qdr.delete_collection(col)
     except Exception as e:
         raise HTTPException(400, f"wipe failed: {e}")
+# ---------- Entrypoint ----------
 if __name__ == "__main__":
     import uvicorn
     port = int(os.getenv("PORT", "7860"))