Spaces:

chouchouvs
/

DeepIndex

Running

App Files Files Community

chouchouvs committed on Sep 16

Commit

b3f4ecb

verified ·

1 Parent(s): 820af78

Update main.py

Browse files

Files changed (1) hide show

main.py +59 -116

main.py CHANGED Viewed

@@ -2,39 +2,34 @@
 """
 HF Space - main.py de substitution pour tests Qdrant / indexation minimale
-Fonctions clés :
-- POST /wipe?project_id=XXX         : supprime la collection Qdrant
-- POST /index                       : lance un job d'indexation (JSON files=[{path,text},...])
-- GET  /status/{job_id}             : état du job + logs
-- GET  /collections/{proj}/count    : retourne le nombre de points dans Qdrant
-- POST /query                       : recherche sémantique (top_k, text, project_id)
-Une UI Gradio minimale est montée sur "/" pour déclencher les tests sans console.
 ENV attendues :
-- QDRANT_URL            : ex. https://xxxxx.eu-central-1-0.aws.cloud.qdrant.io:6333
-- QDRANT_API_KEY        : clé Qdrant Cloud
-- COLLECTION_PREFIX     : défaut "proj_"
 - EMB_PROVIDER          : "hf" (défaut) ou "dummy"
-- HF_EMBED_MODEL        : défaut "BAAI/bge-m3"
-- HUGGINGFACEHUB_API_TOKEN : token HF Inference (si EMB_PROVIDER=hf)
-- LOG_LEVEL             : DEBUG (défaut), INFO...
-Dépendances (requirements) suggérées :
-fastapi>=0.111
-uvicorn>=0.30
-httpx>=0.27
-pydantic>=2.7
-gradio>=4.43
-numpy>=2.0
 """
 from __future__ import annotations
 import os
 import time
 import uuid
-import math
-import json
 import hashlib
 import logging
 import asyncio
@@ -42,10 +37,10 @@ from typing import List, Dict, Any, Optional, Tuple
 import numpy as np
 import httpx
 from pydantic import BaseModel, Field, ValidationError
 from fastapi import FastAPI, HTTPException, Query
 from fastapi.middleware.cors import CORSMiddleware
 import gradio as gr
 # ------------------------------------------------------------------------------
@@ -67,10 +62,10 @@ HF_EMBED_MODEL = os.getenv("HF_EMBED_MODEL", "BAAI/bge-m3")
 HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "")
 if not QDRANT_URL or not QDRANT_API_KEY:
-    LOG.warning("QDRANT_URL / QDRANT_API_KEY non fournis : l'upsert échouera. Fournis-les dans les Secrets du Space.")
 if EMB_PROVIDER == "hf" and not HF_TOKEN:
-    LOG.warning("EMB_PROVIDER=hf mais HUGGINGFACEHUB_API_TOKEN absent. Tu peux basculer EMB_PROVIDER=dummy pour tester sans token.")
 # ------------------------------------------------------------------------------
 # Schémas Pydantic
@@ -130,21 +125,15 @@ def l2_normalize(vec: List[float]) -> List[float]:
     return arr.astype(np.float32).tolist()
 def flatten_any(x: Any) -> List[float]:
-    """
-    Certaines APIs renvoient [[...]] ou [[[...]]]; on aplanit en 1D.
-    """
     if isinstance(x, (list, tuple)):
         if len(x) > 0 and isinstance(x[0], (list, tuple)):
-            # Aplanit récursif
             return flatten_any(x[0])
         return list(map(float, x))
     raise ValueError("Embedding vector mal formé")
 def chunk_text(text: str, chunk_size: int, overlap: int) -> List[Tuple[int, int, str]]:
-    """
-    Retourne une liste de (start, end, chunk_text)
-    Ignore les petits fragments (< 30 chars) pour éviter le bruit.
-    """
     text = text or ""
     if not text.strip():
         return []
@@ -162,34 +151,26 @@ def chunk_text(text: str, chunk_size: int, overlap: int) -> List[Tuple[int, int,
     return res
 async def ensure_collection(client: httpx.AsyncClient, coll: str, vector_size: int) -> None:
-    """
-    Crée ou ajuste la collection Qdrant (distance = Cosine).
-    """
     url = f"{QDRANT_URL}/collections/{coll}"
-    # Vérifie l'existence
     r = await client.get(url, headers={"api-key": QDRANT_API_KEY}, timeout=20)
     if r.status_code == 200:
-        # Optionnel: vérifier la taille du vecteur ; si mismatch, on peut supprimer/recréer
         data = r.json()
         existing_size = data.get("result", {}).get("vectors", {}).get("size")
         if existing_size and int(existing_size) != int(vector_size):
             LOG.warning(f"Collection {coll} dim={existing_size} ≠ attendu {vector_size} → recréation")
             await client.delete(url, headers={"api-key": QDRANT_API_KEY}, timeout=20)
         else:
-            LOG.debug(f"Collection {coll} déjà prête (dim={existing_size})")
-    # (Re)création
-    body = {
-        "vectors": {"size": vector_size, "distance": "Cosine"}
-    }
-    r2 = await client.put(url, headers={"api-key": QDRANT_API_KEY}, json=body, timeout=30)
-    if r2.status_code not in (200, 201):
-        raise HTTPException(status_code=500, detail=f"Qdrant PUT collection a échoué: {r2.text}")
-async def qdrant_upsert(
-    client: httpx.AsyncClient,
-    coll: str,
-    points: List[Dict[str, Any]],
-) -> int:
     if not points:
         return 0
     url = f"{QDRANT_URL}/collections/{coll}/points?wait=true"
@@ -201,22 +182,12 @@ async def qdrant_upsert(
 async def qdrant_count(client: httpx.AsyncClient, coll: str) -> int:
     url = f"{QDRANT_URL}/collections/{coll}/points/count"
-    r = await client.post(
-        url,
-        headers={"api-key": QDRANT_API_KEY},
-        json={"exact": True},
-        timeout=20,
-    )
     if r.status_code != 200:
         raise HTTPException(status_code=500, detail=f"Qdrant count échoué: {r.text}")
     return int(r.json().get("result", {}).get("count", 0))
-async def qdrant_search(
-    client: httpx.AsyncClient,
-    coll: str,
-    vector: List[float],
-    limit: int = 5,
-) -> Dict[str, Any]:
     url = f"{QDRANT_URL}/collections/{coll}/points/search"
     r = await client.post(
         url,
@@ -231,27 +202,16 @@ async def qdrant_search(
 # ------------------------------------------------------------------------------
 # Embeddings (HF Inference ou dummy)
 # ------------------------------------------------------------------------------
-async def embed_hf(
-    client: httpx.AsyncClient,
-    texts: List[str],
-    model: str = HF_EMBED_MODEL,
-    token: str = HF_TOKEN,
-) -> List[List[float]]:
-    """
-    Appel HuggingFace Inference (feature extraction) - batch.
-    Normalise L2 les vecteurs.
-    """
     if not token:
         raise HTTPException(status_code=400, detail="HUGGINGFACEHUB_API_TOKEN manquant pour EMB_PROVIDER=hf")
     url = f"https://api-inference.huggingface.co/models/{model}"
     headers = {"Authorization": f"Bearer {token}"}
-    # HF accepte une liste de textes directement
     payload = {"inputs": texts, "options": {"wait_for_model": True}}
     r = await client.post(url, headers=headers, json=payload, timeout=120)
     if r.status_code != 200:
         raise HTTPException(status_code=502, detail=f"HF Inference error: {r.text}")
     data = r.json()
-    # data peut être une liste de listes (ou de listes de listes...)
     embeddings: List[List[float]] = []
     if isinstance(data, list):
         for row in data:
@@ -263,16 +223,10 @@ async def embed_hf(
     return embeddings
 def embed_dummy(texts: List[str], dim: int = 128) -> List[List[float]]:
-    """
-    Embedding déterministe basé sur un hash -> vecteur pseudo-aléatoire stable.
-    Suffisant pour tester le pipeline Qdrant (dimensions cohérentes, upsert, count, search).
-    """
     out: List[List[float]] = []
     for t in texts:
         h = hashlib.sha256(t.encode("utf-8")).digest()
-        # Étale sur dim floats
         arr = np.frombuffer((h * ((dim // len(h)) + 1))[:dim], dtype=np.uint8).astype(np.float32)
-        # Centrage et normalisation
         arr = (arr - 127.5) / 127.5
         arr = arr / (np.linalg.norm(arr) + 1e-9)
         out.append(arr.astype(np.float32).tolist())
@@ -291,11 +245,11 @@ async def run_index_job(job: JobState, req: IndexRequest) -> None:
     job.total_files = len(req.files)
     job.log(f"Index start project={req.project_id} files={len(req.files)} chunk_size={req.chunk_size} overlap={req.overlap} batch_size={req.batch_size} store_text={req.store_text}")
-    # Dédup global par hash du texte brut des fichiers
     file_hashes = [hash8(f.text) for f in req.files]
     uniq = len(set(file_hashes))
     if uniq != len(file_hashes):
-        job.log(f"Attention: {len(file_hashes)-uniq} fichiers ont un texte identique (hash dupliqué).")
     # Chunking
     records: List[Dict[str, Any]] = []
@@ -304,12 +258,7 @@ async def run_index_job(job: JobState, req: IndexRequest) -> None:
         if not chunks:
             job.log(f"{f.path}: 0 chunk (trop court ou vide)")
         for idx, (start, end, ch) in enumerate(chunks):
-            payload = {
-                "path": f.path,
-                "chunk": idx,
-                "start": start,
-                "end": end,
-            }
             if req.store_text:
                 payload["text"] = ch
             records.append({"payload": payload, "raw": ch})
@@ -322,33 +271,29 @@ async def run_index_job(job: JobState, req: IndexRequest) -> None:
         job.finished_at = time.time()
         return
-    # Embedding + Upsert (en batches)
     async with httpx.AsyncClient(timeout=120) as client:
-        # Dimension à partir du 1er embedding (warmup)
         warmup_vec = (await embed_texts(client, [records[0]["raw"]]))[0]
         vec_dim = len(warmup_vec)
         job.log(f"Warmup embeddings dim={vec_dim} provider={EMB_PROVIDER}")
-        # Qdrant collection
         coll = f"{COLLECTION_PREFIX}{req.project_id}"
         await ensure_collection(client, coll, vector_size=vec_dim)
         job.stage = "upserting"
-        batch_vectors: List[List[float]] = []
         batch_points: List[Dict[str, Any]] = []
         async def flush_batch():
-            nonlocal batch_vectors, batch_points
             if not batch_points:
                 return 0
             added = await qdrant_upsert(client, coll, batch_points)
             job.upserted += added
             job.log(f"+{added} points upsert (total={job.upserted})")
-            batch_vectors = []
             batch_points = []
             return added
-        # Traite par lot d'embeddings (embedding_batch_size indépendant de l'upsert batch_size)
         EMB_BATCH = max(8, min(64, req.batch_size * 2))
         i = 0
         while i < len(records):
@@ -360,18 +305,12 @@ async def run_index_job(job: JobState, req: IndexRequest) -> None:
             job.embedded += len(vecs)
             for r, v in zip(sub, vecs):
-                payload = r["payload"]
-                point = {
-                    "id": str(uuid.uuid4()),
-                    "vector": v,
-                    "payload": payload,
-                }
                 batch_points.append(point)
                 if len(batch_points) >= req.batch_size:
                     await flush_batch()
             i += EMB_BATCH
-        # Flush final
         await flush_batch()
     job.stage = "done"
@@ -389,6 +328,10 @@ fastapi_app.add_middleware(
     allow_headers=["*"],
 )
 @fastapi_app.get("/")
 async def root():
     return {"ok": True, "service": "remote-indexer-min", "qdrant": bool(QDRANT_URL), "emb_provider": EMB_PROVIDER}
@@ -411,7 +354,6 @@ async def index(req: IndexRequest):
     job_id = uuid.uuid4().hex[:12]
     job = JobState(job_id=job_id, project_id=req.project_id)
     JOBS[job_id] = job
-    # Lance en tâche de fond
     asyncio.create_task(run_index_job(job, req))
     job.log(f"Job {job_id} créé pour project {req.project_id}")
     return {"job_id": job_id, "project_id": req.project_id}
@@ -443,15 +385,12 @@ async def query(req: QueryRequest):
     return data
 # ------------------------------------------------------------------------------
-# Gradio UI (montée sur "/")
 # ------------------------------------------------------------------------------
 def _default_two_docs() -> List[Dict[str, str]]:
     a = "Alpha bravo charlie delta echo foxtrot golf hotel india. " * 3
-    b = "Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy." * 3
-    return [
-        {"path": "a.txt", "text": a},
-        {"path": "b.txt", "text": b},
-    ]
 async def ui_wipe(project: str):
     try:
@@ -486,9 +425,8 @@ async def ui_status(job_id: str):
         return "⚠️ Renseigne un job_id"
     try:
         st = await status(job_id)
-        # Formatage
         lines = [f"Job {st['job_id']} — stage={st['stage']} files={st['total_files']} chunks={st['total_chunks']} embedded={st['embedded']} upserted={st['upserted']}"]
-        lines += st.get("messages", [])[-50:]  # dernières lignes
         if st.get("errors"):
             lines.append("Erreurs:")
             lines += [f" - {e}" for e in st["errors"]]
@@ -528,7 +466,7 @@ with gr.Blocks(title="Remote Indexer - Minimal Test", analytics_enabled=False) a
                 "Wipe → Index 2 docs → Status → Count → Query\n"
                 f"- **Embeddings**: `{EMB_PROVIDER}` (model: `{HF_EMBED_MODEL}`)\n"
                 f"- **Qdrant**: `{'OK' if QDRANT_URL else 'ABSENT'}`\n"
-                "Conseil: si tu n'as pas de token HF, mets `EMB_PROVIDER=dummy` dans les Secrets du Space.")
     with gr.Row():
         project_tb = gr.Textbox(label="Project ID", value="DEEPWEB")
         jobid_tb = gr.Textbox(label="Job ID (pour Status)", value="", interactive=True)
@@ -550,9 +488,14 @@ with gr.Blocks(title="Remote Indexer - Minimal Test", analytics_enabled=False) a
     wipe_btn.click(ui_wipe, inputs=[project_tb], outputs=[out_log])
     index_btn.click(ui_index_sample, inputs=[project_tb, chunk_size, overlap, batch_size, store_text], outputs=[out_log])
-    # Petit auto-poll status: on relance ui_status à la main en collant le job_id
     count_btn.click(ui_count, inputs=[project_tb], outputs=[out_log])
     query_btn.click(ui_query, inputs=[project_tb, query_tb, topk], outputs=[query_out])
 # Monte l'UI Gradio sur la FastAPI
 app = gr.mount_gradio_app(fastapi_app, ui, path="/")

 """
 HF Space - main.py de substitution pour tests Qdrant / indexation minimale
+Endpoints:
+- POST /wipe?project_id=XXX
+- POST /index
+- GET  /status/{job_id}
+- GET  /collections/{project_id}/count
+- POST /query
+- GET  /health        <-- healthcheck OK
+UI Gradio montée sur "/" pour tests sans console.
 ENV attendues :
+- QDRANT_URL            : https://...qdrant.io:6333
+- QDRANT_API_KEY        : clé Qdrant
+- COLLECTION_PREFIX     : "proj_" par défaut
 - EMB_PROVIDER          : "hf" (défaut) ou "dummy"
+- HF_EMBED_MODEL        : "BAAI/bge-m3" par défaut
+- HUGGINGFACEHUB_API_TOKEN (si EMB_PROVIDER=hf)
+- LOG_LEVEL             : DEBUG (défaut)
+- PORT                  : 7860 (fourni par HF)
+Dépendances suggérées :
+fastapi>=0.111, uvicorn>=0.30, httpx>=0.27, pydantic>=2.7, gradio>=4.43, numpy>=2.0
 """
 from __future__ import annotations
 import os
 import time
 import uuid
 import hashlib
 import logging
 import asyncio
 import numpy as np
 import httpx
+import uvicorn
 from pydantic import BaseModel, Field, ValidationError
 from fastapi import FastAPI, HTTPException, Query
 from fastapi.middleware.cors import CORSMiddleware
 import gradio as gr
 # ------------------------------------------------------------------------------
 HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "")
 if not QDRANT_URL or not QDRANT_API_KEY:
+    LOG.warning("QDRANT_URL / QDRANT_API_KEY non fournis : l'upsert échouera.")
 if EMB_PROVIDER == "hf" and not HF_TOKEN:
+    LOG.warning("EMB_PROVIDER=hf sans HUGGINGFACEHUB_API_TOKEN. Utilise EMB_PROVIDER=dummy pour tester sans token.")
 # ------------------------------------------------------------------------------
 # Schémas Pydantic
     return arr.astype(np.float32).tolist()
 def flatten_any(x: Any) -> List[float]:
     """Reduce a possibly nested embedding ([[...]], [[[...]]]) to one 1-D float list.

     While the first element of the current level is itself a list/tuple, only
     that first element is followed; sibling entries at that level are dropped.
     The innermost level reached is converted element-wise with ``float``.

     Raises:
         ValueError: if the value at the current level is not a list or tuple.
     """
     node = x
     while True:
         if not isinstance(node, (list, tuple)):
             raise ValueError("Embedding vector mal formé")
         if node and isinstance(node[0], (list, tuple)):
             # Still nested: descend into the first element only.
             node = node[0]
             continue
         return [float(value) for value in node]
 def chunk_text(text: str, chunk_size: int, overlap: int) -> List[Tuple[int, int, str]]:
+    """Retourne [(start, end, chunk)] et ignore les fragments < 30 chars."""
     text = text or ""
     if not text.strip():
         return []
     return res
 async def ensure_collection(client: httpx.AsyncClient, coll: str, vector_size: int) -> None:
     """Ensure the Qdrant collection ``coll`` exists with vectors of ``vector_size``.

     The collection is created (distance=Cosine) when the GET does not return
     200. When it exists with a different vector size it is deleted and
     recreated; when the size matches (or cannot be determined) it is left
     untouched.

     Args:
         client: HTTP client used for the Qdrant REST calls.
         coll: Full collection name.
         vector_size: Expected embedding dimension.

     Raises:
         HTTPException: status 500 when the PUT that creates the collection
             does not answer 200/201.
     """
     url = f"{QDRANT_URL}/collections/{coll}"
     headers = {"api-key": QDRANT_API_KEY}
     r = await client.get(url, headers=headers, timeout=20)
     recreate = False
     if r.status_code == 200:
         result = r.json().get("result") or {}
         # Qdrant reports the vector parameters under result.config.params.vectors.
         # The flat result.vectors lookup is kept only as a fallback, since on its
         # own it never finds the size and the mismatch check would never fire.
         params = (result.get("config") or {}).get("params") or {}
         vectors = params.get("vectors") or result.get("vectors") or {}
         existing_size = vectors.get("size") if isinstance(vectors, dict) else None
         if existing_size and int(existing_size) != int(vector_size):
             LOG.warning(f"Collection {coll} dim={existing_size} ≠ attendu {vector_size} → recréation")
             await client.delete(url, headers=headers, timeout=20)
             recreate = True
         else:
             LOG.debug(f"Collection {coll} existante (dim={existing_size})")
     if r.status_code != 200 or recreate:
         body = {"vectors": {"size": vector_size, "distance": "Cosine"}}
         r2 = await client.put(url, headers=headers, json=body, timeout=30)
         if r2.status_code not in (200, 201):
             raise HTTPException(status_code=500, detail=f"Qdrant PUT collection a échoué: {r2.text}")
+async def qdrant_upsert(client: httpx.AsyncClient, coll: str, points: List[Dict[str, Any]]) -> int:
     if not points:
         return 0
     url = f"{QDRANT_URL}/collections/{coll}/points?wait=true"
 async def qdrant_count(client: httpx.AsyncClient, coll: str) -> int:
     """Return the exact number of points stored in collection ``coll``.

     Raises:
         HTTPException: status 500 when Qdrant does not answer 200.
     """
     endpoint = f"{QDRANT_URL}/collections/{coll}/points/count"
     auth = {"api-key": QDRANT_API_KEY}
     r = await client.post(endpoint, headers=auth, json={"exact": True}, timeout=20)
     if r.status_code == 200:
         # A missing "result" or "count" key is reported as 0.
         return int(r.json().get("result", {}).get("count", 0))
     raise HTTPException(status_code=500, detail=f"Qdrant count échoué: {r.text}")
+async def qdrant_search(client: httpx.AsyncClient, coll: str, vector: List[float], limit: int = 5) -> Dict[str, Any]:
     url = f"{QDRANT_URL}/collections/{coll}/points/search"
     r = await client.post(
         url,
 # ------------------------------------------------------------------------------
 # Embeddings (HF Inference ou dummy)
 # ------------------------------------------------------------------------------
+async def embed_hf(client: httpx.AsyncClient, texts: List[str], model: str = HF_EMBED_MODEL, token: str = HF_TOKEN) -> List[List[float]]:
     if not token:
         raise HTTPException(status_code=400, detail="HUGGINGFACEHUB_API_TOKEN manquant pour EMB_PROVIDER=hf")
     url = f"https://api-inference.huggingface.co/models/{model}"
     headers = {"Authorization": f"Bearer {token}"}
     payload = {"inputs": texts, "options": {"wait_for_model": True}}
     r = await client.post(url, headers=headers, json=payload, timeout=120)
     if r.status_code != 200:
         raise HTTPException(status_code=502, detail=f"HF Inference error: {r.text}")
     data = r.json()
     embeddings: List[List[float]] = []
     if isinstance(data, list):
         for row in data:
     return embeddings
 def embed_dummy(texts: List[str], dim: int = 128) -> List[List[float]]:
     out: List[List[float]] = []
     for t in texts:
         h = hashlib.sha256(t.encode("utf-8")).digest()
         arr = np.frombuffer((h * ((dim // len(h)) + 1))[:dim], dtype=np.uint8).astype(np.float32)
         arr = (arr - 127.5) / 127.5
         arr = arr / (np.linalg.norm(arr) + 1e-9)
         out.append(arr.astype(np.float32).tolist())
     job.total_files = len(req.files)
     job.log(f"Index start project={req.project_id} files={len(req.files)} chunk_size={req.chunk_size} overlap={req.overlap} batch_size={req.batch_size} store_text={req.store_text}")
+    # Dédup global par hash du texte de fichier
     file_hashes = [hash8(f.text) for f in req.files]
     uniq = len(set(file_hashes))
     if uniq != len(file_hashes):
+        job.log(f"Attention: {len(file_hashes)-uniq} fichier(s) ont un texte identique (hash dupliqué).")
     # Chunking
     records: List[Dict[str, Any]] = []
         if not chunks:
             job.log(f"{f.path}: 0 chunk (trop court ou vide)")
         for idx, (start, end, ch) in enumerate(chunks):
+            payload = {"path": f.path, "chunk": idx, "start": start, "end": end}
             if req.store_text:
                 payload["text"] = ch
             records.append({"payload": payload, "raw": ch})
         job.finished_at = time.time()
         return
     async with httpx.AsyncClient(timeout=120) as client:
+        # Warmup dim
         warmup_vec = (await embed_texts(client, [records[0]["raw"]]))[0]
         vec_dim = len(warmup_vec)
         job.log(f"Warmup embeddings dim={vec_dim} provider={EMB_PROVIDER}")
+        # Collection Qdrant
         coll = f"{COLLECTION_PREFIX}{req.project_id}"
         await ensure_collection(client, coll, vector_size=vec_dim)
         job.stage = "upserting"
         batch_points: List[Dict[str, Any]] = []
         async def flush_batch():
+            nonlocal batch_points
             if not batch_points:
                 return 0
             added = await qdrant_upsert(client, coll, batch_points)
             job.upserted += added
             job.log(f"+{added} points upsert (total={job.upserted})")
             batch_points = []
             return added
         EMB_BATCH = max(8, min(64, req.batch_size * 2))
         i = 0
         while i < len(records):
             job.embedded += len(vecs)
             for r, v in zip(sub, vecs):
+                point = {"id": str(uuid.uuid4()), "vector": v, "payload": r["payload"]}
                 batch_points.append(point)
                 if len(batch_points) >= req.batch_size:
                     await flush_batch()
             i += EMB_BATCH
         await flush_batch()
     job.stage = "done"
     allow_headers=["*"],
 )
+@fastapi_app.get("/health")
+async def health():
+    return {"status": "ok"}
 @fastapi_app.get("/")
 async def root():
     """Describe the service: name, whether a Qdrant URL is set, and the embedding provider."""
     info = {"ok": True, "service": "remote-indexer-min"}
     info["qdrant"] = bool(QDRANT_URL)
     info["emb_provider"] = EMB_PROVIDER
     return info
     job_id = uuid.uuid4().hex[:12]
     job = JobState(job_id=job_id, project_id=req.project_id)
     JOBS[job_id] = job
     asyncio.create_task(run_index_job(job, req))
     job.log(f"Job {job_id} créé pour project {req.project_id}")
     return {"job_id": job_id, "project_id": req.project_id}
     return data
 # ------------------------------------------------------------------------------
+# Gradio UI
 # ------------------------------------------------------------------------------
 def _default_two_docs() -> List[Dict[str, str]]:
     a = "Alpha bravo charlie delta echo foxtrot golf hotel india. " * 3
+    b = "Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy. " * 3
+    return [{"path": "a.txt", "text": a}, {"path": "b.txt", "text": b}]
 async def ui_wipe(project: str):
     try:
         return "⚠️ Renseigne un job_id"
     try:
         st = await status(job_id)
         lines = [f"Job {st['job_id']} — stage={st['stage']} files={st['total_files']} chunks={st['total_chunks']} embedded={st['embedded']} upserted={st['upserted']}"]
+        lines += st.get("messages", [])[-50:]
         if st.get("errors"):
             lines.append("Erreurs:")
             lines += [f" - {e}" for e in st["errors"]]
                 "Wipe → Index 2 docs → Status → Count → Query\n"
                 f"- **Embeddings**: `{EMB_PROVIDER}` (model: `{HF_EMBED_MODEL}`)\n"
                 f"- **Qdrant**: `{'OK' if QDRANT_URL else 'ABSENT'}`\n"
+                "Astuce: si pas de token HF, mets `EMB_PROVIDER=dummy`.")
     with gr.Row():
         project_tb = gr.Textbox(label="Project ID", value="DEEPWEB")
         jobid_tb = gr.Textbox(label="Job ID (pour Status)", value="", interactive=True)
     wipe_btn.click(ui_wipe, inputs=[project_tb], outputs=[out_log])
     index_btn.click(ui_index_sample, inputs=[project_tb, chunk_size, overlap, batch_size, store_text], outputs=[out_log])
     count_btn.click(ui_count, inputs=[project_tb], outputs=[out_log])
     query_btn.click(ui_query, inputs=[project_tb, query_tb, topk], outputs=[query_out])
 # Monte l'UI Gradio sur la FastAPI
 app = gr.mount_gradio_app(fastapi_app, ui, path="/")
+if __name__ == "__main__":
+    # Démarre Uvicorn pour les Spaces Docker (CMD: python -u /app/main.py)
+    port = int(os.getenv("PORT", "7860"))
+    LOG.info(f"Démarrage Uvicorn sur 0.0.0.0:{port}")
+    uvicorn.run(app, host="0.0.0.0", port=port)