chouchouvs commited on
Commit
b3f4ecb
·
verified ·
1 Parent(s): 820af78

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +59 -116
main.py CHANGED
@@ -2,39 +2,34 @@
2
  """
3
  HF Space - main.py de substitution pour tests Qdrant / indexation minimale
4
 
5
- Fonctions clés :
6
- - POST /wipe?project_id=XXX : supprime la collection Qdrant
7
- - POST /index : lance un job d'indexation (JSON files=[{path,text},...])
8
- - GET /status/{job_id} : état du job + logs
9
- - GET /collections/{proj}/count : retourne le nombre de points dans Qdrant
10
- - POST /query : recherche sémantique (top_k, text, project_id)
 
11
 
12
- Une UI Gradio minimale est montée sur "/" pour déclencher les tests sans console.
13
 
14
  ENV attendues :
15
- - QDRANT_URL : ex. https://xxxxx.eu-central-1-0.aws.cloud.qdrant.io:6333
16
- - QDRANT_API_KEY : clé Qdrant Cloud
17
- - COLLECTION_PREFIX : défaut "proj_"
18
  - EMB_PROVIDER : "hf" (défaut) ou "dummy"
19
- - HF_EMBED_MODEL : défaut "BAAI/bge-m3"
20
- - HUGGINGFACEHUB_API_TOKEN : token HF Inference (si EMB_PROVIDER=hf)
21
- - LOG_LEVEL : DEBUG (défaut), INFO...
22
-
23
- Dépendances (requirements) suggérées :
24
- fastapi>=0.111
25
- uvicorn>=0.30
26
- httpx>=0.27
27
- pydantic>=2.7
28
- gradio>=4.43
29
- numpy>=2.0
30
  """
31
 
32
  from __future__ import annotations
33
  import os
34
  import time
35
  import uuid
36
- import math
37
- import json
38
  import hashlib
39
  import logging
40
  import asyncio
@@ -42,10 +37,10 @@ from typing import List, Dict, Any, Optional, Tuple
42
 
43
  import numpy as np
44
  import httpx
 
45
  from pydantic import BaseModel, Field, ValidationError
46
  from fastapi import FastAPI, HTTPException, Query
47
  from fastapi.middleware.cors import CORSMiddleware
48
-
49
  import gradio as gr
50
 
51
  # ------------------------------------------------------------------------------
@@ -67,10 +62,10 @@ HF_EMBED_MODEL = os.getenv("HF_EMBED_MODEL", "BAAI/bge-m3")
67
  HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "")
68
 
69
  if not QDRANT_URL or not QDRANT_API_KEY:
70
- LOG.warning("QDRANT_URL / QDRANT_API_KEY non fournis : l'upsert échouera. Fournis-les dans les Secrets du Space.")
71
 
72
  if EMB_PROVIDER == "hf" and not HF_TOKEN:
73
- LOG.warning("EMB_PROVIDER=hf mais HUGGINGFACEHUB_API_TOKEN absent. Tu peux basculer EMB_PROVIDER=dummy pour tester sans token.")
74
 
75
  # ------------------------------------------------------------------------------
76
  # Schémas Pydantic
@@ -130,21 +125,15 @@ def l2_normalize(vec: List[float]) -> List[float]:
130
  return arr.astype(np.float32).tolist()
131
 
132
  def flatten_any(x: Any) -> List[float]:
133
- """
134
- Certaines APIs renvoient [[...]] ou [[[...]]]; on aplanit en 1D.
135
- """
136
  if isinstance(x, (list, tuple)):
137
  if len(x) > 0 and isinstance(x[0], (list, tuple)):
138
- # Aplanit récursif
139
  return flatten_any(x[0])
140
  return list(map(float, x))
141
  raise ValueError("Embedding vector mal formé")
142
 
143
  def chunk_text(text: str, chunk_size: int, overlap: int) -> List[Tuple[int, int, str]]:
144
- """
145
- Retourne une liste de (start, end, chunk_text)
146
- Ignore les petits fragments (< 30 chars) pour éviter le bruit.
147
- """
148
  text = text or ""
149
  if not text.strip():
150
  return []
@@ -162,34 +151,26 @@ def chunk_text(text: str, chunk_size: int, overlap: int) -> List[Tuple[int, int,
162
  return res
163
 
164
  async def ensure_collection(client: httpx.AsyncClient, coll: str, vector_size: int) -> None:
165
- """
166
- Crée ou ajuste la collection Qdrant (distance = Cosine).
167
- """
168
  url = f"{QDRANT_URL}/collections/{coll}"
169
- # Vérifie l'existence
170
  r = await client.get(url, headers={"api-key": QDRANT_API_KEY}, timeout=20)
 
171
  if r.status_code == 200:
172
- # Optionnel: vérifier la taille du vecteur ; si mismatch, on peut supprimer/recréer
173
  data = r.json()
174
  existing_size = data.get("result", {}).get("vectors", {}).get("size")
175
  if existing_size and int(existing_size) != int(vector_size):
176
  LOG.warning(f"Collection {coll} dim={existing_size} ≠ attendu {vector_size} → recréation")
177
  await client.delete(url, headers={"api-key": QDRANT_API_KEY}, timeout=20)
 
178
  else:
179
- LOG.debug(f"Collection {coll} déjà prête (dim={existing_size})")
180
- # (Re)création
181
- body = {
182
- "vectors": {"size": vector_size, "distance": "Cosine"}
183
- }
184
- r2 = await client.put(url, headers={"api-key": QDRANT_API_KEY}, json=body, timeout=30)
185
- if r2.status_code not in (200, 201):
186
- raise HTTPException(status_code=500, detail=f"Qdrant PUT collection a échoué: {r2.text}")
187
-
188
- async def qdrant_upsert(
189
- client: httpx.AsyncClient,
190
- coll: str,
191
- points: List[Dict[str, Any]],
192
- ) -> int:
193
  if not points:
194
  return 0
195
  url = f"{QDRANT_URL}/collections/{coll}/points?wait=true"
@@ -201,22 +182,12 @@ async def qdrant_upsert(
201
 
202
  async def qdrant_count(client: httpx.AsyncClient, coll: str) -> int:
203
  url = f"{QDRANT_URL}/collections/{coll}/points/count"
204
- r = await client.post(
205
- url,
206
- headers={"api-key": QDRANT_API_KEY},
207
- json={"exact": True},
208
- timeout=20,
209
- )
210
  if r.status_code != 200:
211
  raise HTTPException(status_code=500, detail=f"Qdrant count échoué: {r.text}")
212
  return int(r.json().get("result", {}).get("count", 0))
213
 
214
- async def qdrant_search(
215
- client: httpx.AsyncClient,
216
- coll: str,
217
- vector: List[float],
218
- limit: int = 5,
219
- ) -> Dict[str, Any]:
220
  url = f"{QDRANT_URL}/collections/{coll}/points/search"
221
  r = await client.post(
222
  url,
@@ -231,27 +202,16 @@ async def qdrant_search(
231
  # ------------------------------------------------------------------------------
232
  # Embeddings (HF Inference ou dummy)
233
  # ------------------------------------------------------------------------------
234
- async def embed_hf(
235
- client: httpx.AsyncClient,
236
- texts: List[str],
237
- model: str = HF_EMBED_MODEL,
238
- token: str = HF_TOKEN,
239
- ) -> List[List[float]]:
240
- """
241
- Appel HuggingFace Inference (feature extraction) - batch.
242
- Normalise L2 les vecteurs.
243
- """
244
  if not token:
245
  raise HTTPException(status_code=400, detail="HUGGINGFACEHUB_API_TOKEN manquant pour EMB_PROVIDER=hf")
246
  url = f"https://api-inference.huggingface.co/models/{model}"
247
  headers = {"Authorization": f"Bearer {token}"}
248
- # HF accepte une liste de textes directement
249
  payload = {"inputs": texts, "options": {"wait_for_model": True}}
250
  r = await client.post(url, headers=headers, json=payload, timeout=120)
251
  if r.status_code != 200:
252
  raise HTTPException(status_code=502, detail=f"HF Inference error: {r.text}")
253
  data = r.json()
254
- # data peut être une liste de listes (ou de listes de listes...)
255
  embeddings: List[List[float]] = []
256
  if isinstance(data, list):
257
  for row in data:
@@ -263,16 +223,10 @@ async def embed_hf(
263
  return embeddings
264
 
265
  def embed_dummy(texts: List[str], dim: int = 128) -> List[List[float]]:
266
- """
267
- Embedding déterministe basé sur un hash -> vecteur pseudo-aléatoire stable.
268
- Suffisant pour tester le pipeline Qdrant (dimensions cohérentes, upsert, count, search).
269
- """
270
  out: List[List[float]] = []
271
  for t in texts:
272
  h = hashlib.sha256(t.encode("utf-8")).digest()
273
- # Étale sur dim floats
274
  arr = np.frombuffer((h * ((dim // len(h)) + 1))[:dim], dtype=np.uint8).astype(np.float32)
275
- # Centrage et normalisation
276
  arr = (arr - 127.5) / 127.5
277
  arr = arr / (np.linalg.norm(arr) + 1e-9)
278
  out.append(arr.astype(np.float32).tolist())
@@ -291,11 +245,11 @@ async def run_index_job(job: JobState, req: IndexRequest) -> None:
291
  job.total_files = len(req.files)
292
  job.log(f"Index start project={req.project_id} files={len(req.files)} chunk_size={req.chunk_size} overlap={req.overlap} batch_size={req.batch_size} store_text={req.store_text}")
293
 
294
- # Dédup global par hash du texte brut des fichiers
295
  file_hashes = [hash8(f.text) for f in req.files]
296
  uniq = len(set(file_hashes))
297
  if uniq != len(file_hashes):
298
- job.log(f"Attention: {len(file_hashes)-uniq} fichiers ont un texte identique (hash dupliqué).")
299
 
300
  # Chunking
301
  records: List[Dict[str, Any]] = []
@@ -304,12 +258,7 @@ async def run_index_job(job: JobState, req: IndexRequest) -> None:
304
  if not chunks:
305
  job.log(f"{f.path}: 0 chunk (trop court ou vide)")
306
  for idx, (start, end, ch) in enumerate(chunks):
307
- payload = {
308
- "path": f.path,
309
- "chunk": idx,
310
- "start": start,
311
- "end": end,
312
- }
313
  if req.store_text:
314
  payload["text"] = ch
315
  records.append({"payload": payload, "raw": ch})
@@ -322,33 +271,29 @@ async def run_index_job(job: JobState, req: IndexRequest) -> None:
322
  job.finished_at = time.time()
323
  return
324
 
325
- # Embedding + Upsert (en batches)
326
  async with httpx.AsyncClient(timeout=120) as client:
327
- # Dimension à partir du 1er embedding (warmup)
328
  warmup_vec = (await embed_texts(client, [records[0]["raw"]]))[0]
329
  vec_dim = len(warmup_vec)
330
  job.log(f"Warmup embeddings dim={vec_dim} provider={EMB_PROVIDER}")
331
 
332
- # Qdrant collection
333
  coll = f"{COLLECTION_PREFIX}{req.project_id}"
334
  await ensure_collection(client, coll, vector_size=vec_dim)
335
 
336
  job.stage = "upserting"
337
- batch_vectors: List[List[float]] = []
338
  batch_points: List[Dict[str, Any]] = []
339
 
340
  async def flush_batch():
341
- nonlocal batch_vectors, batch_points
342
  if not batch_points:
343
  return 0
344
  added = await qdrant_upsert(client, coll, batch_points)
345
  job.upserted += added
346
  job.log(f"+{added} points upsert (total={job.upserted})")
347
- batch_vectors = []
348
  batch_points = []
349
  return added
350
 
351
- # Traite par lot d'embeddings (embedding_batch_size indépendant de l'upsert batch_size)
352
  EMB_BATCH = max(8, min(64, req.batch_size * 2))
353
  i = 0
354
  while i < len(records):
@@ -360,18 +305,12 @@ async def run_index_job(job: JobState, req: IndexRequest) -> None:
360
  job.embedded += len(vecs)
361
 
362
  for r, v in zip(sub, vecs):
363
- payload = r["payload"]
364
- point = {
365
- "id": str(uuid.uuid4()),
366
- "vector": v,
367
- "payload": payload,
368
- }
369
  batch_points.append(point)
370
  if len(batch_points) >= req.batch_size:
371
  await flush_batch()
372
  i += EMB_BATCH
373
 
374
- # Flush final
375
  await flush_batch()
376
 
377
  job.stage = "done"
@@ -389,6 +328,10 @@ fastapi_app.add_middleware(
389
  allow_headers=["*"],
390
  )
391
 
 
 
 
 
392
  @fastapi_app.get("/")
393
  async def root():
394
  return {"ok": True, "service": "remote-indexer-min", "qdrant": bool(QDRANT_URL), "emb_provider": EMB_PROVIDER}
@@ -411,7 +354,6 @@ async def index(req: IndexRequest):
411
  job_id = uuid.uuid4().hex[:12]
412
  job = JobState(job_id=job_id, project_id=req.project_id)
413
  JOBS[job_id] = job
414
- # Lance en tâche de fond
415
  asyncio.create_task(run_index_job(job, req))
416
  job.log(f"Job {job_id} créé pour project {req.project_id}")
417
  return {"job_id": job_id, "project_id": req.project_id}
@@ -443,15 +385,12 @@ async def query(req: QueryRequest):
443
  return data
444
 
445
  # ------------------------------------------------------------------------------
446
- # Gradio UI (montée sur "/")
447
  # ------------------------------------------------------------------------------
448
  def _default_two_docs() -> List[Dict[str, str]]:
449
  a = "Alpha bravo charlie delta echo foxtrot golf hotel india. " * 3
450
- b = "Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy." * 3
451
- return [
452
- {"path": "a.txt", "text": a},
453
- {"path": "b.txt", "text": b},
454
- ]
455
 
456
  async def ui_wipe(project: str):
457
  try:
@@ -486,9 +425,8 @@ async def ui_status(job_id: str):
486
  return "⚠️ Renseigne un job_id"
487
  try:
488
  st = await status(job_id)
489
- # Formatage
490
  lines = [f"Job {st['job_id']} — stage={st['stage']} files={st['total_files']} chunks={st['total_chunks']} embedded={st['embedded']} upserted={st['upserted']}"]
491
- lines += st.get("messages", [])[-50:] # dernières lignes
492
  if st.get("errors"):
493
  lines.append("Erreurs:")
494
  lines += [f" - {e}" for e in st["errors"]]
@@ -528,7 +466,7 @@ with gr.Blocks(title="Remote Indexer - Minimal Test", analytics_enabled=False) a
528
  "Wipe → Index 2 docs → Status → Count → Query\n"
529
  f"- **Embeddings**: `{EMB_PROVIDER}` (model: `{HF_EMBED_MODEL}`)\n"
530
  f"- **Qdrant**: `{'OK' if QDRANT_URL else 'ABSENT'}`\n"
531
- "Conseil: si tu n'as pas de token HF, mets `EMB_PROVIDER=dummy` dans les Secrets du Space.")
532
  with gr.Row():
533
  project_tb = gr.Textbox(label="Project ID", value="DEEPWEB")
534
  jobid_tb = gr.Textbox(label="Job ID (pour Status)", value="", interactive=True)
@@ -550,9 +488,14 @@ with gr.Blocks(title="Remote Indexer - Minimal Test", analytics_enabled=False) a
550
 
551
  wipe_btn.click(ui_wipe, inputs=[project_tb], outputs=[out_log])
552
  index_btn.click(ui_index_sample, inputs=[project_tb, chunk_size, overlap, batch_size, store_text], outputs=[out_log])
553
- # Petit auto-poll status: on relance ui_status à la main en collant le job_id
554
  count_btn.click(ui_count, inputs=[project_tb], outputs=[out_log])
555
  query_btn.click(ui_query, inputs=[project_tb, query_tb, topk], outputs=[query_out])
556
 
557
  # Monte l'UI Gradio sur la FastAPI
558
  app = gr.mount_gradio_app(fastapi_app, ui, path="/")
 
 
 
 
 
 
 
2
  """
3
  HF Space - main.py de substitution pour tests Qdrant / indexation minimale
4
 
5
+ Endpoints:
6
+ - POST /wipe?project_id=XXX
7
+ - POST /index
8
+ - GET /status/{job_id}
9
+ - GET /collections/{project_id}/count
10
+ - POST /query
11
+ - GET /health <-- healthcheck OK
12
 
13
+ UI Gradio montée sur "/" pour tests sans console.
14
 
15
  ENV attendues :
16
+ - QDRANT_URL : https://...qdrant.io:6333
17
+ - QDRANT_API_KEY : clé Qdrant
18
+ - COLLECTION_PREFIX : "proj_" par défaut
19
  - EMB_PROVIDER : "hf" (défaut) ou "dummy"
20
+ - HF_EMBED_MODEL : "BAAI/bge-m3" par défaut
21
+ - HUGGINGFACEHUB_API_TOKEN (si EMB_PROVIDER=hf)
22
+ - LOG_LEVEL : DEBUG (défaut)
23
+ - PORT : 7860 (fourni par HF)
24
+
25
+ Dépendances suggérées :
26
+ fastapi>=0.111, uvicorn>=0.30, httpx>=0.27, pydantic>=2.7, gradio>=4.43, numpy>=2.0
 
 
 
 
27
  """
28
 
29
  from __future__ import annotations
30
  import os
31
  import time
32
  import uuid
 
 
33
  import hashlib
34
  import logging
35
  import asyncio
 
37
 
38
  import numpy as np
39
  import httpx
40
+ import uvicorn
41
  from pydantic import BaseModel, Field, ValidationError
42
  from fastapi import FastAPI, HTTPException, Query
43
  from fastapi.middleware.cors import CORSMiddleware
 
44
  import gradio as gr
45
 
46
  # ------------------------------------------------------------------------------
 
62
  HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "")
63
 
64
  if not QDRANT_URL or not QDRANT_API_KEY:
65
+ LOG.warning("QDRANT_URL / QDRANT_API_KEY non fournis : l'upsert échouera.")
66
 
67
  if EMB_PROVIDER == "hf" and not HF_TOKEN:
68
+ LOG.warning("EMB_PROVIDER=hf sans HUGGINGFACEHUB_API_TOKEN. Utilise EMB_PROVIDER=dummy pour tester sans token.")
69
 
70
  # ------------------------------------------------------------------------------
71
  # Schémas Pydantic
 
125
  return arr.astype(np.float32).tolist()
126
 
127
  def flatten_any(x: Any) -> List[float]:
128
+ """Aplatis potentiels [[...]] ou [[[...]]] en 1D."""
 
 
129
  if isinstance(x, (list, tuple)):
130
  if len(x) > 0 and isinstance(x[0], (list, tuple)):
 
131
  return flatten_any(x[0])
132
  return list(map(float, x))
133
  raise ValueError("Embedding vector mal formé")
134
 
135
  def chunk_text(text: str, chunk_size: int, overlap: int) -> List[Tuple[int, int, str]]:
136
+ """Retourne [(start, end, chunk)] et ignore les fragments < 30 chars."""
 
 
 
137
  text = text or ""
138
  if not text.strip():
139
  return []
 
151
  return res
152
 
153
  async def ensure_collection(client: httpx.AsyncClient, coll: str, vector_size: int) -> None:
154
+ """Crée la collection Qdrant (distance=Cosine), ou la recrée si dim mismatch."""
 
 
155
  url = f"{QDRANT_URL}/collections/{coll}"
 
156
  r = await client.get(url, headers={"api-key": QDRANT_API_KEY}, timeout=20)
157
+ recreate = False
158
  if r.status_code == 200:
 
159
  data = r.json()
160
  existing_size = data.get("result", {}).get("vectors", {}).get("size")
161
  if existing_size and int(existing_size) != int(vector_size):
162
  LOG.warning(f"Collection {coll} dim={existing_size} ≠ attendu {vector_size} → recréation")
163
  await client.delete(url, headers={"api-key": QDRANT_API_KEY}, timeout=20)
164
+ recreate = True
165
  else:
166
+ LOG.debug(f"Collection {coll} existante (dim={existing_size})")
167
+ if r.status_code != 200 or recreate:
168
+ body = {"vectors": {"size": vector_size, "distance": "Cosine"}}
169
+ r2 = await client.put(url, headers={"api-key": QDRANT_API_KEY}, json=body, timeout=30)
170
+ if r2.status_code not in (200, 201):
171
+ raise HTTPException(status_code=500, detail=f"Qdrant PUT collection a échoué: {r2.text}")
172
+
173
+ async def qdrant_upsert(client: httpx.AsyncClient, coll: str, points: List[Dict[str, Any]]) -> int:
 
 
 
 
 
 
174
  if not points:
175
  return 0
176
  url = f"{QDRANT_URL}/collections/{coll}/points?wait=true"
 
182
 
183
  async def qdrant_count(client: httpx.AsyncClient, coll: str) -> int:
184
  url = f"{QDRANT_URL}/collections/{coll}/points/count"
185
+ r = await client.post(url, headers={"api-key": QDRANT_API_KEY}, json={"exact": True}, timeout=20)
 
 
 
 
 
186
  if r.status_code != 200:
187
  raise HTTPException(status_code=500, detail=f"Qdrant count échoué: {r.text}")
188
  return int(r.json().get("result", {}).get("count", 0))
189
 
190
+ async def qdrant_search(client: httpx.AsyncClient, coll: str, vector: List[float], limit: int = 5) -> Dict[str, Any]:
 
 
 
 
 
191
  url = f"{QDRANT_URL}/collections/{coll}/points/search"
192
  r = await client.post(
193
  url,
 
202
  # ------------------------------------------------------------------------------
203
  # Embeddings (HF Inference ou dummy)
204
  # ------------------------------------------------------------------------------
205
+ async def embed_hf(client: httpx.AsyncClient, texts: List[str], model: str = HF_EMBED_MODEL, token: str = HF_TOKEN) -> List[List[float]]:
 
 
 
 
 
 
 
 
 
206
  if not token:
207
  raise HTTPException(status_code=400, detail="HUGGINGFACEHUB_API_TOKEN manquant pour EMB_PROVIDER=hf")
208
  url = f"https://api-inference.huggingface.co/models/{model}"
209
  headers = {"Authorization": f"Bearer {token}"}
 
210
  payload = {"inputs": texts, "options": {"wait_for_model": True}}
211
  r = await client.post(url, headers=headers, json=payload, timeout=120)
212
  if r.status_code != 200:
213
  raise HTTPException(status_code=502, detail=f"HF Inference error: {r.text}")
214
  data = r.json()
 
215
  embeddings: List[List[float]] = []
216
  if isinstance(data, list):
217
  for row in data:
 
223
  return embeddings
224
 
225
  def embed_dummy(texts: List[str], dim: int = 128) -> List[List[float]]:
 
 
 
 
226
  out: List[List[float]] = []
227
  for t in texts:
228
  h = hashlib.sha256(t.encode("utf-8")).digest()
 
229
  arr = np.frombuffer((h * ((dim // len(h)) + 1))[:dim], dtype=np.uint8).astype(np.float32)
 
230
  arr = (arr - 127.5) / 127.5
231
  arr = arr / (np.linalg.norm(arr) + 1e-9)
232
  out.append(arr.astype(np.float32).tolist())
 
245
  job.total_files = len(req.files)
246
  job.log(f"Index start project={req.project_id} files={len(req.files)} chunk_size={req.chunk_size} overlap={req.overlap} batch_size={req.batch_size} store_text={req.store_text}")
247
 
248
+ # Dédup global par hash du texte de fichier
249
  file_hashes = [hash8(f.text) for f in req.files]
250
  uniq = len(set(file_hashes))
251
  if uniq != len(file_hashes):
252
+ job.log(f"Attention: {len(file_hashes)-uniq} fichier(s) ont un texte identique (hash dupliqué).")
253
 
254
  # Chunking
255
  records: List[Dict[str, Any]] = []
 
258
  if not chunks:
259
  job.log(f"{f.path}: 0 chunk (trop court ou vide)")
260
  for idx, (start, end, ch) in enumerate(chunks):
261
+ payload = {"path": f.path, "chunk": idx, "start": start, "end": end}
 
 
 
 
 
262
  if req.store_text:
263
  payload["text"] = ch
264
  records.append({"payload": payload, "raw": ch})
 
271
  job.finished_at = time.time()
272
  return
273
 
 
274
  async with httpx.AsyncClient(timeout=120) as client:
275
+ # Warmup dim
276
  warmup_vec = (await embed_texts(client, [records[0]["raw"]]))[0]
277
  vec_dim = len(warmup_vec)
278
  job.log(f"Warmup embeddings dim={vec_dim} provider={EMB_PROVIDER}")
279
 
280
+ # Collection Qdrant
281
  coll = f"{COLLECTION_PREFIX}{req.project_id}"
282
  await ensure_collection(client, coll, vector_size=vec_dim)
283
 
284
  job.stage = "upserting"
 
285
  batch_points: List[Dict[str, Any]] = []
286
 
287
  async def flush_batch():
288
+ nonlocal batch_points
289
  if not batch_points:
290
  return 0
291
  added = await qdrant_upsert(client, coll, batch_points)
292
  job.upserted += added
293
  job.log(f"+{added} points upsert (total={job.upserted})")
 
294
  batch_points = []
295
  return added
296
 
 
297
  EMB_BATCH = max(8, min(64, req.batch_size * 2))
298
  i = 0
299
  while i < len(records):
 
305
  job.embedded += len(vecs)
306
 
307
  for r, v in zip(sub, vecs):
308
+ point = {"id": str(uuid.uuid4()), "vector": v, "payload": r["payload"]}
 
 
 
 
 
309
  batch_points.append(point)
310
  if len(batch_points) >= req.batch_size:
311
  await flush_batch()
312
  i += EMB_BATCH
313
 
 
314
  await flush_batch()
315
 
316
  job.stage = "done"
 
328
  allow_headers=["*"],
329
  )
330
 
331
+ @fastapi_app.get("/health")
332
+ async def health():
333
+ return {"status": "ok"}
334
+
335
  @fastapi_app.get("/")
336
  async def root():
337
  return {"ok": True, "service": "remote-indexer-min", "qdrant": bool(QDRANT_URL), "emb_provider": EMB_PROVIDER}
 
354
  job_id = uuid.uuid4().hex[:12]
355
  job = JobState(job_id=job_id, project_id=req.project_id)
356
  JOBS[job_id] = job
 
357
  asyncio.create_task(run_index_job(job, req))
358
  job.log(f"Job {job_id} créé pour project {req.project_id}")
359
  return {"job_id": job_id, "project_id": req.project_id}
 
385
  return data
386
 
387
  # ------------------------------------------------------------------------------
388
+ # Gradio UI
389
  # ------------------------------------------------------------------------------
390
  def _default_two_docs() -> List[Dict[str, str]]:
391
  a = "Alpha bravo charlie delta echo foxtrot golf hotel india. " * 3
392
+ b = "Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy. " * 3
393
+ return [{"path": "a.txt", "text": a}, {"path": "b.txt", "text": b}]
 
 
 
394
 
395
  async def ui_wipe(project: str):
396
  try:
 
425
  return "⚠️ Renseigne un job_id"
426
  try:
427
  st = await status(job_id)
 
428
  lines = [f"Job {st['job_id']} — stage={st['stage']} files={st['total_files']} chunks={st['total_chunks']} embedded={st['embedded']} upserted={st['upserted']}"]
429
+ lines += st.get("messages", [])[-50:]
430
  if st.get("errors"):
431
  lines.append("Erreurs:")
432
  lines += [f" - {e}" for e in st["errors"]]
 
466
  "Wipe → Index 2 docs → Status → Count → Query\n"
467
  f"- **Embeddings**: `{EMB_PROVIDER}` (model: `{HF_EMBED_MODEL}`)\n"
468
  f"- **Qdrant**: `{'OK' if QDRANT_URL else 'ABSENT'}`\n"
469
+ "Astuce: si pas de token HF, mets `EMB_PROVIDER=dummy`.")
470
  with gr.Row():
471
  project_tb = gr.Textbox(label="Project ID", value="DEEPWEB")
472
  jobid_tb = gr.Textbox(label="Job ID (pour Status)", value="", interactive=True)
 
488
 
489
  wipe_btn.click(ui_wipe, inputs=[project_tb], outputs=[out_log])
490
  index_btn.click(ui_index_sample, inputs=[project_tb, chunk_size, overlap, batch_size, store_text], outputs=[out_log])
 
491
  count_btn.click(ui_count, inputs=[project_tb], outputs=[out_log])
492
  query_btn.click(ui_query, inputs=[project_tb, query_tb, topk], outputs=[query_out])
493
 
494
  # Monte l'UI Gradio sur la FastAPI
495
  app = gr.mount_gradio_app(fastapi_app, ui, path="/")
496
+
497
+ if __name__ == "__main__":
498
+ # Démarre Uvicorn pour les Spaces Docker (CMD: python -u /app/main.py)
499
+ port = int(os.getenv("PORT", "7860"))
500
+ LOG.info(f"Démarrage Uvicorn sur 0.0.0.0:{port}")
501
+ uvicorn.run(app, host="0.0.0.0", port=port)