Update rfp_api_app.py

rfp_api_app.py  (+123, -7)  CHANGED
@@ -1,7 +1,49 @@
 # rfp_api_app.py
 # -*- coding: utf-8 -*-
+"""
+FastAPI API for RFP Parser & Exports (DeepInfra backend)
+========================================================
+
+What this file does
+-------------------
+- Exposes 4 main endpoints:
+  - GET /health : ping + model in use.
+  - POST /submit : starts an asynchronous job {text} -> job_id.
+  - GET /status : returns the job state, result links (raw.json, own.csv, xlsx),
+    and a short JSON excerpt for the "live JSON" view.
+  - GET /results/{job_id}/... : serves the artifacts (raw.json, own.csv, feuille_de_charge.xlsx).
+
+What each function provides
+---------------------------
+- new_job(text: str) -> str :
+    Creates an in-memory job with status "queued" and returns its job_id.
+- set_job_status(job_id: str, **updates) -> None :
+    Updates a job's metadata (thread-safe).
+- parse_with_deepinfra(text: str) -> dict :
+    Builds the LLM payload (via rfp_parser.prompting.build_chat_payload),
+    calls DeepInfra, and parses the returned JSON (stripped if fenced).
+- persist_doc(job_dir: Path, doc: dict) -> tuple[str, str] :
+    Saves doc to raw.json (UTF-8), returns (path, url).
+- build_csv_if_available(doc: dict, job_dir: Path) -> tuple[str|None, str|None] :
+    (Optional) generates the CSV export if your repo provides one; otherwise None.
+- build_xlsx(doc: dict, job_dir: Path) -> str :
+    Builds the dynamic Excel sheet (via rfp_parser.exports_xls.build_xls_from_doc).
+- run_job(job_id: str, text: str) -> None :
+    Orchestration: parse -> persist raw.json -> (own.csv if available) -> xlsx -> status update.
+
+Logs
+----
+- [API] logger at INFO level (or DEBUG if RFP_DEBUG=1).
+- Detailed traces for DeepInfra calls / persistence / errors.
+
+Notes
+-----
+- Jobs are kept in memory: if the process restarts, their state is lost (simple but sufficient for a Space).
+- Permissive CORS (*): allows requests from the Gradio Space.
+"""
+
 from __future__ import annotations
-from typing import Dict, Any
+from typing import Dict, Any, Tuple, Optional
 import os, json, uuid, threading, time, traceback
 from pathlib import Path
 import logging
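The new module docstring spells out the intended flow: POST a text to /submit, poll /status, then fetch the result links. A minimal client sketch of that flow (not part of the diff), assuming the Space is reachable at a placeholder BASE_URL and using an arbitrary 2-second poll:

# Illustrative client for the flow described above; BASE_URL is a placeholder.
import time
import requests

BASE_URL = "http://localhost:7860"  # placeholder for the deployed Space URL

resp = requests.post(f"{BASE_URL}/submit", json={"text": "Texte de l'appel d'offres..."})
resp.raise_for_status()
job_id = resp.json()["job_id"]

while True:
    info = requests.get(f"{BASE_URL}/status", params={"job_id": job_id}).json()
    if info["status"] in ("done", "error"):
        break
    time.sleep(2)  # arbitrary polling interval

print(info.get("error") or info.get("xlsx_url"))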
@@ -11,9 +53,11 @@ from fastapi import FastAPI, HTTPException, Query
 from fastapi.responses import JSONResponse, FileResponse
 from fastapi.middleware.cors import CORSMiddleware
 
-# === Imports from your repo
+# === Imports from your RFPmaster repo ===
 from rfp_parser.prompting import build_chat_payload
 from rfp_parser.exports_xls import build_xls_from_doc
+# If you have a CSV export (e.g. rfp_parser.exports_csv), uncomment and wire it in here:
+# from rfp_parser.exports_csv import build_csv_from_doc
 
 # --------- Config ---------
 DEEPINFRA_API_KEY = os.environ.get("DEEPINFRA_API_KEY", "")
@@ -39,11 +83,16 @@ def new_job(text: str) -> str:
     JOBS[job_id] = {
         "status": "queued",
         "error": None,
+        "raw_json_path": None,
+        "raw_json_url": None,
+        "own_csv_path": None,
+        "own_csv_url": None,
         "xlsx_path": None,
         "xlsx_url": None,
         "started_at": time.time(),
         "done_at": None,
         "meta": {"model": MODEL_NAME, "length": len(text or "")},
+        "json_preview": None,  # short excerpt for the "live JSON" view
     }
     return job_id
 
@@ -76,6 +125,29 @@ def parse_with_deepinfra(text: str) -> Dict[str, Any]:
         raise RuntimeError("Le contenu renvoyé n'est pas un objet JSON.")
     return doc
 
+def persist_doc(job_dir: Path, doc: Dict[str, Any]) -> Tuple[str, str]:
+    job_dir.mkdir(parents=True, exist_ok=True)
+    raw_path = job_dir / "raw.json"
+    with open(raw_path, "w", encoding="utf-8") as f:
+        json.dump(doc, f, indent=2, ensure_ascii=False)
+    raw_url = f"/results/{job_dir.name}/raw.json"
+    return str(raw_path), raw_url
+
+def build_csv_if_available(doc: Dict[str, Any], job_dir: Path) -> Tuple[Optional[str], Optional[str]]:
+    """
+    If your repo has a CSV export, wire it in here.
+    Otherwise (None, None) is returned without raising, to stay permissive.
+    """
+    try:
+        # Example if you have build_csv_from_doc(doc, out_path):
+        # out_path = job_dir / "own.csv"
+        # build_csv_from_doc(doc, str(out_path))
+        # return str(out_path), f"/results/{job_dir.name}/own.csv"
+        return None, None
+    except Exception as e:
+        logger.warning("CSV non généré: %s", e)
+        return None, None
+
 def build_xlsx(doc: Dict[str, Any], job_dir: Path) -> str:
     job_dir.mkdir(parents=True, exist_ok=True)
     out_path = str(job_dir / "feuille_de_charge.xlsx")
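build_csv_if_available only returns (None, None) until a real exporter is plugged in. As a purely illustrative sketch of what a build_csv_from_doc(doc, out_path) could look like (the function itself and the doc's "requirements" structure are assumptions, not part of the repo shown here):

# Hypothetical CSV exporter sketch; the "requirements" key and its fields are assumed.
import csv
from typing import Any, Dict

def build_csv_from_doc(doc: Dict[str, Any], out_path: str) -> None:
    rows = doc.get("requirements", [])  # assumed document structure
    with open(out_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["id", "requirement", "category"])
        for i, item in enumerate(rows, start=1):
            writer.writerow([i, item.get("text", ""), item.get("category", "")])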
@@ -91,10 +163,24 @@ def run_job(job_id: str, text: str) -> None:
     set_job_status(job_id, status="running")
     job_dir = BASE_TMP / job_id
     try:
+        # 1) LLM parse
         doc = parse_with_deepinfra(text)
+
+        # 2) Persist raw.json immediately (for JSON-first clients)
+        raw_path, raw_url = persist_doc(job_dir, doc)
+        preview = json.dumps(doc, ensure_ascii=False)[:1500]  # short excerpt
+        set_job_status(job_id, raw_json_path=raw_path, raw_json_url=raw_url, json_preview=preview)
+
+        # 3) (Optional) generate own.csv if available
+        csv_path, csv_url = build_csv_if_available(doc, job_dir)
+        if csv_path and csv_url:
+            set_job_status(job_id, own_csv_path=csv_path, own_csv_url=csv_url)
+
+        # 4) XLSX (can be the longest step)
         xlsx_path = build_xlsx(doc, job_dir)
-        # Here, since we are NOT mounted under /api: the path is direct
         xlsx_url = f"/results/{job_id}/feuille_de_charge.xlsx"
+
+        # 5) Done
         set_job_status(
             job_id,
             status="done",
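Since step 2 publishes raw_json_url and json_preview before the XLSX exists, a client can render live JSON while the spreadsheet is still being built. A rough sketch of that pattern (not part of the commit; BASE_URL and job_id are placeholders):

# Illustrative "JSON-first" loop: show the preview as soon as step 2 has persisted it.
import time
import requests

BASE_URL = "http://localhost:7860"  # placeholder
job_id = "..."                      # value returned by POST /submit

while True:
    info = requests.get(f"{BASE_URL}/status", params={"job_id": job_id}).json()
    if info.get("json_preview"):
        print(info["json_preview"])  # available before the XLSX step finishes
    if info["status"] in ("done", "error"):
        break
    time.sleep(2)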
@@ -109,10 +195,10 @@ def run_job(job_id: str, text: str) -> None:
         set_job_status(job_id, status="error", error=str(e), done_at=time.time())
 
 # --------- FastAPI ---------
-app = FastAPI(title="RFP_MASTER API", version="1.
+app = FastAPI(title="RFP_MASTER API", version="1.1.0")
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"], #
+    allow_origins=["*"],  # restrict here if you want to limit access to the Gradio Space
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
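The allow_origins comment suggests tightening CORS later. A sketch of the stricter setup (the Space hostname is a placeholder, not the real origin):

# Hypothetical stricter CORS setup; replace the origin with the real Gradio Space URL.
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

app = FastAPI(title="RFP_MASTER API", version="1.1.0")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["https://your-username-your-space.hf.space"],  # placeholder Space origin
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)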
@@ -128,6 +214,7 @@ def submit(payload: Dict[str, Any]):
     if not isinstance(text, str) or not text.strip():
         raise HTTPException(400, "Champ 'text' manquant ou vide.")
     job_id = new_job(text)
+    logger.info("Submit reçu job_id=%s len(text)=%d", job_id, len(text))
     t = threading.Thread(target=run_job, args=(job_id, text), daemon=True)
     t.start()
     return JSONResponse({"job_id": job_id, "status": "queued"})
@@ -138,16 +225,45 @@ def status(job_id: str = Query(..., description="Identifiant renvoyé par /submi
     info = JOBS.get(job_id)
     if not info:
         raise HTTPException(404, f"job_id inconnu: {job_id}")
+    # Expose the available URLs plus a small JSON excerpt (for "live JSON")
     return JSONResponse({
         "job_id": job_id,
         "status": info.get("status"),
-        "xlsx_url": info.get("xlsx_url"),
         "error": info.get("error"),
         "meta": info.get("meta"),
+        "raw_json_url": info.get("raw_json_url"),
+        "own_csv_url": info.get("own_csv_url"),
+        "xlsx_url": info.get("xlsx_url"),
+        "json_preview": info.get("json_preview"),
     })
 
+# ---- Results ----
+@app.get("/results/{job_id}/raw.json")
+def download_raw(job_id: str):
+    with JOBS_LOCK:
+        info = JOBS.get(job_id)
+    if not info:
+        raise HTTPException(404, f"job_id inconnu: {job_id}")
+    raw_path = info.get("raw_json_path")
+    if not raw_path or not Path(raw_path).exists():
+        raise HTTPException(404, "raw.json indisponible.")
+    return FileResponse(raw_path, media_type="application/json", filename="raw.json")
+
+@app.get("/results/{job_id}/own.csv")
+def download_csv(job_id: str):
+    with JOBS_LOCK:
+        info = JOBS.get(job_id)
+    if not info:
+        raise HTTPException(404, f"job_id inconnu: {job_id}")
+    csv_path = info.get("own_csv_path")
+    if not csv_path:
+        raise HTTPException(404, "own.csv non généré sur ce job.")
+    if not Path(csv_path).exists():
+        raise HTTPException(404, "own.csv indisponible.")
+    return FileResponse(csv_path, media_type="text/csv", filename="own.csv")
+
 @app.get("/results/{job_id}/feuille_de_charge.xlsx")
-def
+def download_xlsx(job_id: str):
     with JOBS_LOCK:
         info = JOBS.get(job_id)
     if not info: