HamidOmarov commited on
Commit
41018f6
·
1 Parent(s): 3e29f58

RAG API 1.3.1: retrieval+encoding fixes; stats/history; HF-safe paths

Browse files
Files changed (1) hide show
  1. app/api.py +121 -183
app/api.py CHANGED
@@ -1,144 +1,66 @@
1
  # app/api.py
2
  from __future__ import annotations
3
 
4
- import os
5
- import re
6
- from collections import deque
7
  from datetime import datetime, timezone
8
- from time import perf_counter
9
- from typing import List, Optional, Dict, Any
10
 
11
  import faiss
12
- from fastapi import FastAPI, UploadFile, File, HTTPException
13
  from fastapi.middleware.cors import CORSMiddleware
14
- from fastapi.responses import JSONResponse, RedirectResponse
15
  from pydantic import BaseModel, Field
16
 
17
  from .rag_system import SimpleRAG, UPLOAD_DIR, INDEX_DIR
18
 
19
- __version__ = "1.3.1"
20
 
21
  app = FastAPI(title="RAG API", version=__version__)
22
 
23
- # CORS (Streamlit UI üçün)
24
  app.add_middleware(
25
  CORSMiddleware,
26
- allow_origins=["*"],
27
  allow_credentials=True,
28
  allow_methods=["*"],
29
  allow_headers=["*"],
30
  )
31
 
 
32
  rag = SimpleRAG()
33
 
34
- # -------------------- Schemas --------------------
 
 
 
 
 
 
 
 
35
  class UploadResponse(BaseModel):
 
36
  filename: str
37
  chunks_added: int
 
38
 
39
  class AskRequest(BaseModel):
40
- question: str = Field(..., min_length=1)
41
- top_k: int = Field(5, ge=1, le=20)
 
 
42
 
43
  class AskResponse(BaseModel):
44
  answer: str
45
  contexts: List[str]
46
-
47
- class HistoryItem(BaseModel):
48
- question: str
49
- timestamp: str
50
 
51
  class HistoryResponse(BaseModel):
52
  total_chunks: int
53
- history: List[HistoryItem] = []
54
-
55
- # -------------------- Stats (in-memory) --------------------
56
- class StatsStore:
57
- def __init__(self):
58
- self.documents_indexed = 0
59
- self.questions_answered = 0
60
- self.latencies_ms = deque(maxlen=500)
61
- self.last7_questions = deque([0] * 7, maxlen=7) # sadə günlük sayğac
62
- self.history = deque(maxlen=50)
63
-
64
- def add_docs(self, n: int):
65
- if n > 0:
66
- self.documents_indexed += int(n)
67
-
68
- def add_question(self, latency_ms: Optional[int] = None, q: Optional[str] = None):
69
- self.questions_answered += 1
70
- if latency_ms is not None:
71
- self.latencies_ms.append(int(latency_ms))
72
- if len(self.last7_questions) == 7:
73
- self.last7_questions[0] += 1
74
- if q:
75
- self.history.appendleft(
76
- {"question": q, "timestamp": datetime.now(timezone.utc).isoformat(timespec="seconds")}
77
- )
78
-
79
- @property
80
- def avg_ms(self) -> int:
81
- return int(sum(self.latencies_ms) / len(self.latencies_ms)) if self.latencies_ms else 0
82
-
83
- stats = StatsStore()
84
-
85
- # -------------------- Helpers --------------------
86
- _STOPWORDS = {
87
- "the","a","an","of","for","and","or","in","on","to","from","with","by","is","are",
88
- "was","were","be","been","being","at","as","that","this","these","those","it","its",
89
- "into","than","then","so","such","about","over","per","via","vs","within"
90
- }
91
-
92
- def _tokenize(s: str) -> List[str]:
93
- return [w for w in re.findall(r"[a-zA-Z0-9]+", s.lower()) if w and w not in _STOPWORDS and len(w) > 2]
94
-
95
- def _is_generic_answer(text: str) -> bool:
96
- if not text:
97
- return True
98
- low = text.strip().lower()
99
- if len(low) < 15:
100
- return True
101
- # tipik generik pattern-lər
102
- if "based on document context" in low or "appears to be" in low:
103
- return True
104
- return False
105
-
106
- def _extractive_fallback(question: str, contexts: List[str], max_chars: int = 600) -> str:
107
- """ Sualın açar sözlərinə əsasən kontekstdən cümlələr seç. """
108
- if not contexts:
109
- return "I couldn't find relevant information in the indexed documents for this question."
110
- qtok = set(_tokenize(question))
111
- if not qtok:
112
- return (contexts[0] or "")[:max_chars]
113
-
114
- # cümlələrə böl və skorla
115
- sentences: List[str] = []
116
- for c in contexts:
117
- for s in re.split(r"(?<=[\.!\?])\s+|\n+", (c or "").strip()):
118
- s = s.strip()
119
- if s:
120
- sentences.append(s)
121
-
122
- scored: List[tuple[int, str]] = []
123
- for s in sentences:
124
- st = set(_tokenize(s))
125
- scored.append((len(qtok & st), s))
126
- scored.sort(key=lambda x: (x[0], len(x[1])), reverse=True)
127
-
128
- picked: List[str] = []
129
- for sc, s in scored:
130
- if sc <= 0 and picked:
131
- break
132
- if len((" ".join(picked) + " " + s).strip()) > max_chars:
133
- break
134
- picked.append(s)
135
 
136
- if not picked:
137
- return (contexts[0] or "")[:max_chars]
138
- bullets = "\n".join(f"- {p}" for p in picked)
139
- return f"Answer (based on document context):\n{bullets}"
140
-
141
- # -------------------- Routes --------------------
142
  @app.get("/")
143
  def root():
144
  return RedirectResponse(url="/docs")
@@ -147,100 +69,121 @@ def root():
147
  def health():
148
  return {
149
  "status": "ok",
150
- "version": app.version,
151
  "summarizer": "extractive_en + translate + keyword_fallback",
152
- "faiss_ntotal": int(getattr(rag.index, "ntotal", 0)),
153
- "model_dim": int(getattr(rag.index, "d", rag.embed_dim)),
154
  }
155
 
156
  @app.get("/debug/translate")
157
  def debug_translate():
 
 
 
158
  try:
159
- from transformers import pipeline
160
- tr = pipeline("translation", model="Helsinki-NLP/opus-mt-az-en", cache_dir=str(rag.cache_dir), device=-1)
 
 
 
 
 
161
  out = tr("Sənəd təmiri və quraşdırılması ilə bağlı işlər görülüb.", max_length=80)[0]["translation_text"]
162
  return {"ok": True, "example_out": out}
163
  except Exception as e:
164
- return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})
165
 
166
  @app.post("/upload_pdf", response_model=UploadResponse)
167
- async def upload_pdf(file: UploadFile = File(...)):
168
- if not file.filename.lower().endswith(".pdf"):
169
- raise HTTPException(status_code=400, detail="Only PDF files are allowed.")
170
-
171
- dest = UPLOAD_DIR / file.filename
172
- with open(dest, "wb") as f:
173
- while True:
174
- chunk = await file.read(1024 * 1024)
175
- if not chunk:
176
- break
177
- f.write(chunk)
 
 
 
 
 
 
 
 
178
 
179
- added = rag.add_pdf(dest)
180
- if added == 0:
181
- raise HTTPException(status_code=400, detail="No extractable text found (likely a scanned image PDF).")
 
 
 
 
 
182
 
183
- stats.add_docs(added)
184
- return UploadResponse(filename=file.filename, chunks_added=added)
 
 
 
 
 
185
 
186
  @app.post("/ask_question", response_model=AskResponse)
187
- def ask_question(payload: AskRequest):
188
- q = (payload.question or "").strip()
189
- if not q:
190
- raise HTTPException(status_code=400, detail="Missing 'question'.")
191
-
192
- k = max(1, int(payload.top_k))
193
- t0 = perf_counter()
194
-
195
- # 1) Həmişə sual embedding-i ilə axtar
 
 
 
196
  try:
197
- hits = rag.search(q, k=k) # List[Tuple[text, score]]
198
- except Exception as e:
199
- raise HTTPException(status_code=500, detail=f"Search failed: {e}")
200
-
201
- contexts = [c for c, _ in (hits or []) if c] or (getattr(rag, "last_added", [])[:k] if getattr(rag, "last_added", None) else [])
202
 
203
- if not contexts:
204
- latency_ms = int((perf_counter() - t0) * 1000)
205
- stats.add_question(latency_ms, q=q)
206
- return AskResponse(
207
- answer="I couldn't find relevant information in the indexed documents for this question.",
208
- contexts=[]
209
- )
210
 
211
- # 2) Cavabı sintez et (rag içində LLM/rule-based ola bilər)
212
- try:
213
- synthesized = (rag.synthesize_answer(q, contexts) or "").strip()
214
- except Exception:
215
- synthesized = ""
216
 
217
- # 3) Generic görünürsə, extractive fallback
218
- if _is_generic_answer(synthesized):
219
- synthesized = _extractive_fallback(q, contexts, max_chars=600)
 
 
 
 
220
 
221
- latency_ms = int((perf_counter() - t0) * 1000)
222
- stats.add_question(latency_ms, q=q)
223
- return AskResponse(answer=synthesized, contexts=contexts)
224
 
225
  @app.get("/get_history", response_model=HistoryResponse)
226
  def get_history():
227
- return HistoryResponse(
228
- total_chunks=len(rag.chunks),
229
- history=[HistoryItem(**h) for h in list(stats.history)]
230
- )
231
 
232
  @app.get("/stats")
233
- def stats_endpoint():
234
  return {
235
- "documents_indexed": stats.documents_indexed,
236
- "questions_answered": stats.questions_answered,
237
- "avg_ms": stats.avg_ms,
238
- "last7_questions": list(stats.last7_questions),
239
  "total_chunks": len(rag.chunks),
240
- "faiss_ntotal": int(getattr(rag.index, "ntotal", 0)),
241
- "model_dim": int(getattr(rag.index, "d", rag.embed_dim)),
242
- "last_added_chunks": len(getattr(rag, "last_added", [])),
243
- "version": app.version,
244
  }
245
 
246
  @app.post("/reset_index")
@@ -249,16 +192,11 @@ def reset_index():
249
  rag.index = faiss.IndexFlatIP(rag.embed_dim)
250
  rag.chunks = []
251
  rag.last_added = []
252
- for p in [INDEX_DIR / "faiss.index", INDEX_DIR / "meta.npy"]:
253
- try:
254
- os.remove(p)
255
- except FileNotFoundError:
256
- pass
257
- stats.documents_indexed = 0
258
- stats.questions_answered = 0
259
- stats.latencies_ms.clear()
260
- stats.last7_questions = deque([0] * 7, maxlen=7)
261
- stats.history.clear()
262
- return {"ok": True}
263
  except Exception as e:
264
  raise HTTPException(status_code=500, detail=str(e))
 
1
  # app/api.py
2
  from __future__ import annotations
3
 
4
+ import time
 
 
5
  from datetime import datetime, timezone
6
+ from pathlib import Path
7
+ from typing import Any, Dict, List
8
 
9
  import faiss
10
+ from fastapi import FastAPI, File, HTTPException, UploadFile
11
  from fastapi.middleware.cors import CORSMiddleware
12
+ from fastapi.responses import RedirectResponse
13
  from pydantic import BaseModel, Field
14
 
15
  from .rag_system import SimpleRAG, UPLOAD_DIR, INDEX_DIR
16
 
17
+ __version__ = "1.3.2"
18
 
19
  app = FastAPI(title="RAG API", version=__version__)
20
 
21
+ # ───────────────────────── CORS ─────────────────────────
22
  app.add_middleware(
23
  CORSMiddleware,
24
+ allow_origins=["*"], # tighten if needed
25
  allow_credentials=True,
26
  allow_methods=["*"],
27
  allow_headers=["*"],
28
  )
29
 
30
+ # ──────────────────── Core singleton & metrics ────────────────────
31
  rag = SimpleRAG()
32
 
33
+ METRICS: Dict[str, Any] = {
34
+ "questions_answered": 0,
35
+ "avg_ms": 0.0,
36
+ "last7_questions": [5, 8, 12, 7, 15, 11, 9], # placeholder sample
37
+ "last_added_chunks": 0,
38
+ }
39
+ HISTORY: List[Dict[str, Any]] = [] # [{"question":..., "timestamp":...}]
40
+
41
+ # ───────────────────────── Models ─────────────────────────
42
  class UploadResponse(BaseModel):
43
+ message: str
44
  filename: str
45
  chunks_added: int
46
+ total_chunks: int
47
 
48
  class AskRequest(BaseModel):
49
+ question: str = Field(min_length=3)
50
+ top_k: int = 5
51
+ # Optional routing hint: "all" (default) or "last"
52
+ scope: str = Field(default="all", pattern="^(all|last)$")
53
 
54
  class AskResponse(BaseModel):
55
  answer: str
56
  contexts: List[str]
57
+ used_top_k: int
 
 
 
58
 
59
  class HistoryResponse(BaseModel):
60
  total_chunks: int
61
+ history: List[Dict[str, Any]]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
+ # ───────────────────────── Routes ─────────────────────────
 
 
 
 
 
64
  @app.get("/")
65
  def root():
66
  return RedirectResponse(url="/docs")
 
69
  def health():
70
  return {
71
  "status": "ok",
72
+ "version": __version__,
73
  "summarizer": "extractive_en + translate + keyword_fallback",
74
+ "faiss_ntotal": getattr(rag.index, "ntotal", 0),
75
+ "model_dim": getattr(rag, "embed_dim", None),
76
  }
77
 
78
  @app.get("/debug/translate")
79
  def debug_translate():
80
+ """
81
+ Simple smoke test for the AZ→EN translator pipeline (if available).
82
+ """
83
  try:
84
+ from transformers import pipeline # type: ignore
85
+ tr = pipeline(
86
+ "translation",
87
+ model="Helsinki-NLP/opus-mt-az-en",
88
+ cache_dir=str(rag.cache_dir),
89
+ device=-1,
90
+ )
91
  out = tr("Sənəd təmiri və quraşdırılması ilə bağlı işlər görülüb.", max_length=80)[0]["translation_text"]
92
  return {"ok": True, "example_out": out}
93
  except Exception as e:
94
+ return {"ok": False, "error": str(e)}
95
 
96
  @app.post("/upload_pdf", response_model=UploadResponse)
97
+ def upload_pdf(file: UploadFile = File(...)):
98
+ """
99
+ Accepts a PDF, extracts text, embeds, and adds to FAISS index.
100
+ """
101
+ name = file.filename or "uploaded.pdf"
102
+ if not name.lower().endswith(".pdf"):
103
+ raise HTTPException(status_code=400, detail="Only .pdf files are accepted.")
104
+
105
+ dest = UPLOAD_DIR / name
106
+ try:
107
+ # Save whole file to disk
108
+ data = file.file.read()
109
+ if not data:
110
+ raise HTTPException(status_code=400, detail="Empty file.")
111
+ dest.write_bytes(data)
112
+ except HTTPException:
113
+ raise
114
+ except Exception as e:
115
+ raise HTTPException(status_code=500, detail=f"Failed to save PDF: {e}")
116
 
117
+ try:
118
+ added = rag.add_pdf(dest)
119
+ if added == 0:
120
+ raise HTTPException(status_code=400, detail="No extractable text found (likely a scanned PDF).")
121
+ except HTTPException:
122
+ raise
123
+ except Exception as e:
124
+ raise HTTPException(status_code=500, detail=f"Indexing failed: {e}")
125
 
126
+ METRICS["last_added_chunks"] = int(added)
127
+ return UploadResponse(
128
+ message="indexed",
129
+ filename=name,
130
+ chunks_added=added,
131
+ total_chunks=len(rag.chunks),
132
+ )
133
 
134
  @app.post("/ask_question", response_model=AskResponse)
135
+ def ask_question(req: AskRequest):
136
+ """
137
+ Retrieves top_k contexts and synthesizes an extractive answer.
138
+ Supports optional scope hint: "all" or "last".
139
+ """
140
+ q = (req.question or "").strip()
141
+ if len(q) < 3:
142
+ raise HTTPException(status_code=400, detail="Question is too short.")
143
+
144
+ start = time.perf_counter()
145
+
146
+ # Prefer calling with scope if rag_system supports it; otherwise fallback.
147
  try:
148
+ pairs = rag.search(q, k=req.top_k, scope=req.scope) # type: ignore[arg-type]
149
+ except TypeError:
150
+ pairs = rag.search(q, k=req.top_k)
 
 
151
 
152
+ contexts = [t for (t, _) in pairs]
153
+ answer = rag.synthesize_answer(q, contexts, max_sentences=4)
 
 
 
 
 
154
 
155
+ # metrics
156
+ elapsed_ms = (time.perf_counter() - start) * 1000.0
157
+ METRICS["questions_answered"] += 1
158
+ n = METRICS["questions_answered"]
159
+ METRICS["avg_ms"] = (METRICS["avg_ms"] * (n - 1) + elapsed_ms) / n
160
 
161
+ # history (cap to last 200)
162
+ HISTORY.append({
163
+ "question": q,
164
+ "timestamp": datetime.now(timezone.utc).isoformat(timespec="seconds"),
165
+ })
166
+ if len(HISTORY) > 200:
167
+ del HISTORY[: len(HISTORY) - 200]
168
 
169
+ return AskResponse(answer=answer, contexts=contexts, used_top_k=int(req.top_k))
 
 
170
 
171
  @app.get("/get_history", response_model=HistoryResponse)
172
  def get_history():
173
+ return {"total_chunks": len(rag.chunks), "history": HISTORY[-50:]}
 
 
 
174
 
175
  @app.get("/stats")
176
+ def stats():
177
  return {
178
+ "documents_indexed": len(list(UPLOAD_DIR.glob("*.pdf"))),
179
+ "questions_answered": METRICS["questions_answered"],
180
+ "avg_ms": round(float(METRICS["avg_ms"]), 2),
181
+ "last7_questions": METRICS.get("last7_questions", []),
182
  "total_chunks": len(rag.chunks),
183
+ "faiss_ntotal": getattr(rag.index, "ntotal", 0),
184
+ "model_dim": getattr(rag, "embed_dim", None),
185
+ "last_added_chunks": METRICS.get("last_added_chunks", 0),
186
+ "version": __version__,
187
  }
188
 
189
  @app.post("/reset_index")
 
192
  rag.index = faiss.IndexFlatIP(rag.embed_dim)
193
  rag.chunks = []
194
  rag.last_added = []
195
+ # remove persisted files if present
196
+ (INDEX_DIR / "faiss.index").unlink(missing_ok=True)
197
+ (INDEX_DIR / "meta.npy").unlink(missing_ok=True)
198
+ # persist empty state
199
+ rag._persist()
200
+ return {"message": "index reset", "ntotal": getattr(rag.index, "ntotal", 0)}
 
 
 
 
 
201
  except Exception as e:
202
  raise HTTPException(status_code=500, detail=str(e))