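"""RAG-as-a-Service demo: an in-memory FAISS index over MiniLM sentence embeddings, served through a Gradio UI."""
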
import gradio as gr
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer

# --- minimal core (in-memory only) ---
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
_model = SentenceTransformer(MODEL_NAME)
_dim = int(_model.encode(["_probe_"], convert_to_numpy=True).shape[1])  # 384

_index = faiss.IndexFlatIP(_dim)  # cosine via L2-normalized IP
_ids, _texts, _metas = [], [], []

def _normalize(v: np.ndarray) -> np.ndarray:
    n = np.linalg.norm(v, axis=1, keepdims=True) + 1e-12
    return (v / n).astype("float32")

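# Character-level sliding-window chunker: whitespace-normalize the text, then emit
# (chunk_text, start, end) windows of `size` characters with `overlap` characters of overlap.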
def _chunk(text: str, size: int, overlap: int):
    t = " ".join((text or "").split())
    n = len(t); s = 0; out = []
    if overlap >= size: overlap = max(size - 1, 0)
    while s < n:
        e = min(s + size, n)
        out.append((t[s:e], s, e))
        if e == n: break
        s = max(e - overlap, 0)
    return out

def reset():
    global _index, _ids, _texts, _metas
    _index = faiss.IndexFlatIP(_dim)
    _ids, _texts, _metas = [], [], []
    return gr.update(value="Index reset."), gr.update(value=0)

def load_sample():
    # Two sample documents, one per line (matches the ingest format).
    docs = [
        "PySpark scales ETL across clusters.",
        "FAISS powers fast vector similarity search used in retrieval.",
    ]
    return "\n".join(docs)

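# Ingest: treat each non-empty line as one document, chunk it, embed the chunks with MiniLM,
# L2-normalize, and add them to the FAISS index alongside the parallel id/text/meta lists.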
def ingest(docs_text, size, overlap):
    if not docs_text.strip():
        return "Provide at least one line of text.", len(_ids)
    # one document per line
    lines = [ln.strip() for ln in docs_text.splitlines() if ln.strip()]
    rows = []
    for i, ln in enumerate(lines):
        pid = f"doc-{len(_ids)}-{i}"
        for ctext, s, e in _chunk(ln, size, overlap):
            rows.append((f"{pid}::offset:{s}-{e}", ctext, {"parent_id": pid, "start": s, "end": e}))
    if not rows:
        return "No chunks produced.", len(_ids)
    vecs = _normalize(_model.encode([r[1] for r in rows], convert_to_numpy=True))
    _index.add(vecs)
    for rid, txt, meta in rows:
        _ids.append(rid); _texts.append(txt); _metas.append(meta)
    return f"Ingested docs={len(lines)} chunks={len(rows)}", len(_ids)

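# Answer: embed the query, retrieve the top-k chunks by cosine similarity, and build a simple
# extractive answer from the best match (no LLM generation in this minimal demo).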
def answer(q, k, max_context_chars):
    if _index.ntotal == 0:
        return {"answer": "Index is empty. Ingest first.", "matches": []}
    qv = _normalize(_model.encode([q], convert_to_numpy=True))
    D, I = _index.search(qv, int(k))

    matches = []
    for i, s in zip(I[0].tolist(), D[0].tolist()):
        if i < 0:
            continue
        matches.append({
            "id": _ids[i],
            "score": float(s),
            "text": _texts[i],
            "meta": _metas[i]
        })

    if not matches:
        out = "No relevant context."
    else:
        # Use only the top match for the answer, truncated to the "Max context chars" budget.
        top = matches[0]["text"][: int(max_context_chars)]
        out = f"Based on retrieved context:\n- {top}"

    return {"answer": out, "matches": matches}

with gr.Blocks(title="RAG-as-a-Service") as demo:
    gr.Markdown("### RAG-as-a-Service - Gradio\nIn-memory FAISS + MiniLM; one-line-per-doc ingest; quick answers.")

    with gr.Row():
        with gr.Column():
            docs = gr.Textbox(label="Documents (one per line)", lines=6, placeholder="One document per line…")
            with gr.Row():
                chunk_size = gr.Slider(64, 1024, value=256, step=16, label="Chunk size")
                overlap = gr.Slider(0, 256, value=32, step=8, label="Overlap")
            with gr.Row():
                ingest_btn = gr.Button("Ingest")
                sample_btn = gr.Button("Load sample")
                reset_btn = gr.Button("Reset")
            ingest_status = gr.Textbox(label="Ingest status", interactive=False)
            index_size = gr.Number(label="Index size", interactive=False, value=0)
        with gr.Column():
            q = gr.Textbox(label="Query", placeholder="Ask something...")
            k = gr.Slider(1, 10, value=5, step=1, label="Top-K")
            max_chars = gr.Slider(200, 4000, value=1000, step=100, label="Max context chars")
            run = gr.Button("Answer")
            out = gr.JSON(label="Answer + matches")

    ingest_btn.click(
        ingest,
        [docs, chunk_size, overlap],
        [ingest_status, index_size],
        api_name="ingest",   # exposes POST /api/ingest
    )
    sample_btn.click(load_sample, None, docs)
    reset_btn.click(
        reset,
        None,
        [ingest_status, index_size],
        api_name="reset",    # exposes POST /api/reset (optional)
    )
    run.click(
        answer,
        [q, k, max_chars],
        out,
        api_name="answer",   # exposes POST /api/answer
    )

if __name__ == "__main__":
    demo.launch(share=True)
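
# Minimal client sketch (an assumption, not part of the original app): with this demo running
# locally and the `gradio_client` package installed, the named endpoints above can be called
# programmatically, e.g.:
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860")  # placeholder URL for a local launch
#   client.predict("PySpark scales ETL across clusters.", 256, 32, api_name="/ingest")
#   client.predict("What does FAISS do?", 5, 1000, api_name="/answer")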