07Codex07 committed
Commit 30f67dd · 1 Parent(s): 463a7b5

Initial PrepGraph backend

Files changed (5)
  1. chatbot_graph.py +210 -0
  2. chatbot_retriever.py +417 -0
  3. main_api.py +309 -0
  4. memory_store.py +110 -0
  5. requirements.txt +10 -0
chatbot_graph.py ADDED
@@ -0,0 +1,210 @@
# chatbot_graph.py
import os
from dotenv import load_dotenv
import gradio as gr
import logging
from typing import List

load_dotenv()
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# LLM client (Groq wrapper)
try:
    from langchain_groq import ChatGroq
except Exception:
    ChatGroq = None
    logger.warning("langchain_groq.ChatGroq not importable. Ensure langchain-groq is installed in requirements.")

from langchain_core.messages import SystemMessage, HumanMessage, AIMessage

from chatbot_retriever import retrieve_node_from_rows
from memory_store import init_db, save_message, get_last_messages, build_gradio_history

# initialize DB early
init_db()

# Instantiate Groq LLM (will require GROQ_API_KEY in env)
GROQ_MODEL = os.getenv("GROQ_MODEL", "llama-3.1-8b-instant")
GROQ_API_KEY = os.getenv("GROQ_API_KEY", None)
GROQ_TEMP = float(os.getenv("GROQ_TEMP", "0.2"))

if ChatGroq:
    llm = ChatGroq(model=GROQ_MODEL, api_key=GROQ_API_KEY, temperature=GROQ_TEMP)
else:
    llm = None


def _extract_answer_from_response(response):
    # robust extraction helper: handle the common shapes returned by LLM wrappers (simplified)
    try:
        if hasattr(response, "content"):
            c = response.content
            if isinstance(c, str) and c.strip():
                return c.strip()
            if isinstance(c, (list, tuple)):
                parts = [str(x) for x in c if x is not None]
                if parts:
                    return "".join(parts).strip()
            if isinstance(c, dict):
                for key in ("answer", "text", "content", "output_text", "generated_text"):
                    v = c.get(key)
                    if v:
                        if isinstance(v, (list, tuple)):
                            return "".join([str(x) for x in v]).strip()
                        return str(v).strip()
        if isinstance(response, dict):
            for key in ("answer", "text", "content"):
                v = response.get(key)
                if v:
                    return str(v)
            choices = response.get("choices") or response.get("outputs")
            if isinstance(choices, (list, tuple)) and choices:
                first = choices[0]
                if isinstance(first, dict):
                    msg = first.get("message") or first.get("text") or first.get("content")
                    if msg:
                        if isinstance(msg, (list, tuple)):
                            return "".join([str(x) for x in msg])
                        return str(msg)
        if hasattr(response, "generations"):
            gens = getattr(response, "generations")
            if gens:
                for outer in gens:
                    for g in outer:
                        if hasattr(g, "text") and g.text:
                            return str(g.text)
                        if hasattr(g, "message") and getattr(g.message, "content", None):
                            return str(g.message.content)
        s = str(response)
        if s and s.strip():
            return s.strip()
    except Exception:
        logger.exception("Failed extracting answer")
    return None


SYSTEM_PROMPT = (
    "You are PrepGraph — an accurate, concise AI tutor specialized in academic and technical content.\n"
    "Rules:\n"
    "1) Always prioritize answering the CURRENT user question directly and clearly.\n"
    "2) Refer to provided CONTEXT (delimited below) if relevant. Cite which doc (filename) or say 'from provided context' when applicable.\n"
    "3) If the current query is unclear, use ONLY the immediate previous user question to infer intent — not older ones.\n"
    "4) Provide step-by-step explanations when appropriate, using short, structured points.\n"
    "5) Include ASCII diagrams or flowcharts if they help understanding (e.g., for protocols, layers, architectures, etc.).\n"
    "6) If the context is insufficient or ambiguous, clearly say 'I’m unsure' and specify what extra information is needed.\n"
    "7) Avoid repetition, speculation, and hallucination — answer precisely what is asked.\n\n"
    "CONTEXT:\n"
)

# ---- helper: call the LLM with a list of messages (SystemMessage + HumanMessage...) ----
def call_llm(messages: List):
    if not llm:
        raise RuntimeError("LLM client (ChatGroq) not configured or import failed. Set up langchain_groq and GROQ_API_KEY.")
    # many wrappers accept the langchain message objects; keep using llm.invoke
    response = llm.invoke(messages)
    return response


# ---- Gradio UI functions ----
def load_history(user_id: str):
    uid = (user_id or os.getenv("DEFAULT_USER", "vinayak")).strip() or "vinayak"
    try:
        hist = build_gradio_history(uid)
        logger.info("Loaded %d messages for user %s", len(hist), uid)
        return hist
    except Exception:
        logger.exception("Failed to load history for %s", uid)
        return []


def chat_interface(user_input: str, chat_state: List[dict], user_id: str):
    """
    Receives user_input (string), chat_state (list of {'role':..., 'content':...}),
    user_id (string). Returns: (clear_input_str, new_chat_state)
    """
    uid = (user_id or os.getenv("DEFAULT_USER", "vinayak")).strip() or "vinayak"
    history = chat_state or []

    # Save user's message immediately
    try:
        save_message(uid, "user", user_input)
    except Exception:
        logger.exception("Failed to persist user message")

    # Build rows to pass to retriever: get last messages from DB (ensures persistence)
    rows = get_last_messages(uid, limit=200)  # chronological order

    # Retrieve context using hybrid retriever (uses last 3 user messages internally)
    try:
        retrieved = retrieve_node_from_rows(rows)
        context = retrieved.get("context")
    except Exception:
        logger.exception("Retriever failed")
        context = None

    # Build prompt: SystemMessage + last 3 user messages (HumanMessage)
    prompt_msgs = []
    system_content = SYSTEM_PROMPT + (context or "No context found.")
    prompt_msgs.append(SystemMessage(content=system_content))

    # collect last 3 user messages (from rows)
    last_users = [r[1] for r in rows if r[0] == "user"][-3:]
    if not last_users:
        # fallback to current input if DB empty
        last_users = [user_input]
    # append each of the last user messages as HumanMessage (preserves order)
    for u in last_users:
        prompt_msgs.append(HumanMessage(content=u))

    # send to LLM
    try:
        raw = call_llm(prompt_msgs)
        answer = _extract_answer_from_response(raw) or ""
    except Exception as e:
        logger.exception("LLM call failed")
        answer = f"Sorry — I couldn't process that right now ({e})."

    # persist assistant reply
    try:
        save_message(uid, "assistant", answer)
    except Exception:
        logger.exception("Failed to persist assistant message")

    # update gradio chat state: append current user and assistant
    history = history or load_history(uid)  # in case front-end was empty, rehydrate
    history.append({"role": "user", "content": user_input})
    history.append({"role": "assistant", "content": answer})

    # return: clear the input box (""), updated history for gr.Chatbot(type="messages")
    return "", history


# ---- Minimal / attractive Gradio UI ----
with gr.Blocks(css=".gradio-container {max-width:900px; margin:0 auto;}") as demo:
    gr.Markdown("# 🤖 PrepGraph — RAG Tutor")
    with gr.Row():
        user_id_input = gr.Textbox(label="User ID (will be used to persist your memory)", value=os.getenv("DEFAULT_USER", "vinayak"))
    chatbot = gr.Chatbot(label="Conversation", type="messages")

    with gr.Row():
        msg = gr.Textbox(placeholder="Ask anything about your course material...", show_label=False)
        send = gr.Button("Send")

    with gr.Row():
        clear_ui = gr.Button("Clear Chat")

    # Load history at page load (and when user_id changes)
    demo.load(load_history, [user_id_input], [chatbot])
    user_id_input.change(load_history, [user_id_input], [chatbot])

    # Bind send
    msg.submit(chat_interface, [msg, chatbot, user_id_input], [msg, chatbot])
    send.click(chat_interface, [msg, chatbot, user_id_input], [msg, chatbot])

    # just clears the UI, not the DB
    clear_ui.click(lambda: [], None, chatbot)

if __name__ == "__main__":
    demo.launch()
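
Note: the chat path above can also be exercised without launching the Gradio UI. The sketch below is a minimal smoke test under the assumption that GROQ_API_KEY is set in .env and data/ holds at least one PDF or PPTX; the user id and question are placeholders.

# smoke_test_chat.py — minimal check of the chat path without the Gradio UI (illustrative only)
from chatbot_graph import chat_interface, load_history

history = load_history("demo")                      # rehydrate any persisted turns for this user id
_, history = chat_interface("Explain the OSI model in brief.", history, "demo")
print(history[-1]["content"])                       # assistant reply (also persisted to sqlite)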
chatbot_retriever.py ADDED
@@ -0,0 +1,417 @@
# chatbot_retriever.py
"""
Hybrid retriever:
- loads PDFs & PPTX (robust imports)
- chunks via RecursiveCharacterTextSplitter
- BM25 (rank_bm25) + FAISS (IVF when possible) using SentenceTransformers
- returns a combined context string limited by MAX_CONTEXT_CHARS
"""

import os
import re
import pickle
import logging
import shutil
import random
from typing import List, Optional, Dict, Any

import numpy as np
import faiss

from rank_bm25 import BM25Okapi

# Document loaders: try langchain first, then the community package
try:
    from langchain.document_loaders import PyPDFLoader, UnstructuredPowerPointLoader
except Exception:
    # fallback to the langchain-community package
    try:
        from langchain_community.document_loaders import PyPDFLoader, UnstructuredPowerPointLoader
    except Exception:
        raise ImportError("Please install langchain + langchain-community (or upgrade).")

from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer

# ---------- Config ----------
DATA_DIR = os.getenv("DATA_DIR", "data")
CACHE_DIR = os.getenv("CACHE_DIR", ".ragg_cache")
CHUNKS_CACHE = os.path.join(CACHE_DIR, "chunks.pkl")
BM25_CACHE = os.path.join(CACHE_DIR, "bm25.pkl")

FAISS_DIR = os.getenv("FAISS_DIR", "faiss_index")
FAISS_INDEX_PATH = os.path.join(FAISS_DIR, "index.faiss")
FAISS_META_PATH = os.path.join(FAISS_DIR, "meta.pkl")

os.makedirs(CACHE_DIR, exist_ok=True)
os.makedirs(FAISS_DIR, exist_ok=True)

CHUNK_SIZE = int(os.getenv("CHUNK_SIZE", 400))
CHUNK_OVERLAP = int(os.getenv("CHUNK_OVERLAP", 80))
EMBED_MODEL = os.getenv("EMBED_MODEL", "all-MiniLM-L6-v2")

TOP_K_DOCS = int(os.getenv("TOP_K_DOCS", 3))
MAX_CONTEXT_CHARS = int(os.getenv("MAX_CONTEXT_CHARS", 4000))

# FAISS params
BATCH_SIZE = int(os.getenv("BATCH_SIZE", 256))
FAISS_NLIST = int(os.getenv("FAISS_NLIST", 100))
FAISS_TRAIN_SIZE = int(os.getenv("FAISS_TRAIN_SIZE", 2000))
FAISS_NPROBE = int(os.getenv("FAISS_NPROBE", 10))
SEARCH_EXPANSION = int(os.getenv("FAISS_SEARCH_EXPANSION", 5))

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


def detect_subject(fname: str) -> Optional[str]:
    # light heuristic to guess subject code from filename
    t = (fname or "").lower()
    if "network" in t or "cn" in t:
        return "cn"
    if "distributed" in t or "dos" in t:
        return "dos"
    if "software" in t or "se" in t:
        return "se"
    return None


def extract_year(s: str) -> Optional[str]:
    m = re.search(r"\b(20\d{2})\b", s)
    return m.group(1) if m else None


# ---------- Embeddings wrapper (SentenceTransformers) ----------
class Embeddings:
    def __init__(self, model_name=EMBED_MODEL):
        self.model_name = model_name
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        vecs = self.model.encode(texts, show_progress_bar=False, convert_to_numpy=True)
        return [v.astype("float32") for v in vecs]

    def embed_query(self, text: str) -> List[float]:
        v = self.model.encode([text], show_progress_bar=False, convert_to_numpy=True)[0]
        return v.astype("float32")


# ---------- Load documents ----------
def load_all_docs(base_dir: str = DATA_DIR) -> List:
    docs = []
    if not os.path.isdir(base_dir):
        logger.warning("Data dir does not exist: %s", base_dir)
        return docs

    def load_file(path: str, filename: str, category: str):
        try:
            fname = filename.lower()
            if fname.endswith(".pdf"):
                loader = PyPDFLoader(path)
            elif fname.endswith(".pptx"):
                loader = UnstructuredPowerPointLoader(path)
            else:
                return []
            file_docs = loader.load()
            subject = detect_subject(fname)
            year = extract_year(fname)
            for d in file_docs:
                d.metadata["subject"] = subject
                d.metadata["filename"] = filename
                d.metadata["category"] = category
                if year:
                    d.metadata["year"] = year
            return file_docs
        except Exception:
            logger.exception("Failed to load %s", filename)
            return []

    # root files
    for file in os.listdir(base_dir):
        path = os.path.join(base_dir, file)
        if os.path.isfile(path) and (file.lower().endswith(".pdf") or file.lower().endswith(".pptx")):
            docs.extend(load_file(path, file, "syllabus"))

    # optional pyqs directory
    pyqs_dir = os.path.join(base_dir, "pyqs")
    if os.path.isdir(pyqs_dir):
        for file in os.listdir(pyqs_dir):
            path = os.path.join(pyqs_dir, file)
            if os.path.isfile(path) and file.lower().endswith(".pdf"):
                docs.extend(load_file(path, file, "pyq"))

    logger.info("Loaded %d raw document pages", len(docs))
    return docs


# ---------- Build / load FAISS + BM25 ----------
def build_or_load_indexes(force_reindex: bool = False):
    if os.getenv("FORCE_REINDEX", "0").lower() in ("1", "true", "yes"):
        force_reindex = True

    docs = load_all_docs(DATA_DIR)
    if not docs:
        logger.warning("No documents found. Returning empty indexes.")
        return [], None, [], [], None

    # chunking
    if os.path.exists(CHUNKS_CACHE) and not force_reindex:
        with open(CHUNKS_CACHE, "rb") as f:
            chunks = pickle.load(f)
        logger.info("Loaded %d chunks from cache.", len(chunks))
    else:
        splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
        chunks = splitter.split_documents(docs)
        with open(CHUNKS_CACHE, "wb") as f:
            pickle.dump(chunks, f)
        logger.info("Created and cached %d chunks.", len(chunks))

    corpus_texts = [c.page_content for c in chunks]

    # BM25
    if os.path.exists(BM25_CACHE) and not force_reindex:
        try:
            with open(BM25_CACHE, "rb") as f:
                bm25_data = pickle.load(f)
            bm25 = bm25_data.get("bm25")
            tokenized = bm25_data.get("tokenized", [])
            logger.info("Loaded BM25 from cache (n=%d)", len(corpus_texts))
        except Exception:
            logger.exception("Failed to load BM25 cache — rebuilding")
            tokenized = [re.findall(r"\w+", t.lower()) for t in corpus_texts]
            bm25 = BM25Okapi(tokenized)
            with open(BM25_CACHE, "wb") as f:
                pickle.dump({"bm25": bm25, "tokenized": tokenized}, f)
    else:
        tokenized = [re.findall(r"\w+", t.lower()) for t in corpus_texts]
        bm25 = BM25Okapi(tokenized)
        try:
            with open(BM25_CACHE, "wb") as f:
                pickle.dump({"bm25": bm25, "tokenized": tokenized}, f)
        except Exception:
            logger.warning("Could not write BM25 cache")

    # Embeddings
    embeddings = Embeddings()

    metadatas = [c.metadata for c in chunks]

    # load existing faiss index
    if os.path.exists(FAISS_INDEX_PATH) and os.path.exists(FAISS_META_PATH) and not force_reindex:
        try:
            index = faiss.read_index(FAISS_INDEX_PATH)
            with open(FAISS_META_PATH, "rb") as f:
                meta = pickle.load(f)
            texts = meta.get("texts", corpus_texts)
            try:
                index.nprobe = FAISS_NPROBE
            except Exception:
                pass
            logger.info("Loaded FAISS index from disk (%s), entries=%d", FAISS_INDEX_PATH, len(texts))
            return chunks, bm25, tokenized, corpus_texts, {"index": index, "texts": texts, "metadatas": metadatas, "embeddings": embeddings}
        except Exception:
            logger.exception("Failed to load FAISS index; rebuilding")

    # force reindex cleanup
    if force_reindex:
        try:
            shutil.rmtree(FAISS_DIR, ignore_errors=True)
            os.makedirs(FAISS_DIR, exist_ok=True)
        except Exception:
            pass

    # Build FAISS (memory-aware, batch)
    logger.info("Building FAISS index (nlist=%d). This may take a while...", FAISS_NLIST)
    total = len(corpus_texts)
    sample_size = min(total, FAISS_TRAIN_SIZE)
    sample_indices = random.sample(range(total), sample_size) if sample_size < total else list(range(total))

    sample_embs = []
    for i in range(0, len(sample_indices), BATCH_SIZE):
        batch_idx = sample_indices[i:i + BATCH_SIZE]
        batch_texts = [corpus_texts[j] for j in batch_idx]
        try:
            batch_vecs = embeddings.embed_documents(batch_texts)
        except Exception:
            batch_vecs = [embeddings.embed_query(t) for t in batch_texts]
        sample_embs.extend(batch_vecs)

    sample_np = np.array(sample_embs, dtype="float32")
    if sample_np.ndim == 1:
        sample_np = sample_np.reshape(1, -1)
    d = sample_np.shape[1]
    n_train_samples = sample_np.shape[0]

    use_ivf = True
    if n_train_samples < FAISS_NLIST:
        logger.warning("Not enough training samples (%d) for FAISS_NLIST=%d — using Flat index", n_train_samples, FAISS_NLIST)
        use_ivf = False

    try:
        if use_ivf:
            index_desc = f"IVF{FAISS_NLIST},Flat"
            index = faiss.index_factory(d, index_desc, faiss.METRIC_L2)
            if not index.is_trained:
                try:
                    index.train(sample_np)
                    logger.info("Trained IVF on %d samples", n_train_samples)
                except Exception:
                    logger.exception("IVF training failed — falling back to Flat")
                    index = faiss.index_factory(d, "Flat", faiss.METRIC_L2)
        else:
            index = faiss.index_factory(d, "Flat", faiss.METRIC_L2)
    except Exception:
        logger.exception("Failed to create FAISS index — using Flat")
        index = faiss.index_factory(d, "Flat", faiss.METRIC_L2)

    # add vectors in batches
    added = 0
    for i in range(0, total, BATCH_SIZE):
        batch_texts = corpus_texts[i:i + BATCH_SIZE]
        try:
            batch_vecs = embeddings.embed_documents(batch_texts)
        except Exception:
            batch_vecs = [embeddings.embed_query(t) for t in batch_texts]
        batch_np = np.array(batch_vecs, dtype="float32")
        if batch_np.ndim == 1:
            batch_np = batch_np.reshape(1, -1)
        index.add(batch_np)
        added += batch_np.shape[0]
        logger.info("FAISS: added %d / %d vectors", added, total)

    try:
        index.nprobe = FAISS_NPROBE
    except Exception:
        pass

    try:
        faiss.write_index(index, FAISS_INDEX_PATH)
        with open(FAISS_META_PATH, "wb") as f:
            pickle.dump({"texts": corpus_texts}, f)
        logger.info("FAISS index saved to %s (entries=%d)", FAISS_INDEX_PATH, total)
    except Exception:
        logger.exception("Failed to persist FAISS index on disk")

    return chunks, bm25, tokenized, corpus_texts, {"index": index, "texts": corpus_texts, "metadatas": metadatas, "embeddings": embeddings}


# ---------- Hybrid retrieve ----------
def _ensure_index_built():
    if not hasattr(hybrid_retrieve, "_index_built") or not hybrid_retrieve._index_built:
        hybrid_retrieve._chunks, hybrid_retrieve._bm25, hybrid_retrieve._tokenized, hybrid_retrieve._corpus, hybrid_retrieve._faiss = build_or_load_indexes()
        hybrid_retrieve._index_built = True


def _faiss_search(query: str, top_k: int = TOP_K_DOCS, subject: Optional[str] = None):
    faiss_data = hybrid_retrieve._faiss
    if not faiss_data:
        return []

    index = faiss_data.get("index")
    texts = faiss_data.get("texts", [])
    metadatas = faiss_data.get("metadatas", [{}] * len(texts))
    embeddings = faiss_data.get("embeddings")

    try:
        q_vec = embeddings.embed_query(query)
    except Exception:
        q_vec = embeddings.embed_documents([query])[0]

    q_np = np.array(q_vec, dtype="float32").reshape(1, -1)
    search_k = max(top_k * SEARCH_EXPANSION, top_k)
    try:
        distances, indices = index.search(q_np, int(search_k))
    except Exception:
        distances, indices = index.search(q_np, int(top_k))

    results = []
    for dist, idx in zip(distances[0], indices[0]):
        if idx < 0 or idx >= len(texts):
            continue
        meta = metadatas[idx]
        if subject and meta.get("subject") != subject:
            continue
        score_like = float(-dist)
        results.append((score_like, meta, texts[idx]))
        if len(results) >= top_k:
            break

    return results


def hybrid_retrieve(query: str, subject: Optional[str] = None, top_k: int = TOP_K_DOCS, max_chars: int = MAX_CONTEXT_CHARS) -> Dict[str, Any]:
    if not query:
        return {"context": None, "bm25_docs": [], "faiss_docs": [], "meta": []}

    _ensure_index_built()

    chunks = hybrid_retrieve._chunks
    bm25 = hybrid_retrieve._bm25

    # BM25
    results_bm25 = []
    try:
        if bm25:
            q_tokens = re.findall(r"\w+", query.lower())
            scores = bm25.get_scores(q_tokens)
            ranked_idx = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:top_k]
            for i in ranked_idx:
                results_bm25.append((float(scores[i]), chunks[i].metadata, chunks[i].page_content))
    except Exception:
        logger.exception("BM25 search failed")

    # FAISS
    results_faiss = []
    try:
        results_faiss = _faiss_search(query, top_k=top_k, subject=subject)
    except Exception:
        logger.exception("FAISS search failed")

    # Merge and dedupe by text
    merged_texts = []
    merged_meta = []
    for score, meta, text in results_bm25:
        if text and text.strip() and text not in merged_texts:
            merged_texts.append(text)
            merged_meta.append({"source": meta.get("filename"), "subject": meta.get("subject"), "score": score})
    for score, meta, text in results_faiss:
        if text and text.strip() and text not in merged_texts:
            merged_texts.append(text)
            merged_meta.append({"source": meta.get("filename") if isinstance(meta, dict) else None, "subject": meta.get("subject") if isinstance(meta, dict) else None, "score": score})

    # compose context parts with headers
    context_parts = []
    for i, t in enumerate(merged_texts):
        header = f"\n\n===== DOC {i+1} =====\n"
        context_parts.append(header + t)
    context = "\n".join(context_parts).strip()
    if not context:
        return {"context": None, "bm25_docs": results_bm25, "faiss_docs": results_faiss, "meta": merged_meta}

    if len(context) > max_chars:
        context = context[:max_chars].rstrip() + "..."

    return {"context": context, "bm25_docs": results_bm25, "faiss_docs": results_faiss, "meta": merged_meta}


# ---------- retrieve_node (for reuse) ----------
def _last_n_user_messages(rows: List[tuple], n: int = 3) -> List[str]:
    """Return the last `n` user messages (chronological) for retrieval context."""
    users = [r[1] for r in rows if r[0] == "user"]
    return users[-n:]


def retrieve_node_from_rows(rows: List[tuple], top_k: int = TOP_K_DOCS) -> Dict[str, Any]:
    last_users = _last_n_user_messages(rows, n=3)
    current_query = " ".join(last_users).strip() if last_users else ""
    if not current_query:
        return {"context": None, "direct": False}
    detected = None
    try:
        detected = detect_subject(current_query)
    except Exception:
        detected = None
    result = hybrid_retrieve(current_query, subject=detected, top_k=top_k, max_chars=MAX_CONTEXT_CHARS)
    return {"context": result.get("context"), "direct": False}
main_api.py ADDED
@@ -0,0 +1,309 @@
# main_api.py
import os
import logging
import traceback
from typing import Optional, List, Dict, Any
import tiktoken

from fastapi import FastAPI, HTTPException, BackgroundTasks, UploadFile, File, Form
from fastapi.responses import JSONResponse, FileResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
import uvicorn

# import the existing modules (assumed in same directory)
from memory_store import init_db, save_message, get_last_messages, clear_user_memory, build_gradio_history
from chatbot_retriever import build_or_load_indexes, hybrid_retrieve, retrieve_node_from_rows, load_all_docs
from chatbot_graph import SYSTEM_PROMPT, call_llm, _extract_answer_from_response

# ----------------- CORS SETUP -----------------
from fastapi.middleware.cors import CORSMiddleware

app = FastAPI(title="RAG Chat Backend", version="1.0")

app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        "http://localhost:5173",
        "http://127.0.0.1:5173",
    ],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# ------------------------------------------------

from dotenv import load_dotenv
load_dotenv()

logger = logging.getLogger("rag_api")
logging.basicConfig(level=logging.INFO)
logger.setLevel(logging.INFO)

# initialize DB now
init_db()

# Global in-memory flag/object to check indexes loaded (populated by build_or_load_indexes)
INDEXES = {"built": False, "info": None}


# ---------- Pydantic models ----------
class ChatRequest(BaseModel):
    user_id: Optional[str] = None
    message: str


class ChatResponse(BaseModel):
    user_id: str
    message: str
    assistant: str
    history: List[Dict[str, str]]


class RetrieveResponse(BaseModel):
    query: str
    context: Optional[str]
    meta: List[Dict[str, Any]]


# ---------- helpers ----------
def ensure_indexes(force_reindex: bool = False):
    """
    Build or load indexes synchronously. This wraps build_or_load_indexes from chatbot_retriever.
    """
    if INDEXES["built"] and not force_reindex:
        return INDEXES["info"]
    try:
        chunks, bm25, tokenized, corpus_texts, faiss_data = build_or_load_indexes(force_reindex=force_reindex)
        INDEXES["built"] = True
        INDEXES["info"] = {"chunks_len": len(chunks) if chunks else 0, "corpus_len": len(corpus_texts) if corpus_texts else 0}
        return INDEXES["info"]
    except Exception:
        logger.exception("Index build/load failed")
        raise


# ===== Token limiter helper =====
enc = tiktoken.get_encoding("cl100k_base")

def trim_to_token_limit(texts, limit=4000):
    """Join text chunks until the token limit is reached."""
    joined = ""
    for t in texts:
        if len(enc.encode(joined + t)) > limit:
            break
        joined += t + "\n"
    return joined


def extract_history_for_frontend(user_id: str, limit: int = 500):
    return build_gradio_history(user_id)


# ---------- Routes ----------
@app.get("/health")
def health():
    """Basic health check."""
    return {"status": "ok", "indexes_built": INDEXES["built"]}


@app.post("/reindex")
def reindex(force: Optional[bool] = False):
    """
    Force rebuild of indexes. This calls the same build_or_load_indexes used by the retriever module.
    Use ?force=true to force.
    """
    try:
        info = ensure_indexes(force_reindex=bool(force))
        return {"status": "ok", "info": info}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to build indexes: {e}")


@app.post("/upload")
async def upload_file(file: UploadFile = File(...), category: Optional[str] = Form("syllabus")):
    """
    Upload PDF/PPTX into DATA_DIR (same dir used by chatbot_retriever.load_all_docs).
    After upload you may call /reindex to include the file.
    """
    from chatbot_retriever import DATA_DIR  # keep using same constant
    os.makedirs(DATA_DIR, exist_ok=True)
    dest_path = os.path.join(DATA_DIR, file.filename)
    try:
        with open(dest_path, "wb") as f:
            content = await file.read()
            f.write(content)
        return {"status": "ok", "filename": file.filename, "saved_to": dest_path}
    except Exception as e:
        logger.exception("upload failed")
        raise HTTPException(status_code=500, detail=str(e))


@app.get("/docs_list")
def docs_list():
    """List files in DATA_DIR (documents available to retriever)."""
    from chatbot_retriever import DATA_DIR
    if not os.path.isdir(DATA_DIR):
        return {"files": []}
    files = [f for f in os.listdir(DATA_DIR) if os.path.isfile(os.path.join(DATA_DIR, f))]
    return {"files": files}


@app.get("/retrieve", response_model=RetrieveResponse)
def retrieve(query: str, subject: Optional[str] = None, top_k: Optional[int] = None):
    """
    Directly call the hybrid retriever for a query. Returns context + meta.
    """
    try:
        # ensure indexes built (but don't force)
        ensure_indexes(force_reindex=False)
        # only override top_k when the caller supplied one; otherwise keep the retriever's default
        if top_k:
            res = hybrid_retrieve(query=query, subject=subject, top_k=int(top_k))
        else:
            res = hybrid_retrieve(query=query, subject=subject)
        return {"query": query, "context": res.get("context"), "meta": res.get("meta", [])}
    except Exception as e:
        logger.exception("retrieve failed")
        raise HTTPException(status_code=500, detail=str(e))


@app.get("/history/{user_id}")
def get_history(user_id: str, limit: Optional[int] = 500):
    """Return persisted history for a user (in the same format the frontend expects)."""
    try:
        hist = extract_history_for_frontend(user_id)
        if limit:
            hist = hist[-int(limit):]
        return {"user_id": user_id, "history": hist}
    except Exception as e:
        logger.exception("history fetch failed")
        raise HTTPException(status_code=500, detail=str(e))


@app.post("/memory/clear")
def clear_memory(user_id: str):
    """Clear stored memory for user."""
    try:
        deleted = clear_user_memory(user_id)
        return {"status": "ok", "deleted_rows": deleted}
    except Exception as e:
        logger.exception("clear failed")
        raise HTTPException(status_code=500, detail=str(e))


@app.post("/chat", response_model=ChatResponse)
def chat(req: ChatRequest):
    """
    Main chat endpoint.
    - saves user message
    - fetches last messages from sqlite memory
    - runs retriever to get context
    - builds the system prompt + recent conversation
    - calls the LLM via call_llm (same wrapper imported from chatbot_graph)
    - saves assistant reply and returns it + updated history
    """
    uid = (req.user_id or os.getenv("DEFAULT_USER", "vinayak")).strip() or "vinayak"
    if not req.message:
        raise HTTPException(status_code=400, detail="message is required")

    try:
        # 1) persist user message
        save_message(uid, "user", req.message)

        # 2) get rows (chronological order) for retriever
        rows = get_last_messages(uid, limit=200)

        # 3) ensure indexes exist (non-force)
        try:
            ensure_indexes(force_reindex=False)
        except Exception:
            logger.warning("Indexes not built or failed. retriever may return no context.")

        # 4) run retrieve_node_from_rows to get context (same glue logic as the Gradio app)
        try:
            retrieved = retrieve_node_from_rows(rows)
            context = retrieved.get("context")
        except Exception:
            logger.exception("retriever call failed")
            context = None

        # 5) build system prompt content
        # ===== Combine retrieval context + recent conversation =====
        MAX_TOKENS_CONTEXT = 3000
        NUM_RECENT_TURNS = 2  # last 2 user + assistant pairs

        # Get last few messages (both user + assistant)
        recent_pairs = rows[-(NUM_RECENT_TURNS * 2):]
        recent_chat = "\n".join([f"{r[0].upper()}: {r[1]}" for r in recent_pairs])

        # Trim context to token-safe limit
        context_texts = context.split("\n\n") if context else []
        trimmed_context = trim_to_token_limit(context_texts, limit=MAX_TOKENS_CONTEXT)

        # Final system prompt
        system_content = SYSTEM_PROMPT
        if trimmed_context:
            system_content += "\n\n===== RETRIEVED CONTEXT =====\n" + trimmed_context

        # Always include recent conversation (to maintain chat flow)
        system_content += "\n\n===== RECENT CHAT =====\n" + recent_chat

        # build prompt messages; call_llm expects langchain message objects,
        # so reuse the same SystemMessage/HumanMessage classes as chatbot_graph
        from langchain_core.messages import SystemMessage, HumanMessage
        prompt_msgs = [SystemMessage(content=system_content)]

        # collect the most recent user message
        last_users = [r[1] for r in rows if r[0] == "user"][-1:]
        if not last_users:
            last_users = [req.message]
        for u in last_users:
            prompt_msgs.append(HumanMessage(content=u))

        # 6) call LLM
        try:
            raw = call_llm(prompt_msgs)
            answer = _extract_answer_from_response(raw) or ""
        except Exception as e:
            logger.exception("LLM call failed")
            # If LLM client not configured (ChatGroq missing or no API KEY), return helpful message
            detail = str(e)
            answer = f"LLM call failed: {detail}"

        # 7) persist assistant reply
        try:
            save_message(uid, "assistant", answer)
        except Exception:
            logger.exception("Failed to persist assistant message")

        # 8) build history to return
        history = extract_history_for_frontend(uid)
        return {
            "user_id": uid,
            "message": req.message,
            "assistant": answer,
            "history": history,
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.exception("chat failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e))


# Mount static files for frontend
FRONTEND_DIR = os.path.join(os.path.dirname(__file__), "frontend", "dist")
if os.path.exists(FRONTEND_DIR):
    app.mount("/assets", StaticFiles(directory=os.path.join(FRONTEND_DIR, "assets")), name="assets")

    @app.get("/{full_path:path}")
    async def serve_frontend(full_path: str):
        """Serve the React frontend for all non-API routes"""
        if full_path and not full_path.startswith("api"):
            file_path = os.path.join(FRONTEND_DIR, full_path)
            if os.path.exists(file_path) and os.path.isfile(file_path):
                return FileResponse(file_path)
        return FileResponse(os.path.join(FRONTEND_DIR, "index.html"))


# Run with: uvicorn main_api:app --reload --host 127.0.0.1 --port 8000
if __name__ == "__main__":
    uvicorn.run("main_api:app", host="127.0.0.1", port=8000, reload=True)
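
Once the backend is running (uvicorn main_api:app --reload), the endpoints can be exercised with a small client script. The sketch below uses the requests package (not listed in requirements.txt) and placeholder user_id/query values.

# client_example.py — illustrative calls against the local FastAPI backend
import requests

BASE = "http://127.0.0.1:8000"

print(requests.get(f"{BASE}/health").json())          # {"status": "ok", "indexes_built": ...}

resp = requests.post(f"{BASE}/chat", json={"user_id": "demo", "message": "Summarise TCP vs UDP."})
data = resp.json()
print(data["assistant"])                               # LLM answer
print(len(data["history"]), "messages in history")     # persisted conversation

# retrieval only, no LLM call
r = requests.get(f"{BASE}/retrieve", params={"query": "congestion control", "top_k": 3})
print(r.json()["context"])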
memory_store.py ADDED
@@ -0,0 +1,110 @@
# memory_store.py
import sqlite3
import os
import logging
from typing import List, Tuple

DB_PATH = os.getenv("MEMORY_DB", "chat_memory.db")
MAX_MESSAGES_PER_USER = int(os.getenv("MAX_MESSAGES_PER_USER", 500))

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


def _get_conn():
    # check_same_thread=False so Gradio threads can use the DB concurrently
    return sqlite3.connect(DB_PATH, timeout=10, check_same_thread=False)


def init_db():
    conn = _get_conn()
    try:
        with conn:
            conn.execute(
                """
                CREATE TABLE IF NOT EXISTS memory (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    user_id TEXT,
                    role TEXT,
                    message TEXT,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
                """
            )
    finally:
        conn.close()


def save_message(user_id: str, role: str, message: str) -> None:
    if not user_id:
        raise ValueError("user_id is required")
    conn = _get_conn()
    try:
        with conn:
            conn.execute(
                "INSERT INTO memory (user_id, role, message) VALUES (?, ?, ?)",
                (user_id, role, message),
            )
            # prune if too many
            if MAX_MESSAGES_PER_USER and MAX_MESSAGES_PER_USER > 0:
                cur = conn.execute(
                    "SELECT id FROM memory WHERE user_id = ? ORDER BY id DESC",
                    (user_id,),
                )
                rows = cur.fetchall()
                if len(rows) > MAX_MESSAGES_PER_USER:
                    ids_to_delete = [r[0] for r in rows[MAX_MESSAGES_PER_USER:]]
                    conn.executemany("DELETE FROM memory WHERE id = ?", [(i,) for i in ids_to_delete])
    except Exception:
        logger.exception("Failed to save message for user %s", user_id)
        raise
    finally:
        conn.close()


def get_last_messages(user_id: str, limit: int = 200) -> List[Tuple[str, str, str]]:
    """
    Return last `limit` messages in chronological order as (role, message, created_at)
    """
    conn = _get_conn()
    try:
        cur = conn.cursor()
        cur.execute(
            """
            SELECT role, message, created_at FROM memory
            WHERE user_id = ?
            ORDER BY id DESC
            LIMIT ?
            """,
            (user_id, limit),
        )
        rows = cur.fetchall()
        return list(reversed(rows))
    except Exception:
        logger.exception("Failed to fetch messages for user %s", user_id)
        return []
    finally:
        conn.close()


def clear_user_memory(user_id: str) -> int:
    """Delete memory for user. Returns deleted rowcount."""
    conn = _get_conn()
    try:
        with conn:
            cur = conn.execute("DELETE FROM memory WHERE user_id = ?", (user_id,))
            return cur.rowcount
    except Exception:
        logger.exception("Failed to clear memory for user %s", user_id)
        raise
    finally:
        conn.close()


def build_gradio_history(user_id: str) -> List[dict]:
    """
    Return history formatted for gr.Chatbot with type='messages':
    a chronological list of dicts: {'role': 'user'|'assistant', 'content': '...'}
    """
    rows = get_last_messages(user_id, limit=500)
    return [{"role": r[0], "content": r[1]} for r in rows]
requirements.txt ADDED
@@ -0,0 +1,10 @@
langchain
langchain-community
langchain-groq
sentence-transformers
faiss-cpu
pypdf
unstructured
python-dotenv
gradio
sqlite3-binary
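
Note: the added modules also import fastapi, uvicorn, rank_bm25 (PyPI: rank-bm25), tiktoken, and numpy, which are not listed here, while sqlite3 ships with the Python standard library, so the sqlite3-binary entry is likely unnecessary. Reconciling the list with the actual imports would probably belong in a follow-up commit.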