Modified all files to replace flan-t5 with the Groq API

- Dockerfile +5 -10
- app.py +60 -44
- graph.py +46 -53
- requirements.txt +3 -4
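At its core, the commit drops the local flan-t5 HuggingFace pipeline and routes both chat nodes through Groq's hosted API via langchain-groq. A minimal sketch of the new call path, assuming GROQ_API_KEY is exported; the model name and invocation style are taken from graph.py below:

import os
from langchain_groq import ChatGroq

# Assumes GROQ_API_KEY is set in the environment, as graph.py requires.
api_key = os.getenv("GROQ_API_KEY", "").strip()
if not api_key:
    raise ValueError("GROQ_API_KEY environment variable not set.")

llm = ChatGroq(model="mixtral-8x7b-32768", api_key=api_key, temperature=0.7)

# Both general_chat_node and generate_rag_node reduce to this pattern:
response_obj = llm.invoke("Say hello in one sentence.")
print(getattr(response_obj, "content", str(response_obj)))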
Dockerfile
CHANGED
@@ -3,27 +3,22 @@ FROM python:3.10-slim
WORKDIR /app

RUN apt-get update && apt-get install -y --no-install-recommends \
-    build-essential \
    git \
    curl \
-    libopenblas-dev \
-    libomp-dev \
-    python3-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

-COPY . /app
-
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    LANG=C.UTF-8

-RUN pip install --no-cache-dir
-
+COPY requirements.txt .
+
+RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
+    && pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
EXPOSE 7860

-CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
+CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
app.py
CHANGED
@@ -5,19 +5,11 @@ from pypdf import PdfReader
import hashlib
from transformers import pipeline

+# --- Page Config ---
st.set_page_config(page_title="LangGraph RAG Chatbot", layout="wide")
st.title("📚 LangGraph RAG Chatbot")

-#
-@st.cache_resource(show_spinner=False)
-def cached_vectorstore_from_text(text):
-    try:
-        return load_vectorstore_from_text(text=text)
-    except Exception as e:
-        st.warning(f"Failed to load vectorstore: {e}")
-        return get_retriever(text)
-
-# Helpers
+# --- Helpers ---
def compute_file_hash(raw_text):
    return hashlib.md5(raw_text.encode("utf-8")).hexdigest() if raw_text else None

@@ -34,7 +26,20 @@ def load_uploaded_file(uploaded_file):
        st.error(f"Error reading file: {e}")
    return raw_text

-#
+# --- Cached Vectorstore with Persistent Cache ---
+@st.cache_data(show_spinner=False)
+def cached_vectorstore_from_text(raw_text_hash: str, text: str):
+    """
+    Cache vectorstore based on hash of raw text.
+    If the same text is uploaded again, returns cached retriever.
+    """
+    try:
+        return load_vectorstore_from_text(text=text)
+    except Exception as e:
+        st.warning(f"Failed to load vectorstore: {e}")
+        return get_retriever(text)
+
+# --- Sidebar ---
with st.sidebar:
    st.header("🔧 Settings")
    temperature = st.slider("LLM Temperature", 0.0, 1.0, 0.7)
@@ -47,32 +52,35 @@ with st.sidebar:

    uploaded_file = st.file_uploader("Upload a file (optional)", type=["txt", "pdf"])

-    # Handle file upload
    if uploaded_file:
        raw_text = load_uploaded_file(uploaded_file)
        if raw_text:
-            st.session_state.history = []
-            for key in ["retriever", "file_hash"]:
-                st.session_state.pop(key, None)
-
-            st.session_state.
+            file_hash = compute_file_hash(raw_text)
+
+            # Reset session if new file
+            if st.session_state.get("file_hash") != file_hash:
+                st.session_state.history = []
+                for key in ["retriever", "file_hash"]:
+                    st.session_state.pop(key, None)
+
+            st.session_state.raw_text = raw_text
+            st.session_state.file_hash = file_hash
+            # Persistent cached vectorstore
+            st.session_state.retriever = cached_vectorstore_from_text(file_hash, raw_text)

            st.markdown("**📄 Uploaded File Preview:**")
            st.text_area("Contents", raw_text, height=200)
-            st.success("✅ Document loaded!
+            st.success("✅ Document loaded!")
        else:
            st.warning("Uploaded file is empty or could not be read.")

    # Show current mode
    if "retriever" in st.session_state and st.session_state.retriever:
        st.info("📄 **RAG Mode**: Answering from uploaded document")
    else:
        st.info("💬 **General Chat Mode**: No document loaded")

-# Initialize
+# --- Initialize Summarizer ---
if "summarizer" not in st.session_state:
    st.session_state.summarizer = pipeline(
        "summarization",
@@ -80,7 +88,7 @@ if "summarizer" not in st.session_state:
        device=-1
    )

-# Build Graph
+# --- Build Graph ---
if "graph" not in st.session_state or st.session_state.get("graph_model") != model_type:
    try:
        st.session_state.graph = build_graph(
@@ -93,50 +101,57 @@ if "graph" not in st.session_state or st.session_state.get("graph_model") != model_type:
        st.error(f"Failed to build graph: {e}")
        st.stop()

-#
+# --- Initialize History ---
if "history" not in st.session_state:
    st.session_state.history = []

-# Query Input
+# --- Query Input ---
+if "current_query" not in st.session_state:
+    st.session_state.current_query = ""
+
+query = st.text_input("💬 Ask a question:", key="current_query")
+send_triggered = st.button("Send")
+
+# --- Send Query ---
+if send_triggered and query.strip():
+    formatted_history = [(q, r) for q, r, _ in st.session_state.history]
+
+    with st.spinner("Generating response..."):
        try:
-            # Prepare history in the format expected by the graph (tuples of (query, response))
-            formatted_history = [(q, r) for q, r, _ in st.session_state.history]
-
            result = st.session_state.graph(
                query=query,
                temperature=temperature,
-                raw_text=st.session_state.get("raw_text"
+                raw_text=st.session_state.get("raw_text"),
                history=formatted_history,
                retriever_override=st.session_state.get("retriever")
            )

            response = result.get("response", "No response generated.")
            retrieved_docs = result.get("retrieved_docs", [])

            st.markdown("### 🤖 Response")
            st.markdown(response)

-            #
+            # Save to history
            st.session_state.history.append((query, response, retrieved_docs))

-            # Show retrieved
+            # Show retrieved docs
            if retrieved_docs:
                with st.expander("📄 Retrieved Chunks"):
                    for j, doc in enumerate(retrieved_docs):
                        content = getattr(doc, "text", str(doc))
                        st.markdown(f"**Chunk {j+1}:**")
                        st.code(content.strip(), language="markdown")

+            # Clear input
+            st.session_state.current_query = ""
+
        except Exception as e:
            st.error(f"Query failed: {e}")
+elif send_triggered:
+    st.warning("Please enter a question.")

-#
+# --- Chat History Display ---
if st.session_state.history:
    st.markdown("### 💬 Chat History")
    for i, (q, r, docs) in enumerate(reversed(st.session_state.history)):
@@ -149,7 +164,8 @@ if st.session_state.history:
                content = getattr(doc, "text", str(doc))
                st.code(content.strip()[:200] + "...", language="markdown")

-# Clear
+# --- Clear Chat ---
if st.sidebar.button("🗑️ Clear Chat History"):
    st.session_state.history = []
-    st.
+    st.session_state.current_query = ""
+    st.rerun()
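For context on the upload flow above: the vectorstore is now cached with st.cache_data keyed on an MD5 hash of the uploaded text, so re-uploading the same document reuses the cached retriever instead of re-embedding it. A stripped-down sketch of that pattern, where build_toy_retriever is a hypothetical stand-in for load_vectorstore_from_text / get_retriever:

import hashlib
import streamlit as st

def compute_file_hash(raw_text):
    # Same helper as in app.py: hash the raw text to detect re-uploads.
    return hashlib.md5(raw_text.encode("utf-8")).hexdigest() if raw_text else None

def build_toy_retriever(text):
    # Hypothetical stand-in for load_vectorstore_from_text / get_retriever.
    return [chunk for chunk in text.split("\n\n") if chunk.strip()]

@st.cache_data(show_spinner=False)
def cached_vectorstore_from_text(raw_text_hash, text):
    # st.cache_data keys the cache on its arguments, so an identical upload
    # (same hash, same text) returns the cached result without rebuilding.
    return build_toy_retriever(text)

raw_text = "Chunk one.\n\nChunk two."
file_hash = compute_file_hash(raw_text)
if st.session_state.get("file_hash") != file_hash:
    st.session_state.file_hash = file_hash
    st.session_state.retriever = cached_vectorstore_from_text(file_hash, raw_text)
st.write(st.session_state.retriever)

Note that st.cache_data serializes return values, so this pattern suits picklable retrievers; run the snippet with streamlit run.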
graph.py
CHANGED
@@ -1,14 +1,15 @@
import datetime
import os
+import re
from typing import TypedDict, Optional, List

from llama_index.core.schema import Document
-from langchain.llms.huggingface_pipeline import HuggingFacePipeline
from langchain_google_genai import ChatGoogleGenerativeAI
-from transformers import pipeline as hf_pipeline
from langgraph.graph import StateGraph, END
from llama_index.core import VectorStoreIndex
from llama_index.core.retrievers import BaseRetriever
+from langchain_groq import ChatGroq
+from transformers import pipeline as hf_pipeline

# --- 1. Define the State for the Graph ---
class GraphState(TypedDict):
@@ -22,17 +23,13 @@ class GraphState(TypedDict):
    summarizer: Optional[any]

# --- 2. Define Graph Nodes ---
-
-# Router node to decide the flow
def router_node(state: GraphState) -> GraphState:
    """
    Router that determines the next step based on available retriever.
    """
    print("---NODE: ROUTER---")
-    # This node just passes through the state - routing logic is in conditional edges
    return state

-# Node for handling general conversation when no PDF is loaded
def general_chat_node(state: GraphState) -> GraphState:
    """
    Generates a response for general conversation using the LLM.
@@ -41,8 +38,6 @@ def general_chat_node(state: GraphState) -> GraphState:
    llm = state["llm"]
    query = state["query"]
    history = state["history"]
-
-    # Format history for the prompt
    history_context = "\n".join([f"Human: {q}\nAI: {a}" for q, a in history])
    current_time = datetime.datetime.now().strftime("%Y-%m-%d %I:%M %p")
    prompt = f"""You are Sarathi, a friendly and knowledgeable AI assistant.
@@ -61,23 +56,17 @@ Human: {query}
AI:"""

    try:
-        elif isinstance(llm, ChatGoogleGenerativeAI):
-            response_obj = llm.invoke(prompt)
-            response_text = getattr(response_obj, "content", str(response_obj))
-        else:
-            response_text = "Unsupported LLM type provided."
-
+        response_obj = llm.invoke(prompt)
+        response_text = getattr(response_obj, "content", str(response_obj))
    except Exception as e:
-        response_text = f"Model inference failed
+        response_text = f"Model inference failed: {str(e)}"

    return {"response": response_text.strip()}

-# Node for retrieving information from a PDF
def retrieve_node(state: GraphState) -> GraphState:
    """
    Retrieves relevant documents from the vector store based on the query.
+    Summarizes context if too long, or truncates at sentence boundaries.
    """
    print("---NODE: RETRIEVE---")
    query = state["query"]
@@ -89,7 +78,6 @@ def retrieve_node(state: GraphState) -> GraphState:
    retrieved_docs = []

    try:
-        # Dynamic top_k based on query length
        q_len = len(query.split())
        top_k = 3 if q_len < 5 else (5 if q_len < 15 else 8)

@@ -98,19 +86,32 @@
        if retrieved_docs:
            context = "\n\n---\n\n".join([doc.text for doc in retrieved_docs])

-            # Add chat history to context
            if history:
                history_context = "\n\n".join([f"Human: {q}\nAI: {a}" for q, a in history])
                context = f"{context}\n\n--- Chat History ---\n{history_context}"

-            # Summarize if context is too long
            MAX_CONTEXT_CHARS = 4000
-            if len(context) > MAX_CONTEXT_CHARS
+            if len(context) > MAX_CONTEXT_CHARS:
+                try:
+                    print("---CONTEXT TOO LONG, SUMMARIZING---")
+                    summary_result = summarizer(
+                        context,
+                        max_length=500,
+                        min_length=150,
+                        do_sample=False
+                    )
+                    context = summary_result[0].get("summary_text", context[:MAX_CONTEXT_CHARS])
+                except Exception as e:
+                    print(f"Summarizer failed: {e}")
+                    sentences = re.split(r'(?<=[.!?]) +', context)
+                    truncated = []
+                    total_len = 0
+                    for sent in sentences:
+                        if total_len + len(sent) > MAX_CONTEXT_CHARS:
+                            break
+                        truncated.append(sent)
+                        total_len += len(sent)
+                    context = " ".join(truncated)

    except Exception as e:
        print(f"Error in retrieve_node: {e}")
@@ -118,7 +119,6 @@ def retrieve_node(state: GraphState) -> GraphState:

    return {"retrieved_docs": retrieved_docs, "context": context}

-# Node for generating a response from RAG context
def generate_rag_node(state: GraphState) -> GraphState:
    """
    Generates an answer using the retrieved context from the PDF.
@@ -148,16 +148,11 @@ Instructions:
Answer:"""

    try:
-            response_obj = llm.invoke(prompt)
-            response_text = getattr(response_obj, "content", str(response_obj))
-        else:
-            response_text = "Unsupported LLM type provided."
-
+        response_obj = llm.invoke(prompt)
+        response_text = getattr(response_obj, "content", str(response_obj))
    except Exception as e:
-        response_text = f"Model inference failed
+        response_text = f"Model inference failed: {str(e)}"

    return {"response": response_text.strip()}

@@ -174,18 +169,18 @@ def route_query(state: GraphState) -> str:
    return "general_chat"

# --- 4. Build the Graph ---
-def build_graph(model_type: str = "huggingface", retriever=None, summarizer=None):
+def build_graph(model_type: str = "groq", retriever=None, summarizer=None):
    """
-    Builds the
+    Builds the workflow graph with LLM, retriever, and optional summarizer.
+    If summarizer not provided, initializes a default HuggingFace summarizer.
    """

    if model_type == "groq":
-        from langchain_groq import ChatGroq
        api_key = os.getenv("GROQ_API_KEY", "").strip()
        if not api_key:
            raise ValueError("GROQ_API_KEY environment variable not set.")
        llm = ChatGroq(
-            model="mixtral-8x7b-32768",
+            model="mixtral-8x7b-32768",
            api_key=api_key,
            temperature=0.7,
        )
@@ -193,23 +188,27 @@ def build_graph(model_type: str = "huggingface", retriever=None, summarizer=None):
        api_key = os.getenv("GEMINI_API_KEY", "").strip()
        if not api_key:
            raise ValueError("GEMINI_API_KEY environment variable not set.")
-        llm = ChatGoogleGenerativeAI(
+        llm = ChatGoogleGenerativeAI(
+            model="gemini-2.0-flash",
+            api_key=api_key,
+            temperature=0.7
+        )
    else:
        raise ValueError("Invalid model_type. Choose 'groq' or 'gemini'.")

+    if summarizer is None:
+        print("---NO SUMMARIZER PROVIDED, USING DEFAULT (facebook/bart-large-cnn)---")
+        summarizer = hf_pipeline("summarization", model="facebook/bart-large-cnn")
+
    workflow = StateGraph(GraphState)

-    # Add all the nodes to the graph
    workflow.add_node("router", router_node)
    workflow.add_node("general_chat", general_chat_node)
    workflow.add_node("retrieve", retrieve_node)
    workflow.add_node("generate", generate_rag_node)

-    # Set the router as the entry point
    workflow.set_entry_point("router")

-    # Add the conditional edge from the router
    workflow.add_conditional_edges(
        "router",
        route_query,
@@ -219,19 +218,13 @@ def build_graph(model_type: str = "huggingface", retriever=None, summarizer=None):
        },
    )

-    # Define the standard path for the RAG pipeline
    workflow.add_edge("retrieve", "generate")
-
-    # Define the end points for the graph
    workflow.add_edge("generate", END)
    workflow.add_edge("general_chat", END)

-    # Compile the graph
    compiled_graph = workflow.compile()

-    # Return a function that wraps the graph invocation
    def graph_wrapper(query: str, temperature: float = 0.7, raw_text: str = None, history=None, retriever_override=None):
-        # Use retriever_override if provided, otherwise use the build-time retriever
        active_retriever = retriever_override or retriever
        return compiled_graph.invoke({
            "query": query,
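Since build_graph now returns the graph_wrapper callable rather than the compiled graph, app.py invokes it with keyword arguments. A hedged usage sketch, assuming GROQ_API_KEY is exported and no document is loaded (so the router falls through to general chat); note that passing summarizer=None makes build_graph download facebook/bart-large-cnn on first run:

from graph import build_graph

# Assumes GROQ_API_KEY is set; with no retriever, route_query picks general_chat.
graph = build_graph(model_type="groq", retriever=None, summarizer=None)

result = graph(
    query="What can you help me with?",
    temperature=0.7,
    raw_text=None,
    history=[],
    retriever_override=None,
)

print(result.get("response", "No response generated."))
print(result.get("retrieved_docs", []))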
requirements.txt
CHANGED
@@ -6,14 +6,13 @@ accelerate>=0.30.0
# LangChain + LangGraph
langchain>=0.2.1
langgraph>=0.0.45
-langchain-
-langchain-
-langchain-google-genai>=1.0.5 # wrapper for Gemini
+langchain-groq>=0.1.0
+langchain-google-genai>=1.0.5

# Retrieval + Embeddings
llama-index>=0.13.5
llama-index-embeddings-huggingface>=0.1.3
-chromadb>=0.5.3
+chromadb>=0.5.3

# Hugging Face + Deployment
huggingface_hub>=0.23.4