Spaces:

Harihareshwar3
/

rag-bot

Sleeping

App Files Files Community

Harihareshwar3 committed on Aug 31

Commit

6aa9d46

verified ·

1 Parent(s): 9fd79cf

Create app.py

Browse files

Files changed (1) hide show

app.py +343 -0

app.py ADDED Viewed

	@@ -0,0 +1,343 @@

+import gradio as gr
+import os
+from dotenv import load_dotenv
+import PyPDF2
+import faiss
+# LangChain imports
+from langchain_groq import ChatGroq
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.schema import Document
+from langchain.prompts import PromptTemplate
+from langchain.chains import RetrievalQA
+from langchain.vectorstores import FAISS
+from langchain.tools import Tool
+from langchain.agents import initialize_agent, AgentType
+from langchain.memory import ConversationBufferWindowMemory
+# Load environment variables
+load_dotenv()
class SmartAcademicAssistant:
    """Academic helper with a math-tutor mode and a PDF question-answering mode.

    Tutor mode sends a chain-of-thought prompt straight to the Groq chat model.
    Assistant mode indexes uploaded PDFs in a FAISS vector store and answers
    through a ReAct agent whose only tool is a RetrievalQA chain over that
    index.
    """

    # Prompt for the RetrievalQA chain; {context} is filled with retrieved chunks.
    _QA_TEMPLATE = """You are a helpful academic assistant. Answer the question based on the provided context from uploaded documents.
Context: {context}
Question: {question}
Important:
- Give a direct, comprehensive answer based on the context
- If information is not in the context, say so clearly
- Do not make up information not present in the documents
- Keep your answer focused and relevant
Answer:"""

    # Description the agent reads when deciding whether to call the tool.
    _TOOL_DESCRIPTION = """Use this tool ONLY when the user asks questions about the uploaded PDF documents.
                    This tool searches through the uploaded academic papers, textbooks, or documents to find relevant information.
                    Input should be the user's question exactly as asked.
                    DO NOT use this tool for general knowledge questions."""

    # Instructions placed in front of the agent's tool list (the ReAct "prefix").
    _AGENT_PREFIX = """You are a smart academic assistant. You have access to uploaded PDF documents through the document_search tool.
IMPORTANT RULES:
1. If the user asks about content from uploaded PDFs, use the document_search tool EXACTLY ONCE
2. For general knowledge questions, answer directly without using tools
3. Do NOT call tools multiple times for the same question
4. Do NOT use tools for math problems or general knowledge
5. Give your final answer immediately after using a tool
Available tools:
- document_search: Use for questions about uploaded PDF content only
Let's think step by step and provide helpful answers."""

    # Chain-of-thought prompt used by tutor mode; {problem} is the user's input.
    _TUTOR_TEMPLATE = """You are an expert math tutor. Solve this math problem using Chain-of-Thought reasoning.
Problem: {problem}
Please solve this step-by-step:
1. First, understand what the problem is asking
2. Identify the key information and what needs to be found
3. Choose the appropriate method or formula
4. Show each step of the calculation clearly
5. Verify your answer makes sense
6. Provide the final answer
Step-by-step solution:"""

    def __init__(self):
        """Create the LLM client, embedding model, memory and text splitter.

        Nothing is indexed yet: ``vector_store``, ``qa_chain`` and ``agent``
        stay ``None`` until :meth:`process_uploaded_pdfs` succeeds.
        """
        # Groq-hosted chat model; the key is read from the environment.
        self.llm = ChatGroq(
            groq_api_key=os.getenv("GROQ_API_KEY"),
            model_name="llama-3.1-8b-instant",
            temperature=0.3,
            max_tokens=1000
        )
        # Sentence-transformer embeddings used to build and query the index.
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        # Retrieval state, populated by process_uploaded_pdfs().
        self.vector_store = None
        self.uploaded_docs = []
        self.qa_chain = None
        self.agent = None
        # Conversation memory handed to the agent (window of the last 3 exchanges).
        self.memory = ConversationBufferWindowMemory(
            memory_key="chat_history",
            k=3,
            return_messages=True
        )
        # Splits extracted PDF text into overlapping chunks for embedding.
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200
        )

    def _read_pdf_text(self, pdf_file) -> str:
        """Return the text of every page of *pdf_file*, one page per line block.

        Unlike :meth:`extract_text_from_pdf`, failures propagate as exceptions
        so callers can tell an unreadable file from real document text.
        """
        reader = PyPDF2.PdfReader(pdf_file)
        # `or ""` guards against a page for which no text could be extracted.
        return "".join(f"{page.extract_text() or ''}\n" for page in reader.pages)

    def extract_text_from_pdf(self, pdf_file) -> str:
        """Extract text from uploaded PDF file.

        Returns the concatenated page text, or a message starting with
        ``"Error reading PDF:"`` when the file cannot be read (kept for
        backward compatibility with callers that expect a string).
        """
        try:
            return self._read_pdf_text(pdf_file)
        except Exception as e:
            return f"Error reading PDF: {str(e)}"

    def _build_qa_chain(self, vector_store):
        """Return a "stuff" RetrievalQA chain over the 4 most similar chunks."""
        qa_prompt = PromptTemplate(
            template=self._QA_TEMPLATE,
            input_variables=["context", "question"]
        )
        return RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=vector_store.as_retriever(
                search_type="similarity",
                search_kwargs={"k": 4}
            ),
            chain_type_kwargs={"prompt": qa_prompt},
            return_source_documents=False  # Only the answer text is used
        )

    def _search_documents(self, query: str) -> str:
        """Search through uploaded PDF documents to answer questions"""
        try:
            result = self.qa_chain.invoke({"query": query})
            return result["result"]
        except Exception as e:
            # Reported back to the agent as the tool observation.
            return f"Error searching documents: {str(e)}"

    def _build_agent(self):
        """Return a ReAct agent whose single tool is the document search."""
        tools = [
            Tool(
                name="document_search",
                func=self._search_documents,
                description=self._TOOL_DESCRIPTION
            )
        ]
        return initialize_agent(
            tools=tools,
            llm=self.llm,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            # The custom rules replace the default prompt prefix; previously
            # they were written out but never handed to the agent.
            agent_kwargs={"prefix": self._AGENT_PREFIX},
            verbose=False,
            handle_parsing_errors=True,
            max_execution_time=15,  # Seconds before the run is cut short
            max_iterations=3,  # Limit iterations to prevent loops
            early_stopping_method="generate",
            memory=self.memory
        )

    def process_uploaded_pdfs(self, files) -> str:
        """Process uploaded PDF files and create vector store.

        Args:
            files: Uploaded items; each is either a path (``str`` /
                ``os.PathLike``) or an object exposing the path as ``.name``.

        Returns:
            A human-readable status message. On success the vector store, QA
            chain and agent are replaced together; on failure the previous
            state is left untouched.
        """
        if not files:
            return "No files uploaded."
        all_documents = []
        processed_files = []
        skipped = []
        for file in files:
            # Accept both plain paths and upload wrappers carrying `.name`.
            if isinstance(file, (str, os.PathLike)):
                path = os.fspath(file)
            else:
                path = file.name
            if not str(path).lower().endswith(".pdf"):
                continue
            source = os.path.basename(path)
            try:
                text = self._read_pdf_text(path)
            except Exception:
                # Best effort: one unreadable file must not abort the batch.
                skipped.append(source)
                continue
            documents = self.text_splitter.create_documents(
                [text],
                metadatas=[{"source": source}]
            )
            if not documents:
                # No extractable text, so there is nothing to index.
                skipped.append(source)
                continue
            all_documents.extend(documents)
            processed_files.append(path)
        if not all_documents:
            return "❌ No valid PDF files found or error processing files."

        # Build everything first so a failure cannot leave half-updated state.
        vector_store = FAISS.from_documents(all_documents, self.embeddings)
        qa_chain = self._build_qa_chain(vector_store)
        agent = self._build_agent()
        self.vector_store = vector_store
        self.qa_chain = qa_chain
        self.agent = agent
        self.uploaded_docs = processed_files

        status = (
            f"✅ Successfully processed {len(processed_files)} PDF(s): "
            f"{', '.join(os.path.basename(f) for f in processed_files)}"
            "\n\nRAG Agent is ready! You can now ask questions about the content!"
        )
        if skipped:
            status += (
                f"\n\n⚠️ Skipped {len(skipped)} unreadable or empty file(s): "
                f"{', '.join(skipped)}"
            )
        return status

    def tutor_mode_cot(self, math_problem: str) -> str:
        """Tutor mode with Chain-of-Thought reasoning for math problems.

        Returns the model's step-by-step solution, a prompt to enter a problem
        when the input is empty, or an error message if the LLM call fails.
        """
        if not math_problem or not math_problem.strip():
            return "Please enter a math problem."
        cot_prompt = PromptTemplate(
            input_variables=["problem"],
            template=self._TUTOR_TEMPLATE
        )
        try:
            formatted_prompt = cot_prompt.format(problem=math_problem)
            response = self.llm.invoke(formatted_prompt)
            return response.content
        except Exception as e:
            return f"Error in tutor mode: {str(e)}\n\nPlease check your GROQ_API_KEY in the .env file."

    def assistant_mode_rag(self, question: str) -> str:
        """Agent-based RAG Q&A from uploaded documents.

        Runs the agent first; if it raises, falls back to querying the QA
        chain directly. Returns an explanatory message when the input is empty
        or no documents have been indexed yet.
        """
        if not question or not question.strip():
            return "Please enter a question."
        if self.vector_store is None or self.agent is None:
            return "⚠️ Please upload at least one PDF file first to initialize the RAG agent."
        try:
            # Clear any previous conversation context that might cause loops
            self.memory.clear()
            return self.agent.run(question)
        except Exception as e:
            # Fallback to direct QA if agent fails
            try:
                fallback_result = self.qa_chain.invoke({"query": question})
                return f"🔄 Fallback answer:\n\n{fallback_result['result']}"
            except Exception:
                return f"❌ Error in agent mode: {str(e)}\n\nPlease try rephrasing your question or check your setup."
# Module-level singleton shared by every Gradio callback below.
# NOTE(review): this is constructed at import time, i.e. before the
# GROQ_API_KEY check inside the __main__ guard at the bottom of the file runs.
assistant = SmartAcademicAssistant()
+# Create Gradio interface
def _build_tutor_tab():
    """Lay out the math tutor tab and wire its button to the tutor callback."""
    with gr.Tab("🧮 Tutor Mode"):
        gr.HTML("<h3>Math Problem Solver with Chain-of-Thought</h3>")
        gr.HTML("<p>Enter any math problem and get step-by-step solution using CoT reasoning.</p>")
        with gr.Row():
            # Left column: problem entry, submit button and a few examples.
            with gr.Column():
                problem_box = gr.Textbox(
                    label="Math Problem",
                    placeholder="e.g., Solve for x: 2x + 5 = 13\nor\nFind the derivative of f(x) = x³ + 2x² - x + 1",
                    lines=4,
                )
                solve_button = gr.Button("🔍 Solve Problem", variant="primary")
                gr.HTML("<b>Example problems to try:</b>")
                gr.HTML("• Solve: 3x² - 12x + 9 = 0<br>• Find integral of sin(2x)dx<br>• Calculate: (2+3i)(4-i)")
            # Right column: the generated solution.
            with gr.Column():
                solution_box = gr.Textbox(
                    label="Step-by-Step Solution",
                    lines=15,
                    max_lines=20,
                )
        solve_button.click(
            fn=assistant.tutor_mode_cot,
            inputs=[problem_box],
            outputs=[solution_box],
        )


def _build_assistant_tab():
    """Lay out the document Q&A tab and wire upload and ask events."""
    with gr.Tab("📚 Assistant Mode"):
        gr.HTML("<h3>Document Q&A with Retrieval-Augmented Generation</h3>")
        gr.HTML("<p><b>Step 1:</b> Upload PDF documents, then <b>Step 2:</b> ask questions about them.</p>")
        with gr.Row():
            # Left column: upload widget, its status line, then the question form.
            with gr.Column():
                gr.HTML("<h4>📤 Upload Documents</h4>")
                uploader = gr.File(
                    label="Upload PDF Documents",
                    file_types=[".pdf"],
                    file_count="multiple",
                )
                status_box = gr.Textbox(
                    label="Upload Status",
                    lines=3,
                    interactive=False,
                    placeholder="Upload status will appear here...",
                )
                gr.HTML("<h4>❓ Ask Questions</h4>")
                question_box = gr.Textbox(
                    label="Your Question",
                    placeholder="What is the main topic discussed in the document?\nCan you summarize chapter 2?\nWhat are the key findings?",
                    lines=4,
                )
                ask_button = gr.Button("💬 Ask Question", variant="primary")
            # Right column: the answer produced from the documents.
            with gr.Column():
                answer_box = gr.Textbox(
                    label="Answer from Documents",
                    lines=15,
                    max_lines=25,
                    placeholder="Answers will appear here...",
                )
        # Re-index whenever the set of uploaded files changes.
        uploader.change(
            fn=assistant.process_uploaded_pdfs,
            inputs=[uploader],
            outputs=[status_box],
        )
        # Answer the typed question on button press.
        ask_button.click(
            fn=assistant.assistant_mode_rag,
            inputs=[question_box],
            outputs=[answer_box],
        )


def create_interface():
    """Build and return the two-tab Gradio Blocks app (it is not launched here)."""
    app = gr.Blocks(title="Smart Academic Assistant", theme=gr.themes.Soft())
    with app:
        gr.HTML("<h1 style='text-align: center; color: #2e86c1;'>🎓 Smart Academic Assistant</h1>")
        gr.HTML("<p style='text-align: center;'>Two modes: <b>Tutor</b> for math problems with CoT reasoning, <b>Assistant</b> for Q&A from your documents</p>")
        with gr.Tabs():
            _build_tutor_tab()
            _build_assistant_tab()
    return app
# Location of the dotenv file and the dummy value written into a fresh template.
ENV_FILE = ".env"
API_KEY_PLACEHOLDER = "your_groq_api_key_here"


def ensure_env_template(path=ENV_FILE) -> bool:
    """Write a starter dotenv file at *path* unless one already exists.

    Returns:
        True when the template was created, False when *path* was already
        present (its contents are never overwritten).
    """
    try:
        # Mode "x" creates the file atomically and refuses to clobber an
        # existing one, avoiding an exists-then-write race.
        with open(path, "x", encoding="utf-8") as env_file:
            env_file.write("# Add your Groq API key here\n")
            env_file.write(f"GROQ_API_KEY={API_KEY_PLACEHOLDER}\n")
    except FileExistsError:
        return False
    return True


def is_usable_api_key(value) -> bool:
    """Return True when *value* is set to something other than the placeholder.

    ``None``, empty or whitespace-only strings, and the untouched template
    value all count as "not configured".
    """
    return bool(value) and value.strip() not in ("", API_KEY_PLACEHOLDER)


if __name__ == "__main__":
    # Create .env file template if it doesn't exist
    if ensure_env_template():
        print("📝 Created .env file. Please add your GROQ_API_KEY.")
    # Warn when the key is missing or still the template placeholder, which
    # a plain truthiness check would accept as configured.
    if not is_usable_api_key(os.getenv("GROQ_API_KEY")):
        print("⚠️  Warning: GROQ_API_KEY not found. Please add it to your .env file.")
    # Launch the app
    demo = create_interface()
    demo.launch(debug=True, share=True)