# NewMSDSTA / app.py
# BivinSadler's picture
# Update app.py
# 65a6051 verified
# ✅ Set up your OpenAI API Key
import os
import warnings

# The OpenAI key is read from the OPENAI_API_KEY environment variable (for
# example a Hugging Face Space secret). It must never be hard-coded here:
# anything committed to the repository is readable by everyone who can see it.
# NOTE(security): a literal key used to be assigned on this line; treat that
# key as compromised and revoke it in the OpenAI dashboard.
if not os.environ.get("OPENAI_API_KEY"):
    warnings.warn(
        "OPENAI_API_KEY is not set; OpenAI-backed components will fail until "
        "it is provided through the environment.",
        RuntimeWarning,
    )
# ✅ Imports
import gradio as gr
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from transformers import pipeline, AutoConfig
from langchain_community.document_loaders import PyPDFLoader
import os
import torch # βœ… Needed for Hugging Face models
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI, OpenAIEmbeddings # βœ… Correct usage
# ✅ Load OpenAI LLM
# Single chat model instance shared by the routing step, the writer step and
# the retrieval-QA chains built in this file.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
)
# ✅ Build RAG agent from a PDF
def build_rag_agent(pdf_path, chunk_size=500, chunk_overlap=50):
    """Build a retrieval-augmented QA chain over a single PDF.

    The PDF is loaded with ``PyPDFLoader``, split into overlapping chunks,
    embedded with ``OpenAIEmbeddings`` into a FAISS index, and wrapped in a
    ``RetrievalQA`` chain of type ``"stuff"`` driven by the module-level
    ``llm``.

    Args:
        pdf_path: Path of the PDF file to index.
        chunk_size: ``chunk_size`` passed to the text splitter (default 500).
        chunk_overlap: ``chunk_overlap`` passed to the text splitter
            (default 50).

    Returns:
        The ``RetrievalQA`` chain built over the PDF's chunks.

    Raises:
        ValueError: If splitting the PDF produced no chunks (for example a
            scanned PDF with no extractable text).
    """
    pages = PyPDFLoader(pdf_path).load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = splitter.split_documents(pages)
    if not chunks:
        # Building a FAISS index from zero documents fails with an opaque
        # IndexError deep inside the vector store; fail early and clearly.
        raise ValueError(f"No text could be extracted from PDF: {pdf_path!r}")
    vectorstore = FAISS.from_documents(chunks, OpenAIEmbeddings())
    return RetrievalQA.from_chain_type(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        chain_type="stuff",
    )
# ✅ Create RAG agents for course syllabi
# Syllabus PDFs are referenced by bare file name, i.e. relative to the
# process's working directory.
STAT6371_SYLLABUS_PDF = "ds6371syllabusVer7.pdf"
DS6306_SYLLABUS_PDF = "DDSSyllabus2025.pdf"

# One retrieval-QA chain per course syllabus.
stat6371_agent = build_rag_agent(STAT6371_SYLLABUS_PDF)
ds6306_agent = build_rag_agent(DS6306_SYLLABUS_PDF)
# ✅ Load Hugging Face fine-tuned model for general statistics questions
# Text-to-text generation pipeline used for questions that are not routed to
# one of the syllabus agents. Replace the model id with your own model.
general_stat_agent = pipeline(
    task="text2text-generation",
    model="google/flan-t5-base",
)
# Alternative model id kept from the original (disabled):
# general_stat_agent = pipeline("text2text-generation", model="BivinSadler/llama3-finetuned-Statistics")
# ✅ LLM-based Routing Agent
def _parse_route_letter(response_text):
    """Return the category letter ("A", "B" or "C") found at the start of a reply, or None.

    An optional leading "Answer" label and surrounding punctuation are
    skipped, and the letter must stand alone (not be the first letter of a
    longer word), so replies such as "Answer: B" or "Applied ..." are not
    mistaken for category A.
    """
    cleaned = response_text.strip().upper()
    if cleaned.startswith("ANSWER"):
        cleaned = cleaned[len("ANSWER"):]
    cleaned = cleaned.lstrip(" \t\r\n:-\"'([*")
    letter, following = cleaned[:1], cleaned[1:2]
    # `following` is "" for a one-letter reply; "".isalnum() is False.
    if letter in ("A", "B", "C") and not following.isalnum():
        return letter
    return None


def route_question_llm(question):
    """Classify a question and return the key of the agent that should answer it.

    The module-level ``llm`` is asked to answer with a single letter; the
    reply is mapped to a route name.

    Args:
        question: The user's question text.

    Returns:
        "stat6371" for category A, "ds6306" for category B, and "general" for
        category C or any reply that cannot be parsed.
    """
    routing_prompt = f"""
You are a classification agent that helps route questions to the appropriate expert.
There are three possible categories:
A. Stat 6371 (Theoretical statistics course)
B. DS 6306 (Applied data science tools course)
C. General statistics (any other statistics question)
Classify the following question into one of those three categories by answering only with a single letter: A, B, or C.
Question: "{question}"
Answer:"""
    route_letter = _parse_route_letter(llm.invoke(routing_prompt).content)
    # Anything other than a clear A or B falls back to the general agent.
    return {"A": "stat6371", "B": "ds6306"}.get(route_letter, "general")
# ✅ Writer Agent
def writer_agent(raw_answer, audience="high school students"):
    """Rewrite a raw answer as a short, jargon-free explanation.

    The raw answer is embedded in a prompt and sent to the module-level
    ``llm``; the text of the model's reply is returned.

    Args:
        raw_answer: The answer text produced by one of the expert agents.
        audience: Description of the intended readers, inserted into the
            prompt (default "high school students").

    Returns:
        The content of the LLM's reply.
    """
    # The length hint uses a real en dash; the previous text contained a
    # mis-encoded byte sequence that was sent to the model verbatim.
    prompt = f"""
You are a talented science communicator. Your job is to explain the following answer in a way that is clear, short, and engaging for {audience}.
Answer:
{raw_answer}
Write your response in 2–3 sentences. Avoid technical jargon.
"""
    return llm.invoke(prompt).content
# ✅ Multi-Agent System Pipeline
def multiagent_system(question):
    """Answer a question by routing it to an expert agent and simplifying the result.

    The question is classified with ``route_question_llm``, answered by the
    matching syllabus RAG chain or by the general Hugging Face pipeline, and
    the raw answer is then rewritten by ``writer_agent``.

    Args:
        question: The user's question text.

    Returns:
        The simplified answer, or a short prompt to enter a question when
        ``question`` is None, empty, or only whitespace.
    """
    # Skip all model calls for blank input (e.g. an empty Gradio textbox).
    if question is None or not question.strip():
        return "Please enter a question."

    print(f"\n🧭 Routing question: {question}")
    route = route_question_llm(question)
    if route == "stat6371":
        print("🔎 Using Stat 6371 RAG Agent")
        # Chain.run is deprecated; invoke takes the question under "query"
        # and returns the answer under "result".
        raw_answer = stat6371_agent.invoke({"query": question})["result"]
    elif route == "ds6306":
        print("🔎 Using DS 6306 RAG Agent")
        raw_answer = ds6306_agent.invoke({"query": question})["result"]
    else:
        print("🧠 Using General Statistics HF Agent")
        result = general_stat_agent(question, max_length=200, do_sample=False)
        raw_answer = result[0]["generated_text"]
    print("\n✍️ Writing Agent Finalizing Answer...")
    return writer_agent(raw_answer)
# ✅ Example Usage
if __name__ == "__main__":
    # One-off demonstration query. It is guarded so that importing this module
    # does not trigger LLM calls; running the file as a script behaves as
    # before.
    question = "What kind of inference methods are taught in Stat 6371?"
    print("\n✅ Final Answer:\n", multiagent_system(question))
# 🎛️ Gradio Interface
import gradio as gr  # repeats the top-of-file import; harmless

# Web UI: one question textbox in, one answer textbox out, backed by
# multiagent_system.
iface = gr.Interface(
    fn=multiagent_system,
    inputs=gr.Textbox(lines=2, label="Ask a statistics question"),
    outputs=gr.Textbox(label="Answer"),
    title="📊 Multi-Agent Statistics Assistant",
    description=(
        "Ask a stats question—our system will route it to the right expert "
        "(Stat 6371, DS 6306, or general statistics) and return a simplified answer."
    ),
)

# Start the server only when the file is executed as a script, so the module
# can be imported without launching it.
if __name__ == "__main__":
    iface.launch()