Spaces:

agnixcode
/

bottttt

Sleeping

App Files Files Community

agnixcode committed on Jul 13

Commit

fcbf118

verified ·

1 Parent(s): ed124b7

Create app.py

Browse files

Files changed (1) hide show

app.py +89 -0

app.py ADDED Viewed

	@@ -0,0 +1,89 @@

+import os
+import gradio as gr
+import fitz  # PyMuPDF
+from sentence_transformers import SentenceTransformer
+import chromadb
+from chromadb.utils import embedding_functions
+import openai
+# Load GROQ API Key
+# os.getenv returns None when GROQ_API_KEY is unset; no check is made here, so a
+# missing key only surfaces later, when a chat completion is requested.
+# NOTE(review): module-level api_key/api_base assignment looks like the pre-1.0
+# openai SDK interface — confirm the pinned openai version.
+openai.api_key = os.getenv("GROQ_API_KEY")
+# Point the openai client at the Groq endpoint instead of its default base URL.
+openai.api_base = "https://api.groq.com/openai/v1"
+# Load embedding model
+# Loaded once at import time and shared by the upload and query paths.
+embedder = SentenceTransformer("all-MiniLM-L6-v2")
+# Set up ChromaDB with persistence
+# NOTE(review): whether persist_directory alone gives on-disk persistence depends
+# on the installed chromadb version — confirm against the pinned release.
+persist_path = "./chroma_db"
+db = chromadb.Client(chromadb.config.Settings(persist_directory=persist_path))
+# All uploaded papers share this single collection.
+collection = db.get_or_create_collection("papers")
+# Extract text from uploaded PDF
+def extract_text_from_pdf(file):
+    text = ""
+    doc = fitz.open(stream=file.read(), filetype="pdf")
+    for page in doc:
+        text += page.get_text()
+    return text
+# Chunk and store in vector DB
+def chunk_and_store(text):
+    chunks = [text[i:i+500] for i in range(0, len(text), 500)]
+    embeddings = embedder.encode(chunks).tolist()
+    for i, chunk in enumerate(chunks):
+        collection.add(documents=[chunk], ids=[f"id_{len(collection.get()['ids']) + i}"], embeddings=[embeddings[i]])
+    db.persist()
+# Retrieve relevant chunks and send to LLaMA3 via Groq
+def retrieve_and_ask(query):
+    if len(collection.get()["documents"]) == 0:
+        return "Please upload a paper first."
+    query_embedding = embedder.encode([query]).tolist()[0]
+    results = collection.query(query_embeddings=[query_embedding], n_results=3)
+    context = "\n".join(results["documents"][0])
+    system_prompt = "You are an academic assistant helping students understand research papers."
+    user_prompt = f"Based on the following context:\n{context}\n\nAnswer the question:\n{query}"
+    try:
+        response = openai.ChatCompletion.create(
+            model="llama3-70b-8192",
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_prompt}
+            ]
+        )
+        return response['choices'][0]['message']['content']
+    except Exception as e:
+        return f"Error: {str(e)}"
+# Gradio UI
+def handle_upload(file):
+    if file is None:
+        return "Upload a valid PDF file."
+    text = extract_text_from_pdf(file)
+    chunk_and_store(text)
+    return "✅ Paper uploaded and processed."
+def handle_query(query):
+    return retrieve_and_ask(query)
+with gr.Blocks() as demo:
+    gr.Markdown("### 📘 RAG Academic Assistant\nUpload a paper and ask questions.")
+    with gr.Row():
+        file = gr.File(label="Upload PDF", type="binary")
+        upload_btn = gr.Button("Process")
+        upload_output = gr.Textbox()
+    with gr.Row():
+        query = gr.Textbox(label="Ask a question")
+        response = gr.Textbox(label="Answer")
+        ask_btn = gr.Button("Ask")
+    upload_btn.click(handle_upload, inputs=[file], outputs=[upload_output])
+    ask_btn.click(handle_query, inputs=[query], outputs=[response])
+demo.launch()