RAGBOT

Running

App Files Files Community

Rahatara committed 8 days ago

Commit

7d38514

•

1 Parent(s): 274f522

Create app.py

Browse files

Files changed (1) hide show

app.py +102 -0

app.py ADDED Viewed

	@@ -0,0 +1,102 @@

+import os
+import gradio as gr
+import fitz  # PyMuPDF
+from sentence_transformers import SentenceTransformer
+import numpy as np
+import faiss
+from typing import List
+from google.generativeai import GenerativeModel, configure, types
# Set up the Google API for the Gemini model.
# The key is read once, at import time, from the GOOGLE_API_KEY environment
# variable. A missing key is reported here so the problem is visible at
# startup instead of surfacing as an opaque error on the first query.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    print("Warning: GOOGLE_API_KEY is not set; Gemini requests will fail.")
configure(api_key=GOOGLE_API_KEY)
class MyApp:
    """In-memory retrieval store over the pages of uploaded PDFs.

    Each page with extractable text becomes one document. Documents are
    embedded with the ``all-MiniLM-L6-v2`` sentence-transformer and searched
    through a flat L2 FAISS index.

    Attributes set by the methods below:
        documents: list of ``{"page", "content", "source"}`` dicts.
        embeddings: float32 matrix with one row per document, or ``None``.
        index: FAISS index built from ``embeddings``, or ``None``.
        model: the sentence-transformer used for documents and queries.
    """

    def __init__(self):
        self.documents = []
        self.embeddings = None
        self.index = None
        self.model = SentenceTransformer('all-MiniLM-L6-v2')

    def load_pdfs(self, files):
        """Load and extract text from the provided PDF files.

        ``files`` may be ``None``/empty, a list of path strings, or a list of
        objects exposing the path as ``.name``. Any previously loaded
        documents, embeddings and index are discarded.
        """
        self.documents = []
        # The old index refers to rows of the old document list; keeping it
        # after a new upload would return pages that no longer match.
        self.embeddings = None
        self.index = None
        for file in files or []:
            # Accept both plain paths and upload wrappers carrying `.name`.
            file_path = getattr(file, "name", file)
            source = os.path.basename(str(file_path))
            # The context manager closes the PDF even if extraction fails.
            with fitz.open(file_path) as doc:
                for page_num, page in enumerate(doc, start=1):
                    text = page.get_text()
                    if not text.strip():
                        # Pages without extractable text only add noise.
                        continue
                    self.documents.append(
                        {"page": page_num, "content": text, "source": source}
                    )
        print("PDFs processed successfully.")

    def build_vector_db(self):
        """Build a vector database using the content of the PDFs.

        Returns a human-readable status string.
        """
        if not self.documents:
            return "No documents to process."
        embeddings = self.model.encode(
            [doc["content"] for doc in self.documents], show_progress_bar=True
        )
        # FAISS expects a C-contiguous float32 matrix.
        self.embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)
        self.index = faiss.IndexFlatL2(self.embeddings.shape[1])
        self.index.add(self.embeddings)
        return "Vector database built successfully!"

    def search_documents(self, query: str, k: int = 3) -> List[str]:
        """Search for relevant documents using vector similarity.

        Returns the text of up to ``k`` documents ordered as FAISS returns
        them, or a one-element list holding a "not ready" message when no
        index has been built.
        """
        if self.index is None:
            return ["Vector database is not ready."]
        # Asking FAISS for more neighbours than there are vectors pads the
        # result with -1, which would silently index the last document.
        k = min(k, len(self.documents))
        if k <= 0:
            return []
        query_embedding = np.asarray(
            self.model.encode([query], show_progress_bar=False), dtype=np.float32
        )
        _, indices = self.index.search(query_embedding, k)
        return [
            self.documents[i]["content"]
            for i in indices[0]
            if 0 <= i < len(self.documents)
        ]
# Single shared application state used by the Gradio callbacks.
app = MyApp()


def upload_files(files):
    """Gradio callback: extract text from the uploaded PDFs.

    ``files`` is whatever the upload component passes; it is ``None`` or
    empty when the user clears the widget. Returns a status string.
    """
    if not files:
        # Clearing the widget also drops the pages loaded earlier.
        app.load_pdfs([])
        return "No files uploaded."
    try:
        app.load_pdfs(files)
    except Exception as e:
        # Report unreadable/corrupt files in the status box instead of
        # leaving the UI with an unhandled error.
        return f"Failed to process files: {str(e)}"
    return "Files uploaded and processed. Ready to build vector database."
def build_vector_db():
    """Gradio callback: index the loaded pages and return the status text."""
    status = app.build_vector_db()
    return status
def answer_query(query):
    """Gradio callback: answer ``query`` from the indexed PDF pages.

    Retrieves the most similar pages, sends them to Gemini together with the
    question, and returns the generated text. Returns a short status or
    error message (always a string) when no answer can be produced.
    """
    if not query or not query.strip():
        return "Please enter a query."
    if app.index is None:
        # Return the status directly rather than sending it to the model
        # as if it were document content.
        return "Vector database is not ready."
    results = app.search_documents(query)
    if not results:
        return "No results found."
    # The model must see the question as well as the retrieved passages;
    # passing only the passages gives it nothing to answer.
    context = "\n\n".join(results)
    prompt = (
        "Answer the question using only the context below. "
        "If the context does not contain the answer, say so.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {query}"
    )
    # Generate a response using the generative model
    model = GenerativeModel("gemini-1.5-pro-latest")
    generation_config = types.GenerationConfig(
        temperature=0.7,
        max_output_tokens=150
    )
    try:
        response = model.generate_content(prompt, generation_config=generation_config)
        response_text = response.text if hasattr(response, "text") else "No response generated."
    except Exception as e:
        response_text = f"An error occurred while generating the response: {str(e)}"
    return response_text
# Gradio UI: upload PDFs, build the index, then ask questions.
with gr.Blocks() as demo:
    gr.Markdown("# 🧘‍♀️ **Dialectical Behaviour Therapy Chatbot**")
    gr.Markdown("Upload your PDFs and interact with the content using AI.")
    with gr.Row():
        # File extensions need the leading dot to be applied as a filter.
        upload_btn = gr.Files(label="Upload PDFs", file_types=[".pdf"])
        upload_status = gr.Textbox()
    with gr.Row():
        db_btn = gr.Button("Build Vector Database")
        db_status = gr.Textbox()
    with gr.Row():
        query_input = gr.Textbox(label="Enter your query")
        submit_btn = gr.Button("Submit")
        # answer_query returns a plain string, so it is shown in a Textbox;
        # a Chatbot component expects a list of messages instead.
        response_display = gr.Textbox(label="Response", interactive=False)
    upload_btn.change(upload_files, inputs=[upload_btn], outputs=[upload_status])
    db_btn.click(build_vector_db, outputs=[db_status])
    submit_btn.click(answer_query, inputs=[query_input], outputs=[response_display])
    # Pressing Enter in the query box behaves like clicking Submit.
    query_input.submit(answer_query, inputs=[query_input], outputs=[response_display])

# Only start the server when run as a script, so importing this module
# (e.g. for reuse of `demo`) has no side effect.
if __name__ == "__main__":
    demo.launch()