Rahatara committed on
Commit
7d38514
1 Parent(s): 274f522

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -0
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import fitz # PyMuPDF
4
+ from sentence_transformers import SentenceTransformer
5
+ import numpy as np
6
+ import faiss
7
+ from typing import List
8
+ from google.generativeai import GenerativeModel, configure, types
9
+
10
# Configure the Gemini client. The key is read from the GOOGLE_API_KEY
# environment variable and is None when that variable is not set.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
configure(api_key=GOOGLE_API_KEY)
13
+
14
class MyApp:
    """In-memory retrieval store over the pages of uploaded PDF files.

    Each PDF page becomes one document. Pages are embedded with the
    ``all-MiniLM-L6-v2`` sentence-transformer model and stored in a FAISS
    ``IndexFlatL2`` index so they can be searched by vector similarity.
    """

    def __init__(self):
        # One dict per page: {"page": int, "content": str, "source": str}.
        self.documents = []
        # Page embeddings; None until build_vector_db() has run.
        self.embeddings = None
        # FAISS index over self.embeddings; None until build_vector_db().
        self.index = None
        self.model = SentenceTransformer('all-MiniLM-L6-v2')

    def load_pdfs(self, files):
        """Load and extract text from the provided PDF files.

        Args:
            files: Iterable of file paths or of objects exposing the path as
                a ``name`` attribute. ``None`` or an empty iterable simply
                clears the current corpus.
        """
        self.documents = []
        # Any previously built index refers to the old corpus; drop it so
        # stale row numbers can never be used against the new documents.
        self.embeddings = None
        self.index = None

        for file in files or []:
            # Accept both plain path strings and upload objects with `.name`.
            path = getattr(file, "name", file)
            # The context manager guarantees the document handle is closed.
            with fitz.open(path) as doc:
                for page_num, page in enumerate(doc, start=1):
                    self.documents.append(
                        {
                            "page": page_num,
                            "content": page.get_text(),
                            "source": str(path),
                        }
                    )
        print("PDFs processed successfully.")

    def build_vector_db(self):
        """Build a vector database using the content of the PDFs.

        Returns:
            A human-readable status message.
        """
        if not self.documents:
            return "No documents to process."
        texts = [doc["content"] for doc in self.documents]
        vectors = self.model.encode(texts, show_progress_bar=True)
        # FAISS operates on contiguous float32 matrices.
        self.embeddings = np.ascontiguousarray(vectors, dtype=np.float32)
        self.index = faiss.IndexFlatL2(self.embeddings.shape[1])
        self.index.add(self.embeddings)
        return "Vector database built successfully!"

    def search_documents(self, query: str, k: int = 3) -> List[str]:
        """Search for relevant documents using vector similarity.

        Args:
            query: Free-text query to embed and search for.
            k: Maximum number of page texts to return.

        Returns:
            The text of up to ``k`` matching pages, or the single-element
            list ``["Vector database is not ready."]`` when no index has
            been built yet.
        """
        if self.index is None:
            return ["Vector database is not ready."]

        # Never ask for more neighbours than there are documents: FAISS pads
        # missing neighbours with -1, which would otherwise silently index
        # the last document.
        k = min(k, len(self.documents))
        if k <= 0:
            return []

        query_embedding = self.model.encode([query], show_progress_bar=False)
        query_vector = np.ascontiguousarray(query_embedding, dtype=np.float32)
        _, indices = self.index.search(query_vector, k)

        total = len(self.documents)
        return [
            self.documents[int(i)]["content"]
            for i in indices[0]
            if 0 <= int(i) < total  # skip -1 padding / out-of-range rows
        ]
52
+
53
# Single shared application state used by the Gradio callbacks in this file.
app = MyApp()


def upload_files(files):
    """Extract text from the uploaded PDFs and return a status message.

    Args:
        files: The value of the upload component; ``None`` or empty when the
            selection has been cleared.

    Returns:
        A human-readable status string for the UI.
    """
    # The change event also fires when the upload is cleared; do not claim
    # success (or try to iterate None) in that case.
    if not files:
        return "No files uploaded."
    app.load_pdfs(files)
    return "Files uploaded and processed. Ready to build vector database."
58
+
59
def build_vector_db():
    """Build the vector index on the shared ``app`` and return its status text."""
    status = app.build_vector_db()
    return status
61
+
62
def answer_query(query):
    """Answer a user query with Gemini, grounded in the retrieved PDF pages.

    Args:
        query: The user's question as entered in the UI.

    Returns:
        The model's answer, or a human-readable status/error message.
    """
    question = (query or "").strip()
    if not question:
        return "Please enter a query."

    # Without an index there is nothing to retrieve; report that directly
    # instead of forwarding a status message to the model as if it were
    # document content.
    if getattr(app, "index", None) is None:
        return "Vector database is not ready."

    results = app.search_documents(question)
    if not results:
        return "No results found."

    # The prompt must carry the question itself; the retrieved pages alone
    # give the model nothing to answer.
    context = "\n\n".join(results)
    prompt = (
        "Answer the question using only the context below.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {question}"
    )

    # Generate a response using the generative model
    model = GenerativeModel("gemini-1.5-pro-latest")
    generation_config = types.GenerationConfig(
        temperature=0.7,
        max_output_tokens=150,
    )
    try:
        response = model.generate_content(prompt, generation_config=generation_config)
        response_text = response.text if hasattr(response, "text") else "No response generated."
    except Exception as e:
        # UI boundary: surface any API failure as text rather than crashing.
        response_text = f"An error occurred while generating the response: {str(e)}"

    return response_text
80
+
81
# Gradio UI: upload PDFs, build the vector index, then query it.
with gr.Blocks() as demo:
    gr.Markdown("# 🧘‍♀️ **Dialectical Behaviour Therapy Chatbot**")
    gr.Markdown("Upload your PDFs and interact with the content using AI.")

    with gr.Row():
        # Extensions need a leading dot; a bare "pdf" is treated as a MIME
        # category ("pdf/*") and matches no file.
        upload_btn = gr.Files(label="Upload PDFs", file_types=[".pdf"])
        upload_status = gr.Textbox()

    with gr.Row():
        db_btn = gr.Button("Build Vector Database")
        db_status = gr.Textbox()

    with gr.Row():
        query_input = gr.Textbox(label="Enter your query")
        submit_btn = gr.Button("Submit")
        # answer_query returns a plain string, so show it in a Textbox; a
        # Chatbot component expects a list of messages instead.
        response_display = gr.Textbox(label="Response", lines=8)

    upload_btn.change(upload_files, inputs=[upload_btn], outputs=[upload_status])
    db_btn.click(build_vector_db, outputs=[db_status])
    submit_btn.click(answer_query, inputs=[query_input], outputs=[response_display])

demo.launch()