Rahatara committed on
Commit
f6933d1
1 Parent(s): 0b6997c

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -125
app.py DELETED
@@ -1,125 +0,0 @@
1
- import os
2
- import gradio as gr
3
- import fitz # PyMuPDF
4
- from sentence_transformers import SentenceTransformer
5
- import numpy as np
6
- import faiss
7
- from typing import List, Tuple, Dict
8
- from google.generativeai import GenerativeModel, configure, types
9
-
# Set up the Google API for the Gemini model
# The key is read from the environment (never hard-coded); configure() registers
# it globally for all subsequent google.generativeai calls.
# NOTE(review): if GOOGLE_API_KEY is unset this passes api_key=None and the
# first model call will fail — confirm the deployment always sets it.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
configure(api_key=GOOGLE_API_KEY)
14
# Application state: loaded PDF pages, their embeddings, and a FAISS index.
class MyApp:
    """Holds PDF page texts, their sentence embeddings, and a FAISS index.

    Workflow: load_pdfs() -> build_vector_db() -> search_documents().
    """

    def __init__(self) -> None:
        self.documents: List[Dict] = []  # one dict per PDF page (file_name, page, content)
        self.embeddings = None           # ndarray of sentence embeddings once built
        self.index = None                # faiss.IndexFlatL2 once built
        self.model = SentenceTransformer('all-MiniLM-L6-v2')

    def load_pdfs(self, files: List[Dict]) -> str:
        """Extracts text from multiple PDF files and stores them.

        Each page becomes one document record so search results can cite
        file name and page number.
        """
        self.documents = []
        for file_dict in files:
            file_path = file_dict['name']  # gradio file record exposes the temp path under 'name'
            doc = fitz.open(file_path)
            try:
                for page_num in range(len(doc)):
                    text = doc[page_num].get_text()
                    self.documents.append({
                        "file_name": os.path.basename(file_path),
                        "page": page_num + 1,  # 1-based for human-readable citations
                        "content": text,
                    })
            finally:
                # Fix: the original never closed the document, leaking a file
                # handle per uploaded PDF.
                doc.close()
        return f"Processed {len(files)} PDFs successfully!"

    def build_vector_db(self) -> str:
        """Builds a vector database using the content of the PDFs."""
        if not self.documents:
            return "No documents to process."
        contents = [doc["content"] for doc in self.documents]
        self.embeddings = self.model.encode(contents, show_progress_bar=True)
        self.index = faiss.IndexFlatL2(self.embeddings.shape[1])
        self.index.add(np.array(self.embeddings))
        return "Vector database built successfully!"

    def search_documents(self, query: str, k: int = 3) -> List[Dict]:
        """Searches for relevant document snippets using vector similarity.

        Returns up to *k* page records, or a single placeholder dict when the
        index is missing or nothing matches.
        """
        if not self.index:
            return [{"content": "Vector database is not built."}]
        query_embedding = self.model.encode([query], show_progress_bar=False)
        _, ids = self.index.search(np.array(query_embedding), k)
        # Fix: FAISS pads the id array with -1 when fewer than k vectors exist;
        # the original indexed documents[-1] and silently returned the last page.
        results = [self.documents[i] for i in ids[0] if i >= 0]
        return results if results else [{"content": "No relevant documents found."}]
# Module-level singleton shared by the gradio callbacks below.
app = MyApp()
def upload_files(files: List[Dict]) -> str:
    """Gradio callback: hand the uploaded file records to the shared app."""
    status = app.load_pdfs(files)
    return status
def build_vector_db() -> str:
    """Gradio callback: build the FAISS index over the loaded documents."""
    status = app.build_vector_db()
    return status
def respond(message: str, history: List[Tuple[str, str]]) -> Tuple[List[Tuple[str, str]], str]:
    """Answer *message* using retrieved PDF context plus the chat history.

    Returns the updated chat history and an empty string (to clear the
    query textbox in the UI).
    """
    system_message = (
        "You are a helpful assistant designed to assist with studying and learning. "
        "You analyze uploaded PDF documents and provide clear, concise responses "
        "to any questions based on the content. You strive to be accurate, detailed, "
        "and educational in your responses."
    )

    # Fix: the original built a role-tagged `messages` list but never passed it
    # to the model, so the system prompt and the chat history were silently
    # dropped. Fold them into the single text prompt instead.
    transcript_lines = []
    for user_msg, assistant_msg in history:
        if user_msg:
            transcript_lines.append(f"User: {user_msg}")
        if assistant_msg:
            transcript_lines.append(f"Assistant: {assistant_msg}")
    transcript = "\n".join(transcript_lines)

    # Retrieve relevant documents.
    # Fix: use .get() for file_name/page — when the vector DB is not built,
    # search_documents returns a placeholder dict with only "content", and the
    # original f-string raised KeyError instead of answering gracefully.
    retrieved_docs = app.search_documents(message)
    context = "\n".join(
        f"File: {doc.get('file_name', 'N/A')}, Page: {doc.get('page', '-')}\n{doc['content'][:200]}..."
        for doc in retrieved_docs
    )

    prompt_parts = [system_message, context]
    if transcript:
        prompt_parts.append("Conversation so far:\n" + transcript)
    prompt_parts.append("Question: " + message)

    # Generate response using the generative model
    model = GenerativeModel("gemini-1.5-pro-latest")
    generation_config = types.GenerationConfig(
        temperature=0.7,
        max_output_tokens=1024,
    )

    try:
        response = model.generate_content(["\n".join(prompt_parts)], generation_config=generation_config)
        response_content = response.text if hasattr(response, "text") else "No response generated."
    except Exception as e:
        # Surface API/model failures in the chat instead of crashing the UI.
        response_content = f"An error occurred while generating the response: {str(e)}"

    # Append the message and generated response to the chat history
    history.append((message, response_content))
    return history, ""
# Gradio UI: left column uploads PDFs and builds the index; right column chats.
with gr.Blocks() as demo:
    gr.Markdown("# Study Assistant Chatbot")
    gr.Markdown("Upload your PDFs, build a vector database, and ask questions to learn efficiently.")

    with gr.Row():
        with gr.Column():
            upload_btn = gr.File(label="Upload PDFs", file_types=[".pdf"], file_count="multiple")
            upload_message = gr.Textbox(label="Upload Status", lines=2)
            build_db_btn = gr.Button("Build Vector Database")
            db_message = gr.Textbox(label="DB Build Status", lines=2)

            # Selecting files triggers loading; the button builds the FAISS index.
            upload_btn.change(upload_files, inputs=[upload_btn], outputs=[upload_message])
            build_db_btn.click(build_vector_db, inputs=[], outputs=[db_message])

        with gr.Column():
            chatbot = gr.Chatbot(label="Chat Responses")
            query_input = gr.Textbox(label="Enter your query here")
            submit_btn = gr.Button("Submit")
            # respond returns (updated history, "") so the query box is cleared.
            submit_btn.click(respond, inputs=[query_input, chatbot], outputs=[chatbot, query_input])

demo.launch()