SanjeevB1 committed on
Commit
416ad75
·
verified ·
1 Parent(s): 40e764f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +212 -202
app.py CHANGED
@@ -1,202 +1,212 @@
1
- # app.py
2
-
3
- import os
4
- import sys
5
- import logging
6
- from getpass import getpass
7
- from langchain.embeddings import OpenAIEmbeddings
8
- from langchain.vectorstores import Chroma
9
- from langchain.chat_models import ChatOpenAI
10
- from langchain.chains.question_answering import load_qa_chain
11
- from langchain.prompts import ChatPromptTemplate
12
- import gradio as gr
13
-
14
- # Setup logging
15
- logging.basicConfig(level=logging.INFO)
16
- logger = logging.getLogger(__name__)
17
-
18
- # Function to get the absolute path
19
- def get_absolute_path(relative_path):
20
- if getattr(sys, 'frozen', False):
21
- # If the application is run as a bundle, the PyInstaller bootloader
22
- # extends the sys module by a flag frozen=True and sets the app
23
- # path into variable _MEIPASS'.
24
- base_path = sys._MEIPASS
25
- else:
26
- base_path = os.path.abspath(".")
27
- return os.path.join(base_path, relative_path)
28
-
29
- # Retrieve OpenAI API key from environment variable or prompt
30
- openai_api_key = os.getenv("OPENAI_API_KEY")
31
- if not openai_api_key:
32
- openai_api_key = getpass("Enter your OpenAI API key2: ")
33
- os.environ["OPENAI_API_KEY"] = openai_api_key
34
-
35
- # Initialize embeddings
36
- embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
37
-
38
- # Function to list available vector store directories
39
- def list_vectorstore_directories(base_path='vectorstores'):
40
- """
41
- Lists all subdirectories in the base_path which are potential vector store directories.
42
- """
43
- directories = []
44
- try:
45
- for entry in os.listdir(base_path):
46
- full_path = os.path.join(base_path, entry)
47
- print(full_path)
48
- print(full_path)
49
- if os.path.isdir(full_path):
50
- # Check if the directory contains Chroma vector store files
51
- required_files = ['chroma.sqlite3']
52
- if all(os.path.exists(os.path.join(full_path, file)) for file in required_files):
53
- directories.append(full_path)
54
- except Exception as e:
55
- logger.error(f"Error listing directories in '{base_path}': {e}")
56
- return directories
57
-
58
- # Function to load selected vector stores
59
- def load_selected_vectorstores(selected_dirs):
60
- """
61
- Loads Chroma vector stores from the selected directories.
62
- """
63
- vectorstores = []
64
- for directory in selected_dirs:
65
- try:
66
- vectorstore = Chroma(
67
- persist_directory=directory,
68
- embedding_function=embeddings
69
- )
70
- vectorstores.append(vectorstore)
71
- logger.info(f"Loaded vectorstore from '{directory}'.")
72
- except Exception as e:
73
- logger.error(f"Error loading vectorstore from '{directory}': {e}")
74
- return vectorstores
75
-
76
- # Function to create a combined retriever
77
- def create_combined_retriever(vectorstores, search_kwargs={"k": 20}):
78
- retrievers = [vs.as_retriever(search_kwargs=search_kwargs) for vs in vectorstores]
79
-
80
- class CombinedRetriever:
81
- def __init__(self, retrievers):
82
- self.retrievers = retrievers
83
-
84
- def get_relevant_documents(self, query):
85
- docs = []
86
- for retriever in self.retrievers:
87
- try:
88
- docs.extend(retriever.get_relevant_documents(query))
89
- except Exception as e:
90
- logger.error(f"Error retrieving documents: {e}")
91
- # Remove duplicates based on content and source
92
- unique_docs = { (doc.page_content, doc.metadata.get('source', '')): doc for doc in docs }
93
- return list(unique_docs.values())
94
-
95
- return CombinedRetriever(retrievers)
96
-
97
- # Define the QA function
98
- def answer_question(selected_dirs, question):
99
- if not selected_dirs:
100
- return "Please select at least one vector store directory."
101
-
102
- # Load the selected vector stores
103
- vectorstores = load_selected_vectorstores(selected_dirs)
104
- if not vectorstores:
105
- return "No vector stores loaded. Please check the selected directories."
106
-
107
- # Create combined retriever
108
- combined_retriever = create_combined_retriever(vectorstores, search_kwargs={"k": 20})
109
-
110
- # Load the LLM
111
- try:
112
- llm = ChatOpenAI(model_name="gpt-4o")
113
- except Exception as e:
114
- logger.error(f"Error loading LLM: {e}")
115
- return "Error loading the language model. Please check your OpenAI API key and access."
116
-
117
- # Define the prompt template
118
- template = """
119
- You are an AI assistant specialized in extracting precise information from legal documents.
120
- Special emphasis on documents but refer outside if necessary.
121
- Always include the source filename and page number in your response.
122
- If multiple documents are the always prefer the lastest date ones.
123
- If ammendment documents are the always prefer the ammendments.
124
-
125
- Context:
126
- {context}
127
-
128
- Question: {input}
129
-
130
- Answer:
131
- """
132
-
133
- prompt = ChatPromptTemplate.from_template(template)
134
-
135
- # Create QA chain
136
- try:
137
- qa_chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt)
138
- except Exception as e:
139
- logger.error(f"Error creating QA chain: {e}")
140
- return "Error initializing the QA system."
141
-
142
- # Retrieve documents
143
- try:
144
- retrieved_docs = combined_retriever.get_relevant_documents(question)
145
- except Exception as e:
146
- logger.error(f"Error retrieving documents: {e}")
147
- return "Error retrieving documents."
148
-
149
- if not retrieved_docs:
150
- return "No relevant documents found for the question."
151
-
152
- # Modify the retrieved documents to include metadata within the content
153
- for doc in retrieved_docs:
154
- source = doc.metadata.get("source", "Unknown Source")
155
- page_number = doc.metadata.get("page_number", "Unknown Page")
156
- doc.page_content = f"Source: {source}\nPage: {page_number}\nContent: {doc.page_content}"
157
-
158
- # Generate response using the QA chain
159
- try:
160
- response = qa_chain.run(input_documents=retrieved_docs, input=question)
161
- except Exception as e:
162
- logger.error(f"Error generating response: {e}")
163
- return "Error generating the response."
164
-
165
- return response
166
-
167
- # Set Up the Gradio Interface
168
-
169
- # Get absolute path for vectorstores
170
- vectorstores_path = get_absolute_path('/content/properties_vectors/vectors')
171
-
172
- # List available vector store directories
173
- available_dirs = list_vectorstore_directories(vectorstores_path)
174
-
175
- # if not available_dirs:
176
- # available_dirs = [
177
- # "/content/trinity"
178
- # # Add other directories as needed
179
- # ]
180
-
181
- # Define Gradio interface
182
- iface = gr.Interface(
183
- fn=answer_question,
184
- inputs=[
185
- gr.CheckboxGroup(
186
- choices=available_dirs,
187
- label="Select Vector Store Directories"
188
- ),
189
- gr.Textbox(
190
- lines=2,
191
- placeholder="Enter your question here...",
192
- label="Your Question"
193
- )
194
- ],
195
- outputs=gr.Textbox(label="Response"),
196
- title="Vector Store QA Assistant",
197
- description="Select one or more vector store directories and ask your question. The assistant will retrieve relevant documents and provide an answer.",
198
- allow_flagging="never"
199
- )
200
-
201
- # Launch the interface
202
- iface.launch(debug=True)
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+
3
+ import os
4
+ import sys
5
+ import logging
6
+ from getpass import getpass
7
+ from langchain.embeddings import OpenAIEmbeddings
8
+ from langchain.vectorstores import Chroma
9
+ from langchain.chat_models import ChatOpenAI
10
+ from langchain.chains.question_answering import load_qa_chain
11
+ from langchain.prompts import ChatPromptTemplate
12
+ import gradio as gr
13
+
14
+
15
+ zip_file = '/content/vectors (2).zip' # Replace with your zip file path
16
+
17
+ # Step 2: Unzip the file
18
+ !unzip -q "{zip_file}" -d "/content/properties_vectors"
19
+
20
+ print("Unzipping completed.")
21
+
22
+
23
+
24
+ # Setup logging
25
+ logging.basicConfig(level=logging.INFO)
26
+ logger = logging.getLogger(__name__)
27
+
28
+ # Function to get the absolute path
29
+ def get_absolute_path(relative_path):
30
+ if getattr(sys, 'frozen', False):
31
+ # If the application is run as a bundle, the PyInstaller bootloader
32
+ # extends the sys module by a flag frozen=True and sets the app
33
+ # path into variable _MEIPASS'.
34
+ base_path = sys._MEIPASS
35
+ else:
36
+ base_path = os.path.abspath(".")
37
+ return os.path.join(base_path, relative_path)
38
+
39
+ # Retrieve OpenAI API key from environment variable or prompt
40
+ openai_api_key = os.getenv("OPENAI_API_KEY")
41
+ if not openai_api_key:
42
+ openai_api_key = getpass("Enter your OpenAI API key2: ")
43
+ os.environ["OPENAI_API_KEY"] = openai_api_key
44
+
45
+ # Initialize embeddings
46
+ embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
47
+
48
+ # Function to list available vector store directories
49
+ def list_vectorstore_directories(base_path='vectorstores'):
50
+ """
51
+ Lists all subdirectories in the base_path which are potential vector store directories.
52
+ """
53
+ directories = []
54
+ try:
55
+ for entry in os.listdir(base_path):
56
+ full_path = os.path.join(base_path, entry)
57
+ print(full_path)
58
+ print(full_path)
59
+ if os.path.isdir(full_path):
60
+ # Check if the directory contains Chroma vector store files
61
+ required_files = ['chroma.sqlite3']
62
+ if all(os.path.exists(os.path.join(full_path, file)) for file in required_files):
63
+ directories.append(full_path)
64
+ except Exception as e:
65
+ logger.error(f"Error listing directories in '{base_path}': {e}")
66
+ return directories
67
+
68
+ # Function to load selected vector stores
69
+ def load_selected_vectorstores(selected_dirs):
70
+ """
71
+ Loads Chroma vector stores from the selected directories.
72
+ """
73
+ vectorstores = []
74
+ for directory in selected_dirs:
75
+ try:
76
+ vectorstore = Chroma(
77
+ persist_directory=directory,
78
+ embedding_function=embeddings
79
+ )
80
+ vectorstores.append(vectorstore)
81
+ logger.info(f"Loaded vectorstore from '{directory}'.")
82
+ except Exception as e:
83
+ logger.error(f"Error loading vectorstore from '{directory}': {e}")
84
+ return vectorstores
85
+
86
+ # Function to create a combined retriever
87
+ def create_combined_retriever(vectorstores, search_kwargs={"k": 20}):
88
+ retrievers = [vs.as_retriever(search_kwargs=search_kwargs) for vs in vectorstores]
89
+
90
+ class CombinedRetriever:
91
+ def __init__(self, retrievers):
92
+ self.retrievers = retrievers
93
+
94
+ def get_relevant_documents(self, query):
95
+ docs = []
96
+ for retriever in self.retrievers:
97
+ try:
98
+ docs.extend(retriever.get_relevant_documents(query))
99
+ except Exception as e:
100
+ logger.error(f"Error retrieving documents: {e}")
101
+ # Remove duplicates based on content and source
102
+ unique_docs = { (doc.page_content, doc.metadata.get('source', '')): doc for doc in docs }
103
+ return list(unique_docs.values())
104
+
105
+ return CombinedRetriever(retrievers)
106
+
107
+ # Define the QA function
108
+ def answer_question(selected_dirs, question):
109
+ if not selected_dirs:
110
+ return "Please select at least one vector store directory."
111
+
112
+ # Load the selected vector stores
113
+ vectorstores = load_selected_vectorstores(selected_dirs)
114
+ if not vectorstores:
115
+ return "No vector stores loaded. Please check the selected directories."
116
+
117
+ # Create combined retriever
118
+ combined_retriever = create_combined_retriever(vectorstores, search_kwargs={"k": 20})
119
+
120
+ # Load the LLM
121
+ try:
122
+ llm = ChatOpenAI(model_name="gpt-4o")
123
+ except Exception as e:
124
+ logger.error(f"Error loading LLM: {e}")
125
+ return "Error loading the language model. Please check your OpenAI API key and access."
126
+
127
+ # Define the prompt template
128
+ template = """
129
+ You are an AI assistant specialized in extracting precise information from legal documents.
130
+ Special emphasis on documents but refer outside if necessary.
131
+ Always include the source filename and page number in your response.
132
+ If multiple documents are the always prefer the lastest date ones.
133
+ If ammendment documents are the always prefer the ammendments.
134
+
135
+ Context:
136
+ {context}
137
+
138
+ Question: {input}
139
+
140
+ Answer:
141
+ """
142
+
143
+ prompt = ChatPromptTemplate.from_template(template)
144
+
145
+ # Create QA chain
146
+ try:
147
+ qa_chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt)
148
+ except Exception as e:
149
+ logger.error(f"Error creating QA chain: {e}")
150
+ return "Error initializing the QA system."
151
+
152
+ # Retrieve documents
153
+ try:
154
+ retrieved_docs = combined_retriever.get_relevant_documents(question)
155
+ except Exception as e:
156
+ logger.error(f"Error retrieving documents: {e}")
157
+ return "Error retrieving documents."
158
+
159
+ if not retrieved_docs:
160
+ return "No relevant documents found for the question."
161
+
162
+ # Modify the retrieved documents to include metadata within the content
163
+ for doc in retrieved_docs:
164
+ source = doc.metadata.get("source", "Unknown Source")
165
+ page_number = doc.metadata.get("page_number", "Unknown Page")
166
+ doc.page_content = f"Source: {source}\nPage: {page_number}\nContent: {doc.page_content}"
167
+
168
+ # Generate response using the QA chain
169
+ try:
170
+ response = qa_chain.run(input_documents=retrieved_docs, input=question)
171
+ except Exception as e:
172
+ logger.error(f"Error generating response: {e}")
173
+ return "Error generating the response."
174
+
175
+ return response
176
+
177
+ # Set Up the Gradio Interface
178
+
179
+ # Get absolute path for vectorstores
180
+ vectorstores_path = get_absolute_path('/content/properties_vectors/vectors')
181
+
182
+ # List available vector store directories
183
+ available_dirs = list_vectorstore_directories(vectorstores_path)
184
+
185
+ # if not available_dirs:
186
+ # available_dirs = [
187
+ # "/content/trinity"
188
+ # # Add other directories as needed
189
+ # ]
190
+
191
+ # Define Gradio interface
192
+ iface = gr.Interface(
193
+ fn=answer_question,
194
+ inputs=[
195
+ gr.CheckboxGroup(
196
+ choices=available_dirs,
197
+ label="Select Vector Store Directories"
198
+ ),
199
+ gr.Textbox(
200
+ lines=2,
201
+ placeholder="Enter your question here...",
202
+ label="Your Question"
203
+ )
204
+ ],
205
+ outputs=gr.Textbox(label="Response"),
206
+ title="Vector Store QA Assistant",
207
+ description="Select one or more vector store directories and ask your question. The assistant will retrieve relevant documents and provide an answer.",
208
+ allow_flagging="never"
209
+ )
210
+
211
+ # Launch the interface
212
+ iface.launch(debug=True)