evanperez committed on
Commit
11db46b
1 Parent(s): a570cbe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -3
app.py CHANGED
@@ -30,9 +30,9 @@ st.set_page_config(page_title="Gemini RAG", layout="wide")
30
  api_key = 'AIzaSyCvXRggpO2yNwIpZmoMy_5Xhm2bDyD-pOo'
31
 
32
 
33
- # os.mkdir('faiss_index')
34
 
35
- # empty faise_index and chat_history.json
36
  def delete_files_in_folder(folder_path):
37
  try:
38
  # Iterate over all the files in the folder
@@ -134,7 +134,49 @@ def user_input(user_question, api_key):
134
  #chat history
135
  update_chat_history(user_question, response["output_text"])
136
 
 
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
 
140
  def main():
@@ -152,6 +194,8 @@ def main():
152
  accept_multiple_files=True, key="pdf_uploader")
153
  if st.button("Submit & Process", key="process_button") and api_key:
154
  with st.spinner("Processing..."):
 
 
155
  raw_text = get_pdf_text(pdf_docs)
156
  text_chunks = get_text_chunks(raw_text)
157
  get_vector_store(text_chunks, api_key)
@@ -159,4 +203,5 @@ def main():
159
 
160
 
161
  if __name__ == "__main__":
162
- main()
 
 
30
  api_key = 'AIzaSyCvXRggpO2yNwIpZmoMy_5Xhm2bDyD-pOo'
31
 
32
 
33
+ #os.mkdir('faiss_index')
34
 
35
+ # empty faiss_index and chat_history.json
36
  def delete_files_in_folder(folder_path):
37
  try:
38
  # Iterate over all the files in the folder
 
134
  #chat history
135
  update_chat_history(user_question, response["output_text"])
136
 
137
+ ''''''''''''''''''
138
 
139
def clear_faiss_index(folder_path):
    """Remove every regular file that sits directly inside ``folder_path``.

    Subdirectories are not descended into or removed. The outcome is
    reported through Streamlit instead of being raised to the caller:
    a note when the folder does not exist, a confirmation after the files
    have been removed, and an error message if anything goes wrong.

    Args:
        folder_path: Path of the folder holding the FAISS index files.
    """
    try:
        # Guard clause: nothing to clear when the folder is absent.
        if not os.path.exists(folder_path):
            st.write("No existing FAISS index files found.")
            return

        for entry_name in os.listdir(folder_path):
            candidate = os.path.join(folder_path, entry_name)
            # Only plain files are deleted; nested folders are skipped.
            if os.path.isfile(candidate):
                os.remove(candidate)

        st.write("Existing FAISS index files cleared successfully!")
    except Exception as e:
        st.error(f"An error occurred while clearing FAISS index files: {e}")
151
+ # Function to process PDF files and recreate FAISS index
152
+
153
+
154
def recreate_faiss_index(pdf_docs, chunk_size, chunk_overlap, api_key):
    """Rebuild the on-disk FAISS index from the given PDF documents.

    Steps: clear the ``faiss_index`` folder, extract the text of every page
    of every PDF, split it into chunks, embed the chunks with the
    ``models/embedding-001`` Google embedding model, and save the resulting
    vector store to ``faiss_index``.

    Success and failure are reported through Streamlit (``st.success`` /
    ``st.error``); exceptions are not propagated to the caller.

    Args:
        pdf_docs: Iterable of PDF sources, each passed to ``PdfReader``.
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter``.
        api_key: Google API key forwarded to ``GoogleGenerativeAIEmbeddings``.
    """
    try:
        # Clear existing FAISS index files
        clear_faiss_index("faiss_index")

        # Process PDF files and extract text. Pieces are collected in a list
        # and joined once, which avoids repeated string concatenation.
        page_texts = []
        for pdf in pdf_docs or []:
            pdf_reader = PdfReader(pdf)
            for page in pdf_reader.pages:
                # Defensive: treat a page with no extractable text (empty or
                # None result) as an empty string rather than failing on it.
                page_texts.append(page.extract_text() or "")
        text = "".join(page_texts)

        # Split text into chunks
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size, chunk_overlap=chunk_overlap
        )
        chunks = text_splitter.split_text(text)

        # An index cannot be built from zero chunks; report that plainly
        # instead of letting the vector store fail with an opaque error.
        if not chunks:
            st.error(
                "An error occurred while recreating FAISS index: "
                "no extractable text was found in the uploaded PDF files."
            )
            return

        # Generate embeddings for text chunks
        embeddings = GoogleGenerativeAIEmbeddings(
            model="models/embedding-001", google_api_key=api_key
        )
        vector_store = FAISS.from_texts(chunks, embedding=embeddings)

        # Save FAISS index
        vector_store.save_local("faiss_index")

        st.success("FAISS index recreated successfully!")
    except Exception as e:
        st.error(f"An error occurred while recreating FAISS index: {e}")
180
 
181
 
182
  def main():
 
194
  accept_multiple_files=True, key="pdf_uploader")
195
  if st.button("Submit & Process", key="process_button") and api_key:
196
  with st.spinner("Processing..."):
197
+ recreate_faiss_index(pdf_docs, CH_size, CH_overlap, api_key)
198
+
199
  raw_text = get_pdf_text(pdf_docs)
200
  text_chunks = get_text_chunks(raw_text)
201
  get_vector_store(text_chunks, api_key)
 
203
 
204
 
205
  if __name__ == "__main__":
206
+ main()
207
+