Shreyas094 committed on
Commit
d23826b
1 Parent(s): f080583

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -120
app.py CHANGED
@@ -210,104 +210,71 @@ def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_
210
  print(f"Result {i}:")
211
  print(f" Link: {result['link']}")
212
  if result['text']:
213
- print(f" Text: {result['text'][:100]}...") # Print first 100 characters
214
  else:
215
- print(" Text: None")
216
- print("End of search results")
217
-
218
- if not all_results:
219
- print("No search results found. Returning a default message.")
220
- return [{"link": None, "text": "No information found in the web search results."}]
221
-
222
  return all_results
223
 
224
def ask_question(question, temperature, top_p, repetition_penalty, web_search):
    """Answer a question from the cached memory, a fresh web search, or the
    local FAISS vector store.

    Args:
        question: The user's question; an empty/None value short-circuits.
        temperature, top_p, repetition_penalty: Sampling settings for the model.
        web_search: When True, fetch fresh web results instead of using caches.

    Returns:
        The generated (or cached) answer string, or a status message when no
        question / no vector store is available.
    """
    global conversation_history

    if not question:
        return "Please enter a question."

    # Serve from the answer cache only for non-web-search questions.
    if not web_search and question in memory_database:
        answer = memory_database[question]
    else:
        llm = get_model(temperature, top_p, repetition_penalty)
        embeddings = get_embeddings()

        if web_search:
            results = google_search(question)
            context_text = "\n".join(hit["text"] for hit in results if hit["text"])

            # Wrap each usable web hit as a Document so it can be indexed.
            web_docs = [
                Document(page_content=hit["text"], metadata={"source": hit["link"]})
                for hit in results
                if hit["text"]
            ]

            # Merge the new documents into the existing store, or create one.
            if os.path.exists("faiss_database"):
                vector_db = FAISS.load_local("faiss_database", embeddings, allow_dangerous_deserialization=True)
                vector_db.add_documents(web_docs)
            else:
                vector_db = FAISS.from_documents(web_docs, embeddings)
            vector_db.save_local("faiss_database")

            web_prompt = """
            Answer the question based on the following web search results:
            Web Search Results:
            {context}
            Current Question: {question}
            If the web search results don't contain relevant information, state that the information is not available in the search results.
            Provide a concise and direct answer to the question without mentioning the web search or these instructions:
            """
            formatted_prompt = ChatPromptTemplate.from_template(web_prompt).format(
                context=context_text, question=question
            )
        else:
            # Document Q&A requires a pre-built vector store.
            if not os.path.exists("faiss_database"):
                return "No FAISS database found. Please upload documents to create the vector store."
            vector_db = FAISS.load_local("faiss_database", embeddings, allow_dangerous_deserialization=True)

            history_text = "\n".join(
                f"Q: {turn['question']}\nA: {turn['answer']}" for turn in conversation_history
            )

            if is_related_to_history(question, conversation_history):
                # Follow-up question: the history alone is the context.
                context_text = "No additional context needed. Please refer to the conversation history."
            else:
                docs = vector_db.as_retriever().get_relevant_documents(question)
                context_text = "\n".join(doc.page_content for doc in docs)

            formatted_prompt = ChatPromptTemplate.from_template(prompt).format(
                history=history_text, context=context_text, question=question
            )

        raw = generate_chunked_response(llm, formatted_prompt)
        # Keep only the text after the last echoed prompt marker, then drop
        # any instruction lines the model parroted back.
        answer = re.split(r'Question:|Current Question:', raw)[-1].strip()
        answer = '\n'.join(
            line for line in answer.split('\n')
            if not line.startswith('If') and not line.startswith('Provide')
        )

    # Cache and record only non-web-search exchanges.
    if not web_search:
        memory_database[question] = answer
        conversation_history = manage_conversation_history(question, answer, conversation_history)

    return answer
294
 
295
def update_vectors(files, use_recursive_splitter):
    """Split each uploaded PDF into chunks and add them to the vector store.

    Args:
        files: Uploaded PDF file objects; falsy means nothing to process.
        use_recursive_splitter: Choose the recursive splitter over the basic one.

    Returns:
        A human-readable status message with the number of chunks processed.
    """
    if not files:
        return "Please upload at least one PDF file."

    embeddings = get_embeddings()
    # Pick the splitting strategy once; only the chosen name is resolved.
    splitter = load_and_split_document_recursive if use_recursive_splitter else load_and_split_document_basic

    chunk_count = 0
    for uploaded in files:
        chunks = splitter(uploaded)
        create_or_update_database(chunks, embeddings)
        chunk_count += len(chunks)

    return f"Vector store updated successfully. Processed {chunk_count} chunks from {len(files)} files."
311
 
312
  def extract_db_to_excel():
313
  embed = get_embeddings()
@@ -338,47 +305,46 @@ def export_memory_db_to_excel():
338
 
339
  return excel_path
340
 
341
# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Chat with your PDF documents")

    with gr.Row():
        file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
        update_button = gr.Button("Update Vector Store")
        use_recursive_splitter = gr.Checkbox(label="Use Recursive Text Splitter", value=False)

    update_output = gr.Textbox(label="Update Status")
    update_button.click(update_vectors, inputs=[file_input, use_recursive_splitter], outputs=update_output)

    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Conversation")
            question_input = gr.Textbox(label="Ask a question about your documents")
            submit_button = gr.Button("Submit")
        with gr.Column(scale=1):
            temperature_slider = gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, value=0.5, step=0.1)
            top_p_slider = gr.Slider(label="Top P", minimum=0.0, maximum=1.0, value=0.9, step=0.1)
            repetition_penalty_slider = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, value=1.0, step=0.1)
            web_search_checkbox = gr.Checkbox(label="Enable Web Search", value=False)

    def chat(question, history, temperature, top_p, repetition_penalty, web_search):
        """Gradio callback: answer `question` and append the turn to the chat history.

        BUG FIX: the original read `temperature_slider.value` (etc.) inside the
        callback, but a Gradio component's `.value` attribute is its *initial*
        value, not the user's current setting — runtime values must be passed
        through the event's `inputs` list.
        """
        answer = ask_question(question, temperature, top_p, repetition_penalty, web_search)
        history.append((question, answer))
        # Clear the textbox and return the updated conversation.
        return "", history

    submit_button.click(
        chat,
        inputs=[question_input, chatbot, temperature_slider, top_p_slider, repetition_penalty_slider, web_search_checkbox],
        outputs=[question_input, chatbot],
    )

    extract_button = gr.Button("Extract Database to Excel")
    excel_output = gr.File(label="Download Excel File")
    extract_button.click(extract_db_to_excel, inputs=[], outputs=excel_output)

    export_memory_button = gr.Button("Export Memory Database to Excel")
    memory_excel_output = gr.File(label="Download Memory Excel File")
    export_memory_button.click(export_memory_db_to_excel, inputs=[], outputs=memory_excel_output)

    clear_button = gr.Button("Clear Cache")
    clear_output = gr.Textbox(label="Cache Status")
    clear_button.click(clear_cache, inputs=[], outputs=clear_output)

if __name__ == "__main__":
    demo.launch()
 
210
  print(f"Result {i}:")
211
  print(f" Link: {result['link']}")
212
  if result['text']:
213
+ print(f" Text: {result['text'][:100]}...") # Display the first 100 characters of the text for brevity
214
  else:
215
+ print(" No text extracted")
 
 
 
 
 
 
216
  return all_results
217
 
218
def process_question(question, documents, history, temperature, top_p, repetition_penalty):
    """Answer a question using the memory cache, the FAISS store, or a fresh web search.

    Args:
        question: The user's question.
        documents: Uploaded file component value (currently unused; kept for
            interface compatibility with the Gradio wiring).
        history: List of prior turns, each a dict with 'question' and 'answer' keys.
        temperature, top_p, repetition_penalty: Sampling settings for the model.

    Returns:
        The generated answer string, or a cached answer for a similar question.
    """
    global conversation_history

    embeddings = get_embeddings()

    # Check the memory database for a sufficiently similar prior question.
    for prev_question, prev_answer in memory_database.items():
        similarity = get_similarity(question, prev_question)
        if similarity > 0.7:
            return prev_answer

    # Load the FAISS vector store if it exists and retrieve candidate context.
    if os.path.exists("faiss_database"):
        db = FAISS.load_local("faiss_database", embeddings, allow_dangerous_deserialization=True)
        relevant_docs = db.similarity_search(question, k=3)
    else:
        relevant_docs = []

    if len(relevant_docs) == 0:
        # Nothing relevant locally: perform a web search and index the results.
        web_search_results = google_search(question, num_results=5)
        web_docs = [
            Document(page_content=res["text"] or "", metadata={"source": res["link"]})
            for res in web_search_results
            if res["text"]
        ]

        if web_docs:
            # Update the FAISS vector store with the new documents, then
            # reload it and retry retrieval.
            create_or_update_database(web_docs, embeddings)
            db = FAISS.load_local("faiss_database", embeddings, allow_dangerous_deserialization=True)
            relevant_docs = db.similarity_search(question, k=3)

    context = "\n\n".join([doc.page_content for doc in relevant_docs])

    # BUG FIX: the original assigned `history_text` only on the `else` branch,
    # so a history-related question raised NameError at the prompt format below.
    history_text = "\n".join([f"Q: {h['question']}\nA: {h['answer']}" for h in history])

    if is_related_to_history(question, history):
        # Follow-up question: rely on the conversation history, not retrieval.
        context = "None"
    else:
        context = context if context else "None"

    prompt_text = ChatPromptTemplate(
        input_variables=["history", "context", "question"],
        template=prompt
    ).format(history=history_text, context=context, question=question)

    model = get_model(temperature, top_p, repetition_penalty)
    answer = generate_chunked_response(model, prompt_text)

    # Record the exchange in both the rolling history and the answer cache.
    conversation_history = manage_conversation_history(question, answer, history)
    memory_database[question] = answer

    return answer
269
 
270
def process_uploaded_file(file, is_recursive):
    """Split an uploaded PDF and add its chunks to the vector database.

    Args:
        file: The uploaded file object; None when nothing was uploaded.
        is_recursive: Choose the recursive text splitter over the basic one.

    Returns:
        A status message describing the outcome.
    """
    # BUG FIX: the previous version crashed on a missing upload; the guard
    # from the earlier `update_vectors` implementation was lost in the rewrite.
    if file is None:
        return "Please upload a PDF file."

    if is_recursive:
        data = load_and_split_document_recursive(file)
    else:
        data = load_and_split_document_basic(file)

    embeddings = get_embeddings()
    create_or_update_database(data, embeddings)
    return "File processed and data added to the vector database."
 
 
 
 
 
 
 
 
278
 
279
  def extract_db_to_excel():
280
  embed = get_embeddings()
 
305
 
306
  return excel_path
307
 
 
308
with gr.Blocks() as demo:
    with gr.Tab("Upload PDF"):
        with gr.Row():
            pdf_file = gr.File(label="Upload PDF")
        with gr.Row():
            recursive_check = gr.Checkbox(label="Use Recursive Text Splitter")
            upload_button = gr.Button("Upload and Process")
        with gr.Row():
            upload_output = gr.Textbox(label="Upload Output")

    with gr.Tab("Ask Questions"):
        with gr.Row():
            question = gr.Textbox(label="Your Question")
        with gr.Row():
            temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, label="Temperature")
            top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.9, label="Top P")
            repetition_penalty = gr.Slider(minimum=0.0, maximum=2.0, value=1.0, label="Repetition Penalty")
        with gr.Row():
            ask_button = gr.Button("Ask")
        with gr.Row():
            answer = gr.Textbox(label="Answer")
        # Session-scoped conversation history (list of {'question', 'answer'} dicts).
        history_state = gr.State([])

    with gr.Tab("Clear Cache"):
        with gr.Row():
            clear_button = gr.Button("Clear Cache")
        with gr.Row():
            clear_output = gr.Textbox(label="Clear Output")

    with gr.Tab("Export Data"):
        with gr.Row():
            export_db_button = gr.Button("Export Database to Excel")
            export_db_output = gr.Textbox(label="Export Output")
        with gr.Row():
            export_memory_button = gr.Button("Export Memory DB to Excel")
            export_memory_output = gr.Textbox(label="Export Output")

    upload_button.click(process_uploaded_file, [pdf_file, recursive_check], upload_output)
    # BUG FIX: the original passed `recursive_check` (a Checkbox bool) as
    # process_question's `history` parameter, which is iterated as a list of
    # Q/A dicts; wire the gr.State history in instead.
    ask_button.click(process_question, [question, pdf_file, history_state, temperature, top_p, repetition_penalty], answer)
    clear_button.click(clear_cache, [], clear_output)
    export_db_button.click(extract_db_to_excel, [], export_db_output)
    export_memory_button.click(export_memory_db_to_excel, [], export_memory_output)

demo.launch()