ROHAN181 committed on
Commit
23cb72c
1 Parent(s): d265cec
Files changed (1) hide show
  1. app.py +200 -6
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  import os
3
-
4
  from langchain.document_loaders import PyPDFLoader
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
  from langchain.vectorstores import Chroma
@@ -16,10 +16,37 @@ import transformers
16
  import torch
17
  import tqdm
18
  import accelerate
 
 
 
 
 
 
 
 
 
 
19
 
20
 
21
  default_persist_directory = './chroma_HF/'
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  llm_name1 = "mistralai/Mistral-7B-Instruct-v0.2"
24
  llm_name2 = "mistralai/Mistral-7B-Instruct-v0.1"
25
  llm_name3 = "meta-llama/Llama-2-7b-chat-hf"
@@ -30,6 +57,12 @@ llm_name7 = "google/flan-t5-xxl"
30
  list_llm = [llm_name1, llm_name2, llm_name3, llm_name4, llm_name5, llm_name6, llm_name7]
31
  list_llm_simple = [os.path.basename(llm) for llm in list_llm]
32
 
 
 
 
 
 
 
33
  # Load PDF document and create doc splits
34
  def load_doc(list_file_path, chunk_size, chunk_overlap):
35
  # Processing for one document only
@@ -47,6 +80,12 @@ def load_doc(list_file_path, chunk_size, chunk_overlap):
47
  return doc_splits
48
 
49
 
 
 
 
 
 
 
50
  # Create vector database
51
  def create_db(splits):
52
  embedding = HuggingFaceEmbeddings()
@@ -186,23 +225,161 @@ def upload_file(file_obj):
186
  return list_file_path
187
 
188
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  def demo():
 
190
  with gr.Blocks(theme="base") as demo:
191
  vector_db = gr.State()
192
  qa_chain = gr.State()
193
-
 
 
 
194
  gr.Markdown(
195
  """<center><h2>PDF-based chatbot (powered by LangChain and open-source LLMs)</center></h2>
196
  <h3>Ask any questions about your PDF documents, along with follow-ups</h3>
197
- <b>Note:</b> This AI assistant performs retrieval-augmented generation from your PDF documents. \
198
- When generating answers, it takes past questions into account (via conversational memory), and includes document references for clarity purposes.</i>
199
- <br><b>Warning:</b> This space uses the free CPU Basic hardware from Hugging Face. Some steps and LLM models used below (free inference endpoints) can take some time to generate an output.<br>
200
  """)
201
  with gr.Tab("Step 1 - Document pre-processing"):
 
202
  with gr.Row():
203
  document = gr.Files(height=100, file_count="multiple", file_types=["pdf"], interactive=True, label="Upload your PDF documents (single or multiple)")
204
  # upload_btn = gr.UploadButton("Loading document...", height=100, file_count="multiple", file_types=["pdf"], scale=1)
205
  with gr.Row():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  db_btn = gr.Radio(["ChromaDB"], label="Vector database type", value = "ChromaDB", type="index", info="Choose your vector database")
207
  with gr.Accordion("Advanced options - Document text splitter", open=False):
208
  with gr.Row():
@@ -244,6 +421,11 @@ def demo():
244
 
245
  # Preprocessing events
246
  #upload_btn.upload(upload_file, inputs=[upload_btn], outputs=[document])
 
 
 
 
 
247
  db_btn.click(initialize_database, \
248
  inputs=[document, slider_chunk_size, slider_chunk_overlap], \
249
  outputs=[vector_db, db_progress])
@@ -267,8 +449,20 @@ def demo():
267
  inputs=None, \
268
  outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page], \
269
  queue=False)
270
- demo.queue().launch(debug=True)
 
 
 
 
 
 
 
 
 
 
 
271
 
272
 
273
  if __name__ == "__main__":
 
274
  demo()
 
1
  import gradio as gr
2
  import os
3
+ from dotenv import load_dotenv
4
  from langchain.document_loaders import PyPDFLoader
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
  from langchain.vectorstores import Chroma
 
16
  import torch
17
  import tqdm
18
  import accelerate
19
+ import requests
20
+ import shutil
21
+ import os
22
+
23
+ import sys
24
+
25
+ import sys
26
+ import subprocess
27
+
28
+
29
 
30
 
31
  default_persist_directory = './chroma_HF/'
32
 
33
+
34
+
35
+
36
+
37
+
38
# Start each run from a clean slate: drop any Chroma index persisted by a
# previous session so stale embeddings cannot leak into this one.
try:
    shutil.rmtree(default_persist_directory)
    print(f"Successfully deleted the directory: {default_persist_directory}")
except OSError as e:
    # Most commonly FileNotFoundError on a first run; report and continue.
    print(f"Error: {e.filename} - {e.strerror}")
46
+
47
+
48
+
49
+
50
  llm_name1 = "mistralai/Mistral-7B-Instruct-v0.2"
51
  llm_name2 = "mistralai/Mistral-7B-Instruct-v0.1"
52
  llm_name3 = "meta-llama/Llama-2-7b-chat-hf"
 
57
  list_llm = [llm_name1, llm_name2, llm_name3, llm_name4, llm_name5, llm_name6, llm_name7]
58
  list_llm_simple = [os.path.basename(llm) for llm in list_llm]
59
 
60
+
61
+
62
+ import os
63
+
64
+
65
+
66
  # Load PDF document and create doc splits
67
  def load_doc(list_file_path, chunk_size, chunk_overlap):
68
  # Processing for one document only
 
80
  return doc_splits
81
 
82
 
83
+
84
+
85
def restart_program():
    """Replace the current process with a fresh run of the same script.

    Re-executes the interpreter via ``os.execl`` with the original argv;
    this call never returns in the old process.
    """
    interpreter = sys.executable
    os.execl(interpreter, interpreter, *sys.argv)
88
+
89
  # Create vector database
90
  def create_db(splits):
91
  embedding = HuggingFaceEmbeddings()
 
225
  return list_file_path
226
 
227
 
228
+
229
+
230
+
231
+ # ... other code ...
232
+
233
+
234
+
235
+
236
+ import random
237
+ import time
238
+
239
def authenticate(username, password):
    """Gradio ``auth`` callback: return True only for the single demo account.

    SECURITY(review): the credentials are hard-coded in source ('fiver'/'fiver').
    Move them to environment variables or HF Space secrets before any real
    deployment; plain-text credentials in a public repo are exposed.
    """
    # The original if/else returned literal True/False; the comparison already
    # yields the boolean directly.
    return username == 'fiver' and password == 'fiver'
245
+
246
def logout(request: gr.Request):
    """Click handler for the Logout button.

    Only logs the event; it does not terminate the Gradio session itself
    (the *request* argument is currently unused).
    """
    print("logged out")
249
+
250
+
251
+
252
+
253
+
254
def restart():
    """Handler for the "RESET MODEL" button.

    NOTE(review): despite its name this currently only prints a message — the
    actual relaunch (a ``subprocess.run(['python', 'app2.py'])`` call) was left
    commented out in the original source and has been removed here as dead
    code. If a real restart is wanted, wire this to ``restart_program`` or
    confirm the intended relaunch command first.
    """
    print('Restarting script...')
265
+
266
+
267
# NOTE(review): this repeats the persisted-Chroma cleanup already executed near
# the top of the module at import time; by this point the directory is normally
# gone, so the bare rmtree always raised FileNotFoundError and printed a
# spurious error on every startup. Guard the call so the error branch only
# fires on a genuine failure.
if os.path.isdir(default_persist_directory):
    try:
        # Delete the directory and its contents
        shutil.rmtree(default_persist_directory)
        print(f"Successfully deleted the directory: {default_persist_directory}")
    except OSError as e:
        # Handle the exception (e.g., directory not found)
        print(f"Error: {e.filename} - {e.strerror}")
275
+
276
+
277
+
278
+
279
+
280
+ # def restart_and_clear():
281
+ # print('Restarting script and clearing cookies/session...')
282
+
283
+ # # JavaScript code to clear cookies and session
284
+ # js_code = """
285
+ # // Clear cookies
286
+ # document.cookie.split(";").forEach(function(c) {
287
+ # document.cookie = c.replace(/^\\s+/,"").replace(/=.*/, "=;expires=" + new Date().toUTCString() + ";path=/");
288
+ # });
289
+
290
+ # // Clear session storage
291
+ # window.sessionStorage.clear();
292
+
293
+ # // Clear local storage
294
+ # window.localStorage.clear();
295
+ # """
296
+
297
+ # # Display JavaScript code in Gradio interface
298
+ # return gr.Text(js_code, type="code", label="JavaScript Code")
299
+
300
+
301
+
302
# Minimal standalone chat UI with a logout control.
# NOTE(review): this module-level ``demo`` is later shadowed by ``def demo():``
# further down the file — confirm which one is intended to win.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])
    logout_button = gr.Button(value="Logout")
    logout_button.click(logout)
    # logout_button = gr.LogoutButton()

    def respond(user_message, history):
        # Placeholder bot: append a random canned reply after a short delay,
        # then clear the input box by returning "" for it.
        reply = random.choice(["How are you?", "I'm very hungry"])
        history.append((user_message, reply))
        time.sleep(2)
        return "", history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
316
+
317
+
318
+
319
def download_and_update_list(url):  # Function to handle download and list update
    """Fetch a PDF from *url* and save it locally for the gr.Files component.

    Returns ``["downloaded_pdf.pdf"]`` (the saved path, as a one-element list)
    on success, or ``[]`` when the HTTP request fails. Note the fixed output
    filename: a second download overwrites the first.
    """
    try:
        response = requests.get(url)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print("Download error:", e)
        return []  # Return an empty list in case of errors
    with open("downloaded_pdf.pdf", "wb") as f:
        f.write(response.content)
    return ["downloaded_pdf.pdf"]
331
+
332
+
333
+
334
+
335
+ # def download_and_update_list(urls):
336
+ # filenames = []
337
+ # for url in urls:
338
+ # response = requests.get(url) # Download the PDF from the provided URL
339
+ # filename = f"downloaded_{len(filenames)+1}.pdf" # Generate unique filename
340
+ # with open(filename, "wb") as fh: # Save it to a file
341
+ # fh.write(response.content)
342
+ # filenames.append(filename)
343
+ # return filenames # Return a list of filenames
344
+
345
+
346
+
347
+
348
+
349
  def demo():
350
+ load_dotenv()
351
  with gr.Blocks(theme="base") as demo:
352
  vector_db = gr.State()
353
  qa_chain = gr.State()
354
+ logout_btn = gr.Button("RESET MODEL")
355
+
356
+
357
+
358
  gr.Markdown(
359
  """<center><h2>PDF-based chatbot (powered by LangChain and open-source LLMs)</center></h2>
360
  <h3>Ask any questions about your PDF documents, along with follow-ups</h3>
361
+
 
 
362
  """)
363
  with gr.Tab("Step 1 - Document pre-processing"):
364
+ uploaded_documents = []
365
  with gr.Row():
366
  document = gr.Files(height=100, file_count="multiple", file_types=["pdf"], interactive=True, label="Upload your PDF documents (single or multiple)")
367
  # upload_btn = gr.UploadButton("Loading document...", height=100, file_count="multiple", file_types=["pdf"], scale=1)
368
  with gr.Row():
369
+ document_url = gr.Textbox(label="Or enter a PDF document URL:") # Add URL field
370
+
371
+ download_btn = gr.Button("Download PDF") # Add download button
372
+
373
+
374
+ # ... (rest of your code)
375
+ # ... (rest of your code)
376
+
377
+
378
+
379
+ # Error handling
380
+
381
+
382
+ with gr.Row():
383
  db_btn = gr.Radio(["ChromaDB"], label="Vector database type", value = "ChromaDB", type="index", info="Choose your vector database")
384
  with gr.Accordion("Advanced options - Document text splitter", open=False):
385
  with gr.Row():
 
421
 
422
  # Preprocessing events
423
  #upload_btn.upload(upload_file, inputs=[upload_btn], outputs=[document])
424
+
425
+ download_btn.click(download_and_update_list, inputs=[document_url], outputs=[document])
426
+
427
+
428
+ logout_btn.click(restart)
429
  db_btn.click(initialize_database, \
430
  inputs=[document, slider_chunk_size, slider_chunk_overlap], \
431
  outputs=[vector_db, db_progress])
 
449
  inputs=None, \
450
  outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page], \
451
  queue=False)
452
+ demo.queue().launch(auth=authenticate,debug=True)
453
+
454
+
455
+
456
+
457
+
458
+
459
+
460
+
461
+
462
+
463
+
464
 
465
 
466
  if __name__ == "__main__":
467
+
468
  demo()