pdf-chatbotfiv

Running

App Files Files Community

ROHAN181 committed on Jan 26

Commit

23cb72c

•

1 Parent(s): d265cec

bbb

Browse files

Files changed (1) hide show

app.py +200 -6

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import gradio as gr
 import os
 from langchain.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.vectorstores import Chroma
@@ -16,10 +16,37 @@ import transformers
 import torch
 import tqdm
 import accelerate
 default_persist_directory = './chroma_HF/'
 llm_name1 = "mistralai/Mistral-7B-Instruct-v0.2"
 llm_name2 = "mistralai/Mistral-7B-Instruct-v0.1"
 llm_name3 = "meta-llama/Llama-2-7b-chat-hf"
@@ -30,6 +57,12 @@ llm_name7 = "google/flan-t5-xxl"
 list_llm = [llm_name1, llm_name2, llm_name3, llm_name4, llm_name5, llm_name6, llm_name7]
 list_llm_simple = [os.path.basename(llm) for llm in list_llm]
 # Load PDF document and create doc splits
 def load_doc(list_file_path, chunk_size, chunk_overlap):
     # Processing for one document only
@@ -47,6 +80,12 @@ def load_doc(list_file_path, chunk_size, chunk_overlap):
     return doc_splits
 # Create vector database
 def create_db(splits):
     embedding = HuggingFaceEmbeddings()
@@ -186,23 +225,161 @@ def upload_file(file_obj):
     return list_file_path
 def demo():
     with gr.Blocks(theme="base") as demo:
         vector_db = gr.State()
         qa_chain = gr.State()
         gr.Markdown(
         """<center><h2>PDF-based chatbot (powered by LangChain and open-source LLMs)</center></h2>
         <h3>Ask any questions about your PDF documents, along with follow-ups</h3>
-        <b>Note:</b> This AI assistant performs retrieval-augmented generation from your PDF documents. \
-        When generating answers, it takes past questions into account (via conversational memory), and includes document references for clarity purposes.</i>
-        <br><b>Warning:</b> This space uses the free CPU Basic hardware from Hugging Face. Some steps and LLM models used below (free inference endpoints) can take some time to generate an output.<br>
         """)
         with gr.Tab("Step 1 - Document pre-processing"):
             with gr.Row():
                 document = gr.Files(height=100, file_count="multiple", file_types=["pdf"], interactive=True, label="Upload your PDF documents (single or multiple)")
                 # upload_btn = gr.UploadButton("Loading document...", height=100, file_count="multiple", file_types=["pdf"], scale=1)
             with gr.Row():
                 db_btn = gr.Radio(["ChromaDB"], label="Vector database type", value = "ChromaDB", type="index", info="Choose your vector database")
             with gr.Accordion("Advanced options - Document text splitter", open=False):
                 with gr.Row():
@@ -244,6 +421,11 @@ def demo():
         # Preprocessing events
         #upload_btn.upload(upload_file, inputs=[upload_btn], outputs=[document])
         db_btn.click(initialize_database, \
             inputs=[document, slider_chunk_size, slider_chunk_overlap], \
             outputs=[vector_db, db_progress])
@@ -267,8 +449,20 @@ def demo():
             inputs=None, \
             outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page], \
             queue=False)
-    demo.queue().launch(debug=True)
 if __name__ == "__main__":
     demo()

 import gradio as gr
 import os
+from dotenv import load_dotenv
 from langchain.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.vectorstores import Chroma
 import torch
 import tqdm
 import accelerate
+import requests
+import shutil
+import os
+import sys
+import sys
+import subprocess
 default_persist_directory = './chroma_HF/'
+# Use a try-except block to handle potential errors
+try:
+    # Delete the directory and its contents
+    shutil.rmtree(default_persist_directory)
+    print(f"Successfully deleted the directory: {default_persist_directory}")
+except OSError as e:
+    # Handle the exception (e.g., directory not found)
+    print(f"Error: {e.filename} - {e.strerror}")
 llm_name1 = "mistralai/Mistral-7B-Instruct-v0.2"
 llm_name2 = "mistralai/Mistral-7B-Instruct-v0.1"
 llm_name3 = "meta-llama/Llama-2-7b-chat-hf"
 list_llm = [llm_name1, llm_name2, llm_name3, llm_name4, llm_name5, llm_name6, llm_name7]
 list_llm_simple = [os.path.basename(llm) for llm in list_llm]
+import os
 # Load PDF document and create doc splits
 def load_doc(list_file_path, chunk_size, chunk_overlap):
     # Processing for one document only
     return doc_splits
def restart_program():
    """Replace the current process with a fresh run of this script.

    Re-executes the running interpreter (``sys.executable``) with the
    original ``sys.argv``. ``os.execl`` does not return on success.
    """
    # exec* replaces the process image immediately and does not flush
    # Python's buffered streams, so flush first or pending output is lost.
    sys.stdout.flush()
    sys.stderr.flush()
    python = sys.executable
    os.execl(python, python, *sys.argv)
 # Create vector database
 def create_db(splits):
     embedding = HuggingFaceEmbeddings()
     return list_file_path
+# ... other code ...
+import random
+import time
def authenticate(username, password):
    """Return True when the submitted login matches the configured credentials.

    The expected credentials are read from the ``APP_USERNAME`` and
    ``APP_PASSWORD`` environment variables at call time. When a variable is
    unset, the previous built-in value is used so existing setups keep
    working.

    Args:
        username: Login name submitted by the user.
        password: Password submitted by the user.

    Returns:
        ``True`` if both values match, ``False`` otherwise (including when
        either argument is not a string).
    """
    import hmac
    import os

    if not isinstance(username, str) or not isinstance(password, str):
        return False

    expected_username = os.environ.get("APP_USERNAME", "fiver")
    expected_password = os.environ.get("APP_PASSWORD", "fiver")

    # Compare as bytes: compare_digest rejects non-ASCII str arguments.
    # Both comparisons always run so timing does not reveal which one failed.
    username_ok = hmac.compare_digest(
        username.encode("utf-8"), expected_username.encode("utf-8")
    )
    password_ok = hmac.compare_digest(
        password.encode("utf-8"), expected_password.encode("utf-8")
    )
    return username_ok and password_ok
def logout(request: gr.Request):
    """Print a "logged out" notice.

    ``request`` is accepted but not used; nothing else is changed here.
    """
    notice = "logged out"
    print(notice)
def restart():
    """Clear the persisted data directory (handler for the "RESET MODEL" button).

    Despite its name, this does not restart the process. It only deletes the
    directory named by ``default_persist_directory`` and prints the outcome.
    """
    print('Restarting script...')
    try:
        shutil.rmtree(default_persist_directory)
    except FileNotFoundError:
        # Nothing has been persisted yet; this is not a failure.
        print(f"Nothing to delete, directory not found: {default_persist_directory}")
    except OSError as e:
        print(f"Error: {e.filename} - {e.strerror}")
    else:
        print(f"Successfully deleted the directory: {default_persist_directory}")
+# def restart_and_clear():
+#     print('Restarting script and clearing cookies/session...')
+#     # JavaScript code to clear cookies and session
+#     js_code = """
+#     // Clear cookies
+#     document.cookie.split(";").forEach(function(c) {
+#         document.cookie = c.replace(/^\\s+/,"").replace(/=.*/, "=;expires=" + new Date().toUTCString() + ";path=/");
+#     });
+#     // Clear session storage
+#     window.sessionStorage.clear();
+#     // Clear local storage
+#     window.localStorage.clear();
+#     """
+#     # Display JavaScript code in Gradio interface
+#     return gr.Text(js_code, type="code", label="JavaScript Code")
# Minimal sample chat UI that answers with a random canned reply.
# NOTE(review): a later `def demo()` in this file rebinds the name `demo`,
# so this Blocks object looks like leftover example code - confirm.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    logout_button = gr.Button(value="Logout")
    logout_button.click(logout)
    # logout_button = gr.LogoutButton()

    def respond(message, chat_history):
        """Append a canned reply to the history and clear the input box."""
        canned_replies = ["How are you?", "I'm very hungry"]
        reply = random.choice(canned_replies)
        chat_history.append((message, reply))
        time.sleep(2)
        return "", chat_history

    msg.submit(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
def download_and_update_list(url, timeout=30):
    """Download a PDF from ``url`` and return its local path as a one-item list.

    Args:
        url: HTTP(S) address of the PDF to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled server cannot block the handler indefinitely.

    Returns:
        ``["downloaded_pdf.pdf"]`` on success, or ``[]`` when the URL is
        missing or not HTTP(S), the request fails, or the file cannot be
        written.
    """
    if not isinstance(url, str) or not url.strip():
        print("Download error: no URL provided")
        return []

    url = url.strip()
    if not url.lower().startswith(("http://", "https://")):
        print("Download error: unsupported URL scheme:", url)
        return []

    target_path = "downloaded_pdf.pdf"
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        # Write only after the whole body has arrived, so a failed request
        # never leaves a truncated file behind.
        with open(target_path, "wb") as f:
            f.write(response.content)
    except requests.exceptions.RequestException as e:
        print("Download error:", e)
        return []
    except OSError as e:
        print("Download error: could not save file:", e)
        return []
    return [target_path]
+# def download_and_update_list(urls):
+#     filenames = []
+#     for url in urls:
+#         response = requests.get(url)  # Download the PDF from the provided URL
+#         filename = f"downloaded_{len(filenames)+1}.pdf"  # Generate unique filename
+#         with open(filename, "wb") as fh:  # Save it to a file
+#             fh.write(response.content)
+#         filenames.append(filename)
+#     return filenames  # Return a list of filenames
 def demo():
+    load_dotenv()
     with gr.Blocks(theme="base") as demo:
         vector_db = gr.State()
         qa_chain = gr.State()
+        logout_btn = gr.Button("RESET MODEL")
         gr.Markdown(
         """<center><h2>PDF-based chatbot (powered by LangChain and open-source LLMs)</center></h2>
         <h3>Ask any questions about your PDF documents, along with follow-ups</h3>
         """)
         with gr.Tab("Step 1 - Document pre-processing"):
+            uploaded_documents = []
             with gr.Row():
                 document = gr.Files(height=100, file_count="multiple", file_types=["pdf"], interactive=True, label="Upload your PDF documents (single or multiple)")
                 # upload_btn = gr.UploadButton("Loading document...", height=100, file_count="multiple", file_types=["pdf"], scale=1)
             with gr.Row():
+              document_url = gr.Textbox(label="Or enter a PDF document URL:")  # Add URL field
+              download_btn = gr.Button("Download PDF")  # Add download button
+            # ... (rest of your code)
+            # ... (rest of your code)
+    # Error handling
+            with gr.Row():
                 db_btn = gr.Radio(["ChromaDB"], label="Vector database type", value = "ChromaDB", type="index", info="Choose your vector database")
             with gr.Accordion("Advanced options - Document text splitter", open=False):
                 with gr.Row():
         # Preprocessing events
         #upload_btn.upload(upload_file, inputs=[upload_btn], outputs=[document])
+        download_btn.click(download_and_update_list, inputs=[document_url], outputs=[document])
+        logout_btn.click(restart)
         db_btn.click(initialize_database, \
             inputs=[document, slider_chunk_size, slider_chunk_overlap], \
             outputs=[vector_db, db_progress])
             inputs=None, \
             outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page], \
             queue=False)
+    demo.queue().launch(auth=authenticate,debug=True)
 if __name__ == "__main__":
     demo()