pdf-chatbot-opensource-llm

Running

App Files Files Community

Ubai committed on Feb 26, 2024

Commit

339ce69

verified ·

1 Parent(s): 6b0097a

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -82

app.py CHANGED Viewed

@@ -1,95 +1,82 @@
 import gradio as gr
 import os
-from langchain_community.document_loaders import PyPDFLoader  # Corrected import
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.vectorstores import Chroma  # Corrected import
-from langchain.chains import ConversationalRetrievalChain  # Note: Not from "langchain_community"
-from langchain_community.embeddings import HuggingFaceEmbeddings  # Corrected import
-from langchain_community.llms import HuggingFacePipeline, HuggingFaceHub  # Corrected import
-from langchain.chains import ConversationChain  # Note: Not from "langchain_community"
 from langchain.memory import ConversationBufferMemory
 from pathlib import Path
 import chromadb
 from transformers import AutoTokenizer
 import transformers
 import torch
-import tqdm
 import accelerate
-# LLM model and parameters (adjusted for clarity)
-chosen_llm_model = "mistralai/Mistral-7B-Instruct-v0.2"
-llm_temperature = 0.7
-max_tokens = 1024
-top_k = 3
-# Chunk size and overlap (adjusted for clarity)
-chunk_size = 600
-chunk_overlap = 40
-# Initialize vector database in background
-accelerate(initialize_database)()  # Function definition moved here
-def initialize_database():
-    """
-    This function initializes the vector database (assumed to be ChromaDB).
-    Modify this function based on your specific database needs.
-    """
-    # Replace with your ChromaDB connection and schema creation logic
-    # ...
-    pass
-def demo():
-    with gr.Blocks(theme="base") as demo:
-        qa_chain = gr.State()  # Store the initialized QA chain
-        collection_name = gr.State()
-        gr.Markdown(
-            """
-            <center><h2>PDF-based chatbot (powered by LangChain and open-source LLMs)</center></h2>
-            <h3>Ask any questions about your PDF documents, along with follow-ups</h3>
-            <b>Note:</b> This AI assistant performs retrieval-augmented generation from your PDF documents. \
-            When generating answers, it takes past questions into account (via conversational memory), and includes document references for clarity purposes.</i>
-            <br><b>Warning:</b> This space uses the free CPU Basic hardware from Hugging Face. Some steps and LLM models used below (free inference endpoints) can take some time to generate an output.<br>
-            """
         )
-        with gr.Row():
-            document = gr.Files(
-                height=100,
-                file_count="multiple",
-                file_types=["pdf"],
-                interactive=True,
-                label="Upload your PDF documents (single or multiple)",
-            )
-        with gr.Row():
-            chatbot = gr.Chatbot(height=300)
-        with gr.Accordion("Advanced - Document references", open=False):
-            with gr.Row():
-                doc_source1 = gr.Textbox(label="Reference 1", lines=2, container=True, scale=20)
-                source1_page = gr.Number(label="Page", scale=1)
-            with gr.Row():
-                doc_source2 = gr.Textbox(label="Reference 2", lines=2, container=True, scale=20)
-                source2_page = gr.Number(label="Page", scale=1)
-            with gr.Row():
-                doc_source3 = gr.Textbox(label="Reference 3", lines=2, container=True, scale=20)
-                source3_page = gr.Number(label="Page", scale=1)
-        with gr.Row():
-            msg = gr.Textbox(placeholder="Type message", container=True)
-        with gr.Row():
-            submit_btn = gr.Button("Submit")
-            clear_btn = gr.ClearButton([msg, chatbot])
-        # Initialize default QA chain when documents are uploaded
-        document.uploaded(initialize_LLM, inputs=[chosen_llm_model])
-        # Chatbot events
-        msg.submit(conversation, inputs=[qa_chain, msg, chatbot])
-        submit_btn.click(conversation, inputs=[qa_chain, msg, chatbot])
-        clear_btn.click(lambda: [None, "", 0, "", 0, "", 0], inputs=None, outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page])

 import gradio as gr
 import os
+from langchain.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.vectorstores import Chroma
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.llms import HuggingFaceHub
+from langchain.chains import ConversationalRetrievalChain
 from langchain.memory import ConversationBufferMemory
 from pathlib import Path
 import chromadb
 from transformers import AutoTokenizer
 import transformers
 import torch
+import tqdm
 import accelerate
+# Default LLM model
+llm_model = "mistralai/Mistral-7B-Instruct-v0.2"
+# Other settings
+default_persist_directory = './chroma_HF/'
+list_llm = ["mistralai/Mistral-7B-Instruct-v0.2", "mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.1", \
+    "google/gemma-7b-it","google/gemma-2b-it", \
+    "HuggingFaceH4/zephyr-7b-beta", "meta-llama/Llama-2-7b-chat-hf", "microsoft/phi-2", \
+    "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "mosaicml/mpt-7b-instruct", "tiiuae/falcon-7b-instruct", \
+    "google/flan-t5-xxl"
+]
+list_llm_simple = [os.path.basename(llm) for llm in list_llm]
+# Load vector database
+def load_db():
+    embedding = HuggingFaceEmbeddings()
+    vectordb = Chroma(
+        persist_directory=default_persist_directory,
+        embedding_function=embedding)
+    return vectordb
+# Initialize langchain LLM chain
+def initialize_llmchain(vector_db, progress=gr.Progress()):
+    progress(0.5, desc="Initializing HF Hub...")
+    # Use of trust_remote_code as model_kwargs
+    # Warning: langchain issue
+    # URL: https://github.com/langchain-ai/langchain/issues/6080
+    if llm_model == "mistralai/Mixtral-8x7B-Instruct-v0.1":
+        llm = HuggingFaceHub(
+            repo_id=llm_model,
+            model_kwargs={"temperature": 0.7, "max_new_tokens": 1024, "top_k": 3, "load_in_8bit": True}
         )
+    # ... (other model configurations for different model options)
+    else:
+        llm = HuggingFaceHub(
+            repo_id=llm_model,
+            model_kwargs={"temperature": 0.7, "max_new_tokens": 1024, "top_k": 3}
+        )
+    progress(0.75, desc="Defining buffer memory...")
+    memory = ConversationBufferMemory(
+        memory_key="chat_history",
+        output_key='answer',
+        return_messages=True
+    )
+    retriever=vector_db.as_retriever()
+    progress(0.8, desc="Defining retrieval chain...")
+    qa_chain = ConversationalRetrievalChain.from_llm(
+        llm,
+        retriever=retriever,
+        chain_type="stuff",
+        memory=memory,
+        return_source_documents=True,
+        verbose=False,
+    )
+    progress(0.9, desc="Done!")
+    return qa_chain
+# ... (other functions remain the same)