djangomango committed on
Commit
abb24bf
1 Parent(s): 8c6eb1a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -0
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import necessary libraries
2
+ import gradio as gr
3
+ from langchain.document_loaders import OnlinePDFLoader
4
+ from langchain.text_splitter import CharacterTextSplitter
5
+ from langchain.llms import HuggingFaceHub
6
+ from langchain.embeddings import HuggingFaceHubEmbeddings
7
+ from langchain.vectorstores import Chroma
8
+ from langchain.chains import RetrievalQA
9
+
10
+ # Define a function to display "Loading..." when loading a PDF
11
+ def loading_pdf():
12
+ return "Loading..."
13
+
14
+ # Define a function to process PDF changes
15
+ def pdf_changes(pdf_doc, repo_id):
16
+ # Initialize the OnlinePDFLoader to load the PDF document
17
+ loader = OnlinePDFLoader(pdf_doc.name)
18
+ documents = loader.load()
19
+
20
+ # Split the loaded documents into chunks using CharacterTextSplitter
21
+ text_splitter = CharacterTextSplitter(chunk_size=400, chunk_overlap=50)
22
+ texts = text_splitter.split_documents(documents)
23
+
24
+ # Initialize HuggingFaceHubEmbeddings for embeddings
25
+ embeddings = HuggingFaceHubEmbeddings()
26
+
27
+ # Create a Chroma vector store from the text chunks and embeddings
28
+ db = Chroma.from_documents(texts, embeddings)
29
+
30
+ # Convert the vector store to a retriever
31
+ retriever = db.as_retriever()
32
+
33
+ # Initialize an HuggingFaceHub language model (LLM)
34
+ llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature": 0.25, "max_new_tokens": 1000})
35
+
36
+ # Create a RetrievalQA chain with the LLM, retriever, and return_source_documents option
37
+ global qa
38
+ qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True)
39
+
40
+ return "Ready"
41
+
42
+ # Define a function to add text to a history
43
+ def add_text(history, text):
44
+ history = history + [(text, None)]
45
+ return history, ""
46
+
47
+ # Define a bot function to generate responses
48
+ def bot(history):
49
+ response = infer(history[-1][0])
50
+ history[-1][1] = response['result']
51
+ return history
52
+
53
+ # Define an inference function to query the LLM
54
+ def infer(query):
55
+ result = qa({"query": query})
56
+ return result
57
+
58
+ # Define custom CSS styles
59
+ css = """
60
+ #col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
61
+ """
62
+
63
+ # Define a title HTML for the interface
64
+ title = """
65
+ <div style="text-align: center;max-width: 700px;">
66
+ <h1>Chat with PDF</h1>
67
+ <p style="text-align: center;">Upload a .PDF from your computer, click the "Load PDF to LangChain" button, <br />
68
+ when everything is ready, you can start asking questions about the PDF ;)</p>
69
+ """
70
+
71
+ # Create the Gradio interface
72
+ with gr.Blocks(css=css) as demo:
73
+ with gr.Column(elem_id="col-container"):
74
+ gr.HTML(title)
75
+
76
+ with gr.Column():
77
+ # Create a file input for loading PDF
78
+ pdf_doc = gr.File(label="Load a PDF", file_types=['.pdf'], type="file", value="AhmedS_Resume.pdf")
79
+
80
+ # Create a dropdown for selecting the LLM
81
+ repo_id = gr.Dropdown(label="LLM", choices=["HuggingFaceH4/zephyr-7b-alpha", "CausalLM/14B", "meta-llama/Llama-2-7b-chat-hf"], value="HuggingFaceH4/zephyr-7b-alpha")
82
+
83
+ with gr.Row():
84
+ langchain_status = gr.Textbox(label="Status", placeholder="Waiting...", interactive=False)
85
+ load_pdf = gr.Button("Load PDF to LangChain")
86
+
87
+ chatbot = gr.Chatbot([], elem_id="chatbot").style(height=350)
88
+ query = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")
89
+ submit_btn = gr.Button("Send message")
90
+
91
+ # Set up actions for UI elements
92
+ repo_id.change(pdf_changes, inputs=[pdf_doc, repo_id], outputs=[langchain_status], queue=False)
93
+ load_pdf.click(pdf_changes, inputs=[pdf_doc, repo_id], outputs=[langchain_status], queue=False)
94
+ question.submit(add_text, [chatbot, question], [chatbot, question]).then(bot, chatbot, chatbot)
95
+ submit_btn.click(add_text, [chatbot, question], [chatbot, question]).then(bot, chatbot, chatbot)
96
+
97
+ # Launch the Gradio interface
98
+ demo.launch()