# NOTE: this file was scraped from a Hugging Face Spaces file viewer; the
# page header (Space status, file size, git blame hashes, line-number gutter)
# was non-Python residue and has been removed so the module parses.
import gradio as gr
from langchain.document_loaders import OnlinePDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import HuggingFaceHub
from langchain.embeddings import HuggingFaceHubEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
def loading_pdf():
    """Return the interim status text shown while a PDF is being processed."""
    status = "Loading..."
    return status
def pdf_changes(pdf_doc, repo_id):
    """Build the retrieval-QA pipeline for an uploaded PDF and chosen model.

    Loads the PDF, splits it into 300-char chunks, embeds them into a Chroma
    vector store, and wires a RetrievalQA chain around a HuggingFace Hub LLM.
    The chain is stored in the module-level ``qa`` used by :func:`infer`.

    Args:
        pdf_doc: Gradio file object; only its ``.name`` (path) is used.
        repo_id: HuggingFace Hub model id selected in the dropdown.

    Returns:
        The status string ``"Ready"`` for the UI status textbox.
    """
    global qa
    # Load and chunk the uploaded document.
    docs = OnlinePDFLoader(pdf_doc.name).load()
    chunks = CharacterTextSplitter(chunk_size=300, chunk_overlap=0).split_documents(docs)
    # Embed chunks and expose the vector store as a retriever.
    store = Chroma.from_documents(chunks, HuggingFaceHubEmbeddings())
    # Hosted LLM backing the QA chain.
    model = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature": 0.1, "max_new_tokens": 250})
    qa = RetrievalQA.from_chain_type(
        llm=model,
        chain_type="stuff",
        retriever=store.as_retriever(),
        return_source_documents=True,
    )
    return "Ready"
def add_text(history, text):
    """Append the user's message (no bot reply yet) and clear the textbox.

    Returns a new history list plus an empty string that resets the input box.
    """
    updated = history + [(text, None)]
    return updated, ""
def bot(history):
    """Answer the newest user message and write it into the chat history.

    Reads the last user turn, runs it through :func:`infer`, stores the
    chain's ``result`` text as the bot half of that turn, and returns the
    updated history for the Chatbot component.
    """
    latest_question = history[-1][0]
    answer = infer(latest_question)
    history[-1][1] = answer['result']
    return history
def infer(question):
    """Run the module-level RetrievalQA chain ``qa`` on *question*.

    Returns the chain's raw result dict (includes ``result`` and, as
    configured, the source documents).
    """
    return qa({"query": question})
# CSS
# Stylesheet passed to gr.Blocks(css=...). Selectors match the elem_id /
# class names used in the UI below ("col-container", "chatbot"); the rest
# theme the chat message bubbles.
css = """
#col-container {
max-width: 700px;
margin-left: auto;
margin-right: auto;
}
.title {
text-align: center;
max-width: 600px;
margin-left: auto;
margin-right: auto;
color: #000;
}
.pdf-doc {
margin-bottom: 10px;
}
.chatbot {
max-height: 350px;
margin-left: auto;
margin-right: auto;
padding: 10px;
background-color: #fff;
font-family: sans-serif;
font-size: 16px;
line-height: 24px;
}
.chatbot .message {
color: #000;
}
.chatbot .user-message {
background-color: #eee;
}
.chatbot .bot-message {
background-color: #ccc;
}
"""
# HTML
# Static header markup rendered at the top of the app via gr.HTML(title).
title = """
<div style="text-align: center;max-width: 800px;">
<h1>Chat with PDF</h1>
<p style="text-align: center;">Upload a .pdf from local machine, click the "Load PDF🚀" button, <br />
When ready, you are all set to start asking questions from the pdf</p>
</div>
"""
# --- Gradio UI ---------------------------------------------------------------
# Layout: header, file upload + model dropdown, status row, then the chat
# widgets. Event wiring below maps UI actions onto the functions above.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)
    with gr.Column(elem_id="col-container"):
        # PDF upload and hosted-model selection.
        pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="file")
        repo_id = gr.Dropdown(
            label="LLM",
            choices=["mistralai/Mixtral-8x7B-v0.1", "google/flan-ul2", "OpenAssistant/oasst-sft-1-pythia-12b", "bigscience/bloomz", "meta-llama/Llama-2-7b-chat-hf"],
            value="google/flan-ul2",
        )
        with gr.Row():
            langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
            load_pdf = gr.Button("Load pdf to langchain")
        chatbot = gr.Chatbot([], elem_id="chatbot").style(height=350)
        question = gr.Textbox(label="Question", placeholder="Type your Question and hit Enter ", elem_id="chatbot .user-message")
        submit_btn = gr.Button("Send message")
    # (Re)build the QA chain whenever the model changes or the load button
    # is clicked; both report progress through the status textbox.
    repo_id.change(pdf_changes, inputs=[pdf_doc, repo_id], outputs=[langchain_status], queue=False)
    load_pdf.click(pdf_changes, inputs=[pdf_doc, repo_id], outputs=[langchain_status], queue=False)
    # Enter key and the Send button share the same two-step flow:
    # echo the user message, then stream the bot answer into the chat.
    question.submit(add_text, [chatbot, question], [chatbot, question]).then(
        bot, chatbot, chatbot
    )
    submit_btn.click(add_text, [chatbot, question], [chatbot, question]).then(
        bot, chatbot, chatbot
    )
demo.launch()