import gradio as gr

from langchain.document_loaders import OnlinePDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import HuggingFaceHub
from langchain.embeddings import HuggingFaceHubEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
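
# Pipeline overview: load the uploaded PDF, split it into chunks, embed the
# chunks into a Chroma vector store, and answer questions with a RetrievalQA
# chain backed by a Hugging Face Hub LLM.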



def loading_pdf():
    # Interim status shown in the UI while the PDF is being indexed
    return "Loading..."

def pdf_changes(pdf_doc, repo_id):
    # Load the uploaded PDF; pdf_doc.name is the local temp-file path Gradio provides
    loader = OnlinePDFLoader(pdf_doc.name)
    documents = loader.load()
    # Split the document into small chunks for retrieval
    text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)
    # Embed the chunks and index them in a Chroma vector store
    embeddings = HuggingFaceHubEmbeddings()
    db = Chroma.from_documents(texts, embeddings)
    retriever = db.as_retriever()
    # Build a RetrievalQA chain on top of the selected Hugging Face Hub model
    llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature": 0.1, "max_new_tokens": 250})
    global qa
    qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True)
    return "Ready"

def add_text(history, text):
    # Append the user's message to the chat history and clear the input box
    history = history + [(text, None)]
    return history, ""

def bot(history):
    # Answer the most recent user message and fill in the assistant side of the last turn
    response = infer(history[-1][0])
    history[-1][1] = response['result']
    return history

def infer(question):
    # Run the question through the RetrievalQA chain built in pdf_changes()
    result = qa({"query": question})
    return result

# CSS
css = """
#col-container {
  max-width: 700px;
  margin-left: auto;
  margin-right: auto;
}

.title {
  text-align: center;
  max-width: 600px;
  margin-left: auto;
  margin-right: auto;
  color: #000;
}

.pdf-doc {
  margin-bottom: 10px;
}

.chatbot {
  max-height: 350px;
  margin-left: auto;
  margin-right: auto;
  padding: 10px;
  background-color: #fff;
  font-family: sans-serif;
  font-size: 16px;
  line-height: 24px;
}

.chatbot .message {
  color: #000;
}

.chatbot .user-message {
  background-color: #eee;
}

.chatbot .bot-message {
  background-color: #ccc;
}
"""

# HTML
title = """
<div style="text-align: center;max-width: 800px;">
    <h1>Chat with PDF</h1>
    <p style="text-align: center;">Upload a .pdf from local machine, click the "Load PDF🚀" button, <br />
    When ready, you are all set to start asking questions from the pdf</p>
</div>
"""
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)
        
        with gr.Column(elem_id="col-container"):
            pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="file")
            repo_id = gr.Dropdown(label="LLM", choices=["mistralai/Mixtral-8x7B-v0.1","google/flan-ul2", "OpenAssistant/oasst-sft-1-pythia-12b", "bigscience/bloomz", "meta-llama/Llama-2-7b-chat-hf"], value="google/flan-ul2")
            with gr.Row():
                langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
                load_pdf = gr.Button("Load pdf to langchain")
        
        chatbot = gr.Chatbot([], elem_id="chatbot").style(height=350)
        question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter")
        submit_btn = gr.Button("Send message")
    # Show an interim "Loading..." status as soon as the load button is clicked
    load_pdf.click(loading_pdf, None, langchain_status, queue=False)
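    # Build (or rebuild) the QA chain when the model selection changes or the load button is clicked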
    repo_id.change(pdf_changes, inputs=[pdf_doc, repo_id], outputs=[langchain_status], queue=False)
    load_pdf.click(pdf_changes, inputs=[pdf_doc, repo_id], outputs=[langchain_status], queue=False)
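    # On Enter or on the Send button, append the question to the chat, then generate the answer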
    question.submit(add_text, [chatbot, question], [chatbot, question]).then(
        bot, chatbot, chatbot
    )
    submit_btn.click(add_text, [chatbot, question], [chatbot, question]).then(
        bot, chatbot, chatbot
    )

demo.launch()