File size: 4,187 Bytes
7db9219
 
 
6a23cd5
7db9219
 
 
6a23cd5
7db9219
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a23cd5
7db9219
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a23cd5
 
 
7db9219
 
 
 
 
 
 
 
 
 
 
 
6a23cd5
7db9219
 
6a23cd5
 
7db9219
 
6a23cd5
7db9219
 
6a23cd5
 
7db9219
 
 
 
 
 
 
 
 
 
 
6a23cd5
 
7db9219
 
 
6a23cd5
7db9219
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a23cd5
 
7db9219
 
 
 
 
 
 
 
 
 
 
 
 
6a23cd5
7db9219
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#!/usr/bin/env python
# coding: utf-8

# !pip install langchain openai chromadb tiktoken pypdf panel
# In[ ]:


import os
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
import panel as pn
import tempfile


# In[ ]:


pn.extension('texteditor', template="bootstrap", sizing_mode='stretch_width')
pn.state.template.param.update(
    main_max_width="690px",
    header_background="#F08080",
)


# In[3]:


file_input = pn.widgets.FileInput(width=300)

openaikey = pn.widgets.PasswordInput(
    value="", placeholder="Enter your OpenAI API Key here...", width=300
)
prompt = pn.widgets.TextEditor(
    value="", placeholder="Enter your questions here...", height=160, toolbar=False
)
run_button = pn.widgets.Button(name="Run!")

select_k = pn.widgets.IntSlider(
    name="Number of relevant chunks", start=1, end=5, step=1, value=2
)
select_chain_type = pn.widgets.RadioButtonGroup(
    name='Chain type',
    options=['stuff', 'map_reduce', "refine", "map_rerank"]
)

widgets = pn.Row(
    pn.Column(prompt, run_button, margin=5),
    pn.Card(
        "Chain type:",
        pn.Column(select_chain_type, select_k),
        title="Advanced settings", margin=10
    ), width=600
)


# In[4]:


def qa(file, query, chain_type, k):
    # load document
    loader = PyPDFLoader(file)
    documents = loader.load()
    # split the documents into chunks
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)
    # select which embeddings we want to use
    embeddings = OpenAIEmbeddings()
    # create the vectorestore to use as the index
    db = Chroma.from_documents(texts, embeddings)
    # expose this index in a retriever interface
    retriever = db.as_retriever(
        search_type="similarity", search_kwargs={"k": k})
    # create a chain to answer questions
    qa = RetrievalQA.from_chain_type(
        llm=OpenAI(), chain_type=chain_type, retriever=retriever, return_source_documents=True)
    result = qa({"query": query})
    print(result['result'])
    return result


# In[6]:


convos = []  # store all panel objects in a list


def qa_result(_):
    os.environ["OPENAI_API_KEY"] = openaikey.value

    # save pdf file to a temp file
    if file_input.value is not None:
        file_input.save("/.cache/temp.pdf")

        prompt_text = prompt.value
        if prompt_text:
            result = qa(file="/.cache/temp.pdf", query=prompt_text,
                        chain_type=select_chain_type.value, k=select_k.value)
            convos.extend([
                pn.Row(
                    pn.panel("\U0001F60A", width=10),
                    prompt_text,
                    width=600
                ),
                pn.Row(
                    pn.panel("\U0001F916", width=10),
                    pn.Column(
                        result["result"],
                        "Relevant source text:",
                        pn.pane.Markdown('\n--------------------------------------------------------------------\n'.join(
                            doc.page_content for doc in result["source_documents"]))
                    )
                )
            ])
            # return convos
    return pn.Column(*convos, margin=15, width=575, min_height=400)


# In[7]:


qa_interactive = pn.panel(
    pn.bind(qa_result, run_button),
    loading_indicator=True,
)


# In[8]:


output = pn.WidgetBox('*Output will show up here:*',
                      qa_interactive, width=630, scroll=True)


# In[9]:


# layout
pn.Column(
    pn.pane.Markdown("""
    ## \U0001F60A! Question Answering with your PDF file
    
    1) Upload a PDF. 2) Enter OpenAI API key. This costs $. Set up billing at [OpenAI](https://platform.openai.com/account). 3) Type a question and click "Run".
    
    """),
    pn.Row(file_input, openaikey),
    output,
    widgets

).servable()