import os

import gradio as gr
import paperqa


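# Cached paperqa.Docs object so the document index is only built once per session;
# it is reset in validate_dataset whenever the key or dataset changes.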
docs = None


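# Map the uploaded File objects to [filepath, citation string] rows for the dataframe,
# defaulting the citation string to the file's basename.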
def request_pathname(files):
    if files is None:
        return [[]]
    return [[file.name, file.name.split('/')[-1]] for file in files]


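# Report status: ready only once both a document row and an API key have been provided.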
def validate_dataset(dataset, openai_api_key):
    global docs
    docs = None  # clear the cached index whenever the inputs change
    docs_ready = dataset.iloc[-1, 0] != ""
    key_ready = isinstance(openai_api_key, str) and len(openai_api_key) > 0
    if docs_ready and key_ready:
        return "✨Ready✨"
    elif docs_ready:
        return "⚠️Waiting for key..."
    elif key_ready:
        return "⚠️Waiting for documents..."
    else:
        return "⚠️Waiting for documents and key..."


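# Build the paperqa index on first use (cached in the module-level `docs`),
# then run the query and return the answer, supporting context, and raw passages.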
def do_ask(question, button, openai_api_key, dataset, slider, progress=gr.Progress()):
    global docs
    docs_ready = dataset.iloc[-1, 0] != ""
    if button == "✨Ready✨" and isinstance(openai_api_key, str) and len(openai_api_key) > 0 and docs_ready:
        if docs is None:  # don't rebuild the index if it's already built
            os.environ['OPENAI_API_KEY'] = openai_api_key.strip()
            docs = paperqa.Docs()
            # dataset is pandas dataframe
            for _, row in dataset.iterrows():
                key = None
                if ',' not in row['citation string']:
                    key = row['citation string']
                docs.add(row['filepath'], row['citation string'],
                         key=key, disable_check=True)
    else:
        return "", "", ""
    progress(0, "Building Index...")
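    # build the FAISS vector index over the added documents before querying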
    docs._build_faiss_index()
    progress(0.25, "Querying...")
    result = docs.query(question, length_prompt=f'use {slider:d} words')
    progress(1.0, "Done!")
    # format the passages
    passages = ""
    for i, (key, passage) in enumerate(result.passages.items()):
        passages += f'{i+1}. {key}\n\n >{passage} \n\n'
    return result.formatted_answer, result.context, passages


with gr.Blocks() as demo:
    gr.Markdown(f"""
    # Document Question and Answer (v{paperqa.__version__})

    This tool lets you ask questions of your uploaded text or PDF documents.
    It uses OpenAI's GPT models, so you must enter your API key below. This
    tool is under active development and currently uses many tokens - up to 10,000
    for a single query. That is $0.10-0.20 per query, so please be careful!

    * [PaperQA](https://github.com/whitead/paper-qa) is the code used to build this tool.
    * [langchain](https://github.com/hwchase17/langchain) is the main library this tool uses.

    ## Instructions

    1. Enter your OpenAI API key ([What is that?](https://platform.openai.com/account/api-keys))
    2. Upload your documents and edit the citation strings if you want (to make them prettier)
    3. Ask a question and click "Ask Question"
    """)
    openai_api_key = gr.Textbox(
        label="OpenAI API Key", placeholder="sk-...", type="password")
    uploaded_files = gr.File(
        label="Upload your documents (PDF or txt)", file_count="multiple")
    dataset = gr.Dataframe(
        headers=["filepath", "citation string"],
        datatype=["str", "str"],
        col_count=(2, "fixed"),
        interactive=True,
        label="Documents and Citations"
    )
    buildb = gr.Textbox("⚠️Waiting for documents and key...",
                        label="Status", interactive=False, show_label=True)
    openai_api_key.change(validate_dataset, inputs=[
                          dataset, openai_api_key], outputs=[buildb])
    dataset.change(validate_dataset, inputs=[
                   dataset, openai_api_key], outputs=[buildb])
    uploaded_files.change(request_pathname, inputs=[
                          uploaded_files], outputs=[dataset])
    query = gr.Textbox(
        placeholder="Enter your question here...", label="Question")
    slider = gr.Slider(25, 200, value=100, step=5,
                       label='Suggested # of words in answer')
    ask = gr.Button("Ask Question")
    gr.Markdown("## Answer")
    answer = gr.Markdown(label="Answer")
    with gr.Accordion("Context", open=False):
        gr.Markdown(
            "### Context\n\nThe following context was used to generate the answer:")
        context = gr.Markdown(label="Context")

    with gr.Accordion("Raw Text", open=False):
        gr.Markdown(
            "### Raw Text\n\nThe following raw text was used to generate the answer:")
        passages = gr.Markdown(label="Passages")
    ask.click(fn=do_ask,
              inputs=[query, buildb, openai_api_key, dataset, slider],
              outputs=[answer, context, passages])

demo.queue(concurrency_count=20)
demo.launch(show_error=True)