whitead committed on
Commit
a49e567
1 Parent(s): 1168b87

Upload space.py

Browse files
Files changed (1) hide show
  1. space.py +96 -0
space.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ docs = None  # module-level handle; do_ask rebinds it (via `global docs`) to a paperqa.Docs instance
4
+
5
+
6
def request_pathname(files):
    """Turn uploaded files into rows for the documents/citations table.

    Args:
        files: Iterable of uploaded-file objects exposing a ``name``
            attribute holding the file's path, or ``None``/empty when the
            upload widget has been cleared.

    Returns:
        A list of ``[filepath, citation string]`` rows. The citation string
        defaults to the file's base name. When nothing is uploaded a single
        blank row is returned so the table always keeps one (empty) row.
    """
    import os

    # The upload widget reports None once its selection is cleared; iterating
    # over that would raise TypeError, so reset the table to one blank row.
    if not files:
        return [["", ""]]
    return [[file.name, os.path.basename(file.name)] for file in files]
8
+
9
+
10
def validate_dataset(dataset, openapi):
    """Return the status message describing what is still missing.

    Args:
        dataset: pandas DataFrame of documents; the documents count as
            provided when the first cell of the last row is not ``""``.
        openapi: The API key text entered by the user (may be ``None``).

    Returns:
        One of ``"✨Ready✨"``, ``"Waiting for key..."``,
        ``"Waiting for documents..."`` or
        ``"Waiting for documents and key..."``.
    """
    # A table with no rows has no "last row"; treat it as "no documents"
    # instead of letting iloc raise IndexError.
    docs_ready = (not dataset.empty) and dataset.iloc[-1, 0] != ""
    # The key is stripped before it is used, so a whitespace-only value is
    # effectively an empty key and must not be reported as ready.
    key_ready = isinstance(openapi, str) and openapi.strip() != ""

    if docs_ready and key_ready:
        return "✨Ready✨"
    if docs_ready:
        return "Waiting for key..."
    if key_ready:
        return "Waiting for documents..."
    return "Waiting for documents and key..."
20
+
21
+
22
def do_ask(question, button, openapi, dataset, progress=gr.Progress()):
    """Answer ``question`` from the documents listed in ``dataset``.

    Args:
        question: The user's question text.
        button: Current text of the status box; work is only done when it
            reads ``"✨Ready✨"``.
        openapi: The OpenAI API key entered by the user.
        dataset: pandas DataFrame with ``filepath`` and ``citation string``
            columns.
        progress: Gradio progress tracker used to report the current stage.

    Returns:
        A ``(formatted_answer, context)`` pair. When the inputs are not
        ready, a pair of empty strings is returned so that both wired
        output components always receive a value.
    """
    global docs

    # A table with no rows has no last row to inspect; treat it as not ready.
    docs_ready = (not dataset.empty) and dataset.iloc[-1, 0] != ""
    key_ready = isinstance(openapi, str) and openapi.strip() != ""
    if button != "✨Ready✨" or not key_ready or not docs_ready:
        # Two outputs are wired to this handler, so return one value for each.
        return "", ""

    import os
    # NOTE(review): the environment is process-wide, so concurrent requests
    # from different users overwrite each other's key -- confirm acceptable.
    os.environ['OPENAI_API_KEY'] = openapi.strip()
    import paperqa

    docs = paperqa.Docs()
    # dataset is pandas dataframe
    for _, row in dataset.iterrows():
        filepath = row['filepath']
        citation = row['citation string']
        # The table is user-editable and only its last row is checked for
        # readiness, so earlier rows may be blank; skip those.
        if not filepath:
            continue
        # A citation without commas is also used as the document key;
        # otherwise the key is left as None.
        key = citation if ',' not in citation else None
        docs.add(filepath, citation, key=key)

    progress(0, "Building Index...")
    # NOTE(review): relies on a private paperqa method -- verify it still
    # exists when upgrading paperqa.
    docs._build_faiss_index()
    progress(0.25, "Querying...")
    result = docs.query(question)
    progress(1.0, "Done!")
    return result.formatted_answer, result.context
46
+
47
+
48
# Introductory text rendered at the top of the page.
_INTRO_MARKDOWN = """
# Document Question and Answer

This tool will enable question and answers with your uploaded text or PDF documents.
It uses OpenAI's GPT models and thus you must enter your API key below. This
tool is under active development and currently uses many tokens - up to 10,000
for a single query. That is $0.10-0.20 per query, so please be careful!

* [PaperQA](https://github.com/whitead/paper-qa) is the code used to build this tool.
* [langchain](https://github.com/hwchase17/langchain) is the main library this tool utilizes.

## Instructions

1. Enter API Key
2. Upload your documents and modify citation strings if you want (to look prettier)
"""

with gr.Blocks() as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    # Inputs: API key, file upload, and the editable documents table.
    openai_api_key = gr.Textbox(
        type="password",
        placeholder="sk-...",
        label="OpenAI API Key",
    )
    uploaded_files = gr.File(
        file_count="multiple",
        label="Your Documents Upload (PDF or txt)",
    )
    dataset = gr.Dataframe(
        label="Documents and Citations",
        interactive=True,
        col_count=(2, "fixed"),
        datatype=["str", "str"],
        headers=["filepath", "citation string"],
    )

    # Read-only status box; its text is what do_ask receives as `button`.
    buildb = gr.Textbox(
        value="Waiting for documents and key...",
        show_label=True,
        interactive=False,
        label="Status",
    )

    # Refresh the status whenever the key or the table changes, and fill the
    # table from the uploaded files.
    openai_api_key.change(
        fn=validate_dataset,
        inputs=[dataset, openai_api_key],
        outputs=[buildb],
    )
    dataset.change(
        fn=validate_dataset,
        inputs=[dataset, openai_api_key],
        outputs=[buildb],
    )
    uploaded_files.change(
        fn=request_pathname,
        inputs=[uploaded_files],
        outputs=[dataset],
    )

    # Question entry and answer display.
    query = gr.Textbox(
        label="Question",
        placeholder="Enter your question here...",
    )
    ask = gr.Button("Ask Question")
    gr.Markdown("## Answer")
    answer = gr.Markdown(label="Answer")
    with gr.Accordion("Context", open=False):
        gr.Markdown(
            "### Context\n\nThe following context was used to generate the answer:"
        )
        context = gr.Markdown(label="Context")

    ask.click(
        fn=do_ask,
        inputs=[query, buildb, openai_api_key, dataset],
        outputs=[answer, context],
    )

demo.queue(concurrency_count=3)
demo.launch()