whitead committed on
Commit
1495711
1 Parent(s): 78ebf5b

Fixed some problems (and I bet there are more) in loading

Browse files
Files changed (1) hide show
  1. space.py +102 -0
space.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
# Module-level cache for the document index used by do_ask. None means no
# index is currently built; validate_dataset resets it to None.
docs = None


def request_pathname(files):
    """Turn uploaded files into rows for the documents table.

    Args:
        files: Iterable of uploaded file objects exposing a ``name``
            attribute (the file's path), or ``None`` when nothing is uploaded.

    Returns:
        A list of ``[filepath, citation string]`` rows, where the citation
        string defaults to the file's base name. Returns ``[[]]`` when
        ``files`` is ``None``.
    """
    # Local import keeps this block self-contained, matching the local
    # imports already used elsewhere in this file.
    import os

    if files is None:
        return [[]]
    # os.path.basename honours the platform's path separators instead of
    # assuming '/'.
    return [[f.name, os.path.basename(f.name)] for f in files]
10
+
11
+
12
def validate_dataset(dataset, openapi):
    """Clear the cached index and report whether the app is ready to answer.

    Called whenever the documents table or the API key changes, so any
    previously built index is discarded.

    Args:
        dataset: pandas DataFrame whose first column holds file paths.
        openapi: The API key text entered by the user (may be ``None``).

    Returns:
        A status string: ready, waiting for key, waiting for documents, or
        waiting for both.
    """
    global docs
    print('clearing docs')
    docs = None  # clear it out if dataset is modified

    # An empty table has no last row; treat it as "no documents" instead of
    # letting iloc raise IndexError.
    docs_ready = False
    if dataset is not None and dataset.shape[0] > 0 and dataset.shape[1] > 0:
        last_path = dataset.iloc[-1, 0]
        # A missing cell (None / NaN) is not a document either.
        docs_ready = isinstance(last_path, str) and last_path.strip() != ""

    # A whitespace-only key can never be valid.
    key_ready = isinstance(openapi, str) and openapi.strip() != ""

    if docs_ready and key_ready:
        return "✨Ready✨"
    if docs_ready:
        return "⚠️Waiting for key..."
    if key_ready:
        return "⚠️Waiting for documents..."
    return "⚠️Waiting for documents and key..."
25
+
26
+
27
def do_ask(question, button, openapi, dataset, progress=gr.Progress()):
    """Answer a question against the uploaded documents.

    Builds the document collection on first use (or after it was cleared),
    then queries it.

    Args:
        question: The question text to ask.
        button: Current text of the status box; must be the ready status.
        openapi: The API key text entered by the user.
        dataset: pandas DataFrame with ``filepath`` and ``citation string``
            columns.
        progress: Gradio progress tracker, injected by Gradio.

    Returns:
        A ``(formatted answer, context)`` pair. Both are empty strings when
        the inputs are not ready.
    """
    global docs

    # An empty table has no last row; guard before indexing.
    has_rows = (
        dataset is not None and dataset.shape[0] > 0 and dataset.shape[1] > 0
    )
    docs_ready = has_rows and dataset.iloc[-1, 0] != ""
    key_ready = isinstance(openapi, str) and len(openapi.strip()) > 0

    if not (button == "✨Ready✨" and key_ready and docs_ready):
        # This handler feeds two output components, so return one value for
        # each rather than a single string.
        return "", ""

    print('are docs ready?', docs)
    if docs is None:  # don't want to rebuild index if it's already built
        import os
        # NOTE(review): the key is stored process-wide and `docs` is a module
        # global, so concurrent sessions share both — confirm this is intended.
        os.environ['OPENAI_API_KEY'] = openapi.strip()
        import paperqa
        docs = paperqa.Docs()
        # dataset is pandas dataframe
        for _, row in dataset.iterrows():
            citation = row['citation string']
            # Citations containing a comma are not reused as the key.
            key = citation if ',' not in citation else None
            docs.add(row['filepath'], citation, key=key)

    progress(0, "Building Index...")
    # NOTE(review): relies on a private paperqa method — verify it still
    # exists when upgrading the library.
    docs._build_faiss_index()
    progress(0.25, "Querying...")
    result = docs.query(question)
    progress(1.0, "Done!")
    return result.formatted_answer, result.context
51
+
52
+
53
# Build the UI: key + uploads + editable documents table feed a status box,
# and the "Ask Question" button runs the query and fills answer/context.
with gr.Blocks() as demo:
    gr.Markdown("""
# Document Question and Answer

This tool will enable asking questions of your uploaded text or PDF documents.
It uses OpenAI's GPT models and thus you must enter your API key below. This
tool is under active development and currently uses many tokens - up to 10,000
for a single query. That is $0.10-0.20 per query, so please be careful!

* [PaperQA](https://github.com/whitead/paper-qa) is the code used to build this tool.
* [langchain](https://github.com/hwchase17/langchain) is the main library this tool utilizes.

## Instructions

1. Enter API Key ([What is that?](https://openai.com/api/))
2. Upload your documents and modify citation strings if you want (to look prettier)
""")

    # --- Inputs -----------------------------------------------------------
    openai_api_key = gr.Textbox(
        label="OpenAI API Key",
        placeholder="sk-...",
        type="password",
    )
    uploaded_files = gr.File(
        label="Your Documents Upload (PDF or txt)",
        file_count="multiple",
    )
    dataset = gr.Dataframe(
        headers=["filepath", "citation string"],
        datatype=["str", "str"],
        col_count=(2, "fixed"),
        interactive=True,
        label="Documents and Citations",
    )

    # --- Status -----------------------------------------------------------
    buildb = gr.Textbox(
        "⚠️Waiting for documents and key...",
        label="Status",
        interactive=False,
        show_label=True,
    )

    # Re-validate whenever either the key or the documents table changes.
    for source in (openai_api_key, dataset):
        source.change(
            validate_dataset,
            inputs=[dataset, openai_api_key],
            outputs=[buildb],
        )

    # New uploads repopulate the documents table.
    uploaded_files.change(
        request_pathname,
        inputs=[uploaded_files],
        outputs=[dataset],
    )

    # --- Question / answer --------------------------------------------------
    query = gr.Textbox(
        placeholder="Enter your question here...",
        label="Question",
    )
    ask = gr.Button("Ask Question")

    gr.Markdown("## Answer")
    answer = gr.Markdown(label="Answer")

    with gr.Accordion("Context", open=False):
        gr.Markdown(
            "### Context\n\nThe following context was used to generate the answer:"
        )
        context = gr.Markdown(label="Context")

    ask.click(
        fn=do_ask,
        inputs=[query, buildb, openai_api_key, dataset],
        outputs=[answer, context],
    )

demo.queue(concurrency_count=20)
demo.launch(show_error=True)