paascorb whitead committed on
Commit
fdf7fb5
0 Parent(s):

Duplicate from whitead/paper-qa

Browse files

Co-authored-by: Andrew White <whitead@users.noreply.huggingface.co>

Files changed (4) hide show
  1. .gitattributes +34 -0
  2. README.md +22 -0
  3. app.py +100 -0
  4. requirements.txt +2 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Paper Qa
3
+ emoji: ❓
4
+ colorFrom: indigo
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 3.18.0
8
+ app_file: app.py
9
+ pinned: true
10
+ license: mit
11
+ duplicated_from: whitead/paper-qa
12
+ ---
13
+
14
+ # Paper QA
15
+
16
+ This tool will enable asking questions of your uploaded text or PDF documents.
17
+ It uses OpenAI's GPT models and thus you must enter your API key below. This
18
+ tool is under active development and currently uses many tokens - up to 10,000
19
+ for a single query. That is $0.10-0.20 per query, so please be careful!
20
+
21
+ * [PaperQA](https://github.com/whitead/paper-qa) is the code used to build this tool.
22
+ * [langchain](https://github.com/hwchase17/langchain) is the main library this tool utilizes.
app.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ docs = None
4
+
5
+
6
def request_pathname(files):
    """Convert uploaded file objects into [filepath, display name] rows.

    Gradio's File component hands back objects exposing a ``.name`` temp
    path; the dataframe wants one row per file with the full path first
    and the bare filename (text after the last '/') second.
    """
    if files is None:
        return [[]]
    rows = []
    for uploaded in files:
        full_path = uploaded.name
        rows.append([full_path, full_path.split('/')[-1]])
    return rows
10
+
11
+
12
def validate_dataset(dataset, openapi):
    """Return a status string reflecting whether a query can be run.

    Args:
        dataset: pandas DataFrame of [filepath, citation string] rows; the
            last row's first cell being non-empty signals documents exist.
        openapi: the OpenAI API key as entered by the user (may be None).

    Returns:
        One of four status strings shown in the UI; "✨Ready✨" gates do_ask.

    Side effect: clears the module-level ``docs`` cache so the index is
    rebuilt after any dataset/key change.
    """
    global docs
    docs = None  # clear it out if dataset is modified
    docs_ready = dataset.iloc[-1, 0] != ""
    # isinstance is the idiomatic type check; compute key readiness once.
    key_ready = isinstance(openapi, str) and len(openapi) > 0
    if docs_ready and key_ready:
        return "✨Ready✨"
    if docs_ready:
        return "⚠️Waiting for key..."
    if key_ready:
        return "⚠️Waiting for documents..."
    return "⚠️Waiting for documents and key..."
24
+
25
+
26
def do_ask(question, button, openapi, dataset, progress=gr.Progress()):
    """Build the paper-qa index (once) and answer ``question``.

    Args:
        question: user's natural-language question.
        button: current status text from the ``buildb`` textbox; must be
            "✨Ready✨" for a query to run.
        openapi: OpenAI API key string.
        dataset: pandas DataFrame of [filepath, citation string] rows.
        progress: Gradio progress tracker (gradio injects this; the
            call-time default is the gradio idiom for enabling it).

    Returns:
        (formatted_answer, context) strings for the two wired outputs.
    """
    global docs
    docs_ready = dataset.iloc[-1, 0] != ""
    key_ready = isinstance(openapi, str) and len(openapi) > 0
    if button != "✨Ready✨" or not key_ready or not docs_ready:
        # BUGFIX: this callback feeds two outputs ([answer, context]);
        # the original returned a single "" here, mismatching the outputs.
        return "", ""
    if docs is None:  # don't want to rebuild index if it's already built
        import os
        os.environ['OPENAI_API_KEY'] = openapi.strip()
        import paperqa
        docs = paperqa.Docs()
        # dataset is pandas dataframe
        for _, row in dataset.iterrows():
            key = None
            # a comma-free citation string is short enough to double as a key
            if ',' not in row['citation string']:
                key = row['citation string']
            docs.add(row['filepath'], row['citation string'], key=key)
    progress(0, "Building Index...")
    docs._build_faiss_index()
    progress(0.25, "Querying...")
    result = docs.query(question)
    progress(1.0, "Done!")
    return result.formatted_answer, result.context
49
+
50
+
51
# --- Top-level Gradio UI wiring; runs at import time and blocks on launch() ---
with gr.Blocks() as demo:
    # Static intro / cost warning rendered above the controls.
    gr.Markdown("""
# Document Question and Answer

This tool will enable asking questions of your uploaded text or PDF documents.
It uses OpenAI's GPT models and thus you must enter your API key below. This
tool is under active development and currently uses many tokens - up to 10,000
for a single query. That is $0.10-0.20 per query, so please be careful!

* [PaperQA](https://github.com/whitead/paper-qa) is the code used to build this tool.
* [langchain](https://github.com/hwchase17/langchain) is the main library this tool utilizes.

## Instructions

1. Enter API Key ([What is that?](https://openai.com/api/))
2. Upload your documents and modify citation strings if you want (to look prettier)
""")
    # API key entry; type="password" masks it in the browser.
    openai_api_key = gr.Textbox(
        label="OpenAI API Key", placeholder="sk-...", type="password")
    # Raw uploads; request_pathname() mirrors these into the dataframe below.
    uploaded_files = gr.File(
        label="Your Documents Upload (PDF or txt)", file_count="multiple", )
    # Editable [filepath, citation string] table consumed by do_ask().
    dataset = gr.Dataframe(
        headers=["filepath", "citation string"],
        datatype=["str", "str"],
        col_count=(2, "fixed"),
        interactive=True,
        label="Documents and Citations"
    )
    # Read-only status line; validate_dataset() keeps it current, and
    # do_ask() later reads its text as the readiness gate.
    buildb = gr.Textbox("⚠️Waiting for documents and key...",
                        label="Status", interactive=False, show_label=True)
    # Re-validate whenever the key or the document table changes.
    openai_api_key.change(validate_dataset, inputs=[
        dataset, openai_api_key], outputs=[buildb])
    dataset.change(validate_dataset, inputs=[
        dataset, openai_api_key], outputs=[buildb])
    # New uploads repopulate the dataframe (which in turn re-validates).
    uploaded_files.change(request_pathname, inputs=[
        uploaded_files], outputs=[dataset])
    query = gr.Textbox(
        placeholder="Enter your question here...", label="Question")
    ask = gr.Button("Ask Question")
    gr.Markdown("## Answer")
    answer = gr.Markdown(label="Answer")
    # Supporting context is tucked into a collapsed accordion.
    with gr.Accordion("Context", open=False):
        gr.Markdown(
            "### Context\n\nThe following context was used to generate the answer:")
        context = gr.Markdown(label="Context")
    # do_ask receives the status text (buildb) so it can refuse early.
    ask.click(fn=do_ask, inputs=[query, buildb,
                                 openai_api_key, dataset], outputs=[answer, context])

# Queue up to 20 concurrent requests (queries are slow, network-bound calls).
demo.queue(concurrency_count=20)
demo.launch(show_error=True)
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ paper-qa>=0.0.6
2
+ gradio