BishanSingh246 committed on
Commit
749b606
1 Parent(s): 3634816

Add application file

Browse files
Files changed (2) hide show
  1. app.py +162 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import paperqa
3
+ import pickle
4
+ import pandas as pd
5
+ from pathlib import Path
6
+ import requests
7
+ import zipfile
8
+ import io
9
+ import tempfile
10
+ import os
11
+
12
+
13
# Custom stylesheet handed to gr.Blocks(css=...) below; it sets the font of
# the element matching the `.gradio-container` selector.
css_style = """
.gradio-container {
font-family: "IBM Plex Mono";
}
"""
18
+
19
+
20
def request_pathname(files, data, openai_api_key):
    """Record newly uploaded files in the running document table.

    Args:
        files: Uploaded file objects (each exposing a ``name`` attribute), or
            ``None``/empty when the upload widget holds nothing.
        data: Accumulated rows of ``[filepath, citation string, key]``. It is
            extended in place with one row per path not already present.
        openai_api_key: Current API-key text, forwarded to
            ``validate_dataset`` to compute the status message.

    Returns:
        A 4-tuple ``(stats, data, data, status)``. The handler is wired to
        four outputs, so every path must return four values; the previous
        ``files is None`` path returned a single ``[[]]``.
    """
    if files:
        # Track known paths in a set so the duplicate check is O(1) per file
        # and duplicates inside a single upload batch are skipped as well.
        known_paths = {row[0] for row in data}
        for file in files:
            if file.name in known_paths:
                continue
            known_paths.add(file.name)
            data.append([file.name, None, None])

    # validate_dataset inspects the last row of the frame, so hand it a single
    # blank row when nothing has been uploaded yet instead of an empty frame.
    table = pd.DataFrame(data if data else [["", "", ""]])
    return [[len(data), 0]], data, data, validate_dataset(table, openai_api_key)
29
+
30
+
31
def validate_dataset(dataset, openapi):
    """Return the status text for the current documents and API key.

    Args:
        dataset: pandas DataFrame of documents; documents count as present
            when the frame is non-empty and the first cell of its last row is
            not the empty string.
        openapi: API-key text; it counts as present when it is a string with
            at least one non-whitespace character.

    Returns:
        One of four status strings: ready, waiting for key, waiting for
        documents, or waiting for both.
    """
    # An empty frame has no last row; indexing it would raise IndexError.
    docs_ready = (not dataset.empty) and dataset.iloc[-1, 0] != ""
    # Whitespace-only input is not a usable key (it is stripped before use).
    key_ready = isinstance(openapi, str) and bool(openapi.strip())

    if docs_ready and key_ready:
        return "✨Ready✨"
    if docs_ready:
        return "⚠️Waiting for key⚠️"
    if key_ready:
        return "⚠️Waiting for documents⚠️"
    return "⚠️Waiting for documents and key⚠️"
41
+
42
+
43
def make_stats(docs):
    """Build the single-row stats table for a document collection.

    The row holds the number of entries in ``docs.doc_previews`` followed by
    the sum of the first element of each entry (shown under the 'Chunks'
    header of the stats table).
    """
    previews = docs.doc_previews
    first_field_total = 0
    for preview in previews:
        first_field_total += preview[0]
    return [[len(previews), first_field_total]]
45
+
46
+
47
+ # , progress=gr.Progress()):
48
def do_ask(question, button, openapi, dataset, length, do_marg, k, max_sources, docs):
    """Index the listed documents and stream an answer to ``question``.

    This is a generator: every yielded value is a 5-tuple
    ``(answer, context, passages, docs, stats)``.

    Args:
        question: The user's question text.
        button: Current status text; work only proceeds when it is the
            ready message.
        openapi: API-key text; exported (stripped) as ``OPENAI_API_KEY``.
        dataset: pandas DataFrame with ``filepath``, ``citation string`` and
            ``key`` columns.
        length: Requested answer length in words.
        do_marg: Passed to the query as ``marginal_relevance``.
        k: Passed to the query as ``k``.
        max_sources: Passed to the query as ``max_sources``.
        docs: Existing ``paperqa.Docs`` collection, or ``None`` to create one.

    Yields:
        Progress tuples while documents are added, then one tuple per result
        produced by ``docs.query_gen``, then a final tuple with the passages
        text filled in.
    """
    import logging  # function-scope import; the file's import block is untouched

    key_ok = isinstance(openapi, str) and len(openapi) > 0
    # An empty frame has no last row to inspect; treat it as "no documents".
    docs_ready = (not dataset.empty) and dataset.iloc[-1, 0] != ""
    if not (button == "✨Ready✨" and key_ok and docs_ready):
        yield "", "", "", docs, [[0, 0]]
        # Stop here: continuing would build an index on a collection that may
        # not exist (docs is None on first use) and query without a key.
        return

    os.environ['OPENAI_API_KEY'] = openapi.strip()
    if docs is None:
        docs = paperqa.Docs()

    # dataset is a pandas DataFrame; add each listed file to the collection.
    for _, row in dataset.iterrows():
        try:
            docs.add(row['filepath'], row['citation string'],
                     key=row['key'], disable_check=True)
        except Exception:
            # Best effort: one failing document must not abort the whole
            # request, but record the failure instead of hiding it.
            logging.getLogger(__name__).warning(
                "Could not add %r to the collection; skipping",
                row.get('filepath'), exc_info=True)
            continue
        yield "", "", "", docs, make_stats(docs)

    docs._build_faiss_index()

    passages = ""
    result = None
    # int() keeps the ':d' format valid if the slider delivers a float.
    for result in docs.query_gen(question,
                                 length_prompt=f'use {int(length):d} words',
                                 marginal_relevance=do_marg,
                                 k=k, max_sources=max_sources):
        yield result.formatted_answer, result.context, passages, docs, make_stats(docs)

    if result is None:
        # The query produced nothing, so there is no final result to format.
        return

    # Passage rendering is disabled: one placeholder per returned passage.
    passages = 'Disabled for now' * len(result.passages)
    yield result.formatted_answer, result.context, passages, docs, make_stats(docs)
79
+
80
+
81
+
82
+
83
# Build the Gradio interface. Components are created in display order and the
# event handlers defined above are attached to them.
with gr.Blocks(css=css_style) as demo:

    # Per-session state: the document collection and the raw table rows.
    docs = gr.State(None)
    data = gr.State([])

    gr.Markdown(f"""
# Document Question and Answer (v{paperqa.__version__})
*By Andrew White ([@andrewwhite01](https://twitter.com/andrewwhite01))*
This tool will enable asking questions of your uploaded text, PDF documents,
or scrape github repos.
It uses OpenAI's GPT models and thus you must enter your API key below. This
tool is under active development and currently uses many tokens - up to 10,000
for a single query. That is $0.10-0.20 per query, so please be careful!
* [PaperQA](https://github.com/whitead/paper-qa) is the code used to build this tool.
* [langchain](https://github.com/hwchase17/langchain) is the main library this tool utilizes.
1. Enter API Key ([What is that?](https://platform.openai.com/account/api-keys))
2. Upload your documents
3. Ask a question
""")
    # The key is read straight from this textbox by the handlers below.
    openai_api_key = gr.Textbox(
        label="OpenAI API Key", placeholder="sk-...", type="password")
    with gr.Tab('File Upload'):
        uploaded_files = gr.File(
            label="Your Documents Upload (PDF or txt)", file_count="multiple", )
    with gr.Tab('Github Repo'):
        # NOTE(review): no event handler is attached to these two components
        # in this file, so the download button currently does nothing.
        gh_repo = gr.Textbox(
            label="Github Repo", placeholder="whitead/paper-qa")
        download = gr.Button("Download Repo")

    with gr.Accordion("See Docs:", open=False):
        dataset = gr.Dataframe(
            headers=["filepath", "citation string", "key"],
            datatype=["str", "str", "str"],
            col_count=(3, "fixed"),
            interactive=False,
            label="Documents and Citations",
            overflow_row_behaviour='paginate',
            max_rows=5
        )
    # Status text; do_ask only proceeds when this shows the ready message.
    buildb = gr.Textbox("⚠️Waiting for documents and key...",
                        label="Status", interactive=False, show_label=True,
                        max_lines=1)
    stats = gr.Dataframe(headers=['Docs', 'Chunks'],
                         datatype=['number', 'number'],
                         col_count=(2, "fixed"),
                         interactive=False,
                         label="Doc Stats")

    # Recompute the status whenever the key or the document table changes.
    openai_api_key.change(validate_dataset, inputs=[
        dataset, openai_api_key], outputs=[buildb])
    dataset.change(validate_dataset, inputs=[
        dataset, openai_api_key], outputs=[buildb])
    uploaded_files.change(request_pathname, inputs=[
        uploaded_files, data, openai_api_key], outputs=[stats, data, dataset, buildb])

    query = gr.Textbox(
        placeholder="Enter your question here...", label="Question")
    with gr.Row():
        length = gr.Slider(25, 200, value=100, step=5,
                           label='Words in answer')
        marg = gr.Checkbox(True, label='Max marginal relevance')
        k = gr.Slider(1, 20, value=10, step=1,
                      label='Chunks to examine')
        sources = gr.Slider(1, 10, value=5, step=1,
                            label='Contexts to include')

    ask = gr.Button("Ask Question")
    answer = gr.Markdown(label="Answer")
    with gr.Accordion("Context", open=True):
        context = gr.Markdown(label="Context")

    with gr.Accordion("Raw Text", open=False):
        passages = gr.Markdown(label="Passages")
    # do_ask is a generator; each yielded tuple updates these five outputs.
    ask.click(fn=do_ask, inputs=[query, buildb,
                                 openai_api_key, dataset,
                                 length, marg, k, sources,
                                 docs], outputs=[answer, context, passages, docs, stats])

# Queueing is enabled before launch; do_ask streams its results by yielding.
demo.queue(concurrency_count=20)
demo.launch(show_error=True)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ paper-qa>=0.0.21
2
+ gradio
3
+ requests
4
+ transformers