Spaces:

futurehouse
/

paper-qa

Running

App Files Files Community

whitead committed on Feb 10, 2023

Commit

1495711

•

1 Parent(s): 78ebf5b

Fixed some problems (and I bet there are more) in loading

Browse files

Files changed (1) hide show

space.py +102 -0

space.py ADDED Viewed

	@@ -0,0 +1,102 @@

+import gradio as gr
+# Module-level cache for the paperqa document collection built in do_ask;
+# validate_dataset resets it to None so the collection is rebuilt on next ask.
+docs = None
+def request_pathname(files):
+    """Build the rows for the documents table from the uploaded files.
+
+    Args:
+        files: Uploaded file objects, each exposing a ``name`` attribute that
+            holds its path, or None when nothing is uploaded.
+
+    Returns:
+        A list of ``[filepath, citation string]`` rows in which the default
+        citation string is the file's base name. When ``files`` is None a
+        single empty row (``[[]]``) is returned.
+    """
+    import os
+
+    if files is None:
+        return [[]]
+    # os.path.basename honours the platform's path separators, unlike a
+    # manual split on '/'.
+    return [[file.name, os.path.basename(file.name)] for file in files]
+def validate_dataset(dataset, openapi):
+    """Report whether documents and an API key are both available.
+
+    Also drops the cached ``docs`` collection, since this runs whenever the
+    documents table or the API key changes.
+
+    Args:
+        dataset: pandas DataFrame whose first column holds file paths; the
+            documents count as ready when the last row's first cell is not
+            the empty string.
+        openapi: The API key text entered by the user (may be None or empty).
+
+    Returns:
+        One of the four status strings shown in the status box.
+    """
+    global docs
+    print('clearing docs')
+    docs = None  # clear it out if dataset is modified
+    # A table with no rows (or no columns) has no last cell to inspect, so
+    # check for emptiness before indexing to avoid an IndexError.
+    docs_ready = (
+        dataset is not None
+        and not dataset.empty
+        and dataset.iloc[-1, 0] != ""
+    )
+    # do_ask strips the key before using it, so whitespace alone is no key.
+    key_ready = isinstance(openapi, str) and len(openapi.strip()) > 0
+    if docs_ready and key_ready:
+        return "✨Ready✨"
+    if docs_ready:
+        return "⚠️Waiting for key..."
+    if key_ready:
+        return "⚠️Waiting for documents..."
+    return "⚠️Waiting for documents and key..."
+def do_ask(question, button, openapi, dataset, progress=gr.Progress()):
+    """Answer ``question`` from the listed documents using paperqa.
+
+    Args:
+        question: The question text to pass to ``docs.query``.
+        button: Current text of the status box; work is only done when it is
+            the ready message.
+        openapi: The API key text entered by the user.
+        dataset: pandas DataFrame with ``filepath`` and ``citation string``
+            columns, one row per document.
+        progress: Gradio progress tracker used to report the current stage.
+
+    Returns:
+        A ``(formatted answer, context)`` pair, or a pair of empty strings
+        when the key, the documents, or the ready status is missing.
+    """
+    global docs
+    has_key = isinstance(openapi, str) and len(openapi.strip()) > 0
+    # Check emptiness first: indexing the last row of an empty table raises.
+    docs_ready = (
+        dataset is not None
+        and not dataset.empty
+        and dataset.iloc[-1, 0] != ""
+    )
+    if button != "✨Ready✨" or not has_key or not docs_ready:
+        # This handler feeds two outputs (answer and context), so the
+        # "nothing to do" result must be a pair as well.
+        return "", ""
+    print('are docs ready?', docs)
+    if docs is None:  # don't want to rebuild index if it's already built
+        import os
+        # The key is placed in the environment before paperqa is imported.
+        os.environ['OPENAI_API_KEY'] = openapi.strip()
+        import paperqa
+        docs = paperqa.Docs()
+        # dataset is pandas dataframe
+        for _, row in dataset.iterrows():
+            citation = row['citation string']
+            # Only a comma-free citation string is also passed as the key.
+            key = citation if ',' not in citation else None
+            docs.add(row['filepath'], citation, key=key)
+    progress(0, "Building Index...")
+    docs._build_faiss_index()
+    progress(0.25, "Querying...")
+    result = docs.query(question)
+    progress(1.0, "Done!")
+    return result.formatted_answer, result.context
+# Build the Gradio UI: instructions, inputs, status box and answer widgets.
+with gr.Blocks() as demo:
+    gr.Markdown("""
+    # Document Question and Answer
+    This tool will enable asking questions of your uploaded text or PDF documents.
+    It uses OpenAI's GPT models and thus you must enter your API key below. This
+    tool is under active development and currently uses many tokens - up to 10,000
+    for a single query. That is $0.10-0.20 per query, so please be careful!
+    * [PaperQA](https://github.com/whitead/paper-qa) is the code used to build this tool.
+    * [langchain](https://github.com/hwchase17/langchain) is the main library this tool utilizes.
+    ## Instructions
+    1. Enter API Key ([What is that?](https://openai.com/api/))
+    2. Upload your documents and modify citation strings if you want (to look prettier)
+    """)
+    # API key entry, rendered as a password field.
+    openai_api_key = gr.Textbox(
+        label="OpenAI API Key", placeholder="sk-...", type="password")
+    # Multi-file upload for the documents to be queried.
+    uploaded_files = gr.File(
+        label="Your Documents Upload (PDF or txt)", file_count="multiple", )
+    # Editable two-column table of file paths and their citation strings.
+    dataset = gr.Dataframe(
+        headers=["filepath", "citation string"],
+        datatype=["str", "str"],
+        col_count=(2, "fixed"),
+        interactive=True,
+        label="Documents and Citations"
+    )
+    # Read-only status text; do_ask only does work when it shows the ready message.
+    buildb = gr.Textbox("⚠️Waiting for documents and key...",
+                        label="Status", interactive=False, show_label=True)
+    # Re-validate (and drop the cached docs) whenever the key or the table changes.
+    openai_api_key.change(validate_dataset, inputs=[
+                          dataset, openai_api_key], outputs=[buildb])
+    dataset.change(validate_dataset, inputs=[
+                   dataset, openai_api_key], outputs=[buildb])
+    # New uploads repopulate the table via request_pathname.
+    uploaded_files.change(request_pathname, inputs=[
+                          uploaded_files], outputs=[dataset])
+    query = gr.Textbox(
+        placeholder="Enter your question here...", label="Question")
+    ask = gr.Button("Ask Question")
+    gr.Markdown("## Answer")
+    answer = gr.Markdown(label="Answer")
+    # Collapsed-by-default section for the context returned with the answer.
+    with gr.Accordion("Context", open=False):
+        gr.Markdown(
+            "### Context\n\nThe following context was used to generate the answer:")
+        context = gr.Markdown(label="Context")
+    # Clicking the button sends question, status, key and table to do_ask,
+    # whose two return values fill the answer and context widgets.
+    ask.click(fn=do_ask, inputs=[query, buildb,
+                                 openai_api_key, dataset], outputs=[answer, context])
+# Enable the request queue, then start the app with errors shown in the UI.
+demo.queue(concurrency_count=20)
+demo.launch(show_error=True)