paascorb whitead committed on
Commit
fdf7fb5
0 Parent(s):

Duplicate from whitead/paper-qa

Browse files

Co-authored-by: Andrew White <whitead@users.noreply.huggingface.co>

Files changed (4) hide show
  1. .gitattributes +34 -0
  2. README.md +22 -0
  3. app.py +100 -0
  4. requirements.txt +2 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Paper Qa
3
+ emoji: ❓
4
+ colorFrom: indigo
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 3.18.0
8
+ app_file: app.py
9
+ pinned: true
10
+ license: mit
11
+ duplicated_from: whitead/paper-qa
12
+ ---
13
+
14
+ # Paper QA
15
+
16
+ This tool will enable asking questions of your uploaded text or PDF documents.
17
+ It uses OpenAI's GPT models and thus you must enter your API key below. This
18
+ tool is under active development and currently uses many tokens - up to 10,000
19
+ for a single query. That is $0.10-0.20 per query, so please be careful!
20
+
21
+ * [PaperQA](https://github.com/whitead/paper-qa) is the code used to build this tool.
22
+ * [langchain](https://github.com/hwchase17/langchain) is the main library this tool utilizes.
app.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ docs = None
4
+
5
+
6
def request_pathname(files):
    """Convert uploaded file objects into [filepath, display name] rows.

    Gradio's File component hands back objects exposing a ``.name`` temp
    path; the dataframe wants one row per file with the full path first
    and the bare filename (text after the last '/') second.
    """
    if files is None:
        return [[]]
    rows = []
    for uploaded in files:
        full_path = uploaded.name
        rows.append([full_path, full_path.split('/')[-1]])
    return rows
10
+
11
+
12
def validate_dataset(dataset, openapi):
    """Return a status string reflecting whether a query can be run.

    Args:
        dataset: pandas DataFrame of [filepath, citation string] rows; the
            last row's first cell being non-empty signals documents exist.
        openapi: the OpenAI API key as entered by the user (may be None).

    Returns:
        One of four status strings shown in the UI; "✨Ready✨" gates do_ask.

    Side effect: clears the module-level ``docs`` cache so the index is
    rebuilt after any dataset/key change.
    """
    global docs
    docs = None  # clear it out if dataset is modified
    docs_ready = dataset.iloc[-1, 0] != ""
    # isinstance is the idiomatic type check; compute key readiness once.
    key_ready = isinstance(openapi, str) and len(openapi) > 0
    if docs_ready and key_ready:
        return "✨Ready✨"
    if docs_ready:
        return "⚠️Waiting for key..."
    if key_ready:
        return "⚠️Waiting for documents..."
    return "⚠️Waiting for documents and key..."
24
+
25
+
26
def do_ask(question, button, openapi, dataset, progress=gr.Progress()):
    """Build the paper-qa index (once) and answer ``question``.

    Args:
        question: user's natural-language question.
        button: current status text from the ``buildb`` textbox; must be
            "✨Ready✨" for a query to run.
        openapi: OpenAI API key string.
        dataset: pandas DataFrame of [filepath, citation string] rows.
        progress: Gradio progress tracker (gradio injects this; the
            call-time default is the gradio idiom for enabling it).

    Returns:
        (formatted_answer, context) strings for the two wired outputs.
    """
    global docs
    docs_ready = dataset.iloc[-1, 0] != ""
    key_ready = isinstance(openapi, str) and len(openapi) > 0
    if button != "✨Ready✨" or not key_ready or not docs_ready:
        # BUGFIX: this callback feeds two outputs ([answer, context]);
        # the original returned a single "" here, mismatching the outputs.
        return "", ""
    if docs is None:  # don't want to rebuild index if it's already built
        import os
        os.environ['OPENAI_API_KEY'] = openapi.strip()
        import paperqa
        docs = paperqa.Docs()
        # dataset is pandas dataframe
        for _, row in dataset.iterrows():
            key = None
            # a comma-free citation string is short enough to double as a key
            if ',' not in row['citation string']:
                key = row['citation string']
            docs.add(row['filepath'], row['citation string'], key=key)
    progress(0, "Building Index...")
    docs._build_faiss_index()
    progress(0.25, "Querying...")
    result = docs.query(question)
    progress(1.0, "Done!")
    return result.formatted_answer, result.context
49
+
50
+
51
# --- Top-level Gradio UI wiring; runs at import time and blocks on launch() ---
with gr.Blocks() as demo:
    # Static intro / cost warning rendered above the controls.
    gr.Markdown("""
# Document Question and Answer

This tool will enable asking questions of your uploaded text or PDF documents.
It uses OpenAI's GPT models and thus you must enter your API key below. This
tool is under active development and currently uses many tokens - up to 10,000
for a single query. That is $0.10-0.20 per query, so please be careful!

* [PaperQA](https://github.com/whitead/paper-qa) is the code used to build this tool.
* [langchain](https://github.com/hwchase17/langchain) is the main library this tool utilizes.

## Instructions

1. Enter API Key ([What is that?](https://openai.com/api/))
2. Upload your documents and modify citation strings if you want (to look prettier)
""")
    # API key entry; type="password" masks it in the browser.
    openai_api_key = gr.Textbox(
        label="OpenAI API Key", placeholder="sk-...", type="password")
    # Raw uploads; request_pathname() mirrors these into the dataframe below.
    uploaded_files = gr.File(
        label="Your Documents Upload (PDF or txt)", file_count="multiple", )
    # Editable [filepath, citation string] table consumed by do_ask().
    dataset = gr.Dataframe(
        headers=["filepath", "citation string"],
        datatype=["str", "str"],
        col_count=(2, "fixed"),
        interactive=True,
        label="Documents and Citations"
    )
    # Read-only status line; validate_dataset() keeps it current, and
    # do_ask() later reads its text as the readiness gate.
    buildb = gr.Textbox("⚠️Waiting for documents and key...",
                        label="Status", interactive=False, show_label=True)
    # Re-validate whenever the key or the document table changes.
    openai_api_key.change(validate_dataset, inputs=[
        dataset, openai_api_key], outputs=[buildb])
    dataset.change(validate_dataset, inputs=[
        dataset, openai_api_key], outputs=[buildb])
    # New uploads repopulate the dataframe (which in turn re-validates).
    uploaded_files.change(request_pathname, inputs=[
        uploaded_files], outputs=[dataset])
    query = gr.Textbox(
        placeholder="Enter your question here...", label="Question")
    ask = gr.Button("Ask Question")
    gr.Markdown("## Answer")
    answer = gr.Markdown(label="Answer")
    # Supporting context is tucked into a collapsed accordion.
    with gr.Accordion("Context", open=False):
        gr.Markdown(
            "### Context\n\nThe following context was used to generate the answer:")
        context = gr.Markdown(label="Context")
    # do_ask receives the status text (buildb) so it can refuse early.
    ask.click(fn=do_ask, inputs=[query, buildb,
                                 openai_api_key, dataset], outputs=[answer, context])

# Queue up to 20 concurrent requests (queries are slow, network-bound calls).
demo.queue(concurrency_count=20)
demo.launch(show_error=True)
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ paper-qa>=0.0.6
2
+ gradio