Ankur Goyal committed on
Commit
bc6a638
1 Parent(s): 8171e8e

Improve state management/data flow

Browse files
Files changed (1) hide show
  1. app.py +51 -9
app.py CHANGED
@@ -2,13 +2,12 @@ import os
2
 
3
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
4
 
5
- print("Importing")
6
-
7
  import streamlit as st
8
 
9
  import torch
10
  from docquery.pipeline import get_pipeline
11
- from docquery.document import load_bytes
 
12
 
13
  def ensure_list(x):
14
  if isinstance(x, list):
@@ -16,27 +15,70 @@ def ensure_list(x):
16
  else:
17
  return [x]
18
 
 
19
  @st.experimental_singleton
20
  def construct_pipeline():
21
  device = "cuda" if torch.cuda.is_available() else "cpu"
22
  ret = get_pipeline(device=device)
23
  return ret
24
 
 
25
  @st.cache
26
  def run_pipeline(question, document):
27
  return construct_pipeline()(question=question, **document.context)
28
 
29
- st.title("DocQuery: Query Documents Using NLP")
30
- file = st.file_uploader("Upload a PDF or Image document")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  question = st.text_input("QUESTION", "")
32
 
33
- if file is not None:
 
 
34
  col1, col2 = st.columns(2)
35
-
36
- document = load_bytes(file, file.name)
37
  col1.image(document.preview, use_column_width=True)
38
 
39
- if file is not None and question is not None and len(question) > 0:
40
  predictions = run_pipeline(question=question, document=document)
41
 
42
  col2.header("Answers")
2
 
3
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
4
 
 
 
5
  import streamlit as st
6
 
7
  import torch
8
  from docquery.pipeline import get_pipeline
9
+ from docquery.document import load_bytes, load_document
10
+
11
 
12
  def ensure_list(x):
13
  if isinstance(x, list):
15
  else:
16
  return [x]
17
 
18
+
19
@st.experimental_singleton
def construct_pipeline():
    """Build the docquery QA pipeline exactly once per Streamlit session.

    The singleton cache means the (expensive) model load happens on first
    use only; later reruns of the script reuse the same pipeline object.
    """
    has_gpu = torch.cuda.is_available()
    return get_pipeline(device="cuda" if has_gpu else "cpu")
24
 
25
+
26
@st.cache
def run_pipeline(question, document):
    """Answer `question` against `document`, memoized by st.cache.

    The document's pre-extracted context (presumably OCR/word data — confirm
    against docquery's Document API) is splatted into the pipeline call.
    """
    pipeline = construct_pipeline()
    return pipeline(question=question, **document.context)
29
 
30
+
31
+ st.markdown("# DocQuery: Query Documents w/ NLP")
32
+
33
+ if "document" not in st.session_state:
34
+ st.session_state["document"] = None
35
+
36
+ input_type = st.radio("Pick an input type", ["Upload", "URL"], horizontal=True)
37
+
38
+
39
def load_file_cb():
    """on_change callback for the file uploader.

    Parses the uploaded bytes into a document, eagerly materializes its
    context, and stores the result in session state for the main script
    body to render. No-op when the uploader is cleared (value is None).
    """
    uploaded = st.session_state.file_input
    if uploaded is None:
        return

    with loading_placeholder:
        with st.spinner("Processing..."):
            doc = load_bytes(uploaded, uploaded.name)
            # Touch .context now so the heavy parsing happens under the
            # spinner — presumably it is computed lazily; confirm in docquery.
            _ = doc.context
            st.session_state.document = doc
49
+
50
+
51
def load_url_cb():
    """on_change callback for the URL text input.

    Downloads and parses the document at the URL currently in session
    state, then stores it for the main script body to render.

    Fixes two wiring bugs in the original:
    - It was defined as ``load_url`` while the text_input below registers
      ``on_change=load_url_cb`` — a NameError at script setup.
    - Streamlit invokes on_change callbacks with no positional arguments,
      so the required ``url`` parameter (which was immediately shadowed by
      the session-state read anyway) would have raised TypeError.
    """
    url = st.session_state.url_input
    if url is None:
        return

    with loading_placeholder:
        with st.spinner("Downloading..."):
            document = load_document(url)
        with st.spinner("Processing..."):
            # Touch .context so the heavy parsing runs under the spinner —
            # presumably computed lazily; confirm in docquery.
            _ = document.context
            st.session_state.document = document
62
+
63
+
64
+ if input_type == "Upload":
65
+ file = st.file_uploader(
66
+ "Upload a PDF or Image document", key="file_input", on_change=load_file_cb
67
+ )
68
+
69
+ elif input_type == "URL":
70
+ # url = st.text_input("URL", "", on_change=load_url_callback, key="url_input")
71
+ url = st.text_input("URL", "", key="url_input", on_change=load_url_cb)
72
+
73
  question = st.text_input("QUESTION", "")
74
 
75
+ document = st.session_state.document
76
+ loading_placeholder = st.empty()
77
+ if document is not None:
78
  col1, col2 = st.columns(2)
 
 
79
  col1.image(document.preview, use_column_width=True)
80
 
81
+ if document is not None and question is not None and len(question) > 0:
82
  predictions = run_pipeline(question=question, document=document)
83
 
84
  col2.header("Answers")