Ankur Goyal committed on
Commit
225fcc2
1 Parent(s): 588673f

Support Donut

Browse files
Files changed (2) hide show
  1. app.py +43 -26
  2. requirements.txt +1 -0
app.py CHANGED
@@ -19,16 +19,23 @@ def ensure_list(x):
19
  return [x]
20
 
21
 
22
- @st.experimental_singleton
23
- def construct_pipeline():
 
 
 
 
 
 
24
  device = "cuda" if torch.cuda.is_available() else "cpu"
25
- ret = get_pipeline(device=device)
26
  return ret
27
 
28
 
29
- @st.cache
30
- def run_pipeline(question, document, top_k):
31
- return construct_pipeline()(question=question, **document.context, top_k=top_k)
 
32
 
33
 
34
  # TODO: Move into docquery
@@ -56,13 +63,14 @@ st.markdown("# DocQuery: Query Documents w/ NLP")
56
  if "document" not in st.session_state:
57
  st.session_state["document"] = None
58
 
59
- input_col, model_col = st.columns([2,1])
60
 
61
  with input_col:
62
  input_type = st.radio("Pick an input type", ["Upload", "URL"], horizontal=True)
63
 
64
  with model_col:
65
- model_type = st.radio("Pick a model", ["Upload", "URL"], horizontal=True)
 
66
 
67
  def load_file_cb():
68
  if st.session_state.file_input is None:
@@ -109,30 +117,39 @@ if document is not None:
109
 
110
  colors = ["blue", "red", "green"]
111
  if document is not None and question is not None and len(question) > 0:
112
- col2.header("Answers")
113
  with col2:
114
  answers_placeholder = st.empty()
115
  answers_loading_placeholder = st.empty()
116
 
117
- with answers_loading_placeholder:
118
- with st.spinner("Processing question..."):
119
- predictions = run_pipeline(question=question, document=document, top_k=1)
120
-
121
- with answers_placeholder:
122
- word_boxes = lift_word_boxes(document)
123
- image = image.copy()
124
- draw = ImageDraw.Draw(image)
125
- for i, p in enumerate(ensure_list(predictions)):
126
- col2.markdown(f"#### { p['answer'] }: ({round(p['score'] * 100, 1)}%)")
127
- x1, y1, x2, y2 = normalize_bbox(
128
- expand_bbox(word_boxes[p["start"] : p["end"] + 1]),
129
- image.width,
130
- image.height,
131
- )
132
- draw.rectangle(((x1, y1), (x2, y2)), outline=colors[i], width=3)
 
 
 
 
 
 
 
 
 
133
 
134
  if document is not None:
135
- col1.image(image, use_column_width='auto')
136
 
137
  "DocQuery uses LayoutLMv1 fine-tuned on DocVQA, a document visual question answering dataset, as well as SQuAD, which boosts its English-language comprehension. To use it, simply upload an image or PDF, type a question, and click 'submit', or click one of the examples to load them."
138
 
19
  return [x]
20
 
21
 
22
+ CHECKPOINTS = {
23
+ "LayoutLMv1 🦉": "impira/layoutlm-document-qa",
24
+ "Donut 🍩": "naver-clova-ix/donut-base-finetuned-docvqa",
25
+ }
26
+
27
+
28
+ @st.experimental_singleton(show_spinner=False)
29
+ def construct_pipeline(model):
30
  device = "cuda" if torch.cuda.is_available() else "cpu"
31
+ ret = get_pipeline(checkpoint=CHECKPOINTS[model], device=device)
32
  return ret
33
 
34
 
35
+ @st.cache(show_spinner=False)
36
+ def run_pipeline(model, question, document, top_k):
37
+ pipeline = construct_pipeline(model)
38
+ return pipeline(question=question, **document.context, top_k=top_k)
39
 
40
 
41
  # TODO: Move into docquery
63
  if "document" not in st.session_state:
64
  st.session_state["document"] = None
65
 
66
+ input_col, model_col = st.columns([2, 1])
67
 
68
  with input_col:
69
  input_type = st.radio("Pick an input type", ["Upload", "URL"], horizontal=True)
70
 
71
  with model_col:
72
+ model_type = st.radio("Pick a model", list(CHECKPOINTS.keys()), horizontal=True)
73
+
74
 
75
  def load_file_cb():
76
  if st.session_state.file_input is None:
117
 
118
  colors = ["blue", "red", "green"]
119
  if document is not None and question is not None and len(question) > 0:
120
+ col2.header(f"Answers ({model_type})")
121
  with col2:
122
  answers_placeholder = st.empty()
123
  answers_loading_placeholder = st.empty()
124
 
125
+ with answers_loading_placeholder:
126
+ # Run this (one-time) expensive operation outside of the processing
127
+ # question placeholder
128
+ with st.spinner("Constructing pipeline..."):
129
+ construct_pipeline(model_type)
130
+
131
+ with st.spinner("Processing question..."):
132
+ predictions = run_pipeline(
133
+ model=model_type, question=question, document=document, top_k=1
134
+ )
135
+
136
+ with answers_placeholder:
137
+ image = image.copy()
138
+ draw = ImageDraw.Draw(image)
139
+ for i, p in enumerate(ensure_list(predictions)):
140
+ col2.markdown(f"#### { p['answer'] }: ({round(p['score'] * 100, 1)}%)")
141
+ if "start" in p and "end" in p:
142
+ x1, y1, x2, y2 = normalize_bbox(
143
+ expand_bbox(
144
+ lift_word_boxes(document)[p["start"] : p["end"] + 1]
145
+ ),
146
+ image.width,
147
+ image.height,
148
+ )
149
+ draw.rectangle(((x1, y1), (x2, y2)), outline=colors[i], width=3)
150
 
151
  if document is not None:
152
+ col1.image(image, use_column_width="auto")
153
 
154
  "DocQuery uses LayoutLMv1 fine-tuned on DocVQA, a document visual question answering dataset, as well as SQuAD, which boosts its English-language comprehension. To use it, simply upload an image or PDF, type a question, and click 'submit', or click one of the examples to load them."
155
 
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  torch
2
  git+https://github.com/huggingface/transformers.git@21f6f58721dd9154357576be6de54eefef1f1818
3
  git+https://github.com/impira/docquery.git@43683e0dae72cadf8e8b4927191978109153458c
 
1
  torch
2
  git+https://github.com/huggingface/transformers.git@21f6f58721dd9154357576be6de54eefef1f1818
3
  git+https://github.com/impira/docquery.git@43683e0dae72cadf8e8b4927191978109153458c
4
+ sentencepiece