Ankur Goyal committed on
Commit
2af0878
1 Parent(s): 8bd074d

Plumbing for fields

Browse files
Files changed (1) hide show
  1. app.py +57 -33
app.py CHANGED
@@ -21,9 +21,7 @@ def ensure_list(x):
21
 
22
 
23
  CHECKPOINTS = {
24
- "LayoutLMv1 🦉": "impira/layoutlm-document-qa",
25
- "LayoutLMv1 for Invoices 💸": "impira/layoutlm-invoices",
26
- "Donut 🍩": "naver-clova-ix/donut-base-finetuned-docvqa",
27
  }
28
 
29
  PIPELINES = {}
@@ -71,10 +69,10 @@ def normalize_bbox(box, width, height, padding=0.005):
71
  return [min_x * width, min_y * height, max_x * width, max_y * height]
72
 
73
 
74
- examples = [
75
  [
76
  "invoice.png",
77
- "What is the invoice number?",
78
  ],
79
  [
80
  "contract.jpeg",
@@ -86,8 +84,12 @@ examples = [
86
  ],
87
  ]
88
 
89
- question_files = {
90
- "What are net sales for 2020?": "statement.pdf",
 
 
 
 
91
  }
92
 
93
 
@@ -135,6 +137,19 @@ def process_upload(file):
135
  colors = ["#64A087", "green", "black"]
136
 
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  def process_question(question, document, model=list(CHECKPOINTS.keys())[0]):
139
  if not question or document is None:
140
  return None, None, None
@@ -150,16 +165,7 @@ def process_question(question, document, model=list(CHECKPOINTS.keys())[0]):
150
  # prediction for now
151
  break
152
 
153
- if "word_ids" in p:
154
- image = pages[p["page"]]
155
- draw = ImageDraw.Draw(image, "RGBA")
156
- word_boxes = lift_word_boxes(document, p["page"])
157
- x1, y1, x2, y2 = normalize_bbox(
158
- expand_bbox([word_boxes[i] for i in p["word_ids"]]),
159
- image.width,
160
- image.height,
161
- )
162
- draw.rectangle(((x1, y1), (x2, y2)), fill=(0, 255, 0, int(0.4 * 255)))
163
 
164
  return (
165
  gr.update(visible=True, value=pages),
@@ -171,16 +177,33 @@ def process_question(question, document, model=list(CHECKPOINTS.keys())[0]):
171
  )
172
 
173
 
174
- def load_example_document(img, question, model):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  if img is not None:
176
- if question in question_files:
177
- document = load_document(question_files[question])
 
178
  else:
179
- document = ImageDocument(Image.fromarray(img), get_ocr_reader())
180
- preview, answer, answer_text = process_question(question, document, model)
181
- return document, question, preview, gr.update(visible=True), answer, answer_text
182
  else:
183
- return None, None, None, gr.update(visible=False), None, None
184
 
185
 
186
  CSS = """
@@ -280,12 +303,13 @@ gradio-app h2, .gradio-app h2 {
280
  with gr.Blocks(css=CSS) as demo:
281
  gr.Markdown("# DocQuery: Document Query Engine")
282
  gr.Markdown(
283
- "DocQuery (created by [Impira](https://impira.com?utm_source=huggingface&utm_medium=referral&utm_campaign=docquery_space))"
284
- " uses LayoutLMv1 fine-tuned on DocVQA, a document visual question"
285
- " answering dataset, as well as SQuAD, which boosts its English-language comprehension."
286
- " To use it, simply upload an image or PDF, type a question, and click 'submit', or "
287
- " click one of the examples to load them."
288
- " DocQuery is MIT-licensed and available on [Github](https://github.com/impira/docquery)."
 
289
  )
290
 
291
  document = gr.Variable()
@@ -295,7 +319,7 @@ with gr.Blocks(css=CSS) as demo:
295
  with gr.Row(equal_height=True):
296
  with gr.Column():
297
  with gr.Row():
298
- gr.Markdown("## 1. Select a file", elem_id="select-a-file")
299
  img_clear_button = gr.Button(
300
  "Clear", variant="secondary", elem_id="file-clear", visible=False
301
  )
@@ -321,7 +345,7 @@ with gr.Blocks(css=CSS) as demo:
321
  gr.Markdown("— or —")
322
  upload = gr.File(label=None, interactive=True, elem_id="short-upload-box")
323
  gr.Examples(
324
- examples=examples,
325
  inputs=[example_image, example_question],
326
  )
327
 
@@ -411,7 +435,7 @@ with gr.Blocks(css=CSS) as demo:
411
  example_image.change(
412
  fn=load_example_document,
413
  inputs=[example_image, example_question, model],
414
- outputs=[document, question, image, img_clear_button, output, output_text],
415
  )
416
 
417
  if __name__ == "__main__":
 
21
 
22
 
23
  CHECKPOINTS = {
24
+ "LayoutLMv1 for Invoices 🧾": "impira/layoutlm-invoices",
 
 
25
  }
26
 
27
  PIPELINES = {}
 
69
  return [min_x * width, min_y * height, max_x * width, max_y * height]
70
 
71
 
72
+ EXAMPLES = [
73
  [
74
  "invoice.png",
75
+ "Invoice 1",
76
  ],
77
  [
78
  "contract.jpeg",
 
84
  ],
85
  ]
86
 
87
+ QUESTION_FILES = {}
88
+
89
+ FIELDS = {
90
+ "Vendor Name": ["Vendor Name - Logo?", "Vendor Name - Address?"],
91
+ "Vendor Address": ["Vendor Address?"],
92
+ "Invoice Total": ["Invoice Total?"],
93
  }
94
 
95
 
 
137
  colors = ["#64A087", "green", "black"]
138
 
139
 
140
+ def annotate_page(prediction, pages, document):
141
+ if "word_ids" in prediction:
142
+ image = pages[prediction["page"]]
143
+ draw = ImageDraw.Draw(image, "RGBA")
144
+ word_boxes = lift_word_boxes(document, prediction["page"])
145
+ x1, y1, x2, y2 = normalize_bbox(
146
+ expand_bbox([word_boxes[i] for i in prediction["word_ids"]]),
147
+ image.width,
148
+ image.height,
149
+ )
150
+ draw.rectangle(((x1, y1), (x2, y2)), fill=(0, 255, 0, int(0.4 * 255)))
151
+
152
+
153
  def process_question(question, document, model=list(CHECKPOINTS.keys())[0]):
154
  if not question or document is None:
155
  return None, None, None
 
165
  # prediction for now
166
  break
167
 
168
+ annotate_page(p, pages, document)
 
 
 
 
 
 
 
 
 
169
 
170
  return (
171
  gr.update(visible=True, value=pages),
 
177
  )
178
 
179
 
180
+ def process_fields(document, model=list(CHECKPOINTS.keys())[0]):
181
+ pages = [x.copy().convert("RGB") for x in document.preview]
182
+ ret = {}
183
+
184
+ for (field_name, questions) in FIELDS.items():
185
+ answers = [run_pipeline(model, q, document, top_k=1) for q in questions]
186
+ answers.sort(key=lambda x: -x.get("score", 0) if x else 0)
187
+ top = answers[0]
188
+ annotate_page(top, pages, document)
189
+ ret[field_name] = top
190
+ return (
191
+ gr.update(visible=True, value=pages),
192
+ gr.update(visible=True, value=ret),
193
+ )
194
+
195
+
196
+ def load_example_document(img, title, model):
197
  if img is not None:
198
+ if title in QUESTION_FILES:
199
+ print("using document")
200
+ document = load_document(QUESTION_FILES[title])
201
  else:
202
+ document = ImageDocument(Image.fromarray(img), ocr_reader=get_ocr_reader())
203
+ preview, answer = process_fields(document, model)
204
+ return document, preview, gr.update(visible=True), answer
205
  else:
206
+ return None, None, gr.update(visible=False), None
207
 
208
 
209
  CSS = """
 
303
  with gr.Blocks(css=CSS) as demo:
304
  gr.Markdown("# DocQuery: Document Query Engine")
305
  gr.Markdown(
306
+ "DocQuery (created by [Impira](https://impira.com)) uses LayoutLMv1 fine-tuned on an invoice dataset"
307
+ " as well as DocVQA and SQuAD, which boost its general comprehension skills. The model is an enhanced"
308
+ " QA architecture that supports selecting blocks of text which may be non-consecutive, which is a major"
309
+ " issue when dealing with invoice documents (e.g. addresses)."
310
+ " To use it, simply upload an image or PDF invoice and the model will predict values for several fields."
311
+ " You can also create additional fields by simply typing in a question."
312
+ " DocQuery is available on [Github](https://github.com/impira/docquery)."
313
  )
314
 
315
  document = gr.Variable()
 
319
  with gr.Row(equal_height=True):
320
  with gr.Column():
321
  with gr.Row():
322
+ gr.Markdown("## 1. Select an invoice", elem_id="select-a-file")
323
  img_clear_button = gr.Button(
324
  "Clear", variant="secondary", elem_id="file-clear", visible=False
325
  )
 
345
  gr.Markdown("— or —")
346
  upload = gr.File(label=None, interactive=True, elem_id="short-upload-box")
347
  gr.Examples(
348
+ examples=EXAMPLES,
349
  inputs=[example_image, example_question],
350
  )
351
 
 
435
  example_image.change(
436
  fn=load_example_document,
437
  inputs=[example_image, example_question, model],
438
+ outputs=[document, image, img_clear_button, output],
439
  )
440
 
441
  if __name__ == "__main__":