Document-Image-Search-and-Query

Runtime error

App Files Files Community

awacke1 committed on Apr 11, 2024

Commit

9883346

verified ·

1 Parent(s): 213441a

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -37

app.py CHANGED Viewed

@@ -1,17 +1,14 @@
 import os
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-from PIL import Image, ImageDraw
 import traceback
-import gradio as gr
-import torch
 from docquery import pipeline
 from docquery.document import load_document, ImageDocument
 from docquery.ocr_reader import get_ocr_reader
 def ensure_list(x):
     if isinstance(x, list):
@@ -19,47 +16,36 @@ def ensure_list(x):
     else:
         return [x]
 CHECKPOINTS = {
     "LayoutLMv1 🦉": "impira/layoutlm-document-qa",
     "LayoutLMv1 for Invoices 💸": "impira/layoutlm-invoices",
     "Donut 🍩": "naver-clova-ix/donut-base-finetuned-docvqa",
 }
 PIPELINES = {}
 def construct_pipeline(task, model):
     global PIPELINES
     if model in PIPELINES:
         return PIPELINES[model]
     device = "cuda" if torch.cuda.is_available() else "cpu"
     ret = pipeline(task=task, model=CHECKPOINTS[model], device=device)
     PIPELINES[model] = ret
     return ret
 def run_pipeline(model, question, document, top_k):
     pipeline = construct_pipeline("document-question-answering", model)
     return pipeline(question=question, **document.context, top_k=top_k)
-# TODO: Move into docquery
-# TODO: Support words past the first page (or window?)
 def lift_word_boxes(document, page):
     return document.context["image"][page][1]
 def expand_bbox(word_boxes):
     if len(word_boxes) == 0:
         return None
     min_x, min_y, max_x, max_y = zip(*[x[1] for x in word_boxes])
     min_x, min_y, max_x, max_y = [min(min_x), min(min_y), max(max_x), max(max_y)]
     return [min_x, min_y, max_x, max_y]
 # LayoutLM boxes are normalized to 0, 1000
 def normalize_bbox(box, width, height, padding=0.005):
     min_x, min_y, max_x, max_y = [c / 1000 for c in box]
@@ -70,7 +56,6 @@ def normalize_bbox(box, width, height, padding=0.005):
         max_y = min(max_y + padding, 1)
     return [min_x * width, min_y * height, max_x * width, max_y * height]
 examples = [
     [
         "invoice.png",
@@ -84,14 +69,6 @@ examples = [
         "statement.png",
         "What are net sales for 2020?",
     ],
-    #    [
-    #        "docquery.png",
-    #        "How many likes does the space have?",
-    #    ],
-    #    [
-    #        "hacker_news.png",
-    #        "What is the title of post number 5?",
-    #    ],
 ]
 question_files = {
@@ -100,7 +77,6 @@ question_files = {
     "What is the title of post number 5?": "https://news.ycombinator.com",
 }
 def process_path(path):
     error = None
     if path:
@@ -141,7 +117,6 @@ def process_upload(file):
             None,
         )
 colors = ["#64A087", "green", "black"]
@@ -156,8 +131,6 @@ def process_question(question, document, model=list(CHECKPOINTS.keys())[0]):
         if i == 0:
             text_value = p["answer"]
         else:
-            # Keep the code around to produce multiple boxes, but only show the top
-            # prediction for now
             break
         if "word_ids" in p:
@@ -297,11 +270,9 @@ with gr.Blocks(css=CSS) as demo:
         " click one of the examples to load them."
         " DocQuery is MIT-licensed and available on [Github](https://github.com/impira/docquery)."
     )
     document = gr.Variable()
     example_question = gr.Textbox(visible=False)
     example_image = gr.Image(visible=False)
     with gr.Row(equal_height=True):
         with gr.Column():
             with gr.Row():
@@ -399,25 +370,21 @@ with gr.Blocks(css=CSS) as demo:
         inputs=[url],
         outputs=[document, image, img_clear_button, output, output_text, url_error],
     )
     question.submit(
         fn=process_question,
         inputs=[question, document, model],
         outputs=[image, output, output_text],
     )
     submit_button.click(
         process_question,
         inputs=[question, document, model],
         outputs=[image, output, output_text],
     )
     model.change(
         process_question,
         inputs=[question, document, model],
         outputs=[image, output, output_text],
     )
     example_image.change(
         fn=load_example_document,
         inputs=[example_image, example_question, model],

+import gradio as gr
 import os
+import torch
 import traceback
 from docquery import pipeline
 from docquery.document import load_document, ImageDocument
 from docquery.ocr_reader import get_ocr_reader
+from PIL import Image, ImageDraw
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
 def ensure_list(x):
     if isinstance(x, list):
     else:
         return [x]
 CHECKPOINTS = {
     "LayoutLMv1 🦉": "impira/layoutlm-document-qa",
     "LayoutLMv1 for Invoices 💸": "impira/layoutlm-invoices",
     "Donut 🍩": "naver-clova-ix/donut-base-finetuned-docvqa",
 }
 PIPELINES = {}
 def construct_pipeline(task, model):
     global PIPELINES
     if model in PIPELINES:
         return PIPELINES[model]
     device = "cuda" if torch.cuda.is_available() else "cpu"
     ret = pipeline(task=task, model=CHECKPOINTS[model], device=device)
     PIPELINES[model] = ret
     return ret
 def run_pipeline(model, question, document, top_k):
     pipeline = construct_pipeline("document-question-answering", model)
     return pipeline(question=question, **document.context, top_k=top_k)
 def lift_word_boxes(document, page):
     return document.context["image"][page][1]
 def expand_bbox(word_boxes):
     if len(word_boxes) == 0:
         return None
     min_x, min_y, max_x, max_y = zip(*[x[1] for x in word_boxes])
     min_x, min_y, max_x, max_y = [min(min_x), min(min_y), max(max_x), max(max_y)]
     return [min_x, min_y, max_x, max_y]
 # LayoutLM boxes are normalized to 0, 1000
 def normalize_bbox(box, width, height, padding=0.005):
     min_x, min_y, max_x, max_y = [c / 1000 for c in box]
         max_y = min(max_y + padding, 1)
     return [min_x * width, min_y * height, max_x * width, max_y * height]
 examples = [
     [
         "invoice.png",
         "statement.png",
         "What are net sales for 2020?",
     ],
 ]
 question_files = {
     "What is the title of post number 5?": "https://news.ycombinator.com",
 }
 def process_path(path):
     error = None
     if path:
             None,
         )
 colors = ["#64A087", "green", "black"]
         if i == 0:
             text_value = p["answer"]
         else:
             break
         if "word_ids" in p:
         " click one of the examples to load them."
         " DocQuery is MIT-licensed and available on [Github](https://github.com/impira/docquery)."
     )
     document = gr.Variable()
     example_question = gr.Textbox(visible=False)
     example_image = gr.Image(visible=False)
     with gr.Row(equal_height=True):
         with gr.Column():
             with gr.Row():
         inputs=[url],
         outputs=[document, image, img_clear_button, output, output_text, url_error],
     )
     question.submit(
         fn=process_question,
         inputs=[question, document, model],
         outputs=[image, output, output_text],
     )
     submit_button.click(
         process_question,
         inputs=[question, document, model],
         outputs=[image, output, output_text],
     )
     model.change(
         process_question,
         inputs=[question, document, model],
         outputs=[image, output, output_text],
     )
     example_image.change(
         fn=load_example_document,
         inputs=[example_image, example_question, model],