# Page-header residue captured with this file (not program code): "Spaces: Runtime error"
import os
import warnings

import gradio as gr

import pix2struct
import layoutlm
import donut
# Install a blanket "ignore" filter so library warnings do not clutter the logs.
warnings.filterwarnings('ignore')

# Description text for the demo page (uses Markdown emphasis and a link);
# it is handed to gr.Interface via the ``description`` argument.
desc = """Step into the DocVQA Sanctum, where three formidable models stand ready to tackle your document queries head-on! Discover the prowess of LayoutLM, Pix2Struct, and Donut as they decode your document images and provide insightful answers to your questions.
From LayoutLM's adept layout analysis to Pix2Struct's prowess in structural understanding and Donut's skill in content comprehension, this demo offers a captivating showcase of cutting-edge document visual question answering (DocVQA) technologies.
**Please Note:** Kindly allow a few moments for result generation, as the models are currently being inferred on CPU.
For a brief overview of what document visual question answering is, check out my latest blog post [here](https://medium.com/@krishnapal2308/understanding-docvqa-document-visual-question-answering-9e3db222bfed)."""
def process_image_and_generate_output(image, model_selection, question):
    """Answer ``question`` about ``image`` with the selected DocVQA model.

    Args:
        image: Value supplied by the image input; ``None`` when no image
            was provided.
        model_selection: Name of the model to run: ``"LayoutLM"``,
            ``"Pix2Struct"`` or ``"Donut"``.
        question: Question text forwarded to the model.

    Returns:
        The value returned by the chosen module's ``get_result(image,
        question)``; the message ``"Please select an image"`` when ``image``
        is ``None``; or an empty string when ``model_selection`` matches
        none of the known model names.
    """
    if image is None:
        # Return exactly one value: the interface that wraps this function
        # declares a single output component, so a tuple would not map to it.
        return "Please select an image"

    if model_selection == "LayoutLM":
        return layoutlm.get_result(image, question)
    if model_selection == 'Pix2Struct':
        return pix2struct.get_result(image, question)
    if model_selection == 'Donut':
        return donut.get_result(image, question)

    # No (or an unrecognised) model was chosen: nothing to show.
    return ''
# The bundled example image, resolved relative to this file so the examples
# work regardless of the current working directory.
_SAMPLE_IMAGE = os.path.join(os.path.dirname(__file__), "images/1.png")

# Example rows for the interface, in input order: [image path, model, question].
sample_images = [
    [_SAMPLE_IMAGE, "LayoutLM", "What is the NIC Code?"],
    [_SAMPLE_IMAGE, "Pix2Struct", "What is the Age Group?"],
    [_SAMPLE_IMAGE, "Donut", "What is the Industry Group?"],
]
# Image upload component; type='filepath' is requested so the handler is
# given a path rather than pixel data.
image_input = gr.Image(label="Upload Image", type='filepath')

# Radio buttons to choose which model answers the question.
model_selection_input = gr.Radio(["LayoutLM", "Pix2Struct", "Donut"],
                                 label="Choose Model")

# Free-text box for the question about the document.
question_input = gr.Text(label="Question")

# Wire the three inputs to the handler; its single return value is shown in
# the "Result" text box. The example rows follow the same input order.
iface = gr.Interface(fn=process_image_and_generate_output,
                     inputs=[image_input, model_selection_input, question_input],
                     outputs=gr.Text(label="Result"),
                     allow_flagging='never',
                     examples=sample_images,
                     title="DocVQA Sanctum", description=desc)

# Start the app when this script runs.
iface.launch()