Spaces:

krishnapal2308
/

DocVQA-Sanctum

Runtime error

DocVQA-Sanctum / app.py

krishnapal2308

adding description and pix2struct output fix

0f51c16 over 1 year ago

2.49 kB

	import gradio as gr
	import warnings
	import os
	import pix2struct, layoutlm, donut

	# Install a blanket "ignore" filter so every Python warning is suppressed for
	# the rest of the process (presumably to keep noisy library warnings out of
	# the app output).
	warnings.filterwarnings('ignore')

	# Description text handed to gr.Interface(description=...) further down.
	# It is user-facing copy (with a Markdown link to a blog post), so the
	# literal is kept verbatim.
	desc = """Step into the DocVQA Sanctum, where three formidable models stand ready to tackle your document queries head-on! Discover the prowess of LayoutLM, Pix2Struct, and Donut as they decode your document images and provide insightful answers to your questions.

	From LayoutLM's adept layout analysis to Pix2Struct's prowess in structural understanding and Donut's skill in content comprehension, this demo offers a captivating showcase of cutting-edge document visual question answering (DocVQA) technologies.

	Please Note: Kindly allow a few moments for result generation, as the models are currently being inferred on CPU.

	For a brief overview of what document visual question answering is, check out my latest blog post [here](https://medium.com/@krishnapal2308/understanding-docvqa-document-visual-question-answering-9e3db222bfed)."""



	def process_image_and_generate_output(image, model_selection, question):
	    """Answer a question about a document image with the selected model.

	    Args:
	        image: The uploaded document image, or None when nothing was
	            uploaded. The Gradio image input in this file is configured
	            with type='filepath', so this is expected to be a path string.
	        model_selection: Name of the model to use: "LayoutLM",
	            "Pix2Struct" or "Donut". May be None when no radio option
	            has been picked.
	        question: The question to ask about the document.

	    Returns:
	        A single value for the interface's one text output: a short
	        prompt string when the image or model choice is missing,
	        otherwise whatever the chosen backend's get_result(image, question)
	        returns (presumably the answer text; confirm against the backends).
	    """
	    # The interface has exactly one output component, so always return a
	    # single value (the earlier two-element tuple did not match that).
	    if image is None:
	        return "Please select an image"

	    # Backend modules are looked up lazily, only for the branch taken.
	    if model_selection == "LayoutLM":
	        backend = layoutlm
	    elif model_selection == "Pix2Struct":
	        backend = pix2struct
	    elif model_selection == "Donut":
	        backend = donut
	    else:
	        # No (or an unknown) model chosen: tell the user instead of
	        # silently returning an empty result.
	        return "Please choose a model"

	    return backend.get_result(image, question)


	# Example rows offered by the interface, one per model, each shaped as
	# [image path, model name, question]. All rows point at the same image
	# path, resolved relative to this file's directory.
	sample_images = [
	    [os.path.join(os.path.dirname(__file__), "images/1.png"), model_name, sample_question]
	    for model_name, sample_question in (
	        ("LayoutLM", "What is the NIC Code?"),
	        ("Pix2Struct", "What is the Age Group?"),
	        ("Donut", "What is the Industry Group?"),
	    )
	]

	# Image upload component; type "filepath" means the handler is given the
	# upload as a path on disk.
	image_input = gr.Image(type="filepath", label="Upload Image")

	# Radio group for choosing which model answers the question.
	model_selection_input = gr.Radio(
	    choices=["LayoutLM", "Pix2Struct", "Donut"],
	    label="Choose Model",
	)

	# Free-text field for the question about the document.
	question_input = gr.Text(label="Question")

	# Wire the three inputs to the handler; the result goes to one text output.
	iface = gr.Interface(
	    fn=process_image_and_generate_output,
	    inputs=[image_input, model_selection_input, question_input],
	    outputs=gr.Text(label="Result"),
	    examples=sample_images,
	    title="DocVQA Sanctum",
	    description=desc,
	    allow_flagging="never",
	)

	# Start the app as soon as the script runs.
	iface.launch()