"""Gradio demo that runs a pretrained Donut model on an uploaded image.

On start-up the script converts two bundled JPEG samples to PNG (used as
the interface examples), loads the ``thinkersloop/donut-demo`` checkpoint,
and launches a Gradio interface whose JSON output is the model prediction.
"""

import gradio as gr
import torch
from PIL import Image
from donut import DonutModel


def demo_process(input_img):
    """Run the Donut model on one image and return its first prediction.

    Args:
        input_img: The uploaded image. The input component is declared with
            ``type="pil"``, so no array-to-PIL conversion is done here.

    Returns:
        The first element of the ``"predictions"`` list returned by
        ``pretrained_model.inference``; it is rendered by the interface's
        JSON output component.
    """
    # `pretrained_model` and `task_prompt` are module-level names that are
    # only read here, so no `global` declaration is required.
    result = pretrained_model.inference(image=input_img, prompt=task_prompt)
    return result["predictions"][0]


# Prompt passed to every inference call; this demo uses an empty prompt.
task_prompt = ""

# Re-save the bundled JPEG samples as PNG files for the `examples` list.
# The context manager closes each source file as soon as it has been saved.
with Image.open("./sample_1.jpg") as sample:
    sample.save("cord_sample_1.png")
with Image.open("./sample_2.jpg") as sample:
    sample.save("cord_sample_2.png")

pretrained_model = DonutModel.from_pretrained("thinkersloop/donut-demo")
# Cast only the encoder to bfloat16 (presumably to reduce memory use —
# TODO confirm the decoder is meant to stay in its original dtype).
pretrained_model.encoder.to(torch.bfloat16)
pretrained_model.eval()

# NOTE(review): `gr.inputs.Image` is the legacy input namespace; it is
# deprecated in Gradio 3.x and absent from 4.x. Kept as-is so the script
# still runs on the Gradio version it was written for; switch to
# `gr.Image(type="pil")` when upgrading.
demo = gr.Interface(
    fn=demo_process,
    inputs=gr.inputs.Image(type="pil"),
    outputs="json",
    title="Transformers demo for `cord-v2` task",
    description="""This model is trained with 30 driver's license images of CORD dataset.
""",
    examples=[["cord_sample_1.png"], ["cord_sample_2.png"]],
    cache_examples=False,
)

demo.launch()