"""Gradio demo that runs a Pix2Struct checkpoint on an uploaded image."""

import gradio as gr
import numpy as np
from PIL import Image
from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor

# Single source of truth for the checkpoint so the model and its processor
# can never drift apart.
MODEL_ID = "sujr/pix2struct-base"

# Upper bound on the number of tokens generated per request.
MAX_NEW_TOKENS = 100

# Loaded once at import time so every request reuses the same weights.
model = Pix2StructForConditionalGeneration.from_pretrained(MODEL_ID)
processor = Pix2StructProcessor.from_pretrained(MODEL_ID)


def run(image: "np.ndarray | None") -> str:
    """Generate text for *image* with the Pix2Struct model.

    Args:
        image: Pixel data as an array accepted by ``PIL.Image.fromarray``
            (presumably what Gradio's ``"image"`` input delivers -- confirm
            against the installed Gradio version), or ``None`` when no image
            was provided.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens stripped. An empty string when *image* is ``None``.
    """
    # Guard against a submission without an image; Image.fromarray would
    # otherwise raise on None and surface as an opaque error in the UI.
    if image is None:
        return ""

    pil_image = Image.fromarray(image)
    inputs = processor(images=pil_image, return_tensors="pt")
    generated_ids = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS)
    # batch_decode returns one string per generated sequence; only a single
    # image is processed here, so take the first entry.
    decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return decoded[0]


# Built and launched at module level, as in the original script, so running
# the file starts the web UI.
demo = gr.Interface(fn=run, inputs="image", outputs="text")
demo.launch()