"""Gradio demo that transcribes an image of handwritten text with TrOCR."""

from transformers import (
    TrOCRConfig,
    TrOCRProcessor,
    TrOCRForCausalLM,
    ViTConfig,
    ViTModel,
    VisionEncoderDecoderModel,
)
import gradio as gr

# Single source of truth for the checkpoint so the processor and the model
# can never drift apart.
MODEL_NAME = "microsoft/trocr-base-handwritten"

# Loaded once at import time and reused by every request.
processor = TrOCRProcessor.from_pretrained(MODEL_NAME)
model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)


def ocr(image):
    """Return the text recognised in *image*.

    Args:
        image: The image handed over by the Gradio ``"image"`` input
            (presumably an RGB array or PIL image -- confirm against the
            Gradio version in use), or ``None`` when nothing was submitted.

    Returns:
        The first decoded sequence with special tokens stripped, or an empty
        string when *image* is ``None``.
    """
    # An empty submission arrives as None; the processor would raise on it,
    # so answer with empty text instead of surfacing an error in the UI.
    if image is None:
        return ""

    pixel_values = processor(image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    # A single image goes in, so only the first decoded sequence is returned.
    generated_text = processor.batch_decode(
        generated_ids, skip_special_tokens=True
    )[0]
    return generated_text


demo = gr.Interface(fn=ocr, inputs="image", outputs=["text"])
demo.launch()