"""Gradio demo that transcribes an image with a TrOCR vision-encoder-decoder model."""

import gradio as gr
import torch
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Load model
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
model = VisionEncoderDecoderModel.from_pretrained("ericvo/scribbl-scan-trocr")
model.to(device)


def transcribe(image):
    """Return the text the model generates for ``image``.

    Args:
        image: The value supplied by the ``gr.Image()`` input component.
            NOTE(review): presumably an RGB NumPy array (Gradio's default
            image type) or ``None`` when nothing was uploaded -- confirm
            against the installed Gradio version.

    Returns:
        The first decoded sequence with special tokens stripped, or an
        empty string when no image was provided.
    """
    # Submitting the form without an image would otherwise crash inside the
    # processor; treat it as "nothing to transcribe".
    if image is None:
        return ""

    # The model was moved to `device` above, so its input tensor has to live
    # on the same device or `generate` fails with a device-mismatch error.
    pixel_values = processor(image, return_tensors="pt").pixel_values.to(device)
    generated_ids = model.generate(pixel_values)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]


iface = gr.Interface(transcribe, gr.Image(), "text")
iface.launch()