from PIL import Image
import requests
from transformers import TrOCRProcessor, VisionEncoderDecoderModel


class CaesarHandWriting:
    """Recognise handwritten text in an image fetched from a URL using TrOCR.

    The processor and model are loaded once in ``__init__`` and reused by
    every call to :meth:`translate`.
    """

    # Checkpoint used when the caller does not ask for a different one.
    DEFAULT_MODEL_NAME = 'microsoft/trocr-large-handwritten'

    def __init__(self, model_name: str = DEFAULT_MODEL_NAME) -> None:
        """Load the TrOCR processor and encoder-decoder model.

        Args:
            model_name: Identifier passed to ``from_pretrained`` for both the
                processor and the model. Defaults to
                ``'microsoft/trocr-large-handwritten'``.
        """
        self.processor = TrOCRProcessor.from_pretrained(model_name)
        self.model = VisionEncoderDecoderModel.from_pretrained(model_name)

    def translate(self, url: str, *, timeout: float = 30.0) -> str:
        """Download the image at *url* and return the text the model generates.

        Args:
            url: Location of the image to fetch with an HTTP GET request.
            timeout: Seconds to wait for the server before giving up, passed
                straight to ``requests.get``. Without a timeout a stalled
                server would block the caller indefinitely.

        Returns:
            The first decoded sequence, with special tokens stripped.

        Note:
            The HTTP status code is not checked; whatever body the server
            returns is handed to ``Image.open`` as-is.
        """
        # The context manager closes the streamed response (releasing the
        # connection) even if decoding the image fails. ``convert`` is called
        # inside the block so the pixel data is read while the stream is open.
        with requests.get(url, stream=True, timeout=timeout) as response:
            image = Image.open(response.raw).convert("RGB")

        pixel_values = self.processor(
            images=image, return_tensors="pt"
        ).pixel_values
        generated_ids = self.model.generate(pixel_values)
        decoded = self.processor.batch_decode(
            generated_ids, skip_special_tokens=True
        )
        # A single image was submitted, so only the first entry is returned.
        return decoded[0]