import os

import torch
from transformers import AutoModel, AutoTokenizer


class OCRModel:
    """Process-wide singleton wrapper around the GOT-OCR2.0 model.

    The first ``OCRModel()`` call downloads/loads the tokenizer and model
    weights; every later call returns the same fully initialized instance,
    so the (expensive) weight load happens exactly once per process.
    """

    _instance = None  # the one shared instance, created lazily

    def __new__(cls):
        # Classic singleton: build and initialize only on first use.
        # NOTE(review): not thread-safe — two threads racing the first
        # construction could each load the model; confirm single-threaded
        # startup before relying on this.
        if cls._instance is None:
            cls._instance = super(OCRModel, cls).__new__(cls)
            cls._instance.initialize()
        return cls._instance

    def initialize(self):
        """Load the GOT-OCR2.0 tokenizer and model from the Hugging Face hub.

        Places the model on GPU when CUDA is available (via ``device_map``),
        otherwise on CPU, and switches it to eval mode for inference.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(
            'ucaslcl/GOT-OCR2_0', trust_remote_code=True
        )
        self.model = AutoModel.from_pretrained(
            'ucaslcl/GOT-OCR2_0',
            trust_remote_code=True,
            low_cpu_mem_usage=True,
            # device_map handles placement at load time; no manual move needed.
            device_map='cuda' if torch.cuda.is_available() else 'cpu',
            use_safetensors=True,
            # The model config lacks a pad token; reuse EOS as is conventional.
            pad_token_id=self.tokenizer.eos_token_id,
        )
        # Fix: the original also called .cuda() after loading — redundant with
        # device_map='cuda' above, and moving an accelerate-dispatched model
        # can conflict with its device hooks. Placement is now done once.
        self.model = self.model.eval()

    def process_image(self, image_path):
        """Run formatted OCR on the image at ``image_path``.

        Returns the recognized text on success. On failure, returns the
        exception message as a string (kept for backward compatibility —
        existing callers may depend on receiving a string either way).
        """
        try:
            return self.model.chat(self.tokenizer, image_path, ocr_type='format')
        except Exception as e:
            # NOTE(review): returning str(e) conflates errors with OCR output;
            # callers cannot distinguish a failure from recognized text.
            # Preserved deliberately to avoid breaking the existing contract.
            return str(e)