"""OCR helper: turn raw image bytes into cleaned text via an image-to-text pipeline."""

import io
import pickle
import re

from PIL import Image
from transformers import pipeline

# Compiled once at import time so every call reuses the same pattern objects.
_TAG_PATTERN = re.compile(r'<[^>]+>')
_WHITESPACE_PATTERN = re.compile(r'\s+')


def clean_text(text: str) -> str:
    """Return *text* with angle-bracket tags removed and whitespace normalized.

    Every substring matching ``<...>`` is deleted (not replaced by a space, so
    text on either side of a tag is joined directly), leading and trailing
    whitespace is stripped, and each remaining run of whitespace is collapsed
    to a single space.

    Args:
        text: Raw string, presumably the model output containing markup tags.

    Returns:
        The cleaned, single-spaced string.
    """
    without_tags = _TAG_PATTERN.sub('', text)
    return _WHITESPACE_PATTERN.sub(' ', without_tags.strip())


# Built once at import time and shared by every extract_text() call.
pipe = pipeline("image-to-text", model="jinhybr/OCR-Donut-CORD")


def extract_text(binary_image: bytes) -> str:
    """Run the OCR pipeline on an encoded image and return the cleaned text.

    Example:
        with open("pictures/users/2.jpg", "rb") as f:
            print(extract_text(f.read()))

    Args:
        binary_image: The encoded image file contents (e.g. the bytes of a
            JPEG file), as accepted by ``io.BytesIO``.

    Returns:
        The ``generated_text`` of the first pipeline result, passed through
        ``clean_text``.

    Raises:
        IndexError, KeyError: If the pipeline output is empty or its first
            entry has no ``'generated_text'`` key.
        Exception: Whatever ``PIL.Image.open`` raises for data it cannot
            decode is propagated unchanged.
    """
    # The context manager closes the image once inference is done instead of
    # leaving the handle to the garbage collector.
    with Image.open(io.BytesIO(binary_image)) as image:
        result = pipe(image)
    return clean_text(result[0]['generated_text'])


print("OCR pipeline loaded successfully!")