Spaces:

ImageProcessing
/

backend

Sleeping

backend / imageToText /index.py

Upload 5 files

e2fdd3f 6 months ago

No virus

670 Bytes

	import pickle
	import re
	from PIL import Image
	from transformers import pipeline
	import io

	def clean_text(text: str) -> str:
	    """Return *text* with angle-bracket tags removed and whitespace normalised.

	    Processing order:
	      1. delete every ``<...>`` tag (nothing is inserted in its place, so
	         text on either side of a tag is joined directly);
	      2. strip leading and trailing whitespace;
	      3. collapse each run of whitespace into a single space.

	    Args:
	        text: Raw string, e.g. the ``generated_text`` of the OCR pipeline.

	    Returns:
	        The cleaned, single-line string (possibly empty).
	    """
	    # A separate local name avoids shadowing the function's own name.
	    without_tags = re.sub(r'<[^>]+>', '', text)
	    return re.sub(r'\s+', ' ', without_tags.strip())

	# Build the Hugging Face "image-to-text" pipeline once, at import time, so
	# every extract_text() call reuses the same object.
	# NOTE(review): presumably this loads (and may download) the model weights
	# when the module is imported — confirm.
	pipe = pipeline("image-to-text", model="jinhybr/OCR-Donut-CORD")

	def extract_text(binary_image: bytes) -> str:
	    """Run OCR on an encoded image and return the cleaned text.

	    Args:
	        binary_image: The raw bytes of an image file (as read with
	            ``open(path, "rb").read()``), decoded via ``PIL.Image.open``.

	    Returns:
	        The ``generated_text`` of the first pipeline result, passed
	        through ``clean_text``.
	    """
	    # Wrap the bytes in a file-like object so PIL can decode them in memory.
	    image = Image.open(io.BytesIO(binary_image))
	    result = pipe(image)
	    # Only the first entry of the pipeline output is used.
	    text = result[0]['generated_text']
	    return clean_text(text)

	# Manual smoke test (disabled): reads a local image and prints its OCR text.
	# print(extract_text(open("pictures/users/2.jpg", "rb").read()))

	# Runs at import time, after the pipeline above has been constructed.
	print("OCR pipeline loaded successfully!")