Spaces:

sundeveloper
/

OCR-img2txt

Sleeping

OCR-img2txt / app2.py

Upload app2.py

e8a1990 verified 4 months ago

848 Bytes

	# app2.py

	from PIL import Image
	import pytesseract
	from docx import Document

	# Make sure the Tesseract path is correct.
	# NOTE(review): this is a relative path to a Windows-style executable, so it
	# presumably only resolves when the working directory contains a
	# "Tesseract-OCR" folder — confirm against the deployment target.
	pytesseract.pytesseract.tesseract_cmd = r"Tesseract-OCR/tesseract.exe"

	class OCRProcessor:
	    """Run Tesseract OCR on images and optionally export the text to .docx.

	    Attributes:
	        languages: Language codes stored on the instance (presumably the
	            Tesseract codes the app exposes to users). They are not
	            enforced by ``extract_text``.
	    """

	    def __init__(self):
	        """Initialise the processor with its list of language codes."""
	        self.languages = ["eng", "hin", "guj", "san", "tam", "tel"]

	    def extract_text(self, image_path, lang="eng"):
	        """Return the text Tesseract recognises in the image at *image_path*.

	        Args:
	            image_path: Anything ``PIL.Image.open`` accepts.
	            lang: Language code forwarded to ``pytesseract.image_to_string``.

	        Returns:
	            The result of ``pytesseract.image_to_string`` for the image.
	        """
	        # Use the image as a context manager so its underlying file handle
	        # is released as soon as OCR finishes instead of lingering until GC.
	        with Image.open(image_path) as img:
	            return pytesseract.image_to_string(img, lang=lang)

	    def save_as_docx(self, ocr_text, file_path="OCR_Result.docx"):
	        """Write *ocr_text* as a single paragraph to a .docx file.

	        Args:
	            ocr_text: Text to store in the document.
	            file_path: Destination passed to ``Document.save``.

	        Returns:
	            ``file_path`` when the document was saved; ``None`` when
	            *ocr_text* is ``None``, empty or whitespace-only (nothing is
	            written); otherwise the message of the exception that occurred.
	            The error message is returned rather than raised to stay
	            compatible with existing callers, so callers should not assume
	            that a string result is always a path.
	        """
	        # A missing OCR result is "nothing to save", not an error message.
	        if ocr_text is None:
	            return None
	        try:
	            # Check for blank text before building a document that would
	            # only be thrown away.
	            if not ocr_text.strip():
	                return None
	            doc = Document()
	            doc.add_paragraph(ocr_text)
	            doc.save(file_path)
	        except Exception as e:
	            # Kept broad on purpose: callers receive the failure as text.
	            return str(e)
	        return file_path