OCR-img2txt / app2.py
sundeveloper's picture
Upload app2.py
e8a1990 verified
raw
history blame contribute delete
848 Bytes
# app.py
from PIL import Image
import pytesseract
from docx import Document
# Make sure the Tesseract path is correct.
# This sets the executable that pytesseract invokes. The path is relative,
# so it is resolved against the process's current working directory.
pytesseract.pytesseract.tesseract_cmd = r"Tesseract-OCR/tesseract.exe"
class OCRProcessor:
    """Run Tesseract OCR on images and export the recognized text to .docx."""

    def __init__(self):
        # Tesseract language codes offered by this processor. The list is
        # informational: extract_text does not validate `lang` against it.
        self.languages = ["eng", "hin", "guj", "san", "tam", "tel"]

    def extract_text(self, image_path, lang="eng"):
        """Return the text Tesseract recognizes in the image at *image_path*.

        Args:
            image_path: Anything accepted by ``PIL.Image.open``.
            lang: Tesseract language code passed through to pytesseract.

        Returns:
            The string produced by ``pytesseract.image_to_string``.

        Raises:
            Whatever ``Image.open`` or pytesseract raise; nothing is caught.
        """
        # The context manager closes the underlying file as soon as OCR is
        # done instead of leaving the handle open until garbage collection.
        with Image.open(image_path) as image:
            return pytesseract.image_to_string(image, lang=lang)

    def save_as_docx(self, ocr_text, file_path="OCR_Result.docx"):
        """Write *ocr_text* as a single paragraph to a .docx file.

        Args:
            ocr_text: Text to store. ``None``, empty, or whitespace-only
                text means there is nothing to save.
            file_path: Destination path of the document.

        Returns:
            * *file_path* when the document was written;
            * ``None`` when there was no text to save;
            * the exception message (``str``) when anything failed.

            NOTE: a returned string is therefore not necessarily a path.
            This contract is kept for backward compatibility, so callers
            must distinguish the two cases themselves.
        """
        try:
            # Decide whether there is anything to write before building a
            # document; None is treated the same as empty text.
            if ocr_text is None or not ocr_text.strip():
                return None

            doc = Document()
            doc.add_paragraph(ocr_text)
            doc.save(file_path)
            return file_path
        except Exception as e:
            # Best-effort API: failures are reported as a message instead
            # of being raised (see the docstring note above).
            return str(e)