Spaces:
Sleeping
Sleeping
# app.py | |
from PIL import Image | |
import pytesseract | |
from docx import Document | |
# Make sure the Tesseract path is correct.
# NOTE(review): this is a relative path to a Windows executable; presumably it
# is resolved against the directory the app is started from - confirm.
pytesseract.pytesseract.tesseract_cmd = r"Tesseract-OCR/tesseract.exe" | |
class OCRProcessor:
    """Extract text from images with Tesseract and export it to a .docx file."""

    def __init__(self):
        """Set up the list of Tesseract language codes offered by this processor."""
        # Tesseract traineddata codes: English, Hindi, Gujarati, Sanskrit,
        # Tamil and Telugu. The list is informational only; extract_text does
        # not validate its `lang` argument against it.
        self.languages = ["eng", "hin", "guj", "san", "tam", "tel"]

    def extract_text(self, image_path, lang="eng"):
        """Run OCR on an image and return the recognised text.

        Args:
            image_path: Anything accepted by ``PIL.Image.open``.
            lang: Tesseract language code passed straight to pytesseract
                (not checked against ``self.languages``).

        Returns:
            The string produced by ``pytesseract.image_to_string``.
        """
        # Image.open keeps the underlying file open; the context manager
        # releases the handle as soon as OCR has finished.
        with Image.open(image_path) as image:
            return pytesseract.image_to_string(image, lang=lang)

    def save_as_docx(self, ocr_text, file_path="OCR_Result.docx"):
        """Write OCR text into a new Word document.

        Args:
            ocr_text: Text to store as a single paragraph.
            file_path: Destination of the generated document.

        Returns:
            ``file_path`` when the document was written, ``None`` when
            ``ocr_text`` is ``None``, empty or whitespace-only, or the
            exception message as a string when creating or saving the
            document fails.
        """
        try:
            # Validate before touching python-docx so blank or missing text
            # never builds a document and is reported as None.
            if not ocr_text or not ocr_text.strip():
                return None

            doc = Document()
            doc.add_paragraph(ocr_text)
            doc.save(file_path)
            return file_path
        except Exception as e:
            # Kept for backward compatibility: callers receive the error
            # message as the return value instead of an exception.
            return str(e)