# Page-scrape residue (not part of the module): "Spaces: Build error"
import io
import logging
import os

import pytesseract
from PIL import Image
class OCREngine:
    """Extract text from raw image bytes using Tesseract via pytesseract.

    Args:
        tesseract_cmd: Optional explicit path to the tesseract executable.
            When omitted, ``WINDOWS_TESSERACT_CMD`` is used on Windows and
            pytesseract's own default is left untouched on other platforms.
    """

    # Install location used on Windows when no explicit path is given.
    WINDOWS_TESSERACT_CMD = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
    # Images wider or taller than this many pixels are downscaled before OCR.
    MAX_DIMENSION = 2000

    def __init__(self, tesseract_cmd=None):
        # On Render, tesseract is usually in /usr/bin/tesseract, so the
        # pytesseract default is only overridden on Windows or on request.
        if tesseract_cmd is None and os.name == 'nt':
            tesseract_cmd = self.WINDOWS_TESSERACT_CMD
        if tesseract_cmd is not None:
            pytesseract.pytesseract.tesseract_cmd = tesseract_cmd

    def extract_text(self, image_content: bytes) -> str:
        """Run OCR on an encoded image and return the recognised text.

        Args:
            image_content: The raw bytes of an image file.

        Returns:
            The recognised text with surrounding whitespace stripped. If any
            step fails, the failure is logged and a string of the form
            ``"Error extracting text: <reason>"`` is returned instead of
            raising, so callers always receive a ``str``.
        """
        try:
            # The context manager releases the image's resources once OCR
            # has finished, even if a later step raises.
            with Image.open(io.BytesIO(image_content)) as image:
                # Basic preprocessing: shrink in place if too large
                # (thumbnail keeps the aspect ratio).
                limit = self.MAX_DIMENSION
                if image.width > limit or image.height > limit:
                    image.thumbnail((limit, limit))

                # Convert to grayscale for better OCR.
                grayscale = image.convert('L')
                text = pytesseract.image_to_string(grayscale)
            return text.strip()
        except Exception as e:
            # Deliberate best-effort boundary: report the problem (with
            # traceback) and hand the caller a descriptive string.
            logging.getLogger(__name__).exception("OCR Error: %s", e)
            return f"Error extracting text: {str(e)}"
# Shared module-level engine instance, created once at import time.
ocr_engine = OCREngine()