dcorcoran committed on
Commit
5b7ea84
·
1 Parent(s): df9c25c

Changed Tesseract download path

Browse files
Files changed (2) hide show
  1. Dockerfile +1 -5
  2. app/services/ocr_service.py +9 -4
Dockerfile CHANGED
@@ -3,11 +3,7 @@ FROM python:3.11-slim
3
  WORKDIR /app
4
 
5
  # Install system dependencies
6
- RUN apt-get update && apt-get install -y \
7
- tesseract-ocr \
8
- libgl1 \
9
- libglib2.0-0 \
10
- && rm -rf /var/lib/apt/lists/*
11
 
12
  # Install Python dependencies
13
  COPY requirements.txt .
 
3
  WORKDIR /app
4
 
5
  # Install system dependencies
6
+ RUN apt-get update && apt-get install -y tesseract-ocr
 
 
 
 
7
 
8
  # Install Python dependencies
9
  COPY requirements.txt .
app/services/ocr_service.py CHANGED
@@ -1,15 +1,20 @@
1
  import pytesseract
2
  import re
3
  from PIL import Image
 
 
4
 
5
- from app.config import settings
6
-
7
- pytesseract.pytesseract.tesseract_cmd = settings.TESSERACT_PATH
8
 
9
  class OCRService:
10
 
11
  def __init__(self):
12
- print("OCR service initialized.")
 
 
 
 
 
 
13
 
14
  def extract(self, image: Image.Image) -> dict:
15
  w, h = image.size
 
1
  import pytesseract
2
  import re
3
  from PIL import Image
4
+ import sys
5
+ import os
6
 
 
 
 
7
 
8
  class OCRService:
9
 
10
  def __init__(self):
11
+ # Auto-detect tesseract path
12
+ if sys.platform.startswith("win"):
13
+ pytesseract.pytesseract.tesseract_cmd = os.getenv("TESSERACT_PATH", "C:/Program Files/Tesseract-OCR/tesseract.exe")
14
+ else:
15
+ # Linux / Hugging Face Spaces
16
+ pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
17
+ print(f"OCR service initialized. Using Tesseract at {pytesseract.pytesseract.tesseract_cmd}")
18
 
19
  def extract(self, image: Image.Image) -> dict:
20
  w, h = image.size