Desung committed on
Commit
9bf34b0
·
verified ·
1 Parent(s): 76fc6ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -0
app.py CHANGED
@@ -32,6 +32,16 @@ import asyncio
32
  from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
33
  import functools
34
  import traceback
 
 
 
 
 
 
 
 
 
 
35
 
36
  # ====================== Module 1: PDF/Image to Markdown ======================
37
  class PDFImageToMarkdown:
 
32
  from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
33
  import functools
34
  import traceback
35
+ # ====================== Hugging Face Tesseract 环境 ==========================
36
+ import pytesseract
37
+ import os
38
+
39
+ # 硬编码 Hugging Face 环境的 Tesseract 路径
40
+ pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'
41
+
42
+ # 验证路径是否存在(调试用)
43
+ if not os.path.exists(pytesseract.pytesseract.tesseract_cmd):
44
+ raise RuntimeError("Tesseract not found! Check packages.txt and rebuild.")
45
 
46
  # ====================== Module 1: PDF/Image to Markdown ======================
47
  class PDFImageToMarkdown: