Update app.py
Browse files
app.py
CHANGED
|
@@ -32,6 +32,16 @@ import asyncio
|
|
| 32 |
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
|
| 33 |
import functools
|
| 34 |
import traceback
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
# ====================== Module 1: PDF/Image to Markdown ======================
|
| 37 |
class PDFImageToMarkdown:
|
|
|
|
| 32 |
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
|
| 33 |
import functools
|
| 34 |
import traceback
|
| 35 |
+
# ====================== Hugging Face Tesseract environment ==========================
# Point pytesseract at the Tesseract executable at import time, and fail fast
# with a clear error if no executable can be located.
import os
import shutil

import pytesseract

# Hard-coded Tesseract path for the Hugging Face environment.
_HF_TESSERACT_CMD = r'/usr/bin/tesseract'

# Prefer the hard-coded path so behavior is unchanged wherever it already
# worked; otherwise fall back to whatever `tesseract` is on PATH so the module
# can still be imported on hosts that install the binary elsewhere.
if os.path.exists(_HF_TESSERACT_CMD):
    _tesseract_cmd = _HF_TESSERACT_CMD
else:
    _tesseract_cmd = shutil.which("tesseract")

# Verify that an executable was found (useful when debugging a deployment).
if _tesseract_cmd is None:
    raise RuntimeError("Tesseract not found! Check packages.txt and rebuild.")

pytesseract.pytesseract.tesseract_cmd = _tesseract_cmd
|
| 45 |
|
| 46 |
# ====================== Module 1: PDF/Image to Markdown ======================
|
| 47 |
class PDFImageToMarkdown:
|