Spaces:

chipling
/

paddleocr

Build error

chipling committed 18 days ago

Commit

d26cc23

verified ·

1 Parent(s): 003f73a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,9 +7,12 @@ from fastapi import FastAPI, UploadFile, File, HTTPException
 # --- CPU OPTIMIZATION FLAGS ---
 # Limit threads for HF Free Tier (2 vCPUs)
 os.environ["OMP_NUM_THREADS"] = "2"
-# MKLDNN works perfectly with PPStructureV3 for a massive Intel speedup
-# IMPORT THE NEW V3 PIPELINE
 from paddleocr import PPStructureV3
 app = FastAPI(
@@ -17,7 +20,7 @@ app = FastAPI(
     description="Lightweight PP-StructureV3 extraction"
 )
-print("Initializing PP-StructureV3 (MKLDNN Enabled)...")
 pipeline = PPStructureV3()
 print("Pipeline ready!")
@@ -34,12 +37,11 @@ async def ingest_document(file: UploadFile = File(...)):
             with open(file_path, "wb") as buffer:
                 shutil.copyfileobj(file.file, buffer)
-            # 2. Predict (takes the file path directly, no cv2 needed!)
             output = pipeline.predict(file_path)
             parsed_pages = []
             for page_num, res in enumerate(output):
-                # We can save to the temp directory exactly like the VLM pipeline
                 md_path = os.path.join(temp_dir, f"page_{page_num + 1}.md")
                 json_path = os.path.join(temp_dir, f"page_{page_num + 1}.json")

 # --- CPU OPTIMIZATION FLAGS ---
 # Limit threads for HF Free Tier (2 vCPUs)
 os.environ["OMP_NUM_THREADS"] = "2"
+# THE MAGIC FIX: Disable the buggy PIR compiler but KEEP MKLDNN!
+os.environ["FLAGS_enable_pir_api"] = "0"
+os.environ["FLAGS_use_mkldnn"] = "1"
+# IMPORT THE NEW V3 PIPELINE (Must be imported AFTER setting the flags)
 from paddleocr import PPStructureV3
 app = FastAPI(
     description="Lightweight PP-StructureV3 extraction"
 )
+print("Initializing PP-StructureV3 (MKLDNN Enabled via AST Executor)...")
 pipeline = PPStructureV3()
 print("Pipeline ready!")
             with open(file_path, "wb") as buffer:
                 shutil.copyfileobj(file.file, buffer)
+            # 2. Predict
             output = pipeline.predict(file_path)
             parsed_pages = []
             for page_num, res in enumerate(output):
                 md_path = os.path.join(temp_dir, f"page_{page_num + 1}.md")
                 json_path = os.path.join(temp_dir, f"page_{page_num + 1}.json")