chipling committed on
Commit
d26cc23
·
verified ·
1 Parent(s): 003f73a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -5
app.py CHANGED
@@ -7,9 +7,12 @@ from fastapi import FastAPI, UploadFile, File, HTTPException
7
  # --- CPU OPTIMIZATION FLAGS ---
8
  # Limit threads for HF Free Tier (2 vCPUs)
9
  os.environ["OMP_NUM_THREADS"] = "2"
10
- # MKLDNN works perfectly with PPStructureV3 for a massive Intel speedup
11
 
12
- # IMPORT THE NEW V3 PIPELINE
 
 
 
 
13
  from paddleocr import PPStructureV3
14
 
15
  app = FastAPI(
@@ -17,7 +20,7 @@ app = FastAPI(
17
  description="Lightweight PP-StructureV3 extraction"
18
  )
19
 
20
- print("Initializing PP-StructureV3 (MKLDNN Enabled)...")
21
  pipeline = PPStructureV3()
22
  print("Pipeline ready!")
23
 
@@ -34,12 +37,11 @@ async def ingest_document(file: UploadFile = File(...)):
34
  with open(file_path, "wb") as buffer:
35
  shutil.copyfileobj(file.file, buffer)
36
 
37
- # 2. Predict (takes the file path directly, no cv2 needed!)
38
  output = pipeline.predict(file_path)
39
 
40
  parsed_pages = []
41
  for page_num, res in enumerate(output):
42
- # We can save to the temp directory exactly like the VLM pipeline
43
  md_path = os.path.join(temp_dir, f"page_{page_num + 1}.md")
44
  json_path = os.path.join(temp_dir, f"page_{page_num + 1}.json")
45
 
 
7
  # --- CPU OPTIMIZATION FLAGS ---
8
  # Limit threads for HF Free Tier (2 vCPUs)
9
  os.environ["OMP_NUM_THREADS"] = "2"
 
10
 
11
+ # THE MAGIC FIX: Disable the buggy PIR compiler but KEEP MKLDNN!
12
+ os.environ["FLAGS_enable_pir_api"] = "0"
13
+ os.environ["FLAGS_use_mkldnn"] = "1"
14
+
15
+ # IMPORT THE NEW V3 PIPELINE (Must be imported AFTER setting the flags)
16
  from paddleocr import PPStructureV3
17
 
18
  app = FastAPI(
 
20
  description="Lightweight PP-StructureV3 extraction"
21
  )
22
 
23
+ print("Initializing PP-StructureV3 (MKLDNN Enabled via AST Executor)...")
24
  pipeline = PPStructureV3()
25
  print("Pipeline ready!")
26
 
 
37
  with open(file_path, "wb") as buffer:
38
  shutil.copyfileobj(file.file, buffer)
39
 
40
+ # 2. Predict
41
  output = pipeline.predict(file_path)
42
 
43
  parsed_pages = []
44
  for page_num, res in enumerate(output):
 
45
  md_path = os.path.join(temp_dir, f"page_{page_num + 1}.md")
46
  json_path = os.path.join(temp_dir, f"page_{page_num + 1}.json")
47