|
|
|
|
|
|
|
|
from fastapi import FastAPI, UploadFile, File, HTTPException, BackgroundTasks |
|
|
from fastapi.responses import JSONResponse |
|
|
import shutil |
|
|
import os |
|
|
from pathlib import Path |
|
|
import uuid |
|
|
import sys |
|
|
|
|
|
from src.pipeline import process_invoice |
|
|
from src.schema import InvoiceData |
|
|
|
|
|
# FastAPI application object for this module; the title, description and
# version below are the API metadata handed to FastAPI.
app = FastAPI(
    title="Invoice Extraction API",
    description="Hybrid ML + Rule-Based Pipeline with LayoutLMv3",
    version="2.0",
)
|
|
|
|
|
|
|
|
# Scratch directory for uploaded files. The path is relative, so it resolves
# against the working directory of the process.
UPLOAD_DIR = Path("temp_uploads")
# Created at import time; exist_ok=True makes a pre-existing directory a no-op
# rather than an error.
UPLOAD_DIR.mkdir(exist_ok=True)
|
|
|
|
|
def cleanup_file(path: str) -> None:
    """Remove a temporary file, never raising.

    Intended as a best-effort cleanup step: a missing file is treated as
    already cleaned up, and any other failure is reported on stdout instead
    of being propagated to the caller.

    Args:
        path: Filesystem path of the file to delete.
    """
    try:
        # Attempt the removal directly instead of checking existence first,
        # so there is no window between the check and the delete.
        os.remove(path)
    except FileNotFoundError:
        # Nothing to remove; this is the normal "already gone" case.
        return
    except Exception as e:
        # Deliberately broad: cleanup must not turn into a new failure.
        print(f"Error cleaning up {path}: {e}")
|
|
|
|
|
@app.post("/extract", response_model=InvoiceData)
async def extract_invoice(
    background_tasks: BackgroundTasks,
    file: UploadFile = File(...)
):
    """
    Upload an invoice (PDF/JPG/PNG) and get structured data.
    """
    # Implementation notes are kept in comments (not the docstring) so the
    # docstring text stays exactly as it was.
    #
    # Flow: copy the upload to a uniquely named file under UPLOAD_DIR, run
    # process_invoice on it with method='ml', return its result, and delete
    # the temp file afterwards.
    #
    # Parameters:
    #   background_tasks: used to schedule deletion of the temp file on the
    #       success path.
    #   file: the uploaded invoice.
    # Returns: the value returned by process_invoice.
    # Raises: HTTPException(500) carrying str(error) as detail when saving
    #   or processing fails.

    # filename can be absent; fall back to "" so the temp file simply has no
    # extension instead of Path(None) raising before the error handling below.
    file_ext = Path(file.filename or "").suffix
    # A fresh UUID keeps uploads from colliding; only the suffix of the
    # client-supplied name is reused.
    temp_path = UPLOAD_DIR / f"{uuid.uuid4()}{file_ext}"

    try:
        with open(temp_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        # NOTE(review): process_invoice is called synchronously inside an
        # async handler; if it is slow it presumably holds up the event loop.
        # Consider offloading to a worker thread once its thread-safety is
        # confirmed.
        result = process_invoice(str(temp_path), method='ml')
    except Exception as e:
        # Failure path: remove the temp file right away, then surface the
        # error as a 500 while keeping the original exception as the cause.
        cleanup_file(str(temp_path))
        raise HTTPException(status_code=500, detail=str(e)) from e
    else:
        # Success path: hand the deletion to a background task.
        background_tasks.add_task(cleanup_file, str(temp_path))
        return result