# Hugging Face Space: vietlethe/CV_parser
# Space status when captured: Runtime error
# File size: 3,257 bytes

import os
import tempfile
import uuid
from pathlib import Path
from typing import List, Optional


import httpx
from google import genai

from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
import uvicorn

from parser import parse_pdf, parse_image
from docx_utils import convert_docx_to_pdf

# Initialize FastAPI app (title, description and version are passed as API metadata)
app = FastAPI(
    title="CV Parser API",
    description="API for parsing CVs from various document formats",
    version="0.1.0",
)

# Configure CORS: every origin, method and header is allowed, with credentials.
# NOTE(review): the FastAPI CORS documentation appears to disallow combining a
# wildcard origin with allow_credentials=True -- confirm, and list explicit
# origins before production use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Update for production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount static directory: "static" next to this file is created at import time
# if it does not exist yet, then mounted under the /static URL prefix.
static_dir = Path(__file__).parent / "static"
static_dir.mkdir(exist_ok=True)
app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")

# Upload extensions accepted by the API, each mapped to its MIME type.
# "jpg" and "jpeg" are aliases for the same JPEG MIME type.
ALLOWED_EXTENSIONS = {
    "pdf": "application/pdf",
    "docx": (
        "application/vnd.openxmlformats-officedocument"
        ".wordprocessingml.document"
    ),
    **dict.fromkeys(("jpg", "jpeg"), "image/jpeg"),
    "png": "image/png",
}

@app.get("/")
async def root():
    """Return ``index.html`` from the static directory as the web UI page."""
    index_page = static_dir / "index.html"
    return FileResponse(str(index_page))

# Gemini client created once at import time and stored as an attribute on the
# app object; upload_file passes it to the parser functions.
# os.environ[...] raises KeyError here if GEMINI_API_KEY is not set.
app.client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

def _file_extension(filename: Optional[str]) -> str:
    """Return the lower-cased text after the last dot of *filename*, or "" if it has no dot."""
    if not filename:
        return ""
    _, dot, ext = filename.rpartition(".")
    # Without a dot there is no extension; a bare name such as "pdf" must not
    # be mistaken for a PDF file.
    return ext.lower() if dot else ""


def _remove_files(paths: List[str]) -> None:
    """Delete every path in *paths*, skipping those that no longer exist."""
    for path in paths:
        Path(path).unlink(missing_ok=True)


@app.post("/upload/")
async def upload_file(file: UploadFile = File(...)):
    """
    Upload a document for parsing (PDF, DOCX, or image)

    The upload is written to a temporary file, handed to the matching parser
    together with ``app.client``, and every temporary file is removed again
    before the response is returned, whether parsing succeeded or not.

    Args:
        file: The uploaded document. Its type is decided from the filename
            extension only; the declared content type is not validated.

    Returns:
        Whatever ``parse_pdf`` / ``parse_image`` return for the document.

    Raises:
        HTTPException: 400 if the filename extension is not a key of
            ``ALLOWED_EXTENSIONS``.
    """
    # Check the file extension (the content type is not inspected)
    file_ext = _file_extension(file.filename)
    if file_ext not in ALLOWED_EXTENSIONS:
        raise HTTPException(
            status_code=400,
            detail=f"File type not supported. Supported types: {', '.join(ALLOWED_EXTENSIONS.keys())}"
        )

    contents = await file.read()

    # Every path registered here is deleted in the finally block. Paths are
    # registered as soon as they may exist so nothing leaks on failure.
    temp_paths: List[str] = []
    try:
        # Create temp file to save the uploaded content
        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file_ext}") as temp_file:
            temp_file_path = temp_file.name
            temp_paths.append(temp_file_path)
            temp_file.write(contents)

        if file_ext == "pdf":
            return parse_pdf(temp_file_path, app.client)

        if file_ext == "docx":
            # Register the sibling "<name>.pdf" before converting, so a
            # partially written output is removed even if conversion raises.
            temp_paths.append(str(Path(temp_file_path).with_suffix(".pdf")))
            pdf_path = convert_docx_to_pdf(temp_file_path)
            # Also remove the path the converter actually reported.
            # NOTE(review): assumes convert_docx_to_pdf returns the path of
            # the PDF it created -- confirm against docx_utils.
            if pdf_path:
                temp_paths.append(pdf_path)
            return parse_pdf(pdf_path, app.client)

        if file_ext in ("jpg", "jpeg", "png"):
            return parse_image(temp_file_path, app.client)

        # Only reached if ALLOWED_EXTENSIONS gains a type that has no parser
        # branch above; basic placeholder for such file types.
        return {
            "message": f"{file_ext.upper()} parsing not fully implemented yet",
            "filename": file.filename,
            "content_type": file.content_type,
            "size": len(contents)
        }
    finally:
        # Clean up the temp files
        _remove_files(temp_paths)

if __name__ == "__main__":
    # Script entry point: serve the app object of the "api" module on all
    # interfaces, port 7860, with auto-reload enabled.
    uvicorn.run(
        "api:app",
        host="0.0.0.0",
        port=7860,
        reload=True,
    )