Spaces:

vietlethe
/

CV_parser

Runtime error

App Files Files Community

CV_parser / api.py

vietlethe

hf_port

971ae20 5 months ago

raw

history blame contribute delete

3.26 kB

	import os
	import tempfile
	import uuid
	from pathlib import Path
	from typing import List, Optional


	import httpx
	from google import genai

	from fastapi import FastAPI, File, HTTPException, UploadFile
	from fastapi.middleware.cors import CORSMiddleware
	from fastapi.staticfiles import StaticFiles
	from fastapi.responses import FileResponse
	import uvicorn

	from parser import parse_pdf, parse_image
	from docx_utils import convert_docx_to_pdf

	# Create the FastAPI application together with its descriptive metadata
	# (title, description and version string).
	app = FastAPI(
	    version="0.1.0",
	    title="CV Parser API",
	    description="API for parsing CVs from various document formats",
	)

	# Configure CORS
	# A wildcard origin must not be combined with credentialed requests: the
	# FastAPI/Starlette CORS documentation requires explicit origins, methods
	# and headers whenever allow_credentials is True. Origins stay open here,
	# so credentials are disabled; to allow cookies/authorization headers,
	# list the trusted origins explicitly and switch this back on.
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],  # Update for production
	    allow_credentials=False,
	    allow_methods=["*"],
	    allow_headers=["*"],
	)

	# Static assets live in a "static" folder next to this module; the folder
	# is created when missing and then exposed under the /static URL prefix.
	static_dir = Path(__file__).parent.joinpath("static")
	static_dir.mkdir(exist_ok=True)
	app.mount(
	    "/static",
	    StaticFiles(directory=str(static_dir)),
	    name="static",
	)

	# Allowed file types: accepted filename extension (lower-case, without the
	# dot) mapped to the corresponding MIME type.
	ALLOWED_EXTENSIONS = dict(
	    pdf="application/pdf",
	    docx="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
	    jpg="image/jpeg",
	    jpeg="image/jpeg",
	    png="image/png",
	)

	@app.get("/")
	async def root():
	    """Return ``index.html`` from the static directory as the web UI page."""
	    index_page = static_dir / "index.html"
	    return FileResponse(str(index_page))

	# Gemini client stored on the app object and passed to the parsers by the
	# upload handler. GEMINI_API_KEY must be set in the environment; a missing
	# variable raises KeyError while the module is being imported.
	app.client = genai.Client(
	    api_key=os.environ["GEMINI_API_KEY"],
	)

	@app.post("/upload/")
	async def upload_file(file: UploadFile = File(...)):
	    """
	    Upload a document for parsing (PDF, DOCX, or image).

	    The upload is written to a temporary file and dispatched on its filename
	    extension: PDFs go to ``parse_pdf``, DOCX files are converted with
	    ``convert_docx_to_pdf`` and then parsed as PDF, and JPG/JPEG/PNG files go
	    to ``parse_image``. All temporary files are removed before returning,
	    whether parsing succeeded or raised.

	    Args:
	        file: The uploaded document. Only its filename extension is used to
	            decide the type; it must be a key of ``ALLOWED_EXTENSIONS``.

	    Returns:
	        The value returned by ``parse_pdf`` / ``parse_image``, or a placeholder
	        dict for an allowed extension that has no parser branch yet.

	    Raises:
	        HTTPException: Status 400 when the extension is not allowed.
	    """
	    # The text after the last dot of the filename decides the file type.
	    file_ext = file.filename.split(".")[-1].lower() if file.filename else ""
	    if file_ext not in ALLOWED_EXTENSIONS:
	        raise HTTPException(
	            status_code=400,
	            detail=f"File type not supported. Supported types: {', '.join(ALLOWED_EXTENSIONS.keys())}"
	        )

	    contents = await file.read()

	    # Every path recorded here is deleted in ``finally``. The temp file is
	    # registered before it is written so a failed write cannot leak it.
	    cleanup_paths = []
	    try:
	        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file_ext}") as temp_file:
	            temp_file_path = temp_file.name
	            cleanup_paths.append(temp_file_path)
	            temp_file.write(contents)

	        if file_ext == "pdf":
	            return parse_pdf(temp_file_path, app.client)

	        if file_ext == "docx":
	            # Presumably the converter writes a sibling ".pdf" next to the
	            # upload (verify against docx_utils); register that location up
	            # front so it is removed even if the conversion raises midway.
	            cleanup_paths.append(Path(temp_file_path).with_suffix(".pdf"))
	            pdf_path = convert_docx_to_pdf(temp_file_path)
	            if pdf_path:
	                # Remove the file actually produced, wherever it was written.
	                cleanup_paths.append(pdf_path)
	            return parse_pdf(pdf_path, app.client)

	        if file_ext in ("jpg", "jpeg", "png"):
	            return parse_image(temp_file_path, app.client)

	        # Basic placeholder for allowed extensions without a parser branch
	        return {
	            "message": f"{file_ext.upper()} parsing not fully implemented yet",
	            "filename": file.filename,
	            "content_type": file.content_type,
	            "size": len(contents)
	        }
	    finally:
	        # missing_ok avoids the exists()/unlink() race and tolerates paths
	        # that were registered but never created (or listed twice).
	        for path in cleanup_paths:
	            Path(path).unlink(missing_ok=True)

	# Script entry point: start uvicorn for the "api:app" application on all
	# interfaces, port 7860, with auto-reload enabled.
	if __name__ == "__main__":
	    uvicorn.run(
	        "api:app",
	        host="0.0.0.0",
	        port=7860,
	        reload=True,
	    )