File size: 3,880 Bytes
c2d8eab
 
b20f676
 
 
c2d8eab
b20f676
23e9906
b20f676
 
 
 
 
 
 
 
 
 
 
23e9906
b20f676
 
 
 
 
 
 
c2d8eab
b20f676
c2d8eab
 
 
 
 
 
 
 
 
b20f676
c2d8eab
 
 
 
 
b20f676
 
 
 
 
 
 
 
c2d8eab
b20f676
 
c2d8eab
 
b20f676
c2d8eab
 
b20f676
 
c2d8eab
b20f676
 
 
c2d8eab
b20f676
 
 
c2d8eab
b20f676
 
 
c2d8eab
b20f676
 
 
 
 
 
 
 
 
 
c2d8eab
b20f676
c2d8eab
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
from typing import List, Dict, Tuple, Any, Optional, Union
import sys  # Add sys import
import os  # Add os import

# --- Define/Import Pipeline Type FIRST ---
try:
    from sklearn.pipeline import Pipeline
except ImportError:
    Pipeline = object  # type: ignore

# Optional: add this file's directory to sys.path so that `utils` can be imported.
# Left disabled; whether it is needed depends on the execution context.
# sys.path.append(os.path.dirname(os.path.abspath(__file__)))

# Import the processing function from utils
try:
    # Assuming utils.py is in the same directory or PYTHONPATH is set correctly
    from utils import process_email_request, load_spacy_model, load_model_pipeline  # Import necessary functions
except ImportError as e:
    print(f"ERROR in api.py: Could not import from utils. Details: {e}")

    # Python unbinds `e` as soon as this except clause ends, so the fallback
    # below must not reference it directly; keep the message in a module-level
    # name that outlives the clause.
    _UTILS_IMPORT_ERROR = str(e)

    def process_email_request(email_body: str):
        """Fallback used when `utils` is unavailable; always reports the import failure.

        Args:
            email_body: Ignored; accepted only to match the real function's call shape.

        Returns:
            A dict with a single ``"error"`` key describing the failed import.
        """
        return {"error": f"Failed to import processing function: {_UTILS_IMPORT_ERROR}"}

    def load_spacy_model():
        """Fallback loader: logs that it was called and returns None."""
        print("Dummy spacy loader called")
        return None

    def load_model_pipeline():
        """Fallback loader: logs that it was called and returns None."""
        print("Dummy pipeline loader called")
        return None

# Application instance; the route and startup decorators below register against it.
# `title` and `version` are passed to FastAPI as the API's metadata.
app = FastAPI(title="Email PII Classifier API", version="1.0.0")

class EmailInput(BaseModel):
    """Request payload for /classify_email/: the raw email text to process."""

    # Required field (no default); the example is attached as schema metadata.
    email_body: str = Field(
        ...,
        example="Hello, my name is Jane Doe and my email is jane.doe@example.com. I have a billing question.",
    )

class MaskedEntity(BaseModel):
    """One entity that was masked in the email body."""

    # presumably [start, end] character offsets into the email body (the
    # example matches "Jane Doe" in the EmailInput example) — confirm against utils
    position: List[int] = Field(
        ...,
        example=[18, 26],
    )
    # Label assigned to the entity, e.g. "full_name".
    classification: str = Field(
        ...,
        example="full_name",
    )
    # The entity text itself, e.g. "Jane Doe".
    entity: str = Field(
        ...,
        example="Jane Doe",
    )

class EmailResponse(BaseModel):
    """Successful response body of /classify_email/.

    All four fields are required; the endpoint builds this model directly from
    the dict returned by ``process_email_request``.
    """

    # The email text as submitted.
    input_email_body: str

    # Entities found in the email, one MaskedEntity per item.
    list_of_masked_entities: List[MaskedEntity]

    # The email text after masking.
    masked_email: str

    # Category label assigned to the email.
    category_of_the_email: str

# --- Load models on startup ---
# It's often better to load models once when the app starts
@app.on_event("startup")
async def startup_event():
    """Invoke both model loaders once when the application starts."""
    print("FastAPI startup: Loading models...")
    # Same order as before: spaCy model first, then the classification pipeline.
    for run_loader in (load_spacy_model, load_model_pipeline):
        run_loader()
    print("FastAPI startup: Model loading complete.")

# --- API Endpoint ---
# Failures are always raised as HTTPException (FastAPI renders those itself),
# so the only body this handler ever returns is an EmailResponse.
@app.post("/classify_email/", response_model=EmailResponse)
async def classify_email(email_input: EmailInput):
    """
    Receives email body, performs PII masking and classification.

    Args:
        email_input: Request payload; its ``email_body`` is passed to
            ``process_email_request``.

    Returns:
        An ``EmailResponse`` built from the dict that
        ``process_email_request`` returns.

    Raises:
        HTTPException: status 500 when the processing result contains an
            ``"error"`` key (its value becomes the detail), or when any other
            exception occurs while handling the request.
    """
    try:
        print("Received request for /classify_email/")  # Log request
        # NOTE(review): this is a synchronous call inside an async handler; if
        # the processing is slow it will hold up the event loop — consider
        # offloading it once thread-safety of the models is confirmed.
        result = process_email_request(email_input.email_body)

        if "error" in result:
            # Processing reported a failure: surface it as a 500.
            raise HTTPException(status_code=500, detail=result["error"])

        # Constructing the model validates the result's structure; a malformed
        # result raises here and is converted to a 500 below.
        return EmailResponse(**result)

    except HTTPException:
        # Let deliberate HTTP errors (like the 500 above) pass through untouched.
        raise
    except Exception as e:
        print(f"Unexpected error in /classify_email endpoint: {e}")
        # Chain the original exception so server-side tracebacks show the cause.
        raise HTTPException(status_code=500, detail=f"An internal server error occurred: {str(e)}") from e

# --- Root Endpoint ---
@app.get("/")
async def read_root():
    """Return a static welcome message that points callers at /docs."""
    welcome = {
        "message": "Welcome to the Email PII Classifier API. Use the /docs endpoint for details.",
    }
    return welcome

# --- Optional: Add uvicorn runner for local testing ---
if __name__ == "__main__":
    # Imported here so uvicorn is only required when the file is run directly.
    import uvicorn

    print("Starting Uvicorn server directly (for debugging)...")
    # Listen on all interfaces, port 8000.
    server_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(app, **server_options)