# app.py
# The core FastAPI application for our IGUDAR model

import joblib
import pandas as pd
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field

import warnings
warnings.filterwarnings('ignore')  # silence noisy library warnings (e.g. version-mismatch notices at load time)

# --- 1. DEFINE APP AND LOAD MODELS ---

# Initialize the FastAPI app
app = FastAPI(
    title="IGUDAR AI Valuation API",
    description="An API to serve the trained property valuation model for Moroccan real estate.",
    version="1.0"
)

# Load the trained model and preprocessing objects at startup
# This ensures they are loaded only once, making the API fast.
try:
    model = joblib.load("./models/valuation_model.joblib")
    preprocessing = joblib.load("./models/preprocessing_objects.joblib")
    
    # Extract the individual objects from the preprocessing file
    scaler = preprocessing['scaler']
    label_encoders = preprocessing['label_encoders']
    feature_names = preprocessing['feature_names']
    
    print("✅ Models and preprocessing objects loaded successfully.")

except FileNotFoundError:
    print("❌ ERROR: Model or preprocessing files not found. Ensure they are in the ./models directory.")
    # Define every name as None so the endpoint can fail gracefully with a clear
    # HTTP error instead of a NameError on the first request.
    model = None
    scaler = label_encoders = feature_names = None
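
# For reference, this assumes the training script bundled the preprocessing
# objects into a single dict before saving — a sketch only; the exact keys must
# match whatever the training code actually wrote:
#
#   joblib.dump(
#       {"scaler": scaler, "label_encoders": label_encoders, "feature_names": feature_names},
#       "./models/preprocessing_objects.joblib",
#   )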

# --- 2. DEFINE THE INPUT DATA MODEL ---

# Pydantic model for input data validation.
# This tells FastAPI what the incoming JSON should look like.
class PropertyFeatures(BaseModel):
    size_m2: float = Field(..., gt=0)  # must be positive: it is used as a divisor below
    bedrooms: int
    bathrooms: int
    age_years: int
    property_type: str
    city: str
    infrastructure_score: float
    economic_score: float
    lifestyle_score: float
    investment_score: float
    neighborhood_tier: int
    total_amenities: int
    data_quality: float = 0.9      # default when omitted
    has_coordinates: bool = True   # default when omitted
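
# An example request body this model accepts (values are illustrative only; the
# property_type and city strings must match categories the label encoders saw
# during training):
#
#   {
#       "size_m2": 120, "bedrooms": 3, "bathrooms": 2, "age_years": 8,
#       "property_type": "apartment", "city": "Casablanca",
#       "infrastructure_score": 72.5, "economic_score": 65.0,
#       "lifestyle_score": 70.0, "investment_score": 68.0,
#       "neighborhood_tier": 2, "total_amenities": 25
#   }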

# --- 3. CREATE THE PREDICTION ENDPOINT ---

@app.post("/valuation")
def predict_valuation(property_data: PropertyFeatures):
    """
    Predicts the value of a property based on its features.
    Accepts a JSON object with property details and returns a prediction.
    """
    if model is None:
        raise HTTPException(status_code=503, detail="Model is not loaded. Check server logs.")

    # Convert the validated Pydantic model to a plain dict
    # (on Pydantic v2, prefer .model_dump() over the deprecated .dict())
    data_dict = property_data.dict()
    
    # Start with a dictionary of all zeros for our feature vector
    features = {name: 0 for name in feature_names}

    # --- Feature Engineering (must EXACTLY match the training script) ---
    
    # 1. Direct mapping. Pydantic has already validated the payload, so every key
    #    is guaranteed to exist; the age clamp mirrors the training script.
    features.update({
        'size_m2': data_dict['size_m2'],
        'bedrooms': data_dict['bedrooms'],
        'bathrooms': data_dict['bathrooms'],
        'age_years': min(data_dict['age_years'], 50),
        'infrastructure_score': data_dict['infrastructure_score'],
        'economic_score': data_dict['economic_score'],
        'lifestyle_score': data_dict['lifestyle_score'],
        'investment_score': data_dict['investment_score'],
        'neighborhood_tier': data_dict['neighborhood_tier'],
        'total_amenities': data_dict['total_amenities'],
        'data_quality': data_dict['data_quality']
    })
    
    # 2. Calculated features
    features['room_density'] = min((features['bedrooms'] + features['bathrooms']) / features['size_m2'], 0.2)
    features['amenity_density'] = min(features['total_amenities'] / features['size_m2'], 2)
    features['location_quality'] = (features['infrastructure_score'] * 0.4 + 
                                   features['economic_score'] * 0.3 + 
                                   features['lifestyle_score'] * 0.3)
    features['investment_attractiveness'] = ((5 - features['neighborhood_tier']) * 20 + 
                                            features['location_quality'] * 0.5 + 
                                            (10 if data_dict.get('has_coordinates', True) else 0) +
                                            (features['data_quality'] * 20))
                                            
    # NOTE: Aggregate features such as 'city_median_size' and 'city_infra_avg' were
    # computed over the whole training set and cannot be derived from a single request,
    # so they stay at 0 here. This train/serve mismatch is a common deployment challenge;
    # a better approach is to pre-compute per-city statistics during training and look
    # them up at serving time, as sketched below. For this demo, zeros are acceptable.
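    #
    # A minimal sketch of that lookup, assuming a hypothetical
    # './models/city_stats.joblib' file saved by the training script:
    #
    #   CITY_STATS = joblib.load("./models/city_stats.joblib")  # load once at startup
    #   stats = CITY_STATS.get(data_dict['city'], {})
    #   features['city_median_size'] = stats.get('median_size', features['size_m2'])
    #   features['city_infra_avg'] = stats.get('infra_avg', features['infrastructure_score'])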

    # 3. Categorical encoding
    for col, le in label_encoders.items():
        encoded_col_name = f"{col}_encoded"
        if encoded_col_name in features:
            value = data_dict.get(col)
            try:
                # Use the fitted encoder to transform the input string
                features[encoded_col_name] = le.transform([value])[0]
            except ValueError as e:
                # Unseen category: fall back to 0 (a crude but explicit strategy)
                print(f"Warning: could not encode '{value}' for '{col}'; defaulting to 0. ({e})")
                features[encoded_col_name] = 0

    # Create a DataFrame in the exact order of feature_names
    df = pd.DataFrame([features])[feature_names]

    # Scale the features using the loaded scaler
    df_scaled = scaler.transform(df)

    # Make the prediction. model.predict returns a numpy array; cast the scalar
    # to a plain float so the JSON response serializes cleanly.
    prediction = float(model.predict(df_scaled)[0])

    # Post-process for a clean response: enforce a minimum realistic price
    predicted_price = round(max(200_000, prediction))

    return {
        "predicted_price_mad": predicted_price,
        "predicted_price_per_m2": round(predicted_price / data_dict['size_m2']),
        "model_used": "igudar_valuation_v1_xgboost"
    }

@app.get("/")
def read_root():
    return {"message": "Welcome to the IGUDAR AI Valuation API. Use the /docs endpoint to test."}