# app.py
# The core FastAPI application for our IGUDAR model
import joblib
import pandas as pd
import numpy as np
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
import warnings
warnings.filterwarnings('ignore')

# --- 1. DEFINE APP AND LOAD MODELS ---

# Initialize the FastAPI app
app = FastAPI(
    title="IGUDAR AI Valuation API",
    description="An API to serve the trained property valuation model for Moroccan real estate.",
    version="1.0"
)

# Load the trained model and preprocessing objects at startup.
# This ensures they are loaded only once, keeping per-request latency low.
try:
    model = joblib.load("./models/valuation_model.joblib")
    preprocessing = joblib.load("./models/preprocessing_objects.joblib")
    # Extract the individual objects from the preprocessing file
    scaler = preprocessing['scaler']
    label_encoders = preprocessing['label_encoders']
    feature_names = preprocessing['feature_names']
    print("✅ Models and preprocessing objects loaded successfully.")
except FileNotFoundError:
    print("❌ ERROR: Model or preprocessing files not found. Ensure they are in the ./models directory.")
    model = None  # Set to None so the endpoint below can fail gracefully

# --- 2. DEFINE THE INPUT DATA MODEL ---

# Pydantic model for input data validation.
# This tells FastAPI what the incoming JSON should look like.
class PropertyFeatures(BaseModel):
    size_m2: float = Field(..., gt=0)  # must be positive: used as a divisor in the density features below
    bedrooms: int
    bathrooms: int
    age_years: int
    property_type: str
    city: str
    infrastructure_score: float
    economic_score: float
    lifestyle_score: float
    investment_score: float
    neighborhood_tier: int
    total_amenities: int
    data_quality: float = 0.9     # Default value
    has_coordinates: bool = True  # Default value
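
# A sample request body for the /valuation endpoint below (values are
# illustrative only; 'property_type' and 'city' must be categories the label
# encoders saw during training, otherwise they fall back to 0):
#
#   {
#       "size_m2": 120.0,
#       "bedrooms": 3,
#       "bathrooms": 2,
#       "age_years": 5,
#       "property_type": "apartment",
#       "city": "Casablanca",
#       "infrastructure_score": 65.0,
#       "economic_score": 70.0,
#       "lifestyle_score": 60.0,
#       "investment_score": 68.0,
#       "neighborhood_tier": 2,
#       "total_amenities": 25
#   }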

# --- 3. CREATE THE PREDICTION ENDPOINT ---
@app.post("/valuation")
def predict_valuation(property_data: PropertyFeatures):
    """
    Predicts the value of a property based on its features.
    Accepts a JSON object with property details and returns a prediction.
    """
    if model is None:
        raise HTTPException(status_code=500, detail="Model is not loaded. Check server logs.")

    # Convert the incoming Pydantic model to a dictionary
    # (.dict() is the Pydantic v1 API; on Pydantic v2, use .model_dump())
    data_dict = property_data.dict()

    # Start with a feature vector of all zeros
    features = {name: 0 for name in feature_names}

    # --- Feature Engineering (must EXACTLY match the training script) ---
    # 1. Direct mapping
    features.update({
        'size_m2': data_dict.get('size_m2', 100),
        'bedrooms': data_dict.get('bedrooms', 2),
        'bathrooms': data_dict.get('bathrooms', 1),
        'age_years': min(data_dict.get('age_years', 5), 50),  # cap age at 50 years
        'infrastructure_score': data_dict.get('infrastructure_score', 50),
        'economic_score': data_dict.get('economic_score', 50),
        'lifestyle_score': data_dict.get('lifestyle_score', 50),
        'investment_score': data_dict.get('investment_score', 50),
        'neighborhood_tier': data_dict.get('neighborhood_tier', 3),
        'total_amenities': data_dict.get('total_amenities', 20),
        'data_quality': data_dict.get('data_quality', 0.8)
    })

    # 2. Calculated features
    features['room_density'] = min((features['bedrooms'] + features['bathrooms']) / features['size_m2'], 0.2)
    features['amenity_density'] = min(features['total_amenities'] / features['size_m2'], 2)
    features['location_quality'] = (features['infrastructure_score'] * 0.4 +
                                    features['economic_score'] * 0.3 +
                                    features['lifestyle_score'] * 0.3)
    features['investment_attractiveness'] = ((5 - features['neighborhood_tier']) * 20 +
                                             features['location_quality'] * 0.5 +
                                             (10 if data_dict.get('has_coordinates', True) else 0) +
                                             (features['data_quality'] * 20))

    # NOTE: 'city_median_size' and 'city_infra_avg' cannot be computed from a single
    # request, so they stay at 0 here. This is a common deployment challenge; a better
    # approach is to look them up from a dictionary of per-city statistics pre-computed
    # during training. For this demo, leaving them at 0 is acceptable.
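    # A minimal sketch of that lookup, assuming a hypothetical 'city_stats.joblib'
    # artifact saved alongside the other preprocessing objects at training time:
    #
    #   city_stats = joblib.load("./models/city_stats.joblib")
    #   stats = city_stats.get(data_dict['city'], {})  # per-city statistics
    #   features['city_median_size'] = stats.get('median_size', 0)
    #   features['city_infra_avg'] = stats.get('infra_avg', 0)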

    # 3. Categorical encoding
    for col, le in label_encoders.items():
        encoded_col_name = f"{col}_encoded"
        if encoded_col_name in features:
            value = data_dict.get(col)  # assigned outside the try so the warning below can reference it
            try:
                # Use the fitted encoder to transform the input string
                features[encoded_col_name] = le.transform([value])[0]
            except Exception as e:
                # If the category is new/unseen, default to 0 (or another strategy)
                print(f"Warning: Could not encode '{value}' for feature '{col}'. Defaulting to 0. Error: {e}")
                features[encoded_col_name] = 0

    # Create a one-row DataFrame in the exact order of feature_names
    df = pd.DataFrame([features])[feature_names]

    # Scale the features using the scaler fitted during training
    df_scaled = scaler.transform(df)

    # Make the prediction (cast the NumPy scalar to float so the response is JSON-serializable)
    prediction = float(model.predict(df_scaled)[0])

    # Post-process for a clean response
    predicted_price = round(max(200000, prediction), 0)  # Apply a minimum realistic price (MAD)

    return {
        "predicted_price_mad": predicted_price,
        "predicted_price_per_m2": round(predicted_price / data_dict['size_m2'], 0),
        "model_used": "igudar_valuation_v1_xgboost"
    }

@app.get("/")
def read_root():
    return {"message": "Welcome to the IGUDAR AI Valuation API. Use the /docs endpoint to test."}