# app.py
# The core FastAPI application for our IGUDAR model
import joblib
import pandas as pd
import numpy as np
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
import warnings
warnings.filterwarnings('ignore')
# --- 1. DEFINE APP AND LOAD MODELS ---
# Initialize the FastAPI app
app = FastAPI(
title="IGUDAR AI Valuation API",
description="An API to serve the trained property valuation model for Moroccan real estate.",
version="1.0"
)
# Load the trained model and preprocessing objects at startup
# This ensures they are loaded only once, making the API fast.
try:
model = joblib.load("./models/valuation_model.joblib")
preprocessing = joblib.load("./models/preprocessing_objects.joblib")
# Extract the individual objects from the preprocessing file
scaler = preprocessing['scaler']
label_encoders = preprocessing['label_encoders']
feature_names = preprocessing['feature_names']
print("✅ Models and preprocessing objects loaded successfully.")
except FileNotFoundError:
    print("❌ ERROR: Model or preprocessing files not found. Ensure they are in the ./models directory.")
    # Set everything to None so the endpoint can fail gracefully instead of raising NameError
    model = scaler = label_encoders = feature_names = None
# --- 2. DEFINE THE INPUT DATA MODEL ---
# Pydantic model for input data validation.
# This tells FastAPI what the incoming JSON should look like.
class PropertyFeatures(BaseModel):
    size_m2: float = Field(gt=0)  # must be positive: it is used as a divisor below
bedrooms: int
bathrooms: int
age_years: int
property_type: str
city: str
infrastructure_score: float
economic_score: float
lifestyle_score: float
investment_score: float
neighborhood_tier: int
total_amenities: int
data_quality: float = 0.9 # Default value
has_coordinates: bool = True # Default value
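# Example of a valid request body for the /valuation endpoint
# (field values are illustrative, not taken from real listings):
# {
#   "size_m2": 120, "bedrooms": 3, "bathrooms": 2, "age_years": 8,
#   "property_type": "apartment", "city": "Casablanca",
#   "infrastructure_score": 70, "economic_score": 65, "lifestyle_score": 60,
#   "investment_score": 55, "neighborhood_tier": 2, "total_amenities": 25
# }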
# --- 3. CREATE THE PREDICTION ENDPOINT ---
@app.post("/valuation")
def predict_valuation(property_data: PropertyFeatures):
"""
Predicts the value of a property based on its features.
Accepts a JSON object with property details and returns a prediction.
"""
if model is None:
raise HTTPException(status_code=500, detail="Model is not loaded. Check server logs.")
    # Convert the incoming Pydantic model to a dictionary
    # (model_dump() is the Pydantic v2 method; on v1, use property_data.dict())
    data_dict = property_data.model_dump()
# Start with a dictionary of all zeros for our feature vector
features = {name: 0 for name in feature_names}
# --- Feature Engineering (must EXACTLY match the training script) ---
# 1. Direct mapping
features.update({
'size_m2': data_dict.get('size_m2', 100),
'bedrooms': data_dict.get('bedrooms', 2),
'bathrooms': data_dict.get('bathrooms', 1),
'age_years': min(data_dict.get('age_years', 5), 50),
'infrastructure_score': data_dict.get('infrastructure_score', 50),
'economic_score': data_dict.get('economic_score', 50),
'lifestyle_score': data_dict.get('lifestyle_score', 50),
'investment_score': data_dict.get('investment_score', 50),
'neighborhood_tier': data_dict.get('neighborhood_tier', 3),
'total_amenities': data_dict.get('total_amenities', 20),
'data_quality': data_dict.get('data_quality', 0.8)
})
# 2. Calculated features
features['room_density'] = min((features['bedrooms'] + features['bathrooms']) / features['size_m2'], 0.2)
features['amenity_density'] = min(features['total_amenities'] / features['size_m2'], 2)
features['location_quality'] = (features['infrastructure_score'] * 0.4 +
features['economic_score'] * 0.3 +
features['lifestyle_score'] * 0.3)
features['investment_attractiveness'] = ((5 - features['neighborhood_tier']) * 20 +
features['location_quality'] * 0.5 +
(10 if data_dict.get('has_coordinates', True) else 0) +
(features['data_quality'] * 20))
    # NOTE: We can't compute 'city_median_size' or 'city_infra_avg' from a single
    # prediction request, so for now we leave them at 0. This is a common deployment
    # challenge; a better approach is to look the values up in a pre-computed
    # dictionary of per-city statistics (sketched below). For this demo, 0 is acceptable.
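    # A minimal sketch of that lookup approach, left commented out because the numbers
    # here are illustrative placeholders, not statistics computed during training:
    # CITY_STATS = {"Casablanca": {"median_size": 95.0, "infra_avg": 70.0}}
    # stats = CITY_STATS.get(data_dict["city"], {"median_size": 0.0, "infra_avg": 0.0})
    # features["city_median_size"] = stats["median_size"]
    # features["city_infra_avg"] = stats["infra_avg"]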
# 3. Categorical Encoding
    for col, le in label_encoders.items():
        encoded_col_name = f"{col}_encoded"
        if encoded_col_name in features:
            value = data_dict.get(col)  # bind before the try so the except block can report it
            try:
                # Use the loaded encoder to transform the input string
                features[encoded_col_name] = le.transform([value])[0]
            except Exception as e:
                # If the category is new/unseen, default to 0 (or choose another strategy)
                print(f"Warning: Could not encode '{value}' for feature '{col}'. Defaulting to 0. Error: {e}")
                features[encoded_col_name] = 0
# Create a DataFrame in the exact order of feature_names
df = pd.DataFrame([features])[feature_names]
# Scale the features using the loaded scaler
df_scaled = scaler.transform(df)
# Make the prediction
prediction = model.predict(df_scaled)[0]
    # Post-process for a clean response: cast the numpy scalar to a plain float
    # (so the JSON response can be serialized) and apply a minimum realistic price
    predicted_price = round(max(200000, float(prediction)), 0)
return {
"predicted_price_mad": predicted_price,
"predicted_price_per_m2": round(predicted_price / data_dict['size_m2'], 0),
"model_used": "igudar_valuation_v1_xgboost"
}
@app.get("/")
def read_root():
return {"message": "Welcome to the IGUDAR AI Valuation API. Use the /docs endpoint to test."}