# app.py
# The core FastAPI application for our IGUDAR model
import joblib
import pandas as pd
import numpy as np
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
import warnings
warnings.filterwarnings('ignore')
# --- 1. DEFINE APP AND LOAD MODELS ---
# Initialize the FastAPI app
app = FastAPI(
title="IGUDAR AI Valuation API",
description="An API to serve the trained property valuation model for Moroccan real estate.",
version="1.0"
)
# Load the trained model and preprocessing objects at startup
# This ensures they are loaded only once, making the API fast.
try:
model = joblib.load("./models/valuation_model.joblib")
preprocessing = joblib.load("./models/preprocessing_objects.joblib")
# Extract the individual objects from the preprocessing file
scaler = preprocessing['scaler']
label_encoders = preprocessing['label_encoders']
feature_names = preprocessing['feature_names']
print("✅ Models and preprocessing objects loaded successfully.")
except FileNotFoundError:
    print("❌ ERROR: Model or preprocessing files not found. Ensure they are in the ./models directory.")
    # Set everything to None so the endpoint can fail gracefully instead of raising NameError
    model = scaler = label_encoders = feature_names = None
# --- 2. DEFINE THE INPUT DATA MODEL ---
# Pydantic model for input data validation.
# This tells FastAPI what the incoming JSON should look like.
class PropertyFeatures(BaseModel):
    size_m2: float = Field(gt=0)  # must be positive: it is used as a divisor below
bedrooms: int
bathrooms: int
age_years: int
property_type: str
city: str
infrastructure_score: float
economic_score: float
lifestyle_score: float
investment_score: float
neighborhood_tier: int
total_amenities: int
data_quality: float = 0.9 # Default value
has_coordinates: bool = True # Default value
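# Example of a valid request body for the /valuation endpoint
# (field values are illustrative, not taken from real listings):
# {
#   "size_m2": 120, "bedrooms": 3, "bathrooms": 2, "age_years": 8,
#   "property_type": "apartment", "city": "Casablanca",
#   "infrastructure_score": 70, "economic_score": 65, "lifestyle_score": 60,
#   "investment_score": 55, "neighborhood_tier": 2, "total_amenities": 25
# }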
# --- 3. CREATE THE PREDICTION ENDPOINT ---
@app.post("/valuation")
def predict_valuation(property_data: PropertyFeatures):
"""
Predicts the value of a property based on its features.
Accepts a JSON object with property details and returns a prediction.
"""
if model is None:
raise HTTPException(status_code=500, detail="Model is not loaded. Check server logs.")
    # Convert the incoming Pydantic model to a dictionary
    # (model_dump() is the Pydantic v2 method; on v1, use property_data.dict())
    data_dict = property_data.model_dump()
# Start with a dictionary of all zeros for our feature vector
features = {name: 0 for name in feature_names}
# --- Feature Engineering (must EXACTLY match the training script) ---
# 1. Direct mapping
features.update({
'size_m2': data_dict.get('size_m2', 100),
'bedrooms': data_dict.get('bedrooms', 2),
'bathrooms': data_dict.get('bathrooms', 1),
'age_years': min(data_dict.get('age_years', 5), 50),
'infrastructure_score': data_dict.get('infrastructure_score', 50),
'economic_score': data_dict.get('economic_score', 50),
'lifestyle_score': data_dict.get('lifestyle_score', 50),
'investment_score': data_dict.get('investment_score', 50),
'neighborhood_tier': data_dict.get('neighborhood_tier', 3),
'total_amenities': data_dict.get('total_amenities', 20),
'data_quality': data_dict.get('data_quality', 0.8)
})
# 2. Calculated features
features['room_density'] = min((features['bedrooms'] + features['bathrooms']) / features['size_m2'], 0.2)
features['amenity_density'] = min(features['total_amenities'] / features['size_m2'], 2)
features['location_quality'] = (features['infrastructure_score'] * 0.4 +
features['economic_score'] * 0.3 +
features['lifestyle_score'] * 0.3)
features['investment_attractiveness'] = ((5 - features['neighborhood_tier']) * 20 +
features['location_quality'] * 0.5 +
(10 if data_dict.get('has_coordinates', True) else 0) +
(features['data_quality'] * 20))
    # NOTE: We can't compute 'city_median_size' or 'city_infra_avg' from a single
    # prediction request, so for now we leave them at 0. This is a common deployment
    # challenge; a better approach is to look the values up in a pre-computed
    # dictionary of per-city statistics (sketched below). For this demo, 0 is acceptable.
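    # A minimal sketch of that lookup approach, left commented out because the numbers
    # here are illustrative placeholders, not statistics computed during training:
    # CITY_STATS = {"Casablanca": {"median_size": 95.0, "infra_avg": 70.0}}
    # stats = CITY_STATS.get(data_dict["city"], {"median_size": 0.0, "infra_avg": 0.0})
    # features["city_median_size"] = stats["median_size"]
    # features["city_infra_avg"] = stats["infra_avg"]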
# 3. Categorical Encoding
    for col, le in label_encoders.items():
        encoded_col_name = f"{col}_encoded"
        if encoded_col_name in features:
            value = data_dict.get(col)  # bind before the try so the except block can report it
            try:
                # Use the loaded encoder to transform the input string
                features[encoded_col_name] = le.transform([value])[0]
            except Exception as e:
                # If the category is new/unseen, default to 0 (or choose another strategy)
                print(f"Warning: Could not encode '{value}' for feature '{col}'. Defaulting to 0. Error: {e}")
                features[encoded_col_name] = 0
# Create a DataFrame in the exact order of feature_names
df = pd.DataFrame([features])[feature_names]
# Scale the features using the loaded scaler
df_scaled = scaler.transform(df)
# Make the prediction
prediction = model.predict(df_scaled)[0]
    # Post-process for a clean response: cast the numpy scalar to a plain float
    # (so the JSON response can be serialized) and apply a minimum realistic price
    predicted_price = round(max(200000, float(prediction)), 0)
return {
"predicted_price_mad": predicted_price,
"predicted_price_per_m2": round(predicted_price / data_dict['size_m2'], 0),
"model_used": "igudar_valuation_v1_xgboost"
}
@app.get("/")
def read_root():
return {"message": "Welcome to the IGUDAR AI Valuation API. Use the /docs endpoint to test."}