# app.py
# The core FastAPI application for our IGUDAR model

import joblib
import pandas as pd
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field

import warnings
warnings.filterwarnings('ignore')  # silence noisy library warnings (e.g. version-mismatch notices at load time)

# --- 1. DEFINE APP AND LOAD MODELS ---

# Initialize the FastAPI app
app = FastAPI(
    title="IGUDAR AI Valuation API",
    description="An API to serve the trained property valuation model for Moroccan real estate.",
    version="1.0"
)

# Load the trained model and preprocessing objects at startup
# This ensures they are loaded only once, making the API fast.
try:
    model = joblib.load("./models/valuation_model.joblib")
    preprocessing = joblib.load("./models/preprocessing_objects.joblib")
    
    # Extract the individual objects from the preprocessing file
    scaler = preprocessing['scaler']
    label_encoders = preprocessing['label_encoders']
    feature_names = preprocessing['feature_names']
    
    print("✅ Models and preprocessing objects loaded successfully.")

except FileNotFoundError:
    print("❌ ERROR: Model or preprocessing files not found. Ensure they are in the ./models directory.")
    # Define every name as None so the endpoint can fail gracefully with a clear
    # HTTP error instead of a NameError on the first request.
    model = None
    scaler = label_encoders = feature_names = None
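
# For reference, this assumes the training script bundled the preprocessing
# objects into a single dict before saving — a sketch only; the exact keys must
# match whatever the training code actually wrote:
#
#   joblib.dump(
#       {"scaler": scaler, "label_encoders": label_encoders, "feature_names": feature_names},
#       "./models/preprocessing_objects.joblib",
#   )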

# --- 2. DEFINE THE INPUT DATA MODEL ---

# Pydantic model for input data validation.
# This tells FastAPI what the incoming JSON should look like.
class PropertyFeatures(BaseModel):
    size_m2: float = Field(..., gt=0)  # must be positive: it is used as a divisor below
    bedrooms: int
    bathrooms: int
    age_years: int
    property_type: str
    city: str
    infrastructure_score: float
    economic_score: float
    lifestyle_score: float
    investment_score: float
    neighborhood_tier: int
    total_amenities: int
    data_quality: float = 0.9      # default when omitted
    has_coordinates: bool = True   # default when omitted
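
# An example request body this model accepts (values are illustrative only; the
# property_type and city strings must match categories the label encoders saw
# during training):
#
#   {
#       "size_m2": 120, "bedrooms": 3, "bathrooms": 2, "age_years": 8,
#       "property_type": "apartment", "city": "Casablanca",
#       "infrastructure_score": 72.5, "economic_score": 65.0,
#       "lifestyle_score": 70.0, "investment_score": 68.0,
#       "neighborhood_tier": 2, "total_amenities": 25
#   }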

# --- 3. CREATE THE PREDICTION ENDPOINT ---

@app.post("/valuation")
def predict_valuation(property_data: PropertyFeatures):
    """
    Predicts the value of a property based on its features.
    Accepts a JSON object with property details and returns a prediction.
    """
    if model is None:
        raise HTTPException(status_code=503, detail="Model is not loaded. Check server logs.")

    # Convert the validated Pydantic model to a plain dict
    # (on Pydantic v2, prefer .model_dump() over the deprecated .dict())
    data_dict = property_data.dict()
    
    # Start with a dictionary of all zeros for our feature vector
    features = {name: 0 for name in feature_names}

    # --- Feature Engineering (must EXACTLY match the training script) ---
    
    # 1. Direct mapping. Pydantic has already validated the payload, so every key
    #    is guaranteed to exist; the age clamp mirrors the training script.
    features.update({
        'size_m2': data_dict['size_m2'],
        'bedrooms': data_dict['bedrooms'],
        'bathrooms': data_dict['bathrooms'],
        'age_years': min(data_dict['age_years'], 50),
        'infrastructure_score': data_dict['infrastructure_score'],
        'economic_score': data_dict['economic_score'],
        'lifestyle_score': data_dict['lifestyle_score'],
        'investment_score': data_dict['investment_score'],
        'neighborhood_tier': data_dict['neighborhood_tier'],
        'total_amenities': data_dict['total_amenities'],
        'data_quality': data_dict['data_quality']
    })
    
    # 2. Calculated features
    features['room_density'] = min((features['bedrooms'] + features['bathrooms']) / features['size_m2'], 0.2)
    features['amenity_density'] = min(features['total_amenities'] / features['size_m2'], 2)
    features['location_quality'] = (features['infrastructure_score'] * 0.4 + 
                                   features['economic_score'] * 0.3 + 
                                   features['lifestyle_score'] * 0.3)
    features['investment_attractiveness'] = ((5 - features['neighborhood_tier']) * 20 + 
                                            features['location_quality'] * 0.5 + 
                                            (10 if data_dict.get('has_coordinates', True) else 0) +
                                            (features['data_quality'] * 20))
                                            
    # NOTE: Aggregate features such as 'city_median_size' and 'city_infra_avg' were
    # computed over the whole training set and cannot be derived from a single request,
    # so they stay at 0 here. This train/serve mismatch is a common deployment challenge;
    # a better approach is to pre-compute per-city statistics during training and look
    # them up at serving time, as sketched below. For this demo, zeros are acceptable.
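    #
    # A minimal sketch of that lookup, assuming a hypothetical
    # './models/city_stats.joblib' file saved by the training script:
    #
    #   CITY_STATS = joblib.load("./models/city_stats.joblib")  # load once at startup
    #   stats = CITY_STATS.get(data_dict['city'], {})
    #   features['city_median_size'] = stats.get('median_size', features['size_m2'])
    #   features['city_infra_avg'] = stats.get('infra_avg', features['infrastructure_score'])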

    # 3. Categorical encoding
    for col, le in label_encoders.items():
        encoded_col_name = f"{col}_encoded"
        if encoded_col_name in features:
            value = data_dict.get(col)
            try:
                # Use the fitted encoder to transform the input string
                features[encoded_col_name] = le.transform([value])[0]
            except ValueError as e:
                # Unseen category: fall back to 0 (a crude but explicit strategy)
                print(f"Warning: could not encode '{value}' for '{col}'; defaulting to 0. ({e})")
                features[encoded_col_name] = 0

    # Create a DataFrame in the exact order of feature_names
    df = pd.DataFrame([features])[feature_names]

    # Scale the features using the loaded scaler
    df_scaled = scaler.transform(df)

    # Make the prediction. model.predict returns a numpy array; cast the scalar
    # to a plain float so the JSON response serializes cleanly.
    prediction = float(model.predict(df_scaled)[0])

    # Post-process for a clean response: enforce a minimum realistic price
    predicted_price = round(max(200_000, prediction))

    return {
        "predicted_price_mad": predicted_price,
        "predicted_price_per_m2": round(predicted_price / data_dict['size_m2']),
        "model_used": "igudar_valuation_v1_xgboost"
    }

@app.get("/")
def read_root():
    return {"message": "Welcome to the IGUDAR AI Valuation API. Use the /docs endpoint to test."}