Spaces:
Sleeping
Sleeping
File size: 3,157 Bytes
8452900 a88ac30 8452900 a88ac30 8452900 a88ac30 8452900 a88ac30 8452900 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 | import pandas as pd
import numpy as np
import os
import streamlit as st
@st.cache_data
def load_sample_data():
    """Read the bundled housing CSV used for statistics display.

    Looks for ``House-Data.csv`` in the directory that contains this module.

    Returns:
        The parsed ``pandas.DataFrame`` when the file exists; ``None`` when
        the file is missing or when anything goes wrong while locating or
        reading it. In both ``None`` cases a Streamlit warning is shown
        instead of raising.
    """
    try:
        module_dir = os.path.dirname(__file__)
        # Candidate locations, checked in order; the first existing one wins.
        candidates = [
            os.path.join(module_dir, "House-Data.csv"),
        ]
        found = next((p for p in candidates if os.path.exists(p)), None)
        if found is not None:
            return pd.read_csv(found)
        # Nothing on disk: tell the user, but let the app keep running.
        st.warning("Fichier de données d'exemple non trouvé. Certaines statistiques peuvent ne pas être disponibles.")
    except Exception as err:
        # Best-effort loader: any failure is reported as a warning, not raised.
        st.warning(f"Could not load sample data: {err}")
    return None
def preprocess_inputs(input_dict):
    """Turn a raw feature dictionary into a single-row float DataFrame.

    Entries whose value is ``None`` are filled in, by priority:

    1. the mean of the same-named numeric column in the sample dataset
       (when the dataset could be loaded and that mean is not NaN);
    2. a hard-coded fallback for the known housing features;
    3. ``0`` for any other key.

    The ``date`` and ``id`` columns are then removed if present, and every
    remaining column is cast to ``float``.

    Args:
        input_dict (dict): Mapping of feature name to value; ``None`` marks
            a missing value.

    Returns:
        pd.DataFrame: One-row dataframe whose columns follow the key order
        of ``input_dict`` (minus ``date`` / ``id``), all of dtype float.

    Raises:
        Whatever ``DataFrame.astype(float)`` raises (typically ``ValueError``)
        when a remaining value cannot be converted to a float.
    """
    # Static fallbacks, built once per call instead of once per missing key.
    fallback_defaults = {
        'bedrooms': 3,
        'bathrooms': 2.0,
        'sqft_living': 1500,
        'sqft_lot': 5000,
        'floors': 1.0,
        'waterfront': 0,
        'view': 0,
        'condition': 3,
        'grade': 7,
        'sqft_above': 1000,
        'sqft_basement': 0,
        'yr_built': 1980,
        'yr_renovated': 0,
        'zipcode': 98000,
        'lat': 47.5,
        'long': -122.0,
        'sqft_living15': 1500,
        'sqft_lot15': 5000,
    }

    # Per-feature means from the sample dataset, used as the preferred default.
    sample_data = load_sample_data()
    feature_means = {}
    if sample_data is not None:
        numeric_data = sample_data.select_dtypes(include=['int64', 'float64'])
        for col in numeric_data.columns:
            # 'id' is an identifier and 'price' is excluded from the inputs.
            if col in ('id', 'price'):
                continue
            mean_value = numeric_data[col].mean()
            # A column with no non-null values has a NaN mean; skip it so the
            # static fallback is used rather than feeding NaN downstream.
            if pd.notna(mean_value):
                feature_means[col] = mean_value

    # Keep provided values as-is; substitute a default for every None.
    filled = {}
    for key, value in input_dict.items():
        if value is not None:
            filled[key] = value
        elif key in feature_means:
            filled[key] = feature_means[key]
        else:
            filled[key] = fallback_defaults.get(key, 0)

    input_df = pd.DataFrame([filled])

    # 'date' and 'id' are not used for prediction; drop them when present.
    input_df = input_df.drop(columns=['date', 'id'], errors='ignore')

    # Cast every remaining column to float.
    return input_df.astype(float)
|