Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| import os | |
| import streamlit as st | |
def load_sample_data():
    """
    Read the bundled housing dataset used for statistics display.

    The file ``House-Data.csv`` is looked up in the directory that contains
    this module.

    Returns:
        pd.DataFrame | None: The parsed CSV, or ``None`` when the file is
        missing or cannot be read. In both failure cases a Streamlit
        warning is shown instead of raising.
    """
    data = None
    try:
        # Candidate locations, checked in order; the first existing one wins.
        module_dir = os.path.dirname(__file__)
        candidates = (os.path.join(module_dir, "House-Data.csv"),)
        found = next((p for p in candidates if os.path.exists(p)), None)

        if found is None:
            # Nothing on disk: tell the user, but let the app keep running.
            st.warning("Fichier de données d'exemple non trouvé. Certaines statistiques peuvent ne pas être disponibles.")
        else:
            data = pd.read_csv(found)
    except Exception as e:
        # Best-effort loader: any failure degrades to "no sample data".
        st.warning(f"Could not load sample data: {e}")
    return data
# Hard-coded fallbacks for a missing feature when no dataset mean is available.
_FALLBACK_DEFAULTS = {
    'bedrooms': 3,
    'bathrooms': 2.0,
    'sqft_living': 1500,
    'sqft_lot': 5000,
    'floors': 1.0,
    'waterfront': 0,
    'view': 0,
    'condition': 3,
    'grade': 7,
    'sqft_above': 1000,
    'sqft_basement': 0,
    'yr_built': 1980,
    'yr_renovated': 0,
    'zipcode': 98000,
    'lat': 47.5,
    'long': -122.0,
    'sqft_living15': 1500,
    'sqft_lot15': 5000,
}


def _dataset_feature_means():
    """Return ``{column: mean}`` for the numeric sample-data features (may be empty)."""
    sample_data = load_sample_data()
    if sample_data is None:
        return {}

    numeric = sample_data.select_dtypes(include=['int64', 'float64'])
    means = {}
    for col in numeric.columns:
        # 'id' and 'price' are not input features, so never use their means.
        if col in ('id', 'price'):
            continue
        mean = numeric[col].mean()
        # A column with no valid values yields NaN; skip it so the caller
        # falls back to a hard-coded default instead of feeding NaN onward.
        if pd.notna(mean):
            means[col] = mean
    return means


def preprocess_inputs(input_dict):
    """
    Preprocess the input dictionary to match the format expected by the model

    Values that are ``None`` are replaced, in order of preference, by the
    mean of that feature in the sample dataset, by a hard-coded default,
    or by ``0`` when the feature is unknown. The ``date`` and ``id`` columns
    are dropped and every remaining column is converted to float.

    The sample dataset is only loaded when at least one value is missing,
    so fully populated inputs cause no file access and no warning.

    Args:
        input_dict (dict): Dictionary containing the input features
    Returns:
        pd.DataFrame: Single-row dataframe ready for prediction
    Raises:
        ValueError: Propagated from pandas if a remaining value cannot be
            converted to float (e.g. a non-numeric string).
    """
    # Only pay for reading the CSV when there is something to fill in.
    needs_fill = any(value is None for value in input_dict.values())
    feature_means = _dataset_feature_means() if needs_fill else {}

    filled = {}
    for key, value in input_dict.items():
        if value is not None:
            filled[key] = value
        elif key in feature_means:
            # Prefer the dataset mean when one is available.
            filled[key] = feature_means[key]
        else:
            filled[key] = _FALLBACK_DEFAULTS.get(key, 0)

    # One row, one column per supplied key.
    input_df = pd.DataFrame([filled])

    # 'date' and 'id' are not needed for prediction; ignore them if absent.
    input_df = input_df.drop(columns=['date', 'id'], errors='ignore')

    # Ensure every remaining column is float
    return input_df.astype(float)