Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.preprocessing import LabelEncoder, StandardScaler | |
| from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier | |
| from sklearn.linear_model import LogisticRegression | |
| from sklearn.naive_bayes import GaussianNB | |
| from sklearn.svm import SVC | |
| from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, RocCurveDisplay | |
| from sklearn.decomposition import PCA | |
# Page setup.
# 'deprecation.showPyplotGlobalUse' was removed from newer Streamlit releases,
# where st.set_option() rejects unknown keys and aborts the script at startup.
# Treat it as best-effort: apply it where it still exists, skip it elsewhere.
try:
    st.set_option('deprecation.showPyplotGlobalUse', False)
except st.errors.StreamlitAPIException:
    pass  # option no longer exists, so there is no warning left to silence
st.title("Electric Vehicle ML Dashboard (Optimized for Hugging Face)")
# Load data
@st.cache_data
def load_data() -> pd.DataFrame:
    """Download the dashboard's CSV from its Google Drive link.

    Streamlit re-executes the whole script on every user interaction, so the
    result is cached to avoid repeating the download on each rerun.
    ``st.cache_data`` hands every caller its own copy, which keeps the
    in-place column edits made later in the script from touching the cache.

    Returns:
        The parsed CSV as a DataFrame.

    Raises:
        Whatever ``pd.read_csv`` raises when the URL cannot be fetched or the
        payload is not valid CSV.
    """
    url = "https://drive.google.com/uc?export=download&id=1QBTnXxORRbJzE5Z2aqKHsVqgB7mqowiN"
    return pd.read_csv(url)
# Fetch the dataset and show its first five rows as a quick sanity check.
df = load_data()
st.subheader("1. Data Preview")
st.dataframe(df.head(n=5))
# Drop columns that hold no values at all. For such a column mode() is empty
# (so mode()[0] raises) and median() is NaN (so fillna would leave the NaNs
# in place for the scaler and model further down).
df = df.dropna(axis="columns", how="all")

# Filling never changes a column's dtype, so both groups can be resolved once.
categorical_cols = df.select_dtypes(include='object').columns
numeric_cols = df.select_dtypes(include=np.number).columns

# Fill missing values: most frequent value for text columns, median for
# numeric columns.
for col in categorical_cols:
    df[col] = df[col].fillna(df[col].mode()[0])
for col in numeric_cols:
    df[col] = df[col].fillna(df[col].median())

# Encode categories: each text column gets its own integer codes.
for col in categorical_cols:
    df[col] = LabelEncoder().fit_transform(df[col])
# Feature engineering: when the model year is present, derive the vehicle's
# age as (2025 - model year).
if 'Model Year' in df:
    df['Vehicle_Age'] = df['Model Year'].rsub(2025)
# Target setup: the label is 1 for rows whose electric range is strictly above
# the column median and 0 otherwise. Without the column the app cannot go on.
if 'Electric Range' not in df:
    st.error("'Electric Range' column missing!")
    st.stop()
df['Target'] = df['Electric Range'].gt(df['Electric Range'].median()).astype(int)
# Features are everything except the label and the column it was derived from.
X = df.drop(['Electric Range', 'Target'], axis=1)
y = df['Target']
# Feature selection via Random Forest: standardise every feature, then fit a
# 50-tree forest (fixed seed) on the scaled matrix.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)
rf = RandomForestClassifier(n_estimators=50, random_state=42).fit(X_scaled, y)
| top_features = pd.Series(rf.feature_importances_, index=X.columns).nlargest(5).index.tolis_ | |