# final / app2.py
# opinder2906's picture
# Update app2.py
# fda7c4e verified
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, RocCurveDisplay
from sklearn.decomposition import PCA
# Silence the "global pyplot use" deprecation warning on Streamlit versions
# that still ship this option. Newer Streamlit releases removed
# 'deprecation.showPyplotGlobalUse', and st.set_option raises
# StreamlitAPIException for an unrecognized key, which would abort the app
# before anything renders. A rejected option simply means there is no
# warning left to silence, so it is safe to carry on.
try:
    st.set_option('deprecation.showPyplotGlobalUse', False)
except st.errors.StreamlitAPIException:
    pass
st.title("Electric Vehicle ML Dashboard (Optimized for Hugging Face)")
# Load data
@st.cache_data
def load_data():
    """Read the dataset CSV from its Google Drive download link.

    Decorated with ``st.cache_data`` so the result is cached by Streamlit.

    Returns:
        The parsed CSV as a ``pandas.DataFrame``.
    """
    csv_url = "https://drive.google.com/uc?export=download&id=1QBTnXxORRbJzE5Z2aqKHsVqgB7mqowiN"
    frame = pd.read_csv(csv_url)
    return frame
# Load the dataset. A failed download or an unparsable response is reported
# in the page and the run is stopped (the same st.error + st.stop pattern the
# script uses for a missing 'Electric Range' column) instead of ending in a
# raw traceback.
try:
    df = load_data()
except (
    OSError,  # network / HTTP failures (urllib errors derive from OSError)
    UnicodeDecodeError,
    pd.errors.EmptyDataError,
    pd.errors.ParserError,
) as exc:
    st.error(f"Could not load the dataset: {exc}")
    st.stop()
st.subheader("1. Data Preview")
st.dataframe(df.head())
# Fill missing values
# Object (text/categorical) columns are imputed with their most frequent
# value, numeric columns with their median.
for col in df.select_dtypes(include='object').columns:
    modes = df[col].mode()
    # A column with no observed values has an empty mode(); indexing it with
    # [0] would raise, so such a column is left as-is.
    if not modes.empty:
        df[col] = df[col].fillna(modes.iloc[0])
for col in df.select_dtypes(include=np.number).columns:
    df[col] = df[col].fillna(df[col].median())
# Encode categories
# Every object-dtype column is replaced by the labels produced by its own
# freshly fitted LabelEncoder (encoders are not shared between columns).
object_columns = df.select_dtypes(include='object').columns
for col in object_columns:
    encoder = LabelEncoder()
    encoded = encoder.fit_transform(df[col])
    df[col] = encoded
# Feature engineering
# Derive 'Vehicle_Age' as the distance of 'Model Year' from the fixed
# reference year 2025; skipped when the dataset has no 'Model Year' column.
if 'Model Year' in df.columns:
    reference_year = 2025
    model_year = df['Model Year']
    df['Vehicle_Age'] = reference_year - model_year
# Target setup
# The label is derived from 'Electric Range', so the app cannot continue
# without that column: show an error and stop the run.
if 'Electric Range' not in df.columns:
    st.error("'Electric Range' column missing!")
    st.stop()
# Binary target: 1 where the range is strictly above the column median, else 0.
range_median = df['Electric Range'].median()
df['Target'] = df['Electric Range'].gt(range_median).astype(int)
y = df['Target']
# Features are every column except the raw range (the label is computed from
# it) and the label itself.
X = df.drop(columns=['Electric Range', 'Target'])
# Feature selection via Random Forest
# Standardize the feature matrix, then fit a 50-tree forest with a fixed seed
# on the scaled features; its feature importances are read right after this.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)
rf = RandomForestClassifier(n_estimators=50, random_state=42).fit(X_scaled, y)
top_features = pd.Series(rf.feature_importances_, index=X.columns).nlargest(5).index.tolis_