# Good / app.py
# drwaseem's picture
# Create app.py
# f8ea024 verified
# Alzheimer's Prediction App with Random Forest Classifier
# -----------------------------------------------------------
# Made with ❤️ for the contest, featuring long code, animations, and brain 🧠 emojis.
# Importing Libraries
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
accuracy_score,
classification_report,
confusion_matrix,
roc_curve,
auc,
)
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
from plotly import graph_objs as go
import time
# Set up the Streamlit app: browser tab title/icon and a full-width layout.
st.set_page_config(
    page_title="🧠 Alzheimer's Detection",
    page_icon="🧠",
    layout="wide",
)

# Add loading animation.
# The delay is purely cosmetic: it only keeps the spinner visible for a moment.
with st.spinner("🚀 App is loading... Please wait!"):
    time.sleep(2)  # Simulating loading time

# Title and Description
st.title("🧠 Alzheimer's Disease Prediction")
# The blank line before `---` is required: when `---` directly follows a
# paragraph line, Markdown treats it as a setext heading underline and renders
# the whole welcome paragraph as a level-2 heading instead of drawing a rule.
st.markdown(
    """
Welcome to the **Alzheimer's Disease Prediction App**! This tool uses a **Random Forest Classifier**
to predict whether a patient has Alzheimer's disease based on clinical data.

---
"""
)
# Sidebar for uploading data
st.sidebar.header("Upload Dataset")
uploaded_file = st.sidebar.file_uploader(
    "Upload your CSV file containing the dataset", type=["csv"]
)

# There is no built-in default dataset: every later step needs `df`, so this
# script run is halted until the user provides a file.
if uploaded_file is None:
    st.sidebar.warning("⚠️ Please upload a dataset to proceed!")
    st.stop()

# An empty, malformed or wrongly-encoded upload is a user error. Report it in
# the sidebar rather than letting the raw traceback take over the page.
try:
    df = pd.read_csv(uploaded_file)
except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
    st.sidebar.error(f"Could not read the uploaded CSV file: {exc}")
    st.stop()

# A header-only file parses fine but leaves nothing to train on.
if df.empty:
    st.sidebar.error("The uploaded CSV file contains no data rows.")
    st.stop()

st.sidebar.success("✅ Dataset loaded successfully!")

# Display the dataset (first rows only)
st.write("### Dataset Overview")
st.dataframe(df.head())
# Preprocessing the data
st.write("### Data Preprocessing")

# The target is chosen before imputation so the label column is never filled
# with a mean value (which would invent class labels that do not exist).
# The widget lives in the sidebar, so its position on the page is unchanged.
target = st.sidebar.selectbox("Select the Target Column", df.columns)

with st.spinner("🔄 Preprocessing data..."):
    time.sleep(1)  # Simulate processing delay

    # Dropping duplicates
    initial_rows = df.shape[0]
    df = df.drop_duplicates()
    final_rows = df.shape[0]
    st.write(f"🗑️ Removed {initial_rows - final_rows} duplicate rows.")

    # Checking for missing values (only columns that actually have some are shown)
    missing_values = df.isnull().sum()
    st.write("#### Missing Values:")
    st.write(missing_values[missing_values > 0])

    # Rows without a label cannot be used for supervised training.
    df = df.dropna(subset=[target])

    # Mean-impute the numeric feature columns only. `numeric_only=True` is
    # required because recent pandas raises on text columns instead of
    # silently skipping them.
    feature_means = df.drop(columns=[target]).mean(numeric_only=True)
    df = df.fillna(feature_means)

if df.shape[0] < 2:
    st.error("At least two labelled rows are needed to build a train/test split.")
    st.stop()

# Splitting features and target.
# StandardScaler and the classifier need numeric input, so text-like feature
# columns are left out (and reported) instead of crashing the app.
all_features = df.drop(columns=[target])
X = all_features.select_dtypes(include=["number", "bool"])
skipped_columns = [str(col) for col in all_features.columns if col not in X.columns]
if skipped_columns:
    st.warning("Ignoring non-numeric feature columns: " + ", ".join(skipped_columns))
if X.shape[1] == 0:
    st.error("No numeric feature columns are available for training.")
    st.stop()
y = df[target]

# Splitting data into training and testing sets (slider value is a percentage)
test_size = st.sidebar.slider("Test Data Size (%)", min_value=10, max_value=50, value=20)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_size / 100, random_state=42
)

# Scaling the data: the scaler is fitted on the training split only and then
# applied unchanged to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Feature importance (informational only: no feature is dropped based on it)
st.write("#### Feature Importance Visualization")
with st.spinner("🌟 Generating feature importances..."):
    rf = RandomForestClassifier(random_state=42)
    rf.fit(X_train_scaled, y_train)
    feature_importances = rf.feature_importances_

# Plotting feature importances as horizontal bars, one per feature column.
# Names are cast to str and the orientation is set explicitly so that purely
# numeric column names cannot be mistaken for a second numeric axis.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=feature_importances, y=X.columns.astype(str), orient="h", ax=ax)
ax.set_title("Feature Importance")
ax.set_xlabel("Importance Score")
ax.set_ylabel("Features")
st.pyplot(fig)
# pyplot keeps every figure alive until it is closed. The script is re-run on
# each widget interaction, so without this the figures pile up in memory.
plt.close(fig)
# Training the Random Forest Classifier
st.write("### Training the Model")
with st.spinner("🧠 Training the Random Forest Classifier..."):
    time.sleep(2)  # Cosmetic delay so the spinner stays visible
    # NOTE: same data and same seed as the fit done for the feature-importance
    # plot, so this reproduces that model; it is refitted here so this section
    # does not depend on the earlier one.
    rf = RandomForestClassifier(random_state=42)
    rf.fit(X_train_scaled, y_train)
st.success("🎉 Model trained successfully!")
# Model Evaluation on the held-out test split
st.write("### Model Evaluation")
y_pred = rf.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
st.write(f"**Accuracy:** {accuracy * 100:.2f}%")
st.write("#### Classification Report")
# st.text keeps the report's fixed-width column alignment.
st.text(classification_report(y_test, y_pred))

# Confusion Matrix
st.write("#### Confusion Matrix")
conf_matrix = confusion_matrix(y_test, y_pred)
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", ax=ax)
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
st.pyplot(fig)
plt.close(fig)  # release the figure; pyplot would otherwise keep it across reruns

# ROC Curve
st.write("#### ROC Curve")
if len(rf.classes_) == 2:
    # Column 1 of predict_proba corresponds to rf.classes_[1]. Passing that
    # label as pos_label keeps the curve correct for string or otherwise
    # non-{0, 1} labels, which roc_curve cannot infer on its own.
    positive_class = rf.classes_[1]
    y_prob = rf.predict_proba(X_test_scaled)[:, 1]
    fpr, tpr, thresholds = roc_curve(y_test, y_prob, pos_label=positive_class)
    roc_auc = auc(fpr, tpr)
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(fpr, tpr, label=f"AUC = {roc_auc:.2f}", color="darkorange")
    ax.plot([0, 1], [0, 1], "r--")  # chance-level diagonal for reference
    ax.set_title("Receiver Operating Characteristic (ROC) Curve")
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.legend(loc="lower right")
    st.pyplot(fig)
    plt.close(fig)
else:
    # A single ROC curve is only defined for two classes. Indexing column 1 of
    # predict_proba would be wrong (or fail) for any other class count.
    st.info(
        "The ROC curve is only drawn for binary targets; "
        f"the model was trained on {len(rf.classes_)} classes."
    )
# Additional Graphs
st.write("### Additional Visualizations")
with st.spinner("📊 Generating more graphs..."):
    # Pairplot of the columns, coloured by the selected target
    st.write("#### Pairplot")
    pairplot_fig = sns.pairplot(df, hue=target)
    st.pyplot(pairplot_fig)
    # The pairplot owns its own figure. Everything drawn so far has already
    # been rendered, so closing all open figures here is safe and frees them.
    plt.close("all")

    # Correlation Heatmap.
    # Correlation is only defined for numeric data, and recent pandas raises
    # when text columns are passed to corr(), so restrict the frame explicitly.
    st.write("#### Correlation Heatmap")
    numeric_df = df.select_dtypes(include=["number", "bool"])
    if numeric_df.shape[1] > 0:
        fig, ax = plt.subplots(figsize=(10, 8))
        sns.heatmap(numeric_df.corr(), annot=True, cmap="coolwarm", ax=ax)
        ax.set_title("Correlation Matrix")
        st.pyplot(fig)
        plt.close(fig)
    else:
        st.info("No numeric columns are available for a correlation heatmap.")

    # Histogram for each feature
    st.write("#### Feature Distributions")
    for col in X.columns:
        fig, ax = plt.subplots(figsize=(6, 4))
        sns.histplot(df[col], kde=True, ax=ax)
        ax.set_title(f"Distribution of {col}")
        st.pyplot(fig)
        # One figure is created per feature; close each right after rendering
        # so wide datasets do not accumulate dozens of open figures.
        plt.close(fig)
# Save the model
st.sidebar.write("### Save Model")
save_model = st.sidebar.button("Save Model")
if save_model:
    import joblib

    # Files are written to the working directory of the process running the app.
    joblib.dump(rf, "alzheimers_model.pkl")
    # The classifier was fitted on standardized features, so it is only usable
    # together with the fitted scaler. Persist the scaler next to the model;
    # the model file itself keeps its original name and content.
    joblib.dump(scaler, "alzheimers_scaler.pkl")
    st.sidebar.success(
        "🎉 Model saved as 'alzheimers_model.pkl' "
        "(scaler saved as 'alzheimers_scaler.pkl')!"
    )

# End of the app
st.write("---")
st.write("🧠 Thank you for using the Alzheimer's Disease Prediction App!")
st.balloons()