# Spaces: drwaseem / Good · Sleeping · File size: 5,749 Bytes · f8ea024

# Alzheimer's Prediction App with Random Forest Classifier
# -----------------------------------------------------------
# Made with ❤️ for the contest, featuring long code, animations, and brain 🧠 emojis.

# Importing Libraries
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    roc_curve,
    auc,
)
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
from plotly import graph_objs as go
import time

# Page-level configuration: browser-tab title, tab icon, and a wide layout.
page_settings = {
    "page_title": "🧠 Alzheimer's Detection",
    "page_icon": "🧠",
    "layout": "wide",
}
st.set_page_config(**page_settings)

# Start-up animation: show a spinner for two seconds. Nothing is loaded while
# it is visible; the pause is purely cosmetic.
startup_delay_seconds = 2
with st.spinner("🚀 App is loading... Please wait!"):
    time.sleep(startup_delay_seconds)

# Title and Description
st.title("🧠 Alzheimer's Disease Prediction")
# The blank line before "---" is required: in Markdown, a "---" line placed
# directly under a paragraph is a setext heading underline (it would turn the
# welcome text into a level-2 heading) rather than a horizontal rule.
st.markdown(
    """
Welcome to the **Alzheimer's Disease Prediction App**! This tool uses a **Random Forest Classifier** 
to predict whether a patient has Alzheimer's disease based on clinical data.  

---
"""
)

# Sidebar for uploading data
st.sidebar.header("Upload Dataset")
uploaded_file = st.sidebar.file_uploader(
    "Upload your CSV file containing the dataset", type=["csv"]
)

# No default dataset is bundled: without an upload the script halts here.
if uploaded_file is None:
    st.sidebar.warning("⚠️ Please upload a dataset to proceed!")
    st.stop()

# Parse the upload. An empty, malformed, or undecodable file would otherwise
# surface as a raw traceback on the page, so report it in the sidebar and halt.
try:
    df = pd.read_csv(uploaded_file)
except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
    st.sidebar.error(f"❌ Could not read the uploaded CSV: {exc}")
    st.stop()

# A header-only file parses without error but leaves nothing to train on.
if df.empty:
    st.sidebar.error("❌ The uploaded CSV contains no data rows.")
    st.stop()

st.sidebar.success("✅ Dataset loaded successfully!")

# Display the dataset (first rows only)
st.write("### Dataset Overview")
st.dataframe(df.head())

# Preprocessing the data
st.write("### Data Preprocessing")
with st.spinner("🔄 Preprocessing data..."):
    time.sleep(1)  # Simulated delay; the real work happens in the steps below

# Drop exact duplicate rows and report how many were removed.
initial_rows = df.shape[0]
df = df.drop_duplicates()
final_rows = df.shape[0]
st.write(f"🗑️ Removed {initial_rows - final_rows} duplicate rows.")

# Count missing values per column and show only the columns that have any.
missing_values = df.isnull().sum()
st.write("#### Missing Values:")
st.write(missing_values[missing_values > 0])

# Impute missing values with the column mean. `numeric_only=True` restricts
# the means to numeric columns; without it, pandas >= 2.0 raises TypeError as
# soon as the frame contains a non-numeric column. Non-numeric columns are
# therefore left as they are, including any missing values they contain.
df = df.fillna(df.mean(numeric_only=True))

# Sidebar controls: which column to predict and how much data to hold out.
target = st.sidebar.selectbox("Select the Target Column", df.columns)
test_size = st.sidebar.slider("Test Data Size (%)", min_value=10, max_value=50, value=20)

# Splitting features and target.
# Only numeric/boolean columns are kept as features: StandardScaler cannot
# convert text columns to floats and would abort the run with a ValueError.
y = df[target]
all_features = df.drop(columns=[target])
X = all_features.select_dtypes(include=["number", "bool"])

ignored_columns = [col for col in all_features.columns if col not in X.columns]
if ignored_columns:
    st.warning(
        "⚠️ Ignoring non-numeric feature columns: "
        + ", ".join(str(col) for col in ignored_columns)
    )
if X.shape[1] == 0:
    st.error("❌ No numeric feature columns are available for training.")
    st.stop()

# Splitting data into training and testing sets (slider value is a percentage).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_size / 100, random_state=42
)

# Scaling the data: fit on the training split only, then apply the same
# transformation to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Feature importance
# The classifier is fitted once, here, and the same fitted model is used for
# the importance plot and for everything that follows. Fitting a second forest
# later with the same data and the same random_state would only repeat the work.
st.write("#### Feature Importance Visualization")
with st.spinner("🌟 Generating feature importances..."):
    rf = RandomForestClassifier(random_state=42)
    rf.fit(X_train_scaled, y_train)
    feature_importances = rf.feature_importances_

# Plotting feature importances (one horizontal bar per feature column)
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=feature_importances, y=X.columns, ax=ax)
ax.set_title("Feature Importance")
ax.set_xlabel("Importance Score")
ax.set_ylabel("Features")
st.pyplot(fig)
# pyplot keeps every figure alive until it is closed; release it once rendered
# so figures do not pile up across script reruns.
plt.close(fig)

# Training the Random Forest Classifier
st.write("### Training the Model")
with st.spinner("🧠 Training the Random Forest Classifier..."):
    # The model was already fitted above; this pause is only the simulated
    # delay used for the spinner animation.
    time.sleep(2)
    st.success("🎉 Model trained successfully!")

# Model Evaluation
# Score the fitted classifier on the held-out split: overall accuracy as a
# percentage, then the per-class report rendered as preformatted text.
st.write("### Model Evaluation")
y_pred = rf.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
st.write(f"**Accuracy:** {accuracy * 100:.2f}%")
st.write("#### Classification Report")
st.text(classification_report(y_test, y_pred))

# Confusion Matrix
# Drawn as an annotated heatmap of integer counts; the axes are labelled
# "Actual" (y) and "Predicted" (x).
st.write("#### Confusion Matrix")
conf_matrix = confusion_matrix(y_test, y_pred)
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", ax=ax)
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
st.pyplot(fig)
# pyplot keeps every figure alive until it is closed; release it once rendered
# so figures do not pile up across script reruns.
plt.close(fig)

# ROC Curve
# `roc_curve` handles binary problems only, and column 1 of `predict_proba`
# exists only when the model has seen at least two classes, so the plot is
# restricted to models with exactly two classes.
st.write("#### ROC Curve")
if len(rf.classes_) == 2:
    # Column 1 of predict_proba corresponds to rf.classes_[1]; pass that label
    # explicitly as the positive class so targets that are not encoded as
    # {0, 1} / {-1, 1} (e.g. string labels) are scored correctly.
    positive_class = rf.classes_[1]
    y_prob = rf.predict_proba(X_test_scaled)[:, 1]
    fpr, tpr, thresholds = roc_curve(y_test, y_prob, pos_label=positive_class)
    roc_auc = auc(fpr, tpr)

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(fpr, tpr, label=f"AUC = {roc_auc:.2f}", color="darkorange")
    ax.plot([0, 1], [0, 1], "r--")  # diagonal reference line
    ax.set_title("Receiver Operating Characteristic (ROC) Curve")
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.legend(loc="lower right")
    st.pyplot(fig)
    # Release the figure once rendered so figures do not pile up across reruns.
    plt.close(fig)
else:
    st.info(
        "ℹ️ The ROC curve is only available for binary targets; "
        f"the selected target has {len(rf.classes_)} class(es)."
    )

# Additional Graphs
st.write("### Additional Visualizations")
with st.spinner("📊 Generating more graphs..."):
    # Pairplot of the dataset, coloured by the selected target column
    st.write("#### Pairplot")
    pairplot_fig = sns.pairplot(df, hue=target)
    st.pyplot(pairplot_fig)
    # The grid object owns its figure. Everything drawn so far has already
    # been rendered, so closing all open pyplot figures is safe and frees them.
    plt.close("all")

    # Correlation Heatmap
    # Correlations are computed over numeric/boolean columns only; calling
    # `corr()` on a frame with text columns raises on pandas >= 2.0.
    st.write("#### Correlation Heatmap")
    numeric_df = df.select_dtypes(include=["number", "bool"])
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(numeric_df.corr(), annot=True, cmap="coolwarm", ax=ax)
    ax.set_title("Correlation Matrix")
    st.pyplot(fig)
    plt.close(fig)

    # Histogram (with KDE overlay) for each feature column
    st.write("#### Feature Distributions")
    for col in X.columns:
        fig, ax = plt.subplots(figsize=(6, 4))
        sns.histplot(df[col], kde=True, ax=ax)
        ax.set_title(f"Distribution of {col}")
        st.pyplot(fig)
        # One figure is created per feature; close each after rendering so
        # they do not accumulate.
        plt.close(fig)

# Save the model
# Written to the app's working directory when the sidebar button is pressed.
st.sidebar.write("### Save Model")
save_model = st.sidebar.button("Save Model")
if save_model:
    import joblib

    joblib.dump(rf, "alzheimers_model.pkl")
    # The classifier was trained on standardized features, so it is only usable
    # together with the fitted scaler. Persist the scaler alongside the model;
    # the model file keeps its original name and contents.
    joblib.dump(scaler, "alzheimers_scaler.pkl")
    st.sidebar.success("🎉 Model saved as 'alzheimers_model.pkl'!")
    st.sidebar.info("📐 Fitted scaler saved as 'alzheimers_scaler.pkl'.")

# Footer: a "---" line, then the closing message, then the balloons animation.
closing_lines = (
    "---",
    "🧠 Thank you for using the Alzheimer's Disease Prediction App!",
)
for closing_line in closing_lines:
    st.write(closing_line)
st.balloons()