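# Streamlit app: trains and evaluates a Random Forest classifier for
# Alzheimer's disease prediction on a user-uploaded CSV dataset.
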
import time

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from plotly import graph_objs as go
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    auc,
    classification_report,
    confusion_matrix,
    roc_curve,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Browser-tab title, tab icon and wide layout for the app.
# "\N{BRAIN}" restores the brain emoji that had been mis-decoded into the
# bytes "π§ "; the named escape keeps the source ASCII-safe so it cannot be
# garbled again by a wrong file encoding.
st.set_page_config(
    page_title="\N{BRAIN} Alzheimer's Detection",
    page_icon="\N{BRAIN}",
    layout="wide",
)

# Cosmetic start-up pause shown behind a spinner; it executes every time the
# script runs.
# NOTE(review): the leading "π" looks like a mis-decoded emoji whose original
# code point cannot be recovered from this file -- confirm and restore.
with st.spinner("π App is loading... Please wait!"):
    time.sleep(2)

# Page title ("\N{BRAIN}" restores the mis-decoded brain emoji).
st.title("\N{BRAIN} Alzheimer's Disease Prediction")

# Introductory blurb followed by a horizontal rule.
# The empty line before "---" is required: when "---" directly follows a
# paragraph, Markdown treats it as a setext underline and renders the whole
# paragraph as a level-2 heading instead of drawing a rule.
st.markdown(
    "Welcome to the **Alzheimer's Disease Prediction App**! "
    "This tool uses a **Random Forest Classifier**\n"
    "to predict whether a patient has Alzheimer's disease "
    "based on clinical data.\n"
    "\n"
    "---\n"
)

# Sidebar uploader: the rest of the script needs a user-supplied CSV.
st.sidebar.header("Upload Dataset")
uploaded_file = st.sidebar.file_uploader(
    "Upload your CSV file containing the dataset", type=["csv"]
)

# Guard clause: nothing below can run without a file, so end this run here.
if uploaded_file is None:
    st.sidebar.warning(
        "\N{WARNING SIGN}\N{VARIATION SELECTOR-16} "
        "Please upload a dataset to proceed!"
    )
    st.stop()

# A file with a .csv extension can still be empty, malformed or not valid
# text; report that in the sidebar instead of letting the traceback surface.
try:
    df = pd.read_csv(uploaded_file)
except (
    pd.errors.EmptyDataError,
    pd.errors.ParserError,
    UnicodeDecodeError,
) as exc:
    st.sidebar.error(f"Could not read the uploaded file as CSV: {exc}")
    st.stop()

# A header-only file parses fine but leaves nothing to split or train on.
if df.empty:
    st.sidebar.error("The uploaded dataset contains no rows.")
    st.stop()

# The original literal was split across two lines by a mis-decoded check-mark
# emoji (its last byte was read as a line break); restored here.
st.sidebar.success("\N{WHITE HEAVY CHECK MARK} Dataset loaded successfully!")

# Preview of the first rows of the uploaded data.
st.write("### Dataset Overview")
st.dataframe(df.head())

st.write("### Data Preprocessing")
# Cosmetic one-second pause behind a spinner.
# NOTE(review): the leading "π" looks like a mis-decoded emoji whose original
# code point cannot be recovered from this file -- confirm and restore.
with st.spinner("π Preprocessing data..."):
    time.sleep(1)

# Drop exact duplicate rows and report how many were removed.
initial_rows = df.shape[0]
df = df.drop_duplicates()
final_rows = df.shape[0]
# NOTE(review): restored from mis-decoded bytes; a wastebasket followed by an
# emoji variation selector is the best match for the residue -- confirm.
trash_icon = "\N{WASTEBASKET}\N{VARIATION SELECTOR-16}"
st.write(f"{trash_icon} Removed {initial_rows - final_rows} duplicate rows.")

# Per-column count of missing cells; only columns with gaps are displayed.
missing_values = df.isnull().sum()
st.write("#### Missing Values:")
st.write(missing_values[missing_values > 0])

# Impute numeric gaps with the column mean. numeric_only=True is required:
# on pandas 2.x a bare df.mean() raises TypeError as soon as the frame holds
# a text column. Gaps in non-numeric columns are left untouched.
df = df.fillna(df.mean(numeric_only=True))

# The user picks the label column in the sidebar; every remaining column is
# treated as a feature.
target = st.sidebar.selectbox("Select the Target Column", df.columns)
y = df[target]
X = df.drop(columns=[target])

# The slider works in whole percentages, while train_test_split expects a
# fraction, hence the division by 100. The seed is fixed for repeatability.
test_size = st.sidebar.slider(
    "Test Data Size (%)",
    min_value=10,
    max_value=50,
    value=20,
)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_size / 100,
    random_state=42,
)

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

st.write("#### Feature Importance Visualization")
# NOTE(review): the leading "π" looks like a mis-decoded emoji whose original
# code point cannot be recovered from this file -- confirm and restore.
with st.spinner("π Generating feature importances..."):
    # The forest is fitted exactly once, here. Previously an identical model
    # (same data, same random_state) was fitted a second time in the
    # "Training the Model" section, doubling the most expensive step of every
    # run without changing the result.
    rf = RandomForestClassifier(random_state=42)
    rf.fit(X_train_scaled, y_train)
    feature_importances = rf.feature_importances_

# Horizontal bar chart: one bar per feature column, in column order.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=feature_importances, y=X.columns, ax=ax)
ax.set_title("Feature Importance")
ax.set_xlabel("Importance Score")
ax.set_ylabel("Features")
st.pyplot(fig)
# pyplot keeps every figure alive until it is closed explicitly; close it
# once rendered so figures do not pile up across script runs.
plt.close(fig)

# The model fitted above is the one evaluated and saved by the rest of the
# script, so this section only reports completion.
st.write("### Training the Model")
# NOTE(review): leading "π" is mis-decoded emoji residue -- confirm/restore.
st.success("π Model trained successfully!")

# Score the fitted forest on the held-out (scaled) test split.
st.write("### Model Evaluation")
y_pred = rf.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
st.write(f"**Accuracy:** {accuracy * 100:.2f}%")
st.write("#### Classification Report")
# st.text keeps the report's fixed-width column alignment.
st.text(classification_report(y_test, y_pred))

st.write("#### Confusion Matrix")
# Pin the row/column order to the classes the model was trained on and use
# them as tick labels; otherwise the heatmap shows positional indices
# (0, 1, ...) that need not match the real label values.
class_labels = rf.classes_
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
st.pyplot(fig)
# Release the figure; pyplot otherwise keeps it alive across script runs.
plt.close(fig)

st.write("#### ROC Curve")
# A single ROC curve is only defined for a two-class problem. With any other
# number of classes the old code failed: predict_proba has no usable second
# column for one class, and roc_curve rejects multi-class targets.
if len(rf.classes_) == 2:
    # Column 1 of predict_proba belongs to rf.classes_[1]; naming that class
    # as pos_label keeps scores and labels aligned and also makes targets
    # that are not encoded as 0/1 (e.g. 1/2 or text labels) work.
    positive_class = rf.classes_[1]
    y_prob = rf.predict_proba(X_test_scaled)[:, 1]
    fpr, tpr, thresholds = roc_curve(
        y_test, y_prob, pos_label=positive_class
    )
    roc_auc = auc(fpr, tpr)

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(fpr, tpr, label=f"AUC = {roc_auc:.2f}", color="darkorange")
    # Diagonal reference line from (0, 0) to (1, 1).
    ax.plot([0, 1], [0, 1], "r--")
    ax.set_title("Receiver Operating Characteristic (ROC) Curve")
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.legend(loc="lower right")
    st.pyplot(fig)
    # Release the figure; pyplot otherwise keeps it alive across script runs.
    plt.close(fig)
else:
    st.info(
        "The ROC curve is only shown for binary targets; the selected "
        f"target has {len(rf.classes_)} class(es)."
    )

st.write("### Additional Visualizations")
# NOTE(review): the leading "π" looks like a mis-decoded emoji whose original
# code point cannot be recovered from this file -- confirm and restore.
with st.spinner("π Generating more graphs..."):
    # Pairwise scatter/density grid, coloured by the selected target column.
    st.write("#### Pairplot")
    pairplot_fig = sns.pairplot(df, hue=target)
    # sns.pairplot returns a seaborn grid, not a Matplotlib Figure. Hand the
    # underlying Figure to Streamlit so it can also be closed afterwards.
    # Older seaborn releases expose it only as `.fig`.
    pair_figure = getattr(pairplot_fig, "figure", None)
    if pair_figure is None:
        pair_figure = pairplot_fig.fig
    st.pyplot(pair_figure)
    plt.close(pair_figure)

    st.write("#### Correlation Heatmap")
    # Correlation is only defined for numeric data; on pandas 2.x a bare
    # df.corr() raises as soon as the frame holds a text column, so restrict
    # the input to numeric/boolean columns first.
    numeric_corr = df.select_dtypes(include=["number", "bool"]).corr()
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(numeric_corr, annot=True, cmap="coolwarm", ax=ax)
    ax.set_title("Correlation Matrix")
    st.pyplot(fig)
    plt.close(fig)

    # One histogram (with a KDE overlay) per feature column.
    st.write("#### Feature Distributions")
    for col in X.columns:
        fig, ax = plt.subplots(figsize=(6, 4))
        sns.histplot(df[col], kde=True, ax=ax)
        ax.set_title(f"Distribution of {col}")
        st.pyplot(fig)
        # Close inside the loop: otherwise every run leaves one open figure
        # per feature behind in pyplot's global registry.
        plt.close(fig)

# Sidebar action: persist the fitted artefacts to the working directory.
st.sidebar.write("### Save Model")
save_model = st.sidebar.button("Save Model")
if save_model:
    joblib.dump(rf, "alzheimers_model.pkl")
    # The forest was trained on standardised features, so its predictions
    # are only meaningful on inputs passed through the same fitted scaler.
    # Persist the scaler next to the model; the model file itself is
    # unchanged, so existing loaders keep working.
    joblib.dump(scaler, "alzheimers_scaler.pkl")
    # NOTE(review): leading "π" is mis-decoded emoji residue -- confirm and
    # restore the intended emoji.
    st.sidebar.success("π Model saved as 'alzheimers_model.pkl'!")
    st.sidebar.info(
        "Feature scaler saved as 'alzheimers_scaler.pkl'. "
        "Apply it to new data before calling the model."
    )

# Closing rule, thank-you note and balloon animation.
st.write("---")
# "\N{BRAIN}" restores the brain emoji that had been mis-decoded into "π§ ".
st.write(
    "\N{BRAIN} Thank you for using the Alzheimer's Disease Prediction App!"
)
st.balloons()