|
import pandas as pd |
|
import numpy as np |
|
import streamlit as st |
|
import matplotlib.pyplot as plt |
|
import seaborn as sns |
|
from sklearn.ensemble import RandomForestClassifier |
|
from sklearn.metrics import ( |
|
accuracy_score, |
|
f1_score, |
|
confusion_matrix, |
|
roc_curve, |
|
auc, |
|
precision_recall_curve, |
|
) |
|
from sklearn.model_selection import train_test_split |
|
from sklearn.preprocessing import LabelEncoder |
|
from sklearn.datasets import make_classification |
|
from mpl_toolkits.mplot3d import Axes3D |
|
|
|
|
|
# Browser-tab metadata (title, icon) and the wide page layout for the app.
# NOTE(review): the emoji in the UI strings of this file look mis-decoded;
# they are kept exactly as found here -- TODO restore them from the original.
page_settings = {
    "page_title": "๐ง Alzheimer's Diagnosis App",
    "page_icon": "๐ก",
    "layout": "wide",
}
st.set_page_config(**page_settings)

# Main heading followed by the tagline shown directly beneath it.
st.title("๐ง Early Diagnosis of Alzheimer's Disease ๐ง ")
st.subheader("๐ Empowering early intervention for a healthier future! ๐")
|
|
|
|
|
# --- Data source ------------------------------------------------------------
# The user may upload a CSV. Without an upload the app falls back to a seeded
# synthetic binary-classification dataset so every later section has data.
uploaded_file = st.file_uploader("๐ Upload your dataset (CSV format)", type=["csv"])

if uploaded_file is not None:
    try:
        data = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        # An unreadable upload should produce a message, not a traceback.
        st.error(f"Could not read the uploaded CSV file: {exc}")
        st.stop()
    # NOTE(review): this literal was split across two lines (a syntax error),
    # apparently by a mis-decoded emoji; it is rejoined here with the visible
    # characters kept -- TODO restore the intended emoji from the original.
    st.success("โ Dataset loaded successfully! ๐ง ")
else:
    st.warning("โ ๏ธ No file uploaded. Using synthetic data. ๐ง ")
    # 1000 rows x 10 features (5 informative, 2 redundant), two classes.
    X, y = make_classification(
        n_samples=1000,
        n_features=10,
        n_informative=5,
        n_redundant=2,
        n_classes=2,
        random_state=42,
    )
    columns = [f"Feature_{i}" for i in range(X.shape[1])]
    data = pd.DataFrame(X, columns=columns)
    # The generated labels play the role of the target column.
    data["AlzheimerRisk"] = y
|
|
|
|
|
# Show the first rows (up to 250) so the user can check what was loaded.
st.write("### ๐ Dataset Preview ๐ง ")
st.write(data.head(250))

# Every section below needs the target column. When it is missing, report the
# problem and halt this script run here, rather than wrapping the remainder of
# the file in an `else:` branch.
if "AlzheimerRisk" not in data.columns:
    st.error("โ Dataset must contain a column named 'AlzheimerRisk'. ๐ง ")
    st.stop()
|
|
|
st.write("### ๐ Data Preprocessing ๐ง ")

# Integer-encode every object-dtype (text) column in place. Each fitted encoder
# is stored under its column name so a category can later be mapped back to
# the code the model was trained on.
label_encoders = {}
for col in data.select_dtypes(include=["object"]).columns:
    encoder = LabelEncoder()
    data[col] = encoder.fit_transform(data[col])
    label_encoders[col] = encoder

# NOTE(review): this literal was split across two lines (a syntax error),
# apparently by a mis-decoded emoji; it is rejoined here with the visible
# characters kept -- TODO restore the intended emoji from the original.
st.write("โ Preprocessed Dataset ๐ง ", data.head(250))
|
|
|
|
|
# The classifier needs discrete labels. Integer targets of any width and
# boolean targets are used as-is; anything else (e.g. a float risk score) is
# thresholded at 0.5. Comparing the dtype with the string 'int' only matched
# the platform's default integer, so other integer widths were wrongly
# treated as continuous.
target = data["AlzheimerRisk"]
already_discrete = (
    pd.api.types.is_integer_dtype(target) or pd.api.types.is_bool_dtype(target)
)
if not already_discrete:
    st.write("โ ๏ธ Binarizing 'AlzheimerRisk' to binary classification. ๐ง ")
    data["AlzheimerRisk"] = (target >= 0.5).astype(int)
|
|
|
|
|
# Model inputs are all columns other than the target.
features = [name for name in data.columns if name != "AlzheimerRisk"]
X, y = data[features], data["AlzheimerRisk"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between reruns.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a 200-tree random forest with tree depth capped at 10.
rf_model = RandomForestClassifier(
    n_estimators=200, max_depth=10, random_state=42
).fit(X_train, y_train)

# Score the model on the held-out rows.
y_pred = rf_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Headline metrics: accuracy as a percentage, F1 to two decimals.
for metric_label, metric_text in (
    ("๐ฏ Accuracy ๐ง ", f"{accuracy*100:.2f}%"),
    ("๐ F1 Score ๐ง ", f"{f1:.2f}"),
):
    st.metric(metric_label, metric_text)
|
|
|
|
|
st.write("### ๐ Age Distribution ๐ง ")

# Histogram (with KDE overlay) of the Age column, when the dataset has one.
if "Age" in data.columns:
    # Draw on an explicit figure and close it after rendering: plt.clf() only
    # clears a figure, while pyplot keeps every figure it created until it is
    # closed, so each rerun would otherwise leave one more figure in memory.
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.histplot(data["Age"], kde=True, color="dodgerblue", bins=20, ax=ax)
    ax.set_title("Age Distribution ๐ง ")
    st.pyplot(fig)
    plt.close(fig)
else:
    st.warning("โ ๏ธ Age column not found in the dataset! ๐ง ")
|
|
|
|
|
st.write("### ๐ Confusion Matrix ๐ง ")

# Pin the label set to every class present in the full target, so the matrix
# keeps its shape (and stays aligned with the tick labels below) even when the
# test split happens to miss a class.
cm = confusion_matrix(y_test, y_pred, labels=np.unique(y))

# Explicit figure/axes, closed after rendering: plt.clf() only clears a figure
# and pyplot would otherwise keep it alive across reruns.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=["No Risk", "At Risk"],
    yticklabels=["No Risk", "At Risk"],
    ax=ax,
)
ax.set_title("Confusion Matrix ๐ง ")
ax.set_ylabel("True label ๐ง ")
ax.set_xlabel("Predicted label ๐ง ")
st.pyplot(fig)
plt.close(fig)
|
|
|
|
|
st.write("### ๐ Feature Importance ๐ง ")

# Rank the features by the forest's importance scores, highest first.
feature_importances = rf_model.feature_importances_
sorted_idx = np.argsort(feature_importances)[::-1]
sorted_features = np.array(features)[sorted_idx]
sorted_importances = feature_importances[sorted_idx]

# Horizontal bar chart on an explicit figure that is closed after rendering;
# plt.clf() only clears a figure and pyplot would keep it alive across reruns.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=sorted_importances, y=sorted_features, palette="viridis", ax=ax)
ax.set_title("Feature Importance ๐ง ")
ax.set_xlabel("Importance Score ๐ง ")
ax.set_ylabel("Features ๐ง ")
st.pyplot(fig)
plt.close(fig)
|
|
|
|
|
st.write("### ๐ Line Graph ๐ง ")

# Plot the chosen feature against the row index.
line_feature = st.selectbox("Select feature for Line Graph:", features)

# Explicit figure/axes, closed after rendering: plt.clf() only clears a figure
# and pyplot would otherwise keep it alive across reruns.
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(data=data, x=data.index, y=line_feature, color="green", ax=ax)
ax.set_title(f"Line Graph of {line_feature} ๐ง ")
ax.set_xlabel("Index ๐ง ")
ax.set_ylabel(line_feature)
st.pyplot(fig)
plt.close(fig)
|
|
|
st.write("### ๐ Area Graph ๐ง ")

# Same view as a line graph, with the region under the curve shaded.
area_feature = st.selectbox("Select feature for Area Graph:", features)

# Explicit figure/axes, closed after rendering: plt.clf() only clears a figure
# and pyplot would otherwise keep it alive across reruns.
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(
    data=data, x=data.index, y=area_feature, color="orange", linewidth=2, ax=ax
)
ax.fill_between(data.index, data[area_feature], color="orange", alpha=0.3)
ax.set_title(f"Area Graph of {area_feature} ๐ง ")
ax.set_xlabel("Index ๐ง ")
ax.set_ylabel(area_feature)
st.pyplot(fig)
plt.close(fig)
|
|
|
|
|
st.write("### ๐ Data Visualizations ๐ง ")


def _show_figure(fig):
    """Render *fig* in the app, then close it so pyplot does not retain it."""
    st.pyplot(fig)
    # plt.clf() would only clear the figure; closing actually releases it.
    plt.close(fig)


# One chart type is chosen per run; only that branch creates widgets/figures.
visualization_type = st.selectbox(
    "Choose a visualization type ๐ง :",
    ["2D Scatter Plot", "3D Scatter Plot", "Bar Chart", "Pie Chart", "Histogram"],
)

if visualization_type == "2D Scatter Plot":
    x_col = st.selectbox("Select X-axis feature ๐ง :", features)
    y_col = st.selectbox("Select Y-axis feature ๐ง :", features)
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.scatterplot(
        data=data, x=x_col, y=y_col, hue="AlzheimerRisk", palette="viridis", ax=ax
    )
    ax.set_title("2D Scatter Plot ๐ง ")
    _show_figure(fig)

elif visualization_type == "3D Scatter Plot":
    x_col = st.selectbox("Select X-axis feature ๐ง :", features)
    y_col = st.selectbox("Select Y-axis feature ๐ง :", features)
    z_col = st.selectbox("Select Z-axis feature ๐ง :", features)
    fig = plt.figure(figsize=(10, 8))
    ax = fig.add_subplot(111, projection="3d")
    # Points are coloured by the target value.
    scatter = ax.scatter(
        data[x_col],
        data[y_col],
        data[z_col],
        c=data["AlzheimerRisk"],
        cmap="viridis",
        s=50,
    )
    ax.set_xlabel(x_col)
    ax.set_ylabel(y_col)
    ax.set_zlabel(z_col)
    fig.colorbar(scatter, ax=ax, label="AlzheimerRisk ๐ง ")
    _show_figure(fig)

elif visualization_type == "Bar Chart":
    bar_feature = st.selectbox("Select feature for Bar Chart ๐ง :", features)
    fig, ax = plt.subplots(figsize=(10, 6))
    # Mean target value for each distinct value of the chosen feature.
    data.groupby(bar_feature)["AlzheimerRisk"].mean().plot(
        kind="bar", color="skyblue", ax=ax
    )
    ax.set_title("Bar Chart of Risk by Feature ๐ง ")
    ax.set_xlabel(bar_feature)
    ax.set_ylabel("Average Risk ๐ง ")
    _show_figure(fig)

elif visualization_type == "Pie Chart":
    # value_counts() orders by frequency, so a fixed label list would swap
    # "No Risk"/"At Risk" whenever class 1 is the majority and would not fit
    # at all when only one class is present. Sort by class value and derive
    # each slice's label and colour from its class instead.
    pie_counts = data["AlzheimerRisk"].value_counts().sort_index()
    risk_labels = {0: "No Risk ๐ง ", 1: "At Risk ๐ง "}
    risk_colors = {0: "green", 1: "red"}
    fig, ax = plt.subplots(figsize=(8, 8))
    ax.pie(
        pie_counts,
        labels=[risk_labels.get(cls, str(cls)) for cls in pie_counts.index],
        colors=[risk_colors.get(cls, "gray") for cls in pie_counts.index],
        autopct="%1.1f%%",
        startangle=140,
    )
    ax.set_title("Distribution of Alzheimer's Risk ๐ง ")
    _show_figure(fig)

elif visualization_type == "Histogram":
    hist_feature = st.selectbox("Select feature for Histogram ๐ง :", features)
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.histplot(
        data=data,
        x=hist_feature,
        hue="AlzheimerRisk",
        kde=True,
        palette="viridis",
        ax=ax,
    )
    ax.set_title("Histogram ๐ง ")
    _show_figure(fig)
|
|
|
|
|
st.write("### ๐ ROC Curve ๐ง ")

# Score of the second class column for each held-out row, then the ROC points
# and the area under them.
y_proba = rf_model.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, y_proba)
roc_auc = auc(fpr, tpr)

# Explicit figure/axes, closed after rendering: plt.clf() only clears a figure
# and pyplot would otherwise keep it alive across reruns.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(fpr, tpr, color="blue", lw=2, label=f"ROC Curve (AUC = {roc_auc:.2f}) ๐ง ")
# Diagonal reference line from (0, 0) to (1, 1).
ax.plot([0, 1], [0, 1], color="gray", linestyle="--")
ax.set_xlabel("False Positive Rate ๐ง ")
ax.set_ylabel("True Positive Rate ๐ง ")
ax.set_title("Receiver Operating Characteristic (ROC) Curve ๐ง ")
ax.legend(loc="lower right")
st.pyplot(fig)
plt.close(fig)
|
|
|
|
|
st.write("### ๐ Precision-Recall Curve ๐ง ")

# Precision/recall pairs computed from the held-out labels and y_proba.
precision, recall, _ = precision_recall_curve(y_test, y_proba)

# Explicit figure/axes, closed after rendering: plt.clf() only clears a figure
# and pyplot would otherwise keep it alive across reruns.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(recall, precision, color="green", lw=2, label="Precision-Recall Curve ๐ง ")
ax.set_xlabel("Recall ๐ง ")
ax.set_ylabel("Precision ๐ง ")
ax.set_title("Precision-Recall Curve ๐ง ")
ax.legend(loc="upper right")
st.pyplot(fig)
plt.close(fig)
|
|
|
|
|
st.write("### ๐งฎ Predict Alzheimer's Risk ๐ง ")

# Build one input row from widgets: a dropdown of the known categories for
# label-encoded columns, a number box (pre-filled with the column mean) for
# everything else.
input_data = {}
for feature in features:
    if feature in label_encoders:
        encoder = label_encoders[feature]
        choice = st.selectbox(f"{feature} ๐ฝ", encoder.classes_)
        # Convert the chosen category to the integer code used in training.
        input_data[feature] = encoder.transform([choice])[0]
    else:
        input_data[feature] = st.number_input(
            f"{feature} โ๏ธ", value=float(data[feature].mean())
        )

# Dict insertion order follows `features`, so the columns line up with the
# columns the model was fitted on.
input_df = pd.DataFrame([input_data])
prediction = rf_model.predict(input_df)[0]
prediction_proba = rf_model.predict_proba(input_df)[0]

st.write("### ๐ฉบ Prediction Result ๐ง ")
if prediction == 1:
    st.error("๐จ The person is **at risk** of Alzheimer's Disease ๐ง .")
else:
    # NOTE(review): this literal was split across two lines (a syntax error),
    # apparently by a mis-decoded emoji; it is rejoined here with the visible
    # characters kept -- TODO restore the intended emoji from the original.
    st.success("โ The person is **not at risk** of Alzheimer's Disease ๐ง .")

# predict_proba columns follow rf_model.classes_, so look up the position of
# the predicted class there. Using the label itself as an index breaks for
# boolean targets and for any class set other than exactly [0, 1].
class_position = list(rf_model.classes_).index(prediction)
st.write(f"๐ Prediction Confidence ๐ง : {prediction_proba[class_position]:.2f}")
|
|