Spaces:

drwaseem
/

Good

Sleeping

App Files Files Community

Good / app.py

drwaseem

Create app.py

f8ea024 verified 6 months ago

raw

history blame contribute delete

5.75 kB

	# Alzheimer's Prediction App with Random Forest Classifier
	# -----------------------------------------------------------
	# Made with ❤️ for the contest, featuring long code, animations, and brain 🧠 emojis.

	# Importing Libraries
	import streamlit as st
	import pandas as pd
	import numpy as np
	from sklearn.model_selection import train_test_split
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.metrics import (
	accuracy_score,
	classification_report,
	confusion_matrix,
	roc_curve,
	auc,
	)
	from sklearn.preprocessing import StandardScaler
	import matplotlib.pyplot as plt
	import seaborn as sns
	from plotly import graph_objs as go
	import time

	# Page-wide settings (browser tab title, icon, wide layout), issued ahead of
	# every other st.* call in this script.
	st.set_page_config(page_title="🧠 Alzheimer's Detection", page_icon="🧠", layout="wide")

	# Cosmetic splash: keep a spinner on screen for two seconds. No real work
	# happens inside it.
	with st.spinner("🚀 App is loading... Please wait!"):
	    time.sleep(2)

	# Page heading followed by the introductory markdown blurb.
	intro_markdown = """
	Welcome to the Alzheimer's Disease Prediction App! This tool uses a Random Forest Classifier
	to predict whether a patient has Alzheimer's disease based on clinical data.
	---
	"""
	st.title("🧠 Alzheimer's Disease Prediction")
	st.markdown(intro_markdown)

	# Sidebar uploader. The app ships no bundled dataset, so a CSV upload is
	# mandatory before anything else can run.
	st.sidebar.header("Upload Dataset")
	uploaded_file = st.sidebar.file_uploader(
	    "Upload your CSV file containing the dataset", type=["csv"]
	)

	# Guard clause: halt this script run until the user supplies a file.
	if uploaded_file is None:
	    st.sidebar.warning("⚠️ Please upload a dataset to proceed!")
	    st.stop()

	# A malformed, empty, or mis-encoded upload should produce a readable
	# message in the sidebar rather than a raw traceback in the main pane.
	try:
	    df = pd.read_csv(uploaded_file)
	except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
	    st.sidebar.error(f"❌ Could not read the uploaded CSV: {exc}")
	    st.stop()

	# A header-only file parses fine but leaves nothing to split or train on.
	if df.empty:
	    st.sidebar.error("❌ The uploaded CSV contains no data rows.")
	    st.stop()

	st.sidebar.success("✅ Dataset loaded successfully!")

	# Preview the first rows of the uploaded data.
	st.write("### Dataset Overview")
	st.dataframe(df.head())

	# Preprocessing: de-duplicate, report missing values, impute numeric gaps.
	st.write("### Data Preprocessing")
	with st.spinner("🔄 Preprocessing data..."):
	    time.sleep(1)  # purely cosmetic delay so the spinner is visible

	# Drop exact duplicate rows and report how many were removed.
	initial_rows = df.shape[0]
	df = df.drop_duplicates()
	final_rows = df.shape[0]
	st.write(f"🗑️ Removed {initial_rows - final_rows} duplicate rows.")

	# Show only the columns that actually contain missing values.
	missing_values = df.isnull().sum()
	st.write("#### Missing Values:")
	st.write(missing_values[missing_values > 0])

	# Impute numeric columns with their column mean. numeric_only=True is
	# required: on pandas >= 2.0 a bare df.mean() raises TypeError as soon as the
	# frame holds a non-numeric column (IDs, names, categorical text, ...).
	# Non-numeric columns are left untouched. Reassigning instead of using
	# inplace=True avoids mutating a frame that may be a view.
	df = df.fillna(df.mean(numeric_only=True))

	# The user picks the label column; every other column is a candidate feature.
	target = st.sidebar.selectbox("Select the Target Column", df.columns)
	X = df.drop(columns=[target])
	y = df[target]

	# StandardScaler cannot convert free-text columns to floats, so keep only
	# numeric/boolean features and tell the user which columns were ignored
	# instead of crashing on them. All-numeric datasets are unaffected.
	non_numeric_features = X.select_dtypes(exclude=["number", "bool"]).columns.tolist()
	if non_numeric_features:
	    st.sidebar.warning(
	        "Ignoring non-numeric feature columns: "
	        + ", ".join(str(name) for name in non_numeric_features)
	    )
	    X = X.drop(columns=non_numeric_features)

	# Nothing to train on if the target was the only usable column.
	if X.shape[1] == 0:
	    st.error("No numeric feature columns are available for training.")
	    st.stop()

	# Hold-out split; the slider is a percentage, train_test_split wants a fraction.
	test_size = st.sidebar.slider("Test Data Size (%)", min_value=10, max_value=50, value=20)
	X_train, X_test, y_train, y_test = train_test_split(
	    X, y, test_size=test_size / 100, random_state=42
	)

	# Fit the scaler on the training split only, then apply the same transform to
	# the test split so no test-set statistics leak into training.
	scaler = StandardScaler()
	X_train_scaled = scaler.fit_transform(X_train)
	X_test_scaled = scaler.transform(X_test)

	# Feature importances from a Random Forest fitted on the scaled training split.
	st.write("#### Feature Importance Visualization")
	with st.spinner("🌟 Generating feature importances..."):
	    rf = RandomForestClassifier(random_state=42)
	    rf.fit(X_train_scaled, y_train)
	    feature_importances = rf.feature_importances_

	# Horizontal bar chart: one bar per feature column, in column order.
	fig, ax = plt.subplots(figsize=(10, 6))
	sns.barplot(x=feature_importances, y=X.columns, ax=ax)
	ax.set_title("Feature Importance")
	ax.set_xlabel("Importance Score")
	ax.set_ylabel("Features")
	st.pyplot(fig)
	# pyplot keeps every figure alive until it is closed explicitly. Streamlit
	# reruns this script on each interaction, so unclosed figures accumulate.
	plt.close(fig)

	# --- Model fitting ---------------------------------------------------------
	st.write("### Training the Model")
	with st.spinner("🧠 Training the Random Forest Classifier..."):
	    time.sleep(2)  # cosmetic pause so the spinner is visible
	    # fit() returns the estimator itself, so construction and training chain.
	    rf = RandomForestClassifier(random_state=42).fit(X_train_scaled, y_train)
	st.success("🎉 Model trained successfully!")

	# --- Hold-out metrics ------------------------------------------------------
	st.write("### Model Evaluation")
	y_pred = rf.predict(X_test_scaled)
	accuracy = accuracy_score(y_test, y_pred)
	report_text = classification_report(y_test, y_pred)

	st.write(f"Accuracy: {accuracy * 100:.2f}%")
	st.write("#### Classification Report")
	# st.text keeps the fixed-width layout of the report intact.
	st.text(report_text)

	# Confusion matrix of the hold-out predictions.
	st.write("#### Confusion Matrix")
	# Pin the label order explicitly (sorted union of true and predicted labels,
	# which is also what confusion_matrix uses by default) so the heatmap ticks
	# can show the real class names instead of positional indices 0..n-1.
	class_labels = np.unique(np.concatenate([np.asarray(y_test), np.asarray(y_pred)]))
	conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)

	fig, ax = plt.subplots(figsize=(8, 6))
	sns.heatmap(
	    conf_matrix,
	    annot=True,
	    fmt="d",
	    cmap="Blues",
	    xticklabels=class_labels,
	    yticklabels=class_labels,
	    ax=ax,
	)
	ax.set_title("Confusion Matrix")
	ax.set_xlabel("Predicted")
	ax.set_ylabel("Actual")
	st.pyplot(fig)
	# Release the figure; pyplot otherwise retains it across Streamlit reruns.
	plt.close(fig)

	# ROC curve for the hold-out split.
	st.write("#### ROC Curve")
	# A single ROC curve is only defined for a two-class problem. With any other
	# number of classes, predict_proba(...)[:, 1] is either out of range (one
	# class) or not "the positive class" (multiclass), and roc_curve raises.
	if len(rf.classes_) == 2:
	    # Column 1 of predict_proba corresponds to rf.classes_[1]; pass that label
	    # as pos_label so targets not encoded as 0/1 (e.g. "No"/"Yes", 1/2) work.
	    positive_class = rf.classes_[1]
	    y_prob = rf.predict_proba(X_test_scaled)[:, 1]
	    fpr, tpr, thresholds = roc_curve(y_test, y_prob, pos_label=positive_class)
	    roc_auc = auc(fpr, tpr)

	    fig, ax = plt.subplots(figsize=(8, 6))
	    ax.plot(fpr, tpr, label=f"AUC = {roc_auc:.2f}", color="darkorange")
	    ax.plot([0, 1], [0, 1], "r--")  # chance-level diagonal for reference
	    ax.set_title("Receiver Operating Characteristic (ROC) Curve")
	    ax.set_xlabel("False Positive Rate")
	    ax.set_ylabel("True Positive Rate")
	    ax.legend(loc="lower right")
	    st.pyplot(fig)
	    # Release the figure; pyplot otherwise retains it across Streamlit reruns.
	    plt.close(fig)
	else:
	    st.info(
	        "ROC curve is only shown for binary targets; "
	        f"the selected target has {len(rf.classes_)} class(es)."
	    )

	# Exploratory plots: pairplot, correlation heatmap, per-feature histograms.
	# Every figure is closed right after rendering. pyplot retains figures until
	# they are closed explicitly, and the histogram loop alone creates one per
	# feature on every Streamlit rerun.
	st.write("### Additional Visualizations")
	with st.spinner("📊 Generating more graphs..."):
	    # Pairwise scatter plots, coloured by the selected target column.
	    st.write("#### Pairplot")
	    pairplot_fig = sns.pairplot(df, hue=target)
	    st.pyplot(pairplot_fig)
	    plt.close(pairplot_fig.figure)

	    # Correlation is only defined for numeric data. On pandas >= 2.0 a bare
	    # df.corr() raises on text columns, so restrict the frame first (this
	    # form also works on older pandas releases).
	    st.write("#### Correlation Heatmap")
	    numeric_df = df.select_dtypes(include=["number", "bool"])
	    fig, ax = plt.subplots(figsize=(10, 8))
	    sns.heatmap(numeric_df.corr(), annot=True, cmap="coolwarm", ax=ax)
	    ax.set_title("Correlation Matrix")
	    st.pyplot(fig)
	    plt.close(fig)

	    # One histogram (with KDE overlay) per feature column.
	    st.write("#### Feature Distributions")
	    for col in X.columns:
	        fig, ax = plt.subplots(figsize=(6, 4))
	        sns.histplot(df[col], kde=True, ax=ax)
	        ax.set_title(f"Distribution of {col}")
	        st.pyplot(fig)
	        plt.close(fig)

	# Sidebar control for persisting the trained classifier.
	st.sidebar.write("### Save Model")
	save_model = st.sidebar.button("Save Model")
	if save_model:
	    # joblib is imported lazily: it is only needed when the button is pressed.
	    import joblib

	    # Written relative to the app's working directory, i.e. on the machine
	    # running the Streamlit server.
	    joblib.dump(rf, "alzheimers_model.pkl")
	    st.sidebar.success("🎉 Model saved as 'alzheimers_model.pkl'!")

	# Closing footer: divider, sign-off message, and a balloon animation.
	st.write("---")
	st.write("🧠 Thank you for using the Alzheimer's Disease Prediction App!")
	st.balloons()