"""Streamlit app that trains regressors on ``heart.csv`` and scores one patient.

The page collects eleven clinical attributes, lets the user pick any number of
regressors, and on Submit fits a fixed voting ensemble plus every selected
model, shows the prediction for the entered attributes together with test-set
metrics, and finally plots a metric comparison across all fitted models.
"""
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
from lightgbm import LGBMRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor, VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor

# Column names assigned (by position) to the CSV; the last one is the target.
FEATURE_COLUMNS = [
    "age",
    "sex",
    "chest pain type",
    "resting bp s",
    "cholesterol",
    "fasting blood sugar",
    "resting ecg",
    "max heart rate",
    "exercise angina",
    "oldpeak",
    "ST slope",
]
TARGET_COLUMN = "target"

# Radio-button captions and the numeric codes they are encoded to.
GENDER_CODES = {'Male': 1, 'Female': 0}
CHEST_PAIN_CODES = {
    'Typical Angina': 1,
    'Atypical Angina': 2,
    'Non-Anginal Pain': 3,
    'Asymptomatic': 4,
}
YES_NO_CODES = {'Yes': 1, 'No': 0}
ECG_CODES = {
    'Normal': 0,
    'ST-T Wave Abnormality (T Wave Inversions and/or ST Elevation or Depression of > 0.05 mV)': 1,
    'Showing Probable or Definite Left Ventricular Hypertrophy by Estes Criteria': 2,
}
SLOPE_CODES = {'Upsloping': 1, 'Flat': 2, 'Downsloping': 3}

# Display name -> estimator class.  The multiselect options are taken from
# this table so that every constructor listed here is actually selectable
# (the MLP entry used to be checked for but was never offered to the user).
MODEL_FACTORIES = {
    'Random Forest': RandomForestRegressor,
    'Linear Regression': LinearRegression,
    'K-Nearest Neighbors': KNeighborsRegressor,
    'Decision Tree': DecisionTreeRegressor,
    'Support Vector Machine': SVR,
    'Gradient Boosting Regression': GradientBoostingRegressor,
    'XGBoost Regression': XGBRegressor,
    'LightGBM Regression': LGBMRegressor,
    'Neural Network (MLP) Regression': MLPRegressor,
}

SEPARATOR = '------------------------------------------------------------------------------------------------------'


def get_dataset():
    """Read ``heart.csv`` from the working directory and return it as a DataFrame."""
    data = pd.read_csv('heart.csv')
    return data


def generate_model_labels(model_names):
    """Return a short plot label for every name in *model_names*.

    A name made of several whitespace-separated words is reduced to the
    initials of those words; a single-word name is cut to its first three
    characters.
    """
    model_labels = []
    for name in model_names:
        words = name.split()
        if len(words) > 1:
            label = "".join(word[0] for word in words)
        else:
            label = name[:3]
        model_labels.append(label)
    return model_labels


def _fit_and_score(model, X_train, y_train, X_test, y_test, patient_row):
    """Fit *model* once and return its patient prediction and test-set metrics."""
    model.fit(X_train, y_train)
    # Predict the hold-out set a single time and derive every metric from it.
    test_predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, test_predictions)
    return {
        'prediction': model.predict(patient_row)[0],
        'r2': r2_score(y_test, test_predictions),
        'rmse': np.sqrt(mse),
        'mse': mse,
        'mae': mean_absolute_error(y_test, test_predictions),
    }


def _show_scores(display_name, scores):
    """Write the prediction and the four metrics of one model to the page."""
    st.write(f'According to {display_name} Model, You have a predicted value of: {scores["prediction"]:.2f}')
    st.write(f'{display_name} Model R-squared (R2) Score:', scores['r2'])
    st.write(f'{display_name} Model Root Mean Squared Error (RMSE):', scores['rmse'])
    st.write(f'{display_name} Model Mean Squared Error (MSE):', scores['mse'])
    st.write(f'{display_name} Model Mean Absolute Error (MAE):', scores['mae'])
    st.write(SEPARATOR)


st.title('Heart Disease Prediction Application')
st.write('''
Please fill in the attributes below, then hit the Submit button
to get your results.
''')

st.header('Input Attributes')

age = st.slider('Your Age (Years)', min_value=0.0, max_value=100.0, value=50.0, step=1.0)
st.write(''' ''')

gen = st.radio("Your Gender", ('Male', 'Female'))
st.write(''' ''')

cp = st.radio("Chest Pain", ('Typical Angina', 'Atypical Angina', 'Non-Anginal Pain', 'Asymptomatic'))
st.write(''' ''')

resting_bp = st.slider('Resting Blood Pressure (In mm Hg)', min_value=0.0, max_value=200.0, value=100.0, step=1.0)
st.write(''' ''')

serum = st.slider('Serum Cholesterol (In mg/dl)', min_value=0.0, max_value=400.0, value=200.0, step=1.0)
st.write(''' ''')

bs = st.radio("Is Your Fasting Blood Sugar > 120 mg/dl?", ('Yes', 'No'))
st.write(''' ''')

# Named ``ecg`` rather than ``re`` so the stdlib module name is not shadowed.
ecg = st.radio("Resting Electrocardiogram Results", tuple(ECG_CODES))
st.write(''' ''')

max_heart = st.slider('Maximum Heart Rate', min_value=0.0, max_value=300.0, value=150.0, step=1.0)
st.write(''' ''')

ex = st.radio("Exercise Induced Angina", ('Yes', 'No'))
st.write(''' ''')

oldpeak = st.slider('ST Depression Induced by Exercise Relative to Rest',
                    min_value=-5.0, max_value=5.0, value=0.0, step=0.01)
st.write(''' ''')

sp = st.radio("The Slope of the Peak Exercise ST Segment", ('Upsloping', 'Flat', 'Downsloping'))
st.write(''' ''')

selected_models = st.multiselect("Choose Regressor Models", tuple(MODEL_FACTORIES))
st.write(''' ''')

# Instantiate the chosen models in the fixed table order (not click order).
models_to_run = [
    factory()
    for display_name, factory in MODEL_FACTORIES.items()
    if display_name in selected_models
]

# Encode the categorical answers to the numeric codes used for prediction.
gender = GENDER_CODES[gen]
chest = CHEST_PAIN_CODES[cp]
blood_sugar = YES_NO_CODES[bs]
electro = ECG_CODES[ecg]
exercise = YES_NO_CODES[ex]
slope = SLOPE_CODES[sp]

# Build the patient row keyed by column name so each value lands in the same
# feature slot the models are trained on.  The previous positional array put
# blood sugar, blood pressure and cholesterol in one another's columns.
user_input = pd.DataFrame(
    [{
        "age": age,
        "sex": gender,
        "chest pain type": chest,
        "resting bp s": resting_bp,
        "cholesterol": serum,
        "fasting blood sugar": blood_sugar,
        "resting ecg": electro,
        "max heart rate": max_heart,
        "exercise angina": exercise,
        "oldpeak": oldpeak,
        "ST slope": slope,
    }],
    columns=FEATURE_COLUMNS,
    dtype=float,
)

if st.button('Submit'):
    try:
        df = get_dataset()
    except FileNotFoundError:
        # Show a readable message instead of a traceback when the CSV is absent.
        st.error("Dataset file 'heart.csv' was not found.")
        st.stop()

    # Normalise the column names (assigned by position).
    df.columns = FEATURE_COLUMNS + [TARGET_COLUMN]

    # Split the dataset into train and test
    X = df.drop(TARGET_COLUMN, axis=1)
    y = df[TARGET_COLUMN]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Each entry: (display name, metrics dict); the ensemble always comes first.
    results = []

    left_column, right_column = st.columns(2)

    with left_column:
        # The ensemble is a fixed trio and does not depend on the multiselect.
        ensemble = VotingRegressor(
            estimators=[('rf', RandomForestRegressor()),
                        ('lr', LinearRegression()),
                        ('dt', DecisionTreeRegressor())]
        )
        ensemble_scores = _fit_and_score(ensemble, X_train, y_train, X_test, y_test, user_input)
        _show_scores('Ensemble', ensemble_scores)
        results.append(('Ensemble', ensemble_scores))

    # Placeholder element kept from the original layout.
    st.empty()

    with right_column:
        for model in models_to_run:
            model_name = type(model).__name__
            # Fit once; the same scores feed both the text and the charts so
            # stochastic models cannot show two different sets of numbers.
            model_scores = _fit_and_score(model, X_train, y_train, X_test, y_test, user_input)
            _show_scores(model_name, model_scores)
            results.append((model_name, model_scores))

    # Collect the metrics of every fitted model into one table.
    metrics_df = pd.DataFrame({
        'Model': [name for name, _ in results],
        'R-squared (R2)': [scores['r2'] for _, scores in results],
        'Root Mean Squared Error (RMSE)': [scores['rmse'] for _, scores in results],
        'Mean Squared Error (MSE)': [scores['mse'] for _, scores in results],
        'Mean Absolute Error (MAE)': [scores['mae'] for _, scores in results],
    })

    model_labels = generate_model_labels(metrics_df['Model'])

    # One bar chart per metric on an explicit figure (no global pyplot state).
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    panels = [
        ('R-squared (R2)', 'R-squared (R2) Score Comparison', 'skyblue'),
        ('Root Mean Squared Error (RMSE)', 'Root Mean Squared Error (RMSE) Comparison', 'orange'),
        ('Mean Squared Error (MSE)', 'Mean Squared Error (MSE) Comparison', 'green'),
        ('Mean Absolute Error (MAE)', 'Mean Absolute Error (MAE) Comparison', 'purple'),
    ]
    for axis, (column, title, color) in zip(axes.flat, panels):
        axis.bar(model_labels, metrics_df[column], color=color)
        axis.set_title(title)

    # R2 can be negative for a poor fit; widen the axis rather than hide the bar.
    axes[0, 0].set_ylim(min(0.0, float(metrics_df['R-squared (R2)'].min())), 1)

    # Adjust layout to prevent overlapping of titles
    fig.tight_layout()

    # Pass the figure explicitly; calling st.pyplot() with no figure is deprecated.
    st.pyplot(fig)