"""Streamlit app that scores liver-disease risk with several regressors.

The user enters ten clinical attributes, optionally picks extra regression
models, and presses Submit. On submit the app loads ``Liver.csv``, trains a
fixed voting ensemble plus every selected model, shows each model's
prediction for the entered attributes, and compares the models' hold-out
metrics in four bar charts.
"""

import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
from lightgbm import LGBMRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor, VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor

# Feature names, in the order the dataset columns are renamed to and the
# order the user's values are assembled in.
FEATURE_COLUMNS = ["age", "gender", "tb", "db", "aap", "sgpt", "sgot", "tp", "alb", "ag"]

# Display name -> estimator class. This single table drives both the
# multiselect options and model construction, so every selectable model can
# actually be built (the MLP entry used to be checked for but never offered).
MODEL_FACTORIES = {
    'Random Forest': RandomForestRegressor,
    'Linear Regression': LinearRegression,
    'K-Nearest Neighbors': KNeighborsRegressor,
    'Decision Tree': DecisionTreeRegressor,
    'Support Vector Machine': SVR,
    'Gradient Boosting Regression': GradientBoostingRegressor,
    'XGBoost Regression': XGBRegressor,
    'LightGBM Regression': LGBMRegressor,
    'Neural Network (MLP) Regression': MLPRegressor,
}

SEPARATOR = '------------------------------------------------------------------------------------------------------'

st.title('Liver Disease Prediction Application')
st.write('''
Please fill in the attributes below, then hit the Submit button to get your results.
''')

st.header('Input Attributes')
age = st.slider('Your Age (Years)', min_value=0.0, max_value=100.0, value=50.0, step=1.0)
st.write(''' ''')
gen = st.radio("Your Gender", ('Male', 'Female'))
st.write(''' ''')
tb = st.slider('Total Bilirubin (TB)', min_value=0.0, max_value=100.0, value=50.0, step=0.1)
st.write(''' ''')
db = st.slider('Direct Bilirubin (DB)', min_value=0.0, max_value=20.0, value=10.0, step=0.1)
st.write(''' ''')
aap = st.slider('Alkphos Alkaline Phosphotase', min_value=0.0, max_value=2400.0, value=1200.0, step=1.0)
st.write(''' ''')
sgpt = st.slider('SGPT Alamine Aminotransferase', min_value=0.0, max_value=2400.0, value=1200.0, step=1.0)
st.write(''' ''')
sgot = st.slider('SGOT Aspartate Aminotransferase', min_value=0.0, max_value=5000.0, value=2500.0, step=1.0)
st.write(''' ''')
tp = st.slider('Total Protiens (TP)', min_value=0.0, max_value=10.0, value=5.0, step=0.1)
st.write(''' ''')
alb = st.slider('ALB Albumin', min_value=0.0, max_value=10.0, value=5.0, step=0.1)
st.write(''' ''')
ag = st.slider('A/G Ratio Albumin and Globulin Ratio', min_value=0.0, max_value=10.0, value=5.0, step=0.1)
st.write(''' ''')
selected_models = st.multiselect("Choose Regressor Models", tuple(MODEL_FACTORIES))
st.write(''' ''')

# Instantiate the chosen models in the table's fixed order (not click order).
models_to_run = [
    factory() for name, factory in MODEL_FACTORIES.items() if name in selected_models
]

# gender conversion: same encoding get_dataset() applies to the training data
gender = 1 if gen == "Male" else 0

# A one-row DataFrame carrying the training column names: the models are
# fitted on a named-column DataFrame, and predicting from a bare array
# triggers feature-name warnings (or a mismatch error on some XGBoost
# versions).
user_input = pd.DataFrame(
    [[age, gender, tb, db, aap, sgpt, sgot, tp, alb, ag]],
    columns=FEATURE_COLUMNS,
)


def get_dataset():
    """Load ``Liver.csv`` and return it cleaned and numerically encoded.

    Rows containing any NaN are dropped, 'Gender of the patient' is mapped
    Male -> 1 / Female -> 0, and 'Result' is remapped 1 -> 1 and 2 -> 0.

    Returns:
        pandas.DataFrame: the cleaned dataset with its original column names.
    """
    data = pd.read_csv('Liver.csv', encoding='unicode_escape')
    # delete NaN values
    data = data.dropna()
    # Mapping 'Male' to 1 and 'Female' to 0 in the 'Gender of the patient' column
    data['Gender of the patient'] = data['Gender of the patient'].map({'Male': 1, 'Female': 0})
    # No liver disease -> 0, having liver disease -> 1
    data['Result'] = data['Result'].map({1: 1, 2: 0})
    return data


def generate_model_labels(model_names):
    """Return a short chart label for every name in *model_names*.

    A name made of several whitespace-separated words is abbreviated to its
    initials; a single-word (or empty) name is cut to its first three
    characters.

    Args:
        model_names: iterable of model name strings.

    Returns:
        list: one label string per input name, in the same order.
    """
    model_labels = []
    for name in model_names:
        words = name.split()
        if len(words) > 1:
            # Multiple words, use initials
            label = "".join(word[0] for word in words)
        else:
            # Single word, take the first 3 letters
            label = name[:3]
        model_labels.append(label)
    return model_labels


def _score_model(model, X_test, y_test):
    """Return ``(r2, rmse, mse, mae)`` of a fitted *model* on the hold-out set."""
    # Predict once and reuse the result for every metric.
    test_predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, test_predictions)
    return (
        r2_score(y_test, test_predictions),
        np.sqrt(mse),
        mse,
        mean_absolute_error(y_test, test_predictions),
    )


if st.button('Submit'):
    df = get_dataset()

    # fix column names
    df.columns = FEATURE_COLUMNS + ["result"]

    # Split the dataset into train and test
    X = df.drop('result', axis=1)
    y = df['result']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1337)

    # Create two columns to divide the screen
    left_column, right_column = st.columns(2)

    # Left column content
    with left_column:
        # Fixed voting ensemble (independent of the multiselect choices)
        ensemble = VotingRegressor(
            estimators=[('rf', RandomForestRegressor()), ('xg', XGBRegressor()), ('dt', DecisionTreeRegressor())]
        )

        # Fit the voting regressor to the training data
        ensemble.fit(X_train, y_train)

        # Predict the risk score for the attributes the user entered
        ensemble_predictions = ensemble.predict(user_input)

        # Evaluate the model's performance on the test set
        ensemble_r2, ensemble_rmse, ensemble_mse, ensemble_mae = _score_model(ensemble, X_test, y_test)

        st.write(f'According to Ensemble Model, Your Liver Disease Risk Score is: {ensemble_predictions[0]:.1f}')
        st.write('Ensemble Model R-squared (R2) Score:', ensemble_r2)
        st.write('Ensemble Model Root Mean Squared Error (RMSE):', ensemble_rmse)
        st.write('Ensemble Model Mean Squared Error (MSE):', ensemble_mse)
        st.write('Ensemble Model Mean Absolute Error (MAE):', ensemble_mae)
        st.write(SEPARATOR)

    # Lists of model names and their respective performance metrics
    model_names = ['Ensemble']
    r2_scores = [ensemble_r2]
    rmses = [ensemble_rmse]
    mses = [ensemble_mse]
    maes = [ensemble_mae]

    # Right column content
    with right_column:
        for model in models_to_run:
            model_name = type(model).__name__

            # Train the selected model
            model.fit(X_train, y_train)

            # Predict the risk score for the attributes the user entered
            model_predictions = model.predict(user_input)

            # Evaluate the model's performance on the test set
            model_r2, model_rmse, model_mse, model_mae = _score_model(model, X_test, y_test)

            st.write(f'According to {model_name} Model, Your Liver Disease Risk Score is: {model_predictions[0]:.2f}')
            st.write(f'{model_name} Model R-squared (R2) Score:', model_r2)
            st.write(f'{model_name} Model Root Mean Squared Error (RMSE):', model_rmse)
            st.write(f'{model_name} Model Mean Squared Error (MSE):', model_mse)
            st.write(f'{model_name} Model Mean Absolute Error (MAE):', model_mae)
            st.write(SEPARATOR)

            # Append model metrics to lists
            model_names.append(model_name)
            r2_scores.append(model_r2)
            rmses.append(model_rmse)
            mses.append(model_mse)
            maes.append(model_mae)

    # Create a DataFrame to store the performance metrics
    metrics_df = pd.DataFrame({
        'Model': model_names,
        'R-squared (R2)': r2_scores,
        'Root Mean Squared Error (RMSE)': rmses,
        'Mean Squared Error (MSE)': mses,
        'Mean Absolute Error (MAE)': maes,
    })

    # Get the model labels
    model_labels = generate_model_labels(metrics_df['Model'])

    # Plot the comparison graphs on an explicit figure so it can be handed to
    # Streamlit directly instead of relying on pyplot's global state.
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))

    # R-squared (R2) score comparison
    axes[0, 0].bar(model_labels, metrics_df['R-squared (R2)'], color='skyblue')
    axes[0, 0].set_title('R-squared (R2) Score Comparison')
    axes[0, 0].set_ylim(0, 1)

    # Root Mean Squared Error (RMSE) comparison
    axes[0, 1].bar(model_labels, metrics_df['Root Mean Squared Error (RMSE)'], color='orange')
    axes[0, 1].set_title('Root Mean Squared Error (RMSE) Comparison')

    # Mean Squared Error (MSE) comparison
    axes[1, 0].bar(model_labels, metrics_df['Mean Squared Error (MSE)'], color='green')
    axes[1, 0].set_title('Mean Squared Error (MSE) Comparison')

    # Mean Absolute Error (MAE) comparison
    axes[1, 1].bar(model_labels, metrics_df['Mean Absolute Error (MAE)'], color='purple')
    axes[1, 1].set_title('Mean Absolute Error (MAE) Comparison')

    # Adjust layout to prevent overlapping of titles
    fig.tight_layout()

    # Display the graphs in Streamlit, then release the figure so repeated
    # submits do not accumulate open figures.
    st.pyplot(fig)
    plt.close(fig)