"""Streamlit app that predicts heart disease and diabetes from user input.

The page collects demographic, blood-test, cholesterol and cardio readings,
encodes the categorical answers as integers, and, when the Submit button is
pressed, trains one soft-voting ensemble per target on the bundled CSV
dataset. It then shows the predicted probability for the entered values
together with hold-out metrics for each model.
"""
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
from lightgbm import LGBMClassifier
from sklearn.ensemble import (
    GradientBoostingClassifier,
    RandomForestClassifier,
    VotingClassifier,
)
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

# Long ECG option label, shared by the radio widget and its integer encoding.
ST_T_ABNORMALITY = (
    'ST-T Wave Abnormality (T Wave Inversions and/or ST Elevation or '
    'Depression of > 0.05 mV)'
)


def get_dataset():
    """Load the training dataset from the CSV file next to the app.

    Returns:
        pandas.DataFrame: the raw contents of the CSV. The Submit handler
        expects it to contain 'Heart' and 'Diabetes' target columns.
    """
    # NOTE(review): the file name spelling ('Fianl') is kept as-is because it
    # must match the file on disk -- confirm before renaming either one.
    return pd.read_csv('Fianl Dataset.csv')


def _build_ensemble():
    """Return an unfitted soft-voting ensemble (RF + Naive Bayes + GB)."""
    return VotingClassifier(
        estimators=[
            ('Random Forest', RandomForestClassifier(random_state=42)),
            ('Naive Bayes', GaussianNB()),
            ('Gradient Boosting', GradientBoostingClassifier(random_state=42)),
        ],
        voting='soft',
    )


def _fit_and_report(condition, metrics_label, X_train, y_train, X_test,
                    y_test, user_features, threshold=0.5):
    """Train an ensemble for one target and render its results.

    Args:
        condition: name used in the prediction messages (e.g. "Diabetes").
        metrics_label: name used in the metrics heading (e.g. "Heart").
        X_train, y_train: training features and labels.
        X_test, y_test: held-out features and labels used for the metrics.
        user_features: single-row frame with the values entered in the UI.
        threshold: probability at or above which the positive message shows.
    """
    model = _build_ensemble()
    model.fit(X_train, y_train)

    # Column 1 is taken as the positive class, as the original code did.
    # NOTE(review): this assumes a binary 0/1 target -- confirm with the data.
    positive_proba = model.predict_proba(user_features)[0][1]
    if positive_proba >= threshold:
        st.header(f"Predicted {condition}: You might have {condition}")
    else:
        st.header(f"Predicted {condition}: You do not have {condition}")
    st.write(f"Predicted Probability of Having {condition}:",
             positive_proba * 100)

    # Metrics must come from predictions on the held-out set. Comparing the
    # single user prediction with y_test can never line up in length.
    test_predictions = model.predict(X_test)
    st.write(f"{metrics_label} Prediction Metrics:")
    st.write("Accuracy:", accuracy_score(y_test, test_predictions))
    st.write("Precision:", precision_score(y_test, test_predictions))
    st.write("Recall:", recall_score(y_test, test_predictions))
    st.write("F1-score:", f1_score(y_test, test_predictions))


st.title('Disease Prediction Application')
st.write(''' Please fill in the attributes below, then hit the Submit button to get your results. ''')

st.header('General Information')
gen = st.radio("Your Gender", ('Male', 'Female'))
st.write(''' ''')
age = st.slider('Your Age (Years)', min_value=0.0, max_value=100.0, value=50.0, step=1.0)
st.write(''' ''')
bmi = st.slider('BMI', min_value=0.0, max_value=50.0, value=25.0, step=0.1)
st.write(''' ''')

st.header('Blood Test Information')
bs = st.radio("Is Your Fasting Blood Sugar > 120 mg/dl?", ('Yes', 'No'))
st.write(''' ''')
urea = st.slider('Urea', min_value=0.0, max_value=100.0, value=50.0, step=0.1)
st.write(''' ''')
cr = st.slider('Creatinine Ratio(Cr)', min_value=0.0, max_value=1000.0, value=500.0, step=1.0)
st.write(''' ''')
hb = st.slider('HbA1c', min_value=0.0, max_value=20.0, value=10.0, step=0.1)
st.write(''' ''')

st.header('Cholesterol Test Information')
chol = st.slider('Cholesterol (Chol)', min_value=0.0, max_value=20.0, value=10.0, step=0.1)
st.write(''' ''')
tg = st.slider('Triglycerides(TG) Cholesterol', min_value=0.0, max_value=20.0, value=10.0, step=0.1)
st.write(''' ''')
hdl = st.slider('HDL Cholesterol', min_value=0.0, max_value=20.0, value=10.0, step=0.1)
st.write(''' ''')
ldl = st.slider('LDL Cholesterol', min_value=0.0, max_value=20.0, value=10.0, step=0.1)
st.write(''' ''')
vldl = st.slider('VLDL Cholesterol', min_value=0.0, max_value=50.0, value=25.0, step=0.1)
st.write(''' ''')

st.header('Cardio Test Information')
cp = st.radio("Chest Pain", ('Typical Angina', 'Atypical Angina', 'Non-Anginal Pain', 'Asymptomatic'))
st.write(''' ''')
max_heart = st.slider('Maximum Heart Rate', min_value=0.0, max_value=300.0, value=150.0, step=1.0)
st.write(''' ''')
resting_bp = st.slider('Resting Blood Pressure (In mm Hg)', min_value=0.0, max_value=200.0, value=100.0, step=1.0)
st.write(''' ''')
# Named rest_ecg rather than `re` so it does not shadow the stdlib module name.
rest_ecg = st.radio(
    "Resting Electrocardiogram Results",
    (
        'Normal',
        ST_T_ABNORMALITY,
        'Showing Probable or Definite Left Ventricular Hypertrophy by Estes Criteria',
    ),
)
st.write(''' ''')
ex = st.radio("Exercise Induced Angina", ('Yes', 'No'))
st.write(''' ''')
oldpeak = st.slider('ST Depression Induced by Exercise Relative to Rest', min_value=-5.0, max_value=5.0, value=0.0, step=0.01)
st.write(''' ''')
sp = st.radio("The Slope of the Peak Exercise ST Segment", ('Upsloping', 'Flat', 'Downsloping'))
st.write(''' ''')

# Encode the categorical answers as integers. Any option not listed in a
# mapping falls back to the same value the last `else` branch used to give.
gender = 1 if gen == "Male" else 0
chest = {"Typical Angina": 1, "Atypical Angina": 2, "Non-Anginal Pain": 3}.get(cp, 4)
blood_sugar = 1 if bs == "Yes" else 0
electro = {"Normal": 0, ST_T_ABNORMALITY: 1}.get(rest_ecg, 2)
exercise = 1 if ex == "Yes" else 0
slope = {"Upsloping": 1, "Flat": 2}.get(sp, 3)

# NOTE(review): the values are matched to the dataset columns by position,
# so this order presumably mirrors the CSV's feature order -- verify.
user_input = np.array([
    age, gender, chest, blood_sugar, resting_bp, electro, max_heart,
    exercise, oldpeak, slope, urea, cr, hb, chol, tg, hdl, ldl, vldl, bmi,
]).reshape(1, -1)

if st.button('Submit'):
    df = get_dataset()

    # Everything except the two target columns is a feature.
    X = df.drop(['Diabetes', 'Heart'], axis=1)
    y_heart = df['Heart']
    y_diabetes = df['Diabetes']

    # One shared split so both models see the same train and test rows.
    (X_train, X_test,
     y_heart_train, y_heart_test,
     y_diabetes_train, y_diabetes_test) = train_test_split(
        X, y_heart, y_diabetes, test_size=0.2, random_state=42
    )

    if user_input.shape[1] != X_train.shape[1]:
        st.write("Error: Input features do not match the dataset. Please provide valid input.")
    else:
        # Reuse the training column names so the fitted estimators receive
        # input with the same feature names they were trained on.
        user_features = pd.DataFrame(user_input, columns=X.columns)

        _fit_and_report(
            "Heart Disease", "Heart",
            X_train, y_heart_train, X_test, y_heart_test, user_features,
        )
        st.write("____________________________________________________________________________________________")
        _fit_and_report(
            "Diabetes", "Diabetes",
            X_train, y_diabetes_train, X_test, y_diabetes_test, user_features,
        )