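"""Streamlit app for diabetes risk prediction.

The user enters clinical attributes and picks one or more classifiers; on Submit the
app trains the selected models plus a soft-voting ensemble on Diabetes.csv, reports
each model's prediction and test-set metrics, and plots a comparison chart.

NOTE (assumptions): the optional xgboost and lightgbm packages must be installed, and a
'Diabetes.csv' file with the Gender/CLASS encodings handled below must be present in the
working directory. Run with `streamlit run <this file>`.
"""
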
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

st.title('Diabetes Prediction Application')

st.write('''
Please fill in the attributes below, then hit the Submit button
to get your results.
''')

st.header('Input Attributes')
age = st.slider('Your Age (Years)', min_value=0.0, max_value=100.0, value=50.0, step=1.0)
st.write(''' ''')
gen = st.radio("Your Gender", ('Male', 'Female'))
st.write(''' ''')

# Encode gender with the same scheme used for the training data ('M'/'Male' -> 1)
if gen == "Male":
    gender = 1
else:
    gender = 0

urea = st.slider('Urea', min_value=0.0, max_value=100.0, value=50.0, step=0.1)
st.write(''' ''')
cr = st.slider('Creatinine Ratio (Cr)', min_value=0.0, max_value=1000.0, value=500.0, step=1.0)
st.write(''' ''')
hb = st.slider('HbA1c', min_value=0.0, max_value=20.0, value=10.0, step=0.1)
st.write(''' ''')
chol = st.slider('Cholesterol (Chol)', min_value=0.0, max_value=20.0, value=10.0, step=0.1)
st.write(''' ''')
tg = st.slider('Triglycerides (TG)', min_value=0.0, max_value=20.0, value=10.0, step=0.1)
st.write(''' ''')
hdl = st.slider('HDL Cholesterol', min_value=0.0, max_value=20.0, value=10.0, step=0.1)
st.write(''' ''')
ldl = st.slider('LDL Cholesterol', min_value=0.0, max_value=20.0, value=10.0, step=0.1)
st.write(''' ''')
vldl = st.slider('VLDL Cholesterol', min_value=0.0, max_value=50.0, value=25.0, step=0.1)
st.write(''' ''')
bmi = st.slider('BMI', min_value=0.0, max_value=50.0, value=25.0, step=0.1)
st.write(''' ''')

selected_models = st.multiselect("Choose Classifier Models",
                                 ('Random Forest', 'Naïve Bayes', 'Logistic Regression', 'K-Nearest Neighbors',
                                  'Decision Tree', 'Gradient Boosting', 'LightGBM', 'XGBoost',
                                  'Multilayer Perceptron', 'Artificial Neural Network', 'Support Vector Machine'))
st.write(''' ''')

# Map each selectable name to an estimator instance; only the user's selections are trained
available_models = {
    'Random Forest': RandomForestClassifier(),
    'Naïve Bayes': GaussianNB(),
    'Logistic Regression': LogisticRegression(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(),
    'Gradient Boosting': GradientBoostingClassifier(),
    'Support Vector Machine': SVC(probability=True),  # probability=True enables predict_proba
    'LightGBM': LGBMClassifier(),
    'XGBoost': XGBClassifier(),
    'Multilayer Perceptron': MLPClassifier(),
    'Artificial Neural Network': MLPClassifier(hidden_layer_sizes=(100,), max_iter=100),
}

models_to_run = [available_models[name] for name in selected_models]

# Assemble the inputs in the same order as the training features:
# gender, age, urea, cr, hb, chol, tg, hdl, ldl, vldl, bmi
user_input = np.array([gender, age, urea, cr, hb, chol, tg, hdl,
                       ldl, vldl, bmi]).reshape(1, -1)

def get_dataset():
    data = pd.read_csv('Diabetes.csv')

    # Encode the target: 'N' (non-diabetic) -> 0, anything else -> 1
    data['CLASS'] = data['CLASS'].apply(lambda x: 0 if x == 'N' else 1)

    # Encode gender: 'M' -> 1, otherwise 0
    data['Gender'] = data['Gender'].apply(lambda x: 1 if x == 'M' else 0)

    return data

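# Optionally, get_dataset could be wrapped with Streamlit's st.cache_data decorator to
# avoid re-reading the CSV on every interaction (assumes a recent Streamlit version).
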
def generate_model_labels(model_names):
    """Create short bar-chart labels: word initials for multi-word names, else the first three letters."""
    model_labels = []
    for name in model_names:
        words = name.split()
        if len(words) > 1:
            label = "".join(word[0] for word in words)
        else:
            label = name[:3]
        model_labels.append(label)
    return model_labels

if st.button('Submit'):
    df = get_dataset()

    # Rename the raw CSV columns to short, consistent names
    df.columns = (["id", "pation_no", "gender", "age", "urea", "cr",
                   "hb", "chol", "tg", "hdl", "ldl",
                   "vldl", "bmi", "target"])

    X = df.drop(['target', 'id', 'pation_no'], axis=1)
    y = df['target']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    left_column, right_column = st.columns(2)

    with left_column:
        # Soft-voting ensemble of three tree-based models, always shown for comparison
        ensemble = VotingClassifier(
            estimators=[('rf', RandomForestClassifier()), ('xgb', XGBClassifier()), ('lgbm', LGBMClassifier())],
            voting='soft')

        ensemble.fit(X_train, y_train)

        model_predictions = ensemble.predict(user_input)
        model_prob = ensemble.predict_proba(user_input)[:, 1]

        ensemble_test_predictions = ensemble.predict(X_test)
        ensemble_accuracy = accuracy_score(y_test, ensemble_test_predictions)
        ensemble_precision = precision_score(y_test, ensemble_test_predictions)
        ensemble_recall = recall_score(y_test, ensemble_test_predictions)
        ensemble_f1score = f1_score(y_test, ensemble_test_predictions)

        if model_predictions[0] == 1:
            st.write('According to the Ensemble Model you have a **Very High Chance (1)** of Diabetes.')
        else:
            st.write('According to the Ensemble Model you have a **Very Low Chance (0)** of Diabetes.')
        st.write('Diabetes Probability:', model_prob[0] * 100)

        st.write('Ensemble Model Accuracy:', ensemble_accuracy)
        st.write('Ensemble Model Precision:', ensemble_precision)
        st.write('Ensemble Model Recall:', ensemble_recall)
        st.write('Ensemble Model F1 Score:', ensemble_f1score)
        st.write('------------------------------------------------------------------------------------------------------')

    with right_column:
        for model in models_to_run:
            model.fit(X_train, y_train)

            model_predictions = model.predict(user_input)
            model_prob = model.predict_proba(user_input)[:, 1]

            test_predictions = model.predict(X_test)
            model_accuracy = accuracy_score(y_test, test_predictions)
            model_precision = precision_score(y_test, test_predictions)
            model_recall = recall_score(y_test, test_predictions)
            model_f1score = f1_score(y_test, test_predictions)

            if model_predictions[0] == 1:
                st.write(f'According to the {type(model).__name__} Model you have a **Very High Chance (1)** of Diabetes.')
            else:
                st.write(f'According to the {type(model).__name__} Model you have a **Very Low Chance (0)** of Diabetes.')
            st.write('Diabetes Probability:', model_prob[0] * 100)

            st.write(f'{type(model).__name__} Accuracy:', model_accuracy)
            st.write(f'{type(model).__name__} Precision:', model_precision)
            st.write(f'{type(model).__name__} Recall:', model_recall)
            st.write(f'{type(model).__name__} F1 Score:', model_f1score)
            st.write('------------------------------------------------------------------------------------------------------')

    model_names = ['Ensemble']
    accuracies = [ensemble_accuracy]
    precisions = [ensemble_precision]
    recalls = [ensemble_recall]
    f1_scores = [ensemble_f1score]

    # The selected models were already fitted above, so reuse them for the comparison chart
    for model in models_to_run:
        model_names.append(type(model).__name__)
        model_predictions = model.predict(X_test)
        accuracies.append(accuracy_score(y_test, model_predictions))
        precisions.append(precision_score(y_test, model_predictions))
        recalls.append(recall_score(y_test, model_predictions))
        f1_scores.append(f1_score(y_test, model_predictions))

    metrics_df = pd.DataFrame({
        'Model': model_names,
        'Accuracy': accuracies,
        'Precision': precisions,
        'Recall': recalls,
        'F1 Score': f1_scores
    })

    model_labels = generate_model_labels(metrics_df['Model'])

    # Compare the four metrics across the ensemble and the selected models
    fig = plt.figure(figsize=(12, 10))

    plt.subplot(2, 2, 1)
    plt.bar(model_labels, metrics_df['Accuracy'], color='skyblue')
    plt.title('Accuracy Comparison')
    plt.ylim(0, 1)

    plt.subplot(2, 2, 2)
    plt.bar(model_labels, metrics_df['Precision'], color='orange')
    plt.title('Precision Comparison')
    plt.ylim(0, 1)

    plt.subplot(2, 2, 3)
    plt.bar(model_labels, metrics_df['Recall'], color='green')
    plt.title('Recall Comparison')
    plt.ylim(0, 1)

    plt.subplot(2, 2, 4)
    plt.bar(model_labels, metrics_df['F1 Score'], color='purple')
    plt.title('F1 Score Comparison')
    plt.ylim(0, 1)

    plt.tight_layout()

    # Pass the figure explicitly; calling st.pyplot() with no arguments is deprecated
    st.pyplot(fig)