saifsunny committed on
Commit
7fa3b30
1 Parent(s): bbf17e2

Upload 3 files

Browse files
Files changed (3) hide show
  1. Diabetes.csv +1 -0
  2. main.py +283 -0
  3. requirements.txt +7 -0
Diabetes.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ ID,No_Pation,Gender,AGE,Urea,Cr,HbA1c,Chol,TG,HDL,LDL,VLDL,BMI,CLASS
main.py ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+
6
+
7
+ from sklearn.ensemble import RandomForestClassifier, VotingClassifier
8
+ from sklearn.tree import DecisionTreeClassifier
9
+ from sklearn.linear_model import LogisticRegression
10
+ from sklearn.neighbors import KNeighborsClassifier
11
+ from sklearn.svm import SVC
12
+ from sklearn.naive_bayes import GaussianNB
13
+ from sklearn.neural_network import MLPClassifier
14
+ from sklearn.ensemble import GradientBoostingClassifier
15
+ from xgboost import XGBClassifier
16
+ from lightgbm import LGBMClassifier
17
+ from sklearn.model_selection import train_test_split
18
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
19
+
20
+
21
# ---------------------------------------------------------------------------
# Page header and input widgets.
# Every slider/radio value below is read later when the feature vector for
# the prediction is assembled.
# ---------------------------------------------------------------------------
st.title('Diabetes Prediction Application')
# The action button of this app is labelled 'Submit', so the instructions
# refer to it by that name (the text previously mentioned a 'Predict' button
# that does not exist).
st.write('''
Please fill in the attributes below, then hit the Submit button
to get your results.
''')

st.header('Input Attributes')
age = st.slider('Your Age (Years)', min_value=0.0, max_value=100.0, value=50.0, step=1.0)
st.write(''' ''')  # blank write used as a vertical spacer between widgets
gen = st.radio("Your Gender", ('Male', 'Female'))
st.write(''' ''')
# Gender conversion: 'Male' is encoded as 1, anything else as 0.
gender = 1 if gen == "Male" else 0

urea = st.slider('Urea', min_value=0.0, max_value=100.0, value=50.0, step=0.1)
st.write(''' ''')
cr = st.slider('Creatinine Ratio(Cr)', min_value=0.0, max_value=1000.0, value=500.0, step=1.0)
st.write(''' ''')
hb = st.slider('HbA1c', min_value=0.0, max_value=20.0, value=10.0, step=0.1)
st.write(''' ''')
chol = st.slider('Cholesterol (Chol)', min_value=0.0, max_value=20.0, value=10.0, step=0.1)
st.write(''' ''')
tg = st.slider('Triglycerides(TG) Cholesterol', min_value=0.0, max_value=20.0, value=10.0, step=0.1)
st.write(''' ''')
hdl = st.slider('HDL Cholesterol', min_value=0.0, max_value=20.0, value=10.0, step=0.1)
st.write(''' ''')
ldl = st.slider('LDL Cholesterol', min_value=0.0, max_value=20.0, value=10.0, step=0.1)
st.write(''' ''')
vldl = st.slider('VLDL Cholesterol', min_value=0.0, max_value=50.0, value=25.0, step=0.1)
st.write(''' ''')
bmi = st.slider('BMI', min_value=0.0, max_value=50.0, value=25.0, step=0.1)
st.write(''' ''')
56
+
57
selected_models = st.multiselect("Choose Classifier Models", ('Random Forest', 'Naïve Bayes', 'Logistic Regression', 'K-Nearest Neighbors', 'Decision Tree', 'Gradient Boosting', 'LightGBM', 'XGBoost', 'Multilayer Perceptron', 'Artificial Neural Network', 'Support Vector Machine'))
st.write(''' ''')

# Menu label -> zero-argument factory producing a fresh, unfitted classifier.
# The insertion order of this table fixes the order in which the chosen
# models are instantiated (and later trained and reported); it is
# independent of the order in which the user ticked them.
_MODEL_FACTORIES = {
    'Random Forest': RandomForestClassifier,
    'Naïve Bayes': GaussianNB,
    'Logistic Regression': LogisticRegression,
    'K-Nearest Neighbors': KNeighborsClassifier,
    'Decision Tree': DecisionTreeClassifier,
    'Gradient Boosting': GradientBoostingClassifier,
    # probability=True so that predict_proba is available on the SVM.
    'Support Vector Machine': lambda: SVC(probability=True),
    'LightGBM': LGBMClassifier,
    'XGBoost': XGBClassifier,
    'Multilayer Perceptron': MLPClassifier,
    'Artificial Neural Network': lambda: MLPClassifier(hidden_layer_sizes=(100,), max_iter=100),
}

# One estimator instance per selected menu entry.
models_to_run = [
    build_model()
    for menu_label, build_model in _MODEL_FACTORIES.items()
    if menu_label in selected_models
]
96
+
97
+
98
+
99
# Single-row feature vector for the user's answers, shape (1, 11).
# The order MUST match the training columns, which this file builds as
# gender, age, urea, cr, hb, chol, tg, hdl, ldl, vldl, bmi (renamed CSV
# columns minus id / pation_no / target). The previous ordering had
# age/gender and ldl/vldl swapped, so the models scored the wrong features.
user_input = np.array([gender, age, urea, cr, hb, chol, tg, hdl, ldl,
                       vldl, bmi]).reshape(1, -1)
101
+
102
+ # import dataset
103
def get_dataset():
    """Load ``Diabetes.csv`` and encode its categorical columns as integers.

    The file is read from the current working directory.

    Returns:
        pandas.DataFrame: the CSV contents with
            * ``CLASS``  -> 0 when the label is ``N``, 1 for any other value;
            * ``Gender`` -> 1 when the label is ``M``, 0 for any other value.

    Raises:
        FileNotFoundError: if ``Diabetes.csv`` is not present.
        KeyError: if the ``CLASS`` or ``Gender`` column is missing.
    """
    data = pd.read_csv('Diabetes.csv')

    # Normalise labels before comparing: an exact match against 'N' / 'M'
    # would silently mis-encode values such as 'N ' or 'm'.
    # NOTE(review): whether the shipped CSV actually contains padded or
    # lower-case labels cannot be confirmed from here; the normalisation is
    # a no-op on clean data.
    class_labels = data['CLASS'].astype(str).str.strip().str.upper()
    data['CLASS'] = (class_labels != 'N').astype(int)

    gender_labels = data['Gender'].astype(str).str.strip().str.upper()
    data['Gender'] = (gender_labels == 'M').astype(int)

    return data
126
+
127
def generate_model_labels(model_names):
    """Build short, unique axis labels for a sequence of model names.

    A name made of several whitespace-separated words is abbreviated to the
    initials of its words; a single-word name is cut to its first three
    characters. When the same abbreviation would be produced more than once
    (e.g. two models of the same class), later occurrences get a numeric
    suffix (``-2``, ``-3``, ...) so that every label stays distinct and no
    bar is drawn on top of another in a categorical chart.

    Args:
        model_names: iterable of model name strings.

    Returns:
        list[str]: one label per input name, in the same order.
    """
    model_labels = []
    taken = set()
    for name in model_names:
        words = name.split()
        if len(words) > 1:
            # Multiple words: use the initials.
            base_label = "".join(word[0] for word in words)
        else:
            # Single word: take the first 3 characters.
            base_label = name[:3]

        # Disambiguate repeated abbreviations.
        label = base_label
        occurrence = 2
        while label in taken:
            label = f"{base_label}-{occurrence}"
            occurrence += 1

        taken.add(label)
        model_labels.append(label)
    return model_labels
139
+
140
def _test_set_metrics(model, X_test, y_test):
    """Score a fitted classifier on the hold-out split.

    The test set is predicted once and all four metrics are derived from
    that single prediction.

    Returns:
        tuple: (accuracy, precision, recall, f1).
    """
    test_predictions = model.predict(X_test)
    return (
        accuracy_score(y_test, test_predictions),
        precision_score(y_test, test_predictions),
        recall_score(y_test, test_predictions),
        f1_score(y_test, test_predictions),
    )


def _report_model(model, sample, subject, metric_prefix, metrics):
    """Write one fitted model's verdict for ``sample`` and its test metrics.

    Args:
        model: fitted classifier exposing ``predict`` and ``predict_proba``.
        sample: single-row feature array for the user.
        subject: name used in the "According to ... Model" sentence.
        metric_prefix: text placed before each metric name.
        metrics: (accuracy, precision, recall, f1) as returned by
            ``_test_set_metrics``.
    """
    prediction = model.predict(sample)
    # Probability of the positive class (column 1) for the single input row.
    probability = model.predict_proba(sample)[:, 1]

    chance = 'Very High Chance (1)' if prediction[0] == 1 else 'Very Low Chance (0)'
    st.write(f'According to {subject} Model You have a **{chance}** of Diabetes.')
    st.write('Diabetes Probability: ', (probability * 100))

    accuracy, precision, recall, f1score = metrics
    st.write(f'{metric_prefix} Accuracy:', accuracy)
    st.write(f'{metric_prefix} Precision:', precision)
    st.write(f'{metric_prefix} Recall:', recall)
    st.write(f'{metric_prefix} F1 Score:', f1score)
    st.write('------------------------------------------------------------------------------------------------------')


if st.button('Submit'):
    df = get_dataset()

    # Replace the CSV header with the short names used throughout the app.
    df.columns = (["id", "pation_no", "gender", "age", "urea", "cr",
                   "hb", "chol", "tg", "hdl", "ldl",
                   "vldl", "bmi", "target"])

    # Features are every column except the label and the two identifiers.
    X = df.drop(['target', 'id', 'pation_no'], axis=1)
    y = df['target']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Two side-by-side columns: ensemble on the left, selected models on the right.
    left_column, right_column = st.columns(2)

    with left_column:
        # Soft-voting ensemble of Random Forest, XGBoost and LightGBM.
        ensemble = VotingClassifier(
            estimators=[('rf', RandomForestClassifier()), ('xgb', XGBClassifier()), ('gb', LGBMClassifier())],
            voting='soft')
        ensemble.fit(X_train, y_train)

        ensemble_metrics = _test_set_metrics(ensemble, X_test, y_test)
        _report_model(ensemble, user_input, 'Ensemble', 'Ensemble Model', ensemble_metrics)

    # Metrics are collected while reporting, so each model is trained exactly
    # once and the chart shows the same numbers as the text above it.
    model_names = ['Ensemble']
    metric_rows = [ensemble_metrics]

    with right_column:
        for model in models_to_run:
            model.fit(X_train, y_train)
            model_metrics = _test_set_metrics(model, X_test, y_test)

            display_name = type(model).__name__
            _report_model(model, user_input, display_name, display_name, model_metrics)

            model_names.append(display_name)
            metric_rows.append(model_metrics)

    # One row per model, one column per metric.
    metrics_df = pd.DataFrame(metric_rows, columns=['Accuracy', 'Precision', 'Recall', 'F1 Score'])
    metrics_df.insert(0, 'Model', model_names)

    model_labels = generate_model_labels(metrics_df['Model'])

    # 2x2 grid of bar charts, one panel per metric. An explicit figure is
    # passed to st.pyplot instead of relying on matplotlib's global state.
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    panels = (
        ('Accuracy', 'skyblue'),
        ('Precision', 'orange'),
        ('Recall', 'green'),
        ('F1 Score', 'purple'),
    )
    for ax, (metric, color) in zip(axes.flat, panels):
        ax.bar(model_labels, metrics_df[metric], color=color)
        ax.set_title(f'{metric} Comparison')
        ax.set_ylim(0, 1)

    # Adjust layout to prevent overlapping of titles.
    fig.tight_layout()

    st.pyplot(fig)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ lightgbm==4.0.0
2
+ matplotlib==3.7.2
3
+ numpy==1.25.1
4
+ pandas==2.0.3
5
+ scikit_learn==1.3.0
6
+ streamlit==1.25.0
7
+ xgboost==1.7.6