Evan Derin Ihsanudin committed on
Commit
d305ca4
1 Parent(s): 60c6114

GC3_Deployment_Evan_Derin_Ihsanudin

Browse files
Files changed (7) hide show
  1. app.py +12 -0
  2. eda.py +424 -0
  3. model_forest_opt.pkl +3 -0
  4. prediction.py +73 -0
  5. requirements.txt +6 -0
  6. scaler_final.pkl +3 -0
  7. windsorizer.pkl +3 -0
app.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import eda
3
+ import prediction
4
+
5
# Build the sidebar navigation: each entry maps the label shown in the select
# box to the entry point of the page module that renders it. Dict insertion
# order is the order in which the options are listed.
pages = {
    'Death Event Prediction': prediction.run,
    'Exploratory Data Analysis': eda.run,
}
navigation = st.sidebar.selectbox('Pilih Halaman : ', tuple(pages))

# Render the selected page; any value other than the prediction label falls
# through to the EDA page.
pages.get(navigation, eda.run)()
eda.py ADDED
@@ -0,0 +1,424 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import seaborn as sns
4
+ import matplotlib.pyplot as plt
5
+ from PIL import Image
6
+
7
# Page-wide Streamlit settings. This call runs at import time, i.e. as soon as
# the module is imported or executed as a script.
_PAGE_SETTINGS = {
    'page_title': 'Heart Failure - EDA',
    'layout': 'wide',
    'initial_sidebar_state': 'expanded',
}
st.set_page_config(**_PAGE_SETTINGS)
13
+
14
# Legend / tick labels for the two DEATH_EVENT classes (0 and 1).
_DEATH_LABELS = ['Not Deceased', 'Deceased']

# Categories of the binned numeric features. Each entry drives one section made
# of a count plot (left column) and a death-rate plot (right column).
#   subheader    - section heading
#   notes        - the two numbered descriptions written under the heading
#   column       - name of the bin column in eda.csv
#   order        - bin labels, in the order they are drawn on the x axis
#   name         - feature name used in the plot titles
#   xlabel       - x-axis label shared by both plots
#   count_offset - vertical gap between a count bar and its value label
#   count_ylim   - upper y limit of the count plot (None keeps autoscaling)
#   rate_ylim    - upper y limit of the death-rate plot
_BINNED_FEATURES = [
    {
        'subheader': '**EDA Feature Age**',
        'notes': [
            '1. Range `age` pasien dengan *deceased* yang terbanyak',
            '2. Melihat pengaruh `age` terhadap klasifikasi `DEATH_EVENT`',
        ],
        'column': 'AgeBin',
        'order': ['(40, 50]', '(50, 60]', '(60, 70]', '(70, 80]', '(80, 90]', '(90, 100]'],
        'name': 'Age',
        'xlabel': 'Range Age',
        'count_offset': 1,
        'count_ylim': None,
        'rate_ylim': 1.3,
    },
    {
        'subheader': '**EDA Feature `creatinine_phosphokinase`**',
        'notes': [
            '1. Range `creatinine_phosphokinase` dengan *deceased* yang terbanyak',
            '2. Melihat pengaruh `creatinine_phosphokinase` terhadap klasifikasi `DEATH_EVENT`',
        ],
        'column': 'CPBin',
        'order': ['(0, 200]', '(200, 400]', '(400, 600]', '(600, 800]', '(800, 1000]',
                  '(1000, 2000]', '(2000, 5000]', '(5000, 7000]', '(7000, 9000]'],
        'name': 'Creatinine Phosphokinase',
        'xlabel': 'Range Creatinine Phosphokinase (mcg/L)',
        'count_offset': 2,
        'count_ylim': 100,
        'rate_ylim': 1.1,
    },
    {
        'subheader': '**EDA Feature `ejection_fraction`**',
        'notes': [
            '1. Range `ejection_fraction` dengan *deceased* yang terbanyak',
            '2. Melihat pengaruh `ejection_fraction` terhadap klasifikasi `DEATH_EVENT`',
        ],
        'column': 'EfBin',
        'order': ['(10, 20]', '(20, 30]', '(30, 40]', '(40, 50]', '(50, 60]', '(60, 70]', '(70, 80]'],
        'name': 'Ejection Fraction',
        'xlabel': 'Range Ejection Fraction (%)',
        'count_offset': 2,
        'count_ylim': 110,
        'rate_ylim': 1,
    },
    {
        'subheader': '**EDA Feature `platelets`**',
        'notes': [
            '1. Range `platelets` dengan *deceased* yang terbanyak',
            '2. Melihat pengaruh `platelets` terhadap klasifikasi `DEATH_EVENT`',
        ],
        'column': 'PlatBin',
        'order': ['(200000, 300000]', '(300000, 400000]', '(400000, 500000]', '(500000, 600000]',
                  '(600000, 700000]', '(700000, 800000]', '(800000, 900000]'],
        'name': 'Platelets',
        'xlabel': 'Range Platelets (kp/mL)',
        'count_offset': 3,
        'count_ylim': 140,
        'rate_ylim': 1.3,
    },
    {
        'subheader': '**EDA Feature `serum_creatinine`**',
        'notes': [
            '1. Range `serum_creatinine` pasien dengan *deceased* terbanyak',
            # The original numbered this second note "1." as well; fixed to "2.".
            '2. Melihat pengaruh `serum_creatinine` terhadap klasifikasi `DEATH_EVENT`',
        ],
        'column': 'SCBin',
        'order': ['(0, 2]', '(2, 4]', '(4, 6]', '(6, 8]', '(8, 10]'],
        'name': 'Serum Creatinine',
        'xlabel': 'Range Serum Creatinine (mg/dL)',
        'count_offset': 3,
        'count_ylim': 220,
        'rate_ylim': 1.1,
    },
    {
        'subheader': '**EDA Feature `serum_sodium`**',
        'notes': [
            '1. Range `serum_sodium` dengan *deceased* yang terbanyak',
            '2. Melihat pengaruh `serum_sodium` terhadap klasifikasi `DEATH_EVENT`',
        ],
        'column': 'SSBin',
        'order': ['(110, 120]', '(120, 130]', '(130, 140]'],
        'name': 'Serum Sodium',
        'xlabel': 'Range Serum Sodium (mEq/L)',
        'count_offset': 3,
        'count_ylim': 175,
        'rate_ylim': 0.7,
    },
    {
        'subheader': '**EDA Feature `time`**',
        'notes': [
            '1. Range `time` follow up dengan *deceased* yang terbanyak',
            '2. Melihat pengaruh `time` terhadap klasifikasi `DEATH_EVENT`',
        ],
        'column': 'TimeBin',
        'order': ['(0, 50]', '(50, 100]', '(100, 150]', '(150, 200]', '(200, 250]', '(250, 300]'],
        'name': 'Time Follow Up',
        'xlabel': 'Range Time Follow Up (days)',
        'count_offset': 2,
        'count_ylim': 80,
        'rate_ylim': 1,
    },
]

# 0/1 features plotted directly against DEATH_EVENT.
#   column     - column name in eda.csv (also used in the heading and note)
#   name       - display name used for axis labels and titles
#   ticks      - x tick labels for the values 0 and 1
#   count_ylim - upper y limit of the count plot
_BINARY_FEATURES = [
    {'column': 'anaemia', 'name': 'Anaemia', 'ticks': ['No', 'Yes'], 'count_ylim': 140},
    {'column': 'diabetes', 'name': 'Diabetes', 'ticks': ['No', 'Yes'], 'count_ylim': 140},
    {'column': 'high_blood_pressure', 'name': 'Hypertension', 'ticks': ['No', 'Yes'], 'count_ylim': 160},
    # The original count plot labelled the second tick 'Men' while the rate
    # plot used 'Man'; both plots now share the same labels.
    {'column': 'sex', 'name': 'Gender', 'ticks': ['Woman', 'Man'], 'count_ylim': 140},
    {'column': 'smoking', 'name': 'Smoking', 'ticks': ['No', 'Yes'], 'count_ylim': 160},
]


def _show(target, fig):
    """Render *fig* through ``target.pyplot`` and then close it.

    Figures created with ``plt.figure`` stay registered in pyplot until they
    are closed; closing after rendering keeps them from piling up on every
    Streamlit rerun.
    """
    target.pyplot(fig)
    plt.close(fig)


def _annotate_centered(ax, fmt, y_offset):
    """Write each bar's height, centred horizontally, *y_offset* above the bar."""
    for patch in ax.patches:
        height = patch.get_height()
        ax.annotate(fmt % height,
                    (patch.get_x() + patch.get_width() / 2, height + y_offset),
                    ha='center', va='center', fontsize=11)


def _binned_section(df_eda, spec):
    """Render one binned-feature section: heading, notes, count plot, rate plot."""
    st.subheader(spec['subheader'])
    for note in spec['notes']:
        st.write(note)
    col1, col2 = st.columns(2)

    # Left column: number of patients per bin, split by DEATH_EVENT.
    fig = plt.figure(figsize=(15, 6))
    ax = sns.countplot(data=df_eda, x=spec['column'], hue="DEATH_EVENT",
                       palette='winter', order=spec['order'])
    plt.title('Distribusi ' + spec['name'], fontsize=18, fontweight='bold')
    plt.xlabel(spec['xlabel'], fontsize=12)
    plt.ylabel("# of Patient", fontsize=12)
    ax.tick_params(axis="x", labelsize=9.5)
    plt.legend(fontsize=10, title='Klasifikasi Death Event', loc='upper right',
               labels=_DEATH_LABELS)
    _annotate_centered(ax, "%.0f", spec['count_offset'])
    if spec['count_ylim'] is not None:
        plt.ylim(0, spec['count_ylim'])
    _show(col1, fig)

    # Right column: mean of DEATH_EVENT per bin, i.e. the share of deceased.
    fig = plt.figure(figsize=(15, 6))
    ax = sns.barplot(x=spec['column'], y='DEATH_EVENT', data=df_eda,
                     palette='winter', order=spec['order'], errorbar=None)
    plt.xlabel(spec['xlabel'], fontsize=12)
    plt.ylabel("% Deceased", fontsize=12)
    plt.title('% Deceased berdasarkan ' + spec['name'], fontsize=18, fontweight='bold')
    plt.ylim(0, spec['rate_ylim'])
    _annotate_centered(ax, "%.2f", 0.019)
    _show(col2, fig)


def _target_section(df_eda):
    """Render the class-balance plot of the DEATH_EVENT target."""
    st.subheader('**EDA Feature `DEATH_EVENT`**')
    st.write('1. Balance dataset antara *class no deceased* dan *class deceased*')

    fig = plt.figure(figsize=(10, 3))
    ax = sns.countplot(x='DEATH_EVENT', data=df_eda, palette="winter")
    plt.xlabel("Death Status", fontsize=12)
    plt.ylabel("# of Patient", fontsize=12)
    plt.xticks([0, 1], _DEATH_LABELS, fontsize=11)
    plt.title('Not Deceased vs Deceased', fontsize=18, fontweight='bold')
    _annotate_centered(ax, "%.0f", 5)
    plt.ylim(0, 230)
    _show(st, fig)


def _binary_section(df_eda, spec):
    """Render one 0/1-feature section: heading, note, count plot, rate plot."""
    column, name, ticks = spec['column'], spec['name'], spec['ticks']

    st.subheader('**EDA Feature `%s`**' % column)
    st.write('1. Melihat pengaruh `%s` terhadap klasifikasi `DEATH_EVENT`' % column)
    col1, col2 = st.columns(2)

    # Left column: number of patients per feature value, split by DEATH_EVENT.
    fig = plt.figure(figsize=(15, 6))
    ax = sns.countplot(data=df_eda, x=column, hue="DEATH_EVENT", palette='winter')
    plt.xlabel(name, fontsize=12)
    plt.ylabel("# of Patient", fontsize=12)
    plt.xticks([0, 1], ticks, fontsize=11)
    plt.title(name + ' vs Death Event', fontsize=18, fontweight='bold')
    plt.legend(fontsize=10, title='Deceased', loc='upper right', labels=_DEATH_LABELS)
    _annotate_centered(ax, "%.0f", 3)
    plt.ylim(0, spec['count_ylim'])
    _show(col1, fig)

    # Right column: mean of DEATH_EVENT per feature value.
    fig = plt.figure(figsize=(15, 6))
    ax = sns.barplot(x=column, y="DEATH_EVENT", data=df_eda, palette='winter',
                     errorbar=None)
    plt.ylabel("% Deceased", fontsize=14)
    plt.xlabel(name, fontsize=14)
    plt.ylim(0, 0.5)
    plt.xticks([0, 1], ticks, fontsize=14)
    plt.title('% Deceased vs ' + name, fontsize=18, fontweight='bold')
    # These labels keep the original fixed x offset from the bar's left edge
    # instead of the centred placement used by the other plots.
    for patch in ax.patches:
        ax.annotate("%.2f" % (patch.get_height()),
                    (patch.get_x() + 0.35, patch.get_height() + 0.01), fontsize=13)
    _show(col2, fig)


def run():
    """Render the Exploratory Data Analysis page.

    Reads ``eda.csv`` from the current working directory and draws, in order:
    one section per binned numeric feature, the class balance of
    ``DEATH_EVENT``, and one section per binary feature. Every figure is
    closed after it has been handed to Streamlit.
    """
    # Page title
    st.markdown("<h1 style='text-align: center; color: black;'>Exploratory Data Analysis</h1>", unsafe_allow_html=True)
    st.write('Berikut adalah EDA dari setiap feature')

    # Load the pre-binned EDA dataframe
    df_eda = pd.read_csv('eda.csv')

    for spec in _BINNED_FEATURES:
        _binned_section(df_eda, spec)

    _target_section(df_eda)

    for spec in _BINARY_FEATURES:
        _binary_section(df_eda, spec)


if __name__ == '__main__':
    run()
model_forest_opt.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a925f02e2d0276f0cdfb2f39223ecc339cc977fc8fd17c4c1ea490a9a62b573
3
+ size 146064
prediction.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import pickle
5
+
6
# Load all fitted artifacts once, at import time.
def _load_artifact(path):
    """Unpickle and return the object stored in the file at *path*."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


winsorizer_inf = _load_artifact('windsorizer.pkl')
scaler_final = _load_artifact('scaler_final.pkl')
model_forest_opt = _load_artifact('model_forest_opt.pkl')
15
+
16
+
17
def run():
    """Render the Death Event Prediction page.

    Shows a form with the seven numeric inputs, displays the collected values
    as a one-row dataframe and, once the form is submitted, passes that row
    through the module-level ``winsorizer_inf``, ``scaler_final`` and
    ``model_forest_opt`` objects and writes the predicted class.
    """
    # Page title
    st.markdown("<h1 style='text-align: center; color: black;'>Death Event Prediction</h1>", unsafe_allow_html=True)

    # Description
    st.write('Page ini berisi model untuk memprediksi *Death Event* (potensi meninggalnya pasien) dari pasien')
    st.write('Mohon persiapkan **data Age, Creatinine Phosphokinase (mcg/L), Ejection Fraction (%), Platelets (kp/mL), Serum Creatinine (mg/dl), Serum Sodium (mEq/L) dan Time (days) sesuai satuan yang tertera sebelum melakukan prediksi**')

    # Input form
    with st.form(key='form_health_parameter'):
        age = st.number_input('Age', min_value=40, max_value=100, value=40, step=1, help='Usia Pasien')
        creatinine_phosphokinase = st.number_input('Creatinine Phosphokinase (mcg/L)', min_value=20, max_value=8000, value=250, step=1, help='Kadar creatine kinase dalam darah (mcg/L)')
        ejection_fraction = st.slider('Ejection Fraction (%)', min_value=10, max_value=100, value=40, step=1, help='Kemampuan jantung untuk memompa darah dengan oksigen ke seluruh tubuh (%)')
        # Unit label aligned with the EDA page, which plots platelets in kp/mL.
        platelets = st.number_input('Platelets (kp/mL)', min_value=25000, max_value=850000, value=250000, step=1, help='Jumlah trombosit pada darah (kp/mL)')
        # Float input: an integer-only field starting at 1 cannot express
        # fractional readings such as 0.9 or 1.3 mg/dl.
        serum_creatinine = st.number_input('Serum Creatinine (mg/dl)', min_value=0.1, max_value=10.0, value=1.0, step=0.1, help='Kadar Kreatinin (Zat Limbah) pada darah (mg/dl)')
        serum_sodium = st.slider('Serum Sodium (mEq/L)', min_value=100, max_value=150, value=130, step=1, help='Kadar natrium serum pada darah (mEq/L)')
        time = st.number_input('Time Follow Up (days)', min_value=1, max_value=300, value=100, step=1, help='Waktu Follow Up Pasien (days)')
        # Submit button
        submitted = st.form_submit_button('Predict')

    # Collect the inputs into a one-row dataframe for inference
    data_inf = pd.DataFrame([{
        'age': age,
        'creatinine_phosphokinase': creatinine_phosphokinase,
        'ejection_fraction': ejection_fraction,
        'platelets': platelets,
        'serum_creatinine': serum_creatinine,
        'serum_sodium': serum_sodium,
        'time': time,
    }])
    # Explicit call instead of a bare ``data_inf`` expression: Streamlit's
    # "magic" display only works in the main script, not in imported modules.
    st.dataframe(data_inf)

    # Prediction
    if submitted:
        # Apply the fitted winsorizer to the inference row
        data_inf = winsorizer_inf.transform(data_inf)

        # Feature scaling
        data_inf_final = scaler_final.transform(data_inf)

        # Predict with the tuned random forest model
        y_pred_inf = model_forest_opt.predict(data_inf_final)

        # Exactly one row was submitted, so read its single prediction.
        if y_pred_inf[0] == 1:
            prediction = 'Deceased (Class 1)'
        else:
            prediction = 'Not Deceased (Class 0)'

        st.write('# Death Event : ', prediction)


if __name__ == '__main__':
    run()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ seaborn
4
+ matplotlib
5
+ scikit-learn == 1.1.3
6
+ numpy
scaler_final.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4a53a10c9ffcb9a36702e63ae8c960885f785dc238a85ae8f734d20d98fc802
3
+ size 698
windsorizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1fb9553f7240b00c14b34cd1339b813ff7dc96d723b8fbe870c3137753e40a7
3
+ size 480