Upload 2 files
Browse files
- eda.py +21 -2
- prediction.py +1 -1
eda.py
CHANGED
@@ -3,6 +3,9 @@ import pandas as pd
|
|
3 |
import seaborn as sns
|
4 |
import matplotlib.pyplot as plt
|
5 |
import plotly.express as px
|
|
|
|
|
|
|
6 |
from PIL import Image
|
7 |
|
8 |
st.set_page_config(
|
@@ -86,7 +89,7 @@ def run():
|
|
86 |
st.pyplot(fig)
|
87 |
st.write('From the table and visualization above, it can be seen that the number of male patients with heart failure is more than female patients. **Where about 32% die during the follow-up period**. Further data exploration is necessary to find out the condition of male patients.')
|
88 |
|
89 |
-
#
|
90 |
st.write('## Comparison between Death Event with other variables ')
|
91 |
# Creating new dataframe for the histogram
|
92 |
sns.set(font_scale=1)
|
@@ -98,7 +101,23 @@ def run():
|
|
98 |
g = sns.FacetGrid(f, hue=output, col="variable", col_wrap=4, sharex=False, sharey=False )
|
99 |
g = g.map(sns.histplot, "value", kde=True).add_legend()
|
100 |
st.pyplot(g)
|
101 |
-
st.write('Based on the histogram above, we can see that the distribution of Not Death is still dominating that Death. However, we should check wherer variable time looks different than the others, where Death is high with time between 0-100 days. From here we should check the skewness of time as well.')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
|
103 |
if __name__ == '__main__':
|
104 |
run()
|
|
|
3 |
import seaborn as sns
|
4 |
import matplotlib.pyplot as plt
|
5 |
import plotly.express as px
|
6 |
+
import sklearn
|
7 |
+
from sklearn.preprocessing import LabelEncoder
|
8 |
+
|
9 |
from PIL import Image
|
10 |
|
11 |
st.set_page_config(
|
|
|
89 |
st.pyplot(fig)
|
90 |
st.write('From the table and visualization above, it can be seen that the number of male patients with heart failure is more than female patients. **Where about 32% die during the follow-up period**. Further data exploration is necessary to find out the condition of male patients.')
|
91 |
|
92 |
+
# Comparison between Death Event with other variables
|
93 |
st.write('## Comparison between Death Event with other variables ')
|
94 |
# Creating new dataframe for the histogram
|
95 |
sns.set(font_scale=1)
|
|
|
101 |
g = sns.FacetGrid(f, hue=output, col="variable", col_wrap=4, sharex=False, sharey=False )
|
102 |
g = g.map(sns.histplot, "value", kde=True).add_legend()
|
103 |
st.pyplot(g)
|
104 |
+
st.write('Based on the histogram above, we can see that the distribution of **Not Death** is still dominating that Death. However, we should check wherer variable time looks different than the others, where Death is high with time between 0-100 days. From here we should check the skewness of time as well.')
|
105 |
+
|
106 |
+
# Correlation matrix analysis: use LabelEncoder to convert the categorical columns into numerical data first
|
107 |
+
st.write('## Correlation Matrix Analysis')
|
108 |
+
df_copy =df.copy()
|
109 |
+
categorical = ['anaemia','diabetes','high_blood_pressure','sex','smoking','DEATH_EVENT']
|
110 |
+
m_LabelEncoder = LabelEncoder()
|
111 |
+
|
112 |
+
for col in df_copy[categorical]:
|
113 |
+
df_copy[col]=m_LabelEncoder.fit_transform(df_copy[col])
|
114 |
+
|
115 |
+
# Plotting Correlation Matrix of Features and DEATH_EVENT
|
116 |
+
sns.set(font_scale=1)
|
117 |
+
fig = plt.figure(figsize=(20,20))
|
118 |
+
sns.heatmap(df_copy.corr(),annot=True,cmap='coolwarm', fmt='.2f')
|
119 |
+
st.pyplot(fig)
|
120 |
+
st.write('Based on visualization above, the `education_level`, `sex`, `marital_status` has a low correlation to the target (`DEATH_EVENT`).')
|
121 |
|
122 |
if __name__ == '__main__':
|
123 |
run()
|
prediction.py
CHANGED
@@ -20,7 +20,7 @@ def run():
|
|
20 |
ejection_fraction = st.number_input('Percentage of blood leaving the heart at each contraction (%)',min_value=0,max_value=100,value=38)
|
21 |
high_blood_pressure = st.number_input('Have Hypertension?',min_value=0,max_value=1,value=0,help='0 for No, 1 for Yes')
|
22 |
platelets = st.number_input('Platelets in the blood (kiloplatelets/mL)',min_value=0,max_value=999999,value=215000,help='in kiloplatelets/mL')
|
23 |
-
serum_creatinine = st.number_input('Level of serum creatinine in the blood ',step=0.01,format="%.2f",min_value=0.00,max_value=10.00,value=1.
|
24 |
serum_sodium = st.number_input('Level of serum sodium in the blood',min_value=0,max_value=150,value=133,help='in mEq/dL')
|
25 |
sex = st.number_input('Gender',min_value=0,max_value=1,value=0,help='(Female = 0, Male = 1)')
|
26 |
smoking = st.number_input('Smoker or Not Smoker ?',min_value=0,max_value=1,value=0,help='(No= 0, Yes = 1)')
|
|
|
20 |
ejection_fraction = st.number_input('Percentage of blood leaving the heart at each contraction (%)',min_value=0,max_value=100,value=38)
|
21 |
high_blood_pressure = st.number_input('Have Hypertension?',min_value=0,max_value=1,value=0,help='0 for No, 1 for Yes')
|
22 |
platelets = st.number_input('Platelets in the blood (kiloplatelets/mL)',min_value=0,max_value=999999,value=215000,help='in kiloplatelets/mL')
|
23 |
+
serum_creatinine = st.number_input('Level of serum creatinine in the blood ',step=0.01,format="%.2f",min_value=0.00,max_value=10.00,value=1.20,help='in mg/dL')
|
24 |
serum_sodium = st.number_input('Level of serum sodium in the blood',min_value=0,max_value=150,value=133,help='in mEq/dL')
|
25 |
sex = st.number_input('Gender',min_value=0,max_value=1,value=0,help='(Female = 0, Male = 1)')
|
26 |
smoking = st.number_input('Smoker or Not Smoker ?',min_value=0,max_value=1,value=0,help='(No= 0, Yes = 1)')
|