ahmadluay committed on
Commit
47f9edb
1 Parent(s): e1607b4

Upload 2 files

Browse files
Files changed (2) hide show
  1. eda.py +21 -2
  2. prediction.py +1 -1
eda.py CHANGED
@@ -3,6 +3,9 @@ import pandas as pd
3
  import seaborn as sns
4
  import matplotlib.pyplot as plt
5
  import plotly.express as px
 
 
 
6
  from PIL import Image
7
 
8
  st.set_page_config(
@@ -86,7 +89,7 @@ def run():
86
  st.pyplot(fig)
87
  st.write('From the table and visualization above, it can be seen that the number of male patients with heart failure is more than female patients. **Where about 32% die during the follow-up period**. Further data exploration is necessary to find out the condition of male patients.')
88
 
89
- #
90
  st.write('## Comparison between Death Event with other variables ')
91
  # Creating new dataframe for the histogram
92
  sns.set(font_scale=1)
@@ -98,7 +101,23 @@ def run():
98
  g = sns.FacetGrid(f, hue=output, col="variable", col_wrap=4, sharex=False, sharey=False )
99
  g = g.map(sns.histplot, "value", kde=True).add_legend()
100
  st.pyplot(g)
101
- st.write('Based on the histogram above, we can see that the distribution of Not Death is still dominating that Death. However, we should check wherer variable time looks different than the others, where Death is high with time between 0-100 days. From here we should check the skewness of time as well.')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
  if __name__ == '__main__':
104
  run()
 
3
  import seaborn as sns
4
  import matplotlib.pyplot as plt
5
  import plotly.express as px
6
+ import sklearn
7
+ from sklearn.preprocessing import LabelEncoder
8
+
9
  from PIL import Image
10
 
11
  st.set_page_config(
 
89
  st.pyplot(fig)
90
  st.write('From the table and visualization above, it can be seen that the number of male patients with heart failure is more than female patients. **Where about 32% die during the follow-up period**. Further data exploration is necessary to find out the condition of male patients.')
91
 
92
+ # Comparison between Death Event with other variables
93
  st.write('## Comparison between Death Event with other variables ')
94
  # Creating new dataframe for the histogram
95
  sns.set(font_scale=1)
 
101
  g = sns.FacetGrid(f, hue=output, col="variable", col_wrap=4, sharex=False, sharey=False )
102
  g = g.map(sns.histplot, "value", kde=True).add_legend()
103
  st.pyplot(g)
104
+ st.write('Based on the histogram above, we can see that the distribution of **Not Death** is still dominating that Death. However, we should check wherer variable time looks different than the others, where Death is high with time between 0-100 days. From here we should check the skewness of time as well.')
105
+
106
+ # Using LabelEncoder to convert categorical into numerical data
107
+ st.write('## Correlation Matrix Analysis')
108
+ df_copy =df.copy()
109
+ categorical = ['anaemia','diabetes','high_blood_pressure','sex','smoking','DEATH_EVENT']
110
+ m_LabelEncoder = LabelEncoder()
111
+
112
+ for col in df_copy[categorical]:
113
+ df_copy[col]=m_LabelEncoder.fit_transform(df_copy[col])
114
+
115
+ # Plotting Correlation Matrix of Features and DEATH_EVENT
116
+ sns.set(font_scale=1)
117
+ fig = plt.figure(figsize=(20,20))
118
+ sns.heatmap(df_copy.corr(),annot=True,cmap='coolwarm', fmt='.2f')
119
+ st.pyplot(fig)
120
+ st.write('Based on visualization above, the `education_level`, `sex`, `marital_status` has a low correlation to the target (`DEATH_EVENT`).')
121
 
122
  if __name__ == '__main__':
123
  run()
prediction.py CHANGED
@@ -20,7 +20,7 @@ def run():
20
  ejection_fraction = st.number_input('Percentage of blood leaving the heart at each contraction (%)',min_value=0,max_value=100,value=38)
21
  high_blood_pressure = st.number_input('Have Hypertension?',min_value=0,max_value=1,value=0,help='0 for No, 1 for Yes')
22
  platelets = st.number_input('Platelets in the blood (kiloplatelets/mL)',min_value=0,max_value=999999,value=215000,help='in kiloplatelets/mL')
23
- serum_creatinine = st.number_input('Level of serum creatinine in the blood ',step=0.01,format="%.2f",min_value=0.00,max_value=10.00,value=1.00,help='in mg/dL')
24
  serum_sodium = st.number_input('Level of serum sodium in the blood',min_value=0,max_value=150,value=133,help='in mEq/dL')
25
  sex = st.number_input('Gender',min_value=0,max_value=1,value=0,help='(Female = 0, Male = 1)')
26
  smoking = st.number_input('Smoker or Not Smoker ?',min_value=0,max_value=1,value=0,help='(No= 0, Yes = 1)')
 
20
  ejection_fraction = st.number_input('Percentage of blood leaving the heart at each contraction (%)',min_value=0,max_value=100,value=38)
21
  high_blood_pressure = st.number_input('Have Hypertension?',min_value=0,max_value=1,value=0,help='0 for No, 1 for Yes')
22
  platelets = st.number_input('Platelets in the blood (kiloplatelets/mL)',min_value=0,max_value=999999,value=215000,help='in kiloplatelets/mL')
23
+ serum_creatinine = st.number_input('Level of serum creatinine in the blood ',step=0.01,format="%.2f",min_value=0.00,max_value=10.00,value=1.20,help='in mg/dL')
24
  serum_sodium = st.number_input('Level of serum sodium in the blood',min_value=0,max_value=150,value=133,help='in mEq/dL')
25
  sex = st.number_input('Gender',min_value=0,max_value=1,value=0,help='(Female = 0, Male = 1)')
26
  smoking = st.number_input('Smoker or Not Smoker ?',min_value=0,max_value=1,value=0,help='(No= 0, Yes = 1)')