Spaces:

ahmadluay
/

deploy

Runtime error

App Files Files Community

ahmadluay committed on Mar 17, 2023

Commit

47f9edb

•

1 Parent(s): e1607b4

Upload 2 files

Browse files

Files changed (2) hide show

eda.py +21 -2
prediction.py +1 -1

eda.py CHANGED Viewed

@@ -3,6 +3,9 @@ import pandas as pd
 import seaborn as sns
 import matplotlib.pyplot as plt
 import plotly.express as px
 from PIL import Image
 st.set_page_config(
@@ -86,7 +89,7 @@ def run():
     st.pyplot(fig)
     st.write('From the table and visualization above, it can be seen that the number of male patients with heart failure is more than female patients. **Where about 32% die during the follow-up period**. Further data exploration is necessary to find out the condition of male patients.')
-    #
     st.write('## Comparison between Death Event with other variables ')
     # Creating new dataframe for the histogram
     sns.set(font_scale=1)
@@ -98,7 +101,23 @@ def run():
     g = sns.FacetGrid(f, hue=output, col="variable", col_wrap=4, sharex=False, sharey=False )
     g = g.map(sns.histplot, "value", kde=True).add_legend()
     st.pyplot(g)
-    st.write('Based on the histogram above, we can see that the distribution of Not Death is still dominating that Death. However, we should check wherer variable time looks different than the others, where Death is high with time between 0-100 days. From here we should check the skewness of time as well.')
 if __name__ == '__main__':
     run()

 import seaborn as sns
 import matplotlib.pyplot as plt
 import plotly.express as px
+import sklearn
+from sklearn.preprocessing import LabelEncoder
 from PIL import Image
 st.set_page_config(
     st.pyplot(fig)
     st.write('From the table and visualization above, it can be seen that the number of male patients with heart failure is more than female patients. **Where about 32% die during the follow-up period**. Further data exploration is necessary to find out the condition of male patients.')
+    # Comparison between Death Event with other variables
     st.write('## Comparison between Death Event with other variables ')
     # Creating new dataframe for the histogram
     sns.set(font_scale=1)
     g = sns.FacetGrid(f, hue=output, col="variable", col_wrap=4, sharex=False, sharey=False )
     g = g.map(sns.histplot, "value", kde=True).add_legend()
     st.pyplot(g)
+    st.write('Based on the histogram above, we can see that the distribution of **Not Death** is still dominating that Death. However, we should check wherer variable time looks different than the others, where Death is high with time between 0-100 days. From here we should check the skewness of time as well.')
+    # Using LabelEncoder to convert categorical into numerical data
+    st.write('## Correlation Matrix Analysis')
+    df_copy =df.copy()
+    categorical = ['anaemia','diabetes','high_blood_pressure','sex','smoking','DEATH_EVENT']
+    m_LabelEncoder = LabelEncoder()
+    for col in df_copy[categorical]:
+        df_copy[col]=m_LabelEncoder.fit_transform(df_copy[col])
+    # Plotting Correlation Matrix of Features and DEATH_EVENT
+    sns.set(font_scale=1)
+    fig = plt.figure(figsize=(20,20))
+    sns.heatmap(df_copy.corr(),annot=True,cmap='coolwarm', fmt='.2f')
+    st.pyplot(fig)
+    st.write('Based on visualization above, the `education_level`, `sex`, `marital_status` has a low correlation to the target (`DEATH_EVENT`).')
 if __name__ == '__main__':
     run()

prediction.py CHANGED Viewed

@@ -20,7 +20,7 @@ def run():
       ejection_fraction = st.number_input('Percentage of blood leaving the heart at each contraction (%)',min_value=0,max_value=100,value=38)
       high_blood_pressure = st.number_input('Have Hypertension?',min_value=0,max_value=1,value=0,help='0 for No, 1 for Yes')
       platelets = st.number_input('Platelets in the blood (kiloplatelets/mL)',min_value=0,max_value=999999,value=215000,help='in kiloplatelets/mL')
-      serum_creatinine = st.number_input('Level of serum creatinine in the blood ',step=0.01,format="%.2f",min_value=0.00,max_value=10.00,value=1.00,help='in mg/dL')
       serum_sodium = st.number_input('Level of serum sodium in the blood',min_value=0,max_value=150,value=133,help='in mEq/dL')
       sex = st.number_input('Gender',min_value=0,max_value=1,value=0,help='(Female = 0, Male = 1)')
       smoking = st.number_input('Smoker or Not Smoker ?',min_value=0,max_value=1,value=0,help='(No= 0, Yes = 1)')

       ejection_fraction = st.number_input('Percentage of blood leaving the heart at each contraction (%)',min_value=0,max_value=100,value=38)
       high_blood_pressure = st.number_input('Have Hypertension?',min_value=0,max_value=1,value=0,help='0 for No, 1 for Yes')
       platelets = st.number_input('Platelets in the blood (kiloplatelets/mL)',min_value=0,max_value=999999,value=215000,help='in kiloplatelets/mL')
+      serum_creatinine = st.number_input('Level of serum creatinine in the blood ',step=0.01,format="%.2f",min_value=0.00,max_value=10.00,value=1.20,help='in mg/dL')
       serum_sodium = st.number_input('Level of serum sodium in the blood',min_value=0,max_value=150,value=133,help='in mEq/dL')
       sex = st.number_input('Gender',min_value=0,max_value=1,value=0,help='(Female = 0, Male = 1)')
       smoking = st.number_input('Smoker or Not Smoker ?',min_value=0,max_value=1,value=0,help='(No= 0, Yes = 1)')