import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image
# Configure the Streamlit page: title, wide layout, and a sidebar that starts
# in the expanded state.
st.set_page_config(
    initial_sidebar_state="expanded",
    layout="wide",
    page_title="Heart Failure - EDA",
)
# Styling shared by every chart on this page.
_PALETTE = 'winter'
_TARGET = 'DEATH_EVENT'
_OUTCOME_LABELS = ['Not Deceased', 'Deceased']
_WIDE_FIGSIZE = (15, 6)


def _label_bars(ax, fmt, y_offset, x_offset=None, fontsize=11):
    """Write each bar's height next to the bar.

    Args:
        ax: Matplotlib axes whose ``patches`` are the bars to label.
        fmt: ``%``-style format applied to the bar height (e.g. ``'%.0f'``).
        y_offset: Distance above the bar top, in data units.
        x_offset: Distance from the bar's left edge, in data units. When
            ``None`` the label is centred on the bar's midpoint instead.
        fontsize: Font size of the label.
    """
    for bar in ax.patches:
        height = bar.get_height()
        # NOTE(review): a bar can carry a NaN height (presumably a category
        # listed in ``order`` with no matching rows, depending on the seaborn
        # version). There is nothing meaningful to print for it, and a label
        # at a non-finite position cannot be placed, so skip it.
        if pd.isna(height):
            continue
        if x_offset is None:
            ax.annotate(
                fmt % height,
                (bar.get_x() + bar.get_width() / 2, height + y_offset),
                ha='center',
                va='center',
                fontsize=fontsize,
            )
        else:
            ax.annotate(
                fmt % height,
                (bar.get_x() + x_offset, height + y_offset),
                fontsize=fontsize,
            )


def _show(target, fig):
    """Render ``fig`` through ``target.pyplot`` and then close it.

    ``target`` is either the ``st`` module or a column returned by
    ``st.columns``. The figure is closed after rendering because pyplot keeps
    every figure it creates until it is explicitly closed, and this page
    builds 25 figures on every run.
    """
    target.pyplot(fig)
    plt.close(fig)


def _plot_counts(target, df, column, *, title, xlabel, label_offset,
                 hue=None, order=None, tick_labels=None, legend_title=None,
                 ylim=None, figsize=_WIDE_FIGSIZE):
    """Draw a count plot of ``column`` and render it into ``target``.

    Args:
        target: ``st`` or a Streamlit column; receives the finished figure.
        df: DataFrame holding ``column`` (and ``hue`` when given).
        column: Column plotted on the x axis.
        title: Chart title.
        xlabel: X-axis label.
        label_offset: Gap between a bar top and its count label.
        hue: Optional column used to split each category into coloured bars.
        order: Optional explicit category order for the x axis.
        tick_labels: Optional pair of labels for x positions 0 and 1. When
            omitted, the existing tick labels are kept and only shrunk.
        legend_title: Legend title; no legend is drawn when ``None``.
        ylim: Upper y limit; the axis is left auto-scaled when ``None``.
        figsize: Figure size in inches.
    """
    fig, ax = plt.subplots(figsize=figsize)
    sns.countplot(data=df, x=column, hue=hue, order=order,
                  palette=_PALETTE, ax=ax)
    ax.set_title(title, fontsize=18, fontweight='bold')
    ax.set_xlabel(xlabel, fontsize=12)
    ax.set_ylabel('# of Patient', fontsize=12)
    if tick_labels is None:
        ax.tick_params(axis='x', labelsize=9.5)
    else:
        ax.set_xticks([0, 1])
        ax.set_xticklabels(list(tick_labels), fontsize=11)
    if legend_title is not None:
        ax.legend(fontsize=10, title=legend_title, loc='upper right',
                  labels=_OUTCOME_LABELS)
    _label_bars(ax, '%.0f', y_offset=label_offset)
    if ylim is not None:
        ax.set_ylim(0, ylim)
    _show(target, fig)


def _plot_death_rate(target, df, column, *, title, xlabel, ylim,
                     order=None, tick_labels=None):
    """Draw the mean of ``DEATH_EVENT`` per category of ``column``.

    Args:
        target: ``st`` or a Streamlit column; receives the finished figure.
        df: DataFrame holding ``column`` and ``DEATH_EVENT``.
        column: Column plotted on the x axis.
        title: Chart title.
        xlabel: X-axis label.
        ylim: Upper y limit.
        order: Optional explicit category order for the x axis.
        tick_labels: Optional pair of labels for x positions 0 and 1. Charts
            that pass it use the larger fonts and the left-anchored value
            labels of the yes/no feature sections.
    """
    fig, ax = plt.subplots(figsize=_WIDE_FIGSIZE)
    sns.barplot(data=df, x=column, y=_TARGET, order=order,
                palette=_PALETTE, errorbar=None, ax=ax)
    ax.set_title(title, fontsize=18, fontweight='bold')
    ax.set_ylim(0, ylim)
    if tick_labels is None:
        ax.set_xlabel(xlabel, fontsize=12)
        ax.set_ylabel('% Deceased', fontsize=12)
        _label_bars(ax, '%.2f', y_offset=0.019)
    else:
        ax.set_xlabel(xlabel, fontsize=14)
        ax.set_ylabel('% Deceased', fontsize=14)
        ax.set_xticks([0, 1])
        ax.set_xticklabels(list(tick_labels), fontsize=14)
        _label_bars(ax, '%.2f', y_offset=0.01, x_offset=0.35, fontsize=13)
    _show(target, fig)


def _render_binned_feature(df, *, subheader, notes, column, order, title_name,
                           xlabel, count_offset, rate_ylim, count_ylim=None):
    """Render one section for a binned feature.

    The section has a sub-header, its note lines, and two side-by-side
    charts: counts per bin split by ``DEATH_EVENT`` on the left, mean
    ``DEATH_EVENT`` per bin on the right.
    """
    st.subheader(subheader)
    for note in notes:
        st.write(note)
    left, right = st.columns(2)
    _plot_counts(
        left, df, column,
        title=f'Distribusi {title_name}',
        xlabel=xlabel,
        label_offset=count_offset,
        hue=_TARGET,
        order=order,
        legend_title='Klasifikasi Death Event',
        ylim=count_ylim,
    )
    _plot_death_rate(
        right, df, column,
        title=f'% Deceased berdasarkan {title_name}',
        xlabel=xlabel,
        ylim=rate_ylim,
        order=order,
    )


def _render_binary_feature(df, *, column, display_name, count_ylim,
                           tick_labels=('No', 'Yes')):
    """Render one section for a feature plotted at x positions 0 and 1.

    The layout matches the binned sections (counts on the left, mean
    ``DEATH_EVENT`` on the right), but the x ticks are relabelled with
    ``tick_labels``.
    """
    st.subheader(f'**EDA Feature `{column}`**')
    st.write(f'1. Melihat pengaruh `{column}` terhadap klasifikasi `DEATH_EVENT`')
    left, right = st.columns(2)
    _plot_counts(
        left, df, column,
        title=f'{display_name} vs Death Event',
        xlabel=display_name,
        label_offset=3,
        hue=_TARGET,
        tick_labels=tick_labels,
        legend_title='Deceased',
        ylim=count_ylim,
    )
    _plot_death_rate(
        right, df, column,
        title=f'% Deceased vs {display_name}',
        xlabel=display_name,
        ylim=0.5,
        tick_labels=tick_labels,
    )


def run():
    """Render the exploratory-data-analysis page.

    Reads ``eda.csv`` (path relative to the current working directory) and
    draws, in order: one section per binned feature, the ``DEATH_EVENT``
    class-balance chart, and one section per yes/no-style feature.

    The CSV must provide the columns referenced below: ``AgeBin``, ``CPBin``,
    ``EfBin``, ``PlatBin``, ``SCBin``, ``SSBin``, ``TimeBin``,
    ``DEATH_EVENT``, ``anaemia``, ``diabetes``, ``high_blood_pressure``,
    ``sex`` and ``smoking``.
    """
    # Page title.
    # NOTE(review): this literal was split across physical lines (a syntax
    # error) and is rendered with unsafe_allow_html=True, so surrounding HTML
    # markup may have been lost upstream. Restore the tags if they are known.
    st.markdown('\nExploratory Data Analysis\n', unsafe_allow_html=True)
    st.write('Berikut adalah EDA dari setiap feature')

    # Load the pre-binned dataset used by every chart.
    df_eda = pd.read_csv('eda.csv')

    # Each entry describes one binned-feature section. ``order`` lists the bin
    # labels in display order (presumably stored as strings in the CSV).
    binned_features = (
        dict(
            subheader='**EDA Feature Age**',
            notes=(
                '1. Range `age` pasien dengan *deceased* yang terbanyak',
                '2. Melihat pengaruh `age` terhadap klasifikasi `DEATH_EVENT`',
            ),
            column='AgeBin',
            order=['(40, 50]', '(50, 60]', '(60, 70]', '(70, 80]',
                   '(80, 90]', '(90, 100]'],
            title_name='Age',
            xlabel='Range Age',
            count_offset=1,
            count_ylim=None,
            rate_ylim=1.3,
        ),
        dict(
            subheader='**EDA Feature `creatinine_phosphokinase`**',
            notes=(
                '1. Range `creatinine_phosphokinase` dengan *deceased* yang terbanyak',
                '2. Melihat pengaruh `creatinine_phosphokinase` terhadap klasifikasi `DEATH_EVENT`',
            ),
            column='CPBin',
            order=['(0, 200]', '(200, 400]', '(400, 600]', '(600, 800]',
                   '(800, 1000]', '(1000, 2000]', '(2000, 5000]',
                   '(5000, 7000]', '(7000, 9000]'],
            title_name='Creatinine Phosphokinase',
            xlabel='Range Creatinine Phosphokinase (mcg/L)',
            count_offset=2,
            count_ylim=100,
            rate_ylim=1.1,
        ),
        dict(
            subheader='**EDA Feature `ejection_fraction`**',
            notes=(
                '1. Range `ejection_fraction` dengan *deceased* yang terbanyak',
                '2. Melihat pengaruh `ejection_fraction` terhadap klasifikasi `DEATH_EVENT`',
            ),
            column='EfBin',
            order=['(10, 20]', '(20, 30]', '(30, 40]', '(40, 50]',
                   '(50, 60]', '(60, 70]', '(70, 80]'],
            title_name='Ejection Fraction',
            xlabel='Range Ejection Fraction (%)',
            count_offset=2,
            count_ylim=110,
            rate_ylim=1,
        ),
        dict(
            subheader='**EDA Feature `platelets`**',
            notes=(
                '1. Range `platelets` dengan *deceased* yang terbanyak',
                '2. Melihat pengaruh `platelets` terhadap klasifikasi `DEATH_EVENT`',
            ),
            column='PlatBin',
            order=['(200000, 300000]', '(300000, 400000]',
                   '(400000, 500000]', '(500000, 600000]',
                   '(600000, 700000]', '(700000, 800000]',
                   '(800000, 900000]'],
            title_name='Platelets',
            xlabel='Range Platelets (kp/mL)',
            count_offset=3,
            count_ylim=140,
            rate_ylim=1.3,
        ),
        dict(
            subheader='**EDA Feature `serum_creatinine`**',
            notes=(
                '1. Range `serum_creatinine` pasien dengan *deceased* terbanyak',
                # The second item was numbered "1." originally; every other
                # section numbers it "2.".
                '2. Melihat pengaruh `serum_creatinine` terhadap klasifikasi `DEATH_EVENT`',
            ),
            column='SCBin',
            order=['(0, 2]', '(2, 4]', '(4, 6]', '(6, 8]', '(8, 10]'],
            title_name='Serum Creatinine',
            xlabel='Range Serum Creatinine (mg/dL)',
            count_offset=3,
            count_ylim=220,
            rate_ylim=1.1,
        ),
        dict(
            subheader='**EDA Feature `serum_sodium`**',
            notes=(
                '1. Range `serum_sodium` dengan *deceased* yang terbanyak',
                '2. Melihat pengaruh `serum_sodium` terhadap klasifikasi `DEATH_EVENT`',
            ),
            column='SSBin',
            order=['(110, 120]', '(120, 130]', '(130, 140]'],
            title_name='Serum Sodium',
            xlabel='Range Serum Sodium (mEq/L)',
            count_offset=3,
            count_ylim=175,
            rate_ylim=0.7,
        ),
        dict(
            subheader='**EDA Feature `time`**',
            notes=(
                '1. Range `time` follow up dengan *deceased* yang terbanyak',
                '2. Melihat pengaruh `time` terhadap klasifikasi `DEATH_EVENT`',
            ),
            column='TimeBin',
            order=['(0, 50]', '(50, 100]', '(100, 150]', '(150, 200]',
                   '(200, 250]', '(250, 300]'],
            title_name='Time Follow Up',
            xlabel='Range Time Follow Up (days)',
            count_offset=2,
            count_ylim=80,
            rate_ylim=1,
        ),
    )
    for section in binned_features:
        _render_binned_feature(df_eda, **section)

    # Class balance of the target: a single full-width chart with no hue.
    st.subheader('**EDA Feature `DEATH_EVENT`**')
    st.write('1. Balance dataset antara *class no deceased* dan *class deceased*')
    _plot_counts(
        st, df_eda, _TARGET,
        title='Not Deceased vs Deceased',
        xlabel='Death Status',
        label_offset=5,
        tick_labels=_OUTCOME_LABELS,
        ylim=230,
        figsize=(10, 3),
    )

    # (column, display name, tick labels for 0/1, count-chart y limit).
    # The ``sex`` count chart originally used 'Men' while its paired rate
    # chart used 'Man'; both now use 'Man'.
    binary_features = (
        ('anaemia', 'Anaemia', ('No', 'Yes'), 140),
        ('diabetes', 'Diabetes', ('No', 'Yes'), 140),
        ('high_blood_pressure', 'Hypertension', ('No', 'Yes'), 160),
        ('sex', 'Gender', ('Woman', 'Man'), 140),
        ('smoking', 'Smoking', ('No', 'Yes'), 160),
    )
    for column, display_name, tick_labels, count_ylim in binary_features:
        _render_binary_feature(
            df_eda,
            column=column,
            display_name=display_name,
            tick_labels=tick_labels,
            count_ylim=count_ylim,
        )
# Render the page only when this file is executed directly as a script.
if __name__ == "__main__":
    run()