|
import streamlit as st |
|
import pandas as pd |
|
import seaborn as sns |
|
import matplotlib.pyplot as plt |
|
import plotly.express as px |
|
from PIL import Image |
|
|
|
|
|
|
|
# Page-level Streamlit settings: browser-tab title, wide layout, and a
# sidebar that starts expanded.
_PAGE_SETTINGS = {
    'page_title': 'Employee Attrition Prediction',
    'layout': 'wide',
    'initial_sidebar_state': 'expanded',
}
st.set_page_config(**_PAGE_SETTINGS)

# Raw CSS injected into the page, targeting the main report container and
# the sidebar content.
# NOTE(review): the `background` values are bare quoted file names rather
# than `url(...)` references -- confirm this actually produces the intended
# background before relying on it.
_PAGE_CSS = """<style>.reportview-container {background: "5160549.jpg"}.sidebar .sidebar-content {background: "5160549.jpg"}</style>"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)
|
|
|
|
|
|
|
# Two-colour palette shared by every chart on the page; the first colour is
# applied to the first plotted series and the second to the next one.
_ATTRITION_COLORS = ['#0072C6', '#BFBFBF']

# A categorical column with more than this many distinct values is drawn
# with horizontal bars instead of vertical ones.
_MAX_VERTICAL_CATEGORIES = 3


def _render_attrition_pie(df):
    """Render a pie chart of the value counts of the ``Attrition`` column."""
    target_counts = df['Attrition'].value_counts()
    legend_labels = [
        f'Attrition {value} - {count}' for value, count in target_counts.items()
    ]

    fig = plt.figure(figsize=(10, 5))
    # Only the right-hand cell of a 1x2 grid is used, as in the original layout.
    ax = fig.add_subplot(1, 2, 2)
    target_counts.plot(
        kind='pie',
        autopct='%1.1f%%',
        shadow=True,
        labels=None,  # wedge labels are suppressed; the legend carries them
        colors=_ATTRITION_COLORS,
        ax=ax,
    )
    ax.set_title('Employee Attrition')
    ax.legend(legend_labels, loc='upper right', bbox_to_anchor=(1.3, 1))

    st.pyplot(fig)
    # Streamlit re-executes the script on every interaction; close the figure
    # so open matplotlib figures do not pile up across reruns.
    plt.close(fig)


def _render_attrition_by_category(df, column):
    """Render a stacked bar chart of row counts per ``column`` value, split by Attrition.

    Bars are horizontal when ``column`` has more than
    ``_MAX_VERTICAL_CATEGORIES`` distinct values and vertical otherwise.
    Each bar segment is annotated with its count.
    """
    attrition_data = (
        df.groupby([column, 'Attrition']).size().unstack(fill_value=0)
    )
    horizontal = len(attrition_data.index) > _MAX_VERTICAL_CATEGORIES

    fig, ax = plt.subplots(figsize=(15, 5))
    if horizontal:
        attrition_data.plot(
            kind='barh', stacked=True, color=_ATTRITION_COLORS, ax=ax
        )
        ax.set_xlabel('Jumlah Karyawan')
        ax.set_ylabel(column)
    else:
        attrition_data.plot(
            kind='bar', stacked=True, color=_ATTRITION_COLORS, ax=ax
        )
        ax.set_ylabel('Jumlah Karyawan')
        ax.set_xlabel(column)
        # Keep the category tick labels upright for vertical bars.
        ax.set_xticklabels(attrition_data.index, rotation=0)

    ax.set_title(f'Distribusi Attrition Berdasarkan {column}')
    # NOTE(review): the legend labels are positional; this presumes the
    # unstacked columns come out in the order "No", "Yes" -- verify against data.
    ax.legend(title='Attrition', labels=['Tidak', 'Ya'])

    # Write the count in the middle of every bar segment. The bar length is
    # its width for horizontal bars and its height for vertical ones.
    for container in ax.containers:
        if horizontal:
            segment_labels = [f'{int(bar.get_width())}' for bar in container]
        else:
            segment_labels = [f'{int(bar.get_height())}' for bar in container]
        ax.bar_label(
            container, labels=segment_labels, label_type='center', padding=2
        )

    st.pyplot(fig)
    plt.close(fig)  # avoid accumulating figures across Streamlit reruns


def _render_numeric_histogram(df, column):
    """Render overlaid histograms of ``column`` for Attrition 'No' and 'Yes' rows."""
    attrition_no = df.loc[df['Attrition'] == 'No', column]
    attrition_yes = df.loc[df['Attrition'] == 'Yes', column]

    fig, ax = plt.subplots(figsize=(15, 5))
    sns.histplot(
        attrition_no, color=_ATTRITION_COLORS[0], label='No',
        kde=False, bins=30, ax=ax,
    )
    sns.histplot(
        attrition_yes, color=_ATTRITION_COLORS[1], label='Yes',
        kde=False, bins=30, ax=ax,
    )

    ax.set_title(f'Histogram Distribusi {column} Berdasarkan Attrition')
    ax.set_xlabel(column)
    ax.set_ylabel('Jumlah Karyawan')
    ax.legend(title='Attrition')
    fig.tight_layout()

    st.pyplot(fig)
    plt.close(fig)  # avoid accumulating figures across Streamlit reruns


def run():
    """Render the Employee Attrition EDA page.

    Reads ``employee.jpg`` and ``Main_Data.csv`` from the working directory
    and draws, in order: the header and banner image, a preview of the first
    five data rows, the attrition pie chart, three selectable stacked bar
    charts (demographic, survey and performance columns) and a selectable
    histogram of a numeric column.
    """
    st.title('Employee Attrition Prediction')
    st.subheader('Employee Attrition Prediction EDA')
    st.write('Created by: Gieorgie Kosasih')

    image = Image.open('employee.jpg')
    st.markdown('---')
    st.image(image)
    st.markdown('---')

    st.write('### Employee Attrition Data')
    df = pd.read_csv('Main_Data.csv')
    st.dataframe(df.head(5))

    st.write('### Attrition Distribution')
    _render_attrition_pie(df)
    st.markdown('---')

    # All four selectboxes share the same visible label, so each one gets an
    # explicit key to keep its widget identity independent of its options.
    st.write('### Data Demografi Karyawan')
    pilihan = st.selectbox(
        'Pilih Kolom : ',
        ('Gender', 'Education', 'MaritalStatus', 'Department'),
        key='pilihan_demografi',
    )
    _render_attrition_by_category(df, pilihan)
    st.markdown('---')

    st.write('### Data Survey Karyawan')
    pilihan = st.selectbox(
        'Pilih Kolom : ',
        ('EnvironmentSatisfaction', 'JobSatisfaction', 'WorkLifeBalance'),
        key='pilihan_survey',
    )
    _render_attrition_by_category(df, pilihan)
    st.markdown('---')

    st.write('### Data Performa Karyawan')
    pilihan = st.selectbox(
        'Pilih Kolom : ',
        ('JobInvolvement', 'PerformanceRating', 'BusinessTravel', 'JobLevel',
         'JobRole'),
        key='pilihan_performa',
    )
    _render_attrition_by_category(df, pilihan)
    st.markdown('---')

    st.write('### Data Numerical')
    pilihan = st.selectbox(
        'Pilih Kolom : ',
        ('Age', 'DistanceFromHome', 'MonthlyIncome', 'NumCompaniesWorked',
         'PercentSalaryHike', 'TotalWorkingYears', 'YearsAtCompany',
         'YearsSinceLastPromotion', 'YearsWithCurrManager'),
        key='pilihan_numerical',
    )
    _render_numeric_histogram(df, pilihan)
|
|
|
|
|
# Build the page only when this file is executed as the entry script,
# not when it is imported as a module.
if __name__ == "__main__":
    run()