Evan Derin Ihsanudin
committed on
Commit
•
b8f05e2
1
Parent(s):
9b49114
P1M2_deployment
Browse files- .streamlit/config.toml +5 -0
- app.py +31 -0
- eda.py +425 -0
- employee_eda.csv +0 -0
- pipeline_xgb_opt +0 -0
- prediction.py +67 -0
- requirements.txt +6 -0
.streamlit/config.toml
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[theme]
|
2 |
+
primaryColor="#bd93f9"
|
3 |
+
backgroundColor="#282a36"
|
4 |
+
secondaryBackgroundColor="#44475a"
|
5 |
+
textColor="#f8f8f2"
|
app.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import eda
|
3 |
+
import prediction
|
4 |
+
|
5 |
+
|
6 |
+
# Page-wide configuration: browser tab title, wide layout, open sidebar, icon.
st.set_page_config(
    page_title='Resign Prediction',
    layout='wide',
    initial_sidebar_state='expanded',
    page_icon='https://imgur.com/sYKazYD.png',
)

# Inject CSS that hides Streamlit's default hamburger menu and footer.
st.markdown(
    """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
""",
    unsafe_allow_html=True,
)

# Sidebar: author heading, page selector, then the logo image.
sidebar = st.sidebar
sidebar.markdown("# Evan Derin Ihsanudin - RMT-FTDS-17")
selected_page = sidebar.selectbox(
    'Pilih Halaman (Resign Prediction/EDA): ',
    ('Resign Prediction', 'Exploratory Data Analysis'),
)
sidebar.image("https://imgur.com/sYKazYD.png", use_column_width=True)

# Hand control to the module that renders the chosen page; anything other
# than the prediction page falls through to the EDA page.
page_module = prediction if selected_page == 'Resign Prediction' else eda
page_module.run()
|
eda.py
ADDED
@@ -0,0 +1,425 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import seaborn as sns
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import plotly.express as px
|
6 |
+
from PIL import Image
|
7 |
+
|
8 |
+
|
9 |
+
|
10 |
+
|
11 |
+
# Sentence shown under every feature sub-header before the bullet list.
_INTRO = 'Dari visualisasi dibawah dapat disimpulkan bahwa :'

# Explicit x-axis order for the AgeBin comparison plots.
_AGE_ORDER = ['(20, 25]', '(25, 30]', '(30, 35]', '(35, 40]', '(40, 45]']

# Legend / tick labels for the LeaveOrNot classes.
_LEAVE_LABELS = ['Not Leave', 'Leave']


def _annotate_bars(ax, fmt, dy, fontsize, dx=None):
    """Write each bar's height just above the bar.

    With ``dx=None`` the text is centred on the bar; otherwise it is anchored
    ``dx`` data units to the right of the bar's left edge (default alignment).
    """
    for bar in ax.patches:
        height = bar.get_height()
        if dx is None:
            ax.annotate(fmt % height,
                        (bar.get_x() + bar.get_width() / 2, height + dy),
                        ha='center', va='center', fontsize=fontsize)
        else:
            ax.annotate(fmt % height, (bar.get_x() + dx, height + dy),
                        fontsize=fontsize)


def _show(target, fig):
    """Render ``fig`` through ``target.pyplot`` and then close it.

    Closing keeps pyplot from accumulating open figures: the page builds
    about 25 of them on every run.
    """
    target.pyplot(fig)
    plt.close(fig)


def _plot_distribution(df, column, *, xlabel, title, ymax, dy,
                       bar_fontsize=8, pie_fontsize=8,
                       tick_labels=None, pie_xlabel=None):
    """Draw a count plot (left) and a share pie chart (right) for ``column``."""
    fig, (bars, pie) = plt.subplots(1, 2, figsize=(8, 4))

    sns.countplot(x=column, data=df, palette="winter", ax=bars)
    bars.set_xlabel(xlabel, fontsize=12)
    bars.set_ylabel("# of Employee", fontsize=12)
    fig.suptitle(title, fontsize=18, fontweight='bold')
    bars.set_ylim(0, ymax)
    if tick_labels is not None:
        bars.set_xticks(list(range(len(tick_labels))), tick_labels, fontsize=11)
    _annotate_bars(bars, "%.0f", dy, bar_fontsize)

    # The pie is drawn on its axes explicitly instead of relying on pyplot's
    # "current axes". NOTE(review): when tick_labels is given, the same labels
    # are applied to the value_counts() order -- confirm that order matches.
    pie_kwargs = {} if tick_labels is None else {'labels': tick_labels}
    df[column].value_counts().plot(kind='pie', ax=pie, autopct='%1.1f%%',
                                   textprops={"fontsize": pie_fontsize},
                                   **pie_kwargs)
    if pie_xlabel is not None:
        pie.set_xlabel(pie_xlabel, fontsize=12)
    pie.set_ylabel("% of Employee", fontsize=12)

    _show(st, fig)


def _plot_leave_counts(df, column, target, *, xlabel, title, legend_title,
                       ymax, dy, order=None, tick_labelsize=None):
    """Draw employee counts per ``column`` value, split by LeaveOrNot."""
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.countplot(data=df, x=column, hue="LeaveOrNot", palette='winter',
                  order=order, ax=ax)
    ax.set_title(title, fontsize=18, fontweight='bold')
    ax.set_xlabel(xlabel, fontsize=12)
    ax.set_ylabel("# of Employee", fontsize=12)
    if tick_labelsize is not None:
        ax.tick_params(axis="x", labelsize=tick_labelsize)
    ax.legend(fontsize=10, title=legend_title, loc='upper right',
              labels=_LEAVE_LABELS)
    _annotate_bars(ax, "%.0f", dy, 11)
    ax.set_ylim(0, ymax)
    _show(target, fig)


def _plot_leave_rate(df, column, target, *, xlabel, title, ymax,
                     label_fontsize=14, dy=0.01, note_fontsize=13, dx=0.33,
                     order=None):
    """Draw the mean of LeaveOrNot (the leave rate) per ``column`` value."""
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.barplot(x=column, y="LeaveOrNot", data=df, palette='winter',
                order=order, errorbar=None, ax=ax)
    ax.set_xlabel(xlabel, fontsize=label_fontsize)
    ax.set_ylabel("% Leave", fontsize=label_fontsize)
    ax.set_title(title, fontsize=18, fontweight='bold')
    ax.set_ylim(0, ymax)
    _annotate_bars(ax, "%.2f", dy, note_fontsize, dx=dx)
    _show(target, fig)


def _feature_section(df, name, bullets, *, dist, counts=None, rate=None,
                     column=None):
    """Render one feature: sub-header, conclusions, then its charts.

    ``dist``, ``counts`` and ``rate`` are keyword dicts for the three plot
    helpers; labels and titles not given there are derived from ``name``.
    ``column`` is the DataFrame column to plot when it differs from ``name``.
    """
    column = name if column is None else column

    st.subheader(f'**EDA Feature {name}**')
    st.write(_INTRO)
    for bullet in bullets:
        st.markdown(bullet)

    dist = {'xlabel': name, 'title': f'Distribusi {name}', **dist}
    _plot_distribution(df, column, **dist)

    if counts is None and rate is None:
        return

    # Side-by-side comparison against the target: counts left, rate right.
    left, right = st.columns(2)
    if counts is not None:
        counts = {'xlabel': name, 'title': f'{name} vs LeaveOrNot',
                  'legend_title': 'LeaveOrNot', **counts}
        _plot_leave_counts(df, column, left, **counts)
    if rate is not None:
        rate = {'xlabel': name, 'title': f'% Leave vs {name}', **rate}
        _plot_leave_rate(df, column, right, **rate)


def run():
    """Render the Exploratory Data Analysis page.

    Reads ``employee_eda.csv`` from the working directory and, for each
    feature, shows written conclusions, a distribution figure, and (except
    for the target itself) count and leave-rate charts split by LeaveOrNot.
    """
    # Page title
    st.markdown("<h1 style='text-align: center; color: white;'>Exploratory Data Analysis</h1>", unsafe_allow_html=True)
    st.write('Berikut adalah EDA dari setiap feature')

    # Load the EDA dataset
    df_eda = pd.read_csv('employee_eda.csv')

    _feature_section(
        df_eda, 'Age',
        [
            '- Umur karyawan terpusat pada 25-30 tahun (2.350 karyawan/50.5%)',
            '- Karyawan yang paling banyak *resign* adalah karyawan dengan *range* umur 25-30 tahun (787 karyawan). Kemungkinan banyak pada *range* ini karena, karyawan paling banyak pada *range* umur tersebut ',
            '- Akan tetapi jika dilihat dari persentase *resign* pada setiap kelas, maka pada *range* umur 20-25 tahun memiliki persentase *resign* tertinggi. Kemudian pada *range* 25-40 tahun cenderung stabil dan persentase turun di angka 28% pada *range* 40-45 tahun',
        ],
        column='AgeBin',
        dist=dict(xlabel="Employee Age", title='Distribusi Employee Age',
                  ymax=2500, dy=45),
        counts=dict(xlabel="Range Age", title='Distribusi Range Age',
                    legend_title='Klasifikasi LeaveOrNot', ymax=1700, dy=25,
                    order=_AGE_ORDER, tick_labelsize=9.5),
        rate=dict(xlabel="Range Age", title='% Leave berdasarkan Age',
                  ymax=0.5, label_fontsize=12, dy=0.012, note_fontsize=11,
                  dx=None, order=_AGE_ORDER),
    )

    _feature_section(
        df_eda, 'ExperienceInCurrentDomain',
        [
            '- Pengalaman karyawan pada domain-nya terpusat pada 2 tahun (1.087 karyawan/23.4%)',
            '- Karyawan yang paling banyak *resign* adalah karyawan dengan *range* pengalaman 2 tahun (399 karyawan). Kemungkinan banyak pada *range* ini karena, karyawan paling banyak pada *range* pengalaman tersebut ',
            '- Akan tetapi jika dilihat dari persentase *resign* pada setiap kelas, maka persentase *resign* pada setiap kelas pengalaman tidak jauh berbeda (sekitar 30%)',
        ],
        dist=dict(xlabel="Experience In Current Domain",
                  title='Distribusi Experience In Current Domain',
                  ymax=1300, dy=25),
        counts=dict(xlabel="Experience In Current Domain",
                    title='Distribusi Experience In Current Domain',
                    legend_title='Klasifikasi LeaveOrNot', ymax=800, dy=15,
                    tick_labelsize=9.5),
        rate=dict(xlabel="Experience In Current Domain",
                  title='% Leave berdasarkan Experience In Current Domain',
                  ymax=0.5, label_fontsize=12, dy=0.012, note_fontsize=11,
                  dx=None),
    )

    _feature_section(
        df_eda, 'JoiningYear',
        [
            '- Karyawan paling banyak bergabung pada tahun 2017 (1.108 karyawan/23.8%)',
            '- Karyawan yang paling banyak *resign* adalah karyawan yang bergabung pada tahun 2018 (362 karyawan)',
            '- Jika dilihat dari persentase *resign* pada setiap kelas, maka persentase *resign* untuk karyawan yang bergabung pada tahun 2018 lebih besar dari tahun lain-nya (99%)',
        ],
        dist=dict(ymax=1200, dy=22),
        counts=dict(ymax=900, dy=15),
        rate=dict(ymax=1.2, dx=0.23),
    )

    # The target itself only gets the distribution figure.
    _feature_section(
        df_eda, 'LeaveOrNot',
        [
            '- Karyawan yang tidak *resign* lebih banyak dari pada karyawan yang *resign* dengan perbandingan 2 (65.6%) : 1 (34.4%)',
        ],
        dist=dict(ymax=3300, dy=55, bar_fontsize=10,
                  tick_labels=_LEAVE_LABELS, pie_xlabel="Leave Or Not"),
    )

    _feature_section(
        df_eda, 'Education',
        [
            '- *Education* karyawan terbanyak adalah pada level *bachelors* (3.601 karyawan/77.4%)',
            '- Karyawan yang paling banyak *resign* adalah karyawan dengan level edukasi *bachelors* (1.129 karyawan). Kemungkinan banyak pada level ini karena, karyawan paling banyak pada level edukasi tersebut ',
            '- Akan tetapi jika dilihat dari persentase *resign* pada setiap kelas, maka persentase *resign* pada level edukasi *masters* lebih tinggi dari pada level edukasi lainnya (49%)',
        ],
        dist=dict(ymax=4000, dy=70),
        counts=dict(ymax=3000, dy=45),
        rate=dict(ymax=0.7),
    )

    _feature_section(
        df_eda, 'City',
        [
            '- Karyawan banyak yang bekerja pada kota Bangalore (2.228 karyawan/47.9%)',
            '- Karyawan yang paling banyak *resign* adalah karyawan yang bekerja di kota Pune (639 karyawan)',
            '- Jika dilihat dari persentase *resign* pada setiap kelas, maka persentase *resign* pada karyawan yang bekerja di kota Pune lebih tinggi dari pada karyawan yang bekerja di kota lainnya (50%)',
        ],
        dist=dict(ymax=2500, dy=50),
        counts=dict(ymax=2000, dy=35),
        rate=dict(ymax=0.7),
    )

    _feature_section(
        df_eda, 'Gender',
        [
            '- Karyawan yang bekerja paling banyak memiliki *gender* pria (2.778 karyawan/59.7%)',
            '- Karyawan yang paling banyak *resign* adalah karyawan dengan *gender* wanita (884 karyawan)',
            '- Jika dilihat dari persentase *resign* pada setiap kelas, maka persentase *resign* pada *gender* wanita lebih banyak dari pada *gender* pria (47%)',
        ],
        dist=dict(ymax=3000, dy=55),
        counts=dict(ymax=2300, dy=45),
        rate=dict(ymax=0.6),
    )

    _feature_section(
        df_eda, 'EverBenched',
        [
            '- Karyawan yang tidak pernah memegang *project* > 1 bulan lebih banyak dari yang pernah memegang *project* (4.175 karyawan/89.7%)',
            '- Karyawan yang paling banyak *resign* adalah karyawan yang tidak pernah memegang *project* > 1 bulan (1.383 karyawan). Kemungkinan banyak pada kelas ini karena, karyawan paling banyak pada kelas tersebut',
            '- Akan tetapi jika dilihat dari persentase *resign* pada setiap kelas, maka persentase *resign* pada karyawan yang pernah memegang *project* > 1 bulan lebih tinggi dari pada yang tidak pernah memegang *project* > 1 bulan (45%)',
        ],
        dist=dict(ymax=4600, dy=70, pie_fontsize=12),
        counts=dict(ymax=3100, dy=45),
        rate=dict(ymax=0.7),
    )

    _feature_section(
        df_eda, 'PaymentTier',
        [
            '- Karyawan banyak yang memiliki gaji dengan *tier* 3 (3.492 karyawan/75%)',
            '- Karyawan yang paling banyak *resign* adalah karyawan dengan gaji *tier* 3 (961 karyawan). Kemungkinan banyak pada kelas ini karena, karyawan paling banyak pada *tier* gaji tersebut',
            '- Akan tetapi jika dilihat dari persentase *resign* pada setiap kelas, maka persentase *resign* pada kelas gaji *tier* 2 lebih tinggi dari pada *tier* lainnya (60%)',
        ],
        dist=dict(ymax=4000, dy=55, pie_fontsize=12),
        counts=dict(ymax=3000, dy=45),
        rate=dict(ymax=0.7),
    )


if __name__ == '__main__':
    run()
|
425 |
+
|
employee_eda.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pipeline_xgb_opt
ADDED
Binary file (826 kB). View file
|
|
prediction.py
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import pickle
|
5 |
+
|
6 |
+
|
7 |
+
|
8 |
+
|
9 |
+
|
10 |
+
# Deserialize the bundled prediction pipeline when this module is imported.
with open('pipeline_xgb_opt', 'rb') as model_file:
    pipeline_xgb_opt = pickle.load(model_file)
|
13 |
+
|
14 |
+
def run():
    """Render the Resign Prediction page.

    Shows an input form for one employee, echoes the collected values as a
    one-row table, and -- once the form is submitted -- feeds that row to the
    module-level ``pipeline_xgb_opt`` and displays the predicted class.
    """
    # Page title
    st.markdown("<h1 style='text-align: center; color: white;'>Resign Prediction</h1>", unsafe_allow_html=True)

    # Short description of the form
    st.write('Page ini berisi model untuk memprediksi potensi resign karyawan dalam 2 tahun mendatang')
    st.write('Mohon persiapkan data terlebih dahulu sebelum melakukan prediksi')

    # Input form; widget values are only sent when "Predict" is pressed.
    with st.form(key='form_employee'):
        education = st.radio('Education', options=['Bachelors', 'Masters', 'PHD'], horizontal=True)
        joining_year = st.number_input('Joining Year', min_value=2012, max_value=2018, value=2015, step=1, help='Tahun bergabungnya karyawan')
        city = st.selectbox('City', ('Bangalore', 'Pune', 'New Delhi'), index=1)
        payment_tier = st.selectbox('Payment Tier', (1, 2, 3), index=1)
        age = st.slider('Age', 22, 41, 25)
        gender = st.radio('Gender', options=['Male', 'Female'], horizontal=False)
        ever_benched = st.selectbox('Ever Benched', ('Yes', 'No'), index=1)
        experience = st.slider('Experience', 0, 7, 2)
        submitted = st.form_submit_button('Predict')

    # One-row inference frame; the keys are the column names passed to the
    # pipeline.
    data_inf = pd.DataFrame([{
        'Education': education,
        'JoiningYear': joining_year,
        'City': city,
        'PaymentTier': payment_tier,
        'Age': age,
        'Gender': gender,
        'EverBenched': ever_benched,
        'ExperienceInCurrentDomain': experience,
    }])

    # Show the inputs explicitly. A bare ``data_inf`` expression is rendered
    # only by Streamlit "magic", which is not applied to imported modules, so
    # the table would be missing when this page is opened through app.py.
    st.write(data_inf)

    if submitted:
        # predict() is given a single row, so take its first (only) element
        # instead of truth-testing the whole returned array.
        y_pred_inf = pipeline_xgb_opt.predict(data_inf)
        prediction = 'Resign' if y_pred_inf[0] == 1 else 'Not Resign'

        st.write('# Resign Prediction : ', prediction)


if __name__ == '__main__':
    run()
|
65 |
+
|
66 |
+
|
67 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pandas
|
3 |
+
seaborn
|
4 |
+
matplotlib
|
5 |
+
scikit-learn == 1.1.3
|
6 |
+
numpy
|