Evan Derin Ihsanudin
committed on
Commit
•
0a18345
1
Parent(s):
8c27fc8
revisi eda dan model
Browse files
- churn_model.h5 +1 -1
- eda.py +425 -3
- prediction.py +2 -2
churn_model.h5
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 262152
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14f0cfe911eb0d8f947e5afb09f2fa35b4e9f116435b3cd53319f3f36dd7cc30
|
3 |
size 262152
|
eda.py
CHANGED
@@ -35,7 +35,7 @@ def run() :
|
|
35 |
ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
36 |
p.get_height()+405), ha='center', va='center',fontsize = 11)
|
37 |
|
38 |
-
df_eda['churn_risk_score'].value_counts().plot(kind='pie', labels = ['
|
39 |
ax[1].set_ylabel("% of Customer", fontsize= 12)
|
40 |
st.pyplot(fig)
|
41 |
|
@@ -137,8 +137,8 @@ def run() :
|
|
137 |
# Membuat Sub Header
|
138 |
st.subheader('**EDA Feature Gender**')
|
139 |
st.write('Dari visualisasi dibawah dapat disimpulkan bahwa :')
|
140 |
-
st.markdown('- *Customer* paling banyak adalah *customer* wanita (50.1%). Akan tetapi tidak berbeda signifikan, hanya berbeda 0.
|
141 |
-
st.markdown('- *Customer* yang paling banyak *churn* adalah *customer* wanita
|
142 |
st.markdown('- Akan tetapi jika dilihat dari persentase *churn* pada setiap kelas *gender*, maka tidak ada perbedaan signifikan')
|
143 |
|
144 |
#Visualisasi distribusi Gender
|
@@ -179,5 +179,427 @@ def run() :
|
|
179 |
p.get_height()+0.02), ha='center', va='center',fontsize = 11)
|
180 |
st.pyplot(fig)
|
181 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
if __name__ == '__main__':
|
183 |
run()
|
|
|
35 |
ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
36 |
p.get_height()+405), ha='center', va='center',fontsize = 11)
|
37 |
|
38 |
+
df_eda['churn_risk_score'].value_counts().plot(kind='pie', labels = ['Churn', 'Not Churn'],autopct='%1.1f%%', textprops = {"fontsize":12})
|
39 |
ax[1].set_ylabel("% of Customer", fontsize= 12)
|
40 |
st.pyplot(fig)
|
41 |
|
|
|
137 |
# Membuat Sub Header
|
138 |
st.subheader('**EDA Feature Gender**')
|
139 |
st.write('Dari visualisasi dibawah dapat disimpulkan bahwa :')
|
140 |
+
st.markdown('- *Customer* paling banyak adalah *customer* wanita (50.1%). Akan tetapi tidak berbeda signifikan, hanya berbeda 0.2% dari *customer* pria')
|
141 |
+
st.markdown('- *Customer* yang paling banyak *churn* adalah *customer* wanita')
|
142 |
st.markdown('- Akan tetapi jika dilihat dari persentase *churn* pada setiap kelas *gender*, maka tidak ada perbedaan signifikan')
|
143 |
|
144 |
#Visualisasi distribusi Gender
|
|
|
179 |
p.get_height()+0.02), ha='center', va='center',fontsize = 11)
|
180 |
st.pyplot(fig)
|
181 |
|
182 |
+
# Membuat Sub Header
|
183 |
+
st.subheader('**EDA Feature Region Category**')
|
184 |
+
st.write('Dari visualisasi dibawah dapat disimpulkan bahwa :')
|
185 |
+
st.markdown('- *Customer* paling banyak adalah *customer* yang berasal dari kota (44.8%)')
|
186 |
+
st.markdown('- *Customer* yang paling banyak *churn* adalah *customer* yang tinggal di kota')
|
187 |
+
st.markdown('- Akan tetapi jika dilihat dari persentase *churn* pada setiap kelas *region*, maka tidak ada perbedaan signifikan')
|
188 |
+
|
189 |
+
#Visualisasi distribusi region_category
|
190 |
+
fig, ax =plt.subplots(1,2,figsize=(15,6))
|
191 |
+
sns.countplot(x='region_category', data=df_eda, palette='winter', ax=ax[0])
|
192 |
+
ax[0].set_xlabel("region_category", fontsize= 12)
|
193 |
+
ax[0].set_ylabel("# of Customer", fontsize= 12)
|
194 |
+
fig.suptitle('Region Category Distribution', fontsize=18, fontweight='bold')
|
195 |
+
ax[0].set_ylim(0,16000)
|
196 |
+
for p in ax[0].patches:
|
197 |
+
ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
198 |
+
p.get_height()+305), ha='center', va='center',fontsize = 10)
|
199 |
+
df_eda['region_category'].value_counts().plot(kind='pie', autopct='%1.1f%%', textprops = {"fontsize":12})
|
200 |
+
ax[1].set_ylabel("% of Customer", fontsize= 10)
|
201 |
+
st.pyplot(fig)
|
202 |
+
|
203 |
+
# Membuat Visualisasi distribusi region_category berdasarkan Churn
|
204 |
+
fig, ax =plt.subplots(1,2,figsize=(15,6))
|
205 |
+
sns.countplot(data = df_eda, x = 'region_category', hue="churn_risk_score", palette = 'winter', ax=ax[0])
|
206 |
+
ax[0].set_title('Region Category Distribution', fontsize=14, fontweight='bold',)
|
207 |
+
ax[0].set_xlabel("region_category", fontsize= 12)
|
208 |
+
ax[0].set_ylabel("# of Customer", fontsize= 12)
|
209 |
+
ax[0].tick_params(axis="x", labelsize= 9.5)
|
210 |
+
ax[0].legend(fontsize=10,title='Churn Classification', loc='upper right', labels=['Not Churn', 'Churn'])
|
211 |
+
for p in ax[0].patches:
|
212 |
+
ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
213 |
+
p.get_height()+180), ha='center', va='center',fontsize = 10)
|
214 |
+
ax[0].set_ylim(0,10000)
|
215 |
+
|
216 |
+
#Visualisasi % Churn dari setiap kelas
|
217 |
+
sns.barplot(x = 'region_category', y = 'churn_risk_score', data = df_eda, palette = 'winter',ax=ax[1])
|
218 |
+
ax[1].set_xlabel("region_category", fontsize= 12)
|
219 |
+
ax[1].set_ylabel("% Churn", fontsize= 12)
|
220 |
+
ax[1].set_title('% Churn based on Region Category', fontsize=14, fontweight='bold')
|
221 |
+
ax[1].set_ylim(0,0.7)
|
222 |
+
for p in ax[1].patches:
|
223 |
+
ax[1].annotate("%.2f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
224 |
+
p.get_height()+0.03), ha='center', va='center',fontsize = 11)
|
225 |
+
st.pyplot(fig)
|
226 |
+
|
227 |
+
# Membuat Sub Header
|
228 |
+
st.subheader('**EDA Feature Membership Category**')
|
229 |
+
st.write('Dari visualisasi dibawah dapat disimpulkan bahwa :')
|
230 |
+
st.markdown('- *Customer* paling banyak adalah *customer* dengan *basic membership* (20.9%) dan *no membership* (20.8%)')
|
231 |
+
st.markdown('- *Customer* yang paling banyak *churn* adalah *customer* dengan *no membership*')
|
232 |
+
st.markdown('- Akan tetapi jika dilihat dari persentase *churn* pada setiap kelas *membership*, maka terdapat perbedaan yang signifikan')
|
233 |
+
|
234 |
+
# Visualisasi distribusi membership_category
|
235 |
+
fig, ax =plt.subplots(1,2,figsize=(15,6))
|
236 |
+
sns.countplot(x='membership_category', data=df_eda, palette='winter', ax=ax[0])
|
237 |
+
ax[0].set_xlabel("membership_category", fontsize= 12)
|
238 |
+
ax[0].set_ylabel("# of Customer", fontsize= 12)
|
239 |
+
fig.suptitle('Membership Category Distribution', fontsize=18, fontweight='bold')
|
240 |
+
ax[0].tick_params(axis='x', rotation=90)
|
241 |
+
ax[0].set_ylim(0,8500)
|
242 |
+
for p in ax[0].patches:
|
243 |
+
ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
244 |
+
p.get_height()+185), ha='center', va='center',fontsize = 10)
|
245 |
+
|
246 |
+
df_eda['membership_category'].value_counts().plot(kind='pie', autopct='%1.1f%%', textprops = {"fontsize":12})
|
247 |
+
ax[1].set_ylabel("% of Customer", fontsize= 10)
|
248 |
+
st.pyplot(fig)
|
249 |
+
# Membuat Visualisasi distribusi membership_category berdasarkan Churn
|
250 |
+
fig, ax =plt.subplots(1,2,figsize=(15,6))
|
251 |
+
sns.countplot(data = df_eda, x = 'membership_category', hue="churn_risk_score", palette = 'winter', ax=ax[0])
|
252 |
+
ax[0].set_title('Membership Category Distribution', fontsize=14, fontweight='bold',)
|
253 |
+
ax[0].set_xlabel("membership_category", fontsize= 12)
|
254 |
+
ax[0].set_ylabel("# of Customer", fontsize= 12)
|
255 |
+
ax[0].tick_params(axis="x", labelsize= 9.5)
|
256 |
+
ax[0].legend(fontsize=10,title='Churn Classification', loc='upper right', labels=['Not Churn', 'Churn'])
|
257 |
+
for p in ax[0].patches:
|
258 |
+
ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
259 |
+
p.get_height()+180), ha='center', va='center',fontsize = 10)
|
260 |
+
ax[0].tick_params(axis='x', rotation=90)
|
261 |
+
ax[0].set_ylim(0,8500)
|
262 |
+
|
263 |
+
#Visualisasi % Churn dari setiap kelas
|
264 |
+
sns.barplot(x = 'membership_category', y = 'churn_risk_score', data = df_eda, palette = 'winter',ax=ax[1])
|
265 |
+
ax[1].set_xlabel("membership_category", fontsize= 12)
|
266 |
+
ax[1].set_ylabel("% Churn", fontsize= 12)
|
267 |
+
ax[1].set_title('% Churn based on Membership Category', fontsize=14, fontweight='bold')
|
268 |
+
ax[1].set_ylim(0,1.2)
|
269 |
+
ax[1].tick_params(axis='x', rotation=90)
|
270 |
+
for p in ax[1].patches:
|
271 |
+
ax[1].annotate("%.2f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
272 |
+
p.get_height()+0.03), ha='center', va='center',fontsize = 11)
|
273 |
+
|
274 |
+
st.pyplot(fig)
|
275 |
+
|
276 |
+
st.subheader('**EDA Feature Joined Through Referral**')
|
277 |
+
st.write('Dari visualisasi dibawah dapat disimpulkan bahwa :')
|
278 |
+
st.markdown('- *Customer* paling banyak adalah *customer* yang tidak menggunakan *referral* (50.2%). Akan tetapi tidak berbeda signifikan, hanya berbeda 0.4% dari *customer* yang menggunakan *referral*')
|
279 |
+
st.markdown('- *Customer* yang paling banyak *churn* adalah *customer* dengan *referral*')
|
280 |
+
st.markdown('- Akan tetapi jika dilihat dari persentase *churn* pada setiap kelas *joined through referral*, maka tidak terdapat perbedaan yang signifikan')
|
281 |
+
|
282 |
+
# Visualisasi distribusi joined_through_referral
|
283 |
+
fig, ax =plt.subplots(1,2,figsize=(15,6))
|
284 |
+
|
285 |
+
sns.countplot(x='joined_through_referral', data=df_eda, palette='winter', ax=ax[0])
|
286 |
+
ax[0].set_xlabel("joined_through_referral", fontsize= 12)
|
287 |
+
ax[0].set_ylabel("# of Customer", fontsize= 12)
|
288 |
+
fig.suptitle('Joined Through Referral Distribution', fontsize=18, fontweight='bold')
|
289 |
+
ax[0].set_ylim(0,17500)
|
290 |
+
for p in ax[0].patches:
|
291 |
+
ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
292 |
+
p.get_height()+235), ha='center', va='center',fontsize = 10)
|
293 |
+
|
294 |
+
df_eda['joined_through_referral'].value_counts().plot(kind='pie', autopct='%1.1f%%', textprops = {"fontsize":12})
|
295 |
+
ax[1].set_ylabel("% of Customer", fontsize= 10)
|
296 |
+
st.pyplot(fig)
|
297 |
+
|
298 |
+
# Membuat Visualisasi distribusi joined_through_referral berdasarkan Churn
|
299 |
+
fig, ax =plt.subplots(1,2,figsize=(15,6))
|
300 |
+
sns.countplot(data = df_eda, x = 'joined_through_referral', hue="churn_risk_score", palette = 'winter', ax=ax[0])
|
301 |
+
ax[0].set_title('Joined Through Referral Distribution', fontsize=14, fontweight='bold',)
|
302 |
+
ax[0].set_xlabel("joined_through_referral", fontsize= 12)
|
303 |
+
ax[0].set_ylabel("# of Customer", fontsize= 12)
|
304 |
+
ax[0].tick_params(axis="x", labelsize= 9.5)
|
305 |
+
ax[0].legend(fontsize=10,title='Churn Classification', loc='upper right', labels=['Not Churn', 'Churn'])
|
306 |
+
for p in ax[0].patches:
|
307 |
+
ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
308 |
+
p.get_height()+180), ha='center', va='center',fontsize = 10)
|
309 |
+
ax[0].set_ylim(0,12000)
|
310 |
+
|
311 |
+
#Visualisasi % Churn dari setiap kelas
|
312 |
+
sns.barplot(x = 'joined_through_referral', y = 'churn_risk_score', data = df_eda, palette = 'winter',ax=ax[1])
|
313 |
+
ax[1].set_xlabel("joined_through_referral", fontsize= 12)
|
314 |
+
ax[1].set_ylabel("% Churn", fontsize= 12)
|
315 |
+
ax[1].set_title('% Churn based on Joined Through Referral', fontsize=14, fontweight='bold')
|
316 |
+
ax[1].set_ylim(0,0.8)
|
317 |
+
for p in ax[1].patches:
|
318 |
+
ax[1].annotate("%.2f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
319 |
+
p.get_height()+0.03), ha='center', va='center',fontsize = 11)
|
320 |
+
|
321 |
+
st.pyplot(fig)
|
322 |
+
|
323 |
+
st.subheader('**EDA Feature Preferred Offer Types**')
|
324 |
+
st.write('Dari visualisasi dibawah dapat disimpulkan bahwa :')
|
325 |
+
st.markdown('- *Customer* paling banyak adalah *customer* menggunakan kupon (33.7%)')
|
326 |
+
st.markdown('- *Customer* yang paling banyak *churn* adalah *customer* yang tidak menggunakan penawaran')
|
327 |
+
st.markdown('- Akan tetapi jika dilihat dari persentase *churn* pada setiap kelas *preferred offer types*, maka tidak terdapat perbedaan yang signifikan')
|
328 |
+
|
329 |
+
#Visualisasi distribusi preferred_offer_types
|
330 |
+
fig, ax =plt.subplots(1,2,figsize=(15,6))
|
331 |
+
sns.countplot(x='preferred_offer_types', data=df_eda, palette='winter', ax=ax[0])
|
332 |
+
ax[0].set_xlabel("preferred_offer_types", fontsize= 12)
|
333 |
+
ax[0].set_ylabel("# of Customer", fontsize= 12)
|
334 |
+
fig.suptitle('Offer Types Distribution', fontsize=18, fontweight='bold')
|
335 |
+
ax[0].set_ylim(0,14000)
|
336 |
+
for p in ax[0].patches:
|
337 |
+
ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
338 |
+
p.get_height()+235), ha='center', va='center',fontsize = 10)
|
339 |
+
|
340 |
+
df_eda['preferred_offer_types'].value_counts().plot(kind='pie', autopct='%1.1f%%', textprops = {"fontsize":10})
|
341 |
+
ax[1].set_ylabel("% of Customer", fontsize= 10)
|
342 |
+
st.pyplot(fig)
|
343 |
+
|
344 |
+
# Membuat Visualisasi distribusi preferred_offer_types berdasarkan Churn
|
345 |
+
fig, ax =plt.subplots(1,2,figsize=(15,6))
|
346 |
+
|
347 |
+
sns.countplot(data = df_eda, x = 'preferred_offer_types', hue="churn_risk_score", palette = 'winter', ax=ax[0])
|
348 |
+
ax[0].set_title('Offer Types Distribution', fontsize=14, fontweight='bold',)
|
349 |
+
ax[0].set_xlabel("preferred_offer_types", fontsize= 12)
|
350 |
+
ax[0].set_ylabel("# of Customer", fontsize= 12)
|
351 |
+
ax[0].tick_params(axis="x", labelsize= 9.5)
|
352 |
+
ax[0].legend(fontsize=10,title='Churn Classification', loc='upper right', labels=['Not Churn', 'Churn'])
|
353 |
+
for p in ax[0].patches:
|
354 |
+
ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
355 |
+
p.get_height()+180), ha='center', va='center',fontsize = 10)
|
356 |
+
ax[0].set_ylim(0,9000)
|
357 |
+
|
358 |
+
#Visualisasi % Churn dari setiap kelas
|
359 |
+
sns.barplot(x = 'preferred_offer_types', y = 'churn_risk_score', data = df_eda, palette = 'winter',ax=ax[1])
|
360 |
+
ax[1].set_xlabel("preferred_offer_types", fontsize= 12)
|
361 |
+
ax[1].set_ylabel("% Churn", fontsize= 12)
|
362 |
+
ax[1].set_title('% Churn based on Offer Types', fontsize=14, fontweight='bold')
|
363 |
+
ax[1].set_ylim(0,0.8)
|
364 |
+
for p in ax[1].patches:
|
365 |
+
ax[1].annotate("%.2f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
366 |
+
p.get_height()+0.03), ha='center', va='center',fontsize = 11)
|
367 |
+
|
368 |
+
st.pyplot(fig)
|
369 |
+
|
370 |
+
st.subheader('**EDA Feature Used Special Discount Types**')
|
371 |
+
st.write('Dari visualisasi dibawah dapat disimpulkan bahwa :')
|
372 |
+
st.markdown('- *Customer* paling banyak adalah *customer* yang menggunakan diskon (55%)')
|
373 |
+
st.markdown('- *Customer* yang paling banyak *churn* adalah *customer* yang menggunakan diskon')
|
374 |
+
st.markdown('- Akan tetapi jika dilihat dari persentase *churn* pada setiap kelas *used special discount*, maka tidak terdapat perbedaan yang signifikan')
|
375 |
+
|
376 |
+
#Visualisasi distribusi used_special_discount
|
377 |
+
fig, ax =plt.subplots(1,2,figsize=(15,6))
|
378 |
+
sns.countplot(x='used_special_discount', data=df_eda, palette='winter', ax=ax[0])
|
379 |
+
ax[0].set_xlabel("used_special_discount", fontsize= 12)
|
380 |
+
ax[0].set_ylabel("# of Customer", fontsize= 12)
|
381 |
+
fig.suptitle('Used Special Discount Distribution', fontsize=18, fontweight='bold')
|
382 |
+
ax[0].set_ylim(0,23000)
|
383 |
+
for p in ax[0].patches:
|
384 |
+
ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
385 |
+
p.get_height()+335), ha='center', va='center',fontsize = 10)
|
386 |
+
|
387 |
+
df_eda['used_special_discount'].value_counts().plot(kind='pie', autopct='%1.1f%%', textprops = {"fontsize":10})
|
388 |
+
ax[1].set_ylabel("% of Customer", fontsize= 10)
|
389 |
+
st.pyplot(fig)
|
390 |
+
|
391 |
+
# Membuat Visualisasi distribusi used_special_discount berdasarkan Churn
|
392 |
+
fig, ax =plt.subplots(1,2,figsize=(15,6))
|
393 |
+
sns.countplot(data = df_eda, x = 'used_special_discount', hue="churn_risk_score", palette = 'winter', ax=ax[0])
|
394 |
+
ax[0].set_title('Used Special Discount Distribution', fontsize=14, fontweight='bold',)
|
395 |
+
ax[0].set_xlabel("used_special_discount", fontsize= 12)
|
396 |
+
ax[0].set_ylabel("# of Customer", fontsize= 12)
|
397 |
+
ax[0].tick_params(axis="x", labelsize= 9.5)
|
398 |
+
ax[0].legend(fontsize=10,title='Churn Classification', loc='upper right', labels=['Not Churn', 'Churn'])
|
399 |
+
for p in ax[0].patches:
|
400 |
+
ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
401 |
+
p.get_height()+180), ha='center', va='center',fontsize = 10)
|
402 |
+
ax[0].set_ylim(0,12000)
|
403 |
+
|
404 |
+
#Visualisasi % Churn dari setiap kelas
|
405 |
+
sns.barplot(x = 'used_special_discount', y = 'churn_risk_score', data = df_eda, palette = 'winter', ax=ax[1])
|
406 |
+
ax[1].set_xlabel("used_special_discount", fontsize= 12)
|
407 |
+
ax[1].set_ylabel("% Churn", fontsize= 12)
|
408 |
+
ax[1].set_title('% Churn based on Used Special Discount', fontsize=14, fontweight='bold')
|
409 |
+
ax[1].set_ylim(0,0.8)
|
410 |
+
for p in ax[1].patches:
|
411 |
+
ax[1].annotate("%.2f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
412 |
+
p.get_height()+0.03), ha='center', va='center',fontsize = 11)
|
413 |
+
|
414 |
+
st.pyplot(fig)
|
415 |
+
|
416 |
+
st.subheader('**EDA Feature Offer Application Preference**')
|
417 |
+
st.write('Dari visualisasi dibawah dapat disimpulkan bahwa :')
|
418 |
+
st.markdown('- *Customer* paling banyak adalah *customer* yang menyukai tawaran (55.3%)')
|
419 |
+
st.markdown('- *Customer* yang paling banyak *churn* adalah *customer* yang menyukai tawaran')
|
420 |
+
st.markdown('- Akan tetapi jika dilihat dari persentase *churn* pada setiap kelas kelas tawaran, maka tidak terdapat perbedaan yang signifikan')
|
421 |
+
|
422 |
+
#Visualisasi distribusi offer_application_preference
|
423 |
+
fig, ax =plt.subplots(1,2,figsize=(15,6))
|
424 |
+
sns.countplot(x='offer_application_preference', data=df_eda, palette='winter', ax=ax[0])
|
425 |
+
ax[0].set_xlabel("offer_application_preference", fontsize= 12)
|
426 |
+
ax[0].set_ylabel("# of Customer", fontsize= 12)
|
427 |
+
fig.suptitle('Offer Aplication Preference Distribution', fontsize=18, fontweight='bold')
|
428 |
+
ax[0].set_ylim(0,23000)
|
429 |
+
for p in ax[0].patches:
|
430 |
+
ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
431 |
+
p.get_height()+335), ha='center', va='center',fontsize = 10)
|
432 |
+
|
433 |
+
df_eda['offer_application_preference'].value_counts().plot(kind='pie', autopct='%1.1f%%', textprops = {"fontsize":10})
|
434 |
+
ax[1].set_ylabel("% of Customer", fontsize= 10)
|
435 |
+
st.pyplot(fig)
|
436 |
+
|
437 |
+
# Membuat Visualisasi distribusi offer_application_preference berdasarkan Churn
|
438 |
+
fig, ax =plt.subplots(1,2,figsize=(15,6))
|
439 |
+
sns.countplot(data = df_eda, x = 'offer_application_preference', hue="churn_risk_score", palette = 'winter', ax=ax[0])
|
440 |
+
ax[0].set_title('Offer Aplication Preference Distribution', fontsize=14, fontweight='bold',)
|
441 |
+
ax[0].set_xlabel("offer_application_preference", fontsize= 12)
|
442 |
+
ax[0].set_ylabel("# of Customer", fontsize= 12)
|
443 |
+
ax[0].tick_params(axis="x", labelsize= 9.5)
|
444 |
+
ax[0].legend(fontsize=10,title='Churn Classification', loc='upper right', labels=['Not Churn', 'Churn'])
|
445 |
+
for p in ax[0].patches:
|
446 |
+
ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
447 |
+
p.get_height()+180), ha='center', va='center',fontsize = 10)
|
448 |
+
ax[0].set_ylim(0,12000)
|
449 |
+
|
450 |
+
#Visualisasi % Churn dari setiap kelas
|
451 |
+
sns.barplot(x = 'offer_application_preference', y = 'churn_risk_score', data = df_eda, palette = 'winter',ax=ax[1])
|
452 |
+
ax[1].set_xlabel("offer_application_preference", fontsize= 12)
|
453 |
+
ax[1].set_ylabel("% Churn", fontsize= 12)
|
454 |
+
ax[1].set_title('% Churn based on Offer Aplication Preference', fontsize=14, fontweight='bold')
|
455 |
+
ax[1].set_ylim(0,0.8)
|
456 |
+
for p in ax[1].patches:
|
457 |
+
ax[1].annotate("%.2f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
458 |
+
p.get_height()+0.03), ha='center', va='center',fontsize = 11)
|
459 |
+
st.pyplot(fig)
|
460 |
+
|
461 |
+
st.subheader('**EDA Feature Past Complaint**')
|
462 |
+
st.write('Dari visualisasi dibawah dapat disimpulkan bahwa :')
|
463 |
+
st.markdown('- *Customer* paling banyak adalah *customer* yang tidak *complaint* (50.3%). Akan tetapi tidak berbeda signifikan, hanya berbeda 0.6% dari *customer* yang *complaint*')
|
464 |
+
st.markdown('- *Customer* yang paling banyak *churn* adalah *customer* yang *complaint*')
|
465 |
+
st.markdown('- Akan tetapi jika dilihat dari persentase *churn* pada setiap kelas *past complaint*, maka tidak terdapat perbedaan yang signifikan')
|
466 |
+
|
467 |
+
#Visualisasi distribusi past_complaint
|
468 |
+
fig, ax =plt.subplots(1,2,figsize=(15,6))
|
469 |
+
sns.countplot(x='past_complaint', data=df_eda, palette='winter', ax=ax[0])
|
470 |
+
ax[0].set_xlabel("past_complaint", fontsize= 12)
|
471 |
+
ax[0].set_ylabel("# of Customer", fontsize= 12)
|
472 |
+
fig.suptitle('Past Complaint Distribution', fontsize=18, fontweight='bold')
|
473 |
+
ax[0].set_ylim(0,23000)
|
474 |
+
for p in ax[0].patches:
|
475 |
+
ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
476 |
+
p.get_height()+335), ha='center', va='center',fontsize = 10)
|
477 |
+
df_eda['past_complaint'].value_counts().plot(kind='pie', autopct='%1.1f%%', textprops = {"fontsize":10})
|
478 |
+
ax[1].set_ylabel("% of Customer", fontsize= 10)
|
479 |
+
st.pyplot(fig)
|
480 |
+
|
481 |
+
# Membuat Visualisasi distribusi past_complaint berdasarkan Churn
|
482 |
+
fig, ax =plt.subplots(1,2,figsize=(15,6))
|
483 |
+
|
484 |
+
sns.countplot(data = df_eda, x = 'past_complaint', hue="churn_risk_score", palette = 'winter', ax=ax[0])
|
485 |
+
ax[0].set_title('Past Complaint Distribution', fontsize=14, fontweight='bold',)
|
486 |
+
ax[0].set_xlabel("past_complaint", fontsize= 12)
|
487 |
+
ax[0].set_ylabel("# of Customer", fontsize= 12)
|
488 |
+
ax[0].tick_params(axis="x", labelsize= 9.5)
|
489 |
+
ax[0].legend(fontsize=10,title='Churn Classification', loc='upper right', labels=['Not Churn', 'Churn'])
|
490 |
+
for p in ax[0].patches:
|
491 |
+
ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
492 |
+
p.get_height()+180), ha='center', va='center',fontsize = 10)
|
493 |
+
ax[0].set_ylim(0,14000)
|
494 |
+
|
495 |
+
#Visualisasi % Churn dari setiap kelas
|
496 |
+
sns.barplot(x = 'past_complaint', y = 'churn_risk_score', data = df_eda, palette = 'winter', ax=ax[1])
|
497 |
+
ax[1].set_xlabel("past_complaint", fontsize= 12)
|
498 |
+
ax[1].set_ylabel("% Churn", fontsize= 12)
|
499 |
+
ax[1].set_title('% Churn based on Past Complaint', fontsize=14, fontweight='bold')
|
500 |
+
ax[1].set_ylim(0,0.8)
|
501 |
+
for p in ax[1].patches:
|
502 |
+
ax[1].annotate("%.2f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
503 |
+
p.get_height()+0.03), ha='center', va='center',fontsize = 11)
|
504 |
+
|
505 |
+
st.pyplot(fig)
|
506 |
+
|
507 |
+
st.subheader('**EDA Feature Complaint Status**')
|
508 |
+
st.write('Dari visualisasi dibawah dapat disimpulkan bahwa :')
|
509 |
+
st.markdown('- *Customer* paling banyak adalah *customer* yang *complaint* nya tidak dapat diaplikasikan/direalisasikan (50.3%)')
|
510 |
+
st.markdown('- *Customer* yang paling banyak *churn* adalah *customer* yang *complaint* nya tidak dapat diaplikasikan/direalisasikan')
|
511 |
+
st.markdown('- Akan tetapi jika dilihat dari persentase *churn* pada setiap kelas *complaint status*, maka tidak terdapat perbedaan yang signifikan')
|
512 |
+
|
513 |
+
#Visualisasi distribusi complaint_status
|
514 |
+
fig, ax =plt.subplots(1,2,figsize=(15,6))
|
515 |
+
sns.countplot(x='complaint_status', data=df_eda, palette='winter', ax=ax[0])
|
516 |
+
ax[0].set_xlabel("complaint_status", fontsize= 12)
|
517 |
+
ax[0].set_ylabel("# of Customer", fontsize= 12)
|
518 |
+
fig.suptitle('Complaint Status Distribution', fontsize=18, fontweight='bold')
|
519 |
+
ax[0].set_ylim(0,23000)
|
520 |
+
ax[0].tick_params(axis='x', rotation=90)
|
521 |
+
for p in ax[0].patches:
|
522 |
+
ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
523 |
+
p.get_height()+335), ha='center', va='center',fontsize = 10)
|
524 |
+
|
525 |
+
df_eda['complaint_status'].value_counts().plot(kind='pie', autopct='%1.1f%%', textprops = {"fontsize":10})
|
526 |
+
ax[1].set_ylabel("% of Customer", fontsize= 10)
|
527 |
+
st.pyplot(fig)
|
528 |
+
|
529 |
+
# Membuat Visualisasi distribusi complaint_status berdasarkan Churn
|
530 |
+
fig, ax =plt.subplots(1,2,figsize=(15,6))
|
531 |
+
sns.countplot(data = df_eda, x = 'complaint_status', hue="churn_risk_score", palette = 'winter', ax=ax[0])
|
532 |
+
ax[0].set_title('Complaint Status Distribution', fontsize=14, fontweight='bold',)
|
533 |
+
ax[0].set_xlabel("complaint_status", fontsize= 12)
|
534 |
+
ax[0].set_ylabel("# of Customer", fontsize= 12)
|
535 |
+
ax[0].tick_params(axis="x", labelsize= 9.5)
|
536 |
+
ax[0].legend(fontsize=10,title='Churn Classification', loc='upper right', labels=['Not Churn', 'Churn'])
|
537 |
+
for p in ax[0].patches:
|
538 |
+
ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
539 |
+
p.get_height()+180), ha='center', va='center',fontsize = 10)
|
540 |
+
ax[0].tick_params(axis='x', rotation=90)
|
541 |
+
ax[0].set_ylim(0,12000)
|
542 |
+
|
543 |
+
#Visualisasi % Churn dari setiap kelas
|
544 |
+
sns.barplot(x = 'complaint_status', y = 'churn_risk_score', data = df_eda, palette = 'winter',ax=ax[1])
|
545 |
+
ax[1].set_xlabel("complaint_status", fontsize= 12)
|
546 |
+
ax[1].set_ylabel("% Churn", fontsize= 12)
|
547 |
+
ax[1].set_title('% Churn based on Complaint Status', fontsize=14, fontweight='bold')
|
548 |
+
ax[1].set_ylim(0,0.8)
|
549 |
+
ax[1].tick_params(axis='x', rotation=90)
|
550 |
+
for p in ax[1].patches:
|
551 |
+
ax[1].annotate("%.2f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
552 |
+
p.get_height()+0.03), ha='center', va='center',fontsize = 11)
|
553 |
+
|
554 |
+
st.pyplot(fig)
|
555 |
+
|
556 |
+
st.subheader('**EDA Feature Feedback**')
|
557 |
+
st.write('Dari visualisasi dibawah dapat disimpulkan bahwa :')
|
558 |
+
st.markdown('- *Customer* paling banyak adalah *customer* yang memberikan *feedback* buruk (*too many ads*, *poor product quality*) dan yang tidak memberikan *feedback*. 3 kelas tersebut memiliki persentase proporsi 17%')
|
559 |
+
st.markdown('- *Customer* yang paling banyak *churn* adalah *customer* yang memberikan *feedback poor product quality*')
|
560 |
+
st.markdown('- Akan tetapi jika dilihat dari persentase *churn* pada setiap kelas *feedback*, maka terdapat perbedaan yang signifikan')
|
561 |
+
|
562 |
+
# Visualisasi distribusi feedback
|
563 |
+
fig, ax =plt.subplots(1,2,figsize=(15,6))
|
564 |
+
sns.countplot(x='feedback', data=df_eda, palette='winter', ax=ax[0])
|
565 |
+
ax[0].set_xlabel("feedback", fontsize= 12)
|
566 |
+
ax[0].set_ylabel("# of Customer", fontsize= 12)
|
567 |
+
fig.suptitle('Feedback Distribution', fontsize=18, fontweight='bold')
|
568 |
+
ax[0].set_ylim(0,7000)
|
569 |
+
ax[0].tick_params(axis='x', rotation=90)
|
570 |
+
for p in ax[0].patches:
|
571 |
+
ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
572 |
+
p.get_height()+135), ha='center', va='center',fontsize = 10)
|
573 |
+
df_eda['feedback'].value_counts().plot(kind='pie', autopct='%1.1f%%', textprops = {"fontsize":10})
|
574 |
+
ax[1].set_ylabel("% of Customer", fontsize= 10)
|
575 |
+
st.pyplot(fig)
|
576 |
+
|
577 |
+
# Membuat Visualisasi distribusi feedback berdasarkan Churn
|
578 |
+
fig, ax =plt.subplots(1,2,figsize=(15,6))
|
579 |
+
sns.countplot(data = df_eda, x = 'feedback', hue="churn_risk_score", palette = 'winter', ax=ax[0])
|
580 |
+
ax[0].set_title('Feedback Distribution', fontsize=14, fontweight='bold',)
|
581 |
+
ax[0].set_xlabel("feedback", fontsize= 12)
|
582 |
+
ax[0].set_ylabel("# of Customer", fontsize= 12)
|
583 |
+
ax[0].tick_params(axis="x", labelsize= 9.5)
|
584 |
+
ax[0].legend(fontsize=10,title='Churn Classification', loc='upper right', labels=['Not Churn', 'Churn'])
|
585 |
+
for p in ax[0].patches:
|
586 |
+
ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
587 |
+
p.get_height()+60), ha='center', va='center',fontsize = 6)
|
588 |
+
ax[0].tick_params(axis='x', rotation=90)
|
589 |
+
ax[0].set_ylim(0,5000)
|
590 |
+
|
591 |
+
#Visualisasi % Churn dari setiap kelas
|
592 |
+
sns.barplot(x = 'feedback', y = 'churn_risk_score', data = df_eda, palette = 'winter',ax=ax[1])
|
593 |
+
ax[1].set_xlabel("feedback", fontsize= 12)
|
594 |
+
ax[1].set_ylabel("% Churn", fontsize= 12)
|
595 |
+
ax[1].set_title('% Churn based on Feedback', fontsize=14, fontweight='bold')
|
596 |
+
ax[1].set_ylim(0,0.8)
|
597 |
+
ax[1].tick_params(axis='x', rotation=90)
|
598 |
+
for p in ax[1].patches:
|
599 |
+
ax[1].annotate("%.2f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
|
600 |
+
p.get_height()+0.03), ha='center', va='center',fontsize = 11)
|
601 |
+
st.pyplot(fig)
|
602 |
+
|
603 |
+
|
604 |
if __name__ == '__main__':
|
605 |
run()
|
prediction.py
CHANGED
@@ -7,8 +7,8 @@ from tensorflow.keras.models import load_model
|
|
7 |
|
8 |
def run() :
|
9 |
# Load Model
|
10 |
-
with open('preprocessor.pkl', 'rb') as
|
11 |
-
preprocessor = pickle.load(
|
12 |
model_churn = load_model('churn_model.h5', compile=False)
|
13 |
|
14 |
# Membuat Title
|
|
|
7 |
|
8 |
def run() :
|
9 |
# Load Model
|
10 |
+
with open('preprocessor.pkl', 'rb') as file_1:
|
11 |
+
preprocessor = pickle.load(file_1)
|
12 |
model_churn = load_model('churn_model.h5', compile=False)
|
13 |
|
14 |
# Membuat Title
|