Spaces:

evanderin
/

P2M1_FTDS-RMT17_evan_derin_ihsanudin

Sleeping

P2M1_FTDS-RMT17_evan_derin_ihsanudin / eda.py

Evan Derin Ihsanudin

P2M1_Deployment

400dd5b over 1 year ago

9.53 kB

	import streamlit as st
	import pandas as pd
	import seaborn as sns
	import matplotlib.pyplot as plt
	import plotly.express as px
	from PIL import Image



	# Figure size shared by every chart on this page.
	_FIGSIZE = (15, 6)
	# Name of the target column in the EDA data frame.
	_CHURN_COL = 'churn_risk_score'
	# Display names for the target values; the count plot's tick labels pair
	# position 0 with 'Not Churn' and position 1 with 'Churn'.
	_CHURN_NAMES = {0: 'Not Churn', 1: 'Churn'}
	# Explicit left-to-right order of the age bins on the churn-split charts.
	_AGE_BINS = ['(10, 20]', '(20, 30]', '(30, 40]', '(40, 50]', '(50, 60]', '(60, 70]']


	def _show(fig):
	    """Render *fig* on the Streamlit page, then release it from pyplot."""
	    st.pyplot(fig)
	    # pyplot keeps every figure registered until it is closed; Streamlit
	    # re-runs the script on each interaction, so unclosed figures pile up.
	    plt.close(fig)


	def _annotate_bars(ax, fmt, offset, fontsize):
	    """Write each bar's height above it, *offset* data units over its top."""
	    for bar in ax.patches:
	        if bar.get_width() == 0:
	            # A zero-width rectangle is not a visible bar.
	            # NOTE(review): seaborn appears to add such placeholders to
	            # carry hue legend entries; labelling them draws a stray "0".
	            continue
	        height = bar.get_height()
	        anchor = (bar.get_x() + bar.get_width() / 2, height + offset)
	        ax.annotate(fmt % height, anchor, ha='center', va='center', fontsize=fontsize)


	def _render_distribution(df, column, *, suptitle, xlabel, ylim_top, offset,
	                         bar_fontsize, pie_fontsize, value_names=None):
	    """Draw a count plot (left) and a share pie (right) for *column*.

	    When *value_names* (a ``{value: display name}`` mapping) is given, the
	    count plot's ticks and the pie's slices are labelled with those names.
	    """
	    fig, (count_ax, pie_ax) = plt.subplots(1, 2, figsize=_FIGSIZE)

	    sns.countplot(x=column, data=df, palette='winter', ax=count_ax)
	    count_ax.set_xlabel(xlabel, fontsize=12)
	    count_ax.set_ylabel("# of Customer", fontsize=12)
	    fig.suptitle(suptitle, fontsize=18, fontweight='bold')
	    count_ax.set_ylim(0, ylim_top)
	    if value_names is not None:
	        count_ax.set_xticks(list(value_names), list(value_names.values()))
	    _annotate_bars(count_ax, "%.0f", offset, bar_fontsize)

	    counts = df[column].value_counts()
	    if value_names is not None:
	        # value_counts() sorts by frequency, so positional labels can end up
	        # on the wrong slice; rename by value to keep name and slice paired.
	        counts = counts.rename(index=value_names)
	    # Pass the axes explicitly rather than relying on pyplot's current axes.
	    counts.plot(kind='pie', autopct='%1.1f%%', textprops={"fontsize": 12}, ax=pie_ax)
	    pie_ax.set_ylabel("% of Customer", fontsize=pie_fontsize)
	    _show(fig)


	def _render_churn_split(df, column, *, xlabel, count_title, rate_title,
	                        count_ylim, count_offset, rate_offset, order=None):
	    """Draw counts of *column* split by churn (left) and churn rate (right)."""
	    fig, (count_ax, rate_ax) = plt.subplots(1, 2, figsize=_FIGSIZE)

	    sns.countplot(data=df, x=column, hue=_CHURN_COL, palette='winter',
	                  order=order, ax=count_ax)
	    count_ax.set_title(count_title, fontsize=14, fontweight='bold')
	    count_ax.set_xlabel(xlabel, fontsize=12)
	    count_ax.set_ylabel("# of Customer", fontsize=12)
	    count_ax.tick_params(axis="x", labelsize=9.5)
	    # Hand the hue handles to legend() explicitly: with labels alone,
	    # matplotlib pairs them with the first artists on the axes, which need
	    # not be one artist per hue level.
	    handles, _ = count_ax.get_legend_handles_labels()
	    count_ax.legend(handles=handles, labels=['Not Churn', 'Churn'], fontsize=10,
	                    title='Churn Classification', loc='upper right')
	    _annotate_bars(count_ax, "%.0f", count_offset, 10)
	    count_ax.set_ylim(0, count_ylim)

	    # Bar height is seaborn's default estimate of the target column per class.
	    sns.barplot(x=column, y=_CHURN_COL, data=df, palette='winter',
	                order=order, ax=rate_ax)
	    rate_ax.set_xlabel(xlabel, fontsize=12)
	    rate_ax.set_ylabel("% Churn", fontsize=12)
	    rate_ax.set_title(rate_title, fontsize=14, fontweight='bold')
	    rate_ax.set_ylim(0, 0.7)
	    _annotate_bars(rate_ax, "%.2f", rate_offset, 11)
	    _show(fig)


	def _render_boxen_vs_churn(df, column, title):
	    """Draw a boxen plot of *column* grouped by the churn column."""
	    fig, ax = plt.subplots(figsize=_FIGSIZE)
	    # NOTE(review): this does not affect the figure created above; it only
	    # changes the process-wide default. Kept because other pages may rely
	    # on it -- confirm and remove if not.
	    plt.rcParams['figure.figsize'] = (10, 5)
	    sns.boxenplot(y=df[column], x=df[_CHURN_COL], palette='Blues', ax=ax)
	    ax.set_title(title, fontsize=20)
	    _show(fig)


	def _section(heading, findings):
	    """Write a section sub-header followed by its bullet-point findings."""
	    st.subheader(heading)
	    st.write('Dari visualisasi dibawah dapat disimpulkan bahwa :')
	    for finding in findings:
	        st.markdown(finding)


	def run():
	    """Render the Exploratory Data Analysis page.

	    Reads ``eda_churn.csv`` from the working directory and, for each
	    feature, writes a sub-header, the written findings and the charts to
	    the Streamlit page. Takes no arguments and returns ``None``.
	    """
	    # Page title and introduction.
	    st.markdown("<h1 style='text-align: center;'>Exploratory Data Analysis</h1>", unsafe_allow_html=True)
	    st.write('Berikut adalah EDA dari setiap feature')

	    # Load the data frame used by every chart.
	    df_eda = pd.read_csv('eda_churn.csv')

	    # --- Churn (target) distribution ---
	    _section('EDA Feature Churn', [
	        '- Customer yang churn lebih banyak dari pada customer yang tidak churn',
	    ])
	    _render_distribution(
	        df_eda, _CHURN_COL,
	        suptitle='Customer Churn Distribution',
	        xlabel="churn_risk_score",
	        ylim_top=23000,
	        offset=405,
	        bar_fontsize=11,
	        pie_fontsize=12,
	        value_names=_CHURN_NAMES,
	    )

	    # --- Age ---
	    _section('EDA Feature Age', [
	        '- Customer paling banyak adalah customer yang memiliki range umur 40-50 tahun',
	        '- Customer yang paling banyak churn adalah customer dengan range umur 50-60 tahun',
	        '- Akan tetapi jika dilihat dari persentase churn pada setiap kelas range umur, maka tidak ada perbedaan signifikan',
	    ])
	    _render_distribution(
	        df_eda, 'AgeBin',
	        suptitle='Range Customer Age Distribution',
	        xlabel="Range Customer Age",
	        ylim_top=7600,
	        offset=105,
	        bar_fontsize=10,
	        pie_fontsize=10,
	    )
	    _render_churn_split(
	        df_eda, 'AgeBin',
	        xlabel="Range Age",
	        count_title='Range Age Distribution',
	        rate_title='% Churn based on Age',
	        count_ylim=4700,
	        count_offset=75,
	        rate_offset=0.03,
	        order=_AGE_BINS,
	    )

	    # --- Average time spent ---
	    _section('EDA Feature Time Spent', [
	        '- Jika dilihat pada visualisasi diatas maka `avg_time_spent` antara customer yang churn dan customer yang tidak churn tidak berbeda secara signifikan',
	    ])
	    _render_boxen_vs_churn(df_eda, 'avg_time_spent', 'Average Time Spent vs Churn')

	    # --- Average transaction value ---
	    _section('EDA Feature Transaction Value', [
	        '- Customer yang tidak churn memiliki average transaction value yang lebih tinggi (terpusat di 18.000-40.000) dari pada customer yang churn (terpusat di 16.000-36.000)',
	    ])
	    _render_boxen_vs_churn(df_eda, 'avg_transaction_value', 'Average Transaction Value vs Churn')

	    # --- Average frequency of login days ---
	    _section('EDA Feature Avg Frequency Login Days', [
	        '- Customer yang tidak churn memiliki average frequency login days yang lebih rendah (terpusat di 8-20x) dari pada customer yang churn (terpusat di 10-25x)',
	    ])
	    _render_boxen_vs_churn(df_eda, 'avg_frequency_login_days', 'Average Frequency Login Days vs Churn')

	    # --- Points in wallet ---
	    _section('EDA Feature Point Wallet', [
	        '- Customer yang tidak churn memiliki points in wallet yang lebih tinggi (terpusat di 700-800) dari pada customer yang churn (terpusat di 600-700)',
	    ])
	    _render_boxen_vs_churn(df_eda, 'points_in_wallet', 'Points in Wallet vs Churn')

	    # --- Gender ---
	    _section('EDA Feature Gender', [
	        '- Customer paling banyak adalah customer wanita (50.1%). Akan tetapi tidak berbeda signifikan, hanya berbeda 0.1% dari customer pria',
	        '- Customer yang paling banyak churn adalah customer wanita. Kemungkinan banyak pada kelas wanita karena customer paling banyak juga pada kelas ini',
	        '- Akan tetapi jika dilihat dari persentase churn pada setiap kelas gender, maka tidak ada perbedaan signifikan',
	    ])
	    _render_distribution(
	        df_eda, 'gender',
	        suptitle='Gender Distribution',
	        xlabel="Gender",
	        ylim_top=21000,
	        offset=305,
	        bar_fontsize=10,
	        pie_fontsize=10,
	    )
	    _render_churn_split(
	        df_eda, 'gender',
	        xlabel="Gender",
	        count_title='Gender Distribution',
	        rate_title='% Churn based on Gender',
	        count_ylim=13000,
	        count_offset=175,
	        rate_offset=0.02,
	    )

	# Render the page only when this file is executed directly as a script.
	if __name__ == '__main__':
	    run()