Spaces:

evanderin
/

P2M1_FTDS-RMT17_evan_derin_ihsanudin

Sleeping

App Files Community

Evan Derin Ihsanudin committed on Feb 3, 2023

Commit

0a18345

•

1 Parent(s): 8c27fc8

revisi eda dan model

Browse files

Files changed (3) hide show

churn_model.h5 +1 -1
eda.py +425 -3
prediction.py +2 -2

churn_model.h5 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7eea4b8ac89d86ecbbef7e8f38e1cd9c6d6b8822b4193f7d14791fe026e4b1e0
 size 262152

 version https://git-lfs.github.com/spec/v1
+oid sha256:14f0cfe911eb0d8f947e5afb09f2fa35b4e9f116435b3cd53319f3f36dd7cc30
 size 262152

eda.py CHANGED Viewed

@@ -35,7 +35,7 @@ def run() :
         ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
                         p.get_height()+405), ha='center', va='center',fontsize = 11)
-    df_eda['churn_risk_score'].value_counts().plot(kind='pie', labels = ['Not Churn', 'Churn'],autopct='%1.1f%%', textprops = {"fontsize":12})
     ax[1].set_ylabel("% of Customer", fontsize= 12)
     st.pyplot(fig)
@@ -137,8 +137,8 @@ def run() :
     # Membuat Sub Header
     st.subheader('**EDA Feature Gender**')
     st.write('Dari visualisasi dibawah dapat disimpulkan bahwa :')
-    st.markdown('- *Customer* paling banyak adalah *customer* wanita (50.1%). Akan tetapi tidak berbeda signifikan, hanya berbeda 0.1% dari *customer* pria')
-    st.markdown('- *Customer* yang paling banyak *churn* adalah *customer* wanita. Kemungkinan banyak pada kelas wanita karena *customer* paling banyak juga pada kelas ini')
     st.markdown('- Akan tetapi jika dilihat dari persentase *churn* pada setiap kelas *gender*, maka tidak ada perbedaan signifikan')
     #Visualisasi distribusi Gender
@@ -179,5 +179,427 @@ def run() :
                         p.get_height()+0.02), ha='center', va='center',fontsize = 11)
     st.pyplot(fig)
 if __name__ == '__main__':
     run()

         ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
                         p.get_height()+405), ha='center', va='center',fontsize = 11)
+    df_eda['churn_risk_score'].value_counts().plot(kind='pie', labels = ['Churn', 'Not Churn'],autopct='%1.1f%%', textprops = {"fontsize":12})
     ax[1].set_ylabel("% of Customer", fontsize= 12)
     st.pyplot(fig)
     # Membuat Sub Header
     st.subheader('**EDA Feature Gender**')
     st.write('Dari visualisasi dibawah dapat disimpulkan bahwa :')
+    st.markdown('- *Customer* paling banyak adalah *customer* wanita (50.1%). Akan tetapi tidak berbeda signifikan, hanya berbeda 0.2% dari *customer* pria')
+    st.markdown('- *Customer* yang paling banyak *churn* adalah *customer* wanita')
     st.markdown('- Akan tetapi jika dilihat dari persentase *churn* pada setiap kelas *gender*, maka tidak ada perbedaan signifikan')
     #Visualisasi distribusi Gender
                         p.get_height()+0.02), ha='center', va='center',fontsize = 11)
     st.pyplot(fig)
+    # Membuat Sub Header
+    st.subheader('**EDA Feature Region Category**')
+    st.write('Dari visualisasi dibawah dapat disimpulkan bahwa :')
+    st.markdown('- *Customer* paling banyak adalah *customer* yang berasal dari kota (44.8%)')
+    st.markdown('- *Customer* yang paling banyak *churn* adalah *customer* yang tinggal di kota')
+    st.markdown('- Akan tetapi jika dilihat dari persentase *churn* pada setiap kelas *region*, maka tidak ada perbedaan signifikan')
+    #Visualisasi distribusi region_category
+    fig, ax =plt.subplots(1,2,figsize=(15,6))
+    sns.countplot(x='region_category', data=df_eda, palette='winter', ax=ax[0])
+    ax[0].set_xlabel("region_category", fontsize= 12)
+    ax[0].set_ylabel("# of Customer", fontsize= 12)
+    fig.suptitle('Region Category Distribution', fontsize=18, fontweight='bold')
+    ax[0].set_ylim(0,16000)
+    for p in ax[0].patches:
+        ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+305), ha='center', va='center',fontsize = 10)
+    df_eda['region_category'].value_counts().plot(kind='pie', autopct='%1.1f%%', textprops = {"fontsize":12})
+    ax[1].set_ylabel("% of Customer", fontsize= 10)
+    st.pyplot(fig)
+    # Membuat Visualisasi distribusi region_category berdasarkan Churn
+    fig, ax =plt.subplots(1,2,figsize=(15,6))
+    sns.countplot(data = df_eda, x = 'region_category', hue="churn_risk_score", palette = 'winter', ax=ax[0])
+    ax[0].set_title('Region Category Distribution', fontsize=14, fontweight='bold',)
+    ax[0].set_xlabel("region_category", fontsize= 12)
+    ax[0].set_ylabel("# of Customer", fontsize= 12)
+    ax[0].tick_params(axis="x", labelsize= 9.5)
+    ax[0].legend(fontsize=10,title='Churn Classification', loc='upper right', labels=['Not Churn', 'Churn'])
+    for p in ax[0].patches:
+        ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+180), ha='center', va='center',fontsize = 10)
+    ax[0].set_ylim(0,10000)
+    #Visualisasi % Churn dari setiap kelas
+    sns.barplot(x = 'region_category', y = 'churn_risk_score', data = df_eda, palette = 'winter',ax=ax[1])
+    ax[1].set_xlabel("region_category", fontsize= 12)
+    ax[1].set_ylabel("% Churn", fontsize= 12)
+    ax[1].set_title('% Churn based on Region Category', fontsize=14, fontweight='bold')
+    ax[1].set_ylim(0,0.7)
+    for p in ax[1].patches:
+        ax[1].annotate("%.2f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+0.03), ha='center', va='center',fontsize = 11)
+    st.pyplot(fig)
+    # Membuat Sub Header
+    st.subheader('**EDA Feature Membership Category**')
+    st.write('Dari visualisasi dibawah dapat disimpulkan bahwa :')
+    st.markdown('- *Customer* paling banyak adalah *customer* dengan *basic membership* (20.9%) dan *no membership* (20.8%)')
+    st.markdown('- *Customer* yang paling banyak *churn* adalah *customer* dengan *no membership*')
+    st.markdown('- Akan tetapi jika dilihat dari persentase *churn* pada setiap kelas *membership*, maka terdapat perbedaan yang signifikan')
+    # Visualisasi distribusi membership_category
+    fig, ax =plt.subplots(1,2,figsize=(15,6))
+    sns.countplot(x='membership_category', data=df_eda, palette='winter', ax=ax[0])
+    ax[0].set_xlabel("membership_category", fontsize= 12)
+    ax[0].set_ylabel("# of Customer", fontsize= 12)
+    fig.suptitle('Membership Category Distribution', fontsize=18, fontweight='bold')
+    ax[0].tick_params(axis='x', rotation=90)
+    ax[0].set_ylim(0,8500)
+    for p in ax[0].patches:
+        ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+185), ha='center', va='center',fontsize = 10)
+    df_eda['membership_category'].value_counts().plot(kind='pie', autopct='%1.1f%%', textprops = {"fontsize":12})
+    ax[1].set_ylabel("% of Customer", fontsize= 10)
+    st.pyplot(fig)
+    # Membuat Visualisasi distribusi membership_category berdasarkan Churn
+    fig, ax =plt.subplots(1,2,figsize=(15,6))
+    sns.countplot(data = df_eda, x = 'membership_category', hue="churn_risk_score", palette = 'winter', ax=ax[0])
+    ax[0].set_title('Membership Category Distribution', fontsize=14, fontweight='bold',)
+    ax[0].set_xlabel("membership_category", fontsize= 12)
+    ax[0].set_ylabel("# of Customer", fontsize= 12)
+    ax[0].tick_params(axis="x", labelsize= 9.5)
+    ax[0].legend(fontsize=10,title='Churn Classification', loc='upper right', labels=['Not Churn', 'Churn'])
+    for p in ax[0].patches:
+        ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+180), ha='center', va='center',fontsize = 10)
+    ax[0].tick_params(axis='x', rotation=90)
+    ax[0].set_ylim(0,8500)
+    #Visualisasi % Churn dari setiap kelas
+    sns.barplot(x = 'membership_category', y = 'churn_risk_score', data = df_eda, palette = 'winter',ax=ax[1])
+    ax[1].set_xlabel("membership_category", fontsize= 12)
+    ax[1].set_ylabel("% Churn", fontsize= 12)
+    ax[1].set_title('% Churn based on Membership Category', fontsize=14, fontweight='bold')
+    ax[1].set_ylim(0,1.2)
+    ax[1].tick_params(axis='x', rotation=90)
+    for p in ax[1].patches:
+        ax[1].annotate("%.2f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+0.03), ha='center', va='center',fontsize = 11)
+    st.pyplot(fig)
+    st.subheader('**EDA Feature Joined Through Referral**')
+    st.write('Dari visualisasi dibawah dapat disimpulkan bahwa :')
+    st.markdown('- *Customer* paling banyak adalah *customer* yang tidak menggunakan *referral* (50.2%). Akan tetapi tidak berbeda signifikan, hanya berbeda 0.4% dari *customer* yang menggunakan *referral*')
+    st.markdown('- *Customer* yang paling banyak *churn* adalah *customer* dengan *referral*')
+    st.markdown('- Akan tetapi jika dilihat dari persentase *churn* pada setiap kelas *joined through referral*, maka tidak terdapat perbedaan yang signifikan')
+    # Visualisasi distribusi joined_through_referral
+    fig, ax =plt.subplots(1,2,figsize=(15,6))
+    sns.countplot(x='joined_through_referral', data=df_eda, palette='winter', ax=ax[0])
+    ax[0].set_xlabel("joined_through_referral", fontsize= 12)
+    ax[0].set_ylabel("# of Customer", fontsize= 12)
+    fig.suptitle('Joined Through Referral Distribution', fontsize=18, fontweight='bold')
+    ax[0].set_ylim(0,17500)
+    for p in ax[0].patches:
+        ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+235), ha='center', va='center',fontsize = 10)
+    df_eda['joined_through_referral'].value_counts().plot(kind='pie', autopct='%1.1f%%', textprops = {"fontsize":12})
+    ax[1].set_ylabel("% of Customer", fontsize= 10)
+    st.pyplot(fig)
+    # Membuat Visualisasi distribusi joined_through_referral berdasarkan Churn
+    fig, ax =plt.subplots(1,2,figsize=(15,6))
+    sns.countplot(data = df_eda, x = 'joined_through_referral', hue="churn_risk_score", palette = 'winter', ax=ax[0])
+    ax[0].set_title('Joined Through Referral Distribution', fontsize=14, fontweight='bold',)
+    ax[0].set_xlabel("joined_through_referral", fontsize= 12)
+    ax[0].set_ylabel("# of Customer", fontsize= 12)
+    ax[0].tick_params(axis="x", labelsize= 9.5)
+    ax[0].legend(fontsize=10,title='Churn Classification', loc='upper right', labels=['Not Churn', 'Churn'])
+    for p in ax[0].patches:
+        ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+180), ha='center', va='center',fontsize = 10)
+    ax[0].set_ylim(0,12000)
+    #Visualisasi % Churn dari setiap kelas
+    sns.barplot(x = 'joined_through_referral', y = 'churn_risk_score', data = df_eda, palette = 'winter',ax=ax[1])
+    ax[1].set_xlabel("joined_through_referral", fontsize= 12)
+    ax[1].set_ylabel("% Churn", fontsize= 12)
+    ax[1].set_title('% Churn based on Joined Through Referral', fontsize=14, fontweight='bold')
+    ax[1].set_ylim(0,0.8)
+    for p in ax[1].patches:
+        ax[1].annotate("%.2f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+0.03), ha='center', va='center',fontsize = 11)
+    st.pyplot(fig)
+    st.subheader('**EDA Feature Preferred Offer Types**')
+    st.write('Dari visualisasi dibawah dapat disimpulkan bahwa :')
+    st.markdown('- *Customer* paling banyak adalah *customer* menggunakan kupon (33.7%)')
+    st.markdown('- *Customer* yang paling banyak *churn* adalah *customer* yang tidak menggunakan penawaran')
+    st.markdown('- Akan tetapi jika dilihat dari persentase *churn* pada setiap kelas *preferred offer types*, maka tidak terdapat perbedaan yang signifikan')
+    #Visualisasi distribusi preferred_offer_types
+    fig, ax =plt.subplots(1,2,figsize=(15,6))
+    sns.countplot(x='preferred_offer_types', data=df_eda, palette='winter', ax=ax[0])
+    ax[0].set_xlabel("preferred_offer_types", fontsize= 12)
+    ax[0].set_ylabel("# of Customer", fontsize= 12)
+    fig.suptitle('Offer Types Distribution', fontsize=18, fontweight='bold')
+    ax[0].set_ylim(0,14000)
+    for p in ax[0].patches:
+        ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+235), ha='center', va='center',fontsize = 10)
+    df_eda['preferred_offer_types'].value_counts().plot(kind='pie', autopct='%1.1f%%', textprops = {"fontsize":10})
+    ax[1].set_ylabel("% of Customer", fontsize= 10)
+    st.pyplot(fig)
+    # Membuat Visualisasi distribusi preferred_offer_types berdasarkan Churn
+    fig, ax =plt.subplots(1,2,figsize=(15,6))
+    sns.countplot(data = df_eda, x = 'preferred_offer_types', hue="churn_risk_score", palette = 'winter', ax=ax[0])
+    ax[0].set_title('Offer Types Distribution', fontsize=14, fontweight='bold',)
+    ax[0].set_xlabel("preferred_offer_types", fontsize= 12)
+    ax[0].set_ylabel("# of Customer", fontsize= 12)
+    ax[0].tick_params(axis="x", labelsize= 9.5)
+    ax[0].legend(fontsize=10,title='Churn Classification', loc='upper right', labels=['Not Churn', 'Churn'])
+    for p in ax[0].patches:
+        ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+180), ha='center', va='center',fontsize = 10)
+    ax[0].set_ylim(0,9000)
+    #Visualisasi % Churn dari setiap kelas
+    sns.barplot(x = 'preferred_offer_types', y = 'churn_risk_score', data = df_eda, palette = 'winter',ax=ax[1])
+    ax[1].set_xlabel("preferred_offer_types", fontsize= 12)
+    ax[1].set_ylabel("% Churn", fontsize= 12)
+    ax[1].set_title('% Churn based on Offer Types', fontsize=14, fontweight='bold')
+    ax[1].set_ylim(0,0.8)
+    for p in ax[1].patches:
+        ax[1].annotate("%.2f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+0.03), ha='center', va='center',fontsize = 11)
+    st.pyplot(fig)
+    st.subheader('**EDA Feature Used Special Discount Types**')
+    st.write('Dari visualisasi dibawah dapat disimpulkan bahwa :')
+    st.markdown('- *Customer* paling banyak adalah *customer* yang menggunakan diskon (55%)')
+    st.markdown('- *Customer* yang paling banyak *churn* adalah *customer* yang menggunakan diskon')
+    st.markdown('- Akan tetapi jika dilihat dari persentase *churn* pada setiap kelas *used special discount*, maka tidak terdapat perbedaan yang signifikan')
+    #Visualisasi distribusi used_special_discount
+    fig, ax =plt.subplots(1,2,figsize=(15,6))
+    sns.countplot(x='used_special_discount', data=df_eda, palette='winter', ax=ax[0])
+    ax[0].set_xlabel("used_special_discount", fontsize= 12)
+    ax[0].set_ylabel("# of Customer", fontsize= 12)
+    fig.suptitle('Used Special Discount Distribution', fontsize=18, fontweight='bold')
+    ax[0].set_ylim(0,23000)
+    for p in ax[0].patches:
+        ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+335), ha='center', va='center',fontsize = 10)
+    df_eda['used_special_discount'].value_counts().plot(kind='pie', autopct='%1.1f%%', textprops = {"fontsize":10})
+    ax[1].set_ylabel("% of Customer", fontsize= 10)
+    st.pyplot(fig)
+    # Membuat Visualisasi distribusi used_special_discount berdasarkan Churn
+    fig, ax =plt.subplots(1,2,figsize=(15,6))
+    sns.countplot(data = df_eda, x = 'used_special_discount', hue="churn_risk_score", palette = 'winter', ax=ax[0])
+    ax[0].set_title('Used Special Discount Distribution', fontsize=14, fontweight='bold',)
+    ax[0].set_xlabel("used_special_discount", fontsize= 12)
+    ax[0].set_ylabel("# of Customer", fontsize= 12)
+    ax[0].tick_params(axis="x", labelsize= 9.5)
+    ax[0].legend(fontsize=10,title='Churn Classification', loc='upper right', labels=['Not Churn', 'Churn'])
+    for p in ax[0].patches:
+        ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+180), ha='center', va='center',fontsize = 10)
+    ax[0].set_ylim(0,12000)
+    #Visualisasi % Churn dari setiap kelas
+    sns.barplot(x = 'used_special_discount', y = 'churn_risk_score', data = df_eda, palette = 'winter', ax=ax[1])
+    ax[1].set_xlabel("used_special_discount", fontsize= 12)
+    ax[1].set_ylabel("% Churn", fontsize= 12)
+    ax[1].set_title('% Churn based on Used Special Discount', fontsize=14, fontweight='bold')
+    ax[1].set_ylim(0,0.8)
+    for p in ax[1].patches:
+        ax[1].annotate("%.2f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+0.03), ha='center', va='center',fontsize = 11)
+    st.pyplot(fig)
+    st.subheader('**EDA Feature Offer Application Preference**')
+    st.write('Dari visualisasi dibawah dapat disimpulkan bahwa :')
+    st.markdown('- *Customer* paling banyak adalah *customer* yang menyukai tawaran (55.3%)')
+    st.markdown('- *Customer* yang paling banyak *churn* adalah *customer* yang menyukai tawaran')
+    st.markdown('- Akan tetapi jika dilihat dari persentase *churn* pada setiap kelas kelas tawaran, maka tidak terdapat perbedaan yang signifikan')
+    #Visualisasi distribusi offer_application_preference
+    fig, ax =plt.subplots(1,2,figsize=(15,6))
+    sns.countplot(x='offer_application_preference', data=df_eda, palette='winter', ax=ax[0])
+    ax[0].set_xlabel("offer_application_preference", fontsize= 12)
+    ax[0].set_ylabel("# of Customer", fontsize= 12)
+    fig.suptitle('Offer Aplication Preference Distribution', fontsize=18, fontweight='bold')
+    ax[0].set_ylim(0,23000)
+    for p in ax[0].patches:
+        ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+335), ha='center', va='center',fontsize = 10)
+    df_eda['offer_application_preference'].value_counts().plot(kind='pie', autopct='%1.1f%%', textprops = {"fontsize":10})
+    ax[1].set_ylabel("% of Customer", fontsize= 10)
+    st.pyplot(fig)
+    # Membuat Visualisasi distribusi offer_application_preference berdasarkan Churn
+    fig, ax =plt.subplots(1,2,figsize=(15,6))
+    sns.countplot(data = df_eda, x = 'offer_application_preference', hue="churn_risk_score", palette = 'winter', ax=ax[0])
+    ax[0].set_title('Offer Aplication Preference Distribution', fontsize=14, fontweight='bold',)
+    ax[0].set_xlabel("offer_application_preference", fontsize= 12)
+    ax[0].set_ylabel("# of Customer", fontsize= 12)
+    ax[0].tick_params(axis="x", labelsize= 9.5)
+    ax[0].legend(fontsize=10,title='Churn Classification', loc='upper right', labels=['Not Churn', 'Churn'])
+    for p in ax[0].patches:
+        ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+180), ha='center', va='center',fontsize = 10)
+    ax[0].set_ylim(0,12000)
+    #Visualisasi % Churn dari setiap kelas
+    sns.barplot(x = 'offer_application_preference', y = 'churn_risk_score', data = df_eda, palette = 'winter',ax=ax[1])
+    ax[1].set_xlabel("offer_application_preference", fontsize= 12)
+    ax[1].set_ylabel("% Churn", fontsize= 12)
+    ax[1].set_title('% Churn based on Offer Aplication Preference', fontsize=14, fontweight='bold')
+    ax[1].set_ylim(0,0.8)
+    for p in ax[1].patches:
+        ax[1].annotate("%.2f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+0.03), ha='center', va='center',fontsize = 11)
+    st.pyplot(fig)
+    st.subheader('**EDA Feature Past Complaint**')
+    st.write('Dari visualisasi dibawah dapat disimpulkan bahwa :')
+    st.markdown('- *Customer* paling banyak adalah *customer* yang tidak *complaint* (50.3%). Akan tetapi tidak berbeda signifikan, hanya berbeda 0.6% dari *customer* yang *complaint*')
+    st.markdown('- *Customer* yang paling banyak *churn* adalah *customer* yang *complaint*')
+    st.markdown('- Akan tetapi jika dilihat dari persentase *churn* pada setiap kelas *past complaint*, maka tidak terdapat perbedaan yang signifikan')
+    #Visualisasi distribusi past_complaint
+    fig, ax =plt.subplots(1,2,figsize=(15,6))
+    sns.countplot(x='past_complaint', data=df_eda, palette='winter', ax=ax[0])
+    ax[0].set_xlabel("past_complaint", fontsize= 12)
+    ax[0].set_ylabel("# of Customer", fontsize= 12)
+    fig.suptitle('Past Complaint Distribution', fontsize=18, fontweight='bold')
+    ax[0].set_ylim(0,23000)
+    for p in ax[0].patches:
+        ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+335), ha='center', va='center',fontsize = 10)
+    df_eda['past_complaint'].value_counts().plot(kind='pie', autopct='%1.1f%%', textprops = {"fontsize":10})
+    ax[1].set_ylabel("% of Customer", fontsize= 10)
+    st.pyplot(fig)
+    # Membuat Visualisasi distribusi past_complaint berdasarkan Churn
+    fig, ax =plt.subplots(1,2,figsize=(15,6))
+    sns.countplot(data = df_eda, x = 'past_complaint', hue="churn_risk_score", palette = 'winter', ax=ax[0])
+    ax[0].set_title('Past Complaint Distribution', fontsize=14, fontweight='bold',)
+    ax[0].set_xlabel("past_complaint", fontsize= 12)
+    ax[0].set_ylabel("# of Customer", fontsize= 12)
+    ax[0].tick_params(axis="x", labelsize= 9.5)
+    ax[0].legend(fontsize=10,title='Churn Classification', loc='upper right', labels=['Not Churn', 'Churn'])
+    for p in ax[0].patches:
+        ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+180), ha='center', va='center',fontsize = 10)
+    ax[0].set_ylim(0,14000)
+    #Visualisasi % Churn dari setiap kelas
+    sns.barplot(x = 'past_complaint', y = 'churn_risk_score', data = df_eda, palette = 'winter', ax=ax[1])
+    ax[1].set_xlabel("past_complaint", fontsize= 12)
+    ax[1].set_ylabel("% Churn", fontsize= 12)
+    ax[1].set_title('% Churn based on Past Complaint', fontsize=14, fontweight='bold')
+    ax[1].set_ylim(0,0.8)
+    for p in ax[1].patches:
+        ax[1].annotate("%.2f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+0.03), ha='center', va='center',fontsize = 11)
+    st.pyplot(fig)
+    st.subheader('**EDA Feature Complaint Status**')
+    st.write('Dari visualisasi dibawah dapat disimpulkan bahwa :')
+    st.markdown('- *Customer* paling banyak adalah *customer* yang *complaint* nya tidak dapat diaplikasikan/direalisasikan (50.3%)')
+    st.markdown('- *Customer* yang paling banyak *churn* adalah *customer* yang *complaint* nya tidak dapat diaplikasikan/direalisasikan')
+    st.markdown('- Akan tetapi jika dilihat dari persentase *churn* pada setiap kelas *complaint status*, maka tidak terdapat perbedaan yang signifikan')
+    #Visualisasi distribusi complaint_status
+    fig, ax =plt.subplots(1,2,figsize=(15,6))
+    sns.countplot(x='complaint_status', data=df_eda, palette='winter', ax=ax[0])
+    ax[0].set_xlabel("complaint_status", fontsize= 12)
+    ax[0].set_ylabel("# of Customer", fontsize= 12)
+    fig.suptitle('Complaint Status Distribution', fontsize=18, fontweight='bold')
+    ax[0].set_ylim(0,23000)
+    ax[0].tick_params(axis='x', rotation=90)
+    for p in ax[0].patches:
+        ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+335), ha='center', va='center',fontsize = 10)
+    df_eda['complaint_status'].value_counts().plot(kind='pie', autopct='%1.1f%%', textprops = {"fontsize":10})
+    ax[1].set_ylabel("% of Customer", fontsize= 10)
+    st.pyplot(fig)
+    # Membuat Visualisasi distribusi complaint_status berdasarkan Churn
+    fig, ax =plt.subplots(1,2,figsize=(15,6))
+    sns.countplot(data = df_eda, x = 'complaint_status', hue="churn_risk_score", palette = 'winter', ax=ax[0])
+    ax[0].set_title('Complaint Status Distribution', fontsize=14, fontweight='bold',)
+    ax[0].set_xlabel("complaint_status", fontsize= 12)
+    ax[0].set_ylabel("# of Customer", fontsize= 12)
+    ax[0].tick_params(axis="x", labelsize= 9.5)
+    ax[0].legend(fontsize=10,title='Churn Classification', loc='upper right', labels=['Not Churn', 'Churn'])
+    for p in ax[0].patches:
+        ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+180), ha='center', va='center',fontsize = 10)
+    ax[0].tick_params(axis='x', rotation=90)
+    ax[0].set_ylim(0,12000)
+    #Visualisasi % Churn dari setiap kelas
+    sns.barplot(x = 'complaint_status', y = 'churn_risk_score', data = df_eda, palette = 'winter',ax=ax[1])
+    ax[1].set_xlabel("complaint_status", fontsize= 12)
+    ax[1].set_ylabel("% Churn", fontsize= 12)
+    ax[1].set_title('% Churn based on Complaint Status', fontsize=14, fontweight='bold')
+    ax[1].set_ylim(0,0.8)
+    ax[1].tick_params(axis='x', rotation=90)
+    for p in ax[1].patches:
+        ax[1].annotate("%.2f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+0.03), ha='center', va='center',fontsize = 11)
+    st.pyplot(fig)
+    st.subheader('**EDA Feature Feedback**')
+    st.write('Dari visualisasi dibawah dapat disimpulkan bahwa :')
+    st.markdown('- *Customer* paling banyak adalah *customer* yang memberikan *feedback* buruk (*too many ads*, *poor product quality*) dan yang tidak memberikan *feedback*. 3 kelas tersebut memiliki persentase proporsi 17%')
+    st.markdown('- *Customer* yang paling banyak *churn* adalah *customer* yang memberikan *feedback poor product quality*')
+    st.markdown('- Akan tetapi jika dilihat dari persentase *churn* pada setiap kelas *feedback*, maka terdapat perbedaan yang signifikan')
+    # Visualisasi distribusi feedback
+    fig, ax =plt.subplots(1,2,figsize=(15,6))
+    sns.countplot(x='feedback', data=df_eda, palette='winter', ax=ax[0])
+    ax[0].set_xlabel("feedback", fontsize= 12)
+    ax[0].set_ylabel("# of Customer", fontsize= 12)
+    fig.suptitle('Feedback Distribution', fontsize=18, fontweight='bold')
+    ax[0].set_ylim(0,7000)
+    ax[0].tick_params(axis='x', rotation=90)
+    for p in ax[0].patches:
+        ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+135), ha='center', va='center',fontsize = 10)
+    df_eda['feedback'].value_counts().plot(kind='pie', autopct='%1.1f%%', textprops = {"fontsize":10})
+    ax[1].set_ylabel("% of Customer", fontsize= 10)
+    st.pyplot(fig)
+    # Membuat Visualisasi distribusi feedback berdasarkan Churn
+    fig, ax =plt.subplots(1,2,figsize=(15,6))
+    sns.countplot(data = df_eda, x = 'feedback', hue="churn_risk_score", palette = 'winter', ax=ax[0])
+    ax[0].set_title('Feedback Distribution', fontsize=14, fontweight='bold',)
+    ax[0].set_xlabel("feedback", fontsize= 12)
+    ax[0].set_ylabel("# of Customer", fontsize= 12)
+    ax[0].tick_params(axis="x", labelsize= 9.5)
+    ax[0].legend(fontsize=10,title='Churn Classification', loc='upper right', labels=['Not Churn', 'Churn'])
+    for p in ax[0].patches:
+        ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+60), ha='center', va='center',fontsize = 6)
+    ax[0].tick_params(axis='x', rotation=90)
+    ax[0].set_ylim(0,5000)
+    #Visualisasi % Churn dari setiap kelas
+    sns.barplot(x = 'feedback', y = 'churn_risk_score', data = df_eda, palette = 'winter',ax=ax[1])
+    ax[1].set_xlabel("feedback", fontsize= 12)
+    ax[1].set_ylabel("% Churn", fontsize= 12)
+    ax[1].set_title('% Churn based on Feedback', fontsize=14, fontweight='bold')
+    ax[1].set_ylim(0,0.8)
+    ax[1].tick_params(axis='x', rotation=90)
+    for p in ax[1].patches:
+        ax[1].annotate("%.2f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+0.03), ha='center', va='center',fontsize = 11)
+    st.pyplot(fig)
 if __name__ == '__main__':
     run()

prediction.py CHANGED Viewed

@@ -7,8 +7,8 @@ from tensorflow.keras.models import load_model
 def run() :
   # Load Model
-  with open('preprocessor.pkl', 'rb') as file_2:
-    preprocessor = pickle.load(file_2)
   model_churn = load_model('churn_model.h5', compile=False)
   # Membuat Title

 def run() :
   # Load Model
+  with open('preprocessor.pkl', 'rb') as file_1:
+    preprocessor = pickle.load(file_1)
   model_churn = load_model('churn_model.h5', compile=False)
   # Membuat Title