Evan Derin Ihsanudin committed on
Commit
400dd5b
1 Parent(s): e54c232

P2M1_Deployment

Browse files
Files changed (6) hide show
  1. app.py +29 -0
  2. churn_model.h5 +3 -0
  3. eda.py +183 -0
  4. eda_churn.csv +0 -0
  5. prediction.py +86 -0
  6. preprocessor.pkl +3 -0
app.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import eda
3
+ import prediction
4
+
5
# Page configuration: browser tab title and wide layout.
st.set_page_config(page_title='Churn Prediction', layout='wide')

# Inject CSS that hides Streamlit's default main menu and footer.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

# Sidebar: author heading, page selector and banner image.
sidebar = st.sidebar
sidebar.markdown("# Evan Derin Ihsanudin - RMT-FTDS-17")
navigation = sidebar.selectbox(
    'Pilih Halaman (Churn Prediction/EDA): ',
    ('Churn Prediction', 'Exploratory Data Analysis'),
)
sidebar.image("https://imgur.com/t4aS0jH.png", use_column_width=True)

# Render the selected page; anything other than the prediction page
# falls through to the EDA page, as in the original if/else.
render_page = prediction.run if navigation == 'Churn Prediction' else eda.run
render_page()
churn_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7eea4b8ac89d86ecbbef7e8f38e1cd9c6d6b8822b4193f7d14791fe026e4b1e0
3
+ size 262152
eda.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import seaborn as sns
4
+ import matplotlib.pyplot as plt
5
+ import plotly.express as px
6
+ from PIL import Image
7
+
8
+
9
+
10
# Display order for the age-range categories stored in the ``AgeBin`` column.
_AGE_BIN_ORDER = ['(10, 20]', '(20, 30]', '(30, 40]', '(40, 50]', '(50, 60]', '(60, 70]']


def _show_figure(fig):
    """Render *fig* on the Streamlit page, then close it.

    Figures created through pyplot stay registered with pyplot until they are
    closed, so closing after rendering keeps reruns from accumulating them.
    """
    st.pyplot(fig)
    plt.close(fig)


def _annotate_bars(axis, fmt, offset, fontsize):
    """Write every bar's height centred above it, *offset* data units higher."""
    for bar in axis.patches:
        height = bar.get_height()
        axis.annotate(
            fmt % height,
            (bar.get_x() + bar.get_width() / 2, height + offset),
            ha='center', va='center', fontsize=fontsize,
        )


def _section_intro(title, findings):
    """Emit a section sub-header, the fixed lead-in sentence and the bullets."""
    st.subheader(title)
    st.write('Dari visualisasi dibawah dapat disimpulkan bahwa :')
    for finding in findings:
        st.markdown(finding)


def _plot_churn_distribution(df):
    """Draw the count plot and pie chart of ``churn_risk_score``."""
    fig, ax = plt.subplots(1, 2, figsize=(15, 6))
    fig.suptitle('Customer Churn Distribution', fontsize=18, fontweight='bold')

    sns.countplot(x='churn_risk_score', data=df, palette="winter", ax=ax[0])
    ax[0].set_xlabel("churn_risk_score", fontsize=12)
    ax[0].set_ylabel("# of Customer", fontsize=12)
    ax[0].set_ylim(0, 23000)
    ax[0].set_xticks([0, 1], ['Not Churn', 'Churn'])
    _annotate_bars(ax[0], "%.0f", 405, 11)

    # value_counts() orders by frequency, so sort by class value first;
    # otherwise the positional labels end up on the wrong slices whenever
    # class 1 is the more frequent one.
    churn_counts = df['churn_risk_score'].value_counts().sort_index()
    churn_counts.plot(
        kind='pie', ax=ax[1], labels=['Not Churn', 'Churn'],
        autopct='%1.1f%%', textprops={"fontsize": 12},
    )
    ax[1].set_ylabel("% of Customer", fontsize=12)
    _show_figure(fig)


def _plot_distribution(df, column, suptitle, xlabel, ylim_top, offset):
    """Draw a count plot (left) and a share-of-total pie (right) for *column*."""
    fig, ax = plt.subplots(1, 2, figsize=(15, 6))
    fig.suptitle(suptitle, fontsize=18, fontweight='bold')

    sns.countplot(x=column, data=df, palette='winter', ax=ax[0])
    ax[0].set_xlabel(xlabel, fontsize=12)
    ax[0].set_ylabel("# of Customer", fontsize=12)
    ax[0].set_ylim(0, ylim_top)
    _annotate_bars(ax[0], "%.0f", offset, 10)

    # Target the right-hand axes explicitly instead of pyplot's current axes.
    df[column].value_counts().plot(
        kind='pie', ax=ax[1], autopct='%1.1f%%', textprops={"fontsize": 12},
    )
    ax[1].set_ylabel("% of Customer", fontsize=10)
    _show_figure(fig)


def _plot_churn_breakdown(df, column, xlabel, count_title, rate_title,
                          ylim_top, count_offset, rate_offset, order=None):
    """Draw per-class counts split by churn (left) and mean churn (right)."""
    fig, ax = plt.subplots(1, 2, figsize=(15, 6))

    sns.countplot(data=df, x=column, hue="churn_risk_score", palette='winter',
                  order=order, ax=ax[0])
    ax[0].set_title(count_title, fontsize=14, fontweight='bold')
    ax[0].set_xlabel(xlabel, fontsize=12)
    ax[0].set_ylabel("# of Customer", fontsize=12)
    ax[0].tick_params(axis="x", labelsize=9.5)
    ax[0].legend(fontsize=10, title='Churn Classification', loc='upper right',
                 labels=['Not Churn', 'Churn'])
    _annotate_bars(ax[0], "%.0f", count_offset, 10)
    ax[0].set_ylim(0, ylim_top)

    # barplot aggregates with the mean by default, so each bar is the mean of
    # churn_risk_score within that class.
    sns.barplot(x=column, y='churn_risk_score', data=df, palette='winter',
                order=order, ax=ax[1])
    ax[1].set_xlabel(xlabel, fontsize=12)
    ax[1].set_ylabel("% Churn", fontsize=12)
    ax[1].set_title(rate_title, fontsize=14, fontweight='bold')
    ax[1].set_ylim(0, 0.7)
    _annotate_bars(ax[1], "%.2f", rate_offset, 11)
    _show_figure(fig)


def _plot_boxen(df, column, title):
    """Draw a boxen plot of *column* grouped by ``churn_risk_score``."""
    fig, axis = plt.subplots(figsize=(15, 6))
    sns.boxenplot(y=df[column], x=df['churn_risk_score'], palette='Blues', ax=axis)
    axis.set_title(title, fontsize=20)
    _show_figure(fig)


def run():
    """Render the Exploratory Data Analysis page.

    Reads ``eda_churn.csv`` from the working directory and, for each feature,
    prints a short list of findings followed by the matching charts.
    """
    # Page title and lead-in
    st.markdown("<h1 style='text-align: center;'>Exploratory Data Analysis</h1>", unsafe_allow_html=True)
    st.write('Berikut adalah EDA dari setiap feature')

    # Load the EDA dataset
    df_eda = pd.read_csv('eda_churn.csv')

    # --- Churn ---
    _section_intro('**EDA Feature Churn**', [
        '- *Customer* yang *churn* lebih banyak dari pada *customer* yang tidak *churn*',
    ])
    _plot_churn_distribution(df_eda)

    # --- Age ---
    _section_intro('**EDA Feature Age**', [
        '- *Customer* paling banyak adalah *customer* yang memiliki *range* umur 40-50 tahun',
        '- *Customer* yang paling banyak *churn* adalah *customer* dengan *range* umur 50-60 tahun',
        '- Akan tetapi jika dilihat dari persentase *churn* pada setiap kelas *range* umur, maka tidak ada perbedaan signifikan',
    ])
    _plot_distribution(df_eda, 'AgeBin', 'Range Customer Age Distribution',
                       "Range Customer Age", 7600, 105)
    _plot_churn_breakdown(df_eda, 'AgeBin', "Range Age",
                          'Range Age Distribution', '% Churn based on Age',
                          4700, 75, 0.03, order=_AGE_BIN_ORDER)

    # --- Time spent ---
    _section_intro('**EDA Feature Time Spent**', [
        '- Jika dilihat pada visualisasi diatas maka `avg_time_spent` antara *customer* yang *churn* dan *customer* yang tidak *churn* tidak berbeda secara signifikan',
    ])
    _plot_boxen(df_eda, 'avg_time_spent', 'Average Time Spent vs Churn')

    # --- Transaction value ---
    _section_intro('**EDA Feature Transaction Value**', [
        '- *Customer* yang tidak *churn* memiliki *average transaction value* yang lebih tinggi (terpusat di 18.000-40.000) dari pada *customer* yang *churn* (terpusat di 16.000-36.000)',
    ])
    _plot_boxen(df_eda, 'avg_transaction_value', 'Average Transaction Value vs Churn')

    # --- Average login frequency ---
    _section_intro('**EDA Feature Avg Frequency Login Days**', [
        '- *Customer* yang tidak *churn* memiliki *average frequency login days* yang lebih rendah (terpusat di 8-20x) dari pada *customer* yang *churn* (terpusat di 10-25x)',
    ])
    _plot_boxen(df_eda, 'avg_frequency_login_days', 'Average Frequency Login Days vs Churn')

    # --- Points in wallet ---
    _section_intro('**EDA Feature Point Wallet**', [
        '- *Customer* yang tidak *churn* memiliki *points in wallet* yang lebih tinggi (terpusat di 700-800) dari pada *customer* yang *churn* (terpusat di 600-700)',
    ])
    _plot_boxen(df_eda, 'points_in_wallet', 'Points in Wallet vs Churn')

    # --- Gender ---
    _section_intro('**EDA Feature Gender**', [
        '- *Customer* paling banyak adalah *customer* wanita (50.1%). Akan tetapi tidak berbeda signifikan, hanya berbeda 0.1% dari *customer* pria',
        '- *Customer* yang paling banyak *churn* adalah *customer* wanita. Kemungkinan banyak pada kelas wanita karena *customer* paling banyak juga pada kelas ini',
        '- Akan tetapi jika dilihat dari persentase *churn* pada setiap kelas *gender*, maka tidak ada perbedaan signifikan',
    ])
    _plot_distribution(df_eda, 'gender', 'Gender Distribution', "Gender", 21000, 305)
    _plot_churn_breakdown(df_eda, 'gender', "Gender",
                          'Gender Distribution', '% Churn based on Gender',
                          13000, 175, 0.02)


if __name__ == '__main__':
    run()
eda_churn.csv ADDED
The diff for this file is too large to render. See raw diff
 
prediction.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import pickle
5
+ from tensorflow.keras.models import load_model
6
+
7
+
8
def run():
    """Render the Churn Prediction page.

    Loads the pickled preprocessor and the Keras model from the working
    directory, collects one customer's attributes through a form, previews
    them as a one-row table and, once the form is submitted, prints the
    predicted class ('Churn' or 'Not Churn').
    """
    # Load the fitted preprocessor and the trained model.
    # NOTE: unpickling executes code from the file, so 'preprocessor.pkl'
    # must only ever be a trusted artifact shipped with the app.
    with open('preprocessor.pkl', 'rb') as file_2:
        preprocessor = pickle.load(file_2)
    model_churn = load_model('churn_model.h5', compile=False)

    # Page title
    st.markdown("<h1 style='text-align: center;'>Churn Prediction</h1>", unsafe_allow_html=True)

    # Page description
    st.write('Page ini berisi model untuk memprediksi churn customer')

    # Input form
    with st.form(key='form_customer'):

        st.markdown('### **Customer Data**')
        user_id = st.text_input('User ID', value='')
        age = st.slider('Age', 10, 70, 30)
        gender = st.selectbox('Gender', ('M', 'F'), index=1)
        region_category = st.radio('Region', options=['City', 'Village', 'Town'], horizontal=True)
        internet_option = st.selectbox('Internet Option', ('Wi-Fi', 'Fiber_Optic', 'Mobile_Data'), index=1)
        medium_of_operation = st.radio('Medium', options=['Desktop', 'Smartphone', 'Both'], horizontal=True)
        st.markdown('---')
        st.markdown('### **Login Data**')
        days_since_last_login = st.slider('Days Since Last Login', 0, 30, 3)
        avg_frequency_login_days = st.slider('Avg Frequency Login Days', 0, 73, 14)
        st.markdown('---')
        st.markdown('### **Membership Data**')
        joined_through_referral = st.selectbox('Referral', ('Yes', 'No'), index=1)
        membership_category = st.selectbox('Membership Category', ('No Membership', 'Basic Membership', 'Silver Membership', 'Premium Membership', 'Gold Membership', 'Platinum Membership'), index=1)
        st.markdown('---')
        st.markdown('### **Transaction Data**')
        points_in_wallet = st.number_input('Points in Wallet', min_value=0, max_value=2070, value=600, step=1)
        avg_transaction_value = st.number_input('Avg Transaction Value', min_value=800, max_value=90000, value=30000, step=1)
        preferred_offer_types = st.radio('Offer Types', options=['Without Offers', 'Credit/Debit Card Offers', 'Gift Vouchers/Coupons'], horizontal=True)
        used_special_discount = st.selectbox('Used Special Discount', ('Yes', 'No'), index=1)
        past_complaint = st.selectbox('Past Complaint', ('Yes', 'No'), index=1)
        feedback = st.selectbox('Feedback', ('Poor Website', 'Poor Customer Service', 'Too many ads', 'Poor Product Quality', 'No reason specified', 'Products always in Stock', 'Reasonable Price', 'Quality Customer Care', 'User Friendly Website'), index=1)
        submitted = st.form_submit_button('Predict')

    # Assemble the inputs into a one-row DataFrame for inference.
    data_inf = pd.DataFrame([{
        'user_id': user_id,
        'age': age,
        'gender': gender,
        'region_category': region_category,
        'internet_option': internet_option,
        'medium_of_operation': medium_of_operation,
        'days_since_last_login': days_since_last_login,
        'avg_frequency_login_days': avg_frequency_login_days,
        'joined_through_referral': joined_through_referral,
        'membership_category': membership_category,
        'points_in_wallet': points_in_wallet,
        'avg_transaction_value': avg_transaction_value,
        'used_special_discount': used_special_discount,
        'past_complaint': past_complaint,
        'preferred_offer_types': preferred_offer_types,
        'feedback': feedback,
    }])

    # Show the collected inputs explicitly. A bare `data_inf` expression only
    # renders through Streamlit "magic", which is not applied to imported
    # modules, so it displayed nothing when this page was opened via app.py.
    st.dataframe(data_inf)

    if submitted:
        # Apply the fitted preprocessing to the raw inputs.
        data_final = preprocessor.transform(data_inf)

        # Predict with the loaded Keras model. `.item()` pulls out the single
        # output value (and raises if the output is not exactly one number)
        # rather than relying on the truthiness of a 2-D array comparison.
        # The 0.5 cut-off presumably matches a sigmoid output — TODO confirm.
        churn_score = np.asarray(model_churn.predict(data_final)).item()
        prediction = 'Churn' if churn_score >= 0.5 else 'Not Churn'

        st.write('# Churn Prediction : ', prediction)


if __name__ == '__main__':
    run()
preprocessor.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ceb6d174d15913521943fd72ada7006de18e51eba93ef3d5a722538f5f8e4be
3
+ size 2828