fadyabila committed on
Commit
d0c30b2
1 Parent(s): a802d59

Submission

Browse files
Files changed (10) hide show
  1. app.py +10 -0
  2. churn.csv +0 -0
  3. churn.png +0 -0
  4. customer_churn.h5 +3 -0
  5. eda.py +153 -0
  6. final_pipeline.pkl +3 -0
  7. main.py +10 -0
  8. model.png +0 -0
  9. prediction.py +97 -0
  10. requirements.txt +11 -0
app.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import eda
3
+ import prediction
4
+
# Sidebar page switcher: the first entry shows the EDA page, every other
# choice falls through to the prediction page.
navigation = st.sidebar.selectbox(
    'Choose Page : ',
    ('EDA', 'Churn Customer Prediction'),
)

# Pick the page module first, then render it through its run() entry point.
page = eda if navigation == 'EDA' else prediction
page.run()
churn.csv ADDED
The diff for this file is too large to render. See raw diff
 
churn.png ADDED
customer_churn.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f48cc86f0c3a4c763f9f92a5c0d7e9af5ecb50fe6567aab95e827ba07b0860f
3
+ size 66352
eda.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import seaborn as sns
4
+ import matplotlib.pyplot as plt
5
+ import plotly.express as px
6
+ from PIL import Image
7
+
# Use the wide layout so the visualisations can span the whole browser window.
_PAGE_CONFIG = {
    'page_title': 'Churn Customer',
    'layout': 'wide',
    'initial_sidebar_state': 'expanded',
}
st.set_page_config(**_PAGE_CONFIG)
14
+
def _show(fig):
    """Render *fig* on the Streamlit page, then release it.

    Figures created through pyplot stay registered until they are closed,
    so without the explicit close every rerun of the page would keep
    another set of figures alive.
    """
    st.pyplot(fig)
    plt.close(fig)


def run():
    """Render the EDA page: introduction, dataset description and charts.

    Reads `churn.png` and `churn.csv` from the working directory and writes
    everything to the current Streamlit page. Returns nothing.
    """
    # Title
    st.title('Churn Customer Prediction')
    st.write('### by Fadya Ulya Salsabila')

    # Illustration image
    image = Image.open('churn.png')
    st.image(image, caption='Illustration of Churn Customer')

    # Background description
    st.write('## Background')
    st.write("""
A make-up company "Sister" wants to minimize the risk of a customer stopping using their product.
The company then analyzes the history of its customers in making purchases based on time and frequency. Then, this company also looks at the feedback that customers have given it.
This is intended to determine customer predictions whether to stop using their product or not.
Because if many customers stop, the company will evaluate product sales and marketing to customers. In addition, the company will also provide discounts and special offers to loyal customers.

The objectives from this analysis and modeling in this dataset are:
1. Find out the customer prediction, whether customer churn or not.
2. Find out the best model prediction using Artificial Neural Network (ANN).""")

    st.write('## Dataset')
    st.write("""
The dataset is from Github Milestones 1 Hacktiv8 `churn.csv` that contains 22 columns.
1. `user_id`: ID of a customer
2. `age`: Age of a customer
3. `gender`: Gender of a customer
4. `region_category`: Region that a customer belongs to
5. `membership_category`: Category of the membership that a customer is using
6. `joining_date`: Date when a customer became a member
7. `joined_through_referral`: Whether a customer joined using any referral code or ID
8. `preferred_offer_types`: Type of offer that a customer prefers
9. `medium_of_operation`: Medium of operation that a customer uses for transactions
10. `internet_option`: Type of internet service a customer uses
11. `last_visit_time`: The last time a customer visited the website
12. `days_since_last_login`: Number of days since a customer last logged into the website
13. `avg_time_spent`: Average time spent by a customer on the website
14. `avg_transaction_value`: Average transaction value of a customer
15. `avg_frequency_login_days`: Number of times a customer has logged in to the website
16. `points_in_wallet`: Points awarded to a customer on each transaction
17. `used_special_discount`: Whether a customer uses special discounts offered
18. `offer_application_preference`: Whether a customer prefers offers
19. `past_complaint`: Whether a customer has raised any complaints
20. `complaint_status`: Whether the complaints raised by a customer was resolved
21. `feedback`: Feedback provided by a customer
22. `churn_risk_score`: Churn score (0 : Not churn, 1 : Churn)""")

    # Horizontal rule
    st.markdown('---')

    # Sub header
    st.subheader('EDA for Churn Customer')

    # Short introduction to the page
    st.write(
        ' On this page, the author will do a simple exploration.'
        ' The dataset used is the Churn Customer dataset.'
        ' This dataset comes from Github Project Hacktiv8.')

    # Show the raw DataFrame
    df1 = pd.read_csv('churn.csv')
    st.dataframe(df1)

    # Count plot of the target
    st.write('#### Churn Risk Plot')
    fig, ax = plt.subplots(figsize=(10, 7))
    sns.countplot(x='churn_risk_score', data=df1, palette="PuRd", ax=ax)
    _show(fig)
    st.write('The target data is balanced.')

    st.write('#### Gender Based on Churn Risk')
    fig1, ax1 = plt.subplots(figsize=(15, 8))
    sns.countplot(x='gender', hue='churn_risk_score', data=df1, ax=ax1)
    _show(fig1)
    st.write('Gender distribution is normal between men and women.')

    # Group ages into decades. The labels are left-closed ('20-29' starts at
    # 20), so the bins must be left-closed too; with the default right-closed
    # bins an age of exactly 20 would be counted under '10-19'.
    bins = [8, 20, 30, 40, 50, 60, 120]
    labels = ['10-19', '20-29', '30-39', '40-49', '50-59', '60-69']
    df1['agerange'] = pd.cut(df1.age, bins, labels=labels, right=False)

    # Age group versus churn risk
    st.write('#### Age Based on Churn Risk')
    fig2, ax2 = plt.subplots(figsize=(10, 7))
    sns.countplot(x='agerange', data=df1, hue="churn_risk_score", ax=ax2)
    _show(fig2)
    st.write('Customers in this company varies greatly, ranging from 10-64 years old. ')

    # Correlation heatmap. Restrict to numeric/bool columns explicitly:
    # recent pandas versions raise instead of silently skipping text and
    # categorical columns (such as the 'agerange' column added above).
    st.write('#### Heatmap Correlation')
    fig = plt.figure(figsize=(15, 8))
    numeric_df = df1.select_dtypes(include=['number', 'bool'])
    sns.heatmap(numeric_df.corr(), annot=True, square=True)
    _show(fig)
    st.write("""
The heatmap correlation above shows that the column that has a very high relationship with churn risk is the `avg_frequency_login_days` column with score `0.11`. This column shows how many customers log in in a day.
It means they are still interested in the product in this company. Meanwhile, `avg_transaction_value` have a strong negative correlation with churn risk with score `-0.22`.
This shows that the number of purchase transactions on this product has no significant effect on customer churn.""")

    # Internet option versus churn risk
    st.write('#### Internet Option Based on Churn Risk')
    fig3, ax3 = plt.subplots(figsize=(10, 7))
    sns.countplot(x='internet_option', data=df1, hue="churn_risk_score", ax=ax3, palette="Blues")
    _show(fig3)
    st.write("""
Bar plot visualization above, shows that the `internet option` of customers doesn't have a strong correlation with churn risk.
Distribution of internet option data almost have the same number of values and there is no significant difference.
Customers who use the internet with Wi-Fi, Fiber Optic, and Mobile Data are almost the same.""")

    # Region category versus churn risk
    st.write('#### Region Category Based on Churn Risk')
    fig4, ax4 = plt.subplots(figsize=(10, 7))
    sns.countplot(x='region_category', data=df1, hue="churn_risk_score", ax=ax4, palette="Blues")
    _show(fig4)
    st.write("""
Based on customer region, there is no significant correlation with churn risk.
It's just that many customers of this product live in town areas compared to villages and cities.""")

    # Membership category versus churn risk
    st.write('#### Membership Category on Churn Risk')
    fig5, ax5 = plt.subplots(figsize=(10, 7))
    sns.countplot(y='membership_category', data=df1, hue="churn_risk_score", ax=ax5, palette="Blues")
    _show(fig5)
    st.write("""
In `membership_category` column, customers which include in `No Membership` and `Basic Membership` are customers with the highest churn risk.
This can happen because the customer is deemed not a loyal customer so the risk of stopping the transaction is high.
In contrast to silver, premium, gold, and platinum members where customers are considered loyal to product transactions.
""")

    # Histogram of a column chosen by the user
    st.write('#### Histogram Based On User Input')
    pilihan = st.selectbox('Choose Column : ', ('age', 'gender', 'days_since_last_login', 'avg_time_spent',
                                                'avg_transaction_value', 'avg_frequency_login_days',
                                                'points_in_wallet'))
    fig = plt.figure(figsize=(15, 5))
    sns.histplot(df1[pilihan], bins=30, kde=True)
    _show(fig)


if __name__ == '__main__':
    run()
final_pipeline.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9069967f38a72a08cead02e6165a6a92ca62ac04e38e25557416f5b1cdbef61c
3
+ size 3546
main.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import eda
3
+ import prediction
4
+
# Page names offered in the sidebar; 'EDA' is the default (first) entry.
page_names = ('EDA', 'Churn Customer Prediction')
navigation = st.sidebar.selectbox('Choose Page : ', page_names)

# Resolve the renderer for the chosen page: only 'EDA' maps to the EDA page,
# anything else renders the prediction form.
render_page = prediction.run if navigation != 'EDA' else eda.run
render_page()
model.png ADDED
prediction.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pickle
3
+ from tensorflow.keras.models import load_model
4
+ import pandas as pd
5
+ import numpy as np
6
+
# Load the serialized artifacts once, when the module is imported.

# Keras model stored in HDF5 format.
model_ann = load_model('customer_churn.h5')

# Pickled preprocessing pipeline; the file ships with the app, so it is
# treated as trusted input here.
with open('final_pipeline.pkl', mode='rb') as file_1:
    model_pipeline = pickle.load(file_1)
13
+
def run():
    """Render the prediction form and, once submitted, show the prediction.

    The form values are assembled into a one-row DataFrame, passed through
    `model_pipeline.transform` and then to `model_ann.predict`. An output of
    0.5 or more is reported as 'Churn', anything lower as 'Not Churn'.
    Returns nothing; all output goes to the current Streamlit page.
    """
    # NOTE(review): the option strings below are fed to the fitted pipeline
    # unchanged, so they presumably have to match the categories seen during
    # training (e.g. 'Not Aplicable', 'Mobile-Data') - verify against the data.
    with st.form(key='Churn_Customer_Prediction'):
        # NOTE(review): the target itself is collected as an input and passed
        # to the pipeline - confirm the pipeline does not use this column.
        churn_risk_score = st.selectbox('Churn Risk', (0, 1), index=1)
        age = st.number_input('Age', min_value=23, max_value=65, value=23)
        gender = st.selectbox('Gender', ('Male', 'Female'), index=1)
        days_since_last_login = st.number_input('Last Login', min_value=0, max_value=26, value=0)
        avg_time_spent = st.number_input('Avg. Time Spent', min_value=0, max_value=3236, value=0)
        avg_transaction_value = st.number_input('Avg. Transaction Value', min_value=800, max_value=99915, value=29271)
        avg_frequency_login_days = st.number_input('Avg. Frequency Login Days', min_value=0, max_value=73, value=0)
        points_in_wallet = st.number_input('Points in Wallet', min_value=0, max_value=2070, value=0)
        joining_date = st.date_input("Select Join Date")
        last_visit_time = st.time_input('Last Visit Time')
        st.markdown('---')

        region_category = st.selectbox('Region Category', ('Village', 'Town', 'City'), index=1)
        membership_category = st.selectbox('Membership Category', ('No Membership', 'Basic Membership',
                                                                   'Silver Membership', 'Premium Membership',
                                                                   'Gold Membership', 'Platinum Membership'), index=1)
        preferred_offer_types = st.selectbox('Preferred Offer', ('Without Offers', 'Credit/Debit Card Offers',
                                                                 'Gift Vouchers/Coupons'), index=1)
        # A missing comma used to glue 'Both' to a stray offer-type string,
        # producing the bogus option 'BothGift Vouchers/Coupons'.
        medium_of_operation = st.selectbox('Medium Ops', ('Desktop', 'Mobile', 'Both'), index=1)
        internet_option = st.selectbox('Internet Ops', ('Wi-Fi', 'Fiber_Optic', 'Mobile-Data'), index=1)
        feedback = st.selectbox('Feedback', ('Poor Website', 'Poor Customer Service', 'Poor Product Quality',
                                             'Too many ads', 'No reason specified', 'Products always in Stock',
                                             'Reasonable Price', 'Quality Customer Care', 'User Friendly Website'), index=1)
        complaint_status = st.selectbox('Complaint Status', ('No Information Available', 'Not Aplicable', 'Unsolved',
                                                             'Solved', 'Solved in Follow-up'), index=1)

        st.markdown('---')

        joined_through_referral = st.selectbox('Join Through Referral', ('Yes', 'No'), index=1)
        used_special_discount = st.selectbox('Use Special Discount', ('Yes', 'No'), index=1)
        offer_application_preference = st.selectbox('Offer Application Preference', ('Yes', 'No'), index=1)
        past_complaint = st.selectbox('Past Complaint', ('Yes', 'No'), index=1)

        submitted = st.form_submit_button('Predict')

    # Nothing to predict until the user presses the button; skipping here
    # avoids running the pipeline and the network on every page render.
    if not submitted:
        return

    # One-row frame holding the submitted values.
    data_inf = pd.DataFrame([{
        'age': age,
        'gender': gender,
        'region_category': region_category,
        'membership_category': membership_category,
        'joining_date': joining_date,
        'joined_through_referral': joined_through_referral,
        'preferred_offer_types': preferred_offer_types,
        'medium_of_operation': medium_of_operation,
        'internet_option': internet_option,
        'last_visit_time': last_visit_time,
        'days_since_last_login': days_since_last_login,
        'avg_time_spent': avg_time_spent,
        'avg_transaction_value': avg_transaction_value,
        'avg_frequency_login_days': avg_frequency_login_days,
        'points_in_wallet': points_in_wallet,
        'used_special_discount': used_special_discount,
        'offer_application_preference': offer_application_preference,
        'past_complaint': past_complaint,
        'complaint_status': complaint_status,
        'feedback': feedback,
        'churn_risk_score': churn_risk_score,
    }])
    data_inf_transform = model_pipeline.transform(data_inf)

    # Show the transformed row that is fed to the network.
    st.dataframe(data_inf_transform)

    # Predict using ANN: Sequential API. Only one row is submitted, so the
    # first value of the flattened output is the one to threshold at 0.5.
    y_pred_inf = model_ann.predict(data_inf_transform)
    is_churn = float(np.ravel(y_pred_inf)[0]) >= 0.5

    st.write('# Prediction : ', 'Churn' if is_churn else 'Not Churn')


if __name__ == '__main__':
    run()
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Berisi daftar library yang kita butuhkan
2
+
3
+ streamlit
4
+ pandas
5
+ seaborn
6
+ matplotlib
7
+ numpy
8
+ scikit-learn==1.2.1
9
+ plotly
10
+ tensorflow-cpu==2.12.0
11
+ protobuf==3.20.1