ahmadluay committed on
Commit
adb5d59
1 Parent(s): 2a603a4

first commit

Browse files
Files changed (9) hide show
  1. Drop_Columns.txt +1 -0
  2. app.py +11 -0
  3. churn.csv +0 -0
  4. churn.jpeg +0 -0
  5. eda.py +196 -0
  6. final_pipeline.pkl +3 -0
  7. model_seq2.h5 +3 -0
  8. prediction.py +94 -0
  9. requirements.txt +8 -0
Drop_Columns.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ["days_since_last_login", "used_special_discount", "joining_date", "gender", "age", "user_id", "internet_option", "last_visit_time", "complaint_status"]
app.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import eda # python file
3
+ import prediction # python file
4
+
5
# Sidebar page switcher: the first entry is the EDA page, anything else
# falls through to the prediction page.
page_names = ('EDA', 'Customer Churn Prediction')
navigation = st.sidebar.selectbox('Page Navigation: ', page_names)

# Pick the page's render function, then invoke it.
render_page = eda.run if navigation == 'EDA' else prediction.run
render_page()
11
+
churn.csv ADDED
The diff for this file is too large to render. See raw diff
 
churn.jpeg ADDED
eda.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import pandas as pd
4
+ import seaborn as sns
5
+ import matplotlib.pyplot as plt
6
+ import plotly.express as px
7
+ import sklearn
8
+ from sklearn.preprocessing import LabelEncoder
9
+
10
+ from PIL import Image
11
+
12
# Browser-tab title, wide layout and an initially expanded sidebar.
# NOTE(review): this call executes when the module is imported, not inside
# run() -- confirm that this is the intended place for page configuration.
_PAGE_SETTINGS = {
    'page_title': 'Customer Churn Prediction Using Artificial Neural Network in E-commerce Company',
    'layout': 'wide',
    'initial_sidebar_state': 'expanded',
}
st.set_page_config(**_PAGE_SETTINGS)
17
+
18
def churn_counts(labelled_df):
    """Count rows per value of the ``churn_risk_score`` column.

    Helper for ``run``. The result columns are named explicitly because the
    names produced by ``value_counts().to_frame().reset_index()`` differ
    between pandas 1.x and pandas 2.x.

    Args:
        labelled_df: DataFrame containing a ``churn_risk_score`` column.

    Returns:
        DataFrame with the columns ``churn_risk_score`` (the distinct values,
        most frequent first) and ``count`` (number of rows per value).
    """
    return (
        labelled_df['churn_risk_score']
        .value_counts()
        .rename_axis('churn_risk_score')
        .reset_index(name='count')
    )


def run():
    """Render the exploratory-data-analysis page of the Streamlit app.

    Reads ``churn.jpeg`` and ``churn.csv`` from the working directory and
    writes the title, dataset description, charts and commentary to the
    page. Returns nothing.
    """
    # title
    st.title('Customer Churn Prediction Using Artificial Neural Network in E-commerce Company')
    st.write('by Ahmad Luay Adnani')

    # sub header
    st.subheader('Exploratory Data Analysis of the Dataset.')

    # Add Image
    image = Image.open('churn.jpeg')
    st.image(image, caption='Customer churn illustration')

    # Description
    st.write('In customer relationship management, it is important for e-commerce businesses to attract new customers and retain existing ones. Predicting customer churn in e-commerce business is critical to the success of online retailers. By analyzing customer data, businesses can gain insights into customer behavior and develop strategies to retain customers, ultimately improving customer satisfaction and driving revenue growth.')
    st.write('# Dataset')
    st.write('Dataset used in this analysis is churn dataset from an e-commerce company that wants to minimize the risk of a customer stopping using the product they offer.')

    # show dataframe
    df = pd.read_csv('churn.csv')
    st.dataframe(df)

    # add description of Dataset
    st.write('Following are the variables and definitions of each column in the dataset.')
    column_descriptions = (
        "`user_id` : ID of a customer",
        "`age` : Age of a customer",
        "`gender` : Gender of a customer",
        "`region_category` : Region that a customer belongs to",
        "`membership_category` : Category of the membership that a customer is using",
        "`joining_date` : Date when a customer became a member",
        "`joined_through_referral` : Whether a customer joined using any referral code or ID",
        "`preferred_offer_types` : Type of offer that a customer prefers",
        "`medium_of_operation` : Medium of operation that a customer uses for transactions",
        "`internet_option` : Type of internet service a customer uses",
        "`last_visit_time` : The last time a customer visited the website",
        "`days_since_last_login` : Number of days since a customer last logged into the website",
        "`avg_time_spent` : Average time spent by a customer on the website",
        "`avg_transaction_value` : Average transaction value of a customer",
        "`avg_frequency_login_days` : Number of times a customer has logged in to the website",
        "`points_in_wallet` : Points awarded to a customer on each transaction",
        "`used_special_discount` : Whether a customer uses special discounts offered",
        "`offer_application_preference` : Whether a customer prefers offers",
        "`past_complaint` : Whether a customer has raised any complaints",
        "`complaint_status` : Whether the complaints raised by a customer was resolved",
        "`feedback` : Feedback provided by a customer",
        "`churn_risk_score` : Churn score `0` : Not churn `1` : Churn",
    )
    for description in column_descriptions:
        st.write(description)

    ###
    # Churn Prediction

    st.write('# Exploratory Data Analysis ')
    st.write('## Number of Customer at Risk of Churning')

    # churn
    df_eda = df.copy()
    # Assign the relabelled column back instead of a chained inplace replace,
    # which is not guaranteed to reach the parent frame.
    df_eda['churn_risk_score'] = df_eda['churn_risk_score'].replace({0: 'Not Churn', 1: 'Churn'})
    churn = churn_counts(df_eda)

    # Plot PieChart with Plotly
    fig = px.pie(churn, values='count', names='churn_risk_score', color_discrete_sequence=['red', 'blue'])
    fig.update_layout(title_text="Number of Customer at risk of Churning")
    st.plotly_chart(fig)
    st.write('Based on visualization above, the percentage of customer at risk of churning is 54.1%. Further data exploration is needed to find out what factors cause these customers to be at risk of churning.')

    ###
    # Number of Customers Based on Their Membership Categories
    st.write('## Number of Customers Based on Their Membership Categories')

    # membership category
    membership_category = df_eda.groupby(['churn_risk_score', 'membership_category']).aggregate(
        Number_of_customer_per_membership_category=('membership_category', 'count')
    ).reset_index()

    # plotting bar plot
    fig = px.bar(
        membership_category,
        x="membership_category",
        y="Number_of_customer_per_membership_category",
        color='churn_risk_score',
        color_discrete_sequence=['red', 'blue'],
        orientation="v",
        hover_name="membership_category",
    )
    fig.update_layout(title_text="Number of customers based on their membership category")
    st.plotly_chart(fig)
    st.write('Based on visualization above, customers **without membership** and customers with **basic membership** have the highest risk of churning. Based on my assumption, customers without membership and customers with basic membership may have a higher risk of churning for several reasons:')
    st.write('1. **Lack of loyalty**: Customers without membership or with basic membership may not feel a strong sense of loyalty to the company or brand, making it easier for them to switch to a competitor.')
    st.write('2. **Limited benefits**: Basic membership may offer limited benefits or perks compared to higher-tier memberships, making it less attractive to customers who may be seeking more value.')
    st.write('3. **Price sensitivity**: Customers without membership or with basic membership may be more price-sensitive and may be more likely to switch to a competitor if they find a better deal elsewhere.')
    st.write('4. **Limited engagement**: Customers without membership or with basic membership may have limited engagement with the company or brand, making it harder for the company to build a strong relationship with them and retain their loyalty.')

    ###
    # Average Transaction Value

    st.write('## Average Transaction Value')

    # average transaction value
    avg_transaction_value = df_eda.groupby(['churn_risk_score']).aggregate(
        avg_transaction_value=('avg_transaction_value', 'mean')
    ).reset_index()

    # plotting bar plot
    fig = px.bar(
        avg_transaction_value,
        x="churn_risk_score",
        y="avg_transaction_value",
        color='churn_risk_score',
        color_discrete_sequence=['red', 'blue'],
        orientation="v",
    )
    fig.update_layout(title_text="Average Transaction Value")
    st.plotly_chart(fig)
    st.write('Based on visualization above, customers who are at risk of churning have a **lower average transaction value** compared to customers who are not at risk of churning. Based on my assumption, customers who are at risk of churning may have a lower average transaction value for several reasons:')
    st.write("1. **Reduced usage**: Customers who are at risk of churning may be using the company's products or services less frequently or may have stopped using them altogether. This reduced usage can result in a lower average transaction value.")
    st.write('2. **Price sensitivity**: Customers who are at risk of churning may be more price-sensitive and may be more likely to switch to a competitor if they find a better deal elsewhere. This can result in customers opting for lower-priced products or services, which can lower the average transaction value.')
    st.write('3. **Disengagement**: Customers who are at risk of churning may be less engaged with the company or brand and may be less likely to make high-value purchases. This reduced engagement can result in a lower average transaction value.')

    ###
    # Points in Wallet

    st.write('## Points in Wallet')

    # points in wallet
    points_in_wallet = df_eda.groupby(['churn_risk_score']).aggregate(
        points_in_wallet=('points_in_wallet', 'mean')
    ).reset_index()

    # plotting bar plot
    fig = px.bar(
        points_in_wallet,
        x="churn_risk_score",
        y="points_in_wallet",
        color='churn_risk_score',
        color_discrete_sequence=['red', 'blue'],
        orientation="v",
    )
    fig.update_layout(title_text="Points in Wallet")
    st.plotly_chart(fig)
    st.write('Based on visualization above, customers who are at risk of churning have a **lower points balance in their wallet** compared to customers who are not at risk of churning. Based on my assumption, customers who are at risk of churning may have a lower points balance in their wallet for several reasons:')
    st.write("1. **Reduced usage**: Customers who are at risk of churning may be using the company's products or services less frequently or may have stopped using them altogether. This reduced usage can result in a lower accumulation of points in their wallet.")
    st.write('2. **Disengagement**: Customers who are at risk of churning may be less engaged with the company or brand and may not be actively participating in loyalty programs or earning points. This reduced engagement can result in a lower accumulation of points in their wallet.')

    ###
    # Feedback

    st.write('## Feedback')

    # feedback
    feedback = df_eda.groupby(['churn_risk_score', 'feedback']).aggregate(
        Number_of_customer=('feedback', 'count')
    ).reset_index()

    # plotting bar plot
    fig = px.bar(
        feedback,
        x="feedback",
        y="Number_of_customer",
        color='churn_risk_score',
        color_discrete_sequence=['red', 'blue'],
        orientation="v",
        hover_name="feedback",
    )
    fig.update_layout(title_text="Number of Customers Based on Their Feedback")
    st.plotly_chart(fig)
    st.write('Based on visualization above, The most feedback that causes customers to be at risk of churning is **poor product quality**. Based on my assumption, poor product quality can cause customers to be at risk of churning for several reasons:')
    st.write("1. **Reduced satisfaction**: Poor product quality can lead to reduced customer satisfaction, which can result in customers being less likely to continue using the company's products or services.")
    st.write("2. **Negative word-of-mouth**: Customers who experience poor product quality may share their negative experiences with others, resulting in negative word-of-mouth for the company. This can lead to a decrease in new customer acquisition and can also increase the likelihood of existing customers churning.")
    st.write("3. **Lack of trust**: Poor product quality can lead to a lack of trust in the company and its ability to provide high-quality products or services. This lack of trust can cause customers to be less loyal and more likely to switch to a competitor.")
    st.write("4. **Perceived value**: Poor product quality can result in customers perceiving less value in the company's products or services, which can make them less likely to continue using them and more likely to switch to a competitor.")

    ###
    # Correlation Matrix Analysis
    st.write('## Correlation Matrix Analysis')
    df_copy = df.copy()

    # Categorical (object-dtype) columns that need encoding before corr()
    cat_columns = df_copy.select_dtypes(include=['object']).columns.tolist()

    # Using LabelEncoder to convert categorical into numerical data
    m_LabelEncoder = LabelEncoder()
    for col in cat_columns:
        df_copy[col] = m_LabelEncoder.fit_transform(df_copy[col])

    # Plotting Correlation Matrix of all columns, including churn_risk_score
    sns.set(font_scale=1)
    fig = plt.figure(figsize=(25, 25))
    sns.heatmap(df_copy.corr(), annot=True, cmap='coolwarm', fmt='.2f')
    st.pyplot(fig)
    # Close the figure so figures do not pile up across Streamlit reruns.
    plt.close(fig)

    st.write('Features that have a strong correlation with the target variable (`churn_risk_score`) are `membership_category`,`points_in_wallet`,`avg_transaction_value`,`feedback`,`avg_frequency_login_days`,`joined_through_referral`,`preferred_offer_types`,`medium_of_operation`,`region_category` and ` offer_application_preference`.')

    ###
    # Histogram and Boxplot based on user input
    st.write('## Histogram & Boxplot Based on User Input')
    select_column = st.selectbox(
        'Select Column : ',
        ('age', 'days_since_last_login', 'avg_time_spent', 'avg_transaction_value', 'avg_frequency_login_days', 'points_in_wallet'),
    )
    sns.set(font_scale=2)
    fig, ax = plt.subplots(1, 2, figsize=(30, 10))
    fig.suptitle(f'Histogram and Boxplot Visualization of {select_column} ')
    sns.histplot(ax=ax[0], data=df_eda[select_column], kde=True)
    ax[0].set_title(f'{select_column} skewness: {df_eda[select_column].skew()}')
    sns.boxplot(ax=ax[1], data=df_eda, x=df_eda[select_column], width=0.50)
    ax[1].set_title(select_column)

    st.pyplot(fig)
    plt.close(fig)


if __name__ == '__main__':
    run()
final_pipeline.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c4637514be86cee323e8e9ff2651eef530c289f006b697e1faca5e0caea3348
3
+ size 3357
model_seq2.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:babedfb7bd2f04c6ff7cd3b0a8f5ec5568b010274cb5fbe8ccf7fa62eb292076
3
+ size 88696
prediction.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from tensorflow.keras.models import load_model
5
+ import datetime
6
+ import pickle
7
+ import json
8
+ # Load All Files
9
+
10
# Fitted preprocessing pipeline.
# NOTE: unpickling runs arbitrary code, so only trusted artefacts may be
# placed at this path.
with open('final_pipeline.pkl', 'rb') as pipeline_file:
    final_pipeline = pickle.load(pipeline_file)

# JSON list of column names that run() drops before calling the pipeline.
with open('Drop_Columns.txt', 'r') as columns_file:
    Drop_Columns = json.loads(columns_file.read())

# Keras model used for the prediction.
model_seq2 = load_model('model_seq2.h5')
17
+
18
def predicts_churn(y_pred, threshold=0.5):
    """Turn the raw model output for one customer into a churn decision.

    Helper for ``run``.
    NOTE(review): assumes the model emits a single churn probability per row
    (one sigmoid unit) -- confirm against the saved model.

    Args:
        y_pred: Array-like model output; only its first element is read.
        threshold: Probability at or above which the customer counts as
            being at risk of churning.

    Returns:
        True when the first predicted value is >= ``threshold``.
    """
    churn_probability = float(np.asarray(y_pred, dtype=float).reshape(-1)[0])
    return churn_probability >= threshold


def run():
    """Render the prediction form and, once submitted, the model's verdict.

    Collects one customer's attributes, drops the columns listed in
    ``Drop_Columns``, transforms the rest with ``final_pipeline`` and feeds
    the result to ``model_seq2``. Returns nothing.
    """
    with st.form(key='Customer_Churn_Prediction'):
        user_id = st.text_input('ID', value='972706cb0db0068e')
        # Was min=max=value=2012, which made it impossible to enter an age.
        age = st.number_input('Age', min_value=0, max_value=120, value=30)
        gender = st.radio('Gender', ('Male', 'Female'))
        gender = 'M' if gender == 'Male' else 'F'
        region_category = st.selectbox('Region Category', ('Town', 'City', 'Village'))
        membership_category = st.selectbox('Membership Category', ('Premium Membership', 'Basic Membership', 'No Membership', 'Gold Membership', 'Silver Membership', 'Platinum Membership'))
        joining_date = st.date_input('Joining Date', datetime.date(2015, 3, 27))
        joined_through_referral = st.selectbox('Joined Through Referral', ('Yes', 'No'))
        preferred_offer_types = st.selectbox('Preferred Offer Types', ('Credit/Debit Card Offers', 'Gift Vouchers/Coupons', 'Without Offers'))
        medium_of_operation = st.selectbox('Medium of Operation', ('Smartphone', 'Desktop', 'Both'))
        internet_option = st.selectbox('Internet Option', ('Mobile_Data', 'Wi-Fi', 'Fiber_Optic'))
        last_visit_time = st.text_input('Last Visit Time', value='09:41:40')
        days_since_last_login = st.number_input('Days Since Last Login', min_value=0, max_value=31, value=16)
        avg_time_spent = st.number_input('Average Time Spent on the Website', step=0.000001, format="%.6f", min_value=0.000000, max_value=9999.999999, value=1447.387929)
        avg_transaction_value = st.number_input('Average Transaction Value', step=0.01, format="%.2f", min_value=0.00, max_value=99999.99, value=11839.58)
        avg_frequency_login_days = st.number_input('Number of Times Login to the Website', min_value=1, max_value=99, value=29)
        points_in_wallet = st.number_input('Points Balance', step=0.01, format="%.2f", min_value=0.00, max_value=9999.99, value=727.91)
        used_special_discount = st.selectbox('Uses Special Discount Offered ?', ('Yes', 'No'))
        offer_application_preference = st.selectbox('Prefer Offers ?', ('No', 'Yes'))
        past_complaint = st.selectbox(' Has raised any complaints before ?', ('No', 'Yes'))
        complaint_status = st.selectbox('Were the complaints raised resolved?', ('Not Applicable ', 'Unsolved', 'Solved', 'Solved in Follow-up', 'No Information Available'))
        feedback = st.text_input('Feedback', value='No reason specified')

        st.markdown('---')
        submitted = st.form_submit_button('Are Customers at Risk of Churning ? :thinking_face:')

    # One-row frame holding every form value under its dataset column name.
    df_inf = pd.DataFrame([{
        'user_id': user_id,
        'age': age,
        'gender': gender,
        'region_category': region_category,
        'membership_category': membership_category,
        'joining_date': joining_date,
        'joined_through_referral': joined_through_referral,
        'preferred_offer_types': preferred_offer_types,
        'medium_of_operation': medium_of_operation,
        'internet_option': internet_option,
        'last_visit_time': last_visit_time,
        'days_since_last_login': days_since_last_login,
        'avg_time_spent': avg_time_spent,
        'avg_transaction_value': avg_transaction_value,
        'avg_frequency_login_days': avg_frequency_login_days,
        'points_in_wallet': points_in_wallet,
        'used_special_discount': used_special_discount,
        'offer_application_preference': offer_application_preference,
        'past_complaint': past_complaint,
        'complaint_status': complaint_status,
        'feedback': feedback,
    }])

    # Data Inference
    df_inf_copy = df_inf.copy()

    # Removing unnecessary features
    df_inf_final = df_inf_copy.drop(Drop_Columns, axis=1).sort_index()
    data_inf_transform = final_pipeline.transform(df_inf_final)

    st.dataframe(df_inf_final)

    if submitted:
        # Predict using Neural Network
        y_pred_inf = model_seq2.predict(data_inf_transform)
        st.write('# Are Customers at Risk of Churning ? :thinking_face:')
        # The raw output is thresholded instead of compared with exact 0.
        # A positive result means churn: the dataset description on the EDA
        # page defines churn_risk_score 1 as churn and 0 as not churn.
        if predicts_churn(y_pred_inf):
            st.subheader('Yes, customers are at risk of churning :disappointed: ')
        else:
            st.subheader('No, customers are not at risk of churning :wink:')


if __name__ == '__main__':
    run()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ seaborn
4
+ matplotlib
5
+ numpy
6
+ scikit-learn==1.2.1
7
+ tensorflow==2.9.0
8
+ plotly