imamzarkasie committed on
Commit
110b1f5
1 Parent(s): e5db050

Upload 11 files

Browse files
__pycache__/eda.cpython-39.pyc ADDED
Binary file (2.67 kB). View file
 
__pycache__/prediction.cpython-39.pyc ADDED
Binary file (3.08 kB). View file
 
app.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import eda
3
+ import prediction
4
+
5
# Sidebar page switcher: 'EDA' opens the exploration page, any other
# choice opens the prediction form.
navigation = st.sidebar.selectbox('Select Page : ', ('EDA', 'Predict A Customer'))

selected_page = eda if navigation == 'EDA' else prediction
selected_page.run()
churn.csv ADDED
The diff for this file is too large to render. See raw diff
 
churn.jpeg ADDED
churn_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b2cc27d1634550f8e9e2b8803f06bd8cd5b596eb61ec26a3c243f7b73efb168
3
+ size 2989736
eda.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import seaborn as sns
5
+ import matplotlib.pyplot as plt
6
+ import plotly.express as px
7
+ from PIL import Image
8
+
9
# Page-level settings for the EDA view, applied once when this module is
# executed or imported.
_PAGE_CONFIG = {
    'page_title': 'Customer Churn Risk - EDA',
    'layout': 'wide',
    'initial_sidebar_state': 'expanded',
}
st.set_page_config(**_PAGE_CONFIG)
14
+
15
def _show_figure(fig):
    """Render a Matplotlib figure in Streamlit, then release it."""
    st.pyplot(fig)
    # pyplot keeps a reference to every figure until it is closed; close it
    # here so figures do not accumulate each time the page is re-rendered.
    plt.close(fig)


def run():
    """Render the exploratory data analysis page for the churn dataset.

    Reads ``churn.jpeg`` and ``churn.csv`` by relative path, then draws the
    page header, the raw data table, and a series of charts of
    ``churn_risk_score`` and of a user-selected numeric column.
    """
    # Page title and sub header
    st.title('Customer Churn Prediction')
    st.subheader('EDA for Customer Churn Risk')

    # Header image
    image = Image.open('churn.jpeg')
    st.image(image, caption='Customer Churn')

    # Introduction
    st.write('This page created by **Imam Zarkasie**')
    st.write('### Hello!')
    st.write('#### The competition of product sales in the e-commerce industry in heating up!')
    st.write('##### In this page we can explore customer segmentation, more than that this website provides an ability to predict a customer churn risk.')

    # Horizontal rule
    st.markdown('---')

    # Written with an explicit st.write instead of a bare "magic" string:
    # Streamlit magic is only applied to the main app file, so the text
    # would not be shown when this module is imported by another script.
    st.write(
        'On this page, the author will do a simple exploration.\n'
        'The dataset used is the churn dataset.'
    )

    # Show the raw DataFrame
    df = pd.read_csv('churn.csv')
    st.dataframe(df)

    # Histogram of the target column
    st.write('#### Histogram of Churn')
    fig, ax = plt.subplots(figsize=(15, 5))
    sns.histplot(df['churn_risk_score'], bins=30, kde=True, ax=ax)
    _show_figure(fig)

    # Churn risk score counts per membership category
    grouped = df.groupby('membership_category')['churn_risk_score'].value_counts().unstack().fillna(0)
    fig, ax = plt.subplots(figsize=(8, 3))
    grouped.plot(kind='bar', stacked=False, ax=ax)
    _show_figure(fig)

    # Churn risk score counts per gender
    grouped = df.groupby('gender')['churn_risk_score'].value_counts().unstack().fillna(0)

    # Take the score labels from the table's own columns so each tick label
    # matches the bar drawn at that position. Series.unique() returns values
    # in order of appearance, which need not match the column order.
    churn_risk_scores = list(grouped.columns)

    # Side-by-side bars: male bars at integer positions, female bars shifted
    # right by one bar width.
    bar_width = 0.35
    male_positions = np.arange(len(churn_risk_scores))
    female_positions = male_positions + bar_width

    fig, ax = plt.subplots(figsize=(8, 4))
    ax.bar(male_positions, grouped.loc['M'], width=bar_width, label='Male', color='lightblue')
    ax.bar(female_positions, grouped.loc['F'], width=bar_width, label='Female', color='pink')

    # Centre each tick between the two bars of its group
    ax.set_xticks(male_positions + bar_width / 2)
    ax.set_xticklabels(churn_risk_scores)

    ax.set_xlabel('Churn Risk Score')
    ax.set_ylabel('Count')
    ax.set_title('Churn Risk Score by Gender')
    ax.legend()
    ax.grid(True, axis='y', linestyle='--', alpha=0.5)
    _show_figure(fig)

    # Histogram of a column chosen by the user
    st.write('#### Histogram based on input user')
    pilihan = st.selectbox('Pilih column : ', ('avg_time_spent', 'avg_transaction_value', 'avg_frequency_login_days', 'points_in_wallet'))
    fig, ax = plt.subplots(figsize=(15, 5))
    sns.histplot(df[pilihan], bins=30, kde=True, ax=ax)
    _show_figure(fig)

    # Interactive scatter plot
    st.write('#### Plotly Plot - points_in_wallet VS avg_transaction_value')
    fig = px.scatter(df, x='points_in_wallet', y='avg_transaction_value')
    st.plotly_chart(fig)


if __name__ == '__main__':
    run()
final_pipeline.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9ed56de94ad1dd4fb9516e9975b9530878c0439b02d01c349cafd004daa06d8
3
+ size 3385
model_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:614f4ea128d1a715049a5af4d9b1564bdcf44554b6bdc090f5ca98877954d9f7
3
+ size 516
prediction.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import pickle
5
+ import json
6
+ import tensorflow as tf
7
+ # Load All Files
8
+
9
+ # Load the Models
10
+
11
# Load the fitted preprocessing pipeline and the membership encoder.
# NOTE: pickle.load can execute arbitrary code from the file it reads, so
# these must only ever be artifacts shipped with the app itself.
with open('final_pipeline.pkl', 'rb') as file_1, open('model_encoder.pkl', 'rb') as file_2:
    model_pipeline = pickle.load(file_1)
    encoder_ord = pickle.load(file_2)

# Load the trained Keras network used for the churn prediction.
model_ann = tf.keras.models.load_model('churn_model.h5')
18
+
19
+
20
def run():
    """Render the churn prediction form and display the predicted risk.

    Collects one customer's attributes through a Streamlit form, shows them
    as a one-row table, and, once the form is submitted, encodes
    ``membership_category`` with ``encoder_ord``, transforms the row with
    ``model_pipeline`` and thresholds the output of ``model_ann`` at 0.5 to
    report a churn risk of 0 or 1.
    """
    with st.form(key='from_churn'):
        user_id = st.text_input('User id', value='')
        age = st.number_input('Age', min_value=0, max_value=100, value=0)
        gender = st.selectbox('Gender', ('M', 'F'), index=1)
        region_category = st.selectbox('Region', ('Town', 'City', 'Village'), index=1)
        st.markdown('---')

        membership_category = st.selectbox('Membership Category', ('Basic Membership', 'No Membership', 'Gold Membership', 'Silver Membership', 'Premium Membership', 'Platinum Membership'), index=1)
        joining_days = st.number_input('joining_days', min_value=0, max_value=1000, value=0)
        joined_through_referral = st.selectbox('Join Through Referral', ('Yes', 'No'), index=1)
        preferred_offer_types = st.selectbox('Offer Type', ('Gift Vouchers/Coupons', 'Credit/Debit Card Offers', 'Without Offers'), index=1)
        medium_of_operation = st.selectbox('Gadget Type', ('Desktop', 'Smartphone', 'Both'), index=1)
        internet_option = st.selectbox('Internet Type', ('Wi-Fi', 'Mobile_Data', 'Fiber_Optic'), index=1)
        days_since_last_login = st.number_input('Days Since Last Login', min_value=0, max_value=100, value=0)
        avg_time_spent = st.number_input('Average Time Spent', min_value=0, max_value=3000, value=0)
        avg_transaction_value = st.number_input('Average Transaction Value', min_value=0, max_value=100000, value=0)
        avg_frequency_login_days = st.number_input('Average Login Days', min_value=0, max_value=100, value=0)
        points_in_wallet = st.number_input('Points in Wallet', min_value=0, max_value=3000, value=0)
        used_special_discount = st.selectbox('Used Special Discount', ('Yes', 'No'), index=1)
        offer_application_preference = st.selectbox('Offer Preference', ('Yes', 'No'), index=1)
        past_complaint = st.selectbox('Past Complaint', ('Yes', 'No'), index=1)
        complaint_status = st.selectbox('Complaint Status', ('Not Applicable', 'Unsolved', 'Solved', 'Solved in Follow-up', 'No Information Available'), index=1)
        feedback = st.selectbox('Feedback', ('Poor Product Quality', 'No reason specified', 'Too many ads', 'Poor Website', 'Poor Customer Service', 'Reasonable Price', 'User Friendly Website', 'Products always in Stock', 'Quality Customer Care'), index=1)

        submitted = st.form_submit_button('Predict')

    # One-row frame holding the values entered in the form
    data_inf = pd.DataFrame([{
        'user_id': user_id,
        'age': age,
        'gender': gender,
        'region_category': region_category,
        'membership_category': membership_category,
        'joining_days': joining_days,
        'joined_through_referral': joined_through_referral,
        'preferred_offer_types': preferred_offer_types,
        'medium_of_operation': medium_of_operation,
        'internet_option': internet_option,
        'days_since_last_login': days_since_last_login,
        'avg_time_spent': avg_time_spent,
        'avg_transaction_value': avg_transaction_value,
        'avg_frequency_login_days': avg_frequency_login_days,
        'points_in_wallet': points_in_wallet,
        'used_special_discount': used_special_discount,
        'offer_application_preference': offer_application_preference,
        'past_complaint': past_complaint,
        'complaint_status': complaint_status,
        'feedback': feedback,
    }])
    st.dataframe(data_inf)

    if submitted:
        # Encode the membership category with the already-fitted encoder.
        # Use transform(), not fit_transform(): refitting on this single row
        # would discard the mapping learned during training and would also
        # overwrite the shared module-level encoder.
        enc_columns = ['membership_category']
        data_inf[enc_columns] = encoder_ord.transform(data_inf[enc_columns])

        # Feature scaling and feature encoding
        data_inf_transform = model_pipeline.transform(data_inf)

        # Predict with the neural network and threshold at 0.5
        y_pred_inf = model_ann.predict(data_inf_transform)
        y_pred_inf = np.where(y_pred_inf >= 0.5, 1, 0)

        # .item() extracts the single prediction as a Python scalar; calling
        # int() directly on a multi-dimensional array is deprecated in NumPy.
        st.write('# Churn Risk : ', str(int(y_pred_inf.item())))


if __name__ == '__main__':
    run()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ seaborn
4
+ matplotlib
5
+ Pillow
6
+ plotly
7
+ scikit-learn==1.2.2
8
+ tensorflow