Commit
•
110b1f5
1
Parent(s):
e5db050
Upload 11 files
Browse files
- __pycache__/eda.cpython-39.pyc +0 -0
- __pycache__/prediction.cpython-39.pyc +0 -0
- app.py +10 -0
- churn.csv +0 -0
- churn.jpeg +0 -0
- churn_model.h5 +3 -0
- eda.py +120 -0
- final_pipeline.pkl +3 -0
- model_encoder.pkl +3 -0
- prediction.py +93 -0
- requirements.txt +8 -0
__pycache__/eda.cpython-39.pyc
ADDED
Binary file (2.67 kB). View file
|
|
__pycache__/prediction.cpython-39.pyc
ADDED
Binary file (3.08 kB). View file
|
|
app.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import eda
|
3 |
+
import prediction
|
4 |
+
|
5 |
+
# Sidebar page selector. Any choice other than 'EDA' is routed to the
# prediction page, so the two options map one-to-one onto the two modules.
navigation = st.sidebar.selectbox('Select Page : ', ('EDA', 'Predict A Customer'))

selected_page = eda if navigation == 'EDA' else prediction
selected_page.run()
|
churn.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
churn.jpeg
ADDED
![]() |
churn_model.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b2cc27d1634550f8e9e2b8803f06bd8cd5b596eb61ec26a3c243f7b73efb168
|
3 |
+
size 2989736
|
eda.py
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import seaborn as sns
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
import plotly.express as px
|
7 |
+
from PIL import Image
|
8 |
+
|
9 |
+
# Page-level Streamlit settings for the app.
# NOTE(review): this call executes when the module is imported, not when
# run() is called -- confirm the entry script issues no other st.* command
# before importing this module.
_PAGE_CONFIG = {
    'page_title': 'Customer Churn Risk - EDA',
    'layout': 'wide',
    'initial_sidebar_state': 'expanded',
}
st.set_page_config(**_PAGE_CONFIG)
|
14 |
+
|
15 |
+
def _show_figure(fig):
    """Render a Matplotlib figure in Streamlit, then close it.

    pyplot keeps every figure it creates alive until it is closed
    explicitly, so figures that are rendered but never closed accumulate
    each time the page is drawn again.
    """
    st.pyplot(fig)
    plt.close(fig)


def run():
    """Render the EDA page for the churn dataset.

    Shows the page header and banner image, the raw ``churn.csv`` table,
    the distribution of ``churn_risk_score`` (overall, per membership
    category and per gender), a histogram of a user-selected numeric
    column, and a Plotly scatter of wallet points against average
    transaction value.

    Reads ``churn.jpeg`` and ``churn.csv`` from the current working
    directory.
    """
    # Page title
    st.title('Customer Churn Prediction')

    # Sub header
    st.subheader('EDA for Customer Churn Risk')

    # Banner image
    image = Image.open('churn.jpeg')
    st.image(image, caption='Customer Churn')

    # Description
    st.write('This page created by **Imam Zarkasie**')
    st.write('### Hello!')
    st.write('#### The competition of product sales in the e-commerce industry in heating up!')
    st.write('##### In this page we can explore customer segmentation, more than that this website provides an ability to predict a customer churn risk.')

    # Horizontal rule
    st.markdown('---')

    # Written with an explicit st.write rather than a bare string literal:
    # Streamlit "magic" only applies to the main script, so a bare string
    # would not be rendered when this module is imported by another script.
    st.write(
        'On this page, the author will do a simple exploration.\n'
        'The dataset used is the churn dataset.'
    )

    # Show the raw DataFrame
    df = pd.read_csv('churn.csv')
    st.dataframe(df)

    # Histogram of the target feature
    st.write('#### Histogram of Churn')
    fig, ax = plt.subplots(figsize=(15, 5))
    sns.histplot(df['churn_risk_score'], bins=30, kde=True, ax=ax)
    _show_figure(fig)

    # Count of each churn risk score per membership category
    grouped = df.groupby('membership_category')['churn_risk_score'].value_counts().unstack().fillna(0)
    fig, ax = plt.subplots(figsize=(8, 3))
    grouped.plot(kind='bar', stacked=False, ax=ax)
    _show_figure(fig)

    # Count of each churn risk score per gender
    grouped = df.groupby('gender')['churn_risk_score'].value_counts().unstack().fillna(0)

    # Take the score labels from the grouped table itself so every tick
    # label lines up with the bar drawn at that position; the order of
    # df[...].unique() is not guaranteed to match the table's columns.
    churn_risk_scores = list(grouped.columns)

    # Width of each bar
    bar_width = 0.35

    # Side-by-side bar positions for each gender
    male_positions = np.arange(len(churn_risk_scores))
    female_positions = male_positions + bar_width

    fig, ax = plt.subplots(figsize=(8, 4))

    # Male bars (light blue)
    ax.bar(male_positions, grouped.loc['M'], width=bar_width, label='Male', color='lightblue')

    # Female bars (pink)
    ax.bar(female_positions, grouped.loc['F'], width=bar_width, label='Female', color='pink')

    # Centre each tick between the two bars of its group
    ax.set_xticks(male_positions + bar_width/2)
    ax.set_xticklabels(churn_risk_scores)

    # Axis labels, title, legend and a light horizontal grid
    ax.set_xlabel('Churn Risk Score')
    ax.set_ylabel('Count')
    ax.set_title('Churn Risk Score by Gender')
    ax.legend()
    ax.grid(True, axis='y', linestyle='--', alpha=0.5)

    _show_figure(fig)

    # Histogram of a column chosen by the user
    st.write('#### Histogram based on input user')
    pilihan = st.selectbox('Pilih column : ', ('avg_time_spent', 'avg_transaction_value', 'avg_frequency_login_days', 'points_in_wallet'))
    fig, ax = plt.subplots(figsize=(15, 5))
    sns.histplot(df[pilihan], bins=30, kde=True, ax=ax)
    _show_figure(fig)

    # Plotly scatter plot
    st.write('#### Plotly Plot - points_in_wallet VS avg_transaction_value')
    fig = px.scatter(df, x='points_in_wallet', y='avg_transaction_value')
    st.plotly_chart(fig)


if __name__ == '__main__':
    run()
|
final_pipeline.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b9ed56de94ad1dd4fb9516e9975b9530878c0439b02d01c349cafd004daa06d8
|
3 |
+
size 3385
|
model_encoder.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:614f4ea128d1a715049a5af4d9b1564bdcf44554b6bdc090f5ca98877954d9f7
|
3 |
+
size 516
|
prediction.py
ADDED
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import pickle
|
5 |
+
import json
|
6 |
+
import tensorflow as tf
|
7 |
+
# Load the preprocessing and model artifacts once, at import time.
# pickle.load can execute code embedded in the file it reads, so these
# files must be trusted artifacts shipped with the app.
def _load_pickle(path):
    """Return the object deserialized from the pickle file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


model_pipeline = _load_pickle('final_pipeline.pkl')
encoder_ord = _load_pickle('model_encoder.pkl')
model_ann = tf.keras.models.load_model('churn_model.h5')
|
18 |
+
|
19 |
+
|
20 |
+
def run():
    """Render the churn-prediction form and display the model's prediction.

    Collects one customer's attributes through a Streamlit form, shows them
    as a single-row DataFrame, and -- once the form is submitted -- encodes
    ``membership_category`` with ``encoder_ord``, transforms the row with
    ``model_pipeline``, scores it with ``model_ann`` and writes the
    thresholded result (0 or 1) to the page.
    """
    with st.form(key='from_churn'):
        user_id = st.text_input('User id', value='')
        age = st.number_input('Age', min_value=0, max_value=100, value=0)
        gender = st.selectbox('Gender', ('M', 'F'), index=1)
        region_category = st.selectbox('Region', ('Town', 'City', 'Village'), index=1)
        st.markdown('---')

        membership_category = st.selectbox('Membership Category', ('Basic Membership', 'No Membership', 'Gold Membership', 'Silver Membership', 'Premium Membership', 'Platinum Membership'), index=1)
        joining_days = st.number_input('joining_days', min_value=0, max_value=1000, value=0)
        joined_through_referral = st.selectbox('Join Through Referral', ('Yes', 'No'), index=1)
        preferred_offer_types = st.selectbox('Offer Type', ('Gift Vouchers/Coupons', 'Credit/Debit Card Offers', 'Without Offers'), index=1)
        medium_of_operation = st.selectbox('Gadget Type', ('Desktop', 'Smartphone', 'Both'), index=1)
        internet_option = st.selectbox('Internet Type', ('Wi-Fi', 'Mobile_Data', 'Fiber_Optic'), index=1)
        days_since_last_login = st.number_input('Days Since Last Login', min_value=0, max_value=100, value=0)
        avg_time_spent = st.number_input('Average Time Spent', min_value=0, max_value=3000, value=0)
        avg_transaction_value = st.number_input('Average Transaction Value', min_value=0, max_value=100000, value=0)
        avg_frequency_login_days = st.number_input('Average Login Days', min_value=0, max_value=100, value=0)
        points_in_wallet = st.number_input('Points in Wallet', min_value=0, max_value=3000, value=0)
        used_special_discount = st.selectbox('Used Special Discount', ('Yes', 'No'), index=1)
        offer_application_preference = st.selectbox('Offer Preference', ('Yes', 'No'), index=1)
        past_complaint = st.selectbox('Past Complaint', ('Yes', 'No'), index=1)
        complaint_status = st.selectbox('Complaint Status', ('Not Applicable', 'Unsolved', 'Solved', 'Solved in Follow-up', 'No Information Available'), index=1)
        feedback = st.selectbox('Feedback', ('Poor Product Quality', 'No reason specified', 'Too many ads', 'Poor Website', 'Poor Customer Service', 'Reasonable Price', 'User Friendly Website', 'Products always in Stock', 'Quality Customer Care'), index=1)

        submitted = st.form_submit_button('Predict')

    # Single-row frame holding the values currently entered in the form.
    data_inf = pd.DataFrame([{
        'user_id': user_id,
        'age': age,
        'gender': gender,
        'region_category': region_category,
        'membership_category': membership_category,
        'joining_days': joining_days,
        'joined_through_referral': joined_through_referral,
        'preferred_offer_types': preferred_offer_types,
        'medium_of_operation': medium_of_operation,
        'internet_option': internet_option,
        'days_since_last_login': days_since_last_login,
        'avg_time_spent': avg_time_spent,
        'avg_transaction_value': avg_transaction_value,
        'avg_frequency_login_days': avg_frequency_login_days,
        'points_in_wallet': points_in_wallet,
        'used_special_discount': used_special_discount,
        'offer_application_preference': offer_application_preference,
        'past_complaint': past_complaint,
        'complaint_status': complaint_status,
        'feedback': feedback,
    }])
    st.dataframe(data_inf)

    if submitted:
        # Encode membership_category with the already-loaded encoder.
        # transform() -- not fit_transform() -- so the mapping stored in the
        # pickled encoder is applied as-is; refitting on this single row
        # would overwrite that mapping with one learned from a single value.
        # NOTE(review): assumes encoder_ord was fitted before it was
        # pickled -- confirm against the training notebook.
        enc_columns = ['membership_category']
        data_inf[enc_columns] = encoder_ord.transform(data_inf[enc_columns])

        # Apply the preprocessing pipeline to the encoded row
        data_inf_transform = model_pipeline.transform(data_inf)

        # Predict with the neural-network model and threshold at 0.5
        y_pred_inf = model_ann.predict(data_inf_transform)
        y_pred_inf = np.where(y_pred_inf >= 0.5, 1, 0)

        # .item() extracts the single element as a Python scalar; calling
        # int() directly on an array with ndim > 0 is deprecated in NumPy.
        st.write('# Churn Risk : ', str(int(y_pred_inf.item())))


if __name__ == '__main__':
    run()
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pandas
|
3 |
+
seaborn
|
4 |
+
matplotlib
|
5 |
+
Pillow
|
6 |
+
plotly
|
7 |
+
scikit-learn==1.2.2
|
8 |
+
tensorflow
|