Spaces:
Running
Running
deployment
Browse files- app.py +10 -0
- churn_model.h5 +3 -0
- customer-churn-edit.jpeg +0 -0
- eda.py +102 -0
- final_pipeline.pkl +3 -0
- prediction.py +81 -0
- requirements.txt +8 -0
app.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import eda
|
3 |
+
import prediction
|
4 |
+
|
5 |
+
# Sidebar selector that decides which page of the app is rendered.
navigation = st.sidebar.selectbox('Pilih Halaman : ', ('EDA', 'Predict'))

# Any choice other than 'EDA' falls through to the prediction page.
render_page = eda.run if navigation == 'EDA' else prediction.run
render_page()
|
churn_model.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff6f27a3999a90ae70f33d4ef253931a23d1ee68d6ddfc5df0802681e5417985
|
3 |
+
size 44168
|
customer-churn-edit.jpeg
ADDED
![]() |
eda.py
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import seaborn as sns
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import plotly.express as px
|
6 |
+
from PIL import Image
|
7 |
+
|
8 |
+
# Page-level settings: browser-tab title, wide layout, sidebar open on load.
# The title text goes in `page_title`; `page_icon` is meant for an emoji or an
# image, not for a caption.
st.set_page_config(
    page_title='Churn and Prediction',
    layout='wide',
    initial_sidebar_state='expanded',
)
|
12 |
+
|
13 |
+
def _labels_from_index(counts, names=None):
    """Return one pie label per entry of ``counts``, in the same order.

    ``names`` optionally maps raw category values to display names; values
    without a mapping are shown as their string form.
    """
    names = names or {}
    return [names.get(key, str(key)) for key in counts.index]


def _draw_pie(position, series, title, names=None):
    """Draw the value counts of ``series`` as a pie in slot ``position`` of a 1x4 grid."""
    counts = series.value_counts()
    plt.subplot(1, 4, position)
    # Labels are derived from the counts themselves so they can never be
    # attached to the wrong slice or disagree in length with the data.
    plt.pie(counts, labels=_labels_from_index(counts, names), autopct='%1.1f%%')
    plt.title(title)


def _draw_count(grid, position, series, title):
    """Draw a count plot of ``series`` in slot ``position`` of a 1 x ``grid`` layout."""
    plt.subplot(1, grid, position)
    sns.countplot(x=series)
    plt.title(title)


def run():
    """Render the exploratory-data-analysis page.

    Shows the title, header image and introduction, loads the churn dataset
    from its remote CSV, displays it, and draws pie/count charts for the rows
    whose ``churn_risk_score`` equals 1, each followed by a short commentary.
    """
    # Page title
    st.title('Churn and Prediction')

    # Sub header
    st.subheader('Exploratory Data Analysis Churn Risk Score')

    # Header image
    image = Image.open('customer-churn-edit.jpeg')
    st.image(image)

    # Introduction
    st.write('Ditengah ramainya persaingan dunia industri yang semakin ketat ini, maka menjaga pelanggan tetap puas dan tetap menggunakan produk dari perusahaan kita adalah hal yang paling penting dalam sebuah bisnis. Dimana kita harus meminimalisir pelanggan kita yang akan akan churn. Dalam kasus ini saya mencoba untuk membuat model machine learnig yang akan digunakan untuk memprediksi pelanggan yang akan churn atau tidak, untuk membantu memberikan gambaran pada perusahaan tentang pelanggan yang akan churn dan dapat mengambil tindakan yang tepat untuk mengatsai masalah tersebut')
    # Horizontal rule
    st.write('---')

    # Written explicitly with st.write: a bare "magic" string literal is only
    # rendered in the main script, not in a module that is imported by it.
    st.write('Pada dataset di bawah ini terdpat pelanggan dengan berbagai kondisi mulai dari tempat tinggal, umur, jenis kelamin, dll beserta dengan churn risk scorenya. Dengan dataset ini saya akan coba mencari tau karakteristik pelanggan yang menngalami churn')

    # Load and show the dataset
    data = pd.read_csv('https://raw.githubusercontent.com/pram2601/Hacktiv8/main/churn.csv')
    st.dataframe(data)

    # Rows with churn risk score == 1
    data_c = data[data['churn_risk_score'] == 1]

    # Pie charts: overall churn share, then churned rows split by other columns
    st.write('### Data Churn Berdasarkan Berbagai Kondisi')
    fig = plt.figure(figsize=(25, 5))

    _draw_pie(1, data['churn_risk_score'], 'Churn', names={1: 'Yes', 0: 'No'})
    _draw_pie(2, data_c['gender'], 'Churn by Gender', names={'F': 'Female', 'M': 'Male'})
    _draw_pie(3, data_c['region_category'], 'Churn by Region')
    _draw_pie(4, data_c['membership_category'], 'Churn by Membership Category')

    st.pyplot(fig)
    # Release the figure so figures do not pile up across reruns.
    plt.close(fig)

    st.write('- Dalam chart diatas terlihat bahwa pelanggan yang churn cukup banyak yaitu 54.1%, ini sangat tidak baik untuk sebuah bisnis, dimana jenis kelamin yang churn mayoritas memiliki jumlah yang sama, tidak ada perbedaan yang signifikan. Namun pada region ini cukup banyak yang churn di Town dan City, keduanya cukup mendoninasi, dimana yang di desa malah sedikit yang mengalami churn. Untuk membershipnya sendiri kebanyakan di dominasi oleh basic membership dan no membership(belum dapat title membership)')

    # Churn by feedback and by age
    fig = plt.figure(figsize=(30, 5))

    _draw_count(2, 1, data_c['feedback'], 'Churn By Feedback')
    _draw_count(2, 2, data_c['age'], 'Churn By Age')
    # Rotate the tick labels of the age plot (the current axes) to vertical.
    plt.xticks(rotation=90)

    st.pyplot(fig)
    plt.close(fig)

    st.write('- Dari chart diatas pelanggan yang churn kebanyakan mempunyai feedback yang negatif, dan untuk kolom age yan churn datanya tersebar secara merata')

    # Churn by used special discount, offer application preference and past complaint
    fig = plt.figure(figsize=(20, 5))

    _draw_count(3, 1, data_c['used_special_discount'], 'Churn By Used Special Discount')
    _draw_count(3, 2, data_c['offer_application_preference'], 'Churn By Offer Application Preference')
    _draw_count(3, 3, data_c['past_complaint'], 'Churn By Past Complaint')

    st.pyplot(fig)
    plt.close(fig)

    st.write('- Dari chart diatas walaupun menggunakan diskon masih saja pelanggan tetap mengalami churn, dan pelanggan yang sebelumnya pernah komplain dan tidak, masih sama saja tetap bisa mengalami churn')
    st.write('---')
    st.write('Pada EDA dalam kasus kali ini agak cukup membingungkann karakteristik pelanggaan yang churn ini, dimana mereka sudah diberikan promo dan banyak yang menggunakan promo tersebut tapi masih tetap saja churn, ini sepertinya memang produknya yang perlu ditingkatkan lagi kualitsnya. Dilihat dari feedback juga yang churn kebanyakan memberikan feedback yang negatif, mulai dari pelayanan yang kurang, produk yang kurang, dll. Ini mungkin bisa jadi bahan evaluasi untuk perusahan dalam hal pelayanan dan kualitas produk.')


if __name__ == '__main__':
    run()
|
final_pipeline.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e2591cdd0de9f42fdf30edbd277b3869a105b7a4deda5dd8d194ff1c6fba318
|
3 |
+
size 2563
|
prediction.py
ADDED
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import pickle
|
5 |
+
import json
|
6 |
+
from datetime import date, datetime, time
|
7 |
+
from tensorflow.keras.models import load_model
|
8 |
+
|
9 |
+
# load model
|
10 |
+
# Load both artefacts once, when the module is imported.
# `model_pipeline` is the pickled object later used through `.transform(...)`.
with open('final_pipeline.pkl', 'rb') as pipeline_file:
    model_pipeline = pickle.load(pipeline_file)

# `model_ann` is the Keras model later used through `.predict(...)`.
model_ann = load_model('churn_model.h5')
|
13 |
+
|
14 |
+
def run():
    """Render the churn-prediction page.

    Shows an input form, echoes the entered values as a one-row data frame,
    and, once the form is submitted, transforms that row with
    ``model_pipeline``, scores it with ``model_ann`` and writes "Yes" or "No".
    """
    # Page title
    st.title('Churn Prediction')

    # Each answer is stored straight into the dict; insertion order therefore
    # fixes the column order of the inference data frame.
    answers = {}
    with st.form(key='Churn Status'):
        answers['user_id'] = st.text_input('User ID', value='')
        answers['age'] = st.number_input('Age', min_value=10, max_value=70, value=26, step=1)
        answers['gender'] = st.radio('Gender', ('M', 'F'))
        answers['region_category'] = st.selectbox(
            'Region Category',
            ('Town', 'City', 'Village'),
            index=1,
        )
        answers['membership_category'] = st.selectbox(
            'Membership Category',
            ('No Membership', 'Basic Membership', 'Silver Membership', 'Gold Membership', 'Premium Membership', 'Platinum Membership'),
            index=1,
        )
        answers['joining_date'] = st.date_input('Joining Date', date.today())
        answers['joined_through_referral'] = st.radio('Join By Referral', ('Yes', 'No'))
        answers['preferred_offer_types'] = st.selectbox(
            'Preferred Offer Types',
            ('Gift Vouchers/Coupons', 'Credit/Debit Card Offers', 'Without Offers'),
            index=1,
        )
        answers['medium_of_operation'] = st.selectbox(
            'Medium of Operation',
            ('Desktop', 'Smartphone', 'Both'),
            index=1,
        )
        answers['internet_option'] = st.selectbox(
            'Internet Option',
            ('Wi-Fi', 'Mobile_Data', 'Fiber_Optic'),
            index=1,
        )
        answers['last_visit_time'] = st.time_input('Last Visit Time ', time(hour=9, minute=0))
        answers['days_since_last_login'] = st.number_input('Days Since Last Login', min_value=0, max_value=30, value=26, step=1)
        answers['avg_time_spent'] = st.number_input('Average Time Spent', min_value=0, max_value=400, value=26, step=1)
        answers['avg_transaction_value'] = st.number_input('Average Transaction Value ', min_value=750, max_value=100000, value=800, step=50)
        answers['avg_frequency_login_days'] = st.number_input('Average Frequency Login Days', min_value=0, max_value=70, value=26, step=1)
        answers['points_in_wallet'] = st.number_input('Points In Wallet', min_value=0, max_value=2100, value=260, step=10)
        answers['used_special_discount'] = st.radio('Use Special Discount', ('Yes', 'No'))
        answers['offer_application_preference'] = st.radio('Offer Application Preference', ('Yes', 'No'))
        answers['past_complaint'] = st.radio('Past Complain', ('Yes', 'No'))
        answers['complaint_status'] = st.selectbox(
            'Complaint Status',
            ('Not Applicable', 'Unsolved', 'Solved', 'Solved in Follow-up', 'No Information Available'),
            index=1,
        )
        answers['feedback'] = st.selectbox(
            'Feedback',
            ('Poor Product Quality', 'No reason specified', 'Too many ads', 'Poor Website', 'Poor Customer Service', 'Reasonable Price', 'User Friendly Website', 'Products always in Stock', 'Quality Customer Care'),
            index=1,
        )

        submitted = st.form_submit_button('Predict')

    # One-row inference set, echoed back to the user.
    data_inf = pd.DataFrame([answers])
    st.dataframe(data_inf)

    # Nothing to predict until the form has been submitted.
    if not submitted:
        return

    # Transform the inference set
    data_inf_transform = model_pipeline.transform(data_inf)

    # Score with the neural network and cut the output at 0.5
    y_pred_inf = model_ann.predict(data_inf_transform)
    y_pred_inf = np.where(y_pred_inf >= 0.5, 1, 0)
    if y_pred_inf == 1:
        churn_prediction = "Yes"
    else:
        churn_prediction = "No"
    st.write('# Churn : ', churn_prediction)


if __name__ == '__main__':
    run()
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
tensorflow
|
3 |
+
pandas
|
4 |
+
seaborn
|
5 |
+
matplotlib
|
6 |
+
numpy
|
7 |
+
scikit-learn==1.2.1
|
8 |
+
plotly
|