Spaces:

imamzarkasie
/

Customer-Churn-Prediction

Sleeping

App Files Files Community

imamzarkasie committed on Jun 2, 2023

Commit

110b1f5

•

1 Parent(s): e5db050

Upload 11 files

Browse files

Files changed (11) hide show

__pycache__/eda.cpython-39.pyc +0 -0
__pycache__/prediction.cpython-39.pyc +0 -0
app.py +10 -0
churn.csv +0 -0
churn.jpeg +0 -0
churn_model.h5 +3 -0
eda.py +120 -0
final_pipeline.pkl +3 -0
model_encoder.pkl +3 -0
prediction.py +93 -0
requirements.txt +8 -0

__pycache__/eda.cpython-39.pyc ADDED Viewed

Binary file (2.67 kB). View file

__pycache__/prediction.cpython-39.pyc ADDED Viewed

Binary file (3.08 kB). View file

app.py ADDED Viewed

	@@ -0,0 +1,10 @@

+import streamlit as st
+import eda
+import prediction
+navigation = st.sidebar.selectbox('Select Page : ', ('EDA', 'Predict A Customer'))
+if navigation == 'EDA':
+    eda.run()
+else:
+    prediction.run()

churn.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

churn.jpeg ADDED Viewed

churn_model.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9b2cc27d1634550f8e9e2b8803f06bd8cd5b596eb61ec26a3c243f7b73efb168
+size 2989736

eda.py ADDED Viewed

	@@ -0,0 +1,120 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import seaborn as sns
+import matplotlib.pyplot as plt
+import plotly.express as px
+from PIL import Image
+st.set_page_config(
+    page_title = 'Customer Churn Risk - EDA',
+    layout = 'wide',
+    initial_sidebar_state = 'expanded'
+    )
+def run():
+    # Membuat Title
+    st.title('Customer Churn Prediction')
+    # Membuat Sub Header
+    st.subheader('EDA for Customer Churn Risk')
+    # Menambahkan Gambar
+    image = Image.open('churn.jpeg')
+    st.image(image, caption='Customer Churn')
+    # Menambahkan Deskripsi
+    st.write('This page created by **Imam Zarkasie**')
+    st.write('### Hello!')
+    st.write('#### The competition of product sales in the e-commerce industry in heating up!')
+    st.write('##### In this page we can explore customer segmentation, more than that this website provides an ability to predict a customer churn risk.')
+    # Membuat Garis Lurus
+    st.markdown('---')
+    # Magic Syntax
+    '''
+    On this page, the author will do a simple exploration.
+    The dataset used is the churn dataset.
+    '''
+    # Show DataFrame
+    df = pd.read_csv('churn.csv')
+    st.dataframe(df)
+    #Melihat histogram fitur target
+    df['churn_risk_score'].value_counts().plot(kind='bar')
+    # Membuat Historgram
+    st.write('#### Histogram of Churn')
+    fig = plt.figure(figsize=(15, 5))
+    sns.histplot(df['churn_risk_score'], bins=30, kde=True)
+    st.pyplot(fig)
+    # Mengelompokkan data dan menghitung statistik churn risk score
+    grouped = df.groupby('membership_category')['churn_risk_score'].value_counts().unstack().fillna(0)
+    # Membuat barplot
+    fig, ax = plt.subplots(figsize=(8, 3))
+    grouped.plot(kind='bar', stacked=False, ax=ax)
+    # Menampilkan plot di Streamlit
+    st.pyplot(fig)
+    # Mengelompokkan data dan menghitung statistik churn risk score
+    grouped = df.groupby('gender')['churn_risk_score'].value_counts().unstack().fillna(0)
+    # Mengambil nilai churn risk score yang unik
+    churn_risk_scores = df['churn_risk_score'].unique()
+    # Mengatur lebar barplot
+    bar_width = 0.35
+    # Mengatur posisi barplot untuk setiap gender
+    male_positions = np.arange(len(churn_risk_scores))
+    female_positions = male_positions + bar_width
+    # Membuat figure dan axes untuk plot
+    fig, ax = plt.subplots(figsize=(8, 4))
+    # Membuat barplot untuk gender Male dengan warna biru (blue)
+    ax.bar(male_positions, grouped.loc['M'], width=bar_width, label='Male', color='lightblue')
+    # Membuat barplot untuk gender Female dengan warna merah (red)
+    ax.bar(female_positions, grouped.loc['F'], width=bar_width, label='Female', color='pink')
+    # Memberikan label pada sumbu x
+    ax.set_xticks(male_positions + bar_width/2)
+    ax.set_xticklabels(churn_risk_scores)
+    # Memberikan judul pada sumbu x dan y
+    ax.set_xlabel('Churn Risk Score')
+    ax.set_ylabel('Count')
+    # Menambahkan judul plot
+    ax.set_title('Churn Risk Score by Gender')
+    # Menampilkan legenda
+    ax.legend()
+    # Menampilkan grid
+    ax.grid(True, axis='y', linestyle='--', alpha=0.5)
+    # Menampilkan plot di Streamlit
+    st.pyplot(fig)
+    # Membuat Histogram Berdasarkan Input User
+    st.write('#### Histogram based on input user')
+    pilihan = st.selectbox('Pilih column : ', ('avg_time_spent', 'avg_transaction_value', 'avg_frequency_login_days', 'points_in_wallet'))
+    fig = plt.figure(figsize=(15, 5))
+    sns.histplot(df[pilihan], bins=30, kde=True)
+    st.pyplot(fig)
+    # Membuat Plotly Plot
+    st.write('#### Plotly Plot - points_in_wallet VS avg_transaction_value')
+    fig = px.scatter(df, x='points_in_wallet', y='avg_transaction_value')
+    st.plotly_chart(fig)
+if __name__=='__main__':
+    run()

final_pipeline.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b9ed56de94ad1dd4fb9516e9975b9530878c0439b02d01c349cafd004daa06d8
+size 3385

model_encoder.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:614f4ea128d1a715049a5af4d9b1564bdcf44554b6bdc090f5ca98877954d9f7
+size 516

prediction.py ADDED Viewed

	@@ -0,0 +1,93 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import pickle
+import json
+import tensorflow as tf
+# Load All Files
+# Load the Models
+with open('final_pipeline.pkl', 'rb') as file_1:
+  model_pipeline = pickle.load(file_1)
+with open('model_encoder.pkl','rb') as file_2:
+  encoder_ord = pickle.load(file_2)
+model_ann = tf.keras.models.load_model('churn_model.h5')
+def run():
+  with st.form(key='from_churn'):
+      user_id = st.text_input('User id', value='')
+      age = st.number_input('Age', min_value=0, max_value=100, value=0)
+      gender = st.selectbox('Gender', ('M', 'F'), index=1)
+      region_category = st.selectbox('Region', ('Town', 'City', 'Village'), index=1)
+      st.markdown('---')
+      membership_category = st.selectbox('Membership Category', ('Basic Membership', 'No Membership', 'Gold Membership', 'Silver Membership', 'Premium Membership', 'Platinum Membership'), index=1)
+      joining_days = st.number_input('joining_days', min_value=0, max_value=1000, value=0)
+      joined_through_referral = st.selectbox('Join Through Referral', ('Yes', 'No'), index=1)
+      preferred_offer_types = st.selectbox('Offer Type', ('Gift Vouchers/Coupons', 'Credit/Debit Card Offers', 'Without Offers'), index=1)
+      medium_of_operation = st.selectbox('Gadget Type', ('Desktop', 'Smartphone', 'Both'), index=1)
+      internet_option = st.selectbox('Internet Type', ('Wi-Fi', 'Mobile_Data', 'Fiber_Optic'), index=1)
+      days_since_last_login = st.number_input('Days Since Last Login', min_value=0, max_value=100, value=0)
+      avg_time_spent = st.number_input('Average Time Spent', min_value=0, max_value=3000, value=0)
+      avg_transaction_value = st.number_input('Average Transaction Value', min_value=0, max_value=100000, value=0)
+      avg_frequency_login_days = st.number_input('Average Login Days', min_value=0, max_value=100, value=0)
+      points_in_wallet = st.number_input('Points in Wallet', min_value=0, max_value=3000, value=0)
+      used_special_discount = st.selectbox('Used Special Discount', ('Yes', 'No'), index=1)
+      offer_application_preference = st.selectbox('Offer Preference', ('Yes', 'No'), index=1)
+      past_complaint = st.selectbox('Past Complaint', ('Yes', 'No'), index=1)
+      complaint_status = st.selectbox('Complaint Status', ('Not Applicable', 'Unsolved', 'Solved', 'Solved in Follow-up', 'No Information Available'), index=1)
+      feedback = st.selectbox('Feedback', ('Poor Product Quality', 'No reason specified', 'Too many ads', 'Poor Website', 'Poor Customer Service', 'Reasonable Price', 'User Friendly Website', 'Products always in Stock', 'Quality Customer Care'), index=1)
+      submitted = st.form_submit_button('Predict')
+  data_inf = {
+      'user_id': user_id,
+      'age': age,
+      'gender': gender,
+      'region_category': region_category,
+      'membership_category': membership_category,
+      'joining_days': joining_days,
+      'joined_through_referral': joined_through_referral,
+      'preferred_offer_types': preferred_offer_types,
+      'medium_of_operation': medium_of_operation,
+      'internet_option': internet_option,
+      'days_since_last_login': days_since_last_login,
+      'avg_time_spent': avg_time_spent,
+      'avg_transaction_value': avg_transaction_value,
+      'avg_frequency_login_days': avg_frequency_login_days,
+      'points_in_wallet': points_in_wallet,
+      'used_special_discount': used_special_discount,
+      'offer_application_preference': offer_application_preference,
+      'past_complaint': past_complaint,
+      'complaint_status': complaint_status,
+      'feedback': feedback
+    }
+  data_inf = pd.DataFrame([data_inf])
+  st.dataframe(data_inf)
+  if submitted:
+    # Split between Numerical Columns and Categorical Columns
+    enc_columns = ['membership_category']
+    data_inf[enc_columns] = encoder_ord.fit_transform(data_inf[enc_columns])
+    # Feature Scaling and Feature Encoding
+    data_inf_transform = model_pipeline.transform(data_inf)
+    #data_inf_num_scaled = model_scaler.transform(data_inf_num)
+    #data_inf_cat_encoded = model_encoder.transform(data_inf_cat)
+    #data_inf_final = np.concatenate([data_inf_num_scaled, data_inf_cat_encoded], axis=1)
+    # Predict using Linear Regression
+    y_pred_inf = model_ann.predict(data_inf_transform)
+    y_pred_inf = np.where(y_pred_inf >= 0.5, 1, 0)
+    st.write('# Churn Risk : ', str(int(y_pred_inf)))
+if __name__=='__main__':
+    run()

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+streamlit
+pandas
+seaborn
+matplotlib
+Pillow
+plotly
+scikit-learn==1.2.2
+tensorflow