Spaces:

fadyabila
/

Churn_Customer_Prediction

Runtime error

App Files Files Community

fadyabila committed on Mar 31, 2023

Commit

d0c30b2

•

1 Parent(s): a802d59

Submission

Browse files

Files changed (10) hide show

app.py +10 -0
churn.csv +0 -0
churn.png +0 -0
customer_churn.h5 +3 -0
eda.py +153 -0
final_pipeline.pkl +3 -0
main.py +10 -0
model.png +0 -0
prediction.py +97 -0
requirements.txt +11 -0

app.py ADDED Viewed

	@@ -0,0 +1,10 @@

+import streamlit as st
+import eda
+import prediction
+navigation = st.sidebar.selectbox('Choose Page : ', ('EDA', 'Churn Customer Prediction'))
+if navigation == 'EDA':
+    eda.run()
+else:
+    prediction.run()

churn.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

churn.png ADDED Viewed

customer_churn.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0f48cc86f0c3a4c763f9f92a5c0d7e9af5ecb50fe6567aab95e827ba07b0860f
+size 66352

eda.py ADDED Viewed

	@@ -0,0 +1,153 @@

+import streamlit as st
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+import plotly.express as px
+from PIL import Image
+# Melebarkan visualisasi untuk memaksimalkan browser
+st.set_page_config(
+    page_title='Churn Customer',
+    layout='wide',
+    initial_sidebar_state='expanded'
+)
+def run():
+    # Membuat title
+    st.title('Churn Customer Prediction')
+    st.write('### by Fadya Ulya Salsabila')
+    # Menambahkan Gambar
+    image = Image.open('churn.png')
+    st.image(image, caption='Illustration of Churn Customer')
+    # Menambahkan Deskripsi
+    st.write('## Background')
+    st.write("""
+    A make-up company "Sister" wants to minimize the risk of a customer stopping using their product.
+    The company then analyzes the history of its customers in making purchases based on time and frequency. Then, this company also looks at the feedback that customers have given it.
+    This is intended to determine customer predictions whether to stop using their product or not.
+    Because if many customers stop, the company will evaluate product sales and marketing to customers. In addition, the company will also provide discounts and special offers to loyal customers.
+    The objectives from this analysis and modeling in this dataset are:
+    1. Find out the customer prediction, whether customer churn or not.
+    2. Find out the best model prediction using Artificial Neural Network (ANN).""")
+    st.write('## Dataset')
+    st.write("""
+    The dataset is from Github Milestones 1 Hacktiv8 `churn.csv` that contains 22 columns.
+    1. `user_id`:	ID of a customer
+    2. `age`:	Age of a customer
+    3. `gender`:	Gender of a customer
+    4. `region_category`:	Region that a customer belongs to
+    5. `membership_category`:	Category of the membership that a customer is using
+    6. `joining_date`:	Date when a customer became a member
+    7. `joined_through_referral`:	Whether a customer joined using any referral code or ID
+    8. `preferred_offer_types`:	Type of offer that a customer prefers
+    9. `medium_of_operation`:	Medium of operation that a customer uses for transactions
+    10. `internet_option`:	Type of internet service a customer uses
+    11. `last_visit_time`:	The last time a customer visited the website
+    12. `days_since_last_login`:	Number of days since a customer last logged into the website
+    13. `avg_time_spent`:	Average time spent by a customer on the website
+    14. `avg_transaction_value`:	Average transaction value of a customer
+    15. `avg_frequency_login_days`:	Number of times a customer has logged in to the website
+    16. `points_in_wallet`:	Points awarded to a customer on each transaction
+    17. `used_special_discount`:	Whether a customer uses special discounts offered
+    18. `offer_application_preference`:	Whether a customer prefers offers
+    19. `past_complaint`:	Whether a customer has raised any complaints
+    20. `complaint_status`:	Whether the complaints raised by a customer was resolved
+    21. `feedback`:	Feedback provided by a customer
+    22. `churn_risk_score`:	Churn score (0 : Not churn, 1 : Churn)""")
+    # Membuat Garis Lurus
+    st.markdown('---')
+    # Membuat Sub Headrer
+    st.subheader('EDA for Churn Customer')
+    # Magic Syntax
+    st.write(
+    ' On this page, the author will do a simple exploration.'
+    ' The dataset used is the Churn Custimer dataset.'
+    ' This dataset comes from Github Project Hacktiv8.')
+    # Show DataFrame
+    df1 = pd.read_csv('churn.csv')
+    st.dataframe(df1)
+    # Membuat Barplot
+    st.write('#### Churn Risk Plot')
+    fig = plt.figure(figsize=(10,7))
+    sns.countplot(x='churn_risk_score', data=df1, palette="PuRd")
+    st.pyplot(fig)
+    st.write('The target data is balanced.')
+    st.write('#### Gender Based on Churn Risk')
+    fig1, ax1 = plt.subplots(figsize=(15, 8))
+    sns.countplot(x='gender', hue='churn_risk_score', data=df1, ax=ax1)
+    st.pyplot(fig1)
+    st.write('Gender distribution is normal between men and women.')
+    # Mengelompokkan Usia
+    bins = [8, 20, 30, 40, 50, 60, 120]
+    labels = ['10-19', '20-29', '30-39', '40-49', '50-59', '60-69']
+    df1['agerange'] = pd.cut(df1.age, bins, labels = labels,include_lowest = True)
+    # Menampilkan visualisasi usia berdasarkan churn risk
+    st.write('#### Age Based on Churn Risk')
+    fig2, ax2 = plt.subplots(figsize=(10,7))
+    sns.countplot(x='agerange', data=df1, hue="churn_risk_score", ax=ax2)
+    st.pyplot(fig2)
+    st.write('Customers in this company varies greatly, ranging from 10-64 years old. ')
+    # Membuat heatmap correlation
+    st.write('#### Heatmap Correlation')
+    fig = plt.figure(figsize = (15,8))
+    sns.heatmap(df1.corr(), annot = True, square = True)
+    st.pyplot(fig)
+    st.write("""
+    The heatmap correlation above shows that the column that has a very high relationship with churn risk is the `avg_freqeuncy_login_days` column with score `0.11`. This column shows how many customers log in in a day.
+    It means they are still interested in the product in this company. Meanwhile, `avg_transaction_value` have a strong negative correlation with churn risk witn score `-0.22`.
+    This shows that the number of purchase transactions on this product has no significant effect on customer churn.""")
+    # Membuat internet option berdasarkan churn risk
+    st.write('#### Internet Option Based on Churn Risk')
+    fig3, ax3 = plt.subplots(figsize=(10,7))
+    sns.countplot(x='internet_option', data=df1, hue="churn_risk_score", ax=ax3, palette="Blues")
+    st.pyplot(fig3)
+    st.write("""
+    Bar plot visualization above, shows that the `internet option` of customers doesn't have a strong correlation with churn risk.
+    Distribution of internet option data almost have the same number of values and there is no significant difference.
+    Customers who use the internet with Wi-Fi, Fiber Optic, and Mobile Data are almost the same.""")
+    # Membuat region category berdasarkan churn risk
+    st.write('#### Region Category Based on Churn Risk')
+    fig4, ax4 = plt.subplots(figsize=(10,7))
+    sns.countplot(x='region_category', data=df1, hue="churn_risk_score", ax=ax4, palette="Blues")
+    st.pyplot(fig4)
+    st.write("""
+    Based on customer region, there is no significant correlation with churn risk.
+    It's just that many customers of this product live in town areas compared to villages and cities.""")
+    # Membuat membership category berdasarkan churn risk
+    st.write('#### Membership Category on Churn Risk')
+    fig5, ax5 = plt.subplots(figsize=(10,7))
+    sns.countplot(y='membership_category', data=df1, hue="churn_risk_score", ax=ax5, palette="Blues")
+    st.pyplot(fig5)
+    st.write("""
+    In `membership_category` column, customers which include in `No Membership` dan `Basic Membership` are customers with the highest churn risk.
+    This can happen because the customer is deemed not a loyal customer so the risk of stopping the transaction is high.
+    In contrast to silver, premium, gold, and platinum members where customers are considered loyal to product transactions.
+    """)
+    # Membuat Histogram Berdasarkan Input User
+    st.write('#### Histogram Based On User Input')
+    pilihan = st.selectbox('Choose Column : ', ('age', 'gender', 'days_since_last_login', 'avg_time_spent',
+                                                'avg_transaction_value', 'avg_frequency_login_days',
+                                                'points_in_wallet'))
+    fig = plt.figure(figsize=(15,5))
+    sns.histplot(df1[pilihan], bins=30, kde=True)
+    st.pyplot(fig)
+if __name__ == '__main__':
+    run()

final_pipeline.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9069967f38a72a08cead02e6165a6a92ca62ac04e38e25557416f5b1cdbef61c
+size 3546

main.py ADDED Viewed

	@@ -0,0 +1,10 @@

+import streamlit as st
+import eda
+import prediction
+navigation = st.sidebar.selectbox('Choose Page : ', ('EDA', 'Churn Customer Prediction'))
+if navigation == 'EDA':
+    eda.run()
+else:
+    prediction.run()

model.png ADDED Viewed

prediction.py ADDED Viewed

	@@ -0,0 +1,97 @@

+import streamlit as st
+import pickle
+from tensorflow.keras.models import load_model
+import pandas as pd
+import numpy as np
+# Load All Files
+with open('final_pipeline.pkl', 'rb') as file_1:
+  model_pipeline = pickle.load(file_1)
+model_ann = load_model('customer_churn.h5')
+def run():
+    with st.form(key='Churn_Customer_Prediction'):
+        churn_risk_score = st.selectbox('Churn Risk', (0, 1), index=1)
+        age = st.number_input('Age', min_value=23, max_value=65, value=23)
+        gender = st.selectbox('Gender', ('Male', 'Female'), index=1)
+        days_since_last_login = st.number_input('Last Login', min_value=0, max_value=26, value=0)
+        avg_time_spent = st.number_input('Avg. Time Spent', min_value=0, max_value=3236, value=0)
+        avg_transaction_value = st.number_input('Avg. Transaction Value', min_value=800, max_value=99915, value=29271)
+        avg_frequency_login_days = st.number_input('Avg. Frequency Login Days', min_value=0, max_value=73, value=0)
+        points_in_wallet = st.number_input('Points in Wallet', min_value=0, max_value=2070, value=0)
+        joining_date = st.date_input("Select Join Date")
+        last_visit_time = st.time_input('Last Visit Time')
+        st.markdown('---')
+        region_category = st.selectbox('Region Category', ('Village', 'Town', 'City'), index=1)
+        membership_category = st.selectbox('Membership Category', ('No Membership', 'Basic Membership',
+                                                                   'Silver Membership', 'Premium Membership',
+                                                                   'Gold Membership', 'Platinum Membership'), index=1)
+        preferred_offer_types = st.selectbox('Preffered Offer', ('Without Offers', 'Credit/Debit Card Offers',
+                                                                 'Gift Vouchers/Coupons'), index=1)
+        medium_of_operation = st.selectbox('Medium Ops', ('Desktop', 'Mobile', 'Both'
+                                                          'Gift Vouchers/Coupons'), index=1)
+        internet_option = st.selectbox('Internet Ops', ('Wi-Fi', 'Fiber_Optic', 'Mobile-Data'), index=1)
+        feedback = st.selectbox('Feedback', ('Poor Website', 'Poor Customer Service', 'Poor Product Quality',
+                                             'Too many ads', 'No reason specified', 'Products always in Stock',
+                                             'Reasonable Price', 'Quality Customer Care', 'User Friendly Website'), index=1)
+        complaint_status = st.selectbox('Complaint Status', ('No Information Available', 'Not Aplicable', 'Unsolved',
+                                                             'Solved', 'Solved in Follow-up'), index=1)
+        st.markdown('---')
+        joined_through_referral = st.selectbox('Join Through Referral', ('Yes', 'No'), index=1)
+        used_special_discount = st.selectbox('Use Special Discount', ('Yes', 'No'), index=1)
+        offer_application_preference = st.selectbox('Offer Application Preference', ('Yes', 'No'), index=1)
+        past_complaint = st.selectbox('Past Complaint', ('Yes', 'No'), index=1)
+        submitted = st.form_submit_button('Predict')
+    data_inf = {
+    'age': age,
+    'gender': gender,
+    'region_category': region_category,
+    'membership_category': membership_category,
+    'joining_date': joining_date,
+    'joined_through_referral': joined_through_referral,
+    'preferred_offer_types': preferred_offer_types,
+    'medium_of_operation': medium_of_operation,
+    'internet_option': internet_option,
+    'last_visit_time': last_visit_time,
+    'days_since_last_login': days_since_last_login,
+    'avg_time_spent': avg_time_spent,
+    'avg_transaction_value': avg_transaction_value,
+    'avg_frequency_login_days': avg_frequency_login_days,
+    'points_in_wallet': points_in_wallet,
+    'used_special_discount': used_special_discount,
+    'offer_application_preference': offer_application_preference,
+    'past_complaint': past_complaint,
+    'complaint_status': complaint_status,
+    'feedback': feedback,
+    'churn_risk_score': churn_risk_score
+    }
+    data_inf = pd.DataFrame([data_inf])
+    data_inf_transform = model_pipeline.transform(data_inf)
+    a = st.dataframe(data_inf_transform)
+    b = ''
+    if len(data_inf_transform) == 0:
+        b = 'Not Churn'
+    else:
+        # Predict using ANN: Sequential API
+        y_pred_inf = model_ann.predict(data_inf_transform)
+        y_pred_inf = np.where(y_pred_inf >= 0.5, 1, 0)
+        if y_pred_inf == 0:
+            b = 'Not Churn'
+        else:
+            b = 'Churn'
+    if submitted:
+        st.write('# Prediction : ', b)
+if __name__ == '__main__':
+    run()

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+# List of the libraries this app requires
+streamlit
+pandas
+seaborn
+matplotlib
+numpy
+scikit-learn==1.2.1
+plotly
+tensorflow-cpu==2.12.0
+protobuf==3.20.1