Spaces:

ahmadluay
/

Customer_Churn_Prediction

Runtime error

App Files Files Community

ahmadluay committed on Mar 31, 2023

Commit

adb5d59

•

1 Parent(s): 2a603a4

first commit

Browse files

Files changed (9) hide show

Drop_Columns.txt +1 -0
app.py +11 -0
churn.csv +0 -0
churn.jpeg +0 -0
eda.py +196 -0
final_pipeline.pkl +3 -0
model_seq2.h5 +3 -0
prediction.py +94 -0
requirements.txt +8 -0

Drop_Columns.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["days_since_last_login", "used_special_discount", "joining_date", "gender", "age", "user_id", "internet_option", "last_visit_time", "complaint_status"]

app.py ADDED Viewed

	@@ -0,0 +1,11 @@

+import streamlit as st
+import eda # python file
+import prediction # python file
+navigation = st.sidebar.selectbox('Page Navigation: ',('EDA','Customer Churn Prediction'))
+if navigation == 'EDA':
+    eda.run()
+else:
+    prediction.run()

churn.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

churn.jpeg ADDED Viewed

eda.py ADDED Viewed

	@@ -0,0 +1,196 @@

+import streamlit as st
+import numpy as np
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+import plotly.express as px
+import sklearn
+from sklearn.preprocessing import LabelEncoder
+from PIL import Image
+# Page-level Streamlit settings: page title, wide layout and an expanded sidebar.
+# NOTE(review): this call executes when the module is imported rather than
+# inside run() -- confirm that is intended when eda is imported by another script.
+st.set_page_config(
+    page_title='Customer Churn Prediction Using Artificial Neural Network in E-commerce Company',
+    layout = 'wide',
+    initial_sidebar_state='expanded'
+)
+def run():
+    # title
+    st.title('Customer Churn Prediction Using Artificial Neural Network in E-commerce Company')
+    st.write('by Ahmad Luay Adnani')
+    # sub header
+    st.subheader ('Exploratory Data Analysis of the Dataset.')
+    # Add Image
+    image = Image.open('churn.jpeg')
+    st.image(image,caption = 'Customer churn illustration')
+    # Description
+    st.write('In customer relationship management, it is important for e-commerce businesses to attract new customers and retain existing ones. Predicting customer churn in e-commerce business is critical to the success of online retailers. By analyzing customer data, businesses can gain insights into customer behavior and develop strategies to retain customers, ultimately improving customer satisfaction and driving revenue growth.')
+    st.write('# Dataset')
+    st.write('Dataset used in this analysis is churn dataset from an e-commerce company that wants to minimize the risk of a customer stopping using the product they offer.')
+    # show dataframe
+    df = pd.read_csv('churn.csv')
+    st.dataframe(df)
+    # add description of Dataset
+    st.write('Following are the variables and definitions of each column in the dataset.')
+    st.write("`user_id` : ID of a customer")
+    st.write("`age` : Age of a customer")
+    st.write("`gender` : Gender of a customer")
+    st.write("`region_category` : Region that a customer belongs to")
+    st.write("`membership_category` : Category of the membership that a customer is using")
+    st.write("`joining_date` : Date when a customer became a member")
+    st.write("`joined_through referral` : Whether a customer joined using any referral code or ID")
+    st.write("`preferred_offer types` : Type of offer that a customer prefers")
+    st.write("`medium_of operation` : 	Medium of operation that a customer uses for transactions")
+    st.write("`internet_option` : Type of internet service a customer uses")
+    st.write("`last_visit_time` : The last time a customer visited the website")
+    st.write("`days_since_last_login` : Number of days since a customer last logged into the website")
+    st.write("`avg_time_spent` : Average time spent by a customer on the website")
+    st.write("`avg_transaction_value` : Average transaction value of a customer")
+    st.write("`avg_frequency_login_days` : Number of times a customer has logged in to the website")
+    st.write("`points_in_wallet` : Points awarded to a customer on each transaction")
+    st.write("`used_special_discount` : Whether a customer uses special discounts offered")
+    st.write("`offer_application_preference` : Whether a customer prefers offers")
+    st.write("`past_complaint` : Whether a customer has raised any complaints")
+    st.write("`complaint_status` : Whether the complaints raised by a customer was resolved")
+    st.write("`feedback` : Feedback provided by a customer")
+    st.write("`churn_risk_score` : Churn score `0` : Not churn `1` : Churn")
+    ###
+    # Churn Prediction
+    st.write('# Exploratory Data Analysis ')
+    st.write('## Number of Customer at Risk of Churning')
+    # churn
+    df_eda = df.copy()
+    df_eda.churn_risk_score.replace({0:'Not Churn',1:'Churn'}, inplace=True)
+    churn = df_eda.churn_risk_score.value_counts().to_frame().reset_index()
+    # Plot PieChart with Plotly
+    fig = px.pie(churn,values='churn_risk_score', names='index',color_discrete_sequence=['red','blue'])
+    fig.update_layout(title_text = "Number of Customer at risk of Churning")
+    st.plotly_chart(fig)
+    st.write('Based on visualization above, the percentage of customer at risk of churning is 54.1%. Further data exploration is needed to find out what factors cause these customers to be at risk of churning.')
+    ###
+    # Number of Customers Based on Their Membership Categories
+    st.write('## Number of Customers Based on Their Membership Categories')
+    # membership category
+    membership_category = df_eda.groupby(['churn_risk_score','membership_category']).aggregate(Number_of_customer_per_membership_category=('membership_category','count')).reset_index()
+    # plotting bar plot
+    fig = px.bar(membership_category, x="membership_category", y="Number_of_customer_per_membership_category",color='churn_risk_score',color_discrete_sequence=['red','blue'],
+             orientation="v",hover_name="membership_category"
+             )
+    fig.update_layout(title_text = "Number of customers based on their membership category")
+    st.plotly_chart(fig)
+    st.write('Based on visualization above, customers **without membership** and customers with **basic membership** have the highest risk of churning. Based on my assumption, customers without membership and customers with basic membership may have a higher risk of churning for several reasons:')
+    st.write('1. **Lack of loyalty**: Customers without membership or with basic membership may not feel a strong sense of loyalty to the company or brand, making it easier for them to switch to a competitor.')
+    st.write('2. **Limited benefits**: Basic membership may offer limited benefits or perks compared to higher-tier memberships, making it less attractive to customers who may be seeking more value.')
+    st.write('3. **Price sensitivity**: Customers without membership or with basic membership may be more price-sensitive and may be more likely to switch to a competitor if they find a better deal elsewhere.')
+    st.write('4. **Limited engagement**: Customers without membership or with basic membership may have limited engagement with the company or brand, making it harder for the company to build a strong relationship with them and retain their loyalty.')
+    ###
+    # Average Transaction Value
+    st.write('## Average Transaction Value')
+    # average transaction value
+    avg_transaction_value = df_eda.groupby(['churn_risk_score']).aggregate(avg_transaction_value=('avg_transaction_value','mean')).reset_index()
+    # plotting bar plot
+    fig = px.bar(avg_transaction_value, x="churn_risk_score", y="avg_transaction_value",color='churn_risk_score',color_discrete_sequence=['red','blue'],
+             orientation="v"
+             )
+    fig.update_layout(title_text = "Average Transaction Value")
+    st.plotly_chart(fig)
+    st.write('Based on visualization above, customers who are at risk of churning have a **lower average transaction value** compared to customers who are not at risk of churning. Based on my assumption, customers who are at risk of churning may have a lower average transaction value for several reasons:')
+    st.write("1. **Reduced usage**: Customers who are at risk of churning may be using the company's products or services less frequently or may have stopped using them altogether. This reduced usage can result in a lower average transaction value.")
+    st.write('2. **Price sensitivity**: Customers who are at risk of churning may be more price-sensitive and may be more likely to switch to a competitor if they find a better deal elsewhere. This can result in customers opting for lower-priced products or services, which can lower the average transaction value.')
+    st.write('3. **Disengagement**: Customers who are at risk of churning may be less engaged with the company or brand and may be less likely to make high-value purchases. This reduced engagement can result in a lower average transaction value.')
+    ###
+    # Points in Wallet
+    st.write('## Points in Wallet')
+    # points in wallet
+    points_in_wallet = df_eda.groupby(['churn_risk_score']).aggregate(points_in_wallet=('points_in_wallet','mean')).reset_index()
+    # plotting bar plot
+    fig = px.bar(points_in_wallet, x="churn_risk_score", y="points_in_wallet",color='churn_risk_score',color_discrete_sequence=['red','blue'],
+             orientation="v"
+             )
+    fig.update_layout(title_text = "Points in Wallet")
+    st.plotly_chart(fig)
+    st.write('Based on visualization above, customers who are at risk of churning have a **lower points balance in their wallet** compared to customers who are not at risk of churning. Based on my assumption, customers who are at risk of churning may have a lower points balance in their wallet for several reasons:')
+    st.write("1. **Reduced usage**: Customers who are at risk of churning may be using the company's products or services less frequently or may have stopped using them altogether. This reduced usage can result in a lower accumulation of points in their wallet.")
+    st.write('2. **Disengagement**: Customers who are at risk of churning may be less engaged with the company or brand and may not be actively participating in loyalty programs or earning points. This reduced engagement can result in a lower accumulation of points in their wallet.')
+    ###
+    # Feedback
+    st.write('## Feedback')
+    # feedback
+    feedback = df_eda.groupby(['churn_risk_score','feedback']).aggregate(Number_of_customer=('feedback','count')).reset_index()
+    # plotting bar plot
+    fig = px.bar(feedback, x="feedback", y="Number_of_customer",color='churn_risk_score',color_discrete_sequence=['red','blue'],
+             orientation="v",hover_name="feedback"
+             )
+    fig.update_layout(title_text = "Number of Customers Based on Their Feedback")
+    st.plotly_chart(fig)
+    st.write('Based on visualization above, The most feedback that causes customers to be at risk of churning is **poor product quality**. Based on my assumption, poor product quality can cause customers to be at risk of churning for several reasons:')
+    st.write("1. **Reduced satisfaction**: Poor product quality can lead to reduced customer satisfaction, which can result in customers being less likely to continue using the company's products or services.")
+    st.write("2. **Negative word-of-mouth**: Customers who experience poor product quality may share their negative experiences with others, resulting in negative word-of-mouth for the company. This can lead to a decrease in new customer acquisition and can also increase the likelihood of existing customers churning.")
+    st.write("3. **Lack of trust**: Poor product quality can lead to a lack of trust in the company and its ability to provide high-quality products or services. This lack of trust can cause customers to be less loyal and more likely to switch to a competitor.")
+    st.write("4. **Perceived value**: Poor product quality can result in customers perceiving less value in the company's products or services, which can make them less likely to continue using them and more likely to switch to a competitor.")
+    ###
+    # Correlation Matrix Analysis
+    st.write('## Correlation Matrix Analysis')
+    df_copy = df.copy()
+    # Get Numerical Columns and Categorical Columns
+    num_columns = df_copy.select_dtypes(include=np.number).columns.tolist()
+    cat_columns = df_copy.select_dtypes(include=['object']).columns.tolist()
+    # Using LabelEncoder to convert categorical into numerical data
+    m_LabelEncoder = LabelEncoder()
+    for col in df_copy[cat_columns]:
+        df_copy[col]=m_LabelEncoder.fit_transform(df_copy[col])
+    # Plotting Correlation Matrix of Categorical columns and default_payment
+    sns.set(font_scale=1)
+    fig = plt.figure(figsize=(25,25))
+    sns.heatmap(df_copy.corr(),annot=True,cmap='coolwarm', fmt='.2f')
+    st.pyplot(fig)
+    st.write('Features that have have a strong correlation with the target variable (`churn_risk_score`) are `membership_category`,`points_in_wallet`,`avg_transaction_value`,`feedback`,`avg_frequency_login_days`,`joined_through_referral`,`preferred_offer_types`,`medium_of_operation`,`region_category` and `	offer_application_preference`.')
+    ###
+    # Histogram and Boxplot based on user input
+    st.write('## Histogram & Boxplot Based on User Input')
+    select_column = st.selectbox('Select Column : ', ('age','days_since_last_login','avg_time_spent','avg_transaction_value','avg_frequency_login_days','points_in_wallet'))
+    sns.set(font_scale=2)
+    fig, ax = plt.subplots(1,2,figsize=(30,10))
+    fig.suptitle(f'Histogram and Boxplot Visualization of {select_column} ')
+    sns.histplot(ax=ax[0],data=df_eda[select_column],kde=True)
+    ax[0].set_title(f'{select_column} skewness: {df_eda[select_column].skew()}')
+    sns.boxplot(ax=ax[1],data=df_eda,x=df_eda[select_column],width=0.50)
+    ax[1].set_title(select_column)
+    st.pyplot(fig)
+if __name__ == '__main__':
+    run()

final_pipeline.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7c4637514be86cee323e8e9ff2651eef530c289f006b697e1faca5e0caea3348
+size 3357

model_seq2.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:babedfb7bd2f04c6ff7cd3b0a8f5ec5568b010274cb5fbe8ccf7fa62eb292076
+size 88696

prediction.py ADDED Viewed

	@@ -0,0 +1,94 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+from tensorflow.keras.models import load_model
+import datetime
+import pickle
+import json
+# Load All Files
+with open('final_pipeline.pkl', 'rb') as file_1:
+  final_pipeline = pickle.load(file_1)
+with open('Drop_Columns.txt', 'r') as file_2:
+  Drop_Columns = json.load(file_2)
+model_seq2 = load_model('model_seq2.h5')
+def run():
+  with st.form(key='Customer_Churn_Prediction'):
+      user_id = st.text_input('ID',value='972706cb0db0068e')
+      age = st.number_input('Age',min_value=2012,max_value=2012,value=2012)
+      gender = st.radio('Gender',('Male','Female'))
+      if gender=='Male':
+            gender='M'
+      else: gender='F'
+      region_category = st.selectbox('Region Category',('Town', 'City','Village'))
+      membership_category = st.selectbox('Membership Category',('Premium Membership','Basic Membership','No Membership', 'Gold Membership','Silver Membership','Platinum Membership'))
+      joining_date = st.date_input('Joining Date',datetime.date(2015,3,27))
+      joined_through_referral = st.selectbox('Joined Through Referral',('Yes','No'))
+      preferred_offer_types = st.selectbox('Preferred Offer Types',('Credit/Debit Card Offers','Gift Vouchers/Coupons','Without Offers'))
+      medium_of_operation = st.selectbox('Medium of Operation',('Smartphone','Desktop','Both'))
+      internet_option = st.selectbox('Internet Option',('Mobile_Data','Wi-Fi','Fiber_Optic'))
+      last_visit_time = st.text_input('Last Visit Time',value='09:41:40')
+      days_since_last_login = st.number_input('Days Since Last Login',min_value=0,max_value=31,value=16)
+      avg_time_spent = st.number_input('Average Time Spent on the Website',step=0.000001,format="%.6f",min_value=0.000000,max_value=9999.999999,value=1447.387929)
+      avg_transaction_value = st.number_input('Average Transaction Value',step=0.01,format="%.2f",min_value=0.00,max_value=99999.99,value=11839.58)
+      avg_frequency_login_days = st.number_input('Number of Times Login to the Website',min_value=1, max_value=99,value=29)
+      points_in_wallet = st.number_input('Points Balance',step=0.01,format="%.2f",min_value=0.00,max_value=9999.99,value=727.91)
+      used_special_discount = st.selectbox('Uses Special Discount Offered ?',('Yes','No'))
+      offer_application_preference = st.selectbox('Prefer Offers ?',('No','Yes'))
+      past_complaint = st.selectbox(' Has raised any complaints before ?',('No','Yes'))
+      complaint_status = st.selectbox('Were the complaints raised resolved?',('Not Applicable ','Unsolved','Solved','Solved in Follow-up','No Information Available'))
+      feedback = st.text_input('Feedback',value='No reason specified')
+      st.markdown('---')
+      submitted = st.form_submit_button('Are Customers at Risk of Churning ? :thinking_face:')
+  df_inf = {
+      'user_id': user_id,
+      'age': age,
+      'gender': gender,
+      'region_category': region_category,
+      'membership_category': membership_category,
+      'joining_date': joining_date,
+      'joined_through_referral': joined_through_referral,
+      'preferred_offer_types': preferred_offer_types,
+      'medium_of_operation': medium_of_operation,
+      'internet_option': internet_option,
+      'last_visit_time':last_visit_time,
+      'days_since_last_login':days_since_last_login,
+      'avg_time_spent':avg_time_spent,
+      'avg_transaction_value':avg_transaction_value,
+      'avg_frequency_login_days':avg_frequency_login_days,
+      'points_in_wallet':points_in_wallet,
+      'used_special_discount':used_special_discount,
+      'offer_application_preference':offer_application_preference,
+      'past_complaint':past_complaint,
+      'complaint_status':complaint_status,
+      'feedback':feedback
+  }
+  df_inf = pd.DataFrame([df_inf])
+  # Data Inference
+  df_inf_copy = df_inf.copy()
+  # Removing unnecessary features
+  df_inf_final = df_inf_copy.drop(Drop_Columns,axis=1).sort_index()
+  data_inf_transform = final_pipeline.transform(df_inf_final)
+  st.dataframe(df_inf_final)
+  if submitted:
+      # Predict using Neural Network
+      y_pred_inf = model_seq2.predict(data_inf_transform)
+      st.write('# Are Customers at Risk of Churning ? :thinking_face:')
+      if y_pred_inf == 0:
+         st.subheader('Yes, customers are at risk of churning :disappointed: ')
+      else:
+         st.subheader('No, customers are not at risk of churning :wink:')
+if __name__ == '__main__':
+    run()

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+streamlit
+pandas
+seaborn
+matplotlib
+numpy
+scikit-learn==1.2.1
+tensorflow==2.9.0
+plotly