"""Streamlit page: exploratory data analysis of the e-commerce churn dataset.

The page reads ``churn.csv`` and ``churn.jpeg`` from the current working
directory and renders a series of charts with short written interpretations.
"""

import streamlit as st
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import sklearn
from sklearn.preprocessing import LabelEncoder
from PIL import Image

st.set_page_config(
    page_title='Customer Churn Prediction Using Artificial Neural Network in E-commerce Company',
    layout='wide',
    initial_sidebar_state='expanded'
)

# Discrete colour sequence shared by every Plotly chart on the page.
_CHURN_COLORS = ['red', 'blue']

# (column name, definition) pairs shown in the "Dataset" section.
_COLUMN_DESCRIPTIONS = (
    ('user_id', 'ID of a customer'),
    ('age', 'Age of a customer'),
    ('gender', 'Gender of a customer'),
    ('region_category', 'Region that a customer belongs to'),
    ('membership_category', 'Category of the membership that a customer is using'),
    ('joining_date', 'Date when a customer became a member'),
    ('joined_through_referral', 'Whether a customer joined using any referral code or ID'),
    ('preferred_offer_types', 'Type of offer that a customer prefers'),
    ('medium_of_operation', 'Medium of operation that a customer uses for transactions'),
    ('internet_option', 'Type of internet service a customer uses'),
    ('last_visit_time', 'The last time a customer visited the website'),
    ('days_since_last_login', 'Number of days since a customer last logged into the website'),
    ('avg_time_spent', 'Average time spent by a customer on the website'),
    ('avg_transaction_value', 'Average transaction value of a customer'),
    ('avg_frequency_login_days', 'Number of times a customer has logged in to the website'),
    ('points_in_wallet', 'Points awarded to a customer on each transaction'),
    ('used_special_discount', 'Whether a customer uses special discounts offered'),
    ('offer_application_preference', 'Whether a customer prefers offers'),
    ('past_complaint', 'Whether a customer has raised any complaints'),
    ('complaint_status', 'Whether the complaints raised by a customer was resolved'),
    ('feedback', 'Feedback provided by a customer'),
    ('churn_risk_score', 'Churn score `0` : Not churn `1` : Churn'),
)

# Numeric columns offered in the histogram / boxplot selector.
_DISTRIBUTION_COLUMNS = (
    'age',
    'days_since_last_login',
    'avg_time_spent',
    'avg_transaction_value',
    'avg_frequency_login_days',
    'points_in_wallet',
)


def _render_header() -> None:
    """Render the page title, author line, illustration and introduction."""
    st.title('Customer Churn Prediction Using Artificial Neural Network in E-commerce Company')
    st.write('by Ahmad Luay Adnani')
    st.subheader ('Exploratory Data Analysis of the Dataset.')

    # The context manager releases the file handle once Streamlit has read the image.
    with Image.open('churn.jpeg') as image:
        st.image(image, caption='Customer churn illustration')

    st.write('In customer relationship management, it is important for e-commerce businesses to attract new customers and retain existing ones. Predicting customer churn in e-commerce business is critical to the success of online retailers. By analyzing customer data, businesses can gain insights into customer behavior and develop strategies to retain customers, ultimately improving customer satisfaction and driving revenue growth.')


def _render_dataset(df: pd.DataFrame) -> None:
    """Show the raw data frame followed by the definition of each column."""
    st.write('# Dataset')
    st.write('Dataset used in this analysis is churn dataset from an e-commerce company that wants to minimize the risk of a customer stopping using the product they offer.')
    st.dataframe(df)

    st.write('Following are the variables and definitions of each column in the dataset.')
    for name, description in _COLUMN_DESCRIPTIONS:
        st.write(f"`{name}` : {description}")


def _with_churn_labels(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* whose ``churn_risk_score`` holds readable labels."""
    labelled = df.copy()
    # Assign the result back instead of a chained ``inplace=True`` call, which
    # does not update the frame under pandas Copy-on-Write.
    labelled['churn_risk_score'] = labelled['churn_risk_score'].replace(
        {0: 'Not Churn', 1: 'Churn'}
    )
    return labelled


def _render_churn_share(df_eda: pd.DataFrame) -> None:
    """Render the pie chart of churn vs. non-churn customers."""
    st.write('## Number of Customer at Risk of Churning')

    # Name both columns explicitly: the defaults produced by
    # ``value_counts().reset_index()`` differ between pandas 1.x and 2.x.
    churn = (
        df_eda['churn_risk_score']
        .value_counts()
        .rename_axis('index')
        .reset_index(name='churn_risk_score')
    )

    fig = px.pie(churn, values='churn_risk_score', names='index',
                 color_discrete_sequence=_CHURN_COLORS)
    fig.update_layout(title_text="Number of Customer at risk of Churning")
    st.plotly_chart(fig)
    st.write('Based on visualization above, the percentage of customer at risk of churning is 54.1%. Further data exploration is needed to find out what factors cause these customers to be at risk of churning.')


def _plot_count_by_churn(df_eda: pd.DataFrame, column: str, count_label: str, title: str) -> None:
    """Plot a bar chart of row counts per *column* value, coloured by churn label."""
    counts = (
        df_eda.groupby(['churn_risk_score', column])
        .aggregate(**{count_label: (column, 'count')})
        .reset_index()
    )
    fig = px.bar(counts, x=column, y=count_label, color='churn_risk_score',
                 color_discrete_sequence=_CHURN_COLORS, orientation="v",
                 hover_name=column)
    fig.update_layout(title_text=title)
    st.plotly_chart(fig)


def _plot_mean_by_churn(df_eda: pd.DataFrame, column: str, title: str) -> None:
    """Plot a bar chart of the mean of *column* for each churn label."""
    means = (
        df_eda.groupby(['churn_risk_score'])
        .aggregate(**{column: (column, 'mean')})
        .reset_index()
    )
    fig = px.bar(means, x="churn_risk_score", y=column, color='churn_risk_score',
                 color_discrete_sequence=_CHURN_COLORS, orientation="v")
    fig.update_layout(title_text=title)
    st.plotly_chart(fig)


def _render_membership(df_eda: pd.DataFrame) -> None:
    """Render the membership-category section."""
    st.write('## Number of Customers Based on Their Membership Categories')
    _plot_count_by_churn(
        df_eda,
        column='membership_category',
        count_label='Number_of_customer_per_membership_category',
        title="Number of customers based on their membership category",
    )
    st.write('Based on visualization above, customers **without membership** and customers with **basic membership** have the highest risk of churning. Based on my assumption, customers without membership and customers with basic membership may have a higher risk of churning for several reasons:')
    st.write('1. **Lack of loyalty**: Customers without membership or with basic membership may not feel a strong sense of loyalty to the company or brand, making it easier for them to switch to a competitor.')
    st.write('2. **Limited benefits**: Basic membership may offer limited benefits or perks compared to higher-tier memberships, making it less attractive to customers who may be seeking more value.')
    st.write('3. **Price sensitivity**: Customers without membership or with basic membership may be more price-sensitive and may be more likely to switch to a competitor if they find a better deal elsewhere.')
    st.write('4. **Limited engagement**: Customers without membership or with basic membership may have limited engagement with the company or brand, making it harder for the company to build a strong relationship with them and retain their loyalty.')


def _render_transaction_value(df_eda: pd.DataFrame) -> None:
    """Render the average-transaction-value section."""
    st.write('## Average Transaction Value')
    _plot_mean_by_churn(df_eda, column='avg_transaction_value',
                        title="Average Transaction Value")
    st.write('Based on visualization above, customers who are at risk of churning have a **lower average transaction value** compared to customers who are not at risk of churning. Based on my assumption, customers who are at risk of churning may have a lower average transaction value for several reasons:')
    st.write("1. **Reduced usage**: Customers who are at risk of churning may be using the company's products or services less frequently or may have stopped using them altogether. This reduced usage can result in a lower average transaction value.")
    st.write('2. **Price sensitivity**: Customers who are at risk of churning may be more price-sensitive and may be more likely to switch to a competitor if they find a better deal elsewhere. This can result in customers opting for lower-priced products or services, which can lower the average transaction value.')
    st.write('3. **Disengagement**: Customers who are at risk of churning may be less engaged with the company or brand and may be less likely to make high-value purchases. This reduced engagement can result in a lower average transaction value.')


def _render_wallet_points(df_eda: pd.DataFrame) -> None:
    """Render the points-in-wallet section."""
    st.write('## Points in Wallet')
    _plot_mean_by_churn(df_eda, column='points_in_wallet', title="Points in Wallet")
    st.write('Based on visualization above, customers who are at risk of churning have a **lower points balance in their wallet** compared to customers who are not at risk of churning. Based on my assumption, customers who are at risk of churning may have a lower points balance in their wallet for several reasons:')
    st.write("1. **Reduced usage**: Customers who are at risk of churning may be using the company's products or services less frequently or may have stopped using them altogether. This reduced usage can result in a lower accumulation of points in their wallet.")
    st.write('2. **Disengagement**: Customers who are at risk of churning may be less engaged with the company or brand and may not be actively participating in loyalty programs or earning points. This reduced engagement can result in a lower accumulation of points in their wallet.')


def _render_feedback(df_eda: pd.DataFrame) -> None:
    """Render the customer-feedback section."""
    st.write('## Feedback')
    _plot_count_by_churn(
        df_eda,
        column='feedback',
        count_label='Number_of_customer',
        title="Number of Customers Based on Their Feedback",
    )
    st.write('Based on visualization above, The most feedback that causes customers to be at risk of churning is **poor product quality**. Based on my assumption, poor product quality can cause customers to be at risk of churning for several reasons:')
    st.write("1. **Reduced satisfaction**: Poor product quality can lead to reduced customer satisfaction, which can result in customers being less likely to continue using the company's products or services.")
    st.write("2. **Negative word-of-mouth**: Customers who experience poor product quality may share their negative experiences with others, resulting in negative word-of-mouth for the company. This can lead to a decrease in new customer acquisition and can also increase the likelihood of existing customers churning.")
    st.write("3. **Lack of trust**: Poor product quality can lead to a lack of trust in the company and its ability to provide high-quality products or services. This lack of trust can cause customers to be less loyal and more likely to switch to a competitor.")
    st.write("4. **Perceived value**: Poor product quality can result in customers perceiving less value in the company's products or services, which can make them less likely to continue using them and more likely to switch to a competitor.")


def _render_correlation_matrix(df: pd.DataFrame) -> None:
    """Label-encode the object columns of *df* and render a correlation heatmap."""
    st.write('## Correlation Matrix Analysis')

    encoded = df.copy()
    cat_columns = encoded.select_dtypes(include=['object']).columns.tolist()

    # fit_transform refits the encoder on every call, so one instance can be reused.
    encoder = LabelEncoder()
    for col in cat_columns:
        encoded[col] = encoder.fit_transform(encoded[col])

    sns.set(font_scale=1)
    fig = plt.figure(figsize=(25, 25))
    sns.heatmap(encoded.corr(), annot=True, cmap='coolwarm', fmt='.2f')
    st.pyplot(fig)
    # Streamlit re-runs the script on every interaction; close the figure so
    # pyplot does not keep accumulating open figures.
    plt.close(fig)

    st.write('Features that have have a strong correlation with the target variable (`churn_risk_score`) are `membership_category`,`points_in_wallet`,`avg_transaction_value`,`feedback`,`avg_frequency_login_days`,`joined_through_referral`,`preferred_offer_types`,`medium_of_operation`,`region_category` and ` offer_application_preference`.')


def _render_distribution(df_eda: pd.DataFrame) -> None:
    """Render a histogram and boxplot for a user-selected numeric column."""
    st.write('## Histogram & Boxplot Based on User Input')
    select_column = st.selectbox('Select Column : ', _DISTRIBUTION_COLUMNS)

    sns.set(font_scale=2)
    fig, ax = plt.subplots(1, 2, figsize=(30, 10))
    fig.suptitle(f'Histogram and Boxplot Visualization of {select_column} ')

    sns.histplot(ax=ax[0], data=df_eda[select_column], kde=True)
    ax[0].set_title(f'{select_column} skewness: {df_eda[select_column].skew()}')

    sns.boxplot(ax=ax[1], data=df_eda, x=df_eda[select_column], width=0.50)
    ax[1].set_title(select_column)

    st.pyplot(fig)
    # Same reason as for the heatmap: release the figure after rendering.
    plt.close(fig)


def run():
    """Render the complete exploratory-data-analysis page.

    Reads ``churn.csv`` from the working directory, then renders each section
    in order: header, dataset overview, churn share, membership, transaction
    value, wallet points, feedback, correlation matrix and the interactive
    distribution plot.
    """
    _render_header()

    df = pd.read_csv('churn.csv')
    _render_dataset(df)

    st.write('# Exploratory Data Analysis ')
    df_eda = _with_churn_labels(df)

    _render_churn_share(df_eda)
    _render_membership(df_eda)
    _render_transaction_value(df_eda)
    _render_wallet_points(df_eda)
    _render_feedback(df_eda)
    # The correlation matrix uses the raw frame so the target stays numeric.
    _render_correlation_matrix(df)
    _render_distribution(df_eda)


if __name__ == '__main__':
    run()