File size: 6,825 Bytes
c980d16 6cafa70 c980d16 6cafa70 c980d16 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Suppress Streamlit's deprecation warning for st.pyplot() calls that rely on
# the global pyplot figure instead of an explicit figure argument.
# NOTE(review): newer Streamlit releases may no longer recognise this option --
# confirm against the installed Streamlit version.
st.set_option('deprecation.showPyplotGlobalUse', False)
# Labels and colours shared by every chart that splits customers by churn
# status, ordered to match the values 0 and 1 of the 'Exited' column.
_CHURN_LABELS = ['Not Churn', 'Churn']
_CHURN_COLORS = ['pink', 'grey']


def _show(fig):
    """Render *fig* in the Streamlit app, then close it."""
    st.pyplot(fig)
    # pyplot keeps a reference to every figure until it is closed; closing it
    # here stops figures from accumulating across Streamlit reruns.
    plt.close(fig)


def _label_bars(ax, **kwargs):
    """Annotate every bar on *ax* with its value."""
    for container in ax.containers:
        ax.bar_label(container, **kwargs)


def _stacked_churn_chart(df, column, title, xlabel, *, figsize, title_size,
                         label_size=None, tick_labels=None, legend_loc='best'):
    """Render a stacked bar chart of churn counts for each value of *column*.

    Args:
        df: Customer data containing *column* and the 'Exited' column.
        column: Name of the column whose values go on the x-axis.
        title: Chart title.
        xlabel: X-axis label.
        figsize: Figure size passed to ``plt.subplots``.
        title_size: Font size of the title.
        label_size: Font size of both axis labels; ``None`` keeps the
            matplotlib default.
        tick_labels: Optional mapping ``{column value: tick label}``. Bars are
            drawn in the mapping's order, so a label can never end up under
            the wrong bar.
        legend_loc: Legend location.
    """
    counts = df.groupby([column, 'Exited']).size().unstack(fill_value=0)
    # Pin the column order so colours and legend entries always correspond to
    # Exited == 0 and Exited == 1, even if one class is absent.
    counts = counts.reindex(columns=[0, 1], fill_value=0)
    if tick_labels is not None:
        counts = (
            counts.reindex(index=list(tick_labels), fill_value=0)
            .rename(index=tick_labels)
        )

    label_font = {} if label_size is None else {'fontsize': label_size}
    fig, ax = plt.subplots(figsize=figsize)
    counts.plot(kind='bar', stacked=True, color=_CHURN_COLORS, ax=ax, rot=0)
    ax.set_title(title, fontsize=title_size)
    ax.set_xlabel(xlabel, **label_font)
    ax.set_ylabel('Number of Customers', **label_font)
    ax.legend(_CHURN_LABELS, loc=legend_loc)
    _label_bars(ax, label_type='center')
    _show(fig)


def run(csv_path='Churn_Modelling.csv'):
    """Render the exploratory data analysis page of the Streamlit app.

    Loads the churn dataset and draws eight sections: the overall churn
    distribution, then churn broken down by gender, age, geography, tenure,
    number of products, credit-card ownership and membership activity.

    Args:
        csv_path: Path of the CSV file to analyse. It must contain the columns
            'Exited', 'Gender', 'Age', 'Geography', 'Tenure', 'NumOfProducts',
            'HasCrCard' and 'IsActiveMember'.

    Note:
        The percentages written below the charts are fixed strings; they are
        not recomputed from the loaded data.
    """
    df = pd.read_csv(csv_path)
    churn_index = dict(zip((0, 1), _CHURN_LABELS))

    st.title('Exploratory Data Analysis')
    st.write('---')

    # 1. Distribution of Customer Churn
    st.subheader('1. Distribution of Customer Churn')
    # value_counts() orders by frequency, so pin the order to [0, 1] before
    # attaching the labels instead of assuming class 0 is the majority.
    churn_counts = df['Exited'].value_counts().reindex([0, 1], fill_value=0)
    churn_counts.index = _CHURN_LABELS
    fig, (pie_ax, bar_ax) = plt.subplots(1, 2, figsize=(14, 6))
    pie_ax.pie(churn_counts, labels=_CHURN_LABELS, autopct='%1.1f%%',
               colors=_CHURN_COLORS)
    pie_ax.set_title('Distribution of Customer Churn', fontsize=12)
    churn_counts.plot(kind='bar', ax=bar_ax, color=_CHURN_COLORS, rot=0)
    bar_ax.set_title('Count of Customer Churn', fontsize=12)
    bar_ax.set_xlabel('Churn Status', fontsize=10)
    bar_ax.set_ylabel('Count', fontsize=10)
    for position, count in enumerate(churn_counts):
        bar_ax.text(position, count + 10, str(count), ha='center', va='bottom')
    _show(fig)
    st.write('---')

    # 2. Customer Churn by Gender
    st.subheader('2. Customer Churn by Gender')
    gender_group = (
        df.groupby(['Exited', 'Gender']).size().unstack(fill_value=0)
        .reindex(index=[0, 1], fill_value=0)
        .rename(index=churn_index)
    )
    fig, ax = plt.subplots(figsize=(8, 5))
    # Here the colours distinguish genders, not churn status.
    gender_group.plot(kind='bar', color=['pink', 'grey'], ax=ax, rot=0)
    _label_bars(ax)
    ax.set_xlabel('Churn Status')
    ax.set_ylabel('Count')
    ax.set_title('Customer Churn by Gender')
    _show(fig)
    st.write('**Churn by Gender Statistics**')
    st.write('Percentage of Male customers who churned: 16.5%')
    st.write('Percentage of Female customers who churned: 25.1%')
    st.write('---')

    # 3. Customer Churn by Age
    st.subheader('3. Customer Churn by Age')
    fig, ax = plt.subplots(figsize=(10, 6))
    for status, label, color in zip((0, 1), _CHURN_LABELS, _CHURN_COLORS):
        ax.hist(df.loc[df['Exited'] == status, 'Age'], bins=30, alpha=0.5,
                label=label, color=color)
    ax.set_title('Customer Churn by Age', fontsize=14)
    ax.set_xlabel('Age')
    ax.set_ylabel('Frequency')
    ax.legend(loc='upper right')
    _show(fig)
    st.write('---')

    # 4. Customer Churn by Geography
    st.subheader('4. Customer Churn by Geography')
    geo_group = (
        df.groupby(['Exited', 'Geography']).size().unstack(fill_value=0)
        .reindex(index=[0, 1], fill_value=0)
        .rename(index=churn_index)
    )
    fig, ax = plt.subplots(figsize=(8, 5))
    # Here the colours distinguish countries, not churn status.
    geo_group.plot(kind='bar', color=['pink', 'grey', 'lightblue'], ax=ax,
                   rot=0)
    _label_bars(ax)
    ax.set_xlabel('Churn Status', fontsize=9)
    ax.set_ylabel('Count', fontsize=9)
    ax.set_title('Customer Churn by Geography', fontsize=10)
    _show(fig)
    st.write('**Churn by Geography Statistics**')
    st.write('Percentage of customers who churned in France: 16.2 %')
    st.write('Percentage of customers who churned in Germany: 32.5 %')
    st.write('Percentage of customers who churned in Spain: 16.7 %')
    st.write('---')

    # 5. Customer Churn by Tenure
    st.subheader('5. Customer Churn by Tenure')
    _stacked_churn_chart(
        df, 'Tenure', 'Customer Churn by Tenure', 'Tenure (years)',
        figsize=(8, 5), title_size=14,
    )
    st.write('---')

    # 6. Customer Churn by Number of Products
    st.subheader('6. Customer Churn by Number of Products')
    # Plot against a labelled copy of 'Exited' so the legend reads
    # 'Not Churn' / 'Churn' and the colours match the other charts.
    labelled = df.assign(**{'Churn Status': df['Exited'].map(churn_index)})
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.countplot(
        data=labelled, x='NumOfProducts', hue='Churn Status',
        hue_order=_CHURN_LABELS,
        palette=dict(zip(_CHURN_LABELS, _CHURN_COLORS)),
        ax=ax,
    )
    ax.set_title('Customer Churn by Number of Products', fontsize=14)
    ax.set_xlabel('Number of Products', fontsize=10)
    ax.set_ylabel('Number of Customers', fontsize=10)
    _label_bars(ax)
    _show(fig)
    st.write('**Churn by Number of Products Statistics**')
    st.write('Percentage of customers who churned with 1 products: 27.7 %')
    st.write('Percentage of customers who churned with 2 products: 7.6 %')
    st.write('Percentage of customers who churned with 3 products: 82.7 %')
    st.write('Percentage of customers who churned with 4 products: 100 %')
    st.write('---')

    # 7. Customer Churn by Has Credit Card or Not
    st.subheader('7. Customer Churn by Has Credit Card or Not')
    # HasCrCard == 0 means no card; the explicit mapping keeps each label
    # under the bar it describes.
    _stacked_churn_chart(
        df, 'HasCrCard', 'Customer Churn by HasCrCard', 'HasCrCard',
        figsize=(8, 4), title_size=11, label_size=10,
        tick_labels={0: 'No Credit Card', 1: 'Has Credit Card'},
    )
    st.write('**Churn by Has Credit Card or Not Statistics**')
    st.write('Percentage of customers who churned with No Credit Card: 20.8 %')
    st.write('Percentage of customers who churned with Has Credit Card: 20.2 %')
    st.write('---')

    # 8. Customer Churn by Is Active Member or Not
    st.subheader('8. Customer Churn by Is Active Member or Not')
    _stacked_churn_chart(
        df, 'IsActiveMember', 'Customer Churn by IsActiveMember',
        'IsActiveMember',
        figsize=(8, 4), title_size=11, label_size=10,
        tick_labels={0: 'Inactive Member', 1: 'Active Member'},
        legend_loc='lower center',
    )
    st.write('**Churn by Is Active Member or Not Statistics**')
    st.write('Percentage of customers who churned with Inactive Member: 26.9 %')
    st.write('Percentage of customers who churned with Active Member: 14.3 %')
|