|
|
|
import streamlit as st |
|
import pandas as pd |
|
import matplotlib.pyplot as plt |
|
import seaborn as sns |
|
|
|
# Silence the warning Streamlit shows when st.pyplot() is called without an
# explicit figure. Recent Streamlit releases no longer recognise this config
# key and reject it, so tolerate that instead of failing at import time.
try:
    st.set_option('deprecation.showPyplotGlobalUse', False)
except st.errors.StreamlitAPIException:
    # Option is unavailable in this Streamlit version; nothing to configure.
    pass
|
|
|
|
|
# Display names and colours for Exited == 0 and Exited == 1, in that order.
_CHURN_LABELS = ['Not Churn', 'Churn']
_CHURN_COLORS = ['pink', 'grey']


def _render(fig):
    """Draw *fig* on the Streamlit page, then close it.

    Figures created through pyplot stay registered until they are closed,
    so each one is released as soon as it has been rendered.
    """
    st.pyplot(fig)
    plt.close(fig)


def _plot_churn_distribution(df):
    """Section 1: pie and bar chart of the overall churn counts."""
    st.subheader('1. Distribution of Customer Churn')
    # value_counts() orders by frequency; pin the order to 0, 1 so that the
    # labels always describe the right class, even if one class is absent.
    churn_counts = df['Exited'].value_counts().reindex([0, 1], fill_value=0)
    churn_counts.index = _CHURN_LABELS

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
    ax1.pie(churn_counts, labels=_CHURN_LABELS, autopct='%1.1f%%',
            colors=_CHURN_COLORS)
    ax1.set_title('Distribution of Customer Churn', fontsize=12)

    churn_counts.plot(kind='bar', ax=ax2, color=_CHURN_COLORS)
    ax2.set_title('Count of Customer Churn', fontsize=12)
    ax2.set_xlabel('Churn Status', fontsize=10)
    ax2.set_ylabel('Count', fontsize=10)
    ax2.tick_params(axis='x', rotation=0)
    # Print each count just above its bar.
    for position, count in enumerate(churn_counts):
        ax2.text(position, count + 10, str(count), ha='center', va='bottom')
    _render(fig)


def _plot_churn_by_gender(df):
    """Section 2: grouped bars of churn status split by gender."""
    st.subheader('2. Customer Churn by Gender')
    # fill_value=0 keeps a missing (status, gender) pair from becoming NaN.
    gender_group = df.groupby(['Exited', 'Gender']).size().unstack(fill_value=0)

    fig, ax = plt.subplots(figsize=(8, 5))
    gender_group.plot(kind='bar', color=['pink', 'grey'], ax=ax)
    for container in ax.containers:
        ax.bar_label(container)
    ax.set_xticklabels(_CHURN_LABELS, rotation=0)
    ax.set_xlabel('Churn Status')
    ax.set_ylabel('Count')
    ax.set_title('Customer Churn by Gender')
    _render(fig)

    # NOTE: the figures below are fixed text, not computed from df.
    st.write('**Churn by Gender Statistics**')
    st.write('Percentage of Male customers who churned: 16.5%')
    st.write('Percentage of Female customers who churned: 25.1%')


def _plot_churn_by_age(df):
    """Section 3: overlaid age histograms for churned and retained customers."""
    st.subheader('3. Customer Churn by Age')
    churned_age = df.loc[df['Exited'] == 1, 'Age']
    retained_age = df.loc[df['Exited'] == 0, 'Age']

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.hist(retained_age, bins=30, alpha=0.5, label='Not Churn', color='pink')
    ax.hist(churned_age, bins=30, alpha=0.5, label='Churn', color='grey')
    ax.set_title('Customer Churn by Age', fontsize=14)
    ax.set_xlabel('Age')
    ax.set_ylabel('Frequency')
    ax.legend(loc='upper right')
    _render(fig)


def _plot_churn_by_geography(df):
    """Section 4: grouped bars of churn status split by geography."""
    st.subheader('4. Customer Churn by Geography')
    geo_group = df.groupby(['Exited', 'Geography']).size().unstack(fill_value=0)

    fig, ax = plt.subplots(figsize=(8, 5))
    geo_group.plot(kind='bar', color=['pink', 'grey', 'lightblue'], ax=ax)
    for container in ax.containers:
        ax.bar_label(container)
    ax.set_xticklabels(_CHURN_LABELS, rotation=0)
    ax.set_xlabel('Churn Status', fontsize=9)
    ax.set_ylabel('Count', fontsize=9)
    ax.set_title('Customer Churn by Geography', fontsize=10)
    _render(fig)

    # NOTE: the figures below are fixed text, not computed from df.
    st.write('**Churn by Geography Statistics**')
    st.write('Percentage of customers who churned in France: 16.2 %')
    st.write('Percentage of customers who churned in Germany: 32.5 %')
    st.write('Percentage of customers who churned in Spain: 16.7 %')


def _plot_churn_by_tenure(df):
    """Section 5: stacked bars of churn status for each tenure value."""
    st.subheader('5. Customer Churn by Tenure')
    churn_by_tenure = df.groupby(['Tenure', 'Exited']).size().unstack(fill_value=0)

    fig, ax = plt.subplots(figsize=(8, 5))
    churn_by_tenure.plot(kind='bar', stacked=True, color=_CHURN_COLORS, ax=ax)
    ax.set_title('Customer Churn by Tenure', fontsize=14)
    ax.tick_params(axis='x', rotation=0)
    ax.set_xlabel('Tenure (years)')
    ax.set_ylabel('Number of Customers')
    ax.legend(_CHURN_LABELS)
    for container in ax.containers:
        ax.bar_label(container, label_type='center')
    _render(fig)


def _plot_churn_by_products(df):
    """Section 6: count plot of churn status per number of products."""
    st.subheader('6. Customer Churn by Number of Products')

    fig, ax = plt.subplots(figsize=(8, 5))
    # Same colour convention as the other charts: pink = Not Churn (0),
    # grey = Churn (1).
    sns.countplot(data=df, x='NumOfProducts', hue='Exited', hue_order=[0, 1],
                  palette=_CHURN_COLORS, ax=ax)
    ax.set_title('Customer Churn by Number of Products', fontsize=14)
    ax.set_xlabel('Number of Products', fontsize=10)
    ax.set_ylabel('Number of Customers', fontsize=10)
    # Replace the raw 0/1 hue values in the legend with readable names.
    ax.legend(_CHURN_LABELS)
    for container in ax.containers:
        ax.bar_label(container)
    _render(fig)

    # NOTE: the figures below are fixed text, not computed from df.
    st.write('**Churn by Number of Products Statistics**')
    st.write('Percentage of customers who churned with 1 products: 27.7 %')
    st.write('Percentage of customers who churned with 2 products: 7.6 %')
    st.write('Percentage of customers who churned with 3 products: 82.7 %')
    st.write('Percentage of customers who churned with 4 products: 100 %')


def _plot_churn_by_credit_card(df):
    """Section 7: stacked bars of churn status by credit-card ownership."""
    st.subheader('7. Customer Churn by Has Credit Card or Not')
    churn_by_card = df.groupby(['HasCrCard', 'Exited']).size().unstack(fill_value=0)

    fig, ax = plt.subplots(figsize=(8, 4))
    churn_by_card.plot(kind='bar', stacked=True, color=_CHURN_COLORS, ax=ax)
    ax.set_title('Customer Churn by HasCrCard', fontsize=11)
    ax.tick_params(axis='x', rotation=0)
    ax.set_xlabel('HasCrCard', fontsize=10)
    ax.set_ylabel('Number of Customers', fontsize=10)
    ax.legend(_CHURN_LABELS)
    # groupby sorts HasCrCard ascending, so the first bar is 0 (no card) and
    # the second is 1 (has card).
    ax.set_xticklabels(['No Credit Card', 'Has Credit Card'])
    for container in ax.containers:
        ax.bar_label(container, label_type='center')
    _render(fig)

    # NOTE: the figures below are fixed text, not computed from df.
    st.write('**Churn by Has Credit Card or Not Statistics**')
    st.write('Percentage of customers who churned with No Credit Card: 20.8 %')
    st.write('Percentage of customers who churned with Has Credit Card: 20.2 %')


def _plot_churn_by_activity(df):
    """Section 8: stacked bars of churn status by active-member flag."""
    st.subheader('8. Customer Churn by Is Active Member or Not')
    churn_by_activity = (
        df.groupby(['IsActiveMember', 'Exited']).size().unstack(fill_value=0)
    )

    fig, ax = plt.subplots(figsize=(8, 4))
    churn_by_activity.plot(kind='bar', stacked=True, color=_CHURN_COLORS, ax=ax)
    ax.set_title('Customer Churn by IsActiveMember', fontsize=11)
    ax.tick_params(axis='x', rotation=0)
    ax.set_xlabel('IsActiveMember', fontsize=10)
    ax.set_ylabel('Number of Customers', fontsize=10)
    ax.legend(_CHURN_LABELS, loc='lower center')
    # groupby sorts IsActiveMember ascending: 0 (inactive) then 1 (active).
    ax.set_xticklabels(['Inactive Member', 'Active Member'])
    for container in ax.containers:
        ax.bar_label(container, label_type='center')
    _render(fig)

    # NOTE: the figures below are fixed text, not computed from df.
    st.write('**Churn by Is Active Member or Not Statistics**')
    st.write('Percentage of customers who churned with Inactive Member: 26.9 %')
    st.write('Percentage of customers who churned with Active Member: 14.3 %')


def run():
    """Render the exploratory-data-analysis page.

    Reads 'Churn_Modelling.csv' from the current working directory and draws
    eight numbered sections, each breaking the 'Exited' column down by one
    other column. Every section is preceded by a horizontal rule.

    Raises whatever ``pd.read_csv`` raises when the file cannot be read.
    """
    df = pd.read_csv('Churn_Modelling.csv')

    st.title('Exploratory Data Analysis')

    sections = (
        _plot_churn_distribution,
        _plot_churn_by_gender,
        _plot_churn_by_age,
        _plot_churn_by_geography,
        _plot_churn_by_tenure,
        _plot_churn_by_products,
        _plot_churn_by_credit_card,
        _plot_churn_by_activity,
    )
    for render_section in sections:
        st.write('---')
        render_section(df)
|
|