Spaces:
Build error
Build error
import streamlit as st | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
# Older Streamlit releases emit a deprecation warning whenever st.pyplot() is
# called without an explicit figure; this option silences it. Newer releases
# no longer define the option, and set_option() raises StreamlitAPIException
# for an unrecognised key, so tolerate that instead of crashing at import.
try:
    st.set_option('deprecation.showPyplotGlobalUse', False)
except st.errors.StreamlitAPIException:
    pass
# Function to run the EDA | |
def _render_figure(fig):
    """Draw *fig* in the Streamlit app, then close it.

    Passing the figure explicitly avoids the deprecated global-pyplot form of
    ``st.pyplot()``; closing it afterwards stops figures from piling up in
    matplotlib's registry across Streamlit reruns.
    """
    st.pyplot(fig)
    plt.close(fig)


def _stacked_flag_chart(df, column, tick_labels, legend_loc=None):
    """Render a stacked churn bar chart for a 0/1 flag column.

    Args:
        df: DataFrame holding ``column`` and ``'Exited'``.
        column: Name of the flag column to group by.
        tick_labels: X tick labels, ordered for flag value 0 then 1
            (``groupby`` sorts the group keys ascending).
        legend_loc: Legend location; ``None`` keeps matplotlib's default.
    """
    counts = df.groupby([column, 'Exited']).size().unstack()
    fig, ax = plt.subplots(figsize=(8, 4))
    counts.plot(kind='bar', stacked=True, color=['pink', 'grey'], ax=ax, rot=0)
    ax.set_title(f'Customer Churn by {column}', fontsize=11)
    ax.set_xlabel(column, fontsize=10)
    ax.set_ylabel('Number of Customers', fontsize=10)
    ax.legend(['Not Churn', 'Churn'], loc=legend_loc)
    ax.set_xticklabels(tick_labels, rotation=0)
    for container in ax.containers:
        ax.bar_label(container, label_type='center')
    _render_figure(fig)


# Function to run the EDA
def run():
    """Render the exploratory-data-analysis page of the Streamlit app.

    Reads ``Churn_Modelling.csv`` from the working directory and draws eight
    churn breakdowns (overall, gender, age, geography, tenure, number of
    products, credit card, active membership). Throughout, ``Exited == 0`` is
    shown as "Not Churn" (pink) and ``Exited == 1`` as "Churn" (grey).

    NOTE: the percentage figures written under several charts are fixed
    text, not values computed from the loaded data.
    """
    # Load dataset
    df = pd.read_csv('Churn_Modelling.csv')

    # Set the title in the Streamlit app
    st.title('Exploratory Data Analysis')
    st.write('---')

    # 1. Distribution of Customer Churn
    st.subheader('1. Distribution of Customer Churn')
    # value_counts() orders by frequency, so pin the order to [0, 1] before
    # attaching the fixed labels; otherwise they could end up on the wrong class.
    churn_counts = df['Exited'].value_counts().reindex([0, 1], fill_value=0)
    churn_counts.index = ['Not Churn', 'Churn']
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
    ax1.pie(churn_counts, labels=churn_counts.index, autopct='%1.1f%%',
            colors=['pink', 'grey'])
    ax1.set_title('Distribution of Customer Churn', fontsize=12)
    churn_counts.plot(kind='bar', ax=ax2, color=['pink', 'grey'])
    ax2.set_title('Count of Customer Churn', fontsize=12)
    ax2.set_xlabel('Churn Status', fontsize=10)
    ax2.set_ylabel('Count', fontsize=10)
    ax2.tick_params(axis='x', rotation=0)
    for i, v in enumerate(churn_counts):
        ax2.text(i, v + 10, str(v), ha='center', va='bottom')
    _render_figure(fig)
    st.write('---')

    # 2. Customer Churn by Gender
    st.subheader('2. Customer Churn by Gender')
    gender_group = df.groupby(['Exited', 'Gender']).size().unstack()
    fig, ax = plt.subplots(figsize=(8, 5))
    gender_group.plot(kind='bar', color=['pink', 'grey'], ax=ax)
    for container in ax.containers:
        ax.bar_label(container)
    ax.set_xticklabels(['Not Churn', 'Churn'], rotation=0)
    ax.set_xlabel('Churn Status')
    ax.set_ylabel('Count')
    ax.set_title('Customer Churn by Gender')
    _render_figure(fig)
    st.write('**Churn by Gender Statistics**')
    st.write('Percentage of Male customers who churned: 16.5%')
    st.write('Percentage of Female customers who churned: 25.1%')
    st.write('---')

    # 3. Customer Churn by Age
    st.subheader('3. Customer Churn by Age')
    churn_customers = df[df['Exited'] == 1]
    not_churn_customers = df[df['Exited'] == 0]
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.hist(not_churn_customers['Age'], bins=30, alpha=0.5, label='Not Churn', color='pink')
    ax.hist(churn_customers['Age'], bins=30, alpha=0.5, label='Churn', color='grey')
    ax.set_title('Customer Churn by Age', fontsize=14)
    ax.set_xlabel('Age')
    ax.set_ylabel('Frequency')
    ax.legend(loc='upper right')
    _render_figure(fig)
    st.write('---')

    # 4. Customer Churn by Geography
    st.subheader('4. Customer Churn by Geography')
    geo_group = df.groupby(['Exited', 'Geography']).size().unstack()
    fig, ax = plt.subplots(figsize=(8, 5))
    geo_group.plot(kind='bar', color=['pink', 'grey', 'lightblue'], ax=ax)
    for container in ax.containers:
        ax.bar_label(container)
    ax.set_xticklabels(['Not Churn', 'Churn'], rotation=0)
    ax.set_xlabel('Churn Status', fontsize=9)
    ax.set_ylabel('Count', fontsize=9)
    ax.set_title('Customer Churn by Geography', fontsize=10)
    _render_figure(fig)
    st.write('**Churn by Geography Statistics**')
    st.write('Percentage of customers who churned in France: 16.2 %')
    st.write('Percentage of customers who churned in Germany: 32.5 %')
    st.write('Percentage of customers who churned in Spain: 16.7 %')
    st.write('---')

    # 5. Customer Churn by Tenure
    st.subheader('5. Customer Churn by Tenure')
    churn_by_tenure = df.groupby(['Tenure', 'Exited']).size().unstack()
    fig, ax = plt.subplots(figsize=(8, 5))
    churn_by_tenure.plot(kind='bar', stacked=True, color=['pink', 'grey'], ax=ax, rot=0)
    ax.set_title('Customer Churn by Tenure', fontsize=14)
    ax.set_xlabel('Tenure (years)')
    ax.set_ylabel('Number of Customers')
    ax.legend(['Not Churn', 'Churn'])
    for container in ax.containers:
        ax.bar_label(container, label_type='center')
    _render_figure(fig)
    st.write('---')

    # 6. Customer Churn by Number of Products
    st.subheader('6. Customer Churn by Number of Products')
    fig, ax = plt.subplots(figsize=(8, 5))
    # Palette follows hue order (Exited 0, then 1): pink = not churned,
    # grey = churned, matching every other chart on this page.
    sns.countplot(data=df, x='NumOfProducts', hue='Exited', palette=['pink', 'grey'], ax=ax)
    ax.set_title('Customer Churn by Number of Products', fontsize=14)
    ax.set_xlabel('Number of Products', fontsize=10)
    ax.set_ylabel('Number of Customers', fontsize=10)
    for container in ax.containers:
        ax.bar_label(container)
    _render_figure(fig)
    st.write('**Churn by Number of Products Statistics**')
    st.write('Percentage of customers who churned with 1 products: 27.7 %')
    st.write('Percentage of customers who churned with 2 products: 7.6 %')
    st.write('Percentage of customers who churned with 3 products: 82.7 %')
    st.write('Percentage of customers who churned with 4 products: 100 %')
    st.write('---')

    # 7. Customer Churn by Has Credit Card or Not
    st.subheader('7. Customer Churn by Has Credit Card or Not')
    # Bars are ordered HasCrCard 0 then 1, so "No Credit Card" must come first.
    _stacked_flag_chart(df, 'HasCrCard', ['No Credit Card', 'Has Credit Card'])
    st.write('**Churn by Has Credit Card or Not Statistics**')
    st.write('Percentage of customers who churned with No Credit Card: 20.8 %')
    st.write('Percentage of customers who churned with Has Credit Card: 20.2 %')
    st.write('---')

    # 8. Customer Churn by Is Active Member or Not
    st.subheader('8. Customer Churn by Is Active Member or Not')
    _stacked_flag_chart(df, 'IsActiveMember', ['Inactive Member', 'Active Member'],
                        legend_loc='lower center')
    st.write('**Churn by Is Active Member or Not Statistics**')
    st.write('Percentage of customers who churned with Inactive Member: 26.9 %')
    st.write('Percentage of customers who churned with Active Member: 14.3 %')