Bitha's picture
Upload eda.py
6cafa70 verified
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Silence the warning that older Streamlit releases show when ``st.pyplot()``
# is called without an explicit figure.  Newer releases no longer define this
# option and reject the key, so an unrecognised option is ignored here rather
# than letting it abort the import of this module.
try:
    st.set_option('deprecation.showPyplotGlobalUse', False)
except st.errors.StreamlitAPIException:
    pass
# Function to run the EDA
def _render_figure(fig):
    """Draw *fig* in the Streamlit app and then close it.

    Passing the figure explicitly avoids Streamlit's global-pyplot mode,
    and closing it afterwards keeps Matplotlib from accumulating one more
    open figure every time the script is rerun.
    """
    st.pyplot(fig)
    plt.close(fig)


def run():
    """Render the exploratory-data-analysis page for the churn dataset.

    Reads ``Churn_Modelling.csv`` from the current working directory and
    draws eight numbered sections (overall churn, then churn by gender,
    age, geography, tenure, number of products, credit-card ownership and
    activity status).  ``Exited == 1`` is treated as "Churn" and
    ``Exited == 0`` as "Not Churn"; pink is used for "Not Churn" and grey
    for "Churn" in every churn-coloured chart.

    NOTE: the percentage lines written below some charts are fixed text;
    they are not recomputed from the loaded data.
    """
    # Load dataset
    df = pd.read_csv('Churn_Modelling.csv')

    # Set the title in the Streamlit app
    st.title('Exploratory Data Analysis')
    st.write('---')

    # 1. Distribution of Customer Churn
    st.subheader('1. Distribution of Customer Churn')
    # value_counts() orders by frequency; reindex so position 0 is always
    # Exited == 0 and the positional labels below cannot end up swapped.
    value_counts = df['Exited'].value_counts().reindex([0, 1], fill_value=0)
    churn_labels = ['Not Churn', 'Churn']
    churn_colors = ['pink', 'grey']
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
    ax1.pie(value_counts, labels=churn_labels, autopct='%1.1f%%', colors=churn_colors)
    ax1.set_title('Distribution of Customer Churn', fontsize=12)
    value_counts.index = churn_labels
    value_counts.plot(kind='bar', ax=ax2, color=churn_colors)
    ax2.set_title('Count of Customer Churn', fontsize=12)
    ax2.set_xlabel('Churn Status', fontsize=10)
    ax2.set_ylabel('Count', fontsize=10)
    ax2.tick_params(axis='x', rotation=0)
    # Write each count just above its bar.
    for i, v in enumerate(value_counts):
        ax2.text(i, v + 10, str(v), ha='center', va='bottom')
    _render_figure(fig)
    st.write('---')

    # 2. Customer Churn by Gender
    st.subheader('2. Customer Churn by Gender')
    gender_group = df.groupby(['Exited', 'Gender']).size().unstack()
    fig, ax = plt.subplots(figsize=(8, 5))
    gender_group.plot(kind='bar', color=['pink', 'grey'], ax=ax)
    for container in ax.containers:
        ax.bar_label(container)
    ax.set_xticklabels(['Not Churn', 'Churn'], rotation=0)
    ax.set_xlabel('Churn Status')
    ax.set_ylabel('Count')
    ax.set_title('Customer Churn by Gender')
    _render_figure(fig)
    st.write('**Churn by Gender Statistics**')
    st.write('Percentage of Male customers who churned: 16.5%')
    st.write('Percentage of Female customers who churned: 25.1%')
    st.write('---')

    # 3. Customer Churn by Age
    st.subheader('3. Customer Churn by Age')
    churn_customers = df[df['Exited'] == 1]
    not_churn_customers = df[df['Exited'] == 0]
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.hist(not_churn_customers['Age'], bins=30, alpha=0.5, label='Not Churn', color='pink')
    ax.hist(churn_customers['Age'], bins=30, alpha=0.5, label='Churn', color='grey')
    ax.set_title('Customer Churn by Age', fontsize=14)
    ax.set_xlabel('Age')
    ax.set_ylabel('Frequency')
    ax.legend(loc='upper right')
    _render_figure(fig)
    st.write('---')

    # 4. Customer Churn by Geography
    st.subheader('4. Customer Churn by Geography')
    geo_group = df.groupby(['Exited', 'Geography']).size().unstack()
    fig, ax = plt.subplots(figsize=(8, 5))
    geo_group.plot(kind='bar', color=['pink', 'grey', 'lightblue'], ax=ax)
    for container in ax.containers:
        ax.bar_label(container)
    ax.set_xticklabels(['Not Churn', 'Churn'], rotation=0)
    ax.set_xlabel('Churn Status', fontsize=9)
    ax.set_ylabel('Count', fontsize=9)
    ax.set_title('Customer Churn by Geography', fontsize=10)
    _render_figure(fig)
    st.write('**Churn by Geography Statistics**')
    st.write('Percentage of customers who churned in France: 16.2 %')
    st.write('Percentage of customers who churned in Germany: 32.5 %')
    st.write('Percentage of customers who churned in Spain: 16.7 %')
    st.write('---')

    # 5. Customer Churn by Tenure
    st.subheader('5. Customer Churn by Tenure')
    churn_by_tenure = df.groupby(['Tenure', 'Exited']).size().unstack()
    fig, ax = plt.subplots(figsize=(8, 5))
    churn_by_tenure.plot(kind='bar', stacked=True, color=['pink', 'grey'], ax=ax)
    ax.set_title('Customer Churn by Tenure', fontsize=14)
    ax.tick_params(axis='x', rotation=0)
    ax.set_xlabel('Tenure (years)')
    ax.set_ylabel('Number of Customers')
    ax.legend(['Not Churn', 'Churn'])
    for container in ax.containers:
        ax.bar_label(container, label_type='center')
    _render_figure(fig)
    st.write('---')

    # 6. Customer Churn by Number of Products
    st.subheader('6. Customer Churn by Number of Products')
    fig, ax = plt.subplots(figsize=(8, 5))
    # Palette order follows the hue levels (0, 1): pink = Not Churn,
    # grey = Churn, matching the other charts on this page.
    sns.countplot(data=df, x='NumOfProducts', hue='Exited', palette=['pink', 'grey'], ax=ax)
    ax.set_title('Customer Churn by Number of Products', fontsize=14)
    ax.set_xlabel('Number of Products', fontsize=10)
    ax.set_ylabel('Number of Customers', fontsize=10)
    for container in ax.containers:
        ax.bar_label(container)
    _render_figure(fig)
    st.write('**Churn by Number of Products Statistics**')
    st.write('Percentage of customers who churned with 1 products: 27.7 %')
    st.write('Percentage of customers who churned with 2 products: 7.6 %')
    st.write('Percentage of customers who churned with 3 products: 82.7 %')
    st.write('Percentage of customers who churned with 4 products: 100 %')
    st.write('---')

    # 7. Customer Churn by Has Credit Card or Not
    st.subheader('7. Customer Churn by Has Credit Card or Not')
    churn_by_HasCrCard = df.groupby(['HasCrCard', 'Exited']).size().unstack()
    fig, ax = plt.subplots(figsize=(8, 4))
    churn_by_HasCrCard.plot(kind='bar', stacked=True, color=['pink', 'grey'], ax=ax)
    ax.set_title('Customer Churn by HasCrCard', fontsize=11)
    ax.set_xlabel('HasCrCard', fontsize=10)
    ax.set_ylabel('Number of Customers', fontsize=10)
    ax.legend(['Not Churn', 'Churn'])
    # groupby sorts its keys ascending, so the first bar is HasCrCard == 0
    # (customers without a card) and the second is HasCrCard == 1.
    ax.set_xticklabels(['No Credit Card', 'Has Credit Card'], rotation=0)
    for container in ax.containers:
        ax.bar_label(container, label_type='center')
    _render_figure(fig)
    st.write('**Churn by Has Credit Card or Not Statistics**')
    st.write('Percentage of customers who churned with No Credit Card: 20.8 %')
    st.write('Percentage of customers who churned with Has Credit Card: 20.2 %')
    st.write('---')

    # 8. Customer Churn by Is Active Member or Not
    st.subheader('8. Customer Churn by Is Active Member or Not')
    churn_by_IsActiveMember = df.groupby(['IsActiveMember', 'Exited']).size().unstack()
    fig, ax = plt.subplots(figsize=(8, 4))
    churn_by_IsActiveMember.plot(kind='bar', stacked=True, color=['pink', 'grey'], ax=ax)
    ax.set_title('Customer Churn by IsActiveMember', fontsize=11)
    ax.set_xlabel('IsActiveMember', fontsize=10)
    ax.set_ylabel('Number of Customers', fontsize=10)
    ax.legend(['Not Churn', 'Churn'], loc='lower center')
    # First bar is IsActiveMember == 0, second is IsActiveMember == 1.
    ax.set_xticklabels(['Inactive Member', 'Active Member'], rotation=0)
    for container in ax.containers:
        ax.bar_label(container, label_type='center')
    _render_figure(fig)
    st.write('**Churn by Is Active Member or Not Statistics**')
    st.write('Percentage of customers who churned with Inactive Member: 26.9 %')
    st.write('Percentage of customers who churned with Active Member: 14.3 %')