"""Streamlit page: exploratory data analysis of the customer churn dataset."""

import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Silences Streamlit's warning about calling st.pyplot() with no figure.
# Every chart below passes its figure explicitly, so nothing in this module
# triggers that warning; the option is kept so app-wide behaviour is unchanged.
# NOTE(review): confirm the installed Streamlit version still accepts this key.
st.set_option('deprecation.showPyplotGlobalUse', False)

_DATA_PATH = 'Churn_Modelling.csv'
# Display names for Exited == 0 and Exited == 1, in that order.
_CHURN_LABELS = ['Not Churn', 'Churn']
_CHURN_COLORS = ['pink', 'grey']


def _render(fig):
    """Draw *fig* on the Streamlit page, then close it.

    Closing matters because Streamlit re-executes the script on every
    interaction; figures that are never closed pile up in pyplot's registry.
    """
    st.pyplot(fig)
    plt.close(fig)


def _label_bars(ax, **kwargs):
    """Annotate every bar container on *ax* with its value."""
    for container in ax.containers:
        ax.bar_label(container, **kwargs)


def _section_distribution(df):
    """Section 1: pie chart and bar chart of overall churn counts."""
    st.subheader('1. Distribution of Customer Churn')

    # value_counts() orders by frequency, so pin the order to (0, 1); the
    # labels then stay correct even if churners outnumber non-churners.
    value_counts = df['Exited'].value_counts().reindex([0, 1], fill_value=0)
    value_counts.index = _CHURN_LABELS

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

    ax1.pie(value_counts, labels=_CHURN_LABELS, autopct='%1.1f%%',
            colors=_CHURN_COLORS)
    ax1.set_title('Distribution of Customer Churn', fontsize=12)

    value_counts.plot(kind='bar', ax=ax2, color=_CHURN_COLORS)
    ax2.set_title('Count of Customer Churn', fontsize=12)
    ax2.set_xlabel('Churn Status', fontsize=10)
    ax2.set_ylabel('Count', fontsize=10)
    ax2.tick_params(axis='x', rotation=0)
    # Write each count just above its bar.
    for i, v in enumerate(value_counts):
        ax2.text(i, v + 10, str(v), ha='center', va='bottom')

    _render(fig)


def _section_gender(df):
    """Section 2: grouped bar chart of churn status split by gender."""
    st.subheader('2. Customer Churn by Gender')

    gender_group = df.groupby(['Exited', 'Gender']).size().unstack()

    fig, ax = plt.subplots(figsize=(8, 5))
    gender_group.plot(kind='bar', color=_CHURN_COLORS, ax=ax)
    _label_bars(ax)
    # groupby sorts Exited ascending, so the x positions are 0 then 1.
    ax.set_xticklabels(_CHURN_LABELS, rotation=0)
    ax.set_xlabel('Churn Status')
    ax.set_ylabel('Count')
    ax.set_title('Customer Churn by Gender')
    _render(fig)

    # NOTE: these figures are hard-coded, not computed from the loaded data.
    st.write('**Churn by Gender Statistics**')
    st.write('Percentage of Male customers who churned: 16.5%')
    st.write('Percentage of Female customers who churned: 25.1%')


def _section_age(df):
    """Section 3: overlaid age histograms for churned and retained customers."""
    st.subheader('3. Customer Churn by Age')

    churn_customers = df[df['Exited'] == 1]
    not_churn_customers = df[df['Exited'] == 0]

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.hist(not_churn_customers['Age'], bins=30, alpha=0.5,
            label='Not Churn', color='pink')
    ax.hist(churn_customers['Age'], bins=30, alpha=0.5,
            label='Churn', color='grey')
    ax.set_title('Customer Churn by Age', fontsize=14)
    ax.set_xlabel('Age')
    ax.set_ylabel('Frequency')
    ax.legend(loc='upper right')
    _render(fig)


def _section_geography(df):
    """Section 4: grouped bar chart of churn status split by geography."""
    st.subheader('4. Customer Churn by Geography')

    geo_group = df.groupby(['Exited', 'Geography']).size().unstack()

    fig, ax = plt.subplots(figsize=(8, 5))
    geo_group.plot(kind='bar', color=['pink', 'grey', 'lightblue'], ax=ax)
    _label_bars(ax)
    ax.set_xticklabels(_CHURN_LABELS, rotation=0)
    ax.set_xlabel('Churn Status', fontsize=9)
    ax.set_ylabel('Count', fontsize=9)
    ax.set_title('Customer Churn by Geography', fontsize=10)
    _render(fig)

    # NOTE: these figures are hard-coded, not computed from the loaded data.
    st.write('**Churn by Geography Statistics**')
    st.write('Percentage of customers who churned in France: 16.2 %')
    st.write('Percentage of customers who churned in Germany: 32.5 %')
    st.write('Percentage of customers who churned in Spain: 16.7 %')


def _section_tenure(df):
    """Section 5: stacked bar chart of churn counts per tenure value."""
    st.subheader('5. Customer Churn by Tenure')

    churn_by_tenure = df.groupby(['Tenure', 'Exited']).size().unstack()

    fig, ax = plt.subplots(figsize=(8, 5))
    churn_by_tenure.plot(kind='bar', stacked=True, color=_CHURN_COLORS, ax=ax)
    ax.set_title('Customer Churn by Tenure', fontsize=14)
    ax.tick_params(axis='x', rotation=0)
    ax.set_xlabel('Tenure (years)')
    ax.set_ylabel('Number of Customers')
    ax.legend(_CHURN_LABELS)
    _label_bars(ax, label_type='center')
    _render(fig)


def _section_products(df):
    """Section 6: count plot of churn status per number of products."""
    st.subheader('6. Customer Churn by Number of Products')

    fig, ax = plt.subplots(figsize=(8, 5))
    sns.countplot(data=df, x='NumOfProducts', hue='Exited',
                  palette=['grey', 'pink'], ax=ax)
    ax.set_title('Customer Churn by Number of Products', fontsize=14)
    ax.set_xlabel('Number of Products', fontsize=10)
    ax.set_ylabel('Number of Customers', fontsize=10)
    _label_bars(ax)
    _render(fig)

    # NOTE: these figures are hard-coded, not computed from the loaded data.
    st.write('**Churn by Number of Products Statistics**')
    st.write('Percentage of customers who churned with 1 products: 27.7 %')
    st.write('Percentage of customers who churned with 2 products: 7.6 %')
    st.write('Percentage of customers who churned with 3 products: 82.7 %')
    st.write('Percentage of customers who churned with 4 products: 100 %')


def _section_credit_card(df):
    """Section 7: stacked bar chart of churn counts by credit-card ownership."""
    st.subheader('7. Customer Churn by Has Credit Card or Not')

    churn_by_has_cr_card = df.groupby(['HasCrCard', 'Exited']).size().unstack()

    fig, ax = plt.subplots(figsize=(8, 4))
    churn_by_has_cr_card.plot(kind='bar', stacked=True, color=_CHURN_COLORS,
                              ax=ax)
    ax.set_title('Customer Churn by HasCrCard', fontsize=11)
    ax.set_xlabel('HasCrCard', fontsize=10)
    ax.set_ylabel('Number of Customers', fontsize=10)
    ax.legend(_CHURN_LABELS)
    # groupby sorts HasCrCard ascending, so the first bar is HasCrCard == 0
    # (no card) and the second is HasCrCard == 1.
    ax.set_xticklabels(['No Credit Card', 'Has Credit Card'], rotation=0)
    _label_bars(ax, label_type='center')
    _render(fig)

    # NOTE: these figures are hard-coded, not computed from the loaded data.
    st.write('**Churn by Has Credit Card or Not Statistics**')
    st.write('Percentage of customers who churned with No Credit Card: 20.8 %')
    st.write('Percentage of customers who churned with Has Credit Card: 20.2 %')


def _section_active_member(df):
    """Section 8: stacked bar chart of churn counts by membership activity."""
    st.subheader('8. Customer Churn by Is Active Member or Not')

    churn_by_is_active = (
        df.groupby(['IsActiveMember', 'Exited']).size().unstack()
    )

    fig, ax = plt.subplots(figsize=(8, 4))
    churn_by_is_active.plot(kind='bar', stacked=True, color=_CHURN_COLORS,
                            ax=ax)
    ax.set_title('Customer Churn by IsActiveMember', fontsize=11)
    ax.set_xlabel('IsActiveMember', fontsize=10)
    ax.set_ylabel('Number of Customers', fontsize=10)
    ax.legend(_CHURN_LABELS, loc='lower center')
    # IsActiveMember == 0 sorts first, hence "Inactive" before "Active".
    ax.set_xticklabels(['Inactive Member', 'Active Member'], rotation=0)
    _label_bars(ax, label_type='center')
    _render(fig)


# Function to run the EDA
def run():
    """Load the churn dataset and render all EDA sections on the page.

    Reads ``Churn_Modelling.csv`` from the current working directory, writes
    the page title, then renders eight sections, each preceded by a
    horizontal rule. Each section owns its figure and closes it after
    rendering.
    """
    # Load dataset
    df = pd.read_csv(_DATA_PATH)

    # Set the title in the Streamlit app
    st.title('Exploratory Data Analysis')

    sections = (
        _section_distribution,
        _section_gender,
        _section_age,
        _section_geography,
        _section_tenure,
        _section_products,
        _section_credit_card,
        _section_active_member,
    )
    for render_section in sections:
        st.write('---')
        render_section(df)