File size: 6,825 Bytes
c980d16
 
 
 
 
 
6cafa70
 
c980d16
 
 
 
 
 
6cafa70
c980d16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155

import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Silence Streamlit's deprecation warning for `st.pyplot()` calls that are made
# without an explicit figure argument (i.e. that render pyplot's global figure).
# NOTE(review): newer Streamlit releases appear to have removed this config
# option, in which case this call may raise at import time — confirm against
# the Streamlit version this app is pinned to.
st.set_option('deprecation.showPyplotGlobalUse', False)

# Shared labels and colours so every chart encodes churn status the same way:
# position 0 corresponds to Exited == 0, position 1 to Exited == 1.
_CHURN_LABELS = ['Not Churn', 'Churn']
_CHURN_COLORS = ['pink', 'grey']


def _show(fig):
    """Render *fig* in the Streamlit app, then close it.

    The figure is passed explicitly instead of relying on pyplot's global
    "current figure", and closed afterwards so figures do not pile up in
    pyplot's registry each time Streamlit re-runs the script.
    """
    st.pyplot(fig)
    plt.close(fig)


def _plot_churn_distribution(df):
    """Section 1: pie chart and annotated bar chart of the `Exited` counts."""
    st.subheader('1. Distribution of Customer Churn')
    # value_counts() orders by frequency, so pin the order to class 0, 1 to
    # keep the labels/colours attached to the right class whatever the data.
    counts = df['Exited'].value_counts().reindex([0, 1], fill_value=0)
    counts.index = _CHURN_LABELS

    fig, (pie_ax, bar_ax) = plt.subplots(1, 2, figsize=(14, 6))
    pie_ax.pie(counts, labels=_CHURN_LABELS, autopct='%1.1f%%', colors=_CHURN_COLORS)
    pie_ax.set_title('Distribution of Customer Churn', fontsize=12)

    counts.plot(kind='bar', ax=bar_ax, color=_CHURN_COLORS)
    bar_ax.set_title('Count of Customer Churn', fontsize=12)
    bar_ax.set_xlabel('Churn Status', fontsize=10)
    bar_ax.set_ylabel('Count', fontsize=10)
    bar_ax.tick_params(axis='x', rotation=0)
    # Write each count just above its bar.
    for position, count in enumerate(counts):
        bar_ax.text(position, count + 10, str(count), ha='center', va='bottom')
    _show(fig)


def _plot_grouped_by_churn(df, column, colors, title, label_kwargs, title_kwargs):
    """Draw grouped bars of *column* categories for each churn status."""
    counts = df.groupby(['Exited', column]).size().unstack()
    fig, ax = plt.subplots(figsize=(8, 5))
    counts.plot(kind='bar', color=colors, ax=ax)
    for container in ax.containers:
        ax.bar_label(container)
    # groupby sorts the `Exited` index (0, 1), matching _CHURN_LABELS order.
    ax.set_xticklabels(_CHURN_LABELS, rotation=0)
    ax.set_xlabel('Churn Status', **label_kwargs)
    ax.set_ylabel('Count', **label_kwargs)
    ax.set_title(title, **title_kwargs)
    _show(fig)


def _plot_churn_by_gender(df):
    """Section 2: churn counts split by `Gender`, plus summary figures."""
    st.subheader('2. Customer Churn by Gender')
    _plot_grouped_by_churn(
        df, 'Gender', ['pink', 'grey'], 'Customer Churn by Gender', {}, {},
    )
    # NOTE: the percentages below are hard-coded text, not computed from df.
    st.write('**Churn by Gender Statistics**')
    st.write('Percentage of Male customers who churned: 16.5%')
    st.write('Percentage of Female customers who churned: 25.1%')


def _plot_churn_by_age(df):
    """Section 3: overlaid `Age` histograms for retained and churned customers."""
    st.subheader('3. Customer Churn by Age')
    retained_ages = df.loc[df['Exited'] == 0, 'Age']
    churned_ages = df.loc[df['Exited'] == 1, 'Age']

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.hist(retained_ages, bins=30, alpha=0.5, label='Not Churn', color='pink')
    ax.hist(churned_ages, bins=30, alpha=0.5, label='Churn', color='grey')
    ax.set_title('Customer Churn by Age', fontsize=14)
    ax.set_xlabel('Age')
    ax.set_ylabel('Frequency')
    ax.legend(loc='upper right')
    _show(fig)


def _plot_churn_by_geography(df):
    """Section 4: churn counts split by `Geography`, plus summary figures."""
    st.subheader('4. Customer Churn by Geography')
    _plot_grouped_by_churn(
        df,
        'Geography',
        ['pink', 'grey', 'lightblue'],
        'Customer Churn by Geography',
        {'fontsize': 9},
        {'fontsize': 10},
    )
    # NOTE: the percentages below are hard-coded text, not computed from df.
    st.write('**Churn by Geography Statistics**')
    st.write('Percentage of customers who churned in France: 16.2 %')
    st.write('Percentage of customers who churned in Germany: 32.5 %')
    st.write('Percentage of customers who churned in Spain: 16.7 %')


def _plot_churn_by_tenure(df):
    """Section 5: stacked churn counts for each `Tenure` value."""
    st.subheader('5. Customer Churn by Tenure')
    counts = df.groupby(['Tenure', 'Exited']).size().unstack()
    fig, ax = plt.subplots(figsize=(8, 5))
    counts.plot(kind='bar', stacked=True, color=_CHURN_COLORS, ax=ax)
    ax.set_title('Customer Churn by Tenure', fontsize=14)
    ax.tick_params(axis='x', rotation=0)
    ax.set_xlabel('Tenure (years)')
    ax.set_ylabel('Number of Customers')
    ax.legend(_CHURN_LABELS)
    for container in ax.containers:
        ax.bar_label(container, label_type='center')
    _show(fig)


def _plot_churn_by_products(df):
    """Section 6: churn counts per `NumOfProducts`, plus summary figures."""
    st.subheader('6. Customer Churn by Number of Products')
    fig, ax = plt.subplots(figsize=(8, 5))
    # Same colour convention as the other charts: pink = not churned,
    # grey = churned (hue levels are ordered 0, 1).
    sns.countplot(data=df, x='NumOfProducts', hue='Exited', palette=_CHURN_COLORS, ax=ax)
    ax.set_title('Customer Churn by Number of Products', fontsize=14)
    ax.set_xlabel('Number of Products', fontsize=10)
    ax.set_ylabel('Number of Customers', fontsize=10)
    # Replace the raw 0/1 legend entries with readable churn labels.
    legend = ax.get_legend()
    if legend is not None:
        for entry, label in zip(legend.get_texts(), _CHURN_LABELS):
            entry.set_text(label)
    for container in ax.containers:
        ax.bar_label(container)
    _show(fig)
    # NOTE: the percentages below are hard-coded text, not computed from df.
    st.write('**Churn by Number of Products Statistics**')
    st.write('Percentage of customers who churned with 1 products: 27.7 %')
    st.write('Percentage of customers who churned with 2 products: 7.6 %')
    st.write('Percentage of customers who churned with 3 products: 82.7 %')
    st.write('Percentage of customers who churned with 4 products: 100 %')


def _plot_stacked_flag(df, column, tick_labels, **legend_kwargs):
    """Draw stacked churn counts for a 0/1 flag *column*.

    *tick_labels* names the flag values in sorted order, i.e. the label for
    0 first and the label for 1 second, because groupby sorts the index.
    """
    counts = df.groupby([column, 'Exited']).size().unstack()
    fig, ax = plt.subplots(figsize=(8, 4))
    counts.plot(kind='bar', stacked=True, color=_CHURN_COLORS, ax=ax)
    ax.set_title(f'Customer Churn by {column}', fontsize=11)
    ax.set_xlabel(column, fontsize=10)
    ax.set_ylabel('Number of Customers', fontsize=10)
    ax.legend(_CHURN_LABELS, **legend_kwargs)
    ax.set_xticklabels(tick_labels, rotation=0)
    for container in ax.containers:
        ax.bar_label(container, label_type='center')
    _show(fig)


def _plot_churn_by_credit_card(df):
    """Section 7: stacked churn counts by `HasCrCard`, plus summary figures."""
    st.subheader('7. Customer Churn by Has Credit Card or Not')
    # HasCrCard == 0 is the first bar, so "No Credit Card" must come first.
    _plot_stacked_flag(df, 'HasCrCard', ['No Credit Card', 'Has Credit Card'])
    # NOTE: the percentages below are hard-coded text, not computed from df.
    st.write('**Churn by Has Credit Card or Not Statistics**')
    st.write('Percentage of customers who churned with No Credit Card: 20.8 %')
    st.write('Percentage of customers who churned with Has Credit Card: 20.2 %')


def _plot_churn_by_activity(df):
    """Section 8: stacked churn counts by `IsActiveMember`, plus summary figures."""
    st.subheader('8. Customer Churn by Is Active Member or Not')
    _plot_stacked_flag(
        df, 'IsActiveMember', ['Inactive Member', 'Active Member'], loc='lower center',
    )
    # NOTE: the percentages below are hard-coded text, not computed from df.
    st.write('**Churn by Is Active Member or Not Statistics**')
    st.write('Percentage of customers who churned with Inactive Member: 26.9 %')
    st.write('Percentage of customers who churned with Active Member: 14.3 %')


# Function to run the EDA
def run():
    """Render the exploratory-data-analysis page of the Streamlit app.

    Reads 'Churn_Modelling.csv' from the current working directory and draws
    eight chart sections describing how the `Exited` column relates to the
    other customer attributes. Sections are separated by horizontal rules.

    Returns:
        None. All output is written to the Streamlit page.
    """
    # Load dataset
    df = pd.read_csv('Churn_Modelling.csv')

    # Set the title in the Streamlit app
    st.title('Exploratory Data Analysis')

    sections = (
        _plot_churn_distribution,
        _plot_churn_by_gender,
        _plot_churn_by_age,
        _plot_churn_by_geography,
        _plot_churn_by_tenure,
        _plot_churn_by_products,
        _plot_churn_by_credit_card,
        _plot_churn_by_activity,
    )
    # One rule precedes every section: after the title and between sections.
    for render_section in sections:
        st.write('---')
        render_section(df)