Bitha committed on
Commit
c980d16
1 Parent(s): 9de0f58

Update eda

Browse files
Files changed (2) hide show
  1. app.py +5 -5
  2. eda.py +152 -0
app.py CHANGED
@@ -2,11 +2,11 @@
2
  import streamlit as st
3
 
4
  # Import the created Streamlit Page
5
- # import eda
6
  import prediction
7
 
8
  # Navigation button
9
- navigasi = st.sidebar.selectbox(label='Select Page:', options=['Home Page', 'Prediction']) # 'Exploratory Data Analysis',
10
 
11
  # Looping for navigation
12
  if navigasi == 'Home Page':
@@ -18,9 +18,9 @@ if navigasi == 'Home Page':
18
  st.write('**Classification Model for Predicting Customer Churn**')
19
  st.caption('Please select another menu in the Select Box on the left of your screen to start!')
20
 
21
- # # Displays the EDA page
22
- # elif navigasi == 'Exploratory Data Analysis':
23
- # eda.run()
24
 
25
  # Displays the Predict page
26
  elif navigasi == 'Prediction':
 
2
  import streamlit as st
3
 
4
  # Import the created Streamlit Page
5
+ import eda
6
  import prediction
7
 
8
  # Navigation button
9
+ navigasi = st.sidebar.selectbox(label='Select Page:', options=['Home Page', 'Exploratory Data Analysis', 'Prediction'])
10
 
11
  # Looping for navigation
12
  if navigasi == 'Home Page':
 
18
  st.write('**Classification Model for Predicting Customer Churn**')
19
  st.caption('Please select another menu in the Select Box on the left of your screen to start!')
20
 
21
+ # Displays the EDA page
22
+ elif navigasi == 'Exploratory Data Analysis':
23
+ eda.run()
24
 
25
  # Displays the Predict page
26
  elif navigasi == 'Prediction':
eda.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import streamlit as st
3
+ import pandas as pd
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+
7
+ # Function to run the EDA
8
def run():
    """Render the Exploratory Data Analysis page of the churn app.

    Reads ``Churn_Modelling.csv`` (path relative to the current working
    directory) and draws eight Streamlit sections, one chart each, that
    break the ``Exited`` column down by ``Gender``, ``Age``, ``Geography``,
    ``Tenure``, ``NumOfProducts``, ``HasCrCard`` and ``IsActiveMember``.

    The percentage figures written under some charts are fixed text, not
    values computed from the loaded data; they must be updated by hand if
    the dataset changes.

    Every chart is passed to Streamlit as an explicit figure and closed
    afterwards, so repeated reruns of the page do not pile up open
    matplotlib figures.
    """
    # Load dataset
    df = pd.read_csv('Churn_Modelling.csv')

    # Shared colour convention for charts coloured by churn status:
    # first colour = Exited == 0 (Not Churn), second = Exited == 1 (Churn).
    churn_colors = ['pink', 'grey']
    churn_labels = ['Not Churn', 'Churn']

    # Set the title in the Streamlit app
    st.title('Exploratory Data Analysis of Customer Churn')
    st.write('---')

    # 1. Distribution of Customer Churn
    st.subheader('1. Distribution of Customer Churn')
    # value_counts() orders by frequency, not by class; pin the order to
    # (0, 1) so the positional labels below always match the right class.
    value_counts = df['Exited'].value_counts().reindex([0, 1], fill_value=0)
    value_counts.index = churn_labels
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
    ax1.pie(value_counts, labels=churn_labels, autopct='%1.1f%%', colors=churn_colors)
    ax1.set_title('Distribution of Customer Churn', fontsize=12)
    value_counts.plot(kind='bar', ax=ax2, color=churn_colors)
    ax2.set_title('Count of Customer Churn', fontsize=12)
    ax2.set_xlabel('Churn Status', fontsize=10)
    ax2.set_ylabel('Count', fontsize=10)
    ax2.tick_params(axis='x', labelrotation=0)
    for position, count in enumerate(value_counts):
        # Small vertical offset keeps the number clear of the bar top.
        ax2.text(position, count + 10, str(count), ha='center', va='bottom')
    _render(fig)
    st.write('---')

    # 2. Customer Churn by Gender
    st.subheader('2. Customer Churn by Gender')
    gender_group = df.groupby(['Exited', 'Gender']).size().unstack()
    fig, ax = plt.subplots(figsize=(8, 5))
    # Here the colours distinguish the Gender columns, not churn status.
    gender_group.plot(kind='bar', color=['pink', 'grey'], ax=ax)
    _label_bars(ax)
    ax.set_xticklabels(churn_labels, rotation=0)
    ax.set_xlabel('Churn Status')
    ax.set_ylabel('Count')
    ax.set_title('Customer Churn by Gender')
    _render(fig)
    st.write('**Churn by Gender Statistics**')
    st.write('Percentage of Male customers who churned: 16.5%')
    st.write('Percentage of Female customers who churned: 25.1%')
    st.write('---')

    # 3. Customer Churn by Age
    st.subheader('3. Customer Churn by Age')
    churn_customers = df[df['Exited'] == 1]
    not_churn_customers = df[df['Exited'] == 0]
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.hist(not_churn_customers['Age'], bins=30, alpha=0.5, label='Not Churn', color='pink')
    ax.hist(churn_customers['Age'], bins=30, alpha=0.5, label='Churn', color='grey')
    ax.set_title('Customer Churn by Age', fontsize=14)
    ax.set_xlabel('Age')
    ax.set_ylabel('Frequency')
    ax.legend(loc='upper right')
    _render(fig)
    st.write('---')

    # 4. Customer Churn by Geography
    st.subheader('4. Customer Churn by Geography')
    geo_group = df.groupby(['Exited', 'Geography']).size().unstack()
    fig, ax = plt.subplots(figsize=(8, 5))
    geo_group.plot(kind='bar', color=['pink', 'grey', 'lightblue'], ax=ax)
    _label_bars(ax)
    ax.set_xticklabels(churn_labels, rotation=0)
    ax.set_xlabel('Churn Status', fontsize=9)
    ax.set_ylabel('Count', fontsize=9)
    ax.set_title('Customer Churn by Geography', fontsize=10)
    _render(fig)
    st.write('**Churn by Geography Statistics**')
    st.write('Percentage of customers who churned in France: 16.2 %')
    st.write('Percentage of customers who churned in Germany: 32.5 %')
    st.write('Percentage of customers who churned in Spain: 16.7 %')
    st.write('---')

    # 5. Customer Churn by Tenure
    st.subheader('5. Customer Churn by Tenure')
    churn_by_tenure = df.groupby(['Tenure', 'Exited']).size().unstack()
    fig, ax = plt.subplots(figsize=(8, 5))
    churn_by_tenure.plot(kind='bar', stacked=True, color=churn_colors, ax=ax)
    ax.set_title('Customer Churn by Tenure', fontsize=14)
    ax.tick_params(axis='x', labelrotation=0)
    ax.set_xlabel('Tenure (years)')
    ax.set_ylabel('Number of Customers')
    ax.legend(churn_labels)
    _label_bars(ax, label_type='center')
    _render(fig)
    st.write('---')

    # 6. Customer Churn by Number of Products
    st.subheader('6. Customer Churn by Number of Products')
    fig, ax = plt.subplots(figsize=(8, 5))
    # Palette order follows the hue levels (0, 1), so it uses the same
    # pink = Not Churn / grey = Churn convention as the other charts.
    sns.countplot(data=df, x='NumOfProducts', hue='Exited', palette=churn_colors, ax=ax)
    ax.set_title('Customer Churn by Number of Products', fontsize=14)
    ax.set_xlabel('Number of Products', fontsize=10)
    ax.set_ylabel('Number of Customers', fontsize=10)
    # Replace the raw 0/1 hue labels with readable ones, keeping the handles.
    handles, _ = ax.get_legend_handles_labels()
    ax.legend(handles, churn_labels)
    _label_bars(ax)
    _render(fig)
    st.write('**Churn by Number of Products Statistics**')
    st.write('Percentage of customers who churned with 1 products: 27.7 %')
    st.write('Percentage of customers who churned with 2 products: 7.6 %')
    st.write('Percentage of customers who churned with 3 products: 82.7 %')
    st.write('Percentage of customers who churned with 4 products: 100 %')
    st.write('---')

    # 7. Customer Churn by Has Credit Card or Not
    st.subheader('7. Customer Churn by Has Credit Card or Not')
    churn_by_HasCrCard = df.groupby(['HasCrCard', 'Exited']).size().unstack()
    fig, ax = plt.subplots(figsize=(8, 4))
    churn_by_HasCrCard.plot(kind='bar', stacked=True, color=churn_colors, ax=ax)
    ax.set_title('Customer Churn by HasCrCard', fontsize=11)
    ax.set_xlabel('HasCrCard', fontsize=10)
    ax.set_ylabel('Number of Customers', fontsize=10)
    ax.legend(churn_labels)
    # groupby sorts HasCrCard as 0, 1, so the first bar is the customers
    # WITHOUT a card; the labels must follow that order.
    ax.set_xticklabels(['No Credit Card', 'Has Credit Card'], rotation=0)
    _label_bars(ax, label_type='center')
    _render(fig)
    st.write('**Churn by Has Credit Card or Not Statistics**')
    st.write('Percentage of customers who churned with No Credit Card: 20.8 %')
    st.write('Percentage of customers who churned with Has Credit Card: 20.2 %')
    st.write('---')

    # 8. Customer Churn by Is Active Member or Not
    st.subheader('8. Customer Churn by Is Active Member or Not')
    churn_by_IsActiveMember = df.groupby(['IsActiveMember', 'Exited']).size().unstack()
    fig, ax = plt.subplots(figsize=(8, 4))
    churn_by_IsActiveMember.plot(kind='bar', stacked=True, color=churn_colors, ax=ax)
    ax.set_title('Customer Churn by IsActiveMember', fontsize=11)
    ax.set_xlabel('IsActiveMember', fontsize=10)
    ax.set_ylabel('Number of Customers', fontsize=10)
    ax.legend(churn_labels, loc='lower center')
    # Index order is 0, 1: inactive members first, active members second.
    ax.set_xticklabels(['Inactive Member', 'Active Member'], rotation=0)
    _label_bars(ax, label_type='center')
    _render(fig)
    st.write('**Churn by Is Active Member or Not Statistics**')
    st.write('Percentage of customers who churned with Inactive Member: 26.9 %')
    st.write('Percentage of customers who churned with Active Member: 14.3 %')


def _label_bars(ax, **bar_label_kwargs):
    """Annotate every bar container on *ax* with its value."""
    for container in ax.containers:
        ax.bar_label(container, **bar_label_kwargs)


def _render(fig):
    """Send *fig* to Streamlit, then close it to release pyplot's reference."""
    st.pyplot(fig)
    plt.close(fig)