anjasafm committed on
Commit
92bce13
1 Parent(s): 0bb4b1c

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +1 -1
  2. eda.py +131 -24
app.py CHANGED
@@ -24,7 +24,7 @@ if page == 'Home Page':
24
 
25
  with st.expander("Conclusion"):
26
  st.caption('Using Model Machine Learning we can predict whether the customer is will Churn or Not')
27
- elif page == 'Exploration Data Analysis':
28
  eda.run()
29
  else:
30
  models.run()
 
24
 
25
  with st.expander("Conclusion"):
26
  st.caption('Using Model Machine Learning we can predict whether the customer is will Churn or Not')
27
+ elif page == 'Exploratory Data Analysis':
28
  eda.run()
29
  else:
30
  models.run()
eda.py CHANGED
@@ -1,55 +1,162 @@
1
  import streamlit as st
2
  import pandas as pd
 
3
  import seaborn as sns
4
  import matplotlib.pyplot as plt
 
 
5
 
 
6
  def run():
7
  st.title('Welcome to Exploratory Data Analysis')
8
-
9
- # Load Data from CSV
10
  df = pd.read_csv('customer_churn.csv')
11
 
12
- # Central Tendency
13
  st.title('Central Tendency Data')
 
14
  df_describe = df.describe().T
15
  df_describe = df_describe.apply(lambda x: x.map('{:.2f}'.format))
16
- st.write(df_describe)
17
 
18
- # Explanation for Central Tendency
19
  with st.expander('Explanation'):
20
- st.caption("Central tendency provides information on count, mean, standard deviation, min, max, q1, q2, q3 for each numeric column. The standard deviation values suggest the presence of outliers in some columns.")
 
 
 
 
 
21
 
22
- # Target Visualization: Churn Distribution
23
- st.title('Distribution of Customer Churn')
24
  non_default_counts = df['Churn'].value_counts(normalize=True)
25
- st.write(non_default_counts)
 
 
 
 
 
 
 
 
 
26
 
27
- # Explanation for Churn Distribution
 
 
 
 
 
 
 
 
 
 
 
 
28
  with st.expander('Explanation'):
29
- st.caption('The pie chart shows the distribution of churn, with a relatively balanced distribution between "Not Churn" and "Churn".')
30
 
31
- # Age Distribution by Churn
32
  st.title('Age Distribution by Customer Churn')
33
- sns.histplot(x='Age', hue='Churn', data=df, bins=30, kde=True)
 
 
34
  plt.title('Age Distribution by Customer Churn')
35
  plt.xlabel('Age')
36
  plt.ylabel('Frequency')
37
- st.pyplot()
38
 
39
- # Explanation for Age Distribution
40
  with st.expander('Explanation'):
41
- st.caption('The histogram shows the distribution of ages among churned and non-churned customers.')
42
 
43
- # Customer Churn by Gender
44
  st.title('Customer Churn by Gender')
45
- churn_rates = df.groupby('Gender')['Churn'].mean()
46
- st.bar_chart(churn_rates)
 
 
 
 
 
 
 
 
 
47
 
48
- # Explanation for Customer Churn by Gender
 
 
 
 
 
 
 
49
  with st.expander('Explanation'):
50
- st.caption('The bar chart shows the churn rate by gender, indicating a higher churn rate among females.')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
- # Add more visualizations and explanations as needed
 
53
 
54
- if __name__ == '__main__':
55
- run()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
+ import numpy as np
4
  import seaborn as sns
5
  import matplotlib.pyplot as plt
6
+ # from phik.report import plot_correlation_matrix
7
+ from PIL import Image
8
 
9
+ # Define the function that will later be called from app.py
10
  def run():
11
  st.title('Welcome to Exploratory Data Analysis')
12
+ # Load Data from CSV
 
13
  df = pd.read_csv('customer_churn.csv')
14
 
15
+ # Central Tendency
16
  st.title('Central Tendency Data')
17
+ # Checking Central Tendency Numerical Data
18
  df_describe = df.describe().T
19
  df_describe = df_describe.apply(lambda x: x.map('{:.2f}'.format))
20
+ df_describe
21
 
22
+ # Showing Explanation
23
  with st.expander('Explanation'):
24
+ st.caption("From central tendency we can get information related to count, mean, standard deviation, min, max, q1, q2, q3 from each column containing numeric data. The std values of the columns are quite large, meaning that the range between data values is large, indicating that there are outliers in some columns. The min and max values in Price's column are quite far away.")
25
+
26
+ # Target Visualization
27
+ st.title('Target Exploration Churn Pie & Bar Chart')
28
+ # Setting up the figure for subplots
29
+ plt.figure(figsize=(14, 7))
30
 
31
+ # Subplot 1: Pie chart for the distribution of customer churn
32
+ plt.subplot(1, 2, 1) # 1 row, 2 columns, 1st subplot
33
  non_default_counts = df['Churn'].value_counts(normalize=True)
34
+ plt.pie(non_default_counts, labels=['Not Churn (' + str(non_default_counts[0]*100)[:4] + '%)', 'Churn (' + str(non_default_counts[1]*100)[:4] + '%)'], autopct='%1.1f%%', startangle=140, colors=['crimson', 'coral'])
35
+ plt.title('Distribution of Customer Churn')
36
+
37
+ # Subplot 2: Bar chart for the count of churned vs. non-churned customers
38
+ plt.subplot(1, 2, 2) # 1 row, 2 columns, 2nd subplot
39
+ barplot = sns.countplot(x='Churn', data=df, palette=['crimson', 'coral'])
40
+ plt.title('Count of Customer Churn')
41
+ plt.xticks([0, 1], ['Not Churn', 'Churn'])
42
+ plt.xlabel('Churn Status')
43
+ plt.ylabel('Count')
44
 
45
+ # Adding count labels above each bar
46
+ for p in barplot.patches:
47
+ barplot.annotate(format(p.get_height(), '.0f'),
48
+ (p.get_x() + p.get_width() / 2., p.get_height()),
49
+ ha = 'center',
50
+ va = 'center',
51
+ xytext = (0, 10),
52
+ textcoords = 'offset points')
53
+
54
+ # Show the plot
55
+ plt.tight_layout()
56
+ plt.show()
57
+ # Showing Explanation
58
  with st.expander('Explanation'):
59
+ st.caption('From the visualization, we can see that a large number of customers Not Churn 33881 (52.6%) and those who will be Churn 30493 (47.4%). In my opinion, this number is a pretty bad number for the company, because customers who Not Churn and those who do Churn are only a very small difference so it is necessary to improve both technically and non-technically.')
60
 
61
+ # Age Distribution
62
  st.title('Age Distribution by Customer Churn')
63
+ # Histogram for the 'age' column
64
+ plt.figure(figsize=(14, 7))
65
+ sns.histplot(x='Age', hue='Churn', data=df , bins=30, kde=True)
66
  plt.title('Age Distribution by Customer Churn')
67
  plt.xlabel('Age')
68
  plt.ylabel('Frequency')
69
+ plt.show()
70
 
71
+ # Showing Explanation
72
  with st.expander('Explanation'):
73
+ st.caption('The distribution of ages appears roughly symmetrical, with a slight right skew. We can see that the frequency of churn is less than the frequency of continued service across all age groups.')
74
 
75
+ # Gender
76
  st.title('Customer Churn by Gender')
77
+ # Churn rates by gender from the previous calculation
78
+ churn_rates = {
79
+ 'Female': 0.587951,
80
+ 'Male': 0.412049
81
+ }
82
+
83
+ # Data to plot
84
+ labels = churn_rates.keys()
85
+ sizes = churn_rates.values()
86
+ colors = ['#ff9999','#66b3ff'] # pink for female, light blue for male
87
+ explode = (0.1, 0) # explode 1st slice for emphasis
88
 
89
+ # Plotting the pie chart
90
+ plt.figure(figsize=(8, 6))
91
+ plt.pie(sizes, explode=explode, labels=labels, colors=colors, autopct='%1.1f%%',
92
+ shadow=True, startangle=140)
93
+ plt.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle.
94
+ plt.title('Churn Rate by Gender')
95
+ plt.show()
96
+ # Showing Explanation
97
  with st.expander('Explanation'):
98
+ st.caption('We can see from the results of the pierchart visualization above, where the possibility of churn rates in the female gender tends to be more than men, where women 58.8% and men 41.2% of the total data, from these results we can later make improvements to maintain female gender customers.')
99
+
100
+ # Subscription Type
101
+ st.title('Customer Churn by Subscription Type')
102
+ # Calculate churn rate by subscription type
103
+ churn_rate_by_subscription = df.groupby('Subscription Type')['Churn'].mean()
104
+ # Normalize the churn rates so that they sum to 1
105
+ churn_rate_by_subscription_normalized = churn_rate_by_subscription / churn_rate_by_subscription.sum()
106
+
107
+ # Plot the churn rate by subscription type
108
+ plt.figure(figsize=(14, 7))
109
+ churn_rate_by_subscription_normalized.plot(kind='bar', color='lightsalmon', title='Churn Rate by Subscription Type')
110
+ plt.xlabel('Subscription Type')
111
+ plt.ylabel('Churn Rate')
112
+ plt.xticks(rotation=0)
113
+ # Add labels on each bar
114
+ for i, rate in enumerate(churn_rate_by_subscription_normalized):
115
+ plt.text(i, rate, f'{rate:.4f}', ha='center', va='bottom')
116
+
117
+ plt.show()
118
+ # Showing Explanation
119
+ with st.expander('Explanation'):
120
+ st.caption('For Further Analysis Since churn rates are similar, it suggests that factors other than subscription type may have a more significant impact on churn. And for Business Strategy A churn rate approaching 50% warrants a detailed examination of customer service practices, product quality, pricing strategy, and competitive pressures. Strategies need to be implemented to enhance customer satisfaction and loyalty across all subscription types.')
121
+
122
+ # Contract Length
123
+ st.title('Customer Churn by Contract Length')
124
+ # Group the data by 'Contract Length' and calculate the mean churn for each contract length
125
+ churn_rate_by_contract_length = df.groupby('Contract Length')['Churn'].mean()
126
 
127
+ # Normalize the churn rates so that they sum to 1
128
+ churn_rate_by_contract_length_normalized = churn_rate_by_contract_length / churn_rate_by_contract_length.sum()
129
 
130
+ # Plot the churn rate by contract length
131
+ plt.figure(figsize=(10, 6))
132
+ sns.barplot(x=churn_rate_by_contract_length_normalized.index, y=churn_rate_by_contract_length_normalized.values)
133
+ plt.title('Churn Rate by Contract Length')
134
+ plt.xlabel('Contract Length (months)')
135
+ plt.ylabel('Churn Rate')
136
+ plt.xticks(rotation=0) # If there are many contract lengths, rotating the x-ticks can help with readability
137
+ plt.tight_layout() # This will ensure that the labels and titles fit well in the plot area
138
+ # Add labels on each bar
139
+ for i, rate in enumerate(churn_rate_by_contract_length_normalized):
140
+ plt.text(i, rate, f'{rate:.4f}', ha='center', va='bottom')
141
+ plt.show()
142
+
143
+ # Showing Explanation
144
+ with st.expander('Explanation'):
145
+ st.caption("The company might consider encouraging customers to sign up for longer contracts through incentives, as this could help reduce churn rates. However, since even annual contracts have a relatively high churn rate, it's essential to explore why customers are leaving and address those issues directly.")
146
+
147
+ # Total Spend
148
+ st.title('Customer Churn by Total Spend')
149
+ # Histogram for the 'Total Spend' column
150
+ # Set the color palette
151
+
152
+ plt.figure(figsize=(14, 7))
153
+ sns.histplot(x='Total Spend', hue='Churn', data=df , bins=30, kde=True)
154
+ plt.title('Total Spend Distribution by Customer Churn')
155
+ plt.xlabel('Total Spend')
156
+ plt.ylabel('Frequency')
157
+ plt.show()
158
+
159
+
160
+ # Showing Explanation
161
+ with st.expander('Explanation'):
162
+ st.caption('The company might consider focusing retention efforts on customers in the lower to mid spend ranges, where the churn seems to be more prevalent. Incentivizing increased spend among these customers might be one strategy if indeed higher spend is associated with lower churn.')