RICHARDMENSAH committed on
Commit
cbf2263
1 Parent(s): 959fe98

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +402 -0
app.py ADDED
@@ -0,0 +1,402 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import joblib
3
+ import pandas as pd
4
+ import numpy as np
5
+ import plotly.graph_objects as go
6
+ from PIL import Image
7
+ import time
8
+ import matplotlib.pyplot as plt
9
+ import qrcode
10
+ from io import BytesIO
11
+ import csv
12
+
13
# Deserialize the fitted preprocessing transformers and the two classifiers.
# The artifact paths are relative, so they resolve against the working
# directory the app is started from.
num_imputer, cat_imputer, encoder, scaler, model1, model2 = (
    joblib.load(artifact)
    for artifact in (
        'numerical_imputer.joblib',
        'cat_imputer.joblib',
        'encoder.joblib',
        'scaler.joblib',
        'lr_model_vif_smote.joblib',
        'gb_model_vif_smote.joblib',
    )
)
20
+
21
+
22
def preprocess_input(input_data):
    """Turn one customer's raw feature mapping into a single model-ready row.

    Args:
        input_data: Mapping of feature name to a scalar value for one customer.

    Returns:
        A one-row DataFrame whose columns are exactly ``original_feature_names``
        (in that order); columns the transformers did not produce are filled
        with 0.
    """
    raw_row = pd.DataFrame(input_data, index=[0])

    # Columns with object dtype are treated as categorical, the rest as numeric.
    cat_columns, num_columns = [], []
    for column, dtype in raw_row.dtypes.items():
        (cat_columns if dtype == 'object' else num_columns).append(column)

    cat_imputed = cat_imputer.transform(raw_row[cat_columns])
    num_imputed = num_imputer.transform(raw_row[num_columns])

    # The encoder output is converted with .toarray(), so it is expected to be
    # a sparse matrix.
    encoded_part = pd.DataFrame(
        encoder.transform(cat_imputed).toarray(),
        columns=encoder.get_feature_names_out(cat_columns),
    )
    scaled_part = pd.DataFrame(scaler.transform(num_imputed), columns=num_columns)

    combined = pd.concat([encoded_part, scaled_part], axis=1)
    return combined.reindex(columns=original_feature_names, fill_value=0)
40
+
41
# Column order that preprocess_input reindexes to: the numeric features first,
# then the one-hot columns for REGION, TENURE and TOP_PACK.
original_feature_names = [
    'MONTANT', 'FREQUENCE_RECH', 'REVENUE', 'ARPU_SEGMENT', 'FREQUENCE',
    'DATA_VOLUME', 'ON_NET', 'ORANGE', 'TIGO', 'ZONE1', 'ZONE2', 'REGULARITY',
    'FREQ_TOP_PACK',
    *(f'REGION_{region}' for region in (
        'DAKAR', 'DIOURBEL', 'FATICK', 'KAFFRINE', 'KAOLACK', 'KEDOUGOU',
        'KOLDA', 'LOUGA', 'MATAM', 'SAINT-LOUIS', 'SEDHIOU', 'TAMBACOUNDA',
        'THIES', 'ZIGUINCHOR',
    )),
    *(f'TENURE_{tenure}' for tenure in (
        'Long-term', 'Medium-term', 'Mid-term', 'Short-term', 'Very short-term',
    )),
    *(f'TOP_PACK_{pack}' for pack in (
        'data', 'international', 'messaging', 'other_services', 'social_media',
        'value_added_services', 'voice',
    )),
]
50
+
51
# Page configuration must be the first Streamlit call; use the full width.
st.set_page_config(layout="wide")

# Main page header: title, banner image and a one-line description.
st.title('📞 EXPRESSO TELECOM CUSTOMER CHURN PREDICTION APP 📞')
st.image("banner.png", use_column_width=True)
st.markdown("This app predicts whether a customer will leave your company ❌ or not 🎉. Enter the details of the customer on the left sidebar to see the result")

# Usage instructions, rendered as one markdown paragraph per step.
st.title('How to Use')
for usage_step in (
    '1. Select your model of choice on the left sidebar.',
    '2. Adjust the input parameters based on customer details',
    '3. Click the "Predict" button to initiate the prediction.',
    '4. The app will simulate a prediction process with a progress bar.',
    '5. Once the prediction is complete, the results will be displayed below.',
):
    st.markdown(usage_step)
69
+
70
+ import csv
71
+ import streamlit as st
72
+
73
# Sidebar feedback box: a short welcome note, a free-text field and a submit
# button that appends the suggestion to suggestions.csv.
st.sidebar.markdown('**Welcome!**')
st.sidebar.markdown('This is a work in progress, and we would love to hear your suggestions on how to improve the user experience. Please feel free to provide your feedback in the suggestion box below.')

correction_text = st.sidebar.text_input('Enter your suggestion')

if st.sidebar.button('Submit'):
    suggestion = correction_text.strip()
    if suggestion:
        # newline='' is what the csv module requires for files it writes;
        # an explicit UTF-8 encoding keeps non-ASCII feedback from failing on
        # platforms whose default encoding cannot represent it.
        with open('suggestions.csv', 'a', newline='', encoding='utf-8') as file:
            csv.writer(file).writerow([suggestion])
        st.sidebar.info('Suggestion submitted successfully')
    else:
        # Do not record blank rows when the button is pressed on an empty box.
        st.sidebar.warning('Please enter a suggestion before submitting.')
87
+
88
# Registry of selectable models: display name -> loaded estimator and the
# type tag that predict_churn uses to pick its label order.
models = {
    display_name: {'model': estimator, 'type': type_tag}
    for display_name, estimator, type_tag in (
        ('Logistic Regression', model1, 'logistic_regression'),
        ('Gradient Boosting', model2, 'gradient_boosting'),
    )
}

# Let the user pick one of the registered models in the sidebar.
st.sidebar.title('Select Model')
model_name = st.sidebar.selectbox('Choose a model', list(models))

# Unpack the chosen entry into the module-level names used further down.
model, model_type = (models[model_name][field] for field in ('model', 'type'))
102
+
103
+
104
# Collect the customer's details from sidebar widgets. Keys are the raw
# feature names; the numeric widgets are created first, then the three
# categorical select boxes, so the sidebar order matches the dict order.
st.sidebar.title('Enter Customer Details')
input_features = {
    feature: st.sidebar.number_input(label)
    for feature, label in (
        ('MONTANT', 'Top-up Amount (MONTANT)'),
        ('FREQUENCE_RECH', 'Number of Times the Customer Refilled (FREQUENCE_RECH)'),
        ('REVENUE', 'Monthly income of the client (REVENUE)'),
        ('ARPU_SEGMENT', 'Income over 90 days / 3 (ARPU_SEGMENT)'),
        ('FREQUENCE', 'Number of times the client has made an income (FREQUENCE)'),
        ('DATA_VOLUME', 'Number of Connections (DATA_VOLUME)'),
        ('ON_NET', 'Inter Expresso Call (ON_NET)'),
        ('ORANGE', 'Call to Orange (ORANGE)'),
        ('TIGO', 'Call to Tigo (TIGO)'),
        ('ZONE1', 'Call to Zone 1 (ZONE1)'),
        ('ZONE2', 'Call to Zone 2 (ZONE2)'),
        ('REGULARITY', 'Number of Times the Client is Active for 90 Days (REGULARITY)'),
        ('FREQ_TOP_PACK', 'Number of Times the Client has Activated the Top Packs (FREQ_TOP_PACK)'),
    )
}

# The option lists must offer every category that has a one-hot column in
# original_feature_names. 'DAKAR', 'Long-term' and 'other_services' were
# missing, so such customers could not be entered. They are appended at the
# end so the default (first) selection of each box is unchanged.
input_features.update({
    'REGION': st.sidebar.selectbox(
        'Location of Each Client (REGION)',
        ['SAINT-LOUIS', 'THIES', 'LOUGA', 'MATAM', 'FATICK', 'KAOLACK',
         'DIOURBEL', 'TAMBACOUNDA', 'ZIGUINCHOR', 'KOLDA', 'KAFFRINE', 'SEDHIOU',
         'KEDOUGOU', 'DAKAR'],
    ),
    'TENURE': st.sidebar.selectbox(
        'Duration in the Network (TENURE)',
        ['Short-term', 'Mid-term', 'Medium-term', 'Very short-term', 'Long-term'],
    ),
    'TOP_PACK': st.sidebar.selectbox(
        'Most Active Pack (TOP_PACK)',
        ['data', 'international', 'messaging', 'social_media',
         'value_added_services', 'voice', 'other_services'],
    ),
})
127
+
128
# Input validation: check every numeric feature against its inclusive range
# and collect one message per violation. Features without an entry in
# numeric_ranges (the categorical ones) are not checked.
numeric_ranges = {
    'MONTANT': [0, 1000000],
    'FREQUENCE_RECH': [0, 100],
    'REVENUE': [0, 1000000],
    'ARPU_SEGMENT': [0, 100000],
    'FREQUENCE': [0, 100],
    'DATA_VOLUME': [0, 100000],
    'ON_NET': [0, 100000],
    'ORANGE': [0, 100000],
    'TIGO': [0, 100000],
    'ZONE1': [0, 100000],
    'ZONE2': [0, 100000],
    'REGULARITY': [0, 100],
    'FREQ_TOP_PACK': [0, 100],
}

error_messages = []
for name, entered in input_features.items():
    if name not in numeric_ranges:
        continue
    lower, upper = numeric_ranges[name]
    if not lower <= entered <= upper:
        error_messages.append(f"{name} should be between {lower} and {upper}.")

# The input is valid exactly when no range check produced a message.
valid_input = not error_messages
155
+
156
+ #Churn Prediction
157
+
158
def predict_churn(input_data, model):
    """Predict churn for one customer with the given model.

    The label order is chosen from the module-level ``model_type`` (set by the
    sidebar model selection), not from ``model`` itself, so ``model`` must be
    the estimator that ``model_type`` describes.

    Args:
        input_data: Mapping of raw feature name to value for one customer.
        model: Fitted estimator exposing ``predict_proba``.

    Returns:
        A tuple ``(churn_labels, churn_probability, churn_indices,
        churn_index)``: the label for each probability column, the
        probabilities of the first (only) sample, a label -> column index
        mapping, and the index of the most probable column.

    Raises:
        ValueError: If ``model_type`` is not a known model type. Previously
            this surfaced as an UnboundLocalError on ``churn_labels``.
    """
    # NOTE(review): the two model types use opposite label orders. If both
    # estimators were trained on the same target encoding, predict_proba
    # columns would be in the same order for both — confirm against
    # model.classes_ before trusting the gradient boosting labels.
    label_orders = {
        "logistic_regression": ["No Churn", "Churn"],
        "gradient_boosting": ["Churn", "No Churn"],
    }
    if model_type not in label_orders:
        raise ValueError(f"Unsupported model type: {model_type!r}")
    churn_labels = label_orders[model_type]

    # Preprocess the raw input and score it; only one sample is passed in.
    preprocessed_data = preprocess_input(input_data)
    churn_probability = model.predict_proba(preprocessed_data)[0]

    churn_indices = {label: idx for idx, label in enumerate(churn_labels)}
    churn_index = np.argmax(churn_probability)

    return churn_labels, churn_probability, churn_indices, churn_index
181
+
182
+
183
def _show_verdict(churn_labels, churn_probability, churn_index):
    """Render the headline verdict, its illustration and retention tips."""
    predicted_prob = churn_probability[churn_index]
    col1, col2 = st.columns(2)

    if churn_labels[churn_index] == "Churn":
        with col1:
            st.error(f"Beware!!! This customer is likely to churn with a probability of {predicted_prob * 100:.2f}% 😢")
            st.image(Image.open('Churn.png').resize((350, 300)))
        with col2:
            st.info("Suggestions for retaining churned customers in this customer group:\n"
                    "- Offer personalized discounts or promotions\n"
                    "- Provide exceptional customer service\n"
                    "- Introduce loyalty programs\n"
                    "- Send targeted re-engagement emails\n"
                    "- Provide a dedicated account manager\n"
                    "- Offer extended trial periods\n"
                    "- Conduct exit surveys to understand reasons for churn\n"
                    "- Implement a customer win-back campaign\n"
                    "- Provide incentives for referrals\n"
                    "- Improve product or service offerings based on customer feedback")
    else:
        with col1:
            st.success(f"This customer is not likely to churn with a probability of {predicted_prob * 100:.2f}% 😀")
            st.image(Image.open('NotChurn.jpg').resize((350, 300)))
        with col2:
            st.info("Suggestions for retaining non-churned customers in this customer group:\n"
                    "- Provide personalized product recommendations\n"
                    "- Offer exclusive features or upgrades\n"
                    "- Implement proactive customer support\n"
                    "- Conduct customer satisfaction surveys\n"
                    "- Recognize and reward loyal customers\n"
                    "- Organize customer appreciation events\n"
                    "- Offer early access to new features or products\n"
                    "- Provide educational resources or tutorials\n"
                    "- Implement a customer loyalty program\n"
                    "- Offer flexible billing or pricing options")


def _show_probability_donut(churn_labels, churn_probability):
    """Render a donut chart of the predicted class probabilities."""
    fig = go.Figure(data=[go.Pie(
        labels=churn_labels,
        values=churn_probability,
        hole=0.5,
        textinfo='label+percent',
        marker=dict(colors=['#FFA07A', '#6495ED', '#FFD700', '#32CD32', '#FF69B4', '#8B008B']))])

    # %{percent} is a fraction, so it needs the d3 percent format; the former
    # ':.2f' followed by a literal '%' rendered 81% as "0.81%".
    fig.update_traces(
        hoverinfo='label+percent',
        textfont_size=12,
        textposition='inside',
        texttemplate='%{label}: %{percent:.2%}'
    )
    fig.update_layout(
        title='Churn Probability',
        title_x=0.5,
        showlegend=False,
        width=500,
        height=500
    )
    st.plotly_chart(fig, use_container_width=True)


def _show_average_comparison(customer_churn_prob, predicts_churn):
    """Render a bar chart of the customer's churn probability vs. the average.

    Args:
        customer_churn_prob: Probability of the "Churn" class for the customer.
        predicts_churn: True when "Churn" is the predicted class; selects
            which explanatory text is shown.
    """
    # Overall churn rate in percent (placeholder — replace with the actual
    # value for your data), converted to a probability.
    average_churn_rate = 19
    main_data_churn_probability = average_churn_rate / 100

    # Both bars are churn probabilities. The no-churn case used to label the
    # customer's bar 'No-Churn Probability' while plotting the churn value.
    labels = ['Churn Probability', 'Average Churn Probability']
    values = [customer_churn_prob, main_data_churn_probability]

    fig = go.Figure(data=[go.Bar(x=labels, y=values)])
    fig.update_layout(
        xaxis_title='Churn Probability',
        yaxis_title='Probability',
        title='Comparison with Average Churn Rate',
        yaxis=dict(range=[0, 1])  # Probabilities live in [0, 1]
    )

    if customer_churn_prob > main_data_churn_probability:
        churn_comparison = "higher than"
    elif customer_churn_prob < main_data_churn_probability:
        churn_comparison = "lower than"
    else:
        churn_comparison = "equal to"

    if predicts_churn:
        explanation = f"This bar chart compares the churn probability of the selected customer " \
                      f"with the average churn rate of all customers. It provides insights into how the " \
                      f"individual customer's churn likelihood ({customer_churn_prob:.2f}) compares to the " \
                      f"overall trend. The 'Churn Probability' represents the likelihood of churn " \
                      f"for the selected customer, while the 'Average Churn Probability' represents the average " \
                      f"churn rate across all customers ({main_data_churn_probability:.2f}).\n\n" \
                      f"The customer's churn rate is {churn_comparison} the average churn rate."
    else:
        # The comparison is computed instead of always claiming "lower than":
        # a customer predicted to stay can still be above the average.
        explanation = f"This bar chart compares the churn probability of the selected customer " \
                      f"with the average churn rate of all customers. It provides insights into how the " \
                      f"individual customer's likelihood of churn ({customer_churn_prob:.2f}) compares to the " \
                      f"overall trend. A lower churn probability indicates that the customer is less likely to churn. " \
                      f"The chart shows that the churn probability ({customer_churn_prob:.2f}) is {churn_comparison} the " \
                      f"average churn probability ({main_data_churn_probability:.2f}); the customer " \
                      f"is predicted to stay with the company. Keep in mind that the prediction is based on the " \
                      f"available data and the applied model, and there might still be some uncertainty in the result."

    st.plotly_chart(fig)
    st.write(explanation)


def _show_feature_importance(model):
    """Render the model's coefficients or feature importances as a bar chart."""
    if hasattr(model, 'coef_'):
        feature_importances = model.coef_[0]
        importance_type = 'Coef'
    elif hasattr(model, 'feature_importances_'):
        feature_importances = model.feature_importances_
        importance_type = 'Importance'
    else:
        st.write('Feature importance is not available for this model.')
        return

    importance_df = pd.DataFrame({'Feature': original_feature_names, importance_type: feature_importances})
    importance_df = importance_df.sort_values(importance_type, ascending=False)

    # Green for positive values, red for zero or negative ones.
    colors = ['green' if importance > 0 else 'red' for importance in importance_df[importance_type]]

    fig = go.Figure(go.Bar(
        x=importance_df[importance_type],
        y=importance_df['Feature'],
        orientation='h',
        marker=dict(color=colors),
        text=importance_df[importance_type].apply(lambda x: f'{x:.2f}'),
        textposition='inside'))
    fig.update_layout(
        title='Feature Importance',
        xaxis_title='Importance',
        yaxis_title='Feature',
        bargap=0.1,
        width=600,
        height=800)
    st.plotly_chart(fig)

    importance_explanation = f"The feature importance plot shows the relative importance of each feature " \
                             f"for predicting churn. The importance is calculated based on the " \
                             f"{importance_type} value of each feature in the model. " \
                             f"A higher {importance_type} value indicates a stronger influence " \
                             f"of the corresponding feature on the prediction of churn.\n\n" \
                             f"For logistic regression, positive {importance_type} values indicate " \
                             f"features that positively contribute to predicting churn, " \
                             f"while negative {importance_type} values indicate features that " \
                             f"negatively contribute to predicting churn.\n\n" \
                             f"For gradient boosting, higher {importance_type} values " \
                             f"indicate features that have a greater importance in predicting churn.\n\n" \
                             f"Please note that the feature importance values may vary depending on the model " \
                             f"and the data used for training."
    st.write(importance_explanation)


# Predict churn based on user input
if st.sidebar.button('Predict Churn'):
    if not valid_input:
        # The range checks above were previously computed but never consulted,
        # so out-of-range values went straight to the model.
        st.error("Please correct the following inputs before predicting:")
        for message in error_messages:
            st.error(message)
    else:
        # Top-level boundary of the script: report any failure in the page
        # instead of showing a raw traceback.
        try:
            with st.spinner("Predicting..."):
                # Simulate a long-running process
                progress_bar = st.progress(0)
                step = 20  # A big step will reduce the execution time
                for i in range(0, 100, step):
                    time.sleep(0.1)
                    progress_bar.progress(i + step)

            churn_labels, churn_probability, churn_indices, churn_index = predict_churn(input_features, model)

            st.subheader('Main Results')
            _show_verdict(churn_labels, churn_probability, churn_index)

            st.subheader('Churn Probability')
            _show_probability_donut(churn_labels, churn_probability)

            st.subheader('Customer Churn Probability Comparison')
            _show_average_comparison(
                churn_probability[churn_indices["Churn"]],
                churn_labels[churn_index] == "Churn",
            )

            st.subheader('Feature Importance')
            _show_feature_importance(model)

        except Exception as e:
            st.error(f"An error occurred: {str(e)}")