import streamlit as st
import joblib
import pandas as pd
import numpy as np
from PIL import Image
import time
import plotly.graph_objects as go
import csv

# Load the trained models and transformers
num_imputer = joblib.load('numerical_imputer.joblib')
cat_imputer = joblib.load('cat_imputer.joblib')
encoder = joblib.load('encoder.joblib')
scaler = joblib.load('scaler.joblib')
model1 = joblib.load('lr_model_vif_smote.joblib')
model2 = joblib.load('gb_model_vif_smote.joblib')
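
# These artifacts are assumed to be scikit-learn objects fitted during training:
# imputers for the numeric and categorical columns, a OneHotEncoder, a scaler, and the
# two classifiers, all built around the feature layout in original_feature_names below.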


def preprocess_input(input_data):
    input_df = pd.DataFrame(input_data, index=[0])

    cat_columns = [col for col in input_df.columns if input_df[col].dtype == 'object']
    num_columns = [col for col in input_df.columns if input_df[col].dtype != 'object']

    input_df_imputed_cat = cat_imputer.transform(input_df[cat_columns])
    input_df_imputed_num = num_imputer.transform(input_df[num_columns])

    input_encoded_df = pd.DataFrame(encoder.transform(input_df_imputed_cat).toarray(),
                                    columns=encoder.get_feature_names_out(cat_columns))

    input_df_scaled = scaler.transform(input_df_imputed_num)
    input_scaled_df = pd.DataFrame(input_df_scaled, columns=num_columns)
    final_df = pd.concat([input_encoded_df, input_scaled_df], axis=1)
    final_df = final_df.reindex(columns=original_feature_names, fill_value=0)

    return final_df

original_feature_names = ['MONTANT', 'FREQUENCE_RECH', 'REVENUE', 'ARPU_SEGMENT', 'FREQUENCE',
                          'DATA_VOLUME', 'ON_NET', 'ORANGE', 'TIGO', 'ZONE1', 'ZONE2', 'REGULARITY', 'FREQ_TOP_PACK',
                          'REGION_DAKAR', 'REGION_DIOURBEL', 'REGION_FATICK', 'REGION_KAFFRINE', 'REGION_KAOLACK',
                          'REGION_KEDOUGOU', 'REGION_KOLDA', 'REGION_LOUGA', 'REGION_MATAM', 'REGION_SAINT-LOUIS',
                          'REGION_SEDHIOU', 'REGION_TAMBACOUNDA', 'REGION_THIES', 'REGION_ZIGUINCHOR',
                          'TENURE_Long-term', 'TENURE_Medium-term', 'TENURE_Mid-term', 'TENURE_Short-term',
                          'TENURE_Very short-term', 'TOP_PACK_data', 'TOP_PACK_international', 'TOP_PACK_messaging',
                          'TOP_PACK_other_services', 'TOP_PACK_social_media', 'TOP_PACK_value_added_services',
                          'TOP_PACK_voice']
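
# Illustrative usage (assumed values, not executed here): preprocess_input expects a dict of
# raw feature values, e.g.
#   {'MONTANT': 5000.0, ..., 'REGION': 'THIES', 'TENURE': 'Short-term', 'TOP_PACK': 'data'}
# and returns a single-row DataFrame reindexed to original_feature_names, with any
# one-hot column absent from the input filled with 0.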

# Set up the Streamlit app
st.set_page_config(layout="wide")

# Main page - Churn Prediction
st.title('📞 EXPRESSO TELECOM CUSTOMER CHURN PREDICTION APP 📞')

st.image("banner.png", use_column_width=True)
st.markdown("This app predicts whether a customer will leave your company ❌ or not 🎉. Enter the details of the customer on the left sidebar to see the result.")


# How to use
st.title('How to Use')
st.markdown('1. Select your model of choice on the left sidebar.')
st.markdown('2. Adjust the input parameters based on the customer details.')
st.markdown('3. Click the "Predict" button to initiate the prediction.')
st.markdown('4. The app will simulate a prediction process with a progress bar.')
st.markdown('5. Once the prediction is complete, the results will be displayed below.')

# Add context text
st.sidebar.markdown('**Welcome!**')
st.sidebar.markdown('This is a work in progress, and we would love to hear your suggestions on how to improve the user experience. Please feel free to provide your feedback in the suggestion box below.')

# Create the sidebar with a text input field for suggestions
correction_text = st.sidebar.text_input('Enter your suggestion')

# Button to submit the suggestion
if st.sidebar.button('Submit'):
    # Perform action on suggestion submission (e.g., save to a CSV file)
    with open('suggestions.csv', 'a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([correction_text])
    st.sidebar.info('Suggestion submitted successfully')

# Define a dictionary of models with their names, actual models, and types
models = {
    'Logistic Regression': {'model': model1, 'type': 'logistic_regression'},
    'Gradient Boosting': {'model': model2, 'type': 'gradient_boosting'}
}
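# The 'type' tag is consulted in predict_churn to pick a class-label ordering for each
# model's predict_proba output (an assumed mapping; see the note inside predict_churn).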

# Allow the user to select a model from the sidebar
st.sidebar.title('Select Model')
model_name = st.sidebar.selectbox('Choose a model', list(models.keys()))

# Retrieve the selected model and its type from the dictionary
model = models[model_name]['model']
model_type = models[model_name]['type']


# Collect input from the user
st.sidebar.title('Enter Customer Details')
input_features = {
    'MONTANT': st.sidebar.number_input('Top-up Amount (MONTANT)'),
    'FREQUENCE_RECH': st.sidebar.number_input('Number of Times the Customer Refilled (FREQUENCE_RECH)'),
    'REVENUE': st.sidebar.number_input('Monthly income of the client (REVENUE)'),
    'ARPU_SEGMENT': st.sidebar.number_input('Income over 90 days / 3 (ARPU_SEGMENT)'),
    'FREQUENCE': st.sidebar.number_input('Number of times the client has made an income (FREQUENCE)'),
    'DATA_VOLUME': st.sidebar.number_input('Number of Connections (DATA_VOLUME)'),
    'ON_NET': st.sidebar.number_input('Inter Expresso Call (ON_NET)'),
    'ORANGE': st.sidebar.number_input('Call to Orange (ORANGE)'),
    'TIGO': st.sidebar.number_input('Call to Tigo (TIGO)'),
    'ZONE1': st.sidebar.number_input('Call to Zone 1 (ZONE1)'),
    'ZONE2': st.sidebar.number_input('Call to Zone 2 (ZONE2)'),
    'REGULARITY': st.sidebar.number_input('Number of Times the Client is Active for 90 Days (REGULARITY)'),
    'FREQ_TOP_PACK': st.sidebar.number_input('Number of Times the Client has Activated the Top Packs (FREQ_TOP_PACK)'),
    'REGION': st.sidebar.selectbox('Location of Each Client (REGION)', ['SAINT-LOUIS', 'THIES', 'LOUGA', 'MATAM', 'FATICK', 'KAOLACK',
                                                                        'DIOURBEL', 'TAMBACOUNDA', 'ZIGUINCHOR', 'KOLDA', 'KAFFRINE', 'SEDHIOU',
                                                                        'KEDOUGOU']),
    'TENURE': st.sidebar.selectbox('Duration in the Network (TENURE)', ['Short-term', 'Mid-term', 'Medium-term', 'Very short-term', 'Long-term']),
    'TOP_PACK': st.sidebar.selectbox('Most Active Pack (TOP_PACK)', ['data', 'international', 'messaging', 'other_services', 'social_media',
                                                                      'value_added_services', 'voice'])
}
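
# REGION, TENURE and TOP_PACK are collected as raw category strings; preprocess_input
# one-hot encodes them into the REGION_*/TENURE_*/TOP_PACK_* columns listed in
# original_feature_names.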

# Input validation
valid_input = True
error_messages = []

# Validate numeric inputs
numeric_ranges = {
    'MONTANT': [0, 1000000],
    'FREQUENCE_RECH': [0, 100],
    'REVENUE': [0, 1000000],
    'ARPU_SEGMENT': [0, 100000],
    'FREQUENCE': [0, 100],
    'DATA_VOLUME': [0, 100000],
    'ON_NET': [0, 100000],
    'ORANGE': [0, 100000],
    'TIGO': [0, 100000],
    'ZONE1': [0, 100000],
    'ZONE2': [0, 100000],
    'REGULARITY': [0, 100],
    'FREQ_TOP_PACK': [0, 100]
}
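
# These bounds are assumed sanity limits for the raw inputs rather than values derived
# from the training data; adjust them if the real feature distributions differ.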

for feature, value in input_features.items():
    range_min, range_max = numeric_ranges.get(feature, [None, None])
    if range_min is not None and range_max is not None:
        if not range_min <= value <= range_max:
            valid_input = False
            error_messages.append(f"{feature} should be between {range_min} and {range_max}.")
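
# Surface any validation problems in the sidebar so the user can correct the inputs
# before requesting a prediction (a minimal sketch using the flags collected above).
if not valid_input:
    for message in error_messages:
        st.sidebar.error(message)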

# Churn Prediction

def predict_churn(input_data, model):
    # Preprocess the input data
    preprocessed_data = preprocess_input(input_data)

    # Calculate churn probabilities using the model
    probabilities = model.predict_proba(preprocessed_data)

    # Determine churn labels based on the model type
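    # NOTE: this ordering is an assumption about how each saved model maps its classes;
    # a more robust option is to derive the labels from model.classes_ instead of hard-coding them.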
    if model_type == "logistic_regression":
        churn_labels = ["No Churn", "Churn"]
    elif model_type == "gradient_boosting":
        churn_labels = ["Churn", "No Churn"]
    # Extract churn probability for the first sample
    churn_probability = probabilities[0]

    # Create a dictionary mapping churn labels to their indices
    churn_indices = {label: idx for idx, label in enumerate(churn_labels)}

    # Determine the index with the highest churn probability
    churn_index = np.argmax(churn_probability)

    # Return churn labels, churn probabilities, churn indices, and churn index
    return churn_labels, churn_probability, churn_indices, churn_index
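
# Illustrative return values (assuming the logistic regression model and a 28% churn score):
#   churn_labels      -> ["No Churn", "Churn"]
#   churn_probability -> array([0.72, 0.28])
#   churn_indices     -> {"No Churn": 0, "Churn": 1}
#   churn_index       -> 0  (index of the most probable class)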


# Predict churn based on user input
if st.sidebar.button('Predict Churn'):
    try:
        with st.spinner("Predicting..."):
            # Simulate a long-running process with a progress bar
            progress_bar = st.progress(0)
            step = 20  # A big step will reduce the execution time
            for i in range(0, 100, step):
                time.sleep(0.1)
                progress_bar.progress(i + step)

        churn_labels, churn_probability, churn_indices, churn_index = predict_churn(input_features, model)

        st.subheader('Main Results')

        col1, col2 = st.columns(2)

        if churn_labels[churn_index] == "Churn":
            churn_prob = churn_probability[churn_index]
            with col1:
                st.error(f"Beware!!! This customer is likely to churn with a probability of {churn_prob * 100:.2f}% 😢")
                resized_churn_image = Image.open('Churn.png')
                resized_churn_image = resized_churn_image.resize((350, 300))  # Adjust the width and height as desired
                st.image(resized_churn_image)
                # Add suggestions for retaining churned customers in the 'Churn' group
            with col2:
                st.info("Suggestions for retaining churned customers in this customer group:\n"
                    "- Offer personalized discounts or promotions\n"
                    "- Provide exceptional customer service\n"
                    "- Introduce loyalty programs\n"
                    "- Send targeted re-engagement emails\n"
                    "- Provide a dedicated account manager\n"
                    "- Offer extended trial periods\n"
                    "- Conduct exit surveys to understand reasons for churn\n"
                    "- Implement a customer win-back campaign\n"
                    "- Provide incentives for referrals\n"
                    "- Improve product or service offerings based on customer feedback")
        else:
            #churn_index = churn_indices["No Churn"]
            churn_prob = churn_probability[churn_index]
            with col1:
                st.success(f"This customer is not likely to churn with a probability of {churn_prob * 100:.2f}% 😀")
                resized_not_churn_image = Image.open('NotChurn.jpg')
                resized_not_churn_image = resized_not_churn_image.resize((350, 300))  # Adjust the width and height as desired
                st.image(resized_not_churn_image)
                # Add suggestions for engaging non-churned customers in this group
            with col2:
                st.info("Suggestions for retaining non-churned customers in this customer group:\n"
                    "- Provide personalized product recommendations\n"
                    "- Offer exclusive features or upgrades\n"
                    "- Implement proactive customer support\n"
                    "- Conduct customer satisfaction surveys\n"
                    "- Recognize and reward loyal customers\n"
                    "- Organize customer appreciation events\n"
                    "- Offer early access to new features or products\n"
                    "- Provide educational resources or tutorials\n"
                    "- Implement a customer loyalty program\n"
                    "- Offer flexible billing or pricing options")

        st.subheader('Churn Probability')

        # Create a donut chart to display probabilities
        fig = go.Figure(data=[go.Pie(
            labels=churn_labels,
            values=churn_probability,
            hole=0.5,
            textinfo='label+percent',
            marker=dict(colors=['#FFA07A', '#6495ED', '#FFD700', '#32CD32', '#FF69B4', '#8B008B']))])

        fig.update_traces(
            hoverinfo='label+percent',
            textfont_size=12,
            textposition='inside',
            texttemplate='%{label}: %{percent:.2%}'
            )

        fig.update_layout(
            title='Churn Probability',
            title_x=0.5,
            showlegend=False,
            width=500,
            height=500
            )

        st.plotly_chart(fig, use_container_width=True)

        st.subheader('Customer Churn Probability Comparison')

        # Average churn rate across all customers, in percent (replace with your actual value)
        average_churn_rate = 19

        # Convert the overall churn rate to a churn probability
        main_data_churn_probability = average_churn_rate / 100

        # Retrieve the predicted churn probability for the selected customer
        predicted_churn_prob = churn_probability[churn_index]

        if churn_labels[churn_index] == "Churn":
            churn_prob = churn_probability[churn_index]
            # Create a bar chart comparing the churn probability with the average churn rate
            labels = ['Churn Probability', 'Average Churn Probability']
            values = [predicted_churn_prob, main_data_churn_probability]

            fig = go.Figure(data=[go.Bar(x=labels, y=values)])
            fig.update_layout(
                xaxis_title='Churn Probability',
                yaxis_title='Probability',
                title='Comparison with Average Churn Rate',
                yaxis=dict(range=[0, 1])  # Set the y-axis limits between 0 and 1
            )

            # Add explanations
            if predicted_churn_prob > main_data_churn_probability:
                churn_comparison = "higher"
            elif predicted_churn_prob < main_data_churn_probability:
                churn_comparison = "lower"
            else:
                churn_comparison = "equal"


            explanation = f"This bar chart compares the churn probability of the selected customer " \
                            f"with the average churn rate of all customers. It provides insights into how the " \
                            f"individual customer's churn likelihood ({predicted_churn_prob:.2f}) compares to the " \
                            f"overall trend. The 'Churn Probability' represents the likelihood of churn " \
                            f"for the selected customer, while the 'Average Churn Rate' represents the average " \
                            f"churn rate across all customers ({main_data_churn_probability:.2f}).\n\n" \
                            f"The customer's churn rate is {churn_comparison} than the average churn rate."

            st.plotly_chart(fig)
            st.write(explanation)
        else:
            # Create a bar chart comparing the customer's churn probability with the average churn rate
            labels = ['Churn Probability', 'Average Churn Probability']
            values = [1 - predicted_churn_prob, main_data_churn_probability]

            fig = go.Figure(data=[go.Bar(x=labels, y=values)])
            fig.update_layout(
                xaxis_title='Churn Probability',
                yaxis_title='Probability',
                title='Comparison with Average Churn Rate',
                yaxis=dict(range=[0, 1])  # Set the y-axis limits between 0 and 1
            )

            explanation = f"This bar chart compares the churn probability of the selected customer " \
              f"with the average churn rate of all customers. It provides insight into how the " \
              f"individual customer's likelihood of churn ({1 - predicted_churn_prob:.2f}) compares to the " \
              f"overall trend ({main_data_churn_probability:.2f}). A lower churn probability indicates that the " \
              f"customer is less likely to churn, and this customer is predicted to stay with the company. " \
              f"Keep in mind that the prediction is based on the available data and the applied model, " \
              f"and there might still be some uncertainty in the result."


            st.plotly_chart(fig)
            st.write(explanation)

        # Visualize Feature Importance

        st.subheader('Feature Importance')
        if hasattr(model, 'coef_'):  # Check if the model has attribute 'coef_' to determine importance type
            feature_importances = model.coef_[0]
            importance_type = 'Coef'
        elif hasattr(model, 'feature_importances_'):
            feature_importances = model.feature_importances_
            importance_type = 'Importance'
        else:
            st.write('Feature importance is not available for this model.')

        # If importance information is available, create a DataFrame and sort it
        if hasattr(model, 'coef_') or hasattr(model, 'feature_importances_'):
            importance_df = pd.DataFrame({'Feature': original_feature_names, importance_type: feature_importances})
            importance_df = importance_df.sort_values(importance_type, ascending=False)

            # Determine color for each bar based on positive or negative importance
            colors = ['green' if importance > 0 else 'red' for importance in importance_df[importance_type]]

            # Create a horizontal bar chart using Plotly
            fig = go.Figure(go.Bar(
                x=importance_df[importance_type],
                y=importance_df['Feature'],
                orientation='h',
                marker=dict(color=colors),
                text=importance_df[importance_type].apply(lambda x: f'{x:.2f}'),
                textposition='inside'))

            # Configure the layout of the bar chart
            fig.update_layout(
                title='Feature Importance',
                xaxis_title='Importance',
                yaxis_title='Feature',
                bargap=0.1,
                width=600,
                height=800)

            # Display the bar chart using Plotly in Streamlit
            st.plotly_chart(fig)

            # Explanation of the feature importance values
            importance_explanation = f"The feature importance plot shows the relative importance of each feature " \
                       f"for predicting churn. The importance is calculated based on the " \
                       f"{importance_type} value of each feature in the model. " \
                       f"A higher {importance_type} value indicates a stronger influence " \
                       f"of the corresponding feature on the prediction of churn.\n\n" \
                       f"For logistic regression, positive {importance_type} values indicate " \
                       f"features that positively contribute to predicting churn, " \
                       f"while negative {importance_type} values indicate features that " \
                       f"negatively contribute to predicting churn.\n\n" \
                       f"For gradient boosting, higher {importance_type} values " \
                       f"indicate features that have a greater importance in predicting churn.\n\n" \
                       f"Please note that the feature importance values may vary depending on the model " \
                       f"and the data used for training."


            st.write(importance_explanation)


    except Exception as e:
        st.error(f"An error occurred: {str(e)}")