import streamlit as st
import joblib
import pandas as pd
import numpy as np
from PIL import Image
import time
import plotly.graph_objects as go
import csv

# Load the trained models and transformers
num_imputer = joblib.load('numerical_imputer.joblib')
cat_imputer = joblib.load('cat_imputer.joblib')
encoder = joblib.load('encoder.joblib')
scaler = joblib.load('scaler.joblib')
model1 = joblib.load('lr_model_vif_smote.joblib')
model2 = joblib.load('gb_model_vif_smote.joblib')
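
# These artifacts are assumed to be scikit-learn objects fitted during training:
# imputers for the numeric and categorical columns, a OneHotEncoder, a scaler, and the
# two classifiers, all built around the feature layout in original_feature_names below.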


def preprocess_input(input_data):
    input_df = pd.DataFrame(input_data, index=[0])

    cat_columns = [col for col in input_df.columns if input_df[col].dtype == 'object']
    num_columns = [col for col in input_df.columns if input_df[col].dtype != 'object']

    input_df_imputed_cat = cat_imputer.transform(input_df[cat_columns])
    input_df_imputed_num = num_imputer.transform(input_df[num_columns])

    input_encoded_df = pd.DataFrame(encoder.transform(input_df_imputed_cat).toarray(),
                                    columns=encoder.get_feature_names_out(cat_columns))

    input_df_scaled = scaler.transform(input_df_imputed_num)
    input_scaled_df = pd.DataFrame(input_df_scaled, columns=num_columns)
    final_df = pd.concat([input_encoded_df, input_scaled_df], axis=1)
    final_df = final_df.reindex(columns=original_feature_names, fill_value=0)

    return final_df

original_feature_names = ['MONTANT', 'FREQUENCE_RECH', 'REVENUE', 'ARPU_SEGMENT', 'FREQUENCE',
                          'DATA_VOLUME', 'ON_NET', 'ORANGE', 'TIGO', 'ZONE1', 'ZONE2', 'REGULARITY', 'FREQ_TOP_PACK',
                          'REGION_DAKAR', 'REGION_DIOURBEL', 'REGION_FATICK', 'REGION_KAFFRINE', 'REGION_KAOLACK',
                          'REGION_KEDOUGOU', 'REGION_KOLDA', 'REGION_LOUGA', 'REGION_MATAM', 'REGION_SAINT-LOUIS',
                          'REGION_SEDHIOU', 'REGION_TAMBACOUNDA', 'REGION_THIES', 'REGION_ZIGUINCHOR',
                          'TENURE_Long-term', 'TENURE_Medium-term', 'TENURE_Mid-term', 'TENURE_Short-term',
                          'TENURE_Very short-term', 'TOP_PACK_data', 'TOP_PACK_international', 'TOP_PACK_messaging',
                          'TOP_PACK_other_services', 'TOP_PACK_social_media', 'TOP_PACK_value_added_services',
                          'TOP_PACK_voice']
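
# Illustrative usage (assumed values, not executed here): preprocess_input expects a dict of
# raw feature values, e.g.
#   {'MONTANT': 5000.0, ..., 'REGION': 'THIES', 'TENURE': 'Short-term', 'TOP_PACK': 'data'}
# and returns a single-row DataFrame reindexed to original_feature_names, with any
# one-hot column absent from the input filled with 0.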

# Set up the Streamlit app
st.set_page_config(layout="wide")

# Main page - Churn Prediction
st.title('📞 EXPRESSO TELECOM CUSTOMER CHURN PREDICTION APP 📞')

st.image("banner.png", use_column_width=True)
st.markdown("This app predicts whether a customer will leave your company ❌ or not 🎉. Enter the details of the customer on the left sidebar to see the result.")


# How to use
st.title('How to Use')
st.markdown('1. Select your model of choice on the left sidebar.')
st.markdown('2. Adjust the input parameters based on the customer details.')
st.markdown('3. Click the "Predict" button to initiate the prediction.')
st.markdown('4. The app will simulate a prediction process with a progress bar.')
st.markdown('5. Once the prediction is complete, the results will be displayed below.')

# Add context text
st.sidebar.markdown('**Welcome!**')
st.sidebar.markdown('This is a work in progress, and we would love to hear your suggestions on how to improve the user experience. Please feel free to provide your feedback in the suggestion box below.')

# Create the sidebar with a text input field for suggestions
correction_text = st.sidebar.text_input('Enter your suggestion')

# Button to submit the suggestion
if st.sidebar.button('Submit'):
    # Perform action on suggestion submission (e.g., save to a CSV file)
    with open('suggestions.csv', 'a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([correction_text])
    st.sidebar.info('Suggestion submitted successfully')

# Define a dictionary of models with their names, actual models, and types
models = {
    'Logistic Regression': {'model': model1, 'type': 'logistic_regression'},
    'Gradient Boosting': {'model': model2, 'type': 'gradient_boosting'}
}
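# The 'type' tag is consulted in predict_churn to pick a class-label ordering for each
# model's predict_proba output (an assumed mapping; see the note inside predict_churn).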

# Allow the user to select a model from the sidebar
st.sidebar.title('Select Model')
model_name = st.sidebar.selectbox('Choose a model', list(models.keys()))

# Retrieve the selected model and its type from the dictionary
model = models[model_name]['model']
model_type = models[model_name]['type']


# Collect input from the user
st.sidebar.title('Enter Customer Details')
input_features = {
    'MONTANT': st.sidebar.number_input('Top-up Amount (MONTANT)'),
    'FREQUENCE_RECH': st.sidebar.number_input('Number of Times the Customer Refilled (FREQUENCE_RECH)'),
    'REVENUE': st.sidebar.number_input('Monthly income of the client (REVENUE)'),
    'ARPU_SEGMENT': st.sidebar.number_input('Income over 90 days / 3 (ARPU_SEGMENT)'),
    'FREQUENCE': st.sidebar.number_input('Number of times the client has made an income (FREQUENCE)'),
    'DATA_VOLUME': st.sidebar.number_input('Number of Connections (DATA_VOLUME)'),
    'ON_NET': st.sidebar.number_input('Inter Expresso Call (ON_NET)'),
    'ORANGE': st.sidebar.number_input('Call to Orange (ORANGE)'),
    'TIGO': st.sidebar.number_input('Call to Tigo (TIGO)'),
    'ZONE1': st.sidebar.number_input('Call to Zone 1 (ZONE1)'),
    'ZONE2': st.sidebar.number_input('Call to Zone 2 (ZONE2)'),
    'REGULARITY': st.sidebar.number_input('Number of Times the Client is Active for 90 Days (REGULARITY)'),
    'FREQ_TOP_PACK': st.sidebar.number_input('Number of Times the Client has Activated the Top Packs (FREQ_TOP_PACK)'),
    'REGION': st.sidebar.selectbox('Location of Each Client (REGION)', ['SAINT-LOUIS', 'THIES', 'LOUGA', 'MATAM', 'FATICK', 'KAOLACK',
                                                                        'DIOURBEL', 'TAMBACOUNDA', 'ZIGUINCHOR', 'KOLDA', 'KAFFRINE', 'SEDHIOU',
                                                                        'KEDOUGOU']),
    'TENURE': st.sidebar.selectbox('Duration in the Network (TENURE)', ['Short-term', 'Mid-term', 'Medium-term', 'Very short-term', 'Long-term']),
    'TOP_PACK': st.sidebar.selectbox('Most Active Pack (TOP_PACK)', ['data', 'international', 'messaging', 'other_services', 'social_media',
                                                                      'value_added_services', 'voice'])
}
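
# REGION, TENURE and TOP_PACK are collected as raw category strings; preprocess_input
# one-hot encodes them into the REGION_*/TENURE_*/TOP_PACK_* columns listed in
# original_feature_names.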

# Input validation
valid_input = True
error_messages = []

# Validate numeric inputs
numeric_ranges = {
    'MONTANT': [0, 1000000],
    'FREQUENCE_RECH': [0, 100],
    'REVENUE': [0, 1000000],
    'ARPU_SEGMENT': [0, 100000],
    'FREQUENCE': [0, 100],
    'DATA_VOLUME': [0, 100000],
    'ON_NET': [0, 100000],
    'ORANGE': [0, 100000],
    'TIGO': [0, 100000],
    'ZONE1': [0, 100000],
    'ZONE2': [0, 100000],
    'REGULARITY': [0, 100],
    'FREQ_TOP_PACK': [0, 100]
}
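
# These bounds are assumed sanity limits for the raw inputs rather than values derived
# from the training data; adjust them if the real feature distributions differ.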

for feature, value in input_features.items():
    range_min, range_max = numeric_ranges.get(feature, [None, None])
    if range_min is not None and range_max is not None:
        if not range_min <= value <= range_max:
            valid_input = False
            error_messages.append(f"{feature} should be between {range_min} and {range_max}.")
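
# Surface any validation problems in the sidebar so the user can correct the inputs
# before requesting a prediction (a minimal sketch using the flags collected above).
if not valid_input:
    for message in error_messages:
        st.sidebar.error(message)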

# Churn Prediction

def predict_churn(input_data, model):
    # Preprocess the input data
    preprocessed_data = preprocess_input(input_data)

    # Calculate churn probabilities using the model
    probabilities = model.predict_proba(preprocessed_data)

    # Determine churn labels based on the model type
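    # NOTE: this ordering is an assumption about how each saved model maps its classes;
    # a more robust option is to derive the labels from model.classes_ instead of hard-coding them.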
    if model_type == "logistic_regression":
        churn_labels = ["No Churn", "Churn"]
    elif model_type == "gradient_boosting":
        churn_labels = ["Churn", "No Churn"]
    # Extract churn probability for the first sample
    churn_probability = probabilities[0]

    # Create a dictionary mapping churn labels to their indices
    churn_indices = {label: idx for idx, label in enumerate(churn_labels)}

    # Determine the index with the highest churn probability
    churn_index = np.argmax(churn_probability)

    # Return churn labels, churn probabilities, churn indices, and churn index
    return churn_labels, churn_probability, churn_indices, churn_index
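
# Illustrative return values (assuming the logistic regression model and a 28% churn score):
#   churn_labels      -> ["No Churn", "Churn"]
#   churn_probability -> array([0.72, 0.28])
#   churn_indices     -> {"No Churn": 0, "Churn": 1}
#   churn_index       -> 0  (index of the most probable class)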


# Predict churn based on user input
if st.sidebar.button('Predict Churn'):
    try:
        with st.spinner("Predicting..."):
            # Simulate a long-running process with a progress bar
            progress_bar = st.progress(0)
            step = 20  # A big step will reduce the execution time
            for i in range(0, 100, step):
                time.sleep(0.1)
                progress_bar.progress(i + step)

        churn_labels, churn_probability, churn_indices, churn_index = predict_churn(input_features, model)

        st.subheader('Main Results')

        col1, col2 = st.columns(2)

        if churn_labels[churn_index] == "Churn":
            churn_prob = churn_probability[churn_index]
            with col1:
                st.error(f"Beware!!! This customer is likely to churn with a probability of {churn_prob * 100:.2f}% 😢")
                resized_churn_image = Image.open('Churn.png')
                resized_churn_image = resized_churn_image.resize((350, 300))  # Adjust the width and height as desired
                st.image(resized_churn_image)
                # Add suggestions for retaining churned customers in the 'Churn' group
            with col2:
                st.info("Suggestions for retaining churned customers in this customer group:\n"
                    "- Offer personalized discounts or promotions\n"
                    "- Provide exceptional customer service\n"
                    "- Introduce loyalty programs\n"
                    "- Send targeted re-engagement emails\n"
                    "- Provide a dedicated account manager\n"
                    "- Offer extended trial periods\n"
                    "- Conduct exit surveys to understand reasons for churn\n"
                    "- Implement a customer win-back campaign\n"
                    "- Provide incentives for referrals\n"
                    "- Improve product or service offerings based on customer feedback")
        else:
            #churn_index = churn_indices["No Churn"]
            churn_prob = churn_probability[churn_index]
            with col1:
                st.success(f"This customer is not likely to churn with a probability of {churn_prob * 100:.2f}% 😀")
                resized_not_churn_image = Image.open('NotChurn.jpg')
                resized_not_churn_image = resized_not_churn_image.resize((350, 300))  # Adjust the width and height as desired
                st.image(resized_not_churn_image)
                # Add suggestions for engaging non-churned customers in this group
            with col2:
                st.info("Suggestions for retaining non-churned customers in this customer group:\n"
                    "- Provide personalized product recommendations\n"
                    "- Offer exclusive features or upgrades\n"
                    "- Implement proactive customer support\n"
                    "- Conduct customer satisfaction surveys\n"
                    "- Recognize and reward loyal customers\n"
                    "- Organize customer appreciation events\n"
                    "- Offer early access to new features or products\n"
                    "- Provide educational resources or tutorials\n"
                    "- Implement a customer loyalty program\n"
                    "- Offer flexible billing or pricing options")

        st.subheader('Churn Probability')

        # Create a donut chart to display probabilities
        fig = go.Figure(data=[go.Pie(
            labels=churn_labels,
            values=churn_probability,
            hole=0.5,
            textinfo='label+percent',
            marker=dict(colors=['#FFA07A', '#6495ED', '#FFD700', '#32CD32', '#FF69B4', '#8B008B']))])

        fig.update_traces(
            hoverinfo='label+percent',
            textfont_size=12,
            textposition='inside',
            texttemplate='%{label}: %{percent:.2%}'
            )

        fig.update_layout(
            title='Churn Probability',
            title_x=0.5,
            showlegend=False,
            width=500,
            height=500
            )

        st.plotly_chart(fig, use_container_width=True)

        st.subheader('Customer Churn Probability Comparison')

        # Average churn rate across all customers, in percent (replace with your actual value)
        average_churn_rate = 19

        # Convert the overall churn rate to a churn probability
        main_data_churn_probability = average_churn_rate / 100

        # Retrieve the predicted churn probability for the selected customer
        predicted_churn_prob = churn_probability[churn_index]

        if churn_labels[churn_index] == "Churn":
            churn_prob = churn_probability[churn_index]
            # Create a bar chart comparing the churn probability with the average churn rate
            labels = ['Churn Probability', 'Average Churn Probability']
            values = [predicted_churn_prob, main_data_churn_probability]

            fig = go.Figure(data=[go.Bar(x=labels, y=values)])
            fig.update_layout(
                xaxis_title='Churn Probability',
                yaxis_title='Probability',
                title='Comparison with Average Churn Rate',
                yaxis=dict(range=[0, 1])  # Set the y-axis limits between 0 and 1
            )

            # Add explanations
            if predicted_churn_prob > main_data_churn_probability:
                churn_comparison = "higher"
            elif predicted_churn_prob < main_data_churn_probability:
                churn_comparison = "lower"
            else:
                churn_comparison = "equal"


            explanation = f"This bar chart compares the churn probability of the selected customer " \
                            f"with the average churn rate of all customers. It provides insights into how the " \
                            f"individual customer's churn likelihood ({predicted_churn_prob:.2f}) compares to the " \
                            f"overall trend. The 'Churn Probability' represents the likelihood of churn " \
                            f"for the selected customer, while the 'Average Churn Rate' represents the average " \
                            f"churn rate across all customers ({main_data_churn_probability:.2f}).\n\n" \
                            f"The customer's churn rate is {churn_comparison} than the average churn rate."

            st.plotly_chart(fig)
            st.write(explanation)
        else:
            # Create a bar chart comparing the customer's churn probability with the average churn rate
            labels = ['Churn Probability', 'Average Churn Probability']
            values = [1 - predicted_churn_prob, main_data_churn_probability]

            fig = go.Figure(data=[go.Bar(x=labels, y=values)])
            fig.update_layout(
                xaxis_title='Churn Probability',
                yaxis_title='Probability',
                title='Comparison with Average Churn Rate',
                yaxis=dict(range=[0, 1])  # Set the y-axis limits between 0 and 1
            )

            explanation = f"This bar chart compares the churn probability of the selected customer " \
              f"with the average churn rate of all customers. It provides insight into how the " \
              f"individual customer's likelihood of churn ({1 - predicted_churn_prob:.2f}) compares to the " \
              f"overall trend ({main_data_churn_probability:.2f}). A lower churn probability indicates that the " \
              f"customer is less likely to churn, and this customer is predicted to stay with the company. " \
              f"Keep in mind that the prediction is based on the available data and the applied model, " \
              f"and there might still be some uncertainty in the result."


            st.plotly_chart(fig)
            st.write(explanation)

        # Visualize Feature Importance

        st.subheader('Feature Importance')
        if hasattr(model, 'coef_'):  # Check if the model has attribute 'coef_' to determine importance type
            feature_importances = model.coef_[0]
            importance_type = 'Coef'
        elif hasattr(model, 'feature_importances_'):
            feature_importances = model.feature_importances_
            importance_type = 'Importance'
        else:
            st.write('Feature importance is not available for this model.')

        # If importance information is available, create a DataFrame and sort it
        if hasattr(model, 'coef_') or hasattr(model, 'feature_importances_'):
            importance_df = pd.DataFrame({'Feature': original_feature_names, importance_type: feature_importances})
            importance_df = importance_df.sort_values(importance_type, ascending=False)

            # Determine color for each bar based on positive or negative importance
            colors = ['green' if importance > 0 else 'red' for importance in importance_df[importance_type]]

            # Create a horizontal bar chart using Plotly
            fig = go.Figure(go.Bar(
                x=importance_df[importance_type],
                y=importance_df['Feature'],
                orientation='h',
                marker=dict(color=colors),
                text=importance_df[importance_type].apply(lambda x: f'{x:.2f}'),
                textposition='inside'))

            # Configure the layout of the bar chart
            fig.update_layout(
                title='Feature Importance',
                xaxis_title='Importance',
                yaxis_title='Feature',
                bargap=0.1,
                width=600,
                height=800)

            # Display the bar chart using Plotly in Streamlit
            st.plotly_chart(fig)

            # Explanation of the feature importance values
            importance_explanation = f"The feature importance plot shows the relative importance of each feature " \
                       f"for predicting churn. The importance is calculated based on the " \
                       f"{importance_type} value of each feature in the model. " \
                       f"A higher {importance_type} value indicates a stronger influence " \
                       f"of the corresponding feature on the prediction of churn.\n\n" \
                       f"For logistic regression, positive {importance_type} values indicate " \
                       f"features that positively contribute to predicting churn, " \
                       f"while negative {importance_type} values indicate features that " \
                       f"negatively contribute to predicting churn.\n\n" \
                       f"For gradient boosting, higher {importance_type} values " \
                       f"indicate features that have a greater importance in predicting churn.\n\n" \
                       f"Please note that the feature importance values may vary depending on the model " \
                       f"and the data used for training."


            st.write(importance_explanation)


    except Exception as e:
        st.error(f"An error occurred: {str(e)}")