File size: 5,250 Bytes
9ee75ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4f28d58
9ee75ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138

import os
import sys

import uvicorn
from fastapi import FastAPI, Request, File, UploadFile
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
import pandas as pd
import numpy as np
from typing import List
import joblib
from fastapi import FastAPI
from pydantic import BaseModel

# Create the FastAPI application. Debug mode makes the server return full
# tracebacks to clients on unhandled errors, so it is off by default and only
# enabled when the API_DEBUG environment variable is set (e.g. API_DEBUG=1
# during local development).
app = FastAPI(debug=os.getenv('API_DEBUG', '').strip().lower() in {'1', 'true', 'yes', 'on'})

# Load the trained model and the preprocessing transformers once, at import
# time. The paths are relative, so the artifacts are resolved against the
# working directory the server is started from.
num_imputer = joblib.load('numerical_imputer(1).joblib')
cat_imputer = joblib.load('cat_imputer.joblib')
encoder = joblib.load('encoder.joblib')
scaler = joblib.load('scaler.joblib')
model = joblib.load('lr_model_vif_smote.joblib')

# Column layout that preprocess_input() reindexes its output to before the frame
# is handed to the model: the numeric features first, followed by the one-hot
# columns of REGION, TENURE and TOP_PACK (named "<column>_<level>").
original_feature_names = [
    # Numeric features.
    'MONTANT', 'FREQUENCE_RECH', 'REVENUE', 'ARPU_SEGMENT', 'FREQUENCE',
    'DATA_VOLUME', 'ON_NET', 'ORANGE', 'TIGO', 'ZONE1', 'ZONE2',
    'REGULARITY', 'FREQ_TOP_PACK',
    # One-hot columns for REGION.
    *(f'REGION_{level}' for level in (
        'DAKAR', 'DIOURBEL', 'FATICK', 'KAFFRINE', 'KAOLACK', 'KEDOUGOU',
        'KOLDA', 'LOUGA', 'MATAM', 'SAINT-LOUIS', 'SEDHIOU', 'TAMBACOUNDA',
        'THIES', 'ZIGUINCHOR',
    )),
    # One-hot columns for TENURE.
    *(f'TENURE_{level}' for level in (
        'Long-term', 'Medium-term', 'Mid-term', 'Short-term', 'Very short-term',
    )),
    # One-hot columns for TOP_PACK.
    *(f'TOP_PACK_{level}' for level in (
        'data', 'international', 'messaging', 'other_services', 'social_media',
        'value_added_services', 'voice',
    )),
]


class InputData(BaseModel):
    # Request body for one customer record, as accepted by the /predict
    # endpoint and (as a list) by the /batch_predict endpoint.

    # Numeric features: preprocess_input() treats every field that is not one
    # of the three categorical columns below as numeric, and passes them
    # through the numerical imputer and the scaler.
    MONTANT: float
    FREQUENCE_RECH: float
    REVENUE: float
    ARPU_SEGMENT: float
    FREQUENCE: float
    DATA_VOLUME: float
    ON_NET: float
    ORANGE: float
    TIGO: float
    ZONE1: float
    ZONE2: float
    REGULARITY: float
    FREQ_TOP_PACK: float
    # Categorical features: passed through the categorical imputer and then
    # the encoder in preprocess_input().
    REGION: str
    TENURE: str
    TOP_PACK: str


def preprocess_input(input_data):
    """Turn one raw input record into the single-row frame the model consumes.

    Args:
        input_data: Mapping of column name to scalar value for one record. It
            must contain 'REGION', 'TENURE' and 'TOP_PACK'; every other key is
            treated as a numeric column.

    Returns:
        A one-row DataFrame whose columns are exactly ``original_feature_names``
        (in that order). Columns missing after encoding/scaling are filled with 0.
    """
    categorical = ['REGION', 'TENURE', 'TOP_PACK']

    frame = pd.DataFrame(input_data, index=[0])
    numerical = [name for name in frame.columns if name not in categorical]

    # Impute each column group with its own fitted imputer.
    cat_filled = cat_imputer.transform(frame[categorical])
    num_filled = num_imputer.transform(frame[numerical])

    # Encode the categorical part; the encoder output is densified via .toarray().
    one_hot = encoder.transform(cat_filled).toarray()
    encoded_part = pd.DataFrame(one_hot, columns=encoder.get_feature_names_out(categorical))

    # Scale the numeric part and keep the incoming column names.
    scaled_part = pd.DataFrame(scaler.transform(num_filled), columns=numerical)

    # Put both parts side by side, then force the expected column layout.
    combined = pd.concat([encoded_part, scaled_part], axis=1)
    return combined.reindex(columns=original_feature_names, fill_value=0)


def make_prediction(data, model):
    """Build a human-readable churn verdict for the first row of ``data``.

    Args:
        data: Feature rows accepted by ``model.predict_proba``; only the first
            row's probabilities are used.
        model: Object exposing ``predict_proba(data)``, returning one
            probability vector per row.

    Returns:
        A message naming the most probable outcome and its probability
        (two decimals). Class index 0 is reported as "no churn"; any other
        index is reported as "churn".
    """
    row_probabilities = model.predict_proba(data)[0]

    # The class with the highest probability is the prediction.
    winner = np.argmax(row_probabilities)
    predicted_probability = row_probabilities[winner]

    if winner == 0:
        return f"✅ Customer is not likely to churn with a probability of {predicted_probability:.2f}. This indicates a lower risk of losing the customer. ✅"

    return f"⚠️ Customer is likely to churn with a probability of {predicted_probability:.2f}. This indicates a high risk of losing the customer. ⚠️"

@app.get("/")
def read_root():
    """Return the welcome message pointing users at the API documentation."""
    # Product name corrected to "Expresso" (it matched neither the service name
    # in the documentation URL nor itself elsewhere) and the stray "!." removed.
    return (
        "Welcome to the Expresso Churn Prediction API! "
        "This API provides advanced machine learning predictions for churn. ⚡📊 "
        "For more information and to explore the API's capabilities, please visit the documentation: "
        "https://abubakari-expresso-churn-prediction-fastapi.hf.space/docs"
    )

@app.post('/model-info')
async def model_info():
    """Report the loaded model's class name and its parameters.

    Returns:
        A dict with a single 'model info' entry holding the model's class name
        and the mapping returned by ``model.get_params()``.
    """
    return {
        'model info': {
            'model name': type(model).__name__,
            'model parameters': model.get_params(),
        }
    }

@app.post('/predict')
async def predict(input_data: InputData):
    """Predict churn for a single customer record.

    Args:
        input_data: The validated request body.

    Returns:
        A dict whose 'prediction' entry is the message produced by
        ``make_prediction`` for the preprocessed record.
    """
    features = preprocess_input(input_data.dict())
    return {"prediction": make_prediction(features, model)}


@app.post('/batch_predict')
async def batch_predict(input_data: List[InputData]):
    """Predict churn for a list of customer records.

    Each record is preprocessed and scored independently, in request order.
    The handler has its own name so it does not redefine the module-level
    ``predict`` function that serves the single-record endpoint.

    Args:
        input_data: The validated request body, one entry per customer.

    Returns:
        A dict whose 'predictions' entry is a list of messages produced by
        ``make_prediction``, one per input record (empty for an empty list).
    """
    predictions = [
        make_prediction(preprocess_input(record.dict()), model)
        for record in input_data
    ]
    return {"predictions": predictions}