File size: 3,025 Bytes
81448e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# Import the libraries
import gradio as gr
import pandas as pd
import joblib
from sklearn.preprocessing import OneHotEncoder
import subprocess
import json
import uuid
from pathlib import Path
from huggingface_hub import CommitScheduler

# Run the training script placed in the same directory as app.py
# The training script will train and persist a linear regression
# model with the filename 'model.joblib'
# NOTE(review): no subprocess call is visible in this chunk even though
# `subprocess` is imported — confirm whether the training script should be
# invoked here, e.g. subprocess.run(["python", "train.py"], check=True).


# Load the freshly trained model from disk
# model = joblib.load('/content/dt_regressor.pkl') # Uncomment this line to use Decision Tree model
model = joblib.load('model.joblib') # Linear Regression model

# Prepare the logging functionality.
# Each app instance gets its own uniquely named JSON-lines file so that
# concurrent replicas never write to the same log file.
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
log_folder = log_file.parent
# Create the local log directory up front; otherwise the first
# log_file.open("a") in the predict function would raise FileNotFoundError.
log_folder.mkdir(parents=True, exist_ok=True)

# Periodically push the contents of the local log folder to a Hugging Face
# dataset repository so prediction logs are persisted off-box.
scheduler = CommitScheduler(
    repo_id="debjaninath/insurance-charge-mlops-logs",  # provide a name "insurance-charge-mlops-logs" for the repo_id
    repo_type="dataset",
    folder_path=log_folder,        # local folder that gets synced
    path_in_repo="data",           # destination folder inside the dataset repo
    every=2                        # commit interval in minutes
)

# Define the predict function which will take features, convert to dataframe and make predictions using the saved model
# the functions runs when 'Submit' is clicked or when a API request is made
def predict_charges(age, bmi, children, sex, smoker, region):
    try:
        # Create a DataFrame from the input features
        data = pd.DataFrame({
            'age': [age],
            'bmi': [bmi],
            'children': [children],
            'sex': [sex],
            'smoker': [smoker],
            'region': [region]
        })

        # Handle categorical variables using one-hot encoding
        data = pd.get_dummies(data)

        # Ensure the input data has the same features as the training data
        train_columns = model.feature_names_in_
        missing_columns = set(train_columns) - set(data.columns)
        for column in missing_columns:
            data[column] = 0
        data = data[train_columns]

        print("Input data:")
        print(data)

        # Make predictions using the loaded model
        prediction = model.predict(data)

        print("Prediction:", prediction)

        # Check if prediction is not None and has at least one element
        if prediction is not None and len(prediction) > 0:
            # While the prediction is made, log both the inputs and outputs to a log file
            # While writing to the log file, ensure that the commit scheduler is locked to avoid parallel
            # access
            with scheduler.lock:
                with log_file.open("a") as f:
                    f.write(json.dumps(
                        {
                            'age': age,
                            'bmi': bmi,
                            'children': children,
                            'sex': sex,
                            'smoker': smoker,
                            'region': region,
                            'prediction': prediction[0]
                        }
                    ))
                    f.write("\n")

            return float(prediction[0])