# File size: 3,685 Bytes
# ad2de74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# Import the libraries
import joblib
import pandas as pd
import numpy as np
import json
import uuid
from pathlib import Path
import gradio as gr
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
 

# Run the training script placed in the same directory as app.py.
# The model file loaded below ('random_forest_pipeline.pkl') is expected to be
# written by that script -- TODO confirm against train.py.
# NOTE: the script is exec'd in this module's namespace, so it can see and
# overwrite names defined here; only ever run a trusted train.py this way.
# Reading via Path.read_text() closes the file handle, and compiling with the
# real filename makes tracebacks point at train.py instead of "<string>".
exec(compile(Path("train.py").read_text(), "train.py", "exec"))



# Load the freshly trained model from disk.
# NOTE: joblib files are pickle-based; only load files this app produced itself.
saved_model = joblib.load("random_forest_pipeline.pkl")
print("Model loaded from random_forest_pipeline.pkl")

# Prepare the logging functionality: each app start gets its own uniquely
# named JSON-lines file under logs/, and the folder is created if missing.
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
log_folder = log_file.parent
log_folder.mkdir(parents=True, exist_ok=True)

#scheduler = CommitScheduler(
#    repo_id="insurance-charge-mlops-logs",  # provide a name "insurance-charge-mlops-logs" for the repo_id
#    repo_type="dataset",
#    folder_path=log_folder,
#    path_in_repo="data",
#    every=2
#)
# CommitScheduler is not available, so we use a lock mechanism for the example
class SimpleLock:
    """Mutual-exclusion context manager backed by a marker file on disk.

    The lock is held for as long as ``lock_file`` exists. It is used here as
    a stand-in for the commented-out ``CommitScheduler``; the ``lock``
    property lets callers write ``with scheduler.lock:`` against either.

    The lock is not reentrant, and a marker file left behind by a crashed
    process must be removed manually before the lock can be acquired again.

    Args:
        lock_file: Path of the marker file. Its parent directory must exist.
        poll_interval: Seconds to sleep between acquisition attempts while
            another holder owns the lock.
    """

    def __init__(self, lock_file="lockfile", poll_interval=0.01):
        self.lock_file = Path(lock_file)
        self.poll_interval = poll_interval

    @property
    def lock(self):
        """Return this object so ``with scheduler.lock:`` works unchanged."""
        return self

    def __enter__(self):
        # Local import keeps this block self-contained (no new file-level import).
        import time

        while True:
            try:
                # exist_ok=False creates the file exclusively, so checking
                # for and taking the lock is one atomic step (no race
                # between an exists() check and a later touch()).
                self.lock_file.touch(exist_ok=False)
            except FileExistsError:
                # Someone else holds the lock: back off instead of burning
                # CPU in a hot spin loop.
                time.sleep(self.poll_interval)
            else:
                return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            self.lock_file.unlink()
        except FileNotFoundError:
            # Marker already removed; releasing must not mask an exception
            # raised inside the ``with`` body.
            pass
        return False  # never suppress exceptions from the body


scheduler = SimpleLock()


# Define the predict function, which takes the features, converts them to a DataFrame, and makes a prediction using the saved model.
# The function runs when 'Submit' is clicked or when an API request is made.

def predict(age, bmi, children, sex, smoker, region):
    """Predict with the saved model for one set of inputs and log the call.

    The six inputs are placed in a single-row DataFrame (one column per
    input, named after the parameter) and passed to ``saved_model.predict``.
    The inputs and the prediction are then appended to ``log_file`` as one
    JSON object per line.

    Args:
        age: Value for the 'age' column.
        bmi: Value for the 'bmi' column.
        children: Value for the 'children' column.
        sex: Value for the 'sex' column.
        smoker: Value for the 'smoker' column.
        region: Value for the 'region' column.

    Returns:
        The first element of the model's prediction, as a plain ``float``.
    """
    inputs = {
        'age': age,
        'bmi': bmi,
        'children': children,
        'sex': sex,
        'smoker': smoker,
        'region': region,
    }

    # Single-row frame: every column holds a one-element list.
    df = pd.DataFrame({column: [value] for column, value in inputs.items()})

    # Convert to a built-in float so json.dumps below does not depend on the
    # numpy dtype the model happens to return.
    charge = float(saved_model.predict(df)[0])

    # Serialise writers to the log file. A CommitScheduler exposes its lock
    # as `.lock`; the SimpleLock stand-in is itself the context manager, so
    # fall back to the object when no `.lock` attribute exists (the original
    # `scheduler.lock` raised AttributeError with SimpleLock).
    lock = getattr(scheduler, "lock", scheduler)
    with lock:
        with log_file.open("a") as f:
            f.write(json.dumps({**inputs, 'prediction': charge}))
            f.write("\n")

    return charge



# Set up UI components for input and output
# Input components. The top-level gr.Number / gr.Radio / gr.Dropdown classes
# are used instead of the gr.inputs.* namespace, which is deprecated in
# Gradio 3 and removed in Gradio 4.
age_input = gr.Number(label="Age")
bmi_input = gr.Number(label="BMI")
children_input = gr.Number(label="Children")
sex_input = gr.Radio(choices=['male', 'female'], label="Sex")
smoker_input = gr.Radio(choices=['yes', 'no'], label="Smoker")
region_input = gr.Dropdown(choices=['northeast', 'northwest', 'southeast', 'southwest'], label="Region")


# Create the gradio interface, titled "HealthyLife Insurance Charge Prediction".
# The order of `inputs` must match the parameter order of predict().
demo = gr.Interface(
    fn=predict,
    inputs=[age_input, bmi_input, children_input, sex_input, smoker_input, region_input],
    outputs="number",
    title="HealthyLife Insurance Charge Prediction"
)

# Enable request queuing, then start the app without a public share link.
demo.queue()
demo.launch(share=False)