# Import the libraries
import joblib
import pandas as pd
import numpy as np
import json
import time
import uuid
from pathlib import Path

import gradio as gr
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline

# Run the training script placed in the same directory as app.py.
# The training script will train and persist a linear regression
# model with the filename 'model.joblib'.
exec(open("train.py").read())

# Load the freshly trained model from disk
saved_model = joblib.load("model.joblib")
print("Model loaded from model.joblib")

# Prepare the logging functionality: each app instance appends to its own
# JSON Lines file inside the 'logs' folder.
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
log_folder = log_file.parent
log_folder.mkdir(parents=True, exist_ok=True)

# from huggingface_hub import CommitScheduler
# scheduler = CommitScheduler(
#     repo_id="insurance-charge-mlops-logs",  # provide a name "insurance-charge-mlops-logs" for the repo_id
#     repo_type="dataset",
#     folder_path=log_folder,
#     path_in_repo="data",
#     every=2,
# )

# CommitScheduler is not available, so we use a simple file-based lock for the
# example. Note that the exists()/touch() check is not atomic, so this is only
# a stand-in for demonstration, not a production-grade lock.
class SimpleLock:
    def __init__(self, lock_file="lockfile"):
        self.lock_file = Path(lock_file)

    def __enter__(self):
        # Wait until the lock file disappears, then claim it
        while self.lock_file.exists():
            time.sleep(0.05)
        self.lock_file.touch()

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.lock_file.unlink(missing_ok=True)


scheduler = SimpleLock()


# Define the predict function which will take the features, convert them to a
# dataframe and make a prediction using the saved model.
# The function runs when 'Submit' is clicked or when an API request is made.
def predict(age, bmi, children, sex, smoker, region):
    # Create a DataFrame with the input data
    data = {
        'age': [age],
        'bmi': [bmi],
        'children': [children],
        'sex': [sex],
        'smoker': [smoker],
        'region': [region]
    }
    df = pd.DataFrame(data)

    # Make a prediction using the loaded model
    prediction = saved_model.predict(df)

    # Log both the inputs and the prediction to the log file.
    # Hold the lock while writing to avoid parallel access to the file.
    with scheduler:
        with log_file.open("a") as f:
            f.write(json.dumps(
                {
                    'age': age,
                    'bmi': bmi,
                    'children': children,
                    'sex': sex,
                    'smoker': smoker,
                    'region': region,
                    # cast to a plain float so json can serialize the numpy scalar
                    'prediction': float(prediction[0])
                }
            ))
            f.write("\n")

    return float(prediction[0])


# Set up UI components for input and output
age_input = gr.Number(label="Age")
bmi_input = gr.Number(label="BMI")
children_input = gr.Number(label="Children")
sex_input = gr.Radio(choices=['male', 'female'], label="Sex")
smoker_input = gr.Radio(choices=['yes', 'no'], label="Smoker")
region_input = gr.Dropdown(
    choices=['northeast', 'northwest', 'southeast', 'southwest'], label="Region"
)

# Create the gradio interface with the title "HealthyLife Insurance Charge Prediction"
demo = gr.Interface(
    fn=predict,
    inputs=[age_input, bmi_input, children_input, sex_input, smoker_input, region_input],
    outputs="number",
    title="HealthyLife Insurance Charge Prediction"
)

# Enable request queuing and launch the app (queue() lets Gradio handle
# concurrent requests; it is not a load balancer)
demo.queue()
demo.launch(share=False)
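
# --- Optional: calling the prediction API programmatically ---
# A minimal sketch (kept commented out so it does not run as part of app.py) of
# how a separate script or session could call the /predict endpoint exposed by
# the interface above, using the gradio_client package. The URL assumes the app
# is running locally on Gradio's default port (7860); adjust it for your
# deployment, and the example feature values are arbitrary.
#
# from gradio_client import Client
#
# client = Client("http://127.0.0.1:7860/")
# result = client.predict(
#     35,           # age
#     27.5,         # bmi
#     2,            # children
#     "male",       # sex
#     "no",         # smoker
#     "southeast",  # region
#     api_name="/predict",
# )
# print(result)  # predicted insurance charge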