# NOTE(review): page-capture residue, not part of the program — "Spaces: Sleeping Sleeping"
# Import the libraries
import json
import time
import uuid
from pathlib import Path

import gradio as gr
import joblib
import numpy as np
import pandas as pd
from sklearn.compose import make_column_transformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
# Run the training script placed in the same directory as app.py.
# The script is executed in this module's namespace (exactly as the previous
# bare exec() did), so any names it defines stay visible here.
# The source is read through Path.read_text() so the file handle is closed,
# and compiled with its filename so tracebacks point at train.py.
# NOTE(review): an earlier comment said the script persists a linear
# regression model as 'model.joblib', but the file loaded below is
# 'random_forest_pipeline.pkl' — confirm train.py writes that file.
# NOTE(security): exec() runs arbitrary code; train.py must remain a trusted,
# repository-local file.
_train_script = Path("train.py")
exec(compile(_train_script.read_text(encoding="utf-8"), str(_train_script), "exec"))

#from commit_scheduler import CommitScheduler

# Load the freshly trained model from disk
saved_model = joblib.load("random_forest_pipeline.pkl")
print("Model loaded from random_forest_pipeline.pkl")

# Prepare the logging functionality: each process start gets its own
# uniquely named JSON-lines file under logs/.
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
log_folder = log_file.parent
log_folder.mkdir(parents=True, exist_ok=True)
# Simple lock mechanism for logging
class SimpleLock:
    """Mutual-exclusion context manager backed by a marker file.

    The lock is held while ``lock_file`` exists on disk. Acquisition creates
    the file with ``Path.touch(exist_ok=False)``, which fails with
    ``FileExistsError`` when the file is already present, so checking and
    taking the lock happen in a single step instead of a separate
    ``exists()`` / ``touch()`` pair.

    Args:
        lock_file: Path of the marker file (default ``"lockfile"``).
        poll_interval: Seconds to sleep between acquisition attempts, so a
            waiting caller does not spin at full CPU.
        timeout: Maximum seconds to wait for the lock. ``None`` (default)
            waits indefinitely, matching the previous behaviour.

    Raises:
        TimeoutError: From ``__enter__`` when ``timeout`` elapses before the
            lock could be acquired (e.g. a stale lock file was left behind).
    """

    def __init__(self, lock_file="lockfile", poll_interval=0.05, timeout=None):
        self.lock_file = Path(lock_file)
        self.poll_interval = poll_interval
        self.timeout = timeout

    def __enter__(self):
        deadline = None
        if self.timeout is not None:
            deadline = time.monotonic() + self.timeout
        while True:
            try:
                # Fails if the file already exists, so only one contender
                # can succeed in creating it.
                self.lock_file.touch(exist_ok=False)
            except FileExistsError:
                if deadline is not None and time.monotonic() >= deadline:
                    raise TimeoutError(
                        f"could not acquire lock {self.lock_file}"
                    ) from None
                time.sleep(self.poll_interval)
            else:
                return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            self.lock_file.unlink()
        except FileNotFoundError:
            # The marker was removed externally; the lock is released either
            # way, and raising here would mask an exception from the body.
            pass


scheduler = SimpleLock()

#scheduler = CommitScheduler(
#    repo_id="insurance-charge-mlops-logs", # provide a name "insurance-charge-mlops-logs" for the repo_id
#    repo_type="dataset",
#    folder_path=log_folder,
#    path_in_repo="data",
#    every=2
#)
# Define the predict function which will take features, convert to dataframe and make predictions using the saved model
# the function runs when 'Submit' is clicked or when an API request is made
def predict(age, bmi, children, sex, smoker, region):
    """Predict a charge for one set of inputs and append the call to the log.

    Args:
        age, bmi, children, sex, smoker, region: Feature values as received
            from the UI components; they are passed to the model unchanged.

    Returns:
        The first element of ``saved_model.predict(...)`` as a plain float.
    """
    record = {
        'age': age,
        'bmi': bmi,
        'children': children,
        'sex': sex,
        'smoker': smoker,
        'region': region,
    }

    # Build a one-row DataFrame: every feature becomes a single-element column
    df = pd.DataFrame({name: [value] for name, value in record.items()})

    # Make prediction using the loaded model
    prediction = saved_model.predict(df)

    # Convert the numpy scalar to a built-in float so json.dumps accepts it
    # whatever dtype the model returns (e.g. float32 is not JSON serialisable).
    charge = float(prediction[0])

    # Log both the inputs and the output; hold the lock while writing so
    # concurrent requests do not interleave their lines.
    # with scheduler.lock:
    with scheduler:
        with log_file.open("a", encoding="utf-8") as f:
            # One write per record keeps each JSON line intact.
            f.write(json.dumps({**record, 'prediction': charge}) + "\n")

    return charge
# Input widgets, one per argument of predict(): three numeric fields,
# two radio groups and a dropdown.
age_input = gr.Number(label="Age")
bmi_input = gr.Number(label="BMI")
children_input = gr.Number(label="Children")
sex_input = gr.Radio(
    label="Sex",
    choices=['male', 'female'],
)
smoker_input = gr.Radio(
    label="Smoker",
    choices=['yes', 'no'],
)
region_input = gr.Dropdown(
    label="Region",
    choices=['northeast', 'northwest', 'southeast', 'southwest'],
)

# Build the Gradio interface around predict(); the inputs are listed in the
# same order as the function's parameters and the result is shown as a number.
demo = gr.Interface(
    title="HealthyLife Insurance Charge Prediction",
    fn=predict,
    inputs=[
        age_input,
        bmi_input,
        children_input,
        sex_input,
        smoker_input,
        region_input,
    ],
    outputs="number",
)

# Turn on Gradio's request queue, then start the server without a public
# share link and with errors shown to the user.
demo.queue()
demo.launch(show_error=True, share=False)