# Import the libraries
import os
import json
import uuid
from pathlib import Path

import joblib
import pandas as pd
import gradio as gr
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from huggingface_hub import CommitScheduler

# Read the Hugging Face token from environment variables
HF_TOKEN = os.getenv("HF_TOKEN")
# Running the training script placed in the same directory as app.py
# The training script will train and persist a random forest model with the filename 'random_forest_pipeline_best.pkl'
import train
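
# The sklearn imports above mirror what the training script is expected to use.
# A rough, hypothetical sketch of the pipeline train.py is assumed to build and
# persist (the actual script may differ):
#
#   numeric_features = ['age', 'bmi', 'children']
#   categorical_features = ['sex', 'smoker', 'region']
#   preprocessor = ColumnTransformer([
#       ('num', Pipeline([('impute', SimpleImputer()), ('scale', StandardScaler())]), numeric_features),
#       ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
#   ])
#   pipeline = Pipeline([('preprocess', preprocessor), ('model', RandomForestRegressor())])
#   ... fit on a train_test_split of the insurance data, then
#   joblib.dump(pipeline, 'random_forest_pipeline_best.pkl')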
# Loading the freshly trained model from disk
saved_model_path = "random_forest_pipeline_best.pkl"
model_pipeline = joblib.load(saved_model_path)
# Preparing the logging functionality
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
log_folder = log_file.parent
log_folder.mkdir(parents=True, exist_ok=True)

scheduler = CommitScheduler(
    repo_id="insurance-charge-mlops-logs",  # dataset repo that will hold the prediction logs
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2,
    token=HF_TOKEN  # Pass the token directly
)
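# Note: CommitScheduler runs in a background thread and pushes the contents of
# folder_path to the dataset repo roughly every `every` minutes (2 here), so
# logged predictions are versioned on the Hub without blocking user requests.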
# Defining the predict function, which takes the features, converts them to a DataFrame and makes a prediction using the saved model
def predict(age, bmi, children, sex, smoker, region):
    # Prepare the input data as a DataFrame
    input_data = pd.DataFrame({
        'age': [age],
        'bmi': [bmi],
        'children': [children],
        'sex': [sex],
        'smoker': [smoker],
        'region': [region]
    })
    # Making a prediction using the loaded model pipeline
    # Cast to a plain float so the value is JSON-serializable
    prediction = float(model_pipeline.predict(input_data)[0])
    # Once the prediction is made, log both the inputs and the output to the log file
    # Hold the commit scheduler's lock while writing so a scheduled commit cannot read a half-written line
    with scheduler.lock:
        with log_file.open("a") as f:
            f.write(json.dumps(
                {
                    'age': age,
                    'bmi': bmi,
                    'children': children,
                    'sex': sex,
                    'smoker': smoker,
                    'region': region,
                    'prediction': prediction
                }
            ))
            f.write("\n")
    return prediction
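
# Example (illustrative values only): predict(35, 27.5, 2, 'male', 'no', 'southeast')
# appends one JSON object per line to the log file, e.g.
# {"age": 35, "bmi": 27.5, "children": 2, "sex": "male", "smoker": "no", "region": "southeast", "prediction": <model output>}
# and returns the predicted charge as a float.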
# Setting up UI components for input and output
age_input = gr.Number(label="Age")
bmi_input = gr.Number(label="BMI")
children_input = gr.Number(label="Children")
sex_input = gr.Radio(choices=['male', 'female'], label="Sex")
smoker_input = gr.Radio(choices=['yes', 'no'], label="Smoker")
region_input = gr.Dropdown(choices=['northeast', 'northwest', 'southeast', 'southwest'], label="Region")
# Creating the gradio interface with the title "HealthyLife Insurance Charge Prediction"
demo = gr.Interface(
    fn=predict,
    inputs=[age_input, bmi_input, children_input, sex_input, smoker_input, region_input],
    outputs="number",
    title="HealthyLife Insurance Charge Prediction"
)
# Enabling the request queue (so concurrent requests are queued rather than dropped) and launching the app
demo.queue()
demo.launch(share=False)
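
# When run locally (python app.py), Gradio serves the UI at http://127.0.0.1:7860 by default;
# on a Hugging Face Space the app is served automatically from this app.py.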