# Import the libraries
import os
import json
import uuid
from pathlib import Path

import joblib
import pandas as pd
import gradio as gr
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from huggingface_hub import CommitScheduler

# Read the Hugging Face token from environment variables
HF_TOKEN = os.getenv("HF_TOKEN")

# Run the training script placed in the same directory as app.py.
# The training script trains and persists a random forest model
# with the filename 'random_forest_pipeline_best.pkl'.
import train

# Load the freshly trained model from disk
saved_model_path = "random_forest_pipeline_best.pkl"
model_pipeline = joblib.load(saved_model_path)

# Prepare the logging functionality
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
log_folder = log_file.parent
log_folder.mkdir(parents=True, exist_ok=True)

scheduler = CommitScheduler(
    repo_id="insurance-charge-mlops-logs",  # provide a name for the repo_id
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2,  # push the log folder to the Hub every 2 minutes
    token=HF_TOKEN  # pass the token directly
)


# Define the predict function, which takes the features, converts them to a
# DataFrame and makes a prediction using the saved model pipeline
def predict(age, bmi, children, sex, smoker, region):
    # Prepare the input data as a DataFrame
    input_data = pd.DataFrame({
        'age': [age],
        'bmi': [bmi],
        'children': [children],
        'sex': [sex],
        'smoker': [smoker],
        'region': [region]
    })

    # Make a prediction using the loaded model pipeline
    prediction = model_pipeline.predict(input_data)

    # Once the prediction is made, log both the inputs and the output to the log file.
    # Hold the commit scheduler's lock while writing, so the file is not modified
    # while the scheduler is pushing the folder to the Hub.
    with scheduler.lock:
        with log_file.open("a") as f:
            f.write(json.dumps(
                {
                    'age': age,
                    'bmi': bmi,
                    'children': children,
                    'sex': sex,
                    'smoker': smoker,
                    'region': region,
                    # cast to a plain float so json.dumps can serialize the numpy value
                    'prediction': float(prediction[0])
                }
            ))
            f.write("\n")

    return float(prediction[0])


# Set up UI components for input and output
age_input = gr.Number(label="Age")
bmi_input = gr.Number(label="BMI")
children_input = gr.Number(label="Children")
sex_input = gr.Radio(choices=['male', 'female'], label="Sex")
smoker_input = gr.Radio(choices=['yes', 'no'], label="Smoker")
region_input = gr.Dropdown(choices=['northeast', 'northwest', 'southeast', 'southwest'], label="Region")

# Create the Gradio interface with the title "HealthyLife Insurance Charge Prediction"
demo = gr.Interface(
    fn=predict,
    inputs=[age_input, bmi_input, children_input, sex_input, smoker_input, region_input],
    outputs="number",
    title="HealthyLife Insurance Charge Prediction"
)

# Enable the request queue and launch the app
demo.queue()
demo.launch(share=False)
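
# Each call to predict() appends one JSON object per line to logs/data_<uuid>.json,
# and the CommitScheduler pushes that folder to the dataset repo on its schedule.
# An illustrative log record (hypothetical values) would look like:
# {"age": 30, "bmi": 27.5, "children": 1, "sex": "male", "smoker": "no", "region": "northeast", "prediction": 4500.0}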