Spaces:
Sleeping
Sleeping
| # Import the libraries | |
| import joblib | |
| import pandas as pd | |
| import json | |
| import uuid | |
| from pathlib import Path | |
| import gradio as gr | |
| from sklearn.pipeline import Pipeline | |
| from sklearn.compose import ColumnTransformer | |
| from sklearn.preprocessing import StandardScaler, OneHotEncoder | |
| from sklearn.impute import SimpleImputer | |
| from sklearn.ensemble import RandomForestRegressor | |
| from sklearn.model_selection import train_test_split | |
| from huggingface_hub import CommitScheduler | |
| import os | |
# Hugging Face access token, taken from the environment (None when unset).
HF_TOKEN = os.environ.get("HF_TOKEN")

# Importing the sibling training script (same directory as app.py) runs it.
# It is expected to train a random forest model and persist it as
# 'random_forest_pipeline_best.pkl'.
import train

# Load the pipeline that the training step just wrote to disk.
saved_model_path = "random_forest_pipeline_best.pkl"
model_pipeline = joblib.load(saved_model_path)

# Logging setup: make sure the log folder exists, then pick a uniquely named
# log file inside it (a fresh UUID is generated every time this module runs).
log_folder = Path("logs/")
log_folder.mkdir(parents=True, exist_ok=True)
log_file = log_folder / f"data_{uuid.uuid4()}.json"

# Scheduler that periodically commits the contents of the log folder to a
# Hugging Face dataset repository, under the "data" path of that repo.
scheduler = CommitScheduler(
    repo_id="insurance-charge-mlops-logs",  # name of the target repo
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2,  # commit interval; in minutes per the huggingface_hub docs
    token=HF_TOKEN,  # pass the token explicitly
)
def predict(age, bmi, children, sex, smoker, region):
    """Predict the insurance charge for one set of inputs and log the call.

    The six feature values are placed in a single-row DataFrame, passed to
    the module-level ``model_pipeline``, and then appended together with the
    prediction as one JSON line to the module-level ``log_file``.

    Args:
        age: Value for the 'age' column.
        bmi: Value for the 'bmi' column.
        children: Value for the 'children' column.
        sex: Value for the 'sex' column.
        smoker: Value for the 'smoker' column.
        region: Value for the 'region' column.

    Returns:
        The first element of the model's prediction, as a built-in ``float``.
    """
    features = {
        'age': age,
        'bmi': bmi,
        'children': children,
        'sex': sex,
        'smoker': smoker,
        'region': region,
    }

    # One-row frame whose column names are the feature names above.
    input_data = pd.DataFrame({name: [value] for name, value in features.items()})

    # Convert the numpy scalar to a plain float once: json.dumps only accepts
    # numpy scalars that happen to subclass float, so logging would otherwise
    # depend on the dtype the model returns.
    charge = float(model_pipeline.predict(input_data)[0])

    record = {**features, 'prediction': charge}

    # Hold the commit scheduler's lock while appending so the write does not
    # overlap with the scheduler accessing the log folder.
    with scheduler.lock, log_file.open("a") as f:
        f.write(json.dumps(record) + "\n")

    return charge
# Input widgets, one per feature accepted by predict().
age_input = gr.Number(label="Age")
bmi_input = gr.Number(label="BMI")
children_input = gr.Number(label="Children")
sex_input = gr.Radio(label="Sex", choices=['male', 'female'])
smoker_input = gr.Radio(label="Smoker", choices=['yes', 'no'])
region_input = gr.Dropdown(
    label="Region",
    choices=['northeast', 'northwest', 'southeast', 'southwest'],
)

# Gradio interface wiring the widgets to predict(); the inputs are listed in
# the same order as predict()'s parameters and the result is shown as a number.
demo = gr.Interface(
    title="HealthyLife Insurance Charge Prediction",
    fn=predict,
    inputs=[
        age_input,
        bmi_input,
        children_input,
        sex_input,
        smoker_input,
        region_input,
    ],
    outputs="number",
)

# Enable Gradio's request queue, then launch the app without a share link.
demo.queue()
demo.launch(share=False)