# Import the libraries
import os
import json
import uuid
from pathlib import Path

import joblib
import pandas as pd
import gradio as gr
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from huggingface_hub import CommitScheduler

# Read the Hugging Face token from environment variables
HF_TOKEN = os.getenv("HF_TOKEN")
# Running the training script placed in the same directory as app.py
# The training script will train and persist a random forest model with the filename 'random_forest_pipeline_best.pkl'
import train
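
# The sklearn imports above mirror what the training script is expected to use.
# A rough, hypothetical sketch of the pipeline train.py is assumed to build and
# persist (the actual script may differ):
#
#   numeric_features = ['age', 'bmi', 'children']
#   categorical_features = ['sex', 'smoker', 'region']
#   preprocessor = ColumnTransformer([
#       ('num', Pipeline([('impute', SimpleImputer()), ('scale', StandardScaler())]), numeric_features),
#       ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
#   ])
#   pipeline = Pipeline([('preprocess', preprocessor), ('model', RandomForestRegressor())])
#   ... fit on a train_test_split of the insurance data, then
#   joblib.dump(pipeline, 'random_forest_pipeline_best.pkl')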
# Loading the freshly trained model from disk
saved_model_path = "random_forest_pipeline_best.pkl"
model_pipeline = joblib.load(saved_model_path)
# Preparing the logging functionality
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
log_folder = log_file.parent
log_folder.mkdir(parents=True, exist_ok=True)

scheduler = CommitScheduler(
    repo_id="insurance-charge-mlops-logs",  # dataset repo that will hold the prediction logs
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2,
    token=HF_TOKEN  # Pass the token directly
)
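# Note: CommitScheduler runs in a background thread and pushes the contents of
# folder_path to the dataset repo roughly every `every` minutes (2 here), so
# logged predictions are versioned on the Hub without blocking user requests.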
# Defining the predict function, which takes the features, converts them to a DataFrame and makes a prediction using the saved model
def predict(age, bmi, children, sex, smoker, region):
    # Prepare the input data as a DataFrame
    input_data = pd.DataFrame({
        'age': [age],
        'bmi': [bmi],
        'children': [children],
        'sex': [sex],
        'smoker': [smoker],
        'region': [region]
    })
    # Making a prediction using the loaded model pipeline
    # Cast to a plain float so the value is JSON-serializable
    prediction = float(model_pipeline.predict(input_data)[0])
    # Once the prediction is made, log both the inputs and the output to the log file
    # Hold the commit scheduler's lock while writing so a scheduled commit cannot read a half-written line
    with scheduler.lock:
        with log_file.open("a") as f:
            f.write(json.dumps(
                {
                    'age': age,
                    'bmi': bmi,
                    'children': children,
                    'sex': sex,
                    'smoker': smoker,
                    'region': region,
                    'prediction': prediction
                }
            ))
            f.write("\n")
    return prediction
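
# Example (illustrative values only): predict(35, 27.5, 2, 'male', 'no', 'southeast')
# appends one JSON object per line to the log file, e.g.
# {"age": 35, "bmi": 27.5, "children": 2, "sex": "male", "smoker": "no", "region": "southeast", "prediction": <model output>}
# and returns the predicted charge as a float.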
# Setting up UI components for input and output
age_input = gr.Number(label="Age")
bmi_input = gr.Number(label="BMI")
children_input = gr.Number(label="Children")
sex_input = gr.Radio(choices=['male', 'female'], label="Sex")
smoker_input = gr.Radio(choices=['yes', 'no'], label="Smoker")
region_input = gr.Dropdown(choices=['northeast', 'northwest', 'southeast', 'southwest'], label="Region")
# Creating the gradio interface with the title "HealthyLife Insurance Charge Prediction"
demo = gr.Interface(
    fn=predict,
    inputs=[age_input, bmi_input, children_input, sex_input, smoker_input, region_input],
    outputs="number",
    title="HealthyLife Insurance Charge Prediction"
)
# Enabling the request queue (so concurrent requests are queued rather than dropped) and launching the app
demo.queue()
demo.launch(share=False)
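
# When run locally (python app.py), Gradio serves the UI at http://127.0.0.1:7860 by default;
# on a Hugging Face Space the app is served automatically from this app.py.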