Spaces:

debjaninath
/

ml_python

Runtime error

App Files Files Community

ml_python / app.py

debjaninath

Create app.py

81448e1 verified 12 months ago

raw

history blame contribute delete

3.03 kB

	# Import the libraries
	import gradio as gr
	import pandas as pd
	import joblib
	from sklearn.preprocessing import OneHotEncoder
	import subprocess
	import json
	import uuid
	from pathlib import Path
	from huggingface_hub import CommitScheduler

	# NOTE: the training script (placed in the same directory as app.py) must be
	# run before this point; no call to it is made here. It trains and persists
	# a linear regression model with the filename 'model.joblib'.


	# Deserialize the persisted regressor that serves the predictions.
	# 'model.joblib' holds the linear regression model; to serve the decision
	# tree model instead, load '/content/dt_regressor.pkl' here.
	model = joblib.load('model.joblib')

	# Every time this module is loaded, a uniquely named log file is chosen
	# inside the folder that is handed to the commit scheduler below.
	log_folder = Path("logs/")
	log_file = log_folder / f"data_{uuid.uuid4()}.json"

	# Background scheduler that commits the log folder to the dataset repo
	# "debjaninath/insurance-charge-mlops-logs" under the "data" path.
	scheduler = CommitScheduler(
	    repo_id="debjaninath/insurance-charge-mlops-logs",
	    repo_type="dataset",
	    folder_path=log_folder,
	    path_in_repo="data",
	    every=2,  # commit interval; minutes per the huggingface_hub docs - confirm
	)

	# Define the predict function, which takes the features, converts them to a DataFrame, and makes a prediction using the saved model.
	# The function runs when 'Submit' is clicked or when an API request is made.
	def predict_charges(age, bmi, children, sex, smoker, region):
	try:
	# Create a DataFrame from the input features
	data = pd.DataFrame({
	'age': [age],
	'bmi': [bmi],
	'children': [children],
	'sex': [sex],
	'smoker': [smoker],
	'region': [region]
	})

	# Handle categorical variables using one-hot encoding
	data = pd.get_dummies(data)

	# Ensure the input data has the same features as the training data
	train_columns = model.feature_names_in_
	missing_columns = set(train_columns) - set(data.columns)
	for column in missing_columns:
	data[column] = 0
	data = data[train_columns]

	print("Input data:")
	print(data)

	# Make predictions using the loaded model
	prediction = model.predict(data)

	print("Prediction:", prediction)

	# Check if prediction is not None and has at least one element
	if prediction is not None and len(prediction) > 0:
	# While the prediction is made, log both the inputs and outputs to a log file
	# While writing to the log file, ensure that the commit scheduler is locked to avoid parallel
	# access
	with scheduler.lock:
	with log_file.open("a") as f:
	f.write(json.dumps(
	{
	'age': age,
	'bmi': bmi,
	'children': children,
	'sex': sex,
	'smoker': smoker,
	'region': region,
	'prediction': prediction[0]
	}
	))
	f.write("\n")

	return float(prediction[0])