Spaces:
Runtime error
Runtime error
File size: 3,025 Bytes
81448e1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
# Import the libraries
import gradio as gr
import pandas as pd
import joblib
from sklearn.preprocessing import OneHotEncoder
import subprocess
import json
import uuid
from pathlib import Path
from huggingface_hub import CommitScheduler
# Load the persisted model that serves predictions.
# The training script (expected in the same directory as app.py) persists a
# linear regression model as 'model.joblib'; nothing in this section runs that
# script, so the file must already exist in the working directory at startup.
# To serve the Decision Tree model instead, load '/content/dt_regressor.pkl'.
model = joblib.load('model.joblib')  # Linear Regression model

# Prepare the logging functionality: records are appended to a uniquely named
# file (fresh UUID per app start) inside the local log folder.
log_folder = Path("logs/")
log_file = log_folder / f"data_{uuid.uuid4()}.json"

# Background scheduler that periodically commits the log folder to the
# Hugging Face dataset repo, under the "data" directory of that repo.
scheduler = CommitScheduler(
    repo_id="debjaninath/insurance-charge-mlops-logs",
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2,  # interval between scheduled commits, in minutes
)
# Define the predict function, which takes the input features, converts them to a DataFrame and makes a prediction using the saved model.
# The function runs when 'Submit' is clicked or when an API request is made.
def predict_charges(age, bmi, children, sex, smoker, region):
try:
# Create a DataFrame from the input features
data = pd.DataFrame({
'age': [age],
'bmi': [bmi],
'children': [children],
'sex': [sex],
'smoker': [smoker],
'region': [region]
})
# Handle categorical variables using one-hot encoding
data = pd.get_dummies(data)
# Ensure the input data has the same features as the training data
train_columns = model.feature_names_in_
missing_columns = set(train_columns) - set(data.columns)
for column in missing_columns:
data[column] = 0
data = data[train_columns]
print("Input data:")
print(data)
# Make predictions using the loaded model
prediction = model.predict(data)
print("Prediction:", prediction)
# Check if prediction is not None and has at least one element
if prediction is not None and len(prediction) > 0:
# While the prediction is made, log both the inputs and outputs to a log file
# While writing to the log file, ensure that the commit scheduler is locked to avoid parallel
# access
with scheduler.lock:
with log_file.open("a") as f:
f.write(json.dumps(
{
'age': age,
'bmi': bmi,
'children': children,
'sex': sex,
'smoker': smoker,
'region': region,
'prediction': prediction[0]
}
))
f.write("\n")
return float(prediction[0]) |