debjaninath committed on
Commit
81448e1
·
verified ·
1 Parent(s): 31008d9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -0
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import the libraries
2
+ import gradio as gr
3
+ import pandas as pd
4
+ import joblib
5
+ from sklearn.preprocessing import OneHotEncoder
6
+ import subprocess
7
+ import json
8
+ import uuid
9
+ from pathlib import Path
10
+ from huggingface_hub import CommitScheduler
11
+
12
+ # The training script placed in the same directory as app.py is not run from this file.
13
+ # It is expected to have already trained and persisted a linear regression
14
+ # model with the filename 'model.joblib'
15
+
16
+
# Load the persisted model from disk (described by the author as a Linear
# Regression model). To serve the Decision Tree model instead, load
# '/content/dt_regressor.pkl' here.
model = joblib.load('model.joblib')

# Logging setup: every run of the app appends to its own uniquely named
# JSON-lines file inside the folder that the commit scheduler uploads.
log_folder = Path("logs/")
log_file = log_folder / f"data_{uuid.uuid4()}.json"

# Push the contents of the log folder to the "data" directory of the
# dataset repository on a schedule.
scheduler = CommitScheduler(
    repo_id="debjaninath/insurance-charge-mlops-logs",
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2,  # commit interval; minutes per the huggingface_hub documentation
)
32
+
33
+ # Define the predict function, which takes the features, converts them to a DataFrame and makes predictions using the saved model
34
+ # The function runs when 'Submit' is clicked or when an API request is made
def predict_charges(age, bmi, children, sex, smoker, region):
    """Predict the charge for a single record and log the request.

    The six inputs are assembled into a one-row DataFrame, one-hot encoded
    with ``pd.get_dummies`` and aligned to the columns listed in
    ``model.feature_names_in_`` before being passed to ``model.predict``.
    The inputs and the prediction are then appended as one JSON line to
    ``log_file`` while ``scheduler.lock`` is held.

    Args:
        age, bmi, children, sex, smoker, region: Feature values for one
            record. Non-numeric values are expanded into dummy columns.

    Returns:
        The first predicted value as a ``float``, or ``None`` when the model
        returns no prediction.

    Raises:
        Any exception raised by pandas, the model or the log write is
        propagated to the caller unchanged.
    """
    # Build a one-row frame from the raw inputs.
    data = pd.DataFrame({
        'age': [age],
        'bmi': [bmi],
        'children': [children],
        'sex': [sex],
        'smoker': [smoker],
        'region': [region],
    })

    # One-hot encode the categorical inputs.
    data = pd.get_dummies(data)

    # A single row only produces dummy columns for the categories it actually
    # contains, so align to the training columns: missing ones are added as 0,
    # extra ones are dropped, and the training order is restored.
    data = data.reindex(columns=model.feature_names_in_, fill_value=0)

    print("Input data:")
    print(data)

    prediction = model.predict(data)

    print("Prediction:", prediction)

    # Nothing to log or return when the model produced no output.
    if prediction is None or len(prediction) == 0:
        return None

    # Convert once to a built-in float so the value is JSON serialisable
    # regardless of the NumPy scalar type the model returns.
    charge = float(prediction[0])

    record = {
        'age': age,
        'bmi': bmi,
        'children': children,
        'sex': sex,
        'smoker': smoker,
        'region': region,
        'prediction': charge,
    }

    # Hold the scheduler lock while writing so a scheduled commit does not
    # read the log file in the middle of an append.
    with scheduler.lock:
        with log_file.open("a") as f:
            f.write(json.dumps(record) + "\n")

    return charge