tuiza-reph committed on
Commit
8db0561
1 Parent(s): c0b3ddb

uploaded initial artifacts for model deployment

Browse files
Files changed (4) hide show
  1. app.py +112 -0
  2. model.joblib +3 -0
  3. requirements.txt +1 -0
  4. train.py +64 -0
app.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import the libraries
2
+ import os
3
+ import uuid
4
+ import joblib
5
+ import json
6
+
7
+ import gradio as gr
8
+ import pandas as pd
9
+
10
+ from huggingface_hub import CommitScheduler
11
+ from pathlib import Path
12
+
13
+
14
+ # Run the training script placed in the same directory as app.py
15
+ # The training script will train and persist a linear regression
16
+ # model with the filename 'model.joblib'
17
+
18
# Input widgets shown in the UI, one per model feature.
age = gr.Number(label='Age')
sex = gr.Dropdown(choices=['female', 'male'], label='Sex')
bmi = gr.Number(label='Body-Mass-Index (BMI)')
children = gr.Number(label='Number of children dependents')
smoker = gr.Dropdown(choices=['yes', 'no'], label='Smoker')
region = gr.Dropdown(
    choices=['northeast', 'northwest', 'southeast', 'southwest'],
    label='Region',
)

# Output widget that displays the predicted charge.
model_output = gr.Label(label="Insurance Charge Amount")

# Load the persisted model pipeline from disk.
insurance_charge_predictor = joblib.load('model.joblib')

# Logging setup: each app start writes to its own uniquely named JSON-lines
# file inside the local "logs" folder.
log_folder = Path("logs/")
log_file = log_folder / f"data_{uuid.uuid4()}.json"

# Periodically commit the contents of the log folder to a dataset repo.
scheduler = CommitScheduler(
    repo_id="insurance-charge-mlops-logs",  # name of the dataset repo that receives the logs
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2,
)
45
+
46
+ # Define the predict function which will take features, convert to dataframe and make predictions using the saved model
47
+ # The function runs when 'Submit' is clicked or when an API request is made
48
+
49
+
50
+ # While the prediction is made, log both the inputs and outputs to a log file
51
+ # While writing to the log file, ensure that the commit scheduler is locked to avoid parallel
52
+ # access
53
+
54
def insurance_charge_amount(age, sex, bmi, children, smoker, region):
    """Predict the insurance charge for one applicant and log the request.

    Builds a single-row DataFrame from the inputs, passes it to the loaded
    ``insurance_charge_predictor``, appends the inputs and the prediction as
    one JSON line to ``log_file``, and returns the prediction.

    Args:
        age: Value of the 'Age' input.
        sex: Value of the 'Sex' input ('female' or 'male' in the UI).
        bmi: Value of the 'Body-Mass-Index (BMI)' input.
        children: Value of the 'Number of children dependents' input.
        smoker: Value of the 'Smoker' input ('yes' or 'no' in the UI).
        region: Value of the 'Region' input.

    Returns:
        The first (and only) element of the model's prediction list.
    """
    # The pipeline built in train.py selects its columns by the names used
    # at fit time ('age', 'bmi', 'children', 'smoker', 'sex', 'region').
    # The frame must therefore use those names, not the UI labels;
    # otherwise the column transformer cannot find its inputs.
    features = {
        'age': age,
        'bmi': bmi,
        'children': children,
        'smoker': smoker,
        'sex': sex,
        'region': region,
    }

    data_point = pd.DataFrame([features])
    prediction = insurance_charge_predictor.predict(data_point).tolist()

    # The log record keeps the UI-label keys so the log schema is unchanged.
    record = {
        'Age': age,
        'Sex': sex,
        'Body-Mass-Index (BMI)': bmi,
        'Number of children dependents': children,
        'Smoker': smoker,
        'Region': region,
        'prediction': prediction[0],
    }

    # Hold the scheduler's lock while appending so the write does not run
    # in parallel with the scheduler's access to the log folder.
    with scheduler.lock:
        with log_file.open("a") as f:
            f.write(json.dumps(record))
            f.write("\n")

    return prediction[0]
84
# Set up UI components for input and output
ui_inputs = [age, sex, bmi, children, smoker, region]
ui_output = model_output

# Create the Gradio interface exactly once. Building a second, identical
# Interface and rebinding `demo` to it only leaves the first one unused.
demo = gr.Interface(
    fn=insurance_charge_amount,
    inputs=ui_inputs,
    outputs=ui_output,
    title="HealthyLife Insurance Charge Prediction",
    description="This API allows you to calculate an estimated Insurance Charge Amount",
    allow_flagging="auto",
    concurrency_limit=8,
)

# Enable request queuing, then start the app without a public share link.
demo.queue()
demo.launch(share=False)
model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7daba9429128f97d5a4a7ecd5a3446e52671ea84db941314407dfecd2e480c16
3
+ size 3950
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ scikit-learn==1.2.2
train.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import pandas as pd
3
+ import joblib
4
+
5
+ from sklearn.datasets import fetch_openml
6
+
7
+ from sklearn.preprocessing import StandardScaler, OneHotEncoder
8
+ from sklearn.compose import make_column_transformer
9
+
10
+ from sklearn.pipeline import make_pipeline
11
+
12
+ from sklearn.model_selection import train_test_split
13
+
14
+ from sklearn.linear_model import LinearRegression
15
+ from sklearn.metrics import mean_squared_error, r2_score
16
+
17
# Load the raw data from the CSV file next to this script.
data_df = pd.read_csv('insurance.csv')

# Column roles: one regression target, numeric inputs, categorical inputs.
target = 'charges'
numeric_features = ['age', 'bmi', 'children']
categorical_features = ['smoker', 'sex', 'region']

print("Creating data subsets")

feature_columns = numeric_features + categorical_features
X = data_df[feature_columns]
y = data_df[target]

# Hold out 20% of the rows for evaluation; the seed is fixed.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X, y, test_size=0.2, random_state=100
)

# Scale numeric columns and one-hot encode categorical ones; categories not
# seen during fitting are ignored at transform time.
preprocessor = make_column_transformer(
    (StandardScaler(), numeric_features),
    (OneHotEncoder(handle_unknown='ignore'), categorical_features),
)

model_linear_regression = LinearRegression(n_jobs=-1)

print("Estimating Model Pipeline")

# Chain preprocessing and the regressor so both are fitted and saved together.
model_pipeline = make_pipeline(preprocessor, model_linear_regression)
model_pipeline.fit(Xtrain, ytrain)

print("Logging Metrics")
test_r2 = r2_score(ytest, model_pipeline.predict(Xtest))
print(f"R-squared: {test_r2}")

print("Serializing Model")

# Persist the fitted pipeline under the filename that app.py loads.
saved_model_path = "model.joblib"
joblib.dump(model_pipeline, saved_model_path)