debjaninath committed on
Commit
cb8d2af
·
verified ·
1 Parent(s): 6b3b121

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +87 -0
  2. model.joblib +3 -0
  3. requirements.txt +2 -0
  4. train.py +68 -0
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import the libraries
2
import json
import uuid
from pathlib import Path

import gradio as gr
import joblib
import pandas as pd
from huggingface_hub import CommitScheduler
5
+
6
+ # Run the training script placed in the same directory as app.py
7
+ # The training script will train and persist a linear regression
8
+ # model with the filename 'model.joblib'
9
+
10
+
11
+ # Load the freshly trained model from disk
12
model = joblib.load('model.joblib')

# Prepare the logging functionality.
# Every process writes to its own uniquely named JSON-lines file (uuid4 in
# the filename), all of them living under the local "logs/" folder.
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
log_folder = log_file.parent

# Periodically push the contents of the log folder to a dataset repo.
# The repo_id was previously left as the "-----------" placeholder, which is
# not a usable repository id; it now carries the name the original inline
# comment asked for.
# NOTE(review): prefix with "<user-or-org>/" if the repo should not live
# under the namespace of the token that runs this app.
scheduler = CommitScheduler(
    repo_id="insurance-charge-mlops-logs",
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2,
)
25
+
26
+ # Define the predict function which will take features, convert to dataframe and make predictions using the saved model
27
def predict_insu_charges(age, bmi, children, sex, smoker, region):
    """Predict the insurance charge for one applicant and log the request.

    Runs when 'Submit' is clicked in the UI or when an API request is made.

    Args:
        age: Applicant age.
        bmi: Body-mass index.
        children: Number of children.
        sex: Value selected in the sex dropdown.
        smoker: Value selected in the smoker dropdown.
        region: Value selected in the region dropdown.

    Returns:
        The first (and only) element of the model's prediction for the
        single-row input.
    """
    # Column names must match the ones the pipeline was fitted on in
    # train.py ('age', 'bmi', 'children', 'sex', 'smoker', 'region').
    sample = {
        'age': age,
        'bmi': bmi,
        'children': children,
        'sex': sex,
        'smoker': smoker,
        'region': region,
    }
    data_point = pd.DataFrame([sample])

    # .tolist() turns the numpy result into plain Python numbers so the
    # value can be JSON-serialised below.
    prediction = model.predict(data_point).tolist()

    # Log both the inputs and the output. The scheduler lock is held while
    # writing so a background commit never uploads a half-written line.
    with scheduler.lock:
        with log_file.open("a") as f:
            f.write(json.dumps(
                {
                    'age': age,
                    'bmi': bmi,
                    'children': children,
                    'sex': sex,
                    'smoker': smoker,
                    'region': region,
                    'prediction': prediction[0],
                }
            ))
            f.write("\n")

    return prediction[0]
64
+
65
+ # Set up UI components for input and output
66
# Input components, in the same order as predict_insu_charges' parameters.
age_input = gr.Number(label="Age")
bmi_input = gr.Number(label="BMI")
children_input = gr.Number(label="Number of children")
# NOTE(review): train.py one-hot encodes these columns with
# handle_unknown='ignore', so a choice spelled differently from the training
# data is silently encoded as "no category". Confirm the spellings/casing
# below against the values in the training CSV.
sex_input = gr.Dropdown(['Female', 'Male'], label="Sex")
smoker_input = gr.Dropdown(['Yes', 'No'], label="smoker?")
region_input = gr.Dropdown(
    ['SouthWest', 'NorthWest', 'SouthEast', 'NorthEast'],
    label="Region",
)

model_output = gr.Label(label="charges")

# Create the gradio interface. The component objects themselves are passed
# as inputs (not their variable names as strings).
demo = gr.Interface(
    fn=predict_insu_charges,
    inputs=[
        age_input,
        bmi_input,
        children_input,
        sex_input,
        smoker_input,
        region_input,
    ],
    outputs=model_output,
    title="HealthyLife Insurance Charge Prediction",
    description="For predicting insurance charges",
    allow_flagging="auto",
)

# Enable request queuing, then launch the app.
demo.queue()
demo.launch(share=False)
model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9341658ee84e297a6b15c9262019ebe8a2dc3679a326700703f5a6116b9958d
3
+ size 4887
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ scikit-learn==1.5.0
2
+ # overwriting requirements.txt  (stray notebook output; commented out so pip can parse this file)
train.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ import seaborn as sns
4
+ import joblib
5
+ import matplotlib.pyplot as plt
6
+ from sklearn.model_selection import train_test_split, RandomizedSearchCV
7
+ from sklearn.metrics import classification_report
8
+ from sklearn.metrics import mean_squared_error
9
+ from sklearn.preprocessing import OneHotEncoder
10
+ from sklearn.compose import make_column_transformer
11
+ from sklearn.preprocessing import StandardScaler
12
+ from sklearn.linear_model import LinearRegression
13
+ from sklearn.pipeline import make_pipeline
14
+ from sklearn.pipeline import Pipeline
15
+ from sklearn.impute import SimpleImputer
16
+ from sklearn.preprocessing import StandardScaler
17
+ from sklearn.compose import ColumnTransformer
18
+ from sklearn.metrics import mean_squared_error, r2_score
19
+
20
import os

# Training script: fits a preprocessing + linear-regression pipeline on the
# insurance data and saves it as 'model.joblib' in the working directory.

# The CSV location can be overridden with the INSURANCE_CSV environment
# variable; the default is the path the script was originally written with.
data_path = os.environ.get("INSURANCE_CSV", "/Users/debjanighosh/insurance.csv")
data = pd.read_csv(data_path)

target = 'charges'

numerical_features = ['age', 'bmi', 'children']
categorical_features = ['sex', 'smoker', 'region']

print("Creating data subsets")

X = data[numerical_features + categorical_features]
y = data[target]

# Hold out 20% of the rows for evaluation; fixed seed for reproducibility.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
)

# Numeric columns: fill missing values with the median, then standardise.
numerical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical columns: fill missing values with the most frequent value,
# then one-hot encode. Categories unseen during fit are ignored at predict
# time instead of raising.
categorical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

preprocessor = make_column_transformer(
    (numerical_pipeline, numerical_features),
    (categorical_pipeline, categorical_features),
)

model_linear_regression = LinearRegression()

print ("Estimating Best Model Pipeline")

model_pipeline = make_pipeline(
    preprocessor,
    model_linear_regression,
)

model_pipeline.fit(Xtrain, ytrain)

print("Logging Metrics")
print(f"R2 Score:{r2_score(ytest, model_pipeline.predict(Xtest))}")

print("Serializing Model")
saved_model_path = "model.joblib"

# Persist the whole pipeline (preprocessing + regressor) so the app can feed
# it raw feature values.
joblib.dump(model_pipeline, saved_model_path)