pgurazada1 committed on
Commit
7940dfd
1 Parent(s): 96b9617
Files changed (2) hide show
  1. model-v1.joblib +2 -2
  2. train.py +36 -0
model-v1.joblib CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41dcd60d356e52288c1c14eab8a25ea684958f04a9000770ed71153e49ad38af
3
- size 4721680
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d42378540751e7b97f6d4502eca01baef7afe937fc526b2657339b895aa0158
3
+ size 4721568
train.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import joblib
2
+
3
+ from sklearn.datasets import fetch_openml
4
+ from sklearn.preprocessing import StandardScaler, OneHotEncoder
5
+ from sklearn.compose import make_column_transformer
6
+ from sklearn.pipeline import make_pipeline
7
+ from sklearn.model_selection import train_test_split
8
+ from sklearn.tree import DecisionTreeRegressor
9
+
10
+ dataset = fetch_openml(data_id=43355, as_frame=True, parser='auto')
11
+
12
+ diamond_prices = dataset.data
13
+
14
+ target = ['price']
15
+ numeric_features = ['carat']
16
+ categorical_features = ['shape', 'cut', 'color', 'clarity', 'report', 'type']
17
+
18
+ X = diamond_prices.drop(columns=target)
19
+ y = diamond_prices[target]
20
+
21
+ Xtrain, Xtest, ytrain, ytest = train_test_split(
22
+ X, y,
23
+ test_size=0.2,
24
+ random_state=42
25
+ )
26
+
27
+ preprocessor = make_column_transformer(
28
+ (StandardScaler(), numeric_features),
29
+ (OneHotEncoder(handle_unknown='ignore'), categorical_features)
30
+ )
31
+
32
+ model_pipeline = make_pipeline(preprocessor, DecisionTreeRegressor())
33
+
34
+ model_pipeline.fit(Xtrain, ytrain)
35
+
36
+ joblib.dump(model_pipeline, 'model-v1.joblib')