sgbaird committed on
Commit
4fb3259
1 Parent(s): da6382c

Add parameter validation to CrabNetSurrogateModel in surrogate.py

Browse files
Files changed (1) hide show
  1. surrogate.py +101 -11
surrogate.py CHANGED
@@ -1,39 +1,129 @@
1
  from joblib import load
2
  import pandas as pd
3
  import random
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
 
6
  class CrabNetSurrogateModel(object):
7
- def __init__(self):
8
- self.models = load("surrogate_models.pkl")
 
9
 
10
- def prepare_params_for_eval(self, raw_params):
11
  raw_params["bias"] = int(raw_params["bias"])
12
  raw_params["use_RobustL1"] = raw_params["criterion"] == "RobustL1"
13
- raw_params.pop("criterion")
14
-
15
- raw_params.pop("losscurve")
16
- raw_params.pop("learningcurve")
17
 
18
- # raw_params["train_frac"] = random.uniform(0.01, 1)
 
19
 
20
  elem_prop = raw_params["elem_prop"]
21
  raw_params["elem_prop_magpie"] = 0
22
  raw_params["elem_prop_mat2vec"] = 0
23
  raw_params["elem_prop_onehot"] = 0
24
  raw_params[f"elem_prop_{elem_prop}"] = 1
25
- raw_params.pop("elem_prop")
26
 
27
  return raw_params
28
 
29
- def surrogate_evaluate(self, params):
30
 
31
  parameters = self.prepare_params_for_eval(params)
32
  parameters = pd.DataFrame([parameters])
33
 
34
  percentile = random.uniform(0, 1) # generate random percentile
35
 
36
- # TODO: should percentile be different for each objective? (I guess depends on what is meant to be correlated vs. not)
37
  mae = self.models["mae"].predict(parameters.assign(mae_rank=[percentile]))
38
  rmse = self.models["rmse"].predict(parameters.assign(rmse_rank=[percentile]))
39
  runtime = self.models["runtime"].predict(
 
1
  from joblib import load
2
  import pandas as pd
3
  import random
4
+ from pydantic import BaseModel, ValidationInfo, field_validator
5
+
6
def _span(low, high):
    """Build a 'range' constraint entry with inclusive [low, high] bounds."""
    return {"type": "range", "bounds": [low, high]}


def _options(*values):
    """Build a 'choice' constraint entry listing the permitted values."""
    return {"type": "choice", "values": list(values)}


# Allowed values for each tunable hyperparameter, keyed by parameter name.
# "range" entries carry [low, high] bounds; "choice" entries enumerate the
# accepted values.
PARAM_CONSTRAINTS = {
    "N": _span(1, 10),
    "alpha": _span(0.0, 1.0),
    "d_model": _span(100, 1024),
    "dim_feedforward": _span(1024, 4096),
    "dropout": _span(0.0, 1.0),
    "emb_scaler": _span(0.0, 1.0),
    "eps": _span(1e-7, 1e-4),
    "epochs_step": _span(5, 20),
    "fudge": _span(0.0, 0.1),
    "heads": _span(1, 10),
    "k": _span(2, 10),
    "lr": _span(1e-4, 6e-3),
    "pe_resolution": _span(2500, 10000),
    "ple_resolution": _span(2500, 10000),
    "pos_scaler": _span(0.0, 1.0),
    "weight_decay": _span(0.0, 1.0),
    "batch_size": _span(32, 256),
    "out_hidden4": _span(32, 512),
    "betas1": _span(0.5, 0.9999),
    "betas2": _span(0.5, 0.9999),
    "bias": _options(False, True),
    "criterion": _options("RobustL1", "RobustL2"),
    "elem_prop": _options("mat2vec", "magpie", "onehot"),
    "train_frac": _span(0.01, 1.0),
}
32
+
33
+
34
class Parameterization(BaseModel):
    """Validated set of hyperparameters for the surrogate model.

    Each field is type-checked by pydantic and then checked against the
    matching entry in ``PARAM_CONSTRAINTS`` (if any). Two pairwise rules
    are enforced on top of the per-field constraints:

    * ``betas1 <= betas2``
    * ``emb_scaler + pos_scaler <= 1.0``
    """

    N: int
    alpha: float
    d_model: int
    dim_feedforward: int
    dropout: float
    emb_scaler: float
    epochs_step: int
    eps: float
    fudge: float
    heads: int
    k: int
    lr: float
    pe_resolution: int
    ple_resolution: int
    pos_scaler: float
    # Declared as float: the allowed range is [0.0, 1.0], and an int field
    # would reject any fractional value inside that range.
    weight_decay: float
    batch_size: int
    out_hidden4: int
    betas1: float
    betas2: float
    losscurve: bool
    learningcurve: bool
    bias: bool
    criterion: str
    elem_prop: str
    train_frac: float

    @field_validator("*")
    @classmethod
    def check_constraints(cls, v, info: ValidationInfo):
        """Check one field value against ``PARAM_CONSTRAINTS``.

        Args:
            v: The already type-validated value of the field.
            info: Validation context; ``info.field_name`` names the field and
                ``info.data`` holds the fields validated so far.

        Returns:
            ``v`` unchanged when every applicable constraint holds.

        Raises:
            ValueError: If ``v`` is outside its range, is not one of the
                allowed choices, or violates a pairwise rule.
        """
        param = PARAM_CONSTRAINTS.get(info.field_name)
        if param is None:
            return v

        if param["type"] == "range":
            min_val, max_val = param["bounds"]
            if not min_val <= v <= max_val:
                raise ValueError(
                    f"{info.field_name} must be between {min_val} and {max_val}"
                )
        elif param["type"] == "choice":
            if v not in param["values"]:
                raise ValueError(f"{info.field_name} must be one of {param['values']}")

        # ``info.data`` does not yet contain the field being validated, so the
        # current value is merged in. A pairwise rule is only evaluated once
        # both members of the pair are known, whichever is validated last.
        known = {**info.data, info.field_name: v}

        if info.field_name in ("betas1", "betas2"):
            if "betas1" in known and "betas2" in known:
                if known["betas1"] > known["betas2"]:
                    raise ValueError("betas1 must be less than or equal to betas2")

        if info.field_name in ("emb_scaler", "pos_scaler"):
            if "emb_scaler" in known and "pos_scaler" in known:
                if known["emb_scaler"] + known["pos_scaler"] > 1.0:
                    raise ValueError(
                        "The sum of emb_scaler and pos_scaler must be less than or equal to 1.0"
                    )

        return v
96
 
97
 
98
  class CrabNetSurrogateModel(object):
99
def __init__(self, fpath="surrogate_models.pkl"):
    """Load the fitted surrogate models from ``fpath``.

    Args:
        fpath: Path to the joblib file holding the models. The loaded object
            is stored as ``self.models`` and is later indexed by objective
            name (e.g. ``"mae"``, ``"rmse"``, ``"runtime"``).
    """
    # NOTE(security): joblib.load unpickles the file and can therefore run
    # arbitrary code; only point ``fpath`` at files from a trusted source.
    self.models = load(fpath)
102
 
103
def prepare_params_for_eval(self, raw_params: "Parameterization | dict") -> dict:
    """Convert a parameterization into the feature dict used for prediction.

    The input is never modified: the transformation runs on a copy, so the
    same parameterization can be evaluated repeatedly with identical results.

    Args:
        raw_params: Either a mapping of parameter names to values or a
            pydantic model exposing ``model_dump()``. It must provide at
            least ``bias``, ``criterion`` and ``elem_prop``.

    Returns:
        A new dict in which ``bias`` is an int, ``use_RobustL1`` flags whether
        the criterion was ``"RobustL1"``, ``elem_prop`` is one-hot encoded
        into ``elem_prop_<name>`` entries, and the consumed keys
        (``criterion``, ``losscurve``, ``learningcurve``, ``elem_prop``) are
        set to ``None``.

    Raises:
        KeyError: If ``bias``, ``criterion`` or ``elem_prop`` is missing.
    """
    # Pydantic models do not support item access, so turn them into a plain
    # dict first; for mappings, copy so the caller's object stays untouched.
    if hasattr(raw_params, "model_dump"):
        features = raw_params.model_dump()
    else:
        features = dict(raw_params)

    features["bias"] = int(features["bias"])
    features["use_RobustL1"] = features["criterion"] == "RobustL1"
    features["criterion"] = None

    features["losscurve"] = None
    features["learningcurve"] = None

    # One-hot encode the element-property choice.
    elem_prop = features["elem_prop"]
    features["elem_prop_magpie"] = 0
    features["elem_prop_mat2vec"] = 0
    features["elem_prop_onehot"] = 0
    features[f"elem_prop_{elem_prop}"] = 1
    features["elem_prop"] = None

    return features
119
 
120
+ def surrogate_evaluate(self, params: Parameterization):
121
 
122
  parameters = self.prepare_params_for_eval(params)
123
  parameters = pd.DataFrame([parameters])
124
 
125
  percentile = random.uniform(0, 1) # generate random percentile
126
 
 
127
  mae = self.models["mae"].predict(parameters.assign(mae_rank=[percentile]))
128
  rmse = self.models["rmse"].predict(parameters.assign(rmse_rank=[percentile]))
129
  runtime = self.models["runtime"].predict(