File size: 4,815 Bytes
855113e 4fb3259 855113e 4fb3259 855113e 4fb3259 855113e 4fb3259 855113e 4fb3259 855113e 4fb3259 855113e 4fb3259 855113e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
from joblib import load
import pandas as pd
import random
from pydantic import BaseModel, ValidationInfo, field_validator
def _bounded(low, high):
    """Return a ``range`` constraint entry with bounds ``[low, high]``."""
    return {"type": "range", "bounds": [low, high]}


def _one_of(*options):
    """Return a ``choice`` constraint entry listing the allowed ``options``."""
    return {"type": "choice", "values": list(options)}


# Per-parameter constraints, keyed by field name. A "range" entry carries a
# two-element ``bounds`` list; a "choice" entry carries the list of allowed
# ``values``.
PARAM_CONSTRAINTS = {
    "N": _bounded(1, 10),
    "alpha": _bounded(0.0, 1.0),
    "d_model": _bounded(100, 1024),
    "dim_feedforward": _bounded(1024, 4096),
    "dropout": _bounded(0.0, 1.0),
    "emb_scaler": _bounded(0.0, 1.0),
    "eps": _bounded(1e-7, 1e-4),
    "epochs_step": _bounded(5, 20),
    "fudge": _bounded(0.0, 0.1),
    "heads": _bounded(1, 10),
    "k": _bounded(2, 10),
    "lr": _bounded(1e-4, 6e-3),
    "pe_resolution": _bounded(2500, 10000),
    "ple_resolution": _bounded(2500, 10000),
    "pos_scaler": _bounded(0.0, 1.0),
    "weight_decay": _bounded(0.0, 1.0),
    "batch_size": _bounded(32, 256),
    "out_hidden4": _bounded(32, 512),
    "betas1": _bounded(0.5, 0.9999),
    "betas2": _bounded(0.5, 0.9999),
    "bias": _one_of(False, True),
    "criterion": _one_of("RobustL1", "RobustL2"),
    "elem_prop": _one_of("mat2vec", "magpie", "onehot"),
    "train_frac": _bounded(0.01, 1.0),
}
class Parameterization(BaseModel):
    """Validated set of hyperparameters passed to the surrogate model.

    Every field that has an entry in ``PARAM_CONSTRAINTS`` is checked against
    it (inclusive range or membership in a list of choices). Two cross-field
    rules are enforced as well: ``betas1 <= betas2`` and
    ``emb_scaler + pos_scaler <= 1.0``.
    """

    N: int
    alpha: float
    d_model: int
    dim_feedforward: int
    dropout: float
    emb_scaler: float
    epochs_step: int
    eps: float
    fudge: float
    heads: int
    k: int
    lr: float
    pe_resolution: int
    ple_resolution: int
    pos_scaler: float
    # Constrained to the float range [0.0, 1.0] in PARAM_CONSTRAINTS; an
    # ``int`` annotation would reject every fractional value.
    weight_decay: float
    batch_size: int
    out_hidden4: int
    betas1: float
    betas2: float
    losscurve: bool
    learningcurve: bool
    bias: bool
    criterion: str
    elem_prop: str
    train_frac: float

    @field_validator("*")
    @classmethod
    def check_constraints(cls, v, info: ValidationInfo):
        """Validate one field against ``PARAM_CONSTRAINTS`` and cross-field rules.

        Args:
            v: The already type-coerced value of the field being validated.
            info: Pydantic validation context; supplies the field name and
                the values of the fields validated so far.

        Returns:
            ``v`` unchanged when every applicable check passes.

        Raises:
            ValueError: If ``v`` is outside its range, is not one of the
                allowed choices, or violates a cross-field rule.
        """
        name = info.field_name
        param = PARAM_CONSTRAINTS.get(name)
        if param is None:
            # Unconstrained field (no entry in the table): accept as-is.
            return v

        if param["type"] == "range":
            min_val, max_val = param["bounds"]
            if not min_val <= v <= max_val:
                raise ValueError(
                    f"{name} must be between {min_val} and {max_val}"
                )
        elif param["type"] == "choice":
            if v not in param["values"]:
                raise ValueError(f"{name} must be one of {param['values']}")

        # ``info.data`` only contains fields validated *before* this one (in
        # declaration order) and omits fields that failed validation, so the
        # current value is merged in. Each cross-field rule therefore fires
        # once, when the later field of the pair is validated.
        known = {**info.data, name: v}

        if (
            name in ("betas1", "betas2")
            and "betas1" in known
            and "betas2" in known
        ):
            if known["betas1"] > known["betas2"]:
                raise ValueError("betas1 must be less than or equal to betas2")

        if (
            name in ("emb_scaler", "pos_scaler")
            and "emb_scaler" in known
            and "pos_scaler" in known
        ):
            if known["emb_scaler"] + known["pos_scaler"] > 1.0:
                raise ValueError(
                    "The sum of emb_scaler and pos_scaler must be less than or equal to 1.0"
                )

        return v
class CrabNetSurrogateModel(object):
    """Evaluate hyperparameter sets with pre-fitted surrogate models.

    ``self.models`` is indexed with the keys ``"mae"``, ``"rmse"``,
    ``"runtime"`` and ``"model_size"``; each entry must provide ``predict``.
    """

    def __init__(self, fpath="surrogate_models.pkl"):
        """Load the surrogate models stored at ``fpath``.

        NOTE(security): ``joblib.load`` is pickle-based and can execute
        arbitrary code while loading; only point ``fpath`` at trusted files.
        """
        self.models = load(fpath)

    def prepare_params_for_eval(self, raw_params: "Parameterization | dict"):
        """Return the feature dict the surrogate models are evaluated on.

        Accepts either a ``Parameterization`` instance (anything exposing
        ``model_dump()``) or a plain mapping. The input is never modified, so
        the same parameter set can be evaluated repeatedly.

        The returned dict has ``bias`` cast to ``int``, a boolean
        ``use_RobustL1`` flag, one-hot ``elem_prop_*`` entries, and
        ``criterion``, ``losscurve``, ``learningcurve`` and ``elem_prop``
        set to ``None``.

        Raises:
            KeyError: If ``bias``, ``criterion`` or ``elem_prop`` is missing.
        """
        # Pydantic models do not support item access/assignment, so work on a
        # fresh dict; copying also keeps a caller-supplied dict intact.
        dump = getattr(raw_params, "model_dump", None)
        params = dump() if callable(dump) else dict(raw_params)

        params["bias"] = int(params["bias"])
        params["use_RobustL1"] = params["criterion"] == "RobustL1"
        params["criterion"] = None

        params["losscurve"] = None
        params["learningcurve"] = None

        # One-hot encode the element property choice.
        elem_prop = params["elem_prop"]
        params["elem_prop_magpie"] = 0
        params["elem_prop_mat2vec"] = 0
        params["elem_prop_onehot"] = 0
        params[f"elem_prop_{elem_prop}"] = 1
        params["elem_prop"] = None

        return params

    def surrogate_evaluate(self, params: "Parameterization | dict"):
        """Predict the objectives for one parameter set.

        A single random percentile in ``[0, 1]`` is drawn per call and passed
        to the ``mae``, ``rmse`` and ``runtime`` models as their ``*_rank``
        column, so repeated calls with the same input can return different
        values. The ``model_size`` model receives no rank column.

        Returns:
            Tuple ``(mae, rmse, runtime, model_size)`` holding the raw
            ``predict`` outputs of the four models.
        """
        parameters = self.prepare_params_for_eval(params)
        parameters = pd.DataFrame([parameters])

        percentile = random.uniform(0, 1)  # generate random percentile

        mae = self.models["mae"].predict(parameters.assign(mae_rank=[percentile]))
        rmse = self.models["rmse"].predict(parameters.assign(rmse_rank=[percentile]))
        runtime = self.models["runtime"].predict(
            parameters.assign(runtime_rank=[percentile])
        )
        model_size = self.models["model_size"].predict(parameters)

        return mae, rmse, runtime, model_size
|