sgbaird committed on
Commit
55ab120
2 Parent(s): 4d81854 97a16d9

Merge branch 'main' of https://huggingface.co/spaces/AccelerationConsortium/crabnet-hyperparameter

Browse files
Files changed (5) hide show
  1. .gitattributes +0 -6
  2. README.md +13 -3
  3. app.py +2 -2
  4. requirements.txt +5 -3
  5. surrogate.py +134 -3
.gitattributes CHANGED
@@ -33,9 +33,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
- sobol_regression.csv filter=lfs diff=lfs merge=lfs -text
37
- *.md filter=lfs diff=lfs merge=lfs -text
38
- *.py filter=lfs diff=lfs merge=lfs -text
39
- *.txt filter=lfs diff=lfs merge=lfs -text
40
- *.csv filter=lfs diff=lfs merge=lfs -text
41
- *.png filter=lfs diff=lfs merge=lfs -text
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
README.md CHANGED
@@ -1,3 +1,13 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b2b04f7692584768f043f4a584de65a0f61c01633e6fd2767d1774c72e09b6a
3
- size 258
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Crabnet Hyperparameter
3
+ emoji: 🏢
4
+ colorFrom: purple
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 4.22.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b416c328f8fbabec93b4de8341e9d52c01ac5c1eb5b274cda39fb2c2d00e1bc7
3
- size 5279
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50221812588d786ce268f62a2a577004c6778e4820d8c68628ea8714063c3b61
3
+ size 900
requirements.txt CHANGED
@@ -1,3 +1,5 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c446eecfcea7ac33806b19fb5a4eac8e9b8acce330888c9b953a5dcb00acd2b
3
- size 2532
 
 
 
1
+ scikit-learn==1.0.1
2
+ pandas
3
+ pydantic
4
+ gradio
5
+ # cloudpickle # import cloudpickle as pickle
surrogate.py CHANGED
@@ -1,3 +1,134 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:32b9e0f983a649b57a194a31e3176c194e7fbd55fbae39f4f407ff585d22e44e
3
- size 7077
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from joblib import load
2
+ import pandas as pd
3
+ import random
4
+ from pydantic import BaseModel, ValidationInfo, field_validator
5
+
6
def _bounded(low, high):
    """Build a "range" constraint entry with the given lower and upper bound."""
    return {"type": "range", "bounds": [low, high]}


def _one_of(*options):
    """Build a "choice" constraint entry listing the allowed values."""
    return {"type": "choice", "values": list(options)}


# Validity constraints per hyperparameter, keyed by parameter name.
# "range" entries carry a two-element "bounds" list, "choice" entries carry
# the list of permitted "values".
PARAM_CONSTRAINTS = {
    "N": _bounded(1, 10),
    "alpha": _bounded(0.0, 1.0),
    "d_model": _bounded(100, 1024),
    "dim_feedforward": _bounded(1024, 4096),
    "dropout": _bounded(0.0, 1.0),
    "emb_scaler": _bounded(0.0, 1.0),
    "eps": _bounded(1e-7, 1e-4),
    "epochs_step": _bounded(5, 20),
    "fudge": _bounded(0.0, 0.1),
    "heads": _bounded(1, 10),
    "k": _bounded(2, 10),
    "lr": _bounded(1e-4, 6e-3),
    "pe_resolution": _bounded(2500, 10000),
    "ple_resolution": _bounded(2500, 10000),
    "pos_scaler": _bounded(0.0, 1.0),
    "weight_decay": _bounded(0.0, 1.0),
    "batch_size": _bounded(32, 256),
    "out_hidden4": _bounded(32, 512),
    "betas1": _bounded(0.5, 0.9999),
    "betas2": _bounded(0.5, 0.9999),
    "bias": _one_of(False, True),
    "criterion": _one_of("RobustL1", "RobustL2"),
    "elem_prop": _one_of("mat2vec", "magpie", "onehot"),
    "train_frac": _bounded(0.01, 1.0),
}
32
+
33
+
34
class Parameterization(BaseModel):
    """Hyperparameter set for the CrabNet surrogate, validated on construction.

    Every field that has an entry in ``PARAM_CONSTRAINTS`` is checked against
    its range or choice list. Two cross-field rules are enforced as well:
    ``betas1 <= betas2`` and ``emb_scaler + pos_scaler <= 1.0``.
    ``losscurve`` and ``learningcurve`` have no entry in ``PARAM_CONSTRAINTS``
    and are therefore only type-checked.
    """

    N: int
    alpha: float
    d_model: int
    dim_feedforward: int
    dropout: float
    emb_scaler: float
    epochs_step: int
    eps: float
    fudge: float
    heads: int
    k: int
    lr: float
    pe_resolution: int
    ple_resolution: int
    pos_scaler: float
    # float, not int: the allowed range for weight_decay is [0.0, 1.0], so an
    # int field would reject every fractional value inside that range.
    weight_decay: float
    batch_size: int
    out_hidden4: int
    betas1: float
    betas2: float
    losscurve: bool
    learningcurve: bool
    bias: bool
    criterion: str
    elem_prop: str
    train_frac: float

    @field_validator("*")
    @classmethod
    def check_constraints(cls, v, info: ValidationInfo):
        """Validate one field against ``PARAM_CONSTRAINTS`` and cross-field rules.

        Args:
            v: The value of the field being validated (int, float, bool or
                str depending on the field).
            info: Validation context; ``info.field_name`` names the field and
                ``info.data`` holds the fields that were validated before it.

        Returns:
            ``v`` unchanged when every check passes.

        Raises:
            ValueError: If ``v`` is outside its range, is not an allowed
                choice, or breaks one of the cross-field rules.
        """
        param = PARAM_CONSTRAINTS.get(info.field_name)
        if param is None:
            # Unconstrained field (e.g. losscurve): nothing to check.
            return v

        if param["type"] == "range":
            min_val, max_val = param["bounds"]
            if not min_val <= v <= max_val:
                raise ValueError(
                    f"{info.field_name} must be between {min_val} and {max_val}"
                )
        elif param["type"] == "choice":
            if v not in param["values"]:
                raise ValueError(f"{info.field_name} must be one of {param['values']}")

        # Cross-field rules. Fields are validated in declaration order and
        # info.data only contains the ones that already passed; the value
        # under validation is not in there yet, so merge it in. A rule is
        # evaluated once both of its fields are known, i.e. when the later
        # of the two fields is being validated.
        known = {**info.data, info.field_name: v}

        if (
            info.field_name in ("betas1", "betas2")
            and "betas1" in known
            and "betas2" in known
        ):
            if known["betas1"] > known["betas2"]:
                raise ValueError("betas1 must be less than or equal to betas2")

        if (
            info.field_name in ("emb_scaler", "pos_scaler")
            and "emb_scaler" in known
            and "pos_scaler" in known
        ):
            if known["emb_scaler"] + known["pos_scaler"] > 1.0:
                raise ValueError(
                    "The sum of emb_scaler and pos_scaler must be less than or equal to 1.0"
                )

        return v
96
+
97
+
98
class CrabNetSurrogateModel:
    """Predict CrabNet metrics from hyperparameters using pre-fitted surrogates.

    ``self.models`` is whatever ``joblib.load`` returns for ``fpath``. It is
    indexed with the keys ``"mae"``, ``"rmse"``, ``"runtime"`` and
    ``"model_size"``, and each entry must expose a ``predict`` method.
    """

    def __init__(self, fpath="surrogate_models.pkl"):
        """Load the surrogate models from ``fpath``.

        NOTE(security): joblib files are pickle-based, so loading one can
        execute arbitrary code. Only point ``fpath`` at a trusted file.
        """
        self.models = load(fpath)

    def prepare_params_for_eval(self, raw_params: "Parameterization | dict"):
        """Return a feature dict derived from ``raw_params``.

        The input is never modified; a fresh dict is returned, so the same
        parameters can be evaluated repeatedly with identical results.

        Args:
            raw_params: Either a mapping of hyperparameter names to values or
                a pydantic model exposing ``model_dump()``. Must provide
                ``bias``, ``criterion`` and ``elem_prop``.

        Returns:
            A new dict in which ``bias`` is cast to ``int``, ``use_RobustL1``
            flags whether ``criterion`` was ``"RobustL1"``, ``elem_prop`` is
            expanded into ``elem_prop_magpie`` / ``elem_prop_mat2vec`` /
            ``elem_prop_onehot`` indicator entries, and ``criterion``,
            ``losscurve``, ``learningcurve`` and ``elem_prop`` are set to
            ``None``.
        """
        # Work on a copy: mutating the caller's object would blank out
        # "criterion" and "elem_prop", corrupting any later evaluation of the
        # same parameters. Pydantic models do not support item assignment, so
        # they are converted to a plain dict first.
        to_dict = getattr(raw_params, "model_dump", None)
        params = to_dict() if callable(to_dict) else dict(raw_params)

        params["bias"] = int(params["bias"])
        params["use_RobustL1"] = params["criterion"] == "RobustL1"
        params["criterion"] = None

        params["losscurve"] = None
        params["learningcurve"] = None

        elem_prop = params["elem_prop"]
        params["elem_prop_magpie"] = 0
        params["elem_prop_mat2vec"] = 0
        params["elem_prop_onehot"] = 0
        params[f"elem_prop_{elem_prop}"] = 1
        params["elem_prop"] = None

        return params

    def surrogate_evaluate(self, params: "Parameterization | dict"):
        """Predict MAE, RMSE, runtime and model size for one parameter set.

        A single random percentile drawn from [0, 1] is added as the
        ``mae_rank`` / ``rmse_rank`` / ``runtime_rank`` column for the
        respective model, so repeated calls with the same parameters are
        not deterministic. The model-size model receives no rank column.

        Args:
            params: Hyperparameters as accepted by ``prepare_params_for_eval``.

        Returns:
            A tuple ``(mae, rmse, runtime, model_size)`` holding the return
            value of each model's ``predict`` call on a one-row DataFrame.
        """
        features = pd.DataFrame([self.prepare_params_for_eval(params)])

        percentile = random.uniform(0, 1)  # shared by all three ranked models

        mae = self.models["mae"].predict(features.assign(mae_rank=[percentile]))
        rmse = self.models["rmse"].predict(features.assign(rmse_rank=[percentile]))
        runtime = self.models["runtime"].predict(
            features.assign(runtime_rank=[percentile])
        )
        model_size = self.models["model_size"].predict(features)

        return mae, rmse, runtime, model_size