rosenyu committed (verified)
Commit 165ee00 · 1 Parent(s): a84e1f3

Upload 529 files

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. Ackley10D_CEI_Avg_Obj.pt +3 -0
  2. Ackley10D_CEI_Avg_Time.pt +3 -0
  3. CantileverBeam.png +0 -0
  4. Car.png +0 -0
  5. Car_CEI_Avg_Obj.pt +3 -0
  6. Car_CEI_Avg_Time.pt +3 -0
  7. CompressionSpring.png +0 -0
  8. Formulation_default.png +0 -0
  9. Gradio_important.ipynb +588 -0
  10. Gradio_test.ipynb +569 -0
  11. HeatExchanger.png +0 -0
  12. HeatExchanger_CEI_Avg_Obj.pt +3 -0
  13. HeatExchanger_CEI_Avg_Time.pt +3 -0
  14. PressureVessel.png +0 -0
  15. PressureVessel_CEI_Avg_Obj.pt +3 -0
  16. PressureVessel_CEI_Avg_Time.pt +3 -0
  17. ReinforcedConcreteBeam_CEI_Avg_Obj.pt +3 -0
  18. ReinforcedConcreteBeam_CEI_Avg_Time.pt +3 -0
  19. Reinforcement.png +0 -0
  20. Rosen_PFN4BO.py +442 -0
  21. SpeedReducer.png +0 -0
  22. SpeedReducer_CEI_Avg_Obj.pt +3 -0
  23. SpeedReducer_CEI_Avg_Time.pt +3 -0
  24. Test_formulation.png +0 -0
  25. Test_formulation_default.png +0 -0
  26. ThreeTruss.png +0 -0
  27. ThreeTruss_CEI_Avg_Obj.pt +3 -0
  28. ThreeTruss_CEI_Avg_Time.pt +3 -0
  29. WeldedBeam.png +0 -0
  30. WeldedBeam_CEI_Avg_Obj.pt +3 -0
  31. WeldedBeam_CEI_Avg_Time.pt +3 -0
  32. __pycache__/Rosen_PFN4BO.cpython-310.pyc +0 -0
  33. final_models/Cyril_500features.pt +3 -0
  34. final_models/Cyril_500features_800epoch_cpu.pt +3 -0
  35. final_models/Cyril_50features.pt +3 -0
  36. final_models/hebo_morebudget_9_unused_features_3_userpriorperdim2_8.pt +3 -0
  37. final_models/heboplus_500features_retrain_epoch800_cpu.pt +3 -0
  38. final_models/model_hebo_morebudget_9_unused_features_3.pt +3 -0
  39. final_models/model_sampled_warp_simple_mlp_for_hpob_46.pt +3 -0
  40. pfns4bo/.ipynb_checkpoints/__init__-checkpoint.py +50 -0
  41. pfns4bo/.ipynb_checkpoints/bar_distribution-checkpoint.py +410 -0
  42. pfns4bo/.ipynb_checkpoints/lost_functions-checkpoint.py +177 -0
  43. pfns4bo/.ipynb_checkpoints/transformer-checkpoint.py +327 -0
  44. pfns4bo/__init__.py +50 -0
  45. pfns4bo/__pycache__/__init__.cpython-310.pyc +0 -0
  46. pfns4bo/__pycache__/__init__.cpython-311.pyc +0 -0
  47. pfns4bo/__pycache__/__init__.cpython-38.pyc +0 -0
  48. pfns4bo/__pycache__/__init__.cpython-39.pyc +0 -0
  49. pfns4bo/__pycache__/bar_distribution.cpython-310.pyc +0 -0
  50. pfns4bo/__pycache__/bar_distribution.cpython-311.pyc +0 -0
Ackley10D_CEI_Avg_Obj.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ef078fab3310090b39fa175558b54f1a6819ea07cb0b9e19f38b39bd4c27c12b
+ size 2968
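Note: the `.pt` entries in this commit are Git LFS pointer files — the repository itself stores only the spec version, the sha256 object id, and the byte size shown above, while the tensor lives in LFS storage. A minimal sketch of reading the stored convergence data back (assuming `git lfs pull` has materialized the real files; the meaning of the two tensors is inferred from how the notebooks below plot them):

import torch

# Stored GP-CBO results for one benchmark: average best objective per
# iteration and average cumulative wall-clock time per iteration.
gp_obj = torch.load('Ackley10D_CEI_Avg_Obj.pt')
gp_time = torch.load('Ackley10D_CEI_Avg_Time.pt')
print(gp_obj.shape, gp_time.shape)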
Ackley10D_CEI_Avg_Time.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0e43f57c340f37f7e912b2143872910e14e797a16f1c16c1b3088cf3d550c64a
+ size 3484
CantileverBeam.png ADDED
Car.png ADDED
Car_CEI_Avg_Obj.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2703f7d2083dfc5a340b082b9b16406467443a82ced26ac7202f7440f68c9854
+ size 3008
Car_CEI_Avg_Time.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:acaa29100dfafb78d40a7391feae3d042716159955ec46772eb2f0c017830d1a
+ size 3396
CompressionSpring.png ADDED
Formulation_default.png ADDED
Gradio_important.ipynb ADDED
@@ -0,0 +1,588 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "0823043e-8451-4dc8-968c-ca066003f4a7",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Running on local URL: http://127.0.0.1:7958\n",
+ "\n",
+ "To create a public link, set `share=True` in `launch()`.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<div><iframe src=\"http://127.0.0.1:7958/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+ ],
+ "text/plain": [
+ "<IPython.core.display.HTML object>"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": []
+ },
+ "execution_count": 1,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import gradio as gr\n",
+ "import torch\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "from test_functions.Ackley10D import *\n",
+ "from test_functions.Ackley2D import *\n",
+ "from test_functions.Ackley6D import *\n",
+ "from test_functions.HeatExchanger import *\n",
+ "from test_functions.CantileverBeam import *\n",
+ "from test_functions.Car import *\n",
+ "from test_functions.CompressionSpring import *\n",
+ "from test_functions.GKXWC1 import *\n",
+ "from test_functions.GKXWC2 import *\n",
+ "from test_functions.JLH1 import *\n",
+ "from test_functions.JLH2 import *\n",
+ "from test_functions.KeaneBump import *\n",
+ "from test_functions.PressureVessel import *\n",
+ "from test_functions.ReinforcedConcreteBeam import *\n",
+ "from test_functions.SpeedReducer import *\n",
+ "from test_functions.ThreeTruss import *\n",
+ "from test_functions.WeldedBeam import *\n",
+ "# Import other objective functions as needed\n",
+ "import time\n",
+ "\n",
+ "from Rosen_PFN4BO import *\n",
+ "from PIL import Image\n",
+ "\n",
+ "def s(input_string):\n",
+ " return input_string\n",
+ "\n",
+ "def optimize(objective_function, iteration_input, progress=gr.Progress()):\n",
+ "\n",
+ " print(objective_function)\n",
+ "\n",
+ " # Variable setup\n",
+ " Current_BEST = torch.tensor( -1e10 ) # Some arbitrary very small number\n",
+ " Prev_BEST = torch.tensor( -1e10 )\n",
+ "\n",
+ " if objective_function==\"CantileverBeam.png\":\n",
+ " Current_BEST = torch.tensor( -82500 ) # Some arbitrary very small number\n",
+ " Prev_BEST = torch.tensor( -82500 )\n",
+ " elif objective_function==\"CompressionSpring.png\":\n",
+ " Current_BEST = torch.tensor( -8 ) # Some arbitrary very small number\n",
+ " Prev_BEST = torch.tensor( -8 )\n",
+ " elif objective_function==\"HeatExchanger.png\":\n",
+ " Current_BEST = torch.tensor( -30000 ) # Some arbitrary very small number\n",
+ " Prev_BEST = torch.tensor( -30000 )\n",
+ " elif objective_function==\"ThreeTruss.png\":\n",
+ " Current_BEST = torch.tensor( -300 ) # Some arbitrary very small number\n",
+ " Prev_BEST = torch.tensor( -300 )\n",
+ " elif objective_function==\"Reinforcement.png\":\n",
+ " Current_BEST = torch.tensor( -440 ) # Some arbitrary very small number\n",
+ " Prev_BEST = torch.tensor( -440 )\n",
+ " elif objective_function==\"PressureVessel.png\":\n",
+ " Current_BEST = torch.tensor( -40000 ) # Some arbitrary very small number\n",
+ " Prev_BEST = torch.tensor( -40000 )\n",
+ " elif objective_function==\"SpeedReducer.png\":\n",
+ " Current_BEST = torch.tensor( -3200 ) # Some arbitrary very small number\n",
+ " Prev_BEST = torch.tensor( -3200 )\n",
+ " elif objective_function==\"WeldedBeam.png\":\n",
+ " Current_BEST = torch.tensor( -35 ) # Some arbitrary very small number\n",
+ " Prev_BEST = torch.tensor( -35 )\n",
+ " elif objective_function==\"Car.png\":\n",
+ " Current_BEST = torch.tensor( -35 ) # Some arbitrary very small number\n",
+ " Prev_BEST = torch.tensor( -35 )\n",
+ "\n",
+ " # Initial random samples\n",
+ " trained_X = torch.rand(20, objective_functions[objective_function]['dim'])\n",
+ "\n",
+ " # Scale it to the domain of interest using the selected function\n",
+ " X_Scaled = objective_functions[objective_function]['scaling'](trained_X)\n",
+ "\n",
+ " # Get the constraints and objective\n",
+ " trained_gx, trained_Y = objective_functions[objective_function]['function'](X_Scaled)\n",
+ "\n",
+ " # Convergence list to store best values\n",
+ " convergence = []\n",
+ " time_conv = []\n",
+ "\n",
+ " START_TIME = time.time()\n",
+ "\n",
+ " # Optimization Loop\n",
+ " for ii in progress.tqdm(range(iteration_input)):\n",
+ "\n",
+ " # (0) Get the updated data for this iteration\n",
+ " X_scaled = objective_functions[objective_function]['scaling'](trained_X)\n",
+ " trained_gx, trained_Y = objective_functions[objective_function]['function'](X_scaled)\n",
+ "\n",
+ " # (1) Randomly sample Xpen\n",
+ " X_pen = torch.rand(1000,trained_X.shape[1])\n",
+ "\n",
+ " # (2) PFN inference phase with EI\n",
+ " default_model = 'final_models/model_hebo_morebudget_9_unused_features_3.pt'\n",
+ "\n",
+ " ei, p_feas = Rosen_PFN_Parallel(default_model,\n",
+ " trained_X,\n",
+ " trained_Y,\n",
+ " trained_gx,\n",
+ " X_pen,\n",
+ " 'power',\n",
+ " 'ei'\n",
+ " )\n",
+ "\n",
+ " # Calculating CEI\n",
+ " CEI = ei\n",
+ " for jj in range(p_feas.shape[1]):\n",
+ " CEI = CEI*p_feas[:,jj]\n",
+ "\n",
+ " # (4) Get the next search value\n",
+ " rec_idx = torch.argmax(CEI)\n",
+ " best_candidate = X_pen[rec_idx,:].unsqueeze(0)\n",
+ "\n",
+ " # (5) Append the next search point\n",
+ " trained_X = torch.cat([trained_X, best_candidate])\n",
+ "\n",
+ " ################################################################################\n",
+ " # This is just for visualizing the best value.\n",
+ " # This section can be removed for pure optimization purposes\n",
+ " Current_X = objective_functions[objective_function]['scaling'](trained_X)\n",
+ " Current_GX, Current_Y = objective_functions[objective_function]['function'](Current_X)\n",
+ " if ((Current_GX<=0).all(dim=1)).any():\n",
+ " Current_BEST = torch.max(Current_Y[(Current_GX<=0).all(dim=1)])\n",
+ " else:\n",
+ " Current_BEST = Prev_BEST\n",
+ " ################################################################################\n",
+ "\n",
+ " # (ii) Convergence tracking (assuming the best Y is to be maximized)\n",
+ " print(Current_BEST)\n",
+ " print(convergence)\n",
+ " convergence.append(Current_BEST.abs())\n",
+ " time_conv.append(time.time() - START_TIME)\n",
+ "\n",
+ " # Timing\n",
+ " END_TIME = time.time()\n",
+ " TOTAL_TIME = END_TIME - START_TIME\n",
+ "\n",
+ " # Website visualization\n",
+ " # (i) Radar chart for trained_X\n",
+ " radar_chart = None\n",
+ " # radar_chart = create_radar_chart(X_scaled)\n",
+ " # (ii) Convergence tracking (assuming the best Y is to be maximized)\n",
+ " convergence_plot = create_convergence_plot(objective_function, iteration_input,\n",
+ " time_conv,\n",
+ " convergence, TOTAL_TIME)\n",
+ "\n",
+ " return convergence_plot\n",
+ " # return radar_chart, convergence_plot\n",
+ "\n",
+ "def create_radar_chart(X_scaled):\n",
+ " fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))\n",
+ " labels = [f'x{i+1}' for i in range(X_scaled.shape[1])]\n",
+ " values = X_scaled.mean(dim=0).numpy()\n",
+ "\n",
+ " num_vars = len(labels)\n",
+ " angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()\n",
+ " values = np.concatenate((values, [values[0]]))\n",
+ " angles += angles[:1]\n",
+ "\n",
+ " ax.fill(angles, values, color='green', alpha=0.25)\n",
+ " ax.plot(angles, values, color='green', linewidth=2)\n",
+ " ax.set_yticklabels([])\n",
+ " ax.set_xticks(angles[:-1])\n",
+ " ax.set_xticklabels([f'{label}\\n({value:.2f})' for label, value in zip(labels, values[:-1])]) # Show values\n",
+ " ax.set_title(\"Selected Design\", size=15, color='black', y=1.1)\n",
+ "\n",
+ " plt.close(fig)\n",
+ " return fig\n",
+ "\n",
+ "def create_convergence_plot(objective_function, iteration_input, time_conv, convergence, TOTAL_TIME):\n",
+ " fig, ax = plt.subplots()\n",
+ "\n",
+ " # Realtime optimization data\n",
+ " ax.plot(time_conv, convergence, '^-', label='PFN-CBO (Realtime)' )\n",
+ "\n",
+ " # Stored GP data\n",
+ " if objective_function==\"CantileverBeam.png\":\n",
+ " GP_TIME = torch.load('CantileverBeam_CEI_Avg_Time.pt')\n",
+ " GP_OBJ = torch.load('CantileverBeam_CEI_Avg_Obj.pt')\n",
+ " elif objective_function==\"CompressionSpring.png\":\n",
+ " GP_TIME = torch.load('CompressionSpring_CEI_Avg_Time.pt')\n",
+ " GP_OBJ = torch.load('CompressionSpring_CEI_Avg_Obj.pt')\n",
+ " elif objective_function==\"HeatExchanger.png\":\n",
+ " GP_TIME = torch.load('HeatExchanger_CEI_Avg_Time.pt')\n",
+ " GP_OBJ = torch.load('HeatExchanger_CEI_Avg_Obj.pt')\n",
+ " elif objective_function==\"ThreeTruss.png\":\n",
+ " GP_TIME = torch.load('ThreeTruss_CEI_Avg_Time.pt')\n",
+ " GP_OBJ = torch.load('ThreeTruss_CEI_Avg_Obj.pt')\n",
+ " elif objective_function==\"Reinforcement.png\":\n",
+ " GP_TIME = torch.load('ReinforcedConcreteBeam_CEI_Avg_Time.pt')\n",
+ " GP_OBJ = torch.load('ReinforcedConcreteBeam_CEI_Avg_Obj.pt')\n",
+ " elif objective_function==\"PressureVessel.png\":\n",
+ " GP_TIME = torch.load('PressureVessel_CEI_Avg_Time.pt')\n",
+ " GP_OBJ = torch.load('PressureVessel_CEI_Avg_Obj.pt')\n",
+ " elif objective_function==\"SpeedReducer.png\":\n",
+ " GP_TIME = torch.load('SpeedReducer_CEI_Avg_Time.pt')\n",
+ " GP_OBJ = torch.load('SpeedReducer_CEI_Avg_Obj.pt')\n",
+ " elif objective_function==\"WeldedBeam.png\":\n",
+ " GP_TIME = torch.load('WeldedBeam_CEI_Avg_Time.pt')\n",
+ " GP_OBJ = torch.load('WeldedBeam_CEI_Avg_Obj.pt')\n",
+ " elif objective_function==\"Car.png\":\n",
+ " GP_TIME = torch.load('Car_CEI_Avg_Time.pt')\n",
+ " GP_OBJ = torch.load('Car_CEI_Avg_Obj.pt')\n",
+ "\n",
+ " # Plot GP data\n",
+ " ax.plot(GP_TIME[:iteration_input], GP_OBJ[:iteration_input], '^-', label='GP-CBO (Data)' )\n",
+ "\n",
+ " ax.set_xlabel('Time (seconds)')\n",
+ " ax.set_ylabel('Objective Value')\n",
+ " ax.set_title('Convergence Plot for {t} iterations'.format(t=iteration_input))\n",
+ "\n",
+ " if objective_function==\"CantileverBeam.png\":\n",
+ " ax.axhline(y=50000, color='red', linestyle='--', label='Optimal Value')\n",
+ " elif objective_function==\"CompressionSpring.png\":\n",
+ " ax.axhline(y=0, color='red', linestyle='--', label='Optimal Value')\n",
+ " elif objective_function==\"HeatExchanger.png\":\n",
+ " ax.axhline(y=4700, color='red', linestyle='--', label='Optimal Value')\n",
+ " elif objective_function==\"ThreeTruss.png\":\n",
+ " ax.axhline(y=262, color='red', linestyle='--', label='Optimal Value')\n",
+ " elif objective_function==\"Reinforcement.png\":\n",
+ " ax.axhline(y=355, color='red', linestyle='--', label='Optimal Value')\n",
+ " elif objective_function==\"PressureVessel.png\":\n",
+ " ax.axhline(y=5000, color='red', linestyle='--', label='Optimal Value')\n",
+ " elif objective_function==\"SpeedReducer.png\":\n",
+ " ax.axhline(y=2650, color='red', linestyle='--', label='Optimal Value')\n",
+ " elif objective_function==\"WeldedBeam.png\":\n",
+ " ax.axhline(y=6, color='red', linestyle='--', label='Optimal Value')\n",
+ " elif objective_function==\"Car.png\":\n",
+ " ax.axhline(y=25, color='red', linestyle='--', label='Optimal Value')\n",
+ "\n",
+ " ax.legend(loc='best')\n",
+ "\n",
+ " # Add text to the top right corner of the plot\n",
+ " if len(convergence) == 0:\n",
+ " ax.text(0.5, 0.5, 'No Feasible Design Found', transform=ax.transAxes, fontsize=12,\n",
+ " verticalalignment='top', horizontalalignment='right')\n",
+ "\n",
+ " plt.close(fig)\n",
+ " return fig\n",
+ "\n",
+ "# Define available objective functions\n",
+ "objective_functions = {\n",
+ " \"CompressionSpring.png\": {\"image\": \"CompressionSpring.png\",\n",
+ " \"function\": CompressionSpring,\n",
+ " \"scaling\": CompressionSpring_Scaling,\n",
+ " \"dim\": 3},\n",
+ " \"Reinforcement.png\": {\"image\": \"Reinforcement.png\", \"function\": ReinforcedConcreteBeam, \"scaling\": ReinforcedConcreteBeam_Scaling, \"dim\": 3},\n",
+ " \"PressureVessel.png\": {\"image\": \"PressureVessel.png\", \"function\": PressureVessel, \"scaling\": PressureVessel_Scaling, \"dim\": 4},\n",
+ " \"SpeedReducer.png\": {\"image\": \"SpeedReducer.png\", \"function\": SpeedReducer, \"scaling\": SpeedReducer_Scaling, \"dim\": 7},\n",
+ " \"WeldedBeam.png\": {\"image\": \"WeldedBeam.png\", \"function\": WeldedBeam, \"scaling\": WeldedBeam_Scaling, \"dim\": 4},\n",
+ " \"HeatExchanger.png\": {\"image\": \"HeatExchanger.png\", \"function\": HeatExchanger, \"scaling\": HeatExchanger_Scaling, \"dim\": 8},\n",
+ " \"CantileverBeam.png\": {\"image\": \"CantileverBeam.png\", \"function\": CantileverBeam, \"scaling\": CantileverBeam_Scaling, \"dim\": 10},\n",
+ " \"Car.png\": {\"image\": \"Car.png\", \"function\": Car, \"scaling\": Car_Scaling, \"dim\": 11},\n",
+ "}\n",
+ "\n",
+ "# Extract just the image paths for the gallery\n",
+ "image_paths = [key for key in objective_functions]\n",
+ "\n",
+ "def submit_action(objective_function_choices, iteration_input):\n",
+ " if len(objective_function_choices)>0:\n",
+ " selected_function = objective_functions[objective_function_choices]['function']\n",
+ " return optimize(objective_function_choices, iteration_input)\n",
+ " return None\n",
+ "\n",
+ "# Function to clear the output\n",
+ "def clear_output():\n",
+ " return gr.update(value=[], selected=None), None, 15, gr.Markdown(\"\"), 'Test_formulation_default.png'\n",
+ "\n",
+ "def reset_gallery():\n",
+ " return gr.update(value=image_paths)\n",
+ "\n",
+ "with gr.Blocks() as demo:\n",
+ " # Centered Title and Description using gr.HTML\n",
+ " gr.HTML(\n",
+ " \"\"\"\n",
+ " <div style=\"text-align: center;\">\n",
+ " <h1>Pre-trained Transformer for Constrained Bayesian Optimization</h1>\n",
+ " <h4>Paper: <a href=\"https://arxiv.org/abs/2404.04495\">\n",
+ " Fast and Accurate Bayesian Optimization with Pre-trained Transformers for Constrained Engineering Problems</a>\n",
+ " </h4>\n",
+ "\n",
+ " <p style=\"text-align: left;\">This is a demo for Bayesian Optimization using PFN (Prior-Data Fitted Networks).\n",
+ " Select your objective function by clicking on one of the problem images below, then enter the iteration number to run the optimization process.\n",
+ " The results will be visualized in the convergence plot.</p>\n",
+ "\n",
+ " </div>\n",
+ " \"\"\"\n",
+ " )\n",
+ "\n",
+ " with gr.Row():\n",
+ "\n",
+ " with gr.Column(variant='compact'):\n",
+ "\n",
+ " with gr.Row():\n",
+ " gr.Markdown(\"## Select a problem (objective): \")\n",
+ " img_key = gr.Markdown(value=\"\", visible=False)\n",
+ "\n",
+ " gallery = gr.Gallery(value=image_paths, label=\"Objective Functions\",\n",
+ " object_fit='contain',\n",
+ " columns=3, rows=3, elem_id=\"gallery\")\n",
+ "\n",
+ " gr.Markdown(\"## Enter Iteration Number: \")\n",
+ " iteration_input = gr.Slider(label=\"Iterations:\", minimum=15, maximum=50, step=1, value=15)\n",
+ "\n",
+ " # Row for the Clear and Submit buttons\n",
+ " with gr.Row():\n",
+ " clear_button = gr.Button(\"Clear\")\n",
+ " submit_button = gr.Button(\"Submit\", variant=\"primary\")\n",
+ "\n",
+ " with gr.Column():\n",
+ " gr.Markdown(\"## Problem Formulation: \")\n",
+ " formulation = gr.Image(value='Formulation_default.png', height=150)\n",
+ " gr.Markdown(\"## Results: \")\n",
+ " gr.Markdown(\"The graph plots the best observed data vs. the time the algorithm has run up to that iteration. PFN-CBO shows the realtime optimization running in the backend, while GP-CBO shows stored data from our previous experiments, since running GP-CBO takes longer.\")\n",
+ " convergence_plot = gr.Plot(label=\"Convergence Plot\")\n",
+ "\n",
+ " def handle_select(evt: gr.SelectData):\n",
+ " selected_image = evt.value\n",
+ " key = evt.value['image']['orig_name']\n",
+ " formulation = 'Test_formulation.png'\n",
+ " print('here')\n",
+ " print(key)\n",
+ "\n",
+ " return key, formulation\n",
+ "\n",
+ " gallery.select(fn=handle_select, inputs=None, outputs=[img_key, formulation])\n",
+ "\n",
+ " submit_button.click(\n",
+ " submit_action,\n",
+ " inputs=[img_key, iteration_input],\n",
+ " outputs= convergence_plot,\n",
+ " )\n",
+ "\n",
+ " clear_button.click(\n",
+ " clear_output,\n",
+ " inputs=None,\n",
+ " outputs=[gallery, convergence_plot, iteration_input, img_key, formulation]\n",
+ " ).then(\n",
+ " # Step 2: Reset the gallery to the original list\n",
+ " reset_gallery,\n",
+ " inputs=None,\n",
+ " outputs=gallery\n",
+ " )\n",
+ "\n",
+ "demo.launch()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "776c7ab2-96a1-4e22-9b4b-daf69960e3c4",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9d2c7c58-43b1-4e5b-9135-17683dac1788",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5d33a24c-818c-4023-bbbd-495f992a9d1a",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "764d0258-ec88-41d5-b5b5-e0bcb39ff313",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d2c35245-543c-4b82-8d12-04f3dda1468b",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e3663adc-3e95-418b-bf50-0a372615cdd6",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "30886262-bd87-4760-a585-7872e071663f",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.14"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+ }
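For reference, the acquisition step buried in the notebook JSON above is a constrained expected improvement: the PFN returns one EI score per candidate plus one feasibility probability per constraint, and the notebook multiplies them together before taking the argmax. A standalone sketch with synthetic stand-ins for the PFN outputs:

import torch

ei = torch.rand(1000)          # expected improvement per candidate (stand-in for the PFN output)
p_feas = torch.rand(1000, 3)   # P(g_j(x) <= 0) per candidate and constraint (stand-in)

# CEI = EI * prod_j P(constraint j satisfied), as in the notebook's loop over p_feas columns
cei = ei * p_feas.prod(dim=1)
rec_idx = torch.argmax(cei)    # index of the next design to evaluate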
Gradio_test.ipynb ADDED
@@ -0,0 +1,569 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "4453c5ad-ec87-42e0-a6d5-e3fd3593aec2",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Running on local URL: http://127.0.0.1:7891\n",
+ "Running on public URL: https://f714b6f956fb581264.gradio.live\n",
+ "\n",
+ "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<div><iframe src=\"https://f714b6f956fb581264.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+ ],
+ "text/plain": [
+ "<IPython.core.display.HTML object>"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": []
+ },
+ "execution_count": 1,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import gradio as gr\n",
+ "import torch\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "from test_functions.Ackley10D import *\n",
+ "from test_functions.Ackley2D import *\n",
+ "from test_functions.Ackley6D import *\n",
+ "from test_functions.HeatExchanger import *\n",
+ "from test_functions.CantileverBeam import *\n",
+ "from test_functions.Car import *\n",
+ "from test_functions.CompressionSpring import *\n",
+ "from test_functions.GKXWC1 import *\n",
+ "from test_functions.GKXWC2 import *\n",
+ "from test_functions.JLH1 import *\n",
+ "from test_functions.JLH2 import *\n",
+ "from test_functions.KeaneBump import *\n",
+ "from test_functions.PressureVessel import *\n",
+ "from test_functions.ReinforcedConcreteBeam import *\n",
+ "from test_functions.SpeedReducer import *\n",
+ "from test_functions.ThreeTruss import *\n",
+ "from test_functions.WeldedBeam import *\n",
+ "# Import other objective functions as needed\n",
+ "import time\n",
+ "\n",
+ "from Rosen_PFN4BO import *\n",
+ "\n",
+ "def optimize(objective_function, iteration_input):\n",
+ "\n",
+ " # Variable setup\n",
+ " Current_BEST = -1e10 # Some arbitrary very small number\n",
+ " Prev_BEST = -1e10\n",
+ "\n",
+ " # Initial random samples\n",
+ " trained_X = torch.rand(20, objective_functions[objective_function]['dim'])\n",
+ "\n",
+ " # Scale it to the domain of interest using the selected function\n",
+ " X_Scaled = objective_functions[objective_function]['scaling'](trained_X)\n",
+ "\n",
+ " # Get the constraints and objective\n",
+ " trained_gx, trained_Y = objective_functions[objective_function]['function'](X_Scaled)\n",
+ "\n",
+ " # Convergence list to store best values\n",
+ " convergence = []\n",
+ "\n",
+ " START_TIME = time.time()\n",
+ "\n",
+ " # Optimization Loop\n",
+ " for ii in range(iteration_input):\n",
+ "\n",
+ " # (0) Get the updated data for this iteration\n",
+ " X_scaled = objective_functions[objective_function]['scaling'](trained_X)\n",
+ " trained_gx, trained_Y = objective_functions[objective_function]['function'](X_scaled)\n",
+ "\n",
+ " # (1) Randomly sample Xpen\n",
+ " X_pen = torch.rand(1000,trained_X.shape[1])\n",
+ "\n",
+ " # (2) PFN inference phase with EI\n",
+ " default_model = 'final_models/Cyril_500features_800epoch_cpu.pt'\n",
+ "\n",
+ " ei, p_feas = Rosen_PFN_Parallel(default_model,\n",
+ " trained_X,\n",
+ " trained_Y,\n",
+ " trained_gx,\n",
+ " X_pen,\n",
+ " 'power',\n",
+ " 'ei'\n",
+ " )\n",
+ "\n",
+ " # Calculating CEI\n",
+ " CEI = ei\n",
+ " for jj in range(p_feas.shape[1]):\n",
+ " CEI = CEI*p_feas[:,jj]\n",
+ "\n",
+ " # (4) Get the next search value\n",
+ " rec_idx = torch.argmax(CEI)\n",
+ " best_candidate = X_pen[rec_idx,:].unsqueeze(0)\n",
+ "\n",
+ " # (5) Append the next search point\n",
+ " trained_X = torch.cat([trained_X, best_candidate])\n",
+ "\n",
+ " ################################################################################\n",
+ " # This is just for visualizing the best value.\n",
+ " # This section can be removed for pure optimization purposes\n",
+ " Current_X = objective_functions[objective_function]['scaling'](trained_X)\n",
+ " Current_GX, Current_Y = objective_functions[objective_function]['function'](Current_X)\n",
+ " if ((Current_GX<=0).all(dim=1)).any():\n",
+ " Current_BEST = torch.max(Current_Y[(Current_GX<=0).all(dim=1)])\n",
+ " else:\n",
+ " Current_BEST = Prev_BEST\n",
+ " ################################################################################\n",
+ "\n",
+ " # (ii) Convergence tracking (assuming the best Y is to be maximized)\n",
+ " if Current_BEST != -1e10:\n",
+ " convergence.append(Current_BEST.abs())\n",
+ "\n",
+ " # Timing\n",
+ " END_TIME = time.time()\n",
+ " TOTAL_TIME = END_TIME - START_TIME\n",
+ "\n",
+ " # Website visualization\n",
+ " # (i) Radar chart for trained_X\n",
+ " radar_chart = create_radar_chart(X_scaled)\n",
+ " # (ii) Convergence tracking (assuming the best Y is to be maximized)\n",
+ " convergence_plot = create_convergence_plot(convergence, TOTAL_TIME)\n",
+ "\n",
+ " return radar_chart, convergence_plot\n",
+ "\n",
+ "def create_radar_chart(X_scaled):\n",
+ " fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))\n",
+ " labels = [f'x{i+1}' for i in range(X_scaled.shape[1])]\n",
+ " values = X_scaled.mean(dim=0).numpy()\n",
+ "\n",
+ " num_vars = len(labels)\n",
+ " angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()\n",
+ " values = np.concatenate((values, [values[0]]))\n",
+ " angles += angles[:1]\n",
+ "\n",
+ " ax.fill(angles, values, color='green', alpha=0.25)\n",
+ " ax.plot(angles, values, color='green', linewidth=2)\n",
+ " ax.set_yticklabels([])\n",
+ " ax.set_xticks(angles[:-1])\n",
+ " ax.set_xticklabels([f'{label}\\n({value:.2f})' for label, value in zip(labels, values[:-1])]) # Show values\n",
+ " ax.set_title(\"Selected Design\", size=15, color='black', y=1.1)\n",
+ "\n",
+ " plt.close(fig)\n",
+ " return fig\n",
+ "\n",
+ "def create_convergence_plot(convergence, TOTAL_TIME):\n",
+ " fig, ax = plt.subplots()\n",
+ " ax.plot(convergence, label='Best Objective Value')\n",
+ " ax.set_xlabel('Iteration')\n",
+ " ax.set_ylabel('Objective Value')\n",
+ " ax.set_title('Convergence Plot (Opt Runtime: {t} sec)'.format(t=round(TOTAL_TIME, 2)))\n",
+ " ax.legend()\n",
+ "\n",
+ " # Add text to the top right corner of the plot\n",
+ " if len(convergence) == 0:\n",
+ " ax.text(0.5, 0.5, 'No Feasible Design Found', transform=ax.transAxes, fontsize=12,\n",
+ " verticalalignment='top', horizontalalignment='right')\n",
+ "\n",
+ " plt.close(fig)\n",
+ " return fig\n",
+ "\n",
+ "# Define available objective functions\n",
+ "objective_functions = {\n",
+ " \"Ackley2D\": {\"function\": Ackley2D, \"scaling\": Ackley2D_Scaling, \"dim\": 2},\n",
+ " \"Ackley6D\": {\"function\": Ackley6D, \"scaling\": Ackley6D_Scaling, \"dim\": 6},\n",
+ " \"Ackley10D\": {\"function\": Ackley10D, \"scaling\": Ackley10D_Scaling, \"dim\": 10},\n",
+ " \"GKXWC1\": {\"function\": GKXWC1, \"scaling\": GKXWC1_Scaling, \"dim\": 2},\n",
+ " \"GKXWC2\": {\"function\": GKXWC2, \"scaling\": GKXWC2_Scaling, \"dim\": 2},\n",
+ " \"JLH1\": {\"function\": JLH1, \"scaling\": JLH1_Scaling, \"dim\": 2},\n",
+ " \"JLH2\": {\"function\": JLH2, \"scaling\": JLH2_Scaling, \"dim\": 2},\n",
+ " \"Keane Bump\": {\"function\": KeaneBump, \"scaling\": KeaneBump_Scaling, \"dim\": 18},\n",
+ " \"Three Truss\": {\"function\": ThreeTruss, \"scaling\": ThreeTruss_Scaling, \"dim\": 2},\n",
+ " \"Compression Spring\": {\"function\": CompressionSpring, \"scaling\": CompressionSpring_Scaling, \"dim\": 3},\n",
+ " \"Reinforced Concrete Beam\": {\"function\": ReinforcedConcreteBeam, \"scaling\": ReinforcedConcreteBeam_Scaling, \"dim\": 3},\n",
+ " \"Pressure Vessel\": {\"function\": PressureVessel, \"scaling\": PressureVessel_Scaling, \"dim\": 4},\n",
+ " \"Speed Reducer\": {\"function\": SpeedReducer, \"scaling\": SpeedReducer_Scaling, \"dim\": 4},\n",
+ " \"Welded Beam\": {\"function\": WeldedBeam, \"scaling\": WeldedBeam_Scaling, \"dim\": 4},\n",
+ " \"Heat Exchanger\": {\"function\": HeatExchanger, \"scaling\": HeatExchanger_Scaling, \"dim\": 8},\n",
+ " \"Cantilever Beam\": {\"function\": CantileverBeam, \"scaling\": CantileverBeam_Scaling, \"dim\": 10},\n",
+ " \"Car\": {\"function\": Car, \"scaling\": Car_Scaling, \"dim\": 11},\n",
+ "\n",
+ " # Add more functions here\n",
+ "}\n",
+ "\n",
+ "with gr.Blocks(theme=gr.themes.Default()) as demo:\n",
+ " # Centered Title and Description using gr.HTML\n",
+ " gr.HTML(\n",
+ " \"\"\"\n",
+ " <div style=\"text-align: center;\">\n",
+ " <h1>Pre-trained Transformer for Constrained Bayesian Optimization</h1>\n",
+ " <p>This is a demo for Bayesian Optimization using PFN (Prior-Data Fitted Networks).\n",
+ " Select your objective function by clicking on one of the check boxes below, then enter the iteration number to run the optimization process.\n",
+ " The results will be visualized in the radar chart and convergence plot.</p>\n",
+ " <img src=\"https://github.com/rosenyu304/BOEngineeringBenchmark/blob/main/Icons.png?raw=true\"\n",
+ " alt=\"Example Image\"\n",
+ " style=\"width: 800px; height: auto; margin-top: 20px; display: block; margin-left: auto; margin-right: auto;\">\n",
+ "\n",
+ " </div>\n",
+ " \"\"\"\n",
+ " )\n",
+ "\n",
+ " selected_objective = gr.State(None) # To store the selected objective function\n",
+ "\n",
+ " with gr.Row():\n",
+ "\n",
+ " objective_checkbox_group = gr.CheckboxGroup(\n",
+ " choices=[\"JLH1\", \"JLH2\", \"GKXWC1\", \"GKXWC2\", \"Ackley2D\", \"Ackley6D\", \"Ackley10D\", \"Keane Bump\", \"Three Truss\", \"Reinforced Concrete Beam\", \"Pressure Vessel\", \"Welded Beam\", \"Speed Reducer\", \"Car\"],\n",
+ " label=\"Select the design problem:\"\n",
+ " )\n",
+ " with gr.Row():\n",
+ " iteration_input = gr.Number(label=\"Enter Iteration Number:\", value=10)\n",
+ "\n",
+ " # Row for the Clear and Submit buttons\n",
+ " with gr.Row():\n",
+ " clear_button = gr.Button(\"Clear\")\n",
+ " submit_button = gr.Button(\"Submit\", variant=\"primary\")\n",
+ "\n",
+ " with gr.Row():\n",
+ " with gr.Column():\n",
+ " radar_plot = gr.Plot(label=\"Resulting Design\")\n",
+ " with gr.Column():\n",
+ " convergence_plot = gr.Plot(label=\"Convergence Plot\")\n",
+ "\n",
+ " # Define actions for buttons\n",
+ " def clear_action():\n",
+ " return None, None, None\n",
+ "\n",
+ " def submit_action(objective_function_choices, iteration_input):\n",
+ " # Handle the case where multiple choices are selected\n",
+ " if len(objective_function_choices) > 0:\n",
+ " selected_function = objective_function_choices[0] # Assuming using the first selected function\n",
+ " return optimize(selected_function, iteration_input)\n",
+ " return None, None\n",
+ "\n",
+ " # Button click actions\n",
+ " clear_button.click(clear_action, outputs=[objective_checkbox_group, radar_plot, convergence_plot])\n",
+ " submit_button.click(\n",
+ " submit_action,\n",
+ " inputs=[objective_checkbox_group, iteration_input],\n",
+ " outputs=[radar_plot, convergence_plot]\n",
+ " )\n",
+ "\n",
+ "demo.launch(share=True)\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "352d0291-93b4-43eb-b683-3d48776dc670",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "92ecbbe6-dea6-4e7f-aae1-f0d442dbda3b",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ba69b5f9-c52c-4c23-8645-c81c27f7a815",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "05789fba-2099-46b7-8675-64b7969427a1",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Running on local URL: http://127.0.0.1:7899\n",
+ "\n",
+ "To create a public link, set `share=True` in `launch()`.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<div><iframe src=\"http://127.0.0.1:7899/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+ ],
+ "text/plain": [
+ "<IPython.core.display.HTML object>"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import gradio as gr\n",
+ "\n",
+ "def calculator(num1, operation, num2):\n",
+ " if operation == \"add\":\n",
+ " return num1 + num2\n",
+ " elif operation == \"subtract\":\n",
+ " return num1 - num2\n",
+ " elif operation == \"multiply\":\n",
+ " return num1 * num2\n",
+ " elif operation == \"divide\":\n",
+ " return num1 / num2\n",
+ "\n",
+ "with gr.Blocks() as demo:\n",
+ " with gr.Row():\n",
+ " with gr.Column():\n",
+ " num_1 = gr.Number(value=4)\n",
+ " operation = gr.Radio([\"add\", \"subtract\", \"multiply\", \"divide\"])\n",
+ " num_2 = gr.Number(value=0)\n",
+ " submit_btn = gr.Button(value=\"Calculate\")\n",
+ " with gr.Column():\n",
+ " result = gr.Number()\n",
+ "\n",
+ " submit_btn.click(\n",
+ " calculator, inputs=[num_1, operation, num_2], outputs=[result], api_name=False\n",
+ " )\n",
+ " examples = gr.Examples(\n",
+ " examples=[\n",
+ " [5, \"add\", 3],\n",
+ " [4, \"divide\", 2],\n",
+ " [-4, \"multiply\", 2.5],\n",
+ " [0, \"subtract\", 1.2],\n",
+ " ],\n",
+ " inputs=[num_1, operation, num_2],\n",
+ " )\n",
+ "\n",
+ "if __name__ == \"__main__\":\n",
+ " demo.launch(show_api=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a4bf709a-ff0a-4aac-a4b4-fd98cd5948bb",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "679f7647-ca68-46f9-a1da-81d6c96267c9",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ea40bfac-e090-4cd5-9caa-99b06db3ea8d",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "id": "928ac99a-af8f-401c-8c0b-ef83cfef5ba9",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Running on local URL: http://127.0.0.1:7890\n",
+ "\n",
+ "To create a public link, set `share=True` in `launch()`.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<div><iframe src=\"http://127.0.0.1:7890/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+ ],
+ "text/plain": [
+ "<IPython.core.display.HTML object>"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import gradio as gr\n",
+ "\n",
+ "def calculator(num1, operation, num2):\n",
+ " if operation == \"add\":\n",
+ " return num1 + num2\n",
+ " elif operation == \"subtract\":\n",
+ " return num1 - num2\n",
+ " elif operation == \"multiply\":\n",
+ " return num1 * num2\n",
+ " elif operation == \"divide\":\n",
+ " return num1 / num2\n",
+ "\n",
+ "with gr.Blocks() as demo:\n",
+ " with gr.Row():\n",
+ " with gr.Column():\n",
+ " num_1 = gr.Number(value=4)\n",
+ " operation = gr.Radio([\"add\", \"subtract\", \"multiply\", \"divide\"])\n",
+ " num_2 = gr.Number(value=0)\n",
+ " submit_btn = gr.Button(value=\"Calculate\")\n",
+ " with gr.Column():\n",
+ " result = gr.Number()\n",
+ "\n",
+ " submit_btn.click(\n",
+ " calculator, inputs=[num_1, operation, num_2], outputs=[result], api_name=False\n",
+ " )\n",
+ " examples = gr.Examples(\n",
+ " examples=[\n",
+ " [5, \"add\", 3],\n",
+ " [4, \"divide\", 2],\n",
+ " [-4, \"multiply\", 2.5],\n",
+ " [0, \"subtract\", 1.2],\n",
+ " ],\n",
+ " inputs=[num_1, operation, num_2],\n",
+ " )\n",
+ "\n",
+ "if __name__ == \"__main__\":\n",
+ " demo.launch(show_api=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "id": "09a251df-4076-4925-8799-9a2a59cb8246",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# import gradio as gr\n",
+ "\n",
+ "# def greet(selected_options):\n",
+ "# return f\"You selected: {', '.join(selected_options)}\"\n",
+ "\n",
+ "# with gr.Blocks() as demo:\n",
+ "# with gr.Row():\n",
+ "# checkbox_group = gr.CheckboxGroup(\n",
+ "# choices=[\"Option 1\", \"Option 2\"],\n",
+ "# label=\"Select your options\",\n",
+ "# elem_id=\"custom_checkbox_group\"\n",
+ "# )\n",
+ "# output = gr.Textbox(label=\"Output\")\n",
+ "\n",
+ "# checkbox_group.change(greet, checkbox_group, output)\n",
+ "\n",
+ "# gr.HTML(\n",
+ "# f\"\"\"\n",
+ "# <style>\n",
+ "# #custom_checkbox_group label {\n",
+ "# display: block;\n",
+ "# width: 200pt;\n",
+ "# height: 200pt;\n",
+ "# border: 1px solid #ccc;\n",
+ "# margin-bottom: 10pt;\n",
+ "# padding: 10pt;\n",
+ "# box-sizing: border-box;\n",
+ "# position: relative;\n",
+ "# }\n",
+ "# #custom_checkbox_group label input {\n",
+ "# position: absolute;\n",
+ "# top: 10pt;\n",
+ "# left: 10pt;\n",
+ "# }\n",
+ "# #custom_checkbox_group label span {\n",
+ "# position: absolute;\n",
+ "# top: 10pt;\n",
+ "# left: 40pt; /* Adjust this value to control the distance between the checkbox and the label */\n",
+ "# }\n",
+ "# #custom_checkbox_group label img {\n",
+ "# position: absolute;\n",
+ "# bottom: 10pt;\n",
+ "# left: 10pt;\n",
+ "# width: 180pt; /* Adjust the size of the image if needed */\n",
+ "# height: auto;\n",
+ "# }\n",
+ "# </style>\n",
+ "# <label>\n",
+ "# <input type=\"checkbox\" />\n",
+ "# <span>Option 1</span>\n",
+ "# <img src=\"https://images.pexels.com/photos/1108099/pexels-photo-1108099.jpeg\" alt=\"Dog image\"/>\n",
+ "# </label>\n",
+ "# \"\"\"\n",
+ "# )\n",
+ "\n",
+ "# demo.launch()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f52549d5-4be0-4672-be6d-df462957cb56",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.14"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+ }
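Both notebooks track the best feasible objective the same way: a design only counts when every constraint value is non-positive, and otherwise the previous best is carried forward. A sketch of that bookkeeping with synthetic tensors:

import torch

GX = torch.randn(50, 4)            # constraint values g_j(x) for 50 evaluated designs (synthetic)
Y = torch.randn(50)                # objective values, maximized (synthetic)
prev_best = torch.tensor(-1e10)    # sentinel "very small number", as in the notebooks

feasible = (GX <= 0).all(dim=1)    # a design is feasible only if all constraints hold
current_best = Y[feasible].max() if feasible.any() else prev_best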
HeatExchanger.png ADDED
HeatExchanger_CEI_Avg_Obj.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6fc95574634750d3dc892076b26e55c6f79d4dbb128d5b65e6832e83783c89a8
+ size 3432
HeatExchanger_CEI_Avg_Time.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0ed8e01768b9cc8bf82c51f523c9ea46c4f3e7e3e9e6c8e04edb0d615032f1e9
+ size 3500
PressureVessel.png ADDED
PressureVessel_CEI_Avg_Obj.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f7c81ea242bdcb45cb644cd5f18b941ff8ebbcbbb81b9965eea251c01f9f6c78
+ size 3628
PressureVessel_CEI_Avg_Time.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eb204723d0523baebbfda6e4f1fdbc7506c66bfc0ed0cbc7ec5ea485451660a7
+ size 3504
ReinforcedConcreteBeam_CEI_Avg_Obj.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5d2e3212a28eb9cb59212d876c1ddae2f1b37950974eed01683c7d4180206c7e
+ size 3532
ReinforcedConcreteBeam_CEI_Avg_Time.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aae5672638e75081965450635cc15310f90ee167c3264399bee07afc2ad3a58d
+ size 3472
Reinforcement.png ADDED
Rosen_PFN4BO.py ADDED
@@ -0,0 +1,442 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import contextlib
2
+ import torch
3
+ import scipy
4
+ import math
5
+ from sklearn.preprocessing import power_transform, PowerTransformer, StandardScaler
6
+
7
+ from torchvision.transforms.functional import to_tensor
8
+ from pfns4bo import transformer
9
+ from pfns4bo import bar_distribution
10
+
11
+ import torch
12
+ import numpy as np
13
+
14
+ import pfns4bo
15
+ from pfns4bo.scripts.acquisition_functions import TransformerBOMethod
16
+
17
+
18
+ import warnings
19
+ warnings.filterwarnings('ignore')
20
+
21
+ device = torch.device("cpu")
22
+ # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
23
+ dtype = torch.float32
24
+
25
+
26
+ from sklearn.utils import resample
27
+
28
+ @torch.enable_grad()
29
+ def Rosen_PFN(model_name,
30
+ trained_X,
31
+ trained_Y,
32
+ X_pen,
33
+ trasform_type,
34
+ what_do_you_want
35
+ ):
36
+
37
+ PFN = TransformerBOMethod(torch.load(model_name).requires_grad_(False), device=device)
38
+
39
+ # X_pen.requires_grad_(True)
40
+
41
+ # with torch.no_grad():
42
+
43
+
44
+ dim = trained_X.shape[1]
45
+
46
+ x_given = trained_X
47
+ x_eval = X_pen
48
+ x_predict = torch.cat([x_given, x_eval], dim=0)
49
+ x_full_feed = torch.cat([x_given, x_given, x_eval], dim=0).unsqueeze(1)
50
+
51
+
52
+
53
+ if trasform_type== 'std':
54
+ pt = StandardScaler()
55
+ pt.fit(trained_Y)
56
+ PT_trained_Y = pt.transform(trained_Y)
57
+ trained_Y = to_tensor(PT_trained_Y).to(torch.float32).reshape(trained_Y.shape)
58
+ elif trasform_type== 'power':
59
+ pt = PowerTransformer(method="yeo-johnson")
60
+ pt.fit(trained_Y.detach().numpy())
61
+ # PT_trained_Y = pt.transform(trained_Y.detach().numpy())
62
+ # trained_Y = to_tensor(PT_trained_Y).to(torch.float32).reshape(trained_Y.shape)
63
+ # print(trained_Y.shape)
64
+
65
+ # print(trained_Y)
66
+ trained_Y, _ = general_power_transform(trained_Y,
67
+ trained_Y,
68
+ .0,
69
+ less_safe=False) #.squeeze(1)
70
+ # print(trained_Y.shape)
71
+ # .squeeze(1)
72
+
73
+
74
+ # y_given = general_power_transform(y_given.unsqueeze(1),
75
+ # y_given.unsqueeze(1),
76
+ # .0,
77
+ # less_safe=False).squeeze(1)
78
+
79
+ y_given = trained_Y
80
+
81
+ y_given = y_given.reshape(-1)
82
+ y_full_feed = y_given.unsqueeze(1)
83
+
84
+ criterion: bar_distribution.BarDistribution = PFN.model.criterion
85
+
86
+ style = None
87
+ logits = PFN.model(
88
+ (style,
89
+ x_full_feed.repeat_interleave(dim=1, repeats=y_full_feed.shape[1]),
90
+ y_full_feed.repeat(1,x_full_feed.shape[1])),
91
+ single_eval_pos=len(x_given)
92
+ )
93
+
94
+ # logits = logits.softmax(-1).log_()
95
+ logits = logits.softmax(-1).log()
96
+
97
+ logits_given = logits[:len(x_given)]
98
+ logits_eval = logits[len(x_given):]
99
+
100
+ best_f = torch.max(y_given)
101
+
102
+ if what_do_you_want == 'mean':
103
+ output = criterion.mean(logits_eval)
104
+
105
+
106
+ if trasform_type== 'std' or trasform_type== 'power':
107
+
108
+ if pt.standardize:
109
+ XX = output.clone()
110
+ scale = torch.from_numpy(pt._scaler.scale_)
111
+ std_mean = torch.from_numpy(pt._scaler.mean_)
112
+ XX = torch_std_inverse_transform(XX, scale, std_mean)
113
+
114
+ for i, lmbda in enumerate(pt.lambdas_):
115
+ with np.errstate(invalid="ignore"): # hide NaN warnings
116
+ XX = torch_power_inverse_transform(XX, lmbda)
117
+ # print(XX)
118
+ return XX
119
+
120
+
121
+
122
+
123
+
124
+ # output = pt.inverse_transform(output)
125
+ # output = torch.from_numpy(output)
126
+
127
+
128
+ elif what_do_you_want == 'ei':
129
+ output = criterion.ei(logits_eval, best_f)
130
+
131
+ elif what_do_you_want == 'ucb':
132
+ acq_function = criterion.ucb
133
+ ucb_rest_prob = .05
134
+ if ucb_rest_prob is not None:
135
+ acq_function = lambda *args: criterion.ucb(*args, rest_prob=ucb_rest_prob)
136
+ output = acq_ensembling(acq_function(logits_eval, best_f))
137
+
138
+ elif what_do_you_want == 'variance':
139
+ output = criterion.variance(logits_eval)
140
+
141
+ elif what_do_you_want == 'mode':
142
+ output = criterion.mode(logits_eval)
143
+
144
+ elif what_do_you_want == 'ts':
145
+ mn = criterion.mean(logits_eval)
146
+
147
+
148
+ if trasform_type== 'std' or trasform_type== 'power':
149
+
150
+ if pt.standardize:
151
+ XX = mn.clone()
152
+ scale = torch.from_numpy(pt._scaler.scale_)
153
+ std_mean = torch.from_numpy(pt._scaler.mean_)
154
+ XX = torch_std_inverse_transform(XX, scale, std_mean)
155
+
156
+ for i, lmbda in enumerate(pt.lambdas_):
157
+ with np.errstate(invalid="ignore"): # hide NaN warnings
158
+ XX = torch_power_inverse_transform(XX, lmbda)
159
+
160
+ var = criterion.variance(logits_eval)
161
+
162
+ return XX, var
163
+
164
+ return output
165
+
166
+
167
+
168
+
169
+
170
+
171
+
172
+
173
+
174
+
175
+
176
+ def Rosen_PFN_Parallel(model_name,
+                        trained_X,
+                        trained_Y,
+                        GX,
+                        X_pen,
+                        trasform_type,
+                        what_do_you_want
+                        ):
+
+     PFN = TransformerBOMethod(torch.load(model_name), device=device)
+
+     with torch.no_grad():
+
+         dim = trained_X.shape[1]
+
+         x_given = trained_X
+         x_eval = X_pen
+         x_predict = torch.cat([x_given, x_eval], dim=0)
+         x_full_feed = torch.cat([x_given, x_given, x_eval], dim=0).unsqueeze(1)
+
+         y_given = trained_Y
+         y_given = y_given.reshape(-1)
+
+         ######################################################################
+         # Objective Power Transform
+         y_given, pt_y = general_power_transform(y_given.unsqueeze(1),
+                                                 y_given.unsqueeze(1),
+                                                 .0,
+                                                 less_safe=False)
+         y_given = y_given.squeeze(1)
+         ######################################################################
+
+         ######################################################################
+         # Constraints Power Transform
+         # Changes for Parallel:
+         GX = -GX
+         GX_t, pt_GX = general_power_transform(GX, GX, .0, less_safe=False)
+         G_thres, _ = general_power_transform(GX,
+                                              torch.zeros((1, GX.shape[1])).to(GX.device),
+                                              .0,
+                                              less_safe=False)
+         GX = GX_t
+         ######################################################################
+
+         y_full_feed = y_given.unsqueeze(1)
+
+         criterion: bar_distribution.BarDistribution = PFN.model.criterion
+
+         style = None
+         logits = PFN.model(
+             (style,
+              x_full_feed.repeat_interleave(dim=1, repeats=y_full_feed.shape[1] + GX.shape[1]),
+              torch.cat([y_full_feed, GX], dim=1).unsqueeze(2)),
+             single_eval_pos=len(x_given)
+         )
+
+         logits = logits.softmax(-1).log_()
+
+         logits_given = logits[:len(x_given)]
+         logits_eval = logits[len(x_given):]
+
+         best_f = torch.max(y_given)
+
+         objective_given = logits_given[:, 0, :].unsqueeze(1)
+         objective_eval = logits_eval[:, 0, :].unsqueeze(1)
+         constraint_given = logits_given[:, 1:, :]
+         constraint_eval = logits_eval[:, 1:, :]
+
+         if what_do_you_want == 'mean':
+             obj_output = criterion.mean(objective_eval)
+             con_output = criterion.mean(constraint_eval)
+             # return both directly; the shared `return output` below would be undefined here
+             return obj_output, con_output
+
+         elif what_do_you_want == 'ei':
+             # Changes for CEI
+
+             # Objective
+             tau = torch.max(y_given)
+             objective_acq_value = acq_ensembling(criterion.ei(objective_eval, tau))
+
+             # Constraints
+             constraints_acq_value = acq_ensembling(criterion.pi(constraint_eval[:, 0, :].unsqueeze(1), G_thres[0, 0].item()))
+             constraints_acq_value = constraints_acq_value.unsqueeze(1)
+
+             for jj in range(1, constraint_eval.shape[1]):
+                 next_constraints_acq_value = acq_ensembling(criterion.pi(constraint_eval[:, jj, :].unsqueeze(1), G_thres[0, jj].item()))
+                 next_constraints_acq_value = next_constraints_acq_value.unsqueeze(1)
+                 constraints_acq_value = torch.cat([constraints_acq_value, next_constraints_acq_value], dim=1)
+
+             return objective_acq_value, constraints_acq_value
+
+         elif what_do_you_want == 'variance':
+             output = criterion.variance(logits_eval)
+
+         elif what_do_you_want == 'mode':
+             output = criterion.mode(logits_eval)
+
+         elif what_do_you_want == 'cts':
+             obj_mnn = criterion.mean(objective_eval)
+             obj_mnn = pt_y.inverse_transform(obj_mnn)
+             obj_mnn = torch.from_numpy(obj_mnn)
+
+             con_mnn = criterion.mean(constraint_eval)
+             con_mnn = pt_GX.inverse_transform(con_mnn)
+             con_mnn = torch.from_numpy(-con_mnn)
+
+             obj_varr = criterion.variance(objective_eval)
+             con_varr = criterion.variance(constraint_eval)
+
+             return obj_mnn, obj_varr, con_mnn, con_varr
+
+         return output
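The 'ei' branch returns the objective's expected improvement together with per-constraint probabilities of feasibility. A common way to combine these into a single constrained-EI score is the product rule; a minimal sketch under that assumption (variable names are illustrative, and whether this exact combination is used elsewhere in the repo is an assumption):

    import torch

    # obj_ei: (m,) EI of the objective at m candidates
    # con_pi: (m, k) P(feasible) for each of k constraints
    obj_ei, con_pi = Rosen_PFN_Parallel(model_name, X, Y, GX, X_cand, 'power', 'ei')
    cei = obj_ei * con_pi.prod(dim=1)   # EI weighted by joint feasibility probability
    x_next = X_cand[torch.argmax(cei)]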
+ def acq_ensembling(acq_values):  # (points, ensemble dim)
+     return acq_values.max(1).values
+
+
+ def torch_std_inverse_transform(X, scale, mean):
+     # undo sklearn's StandardScaler in torch: x_orig = x * scale + mean
+     X *= scale
+     X += mean
+     return X
+
+
+ def torch_power_inverse_transform(x, lmbda):
+     # inverse of the Yeo-Johnson transform for a single lambda, applied in torch
+     out = torch.zeros_like(x)
+     pos = x >= 0
+
+     # when x >= 0
+     if abs(lmbda) < np.spacing(1.0):
+         out[pos] = torch.exp(x[pos]) - 1
+     else:  # lmbda != 0
+         out[pos] = torch.pow(x[pos] * lmbda + 1, 1 / lmbda) - 1
+
+     # when x < 0
+     if abs(lmbda - 2) > np.spacing(1.0):
+         out[~pos] = 1 - torch.pow(-(2 - lmbda) * x[~pos] + 1, 1 / (2 - lmbda))
+     else:  # lmbda == 2
+         out[~pos] = 1 - torch.exp(-x[~pos])
+
+     return out
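These two helpers mirror, in torch, the inverse of sklearn's PowerTransformer(standardize=True) pipeline (Yeo-Johnson followed by standardization), as used in the acquisition branches above. A quick round-trip sanity check, as one might run it (illustrative only; `pt._scaler` is the same private attribute the code above relies on):

    import numpy as np
    import torch
    from sklearn.preprocessing import PowerTransformer

    y = np.random.rand(20, 1) * 5 + 1
    pt = PowerTransformer(method='yeo-johnson', standardize=True).fit(y)
    z = torch.from_numpy(pt.transform(y))

    # undo standardization, then undo Yeo-Johnson, per feature
    z = torch_std_inverse_transform(z.clone(),
                                    torch.from_numpy(pt._scaler.scale_),
                                    torch.from_numpy(pt._scaler.mean_))
    for lmbda in pt.lambdas_:
        z = torch_power_inverse_transform(z, lmbda)
    print(torch.allclose(z, torch.from_numpy(y), atol=1e-5))  # expected: True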
+
+
+ ################################################################################
+ ## PFN defined functions
+ ################################################################################
+
+ def log01(x, eps=.0000001, input_between_zero_and_one=False):
+     logx = torch.log(x + eps)
+     if input_between_zero_and_one:
+         return (logx - math.log(eps)) / (math.log(1 + eps) - math.log(eps))
+     return (logx - logx.min(0)[0]) / (logx.max(0)[0] - logx.min(0)[0])
+
+
+ def log01_batch(x, eps=.0000001, input_between_zero_and_one=False):
+     x = x.repeat(1, x.shape[-1] + 1, 1)
+     for b in range(x.shape[-1]):
+         x[:, b, b] = log01(x[:, b, b], eps=eps, input_between_zero_and_one=input_between_zero_and_one)
+     return x
+
+
+ def lognormed_batch(x, eval_pos, eps=.0000001):
+     x = x.repeat(1, x.shape[-1] + 1, 1)
+     for b in range(x.shape[-1]):
+         logx = torch.log(x[:, b, b] + eps)
+         x[:, b, b] = (logx - logx[:eval_pos].mean(0)) / logx[:eval_pos].std(0)
+     return x
+
+
+ def _rank_transform(x_train, x):
+     assert len(x_train.shape) == len(x.shape) == 1
+     relative_to = torch.cat((torch.zeros_like(x_train[:1]), x_train.unique(sorted=True), torch.ones_like(x_train[-1:])), -1)
+     higher_comparison = (relative_to < x[..., None]).sum(-1).clamp(min=1)
+     pos_inside_interval = (x - relative_to[higher_comparison - 1]) / (relative_to[higher_comparison] - relative_to[higher_comparison - 1])
+     x_transformed = higher_comparison - 1 + pos_inside_interval
+     return x_transformed / (len(relative_to) - 1.)
+
+
+ def rank_transform(x_train, x):
+     assert x.shape[1] == x_train.shape[1], f"{x.shape=} and {x_train.shape=}"
+     # make sure everything is between 0 and 1
+     assert (x_train >= 0.).all() and (x_train <= 1.).all(), f"{x_train=}"
+     assert (x >= 0.).all() and (x <= 1.).all(), f"{x=}"
+     return_x = x.clone()
+     for feature_dim in range(x.shape[1]):
+         return_x[:, feature_dim] = _rank_transform(x_train[:, feature_dim], x[:, feature_dim])
+     return return_x
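rank_transform maps each feature through the empirical CDF of the training points, piecewise-linear between observed values and anchored at 0 and 1. A small illustrative call (values made up; the expected output follows from the knots [0, 0.1, 0.5, 0.9, 1]):

    import torch

    x_train = torch.tensor([[0.1], [0.5], [0.9]])
    x_query = torch.tensor([[0.5], [0.7]])
    print(rank_transform(x_train, x_query))
    # tensor([[0.5000], [0.6250]]) -- 0.5 lands on the middle knot,
    # 0.7 falls half-way between the 0.5 and 0.9 knots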
+
+
+ def general_power_transform(x_train, x_apply, eps, less_safe=False):
+     if eps > 0:
+         try:
+             pt = PowerTransformer(method='box-cox')
+             pt.fit(x_train.cpu() + eps)
+             x_out = torch.tensor(pt.transform(x_apply.cpu() + eps), dtype=x_apply.dtype, device=x_apply.device)
+         except Exception as e:
+             print(e)
+             x_out = x_apply - x_train.mean(0)
+     else:
+         pt = PowerTransformer(method='yeo-johnson')
+         if not less_safe and (x_train.std() > 1_000 or x_train.mean().abs() > 1_000):
+             # inputs are very large: normalize before fitting
+             x_apply = (x_apply - x_train.mean(0)) / x_train.std(0)
+             x_train = (x_train - x_train.mean(0)) / x_train.std(0)
+         try:
+             pt.fit(x_train.cpu().double())
+         except Exception as e:
+             # fitting failed: fall back to (standardized) centering, then retry
+             mean, std = x_train.mean(0), x_train.std(0)
+             if less_safe:
+                 x_train = (x_train - mean) / std
+                 x_apply = (x_apply - mean) / std
+             else:
+                 x_train = x_train - mean
+                 x_apply = x_apply - mean
+             pt.fit(x_train.cpu().double())
+         x_out = torch.tensor(pt.transform(x_apply.cpu()), dtype=x_apply.dtype, device=x_apply.device)
+     if torch.isnan(x_out).any() or torch.isinf(x_out).any():
+         print('WARNING: power transform failed')
+         print(f"{x_train=} and {x_apply=}")
+         x_out = x_apply - x_train.mean(0)
+     return x_out, pt
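general_power_transform fits a PowerTransformer on one set of values and applies it to another, falling back to simple centering when the fit fails. Typical use, as in Rosen_PFN_Parallel above (illustrative values):

    import torch

    Y = torch.rand(10, 1) * 100                                         # raw objective values
    Y_t, pt = general_power_transform(Y, Y, eps=.0, less_safe=False)    # yeo-johnson path
    Y_back = torch.from_numpy(pt.inverse_transform(Y_t.numpy()))        # undo it
    print(torch.allclose(Y_back, Y.double(), atol=1e-4))                # expected: True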
SpeedReducer.png ADDED
SpeedReducer_CEI_Avg_Obj.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1567f1e9557cb6d701605a2ec74c6e294c42a85c88ddf3c0f33e307bf7f9a07f
+ size 3684
SpeedReducer_CEI_Avg_Time.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9cfa75441b15a3e1b8dbdc1e4a074e7a3682c41c9a85924793c67a41bec86acd
+ size 3496
Test_formulation.png ADDED
Test_formulation_default.png ADDED
ThreeTruss.png ADDED
ThreeTruss_CEI_Avg_Obj.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b02d1c95ef3aee57fee8804a82119d9b68453e182184cf47970779742d059bed
+ size 2844
ThreeTruss_CEI_Avg_Time.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7e9b30e0f99096ab84b8a545d2c1f24b80cd2d0bce1df6bc7f268b32c88a5b4f
+ size 2912
WeldedBeam.png ADDED
WeldedBeam_CEI_Avg_Obj.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:efba0c05f0ac9803ee75caa3396983535bc3a104b47db2a3e463b1497ab5a93b
+ size 3164
WeldedBeam_CEI_Avg_Time.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:06225c2be0d11a0cb563f1eb525401d5f5536401694d7ed7e3f7179a1f51352b
+ size 3552
__pycache__/Rosen_PFN4BO.cpython-310.pyc ADDED
Binary file (8.11 kB). View file
final_models/Cyril_500features.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:43218823860a5ca71657fd25c50bcc1209c3c570bcbb9df9ed2822bbb9f6f9c8
+ size 239411934
final_models/Cyril_500features_800epoch_cpu.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:25884174687cfbde831badc4f1d05e94f860711dc3a07f4dde09930860e63603
+ size 239408346
final_models/Cyril_50features.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d5b680c4b72e72a33a21896885de7e8fba52c42612a6165a7cf60afede2e425d
+ size 107333480
final_models/hebo_morebudget_9_unused_features_3_userpriorperdim2_8.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ff2a4aa60feeca59e80f3b272d7b2ab521e1e82189469db494068de33dcaba17
+ size 107378616
final_models/heboplus_500features_retrain_epoch800_cpu.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:25884174687cfbde831badc4f1d05e94f860711dc3a07f4dde09930860e63603
+ size 239408346
final_models/model_hebo_morebudget_9_unused_features_3.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cc7f6f4b9b06e59987e42845b7b0d31ffa5b414b9eddfe14d88b25120e3cd4f8
+ size 107262245
final_models/model_sampled_warp_simple_mlp_for_hpob_46.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ebb2d4d7f419ca4617fdf85c663a69b3b0285fef91712e0dd69d5ab2d61754fd
+ size 56761718
pfns4bo/.ipynb_checkpoints/__init__-checkpoint.py ADDED
@@ -0,0 +1,50 @@
+ import os
+
+ model_path = 'final_models'
+
+ def prepare_models():
+     pfns4bo_dir = os.path.dirname(__file__)
+     model_names = ['hebo_morebudget_9_unused_features_3_userpriorperdim2_8.pt',
+                    'model_sampled_warp_simple_mlp_for_hpob_46.pt',
+                    'model_hebo_morebudget_9_unused_features_3.pt',]
+
+     for name in model_names:
+         weights_path = os.path.join(pfns4bo_dir, model_path, name)
+         compressed_weights_path = os.path.join(pfns4bo_dir, model_path, name + '.gz')
+         if not os.path.exists(weights_path):
+             if not os.path.exists(compressed_weights_path):
+                 print("Downloading", os.path.abspath(compressed_weights_path))
+                 import requests
+                 url = f'https://github.com/automl/PFNs4BO/raw/main/pfns4bo/final_models/{name + ".gz"}'
+                 r = requests.get(url, allow_redirects=True)
+                 os.makedirs(os.path.dirname(compressed_weights_path), exist_ok=True)
+                 with open(compressed_weights_path, 'wb') as f:
+                     f.write(r.content)
+             if os.path.exists(compressed_weights_path):
+                 print("Unzipping", name)
+                 os.system(f"gzip -dk {compressed_weights_path}")
+             else:
+                 print("Failed to find", compressed_weights_path)
+                 print("Make sure you have an internet connection to download the model automatically..")
+         if os.path.exists(weights_path):
+             print("Successfully located model at", weights_path)
+
+
+ model_dict = {
+     'hebo_plus_userprior_model': os.path.join(os.path.dirname(__file__), model_path,
+                                               'hebo_morebudget_9_unused_features_3_userpriorperdim2_8.pt'),
+     'hebo_plus_model': os.path.join(os.path.dirname(__file__), model_path,
+                                     'model_hebo_morebudget_9_unused_features_3.pt'),
+     'bnn_model': os.path.join(os.path.dirname(__file__), model_path, 'model_sampled_warp_simple_mlp_for_hpob_46.pt')
+ }
+
+
+ def __getattr__(name):
+     if name in model_dict:
+         if not os.path.exists(model_dict[name]):
+             print("Can't find", os.path.abspath(model_dict[name]), "thus unzipping/downloading models now.")
+             print("This might take a while..")
+             prepare_models()
+         return model_dict[name]
+     raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
pfns4bo/.ipynb_checkpoints/bar_distribution-checkpoint.py ADDED
@@ -0,0 +1,410 @@
+ from .utils import print_once
+
+ import torch
+ from torch import nn
+
+
+ class BarDistribution(nn.Module):
+     # borders should start with min and end with max; all values lie in (min, max) and are sorted
+     def __init__(self, borders: torch.Tensor, smoothing=.0, ignore_nan_targets=True):
+         '''
+         :param borders:
+         :param smoothing:
+         :param append_mean_pred: Whether to predict the mean of the other positions as a last output in forward;
+             enabled when y has a sequence length 1 shorter than logits, i.e. len(logits) == 1 + len(y)
+         '''
+         super().__init__()
+         assert len(borders.shape) == 1
+         self.register_buffer('borders', borders)
+         self.register_buffer('smoothing', torch.tensor(smoothing))
+         self.register_buffer('bucket_widths', self.borders[1:] - self.borders[:-1])
+         full_width = self.bucket_widths.sum()
+
+         assert (1 - (full_width / (self.borders[-1] - self.borders[0]))).abs() < 1e-2, \
+             f'diff: {full_width - (self.borders[-1] - self.borders[0])} with {full_width} {self.borders[-1]} {self.borders[0]}'
+         assert (self.bucket_widths >= 0.0).all(), "Please provide sorted borders!"  # this also allows size-zero buckets
+         self.num_bars = len(borders) - 1
+         self.ignore_nan_targets = ignore_nan_targets
+         self.to(borders.device)
+
+     def __setstate__(self, state):
+         super().__setstate__(state)
+         self.__dict__.setdefault('append_mean_pred', False)
+
+     def map_to_bucket_idx(self, y):
+         target_sample = torch.searchsorted(self.borders, y) - 1
+         target_sample[y == self.borders[0]] = 0
+         target_sample[y == self.borders[-1]] = self.num_bars - 1
+         return target_sample
+
+     def ignore_init(self, y):
+         ignore_loss_mask = torch.isnan(y)
+         if ignore_loss_mask.any():
+             if not self.ignore_nan_targets:
+                 raise ValueError(f'Found NaN in target {y}')
+             print_once("A loss was ignored because there was nan target.")
+             y[ignore_loss_mask] = self.borders[0]  # just a default value; the loss at these positions is ignored anyway
+         return ignore_loss_mask
+
+     def compute_scaled_log_probs(self, logits):
+         # this is equivalent to log(p(y)) of the density p
+         bucket_log_probs = torch.log_softmax(logits, -1)
+         scaled_bucket_log_probs = bucket_log_probs - torch.log(self.bucket_widths)
+         return scaled_bucket_log_probs
+
+     def forward(self, logits, y, mean_prediction_logits=None):
+         # gives the negative log density (the _loss_), y: T x B, logits: T x B x self.num_bars
+         y = y.clone().view(*logits.shape[:-1])  # no trailing one dimension
+         ignore_loss_mask = self.ignore_init(y)
+         target_sample = self.map_to_bucket_idx(y)
+         assert (target_sample >= 0).all() and (target_sample < self.num_bars).all(), \
+             f'y {y} not in support set for borders (min_y, max_y) {self.borders}'
+         assert logits.shape[-1] == self.num_bars, f'{logits.shape[-1]} vs {self.num_bars}'
+
+         scaled_bucket_log_probs = self.compute_scaled_log_probs(logits)
+         nll_loss = -scaled_bucket_log_probs.gather(-1, target_sample[..., None]).squeeze(-1)  # T x B
+
+         if mean_prediction_logits is not None:
+             if not self.training:
+                 print('Calculating loss incl mean prediction loss for nonmyopic BO.')
+             scaled_mean_log_probs = self.compute_scaled_log_probs(mean_prediction_logits)
+             nll_loss = torch.cat((nll_loss, self.mean_loss(logits, scaled_mean_log_probs)), 0)
+
+         smooth_loss = -scaled_bucket_log_probs.mean(dim=-1)
+         smoothing = self.smoothing if self.training else 0.
+         loss = (1. - smoothing) * nll_loss + smoothing * smooth_loss
+         loss[ignore_loss_mask] = 0.
+         return loss
+
+     def mean_loss(self, logits, scaled_mean_logits):
+         assert (len(logits.shape) == 3) and (len(scaled_mean_logits.shape) == 2), \
+             (len(logits.shape), len(scaled_mean_logits.shape))
+         means = self.mean(logits).detach()  # T x B
+         target_mean = self.map_to_bucket_idx(means).clamp_(0, self.num_bars - 1)  # T x B
+         return -scaled_mean_logits.gather(1, target_mean.T).mean(1).unsqueeze(0)  # 1 x B
+
+     def mean(self, logits):
+         bucket_means = self.borders[:-1] + self.bucket_widths / 2
+         p = torch.softmax(logits, -1)
+         return p @ bucket_means
+
+     def median(self, logits):
+         return self.icdf(logits, 0.5)
+
+     def icdf(self, logits, left_prob):
+         """
+         Implementation of the quantile function
+         :param logits: Tensor of any shape, with the last dimension being logits
+         :param left_prob: float: The probability mass to the left of the result.
+         :return: Position with `left_prob` probability weight to the left.
+         """
+         probs = logits.softmax(-1)
+         cumprobs = torch.cumsum(probs, -1)
+         idx = torch.searchsorted(cumprobs, left_prob * torch.ones(*cumprobs.shape[:-1], 1, device=logits.device))\
+             .squeeze(-1).clamp(0, cumprobs.shape[-1] - 1)  # this might not do the right thing for outliers
+         cumprobs = torch.cat([torch.zeros(*cumprobs.shape[:-1], 1, device=logits.device), cumprobs], -1)
+
+         rest_prob = left_prob - cumprobs.gather(-1, idx[..., None]).squeeze(-1)
+         left_border = self.borders[idx]
+         right_border = self.borders[idx + 1]
+         return left_border + (right_border - left_border) * rest_prob / probs.gather(-1, idx[..., None]).squeeze(-1)
+
+     def quantile(self, logits, center_prob=.682):
+         side_probs = (1. - center_prob) / 2
+         return torch.stack((self.icdf(logits, side_probs), self.icdf(logits, 1. - side_probs)), -1)
+
111
+ def ucb(self, logits, best_f, rest_prob=(1-.682)/2, maximize=True):
112
+ """
113
+ UCB utility. Rest Prob is the amount of utility above (below) the confidence interval that is ignored.
114
+ Higher rest_prob is equivalent to lower beta in the standard GP-UCB formulation.
115
+ :param logits: Logits, as returned by the Transformer.
116
+ :param rest_prob: The amount of utility above (below) the confidence interval that is ignored.
117
+ The default is equivalent to using GP-UCB with `beta=1`.
118
+ To get the corresponding `beta`, where `beta` is from
119
+ the standard GP definition of UCB `ucb_utility = mean + beta * std`,
120
+ you can use this computation: `beta = math.sqrt(2)*torch.erfinv(torch.tensor(2*(1-rest_prob)-1))`.
121
+ :param maximize:
122
+ :return: utility
123
+ """
124
+ if maximize:
125
+ rest_prob = 1 - rest_prob
126
+ return self.icdf(logits, rest_prob)
127
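The ucb docstring above relates rest_prob to the beta of `mean + beta * std`. A quick numeric check of that mapping (illustrative):

    import math
    import torch

    rest_prob = (1 - .682) / 2
    beta = math.sqrt(2) * torch.erfinv(torch.tensor(2 * (1 - rest_prob) - 1))
    print(beta)  # ~1.0, i.e. the default rest_prob matches GP-UCB with beta = 1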
+
128
+ def mode(self, logits):
129
+ mode_inds = logits.argmax(-1)
130
+ bucket_means = self.borders[:-1] + self.bucket_widths/2
131
+ return bucket_means[mode_inds]
132
+
133
+ def ei(self, logits, best_f, maximize=True): # logits: evaluation_points x batch x feature_dim
134
+ bucket_diffs = self.borders[1:] - self.borders[:-1]
135
+ assert maximize
136
+ if not torch.is_tensor(best_f) or not len(best_f.shape):
137
+ best_f = torch.full(logits[...,0].shape, best_f, device=logits.device)
138
+
139
+ best_f = best_f[..., None].repeat(*[1]*len(best_f.shape), logits.shape[-1])
140
+ clamped_best_f = best_f.clamp(self.borders[:-1], self.borders[1:])
141
+
142
+ #bucket_contributions = (best_f[...,None] < self.borders[:-1]).float() * bucket_means
143
+ # true bucket contributions
144
+ bucket_contributions = ((self.borders[1:]**2-clamped_best_f**2)/2 - best_f*(self.borders[1:] - clamped_best_f))/bucket_diffs
145
+
146
+ p = torch.softmax(logits, -1)
147
+ return torch.einsum("...b,...b->...", p, bucket_contributions)
148
+
149
+ def pi(self, logits, best_f, maximize=True):# logits: evaluation_points x batch x feature_dim
150
+ """
151
+ Acquisition Function: Probability of Improvement
152
+ :param logits: as returned by Transformer
153
+ :param best_f: best evaluation so far (the incumbent)
154
+ :param maximize: whether to maximize
155
+ :return: utility
156
+ """
157
+ assert maximize is True
158
+ if not torch.is_tensor(best_f) or not len(best_f.shape):
159
+ best_f = torch.full(logits[...,0].shape, best_f, device=logits.device)
160
+ p = torch.softmax(logits, -1)
161
+ border_widths = self.borders[1:] - self.borders[:-1]
162
+ factor = 1. - ((best_f[...,None] - self.borders[:-1]) / border_widths).clamp(0., 1.)
163
+ return (p * factor).sum(-1)
164
+
165
+
166
+ def mean_of_square(self, logits):
167
+ """
168
+ Computes E[x^2].
169
+ :param logits: Output of the model.
170
+ """
171
+ left_borders = self.borders[:-1]
172
+ right_borders = self.borders[1:]
173
+ bucket_mean_of_square = (left_borders.square() + right_borders.square() + left_borders*right_borders)/3.
174
+ p = torch.softmax(logits, -1)
175
+ return p @ bucket_mean_of_square
176
+
177
+ def variance(self, logits):
178
+ return self.mean_of_square(logits) - self.mean(logits).square()
179
+
180
+ def pi(self, logits, best_f, maximize=True):# logits: evaluation_points x batch x feature_dim
181
+ """
182
+ Acquisition Function: Probability of Improvement
183
+ :param logits: as returned by Transformer
184
+ :param best_f: best evaluation so far (the incumbent)
185
+ :param maximize: whether to maximize
186
+ :return: utility
187
+ """
188
+ assert maximize is True
189
+ p = torch.softmax(logits, -1)
190
+ border_widths = self.borders[1:] - self.borders[:-1]
191
+ factor = 1. - ((best_f - self.borders[:-1]) / border_widths).clamp(0., 1.)
192
+ return (p * factor).sum(-1)
193
+
194
+
195
+ def mean_of_square(self, logits):
196
+ """
197
+ Computes E[x^2].
198
+ :param logits: Output of the model.
199
+ """
200
+ left_borders = self.borders[:-1]
201
+ right_borders = self.borders[1:]
202
+ bucket_mean_of_square = (left_borders.square() + right_borders.square() + left_borders*right_borders)/3.
203
+ p = torch.softmax(logits, -1)
204
+ return p @ bucket_mean_of_square
205
+
206
+ def variance(self, logits):
207
+ return self.mean_of_square(logits) - self.mean(logits).square()
208
+
209
+
210
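A minimal end-to-end sketch of the bar (Riemann) distribution, as one might use it; this usage is assumed rather than taken from the repo. Borders are built from sample ys via get_bucket_limits (defined later in this file), then mean and EI are read off uniform logits:

    import torch

    borders = get_bucket_limits(num_outputs=10, ys=torch.randn(1000))
    bd = BarDistribution(borders=borders)
    logits = torch.zeros(5, 10)              # 5 points, 10 buckets, uniform beliefs
    print(bd.mean(logits).shape)             # torch.Size([5])
    print(bd.ei(logits, best_f=0.).shape)    # torch.Size([5])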
+ class FullSupportBarDistribution(BarDistribution):
+     @staticmethod
+     def halfnormal_with_p_weight_before(range_max, p=.5):
+         s = range_max / torch.distributions.HalfNormal(torch.tensor(1.)).icdf(torch.tensor(p))
+         return torch.distributions.HalfNormal(s)
+
+     def forward(self, logits, y, mean_prediction_logits=None):
+         # gives the negative log density (the _loss_), y: T x B, logits: T x B x self.num_bars
+         assert self.num_bars > 1
+         y = y.clone().view(len(y), -1)  # no trailing one dimension
+         ignore_loss_mask = self.ignore_init(y)  # alters y
+         target_sample = self.map_to_bucket_idx(y)  # shape: T x B (same as y)
+         target_sample.clamp_(0, self.num_bars - 1)
+
+         assert logits.shape[-1] == self.num_bars, f'{logits.shape[-1]} vs {self.num_bars}'
+         assert (target_sample >= 0).all() and (target_sample < self.num_bars).all(), \
+             f'y {y} not in support set for borders (min_y, max_y) {self.borders}'
+         # ignore all positions with nan values
+
+         scaled_bucket_log_probs = self.compute_scaled_log_probs(logits)
+         assert len(scaled_bucket_log_probs) == len(target_sample), (len(scaled_bucket_log_probs), len(target_sample))
+         log_probs = scaled_bucket_log_probs.gather(-1, target_sample.unsqueeze(-1)).squeeze(-1)
+
+         side_normals = (self.halfnormal_with_p_weight_before(self.bucket_widths[0]),
+                         self.halfnormal_with_p_weight_before(self.bucket_widths[-1]))
+
+         log_probs[target_sample == 0] += side_normals[0].log_prob((self.borders[1] - y[target_sample == 0]).clamp(min=.00000001)) + torch.log(self.bucket_widths[0])
+         log_probs[target_sample == self.num_bars - 1] += side_normals[1].log_prob((y[target_sample == self.num_bars - 1] - self.borders[-2]).clamp(min=.00000001)) + torch.log(self.bucket_widths[-1])
+
+         nll_loss = -log_probs
+
+         if mean_prediction_logits is not None:
+             assert not ignore_loss_mask.any(), "Ignoring examples is not implemented with mean pred."
+             if not self.training:
+                 print('Calculating loss incl mean prediction loss for nonmyopic BO.')
+             if not torch.is_grad_enabled():
+                 print("Warning: loss is not correct in absolute terms, only the gradient is right, when using `append_mean_pred`.")
+             scaled_mean_log_probs = self.compute_scaled_log_probs(mean_prediction_logits)
+             nll_loss = torch.cat((nll_loss, self.mean_loss(logits, scaled_mean_log_probs)), 0)
+
+         if self.smoothing:
+             smooth_loss = -scaled_bucket_log_probs.mean(dim=-1)
+             smoothing = self.smoothing if self.training else 0.
+             nll_loss = (1. - smoothing) * nll_loss + smoothing * smooth_loss
+
+         if ignore_loss_mask.any():
+             nll_loss[ignore_loss_mask] = 0.
+
+         return nll_loss
+
+     def mean(self, logits):
+         bucket_means = self.borders[:-1] + self.bucket_widths / 2
+         p = torch.softmax(logits, -1)
+         side_normals = (self.halfnormal_with_p_weight_before(self.bucket_widths[0]),
+                         self.halfnormal_with_p_weight_before(self.bucket_widths[-1]))
+         bucket_means[0] = -side_normals[0].mean + self.borders[1]
+         bucket_means[-1] = side_normals[1].mean + self.borders[-2]
+         return p @ bucket_means.to(logits.device)
+
+     def mean_of_square(self, logits):
+         """
+         Computes E[x^2].
+         :param logits: Output of the model.
+         """
+         left_borders = self.borders[:-1]
+         right_borders = self.borders[1:]
+         bucket_mean_of_square = (left_borders.square() + right_borders.square() + left_borders * right_borders) / 3.
+         side_normals = (self.halfnormal_with_p_weight_before(self.bucket_widths[0]),
+                         self.halfnormal_with_p_weight_before(self.bucket_widths[-1]))
+         bucket_mean_of_square[0] = side_normals[0].variance + (-side_normals[0].mean + self.borders[1]).square()
+         # E[x^2] = Var + E[x]^2; uses the side-normal mean, matching mean() above
+         bucket_mean_of_square[-1] = side_normals[1].variance + (side_normals[1].mean + self.borders[-2]).square()
+         p = torch.softmax(logits, -1)
+         return p @ bucket_mean_of_square
+
+     def pi(self, logits, best_f, maximize=True):  # logits: evaluation_points x batch x feature_dim
+         """
+         Acquisition Function: Probability of Improvement
+         :param logits: as returned by Transformer (evaluation_points x batch x feature_dim)
+         :param best_f: best evaluation so far (the incumbent)
+         :param maximize: whether to maximize
+         :return: utility
+         """
+         assert maximize is True
+         if not torch.is_tensor(best_f) or not len(best_f.shape):
+             best_f = torch.full(logits[..., 0].shape, best_f, device=logits.device)  # evaluation_points x batch
+         assert best_f.shape == logits[..., 0].shape, f"best_f.shape: {best_f.shape}, logits.shape: {logits.shape}"
+         p = torch.softmax(logits, -1)  # evaluation_points x batch
+         border_widths = self.borders[1:] - self.borders[:-1]
+         factor = 1. - ((best_f[..., None] - self.borders[:-1]) / border_widths).clamp(0., 1.)  # evaluation_points x batch x num_bars
+
+         side_normals = (self.halfnormal_with_p_weight_before(self.bucket_widths[0]),
+                         self.halfnormal_with_p_weight_before(self.bucket_widths[-1]))
+         position_in_side_normals = (-(best_f - self.borders[1]).clamp(max=0.),
+                                     (best_f - self.borders[-2]).clamp(min=0.))  # evaluation_points x batch
+         factor[..., 0] = 0.
+         factor[..., 0][position_in_side_normals[0] > 0.] = side_normals[0].cdf(position_in_side_normals[0][position_in_side_normals[0] > 0.])
+         factor[..., -1] = 1.
+         factor[..., -1][position_in_side_normals[1] > 0.] = 1. - side_normals[1].cdf(position_in_side_normals[1][position_in_side_normals[1] > 0.])
+         return (p * factor).sum(-1)
+
+     def ei_for_halfnormal(self, scale, best_f, maximize=True):
+         """
+         This is the EI for a standard normal distribution with mean 0 and variance `scale` times 2.
+         Which is the same as the half normal EI.
+         I tested this with MC approximation:
+         ei_for_halfnormal = lambda scale, best_f: (torch.distributions.HalfNormal(torch.tensor(scale)).sample((10_000_000,)) - best_f).clamp(min=0.).mean()
+         print([(ei_for_halfnormal(scale, best_f), FullSupportBarDistribution().ei_for_halfnormal(scale, best_f)) for scale in [0.1, 1., 10.] for best_f in [.1, 10., 4.]])
+         :param scale:
+         :param best_f:
+         :param maximize:
+         :return:
+         """
+         assert maximize
+         mean = torch.tensor(0.)
+         u = (mean - best_f) / scale
+         normal = torch.distributions.Normal(torch.zeros_like(u), torch.ones_like(u))
+         try:
+             ucdf = normal.cdf(u)
+         except ValueError:
+             print(f"u: {u}, best_f: {best_f}, scale: {scale}")
+             raise
+         updf = torch.exp(normal.log_prob(u))
+         normal_ei = scale * (updf + u * ucdf)
+         return 2 * normal_ei
+
+     def ei(self, logits, best_f, maximize=True):  # logits: evaluation_points x batch x feature_dim
+         if torch.isnan(logits).any():
+             raise ValueError(f"logits contains NaNs: {logits}")
+         bucket_diffs = self.borders[1:] - self.borders[:-1]
+         assert maximize
+         if not torch.is_tensor(best_f) or not len(best_f.shape):
+             best_f = torch.full(logits[..., 0].shape, best_f, device=logits.device)
+         assert best_f.shape == logits[..., 0].shape, f"best_f.shape: {best_f.shape}, logits.shape: {logits.shape}"
+
+         best_f_per_logit = best_f[..., None].repeat(*[1] * len(best_f.shape), logits.shape[-1])
+         clamped_best_f = best_f_per_logit.clamp(self.borders[:-1], self.borders[1:])
+
+         # true bucket contributions
+         bucket_contributions = ((self.borders[1:]**2 - clamped_best_f**2) / 2 - best_f_per_logit * (self.borders[1:] - clamped_best_f)) / bucket_diffs
+
+         # extra handling for the unbounded side buckets
+         side_normals = (self.halfnormal_with_p_weight_before(self.bucket_widths[0]),
+                         self.halfnormal_with_p_weight_before(self.bucket_widths[-1]))
+         position_in_side_normals = (-(best_f - self.borders[1]).clamp(max=0.),
+                                     (best_f - self.borders[-2]).clamp(min=0.))  # evaluation_points x batch
+
+         bucket_contributions[..., -1] = self.ei_for_halfnormal(side_normals[1].scale, position_in_side_normals[1])
+
+         bucket_contributions[..., 0] = self.ei_for_halfnormal(side_normals[0].scale, torch.zeros_like(position_in_side_normals[0])) \
+             - self.ei_for_halfnormal(side_normals[0].scale, position_in_side_normals[0])
+
+         p = torch.softmax(logits, -1)
+         return torch.einsum("...b,...b->...", p, bucket_contributions)
+
+
+ def get_bucket_limits(num_outputs: int, full_range: tuple = None, ys: torch.Tensor = None, verbose: bool = False):
+     assert (ys is None) != (full_range is None), 'Either full_range or ys must be passed.'
+
+     if ys is not None:
+         ys = ys.flatten()
+         ys = ys[~torch.isnan(ys)]
+         if len(ys) % num_outputs:
+             ys = ys[:-(len(ys) % num_outputs)]
+         print(f'Using {len(ys)} y evals to estimate {num_outputs} buckets. Cut off the last {len(ys) % num_outputs} ys.')
+         ys_per_bucket = len(ys) // num_outputs
+         if full_range is None:
+             full_range = (ys.min(), ys.max())
+         else:
+             assert full_range[0] <= ys.min() and full_range[1] >= ys.max(), f'full_range {full_range} not in range of ys {ys.min(), ys.max()}'
+             full_range = torch.tensor(full_range)
+         ys_sorted, ys_order = ys.sort(0)
+         bucket_limits = (ys_sorted[ys_per_bucket - 1::ys_per_bucket][:-1] + ys_sorted[ys_per_bucket::ys_per_bucket]) / 2
+         if verbose:
+             print(f'Using {len(ys)} y evals to estimate {num_outputs} buckets. Cut off the last {len(ys) % num_outputs} ys.')
+             print(full_range)
+         bucket_limits = torch.cat([full_range[0].unsqueeze(0), bucket_limits, full_range[1].unsqueeze(0)], 0)
+
+     else:
+         class_width = (full_range[1] - full_range[0]) / num_outputs
+         bucket_limits = torch.cat([full_range[0] + torch.arange(num_outputs).float() * class_width, torch.tensor(full_range[1]).unsqueeze(0)], 0)
+
+     assert len(bucket_limits) - 1 == num_outputs, f'len(bucket_limits) - 1 == {len(bucket_limits) - 1} != {num_outputs} == num_outputs'
+     assert full_range[0] == bucket_limits[0], f'{full_range[0]} != {bucket_limits[0]}'
+     assert full_range[-1] == bucket_limits[-1], f'{full_range[-1]} != {bucket_limits[-1]}'
+
+     return bucket_limits
+
+
+ def get_custom_bar_dist(borders, criterion):
+     # Tested that a bar_dist with borders 0.54 (-> softplus 1.0) yields the same bar distribution as the passed one.
+     borders_ = torch.nn.functional.softplus(borders) + 0.001
+     borders_ = torch.cumsum(torch.cat([criterion.borders[0:1], criterion.bucket_widths]) * borders_, 0)
+     criterion_ = criterion.__class__(borders=borders_, handle_nans=criterion.handle_nans)
+     return criterion_
pfns4bo/.ipynb_checkpoints/lost_functions-checkpoint.py ADDED
@@ -0,0 +1,177 @@
+ #!/usr/bin/env python3
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ #
+ # This source code is licensed under the MIT license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ r"""
+ A converter that simplifies using numpy-based optimizers with generic torch
+ `nn.Module` classes. This enables using a `scipy.optimize.minimize` optimizer
+ for optimizing module parameters.
+ """
+
+ from __future__ import annotations
+
+ from collections import OrderedDict
+ from math import inf
+ from numbers import Number
+ from typing import Dict, List, Optional, Set, Tuple
+ from warnings import warn
+
+ import numpy as np
+ import torch
+ from botorch.optim.utils import (
+     _get_extra_mll_args,
+     _handle_numerical_errors,
+     get_name_filter,
+     get_parameters_and_bounds,
+     TorchAttr,
+ )
+ from gpytorch.mlls import MarginalLogLikelihood
+ from torch.nn import Module
+
+
+ def module_to_array(
+     module: Module,
+     bounds: Optional[Dict[str, Tuple[Optional[float], Optional[float]]]] = None,
+     exclude: Optional[Set[str]] = None,
+ ) -> Tuple[np.ndarray, Dict[str, TorchAttr], Optional[np.ndarray]]:
+     r"""Extract named parameters from a module into a numpy array.
+
+     Only extracts parameters with requires_grad, since it is meant for optimizing.
+
+     Args:
+         module: A module with parameters. May specify parameter constraints in
+             a `named_parameters_and_constraints` method.
+         bounds: A dictionary mapping parameter names to tuples
+             of lower and upper bounds. Bounds specified here take precedence
+             over bounds on the same parameters specified in the constraints
+             registered with the module.
+         exclude: A list of parameter names that are to be excluded from extraction.
+
+     Returns:
+         3-element tuple containing
+         - The parameter values as a numpy array.
+         - An ordered dictionary with the name and tensor attributes of each
+           parameter.
+         - A `2 x n_params` numpy array with lower and upper bounds if at least
+           one constraint is finite, and None otherwise.
+
+     Example:
+         >>> mll = ExactMarginalLogLikelihood(model.likelihood, model)
+         >>> parameter_array, property_dict, bounds_out = module_to_array(mll)
+     """
+     warn(
+         "`module_to_array` is marked for deprecation, consider using "
+         "`get_parameters_and_bounds`, `get_parameters_as_ndarray_1d`, or "
+         "`get_bounds_as_ndarray` instead.",
+         DeprecationWarning,
+     )
+     param_dict, bounds_dict = get_parameters_and_bounds(
+         module=module,
+         name_filter=None if exclude is None else get_name_filter(exclude),
+         requires_grad=True,
+     )
+     if bounds is not None:
+         bounds_dict.update(bounds)
+
+     # Record tensor metadata and read parameter values to the tape
+     param_tape: List[Number] = []
+     property_dict = OrderedDict()
+     with torch.no_grad():
+         for name, param in param_dict.items():
+             property_dict[name] = TorchAttr(param.shape, param.dtype, param.device)
+             param_tape.extend(param.view(-1).cpu().double().tolist())
+
+     # Extract lower and upper bounds
+     start = 0
+     bounds_np = None
+     params_np = np.asarray(param_tape)
+     for name, param in param_dict.items():
+         numel = param.numel()
+         if name in bounds_dict:
+             for row, bound in enumerate(bounds_dict[name]):
+                 if bound is None:
+                     continue
+
+                 if torch.is_tensor(bound):
+                     if (bound == (2 * row - 1) * inf).all():
+                         continue
+                     bound = bound.detach().cpu()
+
+                 elif bound == (2 * row - 1) * inf:
+                     continue
+
+                 if bounds_np is None:
+                     bounds_np = np.full((2, len(params_np)), ((-inf,), (inf,)))
+
+                 bounds_np[row, start : start + numel] = bound
+         start += numel
+
+     return params_np, property_dict, bounds_np
+
+
+ def set_params_with_array(
+     module: Module, x: np.ndarray, property_dict: Dict[str, TorchAttr]
+ ) -> Module:
+     r"""Set module parameters with values from numpy array.
+
+     Args:
+         module: Module with parameters to be set
+         x: Numpy array with parameter values
+         property_dict: Dictionary of parameter names and torch attributes as
+             returned by module_to_array.
+
+     Returns:
+         Module: module with parameters updated in-place.
+
+     Example:
+         >>> mll = ExactMarginalLogLikelihood(model.likelihood, model)
+         >>> parameter_array, property_dict, bounds_out = module_to_array(mll)
+         >>> parameter_array += 0.1  # perturb parameters (for example only)
+         >>> mll = set_params_with_array(mll, parameter_array, property_dict)
+     """
+     warn(
+         "`_set_params_with_array` is marked for deprecation, consider using "
+         "`set_parameters_from_ndarray_1d` instead.",
+         DeprecationWarning,
+     )
+     param_dict = OrderedDict(module.named_parameters())
+     start_idx = 0
+     for p_name, attrs in property_dict.items():
+         # Construct the new tensor
+         if len(attrs.shape) == 0:  # deal with scalar tensors
+             end_idx = start_idx + 1
+             new_data = torch.tensor(
+                 x[start_idx], dtype=attrs.dtype, device=attrs.device
+             )
+         else:
+             end_idx = start_idx + np.prod(attrs.shape)
+             new_data = torch.tensor(
+                 x[start_idx:end_idx], dtype=attrs.dtype, device=attrs.device
+             ).view(*attrs.shape)
+         start_idx = end_idx
+         # Update corresponding parameter in-place. Disable autograd to update.
+         param_dict[p_name].requires_grad_(False)
+         param_dict[p_name].copy_(new_data)
+         param_dict[p_name].requires_grad_(True)
+     return module
pfns4bo/.ipynb_checkpoints/transformer-checkpoint.py ADDED
@@ -0,0 +1,327 @@
+ import math
+ from typing import Optional
+
+ import torch
+ import torch.nn as nn
+ from torch import Tensor
+ from torch.nn import Module, TransformerEncoder
+
+ from .layer import TransformerEncoderLayer, _get_activation_fn
+ from .utils import SeqBN, bool_mask_to_att_mask
+
+
+ class TransformerModel(nn.Module):
+     def __init__(self, encoder, ninp, nhead, nhid, nlayers, dropout=0.0, style_encoder=None, y_encoder=None,
+                  pos_encoder=None, decoder_dict=None, input_normalization=False, init_method=None, pre_norm=False,
+                  activation='gelu', recompute_attn=False, num_global_att_tokens=0, full_attention=False,
+                  all_layers_same_init=False, efficient_eval_masking=True, decoder_once_dict=None, return_all_outputs=False,
+                  save_trainingset_representations=False):
+         super().__init__()
+         self.model_type = 'Transformer'
+         encoder_layer_creator = lambda: TransformerEncoderLayer(ninp, nhead, nhid, dropout, activation=activation,
+                                                                 pre_norm=pre_norm, recompute_attn=recompute_attn,
+                                                                 save_trainingset_representations=save_trainingset_representations)
+         self.transformer_encoder = TransformerEncoder(encoder_layer_creator(), nlayers) \
+             if all_layers_same_init else TransformerEncoderDiffInit(encoder_layer_creator, nlayers)
+         self.ninp = ninp
+         self.encoder = encoder
+         self.y_encoder = y_encoder
+         self.pos_encoder = pos_encoder
+         self.return_all_outputs = return_all_outputs
+
+         def make_decoder_dict(decoder_description_dict):
+             if decoder_description_dict is None or len(decoder_description_dict) == 0:
+                 return None
+             initialized_decoder_dict = {}
+             for decoder_key in decoder_description_dict:
+                 decoder_model, decoder_n_out = decoder_description_dict[decoder_key]
+                 if decoder_model is None:
+                     initialized_decoder_dict[decoder_key] = nn.Sequential(nn.Linear(ninp, nhid), nn.GELU(), nn.Linear(nhid, decoder_n_out))
+                 else:
+                     initialized_decoder_dict[decoder_key] = decoder_model(ninp, nhid, decoder_n_out)
+                 print('Initialized decoder for', decoder_key, 'with', decoder_description_dict[decoder_key], ' and nout', decoder_n_out)
+             return torch.nn.ModuleDict(initialized_decoder_dict)
+
+         self.decoder_dict = make_decoder_dict(decoder_dict)
+         self.decoder_dict_once = make_decoder_dict(decoder_once_dict)
+
+         # N(0,1) is the initialization as the default of nn.Embedding
+         self.decoder_dict_once_embeddings = torch.nn.Parameter(torch.randn((len(self.decoder_dict_once), 1, ninp))) \
+             if self.decoder_dict_once is not None else None
+         self.input_ln = SeqBN(ninp) if input_normalization else None
+         self.style_encoder = style_encoder
+         self.init_method = init_method
+         if num_global_att_tokens is not None:
+             assert not full_attention
+         self.global_att_embeddings = nn.Embedding(num_global_att_tokens, ninp) if num_global_att_tokens else None
+         self.full_attention = full_attention
+         self.efficient_eval_masking = efficient_eval_masking
+
+         self.nhid = nhid
+
+         self.init_weights()
+
+     def __setstate__(self, state):
+         super().__setstate__(state)
+         self.__dict__.setdefault('efficient_eval_masking', False)
+         if not hasattr(self, 'decoder_dict_once'):
+             self.__dict__.setdefault('decoder_dict_once', None)
+         if hasattr(self, 'decoder') and not hasattr(self, 'decoder_dict'):
+             self.add_module('decoder_dict', nn.ModuleDict({'standard': self.decoder}))
+         self.__dict__.setdefault('return_all_outputs', False)
+
+         def add_approximate_false(module):
+             if isinstance(module, nn.GELU):
+                 module.__dict__.setdefault('approximate', 'none')
+
+         self.apply(add_approximate_false)
+
+     @staticmethod
+     def generate_square_subsequent_mask(sz):
+         mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
+         return bool_mask_to_att_mask(mask)
+
+     @staticmethod
+     def generate_D_q_matrix(sz, query_size):
+         train_size = sz - query_size
+         mask = torch.zeros(sz, sz) == 0
+         mask[:, train_size:].zero_()
+         mask |= torch.eye(sz) == 1
+         return bool_mask_to_att_mask(mask)
+
+     @staticmethod
+     def generate_global_att_query_matrix(num_global_att_tokens, seq_len, num_query_tokens):
+         train_size = seq_len + num_global_att_tokens - num_query_tokens
+         sz = seq_len + num_global_att_tokens
+         mask = torch.zeros(num_query_tokens, sz) == 0
+         mask[:, train_size:].zero_()
+         mask[:, train_size:] |= torch.eye(num_query_tokens) == 1
+         return bool_mask_to_att_mask(mask)
+
+     @staticmethod
+     def generate_global_att_trainset_matrix(num_global_att_tokens, seq_len, num_query_tokens):
+         train_size = seq_len + num_global_att_tokens - num_query_tokens
+         trainset_size = seq_len - num_query_tokens
+         mask = torch.zeros(trainset_size, num_global_att_tokens) == 0
+         return bool_mask_to_att_mask(mask)
+
+     @staticmethod
+     def generate_global_att_globaltokens_matrix(num_global_att_tokens, seq_len, num_query_tokens):
+         mask = torch.zeros(num_global_att_tokens, num_global_att_tokens + seq_len - num_query_tokens) == 0
+         return bool_mask_to_att_mask(mask)
+
+     def init_weights(self):
+         initrange = 1.
+         if self.init_method is not None:
+             self.apply(self.init_method)
+         for layer in self.transformer_encoder.layers:
+             nn.init.zeros_(layer.linear2.weight)
+             nn.init.zeros_(layer.linear2.bias)
+             attns = layer.self_attn if isinstance(layer.self_attn, nn.ModuleList) else [layer.self_attn]
+             for attn in attns:
+                 nn.init.zeros_(attn.out_proj.weight)
+                 nn.init.zeros_(attn.out_proj.bias)
+
+     def forward(self, *args, **kwargs):
+         """
+         This will perform a forward-pass (possibly recording gradients) of the model.
+         We have multiple interfaces we support with this model:
+
+         model(train_x, train_y, test_x, src_mask=None, style=None, only_return_standard_out=True)
+         model((x,y), src_mask=None, single_eval_pos=None, only_return_standard_out=True)
+         model((style,x,y), src_mask=None, single_eval_pos=None, only_return_standard_out=True)
+         """
+         if len(args) == 3:
+             # case model(train_x, train_y, test_x, src_mask=None, style=None, only_return_standard_out=True)
+             assert all(kwarg in {'src_mask', 'style', 'only_return_standard_out'} for kwarg in kwargs.keys()), \
+                 f"Unrecognized keyword argument in kwargs: {set(kwargs.keys()) - {'src_mask', 'style', 'only_return_standard_out'}}"
+             x = args[0]
+             if args[2] is not None:
+                 x = torch.cat((x, args[2]), dim=0)
+             style = kwargs.pop('style', None)
+             return self._forward((style, x, args[1]), single_eval_pos=len(args[0]), **kwargs)
+         elif len(args) == 1 and isinstance(args[0], tuple):
+             # case model((x,y), src_mask=None, single_eval_pos=None, only_return_standard_out=True)
+             # case model((style,x,y), src_mask=None, single_eval_pos=None, only_return_standard_out=True)
+             assert all(kwarg in {'src_mask', 'single_eval_pos', 'only_return_standard_out'} for kwarg in kwargs.keys()), \
+                 f"Unrecognized keyword argument in kwargs: {set(kwargs.keys()) - {'src_mask', 'single_eval_pos', 'only_return_standard_out'}}"
+             return self._forward(*args, **kwargs)
+
+     def _forward(self, src, src_mask=None, single_eval_pos=None, only_return_standard_out=True):
+         assert isinstance(src, tuple), 'inputs (src) have to be given as (x,y) or (style,x,y) tuple'
+
+         if len(src) == 2:  # (x,y) and no style
+             src = (None,) + src
+
+         style_src, x_src, y_src = src
+
+         if single_eval_pos is None:
+             single_eval_pos = x_src.shape[0]
+
+         x_src = self.encoder(x_src)
+
+         if self.decoder_dict_once is not None:
+             x_src = torch.cat([x_src, self.decoder_dict_once_embeddings.repeat(1, x_src.shape[1], 1)], dim=0)
+
+         y_src = self.y_encoder(y_src.unsqueeze(-1) if len(y_src.shape) < len(x_src.shape) else y_src) if y_src is not None else None
+         if self.style_encoder:
+             assert style_src is not None, 'style_src must be given if style_encoder is used'
+             style_src = self.style_encoder(style_src).unsqueeze(0)
+         else:
+             style_src = torch.tensor([], device=x_src.device)
+         global_src = torch.tensor([], device=x_src.device) if self.global_att_embeddings is None else \
+             self.global_att_embeddings.weight.unsqueeze(1).repeat(1, x_src.shape[1], 1)
+
+         if src_mask is not None:
+             assert self.global_att_embeddings is None or isinstance(src_mask, tuple)
+
+         if src_mask is None:
+             if self.global_att_embeddings is None:
+                 full_len = len(x_src) + len(style_src)
+                 if self.full_attention:
+                     src_mask = bool_mask_to_att_mask(torch.ones((full_len, full_len), dtype=torch.bool)).to(x_src.device)
+                 elif self.efficient_eval_masking:
+                     src_mask = single_eval_pos + len(style_src)
+                 else:
+                     src_mask = self.generate_D_q_matrix(full_len, len(x_src) - single_eval_pos).to(x_src.device)
+             else:
+                 src_mask_args = (self.global_att_embeddings.num_embeddings,
+                                  len(x_src) + len(style_src),
+                                  len(x_src) + len(style_src) - single_eval_pos)
+                 src_mask = (self.generate_global_att_globaltokens_matrix(*src_mask_args).to(x_src.device),
+                             self.generate_global_att_trainset_matrix(*src_mask_args).to(x_src.device),
+                             self.generate_global_att_query_matrix(*src_mask_args).to(x_src.device))
+
+         train_x = x_src[:single_eval_pos]
+         if y_src is not None:
+             train_x = train_x + y_src[:single_eval_pos]
+         src = torch.cat([global_src, style_src, train_x, x_src[single_eval_pos:]], 0)
+
+         if self.input_ln is not None:
+             src = self.input_ln(src)
+
+         if self.pos_encoder is not None:
+             src = self.pos_encoder(src)
+
+         output = self.transformer_encoder(src, src_mask)
+
+         num_prefix_positions = len(style_src) + (self.global_att_embeddings.num_embeddings if self.global_att_embeddings else 0)
+         if self.return_all_outputs:
+             out_range_start = num_prefix_positions
+         else:
+             out_range_start = single_eval_pos + num_prefix_positions
+
+         # in the line below, we use the indexing feature that `x[i:None] == x[i:]`
+         out_range_end = -len(self.decoder_dict_once_embeddings) if self.decoder_dict_once is not None else None
+
+         # take care: the `_once` outputs are counted from the end
+         output_once = {k: v(output[-(i + 1)]) for i, (k, v) in enumerate(self.decoder_dict_once.items())} \
+             if self.decoder_dict_once is not None else {}
+
+         output = {k: v(output[out_range_start:out_range_end]) for k, v in self.decoder_dict.items()} \
+             if self.decoder_dict is not None else {}
+
+         if only_return_standard_out:
+             return output['standard']
+
+         if output_once:
+             return output, output_once
+         return output
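A shape sketch for the tuple interface documented in forward above (illustrative; dimensions are assumptions inferred from the code, not stated by it). The first single_eval_pos positions are the "training" context, the rest are query points, and the model returns predictions only for the queries:

    import torch

    # model = TransformerModel(...)   # assumed to be constructed/loaded elsewhere
    num_features = 18                 # whatever the loaded model was trained with
    x = torch.rand(30, 1, num_features)   # 20 context + 10 query points, batch of 1
    y = torch.rand(20, 1)                 # targets for the context positions only
    # logits = model((x, y), single_eval_pos=20)   # -> (10, 1, num_buckets)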
+     @torch.no_grad()
+     def init_from_small_model(self, small_model):
+         assert isinstance(self.decoder, nn.Linear) and isinstance(self.encoder, (nn.Linear, nn.Sequential)) \
+             and isinstance(self.y_encoder, (nn.Linear, nn.Sequential))
+
+         def set_encoder_weights(my_encoder, small_model_encoder):
+             my_encoder_linear, small_encoder_linear = (my_encoder, small_model_encoder) \
+                 if isinstance(my_encoder, nn.Linear) else (my_encoder[-1], small_model_encoder[-1])
+             small_in_dim = small_encoder_linear.out_features
+             my_encoder_linear.weight.zero_()
+             my_encoder_linear.bias.zero_()
+             my_encoder_linear.weight[:small_in_dim] = small_encoder_linear.weight
+             my_encoder_linear.bias[:small_in_dim] = small_encoder_linear.bias
+
+         set_encoder_weights(self.encoder, small_model.encoder)
+         set_encoder_weights(self.y_encoder, small_model.y_encoder)
+
+         small_in_dim = small_model.decoder.in_features
+
+         self.decoder.weight[:, :small_in_dim] = small_model.decoder.weight
+         self.decoder.bias = small_model.decoder.bias
+
+         for my_layer, small_layer in zip(self.transformer_encoder.layers, small_model.transformer_encoder.layers):
+             small_hid_dim = small_layer.linear1.out_features
+             my_in_dim = my_layer.linear1.in_features
+
+             # packed along q,k,v order in first dim
+             my_in_proj_w = my_layer.self_attn.in_proj_weight
+             small_in_proj_w = small_layer.self_attn.in_proj_weight
+
+             my_in_proj_w.view(3, my_in_dim, my_in_dim)[:, :small_in_dim, :small_in_dim] = \
+                 small_in_proj_w.view(3, small_in_dim, small_in_dim)
+             my_layer.self_attn.in_proj_bias.view(3, my_in_dim)[:, :small_in_dim] = \
+                 small_layer.self_attn.in_proj_bias.view(3, small_in_dim)
+
+             my_layer.self_attn.out_proj.weight[:small_in_dim, :small_in_dim] = small_layer.self_attn.out_proj.weight
+             my_layer.self_attn.out_proj.bias[:small_in_dim] = small_layer.self_attn.out_proj.bias
+
+             my_layer.linear1.weight[:small_hid_dim, :small_in_dim] = small_layer.linear1.weight
+             my_layer.linear1.bias[:small_hid_dim] = small_layer.linear1.bias
+
+             my_layer.linear2.weight[:small_in_dim, :small_hid_dim] = small_layer.linear2.weight
+             my_layer.linear2.bias[:small_in_dim] = small_layer.linear2.bias
+
+             my_layer.norm1.weight[:small_in_dim] = math.sqrt(small_in_dim / my_in_dim) * small_layer.norm1.weight
+             my_layer.norm2.weight[:small_in_dim] = math.sqrt(small_in_dim / my_in_dim) * small_layer.norm2.weight
+
+             my_layer.norm1.bias[:small_in_dim] = small_layer.norm1.bias
+             my_layer.norm2.bias[:small_in_dim] = small_layer.norm2.bias
+
+
+ class TransformerEncoderDiffInit(Module):
+     r"""TransformerEncoder is a stack of N encoder layers
+
+     Args:
+         encoder_layer_creator: a function generating objects of TransformerEncoderLayer class without args (required).
+         num_layers: the number of sub-encoder-layers in the encoder (required).
+         norm: the layer normalization component (optional).
+     """
+     __constants__ = ['norm']
+
+     def __init__(self, encoder_layer_creator, num_layers, norm=None):
+         super().__init__()
+         self.layers = nn.ModuleList([encoder_layer_creator() for _ in range(num_layers)])
+         self.num_layers = num_layers
+         self.norm = norm
+
+     def forward(self, src: Tensor, mask: Optional[Tensor] = None, src_key_padding_mask: Optional[Tensor] = None) -> Tensor:
+         r"""Pass the input through the encoder layers in turn.
+
+         Args:
+             src: the sequence to the encoder (required).
+             mask: the mask for the src sequence (optional).
+             src_key_padding_mask: the mask for the src keys per batch (optional).
+
+         Shape:
+             see the docs in Transformer class.
+         """
+         output = src
+
+         for mod in self.layers:
+             output = mod(output, src_mask=mask, src_key_padding_mask=src_key_padding_mask)
+
+         if self.norm is not None:
+             output = self.norm(output)
+
+         return output
pfns4bo/__init__.py ADDED
@@ -0,0 +1,50 @@
+ import os
+
+ model_path = 'final_models'
+
+ def prepare_models():
+     pfns4bo_dir = os.path.dirname(__file__)
+     model_names = ['hebo_morebudget_9_unused_features_3_userpriorperdim2_8.pt',
+                    'model_sampled_warp_simple_mlp_for_hpob_46.pt',
+                    'model_hebo_morebudget_9_unused_features_3.pt',]
+
+     for name in model_names:
+         weights_path = os.path.join(pfns4bo_dir, model_path, name)
+         compressed_weights_path = os.path.join(pfns4bo_dir, model_path, name + '.gz')
+         if not os.path.exists(weights_path):
+             if not os.path.exists(compressed_weights_path):
+                 print("Downloading", os.path.abspath(compressed_weights_path))
+                 import requests
+                 url = f'https://github.com/automl/PFNs4BO/raw/main/pfns4bo/final_models/{name + ".gz"}'
+                 r = requests.get(url, allow_redirects=True)
+                 os.makedirs(os.path.dirname(compressed_weights_path), exist_ok=True)
+                 with open(compressed_weights_path, 'wb') as f:
+                     f.write(r.content)
+             if os.path.exists(compressed_weights_path):
+                 print("Unzipping", name)
+                 os.system(f"gzip -dk {compressed_weights_path}")
+             else:
+                 print("Failed to find", compressed_weights_path)
+                 print("Make sure you have an internet connection to download the model automatically..")
+         if os.path.exists(weights_path):
+             print("Successfully located model at", weights_path)
+
+
+ model_dict = {
+     'hebo_plus_userprior_model': os.path.join(os.path.dirname(__file__), model_path,
+                                               'hebo_morebudget_9_unused_features_3_userpriorperdim2_8.pt'),
+     'hebo_plus_model': os.path.join(os.path.dirname(__file__), model_path,
+                                     'model_hebo_morebudget_9_unused_features_3.pt'),
+     'bnn_model': os.path.join(os.path.dirname(__file__), model_path, 'model_sampled_warp_simple_mlp_for_hpob_46.pt')
+ }
+
+
+ def __getattr__(name):
+     if name in model_dict:
+         if not os.path.exists(model_dict[name]):
+             print("Can't find", os.path.abspath(model_dict[name]), "thus unzipping/downloading models now.")
+             print("This might take a while..")
+             prepare_models()
+         return model_dict[name]
+     raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
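The module-level __getattr__ above (PEP 562) lets model paths be fetched lazily, downloading and unzipping the weights on first access. Assumed usage:

    import torch
    import pfns4bo

    model = torch.load(pfns4bo.hebo_plus_model)   # triggers download/unzip if missing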
pfns4bo/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (1.85 kB). View file
pfns4bo/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (3.99 kB). View file
pfns4bo/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (1.83 kB). View file
pfns4bo/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (1.85 kB). View file
pfns4bo/__pycache__/bar_distribution.cpython-310.pyc ADDED
Binary file (15.6 kB). View file
pfns4bo/__pycache__/bar_distribution.cpython-311.pyc ADDED
Binary file (33.3 kB). View file