Upload 529 files
This view is limited to 50 files because it contains too many changes.
- Ackley10D_CEI_Avg_Obj.pt +3 -0
- Ackley10D_CEI_Avg_Time.pt +3 -0
- CantileverBeam.png +0 -0
- Car.png +0 -0
- Car_CEI_Avg_Obj.pt +3 -0
- Car_CEI_Avg_Time.pt +3 -0
- CompressionSpring.png +0 -0
- Formulation_default.png +0 -0
- Gradio_important.ipynb +588 -0
- Gradio_test.ipynb +569 -0
- HeatExchanger.png +0 -0
- HeatExchanger_CEI_Avg_Obj.pt +3 -0
- HeatExchanger_CEI_Avg_Time.pt +3 -0
- PressureVessel.png +0 -0
- PressureVessel_CEI_Avg_Obj.pt +3 -0
- PressureVessel_CEI_Avg_Time.pt +3 -0
- ReinforcedConcreteBeam_CEI_Avg_Obj.pt +3 -0
- ReinforcedConcreteBeam_CEI_Avg_Time.pt +3 -0
- Reinforcement.png +0 -0
- Rosen_PFN4BO.py +442 -0
- SpeedReducer.png +0 -0
- SpeedReducer_CEI_Avg_Obj.pt +3 -0
- SpeedReducer_CEI_Avg_Time.pt +3 -0
- Test_formulation.png +0 -0
- Test_formulation_default.png +0 -0
- ThreeTruss.png +0 -0
- ThreeTruss_CEI_Avg_Obj.pt +3 -0
- ThreeTruss_CEI_Avg_Time.pt +3 -0
- WeldedBeam.png +0 -0
- WeldedBeam_CEI_Avg_Obj.pt +3 -0
- WeldedBeam_CEI_Avg_Time.pt +3 -0
- __pycache__/Rosen_PFN4BO.cpython-310.pyc +0 -0
- final_models/Cyril_500features.pt +3 -0
- final_models/Cyril_500features_800epoch_cpu.pt +3 -0
- final_models/Cyril_50features.pt +3 -0
- final_models/hebo_morebudget_9_unused_features_3_userpriorperdim2_8.pt +3 -0
- final_models/heboplus_500features_retrain_epoch800_cpu.pt +3 -0
- final_models/model_hebo_morebudget_9_unused_features_3.pt +3 -0
- final_models/model_sampled_warp_simple_mlp_for_hpob_46.pt +3 -0
- pfns4bo/.ipynb_checkpoints/__init__-checkpoint.py +50 -0
- pfns4bo/.ipynb_checkpoints/bar_distribution-checkpoint.py +410 -0
- pfns4bo/.ipynb_checkpoints/lost_functions-checkpoint.py +177 -0
- pfns4bo/.ipynb_checkpoints/transformer-checkpoint.py +327 -0
- pfns4bo/__init__.py +50 -0
- pfns4bo/__pycache__/__init__.cpython-310.pyc +0 -0
- pfns4bo/__pycache__/__init__.cpython-311.pyc +0 -0
- pfns4bo/__pycache__/__init__.cpython-38.pyc +0 -0
- pfns4bo/__pycache__/__init__.cpython-39.pyc +0 -0
- pfns4bo/__pycache__/bar_distribution.cpython-310.pyc +0 -0
- pfns4bo/__pycache__/bar_distribution.cpython-311.pyc +0 -0
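
Since this commit lives on the Hugging Face Hub, any of the files listed above can be fetched programmatically. A minimal sketch with the `huggingface_hub` client; the repo id `user/space-name` is a placeholder, since the actual Space id is not visible in this view:

from huggingface_hub import hf_hub_download

# "user/space-name" is a hypothetical placeholder; substitute the real Space id
local_path = hf_hub_download(repo_id="user/space-name",
                             filename="Ackley10D_CEI_Avg_Obj.pt",
                             repo_type="space")
print(local_path)  # path of the materialized file in the local cache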
Ackley10D_CEI_Avg_Obj.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ef078fab3310090b39fa175558b54f1a6819ea07cb0b9e19f38b39bd4c27c12b
size 2968
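
The `.pt` entries in this commit are Git LFS pointer stubs like the one above, with a fixed three-field layout (version, oid, size). A minimal sketch of reading those fields, using a hypothetical `parse_lfs_pointer` helper that is not part of this repo:

def parse_lfs_pointer(text):
    # Each line is "<key> <value>"; oid is "sha256:<hex>", size is in bytes
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return fields["oid"], int(fields["size"])

oid, size = parse_lfs_pointer(
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:ef078fab3310090b39fa175558b54f1a6819ea07cb0b9e19f38b39bd4c27c12b\n"
    "size 2968"
)
print(oid, size)  # sha256:ef07... 2968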
Ackley10D_CEI_Avg_Time.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0e43f57c340f37f7e912b2143872910e14e797a16f1c16c1b3088cf3d550c64a
size 3484
CantileverBeam.png
ADDED
Car.png
ADDED
Car_CEI_Avg_Obj.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2703f7d2083dfc5a340b082b9b16406467443a82ced26ac7202f7440f68c9854
size 3008
Car_CEI_Avg_Time.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:acaa29100dfafb78d40a7391feae3d042716159955ec46772eb2f0c017830d1a
size 3396
CompressionSpring.png
ADDED
Formulation_default.png
ADDED
Gradio_important.ipynb
ADDED
@@ -0,0 +1,588 @@
[Jupyter notebook, 588 JSON lines. Only the source of its single code cell is reproduced below, cleaned of JSON escaping and duplicate imports; the recorded cell output shows the app serving at http://127.0.0.1:7958.]

import gradio as gr
import torch
import numpy as np
import matplotlib.pyplot as plt
from test_functions.Ackley10D import *
from test_functions.Ackley2D import *
from test_functions.Ackley6D import *
from test_functions.HeatExchanger import *
from test_functions.CantileverBeam import *
from test_functions.Car import *
from test_functions.CompressionSpring import *
from test_functions.GKXWC1 import *
from test_functions.GKXWC2 import *
from test_functions.JLH1 import *
from test_functions.JLH2 import *
from test_functions.KeaneBump import *
from test_functions.PressureVessel import *
from test_functions.ReinforcedConcreteBeam import *
from test_functions.SpeedReducer import *
from test_functions.ThreeTruss import *
from test_functions.WeldedBeam import *
# Import other objective functions as needed
import time

from Rosen_PFN4BO import *
from PIL import Image


def s(input_string):
    return input_string


# Per-problem starting "best" values: arbitrary very low numbers so that any
# feasible design found during the run improves on them.
STARTING_BEST = {
    "CantileverBeam.png": -82500,
    "CompressionSpring.png": -8,
    "HeatExchanger.png": -30000,
    "ThreeTruss.png": -300,
    "Reinforcement.png": -440,
    "PressureVessel.png": -40000,
    "SpeedReducer.png": -3200,
    "WeldedBeam.png": -35,
    "Car.png": -35,
}

# File stems of the stored GP-CBO baselines plotted against the live run
GP_DATA_STEMS = {
    "CantileverBeam.png": "CantileverBeam",
    "CompressionSpring.png": "CompressionSpring",
    "HeatExchanger.png": "HeatExchanger",
    "ThreeTruss.png": "ThreeTruss",
    "Reinforcement.png": "ReinforcedConcreteBeam",
    "PressureVessel.png": "PressureVessel",
    "SpeedReducer.png": "SpeedReducer",
    "WeldedBeam.png": "WeldedBeam",
    "Car.png": "Car",
}

# Reference optima drawn as dashed horizontal lines on the convergence plot
OPTIMAL_VALUES = {
    "CantileverBeam.png": 50000,
    "CompressionSpring.png": 0,
    "HeatExchanger.png": 4700,
    "ThreeTruss.png": 262,
    "Reinforcement.png": 355,
    "PressureVessel.png": 5000,
    "SpeedReducer.png": 2650,
    "WeldedBeam.png": 6,
    "Car.png": 25,
}


def optimize(objective_function, iteration_input, progress=gr.Progress()):
    print(objective_function)

    Current_BEST = torch.tensor(STARTING_BEST.get(objective_function, -1e10))
    Prev_BEST = torch.tensor(STARTING_BEST.get(objective_function, -1e10))

    # Initial random samples, scaled to the domain of interest
    trained_X = torch.rand(20, objective_functions[objective_function]['dim'])
    X_scaled = objective_functions[objective_function]['scaling'](trained_X)

    # Constraints and objective at the initial samples
    trained_gx, trained_Y = objective_functions[objective_function]['function'](X_scaled)

    # Convergence history: best value so far and elapsed wall-clock time
    convergence = []
    time_conv = []

    START_TIME = time.time()

    # Optimization loop
    for ii in progress.tqdm(range(iteration_input)):
        # (0) Re-evaluate the data gathered so far
        X_scaled = objective_functions[objective_function]['scaling'](trained_X)
        trained_gx, trained_Y = objective_functions[objective_function]['function'](X_scaled)

        # (1) Randomly sample candidate points X_pen
        X_pen = torch.rand(1000, trained_X.shape[1])

        # (2) PFN inference phase with EI
        default_model = 'final_models/model_hebo_morebudget_9_unused_features_3.pt'
        ei, p_feas = Rosen_PFN_Parallel(default_model,
                                        trained_X, trained_Y, trained_gx,
                                        X_pen, 'power', 'ei')

        # (3) Constrained EI: EI times the feasibility probability of every
        #     constraint
        CEI = ei
        for jj in range(p_feas.shape[1]):
            CEI = CEI * p_feas[:, jj]

        # (4) Pick the next search point
        rec_idx = torch.argmax(CEI)
        best_candidate = X_pen[rec_idx, :].unsqueeze(0)

        # (5) Append the next search point
        trained_X = torch.cat([trained_X, best_candidate])

        # Track the best feasible value (visualization only; this block can
        # be removed for pure optimization)
        Current_X = objective_functions[objective_function]['scaling'](trained_X)
        Current_GX, Current_Y = objective_functions[objective_function]['function'](Current_X)
        if ((Current_GX <= 0).all(dim=1)).any():
            Current_BEST = torch.max(Current_Y[(Current_GX <= 0).all(dim=1)])
        else:
            Current_BEST = Prev_BEST

        convergence.append(Current_BEST.abs())
        time_conv.append(time.time() - START_TIME)

    TOTAL_TIME = time.time() - START_TIME

    # Convergence plot for the website
    return create_convergence_plot(objective_function, iteration_input,
                                   time_conv, convergence, TOTAL_TIME)


def create_radar_chart(X_scaled):
    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))
    labels = [f'x{i+1}' for i in range(X_scaled.shape[1])]
    values = X_scaled.mean(dim=0).numpy()

    num_vars = len(labels)
    angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()
    values = np.concatenate((values, [values[0]]))
    angles += angles[:1]

    ax.fill(angles, values, color='green', alpha=0.25)
    ax.plot(angles, values, color='green', linewidth=2)
    ax.set_yticklabels([])
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels([f'{label}\n({value:.2f})'
                        for label, value in zip(labels, values[:-1])])
    ax.set_title("Selected Design", size=15, color='black', y=1.1)

    plt.close(fig)
    return fig


def create_convergence_plot(objective_function, iteration_input,
                            time_conv, convergence, TOTAL_TIME):
    fig, ax = plt.subplots()

    # Realtime optimization data
    ax.plot(time_conv, convergence, '^-', label='PFN-CBO (Realtime)')

    # Stored GP data
    stem = GP_DATA_STEMS[objective_function]
    GP_TIME = torch.load(f'{stem}_CEI_Avg_Time.pt')
    GP_OBJ = torch.load(f'{stem}_CEI_Avg_Obj.pt')
    ax.plot(GP_TIME[:iteration_input], GP_OBJ[:iteration_input],
            '^-', label='GP-CBO (Data)')

    ax.set_xlabel('Time (seconds)')
    ax.set_ylabel('Objective Value')
    ax.set_title(f'Convergence Plot for {iteration_input} iterations')
    ax.axhline(y=OPTIMAL_VALUES[objective_function], color='red',
               linestyle='--', label='Optimal Value')
    ax.legend(loc='best')

    # Annotate the plot when no feasible design was found
    if len(convergence) == 0:
        ax.text(0.5, 0.5, 'No Feasible Design Found', transform=ax.transAxes,
                fontsize=12, verticalalignment='top',
                horizontalalignment='right')

    plt.close(fig)
    return fig


# Available objective functions; each key doubles as the gallery image path
objective_functions = {
    "CompressionSpring.png": {"image": "CompressionSpring.png", "function": CompressionSpring, "scaling": CompressionSpring_Scaling, "dim": 3},
    "Reinforcement.png": {"image": "Reinforcement.png", "function": ReinforcedConcreteBeam, "scaling": ReinforcedConcreteBeam_Scaling, "dim": 3},
    "PressureVessel.png": {"image": "PressureVessel.png", "function": PressureVessel, "scaling": PressureVessel_Scaling, "dim": 4},
    "SpeedReducer.png": {"image": "SpeedReducer.png", "function": SpeedReducer, "scaling": SpeedReducer_Scaling, "dim": 7},
    "WeldedBeam.png": {"image": "WeldedBeam.png", "function": WeldedBeam, "scaling": WeldedBeam_Scaling, "dim": 4},
    "HeatExchanger.png": {"image": "HeatExchanger.png", "function": HeatExchanger, "scaling": HeatExchanger_Scaling, "dim": 8},
    "CantileverBeam.png": {"image": "CantileverBeam.png", "function": CantileverBeam, "scaling": CantileverBeam_Scaling, "dim": 10},
    "Car.png": {"image": "Car.png", "function": Car, "scaling": Car_Scaling, "dim": 11},
}

# Image paths for the gallery (the dict keys)
image_paths = list(objective_functions)


def submit_action(objective_function_choices, iteration_input):
    if len(objective_function_choices) > 0:
        return optimize(objective_function_choices, iteration_input)
    return None


def clear_output():
    return (gr.update(value=[], selected=None), None, 15,
            gr.Markdown(""), 'Test_formulation_default.png')


def reset_gallery():
    return gr.update(value=image_paths)


with gr.Blocks() as demo:
    # Centered title and description
    gr.HTML(
        """
        <div style="text-align: center;">
        <h1>Pre-trained Transformer for Constrained Bayesian Optimization</h1>
        <h4>Paper: <a href="https://arxiv.org/abs/2404.04495">
        Fast and Accurate Bayesian Optimization with Pre-trained Transformers for Constrained Engineering Problems</a>
        </h4>
        <p style="text-align: left;">This is a demo of Bayesian optimization with PFNs (Prior-Data Fitted Networks).
        Select an objective function by clicking one of the images below, then set the iteration number to run the optimization.
        The results are visualized in the convergence plot.</p>
        </div>
        """
    )

    with gr.Row():
        with gr.Column(variant='compact'):
            with gr.Row():
                gr.Markdown("## Select a problem (objective): ")
                img_key = gr.Markdown(value="", visible=False)

            gallery = gr.Gallery(value=image_paths, label="Objective Functions",
                                 object_fit='contain',
                                 columns=3, rows=3, elem_id="gallery")

            gr.Markdown("## Enter iteration number: ")
            iteration_input = gr.Slider(label="Iterations:", minimum=15,
                                        maximum=50, step=1, value=15)

            # Clear and Submit buttons
            with gr.Row():
                clear_button = gr.Button("Clear")
                submit_button = gr.Button("Submit", variant="primary")

        with gr.Column():
            gr.Markdown("## Problem Formulation: ")
            formulation = gr.Image(value='Formulation_default.png', height=150)
            gr.Markdown("## Results: ")
            gr.Markdown("The graph plots the best observed objective against the time the algorithm has run up to each iteration. PFN-CBO is the realtime optimization running in the backend; GP-CBO is stored data from our earlier experiments, since running GP-CBO live would take much longer.")
            convergence_plot = gr.Plot(label="Convergence Plot")

    def handle_select(evt: gr.SelectData):
        # Remember which problem was clicked and show its formulation image
        key = evt.value['image']['orig_name']
        formulation = 'Test_formulation.png'
        return key, formulation

    gallery.select(fn=handle_select, inputs=None, outputs=[img_key, formulation])

    submit_button.click(
        submit_action,
        inputs=[img_key, iteration_input],
        outputs=convergence_plot,
    )

    clear_button.click(
        clear_output,
        inputs=None,
        outputs=[gallery, convergence_plot, iteration_input, img_key, formulation],
    ).then(
        # Reset the gallery to the original list
        reset_gallery,
        inputs=None,
        outputs=gallery,
    )

demo.launch()
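
The heart of the cell above is the constrained expected improvement: the PFN returns an EI value and per-constraint feasibility probabilities for every candidate, and the demo multiplies them together before taking the argmax. A minimal standalone sketch of that weighting with toy numbers (in the app the real values come from `Rosen_PFN_Parallel`):

import torch

ei = torch.tensor([0.10, 0.40, 0.25, 0.05])   # EI of 4 candidate points
p_feas = torch.tensor([[0.90, 0.80],          # P(feasible) per constraint
                       [0.20, 0.50],          # (two constraints here)
                       [0.95, 0.90],
                       [1.00, 1.00]])

# Constrained EI: EI weighted by the probability that all constraints hold
cei = ei * p_feas.prod(dim=1)
print(torch.argmax(cei))  # candidate 2: high EI *and* likely feasible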
Gradio_test.ipynb
ADDED
@@ -0,0 +1,569 @@
[Jupyter notebook, 569 JSON lines. The sources of its code cells are reproduced below, cleaned of JSON escaping and duplicate imports. The first cell's recorded output shows the app on local URL http://127.0.0.1:7891 plus a public gradio.live share link that expires after 72 hours; two later cells hold an identical Gradio calculator demo, shown once here; the final non-empty cell is a fully commented-out CheckboxGroup CSS styling experiment and is omitted.]

Cell 1, the PFN-CBO demo with checkbox problem selection:

import gradio as gr
import torch
import numpy as np
import matplotlib.pyplot as plt
from test_functions.Ackley10D import *
from test_functions.Ackley2D import *
from test_functions.Ackley6D import *
from test_functions.HeatExchanger import *
from test_functions.CantileverBeam import *
from test_functions.Car import *
from test_functions.CompressionSpring import *
from test_functions.GKXWC1 import *
from test_functions.GKXWC2 import *
from test_functions.JLH1 import *
from test_functions.JLH2 import *
from test_functions.KeaneBump import *
from test_functions.PressureVessel import *
from test_functions.ReinforcedConcreteBeam import *
from test_functions.SpeedReducer import *
from test_functions.ThreeTruss import *
from test_functions.WeldedBeam import *
# Import other objective functions as needed
import time

from Rosen_PFN4BO import *


def optimize(objective_function, iteration_input):
    # Starting "best" values (arbitrary very small numbers)
    Current_BEST = -1e10
    Prev_BEST = -1e10

    # Initial random samples, scaled to the domain of interest
    trained_X = torch.rand(20, objective_functions[objective_function]['dim'])
    X_scaled = objective_functions[objective_function]['scaling'](trained_X)

    # Constraints and objective at the initial samples
    trained_gx, trained_Y = objective_functions[objective_function]['function'](X_scaled)

    convergence = []
    START_TIME = time.time()

    # Optimization loop
    for ii in range(iteration_input):
        # (0) Re-evaluate the data gathered so far
        X_scaled = objective_functions[objective_function]['scaling'](trained_X)
        trained_gx, trained_Y = objective_functions[objective_function]['function'](X_scaled)

        # (1) Randomly sample candidate points X_pen
        X_pen = torch.rand(1000, trained_X.shape[1])

        # (2) PFN inference phase with EI
        default_model = 'final_models/Cyril_500features_800epoch_cpu.pt'
        ei, p_feas = Rosen_PFN_Parallel(default_model,
                                        trained_X, trained_Y, trained_gx,
                                        X_pen, 'power', 'ei')

        # (3) Constrained EI
        CEI = ei
        for jj in range(p_feas.shape[1]):
            CEI = CEI * p_feas[:, jj]

        # (4) Pick the next search point
        rec_idx = torch.argmax(CEI)
        best_candidate = X_pen[rec_idx, :].unsqueeze(0)

        # (5) Append the next search point
        trained_X = torch.cat([trained_X, best_candidate])

        # Best feasible value so far (visualization only; this block can be
        # removed for pure optimization)
        Current_X = objective_functions[objective_function]['scaling'](trained_X)
        Current_GX, Current_Y = objective_functions[objective_function]['function'](Current_X)
        if ((Current_GX <= 0).all(dim=1)).any():
            Current_BEST = torch.max(Current_Y[(Current_GX <= 0).all(dim=1)])
        else:
            Current_BEST = Prev_BEST

        # Convergence tracking (the best Y is to be maximized)
        if Current_BEST != -1e10:
            convergence.append(Current_BEST.abs())

    TOTAL_TIME = time.time() - START_TIME

    # Website visualization: radar chart of the sampled designs plus the
    # convergence history
    radar_chart = create_radar_chart(X_scaled)
    convergence_plot = create_convergence_plot(convergence, TOTAL_TIME)
    return radar_chart, convergence_plot


def create_radar_chart(X_scaled):
    # Identical to the create_radar_chart defined in Gradio_important.ipynb
    # above
    ...


def create_convergence_plot(convergence, TOTAL_TIME):
    fig, ax = plt.subplots()
    ax.plot(convergence, label='Best Objective Value')
    ax.set_xlabel('Iteration')
    ax.set_ylabel('Objective Value')
    ax.set_title(f'Convergence Plot (Opt Runtime: {round(TOTAL_TIME, 2)} sec)')
    ax.legend()

    # Annotate the plot when no feasible design was found
    if len(convergence) == 0:
        ax.text(0.5, 0.5, 'No Feasible Design Found', transform=ax.transAxes,
                fontsize=12, verticalalignment='top',
                horizontalalignment='right')

    plt.close(fig)
    return fig


# Define available objective functions
objective_functions = {
    "Ackley2D": {"function": Ackley2D, "scaling": Ackley2D_Scaling, "dim": 2},
    "Ackley6D": {"function": Ackley6D, "scaling": Ackley6D_Scaling, "dim": 6},
    "Ackley10D": {"function": Ackley10D, "scaling": Ackley10D_Scaling, "dim": 10},
    "GKXWC1": {"function": GKXWC1, "scaling": GKXWC1_Scaling, "dim": 2},
    "GKXWC2": {"function": GKXWC2, "scaling": GKXWC2_Scaling, "dim": 2},
    "JLH1": {"function": JLH1, "scaling": JLH1_Scaling, "dim": 2},
    "JLH2": {"function": JLH2, "scaling": JLH2_Scaling, "dim": 2},
    "Keane Bump": {"function": KeaneBump, "scaling": KeaneBump_Scaling, "dim": 18},
    "Three Truss": {"function": ThreeTruss, "scaling": ThreeTruss_Scaling, "dim": 2},
    "Compression Spring": {"function": CompressionSpring, "scaling": CompressionSpring_Scaling, "dim": 3},
    "Reinforced Concrete Beam": {"function": ReinforcedConcreteBeam, "scaling": ReinforcedConcreteBeam_Scaling, "dim": 3},
    "Pressure Vessel": {"function": PressureVessel, "scaling": PressureVessel_Scaling, "dim": 4},
    "Speed Reducer": {"function": SpeedReducer, "scaling": SpeedReducer_Scaling, "dim": 4},
    "Welded Beam": {"function": WeldedBeam, "scaling": WeldedBeam_Scaling, "dim": 4},
    "Heat Exchanger": {"function": HeatExchanger, "scaling": HeatExchanger_Scaling, "dim": 8},
    "Cantilever Beam": {"function": CantileverBeam, "scaling": CantileverBeam_Scaling, "dim": 10},
    "Car": {"function": Car, "scaling": Car_Scaling, "dim": 11},
    # Add more functions here
}


with gr.Blocks(theme=gr.themes.Default()) as demo:
    # Centered title and description
    gr.HTML(
        """
        <div style="text-align: center;">
        <h1>Pre-trained Transformer for Constrained Bayesian Optimization</h1>
        <p>This is a demo of Bayesian optimization with PFNs (Prior-Data Fitted Networks).
        Select an objective function by ticking one of the check boxes below, then enter the iteration number to run the optimization.
        The results are visualized in the radar chart and convergence plot.</p>
        <img src="https://github.com/rosenyu304/BOEngineeringBenchmark/blob/main/Icons.png?raw=true"
             alt="Example Image"
             style="width: 800px; height: auto; margin-top: 20px; display: block; margin-left: auto; margin-right: auto;">
        </div>
        """
    )

    selected_objective = gr.State(None)  # the selected objective function

    with gr.Row():
        objective_checkbox_group = gr.CheckboxGroup(
            choices=["JLH1", "JLH2", "GKXWC1", "GKXWC2", "Ackley2D", "Ackley6D",
                     "Ackley10D", "Keane Bump", "Three Truss",
                     "Reinforced Concrete Beam", "Pressure Vessel",
                     "Welded Beam", "Speed Reducer", "Car"],
            label="Select the design problem:"
        )
    with gr.Row():
        iteration_input = gr.Number(label="Enter Iteration Number:", value=10)

    # Clear and Submit buttons
    with gr.Row():
        clear_button = gr.Button("Clear")
        submit_button = gr.Button("Submit", variant="primary")

    with gr.Row():
        with gr.Column():
            radar_plot = gr.Plot(label="Resulting Design")
        with gr.Column():
            convergence_plot = gr.Plot(label="Convergence Plot")

    # Button actions
    def clear_action():
        return None, None, None

    def submit_action(objective_function_choices, iteration_input):
        # If several problems are checked, only the first one is run
        if len(objective_function_choices) > 0:
            selected_function = objective_function_choices[0]
            return optimize(selected_function, iteration_input)
        return None, None

    clear_button.click(clear_action,
                       outputs=[objective_checkbox_group, radar_plot,
                                convergence_plot])
    submit_button.click(submit_action,
                        inputs=[objective_checkbox_group, iteration_input],
                        outputs=[radar_plot, convergence_plot])

demo.launch(share=True)

The Gradio calculator demo cell (recorded twice in the notebook, shown once):

import gradio as gr

def calculator(num1, operation, num2):
    if operation == "add":
        return num1 + num2
    elif operation == "subtract":
        return num1 - num2
    elif operation == "multiply":
        return num1 * num2
    elif operation == "divide":
        return num1 / num2

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            num_1 = gr.Number(value=4)
            operation = gr.Radio(["add", "subtract", "multiply", "divide"])
            num_2 = gr.Number(value=0)
            submit_btn = gr.Button(value="Calculate")
        with gr.Column():
            result = gr.Number()

    submit_btn.click(
        calculator, inputs=[num_1, operation, num_2], outputs=[result],
        api_name=False
    )
    examples = gr.Examples(
        examples=[
            [5, "add", 3],
            [4, "divide", 2],
            [-4, "multiply", 2.5],
            [0, "subtract", 1.2],
        ],
        inputs=[num_1, operation, num_2],
    )

if __name__ == "__main__":
    demo.launch(show_api=False)
HeatExchanger.png
ADDED
HeatExchanger_CEI_Avg_Obj.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6fc95574634750d3dc892076b26e55c6f79d4dbb128d5b65e6832e83783c89a8
size 3432
HeatExchanger_CEI_Avg_Time.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0ed8e01768b9cc8bf82c51f523c9ea46c4f3e7e3e9e6c8e04edb0d615032f1e9
size 3500
PressureVessel.png
ADDED
PressureVessel_CEI_Avg_Obj.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f7c81ea242bdcb45cb644cd5f18b941ff8ebbcbbb81b9965eea251c01f9f6c78
size 3628
PressureVessel_CEI_Avg_Time.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:eb204723d0523baebbfda6e4f1fdbc7506c66bfc0ed0cbc7ec5ea485451660a7
size 3504
ReinforcedConcreteBeam_CEI_Avg_Obj.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5d2e3212a28eb9cb59212d876c1ddae2f1b37950974eed01683c7d4180206c7e
size 3532
ReinforcedConcreteBeam_CEI_Avg_Time.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:aae5672638e75081965450635cc15310f90ee167c3264399bee07afc2ad3a58d
size 3472
Reinforcement.png
ADDED
![]() |
Rosen_PFN4BO.py
ADDED
@@ -0,0 +1,442 @@
import contextlib
import torch
import scipy
import math
from sklearn.preprocessing import power_transform, PowerTransformer, StandardScaler

from torchvision.transforms.functional import to_tensor
from pfns4bo import transformer
from pfns4bo import bar_distribution

import numpy as np

import pfns4bo
from pfns4bo.scripts.acquisition_functions import TransformerBOMethod

import warnings
warnings.filterwarnings('ignore')

device = torch.device("cpu")
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dtype = torch.float32

from sklearn.utils import resample


@torch.enable_grad()
def Rosen_PFN(model_name,
              trained_X,
              trained_Y,
              X_pen,
              trasform_type,
              what_do_you_want
              ):
    """Score the pending points X_pen with a pretrained PFN surrogate.

    `trasform_type` selects the Y preprocessing ('std' or 'power');
    `what_do_you_want` selects the returned quantity: 'mean', 'ei', 'ucb',
    'variance', 'mode' or 'ts' (mean and variance in the original Y scale).
    """
    PFN = TransformerBOMethod(torch.load(model_name).requires_grad_(False), device=device)

    dim = trained_X.shape[1]

    x_given = trained_X
    x_eval = X_pen
    x_predict = torch.cat([x_given, x_eval], dim=0)
    x_full_feed = torch.cat([x_given, x_given, x_eval], dim=0).unsqueeze(1)

    if trasform_type == 'std':
        pt = StandardScaler()
        pt.fit(trained_Y)
        PT_trained_Y = pt.transform(trained_Y)
        trained_Y = to_tensor(PT_trained_Y).to(torch.float32).reshape(trained_Y.shape)
    elif trasform_type == 'power':
        pt = PowerTransformer(method="yeo-johnson")
        pt.fit(trained_Y.detach().numpy())
        trained_Y, _ = general_power_transform(trained_Y,
                                               trained_Y,
                                               .0,
                                               less_safe=False)

    y_given = trained_Y
    y_given = y_given.reshape(-1)
    y_full_feed = y_given.unsqueeze(1)

    criterion: bar_distribution.BarDistribution = PFN.model.criterion

    style = None
    logits = PFN.model(
        (style,
         x_full_feed.repeat_interleave(dim=1, repeats=y_full_feed.shape[1]),
         y_full_feed.repeat(1, x_full_feed.shape[1])),
        single_eval_pos=len(x_given)
    )

    logits = logits.softmax(-1).log()

    logits_given = logits[:len(x_given)]
    logits_eval = logits[len(x_given):]

    best_f = torch.max(y_given)

    if what_do_you_want == 'mean':
        output = criterion.mean(logits_eval)

        if trasform_type == 'std' or trasform_type == 'power':
            # Map the prediction back to the original Y scale. Note that this
            # inverse path uses PowerTransformer attributes (`standardize`,
            # `_scaler`, `lambdas_`), i.e. it assumes the 'power' branch.
            if pt.standardize:
                XX = output.clone()
                scale = torch.from_numpy(pt._scaler.scale_)
                std_mean = torch.from_numpy(pt._scaler.mean_)
                XX = torch_std_inverse_transform(XX, scale, std_mean)

            for i, lmbda in enumerate(pt.lambdas_):
                with np.errstate(invalid="ignore"):  # hide NaN warnings
                    XX = torch_power_inverse_transform(XX, lmbda)
            return XX

    elif what_do_you_want == 'ei':
        output = criterion.ei(logits_eval, best_f)

    elif what_do_you_want == 'ucb':
        acq_function = criterion.ucb
        ucb_rest_prob = .05
        if ucb_rest_prob is not None:
            acq_function = lambda *args: criterion.ucb(*args, rest_prob=ucb_rest_prob)
        output = acq_ensembling(acq_function(logits_eval, best_f))

    elif what_do_you_want == 'variance':
        output = criterion.variance(logits_eval)

    elif what_do_you_want == 'mode':
        output = criterion.mode(logits_eval)

    elif what_do_you_want == 'ts':
        mn = criterion.mean(logits_eval)

        if trasform_type == 'std' or trasform_type == 'power':
            if pt.standardize:
                XX = mn.clone()
                scale = torch.from_numpy(pt._scaler.scale_)
                std_mean = torch.from_numpy(pt._scaler.mean_)
                XX = torch_std_inverse_transform(XX, scale, std_mean)

            for i, lmbda in enumerate(pt.lambdas_):
                with np.errstate(invalid="ignore"):  # hide NaN warnings
                    XX = torch_power_inverse_transform(XX, lmbda)

        var = criterion.variance(logits_eval)

        return XX, var

    return output


def Rosen_PFN_Parallel(model_name,
                       trained_X,
                       trained_Y,
                       GX,
                       X_pen,
                       trasform_type,
                       what_do_you_want
                       ):
    """Constrained variant: feeds the objective and the constraint values GX
    through the PFN jointly and returns constrained acquisition values."""
    PFN = TransformerBOMethod(torch.load(model_name), device=device)

    with torch.no_grad():

        dim = trained_X.shape[1]

        x_given = trained_X
        x_eval = X_pen
        x_predict = torch.cat([x_given, x_eval], dim=0)
        x_full_feed = torch.cat([x_given, x_given, x_eval], dim=0).unsqueeze(1)

        y_given = trained_Y
        y_given = y_given.reshape(-1)

        ######################################################################
        # Objective Power Transform
        y_given, pt_y = general_power_transform(y_given.unsqueeze(1),
                                                y_given.unsqueeze(1),
                                                .0,
                                                less_safe=False)
        y_given = y_given.squeeze(1)
        ######################################################################

        ######################################################################
        # Constraints Power Transform
        # Changes for Parallel:
        GX = -GX
        GX_t, pt_GX = general_power_transform(GX, GX, .0, less_safe=False)
        G_thres, _ = general_power_transform(GX,
                                             torch.zeros((1, GX.shape[1])).to(GX.device),
                                             .0,
                                             less_safe=False)
        GX = GX_t
        ######################################################################

        y_full_feed = y_given.unsqueeze(1)

        criterion: bar_distribution.BarDistribution = PFN.model.criterion

        style = None
        logits = PFN.model(
            (style,
             x_full_feed.repeat_interleave(dim=1, repeats=y_full_feed.shape[1] + GX.shape[1]),
             torch.cat([y_full_feed, GX], dim=1).unsqueeze(2)),
            single_eval_pos=len(x_given)
        )

        logits = logits.softmax(-1).log_()

        logits_given = logits[:len(x_given)]
        logits_eval = logits[len(x_given):]

        best_f = torch.max(y_given)

        objective_given = logits_given[:, 0, :].unsqueeze(1)
        objective_eval = logits_eval[:, 0, :].unsqueeze(1)
        constraint_given = logits_given[:, 1:, :]
        constraint_eval = logits_eval[:, 1:, :]

        if what_do_you_want == 'mean':
            obj_output = criterion.mean(objective_eval)
            con_output = criterion.mean(constraint_eval)

        elif what_do_you_want == 'ei':
            # Changes for CEI

            # Objective
            tau = torch.max(y_given)
            objective_acq_value = acq_ensembling(criterion.ei(objective_eval, tau))

            # Constraints: probability of feasibility per constraint
            constraints_acq_value = acq_ensembling(criterion.pi(constraint_eval[:, 0, :].unsqueeze(1), G_thres[0, 0].item()))
            constraints_acq_value = constraints_acq_value.unsqueeze(1)

            for jj in range(1, constraint_eval.shape[1]):
                next_constraints_acq_value = acq_ensembling(criterion.pi(constraint_eval[:, jj, :].unsqueeze(1), G_thres[0, jj].item()))
                next_constraints_acq_value = next_constraints_acq_value.unsqueeze(1)
                constraints_acq_value = torch.cat([constraints_acq_value, next_constraints_acq_value], dim=1)

            return objective_acq_value, constraints_acq_value

        elif what_do_you_want == 'variance':
            output = criterion.variance(logits_eval)
        elif what_do_you_want == 'mode':
            output = criterion.mode(logits_eval)
        elif what_do_you_want == 'cts':
            obj_mnn = criterion.mean(objective_eval)
            obj_mnn = pt_y.inverse_transform(obj_mnn)
            obj_mnn = torch.from_numpy(obj_mnn)

            con_mnn = criterion.mean(constraint_eval)
            con_mnn = pt_GX.inverse_transform(con_mnn)
            con_mnn = torch.from_numpy(-con_mnn)

            obj_varr = criterion.variance(objective_eval)
            con_varr = criterion.variance(constraint_eval)

            return obj_mnn, obj_varr, con_mnn, con_varr

        return output


def acq_ensembling(acq_values):  # (points, ensemble dim)
    return acq_values.max(1).values


def torch_std_inverse_transform(X, scale, mean):
    X *= scale
    X += mean
    return X


def torch_power_inverse_transform(x, lmbda):
    """Inverse of the Yeo-Johnson transform, applied elementwise in torch."""
    out = torch.zeros_like(x)
    pos = x >= 0

    # when x >= 0
    if abs(lmbda) < np.spacing(1.0):
        out[pos] = torch.exp(x[pos]) - 1
    else:  # lmbda != 0
        out[pos] = torch.pow(x[pos] * lmbda + 1, 1 / lmbda) - 1

    # when x < 0
    if abs(lmbda - 2) > np.spacing(1.0):
        out[~pos] = 1 - torch.pow(-(2 - lmbda) * x[~pos] + 1, 1 / (2 - lmbda))
    else:  # lmbda == 2
        out[~pos] = 1 - torch.exp(-x[~pos])

    return out


################################################################################
## PFN defined functions
################################################################################


def log01(x, eps=.0000001, input_between_zero_and_one=False):
    logx = torch.log(x + eps)
    if input_between_zero_and_one:
        return (logx - math.log(eps)) / (math.log(1 + eps) - math.log(eps))
    return (logx - logx.min(0)[0]) / (logx.max(0)[0] - logx.min(0)[0])


def log01_batch(x, eps=.0000001, input_between_zero_and_one=False):
    x = x.repeat(1, x.shape[-1] + 1, 1)
    for b in range(x.shape[-1]):
        x[:, b, b] = log01(x[:, b, b], eps=eps, input_between_zero_and_one=input_between_zero_and_one)
    return x


def lognormed_batch(x, eval_pos, eps=.0000001):
    x = x.repeat(1, x.shape[-1] + 1, 1)
    for b in range(x.shape[-1]):
        logx = torch.log(x[:, b, b] + eps)
        x[:, b, b] = (logx - logx[:eval_pos].mean(0)) / logx[:eval_pos].std(0)
    return x


def _rank_transform(x_train, x):
    assert len(x_train.shape) == len(x.shape) == 1
    relative_to = torch.cat((torch.zeros_like(x_train[:1]), x_train.unique(sorted=True,), torch.ones_like(x_train[-1:])), -1)
    higher_comparison = (relative_to < x[..., None]).sum(-1).clamp(min=1)
    pos_inside_interval = (x - relative_to[higher_comparison - 1]) / (relative_to[higher_comparison] - relative_to[higher_comparison - 1])
    x_transformed = higher_comparison - 1 + pos_inside_interval
    return x_transformed / (len(relative_to) - 1.)


def rank_transform(x_train, x):
    assert x.shape[1] == x_train.shape[1], f"{x.shape=} and {x_train.shape=}"
    # make sure everything is between 0 and 1
    assert (x_train >= 0.).all() and (x_train <= 1.).all(), f"{x_train=}"
    assert (x >= 0.).all() and (x <= 1.).all(), f"{x=}"
    return_x = x.clone()
    for feature_dim in range(x.shape[1]):
        return_x[:, feature_dim] = _rank_transform(x_train[:, feature_dim], x[:, feature_dim])
    return return_x


def general_power_transform(x_train, x_apply, eps, less_safe=False):
    """Fit a PowerTransformer on x_train and apply it to x_apply.

    Uses Box-Cox when eps > 0 and Yeo-Johnson otherwise; falls back to
    simple centering when the fit fails or produces NaN/inf. Returns the
    transformed values together with the fitted transformer.
    """
    if eps > 0:
        try:
            pt = PowerTransformer(method='box-cox')
            pt.fit(x_train.cpu() + eps)
            x_out = torch.tensor(pt.transform(x_apply.cpu() + eps), dtype=x_apply.dtype, device=x_apply.device)
        except Exception as e:
            print(e)
            x_out = x_apply - x_train.mean(0)
            print(x_train)
            print(x_out)
    else:
        pt = PowerTransformer(method='yeo-johnson')
        if not less_safe and (x_train.std() > 1_000 or x_train.mean().abs() > 1_000):
            # inputs are very large, normalize them first
            x_apply = (x_apply - x_train.mean(0)) / x_train.std(0)
            x_train = (x_train - x_train.mean(0)) / x_train.std(0)
        try:
            pt.fit(x_train.cpu().double())
        except Exception as e:
            if less_safe:
                x_train = (x_train - x_train.mean(0)) / x_train.std(0)
                x_apply = (x_apply - x_train.mean(0)) / x_train.std(0)
            else:
                x_train = x_train - x_train.mean(0)
                x_apply = x_apply - x_train.mean(0)
            pt.fit(x_train.cpu().double())
        x_out = torch.tensor(pt.transform(x_apply.cpu()), dtype=x_apply.dtype, device=x_apply.device)
    if torch.isnan(x_out).any() or torch.isinf(x_out).any():
        print('WARNING: power transform failed')
        print(f"{x_train=} and {x_apply=}")
        x_out = x_apply - x_train.mean(0)
    return x_out, pt
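To make the interface above concrete, here is a minimal sketch of calling `Rosen_PFN`; the checkpoint name is taken from `final_models/` in this upload, while the shapes and data are illustrative assumptions:

import torch
from Rosen_PFN4BO import Rosen_PFN

train_X = torch.rand(20, 10)   # 20 evaluated points in [0, 1]^10
train_Y = torch.rand(20, 1)    # their objective values (one column)
cand_X = torch.rand(100, 10)   # candidate points to score

# 'power' Yeo-Johnson-transforms Y before it is fed to the PFN,
# 'ei' returns expected-improvement values for the candidates.
ei = Rosen_PFN('final_models/model_hebo_morebudget_9_unused_features_3.pt',
               train_X, train_Y, cand_X, 'power', 'ei')
next_x = cand_X[ei.flatten().argmax()]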
SpeedReducer.png
ADDED
![]()
SpeedReducer_CEI_Avg_Obj.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1567f1e9557cb6d701605a2ec74c6e294c42a85c88ddf3c0f33e307bf7f9a07f
size 3684
SpeedReducer_CEI_Avg_Time.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9cfa75441b15a3e1b8dbdc1e4a074e7a3682c41c9a85924793c67a41bec86acd
size 3496
Test_formulation.png
ADDED
![]()
Test_formulation_default.png
ADDED
![]()
ThreeTruss.png
ADDED
![]()
ThreeTruss_CEI_Avg_Obj.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b02d1c95ef3aee57fee8804a82119d9b68453e182184cf47970779742d059bed
size 2844
ThreeTruss_CEI_Avg_Time.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7e9b30e0f99096ab84b8a545d2c1f24b80cd2d0bce1df6bc7f268b32c88a5b4f
size 2912
WeldedBeam.png
ADDED
![]()
WeldedBeam_CEI_Avg_Obj.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:efba0c05f0ac9803ee75caa3396983535bc3a104b47db2a3e463b1497ab5a93b
size 3164
WeldedBeam_CEI_Avg_Time.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:06225c2be0d11a0cb563f1eb525401d5f5536401694d7ed7e3f7179a1f51352b
size 3552
__pycache__/Rosen_PFN4BO.cpython-310.pyc
ADDED
Binary file (8.11 kB).
final_models/Cyril_500features.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:43218823860a5ca71657fd25c50bcc1209c3c570bcbb9df9ed2822bbb9f6f9c8
size 239411934
final_models/Cyril_500features_800epoch_cpu.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:25884174687cfbde831badc4f1d05e94f860711dc3a07f4dde09930860e63603
size 239408346
final_models/Cyril_50features.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d5b680c4b72e72a33a21896885de7e8fba52c42612a6165a7cf60afede2e425d
size 107333480
final_models/hebo_morebudget_9_unused_features_3_userpriorperdim2_8.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ff2a4aa60feeca59e80f3b272d7b2ab521e1e82189469db494068de33dcaba17
size 107378616
final_models/heboplus_500features_retrain_epoch800_cpu.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:25884174687cfbde831badc4f1d05e94f860711dc3a07f4dde09930860e63603
size 239408346
final_models/model_hebo_morebudget_9_unused_features_3.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cc7f6f4b9b06e59987e42845b7b0d31ffa5b414b9eddfe14d88b25120e3cd4f8
size 107262245
final_models/model_sampled_warp_simple_mlp_for_hpob_46.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ebb2d4d7f419ca4617fdf85c663a69b3b0285fef91712e0dd69d5ab2d61754fd
size 56761718
pfns4bo/.ipynb_checkpoints/__init__-checkpoint.py
ADDED
@@ -0,0 +1,50 @@
import os

model_path = 'final_models'

def prepare_models():
    pfns4bo_dir = os.path.dirname(__file__)
    model_names = ['hebo_morebudget_9_unused_features_3_userpriorperdim2_8.pt',
                   'model_sampled_warp_simple_mlp_for_hpob_46.pt',
                   'model_hebo_morebudget_9_unused_features_3.pt',]

    for name in model_names:
        weights_path = os.path.join(pfns4bo_dir, model_path, name)
        compressed_weights_path = os.path.join(pfns4bo_dir, model_path, name + '.gz')
        if not os.path.exists(weights_path):
            if not os.path.exists(compressed_weights_path):
                print("Downloading", os.path.abspath(compressed_weights_path))
                import requests
                url = f'https://github.com/automl/PFNs4BO/raw/main/pfns4bo/final_models/{name + ".gz"}'
                r = requests.get(url, allow_redirects=True)
                os.makedirs(os.path.dirname(compressed_weights_path), exist_ok=True)
                with open(compressed_weights_path, 'wb') as f:
                    f.write(r.content)
            if os.path.exists(compressed_weights_path):
                print("Unzipping", name)
                os.system(f"gzip -dk {compressed_weights_path}")
            else:
                print("Failed to find", compressed_weights_path)
                print("Make sure you have an internet connection to download the model automatically..")
        if os.path.exists(weights_path):
            print("Successfully located model at", weights_path)


model_dict = {
    'hebo_plus_userprior_model': os.path.join(os.path.dirname(__file__), model_path,
                                              'hebo_morebudget_9_unused_features_3_userpriorperdim2_8.pt'),
    'hebo_plus_model': os.path.join(os.path.dirname(__file__), model_path,
                                    'model_hebo_morebudget_9_unused_features_3.pt'),
    'bnn_model': os.path.join(os.path.dirname(__file__), model_path, 'model_sampled_warp_simple_mlp_for_hpob_46.pt')
}


def __getattr__(name):
    if name in model_dict:
        if not os.path.exists(model_dict[name]):
            print("Can't find", os.path.abspath(model_dict[name]), "thus unzipping/downloading models now.")
            print("This might take a while..")
            prepare_models()
        return model_dict[name]
    raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
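Because of the module-level `__getattr__` above (PEP 562 attribute lookup on modules), the model paths resolve lazily: the first attribute access triggers the download/unzip, later accesses just return the path. A short sketch of the intended use:

import torch
import pfns4bo

path = pfns4bo.hebo_plus_model   # downloads/unzips on first access if needed
model = torch.load(path)         # the PFN surrogate as a torch module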
pfns4bo/.ipynb_checkpoints/bar_distribution-checkpoint.py
ADDED
@@ -0,0 +1,410 @@
from .utils import print_once

import torch
from torch import nn


class BarDistribution(nn.Module):
    def __init__(self, borders: torch.Tensor, smoothing=.0, ignore_nan_targets=True):
        # here borders should start with min and end with max, where all values lie in (min,max) and are sorted
        '''
        :param borders:
        :param smoothing:
        :param append_mean_pred: Whether to predict the mean of the other positions as a last output in forward,
        is enabled when additionally y has a sequence length 1 shorter than logits, i.e. len(logits) == 1 + len(y)
        '''
        super().__init__()
        assert len(borders.shape) == 1
        self.register_buffer('borders', borders)
        self.register_buffer('smoothing', torch.tensor(smoothing))
        self.register_buffer('bucket_widths', self.borders[1:] - self.borders[:-1])
        full_width = self.bucket_widths.sum()

        assert (1 - (full_width / (self.borders[-1] - self.borders[0]))).abs() < 1e-2, \
            f'diff: {full_width - (self.borders[-1] - self.borders[0])} with {full_width} {self.borders[-1]} {self.borders[0]}'
        assert (self.bucket_widths >= 0.0).all(), "Please provide sorted borders!"  # This also allows size zero buckets
        self.num_bars = len(borders) - 1
        self.ignore_nan_targets = ignore_nan_targets
        self.to(borders.device)

    def __setstate__(self, state):
        super().__setstate__(state)
        self.__dict__.setdefault('append_mean_pred', False)

    def map_to_bucket_idx(self, y):
        target_sample = torch.searchsorted(self.borders, y) - 1
        target_sample[y == self.borders[0]] = 0
        target_sample[y == self.borders[-1]] = self.num_bars - 1
        return target_sample

    def ignore_init(self, y):
        ignore_loss_mask = torch.isnan(y)
        if ignore_loss_mask.any():
            if not self.ignore_nan_targets:
                raise ValueError(f'Found NaN in target {y}')
            print_once("A loss was ignored because there was nan target.")
        y[ignore_loss_mask] = self.borders[0]  # this is just a default value, it will be ignored anyway
        return ignore_loss_mask

    def compute_scaled_log_probs(self, logits):
        # this is equivalent to log(p(y)) of the density p
        bucket_log_probs = torch.log_softmax(logits, -1)
        scaled_bucket_log_probs = bucket_log_probs - torch.log(self.bucket_widths)
        return scaled_bucket_log_probs

    def forward(self, logits, y, mean_prediction_logits=None):
        # gives the negative log density (the _loss_), y: T x B, logits: T x B x self.num_bars
        y = y.clone().view(*logits.shape[:-1])  # no trailing one dimension
        ignore_loss_mask = self.ignore_init(y)
        target_sample = self.map_to_bucket_idx(y)
        assert (target_sample >= 0).all() and (target_sample < self.num_bars).all(), \
            f'y {y} not in support set for borders (min_y, max_y) {self.borders}'
        assert logits.shape[-1] == self.num_bars, f'{logits.shape[-1]} vs {self.num_bars}'

        scaled_bucket_log_probs = self.compute_scaled_log_probs(logits)
        nll_loss = -scaled_bucket_log_probs.gather(-1, target_sample[..., None]).squeeze(-1)  # T x B

        if mean_prediction_logits is not None:
            if not self.training:
                print('Calculating loss incl mean prediction loss for nonmyopic BO.')
            scaled_mean_log_probs = self.compute_scaled_log_probs(mean_prediction_logits)
            nll_loss = torch.cat((nll_loss, self.mean_loss(logits, scaled_mean_log_probs)), 0)

        smooth_loss = -scaled_bucket_log_probs.mean(dim=-1)
        smoothing = self.smoothing if self.training else 0.
        loss = (1. - smoothing) * nll_loss + smoothing * smooth_loss
        loss[ignore_loss_mask] = 0.
        return loss

    def mean_loss(self, logits, scaled_mean_logits):
        assert (len(logits.shape) == 3) and (len(scaled_mean_logits.shape) == 2), \
            (len(logits.shape), len(scaled_mean_logits.shape))
        means = self.mean(logits).detach()  # T x B
        target_mean = self.map_to_bucket_idx(means).clamp_(0, self.num_bars - 1)  # T x B
        return -scaled_mean_logits.gather(1, target_mean.T).mean(1).unsqueeze(0)  # 1 x B

    def mean(self, logits):
        bucket_means = self.borders[:-1] + self.bucket_widths / 2
        p = torch.softmax(logits, -1)
        return p @ bucket_means

    def median(self, logits):
        return self.icdf(logits, 0.5)

    def icdf(self, logits, left_prob):
        """
        Implementation of the quantile function
        :param logits: Tensor of any shape, with the last dimension being logits
        :param left_prob: float: The probability mass to the left of the result.
        :return: Position with `left_prob` probability weight to the left.
        """
        probs = logits.softmax(-1)
        cumprobs = torch.cumsum(probs, -1)
        idx = torch.searchsorted(cumprobs, left_prob * torch.ones(*cumprobs.shape[:-1], 1, device=logits.device))\
            .squeeze(-1).clamp(0, cumprobs.shape[-1] - 1)  # this might not do the right for outliers
        cumprobs = torch.cat([torch.zeros(*cumprobs.shape[:-1], 1, device=logits.device), cumprobs], -1)

        rest_prob = left_prob - cumprobs.gather(-1, idx[..., None]).squeeze(-1)
        left_border = self.borders[idx]
        right_border = self.borders[idx + 1]
        return left_border + (right_border - left_border) * rest_prob / probs.gather(-1, idx[..., None]).squeeze(-1)

    def quantile(self, logits, center_prob=.682):
        side_probs = (1. - center_prob) / 2
        return torch.stack((self.icdf(logits, side_probs), self.icdf(logits, 1. - side_probs)), -1)

    def ucb(self, logits, best_f, rest_prob=(1 - .682) / 2, maximize=True):
        """
        UCB utility. Rest Prob is the amount of utility above (below) the confidence interval that is ignored.
        Higher rest_prob is equivalent to lower beta in the standard GP-UCB formulation.
        :param logits: Logits, as returned by the Transformer.
        :param rest_prob: The amount of utility above (below) the confidence interval that is ignored.
        The default is equivalent to using GP-UCB with `beta=1`.
        To get the corresponding `beta`, where `beta` is from
        the standard GP definition of UCB `ucb_utility = mean + beta * std`,
        you can use this computation: `beta = math.sqrt(2)*torch.erfinv(torch.tensor(2*(1-rest_prob)-1))`.
        :param maximize:
        :return: utility
        """
        if maximize:
            rest_prob = 1 - rest_prob
        return self.icdf(logits, rest_prob)

    def mode(self, logits):
        mode_inds = logits.argmax(-1)
        bucket_means = self.borders[:-1] + self.bucket_widths / 2
        return bucket_means[mode_inds]

    def ei(self, logits, best_f, maximize=True):  # logits: evaluation_points x batch x feature_dim
        bucket_diffs = self.borders[1:] - self.borders[:-1]
        assert maximize
        if not torch.is_tensor(best_f) or not len(best_f.shape):
            best_f = torch.full(logits[..., 0].shape, best_f, device=logits.device)

        best_f = best_f[..., None].repeat(*[1] * len(best_f.shape), logits.shape[-1])
        clamped_best_f = best_f.clamp(self.borders[:-1], self.borders[1:])

        # true bucket contributions
        bucket_contributions = ((self.borders[1:]**2 - clamped_best_f**2) / 2 - best_f * (self.borders[1:] - clamped_best_f)) / bucket_diffs

        p = torch.softmax(logits, -1)
        return torch.einsum("...b,...b->...", p, bucket_contributions)

    def pi(self, logits, best_f, maximize=True):  # logits: evaluation_points x batch x feature_dim
        """
        Acquisition Function: Probability of Improvement
        :param logits: as returned by Transformer
        :param best_f: best evaluation so far (the incumbent)
        :param maximize: whether to maximize
        :return: utility
        """
        assert maximize is True
        if not torch.is_tensor(best_f) or not len(best_f.shape):
            best_f = torch.full(logits[..., 0].shape, best_f, device=logits.device)
        p = torch.softmax(logits, -1)
        border_widths = self.borders[1:] - self.borders[:-1]
        factor = 1. - ((best_f[..., None] - self.borders[:-1]) / border_widths).clamp(0., 1.)
        return (p * factor).sum(-1)

    def mean_of_square(self, logits):
        """
        Computes E[x^2].
        :param logits: Output of the model.
        """
        left_borders = self.borders[:-1]
        right_borders = self.borders[1:]
        bucket_mean_of_square = (left_borders.square() + right_borders.square() + left_borders * right_borders) / 3.
        p = torch.softmax(logits, -1)
        return p @ bucket_mean_of_square

    def variance(self, logits):
        return self.mean_of_square(logits) - self.mean(logits).square()

    # NOTE: pi, mean_of_square and variance are re-defined below in the original
    # file; the later definitions shadow the ones above at class-creation time.
    def pi(self, logits, best_f, maximize=True):  # logits: evaluation_points x batch x feature_dim
        """
        Acquisition Function: Probability of Improvement
        :param logits: as returned by Transformer
        :param best_f: best evaluation so far (the incumbent)
        :param maximize: whether to maximize
        :return: utility
        """
        assert maximize is True
        p = torch.softmax(logits, -1)
        border_widths = self.borders[1:] - self.borders[:-1]
        factor = 1. - ((best_f - self.borders[:-1]) / border_widths).clamp(0., 1.)
        return (p * factor).sum(-1)

    def mean_of_square(self, logits):
        """
        Computes E[x^2].
        :param logits: Output of the model.
        """
        left_borders = self.borders[:-1]
        right_borders = self.borders[1:]
        bucket_mean_of_square = (left_borders.square() + right_borders.square() + left_borders * right_borders) / 3.
        p = torch.softmax(logits, -1)
        return p @ bucket_mean_of_square

    def variance(self, logits):
        return self.mean_of_square(logits) - self.mean(logits).square()


class FullSupportBarDistribution(BarDistribution):
    @staticmethod
    def halfnormal_with_p_weight_before(range_max, p=.5):
        s = range_max / torch.distributions.HalfNormal(torch.tensor(1.)).icdf(torch.tensor(p))
        return torch.distributions.HalfNormal(s)

    def forward(self, logits, y, mean_prediction_logits=None):
        # gives the negative log density (the _loss_), y: T x B, logits: T x B x self.num_bars
        assert self.num_bars > 1
        y = y.clone().view(len(y), -1)  # no trailing one dimension
        ignore_loss_mask = self.ignore_init(y)  # alters y
        target_sample = self.map_to_bucket_idx(y)  # shape: T x B (same as y)
        target_sample.clamp_(0, self.num_bars - 1)

        assert logits.shape[-1] == self.num_bars, f'{logits.shape[-1]} vs {self.num_bars}'
        assert (target_sample >= 0).all() and (target_sample < self.num_bars).all(), \
            f'y {y} not in support set for borders (min_y, max_y) {self.borders}'
        # ignore all position with nan values

        scaled_bucket_log_probs = self.compute_scaled_log_probs(logits)

        assert len(scaled_bucket_log_probs) == len(target_sample), (len(scaled_bucket_log_probs), len(target_sample))
        log_probs = scaled_bucket_log_probs.gather(-1, target_sample.unsqueeze(-1)).squeeze(-1)

        side_normals = (self.halfnormal_with_p_weight_before(self.bucket_widths[0]),
                        self.halfnormal_with_p_weight_before(self.bucket_widths[-1]))

        log_probs[target_sample == 0] += side_normals[0].log_prob((self.borders[1] - y[target_sample == 0]).clamp(min=.00000001)) + torch.log(self.bucket_widths[0])
        log_probs[target_sample == self.num_bars - 1] += side_normals[1].log_prob((y[target_sample == self.num_bars - 1] - self.borders[-2]).clamp(min=.00000001)) + torch.log(self.bucket_widths[-1])

        nll_loss = -log_probs

        if mean_prediction_logits is not None:
            assert not ignore_loss_mask.any(), "Ignoring examples is not implemented with mean pred."
            if not self.training:
                print('Calculating loss incl mean prediction loss for nonmyopic BO.')
            if not torch.is_grad_enabled():
                print("Warning: loss is not correct in absolute terms, only the gradient is right, when using `append_mean_pred`.")
            scaled_mean_log_probs = self.compute_scaled_log_probs(mean_prediction_logits)
            nll_loss = torch.cat((nll_loss, self.mean_loss(logits, scaled_mean_log_probs)), 0)
            #ignore_loss_mask = torch.zeros_like(nll_loss, dtype=torch.bool)

        if self.smoothing:
            smooth_loss = -scaled_bucket_log_probs.mean(dim=-1)
            smoothing = self.smoothing if self.training else 0.
            nll_loss = (1. - smoothing) * nll_loss + smoothing * smooth_loss

        if ignore_loss_mask.any():
            nll_loss[ignore_loss_mask] = 0.

        return nll_loss

    def mean(self, logits):
        bucket_means = self.borders[:-1] + self.bucket_widths / 2
        p = torch.softmax(logits, -1)
        side_normals = (self.halfnormal_with_p_weight_before(self.bucket_widths[0]),
                        self.halfnormal_with_p_weight_before(self.bucket_widths[-1]))
        bucket_means[0] = -side_normals[0].mean + self.borders[1]
        bucket_means[-1] = side_normals[1].mean + self.borders[-2]
        return p @ bucket_means.to(logits.device)

    def mean_of_square(self, logits):
        """
        Computes E[x^2].
        :param logits: Output of the model.
        """
        left_borders = self.borders[:-1]
        right_borders = self.borders[1:]
        bucket_mean_of_square = (left_borders.square() + right_borders.square() + left_borders * right_borders) / 3.
        side_normals = (self.halfnormal_with_p_weight_before(self.bucket_widths[0]),
                        self.halfnormal_with_p_weight_before(self.bucket_widths[-1]))
        bucket_mean_of_square[0] = side_normals[0].variance + (-side_normals[0].mean + self.borders[1]).square()
        # E[x^2] = Var[x] + E[x]^2 for the right side bucket
        bucket_mean_of_square[-1] = side_normals[1].variance + (side_normals[1].mean + self.borders[-2]).square()
        p = torch.softmax(logits, -1)
        return p @ bucket_mean_of_square

    def pi(self, logits, best_f, maximize=True):  # logits: evaluation_points x batch x feature_dim
        """
        Acquisition Function: Probability of Improvement
        :param logits: as returned by Transformer (evaluation_points x batch x feature_dim)
        :param best_f: best evaluation so far (the incumbent)
        :param maximize: whether to maximize
        :return: utility
        """
        assert maximize is True
        if not torch.is_tensor(best_f) or not len(best_f.shape):
            best_f = torch.full(logits[..., 0].shape, best_f, device=logits.device)  # evaluation_points x batch
        assert best_f.shape == logits[..., 0].shape, f"best_f.shape: {best_f.shape}, logits.shape: {logits.shape}"
        p = torch.softmax(logits, -1)  # evaluation_points x batch
        border_widths = self.borders[1:] - self.borders[:-1]
        factor = 1. - ((best_f[..., None] - self.borders[:-1]) / border_widths).clamp(0., 1.)  # evaluation_points x batch x num_bars

        side_normals = (self.halfnormal_with_p_weight_before(self.bucket_widths[0]),
                        self.halfnormal_with_p_weight_before(self.bucket_widths[-1]))
        position_in_side_normals = (-(best_f - self.borders[1]).clamp(max=0.), (best_f - self.borders[-2]).clamp(min=0.))  # evaluation_points x batch
        factor[..., 0] = 0.
        factor[..., 0][position_in_side_normals[0] > 0.] = side_normals[0].cdf(position_in_side_normals[0][position_in_side_normals[0] > 0.])
        factor[..., -1] = 1.
        factor[..., -1][position_in_side_normals[1] > 0.] = 1. - side_normals[1].cdf(position_in_side_normals[1][position_in_side_normals[1] > 0.])
        return (p * factor).sum(-1)

    def ei_for_halfnormal(self, scale, best_f, maximize=True):
        """
        This is the EI for a standard normal distribution with mean 0 and variance `scale` times 2.
        Which is the same as the half normal EI.
        I tested this with MC approximation:
        ei_for_halfnormal = lambda scale, best_f: (torch.distributions.HalfNormal(torch.tensor(scale)).sample((10_000_000,)) - best_f).clamp(min=0.).mean()
        print([(ei_for_halfnormal(scale, best_f), FullSupportBarDistribution().ei_for_halfnormal(scale, best_f)) for scale in [0.1, 1., 10.] for best_f in [.1, 10., 4.]])
        :param scale:
        :param best_f:
        :param maximize:
        :return:
        """
        assert maximize
        mean = torch.tensor(0.)
        u = (mean - best_f) / scale
        normal = torch.distributions.Normal(torch.zeros_like(u), torch.ones_like(u))
        try:
            ucdf = normal.cdf(u)
        except ValueError:
            print(f"u: {u}, best_f: {best_f}, scale: {scale}")
            raise
        updf = torch.exp(normal.log_prob(u))
        normal_ei = scale * (updf + u * ucdf)
        return 2 * normal_ei

    def ei(self, logits, best_f, maximize=True):  # logits: evaluation_points x batch x feature_dim
        if torch.isnan(logits).any():
            raise ValueError(f"logits contains NaNs: {logits}")
        bucket_diffs = self.borders[1:] - self.borders[:-1]
        assert maximize
        if not torch.is_tensor(best_f) or not len(best_f.shape):
            best_f = torch.full(logits[..., 0].shape, best_f, device=logits.device)
        assert best_f.shape == logits[..., 0].shape, f"best_f.shape: {best_f.shape}, logits.shape: {logits.shape}"

        best_f_per_logit = best_f[..., None].repeat(*[1] * len(best_f.shape), logits.shape[-1])
        clamped_best_f = best_f_per_logit.clamp(self.borders[:-1], self.borders[1:])

        # true bucket contributions
        bucket_contributions = ((self.borders[1:]**2 - clamped_best_f**2) / 2 - best_f_per_logit * (self.borders[1:] - clamped_best_f)) / bucket_diffs

        # extra stuff for continuous
        side_normals = (self.halfnormal_with_p_weight_before(self.bucket_widths[0]),
                        self.halfnormal_with_p_weight_before(self.bucket_widths[-1]))
        position_in_side_normals = (-(best_f - self.borders[1]).clamp(max=0.),
                                    (best_f - self.borders[-2]).clamp(min=0.))  # evaluation_points x batch

        bucket_contributions[..., -1] = self.ei_for_halfnormal(side_normals[1].scale, position_in_side_normals[1])

        bucket_contributions[..., 0] = self.ei_for_halfnormal(side_normals[0].scale, torch.zeros_like(position_in_side_normals[0])) \
            - self.ei_for_halfnormal(side_normals[0].scale, position_in_side_normals[0])

        p = torch.softmax(logits, -1)
        return torch.einsum("...b,...b->...", p, bucket_contributions)


def get_bucket_limits(num_outputs: int, full_range: tuple = None, ys: torch.Tensor = None, verbose: bool = False):
    assert (ys is None) != (full_range is None), 'Either full_range or ys must be passed.'

    if ys is not None:
        ys = ys.flatten()
        ys = ys[~torch.isnan(ys)]
        if len(ys) % num_outputs:
            ys = ys[:-(len(ys) % num_outputs)]
        print(f'Using {len(ys)} y evals to estimate {num_outputs} buckets. Cut off the last {len(ys) % num_outputs} ys.')
        ys_per_bucket = len(ys) // num_outputs
        if full_range is None:
            full_range = (ys.min(), ys.max())
        else:
            assert full_range[0] <= ys.min() and full_range[1] >= ys.max(), f'full_range {full_range} not in range of ys {ys.min(), ys.max()}'
            full_range = torch.tensor(full_range)
        ys_sorted, ys_order = ys.sort(0)
        bucket_limits = (ys_sorted[ys_per_bucket - 1::ys_per_bucket][:-1] + ys_sorted[ys_per_bucket::ys_per_bucket]) / 2
        if verbose:
            print(f'Using {len(ys)} y evals to estimate {num_outputs} buckets. Cut off the last {len(ys) % num_outputs} ys.')
            print(full_range)
        bucket_limits = torch.cat([full_range[0].unsqueeze(0), bucket_limits, full_range[1].unsqueeze(0)], 0)

    else:
        class_width = (full_range[1] - full_range[0]) / num_outputs
        bucket_limits = torch.cat([full_range[0] + torch.arange(num_outputs).float() * class_width, torch.tensor(full_range[1]).unsqueeze(0)], 0)

    assert len(bucket_limits) - 1 == num_outputs, f'len(bucket_limits) - 1 == {len(bucket_limits) - 1} != {num_outputs} == num_outputs'
    assert full_range[0] == bucket_limits[0], f'{full_range[0]} != {bucket_limits[0]}'
    assert full_range[-1] == bucket_limits[-1], f'{full_range[-1]} != {bucket_limits[-1]}'

    return bucket_limits


def get_custom_bar_dist(borders, criterion):
    # Tested that a bar_dist with borders 0.54 (-> softplus 1.0) yields the same bar distribution as the passed one.
    borders_ = torch.nn.functional.softplus(borders) + 0.001
    borders_ = (torch.cumsum(torch.cat([criterion.borders[0:1], criterion.bucket_widths]) * borders_, 0))
    criterion_ = criterion.__class__(borders=borders_, handle_nans=criterion.handle_nans)
    return criterion_
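A small sketch of the bar-distribution API defined above; the borders and logits are made-up illustrations:

import torch
from pfns4bo import bar_distribution

borders = torch.linspace(0., 1., 101)   # 100 equal-width buckets on [0, 1]
crit = bar_distribution.BarDistribution(borders)

logits = torch.randn(5, 1, 100)         # 5 evaluation points, batch of 1, one logit per bucket
mean = crit.mean(logits)                # (5, 1) expected value of the predicted histogram
var = crit.variance(logits)             # (5, 1) predictive variance
ei = crit.ei(logits, best_f=0.8)        # expected improvement over an incumbent of 0.8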
pfns4bo/.ipynb_checkpoints/lost_functions-checkpoint.py
ADDED
@@ -0,0 +1,177 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

r"""
A converter that simplifies using numpy-based optimizers with generic torch
`nn.Module` classes. This enables using a `scipy.optim.minimize` optimizer
for optimizing module parameters.
"""

from __future__ import annotations

from collections import OrderedDict
from math import inf
from numbers import Number
from typing import Dict, List, Optional, Set, Tuple
from warnings import warn

import numpy as np
import torch
from botorch.optim.utils import (
    _get_extra_mll_args,
    _handle_numerical_errors,
    get_name_filter,
    get_parameters_and_bounds,
    TorchAttr,
)
from gpytorch.mlls import MarginalLogLikelihood
from torch.nn import Module


def module_to_array(
    module: Module,
    bounds: Optional[Dict[str, Tuple[Optional[float], Optional[float]]]] = None,
    exclude: Optional[Set[str]] = None,
) -> Tuple[np.ndarray, Dict[str, TorchAttr], Optional[np.ndarray]]:
    r"""Extract named parameters from a module into a numpy array.

    Only extracts parameters with requires_grad, since it is meant for optimizing.

    Args:
        module: A module with parameters. May specify parameter constraints in
            a `named_parameters_and_constraints` method.
        bounds: A dictionary mapping parameter names to tuples of lower and
            upper bounds. Bounds specified here take precedence over bounds
            on the same parameters specified in the constraints registered
            with the module.
        exclude: A list of parameter names that are to be excluded from extraction.

    Returns:
        3-element tuple containing
        - The parameter values as a numpy array.
        - An ordered dictionary with the name and tensor attributes of each
          parameter.
        - A `2 x n_params` numpy array with lower and upper bounds if at least
          one constraint is finite, and None otherwise.

    Example:
        >>> mll = ExactMarginalLogLikelihood(model.likelihood, model)
        >>> parameter_array, property_dict, bounds_out = module_to_array(mll)
    """
    warn(
        "`module_to_array` is marked for deprecation, consider using "
        "`get_parameters_and_bounds`, `get_parameters_as_ndarray_1d`, or "
        "`get_bounds_as_ndarray` instead.",
        DeprecationWarning,
    )
    param_dict, bounds_dict = get_parameters_and_bounds(
        module=module,
        name_filter=None if exclude is None else get_name_filter(exclude),
        requires_grad=True,
    )
    if bounds is not None:
        bounds_dict.update(bounds)

    # Record tensor metadata and read parameter values to the tape
    param_tape: List[Number] = []
    property_dict = OrderedDict()
    with torch.no_grad():
        for name, param in param_dict.items():
            property_dict[name] = TorchAttr(param.shape, param.dtype, param.device)
            param_tape.extend(param.view(-1).cpu().double().tolist())

    # Extract lower and upper bounds
    start = 0
    bounds_np = None
    params_np = np.asarray(param_tape)
    for name, param in param_dict.items():
        numel = param.numel()
        if name in bounds_dict:
            for row, bound in enumerate(bounds_dict[name]):
                if bound is None:
                    continue

                if torch.is_tensor(bound):
                    if (bound == (2 * row - 1) * inf).all():
                        continue
                    bound = bound.detach().cpu()

                elif bound == (2 * row - 1) * inf:
                    continue

                if bounds_np is None:
                    bounds_np = np.full((2, len(params_np)), ((-inf,), (inf,)))

                bounds_np[row, start : start + numel] = bound
        start += numel

    return params_np, property_dict, bounds_np


def set_params_with_array(
    module: Module, x: np.ndarray, property_dict: Dict[str, TorchAttr]
) -> Module:
    r"""Set module parameters with values from numpy array.

    Args:
        module: Module with parameters to be set
        x: Numpy array with parameter values
        property_dict: Dictionary of parameter names and torch attributes as
            returned by module_to_array.

    Returns:
        Module: module with parameters updated in-place.

    Example:
        >>> mll = ExactMarginalLogLikelihood(model.likelihood, model)
        >>> parameter_array, property_dict, bounds_out = module_to_array(mll)
        >>> parameter_array += 0.1  # perturb parameters (for example only)
        >>> mll = set_params_with_array(mll, parameter_array, property_dict)
    """
    warn(
        "`_set_params_with_array` is marked for deprecation, consider using "
        "`set_parameters_from_ndarray_1d` instead.",
        DeprecationWarning,
    )
    param_dict = OrderedDict(module.named_parameters())
    start_idx = 0
    for p_name, attrs in property_dict.items():
        # Construct the new tensor
        if len(attrs.shape) == 0:  # deal with scalar tensors
            end_idx = start_idx + 1
            new_data = torch.tensor(
                x[start_idx], dtype=attrs.dtype, device=attrs.device
            )
        else:
            end_idx = start_idx + np.prod(attrs.shape)
            new_data = torch.tensor(
                x[start_idx:end_idx], dtype=attrs.dtype, device=attrs.device
            ).view(*attrs.shape)
        start_idx = end_idx
        # Update corresponding parameter in-place. Disable autograd to update.
        param_dict[p_name].requires_grad_(False)
        param_dict[p_name].copy_(new_data)
        param_dict[p_name].requires_grad_(True)
    return module
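These two converters are meant as a round trip around a numpy-based optimizer. A sketch under the assumption that `mll` is a marginal-log-likelihood module and `closure` evaluates its loss (both placeholders, not defined in this file):

import numpy as np
from scipy.optimize import minimize

x0, property_dict, bounds_np = module_to_array(mll)

def loss_on_array(x: np.ndarray) -> float:
    set_params_with_array(mll, x, property_dict)   # write the flat vector back into the module
    return float(closure())                        # placeholder: evaluate the (negative) MLL

res = minimize(loss_on_array, x0)                  # scipy works on the flat numpy vector
set_params_with_array(mll, res.x, property_dict)   # keep the optimized parameters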
pfns4bo/.ipynb_checkpoints/transformer-checkpoint.py
ADDED
@@ -0,0 +1,327 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import math
from typing import Optional

import torch
import torch.nn as nn
from torch import Tensor
from torch.nn import Module, TransformerEncoder

from .layer import TransformerEncoderLayer, _get_activation_fn
from .utils import SeqBN, bool_mask_to_att_mask


class TransformerModel(nn.Module):
    def __init__(self, encoder, ninp, nhead, nhid, nlayers, dropout=0.0, style_encoder=None, y_encoder=None,
                 pos_encoder=None, decoder_dict=None, input_normalization=False, init_method=None, pre_norm=False,
                 activation='gelu', recompute_attn=False, num_global_att_tokens=0, full_attention=False,
                 all_layers_same_init=False, efficient_eval_masking=True, decoder_once_dict=None,
                 return_all_outputs=False, save_trainingset_representations=False):
        super().__init__()
        self.model_type = 'Transformer'
        encoder_layer_creator = lambda: TransformerEncoderLayer(ninp, nhead, nhid, dropout, activation=activation,
                                                                pre_norm=pre_norm, recompute_attn=recompute_attn,
                                                                save_trainingset_representations=save_trainingset_representations)
        self.transformer_encoder = TransformerEncoder(encoder_layer_creator(), nlayers)\
            if all_layers_same_init else TransformerEncoderDiffInit(encoder_layer_creator, nlayers)
        self.ninp = ninp
        self.encoder = encoder
        self.y_encoder = y_encoder
        self.pos_encoder = pos_encoder
        self.return_all_outputs = return_all_outputs

        def make_decoder_dict(decoder_description_dict):
            if decoder_description_dict is None or len(decoder_description_dict) == 0:
                return None
            initialized_decoder_dict = {}
            for decoder_key in decoder_description_dict:
                decoder_model, decoder_n_out = decoder_description_dict[decoder_key]
                if decoder_model is None:
                    initialized_decoder_dict[decoder_key] = nn.Sequential(nn.Linear(ninp, nhid), nn.GELU(), nn.Linear(nhid, decoder_n_out))
                else:
                    initialized_decoder_dict[decoder_key] = decoder_model(ninp, nhid, decoder_n_out)
                print('Initialized decoder for', decoder_key, 'with', decoder_description_dict[decoder_key], ' and nout', decoder_n_out)
            return torch.nn.ModuleDict(initialized_decoder_dict)

        self.decoder_dict = make_decoder_dict(decoder_dict)
        self.decoder_dict_once = make_decoder_dict(decoder_once_dict)

        # N(0,1) initialization, matching the default of nn.Embedding
        self.decoder_dict_once_embeddings = torch.nn.Parameter(torch.randn((len(self.decoder_dict_once), 1, ninp))) if self.decoder_dict_once is not None else None
        # nn.Embedding(len(self.decoder_dict.keys()), nhid)
        self.input_ln = SeqBN(ninp) if input_normalization else None
        self.style_encoder = style_encoder
        self.init_method = init_method
        if num_global_att_tokens is not None:
            assert not full_attention
        self.global_att_embeddings = nn.Embedding(num_global_att_tokens, ninp) if num_global_att_tokens else None
        self.full_attention = full_attention
        self.efficient_eval_masking = efficient_eval_masking

        self.nhid = nhid

        self.init_weights()

    def __setstate__(self, state):
        super().__setstate__(state)
        self.__dict__.setdefault('efficient_eval_masking', False)
        if not hasattr(self, 'decoder_dict_once'):
            self.__dict__.setdefault('decoder_dict_once', None)
        if hasattr(self, 'decoder') and not hasattr(self, 'decoder_dict'):
            self.add_module('decoder_dict', nn.ModuleDict({'standard': self.decoder}))
        self.__dict__.setdefault('return_all_outputs', False)

        def add_approximate_false(module):
            if isinstance(module, nn.GELU):
                module.__dict__.setdefault('approximate', 'none')

        self.apply(add_approximate_false)

    @staticmethod
    def generate_square_subsequent_mask(sz):
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        return bool_mask_to_att_mask(mask)

    @staticmethod
    def generate_D_q_matrix(sz, query_size):
        train_size = sz - query_size
        mask = torch.zeros(sz, sz) == 0
        mask[:, train_size:].zero_()
        mask |= torch.eye(sz) == 1
        return bool_mask_to_att_mask(mask)

    @staticmethod
    def generate_global_att_query_matrix(num_global_att_tokens, seq_len, num_query_tokens):
        train_size = seq_len + num_global_att_tokens - num_query_tokens
        sz = seq_len + num_global_att_tokens
        mask = torch.zeros(num_query_tokens, sz) == 0
        mask[:, train_size:].zero_()
        mask[:, train_size:] |= torch.eye(num_query_tokens) == 1
        return bool_mask_to_att_mask(mask)

    @staticmethod
    def generate_global_att_trainset_matrix(num_global_att_tokens, seq_len, num_query_tokens):
        train_size = seq_len + num_global_att_tokens - num_query_tokens
        trainset_size = seq_len - num_query_tokens
        mask = torch.zeros(trainset_size, num_global_att_tokens) == 0
        # mask[:, num_global_att_tokens:].zero_()
        # mask[:, num_global_att_tokens:] |= torch.eye(trainset_size) == 1
        return bool_mask_to_att_mask(mask)

    @staticmethod
    def generate_global_att_globaltokens_matrix(num_global_att_tokens, seq_len, num_query_tokens):
        mask = torch.zeros(num_global_att_tokens, num_global_att_tokens + seq_len - num_query_tokens) == 0
        return bool_mask_to_att_mask(mask)

    def init_weights(self):
        initrange = 1.
        # if isinstance(self.encoder, EmbeddingEncoder):
        #     self.encoder.weight.data.uniform_(-initrange, initrange)
        #     self.decoder.bias.data.zero_()
        #     self.decoder.weight.data.uniform_(-initrange, initrange)
        if self.init_method is not None:
            self.apply(self.init_method)
        for layer in self.transformer_encoder.layers:
            nn.init.zeros_(layer.linear2.weight)
            nn.init.zeros_(layer.linear2.bias)
            attns = layer.self_attn if isinstance(layer.self_attn, nn.ModuleList) else [layer.self_attn]
            for attn in attns:
                nn.init.zeros_(attn.out_proj.weight)
                nn.init.zeros_(attn.out_proj.bias)

    def forward(self, *args, **kwargs):
        """
        This will perform a forward-pass (possibly recording gradients) of the model.
        We have multiple interfaces we support with this model:

        model(train_x, train_y, test_x, src_mask=None, style=None, only_return_standard_out=True)
        model((x,y), src_mask=None, single_eval_pos=None, only_return_standard_out=True)
        model((style,x,y), src_mask=None, single_eval_pos=None, only_return_standard_out=True)
        """
        if len(args) == 3:
            # case model(train_x, train_y, test_x, src_mask=None, style=None, only_return_standard_out=True)
            assert all(kwarg in {'src_mask', 'style', 'only_return_standard_out'} for kwarg in kwargs.keys()), \
                f"Unrecognized keyword argument in kwargs: {set(kwargs.keys()) - {'src_mask', 'style', 'only_return_standard_out'}}"
            x = args[0]
            if args[2] is not None:
                x = torch.cat((x, args[2]), dim=0)
            style = kwargs.pop('style', None)
            return self._forward((style, x, args[1]), single_eval_pos=len(args[0]), **kwargs)
        elif len(args) == 1 and isinstance(args, tuple):
            # case model((x,y), src_mask=None, single_eval_pos=None, only_return_standard_out=True)
            # case model((style,x,y), src_mask=None, single_eval_pos=None, only_return_standard_out=True)
            assert all(kwarg in {'src_mask', 'single_eval_pos', 'only_return_standard_out'} for kwarg in kwargs.keys()), \
                f"Unrecognized keyword argument in kwargs: {set(kwargs.keys()) - {'src_mask', 'single_eval_pos', 'only_return_standard_out'}}"
            return self._forward(*args, **kwargs)

    def _forward(self, src, src_mask=None, single_eval_pos=None, only_return_standard_out=True):
        assert isinstance(src, tuple), 'inputs (src) have to be given as (x,y) or (style,x,y) tuple'

        if len(src) == 2:  # (x,y) and no style
            src = (None,) + src

        style_src, x_src, y_src = src

        if single_eval_pos is None:
            single_eval_pos = x_src.shape[0]

        x_src = self.encoder(x_src)

        if self.decoder_dict_once is not None:
            x_src = torch.cat([x_src, self.decoder_dict_once_embeddings.repeat(1, x_src.shape[1], 1)], dim=0)

        y_src = self.y_encoder(y_src.unsqueeze(-1) if len(y_src.shape) < len(x_src.shape) else y_src) if y_src is not None else None
        if self.style_encoder:
            assert style_src is not None, 'style_src must be given if style_encoder is used'
            style_src = self.style_encoder(style_src).unsqueeze(0)
        else:
            style_src = torch.tensor([], device=x_src.device)
        global_src = torch.tensor([], device=x_src.device) if self.global_att_embeddings is None else \
            self.global_att_embeddings.weight.unsqueeze(1).repeat(1, x_src.shape[1], 1)

        if src_mask is not None:
            assert self.global_att_embeddings is None or isinstance(src_mask, tuple)

        if src_mask is None:
            if self.global_att_embeddings is None:
                full_len = len(x_src) + len(style_src)
                if self.full_attention:
                    src_mask = bool_mask_to_att_mask(torch.ones((full_len, full_len), dtype=torch.bool)).to(x_src.device)
                elif self.efficient_eval_masking:
                    src_mask = single_eval_pos + len(style_src)
                else:
                    src_mask = self.generate_D_q_matrix(full_len, len(x_src) - single_eval_pos).to(x_src.device)
            else:
                src_mask_args = (self.global_att_embeddings.num_embeddings,
                                 len(x_src) + len(style_src),
                                 len(x_src) + len(style_src) - single_eval_pos)
                src_mask = (self.generate_global_att_globaltokens_matrix(*src_mask_args).to(x_src.device),
                            self.generate_global_att_trainset_matrix(*src_mask_args).to(x_src.device),
                            self.generate_global_att_query_matrix(*src_mask_args).to(x_src.device))

        train_x = x_src[:single_eval_pos]
        if y_src is not None:
            train_x = train_x + y_src[:single_eval_pos]
        src = torch.cat([global_src, style_src, train_x, x_src[single_eval_pos:]], 0)

        if self.input_ln is not None:
            src = self.input_ln(src)

        if self.pos_encoder is not None:
            src = self.pos_encoder(src)

        output = self.transformer_encoder(src, src_mask)

        num_prefix_positions = len(style_src) + (self.global_att_embeddings.num_embeddings if self.global_att_embeddings else 0)
        if self.return_all_outputs:
            out_range_start = num_prefix_positions
        else:
            out_range_start = single_eval_pos + num_prefix_positions

        # The line below relies on the indexing fact that `x[i:None] == x[i:]`.
        out_range_end = -len(self.decoder_dict_once_embeddings) if self.decoder_dict_once is not None else None

        # note: the 'once' outputs are counted from the end of the sequence
        output_once = {k: v(output[-(i + 1)]) for i, (k, v) in enumerate(self.decoder_dict_once.items())}\
            if self.decoder_dict_once is not None else {}

        output = {k: v(output[out_range_start:out_range_end]) for k, v in self.decoder_dict.items()}\
            if self.decoder_dict is not None else {}

        if only_return_standard_out:
            return output['standard']

        if output_once:
            return output, output_once
        return output

    @torch.no_grad()
    def init_from_small_model(self, small_model):
        assert isinstance(self.decoder, nn.Linear) and isinstance(self.encoder, (nn.Linear, nn.Sequential)) \
               and isinstance(self.y_encoder, (nn.Linear, nn.Sequential))

        def set_encoder_weights(my_encoder, small_model_encoder):
            my_encoder_linear, small_encoder_linear = (my_encoder, small_model_encoder) \
                if isinstance(my_encoder, nn.Linear) else (my_encoder[-1], small_model_encoder[-1])
            small_in_dim = small_encoder_linear.out_features
            my_encoder_linear.weight.zero_()
            my_encoder_linear.bias.zero_()
            my_encoder_linear.weight[:small_in_dim] = small_encoder_linear.weight
            my_encoder_linear.bias[:small_in_dim] = small_encoder_linear.bias

        set_encoder_weights(self.encoder, small_model.encoder)
        set_encoder_weights(self.y_encoder, small_model.y_encoder)

        small_in_dim = small_model.decoder.in_features

        self.decoder.weight[:, :small_in_dim] = small_model.decoder.weight
        self.decoder.bias = small_model.decoder.bias

        for my_layer, small_layer in zip(self.transformer_encoder.layers, small_model.transformer_encoder.layers):
            small_hid_dim = small_layer.linear1.out_features
            my_in_dim = my_layer.linear1.in_features

            # packed along q,k,v order in first dim
            my_in_proj_w = my_layer.self_attn.in_proj_weight
            small_in_proj_w = small_layer.self_attn.in_proj_weight

            my_in_proj_w.view(3, my_in_dim, my_in_dim)[:, :small_in_dim, :small_in_dim] = \
                small_in_proj_w.view(3, small_in_dim, small_in_dim)
            my_layer.self_attn.in_proj_bias.view(3, my_in_dim)[:, :small_in_dim] = \
                small_layer.self_attn.in_proj_bias.view(3, small_in_dim)

            my_layer.self_attn.out_proj.weight[:small_in_dim, :small_in_dim] = small_layer.self_attn.out_proj.weight
            my_layer.self_attn.out_proj.bias[:small_in_dim] = small_layer.self_attn.out_proj.bias

            my_layer.linear1.weight[:small_hid_dim, :small_in_dim] = small_layer.linear1.weight
            my_layer.linear1.bias[:small_hid_dim] = small_layer.linear1.bias

            my_layer.linear2.weight[:small_in_dim, :small_hid_dim] = small_layer.linear2.weight
            my_layer.linear2.bias[:small_in_dim] = small_layer.linear2.bias

            my_layer.norm1.weight[:small_in_dim] = math.sqrt(small_in_dim / my_in_dim) * small_layer.norm1.weight
            my_layer.norm2.weight[:small_in_dim] = math.sqrt(small_in_dim / my_in_dim) * small_layer.norm2.weight

            my_layer.norm1.bias[:small_in_dim] = small_layer.norm1.bias
            my_layer.norm2.bias[:small_in_dim] = small_layer.norm2.bias


class TransformerEncoderDiffInit(Module):
    r"""TransformerEncoder is a stack of N encoder layers, each created (and thus initialized) independently.

    Args:
        encoder_layer_creator: a function generating objects of TransformerEncoderLayer class without args (required).
        num_layers: the number of sub-encoder-layers in the encoder (required).
        norm: the layer normalization component (optional).
    """
    __constants__ = ['norm']

    def __init__(self, encoder_layer_creator, num_layers, norm=None):
        super().__init__()
        self.layers = nn.ModuleList([encoder_layer_creator() for _ in range(num_layers)])
        self.num_layers = num_layers
        self.norm = norm

    def forward(self, src: Tensor, mask: Optional[Tensor] = None, src_key_padding_mask: Optional[Tensor] = None) -> Tensor:
        r"""Pass the input through the encoder layers in turn.

        Args:
            src: the sequence to the encoder (required).
            mask: the mask for the src sequence (optional).
            src_key_padding_mask: the mask for the src keys per batch (optional).

        Shape:
            see the docs in Transformer class.
        """
        output = src

        for mod in self.layers:
            output = mod(output, src_mask=mask, src_key_padding_mask=src_key_padding_mask)

        if self.norm is not None:
            output = self.norm(output)

        return output
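As a quick orientation for the interfaces listed in the forward() docstring, here is a hedged usage sketch using the (x, y)-tuple convention. It assumes seq-first shapes ((seq_len, batch, features) for x), that plain nn.Linear modules are acceptable encoders, and that the package's custom TransformerEncoderLayer handles the integer src_mask produced by efficient_eval_masking; none of these specifics are spelled out by this file alone.

import torch
import torch.nn as nn
from pfns4bo.transformer import TransformerModel

ninp, nhid, features, n_out = 64, 128, 10, 100
model = TransformerModel(
    encoder=nn.Linear(features, ninp),         # x encoder
    ninp=ninp, nhead=4, nhid=nhid, nlayers=2,
    y_encoder=nn.Linear(1, ninp),              # y is unsqueezed to (..., 1)
    decoder_dict={'standard': (None, n_out)},  # None -> Linear/GELU/Linear head
)

x = torch.randn(8, 2, features)  # 8 points, batch of 2 datasets
y = torch.randn(8, 2)
out = model((x, y), single_eval_pos=5)  # first 5 points are the train context
print(out.shape)  # (3, 2, n_out): predictions for the 3 query points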
pfns4bo/__init__.py
ADDED
@@ -0,0 +1,50 @@
import os

model_path = 'final_models'

def prepare_models():
    pfns4bo_dir = os.path.dirname(__file__)
    model_names = ['hebo_morebudget_9_unused_features_3_userpriorperdim2_8.pt',
                   'model_sampled_warp_simple_mlp_for_hpob_46.pt',
                   'model_hebo_morebudget_9_unused_features_3.pt']

    for name in model_names:
        weights_path = os.path.join(pfns4bo_dir, model_path, name)
        compressed_weights_path = os.path.join(pfns4bo_dir, model_path, name + '.gz')
        if not os.path.exists(weights_path):
            if not os.path.exists(compressed_weights_path):
                print("Downloading", os.path.abspath(compressed_weights_path))
                import requests
                url = f'https://github.com/automl/PFNs4BO/raw/main/pfns4bo/final_models/{name + ".gz"}'
                r = requests.get(url, allow_redirects=True)
                os.makedirs(os.path.dirname(compressed_weights_path), exist_ok=True)
                with open(compressed_weights_path, 'wb') as f:
                    f.write(r.content)
            if os.path.exists(compressed_weights_path):
                print("Unzipping", name)
                os.system(f"gzip -dk {compressed_weights_path}")
            else:
                print("Failed to find", compressed_weights_path)
                print("Make sure you have an internet connection to download the model automatically..")
        if os.path.exists(weights_path):
            print("Successfully located model at", weights_path)


model_dict = {
    'hebo_plus_userprior_model': os.path.join(os.path.dirname(__file__), model_path,
                                              'hebo_morebudget_9_unused_features_3_userpriorperdim2_8.pt'),
    'hebo_plus_model': os.path.join(os.path.dirname(__file__), model_path,
                                    'model_hebo_morebudget_9_unused_features_3.pt'),
    'bnn_model': os.path.join(os.path.dirname(__file__), model_path, 'model_sampled_warp_simple_mlp_for_hpob_46.pt')
}


def __getattr__(name):
    if name in model_dict:
        if not os.path.exists(model_dict[name]):
            print("Can't find", os.path.abspath(model_dict[name]), "thus unzipping/downloading models now.")
            print("This might take a while..")
            prepare_models()
        return model_dict[name]
    raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
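Note the module-level __getattr__ (PEP 562): the model paths in model_dict resolve lazily, so the first attribute access downloads and unzips the weights if they are missing. A short usage sketch; loading with torch.load is an assumption about how these checkpoints were serialized.

import torch
import pfns4bo

# First access triggers prepare_models() when the .pt file is absent,
# then returns the path string from model_dict.
path = pfns4bo.hebo_plus_model
model = torch.load(path, map_location='cpu')  # assumption: torch-serialized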
pfns4bo/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (1.85 kB)
pfns4bo/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (3.99 kB)
pfns4bo/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (1.83 kB)
pfns4bo/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (1.85 kB)
pfns4bo/__pycache__/bar_distribution.cpython-310.pyc
ADDED
Binary file (15.6 kB)
pfns4bo/__pycache__/bar_distribution.cpython-311.pyc
ADDED
Binary file (33.3 kB)