File size: 14,749 Bytes
1960aa4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 |
{
"best_metric": null,
"best_model_checkpoint": null,
"best_supernet_model_checkpoint": null,
"epoch": 12.0,
"global_step": 3216,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])",
"epoch": 1.0,
"eval_loss": 0.7149681448936462,
"eval_matthews_correlation": 0.3947290430655349,
"eval_runtime": 1.1039,
"eval_samples_per_second": 944.822,
"eval_steps_per_second": 8.153,
"step": 268
},
{
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])",
"epoch": 1.0,
"eval_loss": 0.6399039030075073,
"eval_matthews_correlation": 0.5222017375430389,
"eval_runtime": 1.6156,
"eval_samples_per_second": 645.592,
"eval_steps_per_second": 5.571,
"step": 268
},
{
"compression_loss": 0.0,
"epoch": 1.87,
"learning_rate": 1.884896652217917e-05,
"loss": 0.8522,
"step": 500
},
{
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])",
"epoch": 2.0,
"eval_loss": 0.7286580801010132,
"eval_matthews_correlation": 0.4630242094821006,
"eval_runtime": 1.1443,
"eval_samples_per_second": 911.446,
"eval_steps_per_second": 7.865,
"step": 536
},
{
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])",
"epoch": 2.0,
"eval_loss": 0.6622146964073181,
"eval_matthews_correlation": 0.5624034645452709,
"eval_runtime": 1.4549,
"eval_samples_per_second": 716.899,
"eval_steps_per_second": 6.186,
"step": 536
},
{
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])",
"epoch": 3.0,
"eval_loss": 0.7320300340652466,
"eval_matthews_correlation": 0.4774992729117021,
"eval_runtime": 1.179,
"eval_samples_per_second": 884.654,
"eval_steps_per_second": 7.634,
"step": 804
},
{
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])",
"epoch": 3.0,
"eval_loss": 0.6782493591308594,
"eval_matthews_correlation": 0.557289393094284,
"eval_runtime": 1.4778,
"eval_samples_per_second": 705.795,
"eval_steps_per_second": 6.09,
"step": 804
},
{
"compression_loss": 0.0,
"epoch": 3.73,
"learning_rate": 1.5644725411751336e-05,
"loss": 0.3135,
"step": 1000
},
{
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])",
"epoch": 4.0,
"eval_loss": 0.8995152711868286,
"eval_matthews_correlation": 0.48304108757820724,
"eval_runtime": 1.126,
"eval_samples_per_second": 926.26,
"eval_steps_per_second": 7.993,
"step": 1072
},
{
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])",
"epoch": 4.0,
"eval_loss": 0.7692049741744995,
"eval_matthews_correlation": 0.554912808282685,
"eval_runtime": 1.5919,
"eval_samples_per_second": 655.204,
"eval_steps_per_second": 5.654,
"step": 1072
},
{
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])",
"epoch": 5.0,
"eval_loss": 0.826237142086029,
"eval_matthews_correlation": 0.5107428879748117,
"eval_runtime": 1.1006,
"eval_samples_per_second": 947.693,
"eval_steps_per_second": 8.178,
"step": 1340
},
{
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])",
"epoch": 5.0,
"eval_loss": 0.6900777816772461,
"eval_matthews_correlation": 0.5834463254140851,
"eval_runtime": 1.7324,
"eval_samples_per_second": 602.048,
"eval_steps_per_second": 5.195,
"step": 1340
},
{
"compression_loss": 0.0,
"epoch": 5.6,
"learning_rate": 1.1130738016122495e-05,
"loss": 0.155,
"step": 1500
},
{
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])",
"epoch": 6.0,
"eval_loss": 0.8721849918365479,
"eval_matthews_correlation": 0.5075813582663956,
"eval_runtime": 0.8756,
"eval_samples_per_second": 1191.142,
"eval_steps_per_second": 10.278,
"step": 1608
},
{
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])",
"epoch": 6.0,
"eval_loss": 0.7215332388877869,
"eval_matthews_correlation": 0.5924834238001306,
"eval_runtime": 1.2201,
"eval_samples_per_second": 854.87,
"eval_steps_per_second": 7.377,
"step": 1608
},
{
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])",
"epoch": 7.0,
"eval_loss": 0.9455906748771667,
"eval_matthews_correlation": 0.5053968650343078,
"eval_runtime": 0.88,
"eval_samples_per_second": 1185.181,
"eval_steps_per_second": 10.227,
"step": 1876
},
{
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])",
"epoch": 7.0,
"eval_loss": 0.8112756013870239,
"eval_matthews_correlation": 0.5764508680057442,
"eval_runtime": 1.2263,
"eval_samples_per_second": 850.555,
"eval_steps_per_second": 7.339,
"step": 1876
},
{
"compression_loss": 0.0,
"epoch": 7.46,
"learning_rate": 6.354385348824488e-06,
"loss": 0.0957,
"step": 2000
},
{
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])",
"epoch": 8.0,
"eval_loss": 0.9190700650215149,
"eval_matthews_correlation": 0.5049093009936784,
"eval_runtime": 0.8638,
"eval_samples_per_second": 1207.492,
"eval_steps_per_second": 10.419,
"step": 2144
},
{
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])",
"epoch": 8.0,
"eval_loss": 0.7810819149017334,
"eval_matthews_correlation": 0.5885471185335819,
"eval_runtime": 1.2436,
"eval_samples_per_second": 838.718,
"eval_steps_per_second": 7.237,
"step": 2144
},
{
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])",
"epoch": 9.0,
"eval_loss": 0.9647462964057922,
"eval_matthews_correlation": 0.49938409054607086,
"eval_runtime": 0.8845,
"eval_samples_per_second": 1179.158,
"eval_steps_per_second": 10.175,
"step": 2412
},
{
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])",
"epoch": 9.0,
"eval_loss": 0.8086517453193665,
"eval_matthews_correlation": 0.5598395777855655,
"eval_runtime": 1.2424,
"eval_samples_per_second": 839.53,
"eval_steps_per_second": 7.244,
"step": 2412
},
{
"compression_loss": 0.0,
"epoch": 9.33,
"learning_rate": 2.4303047703271643e-06,
"loss": 0.0729,
"step": 2500
},
{
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])",
"epoch": 10.0,
"eval_loss": 0.9290460348129272,
"eval_matthews_correlation": 0.49895377962487897,
"eval_runtime": 0.8983,
"eval_samples_per_second": 1161.144,
"eval_steps_per_second": 10.019,
"step": 2680
},
{
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])",
"epoch": 10.0,
"eval_loss": 0.8078813552856445,
"eval_matthews_correlation": 0.575435670477595,
"eval_runtime": 1.2176,
"eval_samples_per_second": 856.621,
"eval_steps_per_second": 7.392,
"step": 2680
},
{
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])",
"epoch": 11.0,
"eval_loss": 0.9496363997459412,
"eval_matthews_correlation": 0.49816788996099576,
"eval_runtime": 0.9041,
"eval_samples_per_second": 1153.571,
"eval_steps_per_second": 9.954,
"step": 2948
},
{
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])",
"epoch": 11.0,
"eval_loss": 0.8123825788497925,
"eval_matthews_correlation": 0.5727969336224868,
"eval_runtime": 1.2393,
"eval_samples_per_second": 841.575,
"eval_steps_per_second": 7.262,
"step": 2948
},
{
"compression_loss": 0.0,
"epoch": 11.19,
"learning_rate": 2.5352335693478615e-07,
"loss": 0.0626,
"step": 3000
},
{
"Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 512, 1: 256, 2: 320, 3: 448, 4: 640, 5: 640, 6: 768, 7: 576, 8: 448, 9: 256, 10: 384, 11: 320, 12: 949, 13: 959, 14: 1110, 15: 1096, 16: 1158, 17: 1062, 18: 1028, 19: 1014, 20: 670, 21: 436, 22: 348, 23: 370})])",
"epoch": 12.0,
"eval_loss": 0.9496448040008545,
"eval_matthews_correlation": 0.49816788996099576,
"eval_runtime": 0.8999,
"eval_samples_per_second": 1159.047,
"eval_steps_per_second": 10.001,
"step": 3216
},
{
"SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])",
"epoch": 12.0,
"eval_loss": 0.8131102323532104,
"eval_matthews_correlation": 0.5727969336224868,
"eval_runtime": 1.2698,
"eval_samples_per_second": 821.371,
"eval_steps_per_second": 7.088,
"step": 3216
},
{
"epoch": 12.0,
"step": 3216,
"total_flos": 6749587903150080.0,
"train_loss": 0.244898120265695,
"train_runtime": 5953.5704,
"train_samples_per_second": 17.235,
"train_steps_per_second": 0.54
}
],
"max_steps": 3216,
"min_subnet_acc": null,
"min_subnet_best_acc": null,
"num_train_epochs": 12,
"supernet_acc": null,
"supernet_best_acc": null,
"total_flos": 6749587903150080.0,
"trial_name": null,
"trial_params": null
}
|