|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.5242463958060288, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.999e-06, |
|
"loss": 6.1641, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.994000000000001e-06, |
|
"loss": 5.275, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.989e-06, |
|
"loss": 4.8629, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.984e-06, |
|
"loss": 4.8023, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.979e-06, |
|
"loss": 4.7687, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.974e-06, |
|
"loss": 4.7188, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.969e-06, |
|
"loss": 4.6258, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.964e-06, |
|
"loss": 4.6254, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.959e-06, |
|
"loss": 4.5867, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.954e-06, |
|
"loss": 4.6207, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.949e-06, |
|
"loss": 4.6086, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.944e-06, |
|
"loss": 4.5559, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.939000000000001e-06, |
|
"loss": 4.5836, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.934e-06, |
|
"loss": 4.5121, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.929000000000001e-06, |
|
"loss": 4.5234, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.924e-06, |
|
"loss": 4.4992, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.919000000000001e-06, |
|
"loss": 4.4891, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.914e-06, |
|
"loss": 4.4688, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.909000000000001e-06, |
|
"loss": 4.4836, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.904e-06, |
|
"loss": 4.4363, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.899000000000001e-06, |
|
"loss": 4.4215, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.894e-06, |
|
"loss": 4.4469, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.889000000000001e-06, |
|
"loss": 4.3793, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.884e-06, |
|
"loss": 4.3934, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.879000000000001e-06, |
|
"loss": 4.3309, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.874e-06, |
|
"loss": 4.3875, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.869000000000002e-06, |
|
"loss": 4.4262, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.864e-06, |
|
"loss": 4.4285, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.859e-06, |
|
"loss": 4.3965, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.854000000000001e-06, |
|
"loss": 4.3359, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.849e-06, |
|
"loss": 4.4348, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.844000000000001e-06, |
|
"loss": 4.3152, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.839e-06, |
|
"loss": 4.3402, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.834000000000001e-06, |
|
"loss": 4.316, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.829e-06, |
|
"loss": 4.2969, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.824000000000001e-06, |
|
"loss": 4.2867, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.819e-06, |
|
"loss": 4.3902, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.814000000000001e-06, |
|
"loss": 4.2656, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.809e-06, |
|
"loss": 4.3797, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.804000000000001e-06, |
|
"loss": 4.2863, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.799e-06, |
|
"loss": 4.275, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.794000000000001e-06, |
|
"loss": 4.2891, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.789e-06, |
|
"loss": 4.3059, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.784000000000002e-06, |
|
"loss": 4.3832, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.779e-06, |
|
"loss": 4.3055, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.774000000000002e-06, |
|
"loss": 4.3008, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.769e-06, |
|
"loss": 4.25, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.764000000000002e-06, |
|
"loss": 4.2676, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.759000000000001e-06, |
|
"loss": 4.2422, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.754000000000002e-06, |
|
"loss": 4.3137, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.749000000000001e-06, |
|
"loss": 4.2555, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.744000000000002e-06, |
|
"loss": 4.2637, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.739000000000001e-06, |
|
"loss": 4.2996, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.734000000000002e-06, |
|
"loss": 4.3176, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.729000000000001e-06, |
|
"loss": 4.252, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.724e-06, |
|
"loss": 4.266, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.719000000000001e-06, |
|
"loss": 4.1748, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.714e-06, |
|
"loss": 4.3008, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.709000000000001e-06, |
|
"loss": 4.268, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.704e-06, |
|
"loss": 4.2523, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.699e-06, |
|
"loss": 4.3445, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.694e-06, |
|
"loss": 4.298, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.689e-06, |
|
"loss": 4.2809, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.684e-06, |
|
"loss": 4.2234, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.679e-06, |
|
"loss": 4.227, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.674000000000001e-06, |
|
"loss": 4.2605, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.669e-06, |
|
"loss": 4.2268, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.664000000000001e-06, |
|
"loss": 4.1734, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.659e-06, |
|
"loss": 4.2355, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.654000000000001e-06, |
|
"loss": 4.2123, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.649e-06, |
|
"loss": 4.1396, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.644000000000001e-06, |
|
"loss": 4.1869, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.639e-06, |
|
"loss": 4.2148, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.634000000000001e-06, |
|
"loss": 4.1201, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.629e-06, |
|
"loss": 4.1891, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.624000000000001e-06, |
|
"loss": 4.118, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.619e-06, |
|
"loss": 4.1359, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.614000000000001e-06, |
|
"loss": 4.1469, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.609e-06, |
|
"loss": 4.1941, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.604000000000002e-06, |
|
"loss": 4.1219, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.599e-06, |
|
"loss": 4.0951, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.594000000000002e-06, |
|
"loss": 4.1387, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.589000000000001e-06, |
|
"loss": 4.0973, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.584000000000002e-06, |
|
"loss": 4.1551, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.579000000000001e-06, |
|
"loss": 4.1883, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.574000000000002e-06, |
|
"loss": 4.2137, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.569000000000001e-06, |
|
"loss": 4.1748, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.564e-06, |
|
"loss": 4.1664, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.559000000000001e-06, |
|
"loss": 4.0812, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.554e-06, |
|
"loss": 4.2215, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.549000000000001e-06, |
|
"loss": 4.175, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.544e-06, |
|
"loss": 4.0766, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.539e-06, |
|
"loss": 4.0873, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.534e-06, |
|
"loss": 4.1316, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.529e-06, |
|
"loss": 4.108, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.524e-06, |
|
"loss": 4.1691, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.519e-06, |
|
"loss": 4.1154, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.514e-06, |
|
"loss": 4.1035, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.509e-06, |
|
"loss": 4.1293, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.504e-06, |
|
"loss": 4.1734, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.499e-06, |
|
"loss": 4.0504, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.494000000000001e-06, |
|
"loss": 4.048, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.489e-06, |
|
"loss": 4.1066, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.484000000000001e-06, |
|
"loss": 4.1354, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.479e-06, |
|
"loss": 4.1238, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.474000000000001e-06, |
|
"loss": 4.1232, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.469e-06, |
|
"loss": 4.1252, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.464000000000001e-06, |
|
"loss": 4.0975, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.459e-06, |
|
"loss": 4.1111, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.454000000000001e-06, |
|
"loss": 4.0047, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.449e-06, |
|
"loss": 4.0992, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.444000000000001e-06, |
|
"loss": 4.0734, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.439e-06, |
|
"loss": 4.0809, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.434000000000001e-06, |
|
"loss": 4.101, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.429e-06, |
|
"loss": 4.0662, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.424000000000002e-06, |
|
"loss": 4.1041, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.419e-06, |
|
"loss": 4.0564, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.414000000000002e-06, |
|
"loss": 4.0986, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.409000000000001e-06, |
|
"loss": 4.0309, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.404e-06, |
|
"loss": 4.0605, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.399000000000001e-06, |
|
"loss": 4.0857, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.394e-06, |
|
"loss": 4.1307, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.389000000000001e-06, |
|
"loss": 4.06, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.384e-06, |
|
"loss": 4.0039, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.379000000000001e-06, |
|
"loss": 4.0258, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.374e-06, |
|
"loss": 4.0738, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.369e-06, |
|
"loss": 4.0551, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.364e-06, |
|
"loss": 4.0518, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.359e-06, |
|
"loss": 4.0584, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.354e-06, |
|
"loss": 4.1109, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.349e-06, |
|
"loss": 3.9898, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.344e-06, |
|
"loss": 4.1406, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.339e-06, |
|
"loss": 4.0725, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.334e-06, |
|
"loss": 4.0207, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.329e-06, |
|
"loss": 4.0826, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.324000000000001e-06, |
|
"loss": 4.1059, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.319e-06, |
|
"loss": 3.9967, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.314000000000001e-06, |
|
"loss": 4.0328, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.309e-06, |
|
"loss": 3.9918, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.304000000000001e-06, |
|
"loss": 4.0434, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.299e-06, |
|
"loss": 3.9584, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.294000000000001e-06, |
|
"loss": 4.0551, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.289e-06, |
|
"loss": 3.9684, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.284000000000001e-06, |
|
"loss": 4.0221, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.279e-06, |
|
"loss": 3.985, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.274000000000001e-06, |
|
"loss": 4.0648, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.269e-06, |
|
"loss": 4.0109, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.264000000000001e-06, |
|
"loss": 3.9553, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.259e-06, |
|
"loss": 3.9904, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.254000000000002e-06, |
|
"loss": 3.9719, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.249e-06, |
|
"loss": 3.8973, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.244e-06, |
|
"loss": 3.9936, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.239e-06, |
|
"loss": 3.9498, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.234e-06, |
|
"loss": 3.9557, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.229000000000001e-06, |
|
"loss": 3.9266, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.224e-06, |
|
"loss": 3.9543, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.219000000000001e-06, |
|
"loss": 3.9732, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.214e-06, |
|
"loss": 3.9762, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.209000000000001e-06, |
|
"loss": 4.0695, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.204e-06, |
|
"loss": 3.9869, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.199000000000001e-06, |
|
"loss": 4.0061, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.194e-06, |
|
"loss": 4.0121, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.189000000000001e-06, |
|
"loss": 3.9105, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.184e-06, |
|
"loss": 3.8631, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.179000000000001e-06, |
|
"loss": 3.9498, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.174e-06, |
|
"loss": 3.9451, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.169000000000001e-06, |
|
"loss": 3.951, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.164e-06, |
|
"loss": 3.9297, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.159000000000002e-06, |
|
"loss": 3.9771, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.154e-06, |
|
"loss": 4.0842, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.149000000000002e-06, |
|
"loss": 3.8865, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.144000000000001e-06, |
|
"loss": 3.9312, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.139000000000002e-06, |
|
"loss": 3.8875, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.134000000000001e-06, |
|
"loss": 4.0389, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.129000000000002e-06, |
|
"loss": 3.9568, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.124000000000001e-06, |
|
"loss": 3.9541, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.119000000000002e-06, |
|
"loss": 3.9092, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.114000000000001e-06, |
|
"loss": 3.9404, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.109e-06, |
|
"loss": 3.9371, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.104000000000001e-06, |
|
"loss": 3.9477, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.099e-06, |
|
"loss": 3.9469, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.094000000000001e-06, |
|
"loss": 3.9191, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.089e-06, |
|
"loss": 3.9527, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.084e-06, |
|
"loss": 3.8934, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.079e-06, |
|
"loss": 3.9773, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.074e-06, |
|
"loss": 3.823, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.069e-06, |
|
"loss": 3.8857, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.064e-06, |
|
"loss": 3.9092, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.059000000000001e-06, |
|
"loss": 3.8338, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.054e-06, |
|
"loss": 3.9457, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.049000000000001e-06, |
|
"loss": 3.8869, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.044e-06, |
|
"loss": 3.8594, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.039000000000001e-06, |
|
"loss": 4.0318, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.034e-06, |
|
"loss": 3.8469, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.029000000000001e-06, |
|
"loss": 3.8367, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.024e-06, |
|
"loss": 3.8814, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.019000000000001e-06, |
|
"loss": 3.8818, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.014e-06, |
|
"loss": 3.908, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.009000000000001e-06, |
|
"loss": 3.9705, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.004e-06, |
|
"loss": 3.9086, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 8.999000000000001e-06, |
|
"loss": 3.9795, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 8.994e-06, |
|
"loss": 3.8629, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.989000000000002e-06, |
|
"loss": 3.8287, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.984e-06, |
|
"loss": 3.8717, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.979000000000002e-06, |
|
"loss": 3.8865, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.974e-06, |
|
"loss": 3.8344, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.969000000000002e-06, |
|
"loss": 3.9541, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.964000000000001e-06, |
|
"loss": 3.8318, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.959000000000002e-06, |
|
"loss": 3.9328, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.954000000000001e-06, |
|
"loss": 3.8621, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.949e-06, |
|
"loss": 3.7871, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.944000000000001e-06, |
|
"loss": 3.8988, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.939e-06, |
|
"loss": 3.8232, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.934000000000001e-06, |
|
"loss": 3.8816, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.929e-06, |
|
"loss": 3.8775, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.924e-06, |
|
"loss": 3.8115, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.919e-06, |
|
"loss": 3.7941, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.914e-06, |
|
"loss": 3.8678, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.909e-06, |
|
"loss": 3.8215, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.904e-06, |
|
"loss": 3.79, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.899e-06, |
|
"loss": 3.8092, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.894e-06, |
|
"loss": 3.79, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.889e-06, |
|
"loss": 3.8162, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.884e-06, |
|
"loss": 3.8568, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.879000000000001e-06, |
|
"loss": 3.867, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.874e-06, |
|
"loss": 3.7988, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.869000000000001e-06, |
|
"loss": 3.8088, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.864e-06, |
|
"loss": 3.7711, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.859000000000001e-06, |
|
"loss": 3.7242, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.854e-06, |
|
"loss": 3.8512, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.849000000000001e-06, |
|
"loss": 3.8945, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.844e-06, |
|
"loss": 3.8687, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.839000000000001e-06, |
|
"loss": 3.7533, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.834e-06, |
|
"loss": 3.8707, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.829000000000001e-06, |
|
"loss": 3.8086, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.824e-06, |
|
"loss": 3.7467, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.819000000000001e-06, |
|
"loss": 3.8078, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.814e-06, |
|
"loss": 3.7465, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.809000000000002e-06, |
|
"loss": 3.7955, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.804e-06, |
|
"loss": 3.8281, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.799000000000002e-06, |
|
"loss": 3.8035, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.794e-06, |
|
"loss": 3.7963, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.789e-06, |
|
"loss": 3.8061, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.784000000000001e-06, |
|
"loss": 3.777, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.779e-06, |
|
"loss": 3.7582, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.774000000000001e-06, |
|
"loss": 3.7725, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.769e-06, |
|
"loss": 3.7516, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.764e-06, |
|
"loss": 3.8543, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.759e-06, |
|
"loss": 3.8566, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.754e-06, |
|
"loss": 3.7695, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.749e-06, |
|
"loss": 3.8271, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.744e-06, |
|
"loss": 3.773, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.739e-06, |
|
"loss": 3.7283, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.734e-06, |
|
"loss": 3.7822, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.729e-06, |
|
"loss": 3.7816, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.724e-06, |
|
"loss": 3.751, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.719e-06, |
|
"loss": 3.8271, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.714e-06, |
|
"loss": 3.7195, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.709e-06, |
|
"loss": 3.7584, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.704e-06, |
|
"loss": 3.7889, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.699000000000001e-06, |
|
"loss": 3.8529, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.694e-06, |
|
"loss": 3.8166, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.689000000000001e-06, |
|
"loss": 3.7484, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.684e-06, |
|
"loss": 3.8014, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.679000000000001e-06, |
|
"loss": 3.7658, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.674e-06, |
|
"loss": 3.7834, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.669000000000001e-06, |
|
"loss": 3.7973, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.664e-06, |
|
"loss": 3.7607, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.659000000000001e-06, |
|
"loss": 3.7381, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.654e-06, |
|
"loss": 3.751, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.649000000000001e-06, |
|
"loss": 3.7201, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.644e-06, |
|
"loss": 3.7969, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.639000000000001e-06, |
|
"loss": 3.7773, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.634e-06, |
|
"loss": 3.7752, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.629e-06, |
|
"loss": 3.6992, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.624e-06, |
|
"loss": 3.651, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.619e-06, |
|
"loss": 3.7598, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.614000000000001e-06, |
|
"loss": 3.7367, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.609e-06, |
|
"loss": 3.6896, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.604000000000001e-06, |
|
"loss": 3.7732, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.599e-06, |
|
"loss": 3.7836, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.594000000000001e-06, |
|
"loss": 3.7854, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.589e-06, |
|
"loss": 3.701, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.584000000000001e-06, |
|
"loss": 3.7652, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.579e-06, |
|
"loss": 3.775, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.574000000000001e-06, |
|
"loss": 3.7207, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.569e-06, |
|
"loss": 3.71, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.564000000000001e-06, |
|
"loss": 3.7359, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.559e-06, |
|
"loss": 3.6854, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.554000000000001e-06, |
|
"loss": 3.7342, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.549e-06, |
|
"loss": 3.6707, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.544000000000002e-06, |
|
"loss": 3.6596, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.539e-06, |
|
"loss": 3.6711, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.534000000000002e-06, |
|
"loss": 3.7279, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.529e-06, |
|
"loss": 3.7115, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.524000000000002e-06, |
|
"loss": 3.7139, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.519000000000001e-06, |
|
"loss": 3.674, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.514000000000002e-06, |
|
"loss": 3.6191, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.509000000000001e-06, |
|
"loss": 3.6361, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.504000000000002e-06, |
|
"loss": 3.7717, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.499000000000001e-06, |
|
"loss": 3.6355, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.494e-06, |
|
"loss": 3.8113, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.489000000000001e-06, |
|
"loss": 3.7465, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.484e-06, |
|
"loss": 3.8033, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.479000000000001e-06, |
|
"loss": 3.6867, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.474e-06, |
|
"loss": 3.7062, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.469e-06, |
|
"loss": 3.726, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.464e-06, |
|
"loss": 3.6432, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.459e-06, |
|
"loss": 3.6943, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.454e-06, |
|
"loss": 3.6127, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.449e-06, |
|
"loss": 3.6529, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.444e-06, |
|
"loss": 3.6063, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.439e-06, |
|
"loss": 3.7633, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.434000000000001e-06, |
|
"loss": 3.6211, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.429e-06, |
|
"loss": 3.6895, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.424000000000001e-06, |
|
"loss": 3.6152, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.419e-06, |
|
"loss": 3.6549, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.414000000000001e-06, |
|
"loss": 3.6502, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.409e-06, |
|
"loss": 3.5689, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.404000000000001e-06, |
|
"loss": 3.7002, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.399e-06, |
|
"loss": 3.5998, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.394000000000001e-06, |
|
"loss": 3.7164, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.389e-06, |
|
"loss": 3.6006, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.384000000000001e-06, |
|
"loss": 3.5586, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.379e-06, |
|
"loss": 3.6801, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.374000000000001e-06, |
|
"loss": 3.601, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.369e-06, |
|
"loss": 3.6344, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.364000000000002e-06, |
|
"loss": 3.6637, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.359e-06, |
|
"loss": 3.6357, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.354000000000002e-06, |
|
"loss": 3.652, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.349000000000001e-06, |
|
"loss": 3.6439, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.344000000000002e-06, |
|
"loss": 3.6051, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.339000000000001e-06, |
|
"loss": 3.6207, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.334e-06, |
|
"loss": 3.6059, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.329000000000001e-06, |
|
"loss": 3.7102, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.324e-06, |
|
"loss": 3.5629, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.319000000000001e-06, |
|
"loss": 3.6357, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.314e-06, |
|
"loss": 3.6416, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.309e-06, |
|
"loss": 3.6572, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.304e-06, |
|
"loss": 3.6244, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.299e-06, |
|
"loss": 3.677, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.294e-06, |
|
"loss": 3.6006, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.289e-06, |
|
"loss": 3.7182, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.284e-06, |
|
"loss": 3.6451, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.279e-06, |
|
"loss": 3.508, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.274e-06, |
|
"loss": 3.6182, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.269e-06, |
|
"loss": 3.5447, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.264e-06, |
|
"loss": 3.5941, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.259e-06, |
|
"loss": 3.5094, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.254000000000001e-06, |
|
"loss": 3.5988, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.249e-06, |
|
"loss": 3.6652, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.244000000000001e-06, |
|
"loss": 3.5957, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.239e-06, |
|
"loss": 3.5326, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.234000000000001e-06, |
|
"loss": 3.5537, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.229e-06, |
|
"loss": 3.5834, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.224000000000001e-06, |
|
"loss": 3.5666, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.219e-06, |
|
"loss": 3.6174, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.214000000000001e-06, |
|
"loss": 3.5148, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.209e-06, |
|
"loss": 3.5037, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.204000000000001e-06, |
|
"loss": 3.6, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.199e-06, |
|
"loss": 3.5457, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.194000000000002e-06, |
|
"loss": 3.5021, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.189e-06, |
|
"loss": 3.509, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.184000000000002e-06, |
|
"loss": 3.5457, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.179e-06, |
|
"loss": 3.5449, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.174e-06, |
|
"loss": 3.5832, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.169000000000001e-06, |
|
"loss": 3.4852, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.164e-06, |
|
"loss": 3.6166, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.159000000000001e-06, |
|
"loss": 3.5248, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.154e-06, |
|
"loss": 3.5617, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.149e-06, |
|
"loss": 3.5119, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.144e-06, |
|
"loss": 3.5475, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.139e-06, |
|
"loss": 3.5646, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.134e-06, |
|
"loss": 3.4521, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.129e-06, |
|
"loss": 3.492, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.124e-06, |
|
"loss": 3.6187, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.119e-06, |
|
"loss": 3.4984, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.114e-06, |
|
"loss": 3.5744, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.109e-06, |
|
"loss": 3.5514, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.104e-06, |
|
"loss": 3.4807, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.099e-06, |
|
"loss": 3.5049, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.094e-06, |
|
"loss": 3.5098, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.089e-06, |
|
"loss": 3.4152, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.084000000000001e-06, |
|
"loss": 3.4281, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.079e-06, |
|
"loss": 3.5766, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.074000000000001e-06, |
|
"loss": 3.4908, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.069e-06, |
|
"loss": 3.5432, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.064000000000001e-06, |
|
"loss": 3.5154, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.059e-06, |
|
"loss": 3.4568, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.054000000000001e-06, |
|
"loss": 3.5314, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.049e-06, |
|
"loss": 3.5516, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.044000000000001e-06, |
|
"loss": 3.4271, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.039e-06, |
|
"loss": 3.4174, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.034000000000001e-06, |
|
"loss": 3.5492, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.029e-06, |
|
"loss": 3.568, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.024000000000001e-06, |
|
"loss": 3.5455, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.019e-06, |
|
"loss": 3.5598, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.014e-06, |
|
"loss": 3.5848, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.009e-06, |
|
"loss": 3.4631, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.004e-06, |
|
"loss": 3.3873, |
|
"step": 2000 |
|
} |
|
], |
|
"max_steps": 10000, |
|
"num_train_epochs": 3, |
|
"total_flos": 1.5936160471711744e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|