{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.2051282051282053,
  "eval_steps": 500,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 4.9903846153846154e-05,
      "loss": 5.8218,
      "step": 6
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.980769230769231e-05,
      "loss": 5.2499,
      "step": 12
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.9711538461538465e-05,
      "loss": 5.1236,
      "step": 18
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.961538461538462e-05,
      "loss": 4.8363,
      "step": 24
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.9519230769230776e-05,
      "loss": 4.5805,
      "step": 30
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.942307692307693e-05,
      "loss": 4.5276,
      "step": 36
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.932692307692308e-05,
      "loss": 4.3871,
      "step": 42
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.923076923076924e-05,
      "loss": 4.2706,
      "step": 48
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.9134615384615384e-05,
      "loss": 4.0906,
      "step": 54
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.9038461538461536e-05,
      "loss": 4.1704,
      "step": 60
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.8942307692307695e-05,
      "loss": 3.9014,
      "step": 66
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.884615384615385e-05,
      "loss": 4.0338,
      "step": 72
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.875e-05,
      "loss": 4.011,
      "step": 78
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.865384615384616e-05,
      "loss": 3.8017,
      "step": 84
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.855769230769231e-05,
      "loss": 3.819,
      "step": 90
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.846153846153846e-05,
      "loss": 3.8157,
      "step": 96
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.836538461538462e-05,
      "loss": 3.7675,
      "step": 102
    },
    {
      "epoch": 0.35,
      "learning_rate": 4.826923076923077e-05,
      "loss": 3.756,
      "step": 108
    },
    {
      "epoch": 0.37,
      "learning_rate": 4.8173076923076925e-05,
      "loss": 3.7439,
      "step": 114
    },
    {
      "epoch": 0.38,
      "learning_rate": 4.8076923076923084e-05,
      "loss": 3.761,
      "step": 120
    },
    {
      "epoch": 0.4,
      "learning_rate": 4.7980769230769236e-05,
      "loss": 3.7318,
      "step": 126
    },
    {
      "epoch": 0.42,
      "learning_rate": 4.788461538461539e-05,
      "loss": 3.745,
      "step": 132
    },
    {
      "epoch": 0.44,
      "learning_rate": 4.778846153846154e-05,
      "loss": 3.784,
      "step": 138
    },
    {
      "epoch": 0.46,
      "learning_rate": 4.76923076923077e-05,
      "loss": 3.6114,
      "step": 144
    },
    {
      "epoch": 0.48,
      "learning_rate": 4.7596153846153844e-05,
      "loss": 3.637,
      "step": 150
    },
    {
      "epoch": 0.5,
      "learning_rate": 4.75e-05,
      "loss": 3.5881,
      "step": 156
    },
    {
      "epoch": 0.52,
      "learning_rate": 4.7403846153846155e-05,
      "loss": 3.5336,
      "step": 162
    },
    {
      "epoch": 0.54,
      "learning_rate": 4.730769230769231e-05,
      "loss": 3.5313,
      "step": 168
    },
    {
      "epoch": 0.56,
      "learning_rate": 4.7211538461538465e-05,
      "loss": 3.6357,
      "step": 174
    },
    {
      "epoch": 0.58,
      "learning_rate": 4.711538461538462e-05,
      "loss": 3.5818,
      "step": 180
    },
    {
      "epoch": 0.6,
      "learning_rate": 4.701923076923077e-05,
      "loss": 3.5017,
      "step": 186
    },
    {
      "epoch": 0.62,
      "learning_rate": 4.692307692307693e-05,
      "loss": 3.5815,
      "step": 192
    },
    {
      "epoch": 0.63,
      "learning_rate": 4.682692307692308e-05,
      "loss": 3.611,
      "step": 198
    },
    {
      "epoch": 0.65,
      "learning_rate": 4.673076923076923e-05,
      "loss": 3.6096,
      "step": 204
    },
    {
      "epoch": 0.67,
      "learning_rate": 4.6634615384615384e-05,
      "loss": 3.5436,
      "step": 210
    },
    {
      "epoch": 0.69,
      "learning_rate": 4.653846153846154e-05,
      "loss": 3.5258,
      "step": 216
    },
    {
      "epoch": 0.71,
      "learning_rate": 4.6442307692307695e-05,
      "loss": 3.5277,
      "step": 222
    },
    {
      "epoch": 0.73,
      "learning_rate": 4.634615384615385e-05,
      "loss": 3.505,
      "step": 228
    },
    {
      "epoch": 0.75,
      "learning_rate": 4.6250000000000006e-05,
      "loss": 3.4665,
      "step": 234
    },
    {
      "epoch": 0.77,
      "learning_rate": 4.615384615384616e-05,
      "loss": 3.4245,
      "step": 240
    },
    {
      "epoch": 0.79,
      "learning_rate": 4.605769230769231e-05,
      "loss": 3.4547,
      "step": 246
    },
    {
      "epoch": 0.81,
      "learning_rate": 4.596153846153846e-05,
      "loss": 3.4382,
      "step": 252
    },
    {
      "epoch": 0.83,
      "learning_rate": 4.5865384615384614e-05,
      "loss": 3.482,
      "step": 258
    },
    {
      "epoch": 0.85,
      "learning_rate": 4.576923076923077e-05,
      "loss": 3.4795,
      "step": 264
    },
    {
      "epoch": 0.87,
      "learning_rate": 4.5673076923076925e-05,
      "loss": 3.5018,
      "step": 270
    },
    {
      "epoch": 0.88,
      "learning_rate": 4.557692307692308e-05,
      "loss": 3.448,
      "step": 276
    },
    {
      "epoch": 0.9,
      "learning_rate": 4.548076923076923e-05,
      "loss": 3.4206,
      "step": 282
    },
    {
      "epoch": 0.92,
      "learning_rate": 4.538461538461539e-05,
      "loss": 3.528,
      "step": 288
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.528846153846154e-05,
      "loss": 3.475,
      "step": 294
    },
    {
      "epoch": 0.96,
      "learning_rate": 4.519230769230769e-05,
      "loss": 3.4142,
      "step": 300
    },
    {
      "epoch": 0.98,
      "learning_rate": 4.509615384615385e-05,
      "loss": 3.4156,
      "step": 306
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.5e-05,
      "loss": 3.3274,
      "step": 312
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.4903846153846155e-05,
      "loss": 3.3594,
      "step": 318
    },
    {
      "epoch": 1.04,
      "learning_rate": 4.4807692307692314e-05,
      "loss": 3.2517,
      "step": 324
    },
    {
      "epoch": 1.06,
      "learning_rate": 4.4711538461538466e-05,
      "loss": 3.2934,
      "step": 330
    },
    {
      "epoch": 1.08,
      "learning_rate": 4.461538461538462e-05,
      "loss": 3.2986,
      "step": 336
    },
    {
      "epoch": 1.1,
      "learning_rate": 4.451923076923077e-05,
      "loss": 3.2655,
      "step": 342
    },
    {
      "epoch": 1.12,
      "learning_rate": 4.442307692307692e-05,
      "loss": 3.4058,
      "step": 348
    },
    {
      "epoch": 1.13,
      "learning_rate": 4.4326923076923074e-05,
      "loss": 3.265,
      "step": 354
    },
    {
      "epoch": 1.15,
      "learning_rate": 4.423076923076923e-05,
      "loss": 3.3208,
      "step": 360
    },
    {
      "epoch": 1.17,
      "learning_rate": 4.4134615384615385e-05,
      "loss": 3.238,
      "step": 366
    },
    {
      "epoch": 1.19,
      "learning_rate": 4.403846153846154e-05,
      "loss": 3.2153,
      "step": 372
    },
    {
      "epoch": 1.21,
      "learning_rate": 4.3942307692307695e-05,
      "loss": 3.264,
      "step": 378
    },
    {
      "epoch": 1.23,
      "learning_rate": 4.384615384615385e-05,
      "loss": 3.2911,
      "step": 384
    },
    {
      "epoch": 1.25,
      "learning_rate": 4.375e-05,
      "loss": 3.3027,
      "step": 390
    },
    {
      "epoch": 1.27,
      "learning_rate": 4.365384615384616e-05,
      "loss": 3.2589,
      "step": 396
    },
    {
      "epoch": 1.29,
      "learning_rate": 4.355769230769231e-05,
      "loss": 3.3683,
      "step": 402
    },
    {
      "epoch": 1.31,
      "learning_rate": 4.346153846153846e-05,
      "loss": 3.2849,
      "step": 408
    },
    {
      "epoch": 1.33,
      "learning_rate": 4.336538461538462e-05,
      "loss": 3.2397,
      "step": 414
    },
    {
      "epoch": 1.35,
      "learning_rate": 4.326923076923077e-05,
      "loss": 3.2128,
      "step": 420
    },
    {
      "epoch": 1.37,
      "learning_rate": 4.3173076923076925e-05,
      "loss": 3.1944,
      "step": 426
    },
    {
      "epoch": 1.38,
      "learning_rate": 4.3076923076923084e-05,
      "loss": 3.1837,
      "step": 432
    },
    {
      "epoch": 1.4,
      "learning_rate": 4.2980769230769236e-05,
      "loss": 3.1793,
      "step": 438
    },
    {
      "epoch": 1.42,
      "learning_rate": 4.288461538461538e-05,
      "loss": 3.1447,
      "step": 444
    },
    {
      "epoch": 1.44,
      "learning_rate": 4.278846153846154e-05,
      "loss": 3.1028,
      "step": 450
    },
    {
      "epoch": 1.46,
      "learning_rate": 4.269230769230769e-05,
      "loss": 3.2471,
      "step": 456
    },
    {
      "epoch": 1.48,
      "learning_rate": 4.2596153846153844e-05,
      "loss": 3.1855,
      "step": 462
    },
    {
      "epoch": 1.5,
      "learning_rate": 4.25e-05,
      "loss": 3.1817,
      "step": 468
    },
    {
      "epoch": 1.52,
      "learning_rate": 4.2403846153846155e-05,
      "loss": 3.214,
      "step": 474
    },
    {
      "epoch": 1.54,
      "learning_rate": 4.230769230769231e-05,
      "loss": 2.997,
      "step": 480
    },
    {
      "epoch": 1.56,
      "learning_rate": 4.2211538461538466e-05,
      "loss": 3.2059,
      "step": 486
    },
    {
      "epoch": 1.58,
      "learning_rate": 4.211538461538462e-05,
      "loss": 3.1517,
      "step": 492
    },
    {
      "epoch": 1.6,
      "learning_rate": 4.201923076923077e-05,
      "loss": 3.2236,
      "step": 498
    },
    {
      "epoch": 1.62,
      "learning_rate": 4.192307692307693e-05,
      "loss": 3.1939,
      "step": 504
    },
    {
      "epoch": 1.63,
      "learning_rate": 4.182692307692308e-05,
      "loss": 3.1143,
      "step": 510
    },
    {
      "epoch": 1.65,
      "learning_rate": 4.173076923076923e-05,
      "loss": 3.2261,
      "step": 516
    },
    {
      "epoch": 1.67,
      "learning_rate": 4.163461538461539e-05,
      "loss": 3.2095,
      "step": 522
    },
    {
      "epoch": 1.69,
      "learning_rate": 4.1538461538461544e-05,
      "loss": 3.2498,
      "step": 528
    },
    {
      "epoch": 1.71,
      "learning_rate": 4.1442307692307696e-05,
      "loss": 3.1493,
      "step": 534
    },
    {
      "epoch": 1.73,
      "learning_rate": 4.134615384615385e-05,
      "loss": 3.0919,
      "step": 540
    },
    {
      "epoch": 1.75,
      "learning_rate": 4.125e-05,
      "loss": 3.2617,
      "step": 546
    },
    {
      "epoch": 1.77,
      "learning_rate": 4.115384615384615e-05,
      "loss": 3.1447,
      "step": 552
    },
    {
      "epoch": 1.79,
      "learning_rate": 4.105769230769231e-05,
      "loss": 3.2025,
      "step": 558
    },
    {
      "epoch": 1.81,
      "learning_rate": 4.096153846153846e-05,
      "loss": 3.1829,
      "step": 564
    },
    {
      "epoch": 1.83,
      "learning_rate": 4.0865384615384615e-05,
      "loss": 3.2216,
      "step": 570
    },
    {
      "epoch": 1.85,
      "learning_rate": 4.0769230769230773e-05,
      "loss": 3.1391,
      "step": 576
    },
    {
      "epoch": 1.87,
      "learning_rate": 4.0673076923076926e-05,
      "loss": 3.1344,
      "step": 582
    },
    {
      "epoch": 1.88,
      "learning_rate": 4.057692307692308e-05,
      "loss": 3.1699,
      "step": 588
    },
    {
      "epoch": 1.9,
      "learning_rate": 4.0480769230769236e-05,
      "loss": 3.0138,
      "step": 594
    },
    {
      "epoch": 1.92,
      "learning_rate": 4.038461538461539e-05,
      "loss": 3.1157,
      "step": 600
    },
    {
      "epoch": 1.94,
      "learning_rate": 4.028846153846154e-05,
      "loss": 3.1173,
      "step": 606
    },
    {
      "epoch": 1.96,
      "learning_rate": 4.019230769230769e-05,
      "loss": 3.2019,
      "step": 612
    },
    {
      "epoch": 1.98,
      "learning_rate": 4.009615384615385e-05,
      "loss": 3.0801,
      "step": 618
    },
    {
      "epoch": 2.0,
      "learning_rate": 4e-05,
      "loss": 3.0888,
      "step": 624
    },
    {
      "epoch": 2.02,
      "learning_rate": 3.9903846153846155e-05,
      "loss": 3.0332,
      "step": 630
    },
    {
      "epoch": 2.04,
      "learning_rate": 3.980769230769231e-05,
      "loss": 2.9418,
      "step": 636
    },
    {
      "epoch": 2.06,
      "learning_rate": 3.971153846153846e-05,
      "loss": 2.933,
      "step": 642
    },
    {
      "epoch": 2.08,
      "learning_rate": 3.961538461538462e-05,
      "loss": 3.0467,
      "step": 648
    },
    {
      "epoch": 2.1,
      "learning_rate": 3.951923076923077e-05,
      "loss": 3.0991,
      "step": 654
    },
    {
      "epoch": 2.12,
      "learning_rate": 3.942307692307692e-05,
      "loss": 2.913,
      "step": 660
    },
    {
      "epoch": 2.13,
      "learning_rate": 3.932692307692308e-05,
      "loss": 3.0531,
      "step": 666
    },
    {
      "epoch": 2.15,
      "learning_rate": 3.923076923076923e-05,
      "loss": 2.9838,
      "step": 672
    },
    {
      "epoch": 2.17,
      "learning_rate": 3.9134615384615385e-05,
      "loss": 2.9406,
      "step": 678
    },
    {
      "epoch": 2.19,
      "learning_rate": 3.903846153846154e-05,
      "loss": 3.046,
      "step": 684
    },
    {
      "epoch": 2.21,
      "learning_rate": 3.8942307692307696e-05,
      "loss": 3.006,
      "step": 690
    },
    {
      "epoch": 2.23,
      "learning_rate": 3.884615384615385e-05,
      "loss": 2.9774,
      "step": 696
    },
    {
      "epoch": 2.25,
      "learning_rate": 3.875e-05,
      "loss": 2.9937,
      "step": 702
    },
    {
      "epoch": 2.27,
      "learning_rate": 3.865384615384616e-05,
      "loss": 2.9737,
      "step": 708
    },
    {
      "epoch": 2.29,
      "learning_rate": 3.855769230769231e-05,
      "loss": 2.9772,
      "step": 714
    },
    {
      "epoch": 2.31,
      "learning_rate": 3.846153846153846e-05,
      "loss": 2.8405,
      "step": 720
    },
    {
      "epoch": 2.33,
      "learning_rate": 3.836538461538462e-05,
      "loss": 3.0856,
      "step": 726
    },
    {
      "epoch": 2.35,
      "learning_rate": 3.826923076923077e-05,
      "loss": 2.9947,
      "step": 732
    },
    {
      "epoch": 2.37,
      "learning_rate": 3.8173076923076926e-05,
      "loss": 2.9251,
      "step": 738
    },
    {
      "epoch": 2.38,
      "learning_rate": 3.807692307692308e-05,
      "loss": 2.9613,
      "step": 744
    },
    {
      "epoch": 2.4,
      "learning_rate": 3.798076923076923e-05,
      "loss": 2.9402,
      "step": 750
    },
    {
      "epoch": 2.42,
      "learning_rate": 3.788461538461538e-05,
      "loss": 2.8823,
      "step": 756
    },
    {
      "epoch": 2.44,
      "learning_rate": 3.778846153846154e-05,
      "loss": 2.8906,
      "step": 762
    },
    {
      "epoch": 2.46,
      "learning_rate": 3.769230769230769e-05,
      "loss": 2.9475,
      "step": 768
    },
    {
      "epoch": 2.48,
      "learning_rate": 3.7596153846153845e-05,
      "loss": 2.9999,
      "step": 774
    },
    {
      "epoch": 2.5,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 2.9503,
      "step": 780
    },
    {
      "epoch": 2.52,
      "learning_rate": 3.7403846153846156e-05,
      "loss": 3.0112,
      "step": 786
    },
    {
      "epoch": 2.54,
      "learning_rate": 3.730769230769231e-05,
      "loss": 3.0096,
      "step": 792
    },
    {
      "epoch": 2.56,
      "learning_rate": 3.7211538461538466e-05,
      "loss": 2.9804,
      "step": 798
    },
    {
      "epoch": 2.58,
      "learning_rate": 3.711538461538462e-05,
      "loss": 2.9203,
      "step": 804
    },
    {
      "epoch": 2.6,
      "learning_rate": 3.701923076923077e-05,
      "loss": 2.939,
      "step": 810
    },
    {
      "epoch": 2.62,
      "learning_rate": 3.692307692307693e-05,
      "loss": 2.8455,
      "step": 816
    },
    {
      "epoch": 2.63,
      "learning_rate": 3.682692307692308e-05,
      "loss": 2.9651,
      "step": 822
    },
    {
      "epoch": 2.65,
      "learning_rate": 3.673076923076923e-05,
      "loss": 2.9528,
      "step": 828
    },
    {
      "epoch": 2.67,
      "learning_rate": 3.6634615384615385e-05,
      "loss": 2.8042,
      "step": 834
    },
    {
      "epoch": 2.69,
      "learning_rate": 3.653846153846154e-05,
      "loss": 2.8311,
      "step": 840
    },
    {
      "epoch": 2.71,
      "learning_rate": 3.644230769230769e-05,
      "loss": 2.8888,
      "step": 846
    },
    {
      "epoch": 2.73,
      "learning_rate": 3.634615384615385e-05,
      "loss": 2.9151,
      "step": 852
    },
    {
      "epoch": 2.75,
      "learning_rate": 3.625e-05,
      "loss": 2.9463,
      "step": 858
    },
    {
      "epoch": 2.77,
      "learning_rate": 3.615384615384615e-05,
      "loss": 2.957,
      "step": 864
    },
    {
      "epoch": 2.79,
      "learning_rate": 3.605769230769231e-05,
      "loss": 2.9473,
      "step": 870
    },
    {
      "epoch": 2.81,
      "learning_rate": 3.596153846153846e-05,
      "loss": 2.9994,
      "step": 876
    },
    {
      "epoch": 2.83,
      "learning_rate": 3.5865384615384615e-05,
      "loss": 3.0486,
      "step": 882
    },
    {
      "epoch": 2.85,
      "learning_rate": 3.5769230769230774e-05,
      "loss": 2.9487,
      "step": 888
    },
    {
      "epoch": 2.87,
      "learning_rate": 3.5673076923076926e-05,
      "loss": 3.0173,
      "step": 894
    },
    {
      "epoch": 2.88,
      "learning_rate": 3.557692307692308e-05,
      "loss": 2.8656,
      "step": 900
    },
    {
      "epoch": 2.9,
      "learning_rate": 3.548076923076924e-05,
      "loss": 2.8834,
      "step": 906
    },
    {
      "epoch": 2.92,
      "learning_rate": 3.538461538461539e-05,
      "loss": 2.9829,
      "step": 912
    },
    {
      "epoch": 2.94,
      "learning_rate": 3.528846153846154e-05,
      "loss": 3.001,
      "step": 918
    },
    {
      "epoch": 2.96,
      "learning_rate": 3.51923076923077e-05,
      "loss": 2.9618,
      "step": 924
    },
    {
      "epoch": 2.98,
      "learning_rate": 3.5096153846153845e-05,
      "loss": 2.7964,
      "step": 930
    },
    {
      "epoch": 3.0,
      "learning_rate": 3.5e-05,
      "loss": 2.89,
      "step": 936
    },
    {
      "epoch": 3.02,
      "learning_rate": 3.4903846153846156e-05,
      "loss": 2.7191,
      "step": 942
    },
    {
      "epoch": 3.04,
      "learning_rate": 3.480769230769231e-05,
      "loss": 2.7875,
      "step": 948
    },
    {
      "epoch": 3.06,
      "learning_rate": 3.471153846153846e-05,
      "loss": 2.8498,
      "step": 954
    },
    {
      "epoch": 3.08,
      "learning_rate": 3.461538461538462e-05,
      "loss": 2.7858,
      "step": 960
    },
    {
      "epoch": 3.1,
      "learning_rate": 3.451923076923077e-05,
      "loss": 2.7435,
      "step": 966
    },
    {
      "epoch": 3.12,
      "learning_rate": 3.442307692307692e-05,
      "loss": 2.8449,
      "step": 972
    },
    {
      "epoch": 3.13,
      "learning_rate": 3.432692307692308e-05,
      "loss": 2.7817,
      "step": 978
    },
    {
      "epoch": 3.15,
      "learning_rate": 3.4230769230769234e-05,
      "loss": 2.8797,
      "step": 984
    },
    {
      "epoch": 3.17,
      "learning_rate": 3.4134615384615386e-05,
      "loss": 2.7719,
      "step": 990
    },
    {
      "epoch": 3.19,
      "learning_rate": 3.4038461538461544e-05,
      "loss": 2.7919,
      "step": 996
    }
  ],
  "logging_steps": 6,
  "max_steps": 3120,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 402675517440000.0,
  "trial_name": null,
  "trial_params": null
}