|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.913937547600914, |
|
"eval_steps": 500, |
|
"global_step": 900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.756756756756757e-08, |
|
"loss": 2.0057, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.378378378378379e-07, |
|
"loss": 2.0564, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.756756756756758e-07, |
|
"loss": 2.0403, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0135135135135136e-06, |
|
"loss": 2.0396, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.3513513513513515e-06, |
|
"loss": 1.9947, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6891891891891894e-06, |
|
"loss": 2.0448, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.0270270270270273e-06, |
|
"loss": 2.0019, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.364864864864865e-06, |
|
"loss": 1.9959, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.702702702702703e-06, |
|
"loss": 2.0093, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.040540540540541e-06, |
|
"loss": 2.0242, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.3783783783783788e-06, |
|
"loss": 1.9715, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.7162162162162162e-06, |
|
"loss": 1.9773, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.0540540540540545e-06, |
|
"loss": 1.9811, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.391891891891892e-06, |
|
"loss": 1.9947, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.72972972972973e-06, |
|
"loss": 1.988, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 5.067567567567568e-06, |
|
"loss": 1.9718, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 5.405405405405406e-06, |
|
"loss": 1.9505, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 5.743243243243244e-06, |
|
"loss": 1.9706, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 6.081081081081082e-06, |
|
"loss": 1.9506, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 6.41891891891892e-06, |
|
"loss": 1.9243, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 6.7567567567567575e-06, |
|
"loss": 1.9785, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.0945945945945946e-06, |
|
"loss": 1.8531, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.4324324324324324e-06, |
|
"loss": 1.9446, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.77027027027027e-06, |
|
"loss": 1.9, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.108108108108109e-06, |
|
"loss": 1.8533, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.445945945945948e-06, |
|
"loss": 1.92, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.783783783783785e-06, |
|
"loss": 1.8792, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.121621621621622e-06, |
|
"loss": 1.9132, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.45945945945946e-06, |
|
"loss": 1.8309, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.797297297297298e-06, |
|
"loss": 1.8194, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.0135135135135136e-05, |
|
"loss": 1.7605, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.0472972972972975e-05, |
|
"loss": 1.806, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.0810810810810812e-05, |
|
"loss": 1.879, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.114864864864865e-05, |
|
"loss": 1.7906, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.1486486486486488e-05, |
|
"loss": 1.7865, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.1824324324324325e-05, |
|
"loss": 1.855, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.2162162162162164e-05, |
|
"loss": 1.7428, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.25e-05, |
|
"loss": 1.8219, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.283783783783784e-05, |
|
"loss": 1.7607, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.3175675675675676e-05, |
|
"loss": 1.8289, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.3513513513513515e-05, |
|
"loss": 1.8311, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.3851351351351352e-05, |
|
"loss": 1.7481, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.4189189189189189e-05, |
|
"loss": 1.795, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.4527027027027028e-05, |
|
"loss": 1.7709, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.4864864864864865e-05, |
|
"loss": 1.7569, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.5202702702702704e-05, |
|
"loss": 1.7625, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.554054054054054e-05, |
|
"loss": 1.7773, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.587837837837838e-05, |
|
"loss": 1.7449, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.6216216216216218e-05, |
|
"loss": 1.7536, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.6554054054054057e-05, |
|
"loss": 1.7815, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.6891891891891896e-05, |
|
"loss": 1.81, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.722972972972973e-05, |
|
"loss": 1.7928, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.756756756756757e-05, |
|
"loss": 1.7427, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.790540540540541e-05, |
|
"loss": 1.6618, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.8243243243243244e-05, |
|
"loss": 1.7179, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.8581081081081082e-05, |
|
"loss": 1.7514, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.891891891891892e-05, |
|
"loss": 1.7457, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.925675675675676e-05, |
|
"loss": 1.8168, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.9594594594594595e-05, |
|
"loss": 1.7466, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.9932432432432434e-05, |
|
"loss": 1.7226, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.9999888073536733e-05, |
|
"loss": 1.7554, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.9999433376573803e-05, |
|
"loss": 1.8218, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.9998628929598956e-05, |
|
"loss": 1.7816, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.9997474760749314e-05, |
|
"loss": 1.7414, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.9995970910394228e-05, |
|
"loss": 1.778, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.9994117431133843e-05, |
|
"loss": 1.7267, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.9991914387797266e-05, |
|
"loss": 1.6588, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.9989361857440318e-05, |
|
"loss": 1.7615, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.99864599293428e-05, |
|
"loss": 1.7038, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.998320870500541e-05, |
|
"loss": 1.751, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.997960829814616e-05, |
|
"loss": 1.695, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.997565883469643e-05, |
|
"loss": 1.6773, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.9971360452796523e-05, |
|
"loss": 1.7159, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.9966713302790875e-05, |
|
"loss": 1.7268, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.9961717547222775e-05, |
|
"loss": 1.7641, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.995637336082868e-05, |
|
"loss": 1.7376, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.9950680930532107e-05, |
|
"loss": 1.7687, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.994464045543709e-05, |
|
"loss": 1.7248, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.9938252146821236e-05, |
|
"loss": 1.7281, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.9931516228128295e-05, |
|
"loss": 1.7265, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.9924432934960384e-05, |
|
"loss": 1.7459, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.9917002515069732e-05, |
|
"loss": 1.719, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.9909225228350007e-05, |
|
"loss": 1.6819, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.9901101346827233e-05, |
|
"loss": 1.6849, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.989263115465028e-05, |
|
"loss": 1.7905, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.9883814948080918e-05, |
|
"loss": 1.7414, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.987465303548345e-05, |
|
"loss": 1.7082, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.9865145737313937e-05, |
|
"loss": 1.743, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.9855293386108995e-05, |
|
"loss": 1.7089, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.9845096326474135e-05, |
|
"loss": 1.7491, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.9834554915071745e-05, |
|
"loss": 1.6897, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.982366952060859e-05, |
|
"loss": 1.7759, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.981244052382293e-05, |
|
"loss": 1.7774, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.9800868317471196e-05, |
|
"loss": 1.6844, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.978895330631425e-05, |
|
"loss": 1.7419, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.977669590710324e-05, |
|
"loss": 1.7966, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.976409654856501e-05, |
|
"loss": 1.6607, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.975115567138711e-05, |
|
"loss": 1.8121, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.9737873728202376e-05, |
|
"loss": 1.7282, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.972425118357312e-05, |
|
"loss": 1.6864, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.9710288513974846e-05, |
|
"loss": 1.7665, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.969598620777962e-05, |
|
"loss": 1.6843, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.9681344765238958e-05, |
|
"loss": 1.6675, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.966636469846635e-05, |
|
"loss": 1.7891, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.9651046531419335e-05, |
|
"loss": 1.6728, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.9635390799881186e-05, |
|
"loss": 1.7289, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.961939805144216e-05, |
|
"loss": 1.7241, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.9603068845480347e-05, |
|
"loss": 1.7378, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.9586403753142104e-05, |
|
"loss": 1.7717, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.956940335732209e-05, |
|
"loss": 1.7401, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.9552068252642858e-05, |
|
"loss": 1.6758, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.9534399045434073e-05, |
|
"loss": 1.7241, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.9516396353711297e-05, |
|
"loss": 1.7026, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.9498060807154368e-05, |
|
"loss": 1.7662, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.9479393047085392e-05, |
|
"loss": 1.6935, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.94603937264463e-05, |
|
"loss": 1.7303, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.9441063509776003e-05, |
|
"loss": 1.7758, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.9421403073187162e-05, |
|
"loss": 1.7017, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.9401413104342535e-05, |
|
"loss": 1.7726, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.938109430243093e-05, |
|
"loss": 1.6689, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.936044737814273e-05, |
|
"loss": 1.6734, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9339473053645056e-05, |
|
"loss": 1.656, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.931817206255651e-05, |
|
"loss": 1.7196, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9296545149921488e-05, |
|
"loss": 1.686, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.9274593072184152e-05, |
|
"loss": 1.7709, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.9252316597161947e-05, |
|
"loss": 1.6834, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.922971650401877e-05, |
|
"loss": 1.6787, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.920679358323769e-05, |
|
"loss": 1.6922, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9183548636593322e-05, |
|
"loss": 1.7244, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9159982477123776e-05, |
|
"loss": 1.7187, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.9136095929102204e-05, |
|
"loss": 1.6827, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.9111889828007997e-05, |
|
"loss": 1.7466, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.908736502049754e-05, |
|
"loss": 1.7892, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.9062522364374617e-05, |
|
"loss": 1.744, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.903736272856038e-05, |
|
"loss": 1.6438, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.9011886993062994e-05, |
|
"loss": 1.7538, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.8986096048946826e-05, |
|
"loss": 1.6889, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.8959990798301286e-05, |
|
"loss": 1.7199, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.893357215420929e-05, |
|
"loss": 1.7801, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.8906841040715304e-05, |
|
"loss": 1.7559, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.8879798392793033e-05, |
|
"loss": 1.6639, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.8852445156312713e-05, |
|
"loss": 1.687, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.8824782288008038e-05, |
|
"loss": 1.7619, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.8796810755442675e-05, |
|
"loss": 1.7529, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.8768531536976452e-05, |
|
"loss": 1.7513, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.8739945621731114e-05, |
|
"loss": 1.7322, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.8711054009555736e-05, |
|
"loss": 1.7762, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.8681857710991745e-05, |
|
"loss": 1.6949, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.865235774723758e-05, |
|
"loss": 1.6526, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.862255515011298e-05, |
|
"loss": 1.7155, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.8592450962022872e-05, |
|
"loss": 1.8251, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.8562046235920938e-05, |
|
"loss": 1.7077, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.8531342035272768e-05, |
|
"loss": 1.6627, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.850033943401867e-05, |
|
"loss": 1.7163, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.84690395165361e-05, |
|
"loss": 1.713, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.8437443377601736e-05, |
|
"loss": 1.7024, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.8405552122353212e-05, |
|
"loss": 1.7253, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.8373366866250407e-05, |
|
"loss": 1.7109, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.8340888735036485e-05, |
|
"loss": 1.7563, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.830811886469849e-05, |
|
"loss": 1.7479, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.8275058401427622e-05, |
|
"loss": 1.7315, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.8241708501579146e-05, |
|
"loss": 1.7126, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.8208070331631943e-05, |
|
"loss": 1.6866, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.817414506814772e-05, |
|
"loss": 1.6708, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.8139933897729833e-05, |
|
"loss": 1.6868, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.8105438016981816e-05, |
|
"loss": 1.7493, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.807065863246551e-05, |
|
"loss": 1.7707, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.8035596960658856e-05, |
|
"loss": 1.7692, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.8000254227913346e-05, |
|
"loss": 1.7761, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.7964631670411154e-05, |
|
"loss": 1.6765, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.7928730534121872e-05, |
|
"loss": 1.6437, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.7892552074758932e-05, |
|
"loss": 1.6804, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.7856097557735697e-05, |
|
"loss": 1.7373, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.7819368258121188e-05, |
|
"loss": 1.7316, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.77823654605955e-05, |
|
"loss": 1.6764, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.7745090459404853e-05, |
|
"loss": 1.7456, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.7707544558316332e-05, |
|
"loss": 1.6518, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.766972907057229e-05, |
|
"loss": 1.7705, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.763164531884439e-05, |
|
"loss": 1.7431, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.7593294635187385e-05, |
|
"loss": 1.6928, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.7554678360992475e-05, |
|
"loss": 1.7483, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"step": 900, |
|
"total_flos": 1.2656331456763658e+18, |
|
"train_loss": 0.0, |
|
"train_runtime": 3.9268, |
|
"train_samples_per_second": 12034.857, |
|
"train_steps_per_second": 187.937 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 738, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 1.2656331456763658e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|