|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 2217, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.013531799729364006, |
|
"grad_norm": 3.034516595682754, |
|
"learning_rate": 2.2522522522522524e-07, |
|
"loss": 0.6411, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02706359945872801, |
|
"grad_norm": 2.7841028225351487, |
|
"learning_rate": 4.504504504504505e-07, |
|
"loss": 0.6332, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04059539918809202, |
|
"grad_norm": 1.8710512230715446, |
|
"learning_rate": 6.756756756756758e-07, |
|
"loss": 0.6135, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05412719891745602, |
|
"grad_norm": 1.5248415557438793, |
|
"learning_rate": 9.00900900900901e-07, |
|
"loss": 0.6095, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06765899864682003, |
|
"grad_norm": 1.330731637337357, |
|
"learning_rate": 1.1261261261261262e-06, |
|
"loss": 0.5672, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08119079837618404, |
|
"grad_norm": 1.1702536060306041, |
|
"learning_rate": 1.3513513513513515e-06, |
|
"loss": 0.5592, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.09472259810554803, |
|
"grad_norm": 1.2129816143771526, |
|
"learning_rate": 1.5765765765765766e-06, |
|
"loss": 0.527, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.10825439783491204, |
|
"grad_norm": 1.234538155408458, |
|
"learning_rate": 1.801801801801802e-06, |
|
"loss": 0.5421, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.12178619756427606, |
|
"grad_norm": 1.1711228120854185, |
|
"learning_rate": 2.0270270270270273e-06, |
|
"loss": 0.5292, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.13531799729364005, |
|
"grad_norm": 1.2936648719718322, |
|
"learning_rate": 2.2522522522522524e-06, |
|
"loss": 0.5479, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.14884979702300405, |
|
"grad_norm": 1.291284989397587, |
|
"learning_rate": 2.4774774774774775e-06, |
|
"loss": 0.5416, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.16238159675236807, |
|
"grad_norm": 1.34797590127609, |
|
"learning_rate": 2.702702702702703e-06, |
|
"loss": 0.5152, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.17591339648173207, |
|
"grad_norm": 1.360973654533329, |
|
"learning_rate": 2.927927927927928e-06, |
|
"loss": 0.5129, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.18944519621109607, |
|
"grad_norm": 1.2536822051142262, |
|
"learning_rate": 3.1531531531531532e-06, |
|
"loss": 0.5281, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2029769959404601, |
|
"grad_norm": 1.3088135261215759, |
|
"learning_rate": 3.3783783783783788e-06, |
|
"loss": 0.5209, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2165087956698241, |
|
"grad_norm": 1.288765418316905, |
|
"learning_rate": 3.603603603603604e-06, |
|
"loss": 0.5174, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.23004059539918809, |
|
"grad_norm": 1.393215959864774, |
|
"learning_rate": 3.828828828828829e-06, |
|
"loss": 0.5207, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.2435723951285521, |
|
"grad_norm": 1.2564229102790085, |
|
"learning_rate": 4.0540540540540545e-06, |
|
"loss": 0.5273, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2571041948579161, |
|
"grad_norm": 1.2582309298347993, |
|
"learning_rate": 4.27927927927928e-06, |
|
"loss": 0.4934, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.2706359945872801, |
|
"grad_norm": 1.227173613438421, |
|
"learning_rate": 4.504504504504505e-06, |
|
"loss": 0.5112, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.28416779431664413, |
|
"grad_norm": 1.3112186763708722, |
|
"learning_rate": 4.72972972972973e-06, |
|
"loss": 0.4982, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.2976995940460081, |
|
"grad_norm": 1.3637637039109458, |
|
"learning_rate": 4.954954954954955e-06, |
|
"loss": 0.5229, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.3112313937753721, |
|
"grad_norm": 1.375489718620912, |
|
"learning_rate": 4.999801619861762e-06, |
|
"loss": 0.5042, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.32476319350473615, |
|
"grad_norm": 1.2608972862462173, |
|
"learning_rate": 4.99899575450882e-06, |
|
"loss": 0.5127, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.3382949932341001, |
|
"grad_norm": 1.2911890277318863, |
|
"learning_rate": 4.9975702048619155e-06, |
|
"loss": 0.5033, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.35182679296346414, |
|
"grad_norm": 1.3078317427998958, |
|
"learning_rate": 4.995525324419338e-06, |
|
"loss": 0.5002, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.36535859269282817, |
|
"grad_norm": 1.3588495282283148, |
|
"learning_rate": 4.992861620256898e-06, |
|
"loss": 0.5065, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.37889039242219213, |
|
"grad_norm": 1.3626322380792617, |
|
"learning_rate": 4.98957975290218e-06, |
|
"loss": 0.511, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.39242219215155616, |
|
"grad_norm": 1.3701159835791592, |
|
"learning_rate": 4.985680536170754e-06, |
|
"loss": 0.5137, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.4059539918809202, |
|
"grad_norm": 1.2997955232042016, |
|
"learning_rate": 4.981164936964371e-06, |
|
"loss": 0.4965, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.41948579161028415, |
|
"grad_norm": 1.3519542829203703, |
|
"learning_rate": 4.976034075031193e-06, |
|
"loss": 0.512, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.4330175913396482, |
|
"grad_norm": 1.2630076374904904, |
|
"learning_rate": 4.970289222688129e-06, |
|
"loss": 0.5028, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.4465493910690122, |
|
"grad_norm": 1.2707537109175924, |
|
"learning_rate": 4.963931804505335e-06, |
|
"loss": 0.5032, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.46008119079837617, |
|
"grad_norm": 1.2589722075840526, |
|
"learning_rate": 4.956963396952954e-06, |
|
"loss": 0.4913, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.4736129905277402, |
|
"grad_norm": 1.250431061653177, |
|
"learning_rate": 4.949385728010199e-06, |
|
"loss": 0.4805, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.4871447902571042, |
|
"grad_norm": 1.3372965532968473, |
|
"learning_rate": 4.941200676736856e-06, |
|
"loss": 0.4991, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.5006765899864682, |
|
"grad_norm": 1.342662150633839, |
|
"learning_rate": 4.932410272807328e-06, |
|
"loss": 0.5066, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.5142083897158322, |
|
"grad_norm": 1.2238441786277905, |
|
"learning_rate": 4.9230166960073325e-06, |
|
"loss": 0.4863, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.5277401894451962, |
|
"grad_norm": 1.359535295509501, |
|
"learning_rate": 4.913022275693372e-06, |
|
"loss": 0.4949, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.5412719891745602, |
|
"grad_norm": 1.2168352667558966, |
|
"learning_rate": 4.902429490215112e-06, |
|
"loss": 0.4887, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5548037889039242, |
|
"grad_norm": 1.4624621853482647, |
|
"learning_rate": 4.891240966300822e-06, |
|
"loss": 0.4969, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.5683355886332883, |
|
"grad_norm": 1.238588018136266, |
|
"learning_rate": 4.879459478406012e-06, |
|
"loss": 0.5067, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.5818673883626523, |
|
"grad_norm": 1.3181900309459351, |
|
"learning_rate": 4.867087948025444e-06, |
|
"loss": 0.4791, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.5953991880920162, |
|
"grad_norm": 1.2954274899523053, |
|
"learning_rate": 4.854129442968679e-06, |
|
"loss": 0.4927, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.6089309878213802, |
|
"grad_norm": 1.4125967606025722, |
|
"learning_rate": 4.8405871765993435e-06, |
|
"loss": 0.4879, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.6224627875507442, |
|
"grad_norm": 1.3930229082121182, |
|
"learning_rate": 4.8264645070382964e-06, |
|
"loss": 0.5073, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.6359945872801083, |
|
"grad_norm": 1.0709300883023405, |
|
"learning_rate": 4.8117649363309105e-06, |
|
"loss": 0.4729, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.6495263870094723, |
|
"grad_norm": 1.2926318844200286, |
|
"learning_rate": 4.796492109578655e-06, |
|
"loss": 0.4956, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.6630581867388363, |
|
"grad_norm": 1.2313258024791178, |
|
"learning_rate": 4.780649814035205e-06, |
|
"loss": 0.5152, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.6765899864682002, |
|
"grad_norm": 1.178544127732, |
|
"learning_rate": 4.764241978167314e-06, |
|
"loss": 0.4859, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.6901217861975643, |
|
"grad_norm": 1.3295118207561514, |
|
"learning_rate": 4.747272670680646e-06, |
|
"loss": 0.4676, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.7036535859269283, |
|
"grad_norm": 1.2304742068887151, |
|
"learning_rate": 4.729746099510853e-06, |
|
"loss": 0.4678, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.7171853856562923, |
|
"grad_norm": 1.3372876193995562, |
|
"learning_rate": 4.711666610780115e-06, |
|
"loss": 0.4831, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.7307171853856563, |
|
"grad_norm": 1.1886552687072083, |
|
"learning_rate": 4.693038687719424e-06, |
|
"loss": 0.4767, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.7442489851150202, |
|
"grad_norm": 1.1599254136343242, |
|
"learning_rate": 4.673866949556854e-06, |
|
"loss": 0.4936, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.7577807848443843, |
|
"grad_norm": 1.3619743128031154, |
|
"learning_rate": 4.654156150372123e-06, |
|
"loss": 0.4932, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.7713125845737483, |
|
"grad_norm": 1.2317704190385679, |
|
"learning_rate": 4.633911177917701e-06, |
|
"loss": 0.4823, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.7848443843031123, |
|
"grad_norm": 1.1895331578985897, |
|
"learning_rate": 4.613137052406783e-06, |
|
"loss": 0.4828, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.7983761840324763, |
|
"grad_norm": 1.2879457390422215, |
|
"learning_rate": 4.5918389252684115e-06, |
|
"loss": 0.4779, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.8119079837618404, |
|
"grad_norm": 1.2202436257043876, |
|
"learning_rate": 4.570022077870051e-06, |
|
"loss": 0.4875, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.8254397834912043, |
|
"grad_norm": 1.191014968745712, |
|
"learning_rate": 4.547691920207958e-06, |
|
"loss": 0.4726, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.8389715832205683, |
|
"grad_norm": 1.3456732382536576, |
|
"learning_rate": 4.524853989565644e-06, |
|
"loss": 0.4716, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.8525033829499323, |
|
"grad_norm": 1.1703319949253694, |
|
"learning_rate": 4.501513949140776e-06, |
|
"loss": 0.4715, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.8660351826792964, |
|
"grad_norm": 1.2604743747219656, |
|
"learning_rate": 4.477677586640854e-06, |
|
"loss": 0.483, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.8795669824086604, |
|
"grad_norm": 1.2589734283703884, |
|
"learning_rate": 4.453350812848014e-06, |
|
"loss": 0.4788, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.8930987821380244, |
|
"grad_norm": 1.3165298623822046, |
|
"learning_rate": 4.428539660153315e-06, |
|
"loss": 0.4781, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.9066305818673883, |
|
"grad_norm": 1.2879141623482424, |
|
"learning_rate": 4.403250281060862e-06, |
|
"loss": 0.4663, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.9201623815967523, |
|
"grad_norm": 1.2076038888483498, |
|
"learning_rate": 4.377488946662152e-06, |
|
"loss": 0.4808, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.9336941813261164, |
|
"grad_norm": 1.1851588037695613, |
|
"learning_rate": 4.3512620450810115e-06, |
|
"loss": 0.4793, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.9472259810554804, |
|
"grad_norm": 1.2950814486817164, |
|
"learning_rate": 4.324576079889508e-06, |
|
"loss": 0.4922, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.9607577807848444, |
|
"grad_norm": 1.201177645302697, |
|
"learning_rate": 4.297437668495241e-06, |
|
"loss": 0.4789, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.9742895805142084, |
|
"grad_norm": 1.217638325880889, |
|
"learning_rate": 4.269853540500404e-06, |
|
"loss": 0.4777, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.9878213802435724, |
|
"grad_norm": 1.2410357155495917, |
|
"learning_rate": 4.2418305360330135e-06, |
|
"loss": 0.4677, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.0013531799729365, |
|
"grad_norm": 1.4640185730269493, |
|
"learning_rate": 4.21337560405075e-06, |
|
"loss": 0.4706, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.0148849797023005, |
|
"grad_norm": 1.38525301927104, |
|
"learning_rate": 4.184495800617795e-06, |
|
"loss": 0.3663, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.0284167794316643, |
|
"grad_norm": 1.3150399206192376, |
|
"learning_rate": 4.1551982871551195e-06, |
|
"loss": 0.3446, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.0419485791610283, |
|
"grad_norm": 1.2508231428791732, |
|
"learning_rate": 4.125490328664639e-06, |
|
"loss": 0.3433, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.0554803788903924, |
|
"grad_norm": 1.1828710966111038, |
|
"learning_rate": 4.095379291927689e-06, |
|
"loss": 0.3372, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.0690121786197564, |
|
"grad_norm": 1.3234842465450887, |
|
"learning_rate": 4.064872643678261e-06, |
|
"loss": 0.3492, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.0825439783491204, |
|
"grad_norm": 1.3510823566344805, |
|
"learning_rate": 4.033977948751445e-06, |
|
"loss": 0.3538, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.0960757780784844, |
|
"grad_norm": 1.162520339907919, |
|
"learning_rate": 4.002702868207563e-06, |
|
"loss": 0.3462, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.1096075778078485, |
|
"grad_norm": 1.2976046378570087, |
|
"learning_rate": 3.971055157432421e-06, |
|
"loss": 0.3474, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.1231393775372125, |
|
"grad_norm": 1.2857491708515112, |
|
"learning_rate": 3.939042664214185e-06, |
|
"loss": 0.3365, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.1366711772665765, |
|
"grad_norm": 1.2956865515811835, |
|
"learning_rate": 3.9066733267973335e-06, |
|
"loss": 0.3483, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.1502029769959405, |
|
"grad_norm": 1.2863826250955843, |
|
"learning_rate": 3.873955171914196e-06, |
|
"loss": 0.3515, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.1637347767253043, |
|
"grad_norm": 1.48236681103959, |
|
"learning_rate": 3.840896312794523e-06, |
|
"loss": 0.3502, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.1772665764546684, |
|
"grad_norm": 1.333493280950809, |
|
"learning_rate": 3.8075049471536317e-06, |
|
"loss": 0.3402, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.1907983761840324, |
|
"grad_norm": 1.3845427764767562, |
|
"learning_rate": 3.773789355159587e-06, |
|
"loss": 0.3634, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.2043301759133964, |
|
"grad_norm": 1.1768813996050986, |
|
"learning_rate": 3.7397578973799432e-06, |
|
"loss": 0.3426, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.2178619756427604, |
|
"grad_norm": 1.2400576449491367, |
|
"learning_rate": 3.7054190127085414e-06, |
|
"loss": 0.3643, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.2313937753721245, |
|
"grad_norm": 1.3771269453073722, |
|
"learning_rate": 3.6707812162728963e-06, |
|
"loss": 0.3618, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.2449255751014885, |
|
"grad_norm": 1.342260391925942, |
|
"learning_rate": 3.6358530973226634e-06, |
|
"loss": 0.3619, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.2584573748308525, |
|
"grad_norm": 1.305612405058302, |
|
"learning_rate": 3.600643317099742e-06, |
|
"loss": 0.343, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.2719891745602165, |
|
"grad_norm": 1.2499532104940625, |
|
"learning_rate": 3.5651606066905125e-06, |
|
"loss": 0.3469, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.2855209742895806, |
|
"grad_norm": 1.3139038237546603, |
|
"learning_rate": 3.529413764860763e-06, |
|
"loss": 0.3547, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.2990527740189446, |
|
"grad_norm": 1.3891975036748492, |
|
"learning_rate": 3.493411655873826e-06, |
|
"loss": 0.3565, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.3125845737483086, |
|
"grad_norm": 1.31663081113206, |
|
"learning_rate": 3.4571632072924853e-06, |
|
"loss": 0.3459, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.3261163734776726, |
|
"grad_norm": 1.4316096903150302, |
|
"learning_rate": 3.4206774077651706e-06, |
|
"loss": 0.3435, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.3396481732070367, |
|
"grad_norm": 1.2148334230866564, |
|
"learning_rate": 3.383963304797016e-06, |
|
"loss": 0.3505, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.3531799729364005, |
|
"grad_norm": 1.371605209380619, |
|
"learning_rate": 3.347030002506321e-06, |
|
"loss": 0.3367, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.3667117726657645, |
|
"grad_norm": 1.2492881761277035, |
|
"learning_rate": 3.309886659366967e-06, |
|
"loss": 0.3409, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.3802435723951285, |
|
"grad_norm": 1.3599908217112529, |
|
"learning_rate": 3.272542485937369e-06, |
|
"loss": 0.3485, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.3937753721244925, |
|
"grad_norm": 1.3587500825976617, |
|
"learning_rate": 3.2350067425764932e-06, |
|
"loss": 0.3564, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.4073071718538566, |
|
"grad_norm": 1.286120602039562, |
|
"learning_rate": 3.1972887371475404e-06, |
|
"loss": 0.3445, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.4208389715832206, |
|
"grad_norm": 1.3034585004391024, |
|
"learning_rate": 3.1593978227098442e-06, |
|
"loss": 0.3499, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.4343707713125846, |
|
"grad_norm": 1.4093897542299632, |
|
"learning_rate": 3.1213433951995585e-06, |
|
"loss": 0.3476, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.4479025710419486, |
|
"grad_norm": 1.2311850270587112, |
|
"learning_rate": 3.0831348910997206e-06, |
|
"loss": 0.3364, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.4614343707713127, |
|
"grad_norm": 1.222347605205662, |
|
"learning_rate": 3.0447817851002493e-06, |
|
"loss": 0.3479, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.4749661705006765, |
|
"grad_norm": 1.296829055261883, |
|
"learning_rate": 3.0062935877484807e-06, |
|
"loss": 0.3483, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.4884979702300405, |
|
"grad_norm": 1.4143686463454304, |
|
"learning_rate": 2.9676798430908e-06, |
|
"loss": 0.3468, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.5020297699594045, |
|
"grad_norm": 1.3314404969152827, |
|
"learning_rate": 2.9289501263059796e-06, |
|
"loss": 0.3526, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.5155615696887685, |
|
"grad_norm": 1.2209137417793836, |
|
"learning_rate": 2.890114041330782e-06, |
|
"loss": 0.3527, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.5290933694181326, |
|
"grad_norm": 1.3653030646734905, |
|
"learning_rate": 2.8511812184784476e-06, |
|
"loss": 0.3486, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.5426251691474966, |
|
"grad_norm": 1.3012446208990969, |
|
"learning_rate": 2.8121613120506274e-06, |
|
"loss": 0.3541, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.5561569688768606, |
|
"grad_norm": 1.1960147547589701, |
|
"learning_rate": 2.7730639979433778e-06, |
|
"loss": 0.3473, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.5696887686062246, |
|
"grad_norm": 1.4960354983589137, |
|
"learning_rate": 2.733898971247795e-06, |
|
"loss": 0.3456, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.5832205683355887, |
|
"grad_norm": 1.2152217200297626, |
|
"learning_rate": 2.6946759438458898e-06, |
|
"loss": 0.3433, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.5967523680649527, |
|
"grad_norm": 1.3453974757862506, |
|
"learning_rate": 2.655404642002304e-06, |
|
"loss": 0.3409, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.6102841677943167, |
|
"grad_norm": 1.3428793347679469, |
|
"learning_rate": 2.6160948039524497e-06, |
|
"loss": 0.3419, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.6238159675236807, |
|
"grad_norm": 1.2285874250356115, |
|
"learning_rate": 2.576756177487694e-06, |
|
"loss": 0.3508, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.6373477672530448, |
|
"grad_norm": 1.3449344602808873, |
|
"learning_rate": 2.5373985175381595e-06, |
|
"loss": 0.3503, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.6508795669824088, |
|
"grad_norm": 1.296420958776628, |
|
"learning_rate": 2.4980315837537682e-06, |
|
"loss": 0.3562, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.6644113667117728, |
|
"grad_norm": 1.3437387415224382, |
|
"learning_rate": 2.458665138084104e-06, |
|
"loss": 0.3405, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.6779431664411368, |
|
"grad_norm": 1.388342667143761, |
|
"learning_rate": 2.4193089423577125e-06, |
|
"loss": 0.3609, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.6914749661705006, |
|
"grad_norm": 1.2780103853967297, |
|
"learning_rate": 2.379972755861427e-06, |
|
"loss": 0.3464, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.7050067658998647, |
|
"grad_norm": 1.3272035289197703, |
|
"learning_rate": 2.3406663329203235e-06, |
|
"loss": 0.3495, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.7185385656292287, |
|
"grad_norm": 1.329132748454747, |
|
"learning_rate": 2.3013994204789125e-06, |
|
"loss": 0.3445, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.7320703653585927, |
|
"grad_norm": 1.398385700455576, |
|
"learning_rate": 2.262181755684152e-06, |
|
"loss": 0.3525, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.7456021650879567, |
|
"grad_norm": 1.3017040678134042, |
|
"learning_rate": 2.2230230634709004e-06, |
|
"loss": 0.3437, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.7591339648173205, |
|
"grad_norm": 1.2733756916822523, |
|
"learning_rate": 2.1839330541503846e-06, |
|
"loss": 0.3507, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.7726657645466846, |
|
"grad_norm": 1.2687781856621299, |
|
"learning_rate": 2.14492142100231e-06, |
|
"loss": 0.343, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.7861975642760486, |
|
"grad_norm": 1.2837756754284302, |
|
"learning_rate": 2.105997837871179e-06, |
|
"loss": 0.3567, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.7997293640054126, |
|
"grad_norm": 1.2388730929923404, |
|
"learning_rate": 2.0671719567674396e-06, |
|
"loss": 0.344, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.8132611637347766, |
|
"grad_norm": 1.2901053029094909, |
|
"learning_rate": 2.028453405474043e-06, |
|
"loss": 0.3517, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.8267929634641407, |
|
"grad_norm": 1.2731165360362213, |
|
"learning_rate": 1.9898517851590085e-06, |
|
"loss": 0.3386, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.8403247631935047, |
|
"grad_norm": 1.3976657362615021, |
|
"learning_rate": 1.951376667994594e-06, |
|
"loss": 0.341, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.8538565629228687, |
|
"grad_norm": 1.3707046787252906, |
|
"learning_rate": 1.913037594783648e-06, |
|
"loss": 0.3537, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.8673883626522327, |
|
"grad_norm": 1.2631835198560484, |
|
"learning_rate": 1.8748440725937485e-06, |
|
"loss": 0.3565, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.8809201623815968, |
|
"grad_norm": 1.2721719565376515, |
|
"learning_rate": 1.8368055723997013e-06, |
|
"loss": 0.3522, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.8944519621109608, |
|
"grad_norm": 1.2577752707995227, |
|
"learning_rate": 1.7989315267349936e-06, |
|
"loss": 0.3454, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.9079837618403248, |
|
"grad_norm": 1.514027269766047, |
|
"learning_rate": 1.7612313273527731e-06, |
|
"loss": 0.3496, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.9215155615696888, |
|
"grad_norm": 1.292691163793762, |
|
"learning_rate": 1.7237143228969488e-06, |
|
"loss": 0.348, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.9350473612990529, |
|
"grad_norm": 1.3014087462969852, |
|
"learning_rate": 1.686389816583973e-06, |
|
"loss": 0.3577, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.9485791610284169, |
|
"grad_norm": 1.214993986220213, |
|
"learning_rate": 1.6492670638958924e-06, |
|
"loss": 0.3362, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.962110960757781, |
|
"grad_norm": 1.221639454284981, |
|
"learning_rate": 1.6123552702852375e-06, |
|
"loss": 0.3347, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.975642760487145, |
|
"grad_norm": 1.36951304107227, |
|
"learning_rate": 1.5756635888923122e-06, |
|
"loss": 0.3408, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.989174560216509, |
|
"grad_norm": 1.291050299529943, |
|
"learning_rate": 1.539201118275463e-06, |
|
"loss": 0.3516, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.002706359945873, |
|
"grad_norm": 1.5299892777665929, |
|
"learning_rate": 1.502976900154876e-06, |
|
"loss": 0.3294, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.016238159675237, |
|
"grad_norm": 1.4268912812659167, |
|
"learning_rate": 1.4669999171704742e-06, |
|
"loss": 0.2451, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.029769959404601, |
|
"grad_norm": 1.3062098172273324, |
|
"learning_rate": 1.43127909065446e-06, |
|
"loss": 0.2438, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.0433017591339646, |
|
"grad_norm": 1.4093327122429686, |
|
"learning_rate": 1.395823278419065e-06, |
|
"loss": 0.2458, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.0568335588633286, |
|
"grad_norm": 1.3446778247332223, |
|
"learning_rate": 1.3606412725600471e-06, |
|
"loss": 0.2483, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.0703653585926927, |
|
"grad_norm": 1.288956301952182, |
|
"learning_rate": 1.3257417972764853e-06, |
|
"loss": 0.242, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.0838971583220567, |
|
"grad_norm": 1.2722385458217775, |
|
"learning_rate": 1.2911335067074108e-06, |
|
"loss": 0.2318, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.0974289580514207, |
|
"grad_norm": 1.3902319886251955, |
|
"learning_rate": 1.2568249827858153e-06, |
|
"loss": 0.2331, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.1109607577807847, |
|
"grad_norm": 1.343020265634266, |
|
"learning_rate": 1.2228247331105541e-06, |
|
"loss": 0.242, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.1244925575101488, |
|
"grad_norm": 1.3796390105439542, |
|
"learning_rate": 1.1891411888366933e-06, |
|
"loss": 0.2494, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.138024357239513, |
|
"grad_norm": 1.287377449079076, |
|
"learning_rate": 1.1557827025848048e-06, |
|
"loss": 0.2373, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.151556156968877, |
|
"grad_norm": 1.2636968613031825, |
|
"learning_rate": 1.122757546369744e-06, |
|
"loss": 0.2276, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.165087956698241, |
|
"grad_norm": 1.3768480512381478, |
|
"learning_rate": 1.0900739095494053e-06, |
|
"loss": 0.2392, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.178619756427605, |
|
"grad_norm": 1.3082506088448747, |
|
"learning_rate": 1.0577398967939824e-06, |
|
"loss": 0.2361, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.192151556156969, |
|
"grad_norm": 1.4034690066329039, |
|
"learning_rate": 1.0257635260762281e-06, |
|
"loss": 0.2319, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.205683355886333, |
|
"grad_norm": 1.2317759668032917, |
|
"learning_rate": 9.941527266832064e-07, |
|
"loss": 0.2493, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.219215155615697, |
|
"grad_norm": 1.2971411614293273, |
|
"learning_rate": 9.62915337250044e-07, |
|
"loss": 0.2277, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.232746955345061, |
|
"grad_norm": 1.3135829424346515, |
|
"learning_rate": 9.320591038161575e-07, |
|
"loss": 0.2328, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.246278755074425, |
|
"grad_norm": 1.38689896774966, |
|
"learning_rate": 9.015916779044429e-07, |
|
"loss": 0.2376, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.259810554803789, |
|
"grad_norm": 1.3904871269861285, |
|
"learning_rate": 8.715206146238989e-07, |
|
"loss": 0.2365, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.273342354533153, |
|
"grad_norm": 1.2344911252878241, |
|
"learning_rate": 8.418533707961635e-07, |
|
"loss": 0.2398, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.286874154262517, |
|
"grad_norm": 1.404914010406795, |
|
"learning_rate": 8.125973031064241e-07, |
|
"loss": 0.2375, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.300405953991881, |
|
"grad_norm": 1.4005244602892066, |
|
"learning_rate": 7.837596662791492e-07, |
|
"loss": 0.2418, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.313937753721245, |
|
"grad_norm": 1.3420607852511892, |
|
"learning_rate": 7.553476112791156e-07, |
|
"loss": 0.2332, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.3274695534506087, |
|
"grad_norm": 1.3547101946313251, |
|
"learning_rate": 7.273681835381569e-07, |
|
"loss": 0.229, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.3410013531799727, |
|
"grad_norm": 1.4020705762318206, |
|
"learning_rate": 6.998283212080873e-07, |
|
"loss": 0.2328, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.3545331529093367, |
|
"grad_norm": 1.3623596919516412, |
|
"learning_rate": 6.727348534402217e-07, |
|
"loss": 0.2525, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.3680649526387008, |
|
"grad_norm": 1.3512700634608603, |
|
"learning_rate": 6.460944986919296e-07, |
|
"loss": 0.2408, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.381596752368065, |
|
"grad_norm": 1.3266784606321098, |
|
"learning_rate": 6.199138630606389e-07, |
|
"loss": 0.234, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.395128552097429, |
|
"grad_norm": 1.3645670227625384, |
|
"learning_rate": 5.941994386456962e-07, |
|
"loss": 0.2339, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 2.408660351826793, |
|
"grad_norm": 1.4293775086324754, |
|
"learning_rate": 5.689576019385015e-07, |
|
"loss": 0.2386, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 2.422192151556157, |
|
"grad_norm": 1.4511738533160017, |
|
"learning_rate": 5.441946122413086e-07, |
|
"loss": 0.2456, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 2.435723951285521, |
|
"grad_norm": 1.4144969812794062, |
|
"learning_rate": 5.199166101150854e-07, |
|
"loss": 0.242, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.449255751014885, |
|
"grad_norm": 1.3760683860213159, |
|
"learning_rate": 4.96129615856816e-07, |
|
"loss": 0.2319, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 2.462787550744249, |
|
"grad_norm": 1.2962323322234228, |
|
"learning_rate": 4.7283952800663086e-07, |
|
"loss": 0.239, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 2.476319350473613, |
|
"grad_norm": 1.3785919346369462, |
|
"learning_rate": 4.500521218851234e-07, |
|
"loss": 0.2291, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 2.489851150202977, |
|
"grad_norm": 1.3198852846590903, |
|
"learning_rate": 4.2777304816122744e-07, |
|
"loss": 0.2294, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 2.503382949932341, |
|
"grad_norm": 1.3791429787991882, |
|
"learning_rate": 4.0600783145099935e-07, |
|
"loss": 0.2398, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.516914749661705, |
|
"grad_norm": 1.3758327911725443, |
|
"learning_rate": 3.847618689476612e-07, |
|
"loss": 0.2311, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 2.530446549391069, |
|
"grad_norm": 1.329309433256462, |
|
"learning_rate": 3.640404290832433e-07, |
|
"loss": 0.2322, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 2.543978349120433, |
|
"grad_norm": 1.3667475875891038, |
|
"learning_rate": 3.438486502221494e-07, |
|
"loss": 0.2471, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 2.557510148849797, |
|
"grad_norm": 1.4518282018049613, |
|
"learning_rate": 3.2419153938698295e-07, |
|
"loss": 0.2346, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 2.571041948579161, |
|
"grad_norm": 1.334115365965958, |
|
"learning_rate": 3.0507397101693565e-07, |
|
"loss": 0.2383, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.584573748308525, |
|
"grad_norm": 1.368769415739159, |
|
"learning_rate": 2.865006857590619e-07, |
|
"loss": 0.2344, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 2.598105548037889, |
|
"grad_norm": 1.2174336094447944, |
|
"learning_rate": 2.684762892927184e-07, |
|
"loss": 0.2354, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 2.611637347767253, |
|
"grad_norm": 1.3104470556604222, |
|
"learning_rate": 2.510052511874822e-07, |
|
"loss": 0.2297, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.6251691474966172, |
|
"grad_norm": 1.3766509507609574, |
|
"learning_rate": 2.3409190379481723e-07, |
|
"loss": 0.2294, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.6387009472259813, |
|
"grad_norm": 1.2110460902512004, |
|
"learning_rate": 2.1774044117376407e-07, |
|
"loss": 0.2273, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.6522327469553453, |
|
"grad_norm": 1.294674188348231, |
|
"learning_rate": 2.019549180509267e-07, |
|
"loss": 0.2304, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.6657645466847093, |
|
"grad_norm": 1.4025491240100805, |
|
"learning_rate": 1.8673924881500826e-07, |
|
"loss": 0.2361, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.6792963464140733, |
|
"grad_norm": 1.3312582202000482, |
|
"learning_rate": 1.7209720654614793e-07, |
|
"loss": 0.2417, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.6928281461434374, |
|
"grad_norm": 1.279370227399727, |
|
"learning_rate": 1.580324220802959e-07, |
|
"loss": 0.2306, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.706359945872801, |
|
"grad_norm": 1.32387777543638, |
|
"learning_rate": 1.4454838310886427e-07, |
|
"loss": 0.2293, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.719891745602165, |
|
"grad_norm": 1.4245015480114729, |
|
"learning_rate": 1.3164843331387123e-07, |
|
"loss": 0.2311, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.733423545331529, |
|
"grad_norm": 1.3831803662571336, |
|
"learning_rate": 1.1933577153879993e-07, |
|
"loss": 0.2387, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.746955345060893, |
|
"grad_norm": 1.341165321378542, |
|
"learning_rate": 1.0761345099536691e-07, |
|
"loss": 0.2406, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.760487144790257, |
|
"grad_norm": 1.4225949777687765, |
|
"learning_rate": 9.648437850640929e-08, |
|
"loss": 0.2334, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.774018944519621, |
|
"grad_norm": 1.2903746437594332, |
|
"learning_rate": 8.595131378507038e-08, |
|
"loss": 0.2302, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.787550744248985, |
|
"grad_norm": 1.4459041867407572, |
|
"learning_rate": 7.601686875046338e-08, |
|
"loss": 0.2357, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.801082543978349, |
|
"grad_norm": 1.43510005352293, |
|
"learning_rate": 6.668350687998565e-08, |
|
"loss": 0.2354, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.814614343707713, |
|
"grad_norm": 1.3495037373612448, |
|
"learning_rate": 5.7953542598441215e-08, |
|
"loss": 0.2299, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.828146143437077, |
|
"grad_norm": 1.2822048194805191, |
|
"learning_rate": 4.9829140704127644e-08, |
|
"loss": 0.2304, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.841677943166441, |
|
"grad_norm": 1.4152294147048579, |
|
"learning_rate": 4.231231583201989e-08, |
|
"loss": 0.2373, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.855209742895805, |
|
"grad_norm": 1.4204373591003197, |
|
"learning_rate": 3.5404931954197696e-08, |
|
"loss": 0.2359, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.8687415426251692, |
|
"grad_norm": 1.3793380734018237, |
|
"learning_rate": 2.9108701917630043e-08, |
|
"loss": 0.2447, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.8822733423545333, |
|
"grad_norm": 1.2804260757138324, |
|
"learning_rate": 2.3425187019432415e-08, |
|
"loss": 0.2446, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.8958051420838973, |
|
"grad_norm": 1.3777377768354557, |
|
"learning_rate": 1.8355796619708988e-08, |
|
"loss": 0.2367, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.9093369418132613, |
|
"grad_norm": 1.3764369341392175, |
|
"learning_rate": 1.39017877920683e-08, |
|
"loss": 0.2357, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.9228687415426253, |
|
"grad_norm": 1.4731353222848615, |
|
"learning_rate": 1.006426501190233e-08, |
|
"loss": 0.2355, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.936400541271989, |
|
"grad_norm": 1.3695944357270635, |
|
"learning_rate": 6.844179882506685e-09, |
|
"loss": 0.2312, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.949932341001353, |
|
"grad_norm": 1.3973969195223515, |
|
"learning_rate": 4.242330899106861e-09, |
|
"loss": 0.2403, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.963464140730717, |
|
"grad_norm": 1.330953340046642, |
|
"learning_rate": 2.259363250854685e-09, |
|
"loss": 0.2264, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.976995940460081, |
|
"grad_norm": 1.305324417569301, |
|
"learning_rate": 8.957686608371263e-10, |
|
"loss": 0.2328, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.990527740189445, |
|
"grad_norm": 1.349805214463894, |
|
"learning_rate": 1.5188526414244842e-10, |
|
"loss": 0.2348, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 2217, |
|
"total_flos": 438250533027840.0, |
|
"train_loss": 0.3635299830593717, |
|
"train_runtime": 20239.4479, |
|
"train_samples_per_second": 14.013, |
|
"train_steps_per_second": 0.11 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2217, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 10086, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 438250533027840.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|