|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1656, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0006038647342995169, |
|
"grad_norm": 12.983106547511849, |
|
"learning_rate": 6.02409638554217e-08, |
|
"loss": 1.5929, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0030193236714975845, |
|
"grad_norm": 13.260146238671595, |
|
"learning_rate": 3.0120481927710845e-07, |
|
"loss": 1.5587, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.006038647342995169, |
|
"grad_norm": 11.79639694358591, |
|
"learning_rate": 6.024096385542169e-07, |
|
"loss": 1.5735, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.009057971014492754, |
|
"grad_norm": 7.277578840066492, |
|
"learning_rate": 9.036144578313254e-07, |
|
"loss": 1.5311, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.012077294685990338, |
|
"grad_norm": 2.769327318468503, |
|
"learning_rate": 1.2048192771084338e-06, |
|
"loss": 1.4438, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.015096618357487922, |
|
"grad_norm": 2.665518311227292, |
|
"learning_rate": 1.5060240963855425e-06, |
|
"loss": 1.3911, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.018115942028985508, |
|
"grad_norm": 1.8820740815152255, |
|
"learning_rate": 1.8072289156626508e-06, |
|
"loss": 1.3317, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.021135265700483092, |
|
"grad_norm": 1.6047068311406143, |
|
"learning_rate": 2.1084337349397595e-06, |
|
"loss": 1.2409, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.024154589371980676, |
|
"grad_norm": 1.3473128041783777, |
|
"learning_rate": 2.4096385542168676e-06, |
|
"loss": 1.1865, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02717391304347826, |
|
"grad_norm": 0.8992074365325567, |
|
"learning_rate": 2.710843373493976e-06, |
|
"loss": 1.1272, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.030193236714975844, |
|
"grad_norm": 0.828221042015975, |
|
"learning_rate": 3.012048192771085e-06, |
|
"loss": 1.0992, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03321256038647343, |
|
"grad_norm": 0.7722295149097933, |
|
"learning_rate": 3.313253012048193e-06, |
|
"loss": 1.077, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.036231884057971016, |
|
"grad_norm": 0.7262180334499571, |
|
"learning_rate": 3.6144578313253016e-06, |
|
"loss": 1.0604, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0392512077294686, |
|
"grad_norm": 0.7658586617349729, |
|
"learning_rate": 3.91566265060241e-06, |
|
"loss": 1.0581, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.042270531400966184, |
|
"grad_norm": 0.7379283258123409, |
|
"learning_rate": 4.216867469879519e-06, |
|
"loss": 1.0455, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04528985507246377, |
|
"grad_norm": 0.7009661807319135, |
|
"learning_rate": 4.518072289156627e-06, |
|
"loss": 1.0437, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.04830917874396135, |
|
"grad_norm": 0.7478867629078089, |
|
"learning_rate": 4.819277108433735e-06, |
|
"loss": 1.0209, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.051328502415458936, |
|
"grad_norm": 0.7441839879764262, |
|
"learning_rate": 5.120481927710844e-06, |
|
"loss": 1.0119, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.05434782608695652, |
|
"grad_norm": 0.7698711195458738, |
|
"learning_rate": 5.421686746987952e-06, |
|
"loss": 1.0097, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.057367149758454104, |
|
"grad_norm": 0.7510808966928294, |
|
"learning_rate": 5.722891566265061e-06, |
|
"loss": 1.0022, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.06038647342995169, |
|
"grad_norm": 0.7895560616147937, |
|
"learning_rate": 6.02409638554217e-06, |
|
"loss": 1.0129, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06340579710144928, |
|
"grad_norm": 0.7650744315430823, |
|
"learning_rate": 6.325301204819277e-06, |
|
"loss": 0.9836, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.06642512077294686, |
|
"grad_norm": 0.7656590839025053, |
|
"learning_rate": 6.626506024096386e-06, |
|
"loss": 0.9863, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06944444444444445, |
|
"grad_norm": 0.7358171567450749, |
|
"learning_rate": 6.927710843373494e-06, |
|
"loss": 0.9901, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.07246376811594203, |
|
"grad_norm": 0.8007259705758099, |
|
"learning_rate": 7.228915662650603e-06, |
|
"loss": 0.9498, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07548309178743962, |
|
"grad_norm": 0.7867873992695736, |
|
"learning_rate": 7.530120481927712e-06, |
|
"loss": 0.9678, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.0785024154589372, |
|
"grad_norm": 0.7713133158468599, |
|
"learning_rate": 7.83132530120482e-06, |
|
"loss": 0.9574, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.08152173913043478, |
|
"grad_norm": 0.8073513926091085, |
|
"learning_rate": 8.132530120481928e-06, |
|
"loss": 0.9572, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.08454106280193237, |
|
"grad_norm": 1.3178364160478768, |
|
"learning_rate": 8.433734939759038e-06, |
|
"loss": 0.9362, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08756038647342995, |
|
"grad_norm": 0.817319025944233, |
|
"learning_rate": 8.734939759036145e-06, |
|
"loss": 0.9559, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.09057971014492754, |
|
"grad_norm": 0.9017519299904656, |
|
"learning_rate": 9.036144578313254e-06, |
|
"loss": 0.9318, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.09359903381642512, |
|
"grad_norm": 0.7674041844083781, |
|
"learning_rate": 9.337349397590362e-06, |
|
"loss": 0.9201, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.0966183574879227, |
|
"grad_norm": 0.7983502874028157, |
|
"learning_rate": 9.63855421686747e-06, |
|
"loss": 0.9226, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09963768115942029, |
|
"grad_norm": 0.83207644117932, |
|
"learning_rate": 9.93975903614458e-06, |
|
"loss": 0.9184, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.10265700483091787, |
|
"grad_norm": 0.8377158508227592, |
|
"learning_rate": 9.999822178354131e-06, |
|
"loss": 0.9236, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.10567632850241546, |
|
"grad_norm": 0.9471244349318043, |
|
"learning_rate": 9.999099799595088e-06, |
|
"loss": 0.9025, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.10869565217391304, |
|
"grad_norm": 1.0575609941440907, |
|
"learning_rate": 9.997821830092095e-06, |
|
"loss": 0.8809, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.11171497584541062, |
|
"grad_norm": 0.8575976269736529, |
|
"learning_rate": 9.995988411876328e-06, |
|
"loss": 0.8897, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.11473429951690821, |
|
"grad_norm": 0.9091932951623557, |
|
"learning_rate": 9.993599748710505e-06, |
|
"loss": 0.886, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.11775362318840579, |
|
"grad_norm": 0.8046734387056934, |
|
"learning_rate": 9.990656106066257e-06, |
|
"loss": 0.8686, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.12077294685990338, |
|
"grad_norm": 0.8525822568043689, |
|
"learning_rate": 9.9871578110946e-06, |
|
"loss": 0.8757, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12379227053140096, |
|
"grad_norm": 0.830408259403725, |
|
"learning_rate": 9.983105252589599e-06, |
|
"loss": 0.8577, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.12681159420289856, |
|
"grad_norm": 0.8432806800750737, |
|
"learning_rate": 9.978498880945138e-06, |
|
"loss": 0.8551, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12983091787439613, |
|
"grad_norm": 0.786271761268233, |
|
"learning_rate": 9.97333920810488e-06, |
|
"loss": 0.8621, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.13285024154589373, |
|
"grad_norm": 0.796593756792945, |
|
"learning_rate": 9.967626807505359e-06, |
|
"loss": 0.8565, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.1358695652173913, |
|
"grad_norm": 0.746917425620166, |
|
"learning_rate": 9.961362314012258e-06, |
|
"loss": 0.8505, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.1388888888888889, |
|
"grad_norm": 0.8349781723797338, |
|
"learning_rate": 9.954546423849842e-06, |
|
"loss": 0.8422, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.14190821256038647, |
|
"grad_norm": 0.7790591361055157, |
|
"learning_rate": 9.947179894523594e-06, |
|
"loss": 0.8328, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.14492753623188406, |
|
"grad_norm": 0.767228136687293, |
|
"learning_rate": 9.93926354473601e-06, |
|
"loss": 0.8295, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.14794685990338163, |
|
"grad_norm": 0.7961627962502099, |
|
"learning_rate": 9.930798254295628e-06, |
|
"loss": 0.8519, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.15096618357487923, |
|
"grad_norm": 0.7643126730967259, |
|
"learning_rate": 9.921784964019234e-06, |
|
"loss": 0.8277, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1539855072463768, |
|
"grad_norm": 0.7916711244024157, |
|
"learning_rate": 9.91222467562731e-06, |
|
"loss": 0.8413, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.1570048309178744, |
|
"grad_norm": 0.7566462648042606, |
|
"learning_rate": 9.902118451632694e-06, |
|
"loss": 0.8396, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.16002415458937197, |
|
"grad_norm": 0.7231104384801381, |
|
"learning_rate": 9.891467415222511e-06, |
|
"loss": 0.8236, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.16304347826086957, |
|
"grad_norm": 0.7475453602566374, |
|
"learning_rate": 9.880272750133328e-06, |
|
"loss": 0.8202, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.16606280193236714, |
|
"grad_norm": 0.7356037530580047, |
|
"learning_rate": 9.868535700519605e-06, |
|
"loss": 0.8254, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.16908212560386474, |
|
"grad_norm": 0.7098295832189967, |
|
"learning_rate": 9.856257570815415e-06, |
|
"loss": 0.8223, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.1721014492753623, |
|
"grad_norm": 0.7371095836853645, |
|
"learning_rate": 9.843439725589481e-06, |
|
"loss": 0.8264, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.1751207729468599, |
|
"grad_norm": 0.7319443665926877, |
|
"learning_rate": 9.83008358939351e-06, |
|
"loss": 0.8091, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.17814009661835747, |
|
"grad_norm": 0.7200925017038792, |
|
"learning_rate": 9.81619064660388e-06, |
|
"loss": 0.802, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.18115942028985507, |
|
"grad_norm": 0.691320410076784, |
|
"learning_rate": 9.801762441256663e-06, |
|
"loss": 0.8167, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.18417874396135267, |
|
"grad_norm": 0.7122816261812357, |
|
"learning_rate": 9.786800576876026e-06, |
|
"loss": 0.8061, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.18719806763285024, |
|
"grad_norm": 0.6987904560271868, |
|
"learning_rate": 9.77130671629602e-06, |
|
"loss": 0.8101, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.19021739130434784, |
|
"grad_norm": 0.7458807360734803, |
|
"learning_rate": 9.755282581475769e-06, |
|
"loss": 0.8106, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.1932367149758454, |
|
"grad_norm": 0.7412678256444616, |
|
"learning_rate": 9.738729953308104e-06, |
|
"loss": 0.8238, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.196256038647343, |
|
"grad_norm": 0.7383776506829391, |
|
"learning_rate": 9.72165067142163e-06, |
|
"loss": 0.8107, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.19927536231884058, |
|
"grad_norm": 0.6870198763568195, |
|
"learning_rate": 9.70404663397628e-06, |
|
"loss": 0.8133, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.20229468599033817, |
|
"grad_norm": 0.7353454455310534, |
|
"learning_rate": 9.68591979745235e-06, |
|
"loss": 0.8014, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.20531400966183574, |
|
"grad_norm": 0.6831519191417884, |
|
"learning_rate": 9.667272176433063e-06, |
|
"loss": 0.7968, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.20833333333333334, |
|
"grad_norm": 0.6847262031072842, |
|
"learning_rate": 9.648105843380674e-06, |
|
"loss": 0.811, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.2113526570048309, |
|
"grad_norm": 0.7475440150871968, |
|
"learning_rate": 9.628422928406133e-06, |
|
"loss": 0.8038, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.2143719806763285, |
|
"grad_norm": 0.7261579774011471, |
|
"learning_rate": 9.608225619032361e-06, |
|
"loss": 0.805, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.21739130434782608, |
|
"grad_norm": 0.6971431141410696, |
|
"learning_rate": 9.587516159951118e-06, |
|
"loss": 0.7923, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.22041062801932368, |
|
"grad_norm": 0.6668789906175726, |
|
"learning_rate": 9.566296852773541e-06, |
|
"loss": 0.797, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.22342995169082125, |
|
"grad_norm": 0.6993288716876052, |
|
"learning_rate": 9.544570055774348e-06, |
|
"loss": 0.8031, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.22644927536231885, |
|
"grad_norm": 0.6998160923378796, |
|
"learning_rate": 9.522338183629737e-06, |
|
"loss": 0.8061, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.22946859903381642, |
|
"grad_norm": 0.676468448981335, |
|
"learning_rate": 9.499603707149035e-06, |
|
"loss": 0.7972, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.23248792270531402, |
|
"grad_norm": 0.691768375741505, |
|
"learning_rate": 9.476369153000076e-06, |
|
"loss": 0.7937, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.23550724637681159, |
|
"grad_norm": 0.69459059647659, |
|
"learning_rate": 9.45263710342842e-06, |
|
"loss": 0.8009, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.23852657004830918, |
|
"grad_norm": 0.7295734841939806, |
|
"learning_rate": 9.428410195970337e-06, |
|
"loss": 0.7843, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.24154589371980675, |
|
"grad_norm": 0.707734024855164, |
|
"learning_rate": 9.403691123159707e-06, |
|
"loss": 0.7992, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.24456521739130435, |
|
"grad_norm": 0.7333982393973975, |
|
"learning_rate": 9.378482632228745e-06, |
|
"loss": 0.8037, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.24758454106280192, |
|
"grad_norm": 0.70227413392279, |
|
"learning_rate": 9.352787524802707e-06, |
|
"loss": 0.7937, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.2506038647342995, |
|
"grad_norm": 0.6884233411609633, |
|
"learning_rate": 9.326608656588502e-06, |
|
"loss": 0.7746, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.2536231884057971, |
|
"grad_norm": 0.6980055553926061, |
|
"learning_rate": 9.299948937057325e-06, |
|
"loss": 0.7831, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.25664251207729466, |
|
"grad_norm": 0.6886842350638906, |
|
"learning_rate": 9.272811329121305e-06, |
|
"loss": 0.7782, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.25966183574879226, |
|
"grad_norm": 0.6774682617588125, |
|
"learning_rate": 9.245198848804197e-06, |
|
"loss": 0.7676, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.26268115942028986, |
|
"grad_norm": 0.6594148286491368, |
|
"learning_rate": 9.217114564906208e-06, |
|
"loss": 0.78, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.26570048309178745, |
|
"grad_norm": 0.719815019827257, |
|
"learning_rate": 9.188561598662921e-06, |
|
"loss": 0.7677, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.26871980676328505, |
|
"grad_norm": 0.7034720888358402, |
|
"learning_rate": 9.159543123398416e-06, |
|
"loss": 0.7909, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.2717391304347826, |
|
"grad_norm": 0.7146788499687661, |
|
"learning_rate": 9.130062364172582e-06, |
|
"loss": 0.7832, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.2747584541062802, |
|
"grad_norm": 0.6785027397476343, |
|
"learning_rate": 9.1001225974227e-06, |
|
"loss": 0.7912, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.2777777777777778, |
|
"grad_norm": 0.6719234399237637, |
|
"learning_rate": 9.0697271505993e-06, |
|
"loss": 0.7918, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.2807971014492754, |
|
"grad_norm": 0.7023247170996745, |
|
"learning_rate": 9.038879401796358e-06, |
|
"loss": 0.7809, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.28381642512077293, |
|
"grad_norm": 0.696493093698672, |
|
"learning_rate": 9.00758277937586e-06, |
|
"loss": 0.7794, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.28683574879227053, |
|
"grad_norm": 0.6846368584631723, |
|
"learning_rate": 8.975840761586772e-06, |
|
"loss": 0.7734, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.2898550724637681, |
|
"grad_norm": 0.7102623044885478, |
|
"learning_rate": 8.94365687617849e-06, |
|
"loss": 0.7781, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.2928743961352657, |
|
"grad_norm": 0.7086900307853392, |
|
"learning_rate": 8.911034700008757e-06, |
|
"loss": 0.7764, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.29589371980676327, |
|
"grad_norm": 0.6832022733750703, |
|
"learning_rate": 8.87797785864615e-06, |
|
"loss": 0.7791, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.29891304347826086, |
|
"grad_norm": 0.6784895232664829, |
|
"learning_rate": 8.844490025967126e-06, |
|
"loss": 0.7734, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.30193236714975846, |
|
"grad_norm": 0.7325432498510648, |
|
"learning_rate": 8.810574923747729e-06, |
|
"loss": 0.7638, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.30495169082125606, |
|
"grad_norm": 0.6736300693382467, |
|
"learning_rate": 8.776236321249955e-06, |
|
"loss": 0.7848, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.3079710144927536, |
|
"grad_norm": 0.7199354344226971, |
|
"learning_rate": 8.741478034802835e-06, |
|
"loss": 0.7672, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.3109903381642512, |
|
"grad_norm": 0.7181172806743141, |
|
"learning_rate": 8.706303927378306e-06, |
|
"loss": 0.7726, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.3140096618357488, |
|
"grad_norm": 0.6981681510152888, |
|
"learning_rate": 8.670717908161878e-06, |
|
"loss": 0.7815, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.3170289855072464, |
|
"grad_norm": 0.6999219057435419, |
|
"learning_rate": 8.634723932118184e-06, |
|
"loss": 0.7766, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.32004830917874394, |
|
"grad_norm": 0.6720516411704228, |
|
"learning_rate": 8.598325999551425e-06, |
|
"loss": 0.7774, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.32306763285024154, |
|
"grad_norm": 0.6991878290864751, |
|
"learning_rate": 8.56152815566078e-06, |
|
"loss": 0.7659, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.32608695652173914, |
|
"grad_norm": 1.1363114984267308, |
|
"learning_rate": 8.524334490090848e-06, |
|
"loss": 0.7669, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.32910628019323673, |
|
"grad_norm": 0.7170057225271362, |
|
"learning_rate": 8.486749136477112e-06, |
|
"loss": 0.7607, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.3321256038647343, |
|
"grad_norm": 0.7151461285207649, |
|
"learning_rate": 8.448776271986542e-06, |
|
"loss": 0.7556, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.3351449275362319, |
|
"grad_norm": 0.7080653566142882, |
|
"learning_rate": 8.41042011685336e-06, |
|
"loss": 0.7801, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.33816425120772947, |
|
"grad_norm": 0.7211038917950292, |
|
"learning_rate": 8.371684933909996e-06, |
|
"loss": 0.7819, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.34118357487922707, |
|
"grad_norm": 0.6809572707011691, |
|
"learning_rate": 8.33257502811334e-06, |
|
"loss": 0.7623, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.3442028985507246, |
|
"grad_norm": 0.705758884996601, |
|
"learning_rate": 8.293094746066283e-06, |
|
"loss": 0.7806, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3472222222222222, |
|
"grad_norm": 0.7010425860566304, |
|
"learning_rate": 8.253248475534656e-06, |
|
"loss": 0.7579, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.3502415458937198, |
|
"grad_norm": 0.6961950426104488, |
|
"learning_rate": 8.213040644959572e-06, |
|
"loss": 0.7563, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.3532608695652174, |
|
"grad_norm": 0.7380978491549621, |
|
"learning_rate": 8.172475722965263e-06, |
|
"loss": 0.748, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.35628019323671495, |
|
"grad_norm": 0.6880280880233399, |
|
"learning_rate": 8.131558217862444e-06, |
|
"loss": 0.7464, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.35929951690821255, |
|
"grad_norm": 0.6923751245984867, |
|
"learning_rate": 8.090292677147268e-06, |
|
"loss": 0.7654, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.36231884057971014, |
|
"grad_norm": 0.6875828804508053, |
|
"learning_rate": 8.048683686995921e-06, |
|
"loss": 0.7575, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.36533816425120774, |
|
"grad_norm": 0.7271877934985621, |
|
"learning_rate": 8.006735871754932e-06, |
|
"loss": 0.7521, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.36835748792270534, |
|
"grad_norm": 0.6919263020022135, |
|
"learning_rate": 7.96445389342722e-06, |
|
"loss": 0.7522, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.3713768115942029, |
|
"grad_norm": 0.7079839592582174, |
|
"learning_rate": 7.921842451153982e-06, |
|
"loss": 0.7624, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.3743961352657005, |
|
"grad_norm": 0.7185016239152293, |
|
"learning_rate": 7.878906280692424e-06, |
|
"loss": 0.7638, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.3774154589371981, |
|
"grad_norm": 0.6692678069942056, |
|
"learning_rate": 7.835650153889449e-06, |
|
"loss": 0.7599, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.3804347826086957, |
|
"grad_norm": 0.6901273422065264, |
|
"learning_rate": 7.792078878151318e-06, |
|
"loss": 0.7438, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.3834541062801932, |
|
"grad_norm": 0.7025129368063263, |
|
"learning_rate": 7.748197295909359e-06, |
|
"loss": 0.7544, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.3864734299516908, |
|
"grad_norm": 0.7452661788017922, |
|
"learning_rate": 7.704010284081801e-06, |
|
"loss": 0.7542, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.3894927536231884, |
|
"grad_norm": 0.6838884146151244, |
|
"learning_rate": 7.65952275353175e-06, |
|
"loss": 0.7421, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.392512077294686, |
|
"grad_norm": 0.7050616814337126, |
|
"learning_rate": 7.614739648521412e-06, |
|
"loss": 0.7455, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.39553140096618356, |
|
"grad_norm": 0.7027226698051754, |
|
"learning_rate": 7.56966594616259e-06, |
|
"loss": 0.7526, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.39855072463768115, |
|
"grad_norm": 0.7289432071080068, |
|
"learning_rate": 7.524306655863544e-06, |
|
"loss": 0.7586, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.40157004830917875, |
|
"grad_norm": 0.70492218137627, |
|
"learning_rate": 7.478666818772252e-06, |
|
"loss": 0.7517, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.40458937198067635, |
|
"grad_norm": 0.6844740192077617, |
|
"learning_rate": 7.432751507216146e-06, |
|
"loss": 0.7515, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.4076086956521739, |
|
"grad_norm": 0.7034634050055147, |
|
"learning_rate": 7.386565824138378e-06, |
|
"loss": 0.7389, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.4106280193236715, |
|
"grad_norm": 0.7020675334607289, |
|
"learning_rate": 7.3401149025306995e-06, |
|
"loss": 0.7573, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.4136473429951691, |
|
"grad_norm": 0.7185467171376326, |
|
"learning_rate": 7.293403904862981e-06, |
|
"loss": 0.7525, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.4166666666666667, |
|
"grad_norm": 0.6904148869691911, |
|
"learning_rate": 7.246438022509465e-06, |
|
"loss": 0.7394, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.4196859903381642, |
|
"grad_norm": 0.721590691620625, |
|
"learning_rate": 7.199222475171812e-06, |
|
"loss": 0.7539, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.4227053140096618, |
|
"grad_norm": 0.6707901440948312, |
|
"learning_rate": 7.151762510298985e-06, |
|
"loss": 0.7364, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4257246376811594, |
|
"grad_norm": 0.6984624075001936, |
|
"learning_rate": 7.104063402504065e-06, |
|
"loss": 0.7475, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.428743961352657, |
|
"grad_norm": 0.6893220380004709, |
|
"learning_rate": 7.056130452978039e-06, |
|
"loss": 0.7381, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.43176328502415456, |
|
"grad_norm": 0.6680081009254123, |
|
"learning_rate": 7.0079689889006275e-06, |
|
"loss": 0.7277, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.43478260869565216, |
|
"grad_norm": 0.7193168024456026, |
|
"learning_rate": 6.959584362848239e-06, |
|
"loss": 0.7366, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.43780193236714976, |
|
"grad_norm": 0.6897586291398505, |
|
"learning_rate": 6.910981952199097e-06, |
|
"loss": 0.741, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.44082125603864736, |
|
"grad_norm": 0.6942232877339098, |
|
"learning_rate": 6.862167158535599e-06, |
|
"loss": 0.7322, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.4438405797101449, |
|
"grad_norm": 0.6892818670366425, |
|
"learning_rate": 6.813145407044003e-06, |
|
"loss": 0.7245, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.4468599033816425, |
|
"grad_norm": 0.7158811742929143, |
|
"learning_rate": 6.763922145911474e-06, |
|
"loss": 0.7386, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.4498792270531401, |
|
"grad_norm": 0.6973013952551397, |
|
"learning_rate": 6.714502845720595e-06, |
|
"loss": 0.7405, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.4528985507246377, |
|
"grad_norm": 0.7452913378431993, |
|
"learning_rate": 6.664892998841361e-06, |
|
"loss": 0.7269, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4559178743961353, |
|
"grad_norm": 0.6976841641342625, |
|
"learning_rate": 6.61509811882078e-06, |
|
"loss": 0.7265, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.45893719806763283, |
|
"grad_norm": 0.7477615859131558, |
|
"learning_rate": 6.565123739770102e-06, |
|
"loss": 0.7276, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.46195652173913043, |
|
"grad_norm": 0.7014375922011533, |
|
"learning_rate": 6.5149754157497645e-06, |
|
"loss": 0.7325, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.46497584541062803, |
|
"grad_norm": 0.7005292253227544, |
|
"learning_rate": 6.464658720152135e-06, |
|
"loss": 0.7225, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.46799516908212563, |
|
"grad_norm": 0.6943536700561682, |
|
"learning_rate": 6.41417924508208e-06, |
|
"loss": 0.7467, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.47101449275362317, |
|
"grad_norm": 0.7080470812325974, |
|
"learning_rate": 6.363542600735486e-06, |
|
"loss": 0.7247, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.47403381642512077, |
|
"grad_norm": 0.7489724802271261, |
|
"learning_rate": 6.312754414775737e-06, |
|
"loss": 0.7364, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.47705314009661837, |
|
"grad_norm": 0.7162417629371682, |
|
"learning_rate": 6.261820331708275e-06, |
|
"loss": 0.7443, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.48007246376811596, |
|
"grad_norm": 0.7148119129623506, |
|
"learning_rate": 6.210746012253277e-06, |
|
"loss": 0.7413, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.4830917874396135, |
|
"grad_norm": 0.735396159989601, |
|
"learning_rate": 6.159537132716532e-06, |
|
"loss": 0.7177, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.4861111111111111, |
|
"grad_norm": 0.6991347342976139, |
|
"learning_rate": 6.108199384358595e-06, |
|
"loss": 0.7236, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.4891304347826087, |
|
"grad_norm": 0.7041418452567949, |
|
"learning_rate": 6.0567384727622566e-06, |
|
"loss": 0.7248, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.4921497584541063, |
|
"grad_norm": 0.7260414438520773, |
|
"learning_rate": 6.005160117198448e-06, |
|
"loss": 0.7361, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.49516908212560384, |
|
"grad_norm": 0.7168927241401117, |
|
"learning_rate": 5.953470049990605e-06, |
|
"loss": 0.7244, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.49818840579710144, |
|
"grad_norm": 0.7099109469035322, |
|
"learning_rate": 5.90167401587759e-06, |
|
"loss": 0.726, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.501207729468599, |
|
"grad_norm": 0.703918614629836, |
|
"learning_rate": 5.84977777137523e-06, |
|
"loss": 0.7192, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.5042270531400966, |
|
"grad_norm": 0.7334401309257407, |
|
"learning_rate": 5.797787084136556e-06, |
|
"loss": 0.7316, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.5072463768115942, |
|
"grad_norm": 0.7182027362178113, |
|
"learning_rate": 5.745707732310781e-06, |
|
"loss": 0.7281, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.5102657004830918, |
|
"grad_norm": 0.7174224552847884, |
|
"learning_rate": 5.693545503901149e-06, |
|
"loss": 0.7246, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.5132850241545893, |
|
"grad_norm": 0.7156314475885969, |
|
"learning_rate": 5.641306196121643e-06, |
|
"loss": 0.725, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.5163043478260869, |
|
"grad_norm": 0.7364143027978156, |
|
"learning_rate": 5.5889956147527156e-06, |
|
"loss": 0.7213, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.5193236714975845, |
|
"grad_norm": 0.7655767182130154, |
|
"learning_rate": 5.536619573496027e-06, |
|
"loss": 0.7243, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.5223429951690821, |
|
"grad_norm": 0.7155331967411032, |
|
"learning_rate": 5.484183893328332e-06, |
|
"loss": 0.7238, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.5253623188405797, |
|
"grad_norm": 0.7094631414851184, |
|
"learning_rate": 5.431694401854545e-06, |
|
"loss": 0.7022, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.5283816425120773, |
|
"grad_norm": 0.701366365553249, |
|
"learning_rate": 5.379156932660067e-06, |
|
"loss": 0.7196, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.5314009661835749, |
|
"grad_norm": 0.690941783111536, |
|
"learning_rate": 5.326577324662459e-06, |
|
"loss": 0.7128, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.5344202898550725, |
|
"grad_norm": 0.7003221590794428, |
|
"learning_rate": 5.273961421462505e-06, |
|
"loss": 0.726, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.5374396135265701, |
|
"grad_norm": 0.6987939906127779, |
|
"learning_rate": 5.221315070694775e-06, |
|
"loss": 0.7083, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.5404589371980676, |
|
"grad_norm": 0.7416781371272098, |
|
"learning_rate": 5.168644123377725e-06, |
|
"loss": 0.7056, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.5434782608695652, |
|
"grad_norm": 0.7259377894484753, |
|
"learning_rate": 5.1159544332634256e-06, |
|
"loss": 0.7224, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.5464975845410628, |
|
"grad_norm": 0.7502013953347851, |
|
"learning_rate": 5.063251856186991e-06, |
|
"loss": 0.7029, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.5495169082125604, |
|
"grad_norm": 0.704558454200649, |
|
"learning_rate": 5.010542249415761e-06, |
|
"loss": 0.7151, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.552536231884058, |
|
"grad_norm": 0.7097627470098892, |
|
"learning_rate": 4.95783147099835e-06, |
|
"loss": 0.7056, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 0.73696211091724, |
|
"learning_rate": 4.90512537911358e-06, |
|
"loss": 0.7162, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.5585748792270532, |
|
"grad_norm": 0.7233738791549937, |
|
"learning_rate": 4.852429831419428e-06, |
|
"loss": 0.7244, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.5615942028985508, |
|
"grad_norm": 0.7505624253150545, |
|
"learning_rate": 4.799750684402006e-06, |
|
"loss": 0.7059, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.5646135265700483, |
|
"grad_norm": 0.7189320855898085, |
|
"learning_rate": 4.747093792724679e-06, |
|
"loss": 0.7123, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.5676328502415459, |
|
"grad_norm": 0.7501646767976898, |
|
"learning_rate": 4.6944650085774095e-06, |
|
"loss": 0.7035, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.5706521739130435, |
|
"grad_norm": 0.7162880585290501, |
|
"learning_rate": 4.641870181026322e-06, |
|
"loss": 0.7179, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.5736714975845411, |
|
"grad_norm": 1.3960570086002833, |
|
"learning_rate": 4.589315155363683e-06, |
|
"loss": 0.7139, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5766908212560387, |
|
"grad_norm": 0.7062944321865234, |
|
"learning_rate": 4.53680577245824e-06, |
|
"loss": 0.704, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.5797101449275363, |
|
"grad_norm": 0.7185669957370462, |
|
"learning_rate": 4.484347868106097e-06, |
|
"loss": 0.7023, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.5827294685990339, |
|
"grad_norm": 0.7187434418978588, |
|
"learning_rate": 4.431947272382118e-06, |
|
"loss": 0.712, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.5857487922705314, |
|
"grad_norm": 0.7079462407351277, |
|
"learning_rate": 4.379609808992e-06, |
|
"loss": 0.7181, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.5887681159420289, |
|
"grad_norm": 0.7482705608146033, |
|
"learning_rate": 4.327341294625019e-06, |
|
"loss": 0.7023, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.5917874396135265, |
|
"grad_norm": 0.7490533325460069, |
|
"learning_rate": 4.275147538307594e-06, |
|
"loss": 0.7137, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5948067632850241, |
|
"grad_norm": 0.7242674704325197, |
|
"learning_rate": 4.223034340757666e-06, |
|
"loss": 0.7031, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.5978260869565217, |
|
"grad_norm": 0.6939633292606721, |
|
"learning_rate": 4.171007493740023e-06, |
|
"loss": 0.6977, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.6008454106280193, |
|
"grad_norm": 0.7133068235360875, |
|
"learning_rate": 4.1190727794226175e-06, |
|
"loss": 0.7162, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.6038647342995169, |
|
"grad_norm": 0.754448396241569, |
|
"learning_rate": 4.067235969733937e-06, |
|
"loss": 0.6997, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6068840579710145, |
|
"grad_norm": 0.7010857082662448, |
|
"learning_rate": 4.015502825721537e-06, |
|
"loss": 0.7066, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.6099033816425121, |
|
"grad_norm": 0.7220837983225042, |
|
"learning_rate": 3.963879096911751e-06, |
|
"loss": 0.7086, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.6129227053140096, |
|
"grad_norm": 0.7251109346616071, |
|
"learning_rate": 3.91237052067072e-06, |
|
"loss": 0.7066, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.6159420289855072, |
|
"grad_norm": 0.72480781238386, |
|
"learning_rate": 3.860982821566729e-06, |
|
"loss": 0.7182, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.6189613526570048, |
|
"grad_norm": 3.2413869749561592, |
|
"learning_rate": 3.8097217107340107e-06, |
|
"loss": 0.6877, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.6219806763285024, |
|
"grad_norm": 0.7252475452320206, |
|
"learning_rate": 3.7585928852380025e-06, |
|
"loss": 0.6922, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 0.7225890571453955, |
|
"learning_rate": 3.7076020274421996e-06, |
|
"loss": 0.7111, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.6280193236714976, |
|
"grad_norm": 0.7398879600723324, |
|
"learning_rate": 3.6567548043766157e-06, |
|
"loss": 0.6938, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.6310386473429952, |
|
"grad_norm": 0.7320766149333374, |
|
"learning_rate": 3.6060568671079658e-06, |
|
"loss": 0.6994, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.6340579710144928, |
|
"grad_norm": 0.7313655291651611, |
|
"learning_rate": 3.5555138501116247e-06, |
|
"loss": 0.7059, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.6370772946859904, |
|
"grad_norm": 0.7250580708434604, |
|
"learning_rate": 3.5051313706453995e-06, |
|
"loss": 0.7086, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.6400966183574879, |
|
"grad_norm": 0.7365679502351182, |
|
"learning_rate": 3.4549150281252635e-06, |
|
"loss": 0.702, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.6431159420289855, |
|
"grad_norm": 0.7193521177653698, |
|
"learning_rate": 3.40487040350303e-06, |
|
"loss": 0.7027, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.6461352657004831, |
|
"grad_norm": 0.7308160022070641, |
|
"learning_rate": 3.355003058646105e-06, |
|
"loss": 0.717, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.6491545893719807, |
|
"grad_norm": 0.7141660531113965, |
|
"learning_rate": 3.305318535719343e-06, |
|
"loss": 0.6754, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.6521739130434783, |
|
"grad_norm": 0.7027978905252191, |
|
"learning_rate": 3.2558223565691104e-06, |
|
"loss": 0.6984, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.6551932367149759, |
|
"grad_norm": 0.733945795767204, |
|
"learning_rate": 3.2065200221095905e-06, |
|
"loss": 0.6916, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.6582125603864735, |
|
"grad_norm": 0.7409311097771059, |
|
"learning_rate": 3.1574170117114293e-06, |
|
"loss": 0.7043, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.6612318840579711, |
|
"grad_norm": 0.7161483851129942, |
|
"learning_rate": 3.1085187825927555e-06, |
|
"loss": 0.6941, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.6642512077294686, |
|
"grad_norm": 0.7744670260939269, |
|
"learning_rate": 3.0598307692126904e-06, |
|
"loss": 0.7011, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.6672705314009661, |
|
"grad_norm": 0.7237442202307557, |
|
"learning_rate": 3.0113583826673655e-06, |
|
"loss": 0.7007, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.6702898550724637, |
|
"grad_norm": 0.7450399768587923, |
|
"learning_rate": 2.9631070100885373e-06, |
|
"loss": 0.7107, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.6733091787439613, |
|
"grad_norm": 0.7517737488150603, |
|
"learning_rate": 2.915082014044883e-06, |
|
"loss": 0.7067, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.6763285024154589, |
|
"grad_norm": 0.7254996669994487, |
|
"learning_rate": 2.867288731946004e-06, |
|
"loss": 0.7006, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.6793478260869565, |
|
"grad_norm": 0.7421072709801319, |
|
"learning_rate": 2.8197324754492456e-06, |
|
"loss": 0.6946, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.6823671497584541, |
|
"grad_norm": 0.7531577228122275, |
|
"learning_rate": 2.7724185298693596e-06, |
|
"loss": 0.6893, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.6853864734299517, |
|
"grad_norm": 0.7295101380197203, |
|
"learning_rate": 2.7253521535911144e-06, |
|
"loss": 0.708, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.6884057971014492, |
|
"grad_norm": 0.7706880360337985, |
|
"learning_rate": 2.678538577484871e-06, |
|
"loss": 0.6986, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6914251207729468, |
|
"grad_norm": 0.7950469957317675, |
|
"learning_rate": 2.6319830043252616e-06, |
|
"loss": 0.6905, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.6944444444444444, |
|
"grad_norm": 0.7435238103186514, |
|
"learning_rate": 2.5856906082129313e-06, |
|
"loss": 0.705, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.697463768115942, |
|
"grad_norm": 0.7599556075082218, |
|
"learning_rate": 2.53966653399952e-06, |
|
"loss": 0.6885, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.7004830917874396, |
|
"grad_norm": 0.7576946845112895, |
|
"learning_rate": 2.4939158967158657e-06, |
|
"loss": 0.7052, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.7035024154589372, |
|
"grad_norm": 0.7524201974305594, |
|
"learning_rate": 2.448443781003527e-06, |
|
"loss": 0.6897, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.7065217391304348, |
|
"grad_norm": 0.7449907814909446, |
|
"learning_rate": 2.403255240549693e-06, |
|
"loss": 0.6965, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.7095410628019324, |
|
"grad_norm": 0.7506825678278848, |
|
"learning_rate": 2.3583552975255108e-06, |
|
"loss": 0.6826, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.7125603864734299, |
|
"grad_norm": 0.7427190867626492, |
|
"learning_rate": 2.313748942027956e-06, |
|
"loss": 0.6969, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.7155797101449275, |
|
"grad_norm": 0.7351464262809558, |
|
"learning_rate": 2.269441131525213e-06, |
|
"loss": 0.6848, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.7185990338164251, |
|
"grad_norm": 0.7679289820138984, |
|
"learning_rate": 2.225436790305733e-06, |
|
"loss": 0.6941, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.7216183574879227, |
|
"grad_norm": 0.7203078445690018, |
|
"learning_rate": 2.181740808930947e-06, |
|
"loss": 0.6949, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.7246376811594203, |
|
"grad_norm": 0.7480753757617126, |
|
"learning_rate": 2.1383580436917452e-06, |
|
"loss": 0.6853, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7276570048309179, |
|
"grad_norm": 0.7342063399543391, |
|
"learning_rate": 2.0952933160687456e-06, |
|
"loss": 0.688, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.7306763285024155, |
|
"grad_norm": 0.7421683945425792, |
|
"learning_rate": 2.052551412196456e-06, |
|
"loss": 0.6889, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.7336956521739131, |
|
"grad_norm": 0.7453132341130935, |
|
"learning_rate": 2.010137082331354e-06, |
|
"loss": 0.6835, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.7367149758454107, |
|
"grad_norm": 0.7382993949100215, |
|
"learning_rate": 1.96805504032393e-06, |
|
"loss": 0.6846, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.7397342995169082, |
|
"grad_norm": 0.725949395619062, |
|
"learning_rate": 1.9263099630948274e-06, |
|
"loss": 0.6903, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.7427536231884058, |
|
"grad_norm": 0.7405823327274087, |
|
"learning_rate": 1.8849064901150372e-06, |
|
"loss": 0.6855, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.7457729468599034, |
|
"grad_norm": 0.727724200500573, |
|
"learning_rate": 1.8438492228902893e-06, |
|
"loss": 0.6872, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.748792270531401, |
|
"grad_norm": 0.7535334426389806, |
|
"learning_rate": 1.8031427244496357e-06, |
|
"loss": 0.6829, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.7518115942028986, |
|
"grad_norm": 0.7659798574727297, |
|
"learning_rate": 1.7627915188383382e-06, |
|
"loss": 0.6959, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.7548309178743962, |
|
"grad_norm": 0.7326466364617145, |
|
"learning_rate": 1.7228000906150672e-06, |
|
"loss": 0.6877, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.7578502415458938, |
|
"grad_norm": 0.7382801872464264, |
|
"learning_rate": 1.6831728843534962e-06, |
|
"loss": 0.6823, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.7608695652173914, |
|
"grad_norm": 0.752216196056158, |
|
"learning_rate": 1.6439143041483352e-06, |
|
"loss": 0.6934, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.7638888888888888, |
|
"grad_norm": 0.7525714660213496, |
|
"learning_rate": 1.6050287131258862e-06, |
|
"loss": 0.677, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.7669082125603864, |
|
"grad_norm": 0.7264048871030653, |
|
"learning_rate": 1.5665204329591066e-06, |
|
"loss": 0.6724, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.769927536231884, |
|
"grad_norm": 0.8059548153091458, |
|
"learning_rate": 1.528393743387328e-06, |
|
"loss": 0.6827, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.7729468599033816, |
|
"grad_norm": 0.713705103268977, |
|
"learning_rate": 1.4906528817406052e-06, |
|
"loss": 0.6877, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.7759661835748792, |
|
"grad_norm": 0.7174894260477515, |
|
"learning_rate": 1.453302042468786e-06, |
|
"loss": 0.6747, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.7789855072463768, |
|
"grad_norm": 0.7505147750107104, |
|
"learning_rate": 1.4163453766753537e-06, |
|
"loss": 0.6813, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.7820048309178744, |
|
"grad_norm": 0.7257832334807974, |
|
"learning_rate": 1.3797869916560692e-06, |
|
"loss": 0.676, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.785024154589372, |
|
"grad_norm": 0.7311403168001216, |
|
"learning_rate": 1.3436309504425137e-06, |
|
"loss": 0.6834, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.7880434782608695, |
|
"grad_norm": 0.7776182112232197, |
|
"learning_rate": 1.3078812713505079e-06, |
|
"loss": 0.6882, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.7910628019323671, |
|
"grad_norm": 0.7460577014828186, |
|
"learning_rate": 1.2725419275335404e-06, |
|
"loss": 0.6883, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.7940821256038647, |
|
"grad_norm": 0.7681283165774995, |
|
"learning_rate": 1.237616846541192e-06, |
|
"loss": 0.6854, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.7971014492753623, |
|
"grad_norm": 0.7519891668239808, |
|
"learning_rate": 1.2031099098826376e-06, |
|
"loss": 0.6889, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.8001207729468599, |
|
"grad_norm": 0.7499150329840207, |
|
"learning_rate": 1.1690249525952569e-06, |
|
"loss": 0.6886, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.8031400966183575, |
|
"grad_norm": 0.7336555062881699, |
|
"learning_rate": 1.1353657628184217e-06, |
|
"loss": 0.6665, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.8061594202898551, |
|
"grad_norm": 0.747198848048635, |
|
"learning_rate": 1.1021360813724924e-06, |
|
"loss": 0.6887, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.8091787439613527, |
|
"grad_norm": 0.7494833783636606, |
|
"learning_rate": 1.0693396013430552e-06, |
|
"loss": 0.6863, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.8121980676328503, |
|
"grad_norm": 0.762604670610555, |
|
"learning_rate": 1.036979967670494e-06, |
|
"loss": 0.691, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.8152173913043478, |
|
"grad_norm": 0.7514022588829053, |
|
"learning_rate": 1.0050607767448928e-06, |
|
"loss": 0.6721, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.8182367149758454, |
|
"grad_norm": 0.7556434497258411, |
|
"learning_rate": 9.735855760063412e-07, |
|
"loss": 0.6865, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.821256038647343, |
|
"grad_norm": 0.7654410426606472, |
|
"learning_rate": 9.425578635506721e-07, |
|
"loss": 0.6779, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.8242753623188406, |
|
"grad_norm": 0.7454705942741552, |
|
"learning_rate": 9.119810877406998e-07, |
|
"loss": 0.6833, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.8272946859903382, |
|
"grad_norm": 0.7683757765928639, |
|
"learning_rate": 8.818586468229695e-07, |
|
"loss": 0.683, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.8303140096618358, |
|
"grad_norm": 0.7518004833253231, |
|
"learning_rate": 8.521938885500825e-07, |
|
"loss": 0.6817, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 0.7391679840808719, |
|
"learning_rate": 8.229901098086335e-07, |
|
"loss": 0.6834, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.836352657004831, |
|
"grad_norm": 0.7265358575650989, |
|
"learning_rate": 7.942505562528024e-07, |
|
"loss": 0.6828, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.8393719806763285, |
|
"grad_norm": 0.7475584495954996, |
|
"learning_rate": 7.659784219436373e-07, |
|
"loss": 0.6688, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.842391304347826, |
|
"grad_norm": 0.7491195632530819, |
|
"learning_rate": 7.381768489940678e-07, |
|
"loss": 0.6675, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.8454106280193237, |
|
"grad_norm": 0.7526229496466609, |
|
"learning_rate": 7.108489272197089e-07, |
|
"loss": 0.6836, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.8484299516908212, |
|
"grad_norm": 0.7346071246989833, |
|
"learning_rate": 6.839976937954479e-07, |
|
"loss": 0.6844, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.8514492753623188, |
|
"grad_norm": 0.7472181278592462, |
|
"learning_rate": 6.576261329179123e-07, |
|
"loss": 0.6802, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.8544685990338164, |
|
"grad_norm": 0.760752650514086, |
|
"learning_rate": 6.317371754738044e-07, |
|
"loss": 0.6891, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.857487922705314, |
|
"grad_norm": 0.7504755271401121, |
|
"learning_rate": 6.06333698714171e-07, |
|
"loss": 0.6736, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.8605072463768116, |
|
"grad_norm": 0.7362415396628262, |
|
"learning_rate": 5.814185259346267e-07, |
|
"loss": 0.6747, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.8635265700483091, |
|
"grad_norm": 0.7605711326403803, |
|
"learning_rate": 5.56994426161584e-07, |
|
"loss": 0.6862, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.8665458937198067, |
|
"grad_norm": 0.7361924708882572, |
|
"learning_rate": 5.330641138445064e-07, |
|
"loss": 0.6727, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.8695652173913043, |
|
"grad_norm": 0.7535196260958092, |
|
"learning_rate": 5.096302485542265e-07, |
|
"loss": 0.6855, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.8725845410628019, |
|
"grad_norm": 0.779260026378618, |
|
"learning_rate": 4.866954346873715e-07, |
|
"loss": 0.6831, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.8756038647342995, |
|
"grad_norm": 0.7516986282609862, |
|
"learning_rate": 4.642622211769099e-07, |
|
"loss": 0.6799, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.8786231884057971, |
|
"grad_norm": 0.732975469848745, |
|
"learning_rate": 4.4233310120887387e-07, |
|
"loss": 0.6773, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.8816425120772947, |
|
"grad_norm": 0.7423980700987695, |
|
"learning_rate": 4.209105119452628e-07, |
|
"loss": 0.6775, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.8846618357487923, |
|
"grad_norm": 0.7479313416746293, |
|
"learning_rate": 3.999968342531918e-07, |
|
"loss": 0.6635, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.8876811594202898, |
|
"grad_norm": 0.7798737107901984, |
|
"learning_rate": 3.7959439244027727e-07, |
|
"loss": 0.6745, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.8907004830917874, |
|
"grad_norm": 0.7387373917982181, |
|
"learning_rate": 3.5970545399632574e-07, |
|
"loss": 0.6744, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.893719806763285, |
|
"grad_norm": 0.7337344483315793, |
|
"learning_rate": 3.4033222934131914e-07, |
|
"loss": 0.6755, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.8967391304347826, |
|
"grad_norm": 0.7378866951920182, |
|
"learning_rate": 3.214768715797656e-07, |
|
"loss": 0.6642, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.8997584541062802, |
|
"grad_norm": 0.7461379146470514, |
|
"learning_rate": 3.0314147626139543e-07, |
|
"loss": 0.6808, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.9027777777777778, |
|
"grad_norm": 0.757193556184507, |
|
"learning_rate": 2.853280811482734e-07, |
|
"loss": 0.6693, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.9057971014492754, |
|
"grad_norm": 0.7415417205678285, |
|
"learning_rate": 2.6803866598832216e-07, |
|
"loss": 0.6758, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.908816425120773, |
|
"grad_norm": 0.771131359954168, |
|
"learning_rate": 2.5127515229529665e-07, |
|
"loss": 0.682, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.9118357487922706, |
|
"grad_norm": 0.7370379289888651, |
|
"learning_rate": 2.350394031352343e-07, |
|
"loss": 0.6842, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.9148550724637681, |
|
"grad_norm": 0.738116305395423, |
|
"learning_rate": 2.1933322291938897e-07, |
|
"loss": 0.6763, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.9178743961352657, |
|
"grad_norm": 0.7664616804871662, |
|
"learning_rate": 2.041583572037037e-07, |
|
"loss": 0.6884, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.9208937198067633, |
|
"grad_norm": 0.7361049624043503, |
|
"learning_rate": 1.8951649249480287e-07, |
|
"loss": 0.6854, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.9239130434782609, |
|
"grad_norm": 0.73972951148021, |
|
"learning_rate": 1.7540925606256088e-07, |
|
"loss": 0.6956, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.9269323671497585, |
|
"grad_norm": 0.7382244412309273, |
|
"learning_rate": 1.6183821575925186e-07, |
|
"loss": 0.6738, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.9299516908212561, |
|
"grad_norm": 0.7276987617989338, |
|
"learning_rate": 1.4880487984529846e-07, |
|
"loss": 0.6815, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.9329710144927537, |
|
"grad_norm": 0.7527483196420391, |
|
"learning_rate": 1.363106968216482e-07, |
|
"loss": 0.6863, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.9359903381642513, |
|
"grad_norm": 0.7602203209981125, |
|
"learning_rate": 1.243570552687895e-07, |
|
"loss": 0.6869, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.9390096618357487, |
|
"grad_norm": 0.7591679238485205, |
|
"learning_rate": 1.1294528369242663e-07, |
|
"loss": 0.6897, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 0.9420289855072463, |
|
"grad_norm": 0.7580197201897864, |
|
"learning_rate": 1.020766503758347e-07, |
|
"loss": 0.6862, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.9450483091787439, |
|
"grad_norm": 0.7274440508843062, |
|
"learning_rate": 9.175236323890058e-08, |
|
"loss": 0.6798, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 0.9480676328502415, |
|
"grad_norm": 0.7517750389382779, |
|
"learning_rate": 8.197356970388148e-08, |
|
"loss": 0.6758, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.9510869565217391, |
|
"grad_norm": 0.7581934148074195, |
|
"learning_rate": 7.274135656787917e-08, |
|
"loss": 0.6871, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.9541062801932367, |
|
"grad_norm": 0.7349286966931822, |
|
"learning_rate": 6.405674988205602e-08, |
|
"loss": 0.6709, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.9571256038647343, |
|
"grad_norm": 0.7492094371596764, |
|
"learning_rate": 5.592071483760397e-08, |
|
"loss": 0.6948, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 0.9601449275362319, |
|
"grad_norm": 0.7600332357347815, |
|
"learning_rate": 4.833415565847155e-08, |
|
"loss": 0.6798, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.9631642512077294, |
|
"grad_norm": 0.7647034888713604, |
|
"learning_rate": 4.1297915500873034e-08, |
|
"loss": 0.6843, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 0.966183574879227, |
|
"grad_norm": 0.7434041279358656, |
|
"learning_rate": 3.481277635957903e-08, |
|
"loss": 0.6875, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.9692028985507246, |
|
"grad_norm": 0.7363545932702806, |
|
"learning_rate": 2.88794589810093e-08, |
|
"loss": 0.686, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 0.9722222222222222, |
|
"grad_norm": 0.7276212514725846, |
|
"learning_rate": 2.3498622783128533e-08, |
|
"loss": 0.6743, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.9752415458937198, |
|
"grad_norm": 0.7350554163815118, |
|
"learning_rate": 1.8670865782161042e-08, |
|
"loss": 0.6838, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 0.9782608695652174, |
|
"grad_norm": 0.7452729852364519, |
|
"learning_rate": 1.4396724526127282e-08, |
|
"loss": 0.679, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.981280193236715, |
|
"grad_norm": 0.7662828104651594, |
|
"learning_rate": 1.067667403521433e-08, |
|
"loss": 0.6936, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.9842995169082126, |
|
"grad_norm": 0.738399747105122, |
|
"learning_rate": 7.51112774898144e-09, |
|
"loss": 0.6699, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.9873188405797102, |
|
"grad_norm": 0.754232541083393, |
|
"learning_rate": 4.900437480413467e-09, |
|
"loss": 0.6742, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 0.9903381642512077, |
|
"grad_norm": 0.7350270953242116, |
|
"learning_rate": 2.844893376816593e-09, |
|
"loss": 0.6628, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.9933574879227053, |
|
"grad_norm": 0.7236271614987632, |
|
"learning_rate": 1.3447238875774482e-09, |
|
"loss": 0.6716, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.9963768115942029, |
|
"grad_norm": 0.7622443185044537, |
|
"learning_rate": 4.000957387700899e-10, |
|
"loss": 0.6832, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.9993961352657005, |
|
"grad_norm": 0.7520131293111865, |
|
"learning_rate": 1.1113914626381672e-11, |
|
"loss": 0.6816, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_runtime": 1.6679, |
|
"eval_samples_per_second": 5.996, |
|
"eval_steps_per_second": 1.799, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1656, |
|
"total_flos": 114021718032384.0, |
|
"train_loss": 0.7685830109361289, |
|
"train_runtime": 7351.6585, |
|
"train_samples_per_second": 3.603, |
|
"train_steps_per_second": 0.225 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1656, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 114021718032384.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|