{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.987995198079231,
  "eval_steps": 500,
  "global_step": 2080,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.024009603841536616, "grad_norm": 0.48156169056892395, "learning_rate": 4.9999287112445194e-05, "loss": 1.2537, "step": 5 },
    { "epoch": 0.04801920768307323, "grad_norm": 0.48818066716194153, "learning_rate": 4.999714849043745e-05, "loss": 1.1695, "step": 10 },
    { "epoch": 0.07202881152460984, "grad_norm": 0.42254194617271423, "learning_rate": 4.999358425594454e-05, "loss": 1.1879, "step": 15 },
    { "epoch": 0.09603841536614646, "grad_norm": 0.4787527024745941, "learning_rate": 4.9988594612238336e-05, "loss": 1.0137, "step": 20 },
    { "epoch": 0.12004801920768307, "grad_norm": 0.5006477236747742, "learning_rate": 4.9982179843883225e-05, "loss": 1.1109, "step": 25 },
    { "epoch": 0.14405762304921968, "grad_norm": 0.4857625961303711, "learning_rate": 4.9974340316719906e-05, "loss": 1.04, "step": 30 },
    { "epoch": 0.16806722689075632, "grad_norm": 0.5294004678726196, "learning_rate": 4.996507647784446e-05, "loss": 1.0158, "step": 35 },
    { "epoch": 0.19207683073229292, "grad_norm": 0.5924685597419739, "learning_rate": 4.995438885558294e-05, "loss": 0.9004, "step": 40 },
    { "epoch": 0.21608643457382953, "grad_norm": 0.5567044615745544, "learning_rate": 4.9942278059461175e-05, "loss": 0.9626, "step": 45 },
    { "epoch": 0.24009603841536614, "grad_norm": 0.5250746607780457, "learning_rate": 4.992874478017003e-05, "loss": 1.0787, "step": 50 },
    { "epoch": 0.26410564225690275, "grad_norm": 0.5340147018432617, "learning_rate": 4.9913789789526023e-05, "loss": 0.8533, "step": 55 },
    { "epoch": 0.28811524609843936, "grad_norm": 0.5907275080680847, "learning_rate": 4.989741394042727e-05, "loss": 0.9406, "step": 60 },
    { "epoch": 0.31212484993997597, "grad_norm": 0.6365830898284912, "learning_rate": 4.987961816680492e-05, "loss": 1.0017, "step": 65 },
    { "epoch": 0.33613445378151263, "grad_norm": 0.6287215948104858, "learning_rate": 4.9860403483569805e-05, "loss": 0.8902, "step": 70 },
    { "epoch": 0.36014405762304924, "grad_norm": 0.6473432183265686, "learning_rate": 4.983977098655461e-05, "loss": 0.907, "step": 75 },
    { "epoch": 0.38415366146458585, "grad_norm": 0.663551926612854, "learning_rate": 4.981772185245135e-05, "loss": 1.0197, "step": 80 },
    { "epoch": 0.40816326530612246, "grad_norm": 0.5929960012435913, "learning_rate": 4.979425733874431e-05, "loss": 0.8131, "step": 85 },
    { "epoch": 0.43217286914765907, "grad_norm": 0.6948341131210327, "learning_rate": 4.9769378783638255e-05, "loss": 0.8665, "step": 90 },
    { "epoch": 0.4561824729891957, "grad_norm": 0.8430640697479248, "learning_rate": 4.974308760598218e-05, "loss": 0.9219, "step": 95 },
    { "epoch": 0.4801920768307323, "grad_norm": 0.7752465009689331, "learning_rate": 4.971538530518836e-05, "loss": 0.9425, "step": 100 },
    { "epoch": 0.5042016806722689, "grad_norm": 0.7160285711288452, "learning_rate": 4.968627346114681e-05, "loss": 0.8781, "step": 105 },
    { "epoch": 0.5282112845138055, "grad_norm": 0.7385575175285339, "learning_rate": 4.965575373413527e-05, "loss": 0.8446, "step": 110 },
    { "epoch": 0.5522208883553421, "grad_norm": 0.8385459184646606, "learning_rate": 4.9623827864724394e-05, "loss": 0.9453, "step": 115 },
    { "epoch": 0.5762304921968787, "grad_norm": 0.8416388630867004, "learning_rate": 4.959049767367859e-05, "loss": 0.8086, "step": 120 },
    { "epoch": 0.6002400960384153, "grad_norm": 0.7977305054664612, "learning_rate": 4.955576506185213e-05, "loss": 0.8657, "step": 125 },
    { "epoch": 0.6242496998799519, "grad_norm": 0.8073853850364685, "learning_rate": 4.951963201008076e-05, "loss": 0.9168, "step": 130 },
    { "epoch": 0.6482593037214885, "grad_norm": 0.825240433216095, "learning_rate": 4.9482100579068706e-05, "loss": 0.9192, "step": 135 },
    { "epoch": 0.6722689075630253, "grad_norm": 0.7907525300979614, "learning_rate": 4.944317290927117e-05, "loss": 0.7927, "step": 140 },
    { "epoch": 0.6962785114045619, "grad_norm": 0.7800418138504028, "learning_rate": 4.9402851220772274e-05, "loss": 0.8486, "step": 145 },
    { "epoch": 0.7202881152460985, "grad_norm": 0.8340911269187927, "learning_rate": 4.93611378131584e-05, "loss": 0.7937, "step": 150 },
    { "epoch": 0.7442977190876351, "grad_norm": 0.9103155732154846, "learning_rate": 4.931803506538707e-05, "loss": 0.834, "step": 155 },
    { "epoch": 0.7683073229291717, "grad_norm": 0.8660081028938293, "learning_rate": 4.92735454356513e-05, "loss": 0.7799, "step": 160 },
    { "epoch": 0.7923169267707083, "grad_norm": 0.7390795350074768, "learning_rate": 4.9227671461239354e-05, "loss": 0.8608, "step": 165 },
    { "epoch": 0.8163265306122449, "grad_norm": 0.8534154891967773, "learning_rate": 4.918041575839007e-05, "loss": 0.8872, "step": 170 },
    { "epoch": 0.8403361344537815, "grad_norm": 1.0927950143814087, "learning_rate": 4.913178102214363e-05, "loss": 0.8681, "step": 175 },
    { "epoch": 0.8643457382953181, "grad_norm": 0.8660680055618286, "learning_rate": 4.9081770026187914e-05, "loss": 0.8451, "step": 180 },
    { "epoch": 0.8883553421368547, "grad_norm": 0.7952775955200195, "learning_rate": 4.9030385622700225e-05, "loss": 0.8662, "step": 185 },
    { "epoch": 0.9123649459783914, "grad_norm": 0.8808633685112, "learning_rate": 4.897763074218472e-05, "loss": 0.8183, "step": 190 },
    { "epoch": 0.936374549819928, "grad_norm": 0.7455188632011414, "learning_rate": 4.892350839330522e-05, "loss": 0.8887, "step": 195 },
    { "epoch": 0.9603841536614646, "grad_norm": 0.7687739729881287, "learning_rate": 4.886802166271364e-05, "loss": 0.8505, "step": 200 },
    { "epoch": 0.9843937575030012, "grad_norm": 0.7754830121994019, "learning_rate": 4.881117371487396e-05, "loss": 0.835, "step": 205 },
    { "epoch": 1.0084033613445378, "grad_norm": 0.7828541398048401, "learning_rate": 4.875296779188173e-05, "loss": 0.8531, "step": 210 },
    { "epoch": 1.0324129651860745, "grad_norm": 1.015205979347229, "learning_rate": 4.8693407213279206e-05, "loss": 0.8679, "step": 215 },
    { "epoch": 1.056422569027611, "grad_norm": 0.8347631692886353, "learning_rate": 4.8632495375866004e-05, "loss": 0.8432, "step": 220 },
    { "epoch": 1.0804321728691477, "grad_norm": 0.9165553450584412, "learning_rate": 4.8570235753505406e-05, "loss": 0.8362, "step": 225 },
    { "epoch": 1.1044417767106842, "grad_norm": 0.954313337802887, "learning_rate": 4.850663189692619e-05, "loss": 0.7704, "step": 230 },
    { "epoch": 1.128451380552221, "grad_norm": 1.096909761428833, "learning_rate": 4.844168743352019e-05, "loss": 0.8357, "step": 235 },
    { "epoch": 1.1524609843937574, "grad_norm": 0.9319111108779907, "learning_rate": 4.837540606713538e-05, "loss": 0.7461, "step": 240 },
    { "epoch": 1.1764705882352942, "grad_norm": 1.0731585025787354, "learning_rate": 4.830779157786465e-05, "loss": 0.8821, "step": 245 },
    { "epoch": 1.2004801920768307, "grad_norm": 0.8962486982345581, "learning_rate": 4.823884782183023e-05, "loss": 0.889, "step": 250 },
    { "epoch": 1.2244897959183674, "grad_norm": 0.8899649381637573, "learning_rate": 4.8168578730963804e-05, "loss": 0.7407, "step": 255 },
    { "epoch": 1.2484993997599039, "grad_norm": 0.9980818033218384, "learning_rate": 4.8096988312782174e-05, "loss": 0.7822, "step": 260 },
    { "epoch": 1.2725090036014406, "grad_norm": 0.9607000350952148, "learning_rate": 4.80240806501588e-05, "loss": 0.9008, "step": 265 },
    { "epoch": 1.296518607442977, "grad_norm": 1.0252315998077393, "learning_rate": 4.7949859901090896e-05, "loss": 0.7477, "step": 270 },
    { "epoch": 1.3205282112845138, "grad_norm": 0.8092382550239563, "learning_rate": 4.787433029846236e-05, "loss": 0.7846, "step": 275 },
    { "epoch": 1.3445378151260505, "grad_norm": 0.9593423008918762, "learning_rate": 4.7797496149802256e-05, "loss": 0.8544, "step": 280 },
    { "epoch": 1.368547418967587, "grad_norm": 0.9516417980194092, "learning_rate": 4.771936183703927e-05, "loss": 0.7706, "step": 285 },
    { "epoch": 1.3925570228091235, "grad_norm": 1.2291992902755737, "learning_rate": 4.763993181625174e-05, "loss": 0.8256, "step": 290 },
    { "epoch": 1.4165666266506602, "grad_norm": 0.9861960411071777, "learning_rate": 4.7559210617413514e-05, "loss": 0.8185, "step": 295 },
    { "epoch": 1.440576230492197, "grad_norm": 0.8679934144020081, "learning_rate": 4.7477202844135646e-05, "loss": 0.7356, "step": 300 },
    { "epoch": 1.4645858343337335, "grad_norm": 0.754160463809967, "learning_rate": 4.739391317340383e-05, "loss": 0.8015, "step": 305 },
    { "epoch": 1.4885954381752702, "grad_norm": 1.0975936651229858, "learning_rate": 4.730934635531161e-05, "loss": 0.8103, "step": 310 },
    { "epoch": 1.5126050420168067, "grad_norm": 0.8794222474098206, "learning_rate": 4.722350721278958e-05, "loss": 0.6949, "step": 315 },
    { "epoch": 1.5366146458583434, "grad_norm": 1.009118676185608, "learning_rate": 4.713640064133025e-05, "loss": 0.779, "step": 320 },
    { "epoch": 1.5606242496998801, "grad_norm": 1.1100735664367676, "learning_rate": 4.7048031608708876e-05, "loss": 0.8016, "step": 325 },
    { "epoch": 1.5846338535414166, "grad_norm": 0.8312719464302063, "learning_rate": 4.6958405154700154e-05, "loss": 0.746, "step": 330 },
    { "epoch": 1.6086434573829531, "grad_norm": 0.9220231175422668, "learning_rate": 4.686752639079076e-05, "loss": 0.7439, "step": 335 },
    { "epoch": 1.6326530612244898, "grad_norm": 1.1262476444244385, "learning_rate": 4.677540049988789e-05, "loss": 0.7969, "step": 340 },
    { "epoch": 1.6566626650660266, "grad_norm": 1.1132248640060425, "learning_rate": 4.668203273602363e-05, "loss": 0.858, "step": 345 },
    { "epoch": 1.680672268907563, "grad_norm": 1.0028260946273804, "learning_rate": 4.6587428424055326e-05, "loss": 0.8538, "step": 350 },
    { "epoch": 1.7046818727490995, "grad_norm": 1.057077407836914, "learning_rate": 4.649159295936191e-05, "loss": 0.8241, "step": 355 },
    { "epoch": 1.7286914765906363, "grad_norm": 0.8777754902839661, "learning_rate": 4.639453180753619e-05, "loss": 0.7388, "step": 360 },
    { "epoch": 1.752701080432173, "grad_norm": 0.9972108006477356, "learning_rate": 4.6296250504073145e-05, "loss": 0.7924, "step": 365 },
    { "epoch": 1.7767106842737095, "grad_norm": 1.0971965789794922, "learning_rate": 4.6196754654054216e-05, "loss": 0.7719, "step": 370 },
    { "epoch": 1.800720288115246, "grad_norm": 1.1859172582626343, "learning_rate": 4.609604993182767e-05, "loss": 0.8365, "step": 375 },
    { "epoch": 1.8247298919567827, "grad_norm": 0.9028448462486267, "learning_rate": 4.599414208068495e-05, "loss": 0.8118, "step": 380 },
    { "epoch": 1.8487394957983194, "grad_norm": 0.8329624533653259, "learning_rate": 4.589103691253317e-05, "loss": 0.7881, "step": 385 },
    { "epoch": 1.872749099639856, "grad_norm": 0.985603928565979, "learning_rate": 4.5786740307563636e-05, "loss": 0.7155, "step": 390 },
    { "epoch": 1.8967587034813924, "grad_norm": 0.9097521901130676, "learning_rate": 4.568125821391647e-05, "loss": 0.8555, "step": 395 },
    { "epoch": 1.9207683073229291, "grad_norm": 1.0277526378631592, "learning_rate": 4.557459664734141e-05, "loss": 0.8685, "step": 400 },
    { "epoch": 1.9447779111644659, "grad_norm": 0.8617818355560303, "learning_rate": 4.5466761690854746e-05, "loss": 0.7708, "step": 405 },
    { "epoch": 1.9687875150060024, "grad_norm": 0.8086485266685486, "learning_rate": 4.535775949439235e-05, "loss": 0.8448, "step": 410 },
    { "epoch": 1.9927971188475389, "grad_norm": 1.0069591999053955, "learning_rate": 4.5247596274458956e-05, "loss": 0.736, "step": 415 },
    { "epoch": 2.0168067226890756, "grad_norm": 0.7122059464454651, "learning_rate": 4.513627831377365e-05, "loss": 0.7223, "step": 420 },
    { "epoch": 2.0408163265306123, "grad_norm": 0.9053510427474976, "learning_rate": 4.502381196091154e-05, "loss": 0.7504, "step": 425 },
    { "epoch": 2.064825930372149, "grad_norm": 1.0357030630111694, "learning_rate": 4.491020362994168e-05, "loss": 0.8011, "step": 430 },
    { "epoch": 2.0888355342136853, "grad_norm": 0.9885731339454651, "learning_rate": 4.47954598000613e-05, "loss": 0.8665, "step": 435 },
    { "epoch": 2.112845138055222, "grad_norm": 0.9989228844642639, "learning_rate": 4.4679587015226253e-05, "loss": 0.749, "step": 440 },
    { "epoch": 2.1368547418967587, "grad_norm": 0.8704085350036621, "learning_rate": 4.456259188377786e-05, "loss": 0.7962, "step": 445 },
    { "epoch": 2.1608643457382954, "grad_norm": 0.9389162063598633, "learning_rate": 4.444448107806596e-05, "loss": 0.7964, "step": 450 },
    { "epoch": 2.184873949579832, "grad_norm": 0.9259791970252991, "learning_rate": 4.4325261334068426e-05, "loss": 0.7516, "step": 455 },
    { "epoch": 2.2088835534213684, "grad_norm": 1.4041215181350708, "learning_rate": 4.420493945100702e-05, "loss": 0.8173, "step": 460 },
    { "epoch": 2.232893157262905, "grad_norm": 0.9259509444236755, "learning_rate": 4.4083522290959564e-05, "loss": 0.8038, "step": 465 },
    { "epoch": 2.256902761104442, "grad_norm": 1.0205409526824951, "learning_rate": 4.396101677846866e-05, "loss": 0.8055, "step": 470 },
    { "epoch": 2.280912364945978, "grad_norm": 0.9302276372909546, "learning_rate": 4.383742990014671e-05, "loss": 0.7459, "step": 475 },
    { "epoch": 2.304921968787515, "grad_norm": 0.9929239749908447, "learning_rate": 4.371276870427753e-05, "loss": 0.7959, "step": 480 },
    { "epoch": 2.3289315726290516, "grad_norm": 1.0315582752227783, "learning_rate": 4.358704030041432e-05, "loss": 0.8528, "step": 485 },
    { "epoch": 2.3529411764705883, "grad_norm": 0.9557288289070129, "learning_rate": 4.346025185897424e-05, "loss": 0.759, "step": 490 },
    { "epoch": 2.376950780312125, "grad_norm": 1.080580472946167, "learning_rate": 4.333241061082944e-05, "loss": 0.7499, "step": 495 },
    { "epoch": 2.4009603841536613, "grad_norm": 0.96761155128479, "learning_rate": 4.320352384689471e-05, "loss": 0.7279, "step": 500 },
    { "epoch": 2.424969987995198, "grad_norm": 1.0945556163787842, "learning_rate": 4.307359891771165e-05, "loss": 0.7453, "step": 505 },
    { "epoch": 2.4489795918367347, "grad_norm": 0.9983144998550415, "learning_rate": 4.294264323302946e-05, "loss": 0.8311, "step": 510 },
    { "epoch": 2.4729891956782715, "grad_norm": 0.8600877523422241, "learning_rate": 4.2810664261382375e-05, "loss": 0.7172, "step": 515 },
    { "epoch": 2.4969987995198077, "grad_norm": 1.0828715562820435, "learning_rate": 4.267766952966369e-05, "loss": 0.8084, "step": 520 },
    { "epoch": 2.5210084033613445, "grad_norm": 0.9153307676315308, "learning_rate": 4.254366662269655e-05, "loss": 0.702, "step": 525 },
    { "epoch": 2.545018007202881, "grad_norm": 0.8633860349655151, "learning_rate": 4.240866318280132e-05, "loss": 0.7261, "step": 530 },
    { "epoch": 2.569027611044418, "grad_norm": 0.9197015762329102, "learning_rate": 4.227266690935978e-05, "loss": 0.754, "step": 535 },
    { "epoch": 2.593037214885954, "grad_norm": 0.9729764461517334, "learning_rate": 4.2135685558375994e-05, "loss": 0.7855, "step": 540 },
    { "epoch": 2.617046818727491, "grad_norm": 0.981951117515564, "learning_rate": 4.199772694203399e-05, "loss": 0.7584, "step": 545 },
    { "epoch": 2.6410564225690276, "grad_norm": 0.8585065603256226, "learning_rate": 4.185879892825222e-05, "loss": 0.8196, "step": 550 },
    { "epoch": 2.6650660264105643, "grad_norm": 0.8955056071281433, "learning_rate": 4.1718909440234853e-05, "loss": 0.6836, "step": 555 },
    { "epoch": 2.689075630252101, "grad_norm": 0.9751344919204712, "learning_rate": 4.157806645601988e-05, "loss": 0.7656, "step": 560 },
    { "epoch": 2.7130852340936373, "grad_norm": 0.9694618582725525, "learning_rate": 4.143627800802417e-05, "loss": 0.7787, "step": 565 },
    { "epoch": 2.737094837935174, "grad_norm": 1.0527385473251343, "learning_rate": 4.1293552182585307e-05, "loss": 0.7665, "step": 570 },
    { "epoch": 2.7611044417767108, "grad_norm": 0.9396669864654541, "learning_rate": 4.114989711950047e-05, "loss": 0.748, "step": 575 },
    { "epoch": 2.785114045618247, "grad_norm": 1.0898234844207764, "learning_rate": 4.1005321011562206e-05, "loss": 0.7761, "step": 580 },
    { "epoch": 2.8091236494597838, "grad_norm": 1.0972914695739746, "learning_rate": 4.085983210409114e-05, "loss": 0.7873, "step": 585 },
    { "epoch": 2.8331332533013205, "grad_norm": 1.1150754690170288, "learning_rate": 4.0713438694465806e-05, "loss": 0.8288, "step": 590 },
    { "epoch": 2.857142857142857, "grad_norm": 0.9612060785293579, "learning_rate": 4.056614913164938e-05, "loss": 0.7587, "step": 595 },
    { "epoch": 2.881152460984394, "grad_norm": 0.8540961146354675, "learning_rate": 4.0417971815713584e-05, "loss": 0.7096, "step": 600 },
    { "epoch": 2.90516206482593, "grad_norm": 1.1552797555923462, "learning_rate": 4.026891519735955e-05, "loss": 0.7177, "step": 605 },
    { "epoch": 2.929171668667467, "grad_norm": 0.8366991281509399, "learning_rate": 4.011898777743594e-05, "loss": 0.7244, "step": 610 },
    { "epoch": 2.9531812725090036, "grad_norm": 1.1516789197921753, "learning_rate": 3.99681981064541e-05, "loss": 0.7627, "step": 615 },
    { "epoch": 2.9771908763505404, "grad_norm": 1.119287133216858, "learning_rate": 3.981655478410043e-05, "loss": 0.7562, "step": 620 },
    { "epoch": 3.0012004801920766, "grad_norm": 0.9897541403770447, "learning_rate": 3.966406645874589e-05, "loss": 0.8271, "step": 625 },
    { "epoch": 3.0252100840336134, "grad_norm": 0.979117214679718, "learning_rate": 3.951074182695284e-05, "loss": 0.7639, "step": 630 },
    { "epoch": 3.04921968787515, "grad_norm": 1.234833836555481, "learning_rate": 3.935658963297902e-05, "loss": 0.7117, "step": 635 },
    { "epoch": 3.073229291716687, "grad_norm": 1.0178768634796143, "learning_rate": 3.920161866827889e-05, "loss": 0.7358, "step": 640 },
    { "epoch": 3.097238895558223, "grad_norm": 0.9386712312698364, "learning_rate": 3.904583777100223e-05, "loss": 0.7409, "step": 645 },
    { "epoch": 3.12124849939976, "grad_norm": 1.140830397605896, "learning_rate": 3.888925582549006e-05, "loss": 0.7052, "step": 650 },
    { "epoch": 3.1452581032412965, "grad_norm": 0.9676099419593811, "learning_rate": 3.8731881761768e-05, "loss": 0.7602, "step": 655 },
    { "epoch": 3.1692677070828332, "grad_norm": 1.1593329906463623, "learning_rate": 3.857372455503697e-05, "loss": 0.8514, "step": 660 },
    { "epoch": 3.19327731092437, "grad_norm": 1.1297277212142944, "learning_rate": 3.8414793225161325e-05, "loss": 0.7699, "step": 665 },
    { "epoch": 3.2172869147659062, "grad_norm": 0.9413173794746399, "learning_rate": 3.825509683615442e-05, "loss": 0.8067, "step": 670 },
    { "epoch": 3.241296518607443, "grad_norm": 0.9704225063323975, "learning_rate": 3.809464449566175e-05, "loss": 0.6783, "step": 675 },
    { "epoch": 3.2653061224489797, "grad_norm": 0.9347226023674011, "learning_rate": 3.793344535444142e-05, "loss": 0.7315, "step": 680 },
    { "epoch": 3.2893157262905164, "grad_norm": 0.9760938882827759, "learning_rate": 3.777150860584237e-05, "loss": 0.6999, "step": 685 },
    { "epoch": 3.3133253301320527, "grad_norm": 1.114785075187683, "learning_rate": 3.760884348528002e-05, "loss": 0.7182, "step": 690 },
    { "epoch": 3.3373349339735894, "grad_norm": 1.0659857988357544, "learning_rate": 3.744545926970957e-05, "loss": 0.7577, "step": 695 },
    { "epoch": 3.361344537815126, "grad_norm": 1.0988335609436035, "learning_rate": 3.728136527709694e-05, "loss": 0.7825, "step": 700 },
    { "epoch": 3.385354141656663, "grad_norm": 0.9283490180969238, "learning_rate": 3.711657086588733e-05, "loss": 0.7414, "step": 705 },
    { "epoch": 3.409363745498199, "grad_norm": 1.1617555618286133, "learning_rate": 3.695108543447154e-05, "loss": 0.7144, "step": 710 },
    { "epoch": 3.433373349339736, "grad_norm": 1.1011674404144287, "learning_rate": 3.678491842064995e-05, "loss": 0.7516, "step": 715 },
    { "epoch": 3.4573829531812725, "grad_norm": 0.9130650758743286, "learning_rate": 3.6618079301094216e-05, "loss": 0.7484, "step": 720 },
    { "epoch": 3.4813925570228093, "grad_norm": 1.1056820154190063, "learning_rate": 3.645057759080692e-05, "loss": 0.7775, "step": 725 },
    { "epoch": 3.505402160864346, "grad_norm": 1.219381332397461, "learning_rate": 3.6282422842578845e-05, "loss": 0.7086, "step": 730 },
    { "epoch": 3.5294117647058822, "grad_norm": 1.2623860836029053, "learning_rate": 3.611362464644415e-05, "loss": 0.7165, "step": 735 },
    { "epoch": 3.553421368547419, "grad_norm": 1.0355645418167114, "learning_rate": 3.594419262913351e-05, "loss": 0.7668, "step": 740 },
    { "epoch": 3.5774309723889557, "grad_norm": 0.9813769459724426, "learning_rate": 3.577413645352506e-05, "loss": 0.6595, "step": 745 },
    { "epoch": 3.601440576230492, "grad_norm": 1.0704172849655151, "learning_rate": 3.560346581809328e-05, "loss": 0.7469, "step": 750 },
    { "epoch": 3.6254501800720287, "grad_norm": 1.0033190250396729, "learning_rate": 3.543219045635593e-05, "loss": 0.7063, "step": 755 },
    { "epoch": 3.6494597839135654, "grad_norm": 0.9432305097579956, "learning_rate": 3.526032013631893e-05, "loss": 0.6694, "step": 760 },
    { "epoch": 3.673469387755102, "grad_norm": 1.0187017917633057, "learning_rate": 3.508786465991923e-05, "loss": 0.7789, "step": 765 },
    { "epoch": 3.697478991596639, "grad_norm": 1.076158881187439, "learning_rate": 3.491483386246588e-05, "loss": 0.7934, "step": 770 },
    { "epoch": 3.721488595438175, "grad_norm": 0.8803985714912415, "learning_rate": 3.474123761207905e-05, "loss": 0.7295, "step": 775 },
    { "epoch": 3.745498199279712, "grad_norm": 0.9059305787086487, "learning_rate": 3.456708580912725e-05, "loss": 0.7474, "step": 780 },
    { "epoch": 3.7695078031212486, "grad_norm": 1.1186120510101318, "learning_rate": 3.4392388385662714e-05, "loss": 0.7711, "step": 785 },
    { "epoch": 3.7935174069627853, "grad_norm": 1.0779385566711426, "learning_rate": 3.4217155304854976e-05, "loss": 0.7363, "step": 790 },
    { "epoch": 3.817527010804322, "grad_norm": 1.0094141960144043, "learning_rate": 3.4041396560422624e-05, "loss": 0.8531, "step": 795 },
    { "epoch": 3.8415366146458583, "grad_norm": 0.9442546963691711, "learning_rate": 3.386512217606339e-05, "loss": 0.8025, "step": 800 },
    { "epoch": 3.865546218487395, "grad_norm": 1.0685694217681885, "learning_rate": 3.3688342204882466e-05, "loss": 0.6634, "step": 805 },
    { "epoch": 3.8895558223289317, "grad_norm": 0.9987934827804565, "learning_rate": 3.351106672881915e-05, "loss": 0.7664, "step": 810 },
    { "epoch": 3.913565426170468, "grad_norm": 1.0503917932510376, "learning_rate": 3.3333058580719e-05, "loss": 0.865, "step": 815 },
    { "epoch": 3.9375750300120047, "grad_norm": 0.9366332292556763, "learning_rate": 3.3155069730521735e-05, "loss": 0.7096, "step": 820 },
    { "epoch": 3.9615846338535414, "grad_norm": 1.173298954963684, "learning_rate": 3.2976368511153996e-05, "loss": 0.8476, "step": 825 },
    { "epoch": 3.985594237695078, "grad_norm": 1.0985863208770752, "learning_rate": 3.2797212391478724e-05, "loss": 0.705, "step": 830 },
    { "epoch": 4.009603841536615, "grad_norm": 1.124514102935791, "learning_rate": 3.261761158894937e-05, "loss": 0.7086, "step": 835 },
    { "epoch": 4.033613445378151, "grad_norm": 0.9647889733314514, "learning_rate": 3.243757634638008e-05, "loss": 0.6899, "step": 840 },
    { "epoch": 4.057623049219688, "grad_norm": 0.9771691560745239, "learning_rate": 3.225711693136156e-05, "loss": 0.8088, "step": 845 },
    { "epoch": 4.081632653061225, "grad_norm": 1.1176905632019043, "learning_rate": 3.2076243635675513e-05, "loss": 0.7096, "step": 850 },
    { "epoch": 4.105642256902761, "grad_norm": 1.0890402793884277, "learning_rate": 3.189496677470765e-05, "loss": 0.7646, "step": 855 },
    { "epoch": 4.129651860744298, "grad_norm": 1.099631905555725, "learning_rate": 3.1713296686859426e-05, "loss": 0.7525, "step": 860 },
    { "epoch": 4.153661464585834, "grad_norm": 0.995556652545929, "learning_rate": 3.153124373295841e-05, "loss": 0.7231, "step": 865 },
    { "epoch": 4.177671068427371, "grad_norm": 1.1333503723144531, "learning_rate": 3.1348818295667424e-05, "loss": 0.7431, "step": 870 },
    { "epoch": 4.201680672268908, "grad_norm": 1.0261584520339966, "learning_rate": 3.116603077889238e-05, "loss": 0.7619, "step": 875 },
    { "epoch": 4.225690276110444, "grad_norm": 0.9810305833816528, "learning_rate": 3.098289160718895e-05, "loss": 0.7806, "step": 880 },
    { "epoch": 4.249699879951981, "grad_norm": 1.160054087638855, "learning_rate": 3.079941122516803e-05, "loss": 0.701, "step": 885 },
    { "epoch": 4.2737094837935174, "grad_norm": 0.9885622262954712, "learning_rate": 3.061560009690011e-05, "loss": 0.7114, "step": 890 },
    { "epoch": 4.297719087635054, "grad_norm": 1.092362880706787, "learning_rate": 3.0431468705318424e-05, "loss": 0.7407, "step": 895 },
    { "epoch": 4.321728691476591, "grad_norm": 1.3226685523986816, "learning_rate": 3.024702755162119e-05, "loss": 0.756, "step": 900 },
    { "epoch": 4.345738295318127, "grad_norm": 1.164993166923523, "learning_rate": 3.0062287154672658e-05, "loss": 0.6897, "step": 905 },
    { "epoch": 4.369747899159664, "grad_norm": 1.1299794912338257, "learning_rate": 2.9877258050403212e-05, "loss": 0.7212, "step": 910 },
    { "epoch": 4.393757503001201, "grad_norm": 1.1793334484100342, "learning_rate": 2.9691950791208502e-05, "loss": 0.8578, "step": 915 },
    { "epoch": 4.417767106842737, "grad_norm": 1.2607135772705078, "learning_rate": 2.950637594534765e-05, "loss": 0.7116, "step": 920 },
    { "epoch": 4.441776710684274, "grad_norm": 1.309200644493103, "learning_rate": 2.9320544096340493e-05, "loss": 0.7693, "step": 925 },
    { "epoch": 4.46578631452581, "grad_norm": 0.9688151478767395, "learning_rate": 2.9134465842364035e-05, "loss": 0.6641, "step": 930 },
    { "epoch": 4.489795918367347, "grad_norm": 1.155928373336792, "learning_rate": 2.8948151795647993e-05, "loss": 0.7488, "step": 935 },
    { "epoch": 4.513805522208884, "grad_norm": 1.2800368070602417, "learning_rate": 2.876161258186958e-05, "loss": 0.6626, "step": 940 },
    { "epoch": 4.53781512605042, "grad_norm": 1.2294648885726929, "learning_rate": 2.8574858839547512e-05, "loss": 0.7064, "step": 945 },
    { "epoch": 4.561824729891956, "grad_norm": 1.1497923135757446, "learning_rate": 2.83879012194353e-05, "loss": 0.703, "step": 950 },
    { "epoch": 4.5858343337334935, "grad_norm": 1.0689631700515747, "learning_rate": 2.8200750383913776e-05, "loss": 0.8085, "step": 955 },
    { "epoch": 4.60984393757503, "grad_norm": 1.038040041923523, "learning_rate": 2.8013417006383076e-05, "loss": 0.7251, "step": 960 },
    { "epoch": 4.633853541416567, "grad_norm": 0.9959046244621277, "learning_rate": 2.782591177065388e-05, "loss": 0.7358, "step": 965 },
    { "epoch": 4.657863145258103, "grad_norm": 1.1039036512374878, "learning_rate": 2.763824537033809e-05, "loss": 0.7625, "step": 970 },
    { "epoch": 4.6818727490996395, "grad_norm": 0.98404860496521, "learning_rate": 2.7450428508239024e-05, "loss": 0.8123, "step": 975 },
    { "epoch": 4.705882352941177, "grad_norm": 1.1559010744094849, "learning_rate": 2.726247189574095e-05, "loss": 0.7068, "step": 980 },
    { "epoch": 4.729891956782713, "grad_norm": 1.0589702129364014, "learning_rate": 2.707438625219827e-05, "loss": 0.7431, "step": 985 },
    { "epoch": 4.75390156062425, "grad_norm": 1.207343339920044, "learning_rate": 2.6886182304324153e-05, "loss": 0.7161, "step": 990 },
    { "epoch": 4.777911164465786, "grad_norm": 1.023870825767517, "learning_rate": 2.669787078557876e-05, "loss": 0.7311, "step": 995 },
    { "epoch": 4.801920768307323, "grad_norm": 0.9353678226470947, "learning_rate": 2.6509462435557152e-05, "loss": 0.6689, "step": 1000 },
    { "epoch": 4.82593037214886, "grad_norm": 0.8823081851005554, "learning_rate": 2.6320967999376767e-05, "loss": 0.6888, "step": 1005 },
    { "epoch": 4.849939975990396, "grad_norm": 1.1103065013885498, "learning_rate": 2.6132398227064615e-05, "loss": 0.7879, "step": 1010 },
    { "epoch": 4.873949579831933, "grad_norm": 1.061865210533142, "learning_rate": 2.5943763872944206e-05, "loss": 0.6958, "step": 1015 },
    { "epoch": 4.8979591836734695, "grad_norm": 1.1248444318771362, "learning_rate": 2.5755075695022224e-05, "loss": 0.6858, "step": 1020 },
    { "epoch": 4.921968787515006, "grad_norm": 1.1603715419769287, "learning_rate": 2.5566344454374968e-05, "loss": 0.6799, "step": 1025 },
    { "epoch": 4.945978391356543, "grad_norm": 1.1303590536117554, "learning_rate": 2.5377580914534647e-05, "loss": 0.767, "step": 1030 },
    { "epoch": 4.969987995198079, "grad_norm": 1.2461163997650146, "learning_rate": 2.5188795840875544e-05, "loss": 0.7453, "step": 1035 },
    { "epoch": 4.9939975990396155, "grad_norm": 1.0747658014297485, "learning_rate": 2.5e-05, "loss": 0.7559, "step": 1040 },
    { "epoch": 5.018007202881153, "grad_norm": 1.0154122114181519, "learning_rate": 2.481120415912446e-05, "loss": 0.7729, "step": 1045 },
    { "epoch": 5.042016806722689, "grad_norm": 1.195665717124939, "learning_rate": 2.4622419085465355e-05, "loss": 0.6956, "step": 1050 },
    { "epoch": 5.066026410564226, "grad_norm": 1.1738536357879639, "learning_rate": 2.4433655545625038e-05, "loss": 0.7124, "step": 1055 },
    { "epoch": 5.090036014405762, "grad_norm": 1.158319354057312, "learning_rate": 2.4244924304977785e-05, "loss": 0.7472, "step": 1060 },
    { "epoch": 5.114045618247299, "grad_norm": 1.1242644786834717, "learning_rate": 2.40562361270558e-05, "loss": 0.6805, "step": 1065 },
    { "epoch": 5.138055222088836, "grad_norm": 1.0289393663406372, "learning_rate": 2.3867601772935397e-05, "loss": 0.6898, "step": 1070 },
    { "epoch": 5.162064825930372, "grad_norm": 1.0357887744903564, "learning_rate": 2.367903200062324e-05, "loss": 0.6664, "step": 1075 },
    { "epoch": 5.186074429771908, "grad_norm": 1.1436580419540405, "learning_rate": 2.3490537564442847e-05, "loss": 0.7125, "step": 1080 },
    { "epoch": 5.2100840336134455, "grad_norm": 1.1259691715240479, "learning_rate": 2.3302129214421242e-05, "loss": 0.7797, "step": 1085 },
    { "epoch": 5.234093637454982, "grad_norm": 1.1516937017440796, "learning_rate": 2.3113817695675853e-05, "loss": 0.7587, "step": 1090 },
    { "epoch": 5.258103241296519, "grad_norm": 1.1358476877212524, "learning_rate": 2.292561374780173e-05, "loss": 0.7414, "step": 1095 },
    { "epoch": 5.282112845138055, "grad_norm": 1.2753323316574097, "learning_rate": 2.2737528104259056e-05, "loss": 0.6972, "step": 1100 },
    { "epoch": 5.3061224489795915, "grad_norm": 1.2500908374786377, "learning_rate": 2.2549571491760986e-05, "loss": 0.7168, "step": 1105 },
    { "epoch": 5.330132052821129, "grad_norm": 1.157196283340454, "learning_rate": 2.236175462966192e-05, "loss": 0.6441, "step": 1110 },
    { "epoch": 5.354141656662665, "grad_norm": 1.1226704120635986, "learning_rate": 2.217408822934613e-05, "loss": 0.7473, "step": 1115 },
    { "epoch": 5.378151260504202, "grad_norm": 1.0768887996673584, "learning_rate": 2.1986582993616926e-05, "loss": 0.6849, "step": 1120 },
    { "epoch": 5.402160864345738, "grad_norm": 1.2554682493209839, "learning_rate": 2.179924961608623e-05, "loss": 0.6715, "step": 1125 },
    { "epoch": 5.426170468187275, "grad_norm": 0.9506689310073853, "learning_rate": 2.1612098780564714e-05, "loss": 0.7139, "step": 1130 },
    { "epoch": 5.450180072028812, "grad_norm": 1.2209597826004028, "learning_rate": 2.1425141160452494e-05, "loss": 0.8102, "step": 1135 },
    { "epoch": 5.474189675870348, "grad_norm": 0.9822332262992859, "learning_rate": 2.1238387418130422e-05, "loss": 0.7772, "step": 1140 },
    { "epoch": 5.498199279711884, "grad_norm": 1.1269121170043945, "learning_rate": 2.1051848204352013e-05, "loss": 0.7506, "step": 1145 },
    { "epoch": 5.5222088835534215, "grad_norm": 1.1985368728637695, "learning_rate": 2.0865534157635967e-05, "loss": 0.7174, "step": 1150 },
    { "epoch": 5.546218487394958, "grad_norm": 1.1794227361679077, "learning_rate": 2.0679455903659513e-05, "loss": 0.7339, "step": 1155 },
    { "epoch": 5.570228091236495, "grad_norm": 1.1553770303726196, "learning_rate": 2.0493624054652357e-05, "loss": 0.7076, "step": 1160 },
    { "epoch": 5.594237695078031, "grad_norm": 1.0076508522033691, "learning_rate": 2.0308049208791507e-05, "loss": 0.7758, "step": 1165 },
    { "epoch": 5.6182472989195675, "grad_norm": 1.0329455137252808, "learning_rate": 2.0122741949596797e-05, "loss": 0.7103, "step": 1170 },
    { "epoch": 5.642256902761105, "grad_norm": 1.0466042757034302, "learning_rate": 1.9937712845327345e-05, "loss": 0.747, "step": 1175 },
    { "epoch": 5.666266506602641, "grad_norm": 1.1813960075378418, "learning_rate": 1.9752972448378814e-05, "loss": 0.7325, "step": 1180 },
    { "epoch": 5.690276110444177, "grad_norm": 1.0979856252670288, "learning_rate": 1.9568531294681586e-05, "loss": 0.7336, "step": 1185 },
    { "epoch": 5.714285714285714, "grad_norm": 1.1668668985366821, "learning_rate": 1.938439990309991e-05, "loss": 0.7238, "step": 1190 },
    { "epoch": 5.738295318127251, "grad_norm": 1.1428868770599365, "learning_rate": 1.9200588774831975e-05, "loss": 0.6934, "step": 1195 },
    { "epoch": 5.762304921968788, "grad_norm": 1.1208099126815796, "learning_rate": 1.9017108392811065e-05, "loss": 0.7573, "step": 1200 },
    { "epoch": 5.786314525810324, "grad_norm": 1.1175333261489868, "learning_rate": 1.8833969221107622e-05, "loss": 0.7232, "step": 1205 },
    { "epoch": 5.81032412965186, "grad_norm": 0.9412729740142822, "learning_rate": 1.8651181704332578e-05, "loss": 0.7221, "step": 1210 },
    { "epoch": 5.834333733493398, "grad_norm": 1.0848782062530518, "learning_rate": 1.8468756267041595e-05, "loss": 0.755, "step": 1215 },
    { "epoch": 5.858343337334934, "grad_norm": 1.2409135103225708, "learning_rate": 1.828670331314058e-05, "loss": 0.6962, "step": 1220 },
    { "epoch": 5.882352941176471, "grad_norm": 1.05846107006073, "learning_rate": 1.810503322529236e-05, "loss": 0.7443, "step": 1225 },
    { "epoch": 5.906362545018007, "grad_norm": 1.0066033601760864, "learning_rate": 1.7923756364324492e-05, "loss": 0.6654, "step": 1230 },
    { "epoch": 5.930372148859544, "grad_norm": 0.9819132685661316, "learning_rate": 1.7742883068638447e-05, "loss": 0.7271, "step": 1235 },
    { "epoch": 5.954381752701081, "grad_norm": 1.143143892288208, "learning_rate": 1.756242365361993e-05, "loss": 0.7538, "step": 1240 },
    { "epoch": 5.978391356542617, "grad_norm": 1.1252750158309937, "learning_rate": 1.7382388411050638e-05, "loss": 0.6795, "step": 1245 },
    { "epoch": 6.002400960384153, "grad_norm": 1.3320536613464355, "learning_rate": 1.7202787608521278e-05, "loss": 0.6446, "step": 1250 },
    { "epoch": 6.02641056422569, "grad_norm": 1.1098029613494873, "learning_rate": 1.7023631488846006e-05, "loss": 0.6994, "step": 1255 },
    { "epoch": 6.050420168067227, "grad_norm": 1.1677411794662476, "learning_rate": 1.6844930269478274e-05, "loss": 0.6886, "step": 1260 },
    { "epoch": 6.074429771908764, "grad_norm": 1.1150180101394653, "learning_rate": 1.6666694141928096e-05, "loss": 0.7485, "step": 1265 },
    { "epoch": 6.0984393757503, "grad_norm": 1.2847915887832642, "learning_rate": 1.6488933271180845e-05, "loss": 0.6399, "step": 1270 },
    { "epoch": 6.122448979591836, "grad_norm": 1.1303415298461914, "learning_rate": 1.631165779511754e-05, "loss": 0.7799, "step": 1275 },
    { "epoch": 6.146458583433374, "grad_norm": 1.2571803331375122, "learning_rate": 1.613487782393661e-05, "loss": 0.7589, "step": 1280 },
    { "epoch": 6.17046818727491, "grad_norm": 1.0048649311065674, "learning_rate": 1.595860343957738e-05, "loss": 0.6969, "step": 1285 },
    { "epoch": 6.194477791116446, "grad_norm": 1.129724383354187, "learning_rate": 1.5782844695145033e-05, "loss": 0.7108, "step": 1290 },
    { "epoch": 6.218487394957983, "grad_norm": 1.0512356758117676, "learning_rate": 1.5607611614337292e-05, "loss": 0.7478, "step": 1295 },
    { "epoch": 6.24249699879952, "grad_norm": 1.0714658498764038, "learning_rate": 1.5432914190872757e-05, "loss": 0.7225, "step": 1300 },
    { "epoch": 6.266506602641057, "grad_norm": 1.2147399187088013, "learning_rate": 1.5258762387920956e-05, "loss": 0.6964, "step": 1305 },
    { "epoch": 6.290516206482593, "grad_norm": 1.1545813083648682, "learning_rate": 1.5085166137534123e-05, "loss": 0.7673, "step": 1310 },
    { "epoch": 6.314525810324129, "grad_norm": 1.092599630355835, "learning_rate": 1.4912135340080774e-05, "loss": 0.7194, "step": 1315 },
    { "epoch": 6.3385354141656665, "grad_norm": 1.2281053066253662, "learning_rate": 1.4739679863681086e-05, "loss": 0.6869, "step": 1320 },
    { "epoch": 6.362545018007203, "grad_norm": 1.1293562650680542, "learning_rate": 1.4567809543644076e-05, "loss": 0.6851, "step": 1325 },
    { "epoch": 6.38655462184874, "grad_norm": 1.459465742111206, "learning_rate": 1.4396534181906725e-05, "loss": 0.7242, "step": 1330 },
    { "epoch": 6.410564225690276, "grad_norm": 1.203008770942688, "learning_rate": 1.4225863546474943e-05, "loss": 0.7155, "step": 1335 },
    { "epoch": 6.4345738295318124, "grad_norm": 1.0592601299285889, "learning_rate": 1.4055807370866485e-05, "loss": 0.6706, "step": 1340 },
    { "epoch": 6.45858343337335, "grad_norm": 1.1361570358276367, "learning_rate": 1.388637535355585e-05, "loss": 0.6937, "step": 1345 },
    { "epoch": 6.482593037214886, "grad_norm": 1.135237455368042, "learning_rate": 1.3717577157421169e-05, "loss": 0.8316, "step": 1350 },
    { "epoch": 6.506602641056423, "grad_norm": 1.0417910814285278, "learning_rate": 1.3549422409193083e-05, "loss": 0.7229, "step": 1355 },
    { "epoch": 6.530612244897959, "grad_norm": 1.1558960676193237, "learning_rate": 1.3381920698905787e-05, "loss": 0.7025, "step": 1360 },
    { "epoch": 6.554621848739496, "grad_norm": 1.0858848094940186, "learning_rate": 1.3215081579350058e-05, "loss": 0.742, "step": 1365 },
    { "epoch": 6.578631452581033, "grad_norm": 1.2004518508911133, "learning_rate": 1.3048914565528454e-05, "loss": 0.6737, "step": 1370 },
    { "epoch": 6.602641056422569, "grad_norm": 1.1666450500488281, "learning_rate": 1.2883429134112673e-05, "loss": 0.7577, "step": 1375 },
    { "epoch": 6.626650660264105, "grad_norm": 1.1294101476669312, "learning_rate": 1.2718634722903073e-05, "loss": 0.6495, "step": 1380 },
    { "epoch": 6.6506602641056425, "grad_norm": 1.2848787307739258, "learning_rate": 1.2554540730290437e-05, "loss": 0.7069, "step": 1385 },
    { "epoch": 6.674669867947179, "grad_norm": 1.2171602249145508, "learning_rate": 1.2391156514719984e-05, "loss": 0.705, "step": 1390 },
    { "epoch": 6.698679471788715, "grad_norm": 1.0168397426605225, "learning_rate": 1.222849139415764e-05, "loss": 0.671, "step": 1395 },
    { "epoch": 6.722689075630252, "grad_norm": 1.2341060638427734, "learning_rate": 1.2066554645558578e-05, "loss": 0.7456, "step": 1400 },
    { "epoch": 6.7466986794717885, "grad_norm": 1.174575686454773, "learning_rate": 1.1905355504338248e-05, "loss": 0.6899, "step": 1405 },
    { "epoch": 6.770708283313326, "grad_norm": 1.125368595123291, "learning_rate": 1.1744903163845577e-05, "loss": 0.7024, "step": 1410 },
    { "epoch": 6.794717887154862, "grad_norm": 1.157349705696106, "learning_rate": 1.1585206774838683e-05, "loss": 0.6571, "step": 1415 },
    { "epoch": 6.818727490996398, "grad_norm": 1.1240679025650024, "learning_rate": 1.1426275444963034e-05, "loss": 0.729, "step": 1420 },
    { "epoch": 6.842737094837935, "grad_norm": 1.2598992586135864, "learning_rate": 1.1268118238232003e-05, "loss": 0.7971, "step": 1425 },
    { "epoch": 6.866746698679472, "grad_norm": 1.2101902961730957, "learning_rate": 1.1110744174509952e-05, "loss": 0.6376, "step": 1430 },
    { "epoch": 6.890756302521009, "grad_norm": 1.2665272951126099, "learning_rate": 1.0954162228997777e-05, "loss": 0.6663, "step": 1435 },
    { "epoch": 6.914765906362545, "grad_norm": 1.0040076971054077, "learning_rate": 1.0798381331721109e-05, "loss": 0.6898, "step": 1440 },
    { "epoch": 6.938775510204081, "grad_norm": 1.180012583732605, "learning_rate": 1.0643410367020983e-05, "loss": 0.686, "step": 1445 },
    { "epoch": 6.9627851140456185, "grad_norm": 1.1320979595184326, "learning_rate": 1.048925817304717e-05, "loss": 0.7803, "step": 1450 },
    { "epoch": 6.986794717887155, "grad_norm": 1.141840934753418, "learning_rate": 1.0335933541254129e-05, "loss": 0.6809, "step": 1455 },
    { "epoch": 7.010804321728691, "grad_norm": 1.0778816938400269, "learning_rate": 1.0183445215899584e-05, "loss": 0.6936, "step": 1460 },
    { "epoch": 7.034813925570228, "grad_norm": 0.9368105530738831, "learning_rate": 1.0031801893545895e-05, "loss": 0.7005, "step": 1465 },
    { "epoch": 7.0588235294117645, "grad_norm": 1.0788689851760864, "learning_rate": 9.881012222564065e-06, "loss": 0.8484, "step": 1470 },
    { "epoch": 7.082833133253302, "grad_norm": 1.1011931896209717, "learning_rate": 9.731084802640459e-06, "loss": 0.7897, "step": 1475 },
    { "epoch": 7.106842737094838, "grad_norm": 1.22800612449646, "learning_rate": 9.582028184286423e-06, "loss": 0.6696, "step": 1480 },
    { "epoch": 7.130852340936374, "grad_norm": 1.224184274673462, "learning_rate": 9.43385086835062e-06, "loss": 0.6867, "step": 1485 },
    { "epoch": 7.154861944777911, "grad_norm": 1.2134716510772705, "learning_rate": 9.286561305534203e-06, "loss": 0.7372, "step": 1490 },
    { "epoch": 7.178871548619448, "grad_norm": 1.01565682888031, "learning_rate": 9.140167895908867e-06, "loss": 0.6718, "step": 1495 },
    { "epoch": 7.202881152460985, "grad_norm": 1.176138162612915, "learning_rate": 8.994678988437802e-06, "loss": 0.6819, "step": 1500 },
    { "epoch": 7.226890756302521, "grad_norm": 1.1396265029907227, "learning_rate": 8.850102880499531e-06, "loss": 0.7094, "step": 1505 },
    { "epoch": 7.250900360144057, "grad_norm": 1.1229872703552246, "learning_rate": 8.706447817414696e-06, "loss": 0.7423, "step": 1510 },
    { "epoch": 7.2749099639855945, "grad_norm": 1.0950995683670044, "learning_rate": 8.563721991975843e-06, "loss": 0.6957, "step": 1515 },
    { "epoch": 7.298919567827131, "grad_norm": 1.322180986404419, "learning_rate": 8.421933543980126e-06, "loss": 0.6382, "step": 1520 },
    { "epoch": 7.322929171668667, "grad_norm": 1.1946529150009155, "learning_rate": 8.281090559765156e-06, "loss": 0.6939, "step": 1525 },
    { "epoch": 7.346938775510204, "grad_norm": 1.3446452617645264, "learning_rate": 8.141201071747784e-06, "loss": 0.7682, "step": 1530 },
    { "epoch": 7.3709483793517405, "grad_norm": 1.063050627708435, "learning_rate": 8.002273057966011e-06, "loss": 0.6319, "step": 1535 },
    { "epoch": 7.394957983193278, "grad_norm": 1.0960049629211426, "learning_rate": 7.864314441624004e-06, "loss": 0.7331, "step": 1540 },
    { "epoch": 7.418967587034814, "grad_norm": 1.153906226158142, "learning_rate": 7.727333090640218e-06, "loss": 0.7249, "step": 1545 },
    { "epoch": 7.44297719087635, "grad_norm": 1.1610156297683716, "learning_rate": 7.591336817198682e-06, "loss": 0.6484, "step": 1550 },
    { "epoch": 7.466986794717887, "grad_norm": 1.1550188064575195, "learning_rate": 7.456333377303457e-06, "loss": 0.7723, "step": 1555 },
    { "epoch": 7.490996398559424, "grad_norm": 1.2671056985855103, "learning_rate": 7.3223304703363135e-06, "loss": 0.7266, "step": 1560 },
    { "epoch": 7.515006002400961, "grad_norm": 1.234147548675537, "learning_rate": 7.189335738617633e-06, "loss": 0.6927, "step": 1565 },
    { "epoch": 7.539015606242497, "grad_norm": 1.1650274991989136, "learning_rate": 7.057356766970541e-06, "loss": 0.7219, "step": 1570 },
    { "epoch": 7.563025210084033, "grad_norm": 1.2067919969558716, "learning_rate": 6.926401082288359e-06, "loss": 0.6933, "step": 1575 },
    { "epoch": 7.5870348139255706, "grad_norm": 1.1489832401275635, "learning_rate": 6.796476153105294e-06, "loss": 0.7651, "step": 1580 },
    { "epoch": 7.611044417767107, "grad_norm": 1.24705970287323, "learning_rate": 6.667589389170562e-06, "loss": 0.6558, "step": 1585 },
    { "epoch": 7.635054021608643, "grad_norm": 1.0954616069793701, "learning_rate": 6.5397481410257645e-06, "loss": 0.6601, "step": 1590 },
    { "epoch": 7.65906362545018, "grad_norm": 1.272865653038025, "learning_rate": 6.41295969958568e-06, "loss": 0.6938, "step": 1595 },
    { "epoch": 7.6830732292917165, "grad_norm": 1.2320659160614014, "learning_rate": 6.28723129572247e-06, "loss": 0.7301, "step": 1600 },
    { "epoch": 7.707082833133253, "grad_norm": 1.2697712182998657, "learning_rate": 6.16257009985329e-06, "loss": 0.6848, "step": 1605 },
    { "epoch": 7.73109243697479, "grad_norm": 1.1409835815429688, "learning_rate": 6.038983221531352e-06, "loss": 0.6372, "step": 1610 },
    { "epoch": 7.755102040816326, "grad_norm": 1.1304911375045776, "learning_rate": 5.916477709040444e-06, "loss": 0.6736, "step": 1615 },
    { "epoch": 7.779111644657863, "grad_norm": 1.1968624591827393, "learning_rate": 5.79506054899299e-06, "loss": 0.6915, "step": 1620 },
    { "epoch": 7.8031212484994, "grad_norm": 1.0843942165374756, "learning_rate": 5.674738665931575e-06, "loss": 0.696, "step": 1625 },
    { "epoch": 7.827130852340936, "grad_norm": 1.0682406425476074, "learning_rate": 5.555518921934047e-06, "loss": 0.7064, "step": 1630 },
    { "epoch": 7.851140456182473, "grad_norm": 1.13326096534729, "learning_rate": 5.437408116222148e-06, "loss": 0.7497, "step": 1635 },
    { "epoch": 7.875150060024009, "grad_norm": 1.1618019342422485, "learning_rate": 5.320412984773748e-06, "loss": 0.7025, "step": 1640 },
    { "epoch": 7.899159663865547, "grad_norm": 1.2662303447723389, "learning_rate": 5.204540199938707e-06, "loss": 0.6823, "step": 1645 },
    { "epoch": 7.923169267707083, "grad_norm": 1.1508044004440308, "learning_rate": 5.089796370058325e-06, "loss": 0.8013, "step": 1650 },
    { "epoch": 7.947178871548619, "grad_norm": 1.1236162185668945, "learning_rate": 4.9761880390884694e-06, "loss": 0.7497, "step": 1655 },
    { "epoch": 7.971188475390156, "grad_norm": 1.239221453666687, "learning_rate": 4.86372168622635e-06, "loss": 0.7817, "step": 1660 },
    { "epoch": 7.995198079231693, "grad_norm": 1.1400153636932373, "learning_rate": 4.7524037255410434e-06, "loss": 0.615, "step": 1665 },
    { "epoch": 8.01920768307323, "grad_norm": 1.1075925827026367, "learning_rate": 4.642240505607659e-06, "loss": 0.7037, "step": 1670 },
    { "epoch": 8.043217286914766, "grad_norm": 1.244471788406372, "learning_rate": 4.533238309145258e-06, "loss": 0.6747, "step": 1675 },
    { "epoch": 8.067226890756302, "grad_norm": 1.114261269569397, "learning_rate": 4.425403352658591e-06, "loss": 0.7154, "step": 1680 },
    { "epoch": 8.091236494597839, "grad_norm": 1.2810643911361694, "learning_rate": 4.318741786083538e-06, "loss": 0.7838, "step": 1685 },
    { "epoch": 8.115246098439377, "grad_norm": 1.1281261444091797, "learning_rate": 4.213259692436367e-06, "loss": 0.7525, "step": 1690 },
    { "epoch": 8.139255702280913, "grad_norm": 1.111745834350586, "learning_rate": 4.1089630874668325e-06, "loss": 0.6624, "step": 1695 },
    { "epoch": 8.16326530612245, "grad_norm": 0.9732471108436584, "learning_rate": 4.0058579193150535e-06, "loss": 0.6821, "step": 1700 },
    { "epoch": 8.187274909963985, "grad_norm": 1.1068557500839233, "learning_rate": 3.903950068172338e-06, "loss": 0.6932, "step": 1705 },
    { "epoch": 8.211284513805522, "grad_norm": 1.1466325521469116, "learning_rate": 3.8032453459457884e-06, "loss": 0.7308, "step": 1710 },
    { "epoch": 8.235294117647058, "grad_norm": 1.076535940170288, "learning_rate": 3.7037494959268644e-06, "loss": 0.7136, "step": 1715 },
    { "epoch": 8.259303721488596, "grad_norm": 1.234830617904663, "learning_rate": 3.605468192463815e-06, "loss": 0.6595, "step": 1720 },
    { "epoch": 8.283313325330132, "grad_norm": 1.2779568433761597, "learning_rate": 3.5084070406380897e-06, "loss": 0.7671, "step": 1725 },
    { "epoch": 8.307322929171669, "grad_norm": 1.0826342105865479, "learning_rate": 3.4125715759446785e-06, "loss": 0.6561, "step": 1730 },
    { "epoch": 8.331332533013205, "grad_norm": 1.292898416519165, "learning_rate": 3.317967263976374e-06, "loss": 0.6876, "step": 1735 },
    { "epoch": 8.355342136854741, "grad_norm": 1.2453337907791138, "learning_rate": 3.2245995001121106e-06, "loss": 0.6819, "step": 1740 },
    { "epoch": 8.37935174069628, "grad_norm": 1.109753131866455, "learning_rate": 3.1324736092092412e-06, "loss": 0.7044, "step": 1745 },
    { "epoch": 8.403361344537815, "grad_norm": 1.1907482147216797, "learning_rate": 3.0415948452998557e-06, "loss": 0.6317, "step": 1750 },
    { "epoch": 8.427370948379352, "grad_norm": 1.036994218826294, "learning_rate": 2.9519683912911266e-06, "loss": 0.6521, "step": 1755 },
    { "epoch": 8.451380552220888, "grad_norm": 1.1481200456619263, "learning_rate": 2.8635993586697553e-06, "loss": 0.7572, "step": 1760 },
    { "epoch": 8.475390156062424, "grad_norm": 1.2365052700042725, "learning_rate": 2.776492787210425e-06, "loss": 0.7184, "step": 1765 },
    { "epoch": 8.499399759903962, "grad_norm": 1.1394840478897095, "learning_rate": 2.690653644688393e-06, "loss": 0.6881, "step": 1770 },
    { "epoch": 8.523409363745499, "grad_norm": 1.2600808143615723, "learning_rate": 2.6060868265961822e-06, "loss": 0.6575, "step": 1775 },
    { "epoch": 8.547418967587035, "grad_norm": 1.09065842628479, "learning_rate": 2.5227971558643537e-06, "loss": 0.7457, "step": 1780 },
    { "epoch": 8.571428571428571, "grad_norm": 1.245969533920288, "learning_rate": 2.4407893825864892e-06, "loss": 0.7127, "step": 1785 },
    { "epoch": 8.595438175270107, "grad_norm": 1.3175572156906128, "learning_rate": 2.360068183748268e-06, "loss": 0.7442, "step": 1790 },
    { "epoch": 8.619447779111646, "grad_norm": 1.220615267753601, "learning_rate": 2.2806381629607327e-06, "loss": 0.6743, "step": 1795 },
    { "epoch": 8.643457382953182, "grad_norm": 1.1195836067199707, "learning_rate": 2.2025038501977486e-06, "loss": 0.7338, "step": 1800 },
    { "epoch": 8.667466986794718, "grad_norm": 1.075049638748169, "learning_rate": 2.125669701537647e-06, "loss": 0.7514, "step": 1805 },
    { "epoch": 8.691476590636254, "grad_norm": 1.1793599128723145, "learning_rate": 2.0501400989091036e-06, "loss": 0.7774, "step": 1810 },
    { "epoch": 8.71548619447779, "grad_norm": 1.165880560874939, "learning_rate": 1.97591934984121e-06, "loss": 0.6895, "step": 1815 },
    { "epoch": 8.739495798319329, "grad_norm": 1.101431131362915, "learning_rate": 1.9030116872178316e-06, "loss": 0.7218, "step": 1820 },
    { "epoch": 8.763505402160865, "grad_norm": 1.2261079549789429, "learning_rate": 1.8314212690361987e-06, "loss": 0.7553, "step": 1825 },
    { "epoch": 8.787515006002401, "grad_norm": 1.1773276329040527, "learning_rate": 1.7611521781697644e-06, "loss": 0.6779, "step": 1830 },
    { "epoch": 8.811524609843937, "grad_norm": 1.2577153444290161, "learning_rate": 1.6922084221353607e-06, "loss": 0.6946, "step": 1835 },
    { "epoch": 8.835534213685474, "grad_norm": 1.1246775388717651, "learning_rate": 1.624593932864632e-06, "loss": 0.7149, "step": 1840 },
    { "epoch": 8.85954381752701, "grad_norm": 1.1089775562286377, "learning_rate": 1.5583125664798165e-06, "loss": 0.6681, "step": 1845 },
    { "epoch": 8.883553421368548, "grad_norm": 1.138650894165039, "learning_rate": 1.4933681030738138e-06,
|
"loss": 0.6519, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 8.907563025210084, |
|
"grad_norm": 1.1409391164779663, |
|
"learning_rate": 1.429764246494597e-06, |
|
"loss": 0.6957, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 8.93157262905162, |
|
"grad_norm": 1.2397470474243164, |
|
"learning_rate": 1.3675046241339918e-06, |
|
"loss": 0.6942, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 8.955582232893157, |
|
"grad_norm": 1.310257077217102, |
|
"learning_rate": 1.306592786720795e-06, |
|
"loss": 0.6578, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 8.979591836734693, |
|
"grad_norm": 1.057267189025879, |
|
"learning_rate": 1.2470322081182761e-06, |
|
"loss": 0.6994, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 9.003601440576231, |
|
"grad_norm": 1.2477996349334717, |
|
"learning_rate": 1.1888262851260462e-06, |
|
"loss": 0.643, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 9.027611044417768, |
|
"grad_norm": 1.2775191068649292, |
|
"learning_rate": 1.1319783372863602e-06, |
|
"loss": 0.6307, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 9.051620648259304, |
|
"grad_norm": 1.1136633157730103, |
|
"learning_rate": 1.0764916066947794e-06, |
|
"loss": 0.7088, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 9.07563025210084, |
|
"grad_norm": 1.2463817596435547, |
|
"learning_rate": 1.0223692578152782e-06, |
|
"loss": 0.6603, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 9.099639855942376, |
|
"grad_norm": 1.140369176864624, |
|
"learning_rate": 9.696143772997768e-07, |
|
"loss": 0.8255, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 9.123649459783914, |
|
"grad_norm": 1.0478204488754272, |
|
"learning_rate": 9.182299738120931e-07, |
|
"loss": 0.6834, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 9.14765906362545, |
|
"grad_norm": 1.243027925491333, |
|
"learning_rate": 8.682189778563693e-07, |
|
"loss": 0.6365, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 9.171668667466987, |
|
"grad_norm": 1.1335963010787964, |
|
"learning_rate": 8.195842416099359e-07, |
|
"loss": 0.7317, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 9.195678271308523, |
|
"grad_norm": 1.2012873888015747, |
|
"learning_rate": 7.723285387606471e-07, |
|
"loss": 0.741, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 9.21968787515006, |
|
"grad_norm": 1.098880648612976, |
|
"learning_rate": 7.264545643486997e-07, |
|
"loss": 0.6881, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 9.243697478991596, |
|
"grad_norm": 1.138432502746582, |
|
"learning_rate": 6.819649346129304e-07, |
|
"loss": 0.735, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 9.267707082833134, |
|
"grad_norm": 1.163900375366211, |
|
"learning_rate": 6.3886218684161e-07, |
|
"loss": 0.7227, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 9.29171668667467, |
|
"grad_norm": 1.163901448249817, |
|
"learning_rate": 5.971487792277297e-07, |
|
"loss": 0.681, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 9.315726290516206, |
|
"grad_norm": 1.3683992624282837, |
|
"learning_rate": 5.568270907288287e-07, |
|
"loss": 0.7081, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 9.339735894357743, |
|
"grad_norm": 1.2360917329788208, |
|
"learning_rate": 5.178994209312948e-07, |
|
"loss": 0.7064, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 9.363745498199279, |
|
"grad_norm": 1.0632774829864502, |
|
"learning_rate": 4.803679899192392e-07, |
|
"loss": 0.6829, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 9.387755102040817, |
|
"grad_norm": 1.200908899307251, |
|
"learning_rate": 4.4423493814786667e-07, |
|
"loss": 0.6814, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 9.411764705882353, |
|
"grad_norm": 1.2517386674880981, |
|
"learning_rate": 4.095023263214121e-07, |
|
"loss": 0.769, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 9.43577430972389, |
|
"grad_norm": 1.0798803567886353, |
|
"learning_rate": 3.761721352756098e-07, |
|
"loss": 0.7539, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 9.459783913565426, |
|
"grad_norm": 0.9635175466537476, |
|
"learning_rate": 3.4424626586473385e-07, |
|
"loss": 0.6506, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 9.483793517406962, |
|
"grad_norm": 1.1478654146194458, |
|
"learning_rate": 3.1372653885318736e-07, |
|
"loss": 0.6763, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 9.5078031212485, |
|
"grad_norm": 1.2677192687988281, |
|
"learning_rate": 2.846146948116468e-07, |
|
"loss": 0.6776, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 9.531812725090036, |
|
"grad_norm": 1.1099594831466675, |
|
"learning_rate": 2.569123940178192e-07, |
|
"loss": 0.7292, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 9.555822328931573, |
|
"grad_norm": 1.1103848218917847, |
|
"learning_rate": 2.3062121636174826e-07, |
|
"loss": 0.8111, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 9.579831932773109, |
|
"grad_norm": 1.1750984191894531, |
|
"learning_rate": 2.0574266125569509e-07, |
|
"loss": 0.6473, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 9.603841536614645, |
|
"grad_norm": 1.0524730682373047, |
|
"learning_rate": 1.8227814754865068e-07, |
|
"loss": 0.6774, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.627851140456183, |
|
"grad_norm": 1.1044491529464722, |
|
"learning_rate": 1.6022901344539543e-07, |
|
"loss": 0.6782, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 9.65186074429772, |
|
"grad_norm": 1.2015644311904907, |
|
"learning_rate": 1.39596516430196e-07, |
|
"loss": 0.6965, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 9.675870348139256, |
|
"grad_norm": 1.066968560218811, |
|
"learning_rate": 1.2038183319507955e-07, |
|
"loss": 0.6496, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 9.699879951980792, |
|
"grad_norm": 1.1246190071105957, |
|
"learning_rate": 1.0258605957272627e-07, |
|
"loss": 0.6511, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 9.723889555822328, |
|
"grad_norm": 1.017182469367981, |
|
"learning_rate": 8.621021047398314e-08, |
|
"loss": 0.7524, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 9.747899159663866, |
|
"grad_norm": 1.2211177349090576, |
|
"learning_rate": 7.125521982997152e-08, |
|
"loss": 0.6653, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 9.771908763505403, |
|
"grad_norm": 1.2134476900100708, |
|
"learning_rate": 5.772194053882962e-08, |
|
"loss": 0.6718, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 9.795918367346939, |
|
"grad_norm": 1.3641300201416016, |
|
"learning_rate": 4.56111444170626e-08, |
|
"loss": 0.7114, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 9.819927971188475, |
|
"grad_norm": 1.1759401559829712, |
|
"learning_rate": 3.4923522155544394e-08, |
|
"loss": 0.7125, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 9.843937575030012, |
|
"grad_norm": 1.0511598587036133, |
|
"learning_rate": 2.5659683280102044e-08, |
|
"loss": 0.6236, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 9.867947178871548, |
|
"grad_norm": 1.1891964673995972, |
|
"learning_rate": 1.782015611677401e-08, |
|
"loss": 0.7353, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 9.891956782713086, |
|
"grad_norm": 1.116445541381836, |
|
"learning_rate": 1.1405387761664887e-08, |
|
"loss": 0.7489, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 9.915966386554622, |
|
"grad_norm": 1.1506844758987427, |
|
"learning_rate": 6.415744055460193e-09, |
|
"loss": 0.7743, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 9.939975990396158, |
|
"grad_norm": 1.2120825052261353, |
|
"learning_rate": 2.8515095625514244e-09, |
|
"loss": 0.6435, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 9.963985594237695, |
|
"grad_norm": 1.1971231698989868, |
|
"learning_rate": 7.128875548101377e-10, |
|
"loss": 0.7375, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 9.987995198079231, |
|
"grad_norm": 1.2055232524871826, |
|
"learning_rate": 0.0, |
|
"loss": 0.7238, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 9.987995198079231, |
|
"step": 2080, |
|
"total_flos": 8.185270082173747e+17, |
|
"train_loss": 0.7521515275423344, |
|
"train_runtime": 19400.1091, |
|
"train_samples_per_second": 1.717, |
|
"train_steps_per_second": 0.107 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 2080, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 100, |
|
"total_flos": 8.185270082173747e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|