|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 6185, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00016168148746968473, |
|
"grad_norm": 3.2100687490225734, |
|
"learning_rate": 1.6155088852988694e-08, |
|
"loss": 1.6934, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0008084074373484236, |
|
"grad_norm": 3.4649028983783934, |
|
"learning_rate": 8.077544426494346e-08, |
|
"loss": 1.7213, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0016168148746968471, |
|
"grad_norm": 3.221507146420423, |
|
"learning_rate": 1.6155088852988693e-07, |
|
"loss": 1.6956, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.002425222312045271, |
|
"grad_norm": 3.3319921152603014, |
|
"learning_rate": 2.4232633279483037e-07, |
|
"loss": 1.7147, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0032336297493936943, |
|
"grad_norm": 3.2098440129054313, |
|
"learning_rate": 3.2310177705977386e-07, |
|
"loss": 1.703, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.004042037186742118, |
|
"grad_norm": 3.3447921032483268, |
|
"learning_rate": 4.038772213247173e-07, |
|
"loss": 1.7119, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.004850444624090542, |
|
"grad_norm": 3.142745813218036, |
|
"learning_rate": 4.846526655896607e-07, |
|
"loss": 1.7269, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.005658852061438965, |
|
"grad_norm": 3.150831163401336, |
|
"learning_rate": 5.654281098546043e-07, |
|
"loss": 1.7258, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0064672594987873885, |
|
"grad_norm": 2.8932615743223953, |
|
"learning_rate": 6.462035541195477e-07, |
|
"loss": 1.6689, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.007275666936135812, |
|
"grad_norm": 2.8110189620165857, |
|
"learning_rate": 7.269789983844912e-07, |
|
"loss": 1.6853, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.008084074373484237, |
|
"grad_norm": 2.7038882979842787, |
|
"learning_rate": 8.077544426494346e-07, |
|
"loss": 1.6557, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.00889248181083266, |
|
"grad_norm": 2.483453858975139, |
|
"learning_rate": 8.885298869143781e-07, |
|
"loss": 1.6717, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.009700889248181084, |
|
"grad_norm": 2.52510825170289, |
|
"learning_rate": 9.693053311793215e-07, |
|
"loss": 1.6649, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.010509296685529508, |
|
"grad_norm": 2.3691095179729538, |
|
"learning_rate": 1.0500807754442651e-06, |
|
"loss": 1.6265, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.01131770412287793, |
|
"grad_norm": 1.9488878151454319, |
|
"learning_rate": 1.1308562197092086e-06, |
|
"loss": 1.6181, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.012126111560226353, |
|
"grad_norm": 2.2451551696944954, |
|
"learning_rate": 1.211631663974152e-06, |
|
"loss": 1.5957, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.012934518997574777, |
|
"grad_norm": 1.8628223064786595, |
|
"learning_rate": 1.2924071082390954e-06, |
|
"loss": 1.5846, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0137429264349232, |
|
"grad_norm": 1.718641914773257, |
|
"learning_rate": 1.3731825525040387e-06, |
|
"loss": 1.5563, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.014551333872271624, |
|
"grad_norm": 1.5468505378881632, |
|
"learning_rate": 1.4539579967689823e-06, |
|
"loss": 1.5123, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.015359741309620048, |
|
"grad_norm": 1.3814602687012587, |
|
"learning_rate": 1.5347334410339258e-06, |
|
"loss": 1.5073, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.016168148746968473, |
|
"grad_norm": 1.3287767546309797, |
|
"learning_rate": 1.6155088852988692e-06, |
|
"loss": 1.4911, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.016976556184316895, |
|
"grad_norm": 1.2392531890824057, |
|
"learning_rate": 1.6962843295638126e-06, |
|
"loss": 1.466, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.01778496362166532, |
|
"grad_norm": 1.1786674587956345, |
|
"learning_rate": 1.7770597738287563e-06, |
|
"loss": 1.4641, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.018593371059013743, |
|
"grad_norm": 1.131646019707904, |
|
"learning_rate": 1.8578352180936995e-06, |
|
"loss": 1.4205, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.019401778496362168, |
|
"grad_norm": 1.1355478936146342, |
|
"learning_rate": 1.938610662358643e-06, |
|
"loss": 1.4253, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.02021018593371059, |
|
"grad_norm": 1.0935126326356333, |
|
"learning_rate": 2.0193861066235864e-06, |
|
"loss": 1.4107, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.021018593371059015, |
|
"grad_norm": 1.0807157318374936, |
|
"learning_rate": 2.1001615508885302e-06, |
|
"loss": 1.3773, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.021827000808407437, |
|
"grad_norm": 1.0813532465125704, |
|
"learning_rate": 2.1809369951534733e-06, |
|
"loss": 1.3614, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.02263540824575586, |
|
"grad_norm": 1.0721664075597053, |
|
"learning_rate": 2.261712439418417e-06, |
|
"loss": 1.3454, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.023443815683104285, |
|
"grad_norm": 1.1706087580447129, |
|
"learning_rate": 2.34248788368336e-06, |
|
"loss": 1.3002, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.024252223120452707, |
|
"grad_norm": 1.2130738107133745, |
|
"learning_rate": 2.423263327948304e-06, |
|
"loss": 1.256, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.025060630557801132, |
|
"grad_norm": 1.349088393672931, |
|
"learning_rate": 2.5040387722132474e-06, |
|
"loss": 1.2689, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.025869037995149554, |
|
"grad_norm": 1.507002519970385, |
|
"learning_rate": 2.584814216478191e-06, |
|
"loss": 1.1915, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.02667744543249798, |
|
"grad_norm": 1.8229173794208704, |
|
"learning_rate": 2.6655896607431343e-06, |
|
"loss": 1.1551, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.0274858528698464, |
|
"grad_norm": 1.7143482970110884, |
|
"learning_rate": 2.7463651050080773e-06, |
|
"loss": 1.111, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.028294260307194827, |
|
"grad_norm": 1.6459358408927556, |
|
"learning_rate": 2.827140549273021e-06, |
|
"loss": 1.0532, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.02910266774454325, |
|
"grad_norm": 1.4413642537677436, |
|
"learning_rate": 2.9079159935379646e-06, |
|
"loss": 1.0274, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.029911075181891674, |
|
"grad_norm": 1.1419491679189164, |
|
"learning_rate": 2.988691437802908e-06, |
|
"loss": 1.0013, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.030719482619240096, |
|
"grad_norm": 0.9309353953036712, |
|
"learning_rate": 3.0694668820678515e-06, |
|
"loss": 0.9753, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.03152789005658852, |
|
"grad_norm": 0.8025838589275166, |
|
"learning_rate": 3.1502423263327954e-06, |
|
"loss": 0.9696, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.03233629749393695, |
|
"grad_norm": 0.7733858683355683, |
|
"learning_rate": 3.2310177705977384e-06, |
|
"loss": 0.9494, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.033144704931285365, |
|
"grad_norm": 0.7464642252029693, |
|
"learning_rate": 3.311793214862682e-06, |
|
"loss": 0.9462, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.03395311236863379, |
|
"grad_norm": 0.7055514389315549, |
|
"learning_rate": 3.3925686591276253e-06, |
|
"loss": 0.927, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.034761519805982216, |
|
"grad_norm": 0.7104304189422785, |
|
"learning_rate": 3.473344103392569e-06, |
|
"loss": 0.924, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.03556992724333064, |
|
"grad_norm": 0.6574144588785804, |
|
"learning_rate": 3.5541195476575126e-06, |
|
"loss": 0.9327, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.03637833468067906, |
|
"grad_norm": 0.7173567285454316, |
|
"learning_rate": 3.6348949919224556e-06, |
|
"loss": 0.9049, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.037186742118027485, |
|
"grad_norm": 0.6476430881143054, |
|
"learning_rate": 3.715670436187399e-06, |
|
"loss": 0.8999, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.03799514955537591, |
|
"grad_norm": 0.6762537582692184, |
|
"learning_rate": 3.796445880452343e-06, |
|
"loss": 0.8911, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.038803556992724336, |
|
"grad_norm": 0.5916230517794187, |
|
"learning_rate": 3.877221324717286e-06, |
|
"loss": 0.9092, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.039611964430072755, |
|
"grad_norm": 0.6767171855576188, |
|
"learning_rate": 3.95799676898223e-06, |
|
"loss": 0.9173, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.04042037186742118, |
|
"grad_norm": 0.6057387003801464, |
|
"learning_rate": 4.038772213247173e-06, |
|
"loss": 0.9092, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.041228779304769606, |
|
"grad_norm": 0.6860012772175927, |
|
"learning_rate": 4.119547657512117e-06, |
|
"loss": 0.9265, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.04203718674211803, |
|
"grad_norm": 0.6193353966875188, |
|
"learning_rate": 4.2003231017770605e-06, |
|
"loss": 0.9054, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.04284559417946645, |
|
"grad_norm": 0.7553774828322988, |
|
"learning_rate": 4.2810985460420035e-06, |
|
"loss": 0.9065, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.043654001616814875, |
|
"grad_norm": 0.6613818499045624, |
|
"learning_rate": 4.3618739903069465e-06, |
|
"loss": 0.918, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.0444624090541633, |
|
"grad_norm": 0.6620831207682815, |
|
"learning_rate": 4.44264943457189e-06, |
|
"loss": 0.9037, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.04527081649151172, |
|
"grad_norm": 0.7222236926277061, |
|
"learning_rate": 4.523424878836834e-06, |
|
"loss": 0.8979, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.046079223928860144, |
|
"grad_norm": 0.6242092234384768, |
|
"learning_rate": 4.604200323101777e-06, |
|
"loss": 0.8859, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.04688763136620857, |
|
"grad_norm": 0.6308938354658095, |
|
"learning_rate": 4.68497576736672e-06, |
|
"loss": 0.8841, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.047696038803556995, |
|
"grad_norm": 0.60859508951431, |
|
"learning_rate": 4.765751211631664e-06, |
|
"loss": 0.8838, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.04850444624090541, |
|
"grad_norm": 0.6652854237730623, |
|
"learning_rate": 4.846526655896608e-06, |
|
"loss": 0.9062, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.04931285367825384, |
|
"grad_norm": 0.642163853982114, |
|
"learning_rate": 4.927302100161551e-06, |
|
"loss": 0.9076, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.050121261115602264, |
|
"grad_norm": 0.6957211657326403, |
|
"learning_rate": 5.008077544426495e-06, |
|
"loss": 0.9021, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.05092966855295069, |
|
"grad_norm": 0.6995520981355653, |
|
"learning_rate": 5.088852988691439e-06, |
|
"loss": 0.8737, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.05173807599029911, |
|
"grad_norm": 0.7059034170974082, |
|
"learning_rate": 5.169628432956382e-06, |
|
"loss": 0.8858, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.05254648342764753, |
|
"grad_norm": 0.835327047755355, |
|
"learning_rate": 5.250403877221325e-06, |
|
"loss": 0.8537, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.05335489086499596, |
|
"grad_norm": 0.6934448269317276, |
|
"learning_rate": 5.331179321486269e-06, |
|
"loss": 0.8777, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.054163298302344384, |
|
"grad_norm": 0.6740382005002135, |
|
"learning_rate": 5.411954765751212e-06, |
|
"loss": 0.8776, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.0549717057396928, |
|
"grad_norm": 0.6721017017683236, |
|
"learning_rate": 5.492730210016155e-06, |
|
"loss": 0.8596, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.05578011317704123, |
|
"grad_norm": 0.6684833482788531, |
|
"learning_rate": 5.573505654281099e-06, |
|
"loss": 0.8802, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.056588520614389654, |
|
"grad_norm": 0.6830751371635061, |
|
"learning_rate": 5.654281098546042e-06, |
|
"loss": 0.8847, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.05739692805173808, |
|
"grad_norm": 0.6376927762203839, |
|
"learning_rate": 5.735056542810986e-06, |
|
"loss": 0.8952, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.0582053354890865, |
|
"grad_norm": 0.7491586670802004, |
|
"learning_rate": 5.815831987075929e-06, |
|
"loss": 0.8696, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.05901374292643492, |
|
"grad_norm": 0.7899182711032194, |
|
"learning_rate": 5.896607431340873e-06, |
|
"loss": 0.8708, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.05982215036378335, |
|
"grad_norm": 0.6729809655437429, |
|
"learning_rate": 5.977382875605816e-06, |
|
"loss": 0.8788, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.060630557801131774, |
|
"grad_norm": 0.7074821523168202, |
|
"learning_rate": 6.058158319870759e-06, |
|
"loss": 0.8774, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.06143896523848019, |
|
"grad_norm": 0.7531217874613877, |
|
"learning_rate": 6.138933764135703e-06, |
|
"loss": 0.861, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.06224737267582862, |
|
"grad_norm": 0.700415167204527, |
|
"learning_rate": 6.219709208400647e-06, |
|
"loss": 0.8564, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.06305578011317704, |
|
"grad_norm": 0.6636924053582388, |
|
"learning_rate": 6.300484652665591e-06, |
|
"loss": 0.8771, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.06386418755052546, |
|
"grad_norm": 0.66699758864019, |
|
"learning_rate": 6.381260096930534e-06, |
|
"loss": 0.8562, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.0646725949878739, |
|
"grad_norm": 0.6787528779482374, |
|
"learning_rate": 6.462035541195477e-06, |
|
"loss": 0.849, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.06548100242522231, |
|
"grad_norm": 0.728449788329189, |
|
"learning_rate": 6.542810985460421e-06, |
|
"loss": 0.8844, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.06628940986257073, |
|
"grad_norm": 0.6910443567228122, |
|
"learning_rate": 6.623586429725364e-06, |
|
"loss": 0.8627, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.06709781729991916, |
|
"grad_norm": 0.7043536672068673, |
|
"learning_rate": 6.7043618739903075e-06, |
|
"loss": 0.877, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.06790622473726758, |
|
"grad_norm": 0.6952950226557627, |
|
"learning_rate": 6.7851373182552505e-06, |
|
"loss": 0.8593, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.068714632174616, |
|
"grad_norm": 0.7416956200244156, |
|
"learning_rate": 6.865912762520195e-06, |
|
"loss": 0.8784, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.06952303961196443, |
|
"grad_norm": 0.6558940843547532, |
|
"learning_rate": 6.946688206785138e-06, |
|
"loss": 0.8449, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.07033144704931285, |
|
"grad_norm": 0.7636013144707001, |
|
"learning_rate": 7.027463651050081e-06, |
|
"loss": 0.8625, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.07113985448666128, |
|
"grad_norm": 0.7003307897432925, |
|
"learning_rate": 7.108239095315025e-06, |
|
"loss": 0.8516, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.0719482619240097, |
|
"grad_norm": 0.8469481736942237, |
|
"learning_rate": 7.189014539579968e-06, |
|
"loss": 0.8485, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.07275666936135812, |
|
"grad_norm": 0.8403326547267631, |
|
"learning_rate": 7.269789983844911e-06, |
|
"loss": 0.8505, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.07356507679870655, |
|
"grad_norm": 0.7357863965541986, |
|
"learning_rate": 7.350565428109855e-06, |
|
"loss": 0.8391, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.07437348423605497, |
|
"grad_norm": 0.7683662988715164, |
|
"learning_rate": 7.431340872374798e-06, |
|
"loss": 0.8644, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.07518189167340339, |
|
"grad_norm": 0.7352096943254265, |
|
"learning_rate": 7.512116316639743e-06, |
|
"loss": 0.8758, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.07599029911075182, |
|
"grad_norm": 0.793107329910895, |
|
"learning_rate": 7.592891760904686e-06, |
|
"loss": 0.8548, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.07679870654810024, |
|
"grad_norm": 0.7644006635626036, |
|
"learning_rate": 7.673667205169629e-06, |
|
"loss": 0.8569, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.07760711398544867, |
|
"grad_norm": 0.7654747646539816, |
|
"learning_rate": 7.754442649434572e-06, |
|
"loss": 0.8513, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.07841552142279709, |
|
"grad_norm": 0.7186596171490216, |
|
"learning_rate": 7.835218093699516e-06, |
|
"loss": 0.8694, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.07922392886014551, |
|
"grad_norm": 0.7515718660820482, |
|
"learning_rate": 7.91599353796446e-06, |
|
"loss": 0.8497, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.08003233629749394, |
|
"grad_norm": 0.7475774619661114, |
|
"learning_rate": 7.996768982229403e-06, |
|
"loss": 0.8577, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.08084074373484236, |
|
"grad_norm": 0.742290022489521, |
|
"learning_rate": 8.077544426494346e-06, |
|
"loss": 0.8402, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08164915117219078, |
|
"grad_norm": 0.8472564346824212, |
|
"learning_rate": 8.15831987075929e-06, |
|
"loss": 0.8383, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.08245755860953921, |
|
"grad_norm": 0.7770969163651785, |
|
"learning_rate": 8.239095315024233e-06, |
|
"loss": 0.8622, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.08326596604688763, |
|
"grad_norm": 0.8440539457306321, |
|
"learning_rate": 8.319870759289176e-06, |
|
"loss": 0.8633, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.08407437348423606, |
|
"grad_norm": 0.8914813539424271, |
|
"learning_rate": 8.400646203554121e-06, |
|
"loss": 0.8595, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.08488278092158448, |
|
"grad_norm": 0.800403073002308, |
|
"learning_rate": 8.481421647819064e-06, |
|
"loss": 0.848, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.0856911883589329, |
|
"grad_norm": 0.9509086588684886, |
|
"learning_rate": 8.562197092084007e-06, |
|
"loss": 0.8474, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.08649959579628133, |
|
"grad_norm": 0.8140338179022737, |
|
"learning_rate": 8.64297253634895e-06, |
|
"loss": 0.8373, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.08730800323362975, |
|
"grad_norm": 0.7006852263989337, |
|
"learning_rate": 8.723747980613893e-06, |
|
"loss": 0.8482, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.08811641067097817, |
|
"grad_norm": 0.7785561327612173, |
|
"learning_rate": 8.804523424878838e-06, |
|
"loss": 0.8305, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.0889248181083266, |
|
"grad_norm": 0.7986486796854503, |
|
"learning_rate": 8.88529886914378e-06, |
|
"loss": 0.8648, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.08973322554567502, |
|
"grad_norm": 0.7985244858238523, |
|
"learning_rate": 8.966074313408725e-06, |
|
"loss": 0.8559, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.09054163298302344, |
|
"grad_norm": 0.7908671911880187, |
|
"learning_rate": 9.046849757673668e-06, |
|
"loss": 0.8352, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.09135004042037187, |
|
"grad_norm": 0.7783182513226582, |
|
"learning_rate": 9.127625201938612e-06, |
|
"loss": 0.841, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.09215844785772029, |
|
"grad_norm": 0.8114380820943968, |
|
"learning_rate": 9.208400646203555e-06, |
|
"loss": 0.8696, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.09296685529506872, |
|
"grad_norm": 0.7244790781977708, |
|
"learning_rate": 9.289176090468498e-06, |
|
"loss": 0.8781, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.09377526273241714, |
|
"grad_norm": 0.7757605741684829, |
|
"learning_rate": 9.36995153473344e-06, |
|
"loss": 0.8344, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.09458367016976556, |
|
"grad_norm": 0.8251723121959252, |
|
"learning_rate": 9.450726978998385e-06, |
|
"loss": 0.8699, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.09539207760711399, |
|
"grad_norm": 0.7770030641428826, |
|
"learning_rate": 9.531502423263328e-06, |
|
"loss": 0.8453, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.09620048504446241, |
|
"grad_norm": 0.8828633900844275, |
|
"learning_rate": 9.612277867528273e-06, |
|
"loss": 0.8574, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.09700889248181083, |
|
"grad_norm": 0.8430425689960669, |
|
"learning_rate": 9.693053311793216e-06, |
|
"loss": 0.8534, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.09781729991915926, |
|
"grad_norm": 0.9081575104833137, |
|
"learning_rate": 9.773828756058159e-06, |
|
"loss": 0.8588, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.09862570735650768, |
|
"grad_norm": 0.8428340938970948, |
|
"learning_rate": 9.854604200323102e-06, |
|
"loss": 0.8431, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.09943411479385611, |
|
"grad_norm": 0.7107849668872003, |
|
"learning_rate": 9.935379644588045e-06, |
|
"loss": 0.8446, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.10024252223120453, |
|
"grad_norm": 0.7895600318895202, |
|
"learning_rate": 9.999999203559496e-06, |
|
"loss": 0.8453, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.10105092966855295, |
|
"grad_norm": 0.8083601931655888, |
|
"learning_rate": 9.999971328168497e-06, |
|
"loss": 0.8488, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.10185933710590138, |
|
"grad_norm": 0.7985500406230298, |
|
"learning_rate": 9.999903631006022e-06, |
|
"loss": 0.8425, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.1026677445432498, |
|
"grad_norm": 0.7746795817816715, |
|
"learning_rate": 9.999796112611239e-06, |
|
"loss": 0.8319, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.10347615198059822, |
|
"grad_norm": 0.8389146849754998, |
|
"learning_rate": 9.999648773840469e-06, |
|
"loss": 0.8235, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.10428455941794665, |
|
"grad_norm": 0.7186706984947462, |
|
"learning_rate": 9.999461615867176e-06, |
|
"loss": 0.8458, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.10509296685529507, |
|
"grad_norm": 0.8396675978961405, |
|
"learning_rate": 9.99923464018196e-06, |
|
"loss": 0.8429, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.1059013742926435, |
|
"grad_norm": 0.8459924496521772, |
|
"learning_rate": 9.998967848592548e-06, |
|
"loss": 0.8435, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.10670978172999192, |
|
"grad_norm": 0.8256550955754403, |
|
"learning_rate": 9.998661243223772e-06, |
|
"loss": 0.8266, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.10751818916734034, |
|
"grad_norm": 0.8794829930536063, |
|
"learning_rate": 9.998314826517564e-06, |
|
"loss": 0.8618, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.10832659660468877, |
|
"grad_norm": 0.9329957202025235, |
|
"learning_rate": 9.99792860123292e-06, |
|
"loss": 0.8459, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.10913500404203719, |
|
"grad_norm": 0.8109786838523474, |
|
"learning_rate": 9.997502570445898e-06, |
|
"loss": 0.8357, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.1099434114793856, |
|
"grad_norm": 0.8368185449034288, |
|
"learning_rate": 9.997036737549573e-06, |
|
"loss": 0.8293, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.11075181891673404, |
|
"grad_norm": 0.8442652760477221, |
|
"learning_rate": 9.996531106254027e-06, |
|
"loss": 0.8444, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.11156022635408246, |
|
"grad_norm": 0.8400214154235522, |
|
"learning_rate": 9.99598568058631e-06, |
|
"loss": 0.8292, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.11236863379143087, |
|
"grad_norm": 0.9180831766991244, |
|
"learning_rate": 9.995400464890409e-06, |
|
"loss": 0.8156, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.11317704122877931, |
|
"grad_norm": 0.8609257429862948, |
|
"learning_rate": 9.994775463827218e-06, |
|
"loss": 0.8616, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.11398544866612773, |
|
"grad_norm": 0.8666792501707015, |
|
"learning_rate": 9.994110682374491e-06, |
|
"loss": 0.8271, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.11479385610347616, |
|
"grad_norm": 0.7912432563155621, |
|
"learning_rate": 9.993406125826818e-06, |
|
"loss": 0.8401, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.11560226354082458, |
|
"grad_norm": 0.8597765866247851, |
|
"learning_rate": 9.992661799795568e-06, |
|
"loss": 0.8431, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.116410670978173, |
|
"grad_norm": 0.807630693691931, |
|
"learning_rate": 9.991877710208851e-06, |
|
"loss": 0.8373, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.11721907841552143, |
|
"grad_norm": 0.817871310017788, |
|
"learning_rate": 9.991053863311468e-06, |
|
"loss": 0.8564, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.11802748585286985, |
|
"grad_norm": 0.7613026800591074, |
|
"learning_rate": 9.990190265664868e-06, |
|
"loss": 0.8197, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.11883589329021826, |
|
"grad_norm": 0.8187146133464235, |
|
"learning_rate": 9.989286924147085e-06, |
|
"loss": 0.8222, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.1196443007275667, |
|
"grad_norm": 0.8489781785397903, |
|
"learning_rate": 9.988343845952697e-06, |
|
"loss": 0.8024, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.12045270816491511, |
|
"grad_norm": 0.7613031011178205, |
|
"learning_rate": 9.987361038592751e-06, |
|
"loss": 0.856, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.12126111560226355, |
|
"grad_norm": 0.8718470878736602, |
|
"learning_rate": 9.986338509894722e-06, |
|
"loss": 0.8429, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.12206952303961197, |
|
"grad_norm": 0.8137535001101496, |
|
"learning_rate": 9.985276268002434e-06, |
|
"loss": 0.8251, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.12287793047696038, |
|
"grad_norm": 0.9141558760999264, |
|
"learning_rate": 9.984174321376008e-06, |
|
"loss": 0.8387, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.12368633791430882, |
|
"grad_norm": 0.8433283575738159, |
|
"learning_rate": 9.983032678791787e-06, |
|
"loss": 0.8333, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.12449474535165723, |
|
"grad_norm": 0.8208485969816852, |
|
"learning_rate": 9.98185134934227e-06, |
|
"loss": 0.8435, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.12530315278900567, |
|
"grad_norm": 0.8537472068527471, |
|
"learning_rate": 9.980630342436038e-06, |
|
"loss": 0.8537, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.12611156022635409, |
|
"grad_norm": 0.8514401120204336, |
|
"learning_rate": 9.979369667797675e-06, |
|
"loss": 0.8253, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.1269199676637025, |
|
"grad_norm": 0.8159339445564436, |
|
"learning_rate": 9.978069335467702e-06, |
|
"loss": 0.8485, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.12772837510105092, |
|
"grad_norm": 0.8290585282345666, |
|
"learning_rate": 9.976729355802483e-06, |
|
"loss": 0.8327, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.12853678253839934, |
|
"grad_norm": 0.8280894035719621, |
|
"learning_rate": 9.975349739474156e-06, |
|
"loss": 0.8209, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.1293451899757478, |
|
"grad_norm": 0.8666934295453808, |
|
"learning_rate": 9.97393049747053e-06, |
|
"loss": 0.8284, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.1301535974130962, |
|
"grad_norm": 0.8707107448163076, |
|
"learning_rate": 9.972471641095023e-06, |
|
"loss": 0.8255, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.13096200485044462, |
|
"grad_norm": 0.8245531731991685, |
|
"learning_rate": 9.970973181966548e-06, |
|
"loss": 0.8155, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.13177041228779304, |
|
"grad_norm": 0.7959320542029523, |
|
"learning_rate": 9.96943513201943e-06, |
|
"loss": 0.8205, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.13257881972514146, |
|
"grad_norm": 0.9111851063669996, |
|
"learning_rate": 9.967857503503318e-06, |
|
"loss": 0.8356, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.1333872271624899, |
|
"grad_norm": 0.8307380488248262, |
|
"learning_rate": 9.966240308983078e-06, |
|
"loss": 0.8216, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.13419563459983833, |
|
"grad_norm": 0.9078436840276765, |
|
"learning_rate": 9.964583561338688e-06, |
|
"loss": 0.8027, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.13500404203718674, |
|
"grad_norm": 0.8201106707374893, |
|
"learning_rate": 9.962887273765152e-06, |
|
"loss": 0.8196, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.13581244947453516, |
|
"grad_norm": 0.8774372243990994, |
|
"learning_rate": 9.961151459772384e-06, |
|
"loss": 0.8342, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.13662085691188358, |
|
"grad_norm": 0.9217986467550336, |
|
"learning_rate": 9.959376133185098e-06, |
|
"loss": 0.8281, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.137429264349232, |
|
"grad_norm": 0.8384616776862577, |
|
"learning_rate": 9.95756130814271e-06, |
|
"loss": 0.828, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.13823767178658045, |
|
"grad_norm": 0.8506654451968726, |
|
"learning_rate": 9.955706999099207e-06, |
|
"loss": 0.8117, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.13904607922392886, |
|
"grad_norm": 0.8389541534284436, |
|
"learning_rate": 9.953813220823048e-06, |
|
"loss": 0.7975, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.13985448666127728, |
|
"grad_norm": 0.830733428685578, |
|
"learning_rate": 9.951879988397045e-06, |
|
"loss": 0.8303, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.1406628940986257, |
|
"grad_norm": 0.8021435064021754, |
|
"learning_rate": 9.949907317218233e-06, |
|
"loss": 0.828, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.14147130153597412, |
|
"grad_norm": 0.8658787105545401, |
|
"learning_rate": 9.94789522299775e-06, |
|
"loss": 0.8097, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.14227970897332257, |
|
"grad_norm": 0.9685280853430394, |
|
"learning_rate": 9.945843721760725e-06, |
|
"loss": 0.8232, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.14308811641067098, |
|
"grad_norm": 0.8148173552026365, |
|
"learning_rate": 9.943752829846132e-06, |
|
"loss": 0.8227, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.1438965238480194, |
|
"grad_norm": 0.8662458438707715, |
|
"learning_rate": 9.941622563906667e-06, |
|
"loss": 0.8292, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.14470493128536782, |
|
"grad_norm": 0.8109041012364064, |
|
"learning_rate": 9.939452940908627e-06, |
|
"loss": 0.7983, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.14551333872271624, |
|
"grad_norm": 0.7979068544718093, |
|
"learning_rate": 9.937243978131751e-06, |
|
"loss": 0.8109, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.1463217461600647, |
|
"grad_norm": 0.9947738130271577, |
|
"learning_rate": 9.934995693169104e-06, |
|
"loss": 0.8112, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.1471301535974131, |
|
"grad_norm": 0.8091228479590519, |
|
"learning_rate": 9.932708103926932e-06, |
|
"loss": 0.8282, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.14793856103476152, |
|
"grad_norm": 0.8487266612698499, |
|
"learning_rate": 9.930381228624501e-06, |
|
"loss": 0.8195, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.14874696847210994, |
|
"grad_norm": 0.8322926755681642, |
|
"learning_rate": 9.928015085793983e-06, |
|
"loss": 0.83, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.14955537590945836, |
|
"grad_norm": 0.8200762139699286, |
|
"learning_rate": 9.925609694280284e-06, |
|
"loss": 0.8162, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.15036378334680678, |
|
"grad_norm": 0.8102910947287206, |
|
"learning_rate": 9.923165073240905e-06, |
|
"loss": 0.8177, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.15117219078415522, |
|
"grad_norm": 0.785366694042482, |
|
"learning_rate": 9.920681242145787e-06, |
|
"loss": 0.8085, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.15198059822150364, |
|
"grad_norm": 0.8512969626348545, |
|
"learning_rate": 9.918158220777152e-06, |
|
"loss": 0.8116, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.15278900565885206, |
|
"grad_norm": 0.749686550031715, |
|
"learning_rate": 9.91559602922935e-06, |
|
"loss": 0.7995, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.15359741309620048, |
|
"grad_norm": 0.8078570006806167, |
|
"learning_rate": 9.912994687908701e-06, |
|
"loss": 0.809, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.1544058205335489, |
|
"grad_norm": 0.8901185428475071, |
|
"learning_rate": 9.91035421753333e-06, |
|
"loss": 0.8311, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.15521422797089734, |
|
"grad_norm": 0.8271675548109904, |
|
"learning_rate": 9.907674639132995e-06, |
|
"loss": 0.824, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.15602263540824576, |
|
"grad_norm": 0.8820995149717548, |
|
"learning_rate": 9.904955974048934e-06, |
|
"loss": 0.8107, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.15683104284559418, |
|
"grad_norm": 0.9196878526802965, |
|
"learning_rate": 9.902198243933679e-06, |
|
"loss": 0.8151, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.1576394502829426, |
|
"grad_norm": 0.8351142569585079, |
|
"learning_rate": 9.899401470750898e-06, |
|
"loss": 0.8304, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.15844785772029102, |
|
"grad_norm": 0.9150380540529176, |
|
"learning_rate": 9.896565676775212e-06, |
|
"loss": 0.8071, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.15925626515763944, |
|
"grad_norm": 0.8959222346902678, |
|
"learning_rate": 9.893690884592017e-06, |
|
"loss": 0.8215, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.16006467259498788, |
|
"grad_norm": 0.8761786707963919, |
|
"learning_rate": 9.89077711709731e-06, |
|
"loss": 0.8088, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.1608730800323363, |
|
"grad_norm": 0.9148814698747098, |
|
"learning_rate": 9.887824397497498e-06, |
|
"loss": 0.8226, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.16168148746968472, |
|
"grad_norm": 0.8153076566020756, |
|
"learning_rate": 9.884832749309221e-06, |
|
"loss": 0.8159, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16248989490703314, |
|
"grad_norm": 0.8770559374607161, |
|
"learning_rate": 9.881802196359162e-06, |
|
"loss": 0.8174, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.16329830234438156, |
|
"grad_norm": 0.8362399592900118, |
|
"learning_rate": 9.87873276278386e-06, |
|
"loss": 0.8345, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.16410670978173, |
|
"grad_norm": 1.0016233711345273, |
|
"learning_rate": 9.875624473029508e-06, |
|
"loss": 0.834, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.16491511721907842, |
|
"grad_norm": 0.8247561849661889, |
|
"learning_rate": 9.87247735185177e-06, |
|
"loss": 0.8247, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.16572352465642684, |
|
"grad_norm": 0.9429399357811139, |
|
"learning_rate": 9.869291424315577e-06, |
|
"loss": 0.8069, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.16653193209377526, |
|
"grad_norm": 0.9563552234329712, |
|
"learning_rate": 9.866066715794932e-06, |
|
"loss": 0.8158, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.16734033953112368, |
|
"grad_norm": 0.8804257837923922, |
|
"learning_rate": 9.862803251972701e-06, |
|
"loss": 0.8203, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.16814874696847212, |
|
"grad_norm": 0.9024236063093718, |
|
"learning_rate": 9.859501058840416e-06, |
|
"loss": 0.8174, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.16895715440582054, |
|
"grad_norm": 0.8479328206922394, |
|
"learning_rate": 9.856160162698068e-06, |
|
"loss": 0.8261, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.16976556184316896, |
|
"grad_norm": 0.8622837966852652, |
|
"learning_rate": 9.852780590153884e-06, |
|
"loss": 0.8253, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.17057396928051738, |
|
"grad_norm": 0.9701332391764099, |
|
"learning_rate": 9.849362368124134e-06, |
|
"loss": 0.8199, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.1713823767178658, |
|
"grad_norm": 0.95228412728592, |
|
"learning_rate": 9.845905523832903e-06, |
|
"loss": 0.7991, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.17219078415521422, |
|
"grad_norm": 0.8976839442354373, |
|
"learning_rate": 9.842410084811888e-06, |
|
"loss": 0.822, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.17299919159256266, |
|
"grad_norm": 0.8593896455734684, |
|
"learning_rate": 9.838876078900158e-06, |
|
"loss": 0.7995, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.17380759902991108, |
|
"grad_norm": 0.9295880765287333, |
|
"learning_rate": 9.83530353424395e-06, |
|
"loss": 0.8128, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.1746160064672595, |
|
"grad_norm": 0.875753400493014, |
|
"learning_rate": 9.83169247929644e-06, |
|
"loss": 0.7784, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.17542441390460792, |
|
"grad_norm": 0.9211393522356855, |
|
"learning_rate": 9.828042942817513e-06, |
|
"loss": 0.813, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.17623282134195634, |
|
"grad_norm": 0.8300443612741539, |
|
"learning_rate": 9.824354953873536e-06, |
|
"loss": 0.8092, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.17704122877930478, |
|
"grad_norm": 0.8401922945789401, |
|
"learning_rate": 9.82062854183713e-06, |
|
"loss": 0.8206, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.1778496362166532, |
|
"grad_norm": 0.910386403590293, |
|
"learning_rate": 9.816863736386934e-06, |
|
"loss": 0.8206, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.17865804365400162, |
|
"grad_norm": 0.9153963356098873, |
|
"learning_rate": 9.813060567507358e-06, |
|
"loss": 0.8233, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.17946645109135004, |
|
"grad_norm": 0.9854523603560855, |
|
"learning_rate": 9.809219065488362e-06, |
|
"loss": 0.8054, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.18027485852869846, |
|
"grad_norm": 0.9518212116514045, |
|
"learning_rate": 9.805339260925209e-06, |
|
"loss": 0.782, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.18108326596604687, |
|
"grad_norm": 0.8628818391355276, |
|
"learning_rate": 9.801421184718207e-06, |
|
"loss": 0.8209, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.18189167340339532, |
|
"grad_norm": 0.8864541634165986, |
|
"learning_rate": 9.797464868072489e-06, |
|
"loss": 0.7954, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.18270008084074374, |
|
"grad_norm": 0.9167346076357672, |
|
"learning_rate": 9.793470342497737e-06, |
|
"loss": 0.8061, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.18350848827809216, |
|
"grad_norm": 0.9927145545390887, |
|
"learning_rate": 9.789437639807956e-06, |
|
"loss": 0.7994, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.18431689571544058, |
|
"grad_norm": 0.8867181379160483, |
|
"learning_rate": 9.785366792121199e-06, |
|
"loss": 0.8105, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.185125303152789, |
|
"grad_norm": 0.8543686171692966, |
|
"learning_rate": 9.781257831859326e-06, |
|
"loss": 0.819, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.18593371059013744, |
|
"grad_norm": 0.881943155622054, |
|
"learning_rate": 9.777110791747741e-06, |
|
"loss": 0.8011, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.18674211802748586, |
|
"grad_norm": 0.9138030348872207, |
|
"learning_rate": 9.77292570481513e-06, |
|
"loss": 0.8161, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.18755052546483428, |
|
"grad_norm": 0.934593231124601, |
|
"learning_rate": 9.7687026043932e-06, |
|
"loss": 0.8162, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.1883589329021827, |
|
"grad_norm": 0.9327906245764378, |
|
"learning_rate": 9.76444152411641e-06, |
|
"loss": 0.8128, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.18916734033953111, |
|
"grad_norm": 0.8269574194887537, |
|
"learning_rate": 9.760142497921708e-06, |
|
"loss": 0.8296, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.18997574777687956, |
|
"grad_norm": 0.8942466721266422, |
|
"learning_rate": 9.755805560048259e-06, |
|
"loss": 0.7915, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.19078415521422798, |
|
"grad_norm": 0.9491654283531196, |
|
"learning_rate": 9.75143074503717e-06, |
|
"loss": 0.8095, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.1915925626515764, |
|
"grad_norm": 0.911028544312517, |
|
"learning_rate": 9.74701808773122e-06, |
|
"loss": 0.7965, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.19240097008892482, |
|
"grad_norm": 0.9665924166790011, |
|
"learning_rate": 9.742567623274571e-06, |
|
"loss": 0.8485, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.19320937752627323, |
|
"grad_norm": 0.8510809811181654, |
|
"learning_rate": 9.738079387112509e-06, |
|
"loss": 0.8127, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.19401778496362165, |
|
"grad_norm": 0.9337458560349892, |
|
"learning_rate": 9.733553414991135e-06, |
|
"loss": 0.8196, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.1948261924009701, |
|
"grad_norm": 0.8504908193588703, |
|
"learning_rate": 9.728989742957107e-06, |
|
"loss": 0.803, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.19563459983831852, |
|
"grad_norm": 0.8952911780784167, |
|
"learning_rate": 9.724388407357333e-06, |
|
"loss": 0.8127, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.19644300727566694, |
|
"grad_norm": 0.8693475240728679, |
|
"learning_rate": 9.719749444838687e-06, |
|
"loss": 0.8161, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.19725141471301536, |
|
"grad_norm": 1.0067254194816264, |
|
"learning_rate": 9.715072892347724e-06, |
|
"loss": 0.8127, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.19805982215036377, |
|
"grad_norm": 0.9913591568953714, |
|
"learning_rate": 9.71035878713038e-06, |
|
"loss": 0.7877, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.19886822958771222, |
|
"grad_norm": 0.930691684815205, |
|
"learning_rate": 9.705607166731673e-06, |
|
"loss": 0.8103, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.19967663702506064, |
|
"grad_norm": 0.9913080323236753, |
|
"learning_rate": 9.700818068995407e-06, |
|
"loss": 0.8248, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.20048504446240906, |
|
"grad_norm": 0.9338686380215503, |
|
"learning_rate": 9.695991532063875e-06, |
|
"loss": 0.804, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.20129345189975748, |
|
"grad_norm": 0.881337013423663, |
|
"learning_rate": 9.691127594377546e-06, |
|
"loss": 0.7993, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.2021018593371059, |
|
"grad_norm": 0.9651413063403884, |
|
"learning_rate": 9.686226294674763e-06, |
|
"loss": 0.8157, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.2029102667744543, |
|
"grad_norm": 0.9142968016401275, |
|
"learning_rate": 9.68128767199144e-06, |
|
"loss": 0.7956, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.20371867421180276, |
|
"grad_norm": 0.9699692592410872, |
|
"learning_rate": 9.676311765660743e-06, |
|
"loss": 0.7878, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.20452708164915118, |
|
"grad_norm": 0.9791128074057168, |
|
"learning_rate": 9.67129861531278e-06, |
|
"loss": 0.7988, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.2053354890864996, |
|
"grad_norm": 0.9461753802895918, |
|
"learning_rate": 9.666248260874283e-06, |
|
"loss": 0.8027, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.206143896523848, |
|
"grad_norm": 0.9557703955773883, |
|
"learning_rate": 9.661160742568298e-06, |
|
"loss": 0.8149, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.20695230396119643, |
|
"grad_norm": 0.9648851489245359, |
|
"learning_rate": 9.656036100913854e-06, |
|
"loss": 0.8156, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.20776071139854488, |
|
"grad_norm": 0.933652528739753, |
|
"learning_rate": 9.65087437672565e-06, |
|
"loss": 0.834, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.2085691188358933, |
|
"grad_norm": 0.952902750681017, |
|
"learning_rate": 9.645675611113715e-06, |
|
"loss": 0.7919, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.20937752627324172, |
|
"grad_norm": 0.9227467408489822, |
|
"learning_rate": 9.640439845483106e-06, |
|
"loss": 0.7791, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.21018593371059013, |
|
"grad_norm": 0.9281667306865055, |
|
"learning_rate": 9.635167121533548e-06, |
|
"loss": 0.8075, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.21099434114793855, |
|
"grad_norm": 0.9491984682902288, |
|
"learning_rate": 9.629857481259128e-06, |
|
"loss": 0.7853, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.211802748585287, |
|
"grad_norm": 0.9590804939597338, |
|
"learning_rate": 9.62451096694794e-06, |
|
"loss": 0.8096, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.21261115602263542, |
|
"grad_norm": 0.9022937078982735, |
|
"learning_rate": 9.619127621181767e-06, |
|
"loss": 0.7615, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.21341956345998384, |
|
"grad_norm": 0.9211905503781073, |
|
"learning_rate": 9.613707486835725e-06, |
|
"loss": 0.8009, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.21422797089733225, |
|
"grad_norm": 0.9224553203069952, |
|
"learning_rate": 9.608250607077933e-06, |
|
"loss": 0.8095, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.21503637833468067, |
|
"grad_norm": 0.936067812857163, |
|
"learning_rate": 9.602757025369165e-06, |
|
"loss": 0.8012, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.2158447857720291, |
|
"grad_norm": 0.9252870381796091, |
|
"learning_rate": 9.597226785462501e-06, |
|
"loss": 0.7986, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.21665319320937754, |
|
"grad_norm": 0.9592213802782941, |
|
"learning_rate": 9.591659931402983e-06, |
|
"loss": 0.805, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.21746160064672596, |
|
"grad_norm": 0.9896013218895608, |
|
"learning_rate": 9.586056507527266e-06, |
|
"loss": 0.7993, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.21827000808407437, |
|
"grad_norm": 0.9440277526851043, |
|
"learning_rate": 9.580416558463257e-06, |
|
"loss": 0.8161, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.2190784155214228, |
|
"grad_norm": 0.9781000660563656, |
|
"learning_rate": 9.574740129129767e-06, |
|
"loss": 0.8046, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.2198868229587712, |
|
"grad_norm": 0.927988233216835, |
|
"learning_rate": 9.569027264736148e-06, |
|
"loss": 0.7956, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.22069523039611966, |
|
"grad_norm": 0.9666122184957214, |
|
"learning_rate": 9.563278010781939e-06, |
|
"loss": 0.7913, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.22150363783346808, |
|
"grad_norm": 0.9291765513048547, |
|
"learning_rate": 9.557492413056497e-06, |
|
"loss": 0.7919, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.2223120452708165, |
|
"grad_norm": 0.9610201773035711, |
|
"learning_rate": 9.551670517638637e-06, |
|
"loss": 0.7902, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.2231204527081649, |
|
"grad_norm": 0.9238229313792048, |
|
"learning_rate": 9.545812370896262e-06, |
|
"loss": 0.8058, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.22392886014551333, |
|
"grad_norm": 0.9054092521932388, |
|
"learning_rate": 9.539918019485995e-06, |
|
"loss": 0.7892, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.22473726758286175, |
|
"grad_norm": 1.0579833418628013, |
|
"learning_rate": 9.53398751035281e-06, |
|
"loss": 0.8148, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.2255456750202102, |
|
"grad_norm": 1.0226788158408266, |
|
"learning_rate": 9.528020890729653e-06, |
|
"loss": 0.8031, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.22635408245755861, |
|
"grad_norm": 1.1479246469449391, |
|
"learning_rate": 9.522018208137066e-06, |
|
"loss": 0.8037, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.22716248989490703, |
|
"grad_norm": 0.9756911102647868, |
|
"learning_rate": 9.51597951038282e-06, |
|
"loss": 0.8097, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.22797089733225545, |
|
"grad_norm": 1.035765200896759, |
|
"learning_rate": 9.509904845561517e-06, |
|
"loss": 0.8077, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.22877930476960387, |
|
"grad_norm": 1.0689274362300878, |
|
"learning_rate": 9.503794262054214e-06, |
|
"loss": 0.7851, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.22958771220695232, |
|
"grad_norm": 0.9132533214587567, |
|
"learning_rate": 9.497647808528045e-06, |
|
"loss": 0.7887, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.23039611964430073, |
|
"grad_norm": 1.050699012001928, |
|
"learning_rate": 9.491465533935824e-06, |
|
"loss": 0.7932, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.23120452708164915, |
|
"grad_norm": 0.9946090130405577, |
|
"learning_rate": 9.485247487515658e-06, |
|
"loss": 0.7722, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.23201293451899757, |
|
"grad_norm": 1.06286243808036, |
|
"learning_rate": 9.478993718790558e-06, |
|
"loss": 0.7939, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.232821341956346, |
|
"grad_norm": 1.0038879730182135, |
|
"learning_rate": 9.472704277568034e-06, |
|
"loss": 0.7963, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.23362974939369444, |
|
"grad_norm": 1.0200294772824388, |
|
"learning_rate": 9.466379213939717e-06, |
|
"loss": 0.7919, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.23443815683104285, |
|
"grad_norm": 1.099962358491636, |
|
"learning_rate": 9.46001857828094e-06, |
|
"loss": 0.784, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.23524656426839127, |
|
"grad_norm": 0.9471318804802602, |
|
"learning_rate": 9.453622421250353e-06, |
|
"loss": 0.7974, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.2360549717057397, |
|
"grad_norm": 0.9961320997864533, |
|
"learning_rate": 9.447190793789504e-06, |
|
"loss": 0.7677, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.2368633791430881, |
|
"grad_norm": 1.1123725273599259, |
|
"learning_rate": 9.44072374712245e-06, |
|
"loss": 0.7877, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.23767178658043653, |
|
"grad_norm": 0.9673736978640572, |
|
"learning_rate": 9.43422133275534e-06, |
|
"loss": 0.7851, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.23848019401778497, |
|
"grad_norm": 1.0263547753632962, |
|
"learning_rate": 9.427683602475994e-06, |
|
"loss": 0.7915, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.2392886014551334, |
|
"grad_norm": 0.9685100408061789, |
|
"learning_rate": 9.42111060835352e-06, |
|
"loss": 0.7887, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.2400970088924818, |
|
"grad_norm": 1.0736685654002145, |
|
"learning_rate": 9.414502402737866e-06, |
|
"loss": 0.8043, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.24090541632983023, |
|
"grad_norm": 1.0421660296064565, |
|
"learning_rate": 9.407859038259428e-06, |
|
"loss": 0.7854, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.24171382376717865, |
|
"grad_norm": 1.0032053987863772, |
|
"learning_rate": 9.401180567828615e-06, |
|
"loss": 0.7746, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.2425222312045271, |
|
"grad_norm": 0.9419030918663129, |
|
"learning_rate": 9.394467044635439e-06, |
|
"loss": 0.7803, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.2433306386418755, |
|
"grad_norm": 1.0221215187873243, |
|
"learning_rate": 9.387718522149084e-06, |
|
"loss": 0.7947, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.24413904607922393, |
|
"grad_norm": 1.0090765896605816, |
|
"learning_rate": 9.38093505411748e-06, |
|
"loss": 0.7791, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.24494745351657235, |
|
"grad_norm": 0.9942329189536906, |
|
"learning_rate": 9.374116694566882e-06, |
|
"loss": 0.8088, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.24575586095392077, |
|
"grad_norm": 1.1192063441058093, |
|
"learning_rate": 9.36726349780143e-06, |
|
"loss": 0.8059, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.2465642683912692, |
|
"grad_norm": 1.05918617263924, |
|
"learning_rate": 9.360375518402728e-06, |
|
"loss": 0.7849, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.24737267582861763, |
|
"grad_norm": 0.9510057659465685, |
|
"learning_rate": 9.353452811229395e-06, |
|
"loss": 0.8025, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.24818108326596605, |
|
"grad_norm": 1.0012384708775823, |
|
"learning_rate": 9.346495431416642e-06, |
|
"loss": 0.785, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.24898949070331447, |
|
"grad_norm": 0.9393516177220199, |
|
"learning_rate": 9.339503434375823e-06, |
|
"loss": 0.789, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.2497978981406629, |
|
"grad_norm": 1.0793839568734547, |
|
"learning_rate": 9.332476875794e-06, |
|
"loss": 0.7923, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.25060630557801133, |
|
"grad_norm": 1.0985732881069987, |
|
"learning_rate": 9.325415811633497e-06, |
|
"loss": 0.7662, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.2514147130153597, |
|
"grad_norm": 1.0071649808511798, |
|
"learning_rate": 9.318320298131452e-06, |
|
"loss": 0.8159, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 0.25222312045270817, |
|
"grad_norm": 1.0609048611209526, |
|
"learning_rate": 9.31119039179937e-06, |
|
"loss": 0.7745, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.25303152789005656, |
|
"grad_norm": 0.9691770014769086, |
|
"learning_rate": 9.30402614942268e-06, |
|
"loss": 0.7966, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 0.253839935327405, |
|
"grad_norm": 1.072937339455477, |
|
"learning_rate": 9.296827628060274e-06, |
|
"loss": 0.8029, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.25464834276475345, |
|
"grad_norm": 1.078855254896766, |
|
"learning_rate": 9.289594885044054e-06, |
|
"loss": 0.7839, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.25545675020210185, |
|
"grad_norm": 1.0105792692115017, |
|
"learning_rate": 9.282327977978477e-06, |
|
"loss": 0.7881, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.2562651576394503, |
|
"grad_norm": 1.1055275025950306, |
|
"learning_rate": 9.275026964740101e-06, |
|
"loss": 0.8059, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 0.2570735650767987, |
|
"grad_norm": 1.0897616544392064, |
|
"learning_rate": 9.267691903477112e-06, |
|
"loss": 0.7973, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.25788197251414713, |
|
"grad_norm": 1.1206071430492686, |
|
"learning_rate": 9.260322852608874e-06, |
|
"loss": 0.7887, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 0.2586903799514956, |
|
"grad_norm": 1.075935762788828, |
|
"learning_rate": 9.252919870825453e-06, |
|
"loss": 0.7904, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.25949878738884397, |
|
"grad_norm": 1.172946439353908, |
|
"learning_rate": 9.245483017087158e-06, |
|
"loss": 0.794, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 0.2603071948261924, |
|
"grad_norm": 1.1163921302502833, |
|
"learning_rate": 9.238012350624069e-06, |
|
"loss": 0.7888, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.2611156022635408, |
|
"grad_norm": 0.9856503130011599, |
|
"learning_rate": 9.230507930935559e-06, |
|
"loss": 0.7793, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 0.26192400970088925, |
|
"grad_norm": 1.0253170497677628, |
|
"learning_rate": 9.222969817789829e-06, |
|
"loss": 0.7732, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.2627324171382377, |
|
"grad_norm": 1.08074229250819, |
|
"learning_rate": 9.215398071223427e-06, |
|
"loss": 0.7967, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.2635408245755861, |
|
"grad_norm": 1.0525950674502662, |
|
"learning_rate": 9.20779275154077e-06, |
|
"loss": 0.802, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.26434923201293453, |
|
"grad_norm": 1.0778232477194654, |
|
"learning_rate": 9.200153919313667e-06, |
|
"loss": 0.7747, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 0.2651576394502829, |
|
"grad_norm": 1.069258333704327, |
|
"learning_rate": 9.192481635380834e-06, |
|
"loss": 0.7666, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.26596604688763137, |
|
"grad_norm": 0.9963364554248915, |
|
"learning_rate": 9.184775960847405e-06, |
|
"loss": 0.7732, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.2667744543249798, |
|
"grad_norm": 1.1349246014179366, |
|
"learning_rate": 9.177036957084459e-06, |
|
"loss": 0.7953, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.2675828617623282, |
|
"grad_norm": 1.0581160333050574, |
|
"learning_rate": 9.169264685728515e-06, |
|
"loss": 0.7784, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 0.26839126919967665, |
|
"grad_norm": 1.177618781034446, |
|
"learning_rate": 9.161459208681049e-06, |
|
"loss": 0.7961, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.26919967663702504, |
|
"grad_norm": 1.1579983554909417, |
|
"learning_rate": 9.153620588108006e-06, |
|
"loss": 0.7938, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 0.2700080840743735, |
|
"grad_norm": 1.0681808089207117, |
|
"learning_rate": 9.14574888643929e-06, |
|
"loss": 0.7998, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.27081649151172194, |
|
"grad_norm": 1.0290242122559143, |
|
"learning_rate": 9.137844166368289e-06, |
|
"loss": 0.7897, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.2716248989490703, |
|
"grad_norm": 1.0757364324046452, |
|
"learning_rate": 9.129906490851348e-06, |
|
"loss": 0.7967, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.27243330638641877, |
|
"grad_norm": 1.174666163846864, |
|
"learning_rate": 9.121935923107293e-06, |
|
"loss": 0.7784, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 0.27324171382376716, |
|
"grad_norm": 1.0474993963836983, |
|
"learning_rate": 9.113932526616912e-06, |
|
"loss": 0.7932, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.2740501212611156, |
|
"grad_norm": 1.3191834743977484, |
|
"learning_rate": 9.10589636512246e-06, |
|
"loss": 0.803, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 0.274858528698464, |
|
"grad_norm": 1.1027943580362713, |
|
"learning_rate": 9.097827502627137e-06, |
|
"loss": 0.7937, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.27566693613581245, |
|
"grad_norm": 1.40441127971191, |
|
"learning_rate": 9.089726003394593e-06, |
|
"loss": 0.7784, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 0.2764753435731609, |
|
"grad_norm": 1.13330113493992, |
|
"learning_rate": 9.081591931948405e-06, |
|
"loss": 0.7873, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.2772837510105093, |
|
"grad_norm": 1.0889710176936576, |
|
"learning_rate": 9.073425353071576e-06, |
|
"loss": 0.7704, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.27809215844785773, |
|
"grad_norm": 1.164862749945273, |
|
"learning_rate": 9.065226331806006e-06, |
|
"loss": 0.7627, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.2789005658852061, |
|
"grad_norm": 1.1263042754286632, |
|
"learning_rate": 9.056994933451975e-06, |
|
"loss": 0.7772, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.27970897332255457, |
|
"grad_norm": 1.1536982085539453, |
|
"learning_rate": 9.048731223567636e-06, |
|
"loss": 0.7753, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.280517380759903, |
|
"grad_norm": 1.040290077185074, |
|
"learning_rate": 9.04043526796848e-06, |
|
"loss": 0.7956, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 0.2813257881972514, |
|
"grad_norm": 1.1200292709537885, |
|
"learning_rate": 9.032107132726812e-06, |
|
"loss": 0.7746, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.28213419563459985, |
|
"grad_norm": 1.1605146350184878, |
|
"learning_rate": 9.023746884171234e-06, |
|
"loss": 0.7875, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 0.28294260307194824, |
|
"grad_norm": 1.1126474642028563, |
|
"learning_rate": 9.015354588886112e-06, |
|
"loss": 0.7572, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.2837510105092967, |
|
"grad_norm": 1.1900400445950285, |
|
"learning_rate": 9.006930313711038e-06, |
|
"loss": 0.7537, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 0.28455941794664513, |
|
"grad_norm": 1.1111691561929655, |
|
"learning_rate": 8.99847412574031e-06, |
|
"loss": 0.777, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.2853678253839935, |
|
"grad_norm": 1.160025861824882, |
|
"learning_rate": 8.989986092322394e-06, |
|
"loss": 0.757, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 0.28617623282134197, |
|
"grad_norm": 1.1040655040153644, |
|
"learning_rate": 8.981466281059378e-06, |
|
"loss": 0.765, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.28698464025869036, |
|
"grad_norm": 1.030626487972238, |
|
"learning_rate": 8.972914759806453e-06, |
|
"loss": 0.7694, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.2877930476960388, |
|
"grad_norm": 1.1545022043122366, |
|
"learning_rate": 8.964331596671348e-06, |
|
"loss": 0.7799, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.28860145513338725, |
|
"grad_norm": 1.0671347264829774, |
|
"learning_rate": 8.955716860013812e-06, |
|
"loss": 0.785, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 0.28940986257073564, |
|
"grad_norm": 1.1085542970283513, |
|
"learning_rate": 8.94707061844505e-06, |
|
"loss": 0.7938, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.2902182700080841, |
|
"grad_norm": 1.0340008147017365, |
|
"learning_rate": 8.938392940827191e-06, |
|
"loss": 0.7904, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 0.2910266774454325, |
|
"grad_norm": 1.1186878316073905, |
|
"learning_rate": 8.929683896272728e-06, |
|
"loss": 0.7847, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.2918350848827809, |
|
"grad_norm": 1.0973963525253956, |
|
"learning_rate": 8.920943554143978e-06, |
|
"loss": 0.7689, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 0.2926434923201294, |
|
"grad_norm": 1.2283562191922641, |
|
"learning_rate": 8.912171984052517e-06, |
|
"loss": 0.7974, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.29345189975747776, |
|
"grad_norm": 1.077126989544477, |
|
"learning_rate": 8.90336925585864e-06, |
|
"loss": 0.7747, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 0.2942603071948262, |
|
"grad_norm": 1.1424653162262948, |
|
"learning_rate": 8.894535439670798e-06, |
|
"loss": 0.7701, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.2950687146321746, |
|
"grad_norm": 1.05283219197678, |
|
"learning_rate": 8.885670605845032e-06, |
|
"loss": 0.7922, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.29587712206952305, |
|
"grad_norm": 1.2468236264933765, |
|
"learning_rate": 8.876774824984426e-06, |
|
"loss": 0.766, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.29668552950687144, |
|
"grad_norm": 1.175827633568805, |
|
"learning_rate": 8.867848167938535e-06, |
|
"loss": 0.7861, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.2974939369442199, |
|
"grad_norm": 1.156576419379891, |
|
"learning_rate": 8.85889070580283e-06, |
|
"loss": 0.7848, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.29830234438156833, |
|
"grad_norm": 1.152830758997776, |
|
"learning_rate": 8.849902509918119e-06, |
|
"loss": 0.7643, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.2991107518189167, |
|
"grad_norm": 1.2062288809904451, |
|
"learning_rate": 8.84088365186999e-06, |
|
"loss": 0.7971, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.29991915925626517, |
|
"grad_norm": 1.264589031563977, |
|
"learning_rate": 8.831834203488236e-06, |
|
"loss": 0.7715, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.30072756669361356, |
|
"grad_norm": 1.1659553441185666, |
|
"learning_rate": 8.822754236846283e-06, |
|
"loss": 0.7965, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.301535974130962, |
|
"grad_norm": 1.2155282318642753, |
|
"learning_rate": 8.813643824260616e-06, |
|
"loss": 0.7746, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.30234438156831045, |
|
"grad_norm": 1.1209411583569155, |
|
"learning_rate": 8.804503038290204e-06, |
|
"loss": 0.7803, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.30315278900565884, |
|
"grad_norm": 1.1638621979766686, |
|
"learning_rate": 8.795331951735927e-06, |
|
"loss": 0.7645, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.3039611964430073, |
|
"grad_norm": 1.1721735188433011, |
|
"learning_rate": 8.786130637639983e-06, |
|
"loss": 0.8013, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.3047696038803557, |
|
"grad_norm": 1.227142983021926, |
|
"learning_rate": 8.776899169285318e-06, |
|
"loss": 0.7673, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.3055780113177041, |
|
"grad_norm": 1.1530557801034091, |
|
"learning_rate": 8.767637620195037e-06, |
|
"loss": 0.7827, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.30638641875505257, |
|
"grad_norm": 1.215930807426272, |
|
"learning_rate": 8.758346064131824e-06, |
|
"loss": 0.7849, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.30719482619240096, |
|
"grad_norm": 1.2893329374001863, |
|
"learning_rate": 8.749024575097347e-06, |
|
"loss": 0.7688, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.3080032336297494, |
|
"grad_norm": 1.1224840611348765, |
|
"learning_rate": 8.739673227331671e-06, |
|
"loss": 0.769, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.3088116410670978, |
|
"grad_norm": 1.2783902442834993, |
|
"learning_rate": 8.730292095312672e-06, |
|
"loss": 0.7821, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.30962004850444624, |
|
"grad_norm": 1.1486346702768837, |
|
"learning_rate": 8.720881253755438e-06, |
|
"loss": 0.7875, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.3104284559417947, |
|
"grad_norm": 1.3327269392432468, |
|
"learning_rate": 8.711440777611672e-06, |
|
"loss": 0.7651, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.3112368633791431, |
|
"grad_norm": 1.2580934477997214, |
|
"learning_rate": 8.701970742069104e-06, |
|
"loss": 0.7973, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.3120452708164915, |
|
"grad_norm": 1.4391244366514906, |
|
"learning_rate": 8.692471222550886e-06, |
|
"loss": 0.773, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.3128536782538399, |
|
"grad_norm": 1.1961637113375232, |
|
"learning_rate": 8.68294229471499e-06, |
|
"loss": 0.7892, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.31366208569118836, |
|
"grad_norm": 1.3232907922114132, |
|
"learning_rate": 8.673384034453606e-06, |
|
"loss": 0.7524, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.3144704931285368, |
|
"grad_norm": 1.288089316781721, |
|
"learning_rate": 8.663796517892545e-06, |
|
"loss": 0.7786, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 0.3152789005658852, |
|
"grad_norm": 1.235383067935505, |
|
"learning_rate": 8.65417982139062e-06, |
|
"loss": 0.774, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.31608730800323365, |
|
"grad_norm": 1.283153495299508, |
|
"learning_rate": 8.644534021539053e-06, |
|
"loss": 0.7825, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 0.31689571544058204, |
|
"grad_norm": 1.216792856214731, |
|
"learning_rate": 8.63485919516085e-06, |
|
"loss": 0.7521, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.3177041228779305, |
|
"grad_norm": 1.3530498421771213, |
|
"learning_rate": 8.625155419310196e-06, |
|
"loss": 0.7677, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 0.3185125303152789, |
|
"grad_norm": 1.3306248413123443, |
|
"learning_rate": 8.615422771271846e-06, |
|
"loss": 0.7665, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.3193209377526273, |
|
"grad_norm": 1.1191260051140974, |
|
"learning_rate": 8.6056613285605e-06, |
|
"loss": 0.7803, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.32012934518997577, |
|
"grad_norm": 1.2579595003666681, |
|
"learning_rate": 8.595871168920192e-06, |
|
"loss": 0.7947, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.32093775262732416, |
|
"grad_norm": 1.2493019784221522, |
|
"learning_rate": 8.586052370323668e-06, |
|
"loss": 0.7827, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.3217461600646726, |
|
"grad_norm": 1.379586194222712, |
|
"learning_rate": 8.57620501097177e-06, |
|
"loss": 0.7958, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.322554567502021, |
|
"grad_norm": 1.3364714838240768, |
|
"learning_rate": 8.566329169292805e-06, |
|
"loss": 0.7613, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.32336297493936944, |
|
"grad_norm": 1.2559576604873097, |
|
"learning_rate": 8.556424923941927e-06, |
|
"loss": 0.7761, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.3241713823767179, |
|
"grad_norm": 1.137717977458081, |
|
"learning_rate": 8.546492353800504e-06, |
|
"loss": 0.7714, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 0.3249797898140663, |
|
"grad_norm": 1.1790974788016992, |
|
"learning_rate": 8.536531537975502e-06, |
|
"loss": 0.763, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.3257881972514147, |
|
"grad_norm": 1.3052435801766198, |
|
"learning_rate": 8.526542555798841e-06, |
|
"loss": 0.7747, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 0.3265966046887631, |
|
"grad_norm": 1.3641107292304886, |
|
"learning_rate": 8.516525486826766e-06, |
|
"loss": 0.7587, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.32740501212611156, |
|
"grad_norm": 1.293959981732072, |
|
"learning_rate": 8.506480410839226e-06, |
|
"loss": 0.7666, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.32821341956346, |
|
"grad_norm": 1.2669851763928037, |
|
"learning_rate": 8.496407407839222e-06, |
|
"loss": 0.7773, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.3290218270008084, |
|
"grad_norm": 1.6431510329413388, |
|
"learning_rate": 8.486306558052177e-06, |
|
"loss": 0.7525, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 0.32983023443815684, |
|
"grad_norm": 1.282525872896183, |
|
"learning_rate": 8.476177941925304e-06, |
|
"loss": 0.7761, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.33063864187550523, |
|
"grad_norm": 1.4186889444880473, |
|
"learning_rate": 8.466021640126946e-06, |
|
"loss": 0.7763, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 0.3314470493128537, |
|
"grad_norm": 1.2342391960980574, |
|
"learning_rate": 8.455837733545958e-06, |
|
"loss": 0.7787, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.3322554567502021, |
|
"grad_norm": 1.1798583685640944, |
|
"learning_rate": 8.445626303291042e-06, |
|
"loss": 0.7651, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 0.3330638641875505, |
|
"grad_norm": 1.4340728360934016, |
|
"learning_rate": 8.435387430690114e-06, |
|
"loss": 0.7739, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.33387227162489896, |
|
"grad_norm": 1.2875171500390412, |
|
"learning_rate": 8.425121197289651e-06, |
|
"loss": 0.7625, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 0.33468067906224735, |
|
"grad_norm": 1.36506923501983, |
|
"learning_rate": 8.414827684854043e-06, |
|
"loss": 0.7832, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.3354890864995958, |
|
"grad_norm": 1.2183591792746593, |
|
"learning_rate": 8.404506975364936e-06, |
|
"loss": 0.7774, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.33629749393694425, |
|
"grad_norm": 1.4107910163303898, |
|
"learning_rate": 8.394159151020592e-06, |
|
"loss": 0.7612, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.33710590137429264, |
|
"grad_norm": 1.354922028103792, |
|
"learning_rate": 8.383784294235223e-06, |
|
"loss": 0.7563, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 0.3379143088116411, |
|
"grad_norm": 1.3755048390820226, |
|
"learning_rate": 8.373382487638336e-06, |
|
"loss": 0.7636, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.3387227162489895, |
|
"grad_norm": 1.2930084423245742, |
|
"learning_rate": 8.36295381407408e-06, |
|
"loss": 0.7693, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 0.3395311236863379, |
|
"grad_norm": 1.378365146427395, |
|
"learning_rate": 8.352498356600582e-06, |
|
"loss": 0.7746, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.3403395311236863, |
|
"grad_norm": 1.3177858986323527, |
|
"learning_rate": 8.342016198489287e-06, |
|
"loss": 0.7777, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 0.34114793856103476, |
|
"grad_norm": 1.1946361921703772, |
|
"learning_rate": 8.331507423224297e-06, |
|
"loss": 0.758, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.3419563459983832, |
|
"grad_norm": 1.2196165253989297, |
|
"learning_rate": 8.320972114501698e-06, |
|
"loss": 0.7717, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 0.3427647534357316, |
|
"grad_norm": 1.5233528090812753, |
|
"learning_rate": 8.310410356228905e-06, |
|
"loss": 0.7643, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.34357316087308004, |
|
"grad_norm": 1.3922351628505543, |
|
"learning_rate": 8.299822232523983e-06, |
|
"loss": 0.7652, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.34438156831042843, |
|
"grad_norm": 1.299229036894897, |
|
"learning_rate": 8.289207827714985e-06, |
|
"loss": 0.7701, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.3451899757477769, |
|
"grad_norm": 1.4076244701750404, |
|
"learning_rate": 8.278567226339278e-06, |
|
"loss": 0.7787, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 0.3459983831851253, |
|
"grad_norm": 1.3491904132063997, |
|
"learning_rate": 8.267900513142865e-06, |
|
"loss": 0.7761, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.3468067906224737, |
|
"grad_norm": 1.389818329274582, |
|
"learning_rate": 8.257207773079717e-06, |
|
"loss": 0.78, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 0.34761519805982216, |
|
"grad_norm": 1.2993500493593475, |
|
"learning_rate": 8.246489091311093e-06, |
|
"loss": 0.7534, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.34842360549717055, |
|
"grad_norm": 1.4908211704294125, |
|
"learning_rate": 8.235744553204862e-06, |
|
"loss": 0.7598, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 0.349232012934519, |
|
"grad_norm": 1.3704141190016572, |
|
"learning_rate": 8.22497424433482e-06, |
|
"loss": 0.7882, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.35004042037186744, |
|
"grad_norm": 1.4318928400255833, |
|
"learning_rate": 8.214178250480018e-06, |
|
"loss": 0.7743, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 0.35084882780921584, |
|
"grad_norm": 1.3025693765735056, |
|
"learning_rate": 8.20335665762407e-06, |
|
"loss": 0.7513, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.3516572352465643, |
|
"grad_norm": 1.5596677809789021, |
|
"learning_rate": 8.192509551954464e-06, |
|
"loss": 0.7587, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.35246564268391267, |
|
"grad_norm": 1.4384756804728538, |
|
"learning_rate": 8.181637019861894e-06, |
|
"loss": 0.7594, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.3532740501212611, |
|
"grad_norm": 1.323585903657254, |
|
"learning_rate": 8.17073914793955e-06, |
|
"loss": 0.7628, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 0.35408245755860956, |
|
"grad_norm": 1.421616795580572, |
|
"learning_rate": 8.159816022982448e-06, |
|
"loss": 0.7483, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.35489086499595796, |
|
"grad_norm": 1.64783406542589, |
|
"learning_rate": 8.148867731986719e-06, |
|
"loss": 0.758, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 0.3556992724333064, |
|
"grad_norm": 1.3954883204221082, |
|
"learning_rate": 8.137894362148932e-06, |
|
"loss": 0.7557, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.3565076798706548, |
|
"grad_norm": 1.4347324336354104, |
|
"learning_rate": 8.126896000865396e-06, |
|
"loss": 0.7727, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.35731608730800324, |
|
"grad_norm": 1.3916989023657484, |
|
"learning_rate": 8.115872735731456e-06, |
|
"loss": 0.7805, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.3581244947453517, |
|
"grad_norm": 1.3896721696095782, |
|
"learning_rate": 8.104824654540808e-06, |
|
"loss": 0.7756, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 0.3589329021827001, |
|
"grad_norm": 1.5753672301918493, |
|
"learning_rate": 8.093751845284788e-06, |
|
"loss": 0.7444, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.3597413096200485, |
|
"grad_norm": 1.4739360020234273, |
|
"learning_rate": 8.082654396151676e-06, |
|
"loss": 0.7407, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.3605497170573969, |
|
"grad_norm": 1.6455271651692198, |
|
"learning_rate": 8.071532395525997e-06, |
|
"loss": 0.7664, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.36135812449474536, |
|
"grad_norm": 1.3036154952077734, |
|
"learning_rate": 8.060385931987813e-06, |
|
"loss": 0.7829, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 0.36216653193209375, |
|
"grad_norm": 1.58649380991059, |
|
"learning_rate": 8.049215094312016e-06, |
|
"loss": 0.777, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.3629749393694422, |
|
"grad_norm": 1.426073276884676, |
|
"learning_rate": 8.038019971467627e-06, |
|
"loss": 0.7661, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 0.36378334680679064, |
|
"grad_norm": 1.37105840839501, |
|
"learning_rate": 8.026800652617082e-06, |
|
"loss": 0.7627, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.36459175424413903, |
|
"grad_norm": 1.3575348993109124, |
|
"learning_rate": 8.01555722711552e-06, |
|
"loss": 0.7595, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 0.3654001616814875, |
|
"grad_norm": 1.3386365085409289, |
|
"learning_rate": 8.004289784510085e-06, |
|
"loss": 0.7521, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.36620856911883587, |
|
"grad_norm": 1.4824129268204114, |
|
"learning_rate": 7.992998414539192e-06, |
|
"loss": 0.772, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 0.3670169765561843, |
|
"grad_norm": 1.2895098918031378, |
|
"learning_rate": 7.981683207131828e-06, |
|
"loss": 0.7689, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.36782538399353276, |
|
"grad_norm": 1.3803230665428012, |
|
"learning_rate": 7.970344252406832e-06, |
|
"loss": 0.7602, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.36863379143088115, |
|
"grad_norm": 1.3275993845905512, |
|
"learning_rate": 7.958981640672173e-06, |
|
"loss": 0.7517, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.3694421988682296, |
|
"grad_norm": 1.5336775968372514, |
|
"learning_rate": 7.947595462424237e-06, |
|
"loss": 0.7608, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 0.370250606305578, |
|
"grad_norm": 1.324781077505621, |
|
"learning_rate": 7.9361858083471e-06, |
|
"loss": 0.7554, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.37105901374292644, |
|
"grad_norm": 1.3538550136393135, |
|
"learning_rate": 7.924752769311812e-06, |
|
"loss": 0.752, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 0.3718674211802749, |
|
"grad_norm": 1.6085089085439872, |
|
"learning_rate": 7.913296436375669e-06, |
|
"loss": 0.7346, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.3726758286176233, |
|
"grad_norm": 1.8804974173920925, |
|
"learning_rate": 7.901816900781487e-06, |
|
"loss": 0.7623, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 0.3734842360549717, |
|
"grad_norm": 1.5290904872516846, |
|
"learning_rate": 7.89031425395688e-06, |
|
"loss": 0.7481, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.3742926434923201, |
|
"grad_norm": 1.2645807911489806, |
|
"learning_rate": 7.87878858751353e-06, |
|
"loss": 0.7398, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 0.37510105092966856, |
|
"grad_norm": 1.430758612094428, |
|
"learning_rate": 7.86723999324645e-06, |
|
"loss": 0.762, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.375909458367017, |
|
"grad_norm": 1.5523221259007942, |
|
"learning_rate": 7.855668563133266e-06, |
|
"loss": 0.7636, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.3767178658043654, |
|
"grad_norm": 1.4039231457429788, |
|
"learning_rate": 7.844074389333475e-06, |
|
"loss": 0.7741, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.37752627324171384, |
|
"grad_norm": 1.5800498692580414, |
|
"learning_rate": 7.832457564187715e-06, |
|
"loss": 0.7584, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 0.37833468067906223, |
|
"grad_norm": 1.4231018707509073, |
|
"learning_rate": 7.82081818021703e-06, |
|
"loss": 0.7535, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.3791430881164107, |
|
"grad_norm": 1.4043910558336434, |
|
"learning_rate": 7.809156330122126e-06, |
|
"loss": 0.7629, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 0.3799514955537591, |
|
"grad_norm": 1.3535251605510783, |
|
"learning_rate": 7.79747210678264e-06, |
|
"loss": 0.7611, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.3807599029911075, |
|
"grad_norm": 1.5600412590315658, |
|
"learning_rate": 7.785765603256403e-06, |
|
"loss": 0.7561, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 0.38156831042845596, |
|
"grad_norm": 1.485858509627278, |
|
"learning_rate": 7.774036912778693e-06, |
|
"loss": 0.7689, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.38237671786580435, |
|
"grad_norm": 1.455361982040585, |
|
"learning_rate": 7.762286128761488e-06, |
|
"loss": 0.7427, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 0.3831851253031528, |
|
"grad_norm": 1.4379434832935507, |
|
"learning_rate": 7.750513344792735e-06, |
|
"loss": 0.7512, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.3839935327405012, |
|
"grad_norm": 1.4392546100628072, |
|
"learning_rate": 7.738718654635593e-06, |
|
"loss": 0.7707, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.38480194017784963, |
|
"grad_norm": 1.5225174077271784, |
|
"learning_rate": 7.726902152227692e-06, |
|
"loss": 0.7592, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.3856103476151981, |
|
"grad_norm": 1.3542620619226433, |
|
"learning_rate": 7.715063931680382e-06, |
|
"loss": 0.755, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 0.38641875505254647, |
|
"grad_norm": 1.4675073613366663, |
|
"learning_rate": 7.703204087277989e-06, |
|
"loss": 0.7487, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.3872271624898949, |
|
"grad_norm": 1.525792717822675, |
|
"learning_rate": 7.691322713477055e-06, |
|
"loss": 0.7563, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 0.3880355699272433, |
|
"grad_norm": 1.5306298024716511, |
|
"learning_rate": 7.679419904905594e-06, |
|
"loss": 0.7647, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.38884397736459175, |
|
"grad_norm": 1.471740454637615, |
|
"learning_rate": 7.667495756362333e-06, |
|
"loss": 0.7466, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 0.3896523848019402, |
|
"grad_norm": 1.433993552907344, |
|
"learning_rate": 7.655550362815961e-06, |
|
"loss": 0.7723, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.3904607922392886, |
|
"grad_norm": 1.3890048044664887, |
|
"learning_rate": 7.643583819404373e-06, |
|
"loss": 0.7645, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 0.39126919967663704, |
|
"grad_norm": 1.458717033811816, |
|
"learning_rate": 7.631596221433903e-06, |
|
"loss": 0.7438, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.3920776071139854, |
|
"grad_norm": 1.5403786743607832, |
|
"learning_rate": 7.619587664378576e-06, |
|
"loss": 0.7583, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.3928860145513339, |
|
"grad_norm": 1.5554771708646353, |
|
"learning_rate": 7.607558243879345e-06, |
|
"loss": 0.7568, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.3936944219886823, |
|
"grad_norm": 1.4050877774215123, |
|
"learning_rate": 7.595508055743327e-06, |
|
"loss": 0.7318, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 0.3945028294260307, |
|
"grad_norm": 1.4349627242310348, |
|
"learning_rate": 7.583437195943038e-06, |
|
"loss": 0.7466, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.39531123686337916, |
|
"grad_norm": 1.362494365903188, |
|
"learning_rate": 7.5713457606156335e-06, |
|
"loss": 0.7541, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 0.39611964430072755, |
|
"grad_norm": 1.6090479796008157, |
|
"learning_rate": 7.5592338460621414e-06, |
|
"loss": 0.7542, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.396928051738076, |
|
"grad_norm": 1.4498197495892686, |
|
"learning_rate": 7.547101548746694e-06, |
|
"loss": 0.7683, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 0.39773645917542444, |
|
"grad_norm": 1.5497610550177745, |
|
"learning_rate": 7.534948965295759e-06, |
|
"loss": 0.743, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.39854486661277283, |
|
"grad_norm": 1.633197313139618, |
|
"learning_rate": 7.5227761924973695e-06, |
|
"loss": 0.7619, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 0.3993532740501213, |
|
"grad_norm": 1.4063313558901078, |
|
"learning_rate": 7.510583327300361e-06, |
|
"loss": 0.757, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.40016168148746967, |
|
"grad_norm": 1.416947436523921, |
|
"learning_rate": 7.498370466813586e-06, |
|
"loss": 0.7473, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.4009700889248181, |
|
"grad_norm": 1.436986729489712, |
|
"learning_rate": 7.4861377083051514e-06, |
|
"loss": 0.7482, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.40177849636216656, |
|
"grad_norm": 1.606273588901474, |
|
"learning_rate": 7.473885149201636e-06, |
|
"loss": 0.7499, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 0.40258690379951495, |
|
"grad_norm": 1.516840232761339, |
|
"learning_rate": 7.461612887087324e-06, |
|
"loss": 0.7544, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.4033953112368634, |
|
"grad_norm": 1.6762062202796544, |
|
"learning_rate": 7.449321019703419e-06, |
|
"loss": 0.7484, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 0.4042037186742118, |
|
"grad_norm": 1.4007361159543554, |
|
"learning_rate": 7.437009644947268e-06, |
|
"loss": 0.7531, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.40501212611156023, |
|
"grad_norm": 1.6334162039294893, |
|
"learning_rate": 7.424678860871584e-06, |
|
"loss": 0.7507, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 0.4058205335489086, |
|
"grad_norm": 1.5489666316051582, |
|
"learning_rate": 7.4123287656836625e-06, |
|
"loss": 0.7466, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.40662894098625707, |
|
"grad_norm": 1.521691255397242, |
|
"learning_rate": 7.399959457744603e-06, |
|
"loss": 0.7441, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 0.4074373484236055, |
|
"grad_norm": 1.5151056129411498, |
|
"learning_rate": 7.387571035568523e-06, |
|
"loss": 0.7535, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.4082457558609539, |
|
"grad_norm": 1.5961542357400647, |
|
"learning_rate": 7.375163597821766e-06, |
|
"loss": 0.7738, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.40905416329830235, |
|
"grad_norm": 1.747860345661276, |
|
"learning_rate": 7.362737243322132e-06, |
|
"loss": 0.7298, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.40986257073565074, |
|
"grad_norm": 1.3979379187367524, |
|
"learning_rate": 7.350292071038079e-06, |
|
"loss": 0.7421, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 0.4106709781729992, |
|
"grad_norm": 1.5286299117288702, |
|
"learning_rate": 7.337828180087934e-06, |
|
"loss": 0.7606, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.41147938561034764, |
|
"grad_norm": 1.482169171151244, |
|
"learning_rate": 7.3253456697391145e-06, |
|
"loss": 0.7534, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 0.412287793047696, |
|
"grad_norm": 1.507882701454434, |
|
"learning_rate": 7.3128446394073216e-06, |
|
"loss": 0.7617, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.4130962004850445, |
|
"grad_norm": 1.3817103348743367, |
|
"learning_rate": 7.300325188655762e-06, |
|
"loss": 0.7612, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 0.41390460792239286, |
|
"grad_norm": 1.449395598404693, |
|
"learning_rate": 7.287787417194348e-06, |
|
"loss": 0.7467, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.4147130153597413, |
|
"grad_norm": 1.6097878448442098, |
|
"learning_rate": 7.275231424878906e-06, |
|
"loss": 0.7833, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 0.41552142279708976, |
|
"grad_norm": 1.4604575128666755, |
|
"learning_rate": 7.262657311710383e-06, |
|
"loss": 0.7547, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.41632983023443815, |
|
"grad_norm": 1.4553068247385297, |
|
"learning_rate": 7.2500651778340425e-06, |
|
"loss": 0.7272, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.4171382376717866, |
|
"grad_norm": 1.5580605108332357, |
|
"learning_rate": 7.237455123538678e-06, |
|
"loss": 0.7622, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.417946645109135, |
|
"grad_norm": 1.397575829167641, |
|
"learning_rate": 7.224827249255804e-06, |
|
"loss": 0.7439, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 0.41875505254648343, |
|
"grad_norm": 1.7002294742720083, |
|
"learning_rate": 7.212181655558863e-06, |
|
"loss": 0.7463, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.4195634599838319, |
|
"grad_norm": 1.5013399985702793, |
|
"learning_rate": 7.199518443162419e-06, |
|
"loss": 0.7527, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 0.42037186742118027, |
|
"grad_norm": 1.619027591523225, |
|
"learning_rate": 7.186837712921362e-06, |
|
"loss": 0.7536, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.4211802748585287, |
|
"grad_norm": 1.5305697974020676, |
|
"learning_rate": 7.174139565830098e-06, |
|
"loss": 0.7551, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 0.4219886822958771, |
|
"grad_norm": 1.7269161713804273, |
|
"learning_rate": 7.161424103021752e-06, |
|
"loss": 0.7676, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.42279708973322555, |
|
"grad_norm": 1.5491570246039656, |
|
"learning_rate": 7.148691425767354e-06, |
|
"loss": 0.7314, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 0.423605497170574, |
|
"grad_norm": 1.7174735150190135, |
|
"learning_rate": 7.1359416354750365e-06, |
|
"loss": 0.7291, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.4244139046079224, |
|
"grad_norm": 1.5540965935630928, |
|
"learning_rate": 7.12317483368923e-06, |
|
"loss": 0.7572, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.42522231204527083, |
|
"grad_norm": 1.449181152489211, |
|
"learning_rate": 7.1103911220898544e-06, |
|
"loss": 0.743, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.4260307194826192, |
|
"grad_norm": 1.652128164913507, |
|
"learning_rate": 7.097590602491495e-06, |
|
"loss": 0.7619, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 0.42683912691996767, |
|
"grad_norm": 1.5607840252827987, |
|
"learning_rate": 7.084773376842615e-06, |
|
"loss": 0.748, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.42764753435731606, |
|
"grad_norm": 1.6399087632058216, |
|
"learning_rate": 7.0719395472247225e-06, |
|
"loss": 0.7618, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 0.4284559417946645, |
|
"grad_norm": 1.6180383108334129, |
|
"learning_rate": 7.05908921585157e-06, |
|
"loss": 0.7473, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.42926434923201295, |
|
"grad_norm": 1.8684396820882947, |
|
"learning_rate": 7.046222485068339e-06, |
|
"loss": 0.7198, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 0.43007275666936134, |
|
"grad_norm": 1.5146248805420086, |
|
"learning_rate": 7.0333394573508185e-06, |
|
"loss": 0.7504, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.4308811641067098, |
|
"grad_norm": 1.6657404552018036, |
|
"learning_rate": 7.020440235304593e-06, |
|
"loss": 0.7469, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 0.4316895715440582, |
|
"grad_norm": 1.597433899337064, |
|
"learning_rate": 7.007524921664226e-06, |
|
"loss": 0.7218, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.43249797898140663, |
|
"grad_norm": 1.6034447006676202, |
|
"learning_rate": 6.994593619292441e-06, |
|
"loss": 0.7484, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.4333063864187551, |
|
"grad_norm": 1.4825714570898108, |
|
"learning_rate": 6.981646431179304e-06, |
|
"loss": 0.7515, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.43411479385610346, |
|
"grad_norm": 1.6102467057772276, |
|
"learning_rate": 6.968683460441398e-06, |
|
"loss": 0.7426, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 0.4349232012934519, |
|
"grad_norm": 1.6012582102326722, |
|
"learning_rate": 6.9557048103210065e-06, |
|
"loss": 0.7158, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.4357316087308003, |
|
"grad_norm": 1.5017101591262598, |
|
"learning_rate": 6.942710584185292e-06, |
|
"loss": 0.7265, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 0.43654001616814875, |
|
"grad_norm": 1.491162425876339, |
|
"learning_rate": 6.929700885525466e-06, |
|
"loss": 0.7296, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.4373484236054972, |
|
"grad_norm": 1.618547335353381, |
|
"learning_rate": 6.916675817955973e-06, |
|
"loss": 0.7587, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 0.4381568310428456, |
|
"grad_norm": 1.5583303827019015, |
|
"learning_rate": 6.9036354852136625e-06, |
|
"loss": 0.763, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.43896523848019403, |
|
"grad_norm": 1.5580614184472952, |
|
"learning_rate": 6.890579991156958e-06, |
|
"loss": 0.7393, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 0.4397736459175424, |
|
"grad_norm": 1.6599720531878508, |
|
"learning_rate": 6.8775094397650375e-06, |
|
"loss": 0.7413, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.44058205335489087, |
|
"grad_norm": 1.6475335959819555, |
|
"learning_rate": 6.864423935136999e-06, |
|
"loss": 0.7319, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.4413904607922393, |
|
"grad_norm": 1.6868001230472809, |
|
"learning_rate": 6.851323581491034e-06, |
|
"loss": 0.7317, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.4421988682295877, |
|
"grad_norm": 1.7072560513083037, |
|
"learning_rate": 6.838208483163601e-06, |
|
"loss": 0.7502, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 0.44300727566693615, |
|
"grad_norm": 1.4703001017567308, |
|
"learning_rate": 6.825078744608589e-06, |
|
"loss": 0.7497, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.44381568310428454, |
|
"grad_norm": 1.6393092026980918, |
|
"learning_rate": 6.811934470396484e-06, |
|
"loss": 0.7306, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 0.444624090541633, |
|
"grad_norm": 1.6755312874196466, |
|
"learning_rate": 6.7987757652135456e-06, |
|
"loss": 0.739, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.44543249797898143, |
|
"grad_norm": 1.6202018958792113, |
|
"learning_rate": 6.785602733860963e-06, |
|
"loss": 0.7381, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 0.4462409054163298, |
|
"grad_norm": 1.5932719199258238, |
|
"learning_rate": 6.77241548125403e-06, |
|
"loss": 0.7329, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.44704931285367827, |
|
"grad_norm": 1.7002494620385344, |
|
"learning_rate": 6.759214112421297e-06, |
|
"loss": 0.7509, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 0.44785772029102666, |
|
"grad_norm": 1.8232932108091804, |
|
"learning_rate": 6.745998732503749e-06, |
|
"loss": 0.7465, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.4486661277283751, |
|
"grad_norm": 1.8027285940055193, |
|
"learning_rate": 6.732769446753954e-06, |
|
"loss": 0.7512, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.4494745351657235, |
|
"grad_norm": 1.476838340418493, |
|
"learning_rate": 6.719526360535238e-06, |
|
"loss": 0.7478, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.45028294260307195, |
|
"grad_norm": 1.6030014859566264, |
|
"learning_rate": 6.706269579320834e-06, |
|
"loss": 0.7491, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 0.4510913500404204, |
|
"grad_norm": 2.0330037226973148, |
|
"learning_rate": 6.6929992086930515e-06, |
|
"loss": 0.7374, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.4518997574777688, |
|
"grad_norm": 1.5613281088896387, |
|
"learning_rate": 6.6797153543424285e-06, |
|
"loss": 0.7342, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 0.45270816491511723, |
|
"grad_norm": 1.5249975129037057, |
|
"learning_rate": 6.666418122066896e-06, |
|
"loss": 0.7227, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.4535165723524656, |
|
"grad_norm": 1.6452739375240444, |
|
"learning_rate": 6.653107617770928e-06, |
|
"loss": 0.754, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 0.45432497978981407, |
|
"grad_norm": 1.6396256738334043, |
|
"learning_rate": 6.639783947464707e-06, |
|
"loss": 0.7337, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.4551333872271625, |
|
"grad_norm": 1.8559299829849996, |
|
"learning_rate": 6.626447217263269e-06, |
|
"loss": 0.7486, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 0.4559417946645109, |
|
"grad_norm": 1.724861765792677, |
|
"learning_rate": 6.613097533385671e-06, |
|
"loss": 0.729, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.45675020210185935, |
|
"grad_norm": 1.6120913915780044, |
|
"learning_rate": 6.599735002154133e-06, |
|
"loss": 0.7246, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.45755860953920774, |
|
"grad_norm": 1.5561331554910032, |
|
"learning_rate": 6.5863597299932e-06, |
|
"loss": 0.7424, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.4583670169765562, |
|
"grad_norm": 1.5756166020131763, |
|
"learning_rate": 6.572971823428885e-06, |
|
"loss": 0.736, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 0.45917542441390463, |
|
"grad_norm": 1.5901722292065572, |
|
"learning_rate": 6.559571389087834e-06, |
|
"loss": 0.7277, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.459983831851253, |
|
"grad_norm": 1.8199798497598791, |
|
"learning_rate": 6.546158533696465e-06, |
|
"loss": 0.7521, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 0.46079223928860147, |
|
"grad_norm": 1.6353472073783677, |
|
"learning_rate": 6.532733364080126e-06, |
|
"loss": 0.7558, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.46160064672594986, |
|
"grad_norm": 1.6335140922701896, |
|
"learning_rate": 6.519295987162232e-06, |
|
"loss": 0.7401, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 0.4624090541632983, |
|
"grad_norm": 1.751367401482544, |
|
"learning_rate": 6.50584650996343e-06, |
|
"loss": 0.7434, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.46321746160064675, |
|
"grad_norm": 1.566395823390269, |
|
"learning_rate": 6.492385039600735e-06, |
|
"loss": 0.7803, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 0.46402586903799514, |
|
"grad_norm": 1.5360516049112365, |
|
"learning_rate": 6.4789116832866834e-06, |
|
"loss": 0.7587, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.4648342764753436, |
|
"grad_norm": 1.819847858777574, |
|
"learning_rate": 6.465426548328473e-06, |
|
"loss": 0.7478, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.465642683912692, |
|
"grad_norm": 1.5385023230471138, |
|
"learning_rate": 6.451929742127109e-06, |
|
"loss": 0.7337, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.4664510913500404, |
|
"grad_norm": 1.4847036423315738, |
|
"learning_rate": 6.4384213721765565e-06, |
|
"loss": 0.7367, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 0.46725949878738887, |
|
"grad_norm": 1.6452154053274373, |
|
"learning_rate": 6.424901546062878e-06, |
|
"loss": 0.7464, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.46806790622473726, |
|
"grad_norm": 1.891586315433217, |
|
"learning_rate": 6.411370371463373e-06, |
|
"loss": 0.7587, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 0.4688763136620857, |
|
"grad_norm": 1.6847638248373114, |
|
"learning_rate": 6.397827956145732e-06, |
|
"loss": 0.757, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.4696847210994341, |
|
"grad_norm": 1.935186064874233, |
|
"learning_rate": 6.3842744079671634e-06, |
|
"loss": 0.7285, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 0.47049312853678255, |
|
"grad_norm": 1.6081940113542759, |
|
"learning_rate": 6.370709834873547e-06, |
|
"loss": 0.7466, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.47130153597413094, |
|
"grad_norm": 1.9116295949365476, |
|
"learning_rate": 6.35713434489857e-06, |
|
"loss": 0.72, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 0.4721099434114794, |
|
"grad_norm": 1.775823518041551, |
|
"learning_rate": 6.343548046162863e-06, |
|
"loss": 0.7538, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.47291835084882783, |
|
"grad_norm": 1.62571587035583, |
|
"learning_rate": 6.329951046873143e-06, |
|
"loss": 0.7426, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.4737267582861762, |
|
"grad_norm": 1.774624905090093, |
|
"learning_rate": 6.31634345532135e-06, |
|
"loss": 0.718, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.47453516572352467, |
|
"grad_norm": 1.6468612905160713, |
|
"learning_rate": 6.302725379883787e-06, |
|
"loss": 0.7293, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 0.47534357316087306, |
|
"grad_norm": 1.6068150290028567, |
|
"learning_rate": 6.289096929020254e-06, |
|
"loss": 0.7227, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.4761519805982215, |
|
"grad_norm": 1.821341348490976, |
|
"learning_rate": 6.275458211273182e-06, |
|
"loss": 0.7291, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 0.47696038803556995, |
|
"grad_norm": 1.646392168409669, |
|
"learning_rate": 6.261809335266776e-06, |
|
"loss": 0.7588, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.47776879547291834, |
|
"grad_norm": 1.4998598776984355, |
|
"learning_rate": 6.248150409706144e-06, |
|
"loss": 0.7431, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 0.4785772029102668, |
|
"grad_norm": 1.6291849374923184, |
|
"learning_rate": 6.234481543376433e-06, |
|
"loss": 0.7494, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.4793856103476152, |
|
"grad_norm": 1.6806295233872666, |
|
"learning_rate": 6.2208028451419575e-06, |
|
"loss": 0.7506, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 0.4801940177849636, |
|
"grad_norm": 1.788703909711479, |
|
"learning_rate": 6.207114423945346e-06, |
|
"loss": 0.7391, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.48100242522231207, |
|
"grad_norm": 1.7460679090246425, |
|
"learning_rate": 6.193416388806655e-06, |
|
"loss": 0.7512, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.48181083265966046, |
|
"grad_norm": 1.7991177181949694, |
|
"learning_rate": 6.179708848822521e-06, |
|
"loss": 0.7494, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.4826192400970089, |
|
"grad_norm": 1.6195605598787102, |
|
"learning_rate": 6.165991913165271e-06, |
|
"loss": 0.7395, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 0.4834276475343573, |
|
"grad_norm": 1.9898749874558108, |
|
"learning_rate": 6.152265691082067e-06, |
|
"loss": 0.7169, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.48423605497170574, |
|
"grad_norm": 1.8398403882057845, |
|
"learning_rate": 6.138530291894033e-06, |
|
"loss": 0.7584, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 0.4850444624090542, |
|
"grad_norm": 1.828005720680138, |
|
"learning_rate": 6.124785824995381e-06, |
|
"loss": 0.7314, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.4858528698464026, |
|
"grad_norm": 1.7421782056931043, |
|
"learning_rate": 6.111032399852542e-06, |
|
"loss": 0.7388, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 0.486661277283751, |
|
"grad_norm": 1.6022841844735267, |
|
"learning_rate": 6.097270126003297e-06, |
|
"loss": 0.7241, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.4874696847210994, |
|
"grad_norm": 1.743402917972022, |
|
"learning_rate": 6.083499113055897e-06, |
|
"loss": 0.7354, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 0.48827809215844786, |
|
"grad_norm": 1.4072740337152898, |
|
"learning_rate": 6.069719470688199e-06, |
|
"loss": 0.7334, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.4890864995957963, |
|
"grad_norm": 1.8931792386123252, |
|
"learning_rate": 6.0559313086467854e-06, |
|
"loss": 0.7301, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.4898949070331447, |
|
"grad_norm": 1.5281809673914062, |
|
"learning_rate": 6.042134736746093e-06, |
|
"loss": 0.7324, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.49070331447049315, |
|
"grad_norm": 1.856573916290289, |
|
"learning_rate": 6.028329864867538e-06, |
|
"loss": 0.7324, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 0.49151172190784154, |
|
"grad_norm": 2.038603374836649, |
|
"learning_rate": 6.0145168029586434e-06, |
|
"loss": 0.7276, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.49232012934519, |
|
"grad_norm": 1.9183080921170146, |
|
"learning_rate": 6.000695661032158e-06, |
|
"loss": 0.7344, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 0.4931285367825384, |
|
"grad_norm": 1.6918091903565058, |
|
"learning_rate": 5.986866549165185e-06, |
|
"loss": 0.7121, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.4939369442198868, |
|
"grad_norm": 1.467542289658805, |
|
"learning_rate": 5.9730295774983e-06, |
|
"loss": 0.7412, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 0.49474535165723527, |
|
"grad_norm": 1.6510602277072752, |
|
"learning_rate": 5.959184856234681e-06, |
|
"loss": 0.7089, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.49555375909458366, |
|
"grad_norm": 1.5658481426672775, |
|
"learning_rate": 5.9453324956392264e-06, |
|
"loss": 0.7382, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 0.4963621665319321, |
|
"grad_norm": 1.810370657979415, |
|
"learning_rate": 5.931472606037677e-06, |
|
"loss": 0.7269, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.4971705739692805, |
|
"grad_norm": 2.1265390970725675, |
|
"learning_rate": 5.917605297815736e-06, |
|
"loss": 0.7319, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.49797898140662894, |
|
"grad_norm": 1.9115609327468914, |
|
"learning_rate": 5.903730681418191e-06, |
|
"loss": 0.7489, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.4987873888439774, |
|
"grad_norm": 1.7058868208634674, |
|
"learning_rate": 5.8898488673480385e-06, |
|
"loss": 0.7291, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 0.4995957962813258, |
|
"grad_norm": 1.5461578620231866, |
|
"learning_rate": 5.8759599661655975e-06, |
|
"loss": 0.7216, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.5004042037186742, |
|
"grad_norm": 1.7707716709348011, |
|
"learning_rate": 5.862064088487632e-06, |
|
"loss": 0.7209, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 0.5012126111560227, |
|
"grad_norm": 1.6647808647832354, |
|
"learning_rate": 5.8481613449864695e-06, |
|
"loss": 0.733, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.502021018593371, |
|
"grad_norm": 1.9301899340867452, |
|
"learning_rate": 5.8342518463891195e-06, |
|
"loss": 0.7321, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 0.5028294260307195, |
|
"grad_norm": 1.8541494136601961, |
|
"learning_rate": 5.820335703476394e-06, |
|
"loss": 0.7195, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.5036378334680679, |
|
"grad_norm": 1.684397088706739, |
|
"learning_rate": 5.806413027082018e-06, |
|
"loss": 0.736, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 0.5044462409054163, |
|
"grad_norm": 1.838888999222473, |
|
"learning_rate": 5.792483928091759e-06, |
|
"loss": 0.7188, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.5052546483427648, |
|
"grad_norm": 1.6586266130026301, |
|
"learning_rate": 5.7785485174425285e-06, |
|
"loss": 0.7341, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.5060630557801131, |
|
"grad_norm": 1.6053396069937373, |
|
"learning_rate": 5.764606906121513e-06, |
|
"loss": 0.7415, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.5068714632174616, |
|
"grad_norm": 1.8485601800302767, |
|
"learning_rate": 5.75065920516528e-06, |
|
"loss": 0.7358, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 0.50767987065481, |
|
"grad_norm": 1.7801558687500054, |
|
"learning_rate": 5.7367055256589e-06, |
|
"loss": 0.7389, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.5084882780921585, |
|
"grad_norm": 1.7682829788110874, |
|
"learning_rate": 5.722745978735056e-06, |
|
"loss": 0.7463, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 0.5092966855295069, |
|
"grad_norm": 2.0310097528031514, |
|
"learning_rate": 5.708780675573163e-06, |
|
"loss": 0.7495, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.5101050929668552, |
|
"grad_norm": 1.6939653960527117, |
|
"learning_rate": 5.694809727398483e-06, |
|
"loss": 0.735, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 0.5109135004042037, |
|
"grad_norm": 1.4907854480309246, |
|
"learning_rate": 5.680833245481234e-06, |
|
"loss": 0.7112, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.5117219078415521, |
|
"grad_norm": 1.5916645160260514, |
|
"learning_rate": 5.666851341135706e-06, |
|
"loss": 0.7314, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 0.5125303152789006, |
|
"grad_norm": 1.7263945402809162, |
|
"learning_rate": 5.652864125719382e-06, |
|
"loss": 0.7453, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.513338722716249, |
|
"grad_norm": 1.7250369660017415, |
|
"learning_rate": 5.638871710632037e-06, |
|
"loss": 0.7499, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.5141471301535974, |
|
"grad_norm": 1.66803893376865, |
|
"learning_rate": 5.624874207314861e-06, |
|
"loss": 0.7165, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.5149555375909458, |
|
"grad_norm": 1.9530837325477433, |
|
"learning_rate": 5.61087172724957e-06, |
|
"loss": 0.751, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 0.5157639450282943, |
|
"grad_norm": 1.7036232716696973, |
|
"learning_rate": 5.596864381957514e-06, |
|
"loss": 0.7072, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.5165723524656427, |
|
"grad_norm": 2.0287938769391585, |
|
"learning_rate": 5.5828522829987965e-06, |
|
"loss": 0.7456, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 0.5173807599029911, |
|
"grad_norm": 1.961486108448002, |
|
"learning_rate": 5.5688355419713766e-06, |
|
"loss": 0.729, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.5181891673403395, |
|
"grad_norm": 1.747396445262512, |
|
"learning_rate": 5.554814270510185e-06, |
|
"loss": 0.7428, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 0.5189975747776879, |
|
"grad_norm": 1.9789250758812496, |
|
"learning_rate": 5.540788580286236e-06, |
|
"loss": 0.7216, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.5198059822150364, |
|
"grad_norm": 1.7067424322279225, |
|
"learning_rate": 5.526758583005736e-06, |
|
"loss": 0.7388, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 0.5206143896523848, |
|
"grad_norm": 1.530137654068918, |
|
"learning_rate": 5.512724390409197e-06, |
|
"loss": 0.7456, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.5214227970897333, |
|
"grad_norm": 1.9188410439442418, |
|
"learning_rate": 5.4986861142705396e-06, |
|
"loss": 0.7257, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.5222312045270816, |
|
"grad_norm": 1.4829883700282378, |
|
"learning_rate": 5.484643866396211e-06, |
|
"loss": 0.7231, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.52303961196443, |
|
"grad_norm": 1.9423854124597335, |
|
"learning_rate": 5.47059775862429e-06, |
|
"loss": 0.7327, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 0.5238480194017785, |
|
"grad_norm": 1.585692364516504, |
|
"learning_rate": 5.456547902823596e-06, |
|
"loss": 0.7095, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.5246564268391269, |
|
"grad_norm": 1.6935294419570495, |
|
"learning_rate": 5.4424944108928005e-06, |
|
"loss": 0.7176, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 0.5254648342764754, |
|
"grad_norm": 1.9443703299521362, |
|
"learning_rate": 5.428437394759534e-06, |
|
"loss": 0.7548, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.5262732417138237, |
|
"grad_norm": 1.8920530979032508, |
|
"learning_rate": 5.414376966379494e-06, |
|
"loss": 0.7295, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 0.5270816491511722, |
|
"grad_norm": 1.742331924607013, |
|
"learning_rate": 5.4003132377355594e-06, |
|
"loss": 0.7507, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.5278900565885206, |
|
"grad_norm": 1.8552434605554495, |
|
"learning_rate": 5.386246320836887e-06, |
|
"loss": 0.7311, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 0.5286984640258691, |
|
"grad_norm": 1.804743780018075, |
|
"learning_rate": 5.372176327718029e-06, |
|
"loss": 0.7357, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.5295068714632175, |
|
"grad_norm": 1.6662837706239626, |
|
"learning_rate": 5.35810337043804e-06, |
|
"loss": 0.7281, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.5303152789005658, |
|
"grad_norm": 1.6292430108827105, |
|
"learning_rate": 5.34402756107958e-06, |
|
"loss": 0.7355, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.5311236863379143, |
|
"grad_norm": 1.7350237488555562, |
|
"learning_rate": 5.3299490117480245e-06, |
|
"loss": 0.7472, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 0.5319320937752627, |
|
"grad_norm": 1.7851311336238955, |
|
"learning_rate": 5.315867834570573e-06, |
|
"loss": 0.7263, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.5327405012126112, |
|
"grad_norm": 1.6356935298013957, |
|
"learning_rate": 5.301784141695348e-06, |
|
"loss": 0.7409, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 0.5335489086499596, |
|
"grad_norm": 1.719781130457011, |
|
"learning_rate": 5.287698045290514e-06, |
|
"loss": 0.7433, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.534357316087308, |
|
"grad_norm": 2.003270367765294, |
|
"learning_rate": 5.2736096575433805e-06, |
|
"loss": 0.7356, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 0.5351657235246564, |
|
"grad_norm": 1.4439742700866685, |
|
"learning_rate": 5.2595190906595e-06, |
|
"loss": 0.7364, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.5359741309620049, |
|
"grad_norm": 1.740158385555015, |
|
"learning_rate": 5.2454264568617815e-06, |
|
"loss": 0.7312, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 0.5367825383993533, |
|
"grad_norm": 1.9626910345859885, |
|
"learning_rate": 5.231331868389599e-06, |
|
"loss": 0.7503, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.5375909458367018, |
|
"grad_norm": 1.68124601360505, |
|
"learning_rate": 5.2172354374978905e-06, |
|
"loss": 0.7406, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 0.5383993532740501, |
|
"grad_norm": 2.1050005093144994, |
|
"learning_rate": 5.203137276456272e-06, |
|
"loss": 0.7235, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.5392077607113985, |
|
"grad_norm": 1.8947511081977626, |
|
"learning_rate": 5.189037497548136e-06, |
|
"loss": 0.7267, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 0.540016168148747, |
|
"grad_norm": 1.8668010593718123, |
|
"learning_rate": 5.174936213069761e-06, |
|
"loss": 0.7309, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.5408245755860954, |
|
"grad_norm": 1.9553385282357956, |
|
"learning_rate": 5.160833535329417e-06, |
|
"loss": 0.7292, |
|
"step": 3345 |
|
}, |
|
{ |
|
"epoch": 0.5416329830234439, |
|
"grad_norm": 1.5662310478755188, |
|
"learning_rate": 5.146729576646469e-06, |
|
"loss": 0.7083, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.5424413904607922, |
|
"grad_norm": 1.982515442376847, |
|
"learning_rate": 5.132624449350486e-06, |
|
"loss": 0.7473, |
|
"step": 3355 |
|
}, |
|
{ |
|
"epoch": 0.5432497978981407, |
|
"grad_norm": 1.696121107818326, |
|
"learning_rate": 5.118518265780343e-06, |
|
"loss": 0.7127, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.5440582053354891, |
|
"grad_norm": 1.792047111863481, |
|
"learning_rate": 5.1044111382833284e-06, |
|
"loss": 0.7315, |
|
"step": 3365 |
|
}, |
|
{ |
|
"epoch": 0.5448666127728375, |
|
"grad_norm": 1.6410942741564183, |
|
"learning_rate": 5.090303179214248e-06, |
|
"loss": 0.7202, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.5456750202101859, |
|
"grad_norm": 1.7980781288846641, |
|
"learning_rate": 5.0761945009345295e-06, |
|
"loss": 0.708, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.5464834276475343, |
|
"grad_norm": 1.7891594579241739, |
|
"learning_rate": 5.06208521581133e-06, |
|
"loss": 0.739, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.5472918350848828, |
|
"grad_norm": 1.8996907092180035, |
|
"learning_rate": 5.04797543621664e-06, |
|
"loss": 0.7259, |
|
"step": 3385 |
|
}, |
|
{ |
|
"epoch": 0.5481002425222312, |
|
"grad_norm": 1.7441485524528433, |
|
"learning_rate": 5.033865274526388e-06, |
|
"loss": 0.7234, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.5489086499595797, |
|
"grad_norm": 1.8800934694644797, |
|
"learning_rate": 5.019754843119544e-06, |
|
"loss": 0.718, |
|
"step": 3395 |
|
}, |
|
{ |
|
"epoch": 0.549717057396928, |
|
"grad_norm": 1.639197995446039, |
|
"learning_rate": 5.00564425437723e-06, |
|
"loss": 0.7505, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.5505254648342764, |
|
"grad_norm": 1.7066220016466764, |
|
"learning_rate": 4.991533620681814e-06, |
|
"loss": 0.6972, |
|
"step": 3405 |
|
}, |
|
{ |
|
"epoch": 0.5513338722716249, |
|
"grad_norm": 1.6540407961792254, |
|
"learning_rate": 4.977423054416031e-06, |
|
"loss": 0.7369, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.5521422797089733, |
|
"grad_norm": 2.3849302708294666, |
|
"learning_rate": 4.963312667962072e-06, |
|
"loss": 0.737, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 0.5529506871463218, |
|
"grad_norm": 1.6980580304581847, |
|
"learning_rate": 4.949202573700699e-06, |
|
"loss": 0.7243, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.5537590945836701, |
|
"grad_norm": 1.8442646934282483, |
|
"learning_rate": 4.935092884010347e-06, |
|
"loss": 0.7174, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 0.5545675020210186, |
|
"grad_norm": 1.7357839718219499, |
|
"learning_rate": 4.920983711266225e-06, |
|
"loss": 0.7252, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.555375909458367, |
|
"grad_norm": 2.068408711770871, |
|
"learning_rate": 4.906875167839433e-06, |
|
"loss": 0.7427, |
|
"step": 3435 |
|
}, |
|
{ |
|
"epoch": 0.5561843168957155, |
|
"grad_norm": 1.6763421869702522, |
|
"learning_rate": 4.89276736609605e-06, |
|
"loss": 0.7285, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.5569927243330639, |
|
"grad_norm": 1.7651505838736554, |
|
"learning_rate": 4.878660418396254e-06, |
|
"loss": 0.7296, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 0.5578011317704122, |
|
"grad_norm": 1.703327968146327, |
|
"learning_rate": 4.864554437093416e-06, |
|
"loss": 0.7208, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.5586095392077607, |
|
"grad_norm": 1.7704160277811705, |
|
"learning_rate": 4.850449534533213e-06, |
|
"loss": 0.7493, |
|
"step": 3455 |
|
}, |
|
{ |
|
"epoch": 0.5594179466451091, |
|
"grad_norm": 1.907706505426485, |
|
"learning_rate": 4.836345823052735e-06, |
|
"loss": 0.7242, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.5602263540824576, |
|
"grad_norm": 1.8298403134205508, |
|
"learning_rate": 4.822243414979578e-06, |
|
"loss": 0.7126, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 0.561034761519806, |
|
"grad_norm": 2.0492118497937484, |
|
"learning_rate": 4.8081424226309605e-06, |
|
"loss": 0.7193, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.5618431689571544, |
|
"grad_norm": 1.9413458452075376, |
|
"learning_rate": 4.794042958312824e-06, |
|
"loss": 0.7177, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 0.5626515763945028, |
|
"grad_norm": 1.5066422062448757, |
|
"learning_rate": 4.779945134318944e-06, |
|
"loss": 0.7048, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.5634599838318513, |
|
"grad_norm": 1.554271039498153, |
|
"learning_rate": 4.765849062930029e-06, |
|
"loss": 0.7344, |
|
"step": 3485 |
|
}, |
|
{ |
|
"epoch": 0.5642683912691997, |
|
"grad_norm": 1.600921934972658, |
|
"learning_rate": 4.75175485641283e-06, |
|
"loss": 0.7101, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.5650767987065481, |
|
"grad_norm": 2.424494189852517, |
|
"learning_rate": 4.737662627019244e-06, |
|
"loss": 0.7251, |
|
"step": 3495 |
|
}, |
|
{ |
|
"epoch": 0.5658852061438965, |
|
"grad_norm": 1.6300840852213847, |
|
"learning_rate": 4.723572486985421e-06, |
|
"loss": 0.728, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.5666936135812449, |
|
"grad_norm": 2.2647753462217683, |
|
"learning_rate": 4.7094845485308735e-06, |
|
"loss": 0.7185, |
|
"step": 3505 |
|
}, |
|
{ |
|
"epoch": 0.5675020210185934, |
|
"grad_norm": 1.7811122307778515, |
|
"learning_rate": 4.695398923857579e-06, |
|
"loss": 0.7331, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.5683104284559418, |
|
"grad_norm": 1.9941270454793851, |
|
"learning_rate": 4.681315725149083e-06, |
|
"loss": 0.7357, |
|
"step": 3515 |
|
}, |
|
{ |
|
"epoch": 0.5691188358932903, |
|
"grad_norm": 1.8422816032944238, |
|
"learning_rate": 4.667235064569616e-06, |
|
"loss": 0.7043, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.5699272433306386, |
|
"grad_norm": 1.7136459947309182, |
|
"learning_rate": 4.6531570542631884e-06, |
|
"loss": 0.7283, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 0.570735650767987, |
|
"grad_norm": 2.09761849915152, |
|
"learning_rate": 4.639081806352707e-06, |
|
"loss": 0.7309, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.5715440582053355, |
|
"grad_norm": 1.6239566569251038, |
|
"learning_rate": 4.625009432939075e-06, |
|
"loss": 0.7194, |
|
"step": 3535 |
|
}, |
|
{ |
|
"epoch": 0.5723524656426839, |
|
"grad_norm": 1.6412212532766335, |
|
"learning_rate": 4.6109400461003005e-06, |
|
"loss": 0.706, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.5731608730800324, |
|
"grad_norm": 1.8885243243418546, |
|
"learning_rate": 4.596873757890612e-06, |
|
"loss": 0.7402, |
|
"step": 3545 |
|
}, |
|
{ |
|
"epoch": 0.5739692805173807, |
|
"grad_norm": 1.6571244102934766, |
|
"learning_rate": 4.582810680339551e-06, |
|
"loss": 0.7245, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.5747776879547292, |
|
"grad_norm": 1.6702595320617768, |
|
"learning_rate": 4.5687509254510924e-06, |
|
"loss": 0.7219, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 0.5755860953920776, |
|
"grad_norm": 1.5072913826468763, |
|
"learning_rate": 4.5546946052027505e-06, |
|
"loss": 0.7228, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.5763945028294261, |
|
"grad_norm": 1.9412492977577889, |
|
"learning_rate": 4.540641831544678e-06, |
|
"loss": 0.7209, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 0.5772029102667745, |
|
"grad_norm": 1.9123748628207098, |
|
"learning_rate": 4.526592716398788e-06, |
|
"loss": 0.7314, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.5780113177041228, |
|
"grad_norm": 1.9224889275166772, |
|
"learning_rate": 4.51254737165785e-06, |
|
"loss": 0.7199, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 0.5788197251414713, |
|
"grad_norm": 1.648787454331624, |
|
"learning_rate": 4.49850590918461e-06, |
|
"loss": 0.7292, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.5796281325788197, |
|
"grad_norm": 2.2311883887862187, |
|
"learning_rate": 4.484468440810888e-06, |
|
"loss": 0.7138, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 0.5804365400161682, |
|
"grad_norm": 1.8060045733095076, |
|
"learning_rate": 4.470435078336699e-06, |
|
"loss": 0.723, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.5812449474535166, |
|
"grad_norm": 1.8077856651950424, |
|
"learning_rate": 4.456405933529355e-06, |
|
"loss": 0.7089, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 0.582053354890865, |
|
"grad_norm": 1.5403196099771954, |
|
"learning_rate": 4.442381118122573e-06, |
|
"loss": 0.7187, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.5828617623282134, |
|
"grad_norm": 1.77289982258147, |
|
"learning_rate": 4.428360743815597e-06, |
|
"loss": 0.7036, |
|
"step": 3605 |
|
}, |
|
{ |
|
"epoch": 0.5836701697655619, |
|
"grad_norm": 1.6034581186012247, |
|
"learning_rate": 4.414344922272292e-06, |
|
"loss": 0.7228, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.5844785772029103, |
|
"grad_norm": 1.6471712911511105, |
|
"learning_rate": 4.400333765120268e-06, |
|
"loss": 0.7317, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 0.5852869846402587, |
|
"grad_norm": 1.6056924533457961, |
|
"learning_rate": 4.386327383949986e-06, |
|
"loss": 0.7223, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.5860953920776071, |
|
"grad_norm": 1.7550468882082644, |
|
"learning_rate": 4.372325890313864e-06, |
|
"loss": 0.7164, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.5869037995149555, |
|
"grad_norm": 1.7554697237013572, |
|
"learning_rate": 4.358329395725403e-06, |
|
"loss": 0.7177, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.587712206952304, |
|
"grad_norm": 1.891791374124361, |
|
"learning_rate": 4.3443380116582776e-06, |
|
"loss": 0.694, |
|
"step": 3635 |
|
}, |
|
{ |
|
"epoch": 0.5885206143896524, |
|
"grad_norm": 2.103271809218415, |
|
"learning_rate": 4.330351849545471e-06, |
|
"loss": 0.7278, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.5893290218270008, |
|
"grad_norm": 1.9049264666214307, |
|
"learning_rate": 4.316371020778372e-06, |
|
"loss": 0.6899, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 0.5901374292643492, |
|
"grad_norm": 1.834904512639952, |
|
"learning_rate": 4.302395636705888e-06, |
|
"loss": 0.7336, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.5909458367016976, |
|
"grad_norm": 1.5107909122682632, |
|
"learning_rate": 4.2884258086335755e-06, |
|
"loss": 0.7322, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 0.5917542441390461, |
|
"grad_norm": 1.6914566375954405, |
|
"learning_rate": 4.274461647822726e-06, |
|
"loss": 0.6987, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.5925626515763945, |
|
"grad_norm": 2.047859565864153, |
|
"learning_rate": 4.260503265489503e-06, |
|
"loss": 0.7284, |
|
"step": 3665 |
|
}, |
|
{ |
|
"epoch": 0.5933710590137429, |
|
"grad_norm": 1.8868258227842758, |
|
"learning_rate": 4.24655077280405e-06, |
|
"loss": 0.7185, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.5941794664510913, |
|
"grad_norm": 2.0502511378713373, |
|
"learning_rate": 4.232604280889593e-06, |
|
"loss": 0.7183, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.5949878738884398, |
|
"grad_norm": 2.0904096614555137, |
|
"learning_rate": 4.218663900821578e-06, |
|
"loss": 0.7386, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.5957962813257882, |
|
"grad_norm": 1.6951556666400156, |
|
"learning_rate": 4.2047297436267635e-06, |
|
"loss": 0.7203, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 0.5966046887631367, |
|
"grad_norm": 1.858909449068441, |
|
"learning_rate": 4.190801920282349e-06, |
|
"loss": 0.7116, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.597413096200485, |
|
"grad_norm": 2.110239776887249, |
|
"learning_rate": 4.176880541715097e-06, |
|
"loss": 0.7291, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 0.5982215036378334, |
|
"grad_norm": 1.9230056997299079, |
|
"learning_rate": 4.162965718800428e-06, |
|
"loss": 0.7217, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.5990299110751819, |
|
"grad_norm": 1.8462970903130231, |
|
"learning_rate": 4.149057562361562e-06, |
|
"loss": 0.7365, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 0.5998383185125303, |
|
"grad_norm": 1.6321519110155143, |
|
"learning_rate": 4.1351561831686136e-06, |
|
"loss": 0.7315, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.6006467259498788, |
|
"grad_norm": 1.6282666293170165, |
|
"learning_rate": 4.121261691937732e-06, |
|
"loss": 0.7213, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 0.6014551333872271, |
|
"grad_norm": 1.897782087386442, |
|
"learning_rate": 4.1073741993302005e-06, |
|
"loss": 0.7123, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.6022635408245756, |
|
"grad_norm": 1.7494970647862944, |
|
"learning_rate": 4.093493815951566e-06, |
|
"loss": 0.7088, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 0.603071948261924, |
|
"grad_norm": 1.7243589892541922, |
|
"learning_rate": 4.079620652350754e-06, |
|
"loss": 0.715, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.6038803556992725, |
|
"grad_norm": 1.8316631448911362, |
|
"learning_rate": 4.065754819019183e-06, |
|
"loss": 0.7248, |
|
"step": 3735 |
|
}, |
|
{ |
|
"epoch": 0.6046887631366209, |
|
"grad_norm": 1.6198889183822909, |
|
"learning_rate": 4.051896426389904e-06, |
|
"loss": 0.7189, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.6054971705739692, |
|
"grad_norm": 1.8244917176494815, |
|
"learning_rate": 4.038045584836691e-06, |
|
"loss": 0.7309, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 0.6063055780113177, |
|
"grad_norm": 1.51923723630652, |
|
"learning_rate": 4.02420240467319e-06, |
|
"loss": 0.7214, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.6071139854486661, |
|
"grad_norm": 1.6915516287756491, |
|
"learning_rate": 4.010366996152025e-06, |
|
"loss": 0.7017, |
|
"step": 3755 |
|
}, |
|
{ |
|
"epoch": 0.6079223928860146, |
|
"grad_norm": 1.8155249672465137, |
|
"learning_rate": 3.99653946946392e-06, |
|
"loss": 0.7436, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.608730800323363, |
|
"grad_norm": 2.2453571071282403, |
|
"learning_rate": 3.982719934736832e-06, |
|
"loss": 0.7281, |
|
"step": 3765 |
|
}, |
|
{ |
|
"epoch": 0.6095392077607114, |
|
"grad_norm": 2.070645865764563, |
|
"learning_rate": 3.96890850203506e-06, |
|
"loss": 0.6972, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.6103476151980598, |
|
"grad_norm": 1.6532992237811048, |
|
"learning_rate": 3.9551052813583776e-06, |
|
"loss": 0.7188, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 0.6111560226354082, |
|
"grad_norm": 1.757629266525101, |
|
"learning_rate": 3.9413103826411595e-06, |
|
"loss": 0.7095, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.6119644300727567, |
|
"grad_norm": 1.6234603453267293, |
|
"learning_rate": 3.927523915751491e-06, |
|
"loss": 0.7291, |
|
"step": 3785 |
|
}, |
|
{ |
|
"epoch": 0.6127728375101051, |
|
"grad_norm": 1.589881321535228, |
|
"learning_rate": 3.913745990490314e-06, |
|
"loss": 0.694, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.6135812449474535, |
|
"grad_norm": 1.6706437491058566, |
|
"learning_rate": 3.899976716590531e-06, |
|
"loss": 0.7335, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 0.6143896523848019, |
|
"grad_norm": 1.694799314467617, |
|
"learning_rate": 3.886216203716149e-06, |
|
"loss": 0.721, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.6151980598221504, |
|
"grad_norm": 2.1158395577766274, |
|
"learning_rate": 3.872464561461397e-06, |
|
"loss": 0.7092, |
|
"step": 3805 |
|
}, |
|
{ |
|
"epoch": 0.6160064672594988, |
|
"grad_norm": 1.6236255430260125, |
|
"learning_rate": 3.8587218993498525e-06, |
|
"loss": 0.7313, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.6168148746968473, |
|
"grad_norm": 1.7100289433829274, |
|
"learning_rate": 3.844988326833574e-06, |
|
"loss": 0.7169, |
|
"step": 3815 |
|
}, |
|
{ |
|
"epoch": 0.6176232821341956, |
|
"grad_norm": 1.66125609703824, |
|
"learning_rate": 3.831263953292225e-06, |
|
"loss": 0.741, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.618431689571544, |
|
"grad_norm": 1.8691718045573207, |
|
"learning_rate": 3.817548888032207e-06, |
|
"loss": 0.7092, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 0.6192400970088925, |
|
"grad_norm": 1.9063385283071115, |
|
"learning_rate": 3.803843240285784e-06, |
|
"loss": 0.724, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.6200485044462409, |
|
"grad_norm": 1.7196545690379716, |
|
"learning_rate": 3.7901471192102173e-06, |
|
"loss": 0.7204, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 0.6208569118835894, |
|
"grad_norm": 1.8618730408898592, |
|
"learning_rate": 3.7764606338868943e-06, |
|
"loss": 0.7218, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.6216653193209377, |
|
"grad_norm": 1.68489988594914, |
|
"learning_rate": 3.7627838933204547e-06, |
|
"loss": 0.7262, |
|
"step": 3845 |
|
}, |
|
{ |
|
"epoch": 0.6224737267582862, |
|
"grad_norm": 2.0315811113955013, |
|
"learning_rate": 3.7491170064379346e-06, |
|
"loss": 0.7127, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.6232821341956346, |
|
"grad_norm": 1.8075962655722444, |
|
"learning_rate": 3.735460082087884e-06, |
|
"loss": 0.7166, |
|
"step": 3855 |
|
}, |
|
{ |
|
"epoch": 0.624090541632983, |
|
"grad_norm": 1.643356272004426, |
|
"learning_rate": 3.7218132290395125e-06, |
|
"loss": 0.7094, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.6248989490703315, |
|
"grad_norm": 1.8603079492566412, |
|
"learning_rate": 3.7081765559818184e-06, |
|
"loss": 0.7174, |
|
"step": 3865 |
|
}, |
|
{ |
|
"epoch": 0.6257073565076798, |
|
"grad_norm": 2.203684498237761, |
|
"learning_rate": 3.6945501715227146e-06, |
|
"loss": 0.6886, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.6265157639450283, |
|
"grad_norm": 1.6838584000200525, |
|
"learning_rate": 3.680934184188182e-06, |
|
"loss": 0.7029, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.6273241713823767, |
|
"grad_norm": 1.750412853978724, |
|
"learning_rate": 3.6673287024213868e-06, |
|
"loss": 0.7133, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.6281325788197252, |
|
"grad_norm": 1.6786616957245395, |
|
"learning_rate": 3.6537338345818273e-06, |
|
"loss": 0.7208, |
|
"step": 3885 |
|
}, |
|
{ |
|
"epoch": 0.6289409862570736, |
|
"grad_norm": 1.7458825802515532, |
|
"learning_rate": 3.640149688944472e-06, |
|
"loss": 0.695, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.629749393694422, |
|
"grad_norm": 1.787947265054849, |
|
"learning_rate": 3.626576373698885e-06, |
|
"loss": 0.7026, |
|
"step": 3895 |
|
}, |
|
{ |
|
"epoch": 0.6305578011317704, |
|
"grad_norm": 1.839057642052893, |
|
"learning_rate": 3.6130139969483825e-06, |
|
"loss": 0.7226, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.6313662085691188, |
|
"grad_norm": 1.6591662592024992, |
|
"learning_rate": 3.599462666709155e-06, |
|
"loss": 0.7167, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 0.6321746160064673, |
|
"grad_norm": 1.7026548619494275, |
|
"learning_rate": 3.5859224909094147e-06, |
|
"loss": 0.7306, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.6329830234438156, |
|
"grad_norm": 1.6797177301247779, |
|
"learning_rate": 3.5723935773885414e-06, |
|
"loss": 0.6974, |
|
"step": 3915 |
|
}, |
|
{ |
|
"epoch": 0.6337914308811641, |
|
"grad_norm": 1.623329714146258, |
|
"learning_rate": 3.558876033896211e-06, |
|
"loss": 0.7283, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.6345998383185125, |
|
"grad_norm": 1.863028763705875, |
|
"learning_rate": 3.5453699680915476e-06, |
|
"loss": 0.7356, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 0.635408245755861, |
|
"grad_norm": 2.274790139096009, |
|
"learning_rate": 3.5318754875422588e-06, |
|
"loss": 0.7042, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.6362166531932094, |
|
"grad_norm": 1.8149361524248941, |
|
"learning_rate": 3.518392699723786e-06, |
|
"loss": 0.7112, |
|
"step": 3935 |
|
}, |
|
{ |
|
"epoch": 0.6370250606305577, |
|
"grad_norm": 1.6623579800376278, |
|
"learning_rate": 3.5049217120184476e-06, |
|
"loss": 0.7007, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.6378334680679062, |
|
"grad_norm": 1.7895960646702003, |
|
"learning_rate": 3.491462631714574e-06, |
|
"loss": 0.7328, |
|
"step": 3945 |
|
}, |
|
{ |
|
"epoch": 0.6386418755052546, |
|
"grad_norm": 1.5179464123128517, |
|
"learning_rate": 3.4780155660056653e-06, |
|
"loss": 0.7212, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.6394502829426031, |
|
"grad_norm": 1.6048128854081531, |
|
"learning_rate": 3.464580621989528e-06, |
|
"loss": 0.7119, |
|
"step": 3955 |
|
}, |
|
{ |
|
"epoch": 0.6402586903799515, |
|
"grad_norm": 1.8256911462243481, |
|
"learning_rate": 3.4511579066674354e-06, |
|
"loss": 0.7139, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.6410670978172999, |
|
"grad_norm": 1.6782507649830145, |
|
"learning_rate": 3.437747526943256e-06, |
|
"loss": 0.7112, |
|
"step": 3965 |
|
}, |
|
{ |
|
"epoch": 0.6418755052546483, |
|
"grad_norm": 1.9042260389204226, |
|
"learning_rate": 3.42434958962262e-06, |
|
"loss": 0.7424, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.6426839126919968, |
|
"grad_norm": 1.5774540955627445, |
|
"learning_rate": 3.410964201412059e-06, |
|
"loss": 0.7023, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 0.6434923201293452, |
|
"grad_norm": 1.9330271182491578, |
|
"learning_rate": 3.3975914689181565e-06, |
|
"loss": 0.6915, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.6443007275666937, |
|
"grad_norm": 1.729192351032169, |
|
"learning_rate": 3.384231498646706e-06, |
|
"loss": 0.7332, |
|
"step": 3985 |
|
}, |
|
{ |
|
"epoch": 0.645109135004042, |
|
"grad_norm": 1.7304833054051807, |
|
"learning_rate": 3.370884397001851e-06, |
|
"loss": 0.7259, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.6459175424413904, |
|
"grad_norm": 1.668285332848988, |
|
"learning_rate": 3.3575502702852486e-06, |
|
"loss": 0.6954, |
|
"step": 3995 |
|
}, |
|
{ |
|
"epoch": 0.6467259498787389, |
|
"grad_norm": 1.6461002316066478, |
|
"learning_rate": 3.344229224695219e-06, |
|
"loss": 0.7078, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.6475343573160873, |
|
"grad_norm": 2.265529307566932, |
|
"learning_rate": 3.3309213663258933e-06, |
|
"loss": 0.7097, |
|
"step": 4005 |
|
}, |
|
{ |
|
"epoch": 0.6483427647534358, |
|
"grad_norm": 1.8168921242914984, |
|
"learning_rate": 3.3176268011663826e-06, |
|
"loss": 0.7335, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.6491511721907841, |
|
"grad_norm": 1.5972240421560917, |
|
"learning_rate": 3.304345635099918e-06, |
|
"loss": 0.727, |
|
"step": 4015 |
|
}, |
|
{ |
|
"epoch": 0.6499595796281326, |
|
"grad_norm": 1.7262174376471393, |
|
"learning_rate": 3.291077973903018e-06, |
|
"loss": 0.7384, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.650767987065481, |
|
"grad_norm": 1.7197250528022299, |
|
"learning_rate": 3.2778239232446462e-06, |
|
"loss": 0.7212, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 0.6515763945028294, |
|
"grad_norm": 1.8271752649285784, |
|
"learning_rate": 3.2645835886853604e-06, |
|
"loss": 0.7254, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.6523848019401779, |
|
"grad_norm": 1.6271051386234956, |
|
"learning_rate": 3.251357075676482e-06, |
|
"loss": 0.712, |
|
"step": 4035 |
|
}, |
|
{ |
|
"epoch": 0.6531932093775262, |
|
"grad_norm": 1.6767642174095554, |
|
"learning_rate": 3.2381444895592483e-06, |
|
"loss": 0.7218, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.6540016168148747, |
|
"grad_norm": 1.7632914896547123, |
|
"learning_rate": 3.224945935563982e-06, |
|
"loss": 0.715, |
|
"step": 4045 |
|
}, |
|
{ |
|
"epoch": 0.6548100242522231, |
|
"grad_norm": 1.8702788784549214, |
|
"learning_rate": 3.2117615188092475e-06, |
|
"loss": 0.7367, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.6556184316895716, |
|
"grad_norm": 1.9298182916897795, |
|
"learning_rate": 3.1985913443010106e-06, |
|
"loss": 0.7164, |
|
"step": 4055 |
|
}, |
|
{ |
|
"epoch": 0.65642683912692, |
|
"grad_norm": 2.1333743836535555, |
|
"learning_rate": 3.185435516931811e-06, |
|
"loss": 0.7175, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.6572352465642683, |
|
"grad_norm": 1.9618873764352864, |
|
"learning_rate": 3.1722941414799152e-06, |
|
"loss": 0.7293, |
|
"step": 4065 |
|
}, |
|
{ |
|
"epoch": 0.6580436540016168, |
|
"grad_norm": 1.8406120340882652, |
|
"learning_rate": 3.159167322608498e-06, |
|
"loss": 0.7204, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.6588520614389652, |
|
"grad_norm": 1.8153933978636514, |
|
"learning_rate": 3.146055164864794e-06, |
|
"loss": 0.7096, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 0.6596604688763137, |
|
"grad_norm": 1.6106430966301524, |
|
"learning_rate": 3.1329577726792705e-06, |
|
"loss": 0.7199, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.6604688763136621, |
|
"grad_norm": 1.6587566226994688, |
|
"learning_rate": 3.1198752503647995e-06, |
|
"loss": 0.7059, |
|
"step": 4085 |
|
}, |
|
{ |
|
"epoch": 0.6612772837510105, |
|
"grad_norm": 1.6152976800316061, |
|
"learning_rate": 3.1068077021158185e-06, |
|
"loss": 0.7155, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.6620856911883589, |
|
"grad_norm": 1.668320625864469, |
|
"learning_rate": 3.0937552320075116e-06, |
|
"loss": 0.6997, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 0.6628940986257074, |
|
"grad_norm": 1.6309737459520801, |
|
"learning_rate": 3.0807179439949685e-06, |
|
"loss": 0.7242, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.6637025060630558, |
|
"grad_norm": 1.8832810217520193, |
|
"learning_rate": 3.0676959419123666e-06, |
|
"loss": 0.6975, |
|
"step": 4105 |
|
}, |
|
{ |
|
"epoch": 0.6645109135004043, |
|
"grad_norm": 1.750667565585888, |
|
"learning_rate": 3.05468932947214e-06, |
|
"loss": 0.7229, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.6653193209377526, |
|
"grad_norm": 1.7153179028178922, |
|
"learning_rate": 3.041698210264149e-06, |
|
"loss": 0.7051, |
|
"step": 4115 |
|
}, |
|
{ |
|
"epoch": 0.666127728375101, |
|
"grad_norm": 1.644510456041389, |
|
"learning_rate": 3.028722687754867e-06, |
|
"loss": 0.7254, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.6669361358124495, |
|
"grad_norm": 1.7526546920401012, |
|
"learning_rate": 3.0157628652865426e-06, |
|
"loss": 0.725, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.6677445432497979, |
|
"grad_norm": 1.6156156934111832, |
|
"learning_rate": 3.0028188460763853e-06, |
|
"loss": 0.7109, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.6685529506871464, |
|
"grad_norm": 1.377558375885402, |
|
"learning_rate": 2.9898907332157432e-06, |
|
"loss": 0.7234, |
|
"step": 4135 |
|
}, |
|
{ |
|
"epoch": 0.6693613581244947, |
|
"grad_norm": 1.5406583460490257, |
|
"learning_rate": 2.976978629669276e-06, |
|
"loss": 0.6983, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.6701697655618432, |
|
"grad_norm": 1.6610140694054174, |
|
"learning_rate": 2.9640826382741427e-06, |
|
"loss": 0.7082, |
|
"step": 4145 |
|
}, |
|
{ |
|
"epoch": 0.6709781729991916, |
|
"grad_norm": 1.8696909034008466, |
|
"learning_rate": 2.951202861739173e-06, |
|
"loss": 0.7039, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.67178658043654, |
|
"grad_norm": 1.699448437132235, |
|
"learning_rate": 2.938339402644061e-06, |
|
"loss": 0.7069, |
|
"step": 4155 |
|
}, |
|
{ |
|
"epoch": 0.6725949878738885, |
|
"grad_norm": 1.9050926736431988, |
|
"learning_rate": 2.9254923634385425e-06, |
|
"loss": 0.7083, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.6734033953112368, |
|
"grad_norm": 1.6041414448997582, |
|
"learning_rate": 2.912661846441572e-06, |
|
"loss": 0.7154, |
|
"step": 4165 |
|
}, |
|
{ |
|
"epoch": 0.6742118027485853, |
|
"grad_norm": 1.5644306988254755, |
|
"learning_rate": 2.8998479538405218e-06, |
|
"loss": 0.727, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.6750202101859337, |
|
"grad_norm": 1.5805303077306065, |
|
"learning_rate": 2.8870507876903536e-06, |
|
"loss": 0.694, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 0.6758286176232822, |
|
"grad_norm": 1.5605132401182347, |
|
"learning_rate": 2.87427044991282e-06, |
|
"loss": 0.712, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.6766370250606305, |
|
"grad_norm": 1.634348076934335, |
|
"learning_rate": 2.861507042295644e-06, |
|
"loss": 0.7134, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 0.677445432497979, |
|
"grad_norm": 1.5331952726149434, |
|
"learning_rate": 2.8487606664917056e-06, |
|
"loss": 0.7311, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.6782538399353274, |
|
"grad_norm": 1.9231338256054877, |
|
"learning_rate": 2.836031424018243e-06, |
|
"loss": 0.7053, |
|
"step": 4195 |
|
}, |
|
{ |
|
"epoch": 0.6790622473726758, |
|
"grad_norm": 1.6648310177555483, |
|
"learning_rate": 2.823319416256033e-06, |
|
"loss": 0.7094, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.6798706548100243, |
|
"grad_norm": 1.7703929497048758, |
|
"learning_rate": 2.810624744448588e-06, |
|
"loss": 0.6877, |
|
"step": 4205 |
|
}, |
|
{ |
|
"epoch": 0.6806790622473726, |
|
"grad_norm": 1.5831872562872369, |
|
"learning_rate": 2.797947509701354e-06, |
|
"loss": 0.7031, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.6814874696847211, |
|
"grad_norm": 1.4317597869567902, |
|
"learning_rate": 2.785287812980898e-06, |
|
"loss": 0.7371, |
|
"step": 4215 |
|
}, |
|
{ |
|
"epoch": 0.6822958771220695, |
|
"grad_norm": 1.7340298885830805, |
|
"learning_rate": 2.7726457551141093e-06, |
|
"loss": 0.7366, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.683104284559418, |
|
"grad_norm": 1.6924476828852524, |
|
"learning_rate": 2.7600214367873913e-06, |
|
"loss": 0.697, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 0.6839126919967664, |
|
"grad_norm": 1.4262764228242009, |
|
"learning_rate": 2.7474149585458666e-06, |
|
"loss": 0.7228, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.6847210994341147, |
|
"grad_norm": 1.9027434686970166, |
|
"learning_rate": 2.734826420792568e-06, |
|
"loss": 0.7167, |
|
"step": 4235 |
|
}, |
|
{ |
|
"epoch": 0.6855295068714632, |
|
"grad_norm": 1.6470078258301304, |
|
"learning_rate": 2.7222559237876467e-06, |
|
"loss": 0.7287, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.6863379143088116, |
|
"grad_norm": 1.7103358496942485, |
|
"learning_rate": 2.709703567647569e-06, |
|
"loss": 0.6992, |
|
"step": 4245 |
|
}, |
|
{ |
|
"epoch": 0.6871463217461601, |
|
"grad_norm": 1.5289126014423284, |
|
"learning_rate": 2.697169452344316e-06, |
|
"loss": 0.6908, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.6879547291835085, |
|
"grad_norm": 1.5353664972333323, |
|
"learning_rate": 2.6846536777046004e-06, |
|
"loss": 0.7066, |
|
"step": 4255 |
|
}, |
|
{ |
|
"epoch": 0.6887631366208569, |
|
"grad_norm": 1.9300314181188833, |
|
"learning_rate": 2.672156343409053e-06, |
|
"loss": 0.7056, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.6895715440582053, |
|
"grad_norm": 1.7170832117281503, |
|
"learning_rate": 2.659677548991444e-06, |
|
"loss": 0.7065, |
|
"step": 4265 |
|
}, |
|
{ |
|
"epoch": 0.6903799514955538, |
|
"grad_norm": 1.5377665823804576, |
|
"learning_rate": 2.647217393837886e-06, |
|
"loss": 0.7258, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.6911883589329022, |
|
"grad_norm": 1.7401588076713146, |
|
"learning_rate": 2.6347759771860336e-06, |
|
"loss": 0.7026, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 0.6919967663702506, |
|
"grad_norm": 1.799996686879021, |
|
"learning_rate": 2.62235339812431e-06, |
|
"loss": 0.6998, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.692805173807599, |
|
"grad_norm": 1.7779728222118754, |
|
"learning_rate": 2.6099497555911006e-06, |
|
"loss": 0.6993, |
|
"step": 4285 |
|
}, |
|
{ |
|
"epoch": 0.6936135812449474, |
|
"grad_norm": 1.4924029650433472, |
|
"learning_rate": 2.5975651483739745e-06, |
|
"loss": 0.7161, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.6944219886822959, |
|
"grad_norm": 1.7534120757891243, |
|
"learning_rate": 2.5851996751088997e-06, |
|
"loss": 0.7072, |
|
"step": 4295 |
|
}, |
|
{ |
|
"epoch": 0.6952303961196443, |
|
"grad_norm": 1.9089320497241284, |
|
"learning_rate": 2.5728534342794487e-06, |
|
"loss": 0.7063, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.6960388035569928, |
|
"grad_norm": 1.919875371600586, |
|
"learning_rate": 2.560526524216024e-06, |
|
"loss": 0.7033, |
|
"step": 4305 |
|
}, |
|
{ |
|
"epoch": 0.6968472109943411, |
|
"grad_norm": 1.6478824265950052, |
|
"learning_rate": 2.548219043095064e-06, |
|
"loss": 0.7205, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.6976556184316896, |
|
"grad_norm": 1.684623132888406, |
|
"learning_rate": 2.535931088938274e-06, |
|
"loss": 0.6847, |
|
"step": 4315 |
|
}, |
|
{ |
|
"epoch": 0.698464025869038, |
|
"grad_norm": 1.67134719450202, |
|
"learning_rate": 2.5236627596118362e-06, |
|
"loss": 0.703, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.6992724333063864, |
|
"grad_norm": 1.7627300301593591, |
|
"learning_rate": 2.511414152825631e-06, |
|
"loss": 0.6908, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 0.7000808407437349, |
|
"grad_norm": 1.5484331606717547, |
|
"learning_rate": 2.499185366132462e-06, |
|
"loss": 0.7235, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.7008892481810832, |
|
"grad_norm": 1.6640413077548424, |
|
"learning_rate": 2.4869764969272757e-06, |
|
"loss": 0.7027, |
|
"step": 4335 |
|
}, |
|
{ |
|
"epoch": 0.7016976556184317, |
|
"grad_norm": 1.578928776955349, |
|
"learning_rate": 2.474787642446393e-06, |
|
"loss": 0.7164, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.7025060630557801, |
|
"grad_norm": 1.9767016634135068, |
|
"learning_rate": 2.4626188997667224e-06, |
|
"loss": 0.7161, |
|
"step": 4345 |
|
}, |
|
{ |
|
"epoch": 0.7033144704931286, |
|
"grad_norm": 1.7184637637432352, |
|
"learning_rate": 2.4504703658049994e-06, |
|
"loss": 0.6947, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.704122877930477, |
|
"grad_norm": 1.6400468293429349, |
|
"learning_rate": 2.43834213731701e-06, |
|
"loss": 0.7072, |
|
"step": 4355 |
|
}, |
|
{ |
|
"epoch": 0.7049312853678253, |
|
"grad_norm": 1.5907149181306826, |
|
"learning_rate": 2.426234310896812e-06, |
|
"loss": 0.7036, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.7057396928051738, |
|
"grad_norm": 1.418762913742883, |
|
"learning_rate": 2.414146982975983e-06, |
|
"loss": 0.7, |
|
"step": 4365 |
|
}, |
|
{ |
|
"epoch": 0.7065481002425222, |
|
"grad_norm": 2.069021252053298, |
|
"learning_rate": 2.4020802498228333e-06, |
|
"loss": 0.7131, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.7073565076798707, |
|
"grad_norm": 1.7124723237801922, |
|
"learning_rate": 2.3900342075416514e-06, |
|
"loss": 0.6877, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 0.7081649151172191, |
|
"grad_norm": 1.473031145225499, |
|
"learning_rate": 2.37800895207194e-06, |
|
"loss": 0.7242, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.7089733225545675, |
|
"grad_norm": 1.7703727073517148, |
|
"learning_rate": 2.3660045791876386e-06, |
|
"loss": 0.6832, |
|
"step": 4385 |
|
}, |
|
{ |
|
"epoch": 0.7097817299919159, |
|
"grad_norm": 1.7898901246572265, |
|
"learning_rate": 2.3540211844963783e-06, |
|
"loss": 0.7167, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.7105901374292644, |
|
"grad_norm": 1.6018154696798712, |
|
"learning_rate": 2.342058863438704e-06, |
|
"loss": 0.6873, |
|
"step": 4395 |
|
}, |
|
{ |
|
"epoch": 0.7113985448666128, |
|
"grad_norm": 1.7123245457707612, |
|
"learning_rate": 2.330117711287327e-06, |
|
"loss": 0.7074, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.7122069523039612, |
|
"grad_norm": 2.0993312473611363, |
|
"learning_rate": 2.3181978231463604e-06, |
|
"loss": 0.7036, |
|
"step": 4405 |
|
}, |
|
{ |
|
"epoch": 0.7130153597413096, |
|
"grad_norm": 1.5813117767291411, |
|
"learning_rate": 2.306299293950557e-06, |
|
"loss": 0.7153, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.713823767178658, |
|
"grad_norm": 1.6125648806682966, |
|
"learning_rate": 2.294422218464567e-06, |
|
"loss": 0.6898, |
|
"step": 4415 |
|
}, |
|
{ |
|
"epoch": 0.7146321746160065, |
|
"grad_norm": 2.051605303416789, |
|
"learning_rate": 2.2825666912821674e-06, |
|
"loss": 0.7156, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.7154405820533549, |
|
"grad_norm": 1.6595060216820654, |
|
"learning_rate": 2.270732806825517e-06, |
|
"loss": 0.719, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 0.7162489894907034, |
|
"grad_norm": 1.531575665848643, |
|
"learning_rate": 2.2589206593444084e-06, |
|
"loss": 0.7335, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.7170573969280517, |
|
"grad_norm": 1.4778525732647043, |
|
"learning_rate": 2.2471303429155043e-06, |
|
"loss": 0.7191, |
|
"step": 4435 |
|
}, |
|
{ |
|
"epoch": 0.7178658043654002, |
|
"grad_norm": 1.6929860001853365, |
|
"learning_rate": 2.2353619514416052e-06, |
|
"loss": 0.7216, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.7186742118027486, |
|
"grad_norm": 1.782449408026849, |
|
"learning_rate": 2.223615578650884e-06, |
|
"loss": 0.7009, |
|
"step": 4445 |
|
}, |
|
{ |
|
"epoch": 0.719482619240097, |
|
"grad_norm": 1.4215615691395374, |
|
"learning_rate": 2.2118913180961522e-06, |
|
"loss": 0.6972, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.7202910266774454, |
|
"grad_norm": 1.7771811992721345, |
|
"learning_rate": 2.2001892631541132e-06, |
|
"loss": 0.7133, |
|
"step": 4455 |
|
}, |
|
{ |
|
"epoch": 0.7210994341147938, |
|
"grad_norm": 1.53041881310807, |
|
"learning_rate": 2.1885095070246116e-06, |
|
"loss": 0.6989, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.7219078415521423, |
|
"grad_norm": 1.8037641077557016, |
|
"learning_rate": 2.176852142729895e-06, |
|
"loss": 0.7102, |
|
"step": 4465 |
|
}, |
|
{ |
|
"epoch": 0.7227162489894907, |
|
"grad_norm": 1.7035192300078206, |
|
"learning_rate": 2.165217263113875e-06, |
|
"loss": 0.7106, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.7235246564268392, |
|
"grad_norm": 1.599875721865672, |
|
"learning_rate": 2.153604960841389e-06, |
|
"loss": 0.7055, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 0.7243330638641875, |
|
"grad_norm": 1.678804995013585, |
|
"learning_rate": 2.142015328397454e-06, |
|
"loss": 0.6962, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.725141471301536, |
|
"grad_norm": 1.7056685395536566, |
|
"learning_rate": 2.130448458086539e-06, |
|
"loss": 0.7177, |
|
"step": 4485 |
|
}, |
|
{ |
|
"epoch": 0.7259498787388844, |
|
"grad_norm": 1.9835575111149595, |
|
"learning_rate": 2.118904442031829e-06, |
|
"loss": 0.7136, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.7267582861762328, |
|
"grad_norm": 1.556702686596829, |
|
"learning_rate": 2.1073833721744796e-06, |
|
"loss": 0.7113, |
|
"step": 4495 |
|
}, |
|
{ |
|
"epoch": 0.7275666936135813, |
|
"grad_norm": 1.7685805077897936, |
|
"learning_rate": 2.095885340272904e-06, |
|
"loss": 0.6973, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.7283751010509296, |
|
"grad_norm": 1.8918140713245204, |
|
"learning_rate": 2.084410437902025e-06, |
|
"loss": 0.7104, |
|
"step": 4505 |
|
}, |
|
{ |
|
"epoch": 0.7291835084882781, |
|
"grad_norm": 1.6842141260766332, |
|
"learning_rate": 2.0729587564525525e-06, |
|
"loss": 0.7058, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.7299919159256265, |
|
"grad_norm": 1.988830288548141, |
|
"learning_rate": 2.0615303871302617e-06, |
|
"loss": 0.6982, |
|
"step": 4515 |
|
}, |
|
{ |
|
"epoch": 0.730800323362975, |
|
"grad_norm": 1.7150008974104203, |
|
"learning_rate": 2.0501254209552536e-06, |
|
"loss": 0.7253, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.7316087308003234, |
|
"grad_norm": 1.6368813181817818, |
|
"learning_rate": 2.038743948761243e-06, |
|
"loss": 0.7251, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 0.7324171382376717, |
|
"grad_norm": 1.6425874295988543, |
|
"learning_rate": 2.0273860611948244e-06, |
|
"loss": 0.7024, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.7332255456750202, |
|
"grad_norm": 1.5620038329412886, |
|
"learning_rate": 2.016051848714758e-06, |
|
"loss": 0.6972, |
|
"step": 4535 |
|
}, |
|
{ |
|
"epoch": 0.7340339531123686, |
|
"grad_norm": 1.5155991077502002, |
|
"learning_rate": 2.004741401591247e-06, |
|
"loss": 0.6966, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.7348423605497171, |
|
"grad_norm": 1.5087838986819468, |
|
"learning_rate": 1.9934548099052147e-06, |
|
"loss": 0.704, |
|
"step": 4545 |
|
}, |
|
{ |
|
"epoch": 0.7356507679870655, |
|
"grad_norm": 1.540725635553023, |
|
"learning_rate": 1.9821921635475923e-06, |
|
"loss": 0.711, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.7364591754244139, |
|
"grad_norm": 1.7449377277549325, |
|
"learning_rate": 1.9709535522185963e-06, |
|
"loss": 0.7262, |
|
"step": 4555 |
|
}, |
|
{ |
|
"epoch": 0.7372675828617623, |
|
"grad_norm": 1.5400854167352056, |
|
"learning_rate": 1.959739065427026e-06, |
|
"loss": 0.685, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.7380759902991108, |
|
"grad_norm": 1.6573782698805526, |
|
"learning_rate": 1.94854879248954e-06, |
|
"loss": 0.6949, |
|
"step": 4565 |
|
}, |
|
{ |
|
"epoch": 0.7388843977364592, |
|
"grad_norm": 1.444089691451337, |
|
"learning_rate": 1.9373828225299458e-06, |
|
"loss": 0.7192, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.7396928051738076, |
|
"grad_norm": 1.6503277393010736, |
|
"learning_rate": 1.926241244478496e-06, |
|
"loss": 0.7012, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 0.740501212611156, |
|
"grad_norm": 1.4505458685379473, |
|
"learning_rate": 1.9151241470711725e-06, |
|
"loss": 0.7064, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.7413096200485044, |
|
"grad_norm": 1.6098471332377016, |
|
"learning_rate": 1.904031618848987e-06, |
|
"loss": 0.7168, |
|
"step": 4585 |
|
}, |
|
{ |
|
"epoch": 0.7421180274858529, |
|
"grad_norm": 1.5386973446650158, |
|
"learning_rate": 1.8929637481572715e-06, |
|
"loss": 0.6851, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.7429264349232013, |
|
"grad_norm": 1.6231381655370265, |
|
"learning_rate": 1.8819206231449717e-06, |
|
"loss": 0.6933, |
|
"step": 4595 |
|
}, |
|
{ |
|
"epoch": 0.7437348423605498, |
|
"grad_norm": 1.8771029991361663, |
|
"learning_rate": 1.8709023317639558e-06, |
|
"loss": 0.7155, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.7445432497978981, |
|
"grad_norm": 1.6673306502115761, |
|
"learning_rate": 1.8599089617682997e-06, |
|
"loss": 0.6922, |
|
"step": 4605 |
|
}, |
|
{ |
|
"epoch": 0.7453516572352465, |
|
"grad_norm": 1.7070238047014281, |
|
"learning_rate": 1.848940600713603e-06, |
|
"loss": 0.7036, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.746160064672595, |
|
"grad_norm": 1.5626624796074473, |
|
"learning_rate": 1.8379973359562765e-06, |
|
"loss": 0.7121, |
|
"step": 4615 |
|
}, |
|
{ |
|
"epoch": 0.7469684721099434, |
|
"grad_norm": 1.5529354764314784, |
|
"learning_rate": 1.8270792546528593e-06, |
|
"loss": 0.7194, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.7477768795472919, |
|
"grad_norm": 1.7007088230671603, |
|
"learning_rate": 1.816186443759319e-06, |
|
"loss": 0.7124, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 0.7485852869846402, |
|
"grad_norm": 1.4923622281091249, |
|
"learning_rate": 1.8053189900303553e-06, |
|
"loss": 0.7166, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.7493936944219887, |
|
"grad_norm": 1.6071608830962205, |
|
"learning_rate": 1.7944769800187201e-06, |
|
"loss": 0.7148, |
|
"step": 4635 |
|
}, |
|
{ |
|
"epoch": 0.7502021018593371, |
|
"grad_norm": 1.4700051890576207, |
|
"learning_rate": 1.7836605000745154e-06, |
|
"loss": 0.7216, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.7510105092966856, |
|
"grad_norm": 1.6606431322123616, |
|
"learning_rate": 1.772869636344512e-06, |
|
"loss": 0.6907, |
|
"step": 4645 |
|
}, |
|
{ |
|
"epoch": 0.751818916734034, |
|
"grad_norm": 1.4813002160453033, |
|
"learning_rate": 1.7621044747714683e-06, |
|
"loss": 0.7098, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.7526273241713823, |
|
"grad_norm": 1.661640433481697, |
|
"learning_rate": 1.751365101093433e-06, |
|
"loss": 0.6964, |
|
"step": 4655 |
|
}, |
|
{ |
|
"epoch": 0.7534357316087308, |
|
"grad_norm": 1.7279822572241663, |
|
"learning_rate": 1.7406516008430774e-06, |
|
"loss": 0.6834, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.7542441390460792, |
|
"grad_norm": 1.8915619961292818, |
|
"learning_rate": 1.729964059346998e-06, |
|
"loss": 0.7122, |
|
"step": 4665 |
|
}, |
|
{ |
|
"epoch": 0.7550525464834277, |
|
"grad_norm": 1.6083179781780426, |
|
"learning_rate": 1.719302561725053e-06, |
|
"loss": 0.6946, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.7558609539207761, |
|
"grad_norm": 1.7461258307464391, |
|
"learning_rate": 1.7086671928896747e-06, |
|
"loss": 0.6846, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 0.7566693613581245, |
|
"grad_norm": 1.4237466961185907, |
|
"learning_rate": 1.6980580375451928e-06, |
|
"loss": 0.686, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.7574777687954729, |
|
"grad_norm": 1.7755619878515854, |
|
"learning_rate": 1.687475180187163e-06, |
|
"loss": 0.7112, |
|
"step": 4685 |
|
}, |
|
{ |
|
"epoch": 0.7582861762328214, |
|
"grad_norm": 1.61719801995157, |
|
"learning_rate": 1.6769187051016933e-06, |
|
"loss": 0.7094, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.7590945836701698, |
|
"grad_norm": 1.5568841653418648, |
|
"learning_rate": 1.6663886963647753e-06, |
|
"loss": 0.7276, |
|
"step": 4695 |
|
}, |
|
{ |
|
"epoch": 0.7599029911075182, |
|
"grad_norm": 1.7325661061608444, |
|
"learning_rate": 1.6558852378416113e-06, |
|
"loss": 0.7134, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.7607113985448666, |
|
"grad_norm": 1.7935022266405718, |
|
"learning_rate": 1.6454084131859427e-06, |
|
"loss": 0.7126, |
|
"step": 4705 |
|
}, |
|
{ |
|
"epoch": 0.761519805982215, |
|
"grad_norm": 1.644414000022595, |
|
"learning_rate": 1.6349583058393953e-06, |
|
"loss": 0.7072, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.7623282134195635, |
|
"grad_norm": 1.6391071208815438, |
|
"learning_rate": 1.6245349990307997e-06, |
|
"loss": 0.7022, |
|
"step": 4715 |
|
}, |
|
{ |
|
"epoch": 0.7631366208569119, |
|
"grad_norm": 1.693934420854349, |
|
"learning_rate": 1.614138575775544e-06, |
|
"loss": 0.6864, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.7639450282942603, |
|
"grad_norm": 1.538392225632054, |
|
"learning_rate": 1.6037691188748995e-06, |
|
"loss": 0.7145, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 0.7647534357316087, |
|
"grad_norm": 1.7487697179941406, |
|
"learning_rate": 1.5934267109153667e-06, |
|
"loss": 0.6828, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.7655618431689571, |
|
"grad_norm": 1.4221518439513514, |
|
"learning_rate": 1.5831114342680225e-06, |
|
"loss": 0.6978, |
|
"step": 4735 |
|
}, |
|
{ |
|
"epoch": 0.7663702506063056, |
|
"grad_norm": 1.6852680227123094, |
|
"learning_rate": 1.5728233710878527e-06, |
|
"loss": 0.689, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.767178658043654, |
|
"grad_norm": 1.4910729061389476, |
|
"learning_rate": 1.5625626033131102e-06, |
|
"loss": 0.7148, |
|
"step": 4745 |
|
}, |
|
{ |
|
"epoch": 0.7679870654810024, |
|
"grad_norm": 1.3823498315519551, |
|
"learning_rate": 1.5523292126646505e-06, |
|
"loss": 0.7111, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.7687954729183508, |
|
"grad_norm": 1.5120794248749074, |
|
"learning_rate": 1.542123280645292e-06, |
|
"loss": 0.7169, |
|
"step": 4755 |
|
}, |
|
{ |
|
"epoch": 0.7696038803556993, |
|
"grad_norm": 1.4393108032014577, |
|
"learning_rate": 1.5319448885391596e-06, |
|
"loss": 0.7061, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.7704122877930477, |
|
"grad_norm": 1.579487027567959, |
|
"learning_rate": 1.521794117411039e-06, |
|
"loss": 0.7112, |
|
"step": 4765 |
|
}, |
|
{ |
|
"epoch": 0.7712206952303962, |
|
"grad_norm": 1.7223557601541144, |
|
"learning_rate": 1.5116710481057301e-06, |
|
"loss": 0.712, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.7720291026677445, |
|
"grad_norm": 1.3942438179900838, |
|
"learning_rate": 1.5015757612474048e-06, |
|
"loss": 0.7128, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 0.7728375101050929, |
|
"grad_norm": 1.614918823377606, |
|
"learning_rate": 1.4915083372389665e-06, |
|
"loss": 0.7, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.7736459175424414, |
|
"grad_norm": 1.565824384446224, |
|
"learning_rate": 1.4814688562614094e-06, |
|
"loss": 0.7168, |
|
"step": 4785 |
|
}, |
|
{ |
|
"epoch": 0.7744543249797898, |
|
"grad_norm": 1.426544859266959, |
|
"learning_rate": 1.4714573982731705e-06, |
|
"loss": 0.6955, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.7752627324171383, |
|
"grad_norm": 1.5620988707197838, |
|
"learning_rate": 1.4614740430095104e-06, |
|
"loss": 0.7234, |
|
"step": 4795 |
|
}, |
|
{ |
|
"epoch": 0.7760711398544866, |
|
"grad_norm": 1.526988525228804, |
|
"learning_rate": 1.451518869981859e-06, |
|
"loss": 0.7241, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.7768795472918351, |
|
"grad_norm": 1.5578195610213377, |
|
"learning_rate": 1.4415919584771999e-06, |
|
"loss": 0.7097, |
|
"step": 4805 |
|
}, |
|
{ |
|
"epoch": 0.7776879547291835, |
|
"grad_norm": 1.4842378634518532, |
|
"learning_rate": 1.431693387557424e-06, |
|
"loss": 0.7054, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.778496362166532, |
|
"grad_norm": 1.5526408435473718, |
|
"learning_rate": 1.4218232360587092e-06, |
|
"loss": 0.6938, |
|
"step": 4815 |
|
}, |
|
{ |
|
"epoch": 0.7793047696038804, |
|
"grad_norm": 1.6055389555966928, |
|
"learning_rate": 1.4119815825908922e-06, |
|
"loss": 0.711, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.7801131770412287, |
|
"grad_norm": 1.5107126562884128, |
|
"learning_rate": 1.4021685055368345e-06, |
|
"loss": 0.7109, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 0.7809215844785772, |
|
"grad_norm": 1.8143707926782138, |
|
"learning_rate": 1.392384083051808e-06, |
|
"loss": 0.7067, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.7817299919159256, |
|
"grad_norm": 1.5209084571946596, |
|
"learning_rate": 1.3826283930628686e-06, |
|
"loss": 0.7137, |
|
"step": 4835 |
|
}, |
|
{ |
|
"epoch": 0.7825383993532741, |
|
"grad_norm": 1.6615578812614993, |
|
"learning_rate": 1.37290151326823e-06, |
|
"loss": 0.7295, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.7833468067906225, |
|
"grad_norm": 1.4826721800499079, |
|
"learning_rate": 1.3632035211366562e-06, |
|
"loss": 0.6925, |
|
"step": 4845 |
|
}, |
|
{ |
|
"epoch": 0.7841552142279709, |
|
"grad_norm": 1.5444908726157, |
|
"learning_rate": 1.3535344939068347e-06, |
|
"loss": 0.7287, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.7849636216653193, |
|
"grad_norm": 1.6021717100804354, |
|
"learning_rate": 1.3438945085867644e-06, |
|
"loss": 0.6999, |
|
"step": 4855 |
|
}, |
|
{ |
|
"epoch": 0.7857720291026677, |
|
"grad_norm": 1.4010708923873407, |
|
"learning_rate": 1.3342836419531434e-06, |
|
"loss": 0.7173, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.7865804365400162, |
|
"grad_norm": 1.6090770001687282, |
|
"learning_rate": 1.3247019705507596e-06, |
|
"loss": 0.7228, |
|
"step": 4865 |
|
}, |
|
{ |
|
"epoch": 0.7873888439773646, |
|
"grad_norm": 1.424793150875814, |
|
"learning_rate": 1.3151495706918766e-06, |
|
"loss": 0.7151, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.788197251414713, |
|
"grad_norm": 1.4789106548880404, |
|
"learning_rate": 1.3056265184556255e-06, |
|
"loss": 0.7072, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 0.7890056588520614, |
|
"grad_norm": 1.472129014937297, |
|
"learning_rate": 1.2961328896874053e-06, |
|
"loss": 0.695, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.7898140662894099, |
|
"grad_norm": 1.6637166864071475, |
|
"learning_rate": 1.2866687599982709e-06, |
|
"loss": 0.7001, |
|
"step": 4885 |
|
}, |
|
{ |
|
"epoch": 0.7906224737267583, |
|
"grad_norm": 1.5057877081189113, |
|
"learning_rate": 1.2772342047643365e-06, |
|
"loss": 0.7008, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.7914308811641068, |
|
"grad_norm": 1.548339961776595, |
|
"learning_rate": 1.267829299126176e-06, |
|
"loss": 0.6978, |
|
"step": 4895 |
|
}, |
|
{ |
|
"epoch": 0.7922392886014551, |
|
"grad_norm": 1.464593794442187, |
|
"learning_rate": 1.2584541179882177e-06, |
|
"loss": 0.7177, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.7930476960388035, |
|
"grad_norm": 1.6351654658678574, |
|
"learning_rate": 1.2491087360181542e-06, |
|
"loss": 0.7026, |
|
"step": 4905 |
|
}, |
|
{ |
|
"epoch": 0.793856103476152, |
|
"grad_norm": 1.4879686812861164, |
|
"learning_rate": 1.2397932276463436e-06, |
|
"loss": 0.7392, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.7946645109135004, |
|
"grad_norm": 1.3378650693860414, |
|
"learning_rate": 1.2305076670652223e-06, |
|
"loss": 0.6888, |
|
"step": 4915 |
|
}, |
|
{ |
|
"epoch": 0.7954729183508489, |
|
"grad_norm": 1.522150314193267, |
|
"learning_rate": 1.2212521282287093e-06, |
|
"loss": 0.7076, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.7962813257881972, |
|
"grad_norm": 1.417333869582, |
|
"learning_rate": 1.2120266848516154e-06, |
|
"loss": 0.7037, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 0.7970897332255457, |
|
"grad_norm": 1.6740080315406156, |
|
"learning_rate": 1.202831410409065e-06, |
|
"loss": 0.7061, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.7978981406628941, |
|
"grad_norm": 1.553227192647926, |
|
"learning_rate": 1.1936663781358977e-06, |
|
"loss": 0.7079, |
|
"step": 4935 |
|
}, |
|
{ |
|
"epoch": 0.7987065481002426, |
|
"grad_norm": 1.8204170112679587, |
|
"learning_rate": 1.1845316610260992e-06, |
|
"loss": 0.7018, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.799514955537591, |
|
"grad_norm": 1.634253956796262, |
|
"learning_rate": 1.1754273318322096e-06, |
|
"loss": 0.6829, |
|
"step": 4945 |
|
}, |
|
{ |
|
"epoch": 0.8003233629749393, |
|
"grad_norm": 1.6598849108252804, |
|
"learning_rate": 1.1663534630647455e-06, |
|
"loss": 0.693, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.8011317704122878, |
|
"grad_norm": 1.4148173842136398, |
|
"learning_rate": 1.1573101269916304e-06, |
|
"loss": 0.7105, |
|
"step": 4955 |
|
}, |
|
{ |
|
"epoch": 0.8019401778496362, |
|
"grad_norm": 1.8151836099791694, |
|
"learning_rate": 1.148297395637607e-06, |
|
"loss": 0.6941, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.8027485852869847, |
|
"grad_norm": 1.6808272789284275, |
|
"learning_rate": 1.1393153407836742e-06, |
|
"loss": 0.7136, |
|
"step": 4965 |
|
}, |
|
{ |
|
"epoch": 0.8035569927243331, |
|
"grad_norm": 1.519780990446839, |
|
"learning_rate": 1.1303640339665106e-06, |
|
"loss": 0.7162, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.8043654001616815, |
|
"grad_norm": 1.8667006592783422, |
|
"learning_rate": 1.1214435464779006e-06, |
|
"loss": 0.7098, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 0.8051738075990299, |
|
"grad_norm": 1.563431852094894, |
|
"learning_rate": 1.1125539493641774e-06, |
|
"loss": 0.7108, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.8059822150363783, |
|
"grad_norm": 1.4400199022026983, |
|
"learning_rate": 1.1036953134256474e-06, |
|
"loss": 0.7061, |
|
"step": 4985 |
|
}, |
|
{ |
|
"epoch": 0.8067906224737268, |
|
"grad_norm": 1.4832879916845079, |
|
"learning_rate": 1.0948677092160291e-06, |
|
"loss": 0.7221, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.8075990299110751, |
|
"grad_norm": 1.4525799430657014, |
|
"learning_rate": 1.0860712070418933e-06, |
|
"loss": 0.699, |
|
"step": 4995 |
|
}, |
|
{ |
|
"epoch": 0.8084074373484236, |
|
"grad_norm": 1.547946240178616, |
|
"learning_rate": 1.0773058769621015e-06, |
|
"loss": 0.7287, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.809215844785772, |
|
"grad_norm": 1.6138843907647331, |
|
"learning_rate": 1.0685717887872504e-06, |
|
"loss": 0.6947, |
|
"step": 5005 |
|
}, |
|
{ |
|
"epoch": 0.8100242522231205, |
|
"grad_norm": 1.5640680796292272, |
|
"learning_rate": 1.059869012079109e-06, |
|
"loss": 0.7008, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.8108326596604689, |
|
"grad_norm": 1.8041754598860973, |
|
"learning_rate": 1.0511976161500737e-06, |
|
"loss": 0.7132, |
|
"step": 5015 |
|
}, |
|
{ |
|
"epoch": 0.8116410670978172, |
|
"grad_norm": 1.4268846951440264, |
|
"learning_rate": 1.0425576700626084e-06, |
|
"loss": 0.682, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.8124494745351657, |
|
"grad_norm": 1.4889490939202308, |
|
"learning_rate": 1.0339492426287012e-06, |
|
"loss": 0.7013, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 0.8132578819725141, |
|
"grad_norm": 1.3950850556481698, |
|
"learning_rate": 1.0253724024093103e-06, |
|
"loss": 0.7251, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.8140662894098626, |
|
"grad_norm": 1.4223156108512096, |
|
"learning_rate": 1.01682721771382e-06, |
|
"loss": 0.6944, |
|
"step": 5035 |
|
}, |
|
{ |
|
"epoch": 0.814874696847211, |
|
"grad_norm": 1.5095272095787708, |
|
"learning_rate": 1.008313756599502e-06, |
|
"loss": 0.6973, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.8156831042845594, |
|
"grad_norm": 1.590280282889916, |
|
"learning_rate": 9.998320868709632e-07, |
|
"loss": 0.7052, |
|
"step": 5045 |
|
}, |
|
{ |
|
"epoch": 0.8164915117219078, |
|
"grad_norm": 1.4357330509882016, |
|
"learning_rate": 9.91382276079615e-07, |
|
"loss": 0.7014, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.8172999191592563, |
|
"grad_norm": 1.6143043421499383, |
|
"learning_rate": 9.829643915231308e-07, |
|
"loss": 0.7177, |
|
"step": 5055 |
|
}, |
|
{ |
|
"epoch": 0.8181083265966047, |
|
"grad_norm": 1.6121487430257533, |
|
"learning_rate": 9.745785002449076e-07, |
|
"loss": 0.6849, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.8189167340339532, |
|
"grad_norm": 1.550845575196845, |
|
"learning_rate": 9.662246690335414e-07, |
|
"loss": 0.7213, |
|
"step": 5065 |
|
}, |
|
{ |
|
"epoch": 0.8197251414713015, |
|
"grad_norm": 1.9173659278509716, |
|
"learning_rate": 9.579029644222827e-07, |
|
"loss": 0.7148, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.8205335489086499, |
|
"grad_norm": 1.5661412726726536, |
|
"learning_rate": 9.496134526885142e-07, |
|
"loss": 0.7012, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 0.8213419563459984, |
|
"grad_norm": 1.4097401722824516, |
|
"learning_rate": 9.413561998532262e-07, |
|
"loss": 0.6902, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.8221503637833468, |
|
"grad_norm": 1.5165033710569626, |
|
"learning_rate": 9.331312716804791e-07, |
|
"loss": 0.7072, |
|
"step": 5085 |
|
}, |
|
{ |
|
"epoch": 0.8229587712206953, |
|
"grad_norm": 1.4294694612217773, |
|
"learning_rate": 9.249387336768944e-07, |
|
"loss": 0.7064, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.8237671786580436, |
|
"grad_norm": 1.579502358096418, |
|
"learning_rate": 9.167786510911186e-07, |
|
"loss": 0.7231, |
|
"step": 5095 |
|
}, |
|
{ |
|
"epoch": 0.824575586095392, |
|
"grad_norm": 1.6220290591033253, |
|
"learning_rate": 9.086510889133154e-07, |
|
"loss": 0.7057, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.8253839935327405, |
|
"grad_norm": 1.3661914009939502, |
|
"learning_rate": 9.005561118746381e-07, |
|
"loss": 0.6835, |
|
"step": 5105 |
|
}, |
|
{ |
|
"epoch": 0.826192400970089, |
|
"grad_norm": 1.4598647759066914, |
|
"learning_rate": 8.92493784446724e-07, |
|
"loss": 0.6836, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.8270008084074374, |
|
"grad_norm": 1.628879143000057, |
|
"learning_rate": 8.844641708411716e-07, |
|
"loss": 0.7071, |
|
"step": 5115 |
|
}, |
|
{ |
|
"epoch": 0.8278092158447857, |
|
"grad_norm": 1.3295496775714484, |
|
"learning_rate": 8.764673350090375e-07, |
|
"loss": 0.7048, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.8286176232821342, |
|
"grad_norm": 1.5416846282683871, |
|
"learning_rate": 8.685033406403193e-07, |
|
"loss": 0.7318, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 0.8294260307194826, |
|
"grad_norm": 1.4353888828295704, |
|
"learning_rate": 8.605722511634517e-07, |
|
"loss": 0.6864, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.8302344381568311, |
|
"grad_norm": 1.5168677776758168, |
|
"learning_rate": 8.526741297448055e-07, |
|
"loss": 0.7042, |
|
"step": 5135 |
|
}, |
|
{ |
|
"epoch": 0.8310428455941795, |
|
"grad_norm": 1.8851927163642166, |
|
"learning_rate": 8.448090392881797e-07, |
|
"loss": 0.6996, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.8318512530315278, |
|
"grad_norm": 1.5320231668585813, |
|
"learning_rate": 8.369770424342977e-07, |
|
"loss": 0.7029, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8326596604688763, |
|
"grad_norm": 1.4671336484132966, |
|
"learning_rate": 8.291782015603179e-07, |
|
"loss": 0.7119, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.8334680679062247, |
|
"grad_norm": 1.457906963528756, |
|
"learning_rate": 8.214125787793253e-07, |
|
"loss": 0.6918, |
|
"step": 5155 |
|
}, |
|
{ |
|
"epoch": 0.8342764753435732, |
|
"grad_norm": 1.763667060951844, |
|
"learning_rate": 8.136802359398488e-07, |
|
"loss": 0.7089, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.8350848827809216, |
|
"grad_norm": 1.4454996633987895, |
|
"learning_rate": 8.059812346253576e-07, |
|
"loss": 0.7034, |
|
"step": 5165 |
|
}, |
|
{ |
|
"epoch": 0.83589329021827, |
|
"grad_norm": 1.5476432315521382, |
|
"learning_rate": 7.983156361537764e-07, |
|
"loss": 0.7167, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.8367016976556184, |
|
"grad_norm": 1.5243845322424452, |
|
"learning_rate": 7.906835015770003e-07, |
|
"loss": 0.7141, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 0.8375101050929669, |
|
"grad_norm": 1.5839952810553182, |
|
"learning_rate": 7.830848916803985e-07, |
|
"loss": 0.7094, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.8383185125303153, |
|
"grad_norm": 1.4970253587506417, |
|
"learning_rate": 7.755198669823416e-07, |
|
"loss": 0.6893, |
|
"step": 5185 |
|
}, |
|
{ |
|
"epoch": 0.8391269199676638, |
|
"grad_norm": 1.6540289854643369, |
|
"learning_rate": 7.679884877337124e-07, |
|
"loss": 0.7106, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.8399353274050121, |
|
"grad_norm": 1.7088190894280755, |
|
"learning_rate": 7.604908139174255e-07, |
|
"loss": 0.7042, |
|
"step": 5195 |
|
}, |
|
{ |
|
"epoch": 0.8407437348423605, |
|
"grad_norm": 1.6508440609892434, |
|
"learning_rate": 7.530269052479561e-07, |
|
"loss": 0.688, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.841552142279709, |
|
"grad_norm": 1.519149972828735, |
|
"learning_rate": 7.455968211708569e-07, |
|
"loss": 0.6955, |
|
"step": 5205 |
|
}, |
|
{ |
|
"epoch": 0.8423605497170574, |
|
"grad_norm": 1.6854832467205276, |
|
"learning_rate": 7.382006208622889e-07, |
|
"loss": 0.7115, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.8431689571544059, |
|
"grad_norm": 1.4128267828965078, |
|
"learning_rate": 7.30838363228551e-07, |
|
"loss": 0.7038, |
|
"step": 5215 |
|
}, |
|
{ |
|
"epoch": 0.8439773645917542, |
|
"grad_norm": 1.461389977118903, |
|
"learning_rate": 7.235101069056061e-07, |
|
"loss": 0.7149, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.8447857720291027, |
|
"grad_norm": 1.5690615776923793, |
|
"learning_rate": 7.162159102586203e-07, |
|
"loss": 0.7015, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 0.8455941794664511, |
|
"grad_norm": 1.5502555021326645, |
|
"learning_rate": 7.089558313814909e-07, |
|
"loss": 0.7079, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.8464025869037995, |
|
"grad_norm": 1.6274177821627565, |
|
"learning_rate": 7.017299280963918e-07, |
|
"loss": 0.7039, |
|
"step": 5235 |
|
}, |
|
{ |
|
"epoch": 0.847210994341148, |
|
"grad_norm": 1.2857795563254066, |
|
"learning_rate": 6.945382579533061e-07, |
|
"loss": 0.7262, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.8480194017784963, |
|
"grad_norm": 1.7801567985934275, |
|
"learning_rate": 6.873808782295715e-07, |
|
"loss": 0.694, |
|
"step": 5245 |
|
}, |
|
{ |
|
"epoch": 0.8488278092158448, |
|
"grad_norm": 1.465131459024236, |
|
"learning_rate": 6.802578459294235e-07, |
|
"loss": 0.7064, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.8496362166531932, |
|
"grad_norm": 1.4110023215116603, |
|
"learning_rate": 6.731692177835381e-07, |
|
"loss": 0.7042, |
|
"step": 5255 |
|
}, |
|
{ |
|
"epoch": 0.8504446240905417, |
|
"grad_norm": 1.470115925149478, |
|
"learning_rate": 6.661150502485875e-07, |
|
"loss": 0.6949, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.85125303152789, |
|
"grad_norm": 1.5987322730856806, |
|
"learning_rate": 6.590953995067812e-07, |
|
"loss": 0.6898, |
|
"step": 5265 |
|
}, |
|
{ |
|
"epoch": 0.8520614389652384, |
|
"grad_norm": 1.4703074530943865, |
|
"learning_rate": 6.521103214654262e-07, |
|
"loss": 0.7021, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.8528698464025869, |
|
"grad_norm": 1.5583892786451392, |
|
"learning_rate": 6.451598717564794e-07, |
|
"loss": 0.7127, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 0.8536782538399353, |
|
"grad_norm": 1.6831916055099336, |
|
"learning_rate": 6.382441057361e-07, |
|
"loss": 0.7242, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.8544866612772838, |
|
"grad_norm": 1.7877284298900433, |
|
"learning_rate": 6.313630784842168e-07, |
|
"loss": 0.7057, |
|
"step": 5285 |
|
}, |
|
{ |
|
"epoch": 0.8552950687146321, |
|
"grad_norm": 1.5818124463307648, |
|
"learning_rate": 6.245168448040811e-07, |
|
"loss": 0.6779, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.8561034761519806, |
|
"grad_norm": 1.2767314164369492, |
|
"learning_rate": 6.177054592218363e-07, |
|
"loss": 0.7158, |
|
"step": 5295 |
|
}, |
|
{ |
|
"epoch": 0.856911883589329, |
|
"grad_norm": 1.453861814250626, |
|
"learning_rate": 6.109289759860826e-07, |
|
"loss": 0.7206, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.8577202910266775, |
|
"grad_norm": 1.519729144030008, |
|
"learning_rate": 6.041874490674416e-07, |
|
"loss": 0.6963, |
|
"step": 5305 |
|
}, |
|
{ |
|
"epoch": 0.8585286984640259, |
|
"grad_norm": 1.5493355027852693, |
|
"learning_rate": 5.974809321581315e-07, |
|
"loss": 0.6907, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.8593371059013742, |
|
"grad_norm": 1.4314722635920178, |
|
"learning_rate": 5.908094786715341e-07, |
|
"loss": 0.6837, |
|
"step": 5315 |
|
}, |
|
{ |
|
"epoch": 0.8601455133387227, |
|
"grad_norm": 1.5610865108849659, |
|
"learning_rate": 5.841731417417735e-07, |
|
"loss": 0.6957, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.8609539207760711, |
|
"grad_norm": 1.4724518613233466, |
|
"learning_rate": 5.775719742232927e-07, |
|
"loss": 0.7125, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 0.8617623282134196, |
|
"grad_norm": 1.3737829099361796, |
|
"learning_rate": 5.71006028690429e-07, |
|
"loss": 0.6942, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.862570735650768, |
|
"grad_norm": 1.5545604898279772, |
|
"learning_rate": 5.644753574369987e-07, |
|
"loss": 0.7006, |
|
"step": 5335 |
|
}, |
|
{ |
|
"epoch": 0.8633791430881164, |
|
"grad_norm": 1.5672128023139609, |
|
"learning_rate": 5.579800124758789e-07, |
|
"loss": 0.6858, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.8641875505254648, |
|
"grad_norm": 1.5144163766376024, |
|
"learning_rate": 5.515200455385955e-07, |
|
"loss": 0.7224, |
|
"step": 5345 |
|
}, |
|
{ |
|
"epoch": 0.8649959579628133, |
|
"grad_norm": 1.7093478444467205, |
|
"learning_rate": 5.450955080749099e-07, |
|
"loss": 0.7012, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.8658043654001617, |
|
"grad_norm": 1.4151660570069253, |
|
"learning_rate": 5.387064512524065e-07, |
|
"loss": 0.6955, |
|
"step": 5355 |
|
}, |
|
{ |
|
"epoch": 0.8666127728375101, |
|
"grad_norm": 1.6373642598729596, |
|
"learning_rate": 5.323529259560911e-07, |
|
"loss": 0.6996, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.8674211802748585, |
|
"grad_norm": 1.4331673797986197, |
|
"learning_rate": 5.260349827879785e-07, |
|
"loss": 0.7088, |
|
"step": 5365 |
|
}, |
|
{ |
|
"epoch": 0.8682295877122069, |
|
"grad_norm": 1.6411349503885173, |
|
"learning_rate": 5.197526720666963e-07, |
|
"loss": 0.686, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.8690379951495554, |
|
"grad_norm": 1.4837007483886673, |
|
"learning_rate": 5.135060438270784e-07, |
|
"loss": 0.6867, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 0.8698464025869038, |
|
"grad_norm": 1.5770702161207801, |
|
"learning_rate": 5.072951478197724e-07, |
|
"loss": 0.7245, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.8706548100242523, |
|
"grad_norm": 1.552555517411911, |
|
"learning_rate": 5.011200335108379e-07, |
|
"loss": 0.7042, |
|
"step": 5385 |
|
}, |
|
{ |
|
"epoch": 0.8714632174616006, |
|
"grad_norm": 1.7490965534967693, |
|
"learning_rate": 4.94980750081353e-07, |
|
"loss": 0.7021, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 0.872271624898949, |
|
"grad_norm": 1.6919904276080737, |
|
"learning_rate": 4.888773464270286e-07, |
|
"loss": 0.7054, |
|
"step": 5395 |
|
}, |
|
{ |
|
"epoch": 0.8730800323362975, |
|
"grad_norm": 2.059158063480853, |
|
"learning_rate": 4.828098711578116e-07, |
|
"loss": 0.7055, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.8738884397736459, |
|
"grad_norm": 1.7275320576169928, |
|
"learning_rate": 4.767783725975017e-07, |
|
"loss": 0.71, |
|
"step": 5405 |
|
}, |
|
{ |
|
"epoch": 0.8746968472109944, |
|
"grad_norm": 1.7182854663915335, |
|
"learning_rate": 4.7078289878336737e-07, |
|
"loss": 0.6998, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 0.8755052546483427, |
|
"grad_norm": 1.5247564407950007, |
|
"learning_rate": 4.6482349746575783e-07, |
|
"loss": 0.6861, |
|
"step": 5415 |
|
}, |
|
{ |
|
"epoch": 0.8763136620856912, |
|
"grad_norm": 1.6432058150970736, |
|
"learning_rate": 4.589002161077305e-07, |
|
"loss": 0.686, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.8771220695230396, |
|
"grad_norm": 1.5847832306944918, |
|
"learning_rate": 4.5301310188466676e-07, |
|
"loss": 0.7039, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 0.8779304769603881, |
|
"grad_norm": 1.4647113649819359, |
|
"learning_rate": 4.4716220168389777e-07, |
|
"loss": 0.6938, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 0.8787388843977365, |
|
"grad_norm": 1.5111904344889613, |
|
"learning_rate": 4.4134756210433505e-07, |
|
"loss": 0.6937, |
|
"step": 5435 |
|
}, |
|
{ |
|
"epoch": 0.8795472918350848, |
|
"grad_norm": 1.8253966937758632, |
|
"learning_rate": 4.355692294560915e-07, |
|
"loss": 0.6878, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.8803556992724333, |
|
"grad_norm": 1.426451063493756, |
|
"learning_rate": 4.2982724976012134e-07, |
|
"loss": 0.6902, |
|
"step": 5445 |
|
}, |
|
{ |
|
"epoch": 0.8811641067097817, |
|
"grad_norm": 1.4894657386745687, |
|
"learning_rate": 4.241216687478455e-07, |
|
"loss": 0.6967, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.8819725141471302, |
|
"grad_norm": 1.413659434241133, |
|
"learning_rate": 4.1845253186079513e-07, |
|
"loss": 0.7019, |
|
"step": 5455 |
|
}, |
|
{ |
|
"epoch": 0.8827809215844786, |
|
"grad_norm": 1.4472062769677223, |
|
"learning_rate": 4.12819884250244e-07, |
|
"loss": 0.6845, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.883589329021827, |
|
"grad_norm": 1.6958560319752523, |
|
"learning_rate": 4.0722377077684947e-07, |
|
"loss": 0.6912, |
|
"step": 5465 |
|
}, |
|
{ |
|
"epoch": 0.8843977364591754, |
|
"grad_norm": 1.5193788282930156, |
|
"learning_rate": 4.0166423601029735e-07, |
|
"loss": 0.7096, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 0.8852061438965239, |
|
"grad_norm": 1.4417388906412087, |
|
"learning_rate": 3.9614132422894637e-07, |
|
"loss": 0.6979, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 0.8860145513338723, |
|
"grad_norm": 1.6887342097664206, |
|
"learning_rate": 3.9065507941947467e-07, |
|
"loss": 0.711, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.8868229587712207, |
|
"grad_norm": 1.5209079055984485, |
|
"learning_rate": 3.852055452765313e-07, |
|
"loss": 0.7, |
|
"step": 5485 |
|
}, |
|
{ |
|
"epoch": 0.8876313662085691, |
|
"grad_norm": 1.4951444788612174, |
|
"learning_rate": 3.797927652023847e-07, |
|
"loss": 0.7025, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 0.8884397736459175, |
|
"grad_norm": 1.5994588482558039, |
|
"learning_rate": 3.744167823065814e-07, |
|
"loss": 0.7053, |
|
"step": 5495 |
|
}, |
|
{ |
|
"epoch": 0.889248181083266, |
|
"grad_norm": 1.6223372164127636, |
|
"learning_rate": 3.6907763940559784e-07, |
|
"loss": 0.6903, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.8900565885206144, |
|
"grad_norm": 1.5787830461685994, |
|
"learning_rate": 3.6377537902250573e-07, |
|
"loss": 0.6968, |
|
"step": 5505 |
|
}, |
|
{ |
|
"epoch": 0.8908649959579629, |
|
"grad_norm": 1.5012854464186227, |
|
"learning_rate": 3.5851004338662564e-07, |
|
"loss": 0.7075, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 0.8916734033953112, |
|
"grad_norm": 1.5816110130647192, |
|
"learning_rate": 3.532816744331963e-07, |
|
"loss": 0.7063, |
|
"step": 5515 |
|
}, |
|
{ |
|
"epoch": 0.8924818108326596, |
|
"grad_norm": 1.6516342043629912, |
|
"learning_rate": 3.4809031380304114e-07, |
|
"loss": 0.7056, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.8932902182700081, |
|
"grad_norm": 1.4324191798602168, |
|
"learning_rate": 3.429360028422307e-07, |
|
"loss": 0.7124, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 0.8940986257073565, |
|
"grad_norm": 1.5278351089643472, |
|
"learning_rate": 3.378187826017604e-07, |
|
"loss": 0.6951, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 0.8949070331447049, |
|
"grad_norm": 1.6668491680320379, |
|
"learning_rate": 3.3273869383721734e-07, |
|
"loss": 0.7165, |
|
"step": 5535 |
|
}, |
|
{ |
|
"epoch": 0.8957154405820533, |
|
"grad_norm": 1.4121703345362437, |
|
"learning_rate": 3.276957770084616e-07, |
|
"loss": 0.705, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.8965238480194018, |
|
"grad_norm": 1.295321212557015, |
|
"learning_rate": 3.2269007227930026e-07, |
|
"loss": 0.6945, |
|
"step": 5545 |
|
}, |
|
{ |
|
"epoch": 0.8973322554567502, |
|
"grad_norm": 1.491642893775521, |
|
"learning_rate": 3.177216195171673e-07, |
|
"loss": 0.71, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.8981406628940987, |
|
"grad_norm": 1.5093040697730336, |
|
"learning_rate": 3.1279045829280706e-07, |
|
"loss": 0.7097, |
|
"step": 5555 |
|
}, |
|
{ |
|
"epoch": 0.898949070331447, |
|
"grad_norm": 1.2316504521547396, |
|
"learning_rate": 3.0789662787996e-07, |
|
"loss": 0.6965, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.8997574777687954, |
|
"grad_norm": 1.550192376633313, |
|
"learning_rate": 3.030401672550487e-07, |
|
"loss": 0.6996, |
|
"step": 5565 |
|
}, |
|
{ |
|
"epoch": 0.9005658852061439, |
|
"grad_norm": 1.484003056341841, |
|
"learning_rate": 2.9822111509687e-07, |
|
"loss": 0.7065, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 0.9013742926434923, |
|
"grad_norm": 1.443207613038234, |
|
"learning_rate": 2.9343950978627965e-07, |
|
"loss": 0.7074, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 0.9021827000808408, |
|
"grad_norm": 1.3381987402914712, |
|
"learning_rate": 2.88695389405898e-07, |
|
"loss": 0.6988, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.9029911075181891, |
|
"grad_norm": 1.5561292741790862, |
|
"learning_rate": 2.8398879173979434e-07, |
|
"loss": 0.6943, |
|
"step": 5585 |
|
}, |
|
{ |
|
"epoch": 0.9037995149555376, |
|
"grad_norm": 1.5456031956017944, |
|
"learning_rate": 2.7931975427319734e-07, |
|
"loss": 0.7075, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 0.904607922392886, |
|
"grad_norm": 1.2770024516759357, |
|
"learning_rate": 2.746883141921869e-07, |
|
"loss": 0.7082, |
|
"step": 5595 |
|
}, |
|
{ |
|
"epoch": 0.9054163298302345, |
|
"grad_norm": 1.5503292032340625, |
|
"learning_rate": 2.7009450838340613e-07, |
|
"loss": 0.7019, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.9062247372675829, |
|
"grad_norm": 1.4285805273520935, |
|
"learning_rate": 2.6553837343376023e-07, |
|
"loss": 0.7018, |
|
"step": 5605 |
|
}, |
|
{ |
|
"epoch": 0.9070331447049312, |
|
"grad_norm": 1.3528073503244311, |
|
"learning_rate": 2.61019945630131e-07, |
|
"loss": 0.6897, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 0.9078415521422797, |
|
"grad_norm": 1.8090163212162127, |
|
"learning_rate": 2.5653926095908446e-07, |
|
"loss": 0.7228, |
|
"step": 5615 |
|
}, |
|
{ |
|
"epoch": 0.9086499595796281, |
|
"grad_norm": 1.3823209097986053, |
|
"learning_rate": 2.520963551065853e-07, |
|
"loss": 0.7024, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.9094583670169766, |
|
"grad_norm": 1.5088907496000867, |
|
"learning_rate": 2.476912634577128e-07, |
|
"loss": 0.689, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 0.910266774454325, |
|
"grad_norm": 1.4516203595743193, |
|
"learning_rate": 2.4332402109638e-07, |
|
"loss": 0.7139, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 0.9110751818916734, |
|
"grad_norm": 1.8781761391343346, |
|
"learning_rate": 2.3899466280504936e-07, |
|
"loss": 0.6915, |
|
"step": 5635 |
|
}, |
|
{ |
|
"epoch": 0.9118835893290218, |
|
"grad_norm": 1.563214928880183, |
|
"learning_rate": 2.3470322306446468e-07, |
|
"loss": 0.7289, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.9126919967663703, |
|
"grad_norm": 1.8065914033787347, |
|
"learning_rate": 2.304497360533664e-07, |
|
"loss": 0.6889, |
|
"step": 5645 |
|
}, |
|
{ |
|
"epoch": 0.9135004042037187, |
|
"grad_norm": 1.889183469859966, |
|
"learning_rate": 2.2623423564822666e-07, |
|
"loss": 0.72, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.9143088116410671, |
|
"grad_norm": 1.6535990324640262, |
|
"learning_rate": 2.22056755422978e-07, |
|
"loss": 0.7238, |
|
"step": 5655 |
|
}, |
|
{ |
|
"epoch": 0.9151172190784155, |
|
"grad_norm": 1.3746775282920567, |
|
"learning_rate": 2.1791732864874182e-07, |
|
"loss": 0.7097, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.9159256265157639, |
|
"grad_norm": 1.3780616233800305, |
|
"learning_rate": 2.1381598829357031e-07, |
|
"loss": 0.7201, |
|
"step": 5665 |
|
}, |
|
{ |
|
"epoch": 0.9167340339531124, |
|
"grad_norm": 1.496841353204978, |
|
"learning_rate": 2.0975276702217716e-07, |
|
"loss": 0.7155, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 0.9175424413904608, |
|
"grad_norm": 1.3354995789369437, |
|
"learning_rate": 2.0572769719568286e-07, |
|
"loss": 0.7035, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 0.9183508488278093, |
|
"grad_norm": 1.2888588034538615, |
|
"learning_rate": 2.0174081087135312e-07, |
|
"loss": 0.7035, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.9191592562651576, |
|
"grad_norm": 1.3070065889530205, |
|
"learning_rate": 1.9779213980234468e-07, |
|
"loss": 0.6906, |
|
"step": 5685 |
|
}, |
|
{ |
|
"epoch": 0.919967663702506, |
|
"grad_norm": 1.649361503680924, |
|
"learning_rate": 1.9388171543745394e-07, |
|
"loss": 0.6991, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 0.9207760711398545, |
|
"grad_norm": 1.4618951093045796, |
|
"learning_rate": 1.9000956892086363e-07, |
|
"loss": 0.7114, |
|
"step": 5695 |
|
}, |
|
{ |
|
"epoch": 0.9215844785772029, |
|
"grad_norm": 1.858821992427871, |
|
"learning_rate": 1.861757310918977e-07, |
|
"loss": 0.6981, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.9223928860145514, |
|
"grad_norm": 1.4390333751784132, |
|
"learning_rate": 1.823802324847751e-07, |
|
"loss": 0.6947, |
|
"step": 5705 |
|
}, |
|
{ |
|
"epoch": 0.9232012934518997, |
|
"grad_norm": 1.3446164451118023, |
|
"learning_rate": 1.7862310332836307e-07, |
|
"loss": 0.7165, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 0.9240097008892482, |
|
"grad_norm": 1.3267887077701044, |
|
"learning_rate": 1.749043735459427e-07, |
|
"loss": 0.6914, |
|
"step": 5715 |
|
}, |
|
{ |
|
"epoch": 0.9248181083265966, |
|
"grad_norm": 1.4452492665400665, |
|
"learning_rate": 1.7122407275496411e-07, |
|
"loss": 0.6994, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.9256265157639451, |
|
"grad_norm": 1.6067816385523603, |
|
"learning_rate": 1.6758223026681507e-07, |
|
"loss": 0.7077, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 0.9264349232012935, |
|
"grad_norm": 1.3759663515194218, |
|
"learning_rate": 1.639788750865867e-07, |
|
"loss": 0.6867, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 0.9272433306386418, |
|
"grad_norm": 1.4050656917463036, |
|
"learning_rate": 1.6041403591283866e-07, |
|
"loss": 0.7155, |
|
"step": 5735 |
|
}, |
|
{ |
|
"epoch": 0.9280517380759903, |
|
"grad_norm": 1.2966156059566525, |
|
"learning_rate": 1.5688774113737814e-07, |
|
"loss": 0.6991, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.9288601455133387, |
|
"grad_norm": 1.3415985484676374, |
|
"learning_rate": 1.5340001884502577e-07, |
|
"loss": 0.7077, |
|
"step": 5745 |
|
}, |
|
{ |
|
"epoch": 0.9296685529506872, |
|
"grad_norm": 1.3587674188346928, |
|
"learning_rate": 1.499508968133978e-07, |
|
"loss": 0.6907, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.9304769603880356, |
|
"grad_norm": 1.4836521085637517, |
|
"learning_rate": 1.4654040251268097e-07, |
|
"loss": 0.711, |
|
"step": 5755 |
|
}, |
|
{ |
|
"epoch": 0.931285367825384, |
|
"grad_norm": 1.5356253203571395, |
|
"learning_rate": 1.4316856310541638e-07, |
|
"loss": 0.7027, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.9320937752627324, |
|
"grad_norm": 1.3179793459708602, |
|
"learning_rate": 1.3983540544628138e-07, |
|
"loss": 0.6885, |
|
"step": 5765 |
|
}, |
|
{ |
|
"epoch": 0.9329021827000809, |
|
"grad_norm": 1.4026702983758432, |
|
"learning_rate": 1.3654095608187757e-07, |
|
"loss": 0.681, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 0.9337105901374293, |
|
"grad_norm": 1.6044541361178586, |
|
"learning_rate": 1.332852412505159e-07, |
|
"loss": 0.7184, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 0.9345189975747777, |
|
"grad_norm": 1.3569634212248618, |
|
"learning_rate": 1.300682868820119e-07, |
|
"loss": 0.6993, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.9353274050121261, |
|
"grad_norm": 1.4476284125206074, |
|
"learning_rate": 1.2689011859747745e-07, |
|
"loss": 0.699, |
|
"step": 5785 |
|
}, |
|
{ |
|
"epoch": 0.9361358124494745, |
|
"grad_norm": 1.2406212283464149, |
|
"learning_rate": 1.2375076170911604e-07, |
|
"loss": 0.6838, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 0.936944219886823, |
|
"grad_norm": 1.3821866002119294, |
|
"learning_rate": 1.2065024122002055e-07, |
|
"loss": 0.6936, |
|
"step": 5795 |
|
}, |
|
{ |
|
"epoch": 0.9377526273241714, |
|
"grad_norm": 1.5651244760885679, |
|
"learning_rate": 1.1758858182397692e-07, |
|
"loss": 0.6886, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.9385610347615198, |
|
"grad_norm": 1.4282620996566417, |
|
"learning_rate": 1.1456580790526528e-07, |
|
"loss": 0.7081, |
|
"step": 5805 |
|
}, |
|
{ |
|
"epoch": 0.9393694421988682, |
|
"grad_norm": 1.5025168367086414, |
|
"learning_rate": 1.1158194353846574e-07, |
|
"loss": 0.6859, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 0.9401778496362166, |
|
"grad_norm": 1.3512739774700069, |
|
"learning_rate": 1.0863701248826797e-07, |
|
"loss": 0.7225, |
|
"step": 5815 |
|
}, |
|
{ |
|
"epoch": 0.9409862570735651, |
|
"grad_norm": 1.4231363820423177, |
|
"learning_rate": 1.0573103820928022e-07, |
|
"loss": 0.706, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.9417946645109135, |
|
"grad_norm": 1.4255699165992657, |
|
"learning_rate": 1.0286404384584448e-07, |
|
"loss": 0.7289, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 0.9426030719482619, |
|
"grad_norm": 1.3866774534958695, |
|
"learning_rate": 1.0003605223184998e-07, |
|
"loss": 0.6676, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 0.9434114793856103, |
|
"grad_norm": 1.558561584720674, |
|
"learning_rate": 9.724708589055332e-08, |
|
"loss": 0.6827, |
|
"step": 5835 |
|
}, |
|
{ |
|
"epoch": 0.9442198868229588, |
|
"grad_norm": 1.4814199460791437, |
|
"learning_rate": 9.449716703439805e-08, |
|
"loss": 0.7012, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.9450282942603072, |
|
"grad_norm": 1.4398896590301569, |
|
"learning_rate": 9.178631756483758e-08, |
|
"loss": 0.7222, |
|
"step": 5845 |
|
}, |
|
{ |
|
"epoch": 0.9458367016976557, |
|
"grad_norm": 1.4274832095739076, |
|
"learning_rate": 8.911455907216149e-08, |
|
"loss": 0.6974, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.946645109135004, |
|
"grad_norm": 1.3302674791379907, |
|
"learning_rate": 8.648191283532337e-08, |
|
"loss": 0.7109, |
|
"step": 5855 |
|
}, |
|
{ |
|
"epoch": 0.9474535165723524, |
|
"grad_norm": 1.518044488774888, |
|
"learning_rate": 8.388839982176988e-08, |
|
"loss": 0.6706, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.9482619240097009, |
|
"grad_norm": 1.3913177767364981, |
|
"learning_rate": 8.133404068727702e-08, |
|
"loss": 0.7175, |
|
"step": 5865 |
|
}, |
|
{ |
|
"epoch": 0.9490703314470493, |
|
"grad_norm": 1.340352098968631, |
|
"learning_rate": 7.881885577578185e-08, |
|
"loss": 0.696, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 0.9498787388843978, |
|
"grad_norm": 1.371861271599684, |
|
"learning_rate": 7.634286511922384e-08, |
|
"loss": 0.7122, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 0.9506871463217461, |
|
"grad_norm": 1.30296931416904, |
|
"learning_rate": 7.390608843738156e-08, |
|
"loss": 0.6949, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.9514955537590946, |
|
"grad_norm": 1.3002210105122816, |
|
"learning_rate": 7.150854513772009e-08, |
|
"loss": 0.7001, |
|
"step": 5885 |
|
}, |
|
{ |
|
"epoch": 0.952303961196443, |
|
"grad_norm": 1.2765177058238852, |
|
"learning_rate": 6.915025431523282e-08, |
|
"loss": 0.7014, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 0.9531123686337915, |
|
"grad_norm": 1.5746971313785787, |
|
"learning_rate": 6.683123475229148e-08, |
|
"loss": 0.7083, |
|
"step": 5895 |
|
}, |
|
{ |
|
"epoch": 0.9539207760711399, |
|
"grad_norm": 1.5888397024986494, |
|
"learning_rate": 6.455150491849527e-08, |
|
"loss": 0.6858, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.9547291835084882, |
|
"grad_norm": 1.4426978109483788, |
|
"learning_rate": 6.231108297052424e-08, |
|
"loss": 0.7146, |
|
"step": 5905 |
|
}, |
|
{ |
|
"epoch": 0.9555375909458367, |
|
"grad_norm": 1.5455545862291462, |
|
"learning_rate": 6.010998675199554e-08, |
|
"loss": 0.7077, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 0.9563459983831851, |
|
"grad_norm": 1.3957709152429087, |
|
"learning_rate": 5.794823379331793e-08, |
|
"loss": 0.7192, |
|
"step": 5915 |
|
}, |
|
{ |
|
"epoch": 0.9571544058205336, |
|
"grad_norm": 1.5204415869547911, |
|
"learning_rate": 5.582584131155866e-08, |
|
"loss": 0.7096, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.957962813257882, |
|
"grad_norm": 1.3205805736575398, |
|
"learning_rate": 5.3742826210299584e-08, |
|
"loss": 0.7033, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 0.9587712206952304, |
|
"grad_norm": 1.4266292287008944, |
|
"learning_rate": 5.169920507950621e-08, |
|
"loss": 0.6987, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 0.9595796281325788, |
|
"grad_norm": 1.480234828774731, |
|
"learning_rate": 4.9694994195394474e-08, |
|
"loss": 0.7157, |
|
"step": 5935 |
|
}, |
|
{ |
|
"epoch": 0.9603880355699272, |
|
"grad_norm": 1.7268574237528078, |
|
"learning_rate": 4.773020952030083e-08, |
|
"loss": 0.6952, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.9611964430072757, |
|
"grad_norm": 1.4923064684180711, |
|
"learning_rate": 4.58048667025579e-08, |
|
"loss": 0.7001, |
|
"step": 5945 |
|
}, |
|
{ |
|
"epoch": 0.9620048504446241, |
|
"grad_norm": 1.5378356376814888, |
|
"learning_rate": 4.391898107636461e-08, |
|
"loss": 0.6915, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.9628132578819725, |
|
"grad_norm": 1.3364927348435258, |
|
"learning_rate": 4.207256766166845e-08, |
|
"loss": 0.695, |
|
"step": 5955 |
|
}, |
|
{ |
|
"epoch": 0.9636216653193209, |
|
"grad_norm": 1.3683902840034843, |
|
"learning_rate": 4.0265641164045075e-08, |
|
"loss": 0.6916, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.9644300727566694, |
|
"grad_norm": 1.4925361751549704, |
|
"learning_rate": 3.849821597457892e-08, |
|
"loss": 0.6817, |
|
"step": 5965 |
|
}, |
|
{ |
|
"epoch": 0.9652384801940178, |
|
"grad_norm": 1.5452153453031308, |
|
"learning_rate": 3.677030616975163e-08, |
|
"loss": 0.7034, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 0.9660468876313663, |
|
"grad_norm": 1.4457585595064018, |
|
"learning_rate": 3.508192551132883e-08, |
|
"loss": 0.6761, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 0.9668552950687146, |
|
"grad_norm": 1.5607103585541133, |
|
"learning_rate": 3.34330874462474e-08, |
|
"loss": 0.698, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.967663702506063, |
|
"grad_norm": 1.388649382711461, |
|
"learning_rate": 3.182380510651506e-08, |
|
"loss": 0.7104, |
|
"step": 5985 |
|
}, |
|
{ |
|
"epoch": 0.9684721099434115, |
|
"grad_norm": 1.5025532102637633, |
|
"learning_rate": 3.025409130909929e-08, |
|
"loss": 0.6927, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 0.9692805173807599, |
|
"grad_norm": 1.5115410164451122, |
|
"learning_rate": 2.8723958555827993e-08, |
|
"loss": 0.7021, |
|
"step": 5995 |
|
}, |
|
{ |
|
"epoch": 0.9700889248181084, |
|
"grad_norm": 1.4667370044065153, |
|
"learning_rate": 2.723341903329124e-08, |
|
"loss": 0.7011, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.9708973322554567, |
|
"grad_norm": 1.3729938733212024, |
|
"learning_rate": 2.5782484612741908e-08, |
|
"loss": 0.7062, |
|
"step": 6005 |
|
}, |
|
{ |
|
"epoch": 0.9717057396928052, |
|
"grad_norm": 1.3730064673661673, |
|
"learning_rate": 2.4371166850001292e-08, |
|
"loss": 0.7119, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 0.9725141471301536, |
|
"grad_norm": 1.4865892461508956, |
|
"learning_rate": 2.2999476985369196e-08, |
|
"loss": 0.6984, |
|
"step": 6015 |
|
}, |
|
{ |
|
"epoch": 0.973322554567502, |
|
"grad_norm": 1.5028651779794058, |
|
"learning_rate": 2.1667425943532884e-08, |
|
"loss": 0.695, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.9741309620048505, |
|
"grad_norm": 1.5599213421453288, |
|
"learning_rate": 2.0375024333478267e-08, |
|
"loss": 0.7174, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 0.9749393694421988, |
|
"grad_norm": 2.0104805596152, |
|
"learning_rate": 1.9122282448409413e-08, |
|
"loss": 0.7369, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 0.9757477768795473, |
|
"grad_norm": 1.581779872366385, |
|
"learning_rate": 1.7909210265664167e-08, |
|
"loss": 0.6792, |
|
"step": 6035 |
|
}, |
|
{ |
|
"epoch": 0.9765561843168957, |
|
"grad_norm": 1.3891174140800855, |
|
"learning_rate": 1.6735817446633663e-08, |
|
"loss": 0.7007, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.9773645917542442, |
|
"grad_norm": 1.3798388142109828, |
|
"learning_rate": 1.5602113336688485e-08, |
|
"loss": 0.6959, |
|
"step": 6045 |
|
}, |
|
{ |
|
"epoch": 0.9781729991915926, |
|
"grad_norm": 1.4507674372361408, |
|
"learning_rate": 1.450810696510041e-08, |
|
"loss": 0.7079, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.978981406628941, |
|
"grad_norm": 1.5549948970077827, |
|
"learning_rate": 1.3453807044975232e-08, |
|
"loss": 0.6892, |
|
"step": 6055 |
|
}, |
|
{ |
|
"epoch": 0.9797898140662894, |
|
"grad_norm": 1.3944647878700718, |
|
"learning_rate": 1.2439221973178372e-08, |
|
"loss": 0.6956, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.9805982215036378, |
|
"grad_norm": 1.2971921385179899, |
|
"learning_rate": 1.1464359830271055e-08, |
|
"loss": 0.6756, |
|
"step": 6065 |
|
}, |
|
{ |
|
"epoch": 0.9814066289409863, |
|
"grad_norm": 1.4195361555610082, |
|
"learning_rate": 1.05292283804459e-08, |
|
"loss": 0.7044, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 0.9822150363783346, |
|
"grad_norm": 1.3029462140916905, |
|
"learning_rate": 9.633835071463094e-09, |
|
"loss": 0.6926, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 0.9830234438156831, |
|
"grad_norm": 1.3681427565121465, |
|
"learning_rate": 8.778187034593766e-09, |
|
"loss": 0.7141, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.9838318512530315, |
|
"grad_norm": 1.580351014203535, |
|
"learning_rate": 7.962291084560592e-09, |
|
"loss": 0.6982, |
|
"step": 6085 |
|
}, |
|
{ |
|
"epoch": 0.98464025869038, |
|
"grad_norm": 1.402295885158622, |
|
"learning_rate": 7.186153719485056e-09, |
|
"loss": 0.7241, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 0.9854486661277284, |
|
"grad_norm": 1.4372946746103246, |
|
"learning_rate": 6.449781120836385e-09, |
|
"loss": 0.6943, |
|
"step": 6095 |
|
}, |
|
{ |
|
"epoch": 0.9862570735650767, |
|
"grad_norm": 1.354857429409591, |
|
"learning_rate": 5.753179153379362e-09, |
|
"loss": 0.6893, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.9870654810024252, |
|
"grad_norm": 1.2880968583775416, |
|
"learning_rate": 5.09635336513159e-09, |
|
"loss": 0.7132, |
|
"step": 6105 |
|
}, |
|
{ |
|
"epoch": 0.9878738884397736, |
|
"grad_norm": 1.5205030135158186, |
|
"learning_rate": 4.4793089873162995e-09, |
|
"loss": 0.6998, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 0.9886822958771221, |
|
"grad_norm": 1.4082262313295961, |
|
"learning_rate": 3.9020509343212775e-09, |
|
"loss": 0.6831, |
|
"step": 6115 |
|
}, |
|
{ |
|
"epoch": 0.9894907033144705, |
|
"grad_norm": 1.3411345047646837, |
|
"learning_rate": 3.3645838036611146e-09, |
|
"loss": 0.7041, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.9902991107518189, |
|
"grad_norm": 1.4474166300255564, |
|
"learning_rate": 2.8669118759383497e-09, |
|
"loss": 0.7057, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 0.9911075181891673, |
|
"grad_norm": 1.4662855334778262, |
|
"learning_rate": 2.4090391148112734e-09, |
|
"loss": 0.6817, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 0.9919159256265158, |
|
"grad_norm": 1.4814179796345557, |
|
"learning_rate": 1.9909691669622868e-09, |
|
"loss": 0.6871, |
|
"step": 6135 |
|
}, |
|
{ |
|
"epoch": 0.9927243330638642, |
|
"grad_norm": 1.574608287576024, |
|
"learning_rate": 1.6127053620673683e-09, |
|
"loss": 0.7386, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.9935327405012127, |
|
"grad_norm": 1.3774684096651242, |
|
"learning_rate": 1.2742507127710967e-09, |
|
"loss": 0.695, |
|
"step": 6145 |
|
}, |
|
{ |
|
"epoch": 0.994341147938561, |
|
"grad_norm": 1.557883359993657, |
|
"learning_rate": 9.75607914660559e-10, |
|
"loss": 0.7087, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.9951495553759094, |
|
"grad_norm": 1.382818271691103, |
|
"learning_rate": 7.167793462475869e-10, |
|
"loss": 0.7278, |
|
"step": 6155 |
|
}, |
|
{ |
|
"epoch": 0.9959579628132579, |
|
"grad_norm": 1.6471775994899713, |
|
"learning_rate": 4.977670689459979e-10, |
|
"loss": 0.7092, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.9967663702506063, |
|
"grad_norm": 1.5090005897564358, |
|
"learning_rate": 3.18572827057162e-10, |
|
"loss": 0.7281, |
|
"step": 6165 |
|
}, |
|
{ |
|
"epoch": 0.9975747776879548, |
|
"grad_norm": 1.6213042923583454, |
|
"learning_rate": 1.7919804775612394e-10, |
|
"loss": 0.6912, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 0.9983831851253031, |
|
"grad_norm": 1.3874750896868153, |
|
"learning_rate": 7.964384107828071e-11, |
|
"loss": 0.6994, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 0.9991915925626516, |
|
"grad_norm": 1.2804574968031643, |
|
"learning_rate": 1.9910999914385386e-11, |
|
"loss": 0.6947, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.58598368671411, |
|
"learning_rate": 0.0, |
|
"loss": 0.7102, |
|
"step": 6185 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.7062155604362488, |
|
"eval_runtime": 3.4997, |
|
"eval_samples_per_second": 2.857, |
|
"eval_steps_per_second": 0.857, |
|
"step": 6185 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 6185, |
|
"total_flos": 1963824696786944.0, |
|
"train_loss": 0.7727144511168101, |
|
"train_runtime": 22636.3522, |
|
"train_samples_per_second": 4.371, |
|
"train_steps_per_second": 0.273 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 6185, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1963824696786944.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|