{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 10314, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 3.656641960144043, "learning_rate": 4.998060888113244e-05, "loss": 9.8019, "step": 4 }, { "epoch": 0.0, "grad_norm": 3.3737926483154297, "learning_rate": 4.9961217762264883e-05, "loss": 9.12, "step": 8 }, { "epoch": 0.0, "grad_norm": 3.5734024047851562, "learning_rate": 4.9941826643397324e-05, "loss": 8.8002, "step": 12 }, { "epoch": 0.0, "grad_norm": 3.0836071968078613, "learning_rate": 4.992243552452977e-05, "loss": 8.5483, "step": 16 }, { "epoch": 0.0, "grad_norm": 3.126896619796753, "learning_rate": 4.9903044405662205e-05, "loss": 8.376, "step": 20 }, { "epoch": 0.0, "grad_norm": 2.795313835144043, "learning_rate": 4.988365328679465e-05, "loss": 8.2286, "step": 24 }, { "epoch": 0.0, "grad_norm": 2.847642183303833, "learning_rate": 4.9864262167927086e-05, "loss": 8.0428, "step": 28 }, { "epoch": 0.0, "grad_norm": 2.6489531993865967, "learning_rate": 4.9844871049059533e-05, "loss": 7.8429, "step": 32 }, { "epoch": 0.0, "grad_norm": 2.569993495941162, "learning_rate": 4.9825479930191974e-05, "loss": 7.6876, "step": 36 }, { "epoch": 0.0, "grad_norm": 2.3502109050750732, "learning_rate": 4.9806088811324415e-05, "loss": 7.5277, "step": 40 }, { "epoch": 0.0, "grad_norm": 2.2252399921417236, "learning_rate": 4.9786697692456855e-05, "loss": 7.3008, "step": 44 }, { "epoch": 0.0, "grad_norm": 2.2069296836853027, "learning_rate": 4.97673065735893e-05, "loss": 7.1233, "step": 48 }, { "epoch": 0.01, "grad_norm": 2.133172035217285, "learning_rate": 4.9747915454721736e-05, "loss": 6.9925, "step": 52 }, { "epoch": 0.01, "grad_norm": 1.830748438835144, "learning_rate": 4.9728524335854183e-05, "loss": 6.8808, "step": 56 }, { "epoch": 0.01, "grad_norm": 1.7571477890014648, "learning_rate": 4.970913321698662e-05, "loss": 6.7121, "step": 60 }, { "epoch": 0.01, "grad_norm": 1.6238043308258057, "learning_rate": 4.9689742098119065e-05, "loss": 6.6879, "step": 64 }, { "epoch": 0.01, "grad_norm": 1.460631012916565, "learning_rate": 4.9670350979251505e-05, "loss": 6.4892, "step": 68 }, { "epoch": 0.01, "grad_norm": 1.399665355682373, "learning_rate": 4.9650959860383946e-05, "loss": 6.4278, "step": 72 }, { "epoch": 0.01, "grad_norm": 1.2306325435638428, "learning_rate": 4.9631568741516386e-05, "loss": 6.43, "step": 76 }, { "epoch": 0.01, "grad_norm": 1.1850312948226929, "learning_rate": 4.9612177622648833e-05, "loss": 6.3628, "step": 80 }, { "epoch": 0.01, "grad_norm": 1.3839452266693115, "learning_rate": 4.959278650378127e-05, "loss": 6.1944, "step": 84 }, { "epoch": 0.01, "grad_norm": 1.2195193767547607, "learning_rate": 4.9573395384913715e-05, "loss": 6.2424, "step": 88 }, { "epoch": 0.01, "grad_norm": 1.1323491334915161, "learning_rate": 4.9554004266046155e-05, "loss": 6.112, "step": 92 }, { "epoch": 0.01, "grad_norm": 1.2749617099761963, "learning_rate": 4.9534613147178596e-05, "loss": 6.13, "step": 96 }, { "epoch": 0.01, "grad_norm": 1.2026777267456055, "learning_rate": 4.9515222028311036e-05, "loss": 6.0626, "step": 100 }, { "epoch": 0.01, "grad_norm": 1.4500185251235962, "learning_rate": 4.949583090944348e-05, "loss": 6.0918, "step": 104 }, { "epoch": 0.01, "grad_norm": 1.2848747968673706, "learning_rate": 4.947643979057592e-05, "loss": 6.1688, "step": 108 }, { "epoch": 0.01, "grad_norm": 1.3187015056610107, "learning_rate": 4.9457048671708365e-05, "loss": 6.0728, "step": 112 }, { "epoch": 0.01, "grad_norm": 1.487337589263916, "learning_rate": 4.94376575528408e-05, "loss": 6.0801, "step": 116 }, { "epoch": 0.01, "grad_norm": 1.191052794456482, "learning_rate": 4.9418266433973246e-05, "loss": 6.1361, "step": 120 }, { "epoch": 0.01, "grad_norm": 1.5774741172790527, "learning_rate": 4.9398875315105686e-05, "loss": 6.055, "step": 124 }, { "epoch": 0.01, "grad_norm": 1.0442208051681519, "learning_rate": 4.937948419623813e-05, "loss": 6.0708, "step": 128 }, { "epoch": 0.01, "grad_norm": 1.4346356391906738, "learning_rate": 4.936009307737057e-05, "loss": 6.0169, "step": 132 }, { "epoch": 0.01, "grad_norm": 1.2307249307632446, "learning_rate": 4.934070195850301e-05, "loss": 6.0547, "step": 136 }, { "epoch": 0.01, "grad_norm": 1.1539560556411743, "learning_rate": 4.932131083963545e-05, "loss": 6.0166, "step": 140 }, { "epoch": 0.01, "grad_norm": 1.1614229679107666, "learning_rate": 4.930191972076789e-05, "loss": 6.1866, "step": 144 }, { "epoch": 0.01, "grad_norm": 1.0596864223480225, "learning_rate": 4.928252860190033e-05, "loss": 5.9715, "step": 148 }, { "epoch": 0.01, "grad_norm": 1.111221194267273, "learning_rate": 4.926313748303277e-05, "loss": 6.0272, "step": 152 }, { "epoch": 0.02, "grad_norm": 0.9217305183410645, "learning_rate": 4.924374636416522e-05, "loss": 6.0135, "step": 156 }, { "epoch": 0.02, "grad_norm": 0.968235969543457, "learning_rate": 4.922435524529765e-05, "loss": 5.9969, "step": 160 }, { "epoch": 0.02, "grad_norm": 1.1550750732421875, "learning_rate": 4.92049641264301e-05, "loss": 6.0566, "step": 164 }, { "epoch": 0.02, "grad_norm": 1.5655795335769653, "learning_rate": 4.918557300756254e-05, "loss": 5.9619, "step": 168 }, { "epoch": 0.02, "grad_norm": 1.2823691368103027, "learning_rate": 4.916618188869498e-05, "loss": 5.876, "step": 172 }, { "epoch": 0.02, "grad_norm": 0.913962721824646, "learning_rate": 4.914679076982742e-05, "loss": 6.0023, "step": 176 }, { "epoch": 0.02, "grad_norm": 1.1342990398406982, "learning_rate": 4.912739965095986e-05, "loss": 5.9118, "step": 180 }, { "epoch": 0.02, "grad_norm": 1.1372032165527344, "learning_rate": 4.91080085320923e-05, "loss": 5.9543, "step": 184 }, { "epoch": 0.02, "grad_norm": 1.1001873016357422, "learning_rate": 4.908861741322475e-05, "loss": 6.0478, "step": 188 }, { "epoch": 0.02, "grad_norm": 1.163060188293457, "learning_rate": 4.906922629435718e-05, "loss": 5.9354, "step": 192 }, { "epoch": 0.02, "grad_norm": 1.022900938987732, "learning_rate": 4.904983517548963e-05, "loss": 5.8227, "step": 196 }, { "epoch": 0.02, "grad_norm": 1.2557566165924072, "learning_rate": 4.903044405662207e-05, "loss": 5.898, "step": 200 }, { "epoch": 0.02, "grad_norm": 1.260396122932434, "learning_rate": 4.901105293775451e-05, "loss": 5.9742, "step": 204 }, { "epoch": 0.02, "grad_norm": 1.0883902311325073, "learning_rate": 4.899166181888695e-05, "loss": 5.8642, "step": 208 }, { "epoch": 0.02, "grad_norm": 1.07965886592865, "learning_rate": 4.89722707000194e-05, "loss": 5.9836, "step": 212 }, { "epoch": 0.02, "grad_norm": 1.12454092502594, "learning_rate": 4.895287958115183e-05, "loss": 5.9383, "step": 216 }, { "epoch": 0.02, "grad_norm": 1.1025103330612183, "learning_rate": 4.893348846228428e-05, "loss": 5.8433, "step": 220 }, { "epoch": 0.02, "grad_norm": 1.1046481132507324, "learning_rate": 4.891409734341671e-05, "loss": 5.8649, "step": 224 }, { "epoch": 0.02, "grad_norm": 1.356872320175171, "learning_rate": 4.889470622454916e-05, "loss": 5.9069, "step": 228 }, { "epoch": 0.02, "grad_norm": 1.1604045629501343, "learning_rate": 4.88753151056816e-05, "loss": 5.8704, "step": 232 }, { "epoch": 0.02, "grad_norm": 1.1360890865325928, "learning_rate": 4.885592398681404e-05, "loss": 5.8723, "step": 236 }, { "epoch": 0.02, "grad_norm": 0.8874178528785706, "learning_rate": 4.883653286794648e-05, "loss": 5.8548, "step": 240 }, { "epoch": 0.02, "grad_norm": 1.0104761123657227, "learning_rate": 4.881714174907893e-05, "loss": 5.8931, "step": 244 }, { "epoch": 0.02, "grad_norm": 1.1234338283538818, "learning_rate": 4.879775063021136e-05, "loss": 5.8915, "step": 248 }, { "epoch": 0.02, "grad_norm": 1.2352111339569092, "learning_rate": 4.877835951134381e-05, "loss": 5.9084, "step": 252 }, { "epoch": 0.02, "grad_norm": 0.9969356656074524, "learning_rate": 4.875896839247625e-05, "loss": 5.9788, "step": 256 }, { "epoch": 0.03, "grad_norm": 1.4352229833602905, "learning_rate": 4.873957727360869e-05, "loss": 5.8121, "step": 260 }, { "epoch": 0.03, "grad_norm": 1.3081138134002686, "learning_rate": 4.872018615474113e-05, "loss": 5.8714, "step": 264 }, { "epoch": 0.03, "grad_norm": 1.1553027629852295, "learning_rate": 4.870079503587357e-05, "loss": 5.9062, "step": 268 }, { "epoch": 0.03, "grad_norm": 1.328942060470581, "learning_rate": 4.868140391700601e-05, "loss": 5.9659, "step": 272 }, { "epoch": 0.03, "grad_norm": 1.092142939567566, "learning_rate": 4.8662012798138454e-05, "loss": 5.9436, "step": 276 }, { "epoch": 0.03, "grad_norm": 0.8468914031982422, "learning_rate": 4.8642621679270894e-05, "loss": 5.9176, "step": 280 }, { "epoch": 0.03, "grad_norm": 1.1835949420928955, "learning_rate": 4.8623230560403335e-05, "loss": 5.8968, "step": 284 }, { "epoch": 0.03, "grad_norm": 1.401023507118225, "learning_rate": 4.860383944153578e-05, "loss": 5.8126, "step": 288 }, { "epoch": 0.03, "grad_norm": 1.2995976209640503, "learning_rate": 4.8584448322668216e-05, "loss": 5.9769, "step": 292 }, { "epoch": 0.03, "grad_norm": 1.0936148166656494, "learning_rate": 4.856505720380066e-05, "loss": 5.8341, "step": 296 }, { "epoch": 0.03, "grad_norm": 1.121054768562317, "learning_rate": 4.85456660849331e-05, "loss": 5.8082, "step": 300 }, { "epoch": 0.03, "grad_norm": 1.011224389076233, "learning_rate": 4.8526274966065544e-05, "loss": 5.8437, "step": 304 }, { "epoch": 0.03, "grad_norm": 0.909989058971405, "learning_rate": 4.8506883847197985e-05, "loss": 5.8217, "step": 308 }, { "epoch": 0.03, "grad_norm": 0.9056807160377502, "learning_rate": 4.8487492728330425e-05, "loss": 5.8791, "step": 312 }, { "epoch": 0.03, "grad_norm": 0.990393340587616, "learning_rate": 4.8468101609462866e-05, "loss": 5.9895, "step": 316 }, { "epoch": 0.03, "grad_norm": 1.252443790435791, "learning_rate": 4.844871049059531e-05, "loss": 5.9232, "step": 320 }, { "epoch": 0.03, "grad_norm": 0.9842864274978638, "learning_rate": 4.842931937172775e-05, "loss": 5.8068, "step": 324 }, { "epoch": 0.03, "grad_norm": 1.050066351890564, "learning_rate": 4.8409928252860194e-05, "loss": 5.8112, "step": 328 }, { "epoch": 0.03, "grad_norm": 0.9735043048858643, "learning_rate": 4.8390537133992635e-05, "loss": 5.8329, "step": 332 }, { "epoch": 0.03, "grad_norm": 0.948833703994751, "learning_rate": 4.8371146015125075e-05, "loss": 5.8564, "step": 336 }, { "epoch": 0.03, "grad_norm": 1.0226508378982544, "learning_rate": 4.8351754896257516e-05, "loss": 5.7493, "step": 340 }, { "epoch": 0.03, "grad_norm": 1.09911048412323, "learning_rate": 4.8332363777389956e-05, "loss": 5.7015, "step": 344 }, { "epoch": 0.03, "grad_norm": 1.1777935028076172, "learning_rate": 4.83129726585224e-05, "loss": 5.7534, "step": 348 }, { "epoch": 0.03, "grad_norm": 1.1616647243499756, "learning_rate": 4.8293581539654844e-05, "loss": 5.8506, "step": 352 }, { "epoch": 0.03, "grad_norm": 1.0601903200149536, "learning_rate": 4.827419042078728e-05, "loss": 5.8917, "step": 356 }, { "epoch": 0.03, "grad_norm": 0.9961209893226624, "learning_rate": 4.8254799301919725e-05, "loss": 5.797, "step": 360 }, { "epoch": 0.04, "grad_norm": 1.0286229848861694, "learning_rate": 4.8235408183052166e-05, "loss": 5.888, "step": 364 }, { "epoch": 0.04, "grad_norm": 1.0672661066055298, "learning_rate": 4.8216017064184606e-05, "loss": 5.823, "step": 368 }, { "epoch": 0.04, "grad_norm": 0.9099803566932678, "learning_rate": 4.819662594531705e-05, "loss": 5.7411, "step": 372 }, { "epoch": 0.04, "grad_norm": 1.1643503904342651, "learning_rate": 4.8177234826449494e-05, "loss": 5.9417, "step": 376 }, { "epoch": 0.04, "grad_norm": 0.9987436532974243, "learning_rate": 4.815784370758193e-05, "loss": 5.8176, "step": 380 }, { "epoch": 0.04, "grad_norm": 1.1946054697036743, "learning_rate": 4.8138452588714375e-05, "loss": 5.9005, "step": 384 }, { "epoch": 0.04, "grad_norm": 0.9317137002944946, "learning_rate": 4.811906146984681e-05, "loss": 5.8017, "step": 388 }, { "epoch": 0.04, "grad_norm": 1.0581262111663818, "learning_rate": 4.8099670350979256e-05, "loss": 5.794, "step": 392 }, { "epoch": 0.04, "grad_norm": 1.1224080324172974, "learning_rate": 4.80802792321117e-05, "loss": 5.8161, "step": 396 }, { "epoch": 0.04, "grad_norm": 1.2604461908340454, "learning_rate": 4.806088811324414e-05, "loss": 5.8109, "step": 400 }, { "epoch": 0.04, "grad_norm": 1.1275701522827148, "learning_rate": 4.804149699437658e-05, "loss": 5.8104, "step": 404 }, { "epoch": 0.04, "grad_norm": 1.012634038925171, "learning_rate": 4.802210587550902e-05, "loss": 5.8702, "step": 408 }, { "epoch": 0.04, "grad_norm": 0.8785901665687561, "learning_rate": 4.800271475664146e-05, "loss": 5.8498, "step": 412 }, { "epoch": 0.04, "grad_norm": 0.8808034062385559, "learning_rate": 4.79833236377739e-05, "loss": 5.7474, "step": 416 }, { "epoch": 0.04, "grad_norm": 1.1132124662399292, "learning_rate": 4.796393251890634e-05, "loss": 5.7462, "step": 420 }, { "epoch": 0.04, "grad_norm": 0.9468419551849365, "learning_rate": 4.794454140003878e-05, "loss": 5.7767, "step": 424 }, { "epoch": 0.04, "grad_norm": 0.9365801215171814, "learning_rate": 4.792515028117123e-05, "loss": 5.8072, "step": 428 }, { "epoch": 0.04, "grad_norm": 1.0467591285705566, "learning_rate": 4.790575916230366e-05, "loss": 5.7773, "step": 432 }, { "epoch": 0.04, "grad_norm": 1.1843376159667969, "learning_rate": 4.788636804343611e-05, "loss": 5.9357, "step": 436 }, { "epoch": 0.04, "grad_norm": 1.139872431755066, "learning_rate": 4.786697692456855e-05, "loss": 5.7744, "step": 440 }, { "epoch": 0.04, "grad_norm": 0.9096530079841614, "learning_rate": 4.784758580570099e-05, "loss": 5.9122, "step": 444 }, { "epoch": 0.04, "grad_norm": 1.0682241916656494, "learning_rate": 4.782819468683343e-05, "loss": 5.8587, "step": 448 }, { "epoch": 0.04, "grad_norm": 0.99188232421875, "learning_rate": 4.780880356796588e-05, "loss": 5.8096, "step": 452 }, { "epoch": 0.04, "grad_norm": 1.0947283506393433, "learning_rate": 4.778941244909831e-05, "loss": 5.6552, "step": 456 }, { "epoch": 0.04, "grad_norm": 1.0802547931671143, "learning_rate": 4.777002133023076e-05, "loss": 5.9245, "step": 460 }, { "epoch": 0.04, "grad_norm": 1.164602518081665, "learning_rate": 4.775063021136319e-05, "loss": 5.8362, "step": 464 }, { "epoch": 0.05, "grad_norm": 1.1340216398239136, "learning_rate": 4.773123909249564e-05, "loss": 5.8477, "step": 468 }, { "epoch": 0.05, "grad_norm": 1.0476304292678833, "learning_rate": 4.771184797362808e-05, "loss": 5.8336, "step": 472 }, { "epoch": 0.05, "grad_norm": 0.9962639808654785, "learning_rate": 4.769245685476052e-05, "loss": 5.7876, "step": 476 }, { "epoch": 0.05, "grad_norm": 0.9998891949653625, "learning_rate": 4.767306573589296e-05, "loss": 5.6897, "step": 480 }, { "epoch": 0.05, "grad_norm": 1.054436445236206, "learning_rate": 4.765367461702541e-05, "loss": 5.6461, "step": 484 }, { "epoch": 0.05, "grad_norm": 0.9963980913162231, "learning_rate": 4.763428349815784e-05, "loss": 5.7391, "step": 488 }, { "epoch": 0.05, "grad_norm": 1.077802062034607, "learning_rate": 4.761489237929029e-05, "loss": 5.772, "step": 492 }, { "epoch": 0.05, "grad_norm": 1.3317667245864868, "learning_rate": 4.759550126042273e-05, "loss": 5.7657, "step": 496 }, { "epoch": 0.05, "grad_norm": 1.028090000152588, "learning_rate": 4.757611014155517e-05, "loss": 5.7417, "step": 500 }, { "epoch": 0.05, "grad_norm": 0.9719061255455017, "learning_rate": 4.755671902268761e-05, "loss": 5.8042, "step": 504 }, { "epoch": 0.05, "grad_norm": 1.067749261856079, "learning_rate": 4.753732790382005e-05, "loss": 5.7923, "step": 508 }, { "epoch": 0.05, "grad_norm": 0.9830943942070007, "learning_rate": 4.751793678495249e-05, "loss": 5.7911, "step": 512 }, { "epoch": 0.05, "grad_norm": 1.0226807594299316, "learning_rate": 4.749854566608494e-05, "loss": 5.7696, "step": 516 }, { "epoch": 0.05, "grad_norm": 1.3311954736709595, "learning_rate": 4.7479154547217374e-05, "loss": 5.8689, "step": 520 }, { "epoch": 0.05, "grad_norm": 0.9809809923171997, "learning_rate": 4.745976342834982e-05, "loss": 5.8023, "step": 524 }, { "epoch": 0.05, "grad_norm": 1.1488884687423706, "learning_rate": 4.744037230948226e-05, "loss": 5.7116, "step": 528 }, { "epoch": 0.05, "grad_norm": 1.0571842193603516, "learning_rate": 4.74209811906147e-05, "loss": 5.7105, "step": 532 }, { "epoch": 0.05, "grad_norm": 0.9185091257095337, "learning_rate": 4.740159007174714e-05, "loss": 5.8675, "step": 536 }, { "epoch": 0.05, "grad_norm": 0.9164991974830627, "learning_rate": 4.738219895287958e-05, "loss": 5.7231, "step": 540 }, { "epoch": 0.05, "grad_norm": 1.0493993759155273, "learning_rate": 4.7362807834012024e-05, "loss": 5.8437, "step": 544 }, { "epoch": 0.05, "grad_norm": 0.8653802275657654, "learning_rate": 4.7343416715144464e-05, "loss": 5.652, "step": 548 }, { "epoch": 0.05, "grad_norm": 0.975098192691803, "learning_rate": 4.7324025596276905e-05, "loss": 5.7353, "step": 552 }, { "epoch": 0.05, "grad_norm": 0.9267313480377197, "learning_rate": 4.7304634477409345e-05, "loss": 5.7886, "step": 556 }, { "epoch": 0.05, "grad_norm": 0.9583150148391724, "learning_rate": 4.728524335854179e-05, "loss": 5.6864, "step": 560 }, { "epoch": 0.05, "grad_norm": 0.9664693474769592, "learning_rate": 4.7265852239674227e-05, "loss": 5.8314, "step": 564 }, { "epoch": 0.06, "grad_norm": 0.8994282484054565, "learning_rate": 4.7246461120806674e-05, "loss": 5.7865, "step": 568 }, { "epoch": 0.06, "grad_norm": 1.0095504522323608, "learning_rate": 4.7227070001939114e-05, "loss": 5.7627, "step": 572 }, { "epoch": 0.06, "grad_norm": 1.0120139122009277, "learning_rate": 4.7207678883071555e-05, "loss": 5.6769, "step": 576 }, { "epoch": 0.06, "grad_norm": 0.9483413100242615, "learning_rate": 4.7188287764203995e-05, "loss": 5.7615, "step": 580 }, { "epoch": 0.06, "grad_norm": 0.9208195209503174, "learning_rate": 4.7168896645336436e-05, "loss": 5.7178, "step": 584 }, { "epoch": 0.06, "grad_norm": 1.0568493604660034, "learning_rate": 4.7149505526468876e-05, "loss": 5.6305, "step": 588 }, { "epoch": 0.06, "grad_norm": 0.9036898016929626, "learning_rate": 4.7130114407601324e-05, "loss": 5.7088, "step": 592 }, { "epoch": 0.06, "grad_norm": 1.0219190120697021, "learning_rate": 4.711072328873376e-05, "loss": 5.7359, "step": 596 }, { "epoch": 0.06, "grad_norm": 1.1148089170455933, "learning_rate": 4.7091332169866205e-05, "loss": 5.7653, "step": 600 }, { "epoch": 0.06, "grad_norm": 1.0402499437332153, "learning_rate": 4.7071941050998645e-05, "loss": 5.7291, "step": 604 }, { "epoch": 0.06, "grad_norm": 1.2846848964691162, "learning_rate": 4.7052549932131086e-05, "loss": 5.8432, "step": 608 }, { "epoch": 0.06, "grad_norm": 1.0562716722488403, "learning_rate": 4.7033158813263526e-05, "loss": 5.7242, "step": 612 }, { "epoch": 0.06, "grad_norm": 0.9270595908164978, "learning_rate": 4.7013767694395974e-05, "loss": 5.735, "step": 616 }, { "epoch": 0.06, "grad_norm": 0.9591754674911499, "learning_rate": 4.699437657552841e-05, "loss": 5.6973, "step": 620 }, { "epoch": 0.06, "grad_norm": 1.020099401473999, "learning_rate": 4.6974985456660855e-05, "loss": 5.7211, "step": 624 }, { "epoch": 0.06, "grad_norm": 0.9728394150733948, "learning_rate": 4.695559433779329e-05, "loss": 5.7558, "step": 628 }, { "epoch": 0.06, "grad_norm": 0.9725939631462097, "learning_rate": 4.6936203218925736e-05, "loss": 5.8508, "step": 632 }, { "epoch": 0.06, "grad_norm": 1.0429859161376953, "learning_rate": 4.6916812100058176e-05, "loss": 5.7557, "step": 636 }, { "epoch": 0.06, "grad_norm": 1.1550756692886353, "learning_rate": 4.689742098119062e-05, "loss": 5.7864, "step": 640 }, { "epoch": 0.06, "grad_norm": 1.277512788772583, "learning_rate": 4.687802986232306e-05, "loss": 5.731, "step": 644 }, { "epoch": 0.06, "grad_norm": 1.170222282409668, "learning_rate": 4.6858638743455505e-05, "loss": 5.6716, "step": 648 }, { "epoch": 0.06, "grad_norm": 1.0293292999267578, "learning_rate": 4.683924762458794e-05, "loss": 5.7762, "step": 652 }, { "epoch": 0.06, "grad_norm": 1.0924861431121826, "learning_rate": 4.6819856505720386e-05, "loss": 5.7223, "step": 656 }, { "epoch": 0.06, "grad_norm": 0.949212908744812, "learning_rate": 4.680046538685282e-05, "loss": 5.7425, "step": 660 }, { "epoch": 0.06, "grad_norm": 1.0186933279037476, "learning_rate": 4.678107426798527e-05, "loss": 5.8361, "step": 664 }, { "epoch": 0.06, "grad_norm": 1.0421589612960815, "learning_rate": 4.676168314911771e-05, "loss": 5.7093, "step": 668 }, { "epoch": 0.07, "grad_norm": 1.061271071434021, "learning_rate": 4.674229203025015e-05, "loss": 5.6382, "step": 672 }, { "epoch": 0.07, "grad_norm": 1.0228848457336426, "learning_rate": 4.672290091138259e-05, "loss": 5.6845, "step": 676 }, { "epoch": 0.07, "grad_norm": 1.0488477945327759, "learning_rate": 4.670350979251503e-05, "loss": 5.6403, "step": 680 }, { "epoch": 0.07, "grad_norm": 0.9394490718841553, "learning_rate": 4.668411867364747e-05, "loss": 5.6733, "step": 684 }, { "epoch": 0.07, "grad_norm": 1.140890121459961, "learning_rate": 4.666472755477991e-05, "loss": 5.7066, "step": 688 }, { "epoch": 0.07, "grad_norm": 0.9432203769683838, "learning_rate": 4.664533643591236e-05, "loss": 5.7846, "step": 692 }, { "epoch": 0.07, "grad_norm": 1.0256586074829102, "learning_rate": 4.662594531704479e-05, "loss": 5.766, "step": 696 }, { "epoch": 0.07, "grad_norm": 1.0383509397506714, "learning_rate": 4.660655419817724e-05, "loss": 5.8138, "step": 700 }, { "epoch": 0.07, "grad_norm": 0.9286032319068909, "learning_rate": 4.658716307930968e-05, "loss": 5.7233, "step": 704 }, { "epoch": 0.07, "grad_norm": 0.9339187145233154, "learning_rate": 4.656777196044212e-05, "loss": 5.803, "step": 708 }, { "epoch": 0.07, "grad_norm": 0.9535828828811646, "learning_rate": 4.654838084157456e-05, "loss": 5.8002, "step": 712 }, { "epoch": 0.07, "grad_norm": 1.2881035804748535, "learning_rate": 4.6528989722707e-05, "loss": 5.5501, "step": 716 }, { "epoch": 0.07, "grad_norm": 0.9991912841796875, "learning_rate": 4.650959860383944e-05, "loss": 5.7732, "step": 720 }, { "epoch": 0.07, "grad_norm": 1.071502923965454, "learning_rate": 4.649020748497189e-05, "loss": 5.6494, "step": 724 }, { "epoch": 0.07, "grad_norm": 0.9422402381896973, "learning_rate": 4.647081636610432e-05, "loss": 5.7539, "step": 728 }, { "epoch": 0.07, "grad_norm": 1.2175371646881104, "learning_rate": 4.645142524723677e-05, "loss": 5.6747, "step": 732 }, { "epoch": 0.07, "grad_norm": 1.1459147930145264, "learning_rate": 4.643203412836921e-05, "loss": 5.6895, "step": 736 }, { "epoch": 0.07, "grad_norm": 0.9659698009490967, "learning_rate": 4.641264300950165e-05, "loss": 5.6517, "step": 740 }, { "epoch": 0.07, "grad_norm": 1.071323275566101, "learning_rate": 4.639325189063409e-05, "loss": 5.653, "step": 744 }, { "epoch": 0.07, "grad_norm": 1.1298364400863647, "learning_rate": 4.637386077176653e-05, "loss": 5.6975, "step": 748 }, { "epoch": 0.07, "grad_norm": 0.9889796376228333, "learning_rate": 4.635446965289897e-05, "loss": 5.8007, "step": 752 }, { "epoch": 0.07, "grad_norm": 0.9490829110145569, "learning_rate": 4.633507853403142e-05, "loss": 5.6969, "step": 756 }, { "epoch": 0.07, "grad_norm": 1.0920511484146118, "learning_rate": 4.6315687415163853e-05, "loss": 5.74, "step": 760 }, { "epoch": 0.07, "grad_norm": 1.039368748664856, "learning_rate": 4.62962962962963e-05, "loss": 5.7002, "step": 764 }, { "epoch": 0.07, "grad_norm": 0.9749051928520203, "learning_rate": 4.627690517742874e-05, "loss": 5.6879, "step": 768 }, { "epoch": 0.07, "grad_norm": 0.9410939812660217, "learning_rate": 4.625751405856118e-05, "loss": 5.7138, "step": 772 }, { "epoch": 0.08, "grad_norm": 0.9284390807151794, "learning_rate": 4.623812293969362e-05, "loss": 5.6772, "step": 776 }, { "epoch": 0.08, "grad_norm": 1.220140814781189, "learning_rate": 4.621873182082606e-05, "loss": 5.6187, "step": 780 }, { "epoch": 0.08, "grad_norm": 0.9217903017997742, "learning_rate": 4.6199340701958503e-05, "loss": 5.7051, "step": 784 }, { "epoch": 0.08, "grad_norm": 0.9127422571182251, "learning_rate": 4.617994958309095e-05, "loss": 5.6821, "step": 788 }, { "epoch": 0.08, "grad_norm": 0.9815553426742554, "learning_rate": 4.6160558464223385e-05, "loss": 5.7365, "step": 792 }, { "epoch": 0.08, "grad_norm": 0.8972638249397278, "learning_rate": 4.614116734535583e-05, "loss": 5.6835, "step": 796 }, { "epoch": 0.08, "grad_norm": 0.9297725558280945, "learning_rate": 4.612177622648827e-05, "loss": 5.6304, "step": 800 }, { "epoch": 0.08, "grad_norm": 0.9928802847862244, "learning_rate": 4.610238510762071e-05, "loss": 5.8121, "step": 804 }, { "epoch": 0.08, "grad_norm": 0.9002383351325989, "learning_rate": 4.6082993988753153e-05, "loss": 5.6457, "step": 808 }, { "epoch": 0.08, "grad_norm": 0.8902143239974976, "learning_rate": 4.6063602869885594e-05, "loss": 5.7276, "step": 812 }, { "epoch": 0.08, "grad_norm": 1.069921612739563, "learning_rate": 4.6044211751018035e-05, "loss": 5.6827, "step": 816 }, { "epoch": 0.08, "grad_norm": 0.9564573764801025, "learning_rate": 4.6024820632150475e-05, "loss": 5.694, "step": 820 }, { "epoch": 0.08, "grad_norm": 0.9404289126396179, "learning_rate": 4.6005429513282916e-05, "loss": 5.585, "step": 824 }, { "epoch": 0.08, "grad_norm": 0.9606718420982361, "learning_rate": 4.5986038394415356e-05, "loss": 5.7258, "step": 828 }, { "epoch": 0.08, "grad_norm": 0.893195629119873, "learning_rate": 4.5966647275547803e-05, "loss": 5.6816, "step": 832 }, { "epoch": 0.08, "grad_norm": 0.9530772566795349, "learning_rate": 4.5947256156680244e-05, "loss": 5.7424, "step": 836 }, { "epoch": 0.08, "grad_norm": 1.1925373077392578, "learning_rate": 4.5927865037812685e-05, "loss": 5.6958, "step": 840 }, { "epoch": 0.08, "grad_norm": 1.0818183422088623, "learning_rate": 4.5908473918945125e-05, "loss": 5.6273, "step": 844 }, { "epoch": 0.08, "grad_norm": 1.1746087074279785, "learning_rate": 4.5889082800077566e-05, "loss": 5.6699, "step": 848 }, { "epoch": 0.08, "grad_norm": 1.069509744644165, "learning_rate": 4.5869691681210006e-05, "loss": 5.677, "step": 852 }, { "epoch": 0.08, "grad_norm": 1.0898882150650024, "learning_rate": 4.5850300562342453e-05, "loss": 5.723, "step": 856 }, { "epoch": 0.08, "grad_norm": 0.9786805510520935, "learning_rate": 4.583090944347489e-05, "loss": 5.7109, "step": 860 }, { "epoch": 0.08, "grad_norm": 1.0218449831008911, "learning_rate": 4.5811518324607335e-05, "loss": 5.7927, "step": 864 }, { "epoch": 0.08, "grad_norm": 1.0597094297409058, "learning_rate": 4.579212720573977e-05, "loss": 5.6456, "step": 868 }, { "epoch": 0.08, "grad_norm": 1.015222430229187, "learning_rate": 4.5772736086872216e-05, "loss": 5.7023, "step": 872 }, { "epoch": 0.08, "grad_norm": 1.0093826055526733, "learning_rate": 4.5753344968004656e-05, "loss": 5.673, "step": 876 }, { "epoch": 0.09, "grad_norm": 1.1618988513946533, "learning_rate": 4.57339538491371e-05, "loss": 5.5492, "step": 880 }, { "epoch": 0.09, "grad_norm": 0.9958293437957764, "learning_rate": 4.571456273026954e-05, "loss": 5.5945, "step": 884 }, { "epoch": 0.09, "grad_norm": 0.9605300426483154, "learning_rate": 4.5695171611401985e-05, "loss": 5.6439, "step": 888 }, { "epoch": 0.09, "grad_norm": 1.0881812572479248, "learning_rate": 4.567578049253442e-05, "loss": 5.7346, "step": 892 }, { "epoch": 0.09, "grad_norm": 1.1047258377075195, "learning_rate": 4.5656389373666866e-05, "loss": 5.6754, "step": 896 }, { "epoch": 0.09, "grad_norm": 1.0597559213638306, "learning_rate": 4.56369982547993e-05, "loss": 5.6655, "step": 900 }, { "epoch": 0.09, "grad_norm": 1.0080434083938599, "learning_rate": 4.561760713593175e-05, "loss": 5.637, "step": 904 }, { "epoch": 0.09, "grad_norm": 1.1175477504730225, "learning_rate": 4.559821601706419e-05, "loss": 5.6114, "step": 908 }, { "epoch": 0.09, "grad_norm": 1.309441089630127, "learning_rate": 4.557882489819663e-05, "loss": 5.64, "step": 912 }, { "epoch": 0.09, "grad_norm": 1.0713026523590088, "learning_rate": 4.555943377932907e-05, "loss": 5.6433, "step": 916 }, { "epoch": 0.09, "grad_norm": 1.1929994821548462, "learning_rate": 4.5540042660461516e-05, "loss": 5.6503, "step": 920 }, { "epoch": 0.09, "grad_norm": 1.0933693647384644, "learning_rate": 4.552065154159395e-05, "loss": 5.68, "step": 924 }, { "epoch": 0.09, "grad_norm": 0.9823185801506042, "learning_rate": 4.55012604227264e-05, "loss": 5.6941, "step": 928 }, { "epoch": 0.09, "grad_norm": 0.9883749485015869, "learning_rate": 4.548186930385884e-05, "loss": 5.5973, "step": 932 }, { "epoch": 0.09, "grad_norm": 0.9289098381996155, "learning_rate": 4.546247818499128e-05, "loss": 5.7444, "step": 936 }, { "epoch": 0.09, "grad_norm": 1.0262410640716553, "learning_rate": 4.544308706612372e-05, "loss": 5.6235, "step": 940 }, { "epoch": 0.09, "grad_norm": 1.0129894018173218, "learning_rate": 4.542369594725616e-05, "loss": 5.6469, "step": 944 }, { "epoch": 0.09, "grad_norm": 1.0500363111495972, "learning_rate": 4.54043048283886e-05, "loss": 5.6281, "step": 948 }, { "epoch": 0.09, "grad_norm": 0.9239895343780518, "learning_rate": 4.538491370952104e-05, "loss": 5.6151, "step": 952 }, { "epoch": 0.09, "grad_norm": 0.9726962447166443, "learning_rate": 4.536552259065348e-05, "loss": 5.6146, "step": 956 }, { "epoch": 0.09, "grad_norm": 0.9822578430175781, "learning_rate": 4.534613147178592e-05, "loss": 5.5852, "step": 960 }, { "epoch": 0.09, "grad_norm": 0.9551820755004883, "learning_rate": 4.532674035291837e-05, "loss": 5.7069, "step": 964 }, { "epoch": 0.09, "grad_norm": 0.9644606709480286, "learning_rate": 4.530734923405081e-05, "loss": 5.6178, "step": 968 }, { "epoch": 0.09, "grad_norm": 1.1014901399612427, "learning_rate": 4.528795811518325e-05, "loss": 5.6131, "step": 972 }, { "epoch": 0.09, "grad_norm": 1.0390726327896118, "learning_rate": 4.526856699631569e-05, "loss": 5.5601, "step": 976 }, { "epoch": 0.1, "grad_norm": 0.9854323863983154, "learning_rate": 4.524917587744813e-05, "loss": 5.6175, "step": 980 }, { "epoch": 0.1, "grad_norm": 0.9083386659622192, "learning_rate": 4.522978475858057e-05, "loss": 5.6013, "step": 984 }, { "epoch": 0.1, "grad_norm": 1.246685266494751, "learning_rate": 4.521039363971301e-05, "loss": 5.7068, "step": 988 }, { "epoch": 0.1, "grad_norm": 1.0527753829956055, "learning_rate": 4.519100252084545e-05, "loss": 5.6358, "step": 992 }, { "epoch": 0.1, "grad_norm": 1.0731735229492188, "learning_rate": 4.51716114019779e-05, "loss": 5.6828, "step": 996 }, { "epoch": 0.1, "grad_norm": 1.0552845001220703, "learning_rate": 4.515222028311033e-05, "loss": 5.6828, "step": 1000 }, { "epoch": 0.1, "grad_norm": 0.990972101688385, "learning_rate": 4.513282916424278e-05, "loss": 5.7003, "step": 1004 }, { "epoch": 0.1, "grad_norm": 1.1035810708999634, "learning_rate": 4.511343804537522e-05, "loss": 5.7133, "step": 1008 }, { "epoch": 0.1, "grad_norm": 1.2226297855377197, "learning_rate": 4.509404692650766e-05, "loss": 5.6895, "step": 1012 }, { "epoch": 0.1, "grad_norm": 1.1234639883041382, "learning_rate": 4.50746558076401e-05, "loss": 5.7874, "step": 1016 }, { "epoch": 0.1, "grad_norm": 1.1130845546722412, "learning_rate": 4.505526468877254e-05, "loss": 5.7322, "step": 1020 }, { "epoch": 0.1, "grad_norm": 0.8997195363044739, "learning_rate": 4.503587356990498e-05, "loss": 5.5914, "step": 1024 }, { "epoch": 0.1, "grad_norm": 0.9451141953468323, "learning_rate": 4.501648245103743e-05, "loss": 5.68, "step": 1028 }, { "epoch": 0.1, "grad_norm": 1.0043590068817139, "learning_rate": 4.4997091332169864e-05, "loss": 5.7082, "step": 1032 }, { "epoch": 0.1, "grad_norm": 0.8845294713973999, "learning_rate": 4.497770021330231e-05, "loss": 5.6198, "step": 1036 }, { "epoch": 0.1, "grad_norm": 1.0413445234298706, "learning_rate": 4.495830909443475e-05, "loss": 5.6253, "step": 1040 }, { "epoch": 0.1, "grad_norm": 0.9856407642364502, "learning_rate": 4.493891797556719e-05, "loss": 5.7266, "step": 1044 }, { "epoch": 0.1, "grad_norm": 1.0194454193115234, "learning_rate": 4.491952685669963e-05, "loss": 5.6567, "step": 1048 }, { "epoch": 0.1, "grad_norm": 0.9935641884803772, "learning_rate": 4.490013573783208e-05, "loss": 5.639, "step": 1052 }, { "epoch": 0.1, "grad_norm": 1.0133986473083496, "learning_rate": 4.4880744618964514e-05, "loss": 5.6575, "step": 1056 }, { "epoch": 0.1, "grad_norm": 0.9149471521377563, "learning_rate": 4.486135350009696e-05, "loss": 5.7549, "step": 1060 }, { "epoch": 0.1, "grad_norm": 1.1448979377746582, "learning_rate": 4.4841962381229395e-05, "loss": 5.6149, "step": 1064 }, { "epoch": 0.1, "grad_norm": 0.9014776349067688, "learning_rate": 4.482257126236184e-05, "loss": 5.6667, "step": 1068 }, { "epoch": 0.1, "grad_norm": 0.9877203702926636, "learning_rate": 4.480318014349428e-05, "loss": 5.6292, "step": 1072 }, { "epoch": 0.1, "grad_norm": 1.15706467628479, "learning_rate": 4.4783789024626724e-05, "loss": 5.7073, "step": 1076 }, { "epoch": 0.1, "grad_norm": 1.0951610803604126, "learning_rate": 4.4764397905759164e-05, "loss": 5.6028, "step": 1080 }, { "epoch": 0.11, "grad_norm": 0.9684280157089233, "learning_rate": 4.4745006786891605e-05, "loss": 5.6946, "step": 1084 }, { "epoch": 0.11, "grad_norm": 0.9964583516120911, "learning_rate": 4.4725615668024045e-05, "loss": 5.6245, "step": 1088 }, { "epoch": 0.11, "grad_norm": 0.9633233547210693, "learning_rate": 4.4706224549156486e-05, "loss": 5.6683, "step": 1092 }, { "epoch": 0.11, "grad_norm": 1.0245460271835327, "learning_rate": 4.468683343028893e-05, "loss": 5.644, "step": 1096 }, { "epoch": 0.11, "grad_norm": 1.0242762565612793, "learning_rate": 4.4667442311421374e-05, "loss": 5.6365, "step": 1100 }, { "epoch": 0.11, "grad_norm": 0.982917070388794, "learning_rate": 4.4648051192553814e-05, "loss": 5.6458, "step": 1104 }, { "epoch": 0.11, "grad_norm": 0.949592113494873, "learning_rate": 4.4628660073686255e-05, "loss": 5.6253, "step": 1108 }, { "epoch": 0.11, "grad_norm": 1.0238159894943237, "learning_rate": 4.4609268954818695e-05, "loss": 5.6323, "step": 1112 }, { "epoch": 0.11, "grad_norm": 0.9112494587898254, "learning_rate": 4.4589877835951136e-05, "loss": 5.4746, "step": 1116 }, { "epoch": 0.11, "grad_norm": 0.9587331414222717, "learning_rate": 4.4570486717083576e-05, "loss": 5.6729, "step": 1120 }, { "epoch": 0.11, "grad_norm": 0.8947247862815857, "learning_rate": 4.455109559821602e-05, "loss": 5.7247, "step": 1124 }, { "epoch": 0.11, "grad_norm": 0.9927013516426086, "learning_rate": 4.4531704479348464e-05, "loss": 5.6287, "step": 1128 }, { "epoch": 0.11, "grad_norm": 1.0964155197143555, "learning_rate": 4.45123133604809e-05, "loss": 5.611, "step": 1132 }, { "epoch": 0.11, "grad_norm": 0.9409870505332947, "learning_rate": 4.4492922241613345e-05, "loss": 5.6107, "step": 1136 }, { "epoch": 0.11, "grad_norm": 1.1042007207870483, "learning_rate": 4.447353112274578e-05, "loss": 5.7283, "step": 1140 }, { "epoch": 0.11, "grad_norm": 1.0457091331481934, "learning_rate": 4.4454140003878226e-05, "loss": 5.6, "step": 1144 }, { "epoch": 0.11, "grad_norm": 1.037097692489624, "learning_rate": 4.443474888501067e-05, "loss": 5.7081, "step": 1148 }, { "epoch": 0.11, "grad_norm": 0.8815637230873108, "learning_rate": 4.441535776614311e-05, "loss": 5.6222, "step": 1152 }, { "epoch": 0.11, "grad_norm": 0.9820300936698914, "learning_rate": 4.439596664727555e-05, "loss": 5.639, "step": 1156 }, { "epoch": 0.11, "grad_norm": 1.1689587831497192, "learning_rate": 4.4376575528407995e-05, "loss": 5.6293, "step": 1160 }, { "epoch": 0.11, "grad_norm": 0.9346803426742554, "learning_rate": 4.435718440954043e-05, "loss": 5.5408, "step": 1164 }, { "epoch": 0.11, "grad_norm": 0.8906845450401306, "learning_rate": 4.4337793290672876e-05, "loss": 5.5729, "step": 1168 }, { "epoch": 0.11, "grad_norm": 1.0249162912368774, "learning_rate": 4.431840217180532e-05, "loss": 5.6928, "step": 1172 }, { "epoch": 0.11, "grad_norm": 1.0330933332443237, "learning_rate": 4.429901105293776e-05, "loss": 5.6583, "step": 1176 }, { "epoch": 0.11, "grad_norm": 1.2164433002471924, "learning_rate": 4.42796199340702e-05, "loss": 5.6906, "step": 1180 }, { "epoch": 0.11, "grad_norm": 1.061458706855774, "learning_rate": 4.426022881520264e-05, "loss": 5.6325, "step": 1184 }, { "epoch": 0.12, "grad_norm": 1.0236989259719849, "learning_rate": 4.424083769633508e-05, "loss": 5.6806, "step": 1188 }, { "epoch": 0.12, "grad_norm": 1.125806212425232, "learning_rate": 4.4221446577467526e-05, "loss": 5.6773, "step": 1192 }, { "epoch": 0.12, "grad_norm": 1.0632579326629639, "learning_rate": 4.420205545859996e-05, "loss": 5.7194, "step": 1196 }, { "epoch": 0.12, "grad_norm": 1.0620874166488647, "learning_rate": 4.418266433973241e-05, "loss": 5.5184, "step": 1200 }, { "epoch": 0.12, "grad_norm": 1.0390667915344238, "learning_rate": 4.416327322086485e-05, "loss": 5.6573, "step": 1204 }, { "epoch": 0.12, "grad_norm": 0.9894878268241882, "learning_rate": 4.414388210199729e-05, "loss": 5.765, "step": 1208 }, { "epoch": 0.12, "grad_norm": 0.9994860887527466, "learning_rate": 4.412449098312973e-05, "loss": 5.661, "step": 1212 }, { "epoch": 0.12, "grad_norm": 0.9990851283073425, "learning_rate": 4.410509986426217e-05, "loss": 5.6168, "step": 1216 }, { "epoch": 0.12, "grad_norm": 1.0815703868865967, "learning_rate": 4.408570874539461e-05, "loss": 5.6433, "step": 1220 }, { "epoch": 0.12, "grad_norm": 0.9334216117858887, "learning_rate": 4.406631762652705e-05, "loss": 5.5035, "step": 1224 }, { "epoch": 0.12, "grad_norm": 0.9055896401405334, "learning_rate": 4.404692650765949e-05, "loss": 5.632, "step": 1228 }, { "epoch": 0.12, "grad_norm": 1.0294831991195679, "learning_rate": 4.402753538879194e-05, "loss": 5.5739, "step": 1232 }, { "epoch": 0.12, "grad_norm": 1.100758671760559, "learning_rate": 4.400814426992438e-05, "loss": 5.6159, "step": 1236 }, { "epoch": 0.12, "grad_norm": 0.9790968894958496, "learning_rate": 4.398875315105682e-05, "loss": 5.6637, "step": 1240 }, { "epoch": 0.12, "grad_norm": 0.9665990471839905, "learning_rate": 4.396936203218926e-05, "loss": 5.6361, "step": 1244 }, { "epoch": 0.12, "grad_norm": 1.0019501447677612, "learning_rate": 4.39499709133217e-05, "loss": 5.5965, "step": 1248 }, { "epoch": 0.12, "grad_norm": 0.9785341620445251, "learning_rate": 4.393057979445414e-05, "loss": 5.6697, "step": 1252 }, { "epoch": 0.12, "grad_norm": 1.0229973793029785, "learning_rate": 4.391118867558658e-05, "loss": 5.6391, "step": 1256 }, { "epoch": 0.12, "grad_norm": 0.9716441035270691, "learning_rate": 4.389179755671902e-05, "loss": 5.6513, "step": 1260 }, { "epoch": 0.12, "grad_norm": 0.8981690406799316, "learning_rate": 4.387240643785146e-05, "loss": 5.6567, "step": 1264 }, { "epoch": 0.12, "grad_norm": 0.9904807806015015, "learning_rate": 4.385301531898391e-05, "loss": 5.6761, "step": 1268 }, { "epoch": 0.12, "grad_norm": 0.9603332877159119, "learning_rate": 4.3833624200116344e-05, "loss": 5.7039, "step": 1272 }, { "epoch": 0.12, "grad_norm": 1.044404149055481, "learning_rate": 4.381423308124879e-05, "loss": 5.5329, "step": 1276 }, { "epoch": 0.12, "grad_norm": 1.1209489107131958, "learning_rate": 4.379484196238123e-05, "loss": 5.6461, "step": 1280 }, { "epoch": 0.12, "grad_norm": 1.1430855989456177, "learning_rate": 4.377545084351367e-05, "loss": 5.5406, "step": 1284 }, { "epoch": 0.12, "grad_norm": 1.004539966583252, "learning_rate": 4.375605972464611e-05, "loss": 5.5827, "step": 1288 }, { "epoch": 0.13, "grad_norm": 1.0721603631973267, "learning_rate": 4.373666860577856e-05, "loss": 5.5553, "step": 1292 }, { "epoch": 0.13, "grad_norm": 0.9446938037872314, "learning_rate": 4.3717277486910994e-05, "loss": 5.6481, "step": 1296 }, { "epoch": 0.13, "grad_norm": 0.8731529116630554, "learning_rate": 4.369788636804344e-05, "loss": 5.6374, "step": 1300 }, { "epoch": 0.13, "grad_norm": 1.0640937089920044, "learning_rate": 4.3678495249175875e-05, "loss": 5.5943, "step": 1304 }, { "epoch": 0.13, "grad_norm": 1.0243881940841675, "learning_rate": 4.365910413030832e-05, "loss": 5.5065, "step": 1308 }, { "epoch": 0.13, "grad_norm": 1.1701568365097046, "learning_rate": 4.363971301144076e-05, "loss": 5.6329, "step": 1312 }, { "epoch": 0.13, "grad_norm": 0.9212052822113037, "learning_rate": 4.36203218925732e-05, "loss": 5.7735, "step": 1316 }, { "epoch": 0.13, "grad_norm": 0.9392960667610168, "learning_rate": 4.3600930773705644e-05, "loss": 5.5992, "step": 1320 }, { "epoch": 0.13, "grad_norm": 1.0432089567184448, "learning_rate": 4.358153965483809e-05, "loss": 5.5948, "step": 1324 }, { "epoch": 0.13, "grad_norm": 0.9817702770233154, "learning_rate": 4.3562148535970525e-05, "loss": 5.6496, "step": 1328 }, { "epoch": 0.13, "grad_norm": 1.0641746520996094, "learning_rate": 4.354275741710297e-05, "loss": 5.6238, "step": 1332 }, { "epoch": 0.13, "grad_norm": 1.0068002939224243, "learning_rate": 4.352336629823541e-05, "loss": 5.6316, "step": 1336 }, { "epoch": 0.13, "grad_norm": 1.031501054763794, "learning_rate": 4.350397517936785e-05, "loss": 5.6462, "step": 1340 }, { "epoch": 0.13, "grad_norm": 0.980787992477417, "learning_rate": 4.3484584060500294e-05, "loss": 5.6535, "step": 1344 }, { "epoch": 0.13, "grad_norm": 0.9462695717811584, "learning_rate": 4.3465192941632734e-05, "loss": 5.6083, "step": 1348 }, { "epoch": 0.13, "grad_norm": 0.8907430768013, "learning_rate": 4.3445801822765175e-05, "loss": 5.615, "step": 1352 }, { "epoch": 0.13, "grad_norm": 1.2333685159683228, "learning_rate": 4.3426410703897615e-05, "loss": 5.6374, "step": 1356 }, { "epoch": 0.13, "grad_norm": 1.165974497795105, "learning_rate": 4.3407019585030056e-05, "loss": 5.5617, "step": 1360 }, { "epoch": 0.13, "grad_norm": 1.1213206052780151, "learning_rate": 4.33876284661625e-05, "loss": 5.6201, "step": 1364 }, { "epoch": 0.13, "grad_norm": 1.0781583786010742, "learning_rate": 4.3368237347294944e-05, "loss": 5.639, "step": 1368 }, { "epoch": 0.13, "grad_norm": 1.0337659120559692, "learning_rate": 4.3348846228427384e-05, "loss": 5.736, "step": 1372 }, { "epoch": 0.13, "grad_norm": 0.9833229184150696, "learning_rate": 4.3329455109559825e-05, "loss": 5.6153, "step": 1376 }, { "epoch": 0.13, "grad_norm": 1.0340036153793335, "learning_rate": 4.3310063990692265e-05, "loss": 5.6146, "step": 1380 }, { "epoch": 0.13, "grad_norm": 0.9522514939308167, "learning_rate": 4.3290672871824706e-05, "loss": 5.646, "step": 1384 }, { "epoch": 0.13, "grad_norm": 1.040779948234558, "learning_rate": 4.3271281752957146e-05, "loss": 5.6411, "step": 1388 }, { "epoch": 0.13, "grad_norm": 0.9653657674789429, "learning_rate": 4.325189063408959e-05, "loss": 5.634, "step": 1392 }, { "epoch": 0.14, "grad_norm": 0.9610472321510315, "learning_rate": 4.323249951522203e-05, "loss": 5.5112, "step": 1396 }, { "epoch": 0.14, "grad_norm": 1.0222139358520508, "learning_rate": 4.3213108396354475e-05, "loss": 5.4966, "step": 1400 }, { "epoch": 0.14, "grad_norm": 1.0731512308120728, "learning_rate": 4.319371727748691e-05, "loss": 5.5193, "step": 1404 }, { "epoch": 0.14, "grad_norm": 1.075119972229004, "learning_rate": 4.3174326158619356e-05, "loss": 5.573, "step": 1408 }, { "epoch": 0.14, "grad_norm": 0.9324813485145569, "learning_rate": 4.3154935039751796e-05, "loss": 5.5821, "step": 1412 }, { "epoch": 0.14, "grad_norm": 0.8965750336647034, "learning_rate": 4.313554392088424e-05, "loss": 5.4723, "step": 1416 }, { "epoch": 0.14, "grad_norm": 0.902160108089447, "learning_rate": 4.311615280201668e-05, "loss": 5.5979, "step": 1420 }, { "epoch": 0.14, "grad_norm": 0.9165207743644714, "learning_rate": 4.309676168314912e-05, "loss": 5.5698, "step": 1424 }, { "epoch": 0.14, "grad_norm": 1.0775911808013916, "learning_rate": 4.307737056428156e-05, "loss": 5.4091, "step": 1428 }, { "epoch": 0.14, "grad_norm": 0.9935070276260376, "learning_rate": 4.3057979445414006e-05, "loss": 5.6084, "step": 1432 }, { "epoch": 0.14, "grad_norm": 1.105722188949585, "learning_rate": 4.303858832654644e-05, "loss": 5.6961, "step": 1436 }, { "epoch": 0.14, "grad_norm": 1.0309725999832153, "learning_rate": 4.301919720767889e-05, "loss": 5.6465, "step": 1440 }, { "epoch": 0.14, "grad_norm": 0.9571665525436401, "learning_rate": 4.299980608881133e-05, "loss": 5.5844, "step": 1444 }, { "epoch": 0.14, "grad_norm": 1.0037786960601807, "learning_rate": 4.298041496994377e-05, "loss": 5.7001, "step": 1448 }, { "epoch": 0.14, "grad_norm": 0.9573729634284973, "learning_rate": 4.296102385107621e-05, "loss": 5.5545, "step": 1452 }, { "epoch": 0.14, "grad_norm": 1.0113383531570435, "learning_rate": 4.2941632732208656e-05, "loss": 5.6047, "step": 1456 }, { "epoch": 0.14, "grad_norm": 0.9878535866737366, "learning_rate": 4.292224161334109e-05, "loss": 5.5582, "step": 1460 }, { "epoch": 0.14, "grad_norm": 0.9363996386528015, "learning_rate": 4.290285049447354e-05, "loss": 5.5544, "step": 1464 }, { "epoch": 0.14, "grad_norm": 0.8429636359214783, "learning_rate": 4.288345937560597e-05, "loss": 5.5561, "step": 1468 }, { "epoch": 0.14, "grad_norm": 1.0051556825637817, "learning_rate": 4.286406825673842e-05, "loss": 5.5236, "step": 1472 }, { "epoch": 0.14, "grad_norm": 0.9440407156944275, "learning_rate": 4.284467713787086e-05, "loss": 5.6561, "step": 1476 }, { "epoch": 0.14, "grad_norm": 0.9376720786094666, "learning_rate": 4.28252860190033e-05, "loss": 5.6352, "step": 1480 }, { "epoch": 0.14, "grad_norm": 1.0096479654312134, "learning_rate": 4.280589490013574e-05, "loss": 5.5187, "step": 1484 }, { "epoch": 0.14, "grad_norm": 1.003917932510376, "learning_rate": 4.278650378126818e-05, "loss": 5.5664, "step": 1488 }, { "epoch": 0.14, "grad_norm": 0.9462191462516785, "learning_rate": 4.276711266240062e-05, "loss": 5.553, "step": 1492 }, { "epoch": 0.15, "grad_norm": 0.9691956043243408, "learning_rate": 4.274772154353306e-05, "loss": 5.6481, "step": 1496 }, { "epoch": 0.15, "grad_norm": 0.8949794769287109, "learning_rate": 4.27283304246655e-05, "loss": 5.5607, "step": 1500 }, { "epoch": 0.15, "grad_norm": 0.9408190250396729, "learning_rate": 4.270893930579795e-05, "loss": 5.5379, "step": 1504 }, { "epoch": 0.15, "grad_norm": 0.9462267756462097, "learning_rate": 4.268954818693039e-05, "loss": 5.628, "step": 1508 }, { "epoch": 0.15, "grad_norm": 0.9751488566398621, "learning_rate": 4.267015706806283e-05, "loss": 5.5709, "step": 1512 }, { "epoch": 0.15, "grad_norm": 1.0487922430038452, "learning_rate": 4.265076594919527e-05, "loss": 5.5999, "step": 1516 }, { "epoch": 0.15, "grad_norm": 1.0256918668746948, "learning_rate": 4.263137483032771e-05, "loss": 5.5375, "step": 1520 }, { "epoch": 0.15, "grad_norm": 0.9403053522109985, "learning_rate": 4.261198371146015e-05, "loss": 5.539, "step": 1524 }, { "epoch": 0.15, "grad_norm": 1.2121844291687012, "learning_rate": 4.259259259259259e-05, "loss": 5.5828, "step": 1528 }, { "epoch": 0.15, "grad_norm": 1.0676014423370361, "learning_rate": 4.257320147372504e-05, "loss": 5.6215, "step": 1532 }, { "epoch": 0.15, "grad_norm": 0.9083504676818848, "learning_rate": 4.2553810354857473e-05, "loss": 5.7206, "step": 1536 }, { "epoch": 0.15, "grad_norm": 0.8858234882354736, "learning_rate": 4.253441923598992e-05, "loss": 5.5742, "step": 1540 }, { "epoch": 0.15, "grad_norm": 0.9309298396110535, "learning_rate": 4.2515028117122355e-05, "loss": 5.5078, "step": 1544 }, { "epoch": 0.15, "grad_norm": 0.9594176411628723, "learning_rate": 4.24956369982548e-05, "loss": 5.5863, "step": 1548 }, { "epoch": 0.15, "grad_norm": 0.8728674650192261, "learning_rate": 4.247624587938724e-05, "loss": 5.5323, "step": 1552 }, { "epoch": 0.15, "grad_norm": 0.9947747588157654, "learning_rate": 4.245685476051968e-05, "loss": 5.5641, "step": 1556 }, { "epoch": 0.15, "grad_norm": 0.9186658263206482, "learning_rate": 4.2437463641652123e-05, "loss": 5.6258, "step": 1560 }, { "epoch": 0.15, "grad_norm": 0.9086698889732361, "learning_rate": 4.241807252278457e-05, "loss": 5.6391, "step": 1564 }, { "epoch": 0.15, "grad_norm": 0.954474925994873, "learning_rate": 4.2398681403917005e-05, "loss": 5.6077, "step": 1568 }, { "epoch": 0.15, "grad_norm": 0.9422994256019592, "learning_rate": 4.237929028504945e-05, "loss": 5.4322, "step": 1572 }, { "epoch": 0.15, "grad_norm": 0.9581114649772644, "learning_rate": 4.235989916618189e-05, "loss": 5.5757, "step": 1576 }, { "epoch": 0.15, "grad_norm": 0.9948291778564453, "learning_rate": 4.234050804731433e-05, "loss": 5.5409, "step": 1580 }, { "epoch": 0.15, "grad_norm": 1.0310508012771606, "learning_rate": 4.2321116928446773e-05, "loss": 5.5628, "step": 1584 }, { "epoch": 0.15, "grad_norm": 1.053040862083435, "learning_rate": 4.2301725809579214e-05, "loss": 5.6698, "step": 1588 }, { "epoch": 0.15, "grad_norm": 1.093757152557373, "learning_rate": 4.2282334690711655e-05, "loss": 5.5696, "step": 1592 }, { "epoch": 0.15, "grad_norm": 1.0216928720474243, "learning_rate": 4.22629435718441e-05, "loss": 5.6469, "step": 1596 }, { "epoch": 0.16, "grad_norm": 1.0343660116195679, "learning_rate": 4.2243552452976536e-05, "loss": 5.6364, "step": 1600 }, { "epoch": 0.16, "grad_norm": 0.9620201587677002, "learning_rate": 4.222416133410898e-05, "loss": 5.6311, "step": 1604 }, { "epoch": 0.16, "grad_norm": 1.032458782196045, "learning_rate": 4.2204770215241423e-05, "loss": 5.523, "step": 1608 }, { "epoch": 0.16, "grad_norm": 0.9948311448097229, "learning_rate": 4.2185379096373864e-05, "loss": 5.5698, "step": 1612 }, { "epoch": 0.16, "grad_norm": 1.0208816528320312, "learning_rate": 4.2165987977506305e-05, "loss": 5.6042, "step": 1616 }, { "epoch": 0.16, "grad_norm": 1.1074092388153076, "learning_rate": 4.2146596858638745e-05, "loss": 5.542, "step": 1620 }, { "epoch": 0.16, "grad_norm": 0.9410656690597534, "learning_rate": 4.2127205739771186e-05, "loss": 5.5188, "step": 1624 }, { "epoch": 0.16, "grad_norm": 1.0261708498001099, "learning_rate": 4.2107814620903626e-05, "loss": 5.5377, "step": 1628 }, { "epoch": 0.16, "grad_norm": 0.9042761325836182, "learning_rate": 4.208842350203607e-05, "loss": 5.5023, "step": 1632 }, { "epoch": 0.16, "grad_norm": 1.0243982076644897, "learning_rate": 4.2069032383168514e-05, "loss": 5.5674, "step": 1636 }, { "epoch": 0.16, "grad_norm": 1.0028672218322754, "learning_rate": 4.2049641264300955e-05, "loss": 5.5574, "step": 1640 }, { "epoch": 0.16, "grad_norm": 0.9813360571861267, "learning_rate": 4.2030250145433395e-05, "loss": 5.5664, "step": 1644 }, { "epoch": 0.16, "grad_norm": 0.9174672365188599, "learning_rate": 4.2010859026565836e-05, "loss": 5.5837, "step": 1648 }, { "epoch": 0.16, "grad_norm": 1.1121405363082886, "learning_rate": 4.1991467907698276e-05, "loss": 5.5619, "step": 1652 }, { "epoch": 0.16, "grad_norm": 1.0016354322433472, "learning_rate": 4.197207678883072e-05, "loss": 5.5928, "step": 1656 }, { "epoch": 0.16, "grad_norm": 0.9536789655685425, "learning_rate": 4.195268566996316e-05, "loss": 5.5211, "step": 1660 }, { "epoch": 0.16, "grad_norm": 1.0609054565429688, "learning_rate": 4.19332945510956e-05, "loss": 5.5809, "step": 1664 }, { "epoch": 0.16, "grad_norm": 0.9688783288002014, "learning_rate": 4.191390343222804e-05, "loss": 5.576, "step": 1668 }, { "epoch": 0.16, "grad_norm": 1.0187081098556519, "learning_rate": 4.1894512313360486e-05, "loss": 5.569, "step": 1672 }, { "epoch": 0.16, "grad_norm": 0.9904341101646423, "learning_rate": 4.187512119449292e-05, "loss": 5.5725, "step": 1676 }, { "epoch": 0.16, "grad_norm": 1.1697142124176025, "learning_rate": 4.185573007562537e-05, "loss": 5.566, "step": 1680 }, { "epoch": 0.16, "grad_norm": 0.9842191934585571, "learning_rate": 4.183633895675781e-05, "loss": 5.5687, "step": 1684 }, { "epoch": 0.16, "grad_norm": 1.0618923902511597, "learning_rate": 4.181694783789025e-05, "loss": 5.5165, "step": 1688 }, { "epoch": 0.16, "grad_norm": 0.9923197031021118, "learning_rate": 4.179755671902269e-05, "loss": 5.5625, "step": 1692 }, { "epoch": 0.16, "grad_norm": 1.0109176635742188, "learning_rate": 4.1778165600155136e-05, "loss": 5.601, "step": 1696 }, { "epoch": 0.16, "grad_norm": 0.8870441913604736, "learning_rate": 4.175877448128757e-05, "loss": 5.4932, "step": 1700 }, { "epoch": 0.17, "grad_norm": 1.004470705986023, "learning_rate": 4.173938336242002e-05, "loss": 5.5546, "step": 1704 }, { "epoch": 0.17, "grad_norm": 0.9421555399894714, "learning_rate": 4.171999224355245e-05, "loss": 5.4712, "step": 1708 }, { "epoch": 0.17, "grad_norm": 0.9490504264831543, "learning_rate": 4.17006011246849e-05, "loss": 5.493, "step": 1712 }, { "epoch": 0.17, "grad_norm": 0.9385312795639038, "learning_rate": 4.168121000581734e-05, "loss": 5.5997, "step": 1716 }, { "epoch": 0.17, "grad_norm": 0.9013298153877258, "learning_rate": 4.166181888694978e-05, "loss": 5.4883, "step": 1720 }, { "epoch": 0.17, "grad_norm": 0.9516580104827881, "learning_rate": 4.164242776808222e-05, "loss": 5.5971, "step": 1724 }, { "epoch": 0.17, "grad_norm": 1.033234715461731, "learning_rate": 4.162303664921467e-05, "loss": 5.5807, "step": 1728 }, { "epoch": 0.17, "grad_norm": 1.042651653289795, "learning_rate": 4.16036455303471e-05, "loss": 5.5076, "step": 1732 }, { "epoch": 0.17, "grad_norm": 0.9568614959716797, "learning_rate": 4.158425441147955e-05, "loss": 5.5909, "step": 1736 }, { "epoch": 0.17, "grad_norm": 0.9669156074523926, "learning_rate": 4.156486329261198e-05, "loss": 5.5952, "step": 1740 }, { "epoch": 0.17, "grad_norm": 0.9101834893226624, "learning_rate": 4.154547217374443e-05, "loss": 5.5554, "step": 1744 }, { "epoch": 0.17, "grad_norm": 0.9412431716918945, "learning_rate": 4.152608105487687e-05, "loss": 5.5461, "step": 1748 }, { "epoch": 0.17, "grad_norm": 1.0211936235427856, "learning_rate": 4.150668993600931e-05, "loss": 5.6353, "step": 1752 }, { "epoch": 0.17, "grad_norm": 1.0581713914871216, "learning_rate": 4.148729881714175e-05, "loss": 5.5807, "step": 1756 }, { "epoch": 0.17, "grad_norm": 0.9403213858604431, "learning_rate": 4.146790769827419e-05, "loss": 5.5369, "step": 1760 }, { "epoch": 0.17, "grad_norm": 0.9232079386711121, "learning_rate": 4.144851657940663e-05, "loss": 5.5559, "step": 1764 }, { "epoch": 0.17, "grad_norm": 0.9828022122383118, "learning_rate": 4.142912546053908e-05, "loss": 5.4692, "step": 1768 }, { "epoch": 0.17, "grad_norm": 0.9320650696754456, "learning_rate": 4.140973434167152e-05, "loss": 5.5443, "step": 1772 }, { "epoch": 0.17, "grad_norm": 0.9043223261833191, "learning_rate": 4.139034322280396e-05, "loss": 5.4807, "step": 1776 }, { "epoch": 0.17, "grad_norm": 0.9783521294593811, "learning_rate": 4.13709521039364e-05, "loss": 5.575, "step": 1780 }, { "epoch": 0.17, "grad_norm": 1.0388356447219849, "learning_rate": 4.135156098506884e-05, "loss": 5.5553, "step": 1784 }, { "epoch": 0.17, "grad_norm": 0.9247937202453613, "learning_rate": 4.133216986620128e-05, "loss": 5.5511, "step": 1788 }, { "epoch": 0.17, "grad_norm": 1.0111491680145264, "learning_rate": 4.131277874733372e-05, "loss": 5.6378, "step": 1792 }, { "epoch": 0.17, "grad_norm": 0.9572594165802002, "learning_rate": 4.129338762846616e-05, "loss": 5.4827, "step": 1796 }, { "epoch": 0.17, "grad_norm": 0.9328434467315674, "learning_rate": 4.12739965095986e-05, "loss": 5.5489, "step": 1800 }, { "epoch": 0.17, "grad_norm": 1.000696063041687, "learning_rate": 4.125460539073105e-05, "loss": 5.6207, "step": 1804 }, { "epoch": 0.18, "grad_norm": 0.891006350517273, "learning_rate": 4.1235214271863484e-05, "loss": 5.525, "step": 1808 }, { "epoch": 0.18, "grad_norm": 1.0198568105697632, "learning_rate": 4.121582315299593e-05, "loss": 5.5186, "step": 1812 }, { "epoch": 0.18, "grad_norm": 1.0678110122680664, "learning_rate": 4.119643203412837e-05, "loss": 5.565, "step": 1816 }, { "epoch": 0.18, "grad_norm": 0.9433650970458984, "learning_rate": 4.117704091526081e-05, "loss": 5.5038, "step": 1820 }, { "epoch": 0.18, "grad_norm": 0.982768714427948, "learning_rate": 4.115764979639325e-05, "loss": 5.4981, "step": 1824 }, { "epoch": 0.18, "grad_norm": 0.9159711003303528, "learning_rate": 4.1138258677525694e-05, "loss": 5.4945, "step": 1828 }, { "epoch": 0.18, "grad_norm": 1.0521996021270752, "learning_rate": 4.1118867558658134e-05, "loss": 5.5498, "step": 1832 }, { "epoch": 0.18, "grad_norm": 0.9633331894874573, "learning_rate": 4.109947643979058e-05, "loss": 5.4915, "step": 1836 }, { "epoch": 0.18, "grad_norm": 0.9132066965103149, "learning_rate": 4.1080085320923015e-05, "loss": 5.5192, "step": 1840 }, { "epoch": 0.18, "grad_norm": 0.947010338306427, "learning_rate": 4.106069420205546e-05, "loss": 5.5928, "step": 1844 }, { "epoch": 0.18, "grad_norm": 1.1576569080352783, "learning_rate": 4.10413030831879e-05, "loss": 5.6224, "step": 1848 }, { "epoch": 0.18, "grad_norm": 0.913221538066864, "learning_rate": 4.1021911964320344e-05, "loss": 5.6261, "step": 1852 }, { "epoch": 0.18, "grad_norm": 0.9803014397621155, "learning_rate": 4.1002520845452784e-05, "loss": 5.4529, "step": 1856 }, { "epoch": 0.18, "grad_norm": 1.0535297393798828, "learning_rate": 4.0983129726585225e-05, "loss": 5.5471, "step": 1860 }, { "epoch": 0.18, "grad_norm": 0.9755203723907471, "learning_rate": 4.0963738607717665e-05, "loss": 5.6235, "step": 1864 }, { "epoch": 0.18, "grad_norm": 0.8834216594696045, "learning_rate": 4.094434748885011e-05, "loss": 5.5566, "step": 1868 }, { "epoch": 0.18, "grad_norm": 0.9767342209815979, "learning_rate": 4.0924956369982546e-05, "loss": 5.6874, "step": 1872 }, { "epoch": 0.18, "grad_norm": 1.018384575843811, "learning_rate": 4.0905565251114994e-05, "loss": 5.5437, "step": 1876 }, { "epoch": 0.18, "grad_norm": 1.0440962314605713, "learning_rate": 4.0886174132247434e-05, "loss": 5.594, "step": 1880 }, { "epoch": 0.18, "grad_norm": 1.0711756944656372, "learning_rate": 4.0866783013379875e-05, "loss": 5.55, "step": 1884 }, { "epoch": 0.18, "grad_norm": 0.9786011576652527, "learning_rate": 4.0847391894512315e-05, "loss": 5.5315, "step": 1888 }, { "epoch": 0.18, "grad_norm": 1.1673699617385864, "learning_rate": 4.0828000775644756e-05, "loss": 5.596, "step": 1892 }, { "epoch": 0.18, "grad_norm": 1.0179039239883423, "learning_rate": 4.0808609656777196e-05, "loss": 5.5635, "step": 1896 }, { "epoch": 0.18, "grad_norm": 1.0204113721847534, "learning_rate": 4.0789218537909644e-05, "loss": 5.5419, "step": 1900 }, { "epoch": 0.18, "grad_norm": 0.919965386390686, "learning_rate": 4.076982741904208e-05, "loss": 5.5283, "step": 1904 }, { "epoch": 0.18, "grad_norm": 0.911108136177063, "learning_rate": 4.0750436300174525e-05, "loss": 5.4592, "step": 1908 }, { "epoch": 0.19, "grad_norm": 0.9294359087944031, "learning_rate": 4.0731045181306965e-05, "loss": 5.5676, "step": 1912 }, { "epoch": 0.19, "grad_norm": 0.928774893283844, "learning_rate": 4.0711654062439406e-05, "loss": 5.5844, "step": 1916 }, { "epoch": 0.19, "grad_norm": 1.0778926610946655, "learning_rate": 4.0692262943571846e-05, "loss": 5.5877, "step": 1920 }, { "epoch": 0.19, "grad_norm": 1.0779755115509033, "learning_rate": 4.067287182470429e-05, "loss": 5.5362, "step": 1924 }, { "epoch": 0.19, "grad_norm": 0.9737743139266968, "learning_rate": 4.065348070583673e-05, "loss": 5.584, "step": 1928 }, { "epoch": 0.19, "grad_norm": 0.9039328098297119, "learning_rate": 4.063408958696917e-05, "loss": 5.441, "step": 1932 }, { "epoch": 0.19, "grad_norm": 1.0125256776809692, "learning_rate": 4.0614698468101615e-05, "loss": 5.5987, "step": 1936 }, { "epoch": 0.19, "grad_norm": 0.9862051010131836, "learning_rate": 4.059530734923405e-05, "loss": 5.5512, "step": 1940 }, { "epoch": 0.19, "grad_norm": 0.8931455016136169, "learning_rate": 4.0575916230366496e-05, "loss": 5.5184, "step": 1944 }, { "epoch": 0.19, "grad_norm": 0.9782811403274536, "learning_rate": 4.055652511149893e-05, "loss": 5.6231, "step": 1948 }, { "epoch": 0.19, "grad_norm": 0.9657939076423645, "learning_rate": 4.053713399263138e-05, "loss": 5.5536, "step": 1952 }, { "epoch": 0.19, "grad_norm": 0.9090112447738647, "learning_rate": 4.051774287376382e-05, "loss": 5.51, "step": 1956 }, { "epoch": 0.19, "grad_norm": 0.9079639911651611, "learning_rate": 4.049835175489626e-05, "loss": 5.4939, "step": 1960 }, { "epoch": 0.19, "grad_norm": 1.0238642692565918, "learning_rate": 4.04789606360287e-05, "loss": 5.626, "step": 1964 }, { "epoch": 0.19, "grad_norm": 0.9550356268882751, "learning_rate": 4.0459569517161146e-05, "loss": 5.5831, "step": 1968 }, { "epoch": 0.19, "grad_norm": 1.0468617677688599, "learning_rate": 4.044017839829358e-05, "loss": 5.5789, "step": 1972 }, { "epoch": 0.19, "grad_norm": 0.9686053395271301, "learning_rate": 4.042078727942603e-05, "loss": 5.5384, "step": 1976 }, { "epoch": 0.19, "grad_norm": 1.0532784461975098, "learning_rate": 4.040139616055846e-05, "loss": 5.617, "step": 1980 }, { "epoch": 0.19, "grad_norm": 0.9046121835708618, "learning_rate": 4.038200504169091e-05, "loss": 5.6226, "step": 1984 }, { "epoch": 0.19, "grad_norm": 0.9807924628257751, "learning_rate": 4.036261392282335e-05, "loss": 5.5259, "step": 1988 }, { "epoch": 0.19, "grad_norm": 0.957099199295044, "learning_rate": 4.034322280395579e-05, "loss": 5.568, "step": 1992 }, { "epoch": 0.19, "grad_norm": 0.9218006134033203, "learning_rate": 4.032383168508823e-05, "loss": 5.4665, "step": 1996 }, { "epoch": 0.19, "grad_norm": 1.190796971321106, "learning_rate": 4.030444056622068e-05, "loss": 5.4891, "step": 2000 }, { "epoch": 0.19, "grad_norm": 0.9437822699546814, "learning_rate": 4.028504944735311e-05, "loss": 5.4209, "step": 2004 }, { "epoch": 0.19, "grad_norm": 0.8980192542076111, "learning_rate": 4.026565832848556e-05, "loss": 5.5624, "step": 2008 }, { "epoch": 0.2, "grad_norm": 1.1388368606567383, "learning_rate": 4.0246267209618e-05, "loss": 5.5448, "step": 2012 }, { "epoch": 0.2, "grad_norm": 0.9411901235580444, "learning_rate": 4.022687609075044e-05, "loss": 5.552, "step": 2016 }, { "epoch": 0.2, "grad_norm": 0.9226595163345337, "learning_rate": 4.020748497188288e-05, "loss": 5.4518, "step": 2020 }, { "epoch": 0.2, "grad_norm": 1.0351731777191162, "learning_rate": 4.018809385301532e-05, "loss": 5.5449, "step": 2024 }, { "epoch": 0.2, "grad_norm": 0.89235919713974, "learning_rate": 4.016870273414776e-05, "loss": 5.563, "step": 2028 }, { "epoch": 0.2, "grad_norm": 0.9486913084983826, "learning_rate": 4.014931161528021e-05, "loss": 5.4925, "step": 2032 }, { "epoch": 0.2, "grad_norm": 0.9577587842941284, "learning_rate": 4.012992049641264e-05, "loss": 5.4697, "step": 2036 }, { "epoch": 0.2, "grad_norm": 0.9649032950401306, "learning_rate": 4.011052937754509e-05, "loss": 5.4694, "step": 2040 }, { "epoch": 0.2, "grad_norm": 0.8921785950660706, "learning_rate": 4.009113825867753e-05, "loss": 5.6328, "step": 2044 }, { "epoch": 0.2, "grad_norm": 0.9974868893623352, "learning_rate": 4.007174713980997e-05, "loss": 5.572, "step": 2048 }, { "epoch": 0.2, "grad_norm": 0.9336390495300293, "learning_rate": 4.005235602094241e-05, "loss": 5.5147, "step": 2052 }, { "epoch": 0.2, "grad_norm": 0.942483127117157, "learning_rate": 4.003296490207485e-05, "loss": 5.4991, "step": 2056 }, { "epoch": 0.2, "grad_norm": 0.9286855459213257, "learning_rate": 4.001357378320729e-05, "loss": 5.5299, "step": 2060 }, { "epoch": 0.2, "grad_norm": 0.9835842251777649, "learning_rate": 3.999418266433973e-05, "loss": 5.6657, "step": 2064 }, { "epoch": 0.2, "grad_norm": 0.9683178663253784, "learning_rate": 3.997479154547217e-05, "loss": 5.5365, "step": 2068 }, { "epoch": 0.2, "grad_norm": 1.0575816631317139, "learning_rate": 3.9955400426604614e-05, "loss": 5.6907, "step": 2072 }, { "epoch": 0.2, "grad_norm": 0.9691389799118042, "learning_rate": 3.993600930773706e-05, "loss": 5.5166, "step": 2076 }, { "epoch": 0.2, "grad_norm": 1.0539683103561401, "learning_rate": 3.9916618188869495e-05, "loss": 5.6106, "step": 2080 }, { "epoch": 0.2, "grad_norm": 1.010002851486206, "learning_rate": 3.989722707000194e-05, "loss": 5.4635, "step": 2084 }, { "epoch": 0.2, "grad_norm": 0.9025498032569885, "learning_rate": 3.987783595113438e-05, "loss": 5.5538, "step": 2088 }, { "epoch": 0.2, "grad_norm": 0.9283170700073242, "learning_rate": 3.985844483226682e-05, "loss": 5.5295, "step": 2092 }, { "epoch": 0.2, "grad_norm": 1.0095292329788208, "learning_rate": 3.9839053713399264e-05, "loss": 5.4738, "step": 2096 }, { "epoch": 0.2, "grad_norm": 1.009535312652588, "learning_rate": 3.9819662594531704e-05, "loss": 5.4439, "step": 2100 }, { "epoch": 0.2, "grad_norm": 0.9733148217201233, "learning_rate": 3.9800271475664145e-05, "loss": 5.5443, "step": 2104 }, { "epoch": 0.2, "grad_norm": 1.0330760478973389, "learning_rate": 3.978088035679659e-05, "loss": 5.58, "step": 2108 }, { "epoch": 0.2, "grad_norm": 1.0041800737380981, "learning_rate": 3.9761489237929026e-05, "loss": 5.5228, "step": 2112 }, { "epoch": 0.21, "grad_norm": 0.967785120010376, "learning_rate": 3.974209811906147e-05, "loss": 5.6853, "step": 2116 }, { "epoch": 0.21, "grad_norm": 1.0202077627182007, "learning_rate": 3.9722707000193914e-05, "loss": 5.5428, "step": 2120 }, { "epoch": 0.21, "grad_norm": 0.9732391834259033, "learning_rate": 3.9703315881326354e-05, "loss": 5.5364, "step": 2124 }, { "epoch": 0.21, "grad_norm": 0.9813392162322998, "learning_rate": 3.9683924762458795e-05, "loss": 5.5904, "step": 2128 }, { "epoch": 0.21, "grad_norm": 0.9656361937522888, "learning_rate": 3.966453364359124e-05, "loss": 5.4623, "step": 2132 }, { "epoch": 0.21, "grad_norm": 0.956072211265564, "learning_rate": 3.9645142524723676e-05, "loss": 5.5391, "step": 2136 }, { "epoch": 0.21, "grad_norm": 1.0806626081466675, "learning_rate": 3.962575140585612e-05, "loss": 5.5746, "step": 2140 }, { "epoch": 0.21, "grad_norm": 1.0280499458312988, "learning_rate": 3.960636028698856e-05, "loss": 5.5504, "step": 2144 }, { "epoch": 0.21, "grad_norm": 1.0023061037063599, "learning_rate": 3.9586969168121004e-05, "loss": 5.5687, "step": 2148 }, { "epoch": 0.21, "grad_norm": 0.9010854363441467, "learning_rate": 3.9567578049253445e-05, "loss": 5.5546, "step": 2152 }, { "epoch": 0.21, "grad_norm": 1.0272430181503296, "learning_rate": 3.9548186930385885e-05, "loss": 5.4934, "step": 2156 }, { "epoch": 0.21, "grad_norm": 1.0653831958770752, "learning_rate": 3.9528795811518326e-05, "loss": 5.496, "step": 2160 }, { "epoch": 0.21, "grad_norm": 1.0314921140670776, "learning_rate": 3.950940469265077e-05, "loss": 5.5062, "step": 2164 }, { "epoch": 0.21, "grad_norm": 0.9908810257911682, "learning_rate": 3.949001357378321e-05, "loss": 5.6055, "step": 2168 }, { "epoch": 0.21, "grad_norm": 0.9573884606361389, "learning_rate": 3.9470622454915654e-05, "loss": 5.5008, "step": 2172 }, { "epoch": 0.21, "grad_norm": 0.9359253644943237, "learning_rate": 3.9451231336048095e-05, "loss": 5.6236, "step": 2176 }, { "epoch": 0.21, "grad_norm": 1.0594229698181152, "learning_rate": 3.9431840217180535e-05, "loss": 5.4301, "step": 2180 }, { "epoch": 0.21, "grad_norm": 0.994006872177124, "learning_rate": 3.9412449098312976e-05, "loss": 5.5638, "step": 2184 }, { "epoch": 0.21, "grad_norm": 0.9278011322021484, "learning_rate": 3.9393057979445416e-05, "loss": 5.5056, "step": 2188 }, { "epoch": 0.21, "grad_norm": 1.0667142868041992, "learning_rate": 3.937366686057786e-05, "loss": 5.6599, "step": 2192 }, { "epoch": 0.21, "grad_norm": 0.9456555843353271, "learning_rate": 3.93542757417103e-05, "loss": 5.5152, "step": 2196 }, { "epoch": 0.21, "grad_norm": 0.9990763068199158, "learning_rate": 3.933488462284274e-05, "loss": 5.4061, "step": 2200 }, { "epoch": 0.21, "grad_norm": 0.9991575479507446, "learning_rate": 3.931549350397518e-05, "loss": 5.5047, "step": 2204 }, { "epoch": 0.21, "grad_norm": 1.0041279792785645, "learning_rate": 3.9296102385107626e-05, "loss": 5.5836, "step": 2208 }, { "epoch": 0.21, "grad_norm": 1.0032880306243896, "learning_rate": 3.927671126624006e-05, "loss": 5.5682, "step": 2212 }, { "epoch": 0.21, "grad_norm": 1.0610923767089844, "learning_rate": 3.925732014737251e-05, "loss": 5.597, "step": 2216 }, { "epoch": 0.22, "grad_norm": 0.9000992178916931, "learning_rate": 3.923792902850494e-05, "loss": 5.54, "step": 2220 }, { "epoch": 0.22, "grad_norm": 0.9666118621826172, "learning_rate": 3.921853790963739e-05, "loss": 5.568, "step": 2224 }, { "epoch": 0.22, "grad_norm": 0.963789701461792, "learning_rate": 3.919914679076983e-05, "loss": 5.5229, "step": 2228 }, { "epoch": 0.22, "grad_norm": 1.030738353729248, "learning_rate": 3.917975567190227e-05, "loss": 5.6063, "step": 2232 }, { "epoch": 0.22, "grad_norm": 1.0029246807098389, "learning_rate": 3.916036455303471e-05, "loss": 5.5573, "step": 2236 }, { "epoch": 0.22, "grad_norm": 1.0004112720489502, "learning_rate": 3.914097343416716e-05, "loss": 5.4992, "step": 2240 }, { "epoch": 0.22, "grad_norm": 0.9665903449058533, "learning_rate": 3.912158231529959e-05, "loss": 5.475, "step": 2244 }, { "epoch": 0.22, "grad_norm": 0.927628219127655, "learning_rate": 3.910219119643204e-05, "loss": 5.4589, "step": 2248 }, { "epoch": 0.22, "grad_norm": 1.0179831981658936, "learning_rate": 3.908280007756448e-05, "loss": 5.5458, "step": 2252 }, { "epoch": 0.22, "grad_norm": 0.8914494514465332, "learning_rate": 3.906340895869692e-05, "loss": 5.4927, "step": 2256 }, { "epoch": 0.22, "grad_norm": 1.001958966255188, "learning_rate": 3.904401783982936e-05, "loss": 5.4334, "step": 2260 }, { "epoch": 0.22, "grad_norm": 0.9571743011474609, "learning_rate": 3.90246267209618e-05, "loss": 5.5728, "step": 2264 }, { "epoch": 0.22, "grad_norm": 1.0079842805862427, "learning_rate": 3.900523560209424e-05, "loss": 5.5844, "step": 2268 }, { "epoch": 0.22, "grad_norm": 0.9021591544151306, "learning_rate": 3.898584448322669e-05, "loss": 5.5276, "step": 2272 }, { "epoch": 0.22, "grad_norm": 0.9413024187088013, "learning_rate": 3.896645336435912e-05, "loss": 5.4693, "step": 2276 }, { "epoch": 0.22, "grad_norm": 0.9716333150863647, "learning_rate": 3.894706224549157e-05, "loss": 5.434, "step": 2280 }, { "epoch": 0.22, "grad_norm": 1.020964503288269, "learning_rate": 3.892767112662401e-05, "loss": 5.4197, "step": 2284 }, { "epoch": 0.22, "grad_norm": 0.9597110748291016, "learning_rate": 3.890828000775645e-05, "loss": 5.4174, "step": 2288 }, { "epoch": 0.22, "grad_norm": 1.0164399147033691, "learning_rate": 3.888888888888889e-05, "loss": 5.4698, "step": 2292 }, { "epoch": 0.22, "grad_norm": 1.0590660572052002, "learning_rate": 3.886949777002134e-05, "loss": 5.4815, "step": 2296 }, { "epoch": 0.22, "grad_norm": 0.9020886421203613, "learning_rate": 3.885010665115377e-05, "loss": 5.4197, "step": 2300 }, { "epoch": 0.22, "grad_norm": 0.9899044632911682, "learning_rate": 3.883071553228622e-05, "loss": 5.5061, "step": 2304 }, { "epoch": 0.22, "grad_norm": 0.9730533957481384, "learning_rate": 3.881132441341865e-05, "loss": 5.4804, "step": 2308 }, { "epoch": 0.22, "grad_norm": 0.9274543523788452, "learning_rate": 3.87919332945511e-05, "loss": 5.5486, "step": 2312 }, { "epoch": 0.22, "grad_norm": 0.9086050391197205, "learning_rate": 3.877254217568354e-05, "loss": 5.3997, "step": 2316 }, { "epoch": 0.22, "grad_norm": 0.9170548915863037, "learning_rate": 3.875315105681598e-05, "loss": 5.3701, "step": 2320 }, { "epoch": 0.23, "grad_norm": 0.9133801460266113, "learning_rate": 3.873375993794842e-05, "loss": 5.4241, "step": 2324 }, { "epoch": 0.23, "grad_norm": 0.9891861081123352, "learning_rate": 3.871436881908086e-05, "loss": 5.5119, "step": 2328 }, { "epoch": 0.23, "grad_norm": 1.0900509357452393, "learning_rate": 3.86949777002133e-05, "loss": 5.6089, "step": 2332 }, { "epoch": 0.23, "grad_norm": 0.9711102843284607, "learning_rate": 3.8675586581345743e-05, "loss": 5.5975, "step": 2336 }, { "epoch": 0.23, "grad_norm": 0.9177804589271545, "learning_rate": 3.8656195462478184e-05, "loss": 5.4848, "step": 2340 }, { "epoch": 0.23, "grad_norm": 0.9340533018112183, "learning_rate": 3.8636804343610625e-05, "loss": 5.5705, "step": 2344 }, { "epoch": 0.23, "grad_norm": 0.9871985912322998, "learning_rate": 3.861741322474307e-05, "loss": 5.5552, "step": 2348 }, { "epoch": 0.23, "grad_norm": 0.9477519392967224, "learning_rate": 3.8598022105875506e-05, "loss": 5.3622, "step": 2352 }, { "epoch": 0.23, "grad_norm": 0.9251902103424072, "learning_rate": 3.857863098700795e-05, "loss": 5.3964, "step": 2356 }, { "epoch": 0.23, "grad_norm": 0.9803330898284912, "learning_rate": 3.8559239868140393e-05, "loss": 5.4716, "step": 2360 }, { "epoch": 0.23, "grad_norm": 0.9429686069488525, "learning_rate": 3.8539848749272834e-05, "loss": 5.4319, "step": 2364 }, { "epoch": 0.23, "grad_norm": 1.0370640754699707, "learning_rate": 3.8520457630405275e-05, "loss": 5.4297, "step": 2368 }, { "epoch": 0.23, "grad_norm": 0.9429317712783813, "learning_rate": 3.850106651153772e-05, "loss": 5.4068, "step": 2372 }, { "epoch": 0.23, "grad_norm": 1.0374614000320435, "learning_rate": 3.8481675392670156e-05, "loss": 5.4975, "step": 2376 }, { "epoch": 0.23, "grad_norm": 0.9564975500106812, "learning_rate": 3.84622842738026e-05, "loss": 5.5221, "step": 2380 }, { "epoch": 0.23, "grad_norm": 0.9388477802276611, "learning_rate": 3.844289315493504e-05, "loss": 5.4748, "step": 2384 }, { "epoch": 0.23, "grad_norm": 0.9292894601821899, "learning_rate": 3.8423502036067484e-05, "loss": 5.6158, "step": 2388 }, { "epoch": 0.23, "grad_norm": 0.9442563056945801, "learning_rate": 3.8404110917199925e-05, "loss": 5.5371, "step": 2392 }, { "epoch": 0.23, "grad_norm": 1.0346298217773438, "learning_rate": 3.8384719798332365e-05, "loss": 5.4546, "step": 2396 }, { "epoch": 0.23, "grad_norm": 0.9618560671806335, "learning_rate": 3.8365328679464806e-05, "loss": 5.5663, "step": 2400 }, { "epoch": 0.23, "grad_norm": 0.9630410671234131, "learning_rate": 3.834593756059725e-05, "loss": 5.5669, "step": 2404 }, { "epoch": 0.23, "grad_norm": 1.0021862983703613, "learning_rate": 3.832654644172969e-05, "loss": 5.5155, "step": 2408 }, { "epoch": 0.23, "grad_norm": 1.015215277671814, "learning_rate": 3.8307155322862134e-05, "loss": 5.4772, "step": 2412 }, { "epoch": 0.23, "grad_norm": 0.9358635544776917, "learning_rate": 3.8287764203994574e-05, "loss": 5.4375, "step": 2416 }, { "epoch": 0.23, "grad_norm": 1.1098734140396118, "learning_rate": 3.8268373085127015e-05, "loss": 5.5493, "step": 2420 }, { "epoch": 0.24, "grad_norm": 0.9704833030700684, "learning_rate": 3.8248981966259456e-05, "loss": 5.5974, "step": 2424 }, { "epoch": 0.24, "grad_norm": 1.1319690942764282, "learning_rate": 3.8229590847391896e-05, "loss": 5.5381, "step": 2428 }, { "epoch": 0.24, "grad_norm": 0.9214887619018555, "learning_rate": 3.821019972852434e-05, "loss": 5.4689, "step": 2432 }, { "epoch": 0.24, "grad_norm": 0.9483059048652649, "learning_rate": 3.8190808609656784e-05, "loss": 5.447, "step": 2436 }, { "epoch": 0.24, "grad_norm": 0.936059832572937, "learning_rate": 3.817141749078922e-05, "loss": 5.5256, "step": 2440 }, { "epoch": 0.24, "grad_norm": 0.8800360560417175, "learning_rate": 3.8152026371921665e-05, "loss": 5.4862, "step": 2444 }, { "epoch": 0.24, "grad_norm": 0.9923036098480225, "learning_rate": 3.8132635253054106e-05, "loss": 5.5005, "step": 2448 }, { "epoch": 0.24, "grad_norm": 0.9370976090431213, "learning_rate": 3.8113244134186546e-05, "loss": 5.5008, "step": 2452 }, { "epoch": 0.24, "grad_norm": 0.9325810670852661, "learning_rate": 3.809385301531899e-05, "loss": 5.5375, "step": 2456 }, { "epoch": 0.24, "grad_norm": 1.0524775981903076, "learning_rate": 3.807446189645143e-05, "loss": 5.5869, "step": 2460 }, { "epoch": 0.24, "grad_norm": 0.9688572287559509, "learning_rate": 3.805507077758387e-05, "loss": 5.5657, "step": 2464 }, { "epoch": 0.24, "grad_norm": 0.9577921628952026, "learning_rate": 3.803567965871631e-05, "loss": 5.4082, "step": 2468 }, { "epoch": 0.24, "grad_norm": 0.977051317691803, "learning_rate": 3.801628853984875e-05, "loss": 5.4773, "step": 2472 }, { "epoch": 0.24, "grad_norm": 0.9854933023452759, "learning_rate": 3.799689742098119e-05, "loss": 5.487, "step": 2476 }, { "epoch": 0.24, "grad_norm": 1.0686819553375244, "learning_rate": 3.797750630211364e-05, "loss": 5.3842, "step": 2480 }, { "epoch": 0.24, "grad_norm": 0.9736838936805725, "learning_rate": 3.795811518324607e-05, "loss": 5.5956, "step": 2484 }, { "epoch": 0.24, "grad_norm": 0.9340422749519348, "learning_rate": 3.793872406437852e-05, "loss": 5.5148, "step": 2488 }, { "epoch": 0.24, "grad_norm": 1.0714948177337646, "learning_rate": 3.791933294551096e-05, "loss": 5.495, "step": 2492 }, { "epoch": 0.24, "grad_norm": 1.037858486175537, "learning_rate": 3.78999418266434e-05, "loss": 5.5312, "step": 2496 }, { "epoch": 0.24, "grad_norm": 1.1142346858978271, "learning_rate": 3.788055070777584e-05, "loss": 5.6109, "step": 2500 }, { "epoch": 0.24, "grad_norm": 1.0405195951461792, "learning_rate": 3.786115958890828e-05, "loss": 5.4659, "step": 2504 }, { "epoch": 0.24, "grad_norm": 1.106404423713684, "learning_rate": 3.784176847004072e-05, "loss": 5.5187, "step": 2508 }, { "epoch": 0.24, "grad_norm": 0.9529224634170532, "learning_rate": 3.782237735117317e-05, "loss": 5.441, "step": 2512 }, { "epoch": 0.24, "grad_norm": 1.0962753295898438, "learning_rate": 3.78029862323056e-05, "loss": 5.5503, "step": 2516 }, { "epoch": 0.24, "grad_norm": 0.9510455131530762, "learning_rate": 3.778359511343805e-05, "loss": 5.4773, "step": 2520 }, { "epoch": 0.24, "grad_norm": 1.0108531713485718, "learning_rate": 3.776420399457049e-05, "loss": 5.5567, "step": 2524 }, { "epoch": 0.25, "grad_norm": 0.9611102938652039, "learning_rate": 3.774481287570293e-05, "loss": 5.4825, "step": 2528 }, { "epoch": 0.25, "grad_norm": 1.0297412872314453, "learning_rate": 3.772542175683537e-05, "loss": 5.4075, "step": 2532 }, { "epoch": 0.25, "grad_norm": 1.0067005157470703, "learning_rate": 3.770603063796782e-05, "loss": 5.5345, "step": 2536 }, { "epoch": 0.25, "grad_norm": 1.0144344568252563, "learning_rate": 3.768663951910025e-05, "loss": 5.5108, "step": 2540 }, { "epoch": 0.25, "grad_norm": 0.9989475607872009, "learning_rate": 3.76672484002327e-05, "loss": 5.4782, "step": 2544 }, { "epoch": 0.25, "grad_norm": 1.0596572160720825, "learning_rate": 3.764785728136513e-05, "loss": 5.5088, "step": 2548 }, { "epoch": 0.25, "grad_norm": 1.022268533706665, "learning_rate": 3.762846616249758e-05, "loss": 5.4595, "step": 2552 }, { "epoch": 0.25, "grad_norm": 0.9864400029182434, "learning_rate": 3.760907504363002e-05, "loss": 5.5029, "step": 2556 }, { "epoch": 0.25, "grad_norm": 0.9575673937797546, "learning_rate": 3.758968392476246e-05, "loss": 5.4772, "step": 2560 }, { "epoch": 0.25, "grad_norm": 0.9066863059997559, "learning_rate": 3.75702928058949e-05, "loss": 5.5936, "step": 2564 }, { "epoch": 0.25, "grad_norm": 0.9975427985191345, "learning_rate": 3.755090168702735e-05, "loss": 5.5532, "step": 2568 }, { "epoch": 0.25, "grad_norm": 0.935875415802002, "learning_rate": 3.753151056815978e-05, "loss": 5.4235, "step": 2572 }, { "epoch": 0.25, "grad_norm": 1.0825345516204834, "learning_rate": 3.751211944929223e-05, "loss": 5.3898, "step": 2576 }, { "epoch": 0.25, "grad_norm": 1.0362260341644287, "learning_rate": 3.7492728330424664e-05, "loss": 5.4427, "step": 2580 }, { "epoch": 0.25, "grad_norm": 0.8955732583999634, "learning_rate": 3.747333721155711e-05, "loss": 5.5447, "step": 2584 }, { "epoch": 0.25, "grad_norm": 1.0196340084075928, "learning_rate": 3.745394609268955e-05, "loss": 5.5636, "step": 2588 }, { "epoch": 0.25, "grad_norm": 0.9525064826011658, "learning_rate": 3.743455497382199e-05, "loss": 5.5, "step": 2592 }, { "epoch": 0.25, "grad_norm": 0.9297643899917603, "learning_rate": 3.741516385495443e-05, "loss": 5.5382, "step": 2596 }, { "epoch": 0.25, "grad_norm": 0.9364489912986755, "learning_rate": 3.739577273608687e-05, "loss": 5.5248, "step": 2600 }, { "epoch": 0.25, "grad_norm": 0.9031673073768616, "learning_rate": 3.7376381617219314e-05, "loss": 5.4689, "step": 2604 }, { "epoch": 0.25, "grad_norm": 1.020928978919983, "learning_rate": 3.7356990498351754e-05, "loss": 5.4896, "step": 2608 }, { "epoch": 0.25, "grad_norm": 0.9407410621643066, "learning_rate": 3.73375993794842e-05, "loss": 5.4259, "step": 2612 }, { "epoch": 0.25, "grad_norm": 1.0018398761749268, "learning_rate": 3.7318208260616635e-05, "loss": 5.5738, "step": 2616 }, { "epoch": 0.25, "grad_norm": 1.0489344596862793, "learning_rate": 3.729881714174908e-05, "loss": 5.404, "step": 2620 }, { "epoch": 0.25, "grad_norm": 0.9759474992752075, "learning_rate": 3.7279426022881516e-05, "loss": 5.4977, "step": 2624 }, { "epoch": 0.25, "grad_norm": 1.0580724477767944, "learning_rate": 3.7260034904013964e-05, "loss": 5.4477, "step": 2628 }, { "epoch": 0.26, "grad_norm": 1.0281325578689575, "learning_rate": 3.7240643785146404e-05, "loss": 5.4882, "step": 2632 }, { "epoch": 0.26, "grad_norm": 0.9635825157165527, "learning_rate": 3.7221252666278845e-05, "loss": 5.4303, "step": 2636 }, { "epoch": 0.26, "grad_norm": 0.9374428391456604, "learning_rate": 3.7201861547411285e-05, "loss": 5.4308, "step": 2640 }, { "epoch": 0.26, "grad_norm": 1.0188990831375122, "learning_rate": 3.718247042854373e-05, "loss": 5.4521, "step": 2644 }, { "epoch": 0.26, "grad_norm": 1.0239014625549316, "learning_rate": 3.7163079309676166e-05, "loss": 5.5686, "step": 2648 }, { "epoch": 0.26, "grad_norm": 0.946735680103302, "learning_rate": 3.7143688190808614e-05, "loss": 5.4329, "step": 2652 }, { "epoch": 0.26, "grad_norm": 0.9370056986808777, "learning_rate": 3.7124297071941054e-05, "loss": 5.5174, "step": 2656 }, { "epoch": 0.26, "grad_norm": 1.003072738647461, "learning_rate": 3.7104905953073495e-05, "loss": 5.3857, "step": 2660 }, { "epoch": 0.26, "grad_norm": 0.9541458487510681, "learning_rate": 3.7085514834205935e-05, "loss": 5.448, "step": 2664 }, { "epoch": 0.26, "grad_norm": 0.9533443450927734, "learning_rate": 3.7066123715338376e-05, "loss": 5.559, "step": 2668 }, { "epoch": 0.26, "grad_norm": 1.0223768949508667, "learning_rate": 3.7046732596470816e-05, "loss": 5.503, "step": 2672 }, { "epoch": 0.26, "grad_norm": 1.2174021005630493, "learning_rate": 3.7027341477603264e-05, "loss": 5.5322, "step": 2676 }, { "epoch": 0.26, "grad_norm": 1.117325782775879, "learning_rate": 3.70079503587357e-05, "loss": 5.4969, "step": 2680 }, { "epoch": 0.26, "grad_norm": 0.9299269318580627, "learning_rate": 3.6988559239868145e-05, "loss": 5.4264, "step": 2684 }, { "epoch": 0.26, "grad_norm": 0.9750757813453674, "learning_rate": 3.6969168121000585e-05, "loss": 5.4953, "step": 2688 }, { "epoch": 0.26, "grad_norm": 0.9810564517974854, "learning_rate": 3.6949777002133026e-05, "loss": 5.4762, "step": 2692 }, { "epoch": 0.26, "grad_norm": 1.0046603679656982, "learning_rate": 3.6930385883265466e-05, "loss": 5.4572, "step": 2696 }, { "epoch": 0.26, "grad_norm": 0.9024963974952698, "learning_rate": 3.691099476439791e-05, "loss": 5.5038, "step": 2700 }, { "epoch": 0.26, "grad_norm": 0.9631572961807251, "learning_rate": 3.689160364553035e-05, "loss": 5.4895, "step": 2704 }, { "epoch": 0.26, "grad_norm": 0.8802670240402222, "learning_rate": 3.6872212526662795e-05, "loss": 5.6245, "step": 2708 }, { "epoch": 0.26, "grad_norm": 0.9694925546646118, "learning_rate": 3.685282140779523e-05, "loss": 5.4813, "step": 2712 }, { "epoch": 0.26, "grad_norm": 1.0332534313201904, "learning_rate": 3.6833430288927676e-05, "loss": 5.5064, "step": 2716 }, { "epoch": 0.26, "grad_norm": 0.9285298585891724, "learning_rate": 3.6814039170060116e-05, "loss": 5.4926, "step": 2720 }, { "epoch": 0.26, "grad_norm": 0.9079506993293762, "learning_rate": 3.679464805119256e-05, "loss": 5.4837, "step": 2724 }, { "epoch": 0.26, "grad_norm": 1.0010629892349243, "learning_rate": 3.6775256932325e-05, "loss": 5.4814, "step": 2728 }, { "epoch": 0.26, "grad_norm": 0.9733301997184753, "learning_rate": 3.675586581345744e-05, "loss": 5.526, "step": 2732 }, { "epoch": 0.27, "grad_norm": 0.9595903158187866, "learning_rate": 3.673647469458988e-05, "loss": 5.3726, "step": 2736 }, { "epoch": 0.27, "grad_norm": 1.0144261121749878, "learning_rate": 3.671708357572232e-05, "loss": 5.4286, "step": 2740 }, { "epoch": 0.27, "grad_norm": 1.0285661220550537, "learning_rate": 3.669769245685476e-05, "loss": 5.5113, "step": 2744 }, { "epoch": 0.27, "grad_norm": 0.9807763695716858, "learning_rate": 3.66783013379872e-05, "loss": 5.5956, "step": 2748 }, { "epoch": 0.27, "grad_norm": 1.0480782985687256, "learning_rate": 3.665891021911965e-05, "loss": 5.4205, "step": 2752 }, { "epoch": 0.27, "grad_norm": 0.8527302145957947, "learning_rate": 3.663951910025208e-05, "loss": 5.5106, "step": 2756 }, { "epoch": 0.27, "grad_norm": 0.9895337224006653, "learning_rate": 3.662012798138453e-05, "loss": 5.517, "step": 2760 }, { "epoch": 0.27, "grad_norm": 1.005570888519287, "learning_rate": 3.660073686251697e-05, "loss": 5.4432, "step": 2764 }, { "epoch": 0.27, "grad_norm": 1.0083740949630737, "learning_rate": 3.658134574364941e-05, "loss": 5.3863, "step": 2768 }, { "epoch": 0.27, "grad_norm": 0.9431845545768738, "learning_rate": 3.656195462478185e-05, "loss": 5.3734, "step": 2772 }, { "epoch": 0.27, "grad_norm": 0.9629083871841431, "learning_rate": 3.65425635059143e-05, "loss": 5.4737, "step": 2776 }, { "epoch": 0.27, "grad_norm": 0.9649605751037598, "learning_rate": 3.652317238704673e-05, "loss": 5.4022, "step": 2780 }, { "epoch": 0.27, "grad_norm": 0.9746363162994385, "learning_rate": 3.650378126817918e-05, "loss": 5.4826, "step": 2784 }, { "epoch": 0.27, "grad_norm": 0.9153027534484863, "learning_rate": 3.648439014931161e-05, "loss": 5.5833, "step": 2788 }, { "epoch": 0.27, "grad_norm": 0.940949559211731, "learning_rate": 3.646499903044406e-05, "loss": 5.4852, "step": 2792 }, { "epoch": 0.27, "grad_norm": 0.9482103586196899, "learning_rate": 3.64456079115765e-05, "loss": 5.4711, "step": 2796 }, { "epoch": 0.27, "grad_norm": 1.0848538875579834, "learning_rate": 3.642621679270894e-05, "loss": 5.464, "step": 2800 }, { "epoch": 0.27, "grad_norm": 0.9354459643363953, "learning_rate": 3.640682567384138e-05, "loss": 5.4495, "step": 2804 }, { "epoch": 0.27, "grad_norm": 0.9546772837638855, "learning_rate": 3.638743455497383e-05, "loss": 5.4562, "step": 2808 }, { "epoch": 0.27, "grad_norm": 0.9387646317481995, "learning_rate": 3.636804343610626e-05, "loss": 5.4439, "step": 2812 }, { "epoch": 0.27, "grad_norm": 0.9842014312744141, "learning_rate": 3.634865231723871e-05, "loss": 5.3971, "step": 2816 }, { "epoch": 0.27, "grad_norm": 0.9438384175300598, "learning_rate": 3.632926119837114e-05, "loss": 5.4875, "step": 2820 }, { "epoch": 0.27, "grad_norm": 0.998210072517395, "learning_rate": 3.630987007950359e-05, "loss": 5.4301, "step": 2824 }, { "epoch": 0.27, "grad_norm": 0.948137640953064, "learning_rate": 3.629047896063603e-05, "loss": 5.5703, "step": 2828 }, { "epoch": 0.27, "grad_norm": 1.0335065126419067, "learning_rate": 3.627108784176847e-05, "loss": 5.386, "step": 2832 }, { "epoch": 0.27, "grad_norm": 0.9774126410484314, "learning_rate": 3.625169672290091e-05, "loss": 5.4288, "step": 2836 }, { "epoch": 0.28, "grad_norm": 1.168003797531128, "learning_rate": 3.623230560403336e-05, "loss": 5.4524, "step": 2840 }, { "epoch": 0.28, "grad_norm": 1.05010187625885, "learning_rate": 3.621291448516579e-05, "loss": 5.3784, "step": 2844 }, { "epoch": 0.28, "grad_norm": 1.0008686780929565, "learning_rate": 3.619352336629824e-05, "loss": 5.351, "step": 2848 }, { "epoch": 0.28, "grad_norm": 1.0319279432296753, "learning_rate": 3.617413224743068e-05, "loss": 5.5112, "step": 2852 }, { "epoch": 0.28, "grad_norm": 0.9444233775138855, "learning_rate": 3.615474112856312e-05, "loss": 5.4225, "step": 2856 }, { "epoch": 0.28, "grad_norm": 1.0696698427200317, "learning_rate": 3.613535000969556e-05, "loss": 5.5542, "step": 2860 }, { "epoch": 0.28, "grad_norm": 0.9493553042411804, "learning_rate": 3.6115958890828e-05, "loss": 5.42, "step": 2864 }, { "epoch": 0.28, "grad_norm": 1.0606472492218018, "learning_rate": 3.609656777196044e-05, "loss": 5.421, "step": 2868 }, { "epoch": 0.28, "grad_norm": 1.045782208442688, "learning_rate": 3.6077176653092884e-05, "loss": 5.5086, "step": 2872 }, { "epoch": 0.28, "grad_norm": 1.034601092338562, "learning_rate": 3.6057785534225324e-05, "loss": 5.4582, "step": 2876 }, { "epoch": 0.28, "grad_norm": 1.0553306341171265, "learning_rate": 3.6038394415357765e-05, "loss": 5.5501, "step": 2880 }, { "epoch": 0.28, "grad_norm": 1.0070221424102783, "learning_rate": 3.601900329649021e-05, "loss": 5.5016, "step": 2884 }, { "epoch": 0.28, "grad_norm": 1.0406205654144287, "learning_rate": 3.5999612177622646e-05, "loss": 5.4743, "step": 2888 }, { "epoch": 0.28, "grad_norm": 1.0793685913085938, "learning_rate": 3.598022105875509e-05, "loss": 5.4889, "step": 2892 }, { "epoch": 0.28, "grad_norm": 1.1032297611236572, "learning_rate": 3.5960829939887534e-05, "loss": 5.4928, "step": 2896 }, { "epoch": 0.28, "grad_norm": 0.9864259362220764, "learning_rate": 3.5941438821019974e-05, "loss": 5.4582, "step": 2900 }, { "epoch": 0.28, "grad_norm": 0.9251708388328552, "learning_rate": 3.5922047702152415e-05, "loss": 5.4329, "step": 2904 }, { "epoch": 0.28, "grad_norm": 0.9993565082550049, "learning_rate": 3.5902656583284855e-05, "loss": 5.5837, "step": 2908 }, { "epoch": 0.28, "grad_norm": 0.9946919083595276, "learning_rate": 3.5883265464417296e-05, "loss": 5.4545, "step": 2912 }, { "epoch": 0.28, "grad_norm": 0.9719089865684509, "learning_rate": 3.586387434554974e-05, "loss": 5.448, "step": 2916 }, { "epoch": 0.28, "grad_norm": 0.900641143321991, "learning_rate": 3.584448322668218e-05, "loss": 5.5845, "step": 2920 }, { "epoch": 0.28, "grad_norm": 0.9279571771621704, "learning_rate": 3.5825092107814624e-05, "loss": 5.4733, "step": 2924 }, { "epoch": 0.28, "grad_norm": 1.0747668743133545, "learning_rate": 3.5805700988947065e-05, "loss": 5.3805, "step": 2928 }, { "epoch": 0.28, "grad_norm": 0.8932091593742371, "learning_rate": 3.5786309870079505e-05, "loss": 5.4015, "step": 2932 }, { "epoch": 0.28, "grad_norm": 0.9807014465332031, "learning_rate": 3.5766918751211946e-05, "loss": 5.5228, "step": 2936 }, { "epoch": 0.29, "grad_norm": 0.9529114961624146, "learning_rate": 3.5747527632344386e-05, "loss": 5.5328, "step": 2940 }, { "epoch": 0.29, "grad_norm": 0.9890924692153931, "learning_rate": 3.572813651347683e-05, "loss": 5.4686, "step": 2944 }, { "epoch": 0.29, "grad_norm": 0.9855780005455017, "learning_rate": 3.5708745394609274e-05, "loss": 5.4921, "step": 2948 }, { "epoch": 0.29, "grad_norm": 0.9508200287818909, "learning_rate": 3.568935427574171e-05, "loss": 5.5508, "step": 2952 }, { "epoch": 0.29, "grad_norm": 0.9192949533462524, "learning_rate": 3.5669963156874155e-05, "loss": 5.5484, "step": 2956 }, { "epoch": 0.29, "grad_norm": 0.9657400846481323, "learning_rate": 3.5650572038006596e-05, "loss": 5.4305, "step": 2960 }, { "epoch": 0.29, "grad_norm": 1.1960434913635254, "learning_rate": 3.5631180919139036e-05, "loss": 5.4461, "step": 2964 }, { "epoch": 0.29, "grad_norm": 0.9226086735725403, "learning_rate": 3.561178980027148e-05, "loss": 5.4288, "step": 2968 }, { "epoch": 0.29, "grad_norm": 1.0317691564559937, "learning_rate": 3.5592398681403924e-05, "loss": 5.5107, "step": 2972 }, { "epoch": 0.29, "grad_norm": 0.9748375415802002, "learning_rate": 3.557300756253636e-05, "loss": 5.5342, "step": 2976 }, { "epoch": 0.29, "grad_norm": 0.9513313174247742, "learning_rate": 3.5553616443668805e-05, "loss": 5.4337, "step": 2980 }, { "epoch": 0.29, "grad_norm": 0.9269315600395203, "learning_rate": 3.553422532480124e-05, "loss": 5.4853, "step": 2984 }, { "epoch": 0.29, "grad_norm": 1.0136945247650146, "learning_rate": 3.5514834205933686e-05, "loss": 5.4767, "step": 2988 }, { "epoch": 0.29, "grad_norm": 1.049842119216919, "learning_rate": 3.549544308706613e-05, "loss": 5.5028, "step": 2992 }, { "epoch": 0.29, "grad_norm": 0.9470251798629761, "learning_rate": 3.547605196819857e-05, "loss": 5.4391, "step": 2996 }, { "epoch": 0.29, "grad_norm": 1.1014668941497803, "learning_rate": 3.545666084933101e-05, "loss": 5.4564, "step": 3000 }, { "epoch": 0.29, "grad_norm": 1.0358186960220337, "learning_rate": 3.543726973046345e-05, "loss": 5.5398, "step": 3004 }, { "epoch": 0.29, "grad_norm": 1.0642112493515015, "learning_rate": 3.541787861159589e-05, "loss": 5.4548, "step": 3008 }, { "epoch": 0.29, "grad_norm": 0.9606940150260925, "learning_rate": 3.539848749272833e-05, "loss": 5.5331, "step": 3012 }, { "epoch": 0.29, "grad_norm": 1.0315513610839844, "learning_rate": 3.537909637386078e-05, "loss": 5.476, "step": 3016 }, { "epoch": 0.29, "grad_norm": 1.0335862636566162, "learning_rate": 3.535970525499321e-05, "loss": 5.3247, "step": 3020 }, { "epoch": 0.29, "grad_norm": 1.0279650688171387, "learning_rate": 3.534031413612566e-05, "loss": 5.425, "step": 3024 }, { "epoch": 0.29, "grad_norm": 0.9862622022628784, "learning_rate": 3.532092301725809e-05, "loss": 5.5486, "step": 3028 }, { "epoch": 0.29, "grad_norm": 1.0324090719223022, "learning_rate": 3.530153189839054e-05, "loss": 5.546, "step": 3032 }, { "epoch": 0.29, "grad_norm": 1.0767823457717896, "learning_rate": 3.528214077952298e-05, "loss": 5.4906, "step": 3036 }, { "epoch": 0.29, "grad_norm": 0.9795736074447632, "learning_rate": 3.526274966065542e-05, "loss": 5.4132, "step": 3040 }, { "epoch": 0.3, "grad_norm": 0.9786263704299927, "learning_rate": 3.524335854178786e-05, "loss": 5.4225, "step": 3044 }, { "epoch": 0.3, "grad_norm": 0.9306228756904602, "learning_rate": 3.522396742292031e-05, "loss": 5.5407, "step": 3048 }, { "epoch": 0.3, "grad_norm": 0.9735816121101379, "learning_rate": 3.520457630405274e-05, "loss": 5.5262, "step": 3052 }, { "epoch": 0.3, "grad_norm": 0.9719963073730469, "learning_rate": 3.518518518518519e-05, "loss": 5.4489, "step": 3056 }, { "epoch": 0.3, "grad_norm": 0.9754818081855774, "learning_rate": 3.516579406631762e-05, "loss": 5.4656, "step": 3060 }, { "epoch": 0.3, "grad_norm": 1.0036709308624268, "learning_rate": 3.514640294745007e-05, "loss": 5.4314, "step": 3064 }, { "epoch": 0.3, "grad_norm": 0.956697940826416, "learning_rate": 3.512701182858251e-05, "loss": 5.4755, "step": 3068 }, { "epoch": 0.3, "grad_norm": 1.1752293109893799, "learning_rate": 3.510762070971495e-05, "loss": 5.4411, "step": 3072 }, { "epoch": 0.3, "grad_norm": 0.9563004374504089, "learning_rate": 3.508822959084739e-05, "loss": 5.3531, "step": 3076 }, { "epoch": 0.3, "grad_norm": 1.0313175916671753, "learning_rate": 3.506883847197984e-05, "loss": 5.477, "step": 3080 }, { "epoch": 0.3, "grad_norm": 0.997872531414032, "learning_rate": 3.504944735311227e-05, "loss": 5.4479, "step": 3084 }, { "epoch": 0.3, "grad_norm": 0.9374060034751892, "learning_rate": 3.503005623424472e-05, "loss": 5.4492, "step": 3088 }, { "epoch": 0.3, "grad_norm": 0.9899947643280029, "learning_rate": 3.501066511537716e-05, "loss": 5.4255, "step": 3092 }, { "epoch": 0.3, "grad_norm": 1.0138983726501465, "learning_rate": 3.49912739965096e-05, "loss": 5.4234, "step": 3096 }, { "epoch": 0.3, "grad_norm": 0.9544614553451538, "learning_rate": 3.497188287764204e-05, "loss": 5.4781, "step": 3100 }, { "epoch": 0.3, "grad_norm": 1.0117629766464233, "learning_rate": 3.495249175877448e-05, "loss": 5.4445, "step": 3104 }, { "epoch": 0.3, "grad_norm": 1.0312600135803223, "learning_rate": 3.493310063990692e-05, "loss": 5.439, "step": 3108 }, { "epoch": 0.3, "grad_norm": 0.9983291029930115, "learning_rate": 3.491370952103937e-05, "loss": 5.5003, "step": 3112 }, { "epoch": 0.3, "grad_norm": 1.0281239748001099, "learning_rate": 3.4894318402171804e-05, "loss": 5.4276, "step": 3116 }, { "epoch": 0.3, "grad_norm": 0.9426625370979309, "learning_rate": 3.487492728330425e-05, "loss": 5.5267, "step": 3120 }, { "epoch": 0.3, "grad_norm": 1.0768100023269653, "learning_rate": 3.485553616443669e-05, "loss": 5.5023, "step": 3124 }, { "epoch": 0.3, "grad_norm": 1.0463875532150269, "learning_rate": 3.483614504556913e-05, "loss": 5.4019, "step": 3128 }, { "epoch": 0.3, "grad_norm": 0.9380079507827759, "learning_rate": 3.481675392670157e-05, "loss": 5.5364, "step": 3132 }, { "epoch": 0.3, "grad_norm": 0.9299972653388977, "learning_rate": 3.4797362807834013e-05, "loss": 5.4026, "step": 3136 }, { "epoch": 0.3, "grad_norm": 0.9217830300331116, "learning_rate": 3.4777971688966454e-05, "loss": 5.6161, "step": 3140 }, { "epoch": 0.3, "grad_norm": 0.983069121837616, "learning_rate": 3.4758580570098894e-05, "loss": 5.4112, "step": 3144 }, { "epoch": 0.31, "grad_norm": 1.0096659660339355, "learning_rate": 3.4739189451231335e-05, "loss": 5.4705, "step": 3148 }, { "epoch": 0.31, "grad_norm": 0.9388656616210938, "learning_rate": 3.4719798332363776e-05, "loss": 5.4713, "step": 3152 }, { "epoch": 0.31, "grad_norm": 1.0393484830856323, "learning_rate": 3.470040721349622e-05, "loss": 5.4147, "step": 3156 }, { "epoch": 0.31, "grad_norm": 1.001868724822998, "learning_rate": 3.468101609462866e-05, "loss": 5.3701, "step": 3160 }, { "epoch": 0.31, "grad_norm": 0.9857000708580017, "learning_rate": 3.4661624975761104e-05, "loss": 5.4891, "step": 3164 }, { "epoch": 0.31, "grad_norm": 1.0586354732513428, "learning_rate": 3.4642233856893544e-05, "loss": 5.4801, "step": 3168 }, { "epoch": 0.31, "grad_norm": 1.0346976518630981, "learning_rate": 3.4622842738025985e-05, "loss": 5.4555, "step": 3172 }, { "epoch": 0.31, "grad_norm": 0.9425565004348755, "learning_rate": 3.4603451619158426e-05, "loss": 5.4126, "step": 3176 }, { "epoch": 0.31, "grad_norm": 0.984109103679657, "learning_rate": 3.4584060500290866e-05, "loss": 5.3515, "step": 3180 }, { "epoch": 0.31, "grad_norm": 1.0639657974243164, "learning_rate": 3.456466938142331e-05, "loss": 5.3965, "step": 3184 }, { "epoch": 0.31, "grad_norm": 0.9687911868095398, "learning_rate": 3.4545278262555754e-05, "loss": 5.4377, "step": 3188 }, { "epoch": 0.31, "grad_norm": 1.1144814491271973, "learning_rate": 3.452588714368819e-05, "loss": 5.5135, "step": 3192 }, { "epoch": 0.31, "grad_norm": 0.94063401222229, "learning_rate": 3.4506496024820635e-05, "loss": 5.4472, "step": 3196 }, { "epoch": 0.31, "grad_norm": 0.9204466938972473, "learning_rate": 3.4487104905953076e-05, "loss": 5.3214, "step": 3200 }, { "epoch": 0.31, "grad_norm": 1.0223456621170044, "learning_rate": 3.4467713787085516e-05, "loss": 5.5622, "step": 3204 }, { "epoch": 0.31, "grad_norm": 0.9197329878807068, "learning_rate": 3.444832266821796e-05, "loss": 5.4799, "step": 3208 }, { "epoch": 0.31, "grad_norm": 0.9634442329406738, "learning_rate": 3.4428931549350404e-05, "loss": 5.3426, "step": 3212 }, { "epoch": 0.31, "grad_norm": 1.0423585176467896, "learning_rate": 3.440954043048284e-05, "loss": 5.4419, "step": 3216 }, { "epoch": 0.31, "grad_norm": 1.0101354122161865, "learning_rate": 3.4390149311615285e-05, "loss": 5.4811, "step": 3220 }, { "epoch": 0.31, "grad_norm": 1.0531187057495117, "learning_rate": 3.437075819274772e-05, "loss": 5.4645, "step": 3224 }, { "epoch": 0.31, "grad_norm": 1.0495266914367676, "learning_rate": 3.4351367073880166e-05, "loss": 5.4345, "step": 3228 }, { "epoch": 0.31, "grad_norm": 1.029811978340149, "learning_rate": 3.433197595501261e-05, "loss": 5.4607, "step": 3232 }, { "epoch": 0.31, "grad_norm": 0.9279022812843323, "learning_rate": 3.431258483614505e-05, "loss": 5.4771, "step": 3236 }, { "epoch": 0.31, "grad_norm": 1.026243805885315, "learning_rate": 3.429319371727749e-05, "loss": 5.4841, "step": 3240 }, { "epoch": 0.31, "grad_norm": 0.9428079724311829, "learning_rate": 3.4273802598409935e-05, "loss": 5.5417, "step": 3244 }, { "epoch": 0.31, "grad_norm": 0.9030874967575073, "learning_rate": 3.425441147954237e-05, "loss": 5.443, "step": 3248 }, { "epoch": 0.32, "grad_norm": 0.981732189655304, "learning_rate": 3.4235020360674816e-05, "loss": 5.5341, "step": 3252 }, { "epoch": 0.32, "grad_norm": 0.9747270941734314, "learning_rate": 3.421562924180726e-05, "loss": 5.4236, "step": 3256 }, { "epoch": 0.32, "grad_norm": 0.9781522154808044, "learning_rate": 3.41962381229397e-05, "loss": 5.4404, "step": 3260 }, { "epoch": 0.32, "grad_norm": 0.9788567423820496, "learning_rate": 3.417684700407214e-05, "loss": 5.3855, "step": 3264 }, { "epoch": 0.32, "grad_norm": 0.9978493452072144, "learning_rate": 3.415745588520458e-05, "loss": 5.3939, "step": 3268 }, { "epoch": 0.32, "grad_norm": 1.0338048934936523, "learning_rate": 3.413806476633702e-05, "loss": 5.464, "step": 3272 }, { "epoch": 0.32, "grad_norm": 0.9052521586418152, "learning_rate": 3.411867364746946e-05, "loss": 5.3242, "step": 3276 }, { "epoch": 0.32, "grad_norm": 0.9726389050483704, "learning_rate": 3.40992825286019e-05, "loss": 5.4116, "step": 3280 }, { "epoch": 0.32, "grad_norm": 0.987234354019165, "learning_rate": 3.407989140973434e-05, "loss": 5.506, "step": 3284 }, { "epoch": 0.32, "grad_norm": 1.0017744302749634, "learning_rate": 3.406050029086679e-05, "loss": 5.5025, "step": 3288 }, { "epoch": 0.32, "grad_norm": 0.9768481850624084, "learning_rate": 3.404110917199922e-05, "loss": 5.484, "step": 3292 }, { "epoch": 0.32, "grad_norm": 0.9455767273902893, "learning_rate": 3.402171805313167e-05, "loss": 5.4146, "step": 3296 }, { "epoch": 0.32, "grad_norm": 0.9832062721252441, "learning_rate": 3.400232693426411e-05, "loss": 5.4591, "step": 3300 }, { "epoch": 0.32, "grad_norm": 0.8915082216262817, "learning_rate": 3.398293581539655e-05, "loss": 5.3985, "step": 3304 }, { "epoch": 0.32, "grad_norm": 1.0324420928955078, "learning_rate": 3.396354469652899e-05, "loss": 5.5157, "step": 3308 }, { "epoch": 0.32, "grad_norm": 1.0704431533813477, "learning_rate": 3.394415357766143e-05, "loss": 5.4155, "step": 3312 }, { "epoch": 0.32, "grad_norm": 0.9540812373161316, "learning_rate": 3.392476245879387e-05, "loss": 5.418, "step": 3316 }, { "epoch": 0.32, "grad_norm": 1.0278005599975586, "learning_rate": 3.390537133992632e-05, "loss": 5.4143, "step": 3320 }, { "epoch": 0.32, "grad_norm": 1.0574851036071777, "learning_rate": 3.388598022105875e-05, "loss": 5.3889, "step": 3324 }, { "epoch": 0.32, "grad_norm": 1.1078550815582275, "learning_rate": 3.38665891021912e-05, "loss": 5.4035, "step": 3328 }, { "epoch": 0.32, "grad_norm": 0.9876176714897156, "learning_rate": 3.384719798332364e-05, "loss": 5.4488, "step": 3332 }, { "epoch": 0.32, "grad_norm": 0.9665130376815796, "learning_rate": 3.382780686445608e-05, "loss": 5.3989, "step": 3336 }, { "epoch": 0.32, "grad_norm": 1.0209985971450806, "learning_rate": 3.380841574558852e-05, "loss": 5.4641, "step": 3340 }, { "epoch": 0.32, "grad_norm": 0.920693039894104, "learning_rate": 3.378902462672096e-05, "loss": 5.3932, "step": 3344 }, { "epoch": 0.32, "grad_norm": 1.0622704029083252, "learning_rate": 3.37696335078534e-05, "loss": 5.3708, "step": 3348 }, { "epoch": 0.32, "grad_norm": 1.018336296081543, "learning_rate": 3.375024238898585e-05, "loss": 5.4366, "step": 3352 }, { "epoch": 0.33, "grad_norm": 0.9588587284088135, "learning_rate": 3.3730851270118284e-05, "loss": 5.4806, "step": 3356 }, { "epoch": 0.33, "grad_norm": 0.9901473522186279, "learning_rate": 3.371146015125073e-05, "loss": 5.4993, "step": 3360 }, { "epoch": 0.33, "grad_norm": 1.0226725339889526, "learning_rate": 3.369206903238317e-05, "loss": 5.4429, "step": 3364 }, { "epoch": 0.33, "grad_norm": 1.0187616348266602, "learning_rate": 3.367267791351561e-05, "loss": 5.4496, "step": 3368 }, { "epoch": 0.33, "grad_norm": 0.9235848188400269, "learning_rate": 3.365328679464805e-05, "loss": 5.4033, "step": 3372 }, { "epoch": 0.33, "grad_norm": 0.9983669519424438, "learning_rate": 3.36338956757805e-05, "loss": 5.3515, "step": 3376 }, { "epoch": 0.33, "grad_norm": 0.9427633285522461, "learning_rate": 3.3614504556912934e-05, "loss": 5.5194, "step": 3380 }, { "epoch": 0.33, "grad_norm": 0.9658553004264832, "learning_rate": 3.359511343804538e-05, "loss": 5.489, "step": 3384 }, { "epoch": 0.33, "grad_norm": 0.9263963103294373, "learning_rate": 3.3575722319177815e-05, "loss": 5.4182, "step": 3388 }, { "epoch": 0.33, "grad_norm": 1.0251826047897339, "learning_rate": 3.355633120031026e-05, "loss": 5.5208, "step": 3392 }, { "epoch": 0.33, "grad_norm": 0.9037973880767822, "learning_rate": 3.35369400814427e-05, "loss": 5.3744, "step": 3396 }, { "epoch": 0.33, "grad_norm": 1.096735954284668, "learning_rate": 3.351754896257514e-05, "loss": 5.4218, "step": 3400 }, { "epoch": 0.33, "grad_norm": 0.9776617884635925, "learning_rate": 3.3498157843707584e-05, "loss": 5.4539, "step": 3404 }, { "epoch": 0.33, "grad_norm": 1.0098875761032104, "learning_rate": 3.3478766724840024e-05, "loss": 5.4252, "step": 3408 }, { "epoch": 0.33, "grad_norm": 1.0725504159927368, "learning_rate": 3.3459375605972465e-05, "loss": 5.4702, "step": 3412 }, { "epoch": 0.33, "grad_norm": 0.8836826086044312, "learning_rate": 3.3439984487104905e-05, "loss": 5.4135, "step": 3416 }, { "epoch": 0.33, "grad_norm": 0.9869784116744995, "learning_rate": 3.3420593368237346e-05, "loss": 5.4617, "step": 3420 }, { "epoch": 0.33, "grad_norm": 1.0457979440689087, "learning_rate": 3.3401202249369786e-05, "loss": 5.4872, "step": 3424 }, { "epoch": 0.33, "grad_norm": 1.0574203729629517, "learning_rate": 3.3381811130502234e-05, "loss": 5.4167, "step": 3428 }, { "epoch": 0.33, "grad_norm": 1.001185655593872, "learning_rate": 3.3362420011634674e-05, "loss": 5.4038, "step": 3432 }, { "epoch": 0.33, "grad_norm": 1.0334811210632324, "learning_rate": 3.3343028892767115e-05, "loss": 5.3715, "step": 3436 }, { "epoch": 0.33, "grad_norm": 1.0406103134155273, "learning_rate": 3.3323637773899555e-05, "loss": 5.4744, "step": 3440 }, { "epoch": 0.33, "grad_norm": 0.9958846569061279, "learning_rate": 3.3304246655031996e-05, "loss": 5.4423, "step": 3444 }, { "epoch": 0.33, "grad_norm": 0.9611808657646179, "learning_rate": 3.3284855536164436e-05, "loss": 5.4374, "step": 3448 }, { "epoch": 0.33, "grad_norm": 1.0334917306900024, "learning_rate": 3.3265464417296884e-05, "loss": 5.3888, "step": 3452 }, { "epoch": 0.34, "grad_norm": 0.9894860982894897, "learning_rate": 3.324607329842932e-05, "loss": 5.5356, "step": 3456 }, { "epoch": 0.34, "grad_norm": 0.9536516070365906, "learning_rate": 3.3226682179561765e-05, "loss": 5.451, "step": 3460 }, { "epoch": 0.34, "grad_norm": 1.0449178218841553, "learning_rate": 3.32072910606942e-05, "loss": 5.4379, "step": 3464 }, { "epoch": 0.34, "grad_norm": 0.958135724067688, "learning_rate": 3.3187899941826646e-05, "loss": 5.394, "step": 3468 }, { "epoch": 0.34, "grad_norm": 1.049261212348938, "learning_rate": 3.3168508822959086e-05, "loss": 5.4298, "step": 3472 }, { "epoch": 0.34, "grad_norm": 1.018259048461914, "learning_rate": 3.314911770409153e-05, "loss": 5.3572, "step": 3476 }, { "epoch": 0.34, "grad_norm": 0.969048798084259, "learning_rate": 3.312972658522397e-05, "loss": 5.4854, "step": 3480 }, { "epoch": 0.34, "grad_norm": 1.0267409086227417, "learning_rate": 3.3110335466356415e-05, "loss": 5.4474, "step": 3484 }, { "epoch": 0.34, "grad_norm": 0.9933199286460876, "learning_rate": 3.309094434748885e-05, "loss": 5.4446, "step": 3488 }, { "epoch": 0.34, "grad_norm": 1.0801371335983276, "learning_rate": 3.3071553228621296e-05, "loss": 5.4434, "step": 3492 }, { "epoch": 0.34, "grad_norm": 0.953080415725708, "learning_rate": 3.3052162109753736e-05, "loss": 5.4595, "step": 3496 }, { "epoch": 0.34, "grad_norm": 0.9589456915855408, "learning_rate": 3.303277099088618e-05, "loss": 5.433, "step": 3500 }, { "epoch": 0.34, "grad_norm": 0.9284895658493042, "learning_rate": 3.301337987201862e-05, "loss": 5.4761, "step": 3504 }, { "epoch": 0.34, "grad_norm": 0.9796357154846191, "learning_rate": 3.299398875315106e-05, "loss": 5.3998, "step": 3508 }, { "epoch": 0.34, "grad_norm": 0.9407968521118164, "learning_rate": 3.29745976342835e-05, "loss": 5.3964, "step": 3512 }, { "epoch": 0.34, "grad_norm": 1.0362569093704224, "learning_rate": 3.2955206515415946e-05, "loss": 5.5405, "step": 3516 }, { "epoch": 0.34, "grad_norm": 1.024808645248413, "learning_rate": 3.293581539654838e-05, "loss": 5.3888, "step": 3520 }, { "epoch": 0.34, "grad_norm": 1.185713291168213, "learning_rate": 3.291642427768083e-05, "loss": 5.3961, "step": 3524 }, { "epoch": 0.34, "grad_norm": 0.9882110357284546, "learning_rate": 3.289703315881327e-05, "loss": 5.4918, "step": 3528 }, { "epoch": 0.34, "grad_norm": 0.9622325897216797, "learning_rate": 3.287764203994571e-05, "loss": 5.409, "step": 3532 }, { "epoch": 0.34, "grad_norm": 0.9925034642219543, "learning_rate": 3.285825092107815e-05, "loss": 5.4252, "step": 3536 }, { "epoch": 0.34, "grad_norm": 1.0400789976119995, "learning_rate": 3.283885980221059e-05, "loss": 5.4415, "step": 3540 }, { "epoch": 0.34, "grad_norm": 1.0452476739883423, "learning_rate": 3.281946868334303e-05, "loss": 5.4926, "step": 3544 }, { "epoch": 0.34, "grad_norm": 1.106879472732544, "learning_rate": 3.280007756447547e-05, "loss": 5.3975, "step": 3548 }, { "epoch": 0.34, "grad_norm": 0.9826605319976807, "learning_rate": 3.278068644560791e-05, "loss": 5.5062, "step": 3552 }, { "epoch": 0.34, "grad_norm": 1.0423948764801025, "learning_rate": 3.276129532674035e-05, "loss": 5.4536, "step": 3556 }, { "epoch": 0.35, "grad_norm": 1.0544408559799194, "learning_rate": 3.27419042078728e-05, "loss": 5.4063, "step": 3560 }, { "epoch": 0.35, "grad_norm": 0.9723476767539978, "learning_rate": 3.272251308900524e-05, "loss": 5.5508, "step": 3564 }, { "epoch": 0.35, "grad_norm": 1.0550904273986816, "learning_rate": 3.270312197013768e-05, "loss": 5.3544, "step": 3568 }, { "epoch": 0.35, "grad_norm": 1.0153075456619263, "learning_rate": 3.268373085127012e-05, "loss": 5.4197, "step": 3572 }, { "epoch": 0.35, "grad_norm": 0.9704105854034424, "learning_rate": 3.266433973240256e-05, "loss": 5.4494, "step": 3576 }, { "epoch": 0.35, "grad_norm": 1.03487229347229, "learning_rate": 3.2644948613535e-05, "loss": 5.4456, "step": 3580 }, { "epoch": 0.35, "grad_norm": 0.9594029784202576, "learning_rate": 3.262555749466744e-05, "loss": 5.4308, "step": 3584 }, { "epoch": 0.35, "grad_norm": 0.9959999918937683, "learning_rate": 3.260616637579988e-05, "loss": 5.4625, "step": 3588 }, { "epoch": 0.35, "grad_norm": 1.0654551982879639, "learning_rate": 3.258677525693233e-05, "loss": 5.4254, "step": 3592 }, { "epoch": 0.35, "grad_norm": 0.9292247295379639, "learning_rate": 3.256738413806476e-05, "loss": 5.4191, "step": 3596 }, { "epoch": 0.35, "grad_norm": 0.9685704708099365, "learning_rate": 3.254799301919721e-05, "loss": 5.4356, "step": 3600 }, { "epoch": 0.35, "grad_norm": 1.0221501588821411, "learning_rate": 3.252860190032965e-05, "loss": 5.3416, "step": 3604 }, { "epoch": 0.35, "grad_norm": 0.9916023015975952, "learning_rate": 3.250921078146209e-05, "loss": 5.5014, "step": 3608 }, { "epoch": 0.35, "grad_norm": 0.9550511240959167, "learning_rate": 3.248981966259453e-05, "loss": 5.4269, "step": 3612 }, { "epoch": 0.35, "grad_norm": 1.0043843984603882, "learning_rate": 3.247042854372698e-05, "loss": 5.4404, "step": 3616 }, { "epoch": 0.35, "grad_norm": 0.9808074235916138, "learning_rate": 3.245103742485941e-05, "loss": 5.4492, "step": 3620 }, { "epoch": 0.35, "grad_norm": 0.9921736121177673, "learning_rate": 3.243164630599186e-05, "loss": 5.358, "step": 3624 }, { "epoch": 0.35, "grad_norm": 1.0501856803894043, "learning_rate": 3.2412255187124294e-05, "loss": 5.3826, "step": 3628 }, { "epoch": 0.35, "grad_norm": 1.0625114440917969, "learning_rate": 3.239286406825674e-05, "loss": 5.3954, "step": 3632 }, { "epoch": 0.35, "grad_norm": 0.9541780948638916, "learning_rate": 3.237347294938918e-05, "loss": 5.4487, "step": 3636 }, { "epoch": 0.35, "grad_norm": 1.0044699907302856, "learning_rate": 3.235408183052162e-05, "loss": 5.4098, "step": 3640 }, { "epoch": 0.35, "grad_norm": 0.9578327536582947, "learning_rate": 3.233469071165406e-05, "loss": 5.4887, "step": 3644 }, { "epoch": 0.35, "grad_norm": 0.9381611347198486, "learning_rate": 3.231529959278651e-05, "loss": 5.3075, "step": 3648 }, { "epoch": 0.35, "grad_norm": 0.9474562406539917, "learning_rate": 3.2295908473918944e-05, "loss": 5.6028, "step": 3652 }, { "epoch": 0.35, "grad_norm": 0.9133497476577759, "learning_rate": 3.227651735505139e-05, "loss": 5.2818, "step": 3656 }, { "epoch": 0.35, "grad_norm": 1.0017893314361572, "learning_rate": 3.2257126236183825e-05, "loss": 5.4843, "step": 3660 }, { "epoch": 0.36, "grad_norm": 1.0744807720184326, "learning_rate": 3.223773511731627e-05, "loss": 5.3414, "step": 3664 }, { "epoch": 0.36, "grad_norm": 0.9486330151557922, "learning_rate": 3.221834399844871e-05, "loss": 5.4186, "step": 3668 }, { "epoch": 0.36, "grad_norm": 0.9700675010681152, "learning_rate": 3.2198952879581154e-05, "loss": 5.3885, "step": 3672 }, { "epoch": 0.36, "grad_norm": 1.0845935344696045, "learning_rate": 3.2179561760713594e-05, "loss": 5.4627, "step": 3676 }, { "epoch": 0.36, "grad_norm": 1.0409663915634155, "learning_rate": 3.2160170641846035e-05, "loss": 5.4881, "step": 3680 }, { "epoch": 0.36, "grad_norm": 0.9628760814666748, "learning_rate": 3.2140779522978475e-05, "loss": 5.4565, "step": 3684 }, { "epoch": 0.36, "grad_norm": 0.948780357837677, "learning_rate": 3.2121388404110916e-05, "loss": 5.4444, "step": 3688 }, { "epoch": 0.36, "grad_norm": 0.9025591015815735, "learning_rate": 3.210199728524336e-05, "loss": 5.329, "step": 3692 }, { "epoch": 0.36, "grad_norm": 1.059144377708435, "learning_rate": 3.2082606166375804e-05, "loss": 5.4179, "step": 3696 }, { "epoch": 0.36, "grad_norm": 0.9723076820373535, "learning_rate": 3.2063215047508244e-05, "loss": 5.3901, "step": 3700 }, { "epoch": 0.36, "grad_norm": 1.0358588695526123, "learning_rate": 3.2043823928640685e-05, "loss": 5.4644, "step": 3704 }, { "epoch": 0.36, "grad_norm": 1.0152002573013306, "learning_rate": 3.2024432809773125e-05, "loss": 5.5068, "step": 3708 }, { "epoch": 0.36, "grad_norm": 1.0357673168182373, "learning_rate": 3.2005041690905566e-05, "loss": 5.5428, "step": 3712 }, { "epoch": 0.36, "grad_norm": 1.0267854928970337, "learning_rate": 3.1985650572038006e-05, "loss": 5.4718, "step": 3716 }, { "epoch": 0.36, "grad_norm": 0.9849284887313843, "learning_rate": 3.196625945317045e-05, "loss": 5.3864, "step": 3720 }, { "epoch": 0.36, "grad_norm": 1.0108904838562012, "learning_rate": 3.1946868334302894e-05, "loss": 5.4425, "step": 3724 }, { "epoch": 0.36, "grad_norm": 1.0127886533737183, "learning_rate": 3.192747721543533e-05, "loss": 5.3571, "step": 3728 }, { "epoch": 0.36, "grad_norm": 0.9837380051612854, "learning_rate": 3.1908086096567775e-05, "loss": 5.4701, "step": 3732 }, { "epoch": 0.36, "grad_norm": 1.0696730613708496, "learning_rate": 3.1888694977700216e-05, "loss": 5.5145, "step": 3736 }, { "epoch": 0.36, "grad_norm": 0.9534905552864075, "learning_rate": 3.1869303858832656e-05, "loss": 5.3795, "step": 3740 }, { "epoch": 0.36, "grad_norm": 1.048189401626587, "learning_rate": 3.18499127399651e-05, "loss": 5.3971, "step": 3744 }, { "epoch": 0.36, "grad_norm": 1.0402449369430542, "learning_rate": 3.183052162109754e-05, "loss": 5.4111, "step": 3748 }, { "epoch": 0.36, "grad_norm": 1.022661805152893, "learning_rate": 3.181113050222998e-05, "loss": 5.4513, "step": 3752 }, { "epoch": 0.36, "grad_norm": 0.9560799598693848, "learning_rate": 3.1791739383362425e-05, "loss": 5.3978, "step": 3756 }, { "epoch": 0.36, "grad_norm": 0.9077816605567932, "learning_rate": 3.177234826449486e-05, "loss": 5.3508, "step": 3760 }, { "epoch": 0.36, "grad_norm": 1.029691219329834, "learning_rate": 3.1752957145627306e-05, "loss": 5.3808, "step": 3764 }, { "epoch": 0.37, "grad_norm": 0.9867807626724243, "learning_rate": 3.173356602675975e-05, "loss": 5.4433, "step": 3768 }, { "epoch": 0.37, "grad_norm": 1.0569761991500854, "learning_rate": 3.171417490789219e-05, "loss": 5.4479, "step": 3772 }, { "epoch": 0.37, "grad_norm": 1.0014142990112305, "learning_rate": 3.169478378902463e-05, "loss": 5.3415, "step": 3776 }, { "epoch": 0.37, "grad_norm": 0.9502798318862915, "learning_rate": 3.167539267015707e-05, "loss": 5.4549, "step": 3780 }, { "epoch": 0.37, "grad_norm": 0.9451482892036438, "learning_rate": 3.165600155128951e-05, "loss": 5.43, "step": 3784 }, { "epoch": 0.37, "grad_norm": 1.002482295036316, "learning_rate": 3.1636610432421956e-05, "loss": 5.4482, "step": 3788 }, { "epoch": 0.37, "grad_norm": 1.0348658561706543, "learning_rate": 3.161721931355439e-05, "loss": 5.471, "step": 3792 }, { "epoch": 0.37, "grad_norm": 0.9318069815635681, "learning_rate": 3.159782819468684e-05, "loss": 5.3646, "step": 3796 }, { "epoch": 0.37, "grad_norm": 1.1141440868377686, "learning_rate": 3.157843707581928e-05, "loss": 5.4423, "step": 3800 }, { "epoch": 0.37, "grad_norm": 1.066623568534851, "learning_rate": 3.155904595695172e-05, "loss": 5.3612, "step": 3804 }, { "epoch": 0.37, "grad_norm": 1.0264089107513428, "learning_rate": 3.153965483808416e-05, "loss": 5.383, "step": 3808 }, { "epoch": 0.37, "grad_norm": 1.0358729362487793, "learning_rate": 3.15202637192166e-05, "loss": 5.5152, "step": 3812 }, { "epoch": 0.37, "grad_norm": 0.9304607510566711, "learning_rate": 3.150087260034904e-05, "loss": 5.3978, "step": 3816 }, { "epoch": 0.37, "grad_norm": 1.0691871643066406, "learning_rate": 3.148148148148148e-05, "loss": 5.5089, "step": 3820 }, { "epoch": 0.37, "grad_norm": 1.0088552236557007, "learning_rate": 3.146209036261392e-05, "loss": 5.3989, "step": 3824 }, { "epoch": 0.37, "grad_norm": 0.9631534218788147, "learning_rate": 3.144269924374637e-05, "loss": 5.3578, "step": 3828 }, { "epoch": 0.37, "grad_norm": 0.964043378829956, "learning_rate": 3.142330812487881e-05, "loss": 5.4551, "step": 3832 }, { "epoch": 0.37, "grad_norm": 1.0570262670516968, "learning_rate": 3.140391700601125e-05, "loss": 5.4109, "step": 3836 }, { "epoch": 0.37, "grad_norm": 0.975766658782959, "learning_rate": 3.138452588714369e-05, "loss": 5.4265, "step": 3840 }, { "epoch": 0.37, "grad_norm": 0.9857097268104553, "learning_rate": 3.136513476827613e-05, "loss": 5.4179, "step": 3844 }, { "epoch": 0.37, "grad_norm": 0.9902443885803223, "learning_rate": 3.134574364940857e-05, "loss": 5.3386, "step": 3848 }, { "epoch": 0.37, "grad_norm": 0.974892795085907, "learning_rate": 3.132635253054101e-05, "loss": 5.3901, "step": 3852 }, { "epoch": 0.37, "grad_norm": 0.9388407468795776, "learning_rate": 3.130696141167346e-05, "loss": 5.5098, "step": 3856 }, { "epoch": 0.37, "grad_norm": 1.0491032600402832, "learning_rate": 3.128757029280589e-05, "loss": 5.2734, "step": 3860 }, { "epoch": 0.37, "grad_norm": 0.9660788178443909, "learning_rate": 3.126817917393834e-05, "loss": 5.347, "step": 3864 }, { "epoch": 0.38, "grad_norm": 0.9738529324531555, "learning_rate": 3.1248788055070774e-05, "loss": 5.4238, "step": 3868 }, { "epoch": 0.38, "grad_norm": 0.9557338356971741, "learning_rate": 3.122939693620322e-05, "loss": 5.3881, "step": 3872 }, { "epoch": 0.38, "grad_norm": 1.1574413776397705, "learning_rate": 3.121000581733566e-05, "loss": 5.372, "step": 3876 }, { "epoch": 0.38, "grad_norm": 1.046978235244751, "learning_rate": 3.11906146984681e-05, "loss": 5.3993, "step": 3880 }, { "epoch": 0.38, "grad_norm": 1.0011602640151978, "learning_rate": 3.117122357960054e-05, "loss": 5.3449, "step": 3884 }, { "epoch": 0.38, "grad_norm": 1.0066341161727905, "learning_rate": 3.115183246073299e-05, "loss": 5.5072, "step": 3888 }, { "epoch": 0.38, "grad_norm": 0.9333812594413757, "learning_rate": 3.1132441341865424e-05, "loss": 5.4166, "step": 3892 }, { "epoch": 0.38, "grad_norm": 0.9771477580070496, "learning_rate": 3.111305022299787e-05, "loss": 5.4957, "step": 3896 }, { "epoch": 0.38, "grad_norm": 0.9672766923904419, "learning_rate": 3.1093659104130305e-05, "loss": 5.5576, "step": 3900 }, { "epoch": 0.38, "grad_norm": 1.0771820545196533, "learning_rate": 3.107426798526275e-05, "loss": 5.432, "step": 3904 }, { "epoch": 0.38, "grad_norm": 0.9599045515060425, "learning_rate": 3.105487686639519e-05, "loss": 5.3826, "step": 3908 }, { "epoch": 0.38, "grad_norm": 1.0193896293640137, "learning_rate": 3.1035485747527633e-05, "loss": 5.3933, "step": 3912 }, { "epoch": 0.38, "grad_norm": 0.9940204620361328, "learning_rate": 3.1016094628660074e-05, "loss": 5.4366, "step": 3916 }, { "epoch": 0.38, "grad_norm": 0.9993325471878052, "learning_rate": 3.099670350979252e-05, "loss": 5.3764, "step": 3920 }, { "epoch": 0.38, "grad_norm": 1.0441081523895264, "learning_rate": 3.0977312390924955e-05, "loss": 5.4612, "step": 3924 }, { "epoch": 0.38, "grad_norm": 0.971049964427948, "learning_rate": 3.09579212720574e-05, "loss": 5.365, "step": 3928 }, { "epoch": 0.38, "grad_norm": 0.9899376034736633, "learning_rate": 3.093853015318984e-05, "loss": 5.4119, "step": 3932 }, { "epoch": 0.38, "grad_norm": 0.965894341468811, "learning_rate": 3.0919139034322283e-05, "loss": 5.5259, "step": 3936 }, { "epoch": 0.38, "grad_norm": 1.007863998413086, "learning_rate": 3.0899747915454724e-05, "loss": 5.4325, "step": 3940 }, { "epoch": 0.38, "grad_norm": 1.032435417175293, "learning_rate": 3.0880356796587164e-05, "loss": 5.5254, "step": 3944 }, { "epoch": 0.38, "grad_norm": 0.9465643167495728, "learning_rate": 3.0860965677719605e-05, "loss": 5.443, "step": 3948 }, { "epoch": 0.38, "grad_norm": 1.112241268157959, "learning_rate": 3.0841574558852046e-05, "loss": 5.4087, "step": 3952 }, { "epoch": 0.38, "grad_norm": 0.9741985201835632, "learning_rate": 3.0822183439984486e-05, "loss": 5.4688, "step": 3956 }, { "epoch": 0.38, "grad_norm": 1.0749200582504272, "learning_rate": 3.0802792321116933e-05, "loss": 5.3697, "step": 3960 }, { "epoch": 0.38, "grad_norm": 1.0521398782730103, "learning_rate": 3.0783401202249374e-05, "loss": 5.4226, "step": 3964 }, { "epoch": 0.38, "grad_norm": 0.995364248752594, "learning_rate": 3.0764010083381814e-05, "loss": 5.4333, "step": 3968 }, { "epoch": 0.39, "grad_norm": 1.023390769958496, "learning_rate": 3.0744618964514255e-05, "loss": 5.4662, "step": 3972 }, { "epoch": 0.39, "grad_norm": 0.9972986578941345, "learning_rate": 3.0725227845646696e-05, "loss": 5.4432, "step": 3976 }, { "epoch": 0.39, "grad_norm": 1.0111867189407349, "learning_rate": 3.0705836726779136e-05, "loss": 5.4875, "step": 3980 }, { "epoch": 0.39, "grad_norm": 0.930380642414093, "learning_rate": 3.068644560791158e-05, "loss": 5.335, "step": 3984 }, { "epoch": 0.39, "grad_norm": 1.0326186418533325, "learning_rate": 3.066705448904402e-05, "loss": 5.3984, "step": 3988 }, { "epoch": 0.39, "grad_norm": 0.9522334933280945, "learning_rate": 3.064766337017646e-05, "loss": 5.4895, "step": 3992 }, { "epoch": 0.39, "grad_norm": 1.048785924911499, "learning_rate": 3.0628272251308905e-05, "loss": 5.3766, "step": 3996 }, { "epoch": 0.39, "grad_norm": 1.0069239139556885, "learning_rate": 3.060888113244134e-05, "loss": 5.3243, "step": 4000 }, { "epoch": 0.39, "grad_norm": 0.8954800963401794, "learning_rate": 3.0589490013573786e-05, "loss": 5.3281, "step": 4004 }, { "epoch": 0.39, "grad_norm": 1.0596482753753662, "learning_rate": 3.057009889470623e-05, "loss": 5.5029, "step": 4008 }, { "epoch": 0.39, "grad_norm": 0.9929758310317993, "learning_rate": 3.055070777583867e-05, "loss": 5.4649, "step": 4012 }, { "epoch": 0.39, "grad_norm": 0.9867807626724243, "learning_rate": 3.053131665697111e-05, "loss": 5.3755, "step": 4016 }, { "epoch": 0.39, "grad_norm": 0.9822360873222351, "learning_rate": 3.0511925538103548e-05, "loss": 5.4723, "step": 4020 }, { "epoch": 0.39, "grad_norm": 0.9592394232749939, "learning_rate": 3.0492534419235992e-05, "loss": 5.4694, "step": 4024 }, { "epoch": 0.39, "grad_norm": 0.993042528629303, "learning_rate": 3.0473143300368433e-05, "loss": 5.4091, "step": 4028 }, { "epoch": 0.39, "grad_norm": 0.9652045965194702, "learning_rate": 3.0453752181500873e-05, "loss": 5.4072, "step": 4032 }, { "epoch": 0.39, "grad_norm": 0.9788159728050232, "learning_rate": 3.0434361062633314e-05, "loss": 5.4358, "step": 4036 }, { "epoch": 0.39, "grad_norm": 0.9906930923461914, "learning_rate": 3.0414969943765758e-05, "loss": 5.4567, "step": 4040 }, { "epoch": 0.39, "grad_norm": 1.0816450119018555, "learning_rate": 3.0395578824898195e-05, "loss": 5.3435, "step": 4044 }, { "epoch": 0.39, "grad_norm": 1.0458557605743408, "learning_rate": 3.037618770603064e-05, "loss": 5.5151, "step": 4048 }, { "epoch": 0.39, "grad_norm": 0.9744831919670105, "learning_rate": 3.0356796587163083e-05, "loss": 5.486, "step": 4052 }, { "epoch": 0.39, "grad_norm": 0.9657852649688721, "learning_rate": 3.033740546829552e-05, "loss": 5.441, "step": 4056 }, { "epoch": 0.39, "grad_norm": 0.9370314478874207, "learning_rate": 3.0318014349427964e-05, "loss": 5.4117, "step": 4060 }, { "epoch": 0.39, "grad_norm": 0.9673814177513123, "learning_rate": 3.02986232305604e-05, "loss": 5.5008, "step": 4064 }, { "epoch": 0.39, "grad_norm": 1.004071593284607, "learning_rate": 3.0279232111692845e-05, "loss": 5.4525, "step": 4068 }, { "epoch": 0.39, "grad_norm": 1.0029621124267578, "learning_rate": 3.025984099282529e-05, "loss": 5.4788, "step": 4072 }, { "epoch": 0.4, "grad_norm": 1.0012478828430176, "learning_rate": 3.0240449873957726e-05, "loss": 5.443, "step": 4076 }, { "epoch": 0.4, "grad_norm": 0.9706107974052429, "learning_rate": 3.022105875509017e-05, "loss": 5.35, "step": 4080 }, { "epoch": 0.4, "grad_norm": 0.9983603358268738, "learning_rate": 3.0201667636222614e-05, "loss": 5.4215, "step": 4084 }, { "epoch": 0.4, "grad_norm": 1.0420852899551392, "learning_rate": 3.018227651735505e-05, "loss": 5.486, "step": 4088 }, { "epoch": 0.4, "grad_norm": 0.9456035494804382, "learning_rate": 3.0162885398487495e-05, "loss": 5.4257, "step": 4092 }, { "epoch": 0.4, "grad_norm": 0.9700554609298706, "learning_rate": 3.014349427961994e-05, "loss": 5.4415, "step": 4096 }, { "epoch": 0.4, "grad_norm": 1.0411101579666138, "learning_rate": 3.0124103160752376e-05, "loss": 5.4313, "step": 4100 }, { "epoch": 0.4, "grad_norm": 1.0131868124008179, "learning_rate": 3.010471204188482e-05, "loss": 5.371, "step": 4104 }, { "epoch": 0.4, "grad_norm": 0.9504354000091553, "learning_rate": 3.0085320923017257e-05, "loss": 5.4643, "step": 4108 }, { "epoch": 0.4, "grad_norm": 0.9660660624504089, "learning_rate": 3.00659298041497e-05, "loss": 5.4337, "step": 4112 }, { "epoch": 0.4, "grad_norm": 1.0248061418533325, "learning_rate": 3.0046538685282145e-05, "loss": 5.4113, "step": 4116 }, { "epoch": 0.4, "grad_norm": 1.0795485973358154, "learning_rate": 3.0027147566414582e-05, "loss": 5.3737, "step": 4120 }, { "epoch": 0.4, "grad_norm": 1.0381568670272827, "learning_rate": 3.0007756447547026e-05, "loss": 5.4409, "step": 4124 }, { "epoch": 0.4, "grad_norm": 0.9909124970436096, "learning_rate": 2.998836532867947e-05, "loss": 5.358, "step": 4128 }, { "epoch": 0.4, "grad_norm": 1.0048110485076904, "learning_rate": 2.9968974209811907e-05, "loss": 5.4109, "step": 4132 }, { "epoch": 0.4, "grad_norm": 0.9982589483261108, "learning_rate": 2.994958309094435e-05, "loss": 5.4673, "step": 4136 }, { "epoch": 0.4, "grad_norm": 1.010875940322876, "learning_rate": 2.9930191972076788e-05, "loss": 5.3333, "step": 4140 }, { "epoch": 0.4, "grad_norm": 1.0325456857681274, "learning_rate": 2.9910800853209232e-05, "loss": 5.3687, "step": 4144 }, { "epoch": 0.4, "grad_norm": 1.057373046875, "learning_rate": 2.9891409734341676e-05, "loss": 5.4218, "step": 4148 }, { "epoch": 0.4, "grad_norm": 0.975649356842041, "learning_rate": 2.9872018615474113e-05, "loss": 5.3694, "step": 4152 }, { "epoch": 0.4, "grad_norm": 0.9869158864021301, "learning_rate": 2.9852627496606557e-05, "loss": 5.3961, "step": 4156 }, { "epoch": 0.4, "grad_norm": 0.9405871033668518, "learning_rate": 2.9833236377738998e-05, "loss": 5.3882, "step": 4160 }, { "epoch": 0.4, "grad_norm": 0.9522705078125, "learning_rate": 2.9813845258871438e-05, "loss": 5.4143, "step": 4164 }, { "epoch": 0.4, "grad_norm": 0.9004665613174438, "learning_rate": 2.979445414000388e-05, "loss": 5.4774, "step": 4168 }, { "epoch": 0.4, "grad_norm": 1.01994788646698, "learning_rate": 2.9775063021136323e-05, "loss": 5.4938, "step": 4172 }, { "epoch": 0.4, "grad_norm": 1.0324288606643677, "learning_rate": 2.975567190226876e-05, "loss": 5.351, "step": 4176 }, { "epoch": 0.41, "grad_norm": 1.0081384181976318, "learning_rate": 2.9736280783401204e-05, "loss": 5.4233, "step": 4180 }, { "epoch": 0.41, "grad_norm": 0.9753928780555725, "learning_rate": 2.9716889664533644e-05, "loss": 5.3791, "step": 4184 }, { "epoch": 0.41, "grad_norm": 1.0055930614471436, "learning_rate": 2.9697498545666085e-05, "loss": 5.4083, "step": 4188 }, { "epoch": 0.41, "grad_norm": 1.0010572671890259, "learning_rate": 2.967810742679853e-05, "loss": 5.3065, "step": 4192 }, { "epoch": 0.41, "grad_norm": 1.0279070138931274, "learning_rate": 2.9658716307930966e-05, "loss": 5.359, "step": 4196 }, { "epoch": 0.41, "grad_norm": 0.9655927419662476, "learning_rate": 2.963932518906341e-05, "loss": 5.4802, "step": 4200 }, { "epoch": 0.41, "grad_norm": 0.9861381649971008, "learning_rate": 2.9619934070195854e-05, "loss": 5.3633, "step": 4204 }, { "epoch": 0.41, "grad_norm": 1.0157438516616821, "learning_rate": 2.960054295132829e-05, "loss": 5.3607, "step": 4208 }, { "epoch": 0.41, "grad_norm": 1.0280277729034424, "learning_rate": 2.9581151832460735e-05, "loss": 5.3693, "step": 4212 }, { "epoch": 0.41, "grad_norm": 1.045616865158081, "learning_rate": 2.956176071359318e-05, "loss": 5.4264, "step": 4216 }, { "epoch": 0.41, "grad_norm": 0.950824499130249, "learning_rate": 2.9542369594725616e-05, "loss": 5.3266, "step": 4220 }, { "epoch": 0.41, "grad_norm": 0.9672490358352661, "learning_rate": 2.952297847585806e-05, "loss": 5.4075, "step": 4224 }, { "epoch": 0.41, "grad_norm": 0.964094877243042, "learning_rate": 2.9503587356990497e-05, "loss": 5.4298, "step": 4228 }, { "epoch": 0.41, "grad_norm": 0.9985701441764832, "learning_rate": 2.948419623812294e-05, "loss": 5.4677, "step": 4232 }, { "epoch": 0.41, "grad_norm": 1.0406873226165771, "learning_rate": 2.9464805119255385e-05, "loss": 5.3258, "step": 4236 }, { "epoch": 0.41, "grad_norm": 1.0154664516448975, "learning_rate": 2.9445414000387822e-05, "loss": 5.4148, "step": 4240 }, { "epoch": 0.41, "grad_norm": 0.9826991558074951, "learning_rate": 2.9426022881520266e-05, "loss": 5.3293, "step": 4244 }, { "epoch": 0.41, "grad_norm": 1.0213288068771362, "learning_rate": 2.940663176265271e-05, "loss": 5.3245, "step": 4248 }, { "epoch": 0.41, "grad_norm": 1.0005340576171875, "learning_rate": 2.9387240643785147e-05, "loss": 5.4466, "step": 4252 }, { "epoch": 0.41, "grad_norm": 1.0072083473205566, "learning_rate": 2.936784952491759e-05, "loss": 5.3103, "step": 4256 }, { "epoch": 0.41, "grad_norm": 0.9992334246635437, "learning_rate": 2.9348458406050028e-05, "loss": 5.4595, "step": 4260 }, { "epoch": 0.41, "grad_norm": 1.026963233947754, "learning_rate": 2.9329067287182472e-05, "loss": 5.4082, "step": 4264 }, { "epoch": 0.41, "grad_norm": 1.0018388032913208, "learning_rate": 2.9309676168314916e-05, "loss": 5.3292, "step": 4268 }, { "epoch": 0.41, "grad_norm": 1.106248140335083, "learning_rate": 2.9290285049447353e-05, "loss": 5.3176, "step": 4272 }, { "epoch": 0.41, "grad_norm": 1.0284066200256348, "learning_rate": 2.9270893930579797e-05, "loss": 5.3839, "step": 4276 }, { "epoch": 0.41, "grad_norm": 1.0233203172683716, "learning_rate": 2.925150281171224e-05, "loss": 5.4327, "step": 4280 }, { "epoch": 0.42, "grad_norm": 1.0423204898834229, "learning_rate": 2.9232111692844678e-05, "loss": 5.4134, "step": 4284 }, { "epoch": 0.42, "grad_norm": 1.0750269889831543, "learning_rate": 2.9212720573977122e-05, "loss": 5.3983, "step": 4288 }, { "epoch": 0.42, "grad_norm": 1.035136342048645, "learning_rate": 2.9193329455109562e-05, "loss": 5.3828, "step": 4292 }, { "epoch": 0.42, "grad_norm": 0.9154065847396851, "learning_rate": 2.9173938336242003e-05, "loss": 5.3261, "step": 4296 }, { "epoch": 0.42, "grad_norm": 1.0032275915145874, "learning_rate": 2.9154547217374443e-05, "loss": 5.4712, "step": 4300 }, { "epoch": 0.42, "grad_norm": 1.0405406951904297, "learning_rate": 2.9135156098506884e-05, "loss": 5.3499, "step": 4304 }, { "epoch": 0.42, "grad_norm": 1.1554255485534668, "learning_rate": 2.9115764979639324e-05, "loss": 5.4097, "step": 4308 }, { "epoch": 0.42, "grad_norm": 1.054694414138794, "learning_rate": 2.909637386077177e-05, "loss": 5.3183, "step": 4312 }, { "epoch": 0.42, "grad_norm": 0.9714403748512268, "learning_rate": 2.907698274190421e-05, "loss": 5.5304, "step": 4316 }, { "epoch": 0.42, "grad_norm": 1.0622743368148804, "learning_rate": 2.905759162303665e-05, "loss": 5.321, "step": 4320 }, { "epoch": 0.42, "grad_norm": 1.0464075803756714, "learning_rate": 2.9038200504169093e-05, "loss": 5.3688, "step": 4324 }, { "epoch": 0.42, "grad_norm": 0.9132739901542664, "learning_rate": 2.901880938530153e-05, "loss": 5.3441, "step": 4328 }, { "epoch": 0.42, "grad_norm": 1.013675332069397, "learning_rate": 2.8999418266433974e-05, "loss": 5.4333, "step": 4332 }, { "epoch": 0.42, "grad_norm": 1.0379986763000488, "learning_rate": 2.898002714756642e-05, "loss": 5.5111, "step": 4336 }, { "epoch": 0.42, "grad_norm": 1.0619996786117554, "learning_rate": 2.8960636028698856e-05, "loss": 5.3242, "step": 4340 }, { "epoch": 0.42, "grad_norm": 1.0503803491592407, "learning_rate": 2.89412449098313e-05, "loss": 5.4761, "step": 4344 }, { "epoch": 0.42, "grad_norm": 1.006859540939331, "learning_rate": 2.8921853790963737e-05, "loss": 5.4661, "step": 4348 }, { "epoch": 0.42, "grad_norm": 0.9837526082992554, "learning_rate": 2.890246267209618e-05, "loss": 5.4439, "step": 4352 }, { "epoch": 0.42, "grad_norm": 1.0037223100662231, "learning_rate": 2.8883071553228624e-05, "loss": 5.3982, "step": 4356 }, { "epoch": 0.42, "grad_norm": 0.9351251125335693, "learning_rate": 2.886368043436106e-05, "loss": 5.4419, "step": 4360 }, { "epoch": 0.42, "grad_norm": 0.9317170977592468, "learning_rate": 2.8844289315493506e-05, "loss": 5.4067, "step": 4364 }, { "epoch": 0.42, "grad_norm": 0.9478697776794434, "learning_rate": 2.882489819662595e-05, "loss": 5.4012, "step": 4368 }, { "epoch": 0.42, "grad_norm": 0.9834426641464233, "learning_rate": 2.8805507077758387e-05, "loss": 5.4496, "step": 4372 }, { "epoch": 0.42, "grad_norm": 1.0380594730377197, "learning_rate": 2.878611595889083e-05, "loss": 5.4042, "step": 4376 }, { "epoch": 0.42, "grad_norm": 1.1123548746109009, "learning_rate": 2.8766724840023268e-05, "loss": 5.4814, "step": 4380 }, { "epoch": 0.43, "grad_norm": 0.9241830706596375, "learning_rate": 2.874733372115571e-05, "loss": 5.4302, "step": 4384 }, { "epoch": 0.43, "grad_norm": 0.9564453363418579, "learning_rate": 2.8727942602288156e-05, "loss": 5.3426, "step": 4388 }, { "epoch": 0.43, "grad_norm": 1.0000247955322266, "learning_rate": 2.8708551483420593e-05, "loss": 5.3799, "step": 4392 }, { "epoch": 0.43, "grad_norm": 0.9557441473007202, "learning_rate": 2.8689160364553037e-05, "loss": 5.4233, "step": 4396 }, { "epoch": 0.43, "grad_norm": 0.920683741569519, "learning_rate": 2.866976924568548e-05, "loss": 5.351, "step": 4400 }, { "epoch": 0.43, "grad_norm": 0.9885355234146118, "learning_rate": 2.8650378126817918e-05, "loss": 5.4735, "step": 4404 }, { "epoch": 0.43, "grad_norm": 1.02256441116333, "learning_rate": 2.863098700795036e-05, "loss": 5.4872, "step": 4408 }, { "epoch": 0.43, "grad_norm": 1.0346393585205078, "learning_rate": 2.8611595889082806e-05, "loss": 5.4895, "step": 4412 }, { "epoch": 0.43, "grad_norm": 1.0645418167114258, "learning_rate": 2.8592204770215243e-05, "loss": 5.3678, "step": 4416 }, { "epoch": 0.43, "grad_norm": 0.9786351323127747, "learning_rate": 2.8572813651347687e-05, "loss": 5.394, "step": 4420 }, { "epoch": 0.43, "grad_norm": 1.0003682374954224, "learning_rate": 2.8553422532480124e-05, "loss": 5.3394, "step": 4424 }, { "epoch": 0.43, "grad_norm": 1.0191590785980225, "learning_rate": 2.8534031413612568e-05, "loss": 5.3822, "step": 4428 }, { "epoch": 0.43, "grad_norm": 0.9041575193405151, "learning_rate": 2.8514640294745008e-05, "loss": 5.3762, "step": 4432 }, { "epoch": 0.43, "grad_norm": 1.1107348203659058, "learning_rate": 2.849524917587745e-05, "loss": 5.355, "step": 4436 }, { "epoch": 0.43, "grad_norm": 0.9530418515205383, "learning_rate": 2.847585805700989e-05, "loss": 5.3681, "step": 4440 }, { "epoch": 0.43, "grad_norm": 0.988714337348938, "learning_rate": 2.8456466938142333e-05, "loss": 5.3924, "step": 4444 }, { "epoch": 0.43, "grad_norm": 1.0610371828079224, "learning_rate": 2.8437075819274774e-05, "loss": 5.3354, "step": 4448 }, { "epoch": 0.43, "grad_norm": 1.005706548690796, "learning_rate": 2.8417684700407214e-05, "loss": 5.3619, "step": 4452 }, { "epoch": 0.43, "grad_norm": 1.053621530532837, "learning_rate": 2.8398293581539658e-05, "loss": 5.344, "step": 4456 }, { "epoch": 0.43, "grad_norm": 1.0087207555770874, "learning_rate": 2.8378902462672095e-05, "loss": 5.3222, "step": 4460 }, { "epoch": 0.43, "grad_norm": 1.0023506879806519, "learning_rate": 2.835951134380454e-05, "loss": 5.4015, "step": 4464 }, { "epoch": 0.43, "grad_norm": 0.9826129078865051, "learning_rate": 2.8340120224936976e-05, "loss": 5.4818, "step": 4468 }, { "epoch": 0.43, "grad_norm": 0.9970731139183044, "learning_rate": 2.832072910606942e-05, "loss": 5.4439, "step": 4472 }, { "epoch": 0.43, "grad_norm": 1.0162273645401, "learning_rate": 2.8301337987201864e-05, "loss": 5.4481, "step": 4476 }, { "epoch": 0.43, "grad_norm": 1.065722942352295, "learning_rate": 2.82819468683343e-05, "loss": 5.4393, "step": 4480 }, { "epoch": 0.43, "grad_norm": 1.0227274894714355, "learning_rate": 2.8262555749466745e-05, "loss": 5.4049, "step": 4484 }, { "epoch": 0.44, "grad_norm": 1.058347463607788, "learning_rate": 2.824316463059919e-05, "loss": 5.4221, "step": 4488 }, { "epoch": 0.44, "grad_norm": 0.981675922870636, "learning_rate": 2.8223773511731626e-05, "loss": 5.4298, "step": 4492 }, { "epoch": 0.44, "grad_norm": 1.0078340768814087, "learning_rate": 2.820438239286407e-05, "loss": 5.4223, "step": 4496 }, { "epoch": 0.44, "grad_norm": 1.0829211473464966, "learning_rate": 2.8184991273996508e-05, "loss": 5.4577, "step": 4500 }, { "epoch": 0.44, "grad_norm": 0.9840219020843506, "learning_rate": 2.816560015512895e-05, "loss": 5.3451, "step": 4504 }, { "epoch": 0.44, "grad_norm": 1.059767484664917, "learning_rate": 2.8146209036261395e-05, "loss": 5.4052, "step": 4508 }, { "epoch": 0.44, "grad_norm": 1.1250829696655273, "learning_rate": 2.8126817917393833e-05, "loss": 5.3476, "step": 4512 }, { "epoch": 0.44, "grad_norm": 0.9797293543815613, "learning_rate": 2.8107426798526276e-05, "loss": 5.3143, "step": 4516 }, { "epoch": 0.44, "grad_norm": 0.9784405827522278, "learning_rate": 2.808803567965872e-05, "loss": 5.4072, "step": 4520 }, { "epoch": 0.44, "grad_norm": 1.0686568021774292, "learning_rate": 2.8068644560791158e-05, "loss": 5.4029, "step": 4524 }, { "epoch": 0.44, "grad_norm": 0.958621084690094, "learning_rate": 2.80492534419236e-05, "loss": 5.3838, "step": 4528 }, { "epoch": 0.44, "grad_norm": 0.969975471496582, "learning_rate": 2.8029862323056045e-05, "loss": 5.4323, "step": 4532 }, { "epoch": 0.44, "grad_norm": 1.0803236961364746, "learning_rate": 2.8010471204188483e-05, "loss": 5.4438, "step": 4536 }, { "epoch": 0.44, "grad_norm": 1.0710035562515259, "learning_rate": 2.7991080085320926e-05, "loss": 5.4503, "step": 4540 }, { "epoch": 0.44, "grad_norm": 0.9775360226631165, "learning_rate": 2.7971688966453364e-05, "loss": 5.3578, "step": 4544 }, { "epoch": 0.44, "grad_norm": 0.9491469264030457, "learning_rate": 2.7952297847585808e-05, "loss": 5.3979, "step": 4548 }, { "epoch": 0.44, "grad_norm": 1.0196774005889893, "learning_rate": 2.793290672871825e-05, "loss": 5.4313, "step": 4552 }, { "epoch": 0.44, "grad_norm": 1.0691953897476196, "learning_rate": 2.791351560985069e-05, "loss": 5.5029, "step": 4556 }, { "epoch": 0.44, "grad_norm": 1.0333261489868164, "learning_rate": 2.7894124490983133e-05, "loss": 5.4323, "step": 4560 }, { "epoch": 0.44, "grad_norm": 1.0331099033355713, "learning_rate": 2.7874733372115573e-05, "loss": 5.4105, "step": 4564 }, { "epoch": 0.44, "grad_norm": 1.0562598705291748, "learning_rate": 2.7855342253248014e-05, "loss": 5.4442, "step": 4568 }, { "epoch": 0.44, "grad_norm": 1.0251268148422241, "learning_rate": 2.7835951134380454e-05, "loss": 5.4121, "step": 4572 }, { "epoch": 0.44, "grad_norm": 0.9381951689720154, "learning_rate": 2.7816560015512898e-05, "loss": 5.303, "step": 4576 }, { "epoch": 0.44, "grad_norm": 1.0383044481277466, "learning_rate": 2.779716889664534e-05, "loss": 5.3638, "step": 4580 }, { "epoch": 0.44, "grad_norm": 1.0460588932037354, "learning_rate": 2.777777777777778e-05, "loss": 5.3688, "step": 4584 }, { "epoch": 0.44, "grad_norm": 0.9689496159553528, "learning_rate": 2.775838665891022e-05, "loss": 5.499, "step": 4588 }, { "epoch": 0.45, "grad_norm": 0.9622393846511841, "learning_rate": 2.773899554004266e-05, "loss": 5.3238, "step": 4592 }, { "epoch": 0.45, "grad_norm": 0.9752629399299622, "learning_rate": 2.7719604421175104e-05, "loss": 5.3294, "step": 4596 }, { "epoch": 0.45, "grad_norm": 0.9437540173530579, "learning_rate": 2.770021330230754e-05, "loss": 5.3397, "step": 4600 }, { "epoch": 0.45, "grad_norm": 1.041926383972168, "learning_rate": 2.7680822183439985e-05, "loss": 5.4434, "step": 4604 }, { "epoch": 0.45, "grad_norm": 1.045983076095581, "learning_rate": 2.766143106457243e-05, "loss": 5.3467, "step": 4608 }, { "epoch": 0.45, "grad_norm": 0.9513012766838074, "learning_rate": 2.7642039945704866e-05, "loss": 5.4401, "step": 4612 }, { "epoch": 0.45, "grad_norm": 1.0047541856765747, "learning_rate": 2.762264882683731e-05, "loss": 5.3485, "step": 4616 }, { "epoch": 0.45, "grad_norm": 1.0038918256759644, "learning_rate": 2.7603257707969747e-05, "loss": 5.4189, "step": 4620 }, { "epoch": 0.45, "grad_norm": 1.0029683113098145, "learning_rate": 2.758386658910219e-05, "loss": 5.3773, "step": 4624 }, { "epoch": 0.45, "grad_norm": 0.9756178259849548, "learning_rate": 2.7564475470234635e-05, "loss": 5.4745, "step": 4628 }, { "epoch": 0.45, "grad_norm": 1.028535008430481, "learning_rate": 2.7545084351367072e-05, "loss": 5.3767, "step": 4632 }, { "epoch": 0.45, "grad_norm": 1.0276795625686646, "learning_rate": 2.7525693232499516e-05, "loss": 5.4127, "step": 4636 }, { "epoch": 0.45, "grad_norm": 0.9759376645088196, "learning_rate": 2.750630211363196e-05, "loss": 5.4136, "step": 4640 }, { "epoch": 0.45, "grad_norm": 0.9665167331695557, "learning_rate": 2.7486910994764397e-05, "loss": 5.4307, "step": 4644 }, { "epoch": 0.45, "grad_norm": 1.0241793394088745, "learning_rate": 2.746751987589684e-05, "loss": 5.3753, "step": 4648 }, { "epoch": 0.45, "grad_norm": 0.97547847032547, "learning_rate": 2.7448128757029285e-05, "loss": 5.4257, "step": 4652 }, { "epoch": 0.45, "grad_norm": 0.9529567360877991, "learning_rate": 2.7428737638161722e-05, "loss": 5.4561, "step": 4656 }, { "epoch": 0.45, "grad_norm": 0.9759612679481506, "learning_rate": 2.7409346519294166e-05, "loss": 5.3565, "step": 4660 }, { "epoch": 0.45, "grad_norm": 1.0170910358428955, "learning_rate": 2.7389955400426603e-05, "loss": 5.3003, "step": 4664 }, { "epoch": 0.45, "grad_norm": 1.0059340000152588, "learning_rate": 2.7370564281559047e-05, "loss": 5.4549, "step": 4668 }, { "epoch": 0.45, "grad_norm": 0.9615738987922668, "learning_rate": 2.735117316269149e-05, "loss": 5.3853, "step": 4672 }, { "epoch": 0.45, "grad_norm": 1.07008957862854, "learning_rate": 2.733178204382393e-05, "loss": 5.4426, "step": 4676 }, { "epoch": 0.45, "grad_norm": 1.0182669162750244, "learning_rate": 2.7312390924956372e-05, "loss": 5.3921, "step": 4680 }, { "epoch": 0.45, "grad_norm": 1.0533527135849, "learning_rate": 2.7292999806088816e-05, "loss": 5.4001, "step": 4684 }, { "epoch": 0.45, "grad_norm": 1.0803202390670776, "learning_rate": 2.7273608687221253e-05, "loss": 5.4789, "step": 4688 }, { "epoch": 0.45, "grad_norm": 1.0540350675582886, "learning_rate": 2.7254217568353697e-05, "loss": 5.4387, "step": 4692 }, { "epoch": 0.46, "grad_norm": 0.9857541918754578, "learning_rate": 2.7234826449486138e-05, "loss": 5.4231, "step": 4696 }, { "epoch": 0.46, "grad_norm": 0.9859492778778076, "learning_rate": 2.721543533061858e-05, "loss": 5.4614, "step": 4700 }, { "epoch": 0.46, "grad_norm": 1.0070449113845825, "learning_rate": 2.719604421175102e-05, "loss": 5.4235, "step": 4704 }, { "epoch": 0.46, "grad_norm": 1.0052629709243774, "learning_rate": 2.717665309288346e-05, "loss": 5.4255, "step": 4708 }, { "epoch": 0.46, "grad_norm": 1.0358524322509766, "learning_rate": 2.7157261974015903e-05, "loss": 5.3811, "step": 4712 }, { "epoch": 0.46, "grad_norm": 0.9567641615867615, "learning_rate": 2.7137870855148344e-05, "loss": 5.3977, "step": 4716 }, { "epoch": 0.46, "grad_norm": 1.0303080081939697, "learning_rate": 2.7118479736280784e-05, "loss": 5.5686, "step": 4720 }, { "epoch": 0.46, "grad_norm": 0.9873762726783752, "learning_rate": 2.7099088617413225e-05, "loss": 5.3691, "step": 4724 }, { "epoch": 0.46, "grad_norm": 0.9921165704727173, "learning_rate": 2.707969749854567e-05, "loss": 5.4681, "step": 4728 }, { "epoch": 0.46, "grad_norm": 0.9569845795631409, "learning_rate": 2.7060306379678106e-05, "loss": 5.2843, "step": 4732 }, { "epoch": 0.46, "grad_norm": 1.0275355577468872, "learning_rate": 2.704091526081055e-05, "loss": 5.3745, "step": 4736 }, { "epoch": 0.46, "grad_norm": 1.034792184829712, "learning_rate": 2.7021524141942987e-05, "loss": 5.3919, "step": 4740 }, { "epoch": 0.46, "grad_norm": 1.05870521068573, "learning_rate": 2.700213302307543e-05, "loss": 5.4458, "step": 4744 }, { "epoch": 0.46, "grad_norm": 1.1329647302627563, "learning_rate": 2.6982741904207875e-05, "loss": 5.4046, "step": 4748 }, { "epoch": 0.46, "grad_norm": 0.9993448853492737, "learning_rate": 2.6963350785340312e-05, "loss": 5.3655, "step": 4752 }, { "epoch": 0.46, "grad_norm": 1.0315309762954712, "learning_rate": 2.6943959666472756e-05, "loss": 5.3993, "step": 4756 }, { "epoch": 0.46, "grad_norm": 1.0383795499801636, "learning_rate": 2.69245685476052e-05, "loss": 5.3556, "step": 4760 }, { "epoch": 0.46, "grad_norm": 0.9584831595420837, "learning_rate": 2.6905177428737637e-05, "loss": 5.4116, "step": 4764 }, { "epoch": 0.46, "grad_norm": 1.0869189500808716, "learning_rate": 2.688578630987008e-05, "loss": 5.3348, "step": 4768 }, { "epoch": 0.46, "grad_norm": 1.1664848327636719, "learning_rate": 2.6866395191002525e-05, "loss": 5.3155, "step": 4772 }, { "epoch": 0.46, "grad_norm": 1.0994501113891602, "learning_rate": 2.6847004072134962e-05, "loss": 5.3711, "step": 4776 }, { "epoch": 0.46, "grad_norm": 0.9989491105079651, "learning_rate": 2.6827612953267406e-05, "loss": 5.3603, "step": 4780 }, { "epoch": 0.46, "grad_norm": 1.1618521213531494, "learning_rate": 2.6808221834399843e-05, "loss": 5.4217, "step": 4784 }, { "epoch": 0.46, "grad_norm": 0.9329230785369873, "learning_rate": 2.6788830715532287e-05, "loss": 5.4326, "step": 4788 }, { "epoch": 0.46, "grad_norm": 0.9027351140975952, "learning_rate": 2.676943959666473e-05, "loss": 5.2881, "step": 4792 }, { "epoch": 0.46, "grad_norm": 0.9692825078964233, "learning_rate": 2.6750048477797168e-05, "loss": 5.3769, "step": 4796 }, { "epoch": 0.47, "grad_norm": 0.9520678520202637, "learning_rate": 2.6730657358929612e-05, "loss": 5.377, "step": 4800 }, { "epoch": 0.47, "grad_norm": 0.9845559000968933, "learning_rate": 2.6711266240062056e-05, "loss": 5.3848, "step": 4804 }, { "epoch": 0.47, "grad_norm": 1.0046534538269043, "learning_rate": 2.6691875121194493e-05, "loss": 5.3973, "step": 4808 }, { "epoch": 0.47, "grad_norm": 1.0389518737792969, "learning_rate": 2.6672484002326937e-05, "loss": 5.4557, "step": 4812 }, { "epoch": 0.47, "grad_norm": 1.0322433710098267, "learning_rate": 2.665309288345938e-05, "loss": 5.3843, "step": 4816 }, { "epoch": 0.47, "grad_norm": 1.075420618057251, "learning_rate": 2.6633701764591818e-05, "loss": 5.3968, "step": 4820 }, { "epoch": 0.47, "grad_norm": 1.004739761352539, "learning_rate": 2.6614310645724262e-05, "loss": 5.4449, "step": 4824 }, { "epoch": 0.47, "grad_norm": 0.9824436902999878, "learning_rate": 2.65949195268567e-05, "loss": 5.348, "step": 4828 }, { "epoch": 0.47, "grad_norm": 1.0312827825546265, "learning_rate": 2.6575528407989143e-05, "loss": 5.438, "step": 4832 }, { "epoch": 0.47, "grad_norm": 0.9586296677589417, "learning_rate": 2.6556137289121584e-05, "loss": 5.4007, "step": 4836 }, { "epoch": 0.47, "grad_norm": 1.0318596363067627, "learning_rate": 2.6536746170254024e-05, "loss": 5.3997, "step": 4840 }, { "epoch": 0.47, "grad_norm": 0.9269830584526062, "learning_rate": 2.6517355051386468e-05, "loss": 5.3597, "step": 4844 }, { "epoch": 0.47, "grad_norm": 1.0358200073242188, "learning_rate": 2.649796393251891e-05, "loss": 5.3729, "step": 4848 }, { "epoch": 0.47, "grad_norm": 0.9825155735015869, "learning_rate": 2.647857281365135e-05, "loss": 5.3678, "step": 4852 }, { "epoch": 0.47, "grad_norm": 1.0111230611801147, "learning_rate": 2.645918169478379e-05, "loss": 5.4858, "step": 4856 }, { "epoch": 0.47, "grad_norm": 0.947890043258667, "learning_rate": 2.643979057591623e-05, "loss": 5.3939, "step": 4860 }, { "epoch": 0.47, "grad_norm": 0.9673914909362793, "learning_rate": 2.642039945704867e-05, "loss": 5.3704, "step": 4864 }, { "epoch": 0.47, "grad_norm": 1.0731589794158936, "learning_rate": 2.6401008338181115e-05, "loss": 5.4116, "step": 4868 }, { "epoch": 0.47, "grad_norm": 1.0044986009597778, "learning_rate": 2.6381617219313552e-05, "loss": 5.4318, "step": 4872 }, { "epoch": 0.47, "grad_norm": 0.9914534687995911, "learning_rate": 2.6362226100445996e-05, "loss": 5.3216, "step": 4876 }, { "epoch": 0.47, "grad_norm": 0.9887456297874451, "learning_rate": 2.634283498157844e-05, "loss": 5.4072, "step": 4880 }, { "epoch": 0.47, "grad_norm": 1.0049148797988892, "learning_rate": 2.6323443862710877e-05, "loss": 5.4255, "step": 4884 }, { "epoch": 0.47, "grad_norm": 1.0179352760314941, "learning_rate": 2.630405274384332e-05, "loss": 5.3108, "step": 4888 }, { "epoch": 0.47, "grad_norm": 0.9834827184677124, "learning_rate": 2.6284661624975765e-05, "loss": 5.3262, "step": 4892 }, { "epoch": 0.47, "grad_norm": 1.032747507095337, "learning_rate": 2.6265270506108202e-05, "loss": 5.3302, "step": 4896 }, { "epoch": 0.48, "grad_norm": 1.0606615543365479, "learning_rate": 2.6245879387240646e-05, "loss": 5.3351, "step": 4900 }, { "epoch": 0.48, "grad_norm": 1.0665369033813477, "learning_rate": 2.6226488268373083e-05, "loss": 5.3851, "step": 4904 }, { "epoch": 0.48, "grad_norm": 1.0293782949447632, "learning_rate": 2.6207097149505527e-05, "loss": 5.3772, "step": 4908 }, { "epoch": 0.48, "grad_norm": 1.0471354722976685, "learning_rate": 2.618770603063797e-05, "loss": 5.4218, "step": 4912 }, { "epoch": 0.48, "grad_norm": 0.9955822229385376, "learning_rate": 2.6168314911770408e-05, "loss": 5.3427, "step": 4916 }, { "epoch": 0.48, "grad_norm": 0.9505192041397095, "learning_rate": 2.6148923792902852e-05, "loss": 5.4442, "step": 4920 }, { "epoch": 0.48, "grad_norm": 1.0545083284378052, "learning_rate": 2.6129532674035296e-05, "loss": 5.3821, "step": 4924 }, { "epoch": 0.48, "grad_norm": 1.077630639076233, "learning_rate": 2.6110141555167733e-05, "loss": 5.4215, "step": 4928 }, { "epoch": 0.48, "grad_norm": 1.1127928495407104, "learning_rate": 2.6090750436300177e-05, "loss": 5.4017, "step": 4932 }, { "epoch": 0.48, "grad_norm": 1.0543692111968994, "learning_rate": 2.607135931743262e-05, "loss": 5.3918, "step": 4936 }, { "epoch": 0.48, "grad_norm": 1.0516538619995117, "learning_rate": 2.6051968198565058e-05, "loss": 5.5141, "step": 4940 }, { "epoch": 0.48, "grad_norm": 1.077325463294983, "learning_rate": 2.6032577079697502e-05, "loss": 5.4855, "step": 4944 }, { "epoch": 0.48, "grad_norm": 1.034180998802185, "learning_rate": 2.601318596082994e-05, "loss": 5.4097, "step": 4948 }, { "epoch": 0.48, "grad_norm": 1.0808318853378296, "learning_rate": 2.5993794841962383e-05, "loss": 5.3711, "step": 4952 }, { "epoch": 0.48, "grad_norm": 1.0749197006225586, "learning_rate": 2.5974403723094827e-05, "loss": 5.3917, "step": 4956 }, { "epoch": 0.48, "grad_norm": 1.1579383611679077, "learning_rate": 2.5955012604227264e-05, "loss": 5.4251, "step": 4960 }, { "epoch": 0.48, "grad_norm": 1.0081652402877808, "learning_rate": 2.5935621485359708e-05, "loss": 5.3684, "step": 4964 }, { "epoch": 0.48, "grad_norm": 0.9971766471862793, "learning_rate": 2.591623036649215e-05, "loss": 5.5078, "step": 4968 }, { "epoch": 0.48, "grad_norm": 0.9572992920875549, "learning_rate": 2.589683924762459e-05, "loss": 5.4004, "step": 4972 }, { "epoch": 0.48, "grad_norm": 1.0008381605148315, "learning_rate": 2.5877448128757033e-05, "loss": 5.4194, "step": 4976 }, { "epoch": 0.48, "grad_norm": 1.0567643642425537, "learning_rate": 2.585805700988947e-05, "loss": 5.4014, "step": 4980 }, { "epoch": 0.48, "grad_norm": 1.084086298942566, "learning_rate": 2.5838665891021914e-05, "loss": 5.4578, "step": 4984 }, { "epoch": 0.48, "grad_norm": 1.062583088874817, "learning_rate": 2.5819274772154355e-05, "loss": 5.4051, "step": 4988 }, { "epoch": 0.48, "grad_norm": 1.018263578414917, "learning_rate": 2.5799883653286795e-05, "loss": 5.4411, "step": 4992 }, { "epoch": 0.48, "grad_norm": 1.0082392692565918, "learning_rate": 2.5780492534419236e-05, "loss": 5.329, "step": 4996 }, { "epoch": 0.48, "grad_norm": 1.0163345336914062, "learning_rate": 2.576110141555168e-05, "loss": 5.3487, "step": 5000 }, { "epoch": 0.49, "grad_norm": 0.9743790030479431, "learning_rate": 2.5741710296684117e-05, "loss": 5.4595, "step": 5004 }, { "epoch": 0.49, "grad_norm": 1.0049850940704346, "learning_rate": 2.572231917781656e-05, "loss": 5.3147, "step": 5008 }, { "epoch": 0.49, "grad_norm": 0.9798968434333801, "learning_rate": 2.5702928058949005e-05, "loss": 5.4953, "step": 5012 }, { "epoch": 0.49, "grad_norm": 1.1075799465179443, "learning_rate": 2.5683536940081442e-05, "loss": 5.4284, "step": 5016 }, { "epoch": 0.49, "grad_norm": 0.9216572046279907, "learning_rate": 2.5664145821213886e-05, "loss": 5.4338, "step": 5020 }, { "epoch": 0.49, "grad_norm": 1.0511445999145508, "learning_rate": 2.5644754702346323e-05, "loss": 5.35, "step": 5024 }, { "epoch": 0.49, "grad_norm": 1.0618550777435303, "learning_rate": 2.5625363583478767e-05, "loss": 5.3539, "step": 5028 }, { "epoch": 0.49, "grad_norm": 1.0445276498794556, "learning_rate": 2.560597246461121e-05, "loss": 5.3797, "step": 5032 }, { "epoch": 0.49, "grad_norm": 1.0461180210113525, "learning_rate": 2.5586581345743648e-05, "loss": 5.4262, "step": 5036 }, { "epoch": 0.49, "grad_norm": 1.1503204107284546, "learning_rate": 2.5567190226876092e-05, "loss": 5.3629, "step": 5040 }, { "epoch": 0.49, "grad_norm": 0.9792975187301636, "learning_rate": 2.5547799108008536e-05, "loss": 5.3245, "step": 5044 }, { "epoch": 0.49, "grad_norm": 1.0088895559310913, "learning_rate": 2.5528407989140973e-05, "loss": 5.3447, "step": 5048 }, { "epoch": 0.49, "grad_norm": 0.9955673217773438, "learning_rate": 2.5509016870273417e-05, "loss": 5.2618, "step": 5052 }, { "epoch": 0.49, "grad_norm": 0.9601762294769287, "learning_rate": 2.548962575140586e-05, "loss": 5.3711, "step": 5056 }, { "epoch": 0.49, "grad_norm": 0.9723737835884094, "learning_rate": 2.5470234632538298e-05, "loss": 5.4062, "step": 5060 }, { "epoch": 0.49, "grad_norm": 0.961095929145813, "learning_rate": 2.5450843513670742e-05, "loss": 5.4148, "step": 5064 }, { "epoch": 0.49, "grad_norm": 1.071572184562683, "learning_rate": 2.543145239480318e-05, "loss": 5.3569, "step": 5068 }, { "epoch": 0.49, "grad_norm": 1.0116100311279297, "learning_rate": 2.5412061275935623e-05, "loss": 5.4034, "step": 5072 }, { "epoch": 0.49, "grad_norm": 1.0250638723373413, "learning_rate": 2.5392670157068067e-05, "loss": 5.3628, "step": 5076 }, { "epoch": 0.49, "grad_norm": 0.9859606027603149, "learning_rate": 2.5373279038200504e-05, "loss": 5.4041, "step": 5080 }, { "epoch": 0.49, "grad_norm": 1.01252019405365, "learning_rate": 2.5353887919332948e-05, "loss": 5.4278, "step": 5084 }, { "epoch": 0.49, "grad_norm": 1.083108901977539, "learning_rate": 2.5334496800465392e-05, "loss": 5.3863, "step": 5088 }, { "epoch": 0.49, "grad_norm": 0.969508945941925, "learning_rate": 2.531510568159783e-05, "loss": 5.3222, "step": 5092 }, { "epoch": 0.49, "grad_norm": 0.9864810109138489, "learning_rate": 2.5295714562730273e-05, "loss": 5.4214, "step": 5096 }, { "epoch": 0.49, "grad_norm": 0.9918404221534729, "learning_rate": 2.527632344386271e-05, "loss": 5.3679, "step": 5100 }, { "epoch": 0.49, "grad_norm": 1.036550760269165, "learning_rate": 2.5256932324995154e-05, "loss": 5.2626, "step": 5104 }, { "epoch": 0.5, "grad_norm": 0.9950888752937317, "learning_rate": 2.5237541206127598e-05, "loss": 5.3105, "step": 5108 }, { "epoch": 0.5, "grad_norm": 1.0199189186096191, "learning_rate": 2.5218150087260035e-05, "loss": 5.3776, "step": 5112 }, { "epoch": 0.5, "grad_norm": 1.0516811609268188, "learning_rate": 2.519875896839248e-05, "loss": 5.3124, "step": 5116 }, { "epoch": 0.5, "grad_norm": 1.078303575515747, "learning_rate": 2.517936784952492e-05, "loss": 5.4144, "step": 5120 }, { "epoch": 0.5, "grad_norm": 0.987095296382904, "learning_rate": 2.515997673065736e-05, "loss": 5.349, "step": 5124 }, { "epoch": 0.5, "grad_norm": 0.9634592533111572, "learning_rate": 2.51405856117898e-05, "loss": 5.2509, "step": 5128 }, { "epoch": 0.5, "grad_norm": 1.0986416339874268, "learning_rate": 2.5121194492922244e-05, "loss": 5.3225, "step": 5132 }, { "epoch": 0.5, "grad_norm": 1.0385926961898804, "learning_rate": 2.510180337405468e-05, "loss": 5.3529, "step": 5136 }, { "epoch": 0.5, "grad_norm": 0.9627379179000854, "learning_rate": 2.5082412255187126e-05, "loss": 5.3675, "step": 5140 }, { "epoch": 0.5, "grad_norm": 1.003461480140686, "learning_rate": 2.5063021136319566e-05, "loss": 5.4202, "step": 5144 }, { "epoch": 0.5, "grad_norm": 0.9771742224693298, "learning_rate": 2.5043630017452007e-05, "loss": 5.3042, "step": 5148 }, { "epoch": 0.5, "grad_norm": 0.9279887676239014, "learning_rate": 2.502423889858445e-05, "loss": 5.3561, "step": 5152 }, { "epoch": 0.5, "grad_norm": 0.9281094670295715, "learning_rate": 2.5004847779716888e-05, "loss": 5.3231, "step": 5156 }, { "epoch": 0.5, "grad_norm": 1.0261962413787842, "learning_rate": 2.498545666084933e-05, "loss": 5.2869, "step": 5160 }, { "epoch": 0.5, "grad_norm": 0.956251859664917, "learning_rate": 2.4966065541981772e-05, "loss": 5.3058, "step": 5164 }, { "epoch": 0.5, "grad_norm": 1.0073554515838623, "learning_rate": 2.4946674423114213e-05, "loss": 5.3455, "step": 5168 }, { "epoch": 0.5, "grad_norm": 0.9829320907592773, "learning_rate": 2.4927283304246657e-05, "loss": 5.2866, "step": 5172 }, { "epoch": 0.5, "grad_norm": 1.067661166191101, "learning_rate": 2.4907892185379097e-05, "loss": 5.3966, "step": 5176 }, { "epoch": 0.5, "grad_norm": 1.049310326576233, "learning_rate": 2.4888501066511538e-05, "loss": 5.471, "step": 5180 }, { "epoch": 0.5, "grad_norm": 1.052420973777771, "learning_rate": 2.486910994764398e-05, "loss": 5.3907, "step": 5184 }, { "epoch": 0.5, "grad_norm": 1.0315730571746826, "learning_rate": 2.4849718828776422e-05, "loss": 5.3528, "step": 5188 }, { "epoch": 0.5, "grad_norm": 1.165887475013733, "learning_rate": 2.4830327709908863e-05, "loss": 5.399, "step": 5192 }, { "epoch": 0.5, "grad_norm": 0.9832936525344849, "learning_rate": 2.4810936591041303e-05, "loss": 5.4077, "step": 5196 }, { "epoch": 0.5, "grad_norm": 0.9647985100746155, "learning_rate": 2.4791545472173747e-05, "loss": 5.3483, "step": 5200 }, { "epoch": 0.5, "grad_norm": 1.0132298469543457, "learning_rate": 2.4772154353306188e-05, "loss": 5.4671, "step": 5204 }, { "epoch": 0.5, "grad_norm": 0.9725786447525024, "learning_rate": 2.4752763234438628e-05, "loss": 5.3745, "step": 5208 }, { "epoch": 0.51, "grad_norm": 1.0451775789260864, "learning_rate": 2.473337211557107e-05, "loss": 5.4456, "step": 5212 }, { "epoch": 0.51, "grad_norm": 0.9887628555297852, "learning_rate": 2.4713980996703513e-05, "loss": 5.333, "step": 5216 }, { "epoch": 0.51, "grad_norm": 1.044665813446045, "learning_rate": 2.4694589877835953e-05, "loss": 5.3592, "step": 5220 }, { "epoch": 0.51, "grad_norm": 0.9709651470184326, "learning_rate": 2.4675198758968394e-05, "loss": 5.3393, "step": 5224 }, { "epoch": 0.51, "grad_norm": 0.9530077576637268, "learning_rate": 2.4655807640100834e-05, "loss": 5.3602, "step": 5228 }, { "epoch": 0.51, "grad_norm": 0.9592558145523071, "learning_rate": 2.4636416521233278e-05, "loss": 5.3931, "step": 5232 }, { "epoch": 0.51, "grad_norm": 1.0525456666946411, "learning_rate": 2.461702540236572e-05, "loss": 5.3367, "step": 5236 }, { "epoch": 0.51, "grad_norm": 1.0699265003204346, "learning_rate": 2.459763428349816e-05, "loss": 5.3894, "step": 5240 }, { "epoch": 0.51, "grad_norm": 1.0436670780181885, "learning_rate": 2.4578243164630603e-05, "loss": 5.3643, "step": 5244 }, { "epoch": 0.51, "grad_norm": 1.0085383653640747, "learning_rate": 2.4558852045763044e-05, "loss": 5.2872, "step": 5248 }, { "epoch": 0.51, "grad_norm": 1.0748775005340576, "learning_rate": 2.4539460926895484e-05, "loss": 5.4714, "step": 5252 }, { "epoch": 0.51, "grad_norm": 1.0273828506469727, "learning_rate": 2.4520069808027925e-05, "loss": 5.3271, "step": 5256 }, { "epoch": 0.51, "grad_norm": 0.9897158145904541, "learning_rate": 2.4500678689160365e-05, "loss": 5.3268, "step": 5260 }, { "epoch": 0.51, "grad_norm": 0.9869460463523865, "learning_rate": 2.4481287570292806e-05, "loss": 5.3173, "step": 5264 }, { "epoch": 0.51, "grad_norm": 1.0365619659423828, "learning_rate": 2.4461896451425246e-05, "loss": 5.4168, "step": 5268 }, { "epoch": 0.51, "grad_norm": 1.106515884399414, "learning_rate": 2.4442505332557687e-05, "loss": 5.3515, "step": 5272 }, { "epoch": 0.51, "grad_norm": 0.9866341352462769, "learning_rate": 2.442311421369013e-05, "loss": 5.4644, "step": 5276 }, { "epoch": 0.51, "grad_norm": 1.1131459474563599, "learning_rate": 2.440372309482257e-05, "loss": 5.2931, "step": 5280 }, { "epoch": 0.51, "grad_norm": 0.9824268221855164, "learning_rate": 2.4384331975955012e-05, "loss": 5.3094, "step": 5284 }, { "epoch": 0.51, "grad_norm": 1.0347408056259155, "learning_rate": 2.4364940857087452e-05, "loss": 5.3462, "step": 5288 }, { "epoch": 0.51, "grad_norm": 1.0148606300354004, "learning_rate": 2.4345549738219896e-05, "loss": 5.3224, "step": 5292 }, { "epoch": 0.51, "grad_norm": 1.1260877847671509, "learning_rate": 2.4326158619352337e-05, "loss": 5.3337, "step": 5296 }, { "epoch": 0.51, "grad_norm": 1.034441590309143, "learning_rate": 2.4306767500484777e-05, "loss": 5.3728, "step": 5300 }, { "epoch": 0.51, "grad_norm": 1.0142707824707031, "learning_rate": 2.428737638161722e-05, "loss": 5.3848, "step": 5304 }, { "epoch": 0.51, "grad_norm": 0.9446210861206055, "learning_rate": 2.4267985262749662e-05, "loss": 5.3927, "step": 5308 }, { "epoch": 0.52, "grad_norm": 1.0099536180496216, "learning_rate": 2.4248594143882102e-05, "loss": 5.3282, "step": 5312 }, { "epoch": 0.52, "grad_norm": 1.0873996019363403, "learning_rate": 2.4229203025014543e-05, "loss": 5.4533, "step": 5316 }, { "epoch": 0.52, "grad_norm": 0.9772780537605286, "learning_rate": 2.4209811906146987e-05, "loss": 5.4258, "step": 5320 }, { "epoch": 0.52, "grad_norm": 0.9593787789344788, "learning_rate": 2.4190420787279427e-05, "loss": 5.369, "step": 5324 }, { "epoch": 0.52, "grad_norm": 1.0021839141845703, "learning_rate": 2.4171029668411868e-05, "loss": 5.3721, "step": 5328 }, { "epoch": 0.52, "grad_norm": 0.9823392629623413, "learning_rate": 2.415163854954431e-05, "loss": 5.3277, "step": 5332 }, { "epoch": 0.52, "grad_norm": 1.1319977045059204, "learning_rate": 2.4132247430676752e-05, "loss": 5.4176, "step": 5336 }, { "epoch": 0.52, "grad_norm": 0.9715146422386169, "learning_rate": 2.4112856311809193e-05, "loss": 5.3833, "step": 5340 }, { "epoch": 0.52, "grad_norm": 1.0981075763702393, "learning_rate": 2.4093465192941634e-05, "loss": 5.4515, "step": 5344 }, { "epoch": 0.52, "grad_norm": 0.9924083948135376, "learning_rate": 2.4074074074074074e-05, "loss": 5.2979, "step": 5348 }, { "epoch": 0.52, "grad_norm": 0.9601098895072937, "learning_rate": 2.4054682955206518e-05, "loss": 5.3825, "step": 5352 }, { "epoch": 0.52, "grad_norm": 1.134017825126648, "learning_rate": 2.403529183633896e-05, "loss": 5.4259, "step": 5356 }, { "epoch": 0.52, "grad_norm": 1.0065308809280396, "learning_rate": 2.40159007174714e-05, "loss": 5.2852, "step": 5360 }, { "epoch": 0.52, "grad_norm": 1.120582103729248, "learning_rate": 2.3996509598603843e-05, "loss": 5.3872, "step": 5364 }, { "epoch": 0.52, "grad_norm": 1.0139710903167725, "learning_rate": 2.3977118479736284e-05, "loss": 5.4521, "step": 5368 }, { "epoch": 0.52, "grad_norm": 0.9832119941711426, "learning_rate": 2.3957727360868724e-05, "loss": 5.4052, "step": 5372 }, { "epoch": 0.52, "grad_norm": 1.0349886417388916, "learning_rate": 2.3938336242001165e-05, "loss": 5.2876, "step": 5376 }, { "epoch": 0.52, "grad_norm": 1.0244460105895996, "learning_rate": 2.391894512313361e-05, "loss": 5.4077, "step": 5380 }, { "epoch": 0.52, "grad_norm": 1.0130488872528076, "learning_rate": 2.389955400426605e-05, "loss": 5.3853, "step": 5384 }, { "epoch": 0.52, "grad_norm": 1.0110422372817993, "learning_rate": 2.388016288539849e-05, "loss": 5.4025, "step": 5388 }, { "epoch": 0.52, "grad_norm": 1.1506974697113037, "learning_rate": 2.386077176653093e-05, "loss": 5.4613, "step": 5392 }, { "epoch": 0.52, "grad_norm": 0.9795562624931335, "learning_rate": 2.384138064766337e-05, "loss": 5.3075, "step": 5396 }, { "epoch": 0.52, "grad_norm": 1.1148579120635986, "learning_rate": 2.382198952879581e-05, "loss": 5.4263, "step": 5400 }, { "epoch": 0.52, "grad_norm": 0.993959367275238, "learning_rate": 2.3802598409928252e-05, "loss": 5.3765, "step": 5404 }, { "epoch": 0.52, "grad_norm": 0.9957119822502136, "learning_rate": 2.3783207291060692e-05, "loss": 5.3645, "step": 5408 }, { "epoch": 0.52, "grad_norm": 0.975545346736908, "learning_rate": 2.3763816172193136e-05, "loss": 5.3522, "step": 5412 }, { "epoch": 0.53, "grad_norm": 0.9839780330657959, "learning_rate": 2.3744425053325577e-05, "loss": 5.333, "step": 5416 }, { "epoch": 0.53, "grad_norm": 1.0729457139968872, "learning_rate": 2.3725033934458017e-05, "loss": 5.4589, "step": 5420 }, { "epoch": 0.53, "grad_norm": 1.0412721633911133, "learning_rate": 2.370564281559046e-05, "loss": 5.3807, "step": 5424 }, { "epoch": 0.53, "grad_norm": 1.0309202671051025, "learning_rate": 2.3686251696722902e-05, "loss": 5.3716, "step": 5428 }, { "epoch": 0.53, "grad_norm": 1.076830506324768, "learning_rate": 2.3666860577855342e-05, "loss": 5.4141, "step": 5432 }, { "epoch": 0.53, "grad_norm": 1.1014715433120728, "learning_rate": 2.3647469458987783e-05, "loss": 5.3345, "step": 5436 }, { "epoch": 0.53, "grad_norm": 1.107503056526184, "learning_rate": 2.3628078340120227e-05, "loss": 5.2971, "step": 5440 }, { "epoch": 0.53, "grad_norm": 1.0111589431762695, "learning_rate": 2.3608687221252667e-05, "loss": 5.3824, "step": 5444 }, { "epoch": 0.53, "grad_norm": 1.014420509338379, "learning_rate": 2.3589296102385108e-05, "loss": 5.4623, "step": 5448 }, { "epoch": 0.53, "grad_norm": 0.9904616475105286, "learning_rate": 2.356990498351755e-05, "loss": 5.3542, "step": 5452 }, { "epoch": 0.53, "grad_norm": 1.061886191368103, "learning_rate": 2.3550513864649992e-05, "loss": 5.4478, "step": 5456 }, { "epoch": 0.53, "grad_norm": 1.016271948814392, "learning_rate": 2.3531122745782433e-05, "loss": 5.3555, "step": 5460 }, { "epoch": 0.53, "grad_norm": 0.9344733953475952, "learning_rate": 2.3511731626914873e-05, "loss": 5.2854, "step": 5464 }, { "epoch": 0.53, "grad_norm": 1.050699234008789, "learning_rate": 2.3492340508047314e-05, "loss": 5.3064, "step": 5468 }, { "epoch": 0.53, "grad_norm": 1.024604320526123, "learning_rate": 2.3472949389179758e-05, "loss": 5.3682, "step": 5472 }, { "epoch": 0.53, "grad_norm": 1.0533298254013062, "learning_rate": 2.34535582703122e-05, "loss": 5.3336, "step": 5476 }, { "epoch": 0.53, "grad_norm": 1.0331076383590698, "learning_rate": 2.343416715144464e-05, "loss": 5.518, "step": 5480 }, { "epoch": 0.53, "grad_norm": 1.101364254951477, "learning_rate": 2.3414776032577083e-05, "loss": 5.3366, "step": 5484 }, { "epoch": 0.53, "grad_norm": 0.9391170740127563, "learning_rate": 2.3395384913709523e-05, "loss": 5.3567, "step": 5488 }, { "epoch": 0.53, "grad_norm": 0.9690737724304199, "learning_rate": 2.3375993794841964e-05, "loss": 5.3323, "step": 5492 }, { "epoch": 0.53, "grad_norm": 1.0040618181228638, "learning_rate": 2.3356602675974404e-05, "loss": 5.3209, "step": 5496 }, { "epoch": 0.53, "grad_norm": 0.9501678347587585, "learning_rate": 2.333721155710685e-05, "loss": 5.3043, "step": 5500 }, { "epoch": 0.53, "grad_norm": 0.9725291728973389, "learning_rate": 2.331782043823929e-05, "loss": 5.4104, "step": 5504 }, { "epoch": 0.53, "grad_norm": 0.9561870098114014, "learning_rate": 2.329842931937173e-05, "loss": 5.4013, "step": 5508 }, { "epoch": 0.53, "grad_norm": 1.0054466724395752, "learning_rate": 2.327903820050417e-05, "loss": 5.3692, "step": 5512 }, { "epoch": 0.53, "grad_norm": 1.0419256687164307, "learning_rate": 2.3259647081636614e-05, "loss": 5.4082, "step": 5516 }, { "epoch": 0.54, "grad_norm": 1.0327070951461792, "learning_rate": 2.3240255962769054e-05, "loss": 5.4721, "step": 5520 }, { "epoch": 0.54, "grad_norm": 0.9534458518028259, "learning_rate": 2.3220864843901495e-05, "loss": 5.3484, "step": 5524 }, { "epoch": 0.54, "grad_norm": 1.0182461738586426, "learning_rate": 2.3201473725033936e-05, "loss": 5.3595, "step": 5528 }, { "epoch": 0.54, "grad_norm": 1.0508521795272827, "learning_rate": 2.3182082606166376e-05, "loss": 5.3651, "step": 5532 }, { "epoch": 0.54, "grad_norm": 0.9299846887588501, "learning_rate": 2.3162691487298817e-05, "loss": 5.3989, "step": 5536 }, { "epoch": 0.54, "grad_norm": 1.0000419616699219, "learning_rate": 2.3143300368431257e-05, "loss": 5.2571, "step": 5540 }, { "epoch": 0.54, "grad_norm": 1.084428071975708, "learning_rate": 2.31239092495637e-05, "loss": 5.3619, "step": 5544 }, { "epoch": 0.54, "grad_norm": 1.0404269695281982, "learning_rate": 2.310451813069614e-05, "loss": 5.3425, "step": 5548 }, { "epoch": 0.54, "grad_norm": 1.00773286819458, "learning_rate": 2.3085127011828582e-05, "loss": 5.329, "step": 5552 }, { "epoch": 0.54, "grad_norm": 1.000942349433899, "learning_rate": 2.3065735892961023e-05, "loss": 5.3361, "step": 5556 }, { "epoch": 0.54, "grad_norm": 1.021449089050293, "learning_rate": 2.3046344774093467e-05, "loss": 5.3859, "step": 5560 }, { "epoch": 0.54, "grad_norm": 1.0686131715774536, "learning_rate": 2.3026953655225907e-05, "loss": 5.4319, "step": 5564 }, { "epoch": 0.54, "grad_norm": 1.0589152574539185, "learning_rate": 2.3007562536358348e-05, "loss": 5.3367, "step": 5568 }, { "epoch": 0.54, "grad_norm": 1.0058645009994507, "learning_rate": 2.2988171417490788e-05, "loss": 5.3473, "step": 5572 }, { "epoch": 0.54, "grad_norm": 1.0159991979599, "learning_rate": 2.2968780298623232e-05, "loss": 5.375, "step": 5576 }, { "epoch": 0.54, "grad_norm": 1.0634344816207886, "learning_rate": 2.2949389179755673e-05, "loss": 5.401, "step": 5580 }, { "epoch": 0.54, "grad_norm": 1.077378273010254, "learning_rate": 2.2929998060888113e-05, "loss": 5.4141, "step": 5584 }, { "epoch": 0.54, "grad_norm": 0.9465997815132141, "learning_rate": 2.2910606942020554e-05, "loss": 5.357, "step": 5588 }, { "epoch": 0.54, "grad_norm": 1.069690465927124, "learning_rate": 2.2891215823152998e-05, "loss": 5.3555, "step": 5592 }, { "epoch": 0.54, "grad_norm": 0.9830535054206848, "learning_rate": 2.2871824704285438e-05, "loss": 5.3552, "step": 5596 }, { "epoch": 0.54, "grad_norm": 1.0138301849365234, "learning_rate": 2.285243358541788e-05, "loss": 5.342, "step": 5600 }, { "epoch": 0.54, "grad_norm": 1.0020854473114014, "learning_rate": 2.2833042466550323e-05, "loss": 5.3489, "step": 5604 }, { "epoch": 0.54, "grad_norm": 1.0177456140518188, "learning_rate": 2.2813651347682763e-05, "loss": 5.269, "step": 5608 }, { "epoch": 0.54, "grad_norm": 1.0663460493087769, "learning_rate": 2.2794260228815204e-05, "loss": 5.297, "step": 5612 }, { "epoch": 0.54, "grad_norm": 0.9828472137451172, "learning_rate": 2.2774869109947644e-05, "loss": 5.3487, "step": 5616 }, { "epoch": 0.54, "grad_norm": 1.0115931034088135, "learning_rate": 2.2755477991080088e-05, "loss": 5.2995, "step": 5620 }, { "epoch": 0.55, "grad_norm": 0.974193274974823, "learning_rate": 2.273608687221253e-05, "loss": 5.3582, "step": 5624 }, { "epoch": 0.55, "grad_norm": 1.0036460161209106, "learning_rate": 2.271669575334497e-05, "loss": 5.3034, "step": 5628 }, { "epoch": 0.55, "grad_norm": 1.0930732488632202, "learning_rate": 2.269730463447741e-05, "loss": 5.3337, "step": 5632 }, { "epoch": 0.55, "grad_norm": 0.9957634806632996, "learning_rate": 2.2677913515609854e-05, "loss": 5.3641, "step": 5636 }, { "epoch": 0.55, "grad_norm": 1.0515607595443726, "learning_rate": 2.2658522396742294e-05, "loss": 5.5135, "step": 5640 }, { "epoch": 0.55, "grad_norm": 0.9784524440765381, "learning_rate": 2.2639131277874735e-05, "loss": 5.344, "step": 5644 }, { "epoch": 0.55, "grad_norm": 1.0054571628570557, "learning_rate": 2.2619740159007175e-05, "loss": 5.3933, "step": 5648 }, { "epoch": 0.55, "grad_norm": 0.965859591960907, "learning_rate": 2.260034904013962e-05, "loss": 5.3461, "step": 5652 }, { "epoch": 0.55, "grad_norm": 1.1478146314620972, "learning_rate": 2.258095792127206e-05, "loss": 5.3515, "step": 5656 }, { "epoch": 0.55, "grad_norm": 0.9893291592597961, "learning_rate": 2.25615668024045e-05, "loss": 5.2155, "step": 5660 }, { "epoch": 0.55, "grad_norm": 0.945925235748291, "learning_rate": 2.254217568353694e-05, "loss": 5.3156, "step": 5664 }, { "epoch": 0.55, "grad_norm": 1.020790934562683, "learning_rate": 2.252278456466938e-05, "loss": 5.4443, "step": 5668 }, { "epoch": 0.55, "grad_norm": 0.9524897933006287, "learning_rate": 2.2503393445801822e-05, "loss": 5.3692, "step": 5672 }, { "epoch": 0.55, "grad_norm": 1.0599005222320557, "learning_rate": 2.2484002326934266e-05, "loss": 5.3473, "step": 5676 }, { "epoch": 0.55, "grad_norm": 1.029077410697937, "learning_rate": 2.2464611208066706e-05, "loss": 5.3266, "step": 5680 }, { "epoch": 0.55, "grad_norm": 1.0404125452041626, "learning_rate": 2.2445220089199147e-05, "loss": 5.3654, "step": 5684 }, { "epoch": 0.55, "grad_norm": 0.9789564609527588, "learning_rate": 2.2425828970331587e-05, "loss": 5.3689, "step": 5688 }, { "epoch": 0.55, "grad_norm": 0.9803569912910461, "learning_rate": 2.2406437851464028e-05, "loss": 5.3231, "step": 5692 }, { "epoch": 0.55, "grad_norm": 1.036303997039795, "learning_rate": 2.2387046732596472e-05, "loss": 5.3055, "step": 5696 }, { "epoch": 0.55, "grad_norm": 1.0285331010818481, "learning_rate": 2.2367655613728912e-05, "loss": 5.3553, "step": 5700 }, { "epoch": 0.55, "grad_norm": 0.9725428819656372, "learning_rate": 2.2348264494861353e-05, "loss": 5.3338, "step": 5704 }, { "epoch": 0.55, "grad_norm": 1.0550493001937866, "learning_rate": 2.2328873375993794e-05, "loss": 5.3542, "step": 5708 }, { "epoch": 0.55, "grad_norm": 0.9990110993385315, "learning_rate": 2.2309482257126237e-05, "loss": 5.3457, "step": 5712 }, { "epoch": 0.55, "grad_norm": 1.0040626525878906, "learning_rate": 2.2290091138258678e-05, "loss": 5.3244, "step": 5716 }, { "epoch": 0.55, "grad_norm": 1.0053871870040894, "learning_rate": 2.227070001939112e-05, "loss": 5.3694, "step": 5720 }, { "epoch": 0.55, "grad_norm": 1.0112636089324951, "learning_rate": 2.2251308900523562e-05, "loss": 5.4549, "step": 5724 }, { "epoch": 0.56, "grad_norm": 1.0170855522155762, "learning_rate": 2.2231917781656003e-05, "loss": 5.3509, "step": 5728 }, { "epoch": 0.56, "grad_norm": 0.9416339993476868, "learning_rate": 2.2212526662788444e-05, "loss": 5.2804, "step": 5732 }, { "epoch": 0.56, "grad_norm": 1.014902114868164, "learning_rate": 2.2193135543920884e-05, "loss": 5.3263, "step": 5736 }, { "epoch": 0.56, "grad_norm": 1.0529359579086304, "learning_rate": 2.2173744425053328e-05, "loss": 5.3151, "step": 5740 }, { "epoch": 0.56, "grad_norm": 0.9608129262924194, "learning_rate": 2.215435330618577e-05, "loss": 5.2865, "step": 5744 }, { "epoch": 0.56, "grad_norm": 1.0631046295166016, "learning_rate": 2.213496218731821e-05, "loss": 5.3379, "step": 5748 }, { "epoch": 0.56, "grad_norm": 1.0569404363632202, "learning_rate": 2.211557106845065e-05, "loss": 5.4344, "step": 5752 }, { "epoch": 0.56, "grad_norm": 1.0010555982589722, "learning_rate": 2.2096179949583094e-05, "loss": 5.3753, "step": 5756 }, { "epoch": 0.56, "grad_norm": 0.9690674543380737, "learning_rate": 2.2076788830715534e-05, "loss": 5.3373, "step": 5760 }, { "epoch": 0.56, "grad_norm": 1.03341805934906, "learning_rate": 2.2057397711847975e-05, "loss": 5.4154, "step": 5764 }, { "epoch": 0.56, "grad_norm": 0.9742380976676941, "learning_rate": 2.2038006592980415e-05, "loss": 5.4585, "step": 5768 }, { "epoch": 0.56, "grad_norm": 0.977780818939209, "learning_rate": 2.201861547411286e-05, "loss": 5.351, "step": 5772 }, { "epoch": 0.56, "grad_norm": 0.9809128046035767, "learning_rate": 2.19992243552453e-05, "loss": 5.2699, "step": 5776 }, { "epoch": 0.56, "grad_norm": 1.0754761695861816, "learning_rate": 2.197983323637774e-05, "loss": 5.4224, "step": 5780 }, { "epoch": 0.56, "grad_norm": 1.1107863187789917, "learning_rate": 2.1960442117510184e-05, "loss": 5.3779, "step": 5784 }, { "epoch": 0.56, "grad_norm": 0.9794926643371582, "learning_rate": 2.1941050998642625e-05, "loss": 5.4183, "step": 5788 }, { "epoch": 0.56, "grad_norm": 1.0375285148620605, "learning_rate": 2.1921659879775065e-05, "loss": 5.3357, "step": 5792 }, { "epoch": 0.56, "grad_norm": 1.028106451034546, "learning_rate": 2.1902268760907506e-05, "loss": 5.3636, "step": 5796 }, { "epoch": 0.56, "grad_norm": 0.9496660828590393, "learning_rate": 2.1882877642039946e-05, "loss": 5.4075, "step": 5800 }, { "epoch": 0.56, "grad_norm": 0.9957082867622375, "learning_rate": 2.1863486523172387e-05, "loss": 5.3508, "step": 5804 }, { "epoch": 0.56, "grad_norm": 1.0487018823623657, "learning_rate": 2.184409540430483e-05, "loss": 5.3796, "step": 5808 }, { "epoch": 0.56, "grad_norm": 0.9658469557762146, "learning_rate": 2.182470428543727e-05, "loss": 5.2512, "step": 5812 }, { "epoch": 0.56, "grad_norm": 1.0086036920547485, "learning_rate": 2.1805313166569712e-05, "loss": 5.3492, "step": 5816 }, { "epoch": 0.56, "grad_norm": 1.09529447555542, "learning_rate": 2.1785922047702152e-05, "loss": 5.3167, "step": 5820 }, { "epoch": 0.56, "grad_norm": 1.0463392734527588, "learning_rate": 2.1766530928834593e-05, "loss": 5.3627, "step": 5824 }, { "epoch": 0.57, "grad_norm": 1.0675616264343262, "learning_rate": 2.1747139809967033e-05, "loss": 5.3868, "step": 5828 }, { "epoch": 0.57, "grad_norm": 0.9881440997123718, "learning_rate": 2.1727748691099477e-05, "loss": 5.3498, "step": 5832 }, { "epoch": 0.57, "grad_norm": 1.0680946111679077, "learning_rate": 2.1708357572231918e-05, "loss": 5.3376, "step": 5836 }, { "epoch": 0.57, "grad_norm": 1.1282621622085571, "learning_rate": 2.168896645336436e-05, "loss": 5.3426, "step": 5840 }, { "epoch": 0.57, "grad_norm": 1.0684411525726318, "learning_rate": 2.1669575334496802e-05, "loss": 5.3769, "step": 5844 }, { "epoch": 0.57, "grad_norm": 1.0463793277740479, "learning_rate": 2.1650184215629243e-05, "loss": 5.3831, "step": 5848 }, { "epoch": 0.57, "grad_norm": 1.0346778631210327, "learning_rate": 2.1630793096761683e-05, "loss": 5.3522, "step": 5852 }, { "epoch": 0.57, "grad_norm": 1.0426431894302368, "learning_rate": 2.1611401977894124e-05, "loss": 5.4098, "step": 5856 }, { "epoch": 0.57, "grad_norm": 1.0200704336166382, "learning_rate": 2.1592010859026568e-05, "loss": 5.265, "step": 5860 }, { "epoch": 0.57, "grad_norm": 1.0575344562530518, "learning_rate": 2.157261974015901e-05, "loss": 5.3004, "step": 5864 }, { "epoch": 0.57, "grad_norm": 0.9970076680183411, "learning_rate": 2.155322862129145e-05, "loss": 5.3595, "step": 5868 }, { "epoch": 0.57, "grad_norm": 1.0401124954223633, "learning_rate": 2.153383750242389e-05, "loss": 5.5118, "step": 5872 }, { "epoch": 0.57, "grad_norm": 1.0695228576660156, "learning_rate": 2.1514446383556333e-05, "loss": 5.3078, "step": 5876 }, { "epoch": 0.57, "grad_norm": 1.0231329202651978, "learning_rate": 2.1495055264688774e-05, "loss": 5.3619, "step": 5880 }, { "epoch": 0.57, "grad_norm": 1.0032014846801758, "learning_rate": 2.1475664145821214e-05, "loss": 5.2561, "step": 5884 }, { "epoch": 0.57, "grad_norm": 1.0087443590164185, "learning_rate": 2.1456273026953655e-05, "loss": 5.4882, "step": 5888 }, { "epoch": 0.57, "grad_norm": 1.014003038406372, "learning_rate": 2.14368819080861e-05, "loss": 5.4139, "step": 5892 }, { "epoch": 0.57, "grad_norm": 0.9923560619354248, "learning_rate": 2.141749078921854e-05, "loss": 5.3716, "step": 5896 }, { "epoch": 0.57, "grad_norm": 1.022443175315857, "learning_rate": 2.139809967035098e-05, "loss": 5.3654, "step": 5900 }, { "epoch": 0.57, "grad_norm": 1.0812815427780151, "learning_rate": 2.1378708551483424e-05, "loss": 5.4307, "step": 5904 }, { "epoch": 0.57, "grad_norm": 0.9952316284179688, "learning_rate": 2.1359317432615864e-05, "loss": 5.3851, "step": 5908 }, { "epoch": 0.57, "grad_norm": 1.0843716859817505, "learning_rate": 2.1339926313748305e-05, "loss": 5.3507, "step": 5912 }, { "epoch": 0.57, "grad_norm": 0.9775163531303406, "learning_rate": 2.1320535194880746e-05, "loss": 5.2868, "step": 5916 }, { "epoch": 0.57, "grad_norm": 1.051283597946167, "learning_rate": 2.130114407601319e-05, "loss": 5.4404, "step": 5920 }, { "epoch": 0.57, "grad_norm": 1.0350865125656128, "learning_rate": 2.128175295714563e-05, "loss": 5.3286, "step": 5924 }, { "epoch": 0.57, "grad_norm": 1.1702210903167725, "learning_rate": 2.126236183827807e-05, "loss": 5.4338, "step": 5928 }, { "epoch": 0.58, "grad_norm": 1.0268408060073853, "learning_rate": 2.124297071941051e-05, "loss": 5.4108, "step": 5932 }, { "epoch": 0.58, "grad_norm": 1.002463459968567, "learning_rate": 2.122357960054295e-05, "loss": 5.2631, "step": 5936 }, { "epoch": 0.58, "grad_norm": 1.04947829246521, "learning_rate": 2.1204188481675396e-05, "loss": 5.3011, "step": 5940 }, { "epoch": 0.58, "grad_norm": 1.0733132362365723, "learning_rate": 2.1184797362807836e-05, "loss": 5.5408, "step": 5944 }, { "epoch": 0.58, "grad_norm": 1.1274093389511108, "learning_rate": 2.1165406243940277e-05, "loss": 5.4001, "step": 5948 }, { "epoch": 0.58, "grad_norm": 1.0161489248275757, "learning_rate": 2.1146015125072717e-05, "loss": 5.392, "step": 5952 }, { "epoch": 0.58, "grad_norm": 1.1048080921173096, "learning_rate": 2.1126624006205158e-05, "loss": 5.3586, "step": 5956 }, { "epoch": 0.58, "grad_norm": 1.0635709762573242, "learning_rate": 2.1107232887337598e-05, "loss": 5.3164, "step": 5960 }, { "epoch": 0.58, "grad_norm": 1.0826531648635864, "learning_rate": 2.1087841768470042e-05, "loss": 5.4166, "step": 5964 }, { "epoch": 0.58, "grad_norm": 1.0716171264648438, "learning_rate": 2.1068450649602483e-05, "loss": 5.3028, "step": 5968 }, { "epoch": 0.58, "grad_norm": 1.0432461500167847, "learning_rate": 2.1049059530734923e-05, "loss": 5.3777, "step": 5972 }, { "epoch": 0.58, "grad_norm": 1.0327306985855103, "learning_rate": 2.1029668411867364e-05, "loss": 5.2718, "step": 5976 }, { "epoch": 0.58, "grad_norm": 1.0294878482818604, "learning_rate": 2.1010277292999808e-05, "loss": 5.2555, "step": 5980 }, { "epoch": 0.58, "grad_norm": 1.0690736770629883, "learning_rate": 2.0990886174132248e-05, "loss": 5.2835, "step": 5984 }, { "epoch": 0.58, "grad_norm": 1.060434341430664, "learning_rate": 2.097149505526469e-05, "loss": 5.4269, "step": 5988 }, { "epoch": 0.58, "grad_norm": 1.1109763383865356, "learning_rate": 2.095210393639713e-05, "loss": 5.4351, "step": 5992 }, { "epoch": 0.58, "grad_norm": 1.0055971145629883, "learning_rate": 2.0932712817529573e-05, "loss": 5.2801, "step": 5996 }, { "epoch": 0.58, "grad_norm": 0.990050733089447, "learning_rate": 2.0913321698662014e-05, "loss": 5.3526, "step": 6000 }, { "epoch": 0.58, "grad_norm": 0.9774187207221985, "learning_rate": 2.0893930579794454e-05, "loss": 5.3241, "step": 6004 }, { "epoch": 0.58, "grad_norm": 1.0026575326919556, "learning_rate": 2.0874539460926895e-05, "loss": 5.3575, "step": 6008 }, { "epoch": 0.58, "grad_norm": 0.9780769944190979, "learning_rate": 2.085514834205934e-05, "loss": 5.2948, "step": 6012 }, { "epoch": 0.58, "grad_norm": 1.0509179830551147, "learning_rate": 2.083575722319178e-05, "loss": 5.3332, "step": 6016 }, { "epoch": 0.58, "grad_norm": 1.0327376127243042, "learning_rate": 2.081636610432422e-05, "loss": 5.38, "step": 6020 }, { "epoch": 0.58, "grad_norm": 1.105682134628296, "learning_rate": 2.0796974985456664e-05, "loss": 5.3568, "step": 6024 }, { "epoch": 0.58, "grad_norm": 1.0796016454696655, "learning_rate": 2.0777583866589104e-05, "loss": 5.3871, "step": 6028 }, { "epoch": 0.58, "grad_norm": 0.983562171459198, "learning_rate": 2.0758192747721545e-05, "loss": 5.3737, "step": 6032 }, { "epoch": 0.59, "grad_norm": 1.0870602130889893, "learning_rate": 2.0738801628853985e-05, "loss": 5.2752, "step": 6036 }, { "epoch": 0.59, "grad_norm": 0.9455908536911011, "learning_rate": 2.071941050998643e-05, "loss": 5.3517, "step": 6040 }, { "epoch": 0.59, "grad_norm": 1.0257951021194458, "learning_rate": 2.070001939111887e-05, "loss": 5.3043, "step": 6044 }, { "epoch": 0.59, "grad_norm": 1.0068105459213257, "learning_rate": 2.068062827225131e-05, "loss": 5.2876, "step": 6048 }, { "epoch": 0.59, "grad_norm": 1.0739387273788452, "learning_rate": 2.066123715338375e-05, "loss": 5.3432, "step": 6052 }, { "epoch": 0.59, "grad_norm": 0.9713083505630493, "learning_rate": 2.0641846034516195e-05, "loss": 5.368, "step": 6056 }, { "epoch": 0.59, "grad_norm": 0.9598544239997864, "learning_rate": 2.0622454915648635e-05, "loss": 5.467, "step": 6060 }, { "epoch": 0.59, "grad_norm": 1.0342966318130493, "learning_rate": 2.0603063796781076e-05, "loss": 5.3495, "step": 6064 }, { "epoch": 0.59, "grad_norm": 0.9927212595939636, "learning_rate": 2.0583672677913516e-05, "loss": 5.3898, "step": 6068 }, { "epoch": 0.59, "grad_norm": 0.985098659992218, "learning_rate": 2.056428155904596e-05, "loss": 5.3315, "step": 6072 }, { "epoch": 0.59, "grad_norm": 1.1182785034179688, "learning_rate": 2.05448904401784e-05, "loss": 5.3721, "step": 6076 }, { "epoch": 0.59, "grad_norm": 1.0341837406158447, "learning_rate": 2.052549932131084e-05, "loss": 5.3284, "step": 6080 }, { "epoch": 0.59, "grad_norm": 1.0580908060073853, "learning_rate": 2.0506108202443282e-05, "loss": 5.3776, "step": 6084 }, { "epoch": 0.59, "grad_norm": 1.016257405281067, "learning_rate": 2.0486717083575722e-05, "loss": 5.3578, "step": 6088 }, { "epoch": 0.59, "grad_norm": 1.0956933498382568, "learning_rate": 2.0467325964708163e-05, "loss": 5.3792, "step": 6092 }, { "epoch": 0.59, "grad_norm": 1.0442465543746948, "learning_rate": 2.0447934845840604e-05, "loss": 5.2992, "step": 6096 }, { "epoch": 0.59, "grad_norm": 1.1148533821105957, "learning_rate": 2.0428543726973047e-05, "loss": 5.2835, "step": 6100 }, { "epoch": 0.59, "grad_norm": 1.056727409362793, "learning_rate": 2.0409152608105488e-05, "loss": 5.3336, "step": 6104 }, { "epoch": 0.59, "grad_norm": 0.9661352634429932, "learning_rate": 2.038976148923793e-05, "loss": 5.356, "step": 6108 }, { "epoch": 0.59, "grad_norm": 1.0282217264175415, "learning_rate": 2.037037037037037e-05, "loss": 5.3118, "step": 6112 }, { "epoch": 0.59, "grad_norm": 1.0714175701141357, "learning_rate": 2.0350979251502813e-05, "loss": 5.3817, "step": 6116 }, { "epoch": 0.59, "grad_norm": 1.0747861862182617, "learning_rate": 2.0331588132635254e-05, "loss": 5.3961, "step": 6120 }, { "epoch": 0.59, "grad_norm": 0.9946739673614502, "learning_rate": 2.0312197013767694e-05, "loss": 5.343, "step": 6124 }, { "epoch": 0.59, "grad_norm": 1.002004623413086, "learning_rate": 2.0292805894900135e-05, "loss": 5.3654, "step": 6128 }, { "epoch": 0.59, "grad_norm": 1.0547658205032349, "learning_rate": 2.027341477603258e-05, "loss": 5.3869, "step": 6132 }, { "epoch": 0.59, "grad_norm": 1.076196551322937, "learning_rate": 2.025402365716502e-05, "loss": 5.2561, "step": 6136 }, { "epoch": 0.6, "grad_norm": 1.3180465698242188, "learning_rate": 2.023463253829746e-05, "loss": 5.3457, "step": 6140 }, { "epoch": 0.6, "grad_norm": 1.0028260946273804, "learning_rate": 2.0215241419429904e-05, "loss": 5.4271, "step": 6144 }, { "epoch": 0.6, "grad_norm": 1.0375735759735107, "learning_rate": 2.0195850300562344e-05, "loss": 5.3181, "step": 6148 }, { "epoch": 0.6, "grad_norm": 1.0055443048477173, "learning_rate": 2.0176459181694785e-05, "loss": 5.3544, "step": 6152 }, { "epoch": 0.6, "grad_norm": 1.0876275300979614, "learning_rate": 2.0157068062827225e-05, "loss": 5.3487, "step": 6156 }, { "epoch": 0.6, "grad_norm": 1.018140196800232, "learning_rate": 2.013767694395967e-05, "loss": 5.4325, "step": 6160 }, { "epoch": 0.6, "grad_norm": 1.0800230503082275, "learning_rate": 2.011828582509211e-05, "loss": 5.2748, "step": 6164 }, { "epoch": 0.6, "grad_norm": 1.0820457935333252, "learning_rate": 2.009889470622455e-05, "loss": 5.3661, "step": 6168 }, { "epoch": 0.6, "grad_norm": 1.0936886072158813, "learning_rate": 2.007950358735699e-05, "loss": 5.3191, "step": 6172 }, { "epoch": 0.6, "grad_norm": 1.0739785432815552, "learning_rate": 2.0060112468489435e-05, "loss": 5.372, "step": 6176 }, { "epoch": 0.6, "grad_norm": 1.0785382986068726, "learning_rate": 2.0040721349621875e-05, "loss": 5.3858, "step": 6180 }, { "epoch": 0.6, "grad_norm": 1.000064492225647, "learning_rate": 2.0021330230754316e-05, "loss": 5.3544, "step": 6184 }, { "epoch": 0.6, "grad_norm": 1.096794605255127, "learning_rate": 2.0001939111886756e-05, "loss": 5.3608, "step": 6188 }, { "epoch": 0.6, "grad_norm": 1.0260350704193115, "learning_rate": 1.99825479930192e-05, "loss": 5.4049, "step": 6192 }, { "epoch": 0.6, "grad_norm": 1.0056685209274292, "learning_rate": 1.996315687415164e-05, "loss": 5.3588, "step": 6196 }, { "epoch": 0.6, "grad_norm": 1.003697156906128, "learning_rate": 1.994376575528408e-05, "loss": 5.4, "step": 6200 }, { "epoch": 0.6, "grad_norm": 1.0048468112945557, "learning_rate": 1.9924374636416525e-05, "loss": 5.2654, "step": 6204 }, { "epoch": 0.6, "grad_norm": 0.9437733292579651, "learning_rate": 1.9904983517548966e-05, "loss": 5.2859, "step": 6208 }, { "epoch": 0.6, "grad_norm": 0.961186945438385, "learning_rate": 1.9885592398681406e-05, "loss": 5.3605, "step": 6212 }, { "epoch": 0.6, "grad_norm": 1.0297905206680298, "learning_rate": 1.9866201279813847e-05, "loss": 5.3202, "step": 6216 }, { "epoch": 0.6, "grad_norm": 1.3261497020721436, "learning_rate": 1.9846810160946287e-05, "loss": 5.3587, "step": 6220 }, { "epoch": 0.6, "grad_norm": 1.1081327199935913, "learning_rate": 1.9827419042078728e-05, "loss": 5.427, "step": 6224 }, { "epoch": 0.6, "grad_norm": 1.0671581029891968, "learning_rate": 1.980802792321117e-05, "loss": 5.453, "step": 6228 }, { "epoch": 0.6, "grad_norm": 1.0665878057479858, "learning_rate": 1.978863680434361e-05, "loss": 5.3263, "step": 6232 }, { "epoch": 0.6, "grad_norm": 0.9510138034820557, "learning_rate": 1.9769245685476053e-05, "loss": 5.3364, "step": 6236 }, { "epoch": 0.61, "grad_norm": 0.9980200529098511, "learning_rate": 1.9749854566608493e-05, "loss": 5.2064, "step": 6240 }, { "epoch": 0.61, "grad_norm": 1.0917850732803345, "learning_rate": 1.9730463447740934e-05, "loss": 5.3213, "step": 6244 }, { "epoch": 0.61, "grad_norm": 1.0644116401672363, "learning_rate": 1.9711072328873374e-05, "loss": 5.2939, "step": 6248 }, { "epoch": 0.61, "grad_norm": 1.0722259283065796, "learning_rate": 1.969168121000582e-05, "loss": 5.3984, "step": 6252 }, { "epoch": 0.61, "grad_norm": 1.0094479322433472, "learning_rate": 1.967229009113826e-05, "loss": 5.3198, "step": 6256 }, { "epoch": 0.61, "grad_norm": 1.0126482248306274, "learning_rate": 1.96528989722707e-05, "loss": 5.3356, "step": 6260 }, { "epoch": 0.61, "grad_norm": 1.061038613319397, "learning_rate": 1.9633507853403143e-05, "loss": 5.294, "step": 6264 }, { "epoch": 0.61, "grad_norm": 1.1081583499908447, "learning_rate": 1.9614116734535584e-05, "loss": 5.3518, "step": 6268 }, { "epoch": 0.61, "grad_norm": 1.0472965240478516, "learning_rate": 1.9594725615668024e-05, "loss": 5.3526, "step": 6272 }, { "epoch": 0.61, "grad_norm": 0.9935174584388733, "learning_rate": 1.9575334496800465e-05, "loss": 5.3108, "step": 6276 }, { "epoch": 0.61, "grad_norm": 1.0617495775222778, "learning_rate": 1.955594337793291e-05, "loss": 5.3518, "step": 6280 }, { "epoch": 0.61, "grad_norm": 1.0488762855529785, "learning_rate": 1.953655225906535e-05, "loss": 5.2888, "step": 6284 }, { "epoch": 0.61, "grad_norm": 1.065202236175537, "learning_rate": 1.951716114019779e-05, "loss": 5.2927, "step": 6288 }, { "epoch": 0.61, "grad_norm": 1.1323513984680176, "learning_rate": 1.949777002133023e-05, "loss": 5.3774, "step": 6292 }, { "epoch": 0.61, "grad_norm": 1.0796220302581787, "learning_rate": 1.9478378902462674e-05, "loss": 5.3579, "step": 6296 }, { "epoch": 0.61, "grad_norm": 1.0631530284881592, "learning_rate": 1.9458987783595115e-05, "loss": 5.2644, "step": 6300 }, { "epoch": 0.61, "grad_norm": 1.0359313488006592, "learning_rate": 1.9439596664727556e-05, "loss": 5.3949, "step": 6304 }, { "epoch": 0.61, "grad_norm": 1.0680855512619019, "learning_rate": 1.9420205545859996e-05, "loss": 5.3567, "step": 6308 }, { "epoch": 0.61, "grad_norm": 1.013702154159546, "learning_rate": 1.940081442699244e-05, "loss": 5.4166, "step": 6312 }, { "epoch": 0.61, "grad_norm": 0.9817814230918884, "learning_rate": 1.938142330812488e-05, "loss": 5.4169, "step": 6316 }, { "epoch": 0.61, "grad_norm": 1.0646679401397705, "learning_rate": 1.936203218925732e-05, "loss": 5.3344, "step": 6320 }, { "epoch": 0.61, "grad_norm": 1.0351269245147705, "learning_rate": 1.9342641070389765e-05, "loss": 5.358, "step": 6324 }, { "epoch": 0.61, "grad_norm": 0.9547563195228577, "learning_rate": 1.9323249951522206e-05, "loss": 5.3869, "step": 6328 }, { "epoch": 0.61, "grad_norm": 1.0959416627883911, "learning_rate": 1.9303858832654646e-05, "loss": 5.3785, "step": 6332 }, { "epoch": 0.61, "grad_norm": 1.026036262512207, "learning_rate": 1.9284467713787087e-05, "loss": 5.3627, "step": 6336 }, { "epoch": 0.61, "grad_norm": 1.0361682176589966, "learning_rate": 1.926507659491953e-05, "loss": 5.3649, "step": 6340 }, { "epoch": 0.62, "grad_norm": 1.0472002029418945, "learning_rate": 1.924568547605197e-05, "loss": 5.2838, "step": 6344 }, { "epoch": 0.62, "grad_norm": 1.0799552202224731, "learning_rate": 1.922629435718441e-05, "loss": 5.3879, "step": 6348 }, { "epoch": 0.62, "grad_norm": 1.0271328687667847, "learning_rate": 1.9206903238316852e-05, "loss": 5.3387, "step": 6352 }, { "epoch": 0.62, "grad_norm": 1.0310901403427124, "learning_rate": 1.9187512119449293e-05, "loss": 5.4013, "step": 6356 }, { "epoch": 0.62, "grad_norm": 1.0406147241592407, "learning_rate": 1.9168121000581733e-05, "loss": 5.3939, "step": 6360 }, { "epoch": 0.62, "grad_norm": 1.048988938331604, "learning_rate": 1.9148729881714174e-05, "loss": 5.3211, "step": 6364 }, { "epoch": 0.62, "grad_norm": 1.0325740575790405, "learning_rate": 1.9129338762846614e-05, "loss": 5.4601, "step": 6368 }, { "epoch": 0.62, "grad_norm": 0.9758936166763306, "learning_rate": 1.9109947643979058e-05, "loss": 5.3075, "step": 6372 }, { "epoch": 0.62, "grad_norm": 1.064858078956604, "learning_rate": 1.90905565251115e-05, "loss": 5.366, "step": 6376 }, { "epoch": 0.62, "grad_norm": 0.9335108399391174, "learning_rate": 1.907116540624394e-05, "loss": 5.2433, "step": 6380 }, { "epoch": 0.62, "grad_norm": 1.0345276594161987, "learning_rate": 1.9051774287376383e-05, "loss": 5.3438, "step": 6384 }, { "epoch": 0.62, "grad_norm": 1.0446693897247314, "learning_rate": 1.9032383168508824e-05, "loss": 5.4391, "step": 6388 }, { "epoch": 0.62, "grad_norm": 1.0137783288955688, "learning_rate": 1.9012992049641264e-05, "loss": 5.3926, "step": 6392 }, { "epoch": 0.62, "grad_norm": 0.953395664691925, "learning_rate": 1.8993600930773705e-05, "loss": 5.3703, "step": 6396 }, { "epoch": 0.62, "grad_norm": 1.0034725666046143, "learning_rate": 1.897420981190615e-05, "loss": 5.3504, "step": 6400 }, { "epoch": 0.62, "grad_norm": 1.0460563898086548, "learning_rate": 1.895481869303859e-05, "loss": 5.4165, "step": 6404 }, { "epoch": 0.62, "grad_norm": 1.0266785621643066, "learning_rate": 1.893542757417103e-05, "loss": 5.3853, "step": 6408 }, { "epoch": 0.62, "grad_norm": 1.1107487678527832, "learning_rate": 1.891603645530347e-05, "loss": 5.4166, "step": 6412 }, { "epoch": 0.62, "grad_norm": 1.1155457496643066, "learning_rate": 1.8896645336435914e-05, "loss": 5.3838, "step": 6416 }, { "epoch": 0.62, "grad_norm": 1.0329363346099854, "learning_rate": 1.8877254217568355e-05, "loss": 5.3541, "step": 6420 }, { "epoch": 0.62, "grad_norm": 1.028456449508667, "learning_rate": 1.8857863098700795e-05, "loss": 5.3506, "step": 6424 }, { "epoch": 0.62, "grad_norm": 1.090142011642456, "learning_rate": 1.8838471979833236e-05, "loss": 5.3296, "step": 6428 }, { "epoch": 0.62, "grad_norm": 1.071015477180481, "learning_rate": 1.881908086096568e-05, "loss": 5.3341, "step": 6432 }, { "epoch": 0.62, "grad_norm": 0.9764314293861389, "learning_rate": 1.879968974209812e-05, "loss": 5.3198, "step": 6436 }, { "epoch": 0.62, "grad_norm": 1.046424388885498, "learning_rate": 1.878029862323056e-05, "loss": 5.3087, "step": 6440 }, { "epoch": 0.62, "grad_norm": 1.0243570804595947, "learning_rate": 1.8760907504363005e-05, "loss": 5.3045, "step": 6444 }, { "epoch": 0.63, "grad_norm": 0.9760267734527588, "learning_rate": 1.8741516385495445e-05, "loss": 5.2859, "step": 6448 }, { "epoch": 0.63, "grad_norm": 1.1072561740875244, "learning_rate": 1.8722125266627886e-05, "loss": 5.359, "step": 6452 }, { "epoch": 0.63, "grad_norm": 1.0756561756134033, "learning_rate": 1.8702734147760326e-05, "loss": 5.409, "step": 6456 }, { "epoch": 0.63, "grad_norm": 0.9566421508789062, "learning_rate": 1.868334302889277e-05, "loss": 5.3902, "step": 6460 }, { "epoch": 0.63, "grad_norm": 1.0135536193847656, "learning_rate": 1.866395191002521e-05, "loss": 5.504, "step": 6464 }, { "epoch": 0.63, "grad_norm": 1.139150619506836, "learning_rate": 1.864456079115765e-05, "loss": 5.3502, "step": 6468 }, { "epoch": 0.63, "grad_norm": 1.0426169633865356, "learning_rate": 1.8625169672290092e-05, "loss": 5.2562, "step": 6472 }, { "epoch": 0.63, "grad_norm": 1.0229958295822144, "learning_rate": 1.8605778553422536e-05, "loss": 5.3472, "step": 6476 }, { "epoch": 0.63, "grad_norm": 1.0061606168746948, "learning_rate": 1.8586387434554976e-05, "loss": 5.2391, "step": 6480 }, { "epoch": 0.63, "grad_norm": 0.9669675230979919, "learning_rate": 1.8566996315687417e-05, "loss": 5.4266, "step": 6484 }, { "epoch": 0.63, "grad_norm": 1.0888794660568237, "learning_rate": 1.8547605196819857e-05, "loss": 5.3798, "step": 6488 }, { "epoch": 0.63, "grad_norm": 1.0779885053634644, "learning_rate": 1.8528214077952298e-05, "loss": 5.3159, "step": 6492 }, { "epoch": 0.63, "grad_norm": 1.0792943239212036, "learning_rate": 1.850882295908474e-05, "loss": 5.3322, "step": 6496 }, { "epoch": 0.63, "grad_norm": 1.0349109172821045, "learning_rate": 1.848943184021718e-05, "loss": 5.3125, "step": 6500 }, { "epoch": 0.63, "grad_norm": 1.0294796228408813, "learning_rate": 1.8470040721349623e-05, "loss": 5.3059, "step": 6504 }, { "epoch": 0.63, "grad_norm": 0.9547713994979858, "learning_rate": 1.8450649602482064e-05, "loss": 5.2556, "step": 6508 }, { "epoch": 0.63, "grad_norm": 1.1305149793624878, "learning_rate": 1.8431258483614504e-05, "loss": 5.3476, "step": 6512 }, { "epoch": 0.63, "grad_norm": 1.0296196937561035, "learning_rate": 1.8411867364746945e-05, "loss": 5.3404, "step": 6516 }, { "epoch": 0.63, "grad_norm": 1.0865687131881714, "learning_rate": 1.839247624587939e-05, "loss": 5.3772, "step": 6520 }, { "epoch": 0.63, "grad_norm": 1.065975546836853, "learning_rate": 1.837308512701183e-05, "loss": 5.3577, "step": 6524 }, { "epoch": 0.63, "grad_norm": 1.1665056943893433, "learning_rate": 1.835369400814427e-05, "loss": 5.3344, "step": 6528 }, { "epoch": 0.63, "grad_norm": 1.0219711065292358, "learning_rate": 1.833430288927671e-05, "loss": 5.3717, "step": 6532 }, { "epoch": 0.63, "grad_norm": 1.01799738407135, "learning_rate": 1.8314911770409154e-05, "loss": 5.3329, "step": 6536 }, { "epoch": 0.63, "grad_norm": 1.0811940431594849, "learning_rate": 1.8295520651541595e-05, "loss": 5.2708, "step": 6540 }, { "epoch": 0.63, "grad_norm": 1.0369648933410645, "learning_rate": 1.8276129532674035e-05, "loss": 5.3731, "step": 6544 }, { "epoch": 0.63, "grad_norm": 1.0611283779144287, "learning_rate": 1.8256738413806476e-05, "loss": 5.4037, "step": 6548 }, { "epoch": 0.64, "grad_norm": 0.9396833777427673, "learning_rate": 1.823734729493892e-05, "loss": 5.3158, "step": 6552 }, { "epoch": 0.64, "grad_norm": 0.9994028210639954, "learning_rate": 1.821795617607136e-05, "loss": 5.3562, "step": 6556 }, { "epoch": 0.64, "grad_norm": 1.0078858137130737, "learning_rate": 1.81985650572038e-05, "loss": 5.322, "step": 6560 }, { "epoch": 0.64, "grad_norm": 1.0305253267288208, "learning_rate": 1.8179173938336245e-05, "loss": 5.2791, "step": 6564 }, { "epoch": 0.64, "grad_norm": 1.0548304319381714, "learning_rate": 1.8159782819468685e-05, "loss": 5.356, "step": 6568 }, { "epoch": 0.64, "grad_norm": 0.9770938754081726, "learning_rate": 1.8140391700601126e-05, "loss": 5.3234, "step": 6572 }, { "epoch": 0.64, "grad_norm": 1.087689757347107, "learning_rate": 1.8121000581733566e-05, "loss": 5.3944, "step": 6576 }, { "epoch": 0.64, "grad_norm": 1.14712655544281, "learning_rate": 1.810160946286601e-05, "loss": 5.2835, "step": 6580 }, { "epoch": 0.64, "grad_norm": 1.1165133714675903, "learning_rate": 1.808221834399845e-05, "loss": 5.2902, "step": 6584 }, { "epoch": 0.64, "grad_norm": 1.063067078590393, "learning_rate": 1.806282722513089e-05, "loss": 5.3108, "step": 6588 }, { "epoch": 0.64, "grad_norm": 1.0933597087860107, "learning_rate": 1.8043436106263332e-05, "loss": 5.431, "step": 6592 }, { "epoch": 0.64, "grad_norm": 1.0003483295440674, "learning_rate": 1.8024044987395776e-05, "loss": 5.3212, "step": 6596 }, { "epoch": 0.64, "grad_norm": 0.9593095779418945, "learning_rate": 1.8004653868528216e-05, "loss": 5.3799, "step": 6600 }, { "epoch": 0.64, "grad_norm": 1.0600883960723877, "learning_rate": 1.7985262749660657e-05, "loss": 5.2845, "step": 6604 }, { "epoch": 0.64, "grad_norm": 1.054432988166809, "learning_rate": 1.7965871630793097e-05, "loss": 5.3376, "step": 6608 }, { "epoch": 0.64, "grad_norm": 1.0431993007659912, "learning_rate": 1.794648051192554e-05, "loss": 5.3923, "step": 6612 }, { "epoch": 0.64, "grad_norm": 1.1059125661849976, "learning_rate": 1.7927089393057982e-05, "loss": 5.3124, "step": 6616 }, { "epoch": 0.64, "grad_norm": 1.0165326595306396, "learning_rate": 1.7907698274190422e-05, "loss": 5.3649, "step": 6620 }, { "epoch": 0.64, "grad_norm": 1.1015493869781494, "learning_rate": 1.7888307155322863e-05, "loss": 5.3207, "step": 6624 }, { "epoch": 0.64, "grad_norm": 1.062280297279358, "learning_rate": 1.7868916036455303e-05, "loss": 5.3276, "step": 6628 }, { "epoch": 0.64, "grad_norm": 1.101285696029663, "learning_rate": 1.7849524917587744e-05, "loss": 5.4415, "step": 6632 }, { "epoch": 0.64, "grad_norm": 1.039291501045227, "learning_rate": 1.7830133798720184e-05, "loss": 5.3052, "step": 6636 }, { "epoch": 0.64, "grad_norm": 1.0250643491744995, "learning_rate": 1.781074267985263e-05, "loss": 5.337, "step": 6640 }, { "epoch": 0.64, "grad_norm": 1.0628857612609863, "learning_rate": 1.779135156098507e-05, "loss": 5.3236, "step": 6644 }, { "epoch": 0.64, "grad_norm": 1.0590665340423584, "learning_rate": 1.777196044211751e-05, "loss": 5.2928, "step": 6648 }, { "epoch": 0.64, "grad_norm": 1.009734034538269, "learning_rate": 1.775256932324995e-05, "loss": 5.3493, "step": 6652 }, { "epoch": 0.65, "grad_norm": 0.9915148019790649, "learning_rate": 1.7733178204382394e-05, "loss": 5.3266, "step": 6656 }, { "epoch": 0.65, "grad_norm": 1.0235103368759155, "learning_rate": 1.7713787085514834e-05, "loss": 5.3359, "step": 6660 }, { "epoch": 0.65, "grad_norm": 1.003313422203064, "learning_rate": 1.7694395966647275e-05, "loss": 5.3196, "step": 6664 }, { "epoch": 0.65, "grad_norm": 1.0449254512786865, "learning_rate": 1.7675004847779716e-05, "loss": 5.3287, "step": 6668 }, { "epoch": 0.65, "grad_norm": 1.0780065059661865, "learning_rate": 1.765561372891216e-05, "loss": 5.3901, "step": 6672 }, { "epoch": 0.65, "grad_norm": 1.134416103363037, "learning_rate": 1.76362226100446e-05, "loss": 5.3008, "step": 6676 }, { "epoch": 0.65, "grad_norm": 1.095788836479187, "learning_rate": 1.761683149117704e-05, "loss": 5.3973, "step": 6680 }, { "epoch": 0.65, "grad_norm": 1.038216471672058, "learning_rate": 1.7597440372309484e-05, "loss": 5.3031, "step": 6684 }, { "epoch": 0.65, "grad_norm": 1.033199429512024, "learning_rate": 1.7578049253441925e-05, "loss": 5.2332, "step": 6688 }, { "epoch": 0.65, "grad_norm": 1.0989290475845337, "learning_rate": 1.7558658134574366e-05, "loss": 5.3559, "step": 6692 }, { "epoch": 0.65, "grad_norm": 1.0528738498687744, "learning_rate": 1.7539267015706806e-05, "loss": 5.3435, "step": 6696 }, { "epoch": 0.65, "grad_norm": 1.0338224172592163, "learning_rate": 1.751987589683925e-05, "loss": 5.4003, "step": 6700 }, { "epoch": 0.65, "grad_norm": 1.021791934967041, "learning_rate": 1.750048477797169e-05, "loss": 5.2902, "step": 6704 }, { "epoch": 0.65, "grad_norm": 1.069692850112915, "learning_rate": 1.748109365910413e-05, "loss": 5.2897, "step": 6708 }, { "epoch": 0.65, "grad_norm": 1.0573803186416626, "learning_rate": 1.746170254023657e-05, "loss": 5.386, "step": 6712 }, { "epoch": 0.65, "grad_norm": 1.0095142126083374, "learning_rate": 1.7442311421369016e-05, "loss": 5.3541, "step": 6716 }, { "epoch": 0.65, "grad_norm": 1.0076450109481812, "learning_rate": 1.7422920302501456e-05, "loss": 5.3088, "step": 6720 }, { "epoch": 0.65, "grad_norm": 0.9816309809684753, "learning_rate": 1.7403529183633897e-05, "loss": 5.3906, "step": 6724 }, { "epoch": 0.65, "grad_norm": 1.1446229219436646, "learning_rate": 1.7384138064766337e-05, "loss": 5.3372, "step": 6728 }, { "epoch": 0.65, "grad_norm": 0.9934061169624329, "learning_rate": 1.736474694589878e-05, "loss": 5.4092, "step": 6732 }, { "epoch": 0.65, "grad_norm": 0.9881500005722046, "learning_rate": 1.734535582703122e-05, "loss": 5.2318, "step": 6736 }, { "epoch": 0.65, "grad_norm": 1.0100693702697754, "learning_rate": 1.7325964708163662e-05, "loss": 5.35, "step": 6740 }, { "epoch": 0.65, "grad_norm": 1.0123350620269775, "learning_rate": 1.7306573589296106e-05, "loss": 5.2857, "step": 6744 }, { "epoch": 0.65, "grad_norm": 1.0262787342071533, "learning_rate": 1.7287182470428547e-05, "loss": 5.425, "step": 6748 }, { "epoch": 0.65, "grad_norm": 1.0568041801452637, "learning_rate": 1.7267791351560987e-05, "loss": 5.3167, "step": 6752 }, { "epoch": 0.66, "grad_norm": 1.0791805982589722, "learning_rate": 1.7248400232693428e-05, "loss": 5.3329, "step": 6756 }, { "epoch": 0.66, "grad_norm": 1.0418261289596558, "learning_rate": 1.7229009113825868e-05, "loss": 5.3187, "step": 6760 }, { "epoch": 0.66, "grad_norm": 1.0648219585418701, "learning_rate": 1.720961799495831e-05, "loss": 5.2998, "step": 6764 }, { "epoch": 0.66, "grad_norm": 1.0518834590911865, "learning_rate": 1.719022687609075e-05, "loss": 5.4489, "step": 6768 }, { "epoch": 0.66, "grad_norm": 1.1231526136398315, "learning_rate": 1.7170835757223193e-05, "loss": 5.2959, "step": 6772 }, { "epoch": 0.66, "grad_norm": 1.0702370405197144, "learning_rate": 1.7151444638355634e-05, "loss": 5.321, "step": 6776 }, { "epoch": 0.66, "grad_norm": 1.0818907022476196, "learning_rate": 1.7132053519488074e-05, "loss": 5.2864, "step": 6780 }, { "epoch": 0.66, "grad_norm": 0.990020215511322, "learning_rate": 1.7112662400620515e-05, "loss": 5.2245, "step": 6784 }, { "epoch": 0.66, "grad_norm": 1.104169249534607, "learning_rate": 1.7093271281752955e-05, "loss": 5.29, "step": 6788 }, { "epoch": 0.66, "grad_norm": 1.0738272666931152, "learning_rate": 1.70738801628854e-05, "loss": 5.2843, "step": 6792 }, { "epoch": 0.66, "grad_norm": 1.009252905845642, "learning_rate": 1.705448904401784e-05, "loss": 5.4593, "step": 6796 }, { "epoch": 0.66, "grad_norm": 1.0361759662628174, "learning_rate": 1.703509792515028e-05, "loss": 5.2717, "step": 6800 }, { "epoch": 0.66, "grad_norm": 1.1224076747894287, "learning_rate": 1.7015706806282724e-05, "loss": 5.3675, "step": 6804 }, { "epoch": 0.66, "grad_norm": 1.094973087310791, "learning_rate": 1.6996315687415165e-05, "loss": 5.3096, "step": 6808 }, { "epoch": 0.66, "grad_norm": 1.0065085887908936, "learning_rate": 1.6976924568547605e-05, "loss": 5.41, "step": 6812 }, { "epoch": 0.66, "grad_norm": 1.0526219606399536, "learning_rate": 1.6957533449680046e-05, "loss": 5.408, "step": 6816 }, { "epoch": 0.66, "grad_norm": 1.0697109699249268, "learning_rate": 1.693814233081249e-05, "loss": 5.3375, "step": 6820 }, { "epoch": 0.66, "grad_norm": 1.0227476358413696, "learning_rate": 1.691875121194493e-05, "loss": 5.3254, "step": 6824 }, { "epoch": 0.66, "grad_norm": 0.9872763156890869, "learning_rate": 1.689936009307737e-05, "loss": 5.2915, "step": 6828 }, { "epoch": 0.66, "grad_norm": 1.028903603553772, "learning_rate": 1.687996897420981e-05, "loss": 5.2651, "step": 6832 }, { "epoch": 0.66, "grad_norm": 1.095890998840332, "learning_rate": 1.6860577855342255e-05, "loss": 5.4315, "step": 6836 }, { "epoch": 0.66, "grad_norm": 1.0813144445419312, "learning_rate": 1.6841186736474696e-05, "loss": 5.3725, "step": 6840 }, { "epoch": 0.66, "grad_norm": 1.0944348573684692, "learning_rate": 1.6821795617607136e-05, "loss": 5.3591, "step": 6844 }, { "epoch": 0.66, "grad_norm": 1.023173451423645, "learning_rate": 1.6802404498739577e-05, "loss": 5.3222, "step": 6848 }, { "epoch": 0.66, "grad_norm": 1.1330089569091797, "learning_rate": 1.678301337987202e-05, "loss": 5.3623, "step": 6852 }, { "epoch": 0.66, "grad_norm": 1.1375625133514404, "learning_rate": 1.676362226100446e-05, "loss": 5.3356, "step": 6856 }, { "epoch": 0.67, "grad_norm": 1.1181647777557373, "learning_rate": 1.6744231142136902e-05, "loss": 5.3939, "step": 6860 }, { "epoch": 0.67, "grad_norm": 1.0055108070373535, "learning_rate": 1.6724840023269346e-05, "loss": 5.339, "step": 6864 }, { "epoch": 0.67, "grad_norm": 1.001213788986206, "learning_rate": 1.6705448904401786e-05, "loss": 5.4536, "step": 6868 }, { "epoch": 0.67, "grad_norm": 1.0213780403137207, "learning_rate": 1.6686057785534227e-05, "loss": 5.3785, "step": 6872 }, { "epoch": 0.67, "grad_norm": 1.0076147317886353, "learning_rate": 1.6666666666666667e-05, "loss": 5.2837, "step": 6876 }, { "epoch": 0.67, "grad_norm": 1.0241953134536743, "learning_rate": 1.664727554779911e-05, "loss": 5.3633, "step": 6880 }, { "epoch": 0.67, "grad_norm": 1.022125005722046, "learning_rate": 1.6627884428931552e-05, "loss": 5.3523, "step": 6884 }, { "epoch": 0.67, "grad_norm": 1.0576834678649902, "learning_rate": 1.6608493310063992e-05, "loss": 5.315, "step": 6888 }, { "epoch": 0.67, "grad_norm": 1.0498878955841064, "learning_rate": 1.6589102191196433e-05, "loss": 5.3896, "step": 6892 }, { "epoch": 0.67, "grad_norm": 1.003936529159546, "learning_rate": 1.6569711072328874e-05, "loss": 5.3293, "step": 6896 }, { "epoch": 0.67, "grad_norm": 0.9774566292762756, "learning_rate": 1.6550319953461314e-05, "loss": 5.3388, "step": 6900 }, { "epoch": 0.67, "grad_norm": 1.0642743110656738, "learning_rate": 1.6530928834593755e-05, "loss": 5.3104, "step": 6904 }, { "epoch": 0.67, "grad_norm": 0.9880549907684326, "learning_rate": 1.65115377157262e-05, "loss": 5.3273, "step": 6908 }, { "epoch": 0.67, "grad_norm": 1.0308412313461304, "learning_rate": 1.649214659685864e-05, "loss": 5.293, "step": 6912 }, { "epoch": 0.67, "grad_norm": 1.0004191398620605, "learning_rate": 1.647275547799108e-05, "loss": 5.3624, "step": 6916 }, { "epoch": 0.67, "grad_norm": 1.0223098993301392, "learning_rate": 1.645336435912352e-05, "loss": 5.3298, "step": 6920 }, { "epoch": 0.67, "grad_norm": 1.0578629970550537, "learning_rate": 1.6433973240255964e-05, "loss": 5.3331, "step": 6924 }, { "epoch": 0.67, "grad_norm": 1.0649224519729614, "learning_rate": 1.6414582121388405e-05, "loss": 5.4143, "step": 6928 }, { "epoch": 0.67, "grad_norm": 1.0141630172729492, "learning_rate": 1.6395191002520845e-05, "loss": 5.2971, "step": 6932 }, { "epoch": 0.67, "grad_norm": 1.06792414188385, "learning_rate": 1.6375799883653286e-05, "loss": 5.359, "step": 6936 }, { "epoch": 0.67, "grad_norm": 1.0014034509658813, "learning_rate": 1.635640876478573e-05, "loss": 5.3262, "step": 6940 }, { "epoch": 0.67, "grad_norm": 1.0073959827423096, "learning_rate": 1.633701764591817e-05, "loss": 5.2941, "step": 6944 }, { "epoch": 0.67, "grad_norm": 1.0175294876098633, "learning_rate": 1.631762652705061e-05, "loss": 5.4023, "step": 6948 }, { "epoch": 0.67, "grad_norm": 1.0541809797286987, "learning_rate": 1.629823540818305e-05, "loss": 5.3224, "step": 6952 }, { "epoch": 0.67, "grad_norm": 1.0851467847824097, "learning_rate": 1.6278844289315495e-05, "loss": 5.3333, "step": 6956 }, { "epoch": 0.67, "grad_norm": 1.081163763999939, "learning_rate": 1.6259453170447936e-05, "loss": 5.4189, "step": 6960 }, { "epoch": 0.68, "grad_norm": 1.0276130437850952, "learning_rate": 1.6240062051580376e-05, "loss": 5.2743, "step": 6964 }, { "epoch": 0.68, "grad_norm": 1.0390230417251587, "learning_rate": 1.6220670932712817e-05, "loss": 5.3315, "step": 6968 }, { "epoch": 0.68, "grad_norm": 0.9651821851730347, "learning_rate": 1.620127981384526e-05, "loss": 5.33, "step": 6972 }, { "epoch": 0.68, "grad_norm": 1.05519437789917, "learning_rate": 1.61818886949777e-05, "loss": 5.3139, "step": 6976 }, { "epoch": 0.68, "grad_norm": 0.9778598546981812, "learning_rate": 1.6162497576110142e-05, "loss": 5.303, "step": 6980 }, { "epoch": 0.68, "grad_norm": 1.0078487396240234, "learning_rate": 1.6143106457242586e-05, "loss": 5.3094, "step": 6984 }, { "epoch": 0.68, "grad_norm": 1.0488457679748535, "learning_rate": 1.6123715338375026e-05, "loss": 5.2953, "step": 6988 }, { "epoch": 0.68, "grad_norm": 1.0421743392944336, "learning_rate": 1.6104324219507467e-05, "loss": 5.394, "step": 6992 }, { "epoch": 0.68, "grad_norm": 0.9976141452789307, "learning_rate": 1.6084933100639907e-05, "loss": 5.3734, "step": 6996 }, { "epoch": 0.68, "grad_norm": 1.0560002326965332, "learning_rate": 1.606554198177235e-05, "loss": 5.3631, "step": 7000 }, { "epoch": 0.68, "grad_norm": 1.0117374658584595, "learning_rate": 1.6046150862904792e-05, "loss": 5.3586, "step": 7004 }, { "epoch": 0.68, "grad_norm": 1.0476561784744263, "learning_rate": 1.6026759744037232e-05, "loss": 5.3215, "step": 7008 }, { "epoch": 0.68, "grad_norm": 1.0643686056137085, "learning_rate": 1.6007368625169673e-05, "loss": 5.2853, "step": 7012 }, { "epoch": 0.68, "grad_norm": 1.0695873498916626, "learning_rate": 1.5987977506302117e-05, "loss": 5.3231, "step": 7016 }, { "epoch": 0.68, "grad_norm": 1.0597788095474243, "learning_rate": 1.5968586387434557e-05, "loss": 5.3185, "step": 7020 }, { "epoch": 0.68, "grad_norm": 1.0414984226226807, "learning_rate": 1.5949195268566998e-05, "loss": 5.3407, "step": 7024 }, { "epoch": 0.68, "grad_norm": 0.9906083941459656, "learning_rate": 1.592980414969944e-05, "loss": 5.2572, "step": 7028 }, { "epoch": 0.68, "grad_norm": 0.9810131192207336, "learning_rate": 1.591041303083188e-05, "loss": 5.2424, "step": 7032 }, { "epoch": 0.68, "grad_norm": 1.0690699815750122, "learning_rate": 1.589102191196432e-05, "loss": 5.3503, "step": 7036 }, { "epoch": 0.68, "grad_norm": 1.037235140800476, "learning_rate": 1.5871630793096763e-05, "loss": 5.3903, "step": 7040 }, { "epoch": 0.68, "grad_norm": 0.9991822838783264, "learning_rate": 1.5852239674229204e-05, "loss": 5.3439, "step": 7044 }, { "epoch": 0.68, "grad_norm": 1.0834139585494995, "learning_rate": 1.5832848555361644e-05, "loss": 5.2871, "step": 7048 }, { "epoch": 0.68, "grad_norm": 0.9821889400482178, "learning_rate": 1.5813457436494085e-05, "loss": 5.248, "step": 7052 }, { "epoch": 0.68, "grad_norm": 1.0037583112716675, "learning_rate": 1.5794066317626526e-05, "loss": 5.2412, "step": 7056 }, { "epoch": 0.68, "grad_norm": 1.059874176979065, "learning_rate": 1.577467519875897e-05, "loss": 5.2965, "step": 7060 }, { "epoch": 0.68, "grad_norm": 1.0708321332931519, "learning_rate": 1.575528407989141e-05, "loss": 5.1961, "step": 7064 }, { "epoch": 0.69, "grad_norm": 1.0226424932479858, "learning_rate": 1.573589296102385e-05, "loss": 5.3895, "step": 7068 }, { "epoch": 0.69, "grad_norm": 1.088083028793335, "learning_rate": 1.571650184215629e-05, "loss": 5.3202, "step": 7072 }, { "epoch": 0.69, "grad_norm": 1.1663302183151245, "learning_rate": 1.5697110723288735e-05, "loss": 5.2799, "step": 7076 }, { "epoch": 0.69, "grad_norm": 1.049932599067688, "learning_rate": 1.5677719604421175e-05, "loss": 5.3477, "step": 7080 }, { "epoch": 0.69, "grad_norm": 1.0670647621154785, "learning_rate": 1.5658328485553616e-05, "loss": 5.321, "step": 7084 }, { "epoch": 0.69, "grad_norm": 1.0027042627334595, "learning_rate": 1.5638937366686057e-05, "loss": 5.2589, "step": 7088 }, { "epoch": 0.69, "grad_norm": 1.0624514818191528, "learning_rate": 1.56195462478185e-05, "loss": 5.3408, "step": 7092 }, { "epoch": 0.69, "grad_norm": 1.1033272743225098, "learning_rate": 1.560015512895094e-05, "loss": 5.3141, "step": 7096 }, { "epoch": 0.69, "grad_norm": 0.9922211766242981, "learning_rate": 1.558076401008338e-05, "loss": 5.3127, "step": 7100 }, { "epoch": 0.69, "grad_norm": 1.1714577674865723, "learning_rate": 1.5561372891215825e-05, "loss": 5.3356, "step": 7104 }, { "epoch": 0.69, "grad_norm": 1.092363715171814, "learning_rate": 1.5541981772348266e-05, "loss": 5.3375, "step": 7108 }, { "epoch": 0.69, "grad_norm": 1.1520874500274658, "learning_rate": 1.5522590653480707e-05, "loss": 5.38, "step": 7112 }, { "epoch": 0.69, "grad_norm": 1.0429631471633911, "learning_rate": 1.5503199534613147e-05, "loss": 5.2862, "step": 7116 }, { "epoch": 0.69, "grad_norm": 1.133023977279663, "learning_rate": 1.548380841574559e-05, "loss": 5.3376, "step": 7120 }, { "epoch": 0.69, "grad_norm": 1.0379852056503296, "learning_rate": 1.546441729687803e-05, "loss": 5.3617, "step": 7124 }, { "epoch": 0.69, "grad_norm": 0.9884594082832336, "learning_rate": 1.5445026178010472e-05, "loss": 5.3478, "step": 7128 }, { "epoch": 0.69, "grad_norm": 1.0757851600646973, "learning_rate": 1.5425635059142913e-05, "loss": 5.3307, "step": 7132 }, { "epoch": 0.69, "grad_norm": 1.0310724973678589, "learning_rate": 1.5406243940275357e-05, "loss": 5.2984, "step": 7136 }, { "epoch": 0.69, "grad_norm": 0.989493727684021, "learning_rate": 1.5386852821407797e-05, "loss": 5.4285, "step": 7140 }, { "epoch": 0.69, "grad_norm": 0.950312614440918, "learning_rate": 1.5367461702540238e-05, "loss": 5.2814, "step": 7144 }, { "epoch": 0.69, "grad_norm": 1.0180491209030151, "learning_rate": 1.5348070583672678e-05, "loss": 5.3137, "step": 7148 }, { "epoch": 0.69, "grad_norm": 1.0413585901260376, "learning_rate": 1.5328679464805122e-05, "loss": 5.3889, "step": 7152 }, { "epoch": 0.69, "grad_norm": 1.0932857990264893, "learning_rate": 1.5309288345937563e-05, "loss": 5.4341, "step": 7156 }, { "epoch": 0.69, "grad_norm": 1.06010901927948, "learning_rate": 1.5289897227070003e-05, "loss": 5.3442, "step": 7160 }, { "epoch": 0.69, "grad_norm": 1.0883803367614746, "learning_rate": 1.5270506108202444e-05, "loss": 5.3341, "step": 7164 }, { "epoch": 0.69, "grad_norm": 1.0533767938613892, "learning_rate": 1.5251114989334886e-05, "loss": 5.2995, "step": 7168 }, { "epoch": 0.7, "grad_norm": 0.9598619937896729, "learning_rate": 1.5231723870467326e-05, "loss": 5.3465, "step": 7172 }, { "epoch": 0.7, "grad_norm": 1.0123393535614014, "learning_rate": 1.5212332751599767e-05, "loss": 5.2892, "step": 7176 }, { "epoch": 0.7, "grad_norm": 1.0960266590118408, "learning_rate": 1.5192941632732211e-05, "loss": 5.332, "step": 7180 }, { "epoch": 0.7, "grad_norm": 1.1648544073104858, "learning_rate": 1.5173550513864651e-05, "loss": 5.4285, "step": 7184 }, { "epoch": 0.7, "grad_norm": 1.051171064376831, "learning_rate": 1.5154159394997092e-05, "loss": 5.4299, "step": 7188 }, { "epoch": 0.7, "grad_norm": 1.0466375350952148, "learning_rate": 1.5134768276129533e-05, "loss": 5.3216, "step": 7192 }, { "epoch": 0.7, "grad_norm": 0.9910897612571716, "learning_rate": 1.5115377157261976e-05, "loss": 5.3598, "step": 7196 }, { "epoch": 0.7, "grad_norm": 1.1121070384979248, "learning_rate": 1.5095986038394417e-05, "loss": 5.3169, "step": 7200 }, { "epoch": 0.7, "grad_norm": 1.0514254570007324, "learning_rate": 1.5076594919526858e-05, "loss": 5.3502, "step": 7204 }, { "epoch": 0.7, "grad_norm": 1.0363171100616455, "learning_rate": 1.5057203800659298e-05, "loss": 5.3192, "step": 7208 }, { "epoch": 0.7, "grad_norm": 1.0386378765106201, "learning_rate": 1.503781268179174e-05, "loss": 5.3244, "step": 7212 }, { "epoch": 0.7, "grad_norm": 1.0868582725524902, "learning_rate": 1.5018421562924181e-05, "loss": 5.2998, "step": 7216 }, { "epoch": 0.7, "grad_norm": 1.106695532798767, "learning_rate": 1.4999030444056621e-05, "loss": 5.2703, "step": 7220 }, { "epoch": 0.7, "grad_norm": 1.0727300643920898, "learning_rate": 1.4979639325189065e-05, "loss": 5.2557, "step": 7224 }, { "epoch": 0.7, "grad_norm": 1.0959160327911377, "learning_rate": 1.4960248206321506e-05, "loss": 5.2666, "step": 7228 }, { "epoch": 0.7, "grad_norm": 1.0157089233398438, "learning_rate": 1.4940857087453946e-05, "loss": 5.3451, "step": 7232 }, { "epoch": 0.7, "grad_norm": 1.1046866178512573, "learning_rate": 1.4921465968586387e-05, "loss": 5.2349, "step": 7236 }, { "epoch": 0.7, "grad_norm": 1.0781642198562622, "learning_rate": 1.4902074849718831e-05, "loss": 5.2609, "step": 7240 }, { "epoch": 0.7, "grad_norm": 1.0019387006759644, "learning_rate": 1.4882683730851271e-05, "loss": 5.3954, "step": 7244 }, { "epoch": 0.7, "grad_norm": 1.1083266735076904, "learning_rate": 1.4863292611983712e-05, "loss": 5.2636, "step": 7248 }, { "epoch": 0.7, "grad_norm": 1.2309002876281738, "learning_rate": 1.4843901493116152e-05, "loss": 5.2955, "step": 7252 }, { "epoch": 0.7, "grad_norm": 1.087774634361267, "learning_rate": 1.4824510374248596e-05, "loss": 5.3, "step": 7256 }, { "epoch": 0.7, "grad_norm": 1.075287938117981, "learning_rate": 1.4805119255381037e-05, "loss": 5.3727, "step": 7260 }, { "epoch": 0.7, "grad_norm": 1.0246081352233887, "learning_rate": 1.4785728136513477e-05, "loss": 5.3558, "step": 7264 }, { "epoch": 0.7, "grad_norm": 1.124543309211731, "learning_rate": 1.4766337017645918e-05, "loss": 5.3379, "step": 7268 }, { "epoch": 0.71, "grad_norm": 1.047892689704895, "learning_rate": 1.474694589877836e-05, "loss": 5.3975, "step": 7272 }, { "epoch": 0.71, "grad_norm": 1.0381947755813599, "learning_rate": 1.47275547799108e-05, "loss": 5.3515, "step": 7276 }, { "epoch": 0.71, "grad_norm": 1.0230307579040527, "learning_rate": 1.4708163661043243e-05, "loss": 5.2925, "step": 7280 }, { "epoch": 0.71, "grad_norm": 1.0596458911895752, "learning_rate": 1.4688772542175685e-05, "loss": 5.3191, "step": 7284 }, { "epoch": 0.71, "grad_norm": 1.1031346321105957, "learning_rate": 1.4669381423308126e-05, "loss": 5.402, "step": 7288 }, { "epoch": 0.71, "grad_norm": 1.0289580821990967, "learning_rate": 1.4649990304440566e-05, "loss": 5.3957, "step": 7292 }, { "epoch": 0.71, "grad_norm": 1.1469511985778809, "learning_rate": 1.4630599185573007e-05, "loss": 5.3244, "step": 7296 }, { "epoch": 0.71, "grad_norm": 1.0669410228729248, "learning_rate": 1.461120806670545e-05, "loss": 5.431, "step": 7300 }, { "epoch": 0.71, "grad_norm": 1.05574631690979, "learning_rate": 1.4591816947837891e-05, "loss": 5.3382, "step": 7304 }, { "epoch": 0.71, "grad_norm": 1.0296452045440674, "learning_rate": 1.4572425828970332e-05, "loss": 5.337, "step": 7308 }, { "epoch": 0.71, "grad_norm": 1.0180591344833374, "learning_rate": 1.4553034710102772e-05, "loss": 5.3239, "step": 7312 }, { "epoch": 0.71, "grad_norm": 1.0508371591567993, "learning_rate": 1.4533643591235216e-05, "loss": 5.2944, "step": 7316 }, { "epoch": 0.71, "grad_norm": 1.0255225896835327, "learning_rate": 1.4514252472367657e-05, "loss": 5.3574, "step": 7320 }, { "epoch": 0.71, "grad_norm": 1.0599967241287231, "learning_rate": 1.4494861353500097e-05, "loss": 5.3568, "step": 7324 }, { "epoch": 0.71, "grad_norm": 0.9832557439804077, "learning_rate": 1.4475470234632538e-05, "loss": 5.2891, "step": 7328 }, { "epoch": 0.71, "grad_norm": 1.0541869401931763, "learning_rate": 1.4456079115764982e-05, "loss": 5.2502, "step": 7332 }, { "epoch": 0.71, "grad_norm": 1.069973111152649, "learning_rate": 1.4436687996897422e-05, "loss": 5.3376, "step": 7336 }, { "epoch": 0.71, "grad_norm": 1.0768502950668335, "learning_rate": 1.4417296878029863e-05, "loss": 5.2392, "step": 7340 }, { "epoch": 0.71, "grad_norm": 1.000628113746643, "learning_rate": 1.4397905759162305e-05, "loss": 5.3293, "step": 7344 }, { "epoch": 0.71, "grad_norm": 0.9960778951644897, "learning_rate": 1.4378514640294746e-05, "loss": 5.2655, "step": 7348 }, { "epoch": 0.71, "grad_norm": 1.0327279567718506, "learning_rate": 1.4359123521427186e-05, "loss": 5.3477, "step": 7352 }, { "epoch": 0.71, "grad_norm": 1.0809035301208496, "learning_rate": 1.4339732402559627e-05, "loss": 5.331, "step": 7356 }, { "epoch": 0.71, "grad_norm": 1.0690807104110718, "learning_rate": 1.432034128369207e-05, "loss": 5.3986, "step": 7360 }, { "epoch": 0.71, "grad_norm": 1.0245548486709595, "learning_rate": 1.4300950164824511e-05, "loss": 5.3024, "step": 7364 }, { "epoch": 0.71, "grad_norm": 0.9999493956565857, "learning_rate": 1.4281559045956952e-05, "loss": 5.2878, "step": 7368 }, { "epoch": 0.71, "grad_norm": 1.0037769079208374, "learning_rate": 1.4262167927089392e-05, "loss": 5.2216, "step": 7372 }, { "epoch": 0.72, "grad_norm": 1.002113699913025, "learning_rate": 1.4242776808221836e-05, "loss": 5.4347, "step": 7376 }, { "epoch": 0.72, "grad_norm": 1.04192054271698, "learning_rate": 1.4223385689354277e-05, "loss": 5.3812, "step": 7380 }, { "epoch": 0.72, "grad_norm": 1.0271295309066772, "learning_rate": 1.4203994570486717e-05, "loss": 5.427, "step": 7384 }, { "epoch": 0.72, "grad_norm": 1.0071477890014648, "learning_rate": 1.4184603451619158e-05, "loss": 5.2727, "step": 7388 }, { "epoch": 0.72, "grad_norm": 1.0228255987167358, "learning_rate": 1.4165212332751602e-05, "loss": 5.3244, "step": 7392 }, { "epoch": 0.72, "grad_norm": 1.0034810304641724, "learning_rate": 1.4145821213884042e-05, "loss": 5.3883, "step": 7396 }, { "epoch": 0.72, "grad_norm": 1.0305688381195068, "learning_rate": 1.4126430095016483e-05, "loss": 5.3851, "step": 7400 }, { "epoch": 0.72, "grad_norm": 1.074646234512329, "learning_rate": 1.4107038976148925e-05, "loss": 5.2406, "step": 7404 }, { "epoch": 0.72, "grad_norm": 0.9961770176887512, "learning_rate": 1.4087647857281366e-05, "loss": 5.2848, "step": 7408 }, { "epoch": 0.72, "grad_norm": 1.034627079963684, "learning_rate": 1.4068256738413808e-05, "loss": 5.2984, "step": 7412 }, { "epoch": 0.72, "grad_norm": 1.072096824645996, "learning_rate": 1.4048865619546248e-05, "loss": 5.3182, "step": 7416 }, { "epoch": 0.72, "grad_norm": 1.0899096727371216, "learning_rate": 1.402947450067869e-05, "loss": 5.3017, "step": 7420 }, { "epoch": 0.72, "grad_norm": 1.1063376665115356, "learning_rate": 1.4010083381811131e-05, "loss": 5.311, "step": 7424 }, { "epoch": 0.72, "grad_norm": 1.0748202800750732, "learning_rate": 1.3990692262943572e-05, "loss": 5.318, "step": 7428 }, { "epoch": 0.72, "grad_norm": 1.0851057767868042, "learning_rate": 1.3971301144076012e-05, "loss": 5.3425, "step": 7432 }, { "epoch": 0.72, "grad_norm": 0.9942495822906494, "learning_rate": 1.3951910025208456e-05, "loss": 5.3258, "step": 7436 }, { "epoch": 0.72, "grad_norm": 1.0894228219985962, "learning_rate": 1.3932518906340897e-05, "loss": 5.2769, "step": 7440 }, { "epoch": 0.72, "grad_norm": 0.9603523015975952, "learning_rate": 1.3913127787473337e-05, "loss": 5.4087, "step": 7444 }, { "epoch": 0.72, "grad_norm": 1.1049365997314453, "learning_rate": 1.3893736668605778e-05, "loss": 5.339, "step": 7448 }, { "epoch": 0.72, "grad_norm": 1.021468162536621, "learning_rate": 1.3874345549738222e-05, "loss": 5.3447, "step": 7452 }, { "epoch": 0.72, "grad_norm": 1.088437557220459, "learning_rate": 1.3854954430870662e-05, "loss": 5.3546, "step": 7456 }, { "epoch": 0.72, "grad_norm": 1.0266107320785522, "learning_rate": 1.3835563312003103e-05, "loss": 5.1867, "step": 7460 }, { "epoch": 0.72, "grad_norm": 1.088911533355713, "learning_rate": 1.3816172193135547e-05, "loss": 5.3247, "step": 7464 }, { "epoch": 0.72, "grad_norm": 1.0675928592681885, "learning_rate": 1.3796781074267987e-05, "loss": 5.2807, "step": 7468 }, { "epoch": 0.72, "grad_norm": 0.9511438608169556, "learning_rate": 1.3777389955400428e-05, "loss": 5.3774, "step": 7472 }, { "epoch": 0.72, "grad_norm": 1.0546114444732666, "learning_rate": 1.3757998836532868e-05, "loss": 5.2856, "step": 7476 }, { "epoch": 0.73, "grad_norm": 1.0445231199264526, "learning_rate": 1.373860771766531e-05, "loss": 5.3805, "step": 7480 }, { "epoch": 0.73, "grad_norm": 1.0493693351745605, "learning_rate": 1.3719216598797751e-05, "loss": 5.3603, "step": 7484 }, { "epoch": 0.73, "grad_norm": 1.0047773122787476, "learning_rate": 1.3699825479930192e-05, "loss": 5.3224, "step": 7488 }, { "epoch": 0.73, "grad_norm": 1.0536508560180664, "learning_rate": 1.3680434361062632e-05, "loss": 5.4235, "step": 7492 }, { "epoch": 0.73, "grad_norm": 1.0347819328308105, "learning_rate": 1.3661043242195076e-05, "loss": 5.2528, "step": 7496 }, { "epoch": 0.73, "grad_norm": 1.0392394065856934, "learning_rate": 1.3641652123327517e-05, "loss": 5.4127, "step": 7500 }, { "epoch": 0.73, "grad_norm": 1.0930792093276978, "learning_rate": 1.3622261004459957e-05, "loss": 5.3221, "step": 7504 }, { "epoch": 0.73, "grad_norm": 0.9962918758392334, "learning_rate": 1.3602869885592398e-05, "loss": 5.3108, "step": 7508 }, { "epoch": 0.73, "grad_norm": 1.030331015586853, "learning_rate": 1.3583478766724842e-05, "loss": 5.2632, "step": 7512 }, { "epoch": 0.73, "grad_norm": 1.008636236190796, "learning_rate": 1.3564087647857282e-05, "loss": 5.3089, "step": 7516 }, { "epoch": 0.73, "grad_norm": 1.006934642791748, "learning_rate": 1.3544696528989723e-05, "loss": 5.2903, "step": 7520 }, { "epoch": 0.73, "grad_norm": 1.0194462537765503, "learning_rate": 1.3525305410122167e-05, "loss": 5.3827, "step": 7524 }, { "epoch": 0.73, "grad_norm": 0.9879323840141296, "learning_rate": 1.3505914291254607e-05, "loss": 5.4307, "step": 7528 }, { "epoch": 0.73, "grad_norm": 1.0651185512542725, "learning_rate": 1.3486523172387048e-05, "loss": 5.3134, "step": 7532 }, { "epoch": 0.73, "grad_norm": 1.0858125686645508, "learning_rate": 1.3467132053519488e-05, "loss": 5.3024, "step": 7536 }, { "epoch": 0.73, "grad_norm": 1.0240780115127563, "learning_rate": 1.344774093465193e-05, "loss": 5.3555, "step": 7540 }, { "epoch": 0.73, "grad_norm": 1.0629985332489014, "learning_rate": 1.3428349815784373e-05, "loss": 5.3652, "step": 7544 }, { "epoch": 0.73, "grad_norm": 1.1222716569900513, "learning_rate": 1.3408958696916813e-05, "loss": 5.3845, "step": 7548 }, { "epoch": 0.73, "grad_norm": 1.0008291006088257, "learning_rate": 1.3389567578049254e-05, "loss": 5.3552, "step": 7552 }, { "epoch": 0.73, "grad_norm": 1.0860753059387207, "learning_rate": 1.3370176459181696e-05, "loss": 5.3044, "step": 7556 }, { "epoch": 0.73, "grad_norm": 1.0495448112487793, "learning_rate": 1.3350785340314136e-05, "loss": 5.3259, "step": 7560 }, { "epoch": 0.73, "grad_norm": 1.0432664155960083, "learning_rate": 1.3331394221446577e-05, "loss": 5.3482, "step": 7564 }, { "epoch": 0.73, "grad_norm": 1.0440526008605957, "learning_rate": 1.3312003102579018e-05, "loss": 5.2362, "step": 7568 }, { "epoch": 0.73, "grad_norm": 1.0317118167877197, "learning_rate": 1.3292611983711461e-05, "loss": 5.3579, "step": 7572 }, { "epoch": 0.73, "grad_norm": 0.9613714218139648, "learning_rate": 1.3273220864843902e-05, "loss": 5.3631, "step": 7576 }, { "epoch": 0.73, "grad_norm": 1.1608860492706299, "learning_rate": 1.3253829745976343e-05, "loss": 5.3547, "step": 7580 }, { "epoch": 0.74, "grad_norm": 1.06599760055542, "learning_rate": 1.3234438627108786e-05, "loss": 5.3728, "step": 7584 }, { "epoch": 0.74, "grad_norm": 1.0115044116973877, "learning_rate": 1.3215047508241227e-05, "loss": 5.2954, "step": 7588 }, { "epoch": 0.74, "grad_norm": 1.0504167079925537, "learning_rate": 1.3195656389373668e-05, "loss": 5.3378, "step": 7592 }, { "epoch": 0.74, "grad_norm": 1.1047917604446411, "learning_rate": 1.3176265270506108e-05, "loss": 5.3153, "step": 7596 }, { "epoch": 0.74, "grad_norm": 1.0862175226211548, "learning_rate": 1.3156874151638552e-05, "loss": 5.4315, "step": 7600 }, { "epoch": 0.74, "grad_norm": 1.05397629737854, "learning_rate": 1.3137483032770993e-05, "loss": 5.3807, "step": 7604 }, { "epoch": 0.74, "grad_norm": 1.0143108367919922, "learning_rate": 1.3118091913903433e-05, "loss": 5.346, "step": 7608 }, { "epoch": 0.74, "grad_norm": 1.0487464666366577, "learning_rate": 1.3098700795035874e-05, "loss": 5.2966, "step": 7612 }, { "epoch": 0.74, "grad_norm": 1.156467318534851, "learning_rate": 1.3079309676168316e-05, "loss": 5.24, "step": 7616 }, { "epoch": 0.74, "grad_norm": 1.0586912631988525, "learning_rate": 1.3059918557300756e-05, "loss": 5.3162, "step": 7620 }, { "epoch": 0.74, "grad_norm": 1.0472930669784546, "learning_rate": 1.3040527438433197e-05, "loss": 5.4309, "step": 7624 }, { "epoch": 0.74, "grad_norm": 1.0239797830581665, "learning_rate": 1.3021136319565637e-05, "loss": 5.3124, "step": 7628 }, { "epoch": 0.74, "grad_norm": 1.084915041923523, "learning_rate": 1.3001745200698081e-05, "loss": 5.382, "step": 7632 }, { "epoch": 0.74, "grad_norm": 1.121639370918274, "learning_rate": 1.2982354081830522e-05, "loss": 5.2888, "step": 7636 }, { "epoch": 0.74, "grad_norm": 1.06790030002594, "learning_rate": 1.2962962962962962e-05, "loss": 5.3484, "step": 7640 }, { "epoch": 0.74, "grad_norm": 1.0664012432098389, "learning_rate": 1.2943571844095406e-05, "loss": 5.3624, "step": 7644 }, { "epoch": 0.74, "grad_norm": 1.032593846321106, "learning_rate": 1.2924180725227847e-05, "loss": 5.3109, "step": 7648 }, { "epoch": 0.74, "grad_norm": 1.0182029008865356, "learning_rate": 1.2904789606360287e-05, "loss": 5.3785, "step": 7652 }, { "epoch": 0.74, "grad_norm": 0.9787065982818604, "learning_rate": 1.2885398487492728e-05, "loss": 5.2586, "step": 7656 }, { "epoch": 0.74, "grad_norm": 1.14923095703125, "learning_rate": 1.2866007368625172e-05, "loss": 5.3378, "step": 7660 }, { "epoch": 0.74, "grad_norm": 1.0064685344696045, "learning_rate": 1.2846616249757612e-05, "loss": 5.3339, "step": 7664 }, { "epoch": 0.74, "grad_norm": 1.015594720840454, "learning_rate": 1.2827225130890053e-05, "loss": 5.2412, "step": 7668 }, { "epoch": 0.74, "grad_norm": 1.1527953147888184, "learning_rate": 1.2807834012022494e-05, "loss": 5.4026, "step": 7672 }, { "epoch": 0.74, "grad_norm": 1.024170994758606, "learning_rate": 1.2788442893154936e-05, "loss": 5.2431, "step": 7676 }, { "epoch": 0.74, "grad_norm": 1.0834672451019287, "learning_rate": 1.2769051774287378e-05, "loss": 5.3018, "step": 7680 }, { "epoch": 0.75, "grad_norm": 1.00787353515625, "learning_rate": 1.2749660655419819e-05, "loss": 5.3129, "step": 7684 }, { "epoch": 0.75, "grad_norm": 1.0804412364959717, "learning_rate": 1.2730269536552259e-05, "loss": 5.3974, "step": 7688 }, { "epoch": 0.75, "grad_norm": 1.0361813306808472, "learning_rate": 1.2710878417684701e-05, "loss": 5.291, "step": 7692 }, { "epoch": 0.75, "grad_norm": 0.9639879465103149, "learning_rate": 1.2691487298817142e-05, "loss": 5.3649, "step": 7696 }, { "epoch": 0.75, "grad_norm": 1.035768985748291, "learning_rate": 1.2672096179949582e-05, "loss": 5.3159, "step": 7700 }, { "epoch": 0.75, "grad_norm": 1.002968430519104, "learning_rate": 1.2652705061082026e-05, "loss": 5.1975, "step": 7704 }, { "epoch": 0.75, "grad_norm": 1.0666626691818237, "learning_rate": 1.2633313942214467e-05, "loss": 5.2216, "step": 7708 }, { "epoch": 0.75, "grad_norm": 1.031752347946167, "learning_rate": 1.2613922823346907e-05, "loss": 5.4174, "step": 7712 }, { "epoch": 0.75, "grad_norm": 1.0436227321624756, "learning_rate": 1.2594531704479348e-05, "loss": 5.2295, "step": 7716 }, { "epoch": 0.75, "grad_norm": 1.0823688507080078, "learning_rate": 1.2575140585611792e-05, "loss": 5.3274, "step": 7720 }, { "epoch": 0.75, "grad_norm": 1.0667200088500977, "learning_rate": 1.2555749466744232e-05, "loss": 5.3149, "step": 7724 }, { "epoch": 0.75, "grad_norm": 1.0479573011398315, "learning_rate": 1.2536358347876673e-05, "loss": 5.2653, "step": 7728 }, { "epoch": 0.75, "grad_norm": 1.1973553895950317, "learning_rate": 1.2516967229009113e-05, "loss": 5.2504, "step": 7732 }, { "epoch": 0.75, "grad_norm": 1.0791184902191162, "learning_rate": 1.2497576110141556e-05, "loss": 5.2797, "step": 7736 }, { "epoch": 0.75, "grad_norm": 1.091112732887268, "learning_rate": 1.2478184991273998e-05, "loss": 5.3129, "step": 7740 }, { "epoch": 0.75, "grad_norm": 1.0607527494430542, "learning_rate": 1.2458793872406438e-05, "loss": 5.2997, "step": 7744 }, { "epoch": 0.75, "grad_norm": 1.0152305364608765, "learning_rate": 1.243940275353888e-05, "loss": 5.3655, "step": 7748 }, { "epoch": 0.75, "grad_norm": 1.009064793586731, "learning_rate": 1.2420011634671321e-05, "loss": 5.3058, "step": 7752 }, { "epoch": 0.75, "grad_norm": 1.0338549613952637, "learning_rate": 1.2400620515803762e-05, "loss": 5.2527, "step": 7756 }, { "epoch": 0.75, "grad_norm": 1.0737717151641846, "learning_rate": 1.2381229396936204e-05, "loss": 5.3859, "step": 7760 }, { "epoch": 0.75, "grad_norm": 1.0524028539657593, "learning_rate": 1.2361838278068645e-05, "loss": 5.407, "step": 7764 }, { "epoch": 0.75, "grad_norm": 1.0182230472564697, "learning_rate": 1.2342447159201087e-05, "loss": 5.3243, "step": 7768 }, { "epoch": 0.75, "grad_norm": 1.042325496673584, "learning_rate": 1.2323056040333527e-05, "loss": 5.372, "step": 7772 }, { "epoch": 0.75, "grad_norm": 1.099138855934143, "learning_rate": 1.230366492146597e-05, "loss": 5.3577, "step": 7776 }, { "epoch": 0.75, "grad_norm": 1.007309913635254, "learning_rate": 1.228427380259841e-05, "loss": 5.252, "step": 7780 }, { "epoch": 0.75, "grad_norm": 1.1510696411132812, "learning_rate": 1.2264882683730852e-05, "loss": 5.3726, "step": 7784 }, { "epoch": 0.76, "grad_norm": 1.0110242366790771, "learning_rate": 1.2245491564863293e-05, "loss": 5.3932, "step": 7788 }, { "epoch": 0.76, "grad_norm": 1.0620808601379395, "learning_rate": 1.2226100445995735e-05, "loss": 5.2691, "step": 7792 }, { "epoch": 0.76, "grad_norm": 1.052786946296692, "learning_rate": 1.2206709327128176e-05, "loss": 5.3597, "step": 7796 }, { "epoch": 0.76, "grad_norm": 1.1259844303131104, "learning_rate": 1.2187318208260618e-05, "loss": 5.3324, "step": 7800 }, { "epoch": 0.76, "grad_norm": 1.0244724750518799, "learning_rate": 1.2167927089393058e-05, "loss": 5.3643, "step": 7804 }, { "epoch": 0.76, "grad_norm": 1.1225011348724365, "learning_rate": 1.21485359705255e-05, "loss": 5.3501, "step": 7808 }, { "epoch": 0.76, "grad_norm": 0.9966182112693787, "learning_rate": 1.2129144851657941e-05, "loss": 5.3966, "step": 7812 }, { "epoch": 0.76, "grad_norm": 1.102308988571167, "learning_rate": 1.2109753732790383e-05, "loss": 5.2625, "step": 7816 }, { "epoch": 0.76, "grad_norm": 1.0900803804397583, "learning_rate": 1.2095210393639714e-05, "loss": 5.365, "step": 7820 }, { "epoch": 0.76, "grad_norm": 1.1339807510375977, "learning_rate": 1.2075819274772154e-05, "loss": 5.3816, "step": 7824 }, { "epoch": 0.76, "grad_norm": 1.077379822731018, "learning_rate": 1.2056428155904597e-05, "loss": 5.1332, "step": 7828 }, { "epoch": 0.76, "grad_norm": 1.1201417446136475, "learning_rate": 1.2037037037037037e-05, "loss": 5.3037, "step": 7832 }, { "epoch": 0.76, "grad_norm": 1.1047320365905762, "learning_rate": 1.201764591816948e-05, "loss": 5.4453, "step": 7836 }, { "epoch": 0.76, "grad_norm": 1.0625344514846802, "learning_rate": 1.1998254799301922e-05, "loss": 5.2676, "step": 7840 }, { "epoch": 0.76, "grad_norm": 1.0774505138397217, "learning_rate": 1.1978863680434362e-05, "loss": 5.3222, "step": 7844 }, { "epoch": 0.76, "grad_norm": 1.0557003021240234, "learning_rate": 1.1959472561566804e-05, "loss": 5.3235, "step": 7848 }, { "epoch": 0.76, "grad_norm": 0.9856312274932861, "learning_rate": 1.1940081442699245e-05, "loss": 5.2947, "step": 7852 }, { "epoch": 0.76, "grad_norm": 1.0185786485671997, "learning_rate": 1.1920690323831685e-05, "loss": 5.3262, "step": 7856 }, { "epoch": 0.76, "grad_norm": 1.0777360200881958, "learning_rate": 1.1901299204964126e-05, "loss": 5.254, "step": 7860 }, { "epoch": 0.76, "grad_norm": 1.0238205194473267, "learning_rate": 1.1881908086096568e-05, "loss": 5.356, "step": 7864 }, { "epoch": 0.76, "grad_norm": 1.0025471448898315, "learning_rate": 1.1862516967229009e-05, "loss": 5.2541, "step": 7868 }, { "epoch": 0.76, "grad_norm": 1.030316710472107, "learning_rate": 1.1843125848361451e-05, "loss": 5.2578, "step": 7872 }, { "epoch": 0.76, "grad_norm": 1.091535210609436, "learning_rate": 1.1823734729493891e-05, "loss": 5.3234, "step": 7876 }, { "epoch": 0.76, "grad_norm": 1.0827471017837524, "learning_rate": 1.1804343610626334e-05, "loss": 5.2876, "step": 7880 }, { "epoch": 0.76, "grad_norm": 1.0991337299346924, "learning_rate": 1.1784952491758774e-05, "loss": 5.473, "step": 7884 }, { "epoch": 0.76, "grad_norm": 1.0421675443649292, "learning_rate": 1.1765561372891216e-05, "loss": 5.3059, "step": 7888 }, { "epoch": 0.77, "grad_norm": 1.1006362438201904, "learning_rate": 1.1746170254023657e-05, "loss": 5.2836, "step": 7892 }, { "epoch": 0.77, "grad_norm": 1.0210435390472412, "learning_rate": 1.17267791351561e-05, "loss": 5.3632, "step": 7896 }, { "epoch": 0.77, "grad_norm": 0.9662442207336426, "learning_rate": 1.1707388016288541e-05, "loss": 5.3246, "step": 7900 }, { "epoch": 0.77, "grad_norm": 1.1299954652786255, "learning_rate": 1.1687996897420982e-05, "loss": 5.2499, "step": 7904 }, { "epoch": 0.77, "grad_norm": 1.0942116975784302, "learning_rate": 1.1668605778553424e-05, "loss": 5.2834, "step": 7908 }, { "epoch": 0.77, "grad_norm": 1.0262593030929565, "learning_rate": 1.1649214659685865e-05, "loss": 5.3228, "step": 7912 }, { "epoch": 0.77, "grad_norm": 1.0624995231628418, "learning_rate": 1.1629823540818307e-05, "loss": 5.2603, "step": 7916 }, { "epoch": 0.77, "grad_norm": 1.0851109027862549, "learning_rate": 1.1610432421950747e-05, "loss": 5.2343, "step": 7920 }, { "epoch": 0.77, "grad_norm": 1.0987358093261719, "learning_rate": 1.1591041303083188e-05, "loss": 5.2615, "step": 7924 }, { "epoch": 0.77, "grad_norm": 1.0852704048156738, "learning_rate": 1.1571650184215629e-05, "loss": 5.2886, "step": 7928 }, { "epoch": 0.77, "grad_norm": 1.0782544612884521, "learning_rate": 1.155225906534807e-05, "loss": 5.3585, "step": 7932 }, { "epoch": 0.77, "grad_norm": 1.0928773880004883, "learning_rate": 1.1532867946480511e-05, "loss": 5.2083, "step": 7936 }, { "epoch": 0.77, "grad_norm": 0.9779551029205322, "learning_rate": 1.1513476827612954e-05, "loss": 5.2969, "step": 7940 }, { "epoch": 0.77, "grad_norm": 1.0859659910202026, "learning_rate": 1.1494085708745394e-05, "loss": 5.3044, "step": 7944 }, { "epoch": 0.77, "grad_norm": 1.0626839399337769, "learning_rate": 1.1474694589877836e-05, "loss": 5.3455, "step": 7948 }, { "epoch": 0.77, "grad_norm": 1.1160836219787598, "learning_rate": 1.1455303471010277e-05, "loss": 5.3082, "step": 7952 }, { "epoch": 0.77, "grad_norm": 1.089357614517212, "learning_rate": 1.1435912352142719e-05, "loss": 5.3013, "step": 7956 }, { "epoch": 0.77, "grad_norm": 0.9616773128509521, "learning_rate": 1.1416521233275161e-05, "loss": 5.2995, "step": 7960 }, { "epoch": 0.77, "grad_norm": 1.0657833814620972, "learning_rate": 1.1397130114407602e-05, "loss": 5.3208, "step": 7964 }, { "epoch": 0.77, "grad_norm": 1.0845454931259155, "learning_rate": 1.1377738995540044e-05, "loss": 5.3213, "step": 7968 }, { "epoch": 0.77, "grad_norm": 1.0332484245300293, "learning_rate": 1.1358347876672485e-05, "loss": 5.2833, "step": 7972 }, { "epoch": 0.77, "grad_norm": 1.0877047777175903, "learning_rate": 1.1338956757804927e-05, "loss": 5.2889, "step": 7976 }, { "epoch": 0.77, "grad_norm": 1.0782090425491333, "learning_rate": 1.1319565638937367e-05, "loss": 5.2914, "step": 7980 }, { "epoch": 0.77, "grad_norm": 1.0773468017578125, "learning_rate": 1.130017452006981e-05, "loss": 5.2853, "step": 7984 }, { "epoch": 0.77, "grad_norm": 1.1031116247177124, "learning_rate": 1.128078340120225e-05, "loss": 5.2285, "step": 7988 }, { "epoch": 0.77, "grad_norm": 1.0579017400741577, "learning_rate": 1.126139228233469e-05, "loss": 5.2608, "step": 7992 }, { "epoch": 0.78, "grad_norm": 1.0039610862731934, "learning_rate": 1.1242001163467133e-05, "loss": 5.2801, "step": 7996 }, { "epoch": 0.78, "grad_norm": 1.1101170778274536, "learning_rate": 1.1222610044599573e-05, "loss": 5.3003, "step": 8000 }, { "epoch": 0.78, "grad_norm": 1.1231920719146729, "learning_rate": 1.1203218925732014e-05, "loss": 5.2951, "step": 8004 }, { "epoch": 0.78, "grad_norm": 1.104062557220459, "learning_rate": 1.1183827806864456e-05, "loss": 5.2419, "step": 8008 }, { "epoch": 0.78, "grad_norm": 1.0631533861160278, "learning_rate": 1.1164436687996897e-05, "loss": 5.2927, "step": 8012 }, { "epoch": 0.78, "grad_norm": 1.0923023223876953, "learning_rate": 1.1145045569129339e-05, "loss": 5.3566, "step": 8016 }, { "epoch": 0.78, "grad_norm": 1.1646323204040527, "learning_rate": 1.1125654450261781e-05, "loss": 5.229, "step": 8020 }, { "epoch": 0.78, "grad_norm": 1.0775333642959595, "learning_rate": 1.1106263331394222e-05, "loss": 5.3121, "step": 8024 }, { "epoch": 0.78, "grad_norm": 1.101682186126709, "learning_rate": 1.1086872212526664e-05, "loss": 5.3746, "step": 8028 }, { "epoch": 0.78, "grad_norm": 1.06061589717865, "learning_rate": 1.1067481093659105e-05, "loss": 5.2878, "step": 8032 }, { "epoch": 0.78, "grad_norm": 1.0500963926315308, "learning_rate": 1.1048089974791547e-05, "loss": 5.3188, "step": 8036 }, { "epoch": 0.78, "grad_norm": 1.1280819177627563, "learning_rate": 1.1028698855923987e-05, "loss": 5.2829, "step": 8040 }, { "epoch": 0.78, "grad_norm": 1.0025454759597778, "learning_rate": 1.100930773705643e-05, "loss": 5.3121, "step": 8044 }, { "epoch": 0.78, "grad_norm": 1.0445626974105835, "learning_rate": 1.098991661818887e-05, "loss": 5.3264, "step": 8048 }, { "epoch": 0.78, "grad_norm": 1.0949641466140747, "learning_rate": 1.0970525499321312e-05, "loss": 5.3306, "step": 8052 }, { "epoch": 0.78, "grad_norm": 1.044668197631836, "learning_rate": 1.0951134380453753e-05, "loss": 5.3335, "step": 8056 }, { "epoch": 0.78, "grad_norm": 1.079334020614624, "learning_rate": 1.0931743261586193e-05, "loss": 5.3383, "step": 8060 }, { "epoch": 0.78, "grad_norm": 1.0334477424621582, "learning_rate": 1.0912352142718636e-05, "loss": 5.3128, "step": 8064 }, { "epoch": 0.78, "grad_norm": 0.9899519681930542, "learning_rate": 1.0892961023851076e-05, "loss": 5.3245, "step": 8068 }, { "epoch": 0.78, "grad_norm": 1.0130146741867065, "learning_rate": 1.0873569904983517e-05, "loss": 5.2891, "step": 8072 }, { "epoch": 0.78, "grad_norm": 1.0125179290771484, "learning_rate": 1.0854178786115959e-05, "loss": 5.3007, "step": 8076 }, { "epoch": 0.78, "grad_norm": 1.0482258796691895, "learning_rate": 1.0834787667248401e-05, "loss": 5.2417, "step": 8080 }, { "epoch": 0.78, "grad_norm": 1.055640697479248, "learning_rate": 1.0815396548380842e-05, "loss": 5.3779, "step": 8084 }, { "epoch": 0.78, "grad_norm": 1.0579723119735718, "learning_rate": 1.0796005429513284e-05, "loss": 5.232, "step": 8088 }, { "epoch": 0.78, "grad_norm": 1.083598017692566, "learning_rate": 1.0776614310645724e-05, "loss": 5.4277, "step": 8092 }, { "epoch": 0.78, "grad_norm": 0.9834340214729309, "learning_rate": 1.0757223191778167e-05, "loss": 5.2498, "step": 8096 }, { "epoch": 0.79, "grad_norm": 1.0400574207305908, "learning_rate": 1.0737832072910607e-05, "loss": 5.3321, "step": 8100 }, { "epoch": 0.79, "grad_norm": 1.083775281906128, "learning_rate": 1.071844095404305e-05, "loss": 5.1589, "step": 8104 }, { "epoch": 0.79, "grad_norm": 1.035290241241455, "learning_rate": 1.069904983517549e-05, "loss": 5.4106, "step": 8108 }, { "epoch": 0.79, "grad_norm": 1.0348436832427979, "learning_rate": 1.0679658716307932e-05, "loss": 5.2728, "step": 8112 }, { "epoch": 0.79, "grad_norm": 1.058597445487976, "learning_rate": 1.0660267597440373e-05, "loss": 5.2081, "step": 8116 }, { "epoch": 0.79, "grad_norm": 1.0467309951782227, "learning_rate": 1.0640876478572815e-05, "loss": 5.232, "step": 8120 }, { "epoch": 0.79, "grad_norm": 0.9697101712226868, "learning_rate": 1.0621485359705256e-05, "loss": 5.2641, "step": 8124 }, { "epoch": 0.79, "grad_norm": 1.0596665143966675, "learning_rate": 1.0602094240837698e-05, "loss": 5.2865, "step": 8128 }, { "epoch": 0.79, "grad_norm": 1.0948309898376465, "learning_rate": 1.0582703121970138e-05, "loss": 5.2826, "step": 8132 }, { "epoch": 0.79, "grad_norm": 1.0270111560821533, "learning_rate": 1.0563312003102579e-05, "loss": 5.2726, "step": 8136 }, { "epoch": 0.79, "grad_norm": 1.0012414455413818, "learning_rate": 1.0543920884235021e-05, "loss": 5.3184, "step": 8140 }, { "epoch": 0.79, "grad_norm": 1.019332766532898, "learning_rate": 1.0524529765367462e-05, "loss": 5.2908, "step": 8144 }, { "epoch": 0.79, "grad_norm": 1.0300483703613281, "learning_rate": 1.0505138646499904e-05, "loss": 5.3333, "step": 8148 }, { "epoch": 0.79, "grad_norm": 1.0536975860595703, "learning_rate": 1.0485747527632344e-05, "loss": 5.2944, "step": 8152 }, { "epoch": 0.79, "grad_norm": 0.9881764650344849, "learning_rate": 1.0466356408764787e-05, "loss": 5.2767, "step": 8156 }, { "epoch": 0.79, "grad_norm": 1.0163639783859253, "learning_rate": 1.0446965289897227e-05, "loss": 5.2723, "step": 8160 }, { "epoch": 0.79, "grad_norm": 1.0919207334518433, "learning_rate": 1.042757417102967e-05, "loss": 5.3327, "step": 8164 }, { "epoch": 0.79, "grad_norm": 1.0306917428970337, "learning_rate": 1.040818305216211e-05, "loss": 5.2565, "step": 8168 }, { "epoch": 0.79, "grad_norm": 1.0125017166137695, "learning_rate": 1.0388791933294552e-05, "loss": 5.342, "step": 8172 }, { "epoch": 0.79, "grad_norm": 1.0879104137420654, "learning_rate": 1.0369400814426993e-05, "loss": 5.3122, "step": 8176 }, { "epoch": 0.79, "grad_norm": 0.9527262449264526, "learning_rate": 1.0350009695559435e-05, "loss": 5.284, "step": 8180 }, { "epoch": 0.79, "grad_norm": 1.0486680269241333, "learning_rate": 1.0330618576691875e-05, "loss": 5.2428, "step": 8184 }, { "epoch": 0.79, "grad_norm": 1.0278397798538208, "learning_rate": 1.0311227457824318e-05, "loss": 5.3466, "step": 8188 }, { "epoch": 0.79, "grad_norm": 1.0635344982147217, "learning_rate": 1.0291836338956758e-05, "loss": 5.2645, "step": 8192 }, { "epoch": 0.79, "grad_norm": 1.0180613994598389, "learning_rate": 1.02724452200892e-05, "loss": 5.3552, "step": 8196 }, { "epoch": 0.8, "grad_norm": 1.0265159606933594, "learning_rate": 1.0253054101221641e-05, "loss": 5.3422, "step": 8200 }, { "epoch": 0.8, "grad_norm": 1.029842495918274, "learning_rate": 1.0233662982354082e-05, "loss": 5.4009, "step": 8204 }, { "epoch": 0.8, "grad_norm": 1.0848013162612915, "learning_rate": 1.0214271863486524e-05, "loss": 5.3982, "step": 8208 }, { "epoch": 0.8, "grad_norm": 1.0277658700942993, "learning_rate": 1.0194880744618964e-05, "loss": 5.3567, "step": 8212 }, { "epoch": 0.8, "grad_norm": 1.0273760557174683, "learning_rate": 1.0175489625751407e-05, "loss": 5.2762, "step": 8216 }, { "epoch": 0.8, "grad_norm": 0.9951087832450867, "learning_rate": 1.0156098506883847e-05, "loss": 5.3267, "step": 8220 }, { "epoch": 0.8, "grad_norm": 1.0347890853881836, "learning_rate": 1.013670738801629e-05, "loss": 5.2681, "step": 8224 }, { "epoch": 0.8, "grad_norm": 1.076242446899414, "learning_rate": 1.011731626914873e-05, "loss": 5.3621, "step": 8228 }, { "epoch": 0.8, "grad_norm": 1.02762770652771, "learning_rate": 1.0097925150281172e-05, "loss": 5.3412, "step": 8232 }, { "epoch": 0.8, "grad_norm": 1.0394989252090454, "learning_rate": 1.0078534031413613e-05, "loss": 5.2555, "step": 8236 }, { "epoch": 0.8, "grad_norm": 1.0912150144577026, "learning_rate": 1.0059142912546055e-05, "loss": 5.3784, "step": 8240 }, { "epoch": 0.8, "grad_norm": 1.0681991577148438, "learning_rate": 1.0039751793678495e-05, "loss": 5.3226, "step": 8244 }, { "epoch": 0.8, "grad_norm": 1.0572381019592285, "learning_rate": 1.0020360674810938e-05, "loss": 5.2935, "step": 8248 }, { "epoch": 0.8, "grad_norm": 1.0432649850845337, "learning_rate": 1.0000969555943378e-05, "loss": 5.3087, "step": 8252 }, { "epoch": 0.8, "grad_norm": 1.0084688663482666, "learning_rate": 9.98157843707582e-06, "loss": 5.2778, "step": 8256 }, { "epoch": 0.8, "grad_norm": 1.036868691444397, "learning_rate": 9.962187318208263e-06, "loss": 5.31, "step": 8260 }, { "epoch": 0.8, "grad_norm": 1.0464235544204712, "learning_rate": 9.942796199340703e-06, "loss": 5.3512, "step": 8264 }, { "epoch": 0.8, "grad_norm": 1.001470685005188, "learning_rate": 9.923405080473144e-06, "loss": 5.2698, "step": 8268 }, { "epoch": 0.8, "grad_norm": 1.0910736322402954, "learning_rate": 9.904013961605584e-06, "loss": 5.363, "step": 8272 }, { "epoch": 0.8, "grad_norm": 1.087928056716919, "learning_rate": 9.884622842738026e-06, "loss": 5.3454, "step": 8276 }, { "epoch": 0.8, "grad_norm": 1.055014967918396, "learning_rate": 9.865231723870467e-06, "loss": 5.3134, "step": 8280 }, { "epoch": 0.8, "grad_norm": 1.1186180114746094, "learning_rate": 9.84584060500291e-06, "loss": 5.3016, "step": 8284 }, { "epoch": 0.8, "grad_norm": 1.0159074068069458, "learning_rate": 9.82644948613535e-06, "loss": 5.3974, "step": 8288 }, { "epoch": 0.8, "grad_norm": 1.1419733762741089, "learning_rate": 9.807058367267792e-06, "loss": 5.333, "step": 8292 }, { "epoch": 0.8, "grad_norm": 1.078598976135254, "learning_rate": 9.787667248400232e-06, "loss": 5.2157, "step": 8296 }, { "epoch": 0.8, "grad_norm": 1.0978525876998901, "learning_rate": 9.768276129532675e-06, "loss": 5.3084, "step": 8300 }, { "epoch": 0.81, "grad_norm": 1.0307817459106445, "learning_rate": 9.748885010665115e-06, "loss": 5.2962, "step": 8304 }, { "epoch": 0.81, "grad_norm": 0.9767160415649414, "learning_rate": 9.729493891797557e-06, "loss": 5.3765, "step": 8308 }, { "epoch": 0.81, "grad_norm": 1.048744797706604, "learning_rate": 9.710102772929998e-06, "loss": 5.3491, "step": 8312 }, { "epoch": 0.81, "grad_norm": 1.1452877521514893, "learning_rate": 9.69071165406244e-06, "loss": 5.3348, "step": 8316 }, { "epoch": 0.81, "grad_norm": 1.0742149353027344, "learning_rate": 9.671320535194882e-06, "loss": 5.4119, "step": 8320 }, { "epoch": 0.81, "grad_norm": 1.016554594039917, "learning_rate": 9.651929416327323e-06, "loss": 5.2832, "step": 8324 }, { "epoch": 0.81, "grad_norm": 1.1286094188690186, "learning_rate": 9.632538297459765e-06, "loss": 5.3436, "step": 8328 }, { "epoch": 0.81, "grad_norm": 1.0719375610351562, "learning_rate": 9.613147178592206e-06, "loss": 5.2561, "step": 8332 }, { "epoch": 0.81, "grad_norm": 1.0239946842193604, "learning_rate": 9.593756059724646e-06, "loss": 5.3411, "step": 8336 }, { "epoch": 0.81, "grad_norm": 1.16642165184021, "learning_rate": 9.574364940857087e-06, "loss": 5.2173, "step": 8340 }, { "epoch": 0.81, "grad_norm": 1.056943655014038, "learning_rate": 9.554973821989529e-06, "loss": 5.3138, "step": 8344 }, { "epoch": 0.81, "grad_norm": 1.0310717821121216, "learning_rate": 9.53558270312197e-06, "loss": 5.2147, "step": 8348 }, { "epoch": 0.81, "grad_norm": 1.0939549207687378, "learning_rate": 9.516191584254412e-06, "loss": 5.3849, "step": 8352 }, { "epoch": 0.81, "grad_norm": 1.0846009254455566, "learning_rate": 9.496800465386852e-06, "loss": 5.2793, "step": 8356 }, { "epoch": 0.81, "grad_norm": 1.0984148979187012, "learning_rate": 9.477409346519295e-06, "loss": 5.3142, "step": 8360 }, { "epoch": 0.81, "grad_norm": 1.035758376121521, "learning_rate": 9.458018227651735e-06, "loss": 5.264, "step": 8364 }, { "epoch": 0.81, "grad_norm": 1.0837132930755615, "learning_rate": 9.438627108784177e-06, "loss": 5.432, "step": 8368 }, { "epoch": 0.81, "grad_norm": 1.0333995819091797, "learning_rate": 9.419235989916618e-06, "loss": 5.2622, "step": 8372 }, { "epoch": 0.81, "grad_norm": 1.054474949836731, "learning_rate": 9.39984487104906e-06, "loss": 5.2721, "step": 8376 }, { "epoch": 0.81, "grad_norm": 1.0750809907913208, "learning_rate": 9.380453752181502e-06, "loss": 5.2433, "step": 8380 }, { "epoch": 0.81, "grad_norm": 1.054914116859436, "learning_rate": 9.361062633313943e-06, "loss": 5.3407, "step": 8384 }, { "epoch": 0.81, "grad_norm": 1.1101247072219849, "learning_rate": 9.341671514446385e-06, "loss": 5.2774, "step": 8388 }, { "epoch": 0.81, "grad_norm": 1.1054069995880127, "learning_rate": 9.322280395578826e-06, "loss": 5.3978, "step": 8392 }, { "epoch": 0.81, "grad_norm": 1.0813637971878052, "learning_rate": 9.302889276711268e-06, "loss": 5.3735, "step": 8396 }, { "epoch": 0.81, "grad_norm": 1.151734709739685, "learning_rate": 9.283498157843708e-06, "loss": 5.4109, "step": 8400 }, { "epoch": 0.81, "grad_norm": 1.0774791240692139, "learning_rate": 9.264107038976149e-06, "loss": 5.2963, "step": 8404 }, { "epoch": 0.82, "grad_norm": 1.0468578338623047, "learning_rate": 9.24471592010859e-06, "loss": 5.334, "step": 8408 }, { "epoch": 0.82, "grad_norm": 1.0247350931167603, "learning_rate": 9.225324801241032e-06, "loss": 5.3307, "step": 8412 }, { "epoch": 0.82, "grad_norm": 1.021700143814087, "learning_rate": 9.205933682373472e-06, "loss": 5.3315, "step": 8416 }, { "epoch": 0.82, "grad_norm": 1.0852890014648438, "learning_rate": 9.186542563505915e-06, "loss": 5.23, "step": 8420 }, { "epoch": 0.82, "grad_norm": 1.0435699224472046, "learning_rate": 9.167151444638355e-06, "loss": 5.3353, "step": 8424 }, { "epoch": 0.82, "grad_norm": 1.0124831199645996, "learning_rate": 9.147760325770797e-06, "loss": 5.3257, "step": 8428 }, { "epoch": 0.82, "grad_norm": 1.0061957836151123, "learning_rate": 9.128369206903238e-06, "loss": 5.3437, "step": 8432 }, { "epoch": 0.82, "grad_norm": 1.0675718784332275, "learning_rate": 9.10897808803568e-06, "loss": 5.3543, "step": 8436 }, { "epoch": 0.82, "grad_norm": 1.0940582752227783, "learning_rate": 9.089586969168122e-06, "loss": 5.3205, "step": 8440 }, { "epoch": 0.82, "grad_norm": 1.0253922939300537, "learning_rate": 9.070195850300563e-06, "loss": 5.2836, "step": 8444 }, { "epoch": 0.82, "grad_norm": 1.0632801055908203, "learning_rate": 9.050804731433005e-06, "loss": 5.3634, "step": 8448 }, { "epoch": 0.82, "grad_norm": 1.151405692100525, "learning_rate": 9.031413612565446e-06, "loss": 5.266, "step": 8452 }, { "epoch": 0.82, "grad_norm": 1.1215803623199463, "learning_rate": 9.012022493697888e-06, "loss": 5.3605, "step": 8456 }, { "epoch": 0.82, "grad_norm": 1.0406687259674072, "learning_rate": 8.992631374830328e-06, "loss": 5.3037, "step": 8460 }, { "epoch": 0.82, "grad_norm": 1.0305143594741821, "learning_rate": 8.97324025596277e-06, "loss": 5.2419, "step": 8464 }, { "epoch": 0.82, "grad_norm": 1.0159248113632202, "learning_rate": 8.953849137095211e-06, "loss": 5.2576, "step": 8468 }, { "epoch": 0.82, "grad_norm": 1.0444971323013306, "learning_rate": 8.934458018227652e-06, "loss": 5.2461, "step": 8472 }, { "epoch": 0.82, "grad_norm": 1.062738299369812, "learning_rate": 8.915066899360092e-06, "loss": 5.2934, "step": 8476 }, { "epoch": 0.82, "grad_norm": 1.0445396900177002, "learning_rate": 8.895675780492534e-06, "loss": 5.3011, "step": 8480 }, { "epoch": 0.82, "grad_norm": 1.1299471855163574, "learning_rate": 8.876284661624975e-06, "loss": 5.1977, "step": 8484 }, { "epoch": 0.82, "grad_norm": 1.0241918563842773, "learning_rate": 8.856893542757417e-06, "loss": 5.2821, "step": 8488 }, { "epoch": 0.82, "grad_norm": 1.0978903770446777, "learning_rate": 8.837502423889858e-06, "loss": 5.3411, "step": 8492 }, { "epoch": 0.82, "grad_norm": 1.0630090236663818, "learning_rate": 8.8181113050223e-06, "loss": 5.2487, "step": 8496 }, { "epoch": 0.82, "grad_norm": 1.0468335151672363, "learning_rate": 8.798720186154742e-06, "loss": 5.2238, "step": 8500 }, { "epoch": 0.82, "grad_norm": 1.0701797008514404, "learning_rate": 8.779329067287183e-06, "loss": 5.2461, "step": 8504 }, { "epoch": 0.82, "grad_norm": 1.0644505023956299, "learning_rate": 8.759937948419625e-06, "loss": 5.2462, "step": 8508 }, { "epoch": 0.83, "grad_norm": 1.0857114791870117, "learning_rate": 8.740546829552066e-06, "loss": 5.3715, "step": 8512 }, { "epoch": 0.83, "grad_norm": 1.0911511182785034, "learning_rate": 8.721155710684508e-06, "loss": 5.3341, "step": 8516 }, { "epoch": 0.83, "grad_norm": 1.066658854484558, "learning_rate": 8.701764591816948e-06, "loss": 5.2962, "step": 8520 }, { "epoch": 0.83, "grad_norm": 1.0484204292297363, "learning_rate": 8.68237347294939e-06, "loss": 5.2937, "step": 8524 }, { "epoch": 0.83, "grad_norm": 1.1011348962783813, "learning_rate": 8.662982354081831e-06, "loss": 5.3372, "step": 8528 }, { "epoch": 0.83, "grad_norm": 1.1982569694519043, "learning_rate": 8.643591235214273e-06, "loss": 5.1987, "step": 8532 }, { "epoch": 0.83, "grad_norm": 1.1187928915023804, "learning_rate": 8.624200116346714e-06, "loss": 5.3326, "step": 8536 }, { "epoch": 0.83, "grad_norm": 1.0191898345947266, "learning_rate": 8.604808997479154e-06, "loss": 5.384, "step": 8540 }, { "epoch": 0.83, "grad_norm": 1.1834492683410645, "learning_rate": 8.585417878611597e-06, "loss": 5.3774, "step": 8544 }, { "epoch": 0.83, "grad_norm": 1.0459861755371094, "learning_rate": 8.566026759744037e-06, "loss": 5.3309, "step": 8548 }, { "epoch": 0.83, "grad_norm": 1.019656777381897, "learning_rate": 8.546635640876478e-06, "loss": 5.3024, "step": 8552 }, { "epoch": 0.83, "grad_norm": 1.1104713678359985, "learning_rate": 8.52724452200892e-06, "loss": 5.2309, "step": 8556 }, { "epoch": 0.83, "grad_norm": 1.0480828285217285, "learning_rate": 8.507853403141362e-06, "loss": 5.256, "step": 8560 }, { "epoch": 0.83, "grad_norm": 1.0698785781860352, "learning_rate": 8.488462284273803e-06, "loss": 5.2322, "step": 8564 }, { "epoch": 0.83, "grad_norm": 1.0998084545135498, "learning_rate": 8.469071165406245e-06, "loss": 5.2919, "step": 8568 }, { "epoch": 0.83, "grad_norm": 1.0241094827651978, "learning_rate": 8.449680046538685e-06, "loss": 5.3195, "step": 8572 }, { "epoch": 0.83, "grad_norm": 1.1492643356323242, "learning_rate": 8.430288927671128e-06, "loss": 5.2311, "step": 8576 }, { "epoch": 0.83, "grad_norm": 1.1048632860183716, "learning_rate": 8.410897808803568e-06, "loss": 5.3227, "step": 8580 }, { "epoch": 0.83, "grad_norm": 1.0702450275421143, "learning_rate": 8.39150668993601e-06, "loss": 5.3173, "step": 8584 }, { "epoch": 0.83, "grad_norm": 1.0409200191497803, "learning_rate": 8.372115571068451e-06, "loss": 5.3185, "step": 8588 }, { "epoch": 0.83, "grad_norm": 1.046713948249817, "learning_rate": 8.352724452200893e-06, "loss": 5.3996, "step": 8592 }, { "epoch": 0.83, "grad_norm": 1.039919376373291, "learning_rate": 8.333333333333334e-06, "loss": 5.2483, "step": 8596 }, { "epoch": 0.83, "grad_norm": 1.0520331859588623, "learning_rate": 8.313942214465776e-06, "loss": 5.3082, "step": 8600 }, { "epoch": 0.83, "grad_norm": 1.0794312953948975, "learning_rate": 8.294551095598217e-06, "loss": 5.382, "step": 8604 }, { "epoch": 0.83, "grad_norm": 1.0222445726394653, "learning_rate": 8.275159976730657e-06, "loss": 5.2186, "step": 8608 }, { "epoch": 0.83, "grad_norm": 1.043550729751587, "learning_rate": 8.2557688578631e-06, "loss": 5.2447, "step": 8612 }, { "epoch": 0.84, "grad_norm": 1.065027117729187, "learning_rate": 8.23637773899554e-06, "loss": 5.3149, "step": 8616 }, { "epoch": 0.84, "grad_norm": 1.089449405670166, "learning_rate": 8.216986620127982e-06, "loss": 5.231, "step": 8620 }, { "epoch": 0.84, "grad_norm": 1.0330005884170532, "learning_rate": 8.197595501260423e-06, "loss": 5.3099, "step": 8624 }, { "epoch": 0.84, "grad_norm": 1.088131070137024, "learning_rate": 8.178204382392865e-06, "loss": 5.3941, "step": 8628 }, { "epoch": 0.84, "grad_norm": 1.0130773782730103, "learning_rate": 8.158813263525305e-06, "loss": 5.2442, "step": 8632 }, { "epoch": 0.84, "grad_norm": 1.035882592201233, "learning_rate": 8.139422144657748e-06, "loss": 5.21, "step": 8636 }, { "epoch": 0.84, "grad_norm": 1.0550565719604492, "learning_rate": 8.120031025790188e-06, "loss": 5.2888, "step": 8640 }, { "epoch": 0.84, "grad_norm": 1.1436634063720703, "learning_rate": 8.10063990692263e-06, "loss": 5.3587, "step": 8644 }, { "epoch": 0.84, "grad_norm": 1.1211497783660889, "learning_rate": 8.081248788055071e-06, "loss": 5.3188, "step": 8648 }, { "epoch": 0.84, "grad_norm": 1.1205918788909912, "learning_rate": 8.061857669187513e-06, "loss": 5.2688, "step": 8652 }, { "epoch": 0.84, "grad_norm": 1.0907244682312012, "learning_rate": 8.042466550319954e-06, "loss": 5.2518, "step": 8656 }, { "epoch": 0.84, "grad_norm": 1.0856692790985107, "learning_rate": 8.023075431452396e-06, "loss": 5.3384, "step": 8660 }, { "epoch": 0.84, "grad_norm": 1.0303173065185547, "learning_rate": 8.003684312584836e-06, "loss": 5.2906, "step": 8664 }, { "epoch": 0.84, "grad_norm": 1.0908282995224, "learning_rate": 7.984293193717279e-06, "loss": 5.3331, "step": 8668 }, { "epoch": 0.84, "grad_norm": 1.088040828704834, "learning_rate": 7.96490207484972e-06, "loss": 5.3521, "step": 8672 }, { "epoch": 0.84, "grad_norm": 1.0086363554000854, "learning_rate": 7.94551095598216e-06, "loss": 5.3294, "step": 8676 }, { "epoch": 0.84, "grad_norm": 1.1544169187545776, "learning_rate": 7.926119837114602e-06, "loss": 5.3184, "step": 8680 }, { "epoch": 0.84, "grad_norm": 1.0530931949615479, "learning_rate": 7.906728718247042e-06, "loss": 5.2866, "step": 8684 }, { "epoch": 0.84, "grad_norm": 1.080121397972107, "learning_rate": 7.887337599379485e-06, "loss": 5.2793, "step": 8688 }, { "epoch": 0.84, "grad_norm": 1.0175666809082031, "learning_rate": 7.867946480511925e-06, "loss": 5.3282, "step": 8692 }, { "epoch": 0.84, "grad_norm": 1.0283890962600708, "learning_rate": 7.848555361644367e-06, "loss": 5.3439, "step": 8696 }, { "epoch": 0.84, "grad_norm": 1.0480095148086548, "learning_rate": 7.829164242776808e-06, "loss": 5.3042, "step": 8700 }, { "epoch": 0.84, "grad_norm": 0.9947773814201355, "learning_rate": 7.80977312390925e-06, "loss": 5.251, "step": 8704 }, { "epoch": 0.84, "grad_norm": 1.033823847770691, "learning_rate": 7.79038200504169e-06, "loss": 5.4016, "step": 8708 }, { "epoch": 0.84, "grad_norm": 1.129824161529541, "learning_rate": 7.770990886174133e-06, "loss": 5.2176, "step": 8712 }, { "epoch": 0.85, "grad_norm": 1.0202304124832153, "learning_rate": 7.751599767306574e-06, "loss": 5.3219, "step": 8716 }, { "epoch": 0.85, "grad_norm": 1.0748639106750488, "learning_rate": 7.732208648439016e-06, "loss": 5.2945, "step": 8720 }, { "epoch": 0.85, "grad_norm": 1.0026463270187378, "learning_rate": 7.712817529571456e-06, "loss": 5.3474, "step": 8724 }, { "epoch": 0.85, "grad_norm": 0.98891282081604, "learning_rate": 7.693426410703899e-06, "loss": 5.2612, "step": 8728 }, { "epoch": 0.85, "grad_norm": 1.079750418663025, "learning_rate": 7.674035291836339e-06, "loss": 5.2398, "step": 8732 }, { "epoch": 0.85, "grad_norm": 1.057255744934082, "learning_rate": 7.654644172968781e-06, "loss": 5.4364, "step": 8736 }, { "epoch": 0.85, "grad_norm": 1.0570470094680786, "learning_rate": 7.635253054101222e-06, "loss": 5.2695, "step": 8740 }, { "epoch": 0.85, "grad_norm": 1.0664699077606201, "learning_rate": 7.615861935233663e-06, "loss": 5.3378, "step": 8744 }, { "epoch": 0.85, "grad_norm": 1.0890289545059204, "learning_rate": 7.5964708163661055e-06, "loss": 5.375, "step": 8748 }, { "epoch": 0.85, "grad_norm": 1.0607807636260986, "learning_rate": 7.577079697498546e-06, "loss": 5.3258, "step": 8752 }, { "epoch": 0.85, "grad_norm": 1.0911225080490112, "learning_rate": 7.557688578630988e-06, "loss": 5.242, "step": 8756 }, { "epoch": 0.85, "grad_norm": 1.1005817651748657, "learning_rate": 7.538297459763429e-06, "loss": 5.2734, "step": 8760 }, { "epoch": 0.85, "grad_norm": 1.0650907754898071, "learning_rate": 7.51890634089587e-06, "loss": 5.2939, "step": 8764 }, { "epoch": 0.85, "grad_norm": 1.0343334674835205, "learning_rate": 7.499515222028311e-06, "loss": 5.2601, "step": 8768 }, { "epoch": 0.85, "grad_norm": 1.092239260673523, "learning_rate": 7.480124103160753e-06, "loss": 5.266, "step": 8772 }, { "epoch": 0.85, "grad_norm": 1.140648603439331, "learning_rate": 7.4607329842931935e-06, "loss": 5.2353, "step": 8776 }, { "epoch": 0.85, "grad_norm": 1.0743423700332642, "learning_rate": 7.441341865425636e-06, "loss": 5.389, "step": 8780 }, { "epoch": 0.85, "grad_norm": 1.0870285034179688, "learning_rate": 7.421950746558076e-06, "loss": 5.3212, "step": 8784 }, { "epoch": 0.85, "grad_norm": 1.0318245887756348, "learning_rate": 7.4025596276905185e-06, "loss": 5.2854, "step": 8788 }, { "epoch": 0.85, "grad_norm": 1.0597593784332275, "learning_rate": 7.383168508822959e-06, "loss": 5.4204, "step": 8792 }, { "epoch": 0.85, "grad_norm": 1.0621132850646973, "learning_rate": 7.3637773899554e-06, "loss": 5.2903, "step": 8796 }, { "epoch": 0.85, "grad_norm": 1.057024598121643, "learning_rate": 7.344386271087843e-06, "loss": 5.3975, "step": 8800 }, { "epoch": 0.85, "grad_norm": 1.1537240743637085, "learning_rate": 7.324995152220283e-06, "loss": 5.3118, "step": 8804 }, { "epoch": 0.85, "grad_norm": 1.161657691001892, "learning_rate": 7.305604033352725e-06, "loss": 5.3516, "step": 8808 }, { "epoch": 0.85, "grad_norm": 1.0824769735336304, "learning_rate": 7.286212914485166e-06, "loss": 5.3601, "step": 8812 }, { "epoch": 0.85, "grad_norm": 1.0471476316452026, "learning_rate": 7.266821795617608e-06, "loss": 5.2858, "step": 8816 }, { "epoch": 0.86, "grad_norm": 1.0438990592956543, "learning_rate": 7.247430676750049e-06, "loss": 5.2688, "step": 8820 }, { "epoch": 0.86, "grad_norm": 1.0383694171905518, "learning_rate": 7.228039557882491e-06, "loss": 5.3404, "step": 8824 }, { "epoch": 0.86, "grad_norm": 1.039699673652649, "learning_rate": 7.2086484390149315e-06, "loss": 5.2846, "step": 8828 }, { "epoch": 0.86, "grad_norm": 1.1256271600723267, "learning_rate": 7.189257320147373e-06, "loss": 5.327, "step": 8832 }, { "epoch": 0.86, "grad_norm": 0.9789720177650452, "learning_rate": 7.169866201279813e-06, "loss": 5.3369, "step": 8836 }, { "epoch": 0.86, "grad_norm": 1.0387988090515137, "learning_rate": 7.150475082412256e-06, "loss": 5.3402, "step": 8840 }, { "epoch": 0.86, "grad_norm": 1.0737075805664062, "learning_rate": 7.131083963544696e-06, "loss": 5.2837, "step": 8844 }, { "epoch": 0.86, "grad_norm": 1.1215327978134155, "learning_rate": 7.111692844677138e-06, "loss": 5.2237, "step": 8848 }, { "epoch": 0.86, "grad_norm": 1.0533177852630615, "learning_rate": 7.092301725809579e-06, "loss": 5.3783, "step": 8852 }, { "epoch": 0.86, "grad_norm": 1.1658439636230469, "learning_rate": 7.072910606942021e-06, "loss": 5.3175, "step": 8856 }, { "epoch": 0.86, "grad_norm": 1.0966906547546387, "learning_rate": 7.0535194880744625e-06, "loss": 5.4216, "step": 8860 }, { "epoch": 0.86, "grad_norm": 1.0952768325805664, "learning_rate": 7.034128369206904e-06, "loss": 5.2748, "step": 8864 }, { "epoch": 0.86, "grad_norm": 1.096529483795166, "learning_rate": 7.014737250339345e-06, "loss": 5.1666, "step": 8868 }, { "epoch": 0.86, "grad_norm": 1.0736936330795288, "learning_rate": 6.995346131471786e-06, "loss": 5.24, "step": 8872 }, { "epoch": 0.86, "grad_norm": 1.0133376121520996, "learning_rate": 6.975955012604228e-06, "loss": 5.2827, "step": 8876 }, { "epoch": 0.86, "grad_norm": 1.0580708980560303, "learning_rate": 6.956563893736669e-06, "loss": 5.3801, "step": 8880 }, { "epoch": 0.86, "grad_norm": 1.1220327615737915, "learning_rate": 6.937172774869111e-06, "loss": 5.2949, "step": 8884 }, { "epoch": 0.86, "grad_norm": 1.136806845664978, "learning_rate": 6.917781656001551e-06, "loss": 5.347, "step": 8888 }, { "epoch": 0.86, "grad_norm": 1.1147714853286743, "learning_rate": 6.898390537133994e-06, "loss": 5.2648, "step": 8892 }, { "epoch": 0.86, "grad_norm": 1.0525692701339722, "learning_rate": 6.878999418266434e-06, "loss": 5.2665, "step": 8896 }, { "epoch": 0.86, "grad_norm": 1.0400636196136475, "learning_rate": 6.8596082993988755e-06, "loss": 5.3571, "step": 8900 }, { "epoch": 0.86, "grad_norm": 1.0699836015701294, "learning_rate": 6.840217180531316e-06, "loss": 5.3079, "step": 8904 }, { "epoch": 0.86, "grad_norm": 1.0171644687652588, "learning_rate": 6.820826061663758e-06, "loss": 5.3438, "step": 8908 }, { "epoch": 0.86, "grad_norm": 1.0426756143569946, "learning_rate": 6.801434942796199e-06, "loss": 5.417, "step": 8912 }, { "epoch": 0.86, "grad_norm": 1.1138461828231812, "learning_rate": 6.782043823928641e-06, "loss": 5.2769, "step": 8916 }, { "epoch": 0.86, "grad_norm": 1.0219694375991821, "learning_rate": 6.762652705061083e-06, "loss": 5.2811, "step": 8920 }, { "epoch": 0.87, "grad_norm": 1.1686511039733887, "learning_rate": 6.743261586193524e-06, "loss": 5.3218, "step": 8924 }, { "epoch": 0.87, "grad_norm": 1.0407147407531738, "learning_rate": 6.723870467325965e-06, "loss": 5.339, "step": 8928 }, { "epoch": 0.87, "grad_norm": 1.1056681871414185, "learning_rate": 6.704479348458407e-06, "loss": 5.2758, "step": 8932 }, { "epoch": 0.87, "grad_norm": 1.0969740152359009, "learning_rate": 6.685088229590848e-06, "loss": 5.2432, "step": 8936 }, { "epoch": 0.87, "grad_norm": 0.9841113090515137, "learning_rate": 6.6656971107232885e-06, "loss": 5.2596, "step": 8940 }, { "epoch": 0.87, "grad_norm": 1.1172292232513428, "learning_rate": 6.646305991855731e-06, "loss": 5.2884, "step": 8944 }, { "epoch": 0.87, "grad_norm": 0.9936596155166626, "learning_rate": 6.626914872988171e-06, "loss": 5.2968, "step": 8948 }, { "epoch": 0.87, "grad_norm": 1.0389301776885986, "learning_rate": 6.6075237541206135e-06, "loss": 5.2827, "step": 8952 }, { "epoch": 0.87, "grad_norm": 1.020494818687439, "learning_rate": 6.588132635253054e-06, "loss": 5.3882, "step": 8956 }, { "epoch": 0.87, "grad_norm": 1.0391160249710083, "learning_rate": 6.568741516385496e-06, "loss": 5.2172, "step": 8960 }, { "epoch": 0.87, "grad_norm": 1.0213825702667236, "learning_rate": 6.549350397517937e-06, "loss": 5.3307, "step": 8964 }, { "epoch": 0.87, "grad_norm": 1.0745649337768555, "learning_rate": 6.529959278650378e-06, "loss": 5.2638, "step": 8968 }, { "epoch": 0.87, "grad_norm": 1.0567609071731567, "learning_rate": 6.510568159782819e-06, "loss": 5.3164, "step": 8972 }, { "epoch": 0.87, "grad_norm": 1.0450811386108398, "learning_rate": 6.491177040915261e-06, "loss": 5.3648, "step": 8976 }, { "epoch": 0.87, "grad_norm": 1.0880790948867798, "learning_rate": 6.471785922047703e-06, "loss": 5.3617, "step": 8980 }, { "epoch": 0.87, "grad_norm": 1.0606417655944824, "learning_rate": 6.452394803180144e-06, "loss": 5.2201, "step": 8984 }, { "epoch": 0.87, "grad_norm": 1.0124664306640625, "learning_rate": 6.433003684312586e-06, "loss": 5.2499, "step": 8988 }, { "epoch": 0.87, "grad_norm": 1.1352604627609253, "learning_rate": 6.4136125654450265e-06, "loss": 5.3782, "step": 8992 }, { "epoch": 0.87, "grad_norm": 1.1061619520187378, "learning_rate": 6.394221446577468e-06, "loss": 5.3408, "step": 8996 }, { "epoch": 0.87, "grad_norm": 1.1135718822479248, "learning_rate": 6.374830327709909e-06, "loss": 5.2323, "step": 9000 }, { "epoch": 0.87, "grad_norm": 1.0155010223388672, "learning_rate": 6.355439208842351e-06, "loss": 5.2942, "step": 9004 }, { "epoch": 0.87, "grad_norm": 1.044931173324585, "learning_rate": 6.336048089974791e-06, "loss": 5.3267, "step": 9008 }, { "epoch": 0.87, "grad_norm": 1.0643398761749268, "learning_rate": 6.316656971107233e-06, "loss": 5.2651, "step": 9012 }, { "epoch": 0.87, "grad_norm": 1.0203381776809692, "learning_rate": 6.297265852239674e-06, "loss": 5.3493, "step": 9016 }, { "epoch": 0.87, "grad_norm": 1.0563126802444458, "learning_rate": 6.277874733372116e-06, "loss": 5.362, "step": 9020 }, { "epoch": 0.87, "grad_norm": 1.0667084455490112, "learning_rate": 6.258483614504557e-06, "loss": 5.2151, "step": 9024 }, { "epoch": 0.88, "grad_norm": 1.0885251760482788, "learning_rate": 6.239092495636999e-06, "loss": 5.3404, "step": 9028 }, { "epoch": 0.88, "grad_norm": 1.0210630893707275, "learning_rate": 6.21970137676944e-06, "loss": 5.2937, "step": 9032 }, { "epoch": 0.88, "grad_norm": 1.1303844451904297, "learning_rate": 6.200310257901881e-06, "loss": 5.3373, "step": 9036 }, { "epoch": 0.88, "grad_norm": 1.1023499965667725, "learning_rate": 6.180919139034322e-06, "loss": 5.2946, "step": 9040 }, { "epoch": 0.88, "grad_norm": 1.0469759702682495, "learning_rate": 6.161528020166764e-06, "loss": 5.3265, "step": 9044 }, { "epoch": 0.88, "grad_norm": 0.9917576313018799, "learning_rate": 6.142136901299205e-06, "loss": 5.2984, "step": 9048 }, { "epoch": 0.88, "grad_norm": 1.0630229711532593, "learning_rate": 6.122745782431646e-06, "loss": 5.2025, "step": 9052 }, { "epoch": 0.88, "grad_norm": 1.0385984182357788, "learning_rate": 6.103354663564088e-06, "loss": 5.2934, "step": 9056 }, { "epoch": 0.88, "grad_norm": 1.0480278730392456, "learning_rate": 6.083963544696529e-06, "loss": 5.2933, "step": 9060 }, { "epoch": 0.88, "grad_norm": 1.0395824909210205, "learning_rate": 6.0645724258289706e-06, "loss": 5.267, "step": 9064 }, { "epoch": 0.88, "grad_norm": 1.0996421575546265, "learning_rate": 6.045181306961412e-06, "loss": 5.3497, "step": 9068 }, { "epoch": 0.88, "grad_norm": 1.0920591354370117, "learning_rate": 6.025790188093853e-06, "loss": 5.2752, "step": 9072 }, { "epoch": 0.88, "grad_norm": 1.0730946063995361, "learning_rate": 6.006399069226295e-06, "loss": 5.3234, "step": 9076 }, { "epoch": 0.88, "grad_norm": 1.077646017074585, "learning_rate": 5.987007950358736e-06, "loss": 5.3009, "step": 9080 }, { "epoch": 0.88, "grad_norm": 1.0901986360549927, "learning_rate": 5.9676168314911775e-06, "loss": 5.2653, "step": 9084 }, { "epoch": 0.88, "grad_norm": 1.1307499408721924, "learning_rate": 5.948225712623619e-06, "loss": 5.393, "step": 9088 }, { "epoch": 0.88, "grad_norm": 1.1002899408340454, "learning_rate": 5.92883459375606e-06, "loss": 5.2785, "step": 9092 }, { "epoch": 0.88, "grad_norm": 1.1348730325698853, "learning_rate": 5.909443474888502e-06, "loss": 5.3749, "step": 9096 }, { "epoch": 0.88, "grad_norm": 1.2404577732086182, "learning_rate": 5.890052356020943e-06, "loss": 5.2968, "step": 9100 }, { "epoch": 0.88, "grad_norm": 1.1860145330429077, "learning_rate": 5.8706612371533835e-06, "loss": 5.2951, "step": 9104 }, { "epoch": 0.88, "grad_norm": 1.0747588872909546, "learning_rate": 5.851270118285825e-06, "loss": 5.2725, "step": 9108 }, { "epoch": 0.88, "grad_norm": 1.0642809867858887, "learning_rate": 5.831878999418266e-06, "loss": 5.3347, "step": 9112 }, { "epoch": 0.88, "grad_norm": 1.0361077785491943, "learning_rate": 5.812487880550708e-06, "loss": 5.2899, "step": 9116 }, { "epoch": 0.88, "grad_norm": 1.027950406074524, "learning_rate": 5.793096761683149e-06, "loss": 5.3016, "step": 9120 }, { "epoch": 0.88, "grad_norm": 1.0341978073120117, "learning_rate": 5.7737056428155905e-06, "loss": 5.3066, "step": 9124 }, { "epoch": 0.89, "grad_norm": 1.0980345010757446, "learning_rate": 5.754314523948033e-06, "loss": 5.2685, "step": 9128 }, { "epoch": 0.89, "grad_norm": 1.0036580562591553, "learning_rate": 5.734923405080474e-06, "loss": 5.2972, "step": 9132 }, { "epoch": 0.89, "grad_norm": 1.0951625108718872, "learning_rate": 5.715532286212915e-06, "loss": 5.2363, "step": 9136 }, { "epoch": 0.89, "grad_norm": 1.010358214378357, "learning_rate": 5.696141167345356e-06, "loss": 5.2715, "step": 9140 }, { "epoch": 0.89, "grad_norm": 1.0721516609191895, "learning_rate": 5.676750048477797e-06, "loss": 5.3006, "step": 9144 }, { "epoch": 0.89, "grad_norm": 0.9922645688056946, "learning_rate": 5.657358929610239e-06, "loss": 5.2985, "step": 9148 }, { "epoch": 0.89, "grad_norm": 1.0958447456359863, "learning_rate": 5.63796781074268e-06, "loss": 5.2388, "step": 9152 }, { "epoch": 0.89, "grad_norm": 0.9977266788482666, "learning_rate": 5.6185766918751215e-06, "loss": 5.4273, "step": 9156 }, { "epoch": 0.89, "grad_norm": 1.1025915145874023, "learning_rate": 5.599185573007563e-06, "loss": 5.3768, "step": 9160 }, { "epoch": 0.89, "grad_norm": 1.0521866083145142, "learning_rate": 5.579794454140004e-06, "loss": 5.2555, "step": 9164 }, { "epoch": 0.89, "grad_norm": 1.0546320676803589, "learning_rate": 5.560403335272446e-06, "loss": 5.259, "step": 9168 }, { "epoch": 0.89, "grad_norm": 1.084153413772583, "learning_rate": 5.541012216404887e-06, "loss": 5.2733, "step": 9172 }, { "epoch": 0.89, "grad_norm": 1.1298420429229736, "learning_rate": 5.521621097537328e-06, "loss": 5.2499, "step": 9176 }, { "epoch": 0.89, "grad_norm": 1.0296047925949097, "learning_rate": 5.502229978669769e-06, "loss": 5.3293, "step": 9180 }, { "epoch": 0.89, "grad_norm": 1.0231281518936157, "learning_rate": 5.48283885980221e-06, "loss": 5.2614, "step": 9184 }, { "epoch": 0.89, "grad_norm": 1.1063759326934814, "learning_rate": 5.463447740934653e-06, "loss": 5.3569, "step": 9188 }, { "epoch": 0.89, "grad_norm": 1.0762827396392822, "learning_rate": 5.444056622067094e-06, "loss": 5.3734, "step": 9192 }, { "epoch": 0.89, "grad_norm": 1.0667394399642944, "learning_rate": 5.424665503199535e-06, "loss": 5.3136, "step": 9196 }, { "epoch": 0.89, "grad_norm": 1.0663567781448364, "learning_rate": 5.405274384331977e-06, "loss": 5.2178, "step": 9200 }, { "epoch": 0.89, "grad_norm": 1.1014020442962646, "learning_rate": 5.385883265464417e-06, "loss": 5.3121, "step": 9204 }, { "epoch": 0.89, "grad_norm": 1.0944761037826538, "learning_rate": 5.366492146596859e-06, "loss": 5.3321, "step": 9208 }, { "epoch": 0.89, "grad_norm": 1.0576825141906738, "learning_rate": 5.3471010277293e-06, "loss": 5.187, "step": 9212 }, { "epoch": 0.89, "grad_norm": 1.102414608001709, "learning_rate": 5.3277099088617414e-06, "loss": 5.2198, "step": 9216 }, { "epoch": 0.89, "grad_norm": 1.0515443086624146, "learning_rate": 5.308318789994183e-06, "loss": 5.3509, "step": 9220 }, { "epoch": 0.89, "grad_norm": 1.0225639343261719, "learning_rate": 5.288927671126624e-06, "loss": 5.3264, "step": 9224 }, { "epoch": 0.89, "grad_norm": 1.0482409000396729, "learning_rate": 5.269536552259066e-06, "loss": 5.314, "step": 9228 }, { "epoch": 0.9, "grad_norm": 1.0829366445541382, "learning_rate": 5.250145433391507e-06, "loss": 5.3288, "step": 9232 }, { "epoch": 0.9, "grad_norm": 1.018813967704773, "learning_rate": 5.230754314523948e-06, "loss": 5.245, "step": 9236 }, { "epoch": 0.9, "grad_norm": 1.1042280197143555, "learning_rate": 5.21136319565639e-06, "loss": 5.3871, "step": 9240 }, { "epoch": 0.9, "grad_norm": 1.0457403659820557, "learning_rate": 5.19197207678883e-06, "loss": 5.3858, "step": 9244 }, { "epoch": 0.9, "grad_norm": 1.0873547792434692, "learning_rate": 5.1725809579212725e-06, "loss": 5.2907, "step": 9248 }, { "epoch": 0.9, "grad_norm": 1.0615798234939575, "learning_rate": 5.153189839053714e-06, "loss": 5.2729, "step": 9252 }, { "epoch": 0.9, "grad_norm": 1.1086784601211548, "learning_rate": 5.133798720186155e-06, "loss": 5.388, "step": 9256 }, { "epoch": 0.9, "grad_norm": 1.0895764827728271, "learning_rate": 5.114407601318597e-06, "loss": 5.2892, "step": 9260 }, { "epoch": 0.9, "grad_norm": 1.0369954109191895, "learning_rate": 5.095016482451038e-06, "loss": 5.2989, "step": 9264 }, { "epoch": 0.9, "grad_norm": 1.049892544746399, "learning_rate": 5.0756253635834794e-06, "loss": 5.3419, "step": 9268 }, { "epoch": 0.9, "grad_norm": 1.0920242071151733, "learning_rate": 5.056234244715921e-06, "loss": 5.3116, "step": 9272 }, { "epoch": 0.9, "grad_norm": 1.0999177694320679, "learning_rate": 5.036843125848361e-06, "loss": 5.2779, "step": 9276 }, { "epoch": 0.9, "grad_norm": 1.0239474773406982, "learning_rate": 5.017452006980803e-06, "loss": 5.35, "step": 9280 }, { "epoch": 0.9, "grad_norm": 1.0571128129959106, "learning_rate": 4.998060888113244e-06, "loss": 5.3139, "step": 9284 }, { "epoch": 0.9, "grad_norm": 1.1082773208618164, "learning_rate": 4.9786697692456855e-06, "loss": 5.2779, "step": 9288 }, { "epoch": 0.9, "grad_norm": 1.0475021600723267, "learning_rate": 4.959278650378127e-06, "loss": 5.1911, "step": 9292 }, { "epoch": 0.9, "grad_norm": 1.0756545066833496, "learning_rate": 4.939887531510568e-06, "loss": 5.2851, "step": 9296 }, { "epoch": 0.9, "grad_norm": 0.9714142680168152, "learning_rate": 4.92049641264301e-06, "loss": 5.2584, "step": 9300 }, { "epoch": 0.9, "grad_norm": 1.0212279558181763, "learning_rate": 4.901105293775451e-06, "loss": 5.2914, "step": 9304 }, { "epoch": 0.9, "grad_norm": 1.038690209388733, "learning_rate": 4.8817141749078924e-06, "loss": 5.3318, "step": 9308 }, { "epoch": 0.9, "grad_norm": 1.0438801050186157, "learning_rate": 4.862323056040334e-06, "loss": 5.3556, "step": 9312 }, { "epoch": 0.9, "grad_norm": 1.0903195142745972, "learning_rate": 4.842931937172775e-06, "loss": 5.2413, "step": 9316 }, { "epoch": 0.9, "grad_norm": 1.029875636100769, "learning_rate": 4.823540818305217e-06, "loss": 5.2558, "step": 9320 }, { "epoch": 0.9, "grad_norm": 1.0729329586029053, "learning_rate": 4.804149699437658e-06, "loss": 5.2387, "step": 9324 }, { "epoch": 0.9, "grad_norm": 1.012494683265686, "learning_rate": 4.784758580570099e-06, "loss": 5.2488, "step": 9328 }, { "epoch": 0.9, "grad_norm": 1.0358749628067017, "learning_rate": 4.765367461702541e-06, "loss": 5.2961, "step": 9332 }, { "epoch": 0.91, "grad_norm": 1.0714225769042969, "learning_rate": 4.745976342834982e-06, "loss": 5.2666, "step": 9336 }, { "epoch": 0.91, "grad_norm": 1.0506253242492676, "learning_rate": 4.7265852239674235e-06, "loss": 5.2997, "step": 9340 }, { "epoch": 0.91, "grad_norm": 1.067970633506775, "learning_rate": 4.707194105099864e-06, "loss": 5.2415, "step": 9344 }, { "epoch": 0.91, "grad_norm": 1.1353143453598022, "learning_rate": 4.687802986232305e-06, "loss": 5.2687, "step": 9348 }, { "epoch": 0.91, "grad_norm": 1.0400621891021729, "learning_rate": 4.668411867364747e-06, "loss": 5.269, "step": 9352 }, { "epoch": 0.91, "grad_norm": 1.1021333932876587, "learning_rate": 4.649020748497188e-06, "loss": 5.3425, "step": 9356 }, { "epoch": 0.91, "grad_norm": 1.0333983898162842, "learning_rate": 4.6296296296296296e-06, "loss": 5.3178, "step": 9360 }, { "epoch": 0.91, "grad_norm": 1.1346157789230347, "learning_rate": 4.610238510762071e-06, "loss": 5.2779, "step": 9364 }, { "epoch": 0.91, "grad_norm": 1.1103806495666504, "learning_rate": 4.590847391894513e-06, "loss": 5.3102, "step": 9368 }, { "epoch": 0.91, "grad_norm": 1.109861135482788, "learning_rate": 4.5714562730269546e-06, "loss": 5.2864, "step": 9372 }, { "epoch": 0.91, "grad_norm": 1.0895229578018188, "learning_rate": 4.552065154159395e-06, "loss": 5.3538, "step": 9376 }, { "epoch": 0.91, "grad_norm": 1.0834227800369263, "learning_rate": 4.5326740352918365e-06, "loss": 5.328, "step": 9380 }, { "epoch": 0.91, "grad_norm": 1.0530760288238525, "learning_rate": 4.513282916424278e-06, "loss": 5.3337, "step": 9384 }, { "epoch": 0.91, "grad_norm": 1.0888159275054932, "learning_rate": 4.493891797556719e-06, "loss": 5.2769, "step": 9388 }, { "epoch": 0.91, "grad_norm": 0.9955815076828003, "learning_rate": 4.474500678689161e-06, "loss": 5.3042, "step": 9392 }, { "epoch": 0.91, "grad_norm": 1.1201573610305786, "learning_rate": 4.455109559821602e-06, "loss": 5.3797, "step": 9396 }, { "epoch": 0.91, "grad_norm": 1.0425370931625366, "learning_rate": 4.435718440954043e-06, "loss": 5.2661, "step": 9400 }, { "epoch": 0.91, "grad_norm": 1.10947847366333, "learning_rate": 4.416327322086485e-06, "loss": 5.3799, "step": 9404 }, { "epoch": 0.91, "grad_norm": 1.088013768196106, "learning_rate": 4.396936203218926e-06, "loss": 5.4089, "step": 9408 }, { "epoch": 0.91, "grad_norm": 1.0372400283813477, "learning_rate": 4.377545084351367e-06, "loss": 5.2819, "step": 9412 }, { "epoch": 0.91, "grad_norm": 1.030407428741455, "learning_rate": 4.358153965483808e-06, "loss": 5.3247, "step": 9416 }, { "epoch": 0.91, "grad_norm": 1.0526556968688965, "learning_rate": 4.3387628466162495e-06, "loss": 5.243, "step": 9420 }, { "epoch": 0.91, "grad_norm": 1.0113731622695923, "learning_rate": 4.319371727748691e-06, "loss": 5.3229, "step": 9424 }, { "epoch": 0.91, "grad_norm": 1.1396671533584595, "learning_rate": 4.299980608881133e-06, "loss": 5.2698, "step": 9428 }, { "epoch": 0.91, "grad_norm": 1.066570520401001, "learning_rate": 4.2805894900135745e-06, "loss": 5.3542, "step": 9432 }, { "epoch": 0.91, "grad_norm": 1.0811322927474976, "learning_rate": 4.261198371146016e-06, "loss": 5.2388, "step": 9436 }, { "epoch": 0.92, "grad_norm": 1.0718379020690918, "learning_rate": 4.241807252278457e-06, "loss": 5.2413, "step": 9440 }, { "epoch": 0.92, "grad_norm": 1.0602755546569824, "learning_rate": 4.222416133410898e-06, "loss": 5.299, "step": 9444 }, { "epoch": 0.92, "grad_norm": 1.0844111442565918, "learning_rate": 4.203025014543339e-06, "loss": 5.407, "step": 9448 }, { "epoch": 0.92, "grad_norm": 1.0383292436599731, "learning_rate": 4.1836338956757805e-06, "loss": 5.3359, "step": 9452 }, { "epoch": 0.92, "grad_norm": 1.0988202095031738, "learning_rate": 4.164242776808222e-06, "loss": 5.4107, "step": 9456 }, { "epoch": 0.92, "grad_norm": 1.1423723697662354, "learning_rate": 4.144851657940663e-06, "loss": 5.3404, "step": 9460 }, { "epoch": 0.92, "grad_norm": 1.1802666187286377, "learning_rate": 4.125460539073105e-06, "loss": 5.247, "step": 9464 }, { "epoch": 0.92, "grad_norm": 0.9723669290542603, "learning_rate": 4.106069420205546e-06, "loss": 5.2306, "step": 9468 }, { "epoch": 0.92, "grad_norm": 1.0496985912322998, "learning_rate": 4.0866783013379875e-06, "loss": 5.2929, "step": 9472 }, { "epoch": 0.92, "grad_norm": 1.0715490579605103, "learning_rate": 4.067287182470429e-06, "loss": 5.2874, "step": 9476 }, { "epoch": 0.92, "grad_norm": 1.0244431495666504, "learning_rate": 4.04789606360287e-06, "loss": 5.3395, "step": 9480 }, { "epoch": 0.92, "grad_norm": 1.0559965372085571, "learning_rate": 4.028504944735311e-06, "loss": 5.4143, "step": 9484 }, { "epoch": 0.92, "grad_norm": 1.0647709369659424, "learning_rate": 4.009113825867752e-06, "loss": 5.3425, "step": 9488 }, { "epoch": 0.92, "grad_norm": 1.0821641683578491, "learning_rate": 3.989722707000194e-06, "loss": 5.2985, "step": 9492 }, { "epoch": 0.92, "grad_norm": 1.0766880512237549, "learning_rate": 3.970331588132636e-06, "loss": 5.3269, "step": 9496 }, { "epoch": 0.92, "grad_norm": 1.0378237962722778, "learning_rate": 3.950940469265077e-06, "loss": 5.2643, "step": 9500 }, { "epoch": 0.92, "grad_norm": 1.1061269044876099, "learning_rate": 3.9315493503975185e-06, "loss": 5.3131, "step": 9504 }, { "epoch": 0.92, "grad_norm": 1.0113786458969116, "learning_rate": 3.91215823152996e-06, "loss": 5.3312, "step": 9508 }, { "epoch": 0.92, "grad_norm": 1.0934914350509644, "learning_rate": 3.8927671126624005e-06, "loss": 5.2506, "step": 9512 }, { "epoch": 0.92, "grad_norm": 1.024383544921875, "learning_rate": 3.873375993794842e-06, "loss": 5.3126, "step": 9516 }, { "epoch": 0.92, "grad_norm": 1.0432902574539185, "learning_rate": 3.853984874927283e-06, "loss": 5.2686, "step": 9520 }, { "epoch": 0.92, "grad_norm": 1.0603485107421875, "learning_rate": 3.834593756059725e-06, "loss": 5.2765, "step": 9524 }, { "epoch": 0.92, "grad_norm": 1.0203194618225098, "learning_rate": 3.815202637192166e-06, "loss": 5.2347, "step": 9528 }, { "epoch": 0.92, "grad_norm": 1.0024125576019287, "learning_rate": 3.7958115183246074e-06, "loss": 5.2144, "step": 9532 }, { "epoch": 0.92, "grad_norm": 1.098630666732788, "learning_rate": 3.7764203994570488e-06, "loss": 5.327, "step": 9536 }, { "epoch": 0.92, "grad_norm": 1.0508458614349365, "learning_rate": 3.75702928058949e-06, "loss": 5.2154, "step": 9540 }, { "epoch": 0.93, "grad_norm": 1.0391416549682617, "learning_rate": 3.737638161721931e-06, "loss": 5.3172, "step": 9544 }, { "epoch": 0.93, "grad_norm": 1.0499507188796997, "learning_rate": 3.7182470428543725e-06, "loss": 5.3587, "step": 9548 }, { "epoch": 0.93, "grad_norm": 1.1225402355194092, "learning_rate": 3.6988559239868147e-06, "loss": 5.2066, "step": 9552 }, { "epoch": 0.93, "grad_norm": 1.0803992748260498, "learning_rate": 3.6794648051192557e-06, "loss": 5.2294, "step": 9556 }, { "epoch": 0.93, "grad_norm": 1.0833008289337158, "learning_rate": 3.660073686251697e-06, "loss": 5.3644, "step": 9560 }, { "epoch": 0.93, "grad_norm": 1.069279670715332, "learning_rate": 3.6406825673841384e-06, "loss": 5.2461, "step": 9564 }, { "epoch": 0.93, "grad_norm": 1.013576626777649, "learning_rate": 3.62129144851658e-06, "loss": 5.3483, "step": 9568 }, { "epoch": 0.93, "grad_norm": 1.082595944404602, "learning_rate": 3.601900329649021e-06, "loss": 5.2905, "step": 9572 }, { "epoch": 0.93, "grad_norm": 1.0759378671646118, "learning_rate": 3.582509210781462e-06, "loss": 5.3518, "step": 9576 }, { "epoch": 0.93, "grad_norm": 1.070561170578003, "learning_rate": 3.5631180919139036e-06, "loss": 5.3375, "step": 9580 }, { "epoch": 0.93, "grad_norm": 1.0099434852600098, "learning_rate": 3.543726973046345e-06, "loss": 5.3133, "step": 9584 }, { "epoch": 0.93, "grad_norm": 1.1549813747406006, "learning_rate": 3.5243358541787863e-06, "loss": 5.3642, "step": 9588 }, { "epoch": 0.93, "grad_norm": 1.024865746498108, "learning_rate": 3.5049447353112273e-06, "loss": 5.3313, "step": 9592 }, { "epoch": 0.93, "grad_norm": 1.074829339981079, "learning_rate": 3.4855536164436687e-06, "loss": 5.3294, "step": 9596 }, { "epoch": 0.93, "grad_norm": 1.0802452564239502, "learning_rate": 3.46616249757611e-06, "loss": 5.3024, "step": 9600 }, { "epoch": 0.93, "grad_norm": 1.1103779077529907, "learning_rate": 3.4467713787085514e-06, "loss": 5.259, "step": 9604 }, { "epoch": 0.93, "grad_norm": 1.122125506401062, "learning_rate": 3.427380259840993e-06, "loss": 5.388, "step": 9608 }, { "epoch": 0.93, "grad_norm": 1.106959581375122, "learning_rate": 3.4079891409734346e-06, "loss": 5.344, "step": 9612 }, { "epoch": 0.93, "grad_norm": 1.15294349193573, "learning_rate": 3.388598022105876e-06, "loss": 5.3002, "step": 9616 }, { "epoch": 0.93, "grad_norm": 1.124644160270691, "learning_rate": 3.3692069032383174e-06, "loss": 5.406, "step": 9620 }, { "epoch": 0.93, "grad_norm": 0.9895834922790527, "learning_rate": 3.3498157843707584e-06, "loss": 5.3485, "step": 9624 }, { "epoch": 0.93, "grad_norm": 1.155667781829834, "learning_rate": 3.3304246655031997e-06, "loss": 5.3161, "step": 9628 }, { "epoch": 0.93, "grad_norm": 1.0220144987106323, "learning_rate": 3.311033546635641e-06, "loss": 5.3078, "step": 9632 }, { "epoch": 0.93, "grad_norm": 1.0805678367614746, "learning_rate": 3.2916424277680825e-06, "loss": 5.3262, "step": 9636 }, { "epoch": 0.93, "grad_norm": 1.0154497623443604, "learning_rate": 3.272251308900524e-06, "loss": 5.2922, "step": 9640 }, { "epoch": 0.94, "grad_norm": 1.0291320085525513, "learning_rate": 3.252860190032965e-06, "loss": 5.2613, "step": 9644 }, { "epoch": 0.94, "grad_norm": 1.0393972396850586, "learning_rate": 3.2334690711654062e-06, "loss": 5.3226, "step": 9648 }, { "epoch": 0.94, "grad_norm": 1.0496784448623657, "learning_rate": 3.2140779522978476e-06, "loss": 5.3214, "step": 9652 }, { "epoch": 0.94, "grad_norm": 1.1420729160308838, "learning_rate": 3.194686833430289e-06, "loss": 5.3446, "step": 9656 }, { "epoch": 0.94, "grad_norm": 1.1252871751785278, "learning_rate": 3.17529571456273e-06, "loss": 5.2488, "step": 9660 }, { "epoch": 0.94, "grad_norm": 1.0620160102844238, "learning_rate": 3.1559045956951714e-06, "loss": 5.3523, "step": 9664 }, { "epoch": 0.94, "grad_norm": 1.14591646194458, "learning_rate": 3.1365134768276127e-06, "loss": 5.277, "step": 9668 }, { "epoch": 0.94, "grad_norm": 1.0520758628845215, "learning_rate": 3.1171223579600545e-06, "loss": 5.2889, "step": 9672 }, { "epoch": 0.94, "grad_norm": 1.0912026166915894, "learning_rate": 3.0977312390924955e-06, "loss": 5.3545, "step": 9676 }, { "epoch": 0.94, "grad_norm": 1.0775065422058105, "learning_rate": 3.078340120224937e-06, "loss": 5.3119, "step": 9680 }, { "epoch": 0.94, "grad_norm": 1.0335484743118286, "learning_rate": 3.0589490013573787e-06, "loss": 5.3732, "step": 9684 }, { "epoch": 0.94, "grad_norm": 1.0346179008483887, "learning_rate": 3.03955788248982e-06, "loss": 5.2468, "step": 9688 }, { "epoch": 0.94, "grad_norm": 1.091991662979126, "learning_rate": 3.020166763622261e-06, "loss": 5.3091, "step": 9692 }, { "epoch": 0.94, "grad_norm": 1.07473886013031, "learning_rate": 3.0007756447547024e-06, "loss": 5.2884, "step": 9696 }, { "epoch": 0.94, "grad_norm": 1.125009298324585, "learning_rate": 2.981384525887144e-06, "loss": 5.1994, "step": 9700 }, { "epoch": 0.94, "grad_norm": 1.2514116764068604, "learning_rate": 2.961993407019585e-06, "loss": 5.3651, "step": 9704 }, { "epoch": 0.94, "grad_norm": 1.1177825927734375, "learning_rate": 2.9426022881520266e-06, "loss": 5.3326, "step": 9708 }, { "epoch": 0.94, "grad_norm": 0.9657092690467834, "learning_rate": 2.9232111692844675e-06, "loss": 5.2113, "step": 9712 }, { "epoch": 0.94, "grad_norm": 1.1035900115966797, "learning_rate": 2.9038200504169093e-06, "loss": 5.3596, "step": 9716 }, { "epoch": 0.94, "grad_norm": 1.0969198942184448, "learning_rate": 2.8844289315493507e-06, "loss": 5.2202, "step": 9720 }, { "epoch": 0.94, "grad_norm": 1.0725688934326172, "learning_rate": 2.865037812681792e-06, "loss": 5.3028, "step": 9724 }, { "epoch": 0.94, "grad_norm": 1.0617296695709229, "learning_rate": 2.845646693814233e-06, "loss": 5.2939, "step": 9728 }, { "epoch": 0.94, "grad_norm": 1.0573393106460571, "learning_rate": 2.8262555749466745e-06, "loss": 5.2684, "step": 9732 }, { "epoch": 0.94, "grad_norm": 1.0836176872253418, "learning_rate": 2.806864456079116e-06, "loss": 5.2659, "step": 9736 }, { "epoch": 0.94, "grad_norm": 1.157784342765808, "learning_rate": 2.7874733372115572e-06, "loss": 5.326, "step": 9740 }, { "epoch": 0.94, "grad_norm": 1.1517001390457153, "learning_rate": 2.7680822183439986e-06, "loss": 5.3198, "step": 9744 }, { "epoch": 0.95, "grad_norm": 1.0895476341247559, "learning_rate": 2.74869109947644e-06, "loss": 5.2795, "step": 9748 }, { "epoch": 0.95, "grad_norm": 1.0055677890777588, "learning_rate": 2.7292999806088814e-06, "loss": 5.2201, "step": 9752 }, { "epoch": 0.95, "grad_norm": 1.0537750720977783, "learning_rate": 2.7099088617413228e-06, "loss": 5.2557, "step": 9756 }, { "epoch": 0.95, "grad_norm": 1.1611661911010742, "learning_rate": 2.690517742873764e-06, "loss": 5.2528, "step": 9760 }, { "epoch": 0.95, "grad_norm": 1.0563558340072632, "learning_rate": 2.671126624006205e-06, "loss": 5.3437, "step": 9764 }, { "epoch": 0.95, "grad_norm": 1.08133065700531, "learning_rate": 2.6517355051386465e-06, "loss": 5.278, "step": 9768 }, { "epoch": 0.95, "grad_norm": 1.0893269777297974, "learning_rate": 2.632344386271088e-06, "loss": 5.4056, "step": 9772 }, { "epoch": 0.95, "grad_norm": 1.0335135459899902, "learning_rate": 2.6129532674035292e-06, "loss": 5.2513, "step": 9776 }, { "epoch": 0.95, "grad_norm": 1.0350239276885986, "learning_rate": 2.5935621485359706e-06, "loss": 5.2965, "step": 9780 }, { "epoch": 0.95, "grad_norm": 1.1059588193893433, "learning_rate": 2.574171029668412e-06, "loss": 5.291, "step": 9784 }, { "epoch": 0.95, "grad_norm": 1.109610915184021, "learning_rate": 2.5547799108008534e-06, "loss": 5.3289, "step": 9788 }, { "epoch": 0.95, "grad_norm": 1.0264822244644165, "learning_rate": 2.5353887919332948e-06, "loss": 5.3212, "step": 9792 }, { "epoch": 0.95, "grad_norm": 1.0764135122299194, "learning_rate": 2.5159976730657357e-06, "loss": 5.4082, "step": 9796 }, { "epoch": 0.95, "grad_norm": 1.0444653034210205, "learning_rate": 2.496606554198177e-06, "loss": 5.2801, "step": 9800 }, { "epoch": 0.95, "grad_norm": 1.23245108127594, "learning_rate": 2.477215435330619e-06, "loss": 5.4502, "step": 9804 }, { "epoch": 0.95, "grad_norm": 1.0822020769119263, "learning_rate": 2.4578243164630603e-06, "loss": 5.2585, "step": 9808 }, { "epoch": 0.95, "grad_norm": 1.0727524757385254, "learning_rate": 2.4384331975955013e-06, "loss": 5.3905, "step": 9812 }, { "epoch": 0.95, "grad_norm": 1.1105926036834717, "learning_rate": 2.4190420787279427e-06, "loss": 5.3339, "step": 9816 }, { "epoch": 0.95, "grad_norm": 1.0885437726974487, "learning_rate": 2.399650959860384e-06, "loss": 5.277, "step": 9820 }, { "epoch": 0.95, "grad_norm": 1.0500329732894897, "learning_rate": 2.3802598409928254e-06, "loss": 5.3009, "step": 9824 }, { "epoch": 0.95, "grad_norm": 1.0552301406860352, "learning_rate": 2.360868722125267e-06, "loss": 5.3666, "step": 9828 }, { "epoch": 0.95, "grad_norm": 1.1031043529510498, "learning_rate": 2.3414776032577078e-06, "loss": 5.3025, "step": 9832 }, { "epoch": 0.95, "grad_norm": 1.0527067184448242, "learning_rate": 2.3220864843901496e-06, "loss": 5.2945, "step": 9836 }, { "epoch": 0.95, "grad_norm": 1.0818541049957275, "learning_rate": 2.302695365522591e-06, "loss": 5.3044, "step": 9840 }, { "epoch": 0.95, "grad_norm": 1.071460485458374, "learning_rate": 2.2833042466550324e-06, "loss": 5.3087, "step": 9844 }, { "epoch": 0.95, "grad_norm": 1.1150330305099487, "learning_rate": 2.2639131277874733e-06, "loss": 5.2367, "step": 9848 }, { "epoch": 0.96, "grad_norm": 1.1372606754302979, "learning_rate": 2.2445220089199147e-06, "loss": 5.3709, "step": 9852 }, { "epoch": 0.96, "grad_norm": 1.1396896839141846, "learning_rate": 2.225130890052356e-06, "loss": 5.2739, "step": 9856 }, { "epoch": 0.96, "grad_norm": 1.0782867670059204, "learning_rate": 2.2057397711847975e-06, "loss": 5.3314, "step": 9860 }, { "epoch": 0.96, "grad_norm": 1.0799883604049683, "learning_rate": 2.186348652317239e-06, "loss": 5.2398, "step": 9864 }, { "epoch": 0.96, "grad_norm": 1.063509464263916, "learning_rate": 2.1669575334496802e-06, "loss": 5.3209, "step": 9868 }, { "epoch": 0.96, "grad_norm": 1.074567437171936, "learning_rate": 2.1475664145821216e-06, "loss": 5.3116, "step": 9872 }, { "epoch": 0.96, "grad_norm": 1.0963295698165894, "learning_rate": 2.128175295714563e-06, "loss": 5.2356, "step": 9876 }, { "epoch": 0.96, "grad_norm": 1.0509377717971802, "learning_rate": 2.108784176847004e-06, "loss": 5.3888, "step": 9880 }, { "epoch": 0.96, "grad_norm": 1.0329670906066895, "learning_rate": 2.0893930579794453e-06, "loss": 5.2065, "step": 9884 }, { "epoch": 0.96, "grad_norm": 1.0395572185516357, "learning_rate": 2.0700019391118867e-06, "loss": 5.2347, "step": 9888 }, { "epoch": 0.96, "grad_norm": 1.0882209539413452, "learning_rate": 2.050610820244328e-06, "loss": 5.3839, "step": 9892 }, { "epoch": 0.96, "grad_norm": 1.0747255086898804, "learning_rate": 2.0312197013767695e-06, "loss": 5.3768, "step": 9896 }, { "epoch": 0.96, "grad_norm": 1.0426535606384277, "learning_rate": 2.011828582509211e-06, "loss": 5.2553, "step": 9900 }, { "epoch": 0.96, "grad_norm": 0.9872206449508667, "learning_rate": 1.9924374636416523e-06, "loss": 5.246, "step": 9904 }, { "epoch": 0.96, "grad_norm": 1.0624208450317383, "learning_rate": 1.9730463447740936e-06, "loss": 5.2859, "step": 9908 }, { "epoch": 0.96, "grad_norm": 1.0884760618209839, "learning_rate": 1.953655225906535e-06, "loss": 5.3412, "step": 9912 }, { "epoch": 0.96, "grad_norm": 1.0986016988754272, "learning_rate": 1.934264107038976e-06, "loss": 5.2388, "step": 9916 }, { "epoch": 0.96, "grad_norm": 1.0865901708602905, "learning_rate": 1.9148729881714174e-06, "loss": 5.2769, "step": 9920 }, { "epoch": 0.96, "grad_norm": 1.0592422485351562, "learning_rate": 1.8954818693038588e-06, "loss": 5.3446, "step": 9924 }, { "epoch": 0.96, "grad_norm": 1.0826843976974487, "learning_rate": 1.8760907504363004e-06, "loss": 5.254, "step": 9928 }, { "epoch": 0.96, "grad_norm": 1.1321392059326172, "learning_rate": 1.8566996315687417e-06, "loss": 5.266, "step": 9932 }, { "epoch": 0.96, "grad_norm": 1.0519055128097534, "learning_rate": 1.837308512701183e-06, "loss": 5.3199, "step": 9936 }, { "epoch": 0.96, "grad_norm": 1.0540200471878052, "learning_rate": 1.8179173938336243e-06, "loss": 5.3855, "step": 9940 }, { "epoch": 0.96, "grad_norm": 1.0404717922210693, "learning_rate": 1.7985262749660657e-06, "loss": 5.3105, "step": 9944 }, { "epoch": 0.96, "grad_norm": 1.076065182685852, "learning_rate": 1.7791351560985068e-06, "loss": 5.3344, "step": 9948 }, { "epoch": 0.96, "grad_norm": 1.0112926959991455, "learning_rate": 1.7597440372309482e-06, "loss": 5.4095, "step": 9952 }, { "epoch": 0.97, "grad_norm": 1.0115467309951782, "learning_rate": 1.7403529183633898e-06, "loss": 5.2626, "step": 9956 }, { "epoch": 0.97, "grad_norm": 1.0050586462020874, "learning_rate": 1.720961799495831e-06, "loss": 5.346, "step": 9960 }, { "epoch": 0.97, "grad_norm": 1.0998096466064453, "learning_rate": 1.7015706806282724e-06, "loss": 5.2961, "step": 9964 }, { "epoch": 0.97, "grad_norm": 1.0492075681686401, "learning_rate": 1.6821795617607138e-06, "loss": 5.3103, "step": 9968 }, { "epoch": 0.97, "grad_norm": 1.1068048477172852, "learning_rate": 1.662788442893155e-06, "loss": 5.3157, "step": 9972 }, { "epoch": 0.97, "grad_norm": 1.0755717754364014, "learning_rate": 1.6433973240255963e-06, "loss": 5.3995, "step": 9976 }, { "epoch": 0.97, "grad_norm": 1.0472952127456665, "learning_rate": 1.6240062051580375e-06, "loss": 5.2634, "step": 9980 }, { "epoch": 0.97, "grad_norm": 1.0923290252685547, "learning_rate": 1.6046150862904789e-06, "loss": 5.267, "step": 9984 }, { "epoch": 0.97, "grad_norm": 1.0469489097595215, "learning_rate": 1.5852239674229205e-06, "loss": 5.2445, "step": 9988 }, { "epoch": 0.97, "grad_norm": 1.055059552192688, "learning_rate": 1.5658328485553619e-06, "loss": 5.2376, "step": 9992 }, { "epoch": 0.97, "grad_norm": 0.9697688817977905, "learning_rate": 1.546441729687803e-06, "loss": 5.2468, "step": 9996 }, { "epoch": 0.97, "grad_norm": 1.0974935293197632, "learning_rate": 1.5270506108202444e-06, "loss": 5.3911, "step": 10000 }, { "epoch": 0.97, "grad_norm": 1.0364536046981812, "learning_rate": 1.5076594919526856e-06, "loss": 5.171, "step": 10004 }, { "epoch": 0.97, "grad_norm": 1.0847764015197754, "learning_rate": 1.4882683730851272e-06, "loss": 5.3467, "step": 10008 }, { "epoch": 0.97, "grad_norm": 1.0459425449371338, "learning_rate": 1.4688772542175684e-06, "loss": 5.2316, "step": 10012 }, { "epoch": 0.97, "grad_norm": 1.084526777267456, "learning_rate": 1.4494861353500097e-06, "loss": 5.279, "step": 10016 }, { "epoch": 0.97, "grad_norm": 1.141874074935913, "learning_rate": 1.4300950164824511e-06, "loss": 5.1842, "step": 10020 }, { "epoch": 0.97, "grad_norm": 1.1359285116195679, "learning_rate": 1.4107038976148925e-06, "loss": 5.2476, "step": 10024 }, { "epoch": 0.97, "grad_norm": 1.1360441446304321, "learning_rate": 1.3913127787473339e-06, "loss": 5.4255, "step": 10028 }, { "epoch": 0.97, "grad_norm": 1.0821614265441895, "learning_rate": 1.371921659879775e-06, "loss": 5.2677, "step": 10032 }, { "epoch": 0.97, "grad_norm": 1.108450174331665, "learning_rate": 1.3525305410122164e-06, "loss": 5.3634, "step": 10036 }, { "epoch": 0.97, "grad_norm": 1.1564322710037231, "learning_rate": 1.3331394221446578e-06, "loss": 5.3358, "step": 10040 }, { "epoch": 0.97, "grad_norm": 1.0499944686889648, "learning_rate": 1.3137483032770992e-06, "loss": 5.2706, "step": 10044 }, { "epoch": 0.97, "grad_norm": 1.115499496459961, "learning_rate": 1.2943571844095404e-06, "loss": 5.2685, "step": 10048 }, { "epoch": 0.97, "grad_norm": 1.0848426818847656, "learning_rate": 1.274966065541982e-06, "loss": 5.1596, "step": 10052 }, { "epoch": 0.97, "grad_norm": 1.014477014541626, "learning_rate": 1.2555749466744232e-06, "loss": 5.3194, "step": 10056 }, { "epoch": 0.98, "grad_norm": 1.0822529792785645, "learning_rate": 1.2361838278068645e-06, "loss": 5.2521, "step": 10060 }, { "epoch": 0.98, "grad_norm": 1.097193717956543, "learning_rate": 1.2167927089393057e-06, "loss": 5.2256, "step": 10064 }, { "epoch": 0.98, "grad_norm": 1.0768396854400635, "learning_rate": 1.1974015900717473e-06, "loss": 5.3245, "step": 10068 }, { "epoch": 0.98, "grad_norm": 1.1191695928573608, "learning_rate": 1.1780104712041885e-06, "loss": 5.3045, "step": 10072 }, { "epoch": 0.98, "grad_norm": 1.0832325220108032, "learning_rate": 1.1586193523366299e-06, "loss": 5.3106, "step": 10076 }, { "epoch": 0.98, "grad_norm": 1.083602786064148, "learning_rate": 1.1392282334690712e-06, "loss": 5.3153, "step": 10080 }, { "epoch": 0.98, "grad_norm": 1.1011203527450562, "learning_rate": 1.1198371146015126e-06, "loss": 5.3236, "step": 10084 }, { "epoch": 0.98, "grad_norm": 1.1766139268875122, "learning_rate": 1.100445995733954e-06, "loss": 5.2973, "step": 10088 }, { "epoch": 0.98, "grad_norm": 1.0857359170913696, "learning_rate": 1.0810548768663952e-06, "loss": 5.2986, "step": 10092 }, { "epoch": 0.98, "grad_norm": 1.1640616655349731, "learning_rate": 1.0616637579988366e-06, "loss": 5.3083, "step": 10096 }, { "epoch": 0.98, "grad_norm": 1.1388750076293945, "learning_rate": 1.042272639131278e-06, "loss": 5.3472, "step": 10100 }, { "epoch": 0.98, "grad_norm": 1.1172057390213013, "learning_rate": 1.0228815202637193e-06, "loss": 5.2601, "step": 10104 }, { "epoch": 0.98, "grad_norm": 1.078047275543213, "learning_rate": 1.0034904013961605e-06, "loss": 5.2567, "step": 10108 }, { "epoch": 0.98, "grad_norm": 1.134878158569336, "learning_rate": 9.840992825286019e-07, "loss": 5.2999, "step": 10112 }, { "epoch": 0.98, "grad_norm": 1.049080729484558, "learning_rate": 9.647081636610433e-07, "loss": 5.2823, "step": 10116 }, { "epoch": 0.98, "grad_norm": 1.0992294549942017, "learning_rate": 9.453170447934847e-07, "loss": 5.264, "step": 10120 }, { "epoch": 0.98, "grad_norm": 0.9882086515426636, "learning_rate": 9.259259259259259e-07, "loss": 5.2944, "step": 10124 }, { "epoch": 0.98, "grad_norm": 1.0819331407546997, "learning_rate": 9.065348070583674e-07, "loss": 5.3227, "step": 10128 }, { "epoch": 0.98, "grad_norm": 1.0847752094268799, "learning_rate": 8.871436881908087e-07, "loss": 5.375, "step": 10132 }, { "epoch": 0.98, "grad_norm": 1.0530517101287842, "learning_rate": 8.6775256932325e-07, "loss": 5.2639, "step": 10136 }, { "epoch": 0.98, "grad_norm": 1.010219931602478, "learning_rate": 8.483614504556913e-07, "loss": 5.2476, "step": 10140 }, { "epoch": 0.98, "grad_norm": 0.9741299748420715, "learning_rate": 8.289703315881327e-07, "loss": 5.3081, "step": 10144 }, { "epoch": 0.98, "grad_norm": 1.1004730463027954, "learning_rate": 8.09579212720574e-07, "loss": 5.2126, "step": 10148 }, { "epoch": 0.98, "grad_norm": 1.1317744255065918, "learning_rate": 7.901880938530153e-07, "loss": 5.2943, "step": 10152 }, { "epoch": 0.98, "grad_norm": 1.1052905321121216, "learning_rate": 7.707969749854567e-07, "loss": 5.4339, "step": 10156 }, { "epoch": 0.99, "grad_norm": 1.0474963188171387, "learning_rate": 7.51405856117898e-07, "loss": 5.3417, "step": 10160 }, { "epoch": 0.99, "grad_norm": 1.0810647010803223, "learning_rate": 7.320147372503393e-07, "loss": 5.4219, "step": 10164 }, { "epoch": 0.99, "grad_norm": 1.0385664701461792, "learning_rate": 7.126236183827807e-07, "loss": 5.3207, "step": 10168 }, { "epoch": 0.99, "grad_norm": 1.1225430965423584, "learning_rate": 6.93232499515222e-07, "loss": 5.3818, "step": 10172 }, { "epoch": 0.99, "grad_norm": 1.021005630493164, "learning_rate": 6.738413806476634e-07, "loss": 5.3095, "step": 10176 }, { "epoch": 0.99, "grad_norm": 1.0524823665618896, "learning_rate": 6.544502617801048e-07, "loss": 5.2837, "step": 10180 }, { "epoch": 0.99, "grad_norm": 1.0468004941940308, "learning_rate": 6.350591429125462e-07, "loss": 5.2493, "step": 10184 }, { "epoch": 0.99, "grad_norm": 1.0307637453079224, "learning_rate": 6.156680240449874e-07, "loss": 5.2559, "step": 10188 }, { "epoch": 0.99, "grad_norm": 1.0332852602005005, "learning_rate": 5.962769051774288e-07, "loss": 5.4378, "step": 10192 }, { "epoch": 0.99, "grad_norm": 1.0646268129348755, "learning_rate": 5.768857863098701e-07, "loss": 5.2816, "step": 10196 }, { "epoch": 0.99, "grad_norm": 1.0905969142913818, "learning_rate": 5.574946674423115e-07, "loss": 5.363, "step": 10200 }, { "epoch": 0.99, "grad_norm": 1.0439486503601074, "learning_rate": 5.381035485747528e-07, "loss": 5.3199, "step": 10204 }, { "epoch": 0.99, "grad_norm": 1.0738022327423096, "learning_rate": 5.187124297071941e-07, "loss": 5.251, "step": 10208 }, { "epoch": 0.99, "grad_norm": 1.030446171760559, "learning_rate": 4.993213108396354e-07, "loss": 5.3156, "step": 10212 }, { "epoch": 0.99, "grad_norm": 1.042098045349121, "learning_rate": 4.799301919720768e-07, "loss": 5.2401, "step": 10216 }, { "epoch": 0.99, "grad_norm": 0.9965024590492249, "learning_rate": 4.6053907310451814e-07, "loss": 5.2561, "step": 10220 }, { "epoch": 0.99, "grad_norm": 1.0656026601791382, "learning_rate": 4.411479542369595e-07, "loss": 5.3587, "step": 10224 }, { "epoch": 0.99, "grad_norm": 1.0533535480499268, "learning_rate": 4.217568353694008e-07, "loss": 5.3381, "step": 10228 }, { "epoch": 0.99, "grad_norm": 1.0184639692306519, "learning_rate": 4.023657165018422e-07, "loss": 5.3548, "step": 10232 }, { "epoch": 0.99, "grad_norm": 1.0845178365707397, "learning_rate": 3.829745976342835e-07, "loss": 5.3597, "step": 10236 }, { "epoch": 0.99, "grad_norm": 1.068476915359497, "learning_rate": 3.6358347876672485e-07, "loss": 5.2314, "step": 10240 }, { "epoch": 0.99, "grad_norm": 1.1127116680145264, "learning_rate": 3.441923598991662e-07, "loss": 5.2483, "step": 10244 }, { "epoch": 0.99, "grad_norm": 0.9709210395812988, "learning_rate": 3.248012410316075e-07, "loss": 5.2665, "step": 10248 }, { "epoch": 0.99, "grad_norm": 1.0695501565933228, "learning_rate": 3.054101221640489e-07, "loss": 5.2079, "step": 10252 }, { "epoch": 0.99, "grad_norm": 1.1100637912750244, "learning_rate": 2.860190032964902e-07, "loss": 5.3078, "step": 10256 }, { "epoch": 0.99, "grad_norm": 1.0800820589065552, "learning_rate": 2.6662788442893156e-07, "loss": 5.4309, "step": 10260 }, { "epoch": 1.0, "grad_norm": 1.0710211992263794, "learning_rate": 2.472367655613729e-07, "loss": 5.3063, "step": 10264 }, { "epoch": 1.0, "grad_norm": 1.0967445373535156, "learning_rate": 2.2784564669381422e-07, "loss": 5.2245, "step": 10268 }, { "epoch": 1.0, "grad_norm": 1.05315101146698, "learning_rate": 2.084545278262556e-07, "loss": 5.3429, "step": 10272 }, { "epoch": 1.0, "grad_norm": 0.97821044921875, "learning_rate": 1.8906340895869693e-07, "loss": 5.2508, "step": 10276 }, { "epoch": 1.0, "grad_norm": 1.0844597816467285, "learning_rate": 1.6967229009113826e-07, "loss": 5.2811, "step": 10280 }, { "epoch": 1.0, "grad_norm": 1.0489965677261353, "learning_rate": 1.5028117122357962e-07, "loss": 5.3193, "step": 10284 }, { "epoch": 1.0, "grad_norm": 1.0502104759216309, "learning_rate": 1.3089005235602095e-07, "loss": 5.3323, "step": 10288 }, { "epoch": 1.0, "grad_norm": 1.1087231636047363, "learning_rate": 1.114989334884623e-07, "loss": 5.2669, "step": 10292 }, { "epoch": 1.0, "grad_norm": 1.0919489860534668, "learning_rate": 9.210781462090364e-08, "loss": 5.2765, "step": 10296 }, { "epoch": 1.0, "grad_norm": 1.017907738685608, "learning_rate": 7.271669575334497e-08, "loss": 5.3242, "step": 10300 }, { "epoch": 1.0, "grad_norm": 1.1086193323135376, "learning_rate": 5.332557688578631e-08, "loss": 5.2659, "step": 10304 }, { "epoch": 1.0, "grad_norm": 1.0139724016189575, "learning_rate": 3.393445801822765e-08, "loss": 5.3324, "step": 10308 }, { "epoch": 1.0, "grad_norm": 0.9419534802436829, "learning_rate": 1.4543339150668994e-08, "loss": 5.3066, "step": 10312 }, { "epoch": 1.0, "step": 10314, "total_flos": 8.68702227309527e+16, "train_loss": 5.446075817446203, "train_runtime": 2459.6002, "train_samples_per_second": 67.094, "train_steps_per_second": 4.193 } ], "logging_steps": 4, "max_steps": 10314, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1032, "total_flos": 8.68702227309527e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }