{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 34046, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0002937202608235916, "grad_norm": 355849.8125, "learning_rate": 9.997062797391764e-06, "loss": 0.3584, "step": 10 }, { "epoch": 0.0005874405216471832, "grad_norm": 365909.71875, "learning_rate": 9.994125594783528e-06, "loss": 0.3596, "step": 20 }, { "epoch": 0.0008811607824707749, "grad_norm": 446849.40625, "learning_rate": 9.991188392175293e-06, "loss": 0.3529, "step": 30 }, { "epoch": 0.0011748810432943664, "grad_norm": 251224.796875, "learning_rate": 9.988251189567058e-06, "loss": 0.3389, "step": 40 }, { "epoch": 0.001468601304117958, "grad_norm": 373474.09375, "learning_rate": 9.98531398695882e-06, "loss": 0.3534, "step": 50 }, { "epoch": 0.0017623215649415498, "grad_norm": 360658.9375, "learning_rate": 9.982376784350585e-06, "loss": 0.3253, "step": 60 }, { "epoch": 0.0020560418257651414, "grad_norm": 170724.4375, "learning_rate": 9.97943958174235e-06, "loss": 0.3325, "step": 70 }, { "epoch": 0.0023497620865887327, "grad_norm": 322108.1875, "learning_rate": 9.976502379134113e-06, "loss": 0.2944, "step": 80 }, { "epoch": 0.0026434823474123246, "grad_norm": 158976.4375, "learning_rate": 9.973565176525877e-06, "loss": 0.3158, "step": 90 }, { "epoch": 0.002937202608235916, "grad_norm": 353070.03125, "learning_rate": 9.970627973917642e-06, "loss": 0.3046, "step": 100 }, { "epoch": 0.0032309228690595077, "grad_norm": 354673.53125, "learning_rate": 9.967690771309407e-06, "loss": 0.3081, "step": 110 }, { "epoch": 0.0035246431298830995, "grad_norm": 320690.53125, "learning_rate": 9.96475356870117e-06, "loss": 0.3258, "step": 120 }, { "epoch": 0.003818363390706691, "grad_norm": 281013.5, "learning_rate": 9.961816366092934e-06, "loss": 0.3402, "step": 130 }, { "epoch": 0.004112083651530283, "grad_norm": 239469.484375, "learning_rate": 9.958879163484699e-06, "loss": 0.3001, "step": 140 }, { "epoch": 0.0044058039123538745, "grad_norm": 352191.71875, "learning_rate": 9.955941960876462e-06, "loss": 0.3099, "step": 150 }, { "epoch": 0.0046995241731774655, "grad_norm": 226430.03125, "learning_rate": 9.953004758268226e-06, "loss": 0.3165, "step": 160 }, { "epoch": 0.004993244434001057, "grad_norm": 491248.71875, "learning_rate": 9.95006755565999e-06, "loss": 0.3162, "step": 170 }, { "epoch": 0.005286964694824649, "grad_norm": 437459.3125, "learning_rate": 9.947130353051754e-06, "loss": 0.2974, "step": 180 }, { "epoch": 0.005580684955648241, "grad_norm": 279224.875, "learning_rate": 9.944193150443519e-06, "loss": 0.33, "step": 190 }, { "epoch": 0.005874405216471832, "grad_norm": 286324.46875, "learning_rate": 9.941255947835282e-06, "loss": 0.3129, "step": 200 }, { "epoch": 0.006168125477295424, "grad_norm": 544223.25, "learning_rate": 9.938318745227046e-06, "loss": 0.29, "step": 210 }, { "epoch": 0.0064618457381190155, "grad_norm": 353019.59375, "learning_rate": 9.935381542618811e-06, "loss": 0.3103, "step": 220 }, { "epoch": 0.006755565998942607, "grad_norm": 259659.359375, "learning_rate": 9.932444340010576e-06, "loss": 0.3108, "step": 230 }, { "epoch": 0.007049286259766199, "grad_norm": 264642.90625, "learning_rate": 9.929507137402338e-06, "loss": 0.2899, "step": 240 }, { "epoch": 0.00734300652058979, "grad_norm": 289597.1875, "learning_rate": 9.926569934794103e-06, "loss": 0.2848, "step": 250 }, { "epoch": 0.007636726781413382, "grad_norm": 366726.625, "learning_rate": 9.923632732185868e-06, "loss": 0.2767, "step": 260 }, { "epoch": 0.007930447042236973, "grad_norm": 642068.5, "learning_rate": 9.92069552957763e-06, "loss": 0.3161, "step": 270 }, { "epoch": 0.008224167303060565, "grad_norm": 285129.625, "learning_rate": 9.917758326969395e-06, "loss": 0.2775, "step": 280 }, { "epoch": 0.008517887563884156, "grad_norm": 449742.6875, "learning_rate": 9.91482112436116e-06, "loss": 0.2924, "step": 290 }, { "epoch": 0.008811607824707749, "grad_norm": 278153.25, "learning_rate": 9.911883921752923e-06, "loss": 0.2814, "step": 300 }, { "epoch": 0.00910532808553134, "grad_norm": 485545.5, "learning_rate": 9.908946719144687e-06, "loss": 0.3056, "step": 310 }, { "epoch": 0.009399048346354931, "grad_norm": 363030.15625, "learning_rate": 9.90600951653645e-06, "loss": 0.3159, "step": 320 }, { "epoch": 0.009692768607178524, "grad_norm": 292721.4375, "learning_rate": 9.903072313928215e-06, "loss": 0.2924, "step": 330 }, { "epoch": 0.009986488868002115, "grad_norm": 765384.0625, "learning_rate": 9.90013511131998e-06, "loss": 0.2896, "step": 340 }, { "epoch": 0.010280209128825706, "grad_norm": 204394.3125, "learning_rate": 9.897197908711744e-06, "loss": 0.2974, "step": 350 }, { "epoch": 0.010573929389649298, "grad_norm": 319567.40625, "learning_rate": 9.894260706103507e-06, "loss": 0.2971, "step": 360 }, { "epoch": 0.01086764965047289, "grad_norm": 268493.59375, "learning_rate": 9.891323503495272e-06, "loss": 0.2891, "step": 370 }, { "epoch": 0.011161369911296482, "grad_norm": 218346.9375, "learning_rate": 9.888386300887037e-06, "loss": 0.3158, "step": 380 }, { "epoch": 0.011455090172120073, "grad_norm": 487387.90625, "learning_rate": 9.8854490982788e-06, "loss": 0.3073, "step": 390 }, { "epoch": 0.011748810432943664, "grad_norm": 485608.65625, "learning_rate": 9.882511895670564e-06, "loss": 0.2929, "step": 400 }, { "epoch": 0.012042530693767256, "grad_norm": 328877.0, "learning_rate": 9.879574693062329e-06, "loss": 0.3224, "step": 410 }, { "epoch": 0.012336250954590847, "grad_norm": 386102.59375, "learning_rate": 9.876637490454092e-06, "loss": 0.2892, "step": 420 }, { "epoch": 0.01262997121541444, "grad_norm": 398359.625, "learning_rate": 9.873700287845856e-06, "loss": 0.2894, "step": 430 }, { "epoch": 0.012923691476238031, "grad_norm": 273238.59375, "learning_rate": 9.870763085237621e-06, "loss": 0.282, "step": 440 }, { "epoch": 0.013217411737061622, "grad_norm": 296892.875, "learning_rate": 9.867825882629386e-06, "loss": 0.3, "step": 450 }, { "epoch": 0.013511131997885215, "grad_norm": 230401.734375, "learning_rate": 9.864888680021149e-06, "loss": 0.2853, "step": 460 }, { "epoch": 0.013804852258708806, "grad_norm": 290968.9375, "learning_rate": 9.861951477412913e-06, "loss": 0.287, "step": 470 }, { "epoch": 0.014098572519532398, "grad_norm": 298637.78125, "learning_rate": 9.859014274804676e-06, "loss": 0.271, "step": 480 }, { "epoch": 0.01439229278035599, "grad_norm": 289124.15625, "learning_rate": 9.85607707219644e-06, "loss": 0.3159, "step": 490 }, { "epoch": 0.01468601304117958, "grad_norm": 313876.8125, "learning_rate": 9.853139869588205e-06, "loss": 0.306, "step": 500 }, { "epoch": 0.014979733302003173, "grad_norm": 234550.359375, "learning_rate": 9.850202666979968e-06, "loss": 0.273, "step": 510 }, { "epoch": 0.015273453562826764, "grad_norm": 322167.9375, "learning_rate": 9.847265464371733e-06, "loss": 0.2761, "step": 520 }, { "epoch": 0.015567173823650355, "grad_norm": 255505.34375, "learning_rate": 9.844328261763498e-06, "loss": 0.2837, "step": 530 }, { "epoch": 0.015860894084473946, "grad_norm": 307745.90625, "learning_rate": 9.84139105915526e-06, "loss": 0.281, "step": 540 }, { "epoch": 0.016154614345297538, "grad_norm": 285434.78125, "learning_rate": 9.838453856547025e-06, "loss": 0.2751, "step": 550 }, { "epoch": 0.01644833460612113, "grad_norm": 246541.734375, "learning_rate": 9.83551665393879e-06, "loss": 0.2944, "step": 560 }, { "epoch": 0.01674205486694472, "grad_norm": 159543.09375, "learning_rate": 9.832579451330554e-06, "loss": 0.2711, "step": 570 }, { "epoch": 0.017035775127768313, "grad_norm": 300660.5, "learning_rate": 9.829642248722317e-06, "loss": 0.275, "step": 580 }, { "epoch": 0.017329495388591905, "grad_norm": 236116.09375, "learning_rate": 9.826705046114082e-06, "loss": 0.2673, "step": 590 }, { "epoch": 0.017623215649415498, "grad_norm": 490842.09375, "learning_rate": 9.823767843505847e-06, "loss": 0.2606, "step": 600 }, { "epoch": 0.017916935910239087, "grad_norm": 493335.46875, "learning_rate": 9.82083064089761e-06, "loss": 0.2822, "step": 610 }, { "epoch": 0.01821065617106268, "grad_norm": 223498.265625, "learning_rate": 9.817893438289374e-06, "loss": 0.2739, "step": 620 }, { "epoch": 0.018504376431886273, "grad_norm": 383225.0625, "learning_rate": 9.814956235681137e-06, "loss": 0.2742, "step": 630 }, { "epoch": 0.018798096692709862, "grad_norm": 315138.21875, "learning_rate": 9.812019033072903e-06, "loss": 0.2648, "step": 640 }, { "epoch": 0.019091816953533455, "grad_norm": 295599.40625, "learning_rate": 9.809081830464666e-06, "loss": 0.2999, "step": 650 }, { "epoch": 0.019385537214357047, "grad_norm": 327291.9375, "learning_rate": 9.80614462785643e-06, "loss": 0.2708, "step": 660 }, { "epoch": 0.019679257475180637, "grad_norm": 233865.6875, "learning_rate": 9.803207425248194e-06, "loss": 0.2807, "step": 670 }, { "epoch": 0.01997297773600423, "grad_norm": 386481.0625, "learning_rate": 9.800270222639959e-06, "loss": 0.2488, "step": 680 }, { "epoch": 0.020266697996827822, "grad_norm": 204364.640625, "learning_rate": 9.797333020031723e-06, "loss": 0.2625, "step": 690 }, { "epoch": 0.02056041825765141, "grad_norm": 285439.09375, "learning_rate": 9.794395817423486e-06, "loss": 0.2765, "step": 700 }, { "epoch": 0.020854138518475004, "grad_norm": 218414.734375, "learning_rate": 9.79145861481525e-06, "loss": 0.2967, "step": 710 }, { "epoch": 0.021147858779298596, "grad_norm": 382493.40625, "learning_rate": 9.788521412207015e-06, "loss": 0.2813, "step": 720 }, { "epoch": 0.02144157904012219, "grad_norm": 284462.53125, "learning_rate": 9.785584209598778e-06, "loss": 0.2798, "step": 730 }, { "epoch": 0.02173529930094578, "grad_norm": 323670.75, "learning_rate": 9.782647006990543e-06, "loss": 0.2542, "step": 740 }, { "epoch": 0.02202901956176937, "grad_norm": 327430.40625, "learning_rate": 9.779709804382308e-06, "loss": 0.2346, "step": 750 }, { "epoch": 0.022322739822592964, "grad_norm": 391952.625, "learning_rate": 9.776772601774072e-06, "loss": 0.2614, "step": 760 }, { "epoch": 0.022616460083416553, "grad_norm": 287671.125, "learning_rate": 9.773835399165835e-06, "loss": 0.2594, "step": 770 }, { "epoch": 0.022910180344240146, "grad_norm": 339487.625, "learning_rate": 9.770898196557598e-06, "loss": 0.2459, "step": 780 }, { "epoch": 0.023203900605063738, "grad_norm": 460535.125, "learning_rate": 9.767960993949363e-06, "loss": 0.2661, "step": 790 }, { "epoch": 0.023497620865887327, "grad_norm": 266721.375, "learning_rate": 9.765023791341127e-06, "loss": 0.274, "step": 800 }, { "epoch": 0.02379134112671092, "grad_norm": 274317.15625, "learning_rate": 9.762086588732892e-06, "loss": 0.2853, "step": 810 }, { "epoch": 0.024085061387534513, "grad_norm": 334818.6875, "learning_rate": 9.759149386124655e-06, "loss": 0.248, "step": 820 }, { "epoch": 0.024378781648358105, "grad_norm": 423806.46875, "learning_rate": 9.75621218351642e-06, "loss": 0.284, "step": 830 }, { "epoch": 0.024672501909181695, "grad_norm": 372714.0, "learning_rate": 9.753274980908184e-06, "loss": 0.2458, "step": 840 }, { "epoch": 0.024966222170005287, "grad_norm": 267876.3125, "learning_rate": 9.750337778299947e-06, "loss": 0.2578, "step": 850 }, { "epoch": 0.02525994243082888, "grad_norm": 206855.90625, "learning_rate": 9.747400575691712e-06, "loss": 0.2577, "step": 860 }, { "epoch": 0.02555366269165247, "grad_norm": 412404.9375, "learning_rate": 9.744463373083476e-06, "loss": 0.2505, "step": 870 }, { "epoch": 0.025847382952476062, "grad_norm": 329650.1875, "learning_rate": 9.741526170475241e-06, "loss": 0.2414, "step": 880 }, { "epoch": 0.026141103213299655, "grad_norm": 478736.5625, "learning_rate": 9.738588967867004e-06, "loss": 0.2645, "step": 890 }, { "epoch": 0.026434823474123244, "grad_norm": 356531.84375, "learning_rate": 9.735651765258769e-06, "loss": 0.2632, "step": 900 }, { "epoch": 0.026728543734946836, "grad_norm": 208389.875, "learning_rate": 9.732714562650533e-06, "loss": 0.2517, "step": 910 }, { "epoch": 0.02702226399577043, "grad_norm": 304709.84375, "learning_rate": 9.729777360042296e-06, "loss": 0.2363, "step": 920 }, { "epoch": 0.02731598425659402, "grad_norm": 337972.875, "learning_rate": 9.72684015743406e-06, "loss": 0.2588, "step": 930 }, { "epoch": 0.02760970451741761, "grad_norm": 380935.0, "learning_rate": 9.723902954825824e-06, "loss": 0.2599, "step": 940 }, { "epoch": 0.027903424778241204, "grad_norm": 436125.46875, "learning_rate": 9.72096575221759e-06, "loss": 0.2451, "step": 950 }, { "epoch": 0.028197145039064796, "grad_norm": 354980.9375, "learning_rate": 9.718028549609353e-06, "loss": 0.2425, "step": 960 }, { "epoch": 0.028490865299888386, "grad_norm": 381050.125, "learning_rate": 9.715091347001116e-06, "loss": 0.2631, "step": 970 }, { "epoch": 0.02878458556071198, "grad_norm": 381020.28125, "learning_rate": 9.71215414439288e-06, "loss": 0.2557, "step": 980 }, { "epoch": 0.02907830582153557, "grad_norm": 287476.9375, "learning_rate": 9.709216941784645e-06, "loss": 0.2488, "step": 990 }, { "epoch": 0.02937202608235916, "grad_norm": 358732.84375, "learning_rate": 9.70627973917641e-06, "loss": 0.2614, "step": 1000 }, { "epoch": 0.029665746343182753, "grad_norm": 290500.84375, "learning_rate": 9.703342536568173e-06, "loss": 0.249, "step": 1010 }, { "epoch": 0.029959466604006346, "grad_norm": 429046.25, "learning_rate": 9.700405333959937e-06, "loss": 0.2426, "step": 1020 }, { "epoch": 0.030253186864829935, "grad_norm": 183410.671875, "learning_rate": 9.697468131351702e-06, "loss": 0.2469, "step": 1030 }, { "epoch": 0.030546907125653527, "grad_norm": 242722.984375, "learning_rate": 9.694530928743465e-06, "loss": 0.2284, "step": 1040 }, { "epoch": 0.03084062738647712, "grad_norm": 421513.28125, "learning_rate": 9.69159372613523e-06, "loss": 0.2333, "step": 1050 }, { "epoch": 0.03113434764730071, "grad_norm": 458299.375, "learning_rate": 9.688656523526994e-06, "loss": 0.23, "step": 1060 }, { "epoch": 0.0314280679081243, "grad_norm": 301185.78125, "learning_rate": 9.685719320918757e-06, "loss": 0.2469, "step": 1070 }, { "epoch": 0.03172178816894789, "grad_norm": 251586.453125, "learning_rate": 9.682782118310522e-06, "loss": 0.2452, "step": 1080 }, { "epoch": 0.03201550842977149, "grad_norm": 294742.46875, "learning_rate": 9.679844915702285e-06, "loss": 0.2292, "step": 1090 }, { "epoch": 0.032309228690595077, "grad_norm": 395922.5625, "learning_rate": 9.676907713094051e-06, "loss": 0.2346, "step": 1100 }, { "epoch": 0.032602948951418666, "grad_norm": 226168.84375, "learning_rate": 9.673970510485814e-06, "loss": 0.2307, "step": 1110 }, { "epoch": 0.03289666921224226, "grad_norm": 427780.71875, "learning_rate": 9.671033307877579e-06, "loss": 0.2585, "step": 1120 }, { "epoch": 0.03319038947306585, "grad_norm": 388563.125, "learning_rate": 9.668096105269342e-06, "loss": 0.2437, "step": 1130 }, { "epoch": 0.03348410973388944, "grad_norm": 390856.75, "learning_rate": 9.665158902661106e-06, "loss": 0.2514, "step": 1140 }, { "epoch": 0.033777829994713036, "grad_norm": 411799.28125, "learning_rate": 9.662221700052871e-06, "loss": 0.2613, "step": 1150 }, { "epoch": 0.034071550255536626, "grad_norm": 454191.78125, "learning_rate": 9.659284497444634e-06, "loss": 0.2478, "step": 1160 }, { "epoch": 0.03436527051636022, "grad_norm": 227370.71875, "learning_rate": 9.656347294836398e-06, "loss": 0.2421, "step": 1170 }, { "epoch": 0.03465899077718381, "grad_norm": 758629.25, "learning_rate": 9.653410092228163e-06, "loss": 0.2382, "step": 1180 }, { "epoch": 0.0349527110380074, "grad_norm": 279940.8125, "learning_rate": 9.650472889619926e-06, "loss": 0.2437, "step": 1190 }, { "epoch": 0.035246431298830996, "grad_norm": 382634.9375, "learning_rate": 9.64753568701169e-06, "loss": 0.2225, "step": 1200 }, { "epoch": 0.035540151559654586, "grad_norm": 288628.53125, "learning_rate": 9.644598484403455e-06, "loss": 0.2401, "step": 1210 }, { "epoch": 0.035833871820478175, "grad_norm": 469821.9375, "learning_rate": 9.64166128179522e-06, "loss": 0.2259, "step": 1220 }, { "epoch": 0.03612759208130177, "grad_norm": 446818.5, "learning_rate": 9.638724079186983e-06, "loss": 0.2217, "step": 1230 }, { "epoch": 0.03642131234212536, "grad_norm": 316462.8125, "learning_rate": 9.635786876578748e-06, "loss": 0.2352, "step": 1240 }, { "epoch": 0.03671503260294895, "grad_norm": 323125.78125, "learning_rate": 9.632849673970512e-06, "loss": 0.2426, "step": 1250 }, { "epoch": 0.037008752863772545, "grad_norm": 535299.0625, "learning_rate": 9.629912471362275e-06, "loss": 0.237, "step": 1260 }, { "epoch": 0.037302473124596135, "grad_norm": 262606.59375, "learning_rate": 9.62697526875404e-06, "loss": 0.2334, "step": 1270 }, { "epoch": 0.037596193385419724, "grad_norm": 512297.09375, "learning_rate": 9.624038066145803e-06, "loss": 0.2248, "step": 1280 }, { "epoch": 0.03788991364624332, "grad_norm": 443072.15625, "learning_rate": 9.621100863537567e-06, "loss": 0.2086, "step": 1290 }, { "epoch": 0.03818363390706691, "grad_norm": 291394.0625, "learning_rate": 9.618163660929332e-06, "loss": 0.2107, "step": 1300 }, { "epoch": 0.0384773541678905, "grad_norm": 386752.8125, "learning_rate": 9.615226458321095e-06, "loss": 0.2334, "step": 1310 }, { "epoch": 0.038771074428714095, "grad_norm": 371505.3125, "learning_rate": 9.61228925571286e-06, "loss": 0.2208, "step": 1320 }, { "epoch": 0.039064794689537684, "grad_norm": 280528.5625, "learning_rate": 9.609352053104624e-06, "loss": 0.2163, "step": 1330 }, { "epoch": 0.03935851495036127, "grad_norm": 575539.625, "learning_rate": 9.606414850496389e-06, "loss": 0.2349, "step": 1340 }, { "epoch": 0.03965223521118487, "grad_norm": 748801.4375, "learning_rate": 9.603477647888152e-06, "loss": 0.2185, "step": 1350 }, { "epoch": 0.03994595547200846, "grad_norm": 349976.96875, "learning_rate": 9.600540445279916e-06, "loss": 0.2139, "step": 1360 }, { "epoch": 0.04023967573283205, "grad_norm": 519744.09375, "learning_rate": 9.597603242671681e-06, "loss": 0.2391, "step": 1370 }, { "epoch": 0.040533395993655644, "grad_norm": 600618.875, "learning_rate": 9.594666040063444e-06, "loss": 0.2459, "step": 1380 }, { "epoch": 0.04082711625447923, "grad_norm": 280526.90625, "learning_rate": 9.591728837455209e-06, "loss": 0.2469, "step": 1390 }, { "epoch": 0.04112083651530282, "grad_norm": 427909.96875, "learning_rate": 9.588791634846971e-06, "loss": 0.2216, "step": 1400 }, { "epoch": 0.04141455677612642, "grad_norm": 288491.34375, "learning_rate": 9.585854432238738e-06, "loss": 0.2276, "step": 1410 }, { "epoch": 0.04170827703695001, "grad_norm": 347637.78125, "learning_rate": 9.5829172296305e-06, "loss": 0.2379, "step": 1420 }, { "epoch": 0.042001997297773604, "grad_norm": 358120.75, "learning_rate": 9.579980027022264e-06, "loss": 0.2301, "step": 1430 }, { "epoch": 0.04229571755859719, "grad_norm": 300268.78125, "learning_rate": 9.577042824414028e-06, "loss": 0.2123, "step": 1440 }, { "epoch": 0.04258943781942078, "grad_norm": 348844.3125, "learning_rate": 9.574105621805793e-06, "loss": 0.2308, "step": 1450 }, { "epoch": 0.04288315808024438, "grad_norm": 291371.0625, "learning_rate": 9.571168419197558e-06, "loss": 0.2125, "step": 1460 }, { "epoch": 0.04317687834106797, "grad_norm": 329182.65625, "learning_rate": 9.56823121658932e-06, "loss": 0.2141, "step": 1470 }, { "epoch": 0.04347059860189156, "grad_norm": 313359.875, "learning_rate": 9.565294013981085e-06, "loss": 0.2271, "step": 1480 }, { "epoch": 0.04376431886271515, "grad_norm": 268659.09375, "learning_rate": 9.56235681137285e-06, "loss": 0.2107, "step": 1490 }, { "epoch": 0.04405803912353874, "grad_norm": 309288.125, "learning_rate": 9.559419608764613e-06, "loss": 0.2022, "step": 1500 }, { "epoch": 0.04435175938436233, "grad_norm": 328788.0625, "learning_rate": 9.556482406156377e-06, "loss": 0.226, "step": 1510 }, { "epoch": 0.04464547964518593, "grad_norm": 280120.875, "learning_rate": 9.553545203548142e-06, "loss": 0.2051, "step": 1520 }, { "epoch": 0.044939199906009517, "grad_norm": 339906.65625, "learning_rate": 9.550608000939907e-06, "loss": 0.2134, "step": 1530 }, { "epoch": 0.045232920166833106, "grad_norm": 362280.34375, "learning_rate": 9.54767079833167e-06, "loss": 0.2316, "step": 1540 }, { "epoch": 0.0455266404276567, "grad_norm": 370203.96875, "learning_rate": 9.544733595723432e-06, "loss": 0.2066, "step": 1550 }, { "epoch": 0.04582036068848029, "grad_norm": 502701.96875, "learning_rate": 9.541796393115199e-06, "loss": 0.2151, "step": 1560 }, { "epoch": 0.04611408094930388, "grad_norm": 277366.3125, "learning_rate": 9.538859190506962e-06, "loss": 0.2158, "step": 1570 }, { "epoch": 0.046407801210127476, "grad_norm": 264068.53125, "learning_rate": 9.535921987898726e-06, "loss": 0.2435, "step": 1580 }, { "epoch": 0.046701521470951066, "grad_norm": 221130.578125, "learning_rate": 9.53298478529049e-06, "loss": 0.2284, "step": 1590 }, { "epoch": 0.046995241731774655, "grad_norm": 306203.21875, "learning_rate": 9.530047582682254e-06, "loss": 0.2219, "step": 1600 }, { "epoch": 0.04728896199259825, "grad_norm": 394477.8125, "learning_rate": 9.527110380074019e-06, "loss": 0.2027, "step": 1610 }, { "epoch": 0.04758268225342184, "grad_norm": 359984.875, "learning_rate": 9.524173177465782e-06, "loss": 0.2128, "step": 1620 }, { "epoch": 0.04787640251424543, "grad_norm": 569353.75, "learning_rate": 9.521235974857546e-06, "loss": 0.2235, "step": 1630 }, { "epoch": 0.048170122775069026, "grad_norm": 416928.9375, "learning_rate": 9.51829877224931e-06, "loss": 0.2113, "step": 1640 }, { "epoch": 0.048463843035892615, "grad_norm": 343858.1875, "learning_rate": 9.515361569641075e-06, "loss": 0.2062, "step": 1650 }, { "epoch": 0.04875756329671621, "grad_norm": 260839.5, "learning_rate": 9.512424367032838e-06, "loss": 0.2119, "step": 1660 }, { "epoch": 0.0490512835575398, "grad_norm": 463081.75, "learning_rate": 9.509487164424603e-06, "loss": 0.2124, "step": 1670 }, { "epoch": 0.04934500381836339, "grad_norm": 399592.78125, "learning_rate": 9.506549961816368e-06, "loss": 0.2355, "step": 1680 }, { "epoch": 0.049638724079186985, "grad_norm": 359317.0, "learning_rate": 9.50361275920813e-06, "loss": 0.201, "step": 1690 }, { "epoch": 0.049932444340010575, "grad_norm": 287305.53125, "learning_rate": 9.500675556599895e-06, "loss": 0.2161, "step": 1700 }, { "epoch": 0.050226164600834164, "grad_norm": 396563.0, "learning_rate": 9.49773835399166e-06, "loss": 0.2133, "step": 1710 }, { "epoch": 0.05051988486165776, "grad_norm": 526594.8125, "learning_rate": 9.494801151383424e-06, "loss": 0.1952, "step": 1720 }, { "epoch": 0.05081360512248135, "grad_norm": 351680.90625, "learning_rate": 9.491863948775187e-06, "loss": 0.2171, "step": 1730 }, { "epoch": 0.05110732538330494, "grad_norm": 415543.84375, "learning_rate": 9.48892674616695e-06, "loss": 0.2074, "step": 1740 }, { "epoch": 0.051401045644128535, "grad_norm": 345260.15625, "learning_rate": 9.485989543558715e-06, "loss": 0.2271, "step": 1750 }, { "epoch": 0.051694765904952124, "grad_norm": 369044.28125, "learning_rate": 9.48305234095048e-06, "loss": 0.194, "step": 1760 }, { "epoch": 0.05198848616577571, "grad_norm": 380971.96875, "learning_rate": 9.480115138342244e-06, "loss": 0.213, "step": 1770 }, { "epoch": 0.05228220642659931, "grad_norm": 461770.84375, "learning_rate": 9.477177935734007e-06, "loss": 0.2292, "step": 1780 }, { "epoch": 0.0525759266874229, "grad_norm": 565917.875, "learning_rate": 9.474240733125772e-06, "loss": 0.2358, "step": 1790 }, { "epoch": 0.05286964694824649, "grad_norm": 272290.28125, "learning_rate": 9.471303530517536e-06, "loss": 0.1917, "step": 1800 }, { "epoch": 0.053163367209070084, "grad_norm": 446936.125, "learning_rate": 9.4683663279093e-06, "loss": 0.2194, "step": 1810 }, { "epoch": 0.05345708746989367, "grad_norm": 245985.25, "learning_rate": 9.465429125301064e-06, "loss": 0.2147, "step": 1820 }, { "epoch": 0.05375080773071726, "grad_norm": 238467.703125, "learning_rate": 9.462491922692829e-06, "loss": 0.2046, "step": 1830 }, { "epoch": 0.05404452799154086, "grad_norm": 370715.15625, "learning_rate": 9.459554720084593e-06, "loss": 0.1819, "step": 1840 }, { "epoch": 0.05433824825236445, "grad_norm": 249506.3125, "learning_rate": 9.456617517476356e-06, "loss": 0.2078, "step": 1850 }, { "epoch": 0.05463196851318804, "grad_norm": 297427.3125, "learning_rate": 9.45368031486812e-06, "loss": 0.2135, "step": 1860 }, { "epoch": 0.05492568877401163, "grad_norm": 413636.875, "learning_rate": 9.450743112259885e-06, "loss": 0.2041, "step": 1870 }, { "epoch": 0.05521940903483522, "grad_norm": 194450.15625, "learning_rate": 9.447805909651648e-06, "loss": 0.2124, "step": 1880 }, { "epoch": 0.05551312929565881, "grad_norm": 551346.875, "learning_rate": 9.444868707043413e-06, "loss": 0.2031, "step": 1890 }, { "epoch": 0.05580684955648241, "grad_norm": 422475.03125, "learning_rate": 9.441931504435176e-06, "loss": 0.2154, "step": 1900 }, { "epoch": 0.056100569817306, "grad_norm": 521608.21875, "learning_rate": 9.43899430182694e-06, "loss": 0.214, "step": 1910 }, { "epoch": 0.05639429007812959, "grad_norm": 239874.140625, "learning_rate": 9.436057099218705e-06, "loss": 0.203, "step": 1920 }, { "epoch": 0.05668801033895318, "grad_norm": 252188.46875, "learning_rate": 9.433119896610468e-06, "loss": 0.2018, "step": 1930 }, { "epoch": 0.05698173059977677, "grad_norm": 350221.15625, "learning_rate": 9.430182694002233e-06, "loss": 0.2265, "step": 1940 }, { "epoch": 0.05727545086060037, "grad_norm": 327853.09375, "learning_rate": 9.427245491393997e-06, "loss": 0.204, "step": 1950 }, { "epoch": 0.05756917112142396, "grad_norm": 221272.59375, "learning_rate": 9.424308288785762e-06, "loss": 0.2149, "step": 1960 }, { "epoch": 0.057862891382247546, "grad_norm": 411192.09375, "learning_rate": 9.421371086177525e-06, "loss": 0.2161, "step": 1970 }, { "epoch": 0.05815661164307114, "grad_norm": 257674.78125, "learning_rate": 9.41843388356929e-06, "loss": 0.219, "step": 1980 }, { "epoch": 0.05845033190389473, "grad_norm": 269682.5, "learning_rate": 9.415496680961054e-06, "loss": 0.213, "step": 1990 }, { "epoch": 0.05874405216471832, "grad_norm": 565572.3125, "learning_rate": 9.412559478352817e-06, "loss": 0.2237, "step": 2000 }, { "epoch": 0.059037772425541916, "grad_norm": 344856.71875, "learning_rate": 9.409622275744582e-06, "loss": 0.2127, "step": 2010 }, { "epoch": 0.059331492686365506, "grad_norm": 417499.84375, "learning_rate": 9.406685073136347e-06, "loss": 0.2084, "step": 2020 }, { "epoch": 0.059625212947189095, "grad_norm": 432564.65625, "learning_rate": 9.40374787052811e-06, "loss": 0.2125, "step": 2030 }, { "epoch": 0.05991893320801269, "grad_norm": 418082.28125, "learning_rate": 9.400810667919874e-06, "loss": 0.1974, "step": 2040 }, { "epoch": 0.06021265346883628, "grad_norm": 239642.765625, "learning_rate": 9.397873465311637e-06, "loss": 0.195, "step": 2050 }, { "epoch": 0.06050637372965987, "grad_norm": 358961.09375, "learning_rate": 9.394936262703402e-06, "loss": 0.2163, "step": 2060 }, { "epoch": 0.060800093990483466, "grad_norm": 302779.9375, "learning_rate": 9.391999060095166e-06, "loss": 0.1851, "step": 2070 }, { "epoch": 0.061093814251307055, "grad_norm": 384267.5, "learning_rate": 9.38906185748693e-06, "loss": 0.2113, "step": 2080 }, { "epoch": 0.061387534512130644, "grad_norm": 373699.3125, "learning_rate": 9.386124654878694e-06, "loss": 0.2044, "step": 2090 }, { "epoch": 0.06168125477295424, "grad_norm": 253956.109375, "learning_rate": 9.383187452270458e-06, "loss": 0.1907, "step": 2100 }, { "epoch": 0.06197497503377783, "grad_norm": 337275.46875, "learning_rate": 9.380250249662223e-06, "loss": 0.1976, "step": 2110 }, { "epoch": 0.06226869529460142, "grad_norm": 258729.28125, "learning_rate": 9.377313047053986e-06, "loss": 0.1954, "step": 2120 }, { "epoch": 0.06256241555542501, "grad_norm": 266185.4375, "learning_rate": 9.37437584444575e-06, "loss": 0.2007, "step": 2130 }, { "epoch": 0.0628561358162486, "grad_norm": 266724.125, "learning_rate": 9.371438641837515e-06, "loss": 0.2058, "step": 2140 }, { "epoch": 0.0631498560770722, "grad_norm": 291047.0, "learning_rate": 9.368501439229278e-06, "loss": 0.1969, "step": 2150 }, { "epoch": 0.06344357633789578, "grad_norm": 249846.515625, "learning_rate": 9.365564236621043e-06, "loss": 0.1924, "step": 2160 }, { "epoch": 0.06373729659871938, "grad_norm": 290685.4375, "learning_rate": 9.362627034012808e-06, "loss": 0.2026, "step": 2170 }, { "epoch": 0.06403101685954297, "grad_norm": 238835.296875, "learning_rate": 9.359689831404572e-06, "loss": 0.1875, "step": 2180 }, { "epoch": 0.06432473712036656, "grad_norm": 454267.53125, "learning_rate": 9.356752628796335e-06, "loss": 0.2, "step": 2190 }, { "epoch": 0.06461845738119015, "grad_norm": 269860.0625, "learning_rate": 9.353815426188098e-06, "loss": 0.1879, "step": 2200 }, { "epoch": 0.06491217764201375, "grad_norm": 569965.8125, "learning_rate": 9.350878223579863e-06, "loss": 0.2029, "step": 2210 }, { "epoch": 0.06520589790283733, "grad_norm": 375326.03125, "learning_rate": 9.347941020971627e-06, "loss": 0.1933, "step": 2220 }, { "epoch": 0.06549961816366093, "grad_norm": 360245.75, "learning_rate": 9.345003818363392e-06, "loss": 0.199, "step": 2230 }, { "epoch": 0.06579333842448452, "grad_norm": 193564.015625, "learning_rate": 9.342066615755155e-06, "loss": 0.201, "step": 2240 }, { "epoch": 0.0660870586853081, "grad_norm": 869062.125, "learning_rate": 9.33912941314692e-06, "loss": 0.2094, "step": 2250 }, { "epoch": 0.0663807789461317, "grad_norm": 233604.484375, "learning_rate": 9.336192210538684e-06, "loss": 0.2122, "step": 2260 }, { "epoch": 0.0666744992069553, "grad_norm": 347551.5625, "learning_rate": 9.333255007930447e-06, "loss": 0.1973, "step": 2270 }, { "epoch": 0.06696821946777888, "grad_norm": 348031.625, "learning_rate": 9.330317805322212e-06, "loss": 0.192, "step": 2280 }, { "epoch": 0.06726193972860248, "grad_norm": 504646.78125, "learning_rate": 9.327380602713976e-06, "loss": 0.1987, "step": 2290 }, { "epoch": 0.06755565998942607, "grad_norm": 373414.8125, "learning_rate": 9.324443400105741e-06, "loss": 0.1917, "step": 2300 }, { "epoch": 0.06784938025024967, "grad_norm": 685489.5625, "learning_rate": 9.321506197497504e-06, "loss": 0.179, "step": 2310 }, { "epoch": 0.06814310051107325, "grad_norm": 438616.96875, "learning_rate": 9.318568994889267e-06, "loss": 0.211, "step": 2320 }, { "epoch": 0.06843682077189685, "grad_norm": 179268.3125, "learning_rate": 9.315631792281033e-06, "loss": 0.2106, "step": 2330 }, { "epoch": 0.06873054103272044, "grad_norm": 244697.5625, "learning_rate": 9.312694589672796e-06, "loss": 0.1844, "step": 2340 }, { "epoch": 0.06902426129354403, "grad_norm": 324429.40625, "learning_rate": 9.30975738706456e-06, "loss": 0.2047, "step": 2350 }, { "epoch": 0.06931798155436762, "grad_norm": 321817.34375, "learning_rate": 9.306820184456324e-06, "loss": 0.1937, "step": 2360 }, { "epoch": 0.06961170181519122, "grad_norm": 308975.46875, "learning_rate": 9.303882981848088e-06, "loss": 0.2045, "step": 2370 }, { "epoch": 0.0699054220760148, "grad_norm": 267479.59375, "learning_rate": 9.300945779239853e-06, "loss": 0.1939, "step": 2380 }, { "epoch": 0.0701991423368384, "grad_norm": 225495.484375, "learning_rate": 9.298008576631616e-06, "loss": 0.19, "step": 2390 }, { "epoch": 0.07049286259766199, "grad_norm": 309841.3125, "learning_rate": 9.29507137402338e-06, "loss": 0.1924, "step": 2400 }, { "epoch": 0.07078658285848557, "grad_norm": 383263.8125, "learning_rate": 9.292134171415145e-06, "loss": 0.2085, "step": 2410 }, { "epoch": 0.07108030311930917, "grad_norm": 305413.15625, "learning_rate": 9.28919696880691e-06, "loss": 0.2077, "step": 2420 }, { "epoch": 0.07137402338013277, "grad_norm": 420815.8125, "learning_rate": 9.286259766198673e-06, "loss": 0.1825, "step": 2430 }, { "epoch": 0.07166774364095635, "grad_norm": 261197.859375, "learning_rate": 9.283322563590437e-06, "loss": 0.1984, "step": 2440 }, { "epoch": 0.07196146390177995, "grad_norm": 408166.53125, "learning_rate": 9.280385360982202e-06, "loss": 0.1948, "step": 2450 }, { "epoch": 0.07225518416260354, "grad_norm": 358644.03125, "learning_rate": 9.277448158373965e-06, "loss": 0.2104, "step": 2460 }, { "epoch": 0.07254890442342712, "grad_norm": 328955.65625, "learning_rate": 9.27451095576573e-06, "loss": 0.1833, "step": 2470 }, { "epoch": 0.07284262468425072, "grad_norm": 289794.03125, "learning_rate": 9.271573753157494e-06, "loss": 0.1896, "step": 2480 }, { "epoch": 0.07313634494507432, "grad_norm": 504739.53125, "learning_rate": 9.268636550549259e-06, "loss": 0.1911, "step": 2490 }, { "epoch": 0.0734300652058979, "grad_norm": 282808.625, "learning_rate": 9.265699347941022e-06, "loss": 0.1878, "step": 2500 }, { "epoch": 0.0737237854667215, "grad_norm": 328293.0625, "learning_rate": 9.262762145332785e-06, "loss": 0.1895, "step": 2510 }, { "epoch": 0.07401750572754509, "grad_norm": 535972.375, "learning_rate": 9.25982494272455e-06, "loss": 0.1959, "step": 2520 }, { "epoch": 0.07431122598836867, "grad_norm": 334219.96875, "learning_rate": 9.256887740116314e-06, "loss": 0.1972, "step": 2530 }, { "epoch": 0.07460494624919227, "grad_norm": 282323.0, "learning_rate": 9.253950537508079e-06, "loss": 0.1953, "step": 2540 }, { "epoch": 0.07489866651001587, "grad_norm": 404348.28125, "learning_rate": 9.251013334899842e-06, "loss": 0.1944, "step": 2550 }, { "epoch": 0.07519238677083945, "grad_norm": 233389.03125, "learning_rate": 9.248076132291606e-06, "loss": 0.2016, "step": 2560 }, { "epoch": 0.07548610703166304, "grad_norm": 260673.75, "learning_rate": 9.24513892968337e-06, "loss": 0.1829, "step": 2570 }, { "epoch": 0.07577982729248664, "grad_norm": 248246.109375, "learning_rate": 9.242201727075134e-06, "loss": 0.2062, "step": 2580 }, { "epoch": 0.07607354755331022, "grad_norm": 518647.46875, "learning_rate": 9.239264524466898e-06, "loss": 0.1905, "step": 2590 }, { "epoch": 0.07636726781413382, "grad_norm": 357864.625, "learning_rate": 9.236327321858663e-06, "loss": 0.1991, "step": 2600 }, { "epoch": 0.07666098807495741, "grad_norm": 355149.84375, "learning_rate": 9.233390119250428e-06, "loss": 0.1861, "step": 2610 }, { "epoch": 0.076954708335781, "grad_norm": 402685.0625, "learning_rate": 9.23045291664219e-06, "loss": 0.1878, "step": 2620 }, { "epoch": 0.07724842859660459, "grad_norm": 437404.03125, "learning_rate": 9.227515714033955e-06, "loss": 0.1999, "step": 2630 }, { "epoch": 0.07754214885742819, "grad_norm": 344665.5, "learning_rate": 9.22457851142572e-06, "loss": 0.1957, "step": 2640 }, { "epoch": 0.07783586911825177, "grad_norm": 274399.65625, "learning_rate": 9.221641308817483e-06, "loss": 0.2176, "step": 2650 }, { "epoch": 0.07812958937907537, "grad_norm": 226156.125, "learning_rate": 9.218704106209247e-06, "loss": 0.1833, "step": 2660 }, { "epoch": 0.07842330963989896, "grad_norm": 312821.0, "learning_rate": 9.21576690360101e-06, "loss": 0.2053, "step": 2670 }, { "epoch": 0.07871702990072255, "grad_norm": 385619.59375, "learning_rate": 9.212829700992775e-06, "loss": 0.1974, "step": 2680 }, { "epoch": 0.07901075016154614, "grad_norm": 401124.59375, "learning_rate": 9.20989249838454e-06, "loss": 0.2016, "step": 2690 }, { "epoch": 0.07930447042236974, "grad_norm": 271819.15625, "learning_rate": 9.206955295776303e-06, "loss": 0.2018, "step": 2700 }, { "epoch": 0.07959819068319332, "grad_norm": 419305.40625, "learning_rate": 9.204018093168067e-06, "loss": 0.1918, "step": 2710 }, { "epoch": 0.07989191094401692, "grad_norm": 335390.5, "learning_rate": 9.201080890559832e-06, "loss": 0.1909, "step": 2720 }, { "epoch": 0.08018563120484051, "grad_norm": 298464.40625, "learning_rate": 9.198143687951596e-06, "loss": 0.1857, "step": 2730 }, { "epoch": 0.0804793514656641, "grad_norm": 373037.09375, "learning_rate": 9.19520648534336e-06, "loss": 0.1936, "step": 2740 }, { "epoch": 0.08077307172648769, "grad_norm": 351092.90625, "learning_rate": 9.192269282735124e-06, "loss": 0.1978, "step": 2750 }, { "epoch": 0.08106679198731129, "grad_norm": 280589.09375, "learning_rate": 9.189332080126889e-06, "loss": 0.2095, "step": 2760 }, { "epoch": 0.08136051224813487, "grad_norm": 363535.5625, "learning_rate": 9.186394877518652e-06, "loss": 0.1914, "step": 2770 }, { "epoch": 0.08165423250895847, "grad_norm": 248699.375, "learning_rate": 9.183457674910416e-06, "loss": 0.1909, "step": 2780 }, { "epoch": 0.08194795276978206, "grad_norm": 269940.1875, "learning_rate": 9.180520472302181e-06, "loss": 0.1951, "step": 2790 }, { "epoch": 0.08224167303060564, "grad_norm": 354062.15625, "learning_rate": 9.177583269693944e-06, "loss": 0.2041, "step": 2800 }, { "epoch": 0.08253539329142924, "grad_norm": 219158.28125, "learning_rate": 9.174646067085708e-06, "loss": 0.1883, "step": 2810 }, { "epoch": 0.08282911355225284, "grad_norm": 272200.875, "learning_rate": 9.171708864477471e-06, "loss": 0.1717, "step": 2820 }, { "epoch": 0.08312283381307643, "grad_norm": 208811.046875, "learning_rate": 9.168771661869236e-06, "loss": 0.1947, "step": 2830 }, { "epoch": 0.08341655407390001, "grad_norm": 334633.3125, "learning_rate": 9.165834459261e-06, "loss": 0.1897, "step": 2840 }, { "epoch": 0.08371027433472361, "grad_norm": 270426.5, "learning_rate": 9.162897256652765e-06, "loss": 0.1958, "step": 2850 }, { "epoch": 0.08400399459554721, "grad_norm": 257365.65625, "learning_rate": 9.159960054044528e-06, "loss": 0.1794, "step": 2860 }, { "epoch": 0.08429771485637079, "grad_norm": 377377.8125, "learning_rate": 9.157022851436293e-06, "loss": 0.1801, "step": 2870 }, { "epoch": 0.08459143511719439, "grad_norm": 339080.875, "learning_rate": 9.154085648828057e-06, "loss": 0.1839, "step": 2880 }, { "epoch": 0.08488515537801798, "grad_norm": 416387.5625, "learning_rate": 9.15114844621982e-06, "loss": 0.1881, "step": 2890 }, { "epoch": 0.08517887563884156, "grad_norm": 392616.21875, "learning_rate": 9.148211243611585e-06, "loss": 0.196, "step": 2900 }, { "epoch": 0.08547259589966516, "grad_norm": 224952.046875, "learning_rate": 9.14527404100335e-06, "loss": 0.1729, "step": 2910 }, { "epoch": 0.08576631616048876, "grad_norm": 200396.484375, "learning_rate": 9.142336838395113e-06, "loss": 0.1956, "step": 2920 }, { "epoch": 0.08606003642131234, "grad_norm": 322423.75, "learning_rate": 9.139399635786877e-06, "loss": 0.1842, "step": 2930 }, { "epoch": 0.08635375668213593, "grad_norm": 311038.71875, "learning_rate": 9.136462433178642e-06, "loss": 0.1938, "step": 2940 }, { "epoch": 0.08664747694295953, "grad_norm": 330710.25, "learning_rate": 9.133525230570407e-06, "loss": 0.1863, "step": 2950 }, { "epoch": 0.08694119720378311, "grad_norm": 323158.125, "learning_rate": 9.13058802796217e-06, "loss": 0.2012, "step": 2960 }, { "epoch": 0.08723491746460671, "grad_norm": 429194.875, "learning_rate": 9.127650825353934e-06, "loss": 0.196, "step": 2970 }, { "epoch": 0.0875286377254303, "grad_norm": 329341.75, "learning_rate": 9.124713622745697e-06, "loss": 0.1926, "step": 2980 }, { "epoch": 0.08782235798625389, "grad_norm": 401636.59375, "learning_rate": 9.121776420137462e-06, "loss": 0.1879, "step": 2990 }, { "epoch": 0.08811607824707748, "grad_norm": 339975.84375, "learning_rate": 9.118839217529226e-06, "loss": 0.2025, "step": 3000 }, { "epoch": 0.08840979850790108, "grad_norm": 240291.515625, "learning_rate": 9.11590201492099e-06, "loss": 0.1713, "step": 3010 }, { "epoch": 0.08870351876872466, "grad_norm": 415960.1875, "learning_rate": 9.112964812312754e-06, "loss": 0.1858, "step": 3020 }, { "epoch": 0.08899723902954826, "grad_norm": 298152.21875, "learning_rate": 9.110027609704519e-06, "loss": 0.1988, "step": 3030 }, { "epoch": 0.08929095929037185, "grad_norm": 201933.796875, "learning_rate": 9.107090407096281e-06, "loss": 0.1814, "step": 3040 }, { "epoch": 0.08958467955119544, "grad_norm": 200112.703125, "learning_rate": 9.104153204488046e-06, "loss": 0.1843, "step": 3050 }, { "epoch": 0.08987839981201903, "grad_norm": 354383.3125, "learning_rate": 9.10121600187981e-06, "loss": 0.207, "step": 3060 }, { "epoch": 0.09017212007284263, "grad_norm": 311572.5625, "learning_rate": 9.098278799271575e-06, "loss": 0.191, "step": 3070 }, { "epoch": 0.09046584033366621, "grad_norm": 383876.34375, "learning_rate": 9.095341596663338e-06, "loss": 0.1962, "step": 3080 }, { "epoch": 0.09075956059448981, "grad_norm": 558038.75, "learning_rate": 9.092404394055103e-06, "loss": 0.1743, "step": 3090 }, { "epoch": 0.0910532808553134, "grad_norm": 356717.875, "learning_rate": 9.089467191446868e-06, "loss": 0.1984, "step": 3100 }, { "epoch": 0.09134700111613699, "grad_norm": 179228.953125, "learning_rate": 9.08652998883863e-06, "loss": 0.1982, "step": 3110 }, { "epoch": 0.09164072137696058, "grad_norm": 356080.3125, "learning_rate": 9.083592786230395e-06, "loss": 0.1962, "step": 3120 }, { "epoch": 0.09193444163778418, "grad_norm": 305754.25, "learning_rate": 9.080655583622158e-06, "loss": 0.1852, "step": 3130 }, { "epoch": 0.09222816189860776, "grad_norm": 246456.59375, "learning_rate": 9.077718381013924e-06, "loss": 0.1806, "step": 3140 }, { "epoch": 0.09252188215943136, "grad_norm": 263129.4375, "learning_rate": 9.074781178405687e-06, "loss": 0.1881, "step": 3150 }, { "epoch": 0.09281560242025495, "grad_norm": 301278.375, "learning_rate": 9.07184397579745e-06, "loss": 0.1939, "step": 3160 }, { "epoch": 0.09310932268107854, "grad_norm": 523149.75, "learning_rate": 9.068906773189215e-06, "loss": 0.1855, "step": 3170 }, { "epoch": 0.09340304294190213, "grad_norm": 740417.25, "learning_rate": 9.06596957058098e-06, "loss": 0.1888, "step": 3180 }, { "epoch": 0.09369676320272573, "grad_norm": 404247.5625, "learning_rate": 9.063032367972744e-06, "loss": 0.1789, "step": 3190 }, { "epoch": 0.09399048346354931, "grad_norm": 244827.328125, "learning_rate": 9.060095165364507e-06, "loss": 0.193, "step": 3200 }, { "epoch": 0.0942842037243729, "grad_norm": 247029.921875, "learning_rate": 9.057157962756272e-06, "loss": 0.1846, "step": 3210 }, { "epoch": 0.0945779239851965, "grad_norm": 215589.703125, "learning_rate": 9.054220760148036e-06, "loss": 0.1862, "step": 3220 }, { "epoch": 0.09487164424602008, "grad_norm": 338144.3125, "learning_rate": 9.0512835575398e-06, "loss": 0.2117, "step": 3230 }, { "epoch": 0.09516536450684368, "grad_norm": 258357.96875, "learning_rate": 9.048346354931564e-06, "loss": 0.1761, "step": 3240 }, { "epoch": 0.09545908476766728, "grad_norm": 249996.234375, "learning_rate": 9.045409152323329e-06, "loss": 0.1757, "step": 3250 }, { "epoch": 0.09575280502849086, "grad_norm": 442777.84375, "learning_rate": 9.042471949715093e-06, "loss": 0.195, "step": 3260 }, { "epoch": 0.09604652528931445, "grad_norm": 197858.234375, "learning_rate": 9.039534747106856e-06, "loss": 0.1802, "step": 3270 }, { "epoch": 0.09634024555013805, "grad_norm": 302347.59375, "learning_rate": 9.036597544498619e-06, "loss": 0.1854, "step": 3280 }, { "epoch": 0.09663396581096163, "grad_norm": 279752.1875, "learning_rate": 9.033660341890384e-06, "loss": 0.1866, "step": 3290 }, { "epoch": 0.09692768607178523, "grad_norm": 187177.609375, "learning_rate": 9.030723139282148e-06, "loss": 0.1882, "step": 3300 }, { "epoch": 0.09722140633260883, "grad_norm": 448923.5625, "learning_rate": 9.027785936673913e-06, "loss": 0.1818, "step": 3310 }, { "epoch": 0.09751512659343242, "grad_norm": 254510.578125, "learning_rate": 9.024848734065676e-06, "loss": 0.1962, "step": 3320 }, { "epoch": 0.097808846854256, "grad_norm": 280507.1875, "learning_rate": 9.02191153145744e-06, "loss": 0.1965, "step": 3330 }, { "epoch": 0.0981025671150796, "grad_norm": 289900.8125, "learning_rate": 9.018974328849205e-06, "loss": 0.1869, "step": 3340 }, { "epoch": 0.0983962873759032, "grad_norm": 272894.25, "learning_rate": 9.016037126240968e-06, "loss": 0.2004, "step": 3350 }, { "epoch": 0.09869000763672678, "grad_norm": 245662.28125, "learning_rate": 9.013099923632733e-06, "loss": 0.195, "step": 3360 }, { "epoch": 0.09898372789755037, "grad_norm": 312878.1875, "learning_rate": 9.010162721024497e-06, "loss": 0.1874, "step": 3370 }, { "epoch": 0.09927744815837397, "grad_norm": 288099.875, "learning_rate": 9.007225518416262e-06, "loss": 0.1952, "step": 3380 }, { "epoch": 0.09957116841919755, "grad_norm": 214459.125, "learning_rate": 9.004288315808025e-06, "loss": 0.1792, "step": 3390 }, { "epoch": 0.09986488868002115, "grad_norm": 333297.96875, "learning_rate": 9.00135111319979e-06, "loss": 0.1839, "step": 3400 }, { "epoch": 0.10015860894084475, "grad_norm": 286475.03125, "learning_rate": 8.998413910591554e-06, "loss": 0.1878, "step": 3410 }, { "epoch": 0.10045232920166833, "grad_norm": 269363.5625, "learning_rate": 8.995476707983317e-06, "loss": 0.1915, "step": 3420 }, { "epoch": 0.10074604946249192, "grad_norm": 427646.5625, "learning_rate": 8.992539505375082e-06, "loss": 0.1974, "step": 3430 }, { "epoch": 0.10103976972331552, "grad_norm": 448181.90625, "learning_rate": 8.989602302766845e-06, "loss": 0.1668, "step": 3440 }, { "epoch": 0.1013334899841391, "grad_norm": 233967.234375, "learning_rate": 8.98666510015861e-06, "loss": 0.1942, "step": 3450 }, { "epoch": 0.1016272102449627, "grad_norm": 245551.78125, "learning_rate": 8.983727897550374e-06, "loss": 0.1822, "step": 3460 }, { "epoch": 0.1019209305057863, "grad_norm": 392214.90625, "learning_rate": 8.980790694942137e-06, "loss": 0.1898, "step": 3470 }, { "epoch": 0.10221465076660988, "grad_norm": 303766.8125, "learning_rate": 8.977853492333902e-06, "loss": 0.1973, "step": 3480 }, { "epoch": 0.10250837102743347, "grad_norm": 230396.515625, "learning_rate": 8.974916289725666e-06, "loss": 0.1988, "step": 3490 }, { "epoch": 0.10280209128825707, "grad_norm": 391029.3125, "learning_rate": 8.97197908711743e-06, "loss": 0.1782, "step": 3500 }, { "epoch": 0.10309581154908065, "grad_norm": 266050.90625, "learning_rate": 8.969041884509194e-06, "loss": 0.1933, "step": 3510 }, { "epoch": 0.10338953180990425, "grad_norm": 278473.375, "learning_rate": 8.966104681900958e-06, "loss": 0.1852, "step": 3520 }, { "epoch": 0.10368325207072784, "grad_norm": 264432.46875, "learning_rate": 8.963167479292723e-06, "loss": 0.1827, "step": 3530 }, { "epoch": 0.10397697233155143, "grad_norm": 323421.46875, "learning_rate": 8.960230276684486e-06, "loss": 0.1745, "step": 3540 }, { "epoch": 0.10427069259237502, "grad_norm": 190596.953125, "learning_rate": 8.95729307407625e-06, "loss": 0.1801, "step": 3550 }, { "epoch": 0.10456441285319862, "grad_norm": 506692.71875, "learning_rate": 8.954355871468015e-06, "loss": 0.1847, "step": 3560 }, { "epoch": 0.1048581331140222, "grad_norm": 230084.03125, "learning_rate": 8.951418668859778e-06, "loss": 0.1799, "step": 3570 }, { "epoch": 0.1051518533748458, "grad_norm": 240536.28125, "learning_rate": 8.948481466251543e-06, "loss": 0.1754, "step": 3580 }, { "epoch": 0.10544557363566939, "grad_norm": 258820.609375, "learning_rate": 8.945544263643306e-06, "loss": 0.1781, "step": 3590 }, { "epoch": 0.10573929389649298, "grad_norm": 198127.84375, "learning_rate": 8.942607061035072e-06, "loss": 0.174, "step": 3600 }, { "epoch": 0.10603301415731657, "grad_norm": 577946.6875, "learning_rate": 8.939669858426835e-06, "loss": 0.1934, "step": 3610 }, { "epoch": 0.10632673441814017, "grad_norm": 344697.6875, "learning_rate": 8.9367326558186e-06, "loss": 0.1932, "step": 3620 }, { "epoch": 0.10662045467896375, "grad_norm": 237864.3125, "learning_rate": 8.933795453210363e-06, "loss": 0.1872, "step": 3630 }, { "epoch": 0.10691417493978735, "grad_norm": 247542.078125, "learning_rate": 8.930858250602127e-06, "loss": 0.1914, "step": 3640 }, { "epoch": 0.10720789520061094, "grad_norm": 396733.21875, "learning_rate": 8.927921047993892e-06, "loss": 0.1823, "step": 3650 }, { "epoch": 0.10750161546143452, "grad_norm": 237244.421875, "learning_rate": 8.924983845385655e-06, "loss": 0.1693, "step": 3660 }, { "epoch": 0.10779533572225812, "grad_norm": 290167.625, "learning_rate": 8.92204664277742e-06, "loss": 0.1826, "step": 3670 }, { "epoch": 0.10808905598308172, "grad_norm": 331350.875, "learning_rate": 8.919109440169184e-06, "loss": 0.1935, "step": 3680 }, { "epoch": 0.1083827762439053, "grad_norm": 207134.234375, "learning_rate": 8.916172237560947e-06, "loss": 0.1954, "step": 3690 }, { "epoch": 0.1086764965047289, "grad_norm": 210747.578125, "learning_rate": 8.913235034952712e-06, "loss": 0.1673, "step": 3700 }, { "epoch": 0.10897021676555249, "grad_norm": 498143.0625, "learning_rate": 8.910297832344476e-06, "loss": 0.2034, "step": 3710 }, { "epoch": 0.10926393702637607, "grad_norm": 413554.40625, "learning_rate": 8.907360629736241e-06, "loss": 0.18, "step": 3720 }, { "epoch": 0.10955765728719967, "grad_norm": 208447.90625, "learning_rate": 8.904423427128004e-06, "loss": 0.1835, "step": 3730 }, { "epoch": 0.10985137754802327, "grad_norm": 186535.25, "learning_rate": 8.901486224519768e-06, "loss": 0.1722, "step": 3740 }, { "epoch": 0.11014509780884685, "grad_norm": 294469.71875, "learning_rate": 8.898549021911533e-06, "loss": 0.1928, "step": 3750 }, { "epoch": 0.11043881806967044, "grad_norm": 223314.109375, "learning_rate": 8.895611819303296e-06, "loss": 0.1864, "step": 3760 }, { "epoch": 0.11073253833049404, "grad_norm": 240459.03125, "learning_rate": 8.89267461669506e-06, "loss": 0.1688, "step": 3770 }, { "epoch": 0.11102625859131762, "grad_norm": 388123.09375, "learning_rate": 8.889737414086824e-06, "loss": 0.1755, "step": 3780 }, { "epoch": 0.11131997885214122, "grad_norm": 338422.75, "learning_rate": 8.886800211478588e-06, "loss": 0.1821, "step": 3790 }, { "epoch": 0.11161369911296481, "grad_norm": 428418.03125, "learning_rate": 8.883863008870353e-06, "loss": 0.1705, "step": 3800 }, { "epoch": 0.1119074193737884, "grad_norm": 253505.71875, "learning_rate": 8.880925806262116e-06, "loss": 0.1663, "step": 3810 }, { "epoch": 0.112201139634612, "grad_norm": 242074.4375, "learning_rate": 8.87798860365388e-06, "loss": 0.1784, "step": 3820 }, { "epoch": 0.11249485989543559, "grad_norm": 201122.796875, "learning_rate": 8.875051401045645e-06, "loss": 0.1786, "step": 3830 }, { "epoch": 0.11278858015625919, "grad_norm": 287152.6875, "learning_rate": 8.87211419843741e-06, "loss": 0.1831, "step": 3840 }, { "epoch": 0.11308230041708277, "grad_norm": 338570.25, "learning_rate": 8.869176995829173e-06, "loss": 0.1809, "step": 3850 }, { "epoch": 0.11337602067790636, "grad_norm": 231892.359375, "learning_rate": 8.866239793220937e-06, "loss": 0.1676, "step": 3860 }, { "epoch": 0.11366974093872996, "grad_norm": 405391.96875, "learning_rate": 8.863302590612702e-06, "loss": 0.181, "step": 3870 }, { "epoch": 0.11396346119955354, "grad_norm": 341543.90625, "learning_rate": 8.860365388004465e-06, "loss": 0.1931, "step": 3880 }, { "epoch": 0.11425718146037714, "grad_norm": 350273.78125, "learning_rate": 8.85742818539623e-06, "loss": 0.1925, "step": 3890 }, { "epoch": 0.11455090172120073, "grad_norm": 255694.453125, "learning_rate": 8.854490982787992e-06, "loss": 0.1802, "step": 3900 }, { "epoch": 0.11484462198202432, "grad_norm": 310982.375, "learning_rate": 8.851553780179759e-06, "loss": 0.1826, "step": 3910 }, { "epoch": 0.11513834224284791, "grad_norm": 289321.625, "learning_rate": 8.848616577571522e-06, "loss": 0.1985, "step": 3920 }, { "epoch": 0.11543206250367151, "grad_norm": 317722.15625, "learning_rate": 8.845679374963285e-06, "loss": 0.161, "step": 3930 }, { "epoch": 0.11572578276449509, "grad_norm": 374825.6875, "learning_rate": 8.84274217235505e-06, "loss": 0.1825, "step": 3940 }, { "epoch": 0.11601950302531869, "grad_norm": 279561.25, "learning_rate": 8.839804969746814e-06, "loss": 0.1753, "step": 3950 }, { "epoch": 0.11631322328614228, "grad_norm": 256630.5, "learning_rate": 8.836867767138579e-06, "loss": 0.1837, "step": 3960 }, { "epoch": 0.11660694354696587, "grad_norm": 387750.125, "learning_rate": 8.833930564530341e-06, "loss": 0.1737, "step": 3970 }, { "epoch": 0.11690066380778946, "grad_norm": 386381.375, "learning_rate": 8.830993361922106e-06, "loss": 0.1729, "step": 3980 }, { "epoch": 0.11719438406861306, "grad_norm": 447479.28125, "learning_rate": 8.82805615931387e-06, "loss": 0.1813, "step": 3990 }, { "epoch": 0.11748810432943664, "grad_norm": 277310.9375, "learning_rate": 8.825118956705634e-06, "loss": 0.1923, "step": 4000 }, { "epoch": 0.11778182459026024, "grad_norm": 268985.21875, "learning_rate": 8.822181754097398e-06, "loss": 0.1849, "step": 4010 }, { "epoch": 0.11807554485108383, "grad_norm": 278786.0625, "learning_rate": 8.819244551489163e-06, "loss": 0.1881, "step": 4020 }, { "epoch": 0.11836926511190742, "grad_norm": 307563.03125, "learning_rate": 8.816307348880928e-06, "loss": 0.193, "step": 4030 }, { "epoch": 0.11866298537273101, "grad_norm": 315275.9375, "learning_rate": 8.81337014627269e-06, "loss": 0.1898, "step": 4040 }, { "epoch": 0.11895670563355461, "grad_norm": 239696.390625, "learning_rate": 8.810432943664453e-06, "loss": 0.1722, "step": 4050 }, { "epoch": 0.11925042589437819, "grad_norm": 347060.78125, "learning_rate": 8.80749574105622e-06, "loss": 0.1914, "step": 4060 }, { "epoch": 0.11954414615520179, "grad_norm": 280253.90625, "learning_rate": 8.804558538447983e-06, "loss": 0.1759, "step": 4070 }, { "epoch": 0.11983786641602538, "grad_norm": 234820.09375, "learning_rate": 8.801621335839747e-06, "loss": 0.1792, "step": 4080 }, { "epoch": 0.12013158667684896, "grad_norm": 211826.828125, "learning_rate": 8.79868413323151e-06, "loss": 0.1823, "step": 4090 }, { "epoch": 0.12042530693767256, "grad_norm": 336544.59375, "learning_rate": 8.795746930623275e-06, "loss": 0.1814, "step": 4100 }, { "epoch": 0.12071902719849616, "grad_norm": 177731.640625, "learning_rate": 8.79280972801504e-06, "loss": 0.183, "step": 4110 }, { "epoch": 0.12101274745931974, "grad_norm": 196297.0625, "learning_rate": 8.789872525406802e-06, "loss": 0.1816, "step": 4120 }, { "epoch": 0.12130646772014334, "grad_norm": 306813.84375, "learning_rate": 8.786935322798567e-06, "loss": 0.1788, "step": 4130 }, { "epoch": 0.12160018798096693, "grad_norm": 342552.71875, "learning_rate": 8.783998120190332e-06, "loss": 0.1808, "step": 4140 }, { "epoch": 0.12189390824179051, "grad_norm": 226151.515625, "learning_rate": 8.781060917582096e-06, "loss": 0.1773, "step": 4150 }, { "epoch": 0.12218762850261411, "grad_norm": 198044.46875, "learning_rate": 8.77812371497386e-06, "loss": 0.1773, "step": 4160 }, { "epoch": 0.1224813487634377, "grad_norm": 271648.5, "learning_rate": 8.775186512365624e-06, "loss": 0.1732, "step": 4170 }, { "epoch": 0.12277506902426129, "grad_norm": 312562.53125, "learning_rate": 8.772249309757389e-06, "loss": 0.1665, "step": 4180 }, { "epoch": 0.12306878928508488, "grad_norm": 282230.0625, "learning_rate": 8.769312107149152e-06, "loss": 0.1857, "step": 4190 }, { "epoch": 0.12336250954590848, "grad_norm": 305199.9375, "learning_rate": 8.766374904540916e-06, "loss": 0.1668, "step": 4200 }, { "epoch": 0.12365622980673206, "grad_norm": 273784.65625, "learning_rate": 8.76343770193268e-06, "loss": 0.1833, "step": 4210 }, { "epoch": 0.12394995006755566, "grad_norm": 223915.328125, "learning_rate": 8.760500499324444e-06, "loss": 0.1813, "step": 4220 }, { "epoch": 0.12424367032837925, "grad_norm": 299638.46875, "learning_rate": 8.757563296716208e-06, "loss": 0.1863, "step": 4230 }, { "epoch": 0.12453739058920284, "grad_norm": 329316.5, "learning_rate": 8.754626094107971e-06, "loss": 0.1828, "step": 4240 }, { "epoch": 0.12483111085002643, "grad_norm": 205498.5625, "learning_rate": 8.751688891499736e-06, "loss": 0.1679, "step": 4250 }, { "epoch": 0.12512483111085002, "grad_norm": 244807.421875, "learning_rate": 8.7487516888915e-06, "loss": 0.1868, "step": 4260 }, { "epoch": 0.12541855137167363, "grad_norm": 241220.671875, "learning_rate": 8.745814486283265e-06, "loss": 0.1723, "step": 4270 }, { "epoch": 0.1257122716324972, "grad_norm": 303273.65625, "learning_rate": 8.742877283675028e-06, "loss": 0.1812, "step": 4280 }, { "epoch": 0.1260059918933208, "grad_norm": 249635.796875, "learning_rate": 8.739940081066793e-06, "loss": 0.1917, "step": 4290 }, { "epoch": 0.1262997121541444, "grad_norm": 382326.15625, "learning_rate": 8.737002878458557e-06, "loss": 0.1823, "step": 4300 }, { "epoch": 0.12659343241496798, "grad_norm": 202424.40625, "learning_rate": 8.73406567585032e-06, "loss": 0.1632, "step": 4310 }, { "epoch": 0.12688715267579156, "grad_norm": 202828.53125, "learning_rate": 8.731128473242085e-06, "loss": 0.1869, "step": 4320 }, { "epoch": 0.12718087293661517, "grad_norm": 568236.9375, "learning_rate": 8.72819127063385e-06, "loss": 0.1904, "step": 4330 }, { "epoch": 0.12747459319743876, "grad_norm": 235116.859375, "learning_rate": 8.725254068025613e-06, "loss": 0.1804, "step": 4340 }, { "epoch": 0.12776831345826234, "grad_norm": 224853.9375, "learning_rate": 8.722316865417377e-06, "loss": 0.1851, "step": 4350 }, { "epoch": 0.12806203371908595, "grad_norm": 260044.8125, "learning_rate": 8.71937966280914e-06, "loss": 0.1778, "step": 4360 }, { "epoch": 0.12835575397990953, "grad_norm": 304885.5625, "learning_rate": 8.716442460200906e-06, "loss": 0.1851, "step": 4370 }, { "epoch": 0.1286494742407331, "grad_norm": 284311.875, "learning_rate": 8.71350525759267e-06, "loss": 0.1773, "step": 4380 }, { "epoch": 0.12894319450155672, "grad_norm": 212267.234375, "learning_rate": 8.710568054984434e-06, "loss": 0.1767, "step": 4390 }, { "epoch": 0.1292369147623803, "grad_norm": 299386.625, "learning_rate": 8.707630852376197e-06, "loss": 0.1648, "step": 4400 }, { "epoch": 0.1295306350232039, "grad_norm": 426518.5, "learning_rate": 8.704693649767962e-06, "loss": 0.1717, "step": 4410 }, { "epoch": 0.1298243552840275, "grad_norm": 229280.796875, "learning_rate": 8.701756447159726e-06, "loss": 0.1767, "step": 4420 }, { "epoch": 0.13011807554485108, "grad_norm": 354900.84375, "learning_rate": 8.698819244551489e-06, "loss": 0.1693, "step": 4430 }, { "epoch": 0.13041179580567466, "grad_norm": 299092.25, "learning_rate": 8.695882041943254e-06, "loss": 0.1803, "step": 4440 }, { "epoch": 0.13070551606649827, "grad_norm": 195205.28125, "learning_rate": 8.692944839335018e-06, "loss": 0.1654, "step": 4450 }, { "epoch": 0.13099923632732186, "grad_norm": 452646.15625, "learning_rate": 8.690007636726781e-06, "loss": 0.1783, "step": 4460 }, { "epoch": 0.13129295658814544, "grad_norm": 237767.984375, "learning_rate": 8.687070434118546e-06, "loss": 0.1804, "step": 4470 }, { "epoch": 0.13158667684896905, "grad_norm": 241259.640625, "learning_rate": 8.68413323151031e-06, "loss": 0.1675, "step": 4480 }, { "epoch": 0.13188039710979263, "grad_norm": 338898.25, "learning_rate": 8.681196028902075e-06, "loss": 0.1592, "step": 4490 }, { "epoch": 0.1321741173706162, "grad_norm": 215942.75, "learning_rate": 8.678258826293838e-06, "loss": 0.1906, "step": 4500 }, { "epoch": 0.13246783763143982, "grad_norm": 312860.78125, "learning_rate": 8.675321623685603e-06, "loss": 0.1699, "step": 4510 }, { "epoch": 0.1327615578922634, "grad_norm": 294299.0625, "learning_rate": 8.672384421077367e-06, "loss": 0.1698, "step": 4520 }, { "epoch": 0.133055278153087, "grad_norm": 234003.3125, "learning_rate": 8.66944721846913e-06, "loss": 0.1886, "step": 4530 }, { "epoch": 0.1333489984139106, "grad_norm": 336366.6875, "learning_rate": 8.666510015860895e-06, "loss": 0.1593, "step": 4540 }, { "epoch": 0.13364271867473418, "grad_norm": 342974.1875, "learning_rate": 8.663572813252658e-06, "loss": 0.1864, "step": 4550 }, { "epoch": 0.13393643893555776, "grad_norm": 157761.1875, "learning_rate": 8.660635610644423e-06, "loss": 0.1905, "step": 4560 }, { "epoch": 0.13423015919638137, "grad_norm": 277209.21875, "learning_rate": 8.657698408036187e-06, "loss": 0.1742, "step": 4570 }, { "epoch": 0.13452387945720495, "grad_norm": 263838.34375, "learning_rate": 8.65476120542795e-06, "loss": 0.1833, "step": 4580 }, { "epoch": 0.13481759971802856, "grad_norm": 182853.671875, "learning_rate": 8.651824002819715e-06, "loss": 0.1706, "step": 4590 }, { "epoch": 0.13511131997885215, "grad_norm": 336188.90625, "learning_rate": 8.64888680021148e-06, "loss": 0.1801, "step": 4600 }, { "epoch": 0.13540504023967573, "grad_norm": 176167.125, "learning_rate": 8.645949597603244e-06, "loss": 0.184, "step": 4610 }, { "epoch": 0.13569876050049934, "grad_norm": 231575.75, "learning_rate": 8.643012394995007e-06, "loss": 0.1745, "step": 4620 }, { "epoch": 0.13599248076132292, "grad_norm": 253705.140625, "learning_rate": 8.640075192386772e-06, "loss": 0.1455, "step": 4630 }, { "epoch": 0.1362862010221465, "grad_norm": 254489.59375, "learning_rate": 8.637137989778536e-06, "loss": 0.1872, "step": 4640 }, { "epoch": 0.1365799212829701, "grad_norm": 281524.9375, "learning_rate": 8.6342007871703e-06, "loss": 0.1802, "step": 4650 }, { "epoch": 0.1368736415437937, "grad_norm": 350791.0625, "learning_rate": 8.631263584562064e-06, "loss": 0.1673, "step": 4660 }, { "epoch": 0.13716736180461728, "grad_norm": 319458.4375, "learning_rate": 8.628326381953828e-06, "loss": 0.1697, "step": 4670 }, { "epoch": 0.1374610820654409, "grad_norm": 424428.5, "learning_rate": 8.625389179345593e-06, "loss": 0.1881, "step": 4680 }, { "epoch": 0.13775480232626447, "grad_norm": 239419.21875, "learning_rate": 8.622451976737356e-06, "loss": 0.1673, "step": 4690 }, { "epoch": 0.13804852258708805, "grad_norm": 303599.09375, "learning_rate": 8.619514774129119e-06, "loss": 0.17, "step": 4700 }, { "epoch": 0.13834224284791166, "grad_norm": 304951.46875, "learning_rate": 8.616577571520884e-06, "loss": 0.1653, "step": 4710 }, { "epoch": 0.13863596310873524, "grad_norm": 223725.671875, "learning_rate": 8.613640368912648e-06, "loss": 0.1835, "step": 4720 }, { "epoch": 0.13892968336955883, "grad_norm": 338063.09375, "learning_rate": 8.610703166304413e-06, "loss": 0.1824, "step": 4730 }, { "epoch": 0.13922340363038244, "grad_norm": 225460.765625, "learning_rate": 8.607765963696176e-06, "loss": 0.1724, "step": 4740 }, { "epoch": 0.13951712389120602, "grad_norm": 310061.0625, "learning_rate": 8.60482876108794e-06, "loss": 0.1858, "step": 4750 }, { "epoch": 0.1398108441520296, "grad_norm": 321064.40625, "learning_rate": 8.601891558479705e-06, "loss": 0.1895, "step": 4760 }, { "epoch": 0.1401045644128532, "grad_norm": 288338.71875, "learning_rate": 8.598954355871468e-06, "loss": 0.1742, "step": 4770 }, { "epoch": 0.1403982846736768, "grad_norm": 440919.03125, "learning_rate": 8.596017153263233e-06, "loss": 0.1828, "step": 4780 }, { "epoch": 0.14069200493450038, "grad_norm": 324852.46875, "learning_rate": 8.593079950654997e-06, "loss": 0.1634, "step": 4790 }, { "epoch": 0.14098572519532399, "grad_norm": 353120.21875, "learning_rate": 8.590142748046762e-06, "loss": 0.175, "step": 4800 }, { "epoch": 0.14127944545614757, "grad_norm": 241842.796875, "learning_rate": 8.587205545438525e-06, "loss": 0.1852, "step": 4810 }, { "epoch": 0.14157316571697115, "grad_norm": 258627.1875, "learning_rate": 8.58426834283029e-06, "loss": 0.1768, "step": 4820 }, { "epoch": 0.14186688597779476, "grad_norm": 247782.40625, "learning_rate": 8.581331140222054e-06, "loss": 0.1749, "step": 4830 }, { "epoch": 0.14216060623861834, "grad_norm": 170826.4375, "learning_rate": 8.578393937613817e-06, "loss": 0.1686, "step": 4840 }, { "epoch": 0.14245432649944192, "grad_norm": 277335.6875, "learning_rate": 8.575456735005582e-06, "loss": 0.1817, "step": 4850 }, { "epoch": 0.14274804676026553, "grad_norm": 222167.59375, "learning_rate": 8.572519532397345e-06, "loss": 0.1795, "step": 4860 }, { "epoch": 0.14304176702108912, "grad_norm": 439584.625, "learning_rate": 8.56958232978911e-06, "loss": 0.1701, "step": 4870 }, { "epoch": 0.1433354872819127, "grad_norm": 274821.8125, "learning_rate": 8.566645127180874e-06, "loss": 0.1818, "step": 4880 }, { "epoch": 0.1436292075427363, "grad_norm": 265952.96875, "learning_rate": 8.563707924572637e-06, "loss": 0.188, "step": 4890 }, { "epoch": 0.1439229278035599, "grad_norm": 157077.5625, "learning_rate": 8.560770721964401e-06, "loss": 0.1782, "step": 4900 }, { "epoch": 0.14421664806438347, "grad_norm": 404009.5, "learning_rate": 8.557833519356166e-06, "loss": 0.1692, "step": 4910 }, { "epoch": 0.14451036832520708, "grad_norm": 166335.71875, "learning_rate": 8.55489631674793e-06, "loss": 0.1789, "step": 4920 }, { "epoch": 0.14480408858603067, "grad_norm": 313424.125, "learning_rate": 8.551959114139694e-06, "loss": 0.1798, "step": 4930 }, { "epoch": 0.14509780884685425, "grad_norm": 258774.0625, "learning_rate": 8.549021911531458e-06, "loss": 0.1733, "step": 4940 }, { "epoch": 0.14539152910767786, "grad_norm": 425320.71875, "learning_rate": 8.546084708923223e-06, "loss": 0.1733, "step": 4950 }, { "epoch": 0.14568524936850144, "grad_norm": 377357.15625, "learning_rate": 8.543147506314986e-06, "loss": 0.1735, "step": 4960 }, { "epoch": 0.14597896962932502, "grad_norm": 158872.546875, "learning_rate": 8.54021030370675e-06, "loss": 0.1684, "step": 4970 }, { "epoch": 0.14627268989014863, "grad_norm": 521474.34375, "learning_rate": 8.537273101098515e-06, "loss": 0.1719, "step": 4980 }, { "epoch": 0.14656641015097222, "grad_norm": 209138.234375, "learning_rate": 8.53433589849028e-06, "loss": 0.1743, "step": 4990 }, { "epoch": 0.1468601304117958, "grad_norm": 290113.96875, "learning_rate": 8.531398695882043e-06, "loss": 0.1792, "step": 5000 }, { "epoch": 0.1471538506726194, "grad_norm": 172935.109375, "learning_rate": 8.528461493273806e-06, "loss": 0.1611, "step": 5010 }, { "epoch": 0.147447570933443, "grad_norm": 243816.125, "learning_rate": 8.52552429066557e-06, "loss": 0.1773, "step": 5020 }, { "epoch": 0.14774129119426657, "grad_norm": 179897.828125, "learning_rate": 8.522587088057335e-06, "loss": 0.1726, "step": 5030 }, { "epoch": 0.14803501145509018, "grad_norm": 191067.984375, "learning_rate": 8.5196498854491e-06, "loss": 0.1759, "step": 5040 }, { "epoch": 0.14832873171591376, "grad_norm": 344078.46875, "learning_rate": 8.516712682840862e-06, "loss": 0.1791, "step": 5050 }, { "epoch": 0.14862245197673735, "grad_norm": 461933.6875, "learning_rate": 8.513775480232627e-06, "loss": 0.1729, "step": 5060 }, { "epoch": 0.14891617223756096, "grad_norm": 561841.375, "learning_rate": 8.510838277624392e-06, "loss": 0.1858, "step": 5070 }, { "epoch": 0.14920989249838454, "grad_norm": 243993.515625, "learning_rate": 8.507901075016155e-06, "loss": 0.1784, "step": 5080 }, { "epoch": 0.14950361275920812, "grad_norm": 219626.5625, "learning_rate": 8.50496387240792e-06, "loss": 0.1749, "step": 5090 }, { "epoch": 0.14979733302003173, "grad_norm": 317343.15625, "learning_rate": 8.502026669799684e-06, "loss": 0.1749, "step": 5100 }, { "epoch": 0.1500910532808553, "grad_norm": 298402.25, "learning_rate": 8.499089467191447e-06, "loss": 0.178, "step": 5110 }, { "epoch": 0.1503847735416789, "grad_norm": 217485.125, "learning_rate": 8.496152264583212e-06, "loss": 0.1738, "step": 5120 }, { "epoch": 0.1506784938025025, "grad_norm": 343066.90625, "learning_rate": 8.493215061974976e-06, "loss": 0.1756, "step": 5130 }, { "epoch": 0.1509722140633261, "grad_norm": 233665.609375, "learning_rate": 8.49027785936674e-06, "loss": 0.1871, "step": 5140 }, { "epoch": 0.15126593432414967, "grad_norm": 213038.5625, "learning_rate": 8.487340656758504e-06, "loss": 0.1668, "step": 5150 }, { "epoch": 0.15155965458497328, "grad_norm": 150809.328125, "learning_rate": 8.484403454150268e-06, "loss": 0.1812, "step": 5160 }, { "epoch": 0.15185337484579686, "grad_norm": 212670.984375, "learning_rate": 8.481466251542031e-06, "loss": 0.1743, "step": 5170 }, { "epoch": 0.15214709510662044, "grad_norm": 228446.796875, "learning_rate": 8.478529048933796e-06, "loss": 0.1827, "step": 5180 }, { "epoch": 0.15244081536744405, "grad_norm": 287311.03125, "learning_rate": 8.47559184632556e-06, "loss": 0.1747, "step": 5190 }, { "epoch": 0.15273453562826764, "grad_norm": 159062.25, "learning_rate": 8.472654643717324e-06, "loss": 0.1586, "step": 5200 }, { "epoch": 0.15302825588909122, "grad_norm": 154001.234375, "learning_rate": 8.469717441109088e-06, "loss": 0.1791, "step": 5210 }, { "epoch": 0.15332197614991483, "grad_norm": 367286.9375, "learning_rate": 8.466780238500853e-06, "loss": 0.1539, "step": 5220 }, { "epoch": 0.1536156964107384, "grad_norm": 238886.828125, "learning_rate": 8.463843035892616e-06, "loss": 0.1859, "step": 5230 }, { "epoch": 0.153909416671562, "grad_norm": 187707.28125, "learning_rate": 8.46090583328438e-06, "loss": 0.1688, "step": 5240 }, { "epoch": 0.1542031369323856, "grad_norm": 280014.375, "learning_rate": 8.457968630676145e-06, "loss": 0.1717, "step": 5250 }, { "epoch": 0.15449685719320919, "grad_norm": 205199.1875, "learning_rate": 8.45503142806791e-06, "loss": 0.1653, "step": 5260 }, { "epoch": 0.15479057745403277, "grad_norm": 121053.8359375, "learning_rate": 8.452094225459673e-06, "loss": 0.1773, "step": 5270 }, { "epoch": 0.15508429771485638, "grad_norm": 231811.0625, "learning_rate": 8.449157022851437e-06, "loss": 0.1641, "step": 5280 }, { "epoch": 0.15537801797567996, "grad_norm": 248196.359375, "learning_rate": 8.446219820243202e-06, "loss": 0.1752, "step": 5290 }, { "epoch": 0.15567173823650354, "grad_norm": 327846.15625, "learning_rate": 8.443282617634965e-06, "loss": 0.1901, "step": 5300 }, { "epoch": 0.15596545849732715, "grad_norm": 217942.8125, "learning_rate": 8.44034541502673e-06, "loss": 0.1899, "step": 5310 }, { "epoch": 0.15625917875815074, "grad_norm": 242568.65625, "learning_rate": 8.437408212418492e-06, "loss": 0.1757, "step": 5320 }, { "epoch": 0.15655289901897432, "grad_norm": 259455.890625, "learning_rate": 8.434471009810257e-06, "loss": 0.1807, "step": 5330 }, { "epoch": 0.15684661927979793, "grad_norm": 232523.15625, "learning_rate": 8.431533807202022e-06, "loss": 0.1704, "step": 5340 }, { "epoch": 0.1571403395406215, "grad_norm": 212219.765625, "learning_rate": 8.428596604593785e-06, "loss": 0.1718, "step": 5350 }, { "epoch": 0.1574340598014451, "grad_norm": 298160.21875, "learning_rate": 8.42565940198555e-06, "loss": 0.1615, "step": 5360 }, { "epoch": 0.1577277800622687, "grad_norm": 260451.671875, "learning_rate": 8.422722199377314e-06, "loss": 0.16, "step": 5370 }, { "epoch": 0.15802150032309228, "grad_norm": 280901.9375, "learning_rate": 8.419784996769078e-06, "loss": 0.1819, "step": 5380 }, { "epoch": 0.15831522058391587, "grad_norm": 215819.828125, "learning_rate": 8.416847794160841e-06, "loss": 0.17, "step": 5390 }, { "epoch": 0.15860894084473948, "grad_norm": 360181.96875, "learning_rate": 8.413910591552606e-06, "loss": 0.1745, "step": 5400 }, { "epoch": 0.15890266110556306, "grad_norm": 311350.65625, "learning_rate": 8.41097338894437e-06, "loss": 0.1772, "step": 5410 }, { "epoch": 0.15919638136638664, "grad_norm": 252006.109375, "learning_rate": 8.408036186336134e-06, "loss": 0.17, "step": 5420 }, { "epoch": 0.15949010162721025, "grad_norm": 199344.296875, "learning_rate": 8.405098983727898e-06, "loss": 0.1783, "step": 5430 }, { "epoch": 0.15978382188803383, "grad_norm": 151626.015625, "learning_rate": 8.402161781119663e-06, "loss": 0.1844, "step": 5440 }, { "epoch": 0.16007754214885742, "grad_norm": 241839.3125, "learning_rate": 8.399224578511427e-06, "loss": 0.1679, "step": 5450 }, { "epoch": 0.16037126240968103, "grad_norm": 242973.8125, "learning_rate": 8.39628737590319e-06, "loss": 0.1699, "step": 5460 }, { "epoch": 0.1606649826705046, "grad_norm": 320376.65625, "learning_rate": 8.393350173294953e-06, "loss": 0.1869, "step": 5470 }, { "epoch": 0.1609587029313282, "grad_norm": 188262.375, "learning_rate": 8.390412970686718e-06, "loss": 0.1888, "step": 5480 }, { "epoch": 0.1612524231921518, "grad_norm": 239771.296875, "learning_rate": 8.387475768078483e-06, "loss": 0.1615, "step": 5490 }, { "epoch": 0.16154614345297538, "grad_norm": 299439.0625, "learning_rate": 8.384538565470247e-06, "loss": 0.1882, "step": 5500 }, { "epoch": 0.16183986371379896, "grad_norm": 235521.546875, "learning_rate": 8.38160136286201e-06, "loss": 0.1597, "step": 5510 }, { "epoch": 0.16213358397462257, "grad_norm": 193726.546875, "learning_rate": 8.378664160253775e-06, "loss": 0.1561, "step": 5520 }, { "epoch": 0.16242730423544616, "grad_norm": 298818.75, "learning_rate": 8.37572695764554e-06, "loss": 0.1749, "step": 5530 }, { "epoch": 0.16272102449626974, "grad_norm": 286993.5, "learning_rate": 8.372789755037302e-06, "loss": 0.1721, "step": 5540 }, { "epoch": 0.16301474475709335, "grad_norm": 279809.40625, "learning_rate": 8.369852552429067e-06, "loss": 0.1797, "step": 5550 }, { "epoch": 0.16330846501791693, "grad_norm": 331611.34375, "learning_rate": 8.366915349820832e-06, "loss": 0.1824, "step": 5560 }, { "epoch": 0.16360218527874051, "grad_norm": 449533.5625, "learning_rate": 8.363978147212596e-06, "loss": 0.1684, "step": 5570 }, { "epoch": 0.16389590553956412, "grad_norm": 522813.40625, "learning_rate": 8.36104094460436e-06, "loss": 0.1669, "step": 5580 }, { "epoch": 0.1641896258003877, "grad_norm": 368388.375, "learning_rate": 8.358103741996124e-06, "loss": 0.1817, "step": 5590 }, { "epoch": 0.1644833460612113, "grad_norm": 202295.59375, "learning_rate": 8.355166539387888e-06, "loss": 0.1662, "step": 5600 }, { "epoch": 0.1647770663220349, "grad_norm": 215096.4375, "learning_rate": 8.352229336779651e-06, "loss": 0.1726, "step": 5610 }, { "epoch": 0.16507078658285848, "grad_norm": 238593.65625, "learning_rate": 8.349292134171416e-06, "loss": 0.163, "step": 5620 }, { "epoch": 0.1653645068436821, "grad_norm": 237988.578125, "learning_rate": 8.346354931563179e-06, "loss": 0.1627, "step": 5630 }, { "epoch": 0.16565822710450567, "grad_norm": 159327.328125, "learning_rate": 8.343417728954945e-06, "loss": 0.1913, "step": 5640 }, { "epoch": 0.16595194736532926, "grad_norm": 192907.9375, "learning_rate": 8.340480526346708e-06, "loss": 0.1833, "step": 5650 }, { "epoch": 0.16624566762615287, "grad_norm": 188070.359375, "learning_rate": 8.337543323738471e-06, "loss": 0.1751, "step": 5660 }, { "epoch": 0.16653938788697645, "grad_norm": 281782.125, "learning_rate": 8.334606121130236e-06, "loss": 0.1765, "step": 5670 }, { "epoch": 0.16683310814780003, "grad_norm": 197042.6875, "learning_rate": 8.331668918522e-06, "loss": 0.1762, "step": 5680 }, { "epoch": 0.16712682840862364, "grad_norm": 144524.40625, "learning_rate": 8.328731715913765e-06, "loss": 0.165, "step": 5690 }, { "epoch": 0.16742054866944722, "grad_norm": 235727.921875, "learning_rate": 8.325794513305528e-06, "loss": 0.1486, "step": 5700 }, { "epoch": 0.1677142689302708, "grad_norm": 226698.8125, "learning_rate": 8.322857310697293e-06, "loss": 0.1723, "step": 5710 }, { "epoch": 0.16800798919109441, "grad_norm": 329219.9375, "learning_rate": 8.319920108089057e-06, "loss": 0.1827, "step": 5720 }, { "epoch": 0.168301709451918, "grad_norm": 206883.90625, "learning_rate": 8.31698290548082e-06, "loss": 0.1715, "step": 5730 }, { "epoch": 0.16859542971274158, "grad_norm": 242248.640625, "learning_rate": 8.314045702872585e-06, "loss": 0.1569, "step": 5740 }, { "epoch": 0.1688891499735652, "grad_norm": 187447.984375, "learning_rate": 8.31110850026435e-06, "loss": 0.1746, "step": 5750 }, { "epoch": 0.16918287023438877, "grad_norm": 246797.015625, "learning_rate": 8.308171297656114e-06, "loss": 0.1654, "step": 5760 }, { "epoch": 0.16947659049521235, "grad_norm": 252393.328125, "learning_rate": 8.305234095047877e-06, "loss": 0.1866, "step": 5770 }, { "epoch": 0.16977031075603596, "grad_norm": 153204.25, "learning_rate": 8.30229689243964e-06, "loss": 0.1698, "step": 5780 }, { "epoch": 0.17006403101685955, "grad_norm": 348904.25, "learning_rate": 8.299359689831405e-06, "loss": 0.1655, "step": 5790 }, { "epoch": 0.17035775127768313, "grad_norm": 237475.40625, "learning_rate": 8.29642248722317e-06, "loss": 0.1638, "step": 5800 }, { "epoch": 0.17065147153850674, "grad_norm": 226496.875, "learning_rate": 8.293485284614934e-06, "loss": 0.1549, "step": 5810 }, { "epoch": 0.17094519179933032, "grad_norm": 176424.21875, "learning_rate": 8.290548082006697e-06, "loss": 0.159, "step": 5820 }, { "epoch": 0.1712389120601539, "grad_norm": 459937.78125, "learning_rate": 8.287610879398461e-06, "loss": 0.1738, "step": 5830 }, { "epoch": 0.1715326323209775, "grad_norm": 267755.4375, "learning_rate": 8.284673676790226e-06, "loss": 0.1639, "step": 5840 }, { "epoch": 0.1718263525818011, "grad_norm": 304016.78125, "learning_rate": 8.281736474181989e-06, "loss": 0.1713, "step": 5850 }, { "epoch": 0.17212007284262468, "grad_norm": 194975.484375, "learning_rate": 8.278799271573754e-06, "loss": 0.1624, "step": 5860 }, { "epoch": 0.1724137931034483, "grad_norm": 262520.375, "learning_rate": 8.275862068965518e-06, "loss": 0.1601, "step": 5870 }, { "epoch": 0.17270751336427187, "grad_norm": 317971.46875, "learning_rate": 8.272924866357283e-06, "loss": 0.1682, "step": 5880 }, { "epoch": 0.17300123362509545, "grad_norm": 157381.25, "learning_rate": 8.269987663749046e-06, "loss": 0.1675, "step": 5890 }, { "epoch": 0.17329495388591906, "grad_norm": 282532.0, "learning_rate": 8.26705046114081e-06, "loss": 0.1655, "step": 5900 }, { "epoch": 0.17358867414674264, "grad_norm": 392544.40625, "learning_rate": 8.264113258532575e-06, "loss": 0.1691, "step": 5910 }, { "epoch": 0.17388239440756623, "grad_norm": 214476.046875, "learning_rate": 8.261176055924338e-06, "loss": 0.1802, "step": 5920 }, { "epoch": 0.17417611466838984, "grad_norm": 236965.65625, "learning_rate": 8.258238853316103e-06, "loss": 0.1747, "step": 5930 }, { "epoch": 0.17446983492921342, "grad_norm": 380262.78125, "learning_rate": 8.255301650707866e-06, "loss": 0.1651, "step": 5940 }, { "epoch": 0.174763555190037, "grad_norm": 232760.265625, "learning_rate": 8.25236444809963e-06, "loss": 0.1555, "step": 5950 }, { "epoch": 0.1750572754508606, "grad_norm": 183665.25, "learning_rate": 8.249427245491395e-06, "loss": 0.1572, "step": 5960 }, { "epoch": 0.1753509957116842, "grad_norm": 224105.03125, "learning_rate": 8.246490042883158e-06, "loss": 0.1704, "step": 5970 }, { "epoch": 0.17564471597250778, "grad_norm": 194720.140625, "learning_rate": 8.243552840274923e-06, "loss": 0.1715, "step": 5980 }, { "epoch": 0.17593843623333139, "grad_norm": 377128.78125, "learning_rate": 8.240615637666687e-06, "loss": 0.1767, "step": 5990 }, { "epoch": 0.17623215649415497, "grad_norm": 201354.90625, "learning_rate": 8.237678435058452e-06, "loss": 0.1549, "step": 6000 }, { "epoch": 0.17652587675497855, "grad_norm": 178526.03125, "learning_rate": 8.234741232450215e-06, "loss": 0.1727, "step": 6010 }, { "epoch": 0.17681959701580216, "grad_norm": 148855.578125, "learning_rate": 8.23180402984198e-06, "loss": 0.1725, "step": 6020 }, { "epoch": 0.17711331727662574, "grad_norm": 234181.15625, "learning_rate": 8.228866827233744e-06, "loss": 0.1794, "step": 6030 }, { "epoch": 0.17740703753744932, "grad_norm": 193459.546875, "learning_rate": 8.225929624625507e-06, "loss": 0.1638, "step": 6040 }, { "epoch": 0.17770075779827293, "grad_norm": 355548.5625, "learning_rate": 8.222992422017272e-06, "loss": 0.1668, "step": 6050 }, { "epoch": 0.17799447805909652, "grad_norm": 249546.390625, "learning_rate": 8.220055219409036e-06, "loss": 0.1762, "step": 6060 }, { "epoch": 0.1782881983199201, "grad_norm": 374010.09375, "learning_rate": 8.217118016800799e-06, "loss": 0.1809, "step": 6070 }, { "epoch": 0.1785819185807437, "grad_norm": 181436.75, "learning_rate": 8.214180814192564e-06, "loss": 0.1669, "step": 6080 }, { "epoch": 0.1788756388415673, "grad_norm": 317086.75, "learning_rate": 8.211243611584327e-06, "loss": 0.1733, "step": 6090 }, { "epoch": 0.17916935910239087, "grad_norm": 165482.40625, "learning_rate": 8.208306408976093e-06, "loss": 0.1582, "step": 6100 }, { "epoch": 0.17946307936321448, "grad_norm": 234284.421875, "learning_rate": 8.205369206367856e-06, "loss": 0.1626, "step": 6110 }, { "epoch": 0.17975679962403807, "grad_norm": 366261.5, "learning_rate": 8.202432003759619e-06, "loss": 0.1793, "step": 6120 }, { "epoch": 0.18005051988486165, "grad_norm": 180363.125, "learning_rate": 8.199494801151384e-06, "loss": 0.1696, "step": 6130 }, { "epoch": 0.18034424014568526, "grad_norm": 87833.4765625, "learning_rate": 8.196557598543148e-06, "loss": 0.1555, "step": 6140 }, { "epoch": 0.18063796040650884, "grad_norm": 212245.234375, "learning_rate": 8.193620395934913e-06, "loss": 0.1714, "step": 6150 }, { "epoch": 0.18093168066733242, "grad_norm": 379046.71875, "learning_rate": 8.190683193326676e-06, "loss": 0.1692, "step": 6160 }, { "epoch": 0.18122540092815603, "grad_norm": 458607.71875, "learning_rate": 8.18774599071844e-06, "loss": 0.1768, "step": 6170 }, { "epoch": 0.18151912118897962, "grad_norm": 209235.90625, "learning_rate": 8.184808788110205e-06, "loss": 0.1696, "step": 6180 }, { "epoch": 0.1818128414498032, "grad_norm": 186845.0, "learning_rate": 8.181871585501968e-06, "loss": 0.1795, "step": 6190 }, { "epoch": 0.1821065617106268, "grad_norm": 198659.296875, "learning_rate": 8.178934382893733e-06, "loss": 0.1613, "step": 6200 }, { "epoch": 0.1824002819714504, "grad_norm": 151656.5, "learning_rate": 8.175997180285497e-06, "loss": 0.1793, "step": 6210 }, { "epoch": 0.18269400223227397, "grad_norm": 262575.65625, "learning_rate": 8.173059977677262e-06, "loss": 0.1783, "step": 6220 }, { "epoch": 0.18298772249309758, "grad_norm": 150606.265625, "learning_rate": 8.170122775069025e-06, "loss": 0.1482, "step": 6230 }, { "epoch": 0.18328144275392116, "grad_norm": 312778.6875, "learning_rate": 8.167185572460788e-06, "loss": 0.1633, "step": 6240 }, { "epoch": 0.18357516301474475, "grad_norm": 280536.375, "learning_rate": 8.164248369852554e-06, "loss": 0.1741, "step": 6250 }, { "epoch": 0.18386888327556836, "grad_norm": 197858.46875, "learning_rate": 8.161311167244317e-06, "loss": 0.1644, "step": 6260 }, { "epoch": 0.18416260353639194, "grad_norm": 249740.671875, "learning_rate": 8.158373964636082e-06, "loss": 0.1736, "step": 6270 }, { "epoch": 0.18445632379721552, "grad_norm": 254694.5625, "learning_rate": 8.155436762027845e-06, "loss": 0.1673, "step": 6280 }, { "epoch": 0.18475004405803913, "grad_norm": 189882.453125, "learning_rate": 8.15249955941961e-06, "loss": 0.1656, "step": 6290 }, { "epoch": 0.1850437643188627, "grad_norm": 106174.0703125, "learning_rate": 8.149562356811374e-06, "loss": 0.1717, "step": 6300 }, { "epoch": 0.1853374845796863, "grad_norm": 260598.34375, "learning_rate": 8.146625154203137e-06, "loss": 0.1843, "step": 6310 }, { "epoch": 0.1856312048405099, "grad_norm": 387513.28125, "learning_rate": 8.143687951594901e-06, "loss": 0.172, "step": 6320 }, { "epoch": 0.1859249251013335, "grad_norm": 178582.890625, "learning_rate": 8.140750748986666e-06, "loss": 0.1856, "step": 6330 }, { "epoch": 0.18621864536215707, "grad_norm": 230622.4375, "learning_rate": 8.13781354637843e-06, "loss": 0.1668, "step": 6340 }, { "epoch": 0.18651236562298068, "grad_norm": 207139.046875, "learning_rate": 8.134876343770194e-06, "loss": 0.1603, "step": 6350 }, { "epoch": 0.18680608588380426, "grad_norm": 244549.28125, "learning_rate": 8.131939141161958e-06, "loss": 0.1643, "step": 6360 }, { "epoch": 0.18709980614462784, "grad_norm": 180505.515625, "learning_rate": 8.129001938553723e-06, "loss": 0.1699, "step": 6370 }, { "epoch": 0.18739352640545145, "grad_norm": 269798.25, "learning_rate": 8.126064735945486e-06, "loss": 0.1759, "step": 6380 }, { "epoch": 0.18768724666627504, "grad_norm": 357197.03125, "learning_rate": 8.12312753333725e-06, "loss": 0.1672, "step": 6390 }, { "epoch": 0.18798096692709862, "grad_norm": 284473.125, "learning_rate": 8.120190330729013e-06, "loss": 0.1684, "step": 6400 }, { "epoch": 0.18827468718792223, "grad_norm": 171043.859375, "learning_rate": 8.11725312812078e-06, "loss": 0.1737, "step": 6410 }, { "epoch": 0.1885684074487458, "grad_norm": 271189.375, "learning_rate": 8.114315925512543e-06, "loss": 0.1558, "step": 6420 }, { "epoch": 0.1888621277095694, "grad_norm": 255981.984375, "learning_rate": 8.111378722904306e-06, "loss": 0.1775, "step": 6430 }, { "epoch": 0.189155847970393, "grad_norm": 181842.96875, "learning_rate": 8.10844152029607e-06, "loss": 0.1579, "step": 6440 }, { "epoch": 0.1894495682312166, "grad_norm": 204545.65625, "learning_rate": 8.105504317687835e-06, "loss": 0.1789, "step": 6450 }, { "epoch": 0.18974328849204017, "grad_norm": 251700.84375, "learning_rate": 8.1025671150796e-06, "loss": 0.1751, "step": 6460 }, { "epoch": 0.19003700875286378, "grad_norm": 304535.28125, "learning_rate": 8.099629912471362e-06, "loss": 0.1535, "step": 6470 }, { "epoch": 0.19033072901368736, "grad_norm": 299912.3125, "learning_rate": 8.096692709863127e-06, "loss": 0.1605, "step": 6480 }, { "epoch": 0.19062444927451094, "grad_norm": 300809.8125, "learning_rate": 8.093755507254892e-06, "loss": 0.1751, "step": 6490 }, { "epoch": 0.19091816953533455, "grad_norm": 225824.96875, "learning_rate": 8.090818304646655e-06, "loss": 0.1687, "step": 6500 }, { "epoch": 0.19121188979615814, "grad_norm": 205912.609375, "learning_rate": 8.08788110203842e-06, "loss": 0.1633, "step": 6510 }, { "epoch": 0.19150561005698172, "grad_norm": 140823.3125, "learning_rate": 8.084943899430184e-06, "loss": 0.1681, "step": 6520 }, { "epoch": 0.19179933031780533, "grad_norm": 288549.09375, "learning_rate": 8.082006696821949e-06, "loss": 0.1807, "step": 6530 }, { "epoch": 0.1920930505786289, "grad_norm": 272402.90625, "learning_rate": 8.079069494213711e-06, "loss": 0.1769, "step": 6540 }, { "epoch": 0.1923867708394525, "grad_norm": 228751.921875, "learning_rate": 8.076132291605474e-06, "loss": 0.1709, "step": 6550 }, { "epoch": 0.1926804911002761, "grad_norm": 223772.3125, "learning_rate": 8.07319508899724e-06, "loss": 0.1696, "step": 6560 }, { "epoch": 0.19297421136109968, "grad_norm": 185398.296875, "learning_rate": 8.070257886389004e-06, "loss": 0.171, "step": 6570 }, { "epoch": 0.19326793162192327, "grad_norm": 190352.5, "learning_rate": 8.067320683780768e-06, "loss": 0.1653, "step": 6580 }, { "epoch": 0.19356165188274688, "grad_norm": 241042.734375, "learning_rate": 8.064383481172531e-06, "loss": 0.1658, "step": 6590 }, { "epoch": 0.19385537214357046, "grad_norm": 213641.4375, "learning_rate": 8.061446278564296e-06, "loss": 0.1561, "step": 6600 }, { "epoch": 0.19414909240439404, "grad_norm": 188340.09375, "learning_rate": 8.05850907595606e-06, "loss": 0.1455, "step": 6610 }, { "epoch": 0.19444281266521765, "grad_norm": 139350.9375, "learning_rate": 8.055571873347823e-06, "loss": 0.1641, "step": 6620 }, { "epoch": 0.19473653292604123, "grad_norm": 254549.484375, "learning_rate": 8.052634670739588e-06, "loss": 0.1628, "step": 6630 }, { "epoch": 0.19503025318686484, "grad_norm": 224238.859375, "learning_rate": 8.049697468131353e-06, "loss": 0.1628, "step": 6640 }, { "epoch": 0.19532397344768843, "grad_norm": 172856.53125, "learning_rate": 8.046760265523117e-06, "loss": 0.1517, "step": 6650 }, { "epoch": 0.195617693708512, "grad_norm": 224692.0625, "learning_rate": 8.04382306291488e-06, "loss": 0.1729, "step": 6660 }, { "epoch": 0.19591141396933562, "grad_norm": 192777.734375, "learning_rate": 8.040885860306645e-06, "loss": 0.1701, "step": 6670 }, { "epoch": 0.1962051342301592, "grad_norm": 200746.234375, "learning_rate": 8.03794865769841e-06, "loss": 0.1635, "step": 6680 }, { "epoch": 0.19649885449098278, "grad_norm": 146534.90625, "learning_rate": 8.035011455090172e-06, "loss": 0.1651, "step": 6690 }, { "epoch": 0.1967925747518064, "grad_norm": 93319.9375, "learning_rate": 8.032074252481937e-06, "loss": 0.1718, "step": 6700 }, { "epoch": 0.19708629501262998, "grad_norm": 258645.28125, "learning_rate": 8.029137049873702e-06, "loss": 0.1776, "step": 6710 }, { "epoch": 0.19738001527345356, "grad_norm": 313136.59375, "learning_rate": 8.026199847265465e-06, "loss": 0.1657, "step": 6720 }, { "epoch": 0.19767373553427717, "grad_norm": 174585.484375, "learning_rate": 8.02326264465723e-06, "loss": 0.1554, "step": 6730 }, { "epoch": 0.19796745579510075, "grad_norm": 297393.90625, "learning_rate": 8.020325442048992e-06, "loss": 0.1654, "step": 6740 }, { "epoch": 0.19826117605592433, "grad_norm": 226694.796875, "learning_rate": 8.017388239440757e-06, "loss": 0.1612, "step": 6750 }, { "epoch": 0.19855489631674794, "grad_norm": 227624.484375, "learning_rate": 8.014451036832522e-06, "loss": 0.1787, "step": 6760 }, { "epoch": 0.19884861657757152, "grad_norm": 288023.34375, "learning_rate": 8.011513834224286e-06, "loss": 0.1614, "step": 6770 }, { "epoch": 0.1991423368383951, "grad_norm": 236949.921875, "learning_rate": 8.008576631616049e-06, "loss": 0.184, "step": 6780 }, { "epoch": 0.19943605709921872, "grad_norm": 235775.609375, "learning_rate": 8.005639429007814e-06, "loss": 0.173, "step": 6790 }, { "epoch": 0.1997297773600423, "grad_norm": 225928.734375, "learning_rate": 8.002702226399578e-06, "loss": 0.1672, "step": 6800 }, { "epoch": 0.20002349762086588, "grad_norm": 175582.609375, "learning_rate": 7.999765023791341e-06, "loss": 0.1633, "step": 6810 }, { "epoch": 0.2003172178816895, "grad_norm": 369828.34375, "learning_rate": 7.996827821183106e-06, "loss": 0.1699, "step": 6820 }, { "epoch": 0.20061093814251307, "grad_norm": 140697.46875, "learning_rate": 7.99389061857487e-06, "loss": 0.1555, "step": 6830 }, { "epoch": 0.20090465840333666, "grad_norm": 210711.640625, "learning_rate": 7.990953415966633e-06, "loss": 0.1574, "step": 6840 }, { "epoch": 0.20119837866416027, "grad_norm": 304239.375, "learning_rate": 7.988016213358398e-06, "loss": 0.1679, "step": 6850 }, { "epoch": 0.20149209892498385, "grad_norm": 160248.265625, "learning_rate": 7.985079010750161e-06, "loss": 0.177, "step": 6860 }, { "epoch": 0.20178581918580743, "grad_norm": 151687.65625, "learning_rate": 7.982141808141927e-06, "loss": 0.1692, "step": 6870 }, { "epoch": 0.20207953944663104, "grad_norm": 203173.640625, "learning_rate": 7.97920460553369e-06, "loss": 0.1678, "step": 6880 }, { "epoch": 0.20237325970745462, "grad_norm": 157427.984375, "learning_rate": 7.976267402925455e-06, "loss": 0.1643, "step": 6890 }, { "epoch": 0.2026669799682782, "grad_norm": 404341.59375, "learning_rate": 7.973330200317218e-06, "loss": 0.1527, "step": 6900 }, { "epoch": 0.20296070022910181, "grad_norm": 370521.90625, "learning_rate": 7.970392997708983e-06, "loss": 0.164, "step": 6910 }, { "epoch": 0.2032544204899254, "grad_norm": 285121.5625, "learning_rate": 7.967455795100747e-06, "loss": 0.1746, "step": 6920 }, { "epoch": 0.20354814075074898, "grad_norm": 274783.0, "learning_rate": 7.96451859249251e-06, "loss": 0.1801, "step": 6930 }, { "epoch": 0.2038418610115726, "grad_norm": 334753.4375, "learning_rate": 7.961581389884275e-06, "loss": 0.1603, "step": 6940 }, { "epoch": 0.20413558127239617, "grad_norm": 250909.15625, "learning_rate": 7.95864418727604e-06, "loss": 0.1631, "step": 6950 }, { "epoch": 0.20442930153321975, "grad_norm": 394711.90625, "learning_rate": 7.955706984667802e-06, "loss": 0.1715, "step": 6960 }, { "epoch": 0.20472302179404336, "grad_norm": 212206.453125, "learning_rate": 7.952769782059567e-06, "loss": 0.1705, "step": 6970 }, { "epoch": 0.20501674205486695, "grad_norm": 149834.484375, "learning_rate": 7.949832579451332e-06, "loss": 0.1651, "step": 6980 }, { "epoch": 0.20531046231569053, "grad_norm": 237112.3125, "learning_rate": 7.946895376843096e-06, "loss": 0.18, "step": 6990 }, { "epoch": 0.20560418257651414, "grad_norm": 258142.84375, "learning_rate": 7.943958174234859e-06, "loss": 0.1595, "step": 7000 }, { "epoch": 0.20589790283733772, "grad_norm": 214114.671875, "learning_rate": 7.941020971626624e-06, "loss": 0.1722, "step": 7010 }, { "epoch": 0.2061916230981613, "grad_norm": 235687.421875, "learning_rate": 7.938083769018388e-06, "loss": 0.1635, "step": 7020 }, { "epoch": 0.2064853433589849, "grad_norm": 318844.96875, "learning_rate": 7.935146566410151e-06, "loss": 0.1679, "step": 7030 }, { "epoch": 0.2067790636198085, "grad_norm": 171683.515625, "learning_rate": 7.932209363801916e-06, "loss": 0.1643, "step": 7040 }, { "epoch": 0.20707278388063208, "grad_norm": 315864.21875, "learning_rate": 7.929272161193679e-06, "loss": 0.1695, "step": 7050 }, { "epoch": 0.2073665041414557, "grad_norm": 168165.515625, "learning_rate": 7.926334958585444e-06, "loss": 0.1576, "step": 7060 }, { "epoch": 0.20766022440227927, "grad_norm": 217831.203125, "learning_rate": 7.923397755977208e-06, "loss": 0.1762, "step": 7070 }, { "epoch": 0.20795394466310285, "grad_norm": 245565.140625, "learning_rate": 7.920460553368971e-06, "loss": 0.1621, "step": 7080 }, { "epoch": 0.20824766492392646, "grad_norm": 219497.84375, "learning_rate": 7.917523350760736e-06, "loss": 0.1712, "step": 7090 }, { "epoch": 0.20854138518475004, "grad_norm": 167547.203125, "learning_rate": 7.9145861481525e-06, "loss": 0.1357, "step": 7100 }, { "epoch": 0.20883510544557363, "grad_norm": 254226.46875, "learning_rate": 7.911648945544265e-06, "loss": 0.1632, "step": 7110 }, { "epoch": 0.20912882570639724, "grad_norm": 206875.984375, "learning_rate": 7.908711742936028e-06, "loss": 0.1743, "step": 7120 }, { "epoch": 0.20942254596722082, "grad_norm": 306533.375, "learning_rate": 7.905774540327793e-06, "loss": 0.1433, "step": 7130 }, { "epoch": 0.2097162662280444, "grad_norm": 142684.484375, "learning_rate": 7.902837337719557e-06, "loss": 0.1735, "step": 7140 }, { "epoch": 0.210009986488868, "grad_norm": 179839.9375, "learning_rate": 7.89990013511132e-06, "loss": 0.1754, "step": 7150 }, { "epoch": 0.2103037067496916, "grad_norm": 348931.15625, "learning_rate": 7.896962932503085e-06, "loss": 0.1658, "step": 7160 }, { "epoch": 0.21059742701051518, "grad_norm": 172417.9375, "learning_rate": 7.89402572989485e-06, "loss": 0.1718, "step": 7170 }, { "epoch": 0.21089114727133879, "grad_norm": 217528.65625, "learning_rate": 7.891088527286614e-06, "loss": 0.1728, "step": 7180 }, { "epoch": 0.21118486753216237, "grad_norm": 225060.984375, "learning_rate": 7.888151324678377e-06, "loss": 0.1576, "step": 7190 }, { "epoch": 0.21147858779298595, "grad_norm": 187653.78125, "learning_rate": 7.88521412207014e-06, "loss": 0.1627, "step": 7200 }, { "epoch": 0.21177230805380956, "grad_norm": 135145.359375, "learning_rate": 7.882276919461905e-06, "loss": 0.1612, "step": 7210 }, { "epoch": 0.21206602831463314, "grad_norm": 223723.59375, "learning_rate": 7.87933971685367e-06, "loss": 0.1481, "step": 7220 }, { "epoch": 0.21235974857545672, "grad_norm": 391489.34375, "learning_rate": 7.876402514245434e-06, "loss": 0.1646, "step": 7230 }, { "epoch": 0.21265346883628033, "grad_norm": 192561.125, "learning_rate": 7.873465311637197e-06, "loss": 0.1616, "step": 7240 }, { "epoch": 0.21294718909710392, "grad_norm": 299853.125, "learning_rate": 7.870528109028961e-06, "loss": 0.1715, "step": 7250 }, { "epoch": 0.2132409093579275, "grad_norm": 229320.453125, "learning_rate": 7.867590906420726e-06, "loss": 0.1677, "step": 7260 }, { "epoch": 0.2135346296187511, "grad_norm": 259435.765625, "learning_rate": 7.864653703812489e-06, "loss": 0.1685, "step": 7270 }, { "epoch": 0.2138283498795747, "grad_norm": 349305.46875, "learning_rate": 7.861716501204254e-06, "loss": 0.1654, "step": 7280 }, { "epoch": 0.21412207014039827, "grad_norm": 163699.3125, "learning_rate": 7.858779298596018e-06, "loss": 0.1607, "step": 7290 }, { "epoch": 0.21441579040122188, "grad_norm": 274903.0625, "learning_rate": 7.855842095987783e-06, "loss": 0.1742, "step": 7300 }, { "epoch": 0.21470951066204547, "grad_norm": 259789.1875, "learning_rate": 7.852904893379546e-06, "loss": 0.1614, "step": 7310 }, { "epoch": 0.21500323092286905, "grad_norm": 175081.140625, "learning_rate": 7.84996769077131e-06, "loss": 0.1582, "step": 7320 }, { "epoch": 0.21529695118369266, "grad_norm": 327636.4375, "learning_rate": 7.847030488163075e-06, "loss": 0.1651, "step": 7330 }, { "epoch": 0.21559067144451624, "grad_norm": 500474.0, "learning_rate": 7.844093285554838e-06, "loss": 0.1723, "step": 7340 }, { "epoch": 0.21588439170533982, "grad_norm": 205526.015625, "learning_rate": 7.841156082946603e-06, "loss": 0.159, "step": 7350 }, { "epoch": 0.21617811196616343, "grad_norm": 223578.484375, "learning_rate": 7.838218880338366e-06, "loss": 0.1575, "step": 7360 }, { "epoch": 0.21647183222698702, "grad_norm": 219026.078125, "learning_rate": 7.83528167773013e-06, "loss": 0.1632, "step": 7370 }, { "epoch": 0.2167655524878106, "grad_norm": 211892.15625, "learning_rate": 7.832344475121895e-06, "loss": 0.1447, "step": 7380 }, { "epoch": 0.2170592727486342, "grad_norm": 200613.296875, "learning_rate": 7.829407272513658e-06, "loss": 0.1612, "step": 7390 }, { "epoch": 0.2173529930094578, "grad_norm": 201760.265625, "learning_rate": 7.826470069905422e-06, "loss": 0.168, "step": 7400 }, { "epoch": 0.21764671327028137, "grad_norm": 217315.5625, "learning_rate": 7.823532867297187e-06, "loss": 0.1698, "step": 7410 }, { "epoch": 0.21794043353110498, "grad_norm": 279247.25, "learning_rate": 7.820595664688952e-06, "loss": 0.1662, "step": 7420 }, { "epoch": 0.21823415379192856, "grad_norm": 211801.71875, "learning_rate": 7.817658462080715e-06, "loss": 0.1722, "step": 7430 }, { "epoch": 0.21852787405275215, "grad_norm": 176563.15625, "learning_rate": 7.81472125947248e-06, "loss": 0.1718, "step": 7440 }, { "epoch": 0.21882159431357576, "grad_norm": 332069.21875, "learning_rate": 7.811784056864244e-06, "loss": 0.1612, "step": 7450 }, { "epoch": 0.21911531457439934, "grad_norm": 157834.828125, "learning_rate": 7.808846854256007e-06, "loss": 0.151, "step": 7460 }, { "epoch": 0.21940903483522292, "grad_norm": 131188.65625, "learning_rate": 7.805909651647771e-06, "loss": 0.1677, "step": 7470 }, { "epoch": 0.21970275509604653, "grad_norm": 178877.8125, "learning_rate": 7.802972449039536e-06, "loss": 0.1523, "step": 7480 }, { "epoch": 0.2199964753568701, "grad_norm": 238951.203125, "learning_rate": 7.800035246431299e-06, "loss": 0.1696, "step": 7490 }, { "epoch": 0.2202901956176937, "grad_norm": 165792.34375, "learning_rate": 7.797098043823064e-06, "loss": 0.1819, "step": 7500 }, { "epoch": 0.2205839158785173, "grad_norm": 260235.5, "learning_rate": 7.794160841214827e-06, "loss": 0.1533, "step": 7510 }, { "epoch": 0.2208776361393409, "grad_norm": 236633.828125, "learning_rate": 7.791223638606591e-06, "loss": 0.1662, "step": 7520 }, { "epoch": 0.22117135640016447, "grad_norm": 173101.484375, "learning_rate": 7.788286435998356e-06, "loss": 0.164, "step": 7530 }, { "epoch": 0.22146507666098808, "grad_norm": 183752.609375, "learning_rate": 7.78534923339012e-06, "loss": 0.1602, "step": 7540 }, { "epoch": 0.22175879692181166, "grad_norm": 180423.15625, "learning_rate": 7.782412030781883e-06, "loss": 0.1658, "step": 7550 }, { "epoch": 0.22205251718263525, "grad_norm": 235024.71875, "learning_rate": 7.779474828173648e-06, "loss": 0.1506, "step": 7560 }, { "epoch": 0.22234623744345886, "grad_norm": 382197.25, "learning_rate": 7.776537625565413e-06, "loss": 0.1791, "step": 7570 }, { "epoch": 0.22263995770428244, "grad_norm": 203324.125, "learning_rate": 7.773600422957176e-06, "loss": 0.1593, "step": 7580 }, { "epoch": 0.22293367796510602, "grad_norm": 213744.828125, "learning_rate": 7.77066322034894e-06, "loss": 0.1631, "step": 7590 }, { "epoch": 0.22322739822592963, "grad_norm": 238311.328125, "learning_rate": 7.767726017740705e-06, "loss": 0.1504, "step": 7600 }, { "epoch": 0.2235211184867532, "grad_norm": 232129.75, "learning_rate": 7.764788815132468e-06, "loss": 0.1653, "step": 7610 }, { "epoch": 0.2238148387475768, "grad_norm": 448897.71875, "learning_rate": 7.761851612524232e-06, "loss": 0.1692, "step": 7620 }, { "epoch": 0.2241085590084004, "grad_norm": 180915.734375, "learning_rate": 7.758914409915997e-06, "loss": 0.1469, "step": 7630 }, { "epoch": 0.224402279269224, "grad_norm": 241248.828125, "learning_rate": 7.755977207307762e-06, "loss": 0.1713, "step": 7640 }, { "epoch": 0.2246959995300476, "grad_norm": 201821.0625, "learning_rate": 7.753040004699525e-06, "loss": 0.1681, "step": 7650 }, { "epoch": 0.22498971979087118, "grad_norm": 173258.9375, "learning_rate": 7.75010280209129e-06, "loss": 0.158, "step": 7660 }, { "epoch": 0.22528344005169476, "grad_norm": 349543.375, "learning_rate": 7.747165599483052e-06, "loss": 0.155, "step": 7670 }, { "epoch": 0.22557716031251837, "grad_norm": 175794.921875, "learning_rate": 7.744228396874817e-06, "loss": 0.1702, "step": 7680 }, { "epoch": 0.22587088057334195, "grad_norm": 167703.734375, "learning_rate": 7.741291194266582e-06, "loss": 0.1585, "step": 7690 }, { "epoch": 0.22616460083416554, "grad_norm": 222277.609375, "learning_rate": 7.738353991658344e-06, "loss": 0.1514, "step": 7700 }, { "epoch": 0.22645832109498915, "grad_norm": 244921.8125, "learning_rate": 7.735416789050109e-06, "loss": 0.154, "step": 7710 }, { "epoch": 0.22675204135581273, "grad_norm": 286948.0625, "learning_rate": 7.732479586441874e-06, "loss": 0.1758, "step": 7720 }, { "epoch": 0.2270457616166363, "grad_norm": 177268.390625, "learning_rate": 7.729542383833637e-06, "loss": 0.151, "step": 7730 }, { "epoch": 0.22733948187745992, "grad_norm": 211758.203125, "learning_rate": 7.726605181225401e-06, "loss": 0.1732, "step": 7740 }, { "epoch": 0.2276332021382835, "grad_norm": 235196.296875, "learning_rate": 7.723667978617166e-06, "loss": 0.1717, "step": 7750 }, { "epoch": 0.22792692239910708, "grad_norm": 128260.8515625, "learning_rate": 7.72073077600893e-06, "loss": 0.1584, "step": 7760 }, { "epoch": 0.2282206426599307, "grad_norm": 232000.015625, "learning_rate": 7.717793573400694e-06, "loss": 0.1706, "step": 7770 }, { "epoch": 0.22851436292075428, "grad_norm": 137284.03125, "learning_rate": 7.714856370792458e-06, "loss": 0.1667, "step": 7780 }, { "epoch": 0.22880808318157786, "grad_norm": 198919.328125, "learning_rate": 7.711919168184223e-06, "loss": 0.1598, "step": 7790 }, { "epoch": 0.22910180344240147, "grad_norm": 136362.140625, "learning_rate": 7.708981965575986e-06, "loss": 0.1544, "step": 7800 }, { "epoch": 0.22939552370322505, "grad_norm": 236206.21875, "learning_rate": 7.70604476296775e-06, "loss": 0.1699, "step": 7810 }, { "epoch": 0.22968924396404863, "grad_norm": 222066.703125, "learning_rate": 7.703107560359513e-06, "loss": 0.1676, "step": 7820 }, { "epoch": 0.22998296422487224, "grad_norm": 218357.234375, "learning_rate": 7.700170357751278e-06, "loss": 0.1653, "step": 7830 }, { "epoch": 0.23027668448569583, "grad_norm": 134275.734375, "learning_rate": 7.697233155143043e-06, "loss": 0.1513, "step": 7840 }, { "epoch": 0.2305704047465194, "grad_norm": 207569.34375, "learning_rate": 7.694295952534805e-06, "loss": 0.167, "step": 7850 }, { "epoch": 0.23086412500734302, "grad_norm": 156438.984375, "learning_rate": 7.69135874992657e-06, "loss": 0.1552, "step": 7860 }, { "epoch": 0.2311578452681666, "grad_norm": 166147.46875, "learning_rate": 7.688421547318335e-06, "loss": 0.1469, "step": 7870 }, { "epoch": 0.23145156552899018, "grad_norm": 214790.375, "learning_rate": 7.6854843447101e-06, "loss": 0.168, "step": 7880 }, { "epoch": 0.2317452857898138, "grad_norm": 95703.7265625, "learning_rate": 7.682547142101862e-06, "loss": 0.1562, "step": 7890 }, { "epoch": 0.23203900605063738, "grad_norm": 238272.78125, "learning_rate": 7.679609939493627e-06, "loss": 0.1609, "step": 7900 }, { "epoch": 0.23233272631146096, "grad_norm": 237610.4375, "learning_rate": 7.676672736885392e-06, "loss": 0.1655, "step": 7910 }, { "epoch": 0.23262644657228457, "grad_norm": 134695.359375, "learning_rate": 7.673735534277155e-06, "loss": 0.1652, "step": 7920 }, { "epoch": 0.23292016683310815, "grad_norm": 174274.015625, "learning_rate": 7.67079833166892e-06, "loss": 0.1495, "step": 7930 }, { "epoch": 0.23321388709393173, "grad_norm": 303565.71875, "learning_rate": 7.667861129060684e-06, "loss": 0.1407, "step": 7940 }, { "epoch": 0.23350760735475534, "grad_norm": 358923.96875, "learning_rate": 7.664923926452448e-06, "loss": 0.1542, "step": 7950 }, { "epoch": 0.23380132761557892, "grad_norm": 157806.65625, "learning_rate": 7.661986723844211e-06, "loss": 0.1606, "step": 7960 }, { "epoch": 0.2340950478764025, "grad_norm": 223342.234375, "learning_rate": 7.659049521235974e-06, "loss": 0.1563, "step": 7970 }, { "epoch": 0.23438876813722612, "grad_norm": 285937.1875, "learning_rate": 7.656112318627739e-06, "loss": 0.1548, "step": 7980 }, { "epoch": 0.2346824883980497, "grad_norm": 202134.0625, "learning_rate": 7.653175116019504e-06, "loss": 0.1564, "step": 7990 }, { "epoch": 0.23497620865887328, "grad_norm": 205873.34375, "learning_rate": 7.650237913411268e-06, "loss": 0.1547, "step": 8000 }, { "epoch": 0.2352699289196969, "grad_norm": 142952.078125, "learning_rate": 7.647300710803031e-06, "loss": 0.1753, "step": 8010 }, { "epoch": 0.23556364918052047, "grad_norm": 239001.484375, "learning_rate": 7.644363508194796e-06, "loss": 0.1538, "step": 8020 }, { "epoch": 0.23585736944134406, "grad_norm": 136623.515625, "learning_rate": 7.64142630558656e-06, "loss": 0.1692, "step": 8030 }, { "epoch": 0.23615108970216767, "grad_norm": 189069.296875, "learning_rate": 7.638489102978323e-06, "loss": 0.1661, "step": 8040 }, { "epoch": 0.23644480996299125, "grad_norm": 341026.40625, "learning_rate": 7.635551900370088e-06, "loss": 0.1502, "step": 8050 }, { "epoch": 0.23673853022381483, "grad_norm": 321209.75, "learning_rate": 7.632614697761853e-06, "loss": 0.1606, "step": 8060 }, { "epoch": 0.23703225048463844, "grad_norm": 249931.21875, "learning_rate": 7.629677495153617e-06, "loss": 0.1594, "step": 8070 }, { "epoch": 0.23732597074546202, "grad_norm": 260555.78125, "learning_rate": 7.62674029254538e-06, "loss": 0.171, "step": 8080 }, { "epoch": 0.2376196910062856, "grad_norm": 87679.03125, "learning_rate": 7.623803089937144e-06, "loss": 0.1476, "step": 8090 }, { "epoch": 0.23791341126710921, "grad_norm": 314837.0625, "learning_rate": 7.620865887328909e-06, "loss": 0.1727, "step": 8100 }, { "epoch": 0.2382071315279328, "grad_norm": 226602.609375, "learning_rate": 7.617928684720672e-06, "loss": 0.1533, "step": 8110 }, { "epoch": 0.23850085178875638, "grad_norm": 164546.8125, "learning_rate": 7.614991482112437e-06, "loss": 0.1553, "step": 8120 }, { "epoch": 0.23879457204958, "grad_norm": 167901.015625, "learning_rate": 7.612054279504201e-06, "loss": 0.1598, "step": 8130 }, { "epoch": 0.23908829231040357, "grad_norm": 193230.203125, "learning_rate": 7.6091170768959654e-06, "loss": 0.1604, "step": 8140 }, { "epoch": 0.23938201257122715, "grad_norm": 367002.8125, "learning_rate": 7.606179874287729e-06, "loss": 0.1528, "step": 8150 }, { "epoch": 0.23967573283205076, "grad_norm": 368076.78125, "learning_rate": 7.603242671679493e-06, "loss": 0.1554, "step": 8160 }, { "epoch": 0.23996945309287435, "grad_norm": 144549.3125, "learning_rate": 7.600305469071258e-06, "loss": 0.1505, "step": 8170 }, { "epoch": 0.24026317335369793, "grad_norm": 155373.671875, "learning_rate": 7.597368266463021e-06, "loss": 0.1589, "step": 8180 }, { "epoch": 0.24055689361452154, "grad_norm": 198464.078125, "learning_rate": 7.594431063854786e-06, "loss": 0.159, "step": 8190 }, { "epoch": 0.24085061387534512, "grad_norm": 163801.484375, "learning_rate": 7.591493861246549e-06, "loss": 0.1702, "step": 8200 }, { "epoch": 0.2411443341361687, "grad_norm": 230215.484375, "learning_rate": 7.588556658638313e-06, "loss": 0.1664, "step": 8210 }, { "epoch": 0.2414380543969923, "grad_norm": 172491.03125, "learning_rate": 7.585619456030077e-06, "loss": 0.156, "step": 8220 }, { "epoch": 0.2417317746578159, "grad_norm": 270058.4375, "learning_rate": 7.582682253421841e-06, "loss": 0.1744, "step": 8230 }, { "epoch": 0.24202549491863948, "grad_norm": 163430.921875, "learning_rate": 7.579745050813606e-06, "loss": 0.1516, "step": 8240 }, { "epoch": 0.2423192151794631, "grad_norm": 320256.78125, "learning_rate": 7.57680784820537e-06, "loss": 0.1434, "step": 8250 }, { "epoch": 0.24261293544028667, "grad_norm": 214024.46875, "learning_rate": 7.573870645597133e-06, "loss": 0.1627, "step": 8260 }, { "epoch": 0.24290665570111025, "grad_norm": 236677.484375, "learning_rate": 7.570933442988898e-06, "loss": 0.1529, "step": 8270 }, { "epoch": 0.24320037596193386, "grad_norm": 216065.890625, "learning_rate": 7.567996240380662e-06, "loss": 0.1454, "step": 8280 }, { "epoch": 0.24349409622275744, "grad_norm": 231383.03125, "learning_rate": 7.5650590377724265e-06, "loss": 0.1632, "step": 8290 }, { "epoch": 0.24378781648358103, "grad_norm": 221104.65625, "learning_rate": 7.56212183516419e-06, "loss": 0.1564, "step": 8300 }, { "epoch": 0.24408153674440464, "grad_norm": 238319.703125, "learning_rate": 7.559184632555955e-06, "loss": 0.1746, "step": 8310 }, { "epoch": 0.24437525700522822, "grad_norm": 274405.4375, "learning_rate": 7.556247429947719e-06, "loss": 0.1629, "step": 8320 }, { "epoch": 0.2446689772660518, "grad_norm": 202204.296875, "learning_rate": 7.5533102273394824e-06, "loss": 0.1604, "step": 8330 }, { "epoch": 0.2449626975268754, "grad_norm": 171876.234375, "learning_rate": 7.550373024731247e-06, "loss": 0.1703, "step": 8340 }, { "epoch": 0.245256417787699, "grad_norm": 241274.65625, "learning_rate": 7.54743582212301e-06, "loss": 0.1662, "step": 8350 }, { "epoch": 0.24555013804852258, "grad_norm": 208633.359375, "learning_rate": 7.5444986195147755e-06, "loss": 0.1698, "step": 8360 }, { "epoch": 0.24584385830934619, "grad_norm": 252050.328125, "learning_rate": 7.5415614169065384e-06, "loss": 0.1646, "step": 8370 }, { "epoch": 0.24613757857016977, "grad_norm": 327845.3125, "learning_rate": 7.538624214298302e-06, "loss": 0.1531, "step": 8380 }, { "epoch": 0.24643129883099335, "grad_norm": 371716.875, "learning_rate": 7.535687011690067e-06, "loss": 0.1568, "step": 8390 }, { "epoch": 0.24672501909181696, "grad_norm": 222025.875, "learning_rate": 7.532749809081831e-06, "loss": 0.1505, "step": 8400 }, { "epoch": 0.24701873935264054, "grad_norm": 227872.15625, "learning_rate": 7.529812606473595e-06, "loss": 0.1726, "step": 8410 }, { "epoch": 0.24731245961346413, "grad_norm": 154668.375, "learning_rate": 7.526875403865359e-06, "loss": 0.1535, "step": 8420 }, { "epoch": 0.24760617987428774, "grad_norm": 370904.28125, "learning_rate": 7.523938201257124e-06, "loss": 0.1445, "step": 8430 }, { "epoch": 0.24789990013511132, "grad_norm": 211152.25, "learning_rate": 7.5210009986488875e-06, "loss": 0.1706, "step": 8440 }, { "epoch": 0.2481936203959349, "grad_norm": 96592.1171875, "learning_rate": 7.518063796040651e-06, "loss": 0.1447, "step": 8450 }, { "epoch": 0.2484873406567585, "grad_norm": 175777.359375, "learning_rate": 7.515126593432416e-06, "loss": 0.169, "step": 8460 }, { "epoch": 0.2487810609175821, "grad_norm": 386567.9375, "learning_rate": 7.51218939082418e-06, "loss": 0.1639, "step": 8470 }, { "epoch": 0.24907478117840567, "grad_norm": 488862.46875, "learning_rate": 7.509252188215944e-06, "loss": 0.1617, "step": 8480 }, { "epoch": 0.24936850143922928, "grad_norm": 234411.140625, "learning_rate": 7.506314985607708e-06, "loss": 0.1608, "step": 8490 }, { "epoch": 0.24966222170005287, "grad_norm": 157207.5625, "learning_rate": 7.503377782999471e-06, "loss": 0.1567, "step": 8500 }, { "epoch": 0.24995594196087645, "grad_norm": 219634.359375, "learning_rate": 7.5004405803912365e-06, "loss": 0.1594, "step": 8510 }, { "epoch": 0.25024966222170003, "grad_norm": 183684.1875, "learning_rate": 7.4975033777829995e-06, "loss": 0.1684, "step": 8520 }, { "epoch": 0.25054338248252367, "grad_norm": 234019.953125, "learning_rate": 7.494566175174764e-06, "loss": 0.1663, "step": 8530 }, { "epoch": 0.25083710274334725, "grad_norm": 214818.40625, "learning_rate": 7.491628972566528e-06, "loss": 0.1637, "step": 8540 }, { "epoch": 0.25113082300417083, "grad_norm": 174376.453125, "learning_rate": 7.4886917699582925e-06, "loss": 0.1518, "step": 8550 }, { "epoch": 0.2514245432649944, "grad_norm": 156092.1875, "learning_rate": 7.485754567350056e-06, "loss": 0.1581, "step": 8560 }, { "epoch": 0.251718263525818, "grad_norm": 283501.0625, "learning_rate": 7.48281736474182e-06, "loss": 0.1657, "step": 8570 }, { "epoch": 0.2520119837866416, "grad_norm": 290077.34375, "learning_rate": 7.479880162133585e-06, "loss": 0.1573, "step": 8580 }, { "epoch": 0.2523057040474652, "grad_norm": 217102.25, "learning_rate": 7.4769429595253485e-06, "loss": 0.1557, "step": 8590 }, { "epoch": 0.2525994243082888, "grad_norm": 388749.15625, "learning_rate": 7.474005756917113e-06, "loss": 0.1658, "step": 8600 }, { "epoch": 0.2528931445691124, "grad_norm": 169426.953125, "learning_rate": 7.471068554308877e-06, "loss": 0.1433, "step": 8610 }, { "epoch": 0.25318686482993596, "grad_norm": 418176.59375, "learning_rate": 7.468131351700641e-06, "loss": 0.1598, "step": 8620 }, { "epoch": 0.25348058509075955, "grad_norm": 154703.09375, "learning_rate": 7.465194149092405e-06, "loss": 0.1592, "step": 8630 }, { "epoch": 0.25377430535158313, "grad_norm": 223693.375, "learning_rate": 7.462256946484169e-06, "loss": 0.1809, "step": 8640 }, { "epoch": 0.25406802561240677, "grad_norm": 343414.09375, "learning_rate": 7.459319743875934e-06, "loss": 0.1568, "step": 8650 }, { "epoch": 0.25436174587323035, "grad_norm": 286535.46875, "learning_rate": 7.4563825412676975e-06, "loss": 0.1722, "step": 8660 }, { "epoch": 0.25465546613405393, "grad_norm": 172571.90625, "learning_rate": 7.453445338659462e-06, "loss": 0.1658, "step": 8670 }, { "epoch": 0.2549491863948775, "grad_norm": 127405.34375, "learning_rate": 7.450508136051225e-06, "loss": 0.1693, "step": 8680 }, { "epoch": 0.2552429066557011, "grad_norm": 165190.546875, "learning_rate": 7.447570933442989e-06, "loss": 0.1495, "step": 8690 }, { "epoch": 0.2555366269165247, "grad_norm": 205026.734375, "learning_rate": 7.4446337308347535e-06, "loss": 0.1453, "step": 8700 }, { "epoch": 0.2558303471773483, "grad_norm": 370021.3125, "learning_rate": 7.441696528226517e-06, "loss": 0.1538, "step": 8710 }, { "epoch": 0.2561240674381719, "grad_norm": 192498.3125, "learning_rate": 7.438759325618282e-06, "loss": 0.156, "step": 8720 }, { "epoch": 0.2564177876989955, "grad_norm": 315597.5, "learning_rate": 7.435822123010046e-06, "loss": 0.1561, "step": 8730 }, { "epoch": 0.25671150795981906, "grad_norm": 228185.078125, "learning_rate": 7.4328849204018095e-06, "loss": 0.1616, "step": 8740 }, { "epoch": 0.25700522822064265, "grad_norm": 233983.171875, "learning_rate": 7.429947717793574e-06, "loss": 0.1732, "step": 8750 }, { "epoch": 0.2572989484814662, "grad_norm": 151252.84375, "learning_rate": 7.427010515185338e-06, "loss": 0.1611, "step": 8760 }, { "epoch": 0.25759266874228987, "grad_norm": 148983.328125, "learning_rate": 7.4240733125771026e-06, "loss": 0.1584, "step": 8770 }, { "epoch": 0.25788638900311345, "grad_norm": 193827.65625, "learning_rate": 7.421136109968866e-06, "loss": 0.1475, "step": 8780 }, { "epoch": 0.25818010926393703, "grad_norm": 169960.828125, "learning_rate": 7.418198907360631e-06, "loss": 0.1442, "step": 8790 }, { "epoch": 0.2584738295247606, "grad_norm": 196272.078125, "learning_rate": 7.415261704752395e-06, "loss": 0.1487, "step": 8800 }, { "epoch": 0.2587675497855842, "grad_norm": 214772.5625, "learning_rate": 7.412324502144158e-06, "loss": 0.1545, "step": 8810 }, { "epoch": 0.2590612700464078, "grad_norm": 242651.859375, "learning_rate": 7.409387299535923e-06, "loss": 0.1688, "step": 8820 }, { "epoch": 0.2593549903072314, "grad_norm": 181310.625, "learning_rate": 7.406450096927686e-06, "loss": 0.1703, "step": 8830 }, { "epoch": 0.259648710568055, "grad_norm": 200626.359375, "learning_rate": 7.403512894319452e-06, "loss": 0.1597, "step": 8840 }, { "epoch": 0.2599424308288786, "grad_norm": 267139.125, "learning_rate": 7.4005756917112145e-06, "loss": 0.1537, "step": 8850 }, { "epoch": 0.26023615108970216, "grad_norm": 365719.65625, "learning_rate": 7.397638489102978e-06, "loss": 0.1673, "step": 8860 }, { "epoch": 0.26052987135052574, "grad_norm": 245305.65625, "learning_rate": 7.394701286494743e-06, "loss": 0.1515, "step": 8870 }, { "epoch": 0.2608235916113493, "grad_norm": 320768.90625, "learning_rate": 7.391764083886507e-06, "loss": 0.1749, "step": 8880 }, { "epoch": 0.26111731187217296, "grad_norm": 229348.0, "learning_rate": 7.388826881278271e-06, "loss": 0.1689, "step": 8890 }, { "epoch": 0.26141103213299655, "grad_norm": 290552.40625, "learning_rate": 7.385889678670035e-06, "loss": 0.1635, "step": 8900 }, { "epoch": 0.26170475239382013, "grad_norm": 113114.9609375, "learning_rate": 7.3829524760618e-06, "loss": 0.1581, "step": 8910 }, { "epoch": 0.2619984726546437, "grad_norm": 320671.34375, "learning_rate": 7.380015273453564e-06, "loss": 0.1513, "step": 8920 }, { "epoch": 0.2622921929154673, "grad_norm": 199473.125, "learning_rate": 7.377078070845327e-06, "loss": 0.1547, "step": 8930 }, { "epoch": 0.2625859131762909, "grad_norm": 157626.265625, "learning_rate": 7.374140868237092e-06, "loss": 0.1569, "step": 8940 }, { "epoch": 0.2628796334371145, "grad_norm": 261495.421875, "learning_rate": 7.371203665628856e-06, "loss": 0.1638, "step": 8950 }, { "epoch": 0.2631733536979381, "grad_norm": 319998.78125, "learning_rate": 7.36826646302062e-06, "loss": 0.154, "step": 8960 }, { "epoch": 0.2634670739587617, "grad_norm": 232536.484375, "learning_rate": 7.365329260412384e-06, "loss": 0.1522, "step": 8970 }, { "epoch": 0.26376079421958526, "grad_norm": 149544.375, "learning_rate": 7.362392057804147e-06, "loss": 0.1629, "step": 8980 }, { "epoch": 0.26405451448040884, "grad_norm": 132142.109375, "learning_rate": 7.359454855195912e-06, "loss": 0.1643, "step": 8990 }, { "epoch": 0.2643482347412324, "grad_norm": 309117.0625, "learning_rate": 7.3565176525876756e-06, "loss": 0.1561, "step": 9000 }, { "epoch": 0.26464195500205606, "grad_norm": 324814.375, "learning_rate": 7.35358044997944e-06, "loss": 0.1553, "step": 9010 }, { "epoch": 0.26493567526287964, "grad_norm": 199455.15625, "learning_rate": 7.350643247371204e-06, "loss": 0.1497, "step": 9020 }, { "epoch": 0.2652293955237032, "grad_norm": 154776.375, "learning_rate": 7.347706044762969e-06, "loss": 0.1556, "step": 9030 }, { "epoch": 0.2655231157845268, "grad_norm": 188926.609375, "learning_rate": 7.344768842154732e-06, "loss": 0.1603, "step": 9040 }, { "epoch": 0.2658168360453504, "grad_norm": 157414.609375, "learning_rate": 7.341831639546496e-06, "loss": 0.1565, "step": 9050 }, { "epoch": 0.266110556306174, "grad_norm": 184798.75, "learning_rate": 7.338894436938261e-06, "loss": 0.1554, "step": 9060 }, { "epoch": 0.2664042765669976, "grad_norm": 265182.4375, "learning_rate": 7.335957234330025e-06, "loss": 0.1517, "step": 9070 }, { "epoch": 0.2666979968278212, "grad_norm": 174104.578125, "learning_rate": 7.333020031721789e-06, "loss": 0.1655, "step": 9080 }, { "epoch": 0.2669917170886448, "grad_norm": 135279.671875, "learning_rate": 7.330082829113553e-06, "loss": 0.1483, "step": 9090 }, { "epoch": 0.26728543734946836, "grad_norm": 202243.0, "learning_rate": 7.327145626505317e-06, "loss": 0.1599, "step": 9100 }, { "epoch": 0.26757915761029194, "grad_norm": 241554.515625, "learning_rate": 7.3242084238970814e-06, "loss": 0.1633, "step": 9110 }, { "epoch": 0.2678728778711155, "grad_norm": 286122.84375, "learning_rate": 7.321271221288845e-06, "loss": 0.1694, "step": 9120 }, { "epoch": 0.26816659813193916, "grad_norm": 233427.015625, "learning_rate": 7.31833401868061e-06, "loss": 0.1529, "step": 9130 }, { "epoch": 0.26846031839276274, "grad_norm": 232654.171875, "learning_rate": 7.315396816072373e-06, "loss": 0.1488, "step": 9140 }, { "epoch": 0.2687540386535863, "grad_norm": 254122.25, "learning_rate": 7.312459613464138e-06, "loss": 0.1622, "step": 9150 }, { "epoch": 0.2690477589144099, "grad_norm": 386432.03125, "learning_rate": 7.309522410855901e-06, "loss": 0.1516, "step": 9160 }, { "epoch": 0.2693414791752335, "grad_norm": 162337.203125, "learning_rate": 7.306585208247665e-06, "loss": 0.1403, "step": 9170 }, { "epoch": 0.2696351994360571, "grad_norm": 216766.90625, "learning_rate": 7.30364800563943e-06, "loss": 0.149, "step": 9180 }, { "epoch": 0.2699289196968807, "grad_norm": 107533.671875, "learning_rate": 7.300710803031193e-06, "loss": 0.154, "step": 9190 }, { "epoch": 0.2702226399577043, "grad_norm": 224762.875, "learning_rate": 7.297773600422958e-06, "loss": 0.1686, "step": 9200 }, { "epoch": 0.2705163602185279, "grad_norm": 151108.78125, "learning_rate": 7.294836397814722e-06, "loss": 0.1562, "step": 9210 }, { "epoch": 0.27081008047935146, "grad_norm": 171499.265625, "learning_rate": 7.291899195206486e-06, "loss": 0.1517, "step": 9220 }, { "epoch": 0.27110380074017504, "grad_norm": 235241.125, "learning_rate": 7.28896199259825e-06, "loss": 0.1493, "step": 9230 }, { "epoch": 0.2713975210009987, "grad_norm": 121251.390625, "learning_rate": 7.286024789990014e-06, "loss": 0.1363, "step": 9240 }, { "epoch": 0.27169124126182226, "grad_norm": 201454.0625, "learning_rate": 7.283087587381779e-06, "loss": 0.1552, "step": 9250 }, { "epoch": 0.27198496152264584, "grad_norm": 143932.875, "learning_rate": 7.2801503847735425e-06, "loss": 0.1573, "step": 9260 }, { "epoch": 0.2722786817834694, "grad_norm": 130311.2109375, "learning_rate": 7.277213182165305e-06, "loss": 0.1427, "step": 9270 }, { "epoch": 0.272572402044293, "grad_norm": 135479.34375, "learning_rate": 7.274275979557071e-06, "loss": 0.1495, "step": 9280 }, { "epoch": 0.2728661223051166, "grad_norm": 246684.078125, "learning_rate": 7.271338776948834e-06, "loss": 0.1618, "step": 9290 }, { "epoch": 0.2731598425659402, "grad_norm": 186613.921875, "learning_rate": 7.268401574340599e-06, "loss": 0.1487, "step": 9300 }, { "epoch": 0.2734535628267638, "grad_norm": 334931.125, "learning_rate": 7.265464371732362e-06, "loss": 0.1521, "step": 9310 }, { "epoch": 0.2737472830875874, "grad_norm": 142190.421875, "learning_rate": 7.262527169124127e-06, "loss": 0.1619, "step": 9320 }, { "epoch": 0.27404100334841097, "grad_norm": 180846.203125, "learning_rate": 7.259589966515891e-06, "loss": 0.1456, "step": 9330 }, { "epoch": 0.27433472360923455, "grad_norm": 192235.578125, "learning_rate": 7.2566527639076544e-06, "loss": 0.1601, "step": 9340 }, { "epoch": 0.27462844387005814, "grad_norm": 139917.546875, "learning_rate": 7.253715561299419e-06, "loss": 0.1645, "step": 9350 }, { "epoch": 0.2749221641308818, "grad_norm": 336735.46875, "learning_rate": 7.250778358691183e-06, "loss": 0.1565, "step": 9360 }, { "epoch": 0.27521588439170536, "grad_norm": 160878.3125, "learning_rate": 7.2478411560829475e-06, "loss": 0.1591, "step": 9370 }, { "epoch": 0.27550960465252894, "grad_norm": 194919.703125, "learning_rate": 7.244903953474711e-06, "loss": 0.1512, "step": 9380 }, { "epoch": 0.2758033249133525, "grad_norm": 183965.75, "learning_rate": 7.241966750866475e-06, "loss": 0.1475, "step": 9390 }, { "epoch": 0.2760970451741761, "grad_norm": 143286.421875, "learning_rate": 7.23902954825824e-06, "loss": 0.1732, "step": 9400 }, { "epoch": 0.2763907654349997, "grad_norm": 200878.546875, "learning_rate": 7.2360923456500035e-06, "loss": 0.1601, "step": 9410 }, { "epoch": 0.2766844856958233, "grad_norm": 145810.859375, "learning_rate": 7.233155143041768e-06, "loss": 0.156, "step": 9420 }, { "epoch": 0.2769782059566469, "grad_norm": 150392.890625, "learning_rate": 7.230217940433532e-06, "loss": 0.1355, "step": 9430 }, { "epoch": 0.2772719262174705, "grad_norm": 206473.546875, "learning_rate": 7.2272807378252965e-06, "loss": 0.1575, "step": 9440 }, { "epoch": 0.27756564647829407, "grad_norm": 163677.921875, "learning_rate": 7.2243435352170595e-06, "loss": 0.1493, "step": 9450 }, { "epoch": 0.27785936673911765, "grad_norm": 131252.265625, "learning_rate": 7.221406332608823e-06, "loss": 0.1535, "step": 9460 }, { "epoch": 0.27815308699994123, "grad_norm": 133826.890625, "learning_rate": 7.218469130000588e-06, "loss": 0.16, "step": 9470 }, { "epoch": 0.2784468072607649, "grad_norm": 192644.875, "learning_rate": 7.215531927392352e-06, "loss": 0.1537, "step": 9480 }, { "epoch": 0.27874052752158845, "grad_norm": 259259.265625, "learning_rate": 7.212594724784116e-06, "loss": 0.1622, "step": 9490 }, { "epoch": 0.27903424778241204, "grad_norm": 203275.953125, "learning_rate": 7.20965752217588e-06, "loss": 0.149, "step": 9500 }, { "epoch": 0.2793279680432356, "grad_norm": 249282.515625, "learning_rate": 7.206720319567644e-06, "loss": 0.1597, "step": 9510 }, { "epoch": 0.2796216883040592, "grad_norm": 198705.046875, "learning_rate": 7.2037831169594085e-06, "loss": 0.1575, "step": 9520 }, { "epoch": 0.2799154085648828, "grad_norm": 220062.515625, "learning_rate": 7.200845914351172e-06, "loss": 0.148, "step": 9530 }, { "epoch": 0.2802091288257064, "grad_norm": 281642.71875, "learning_rate": 7.197908711742937e-06, "loss": 0.1421, "step": 9540 }, { "epoch": 0.28050284908653, "grad_norm": 192643.25, "learning_rate": 7.194971509134701e-06, "loss": 0.146, "step": 9550 }, { "epoch": 0.2807965693473536, "grad_norm": 231268.125, "learning_rate": 7.192034306526465e-06, "loss": 0.1556, "step": 9560 }, { "epoch": 0.28109028960817717, "grad_norm": 409406.625, "learning_rate": 7.189097103918229e-06, "loss": 0.1575, "step": 9570 }, { "epoch": 0.28138400986900075, "grad_norm": 329723.78125, "learning_rate": 7.186159901309993e-06, "loss": 0.1634, "step": 9580 }, { "epoch": 0.28167773012982433, "grad_norm": 128589.1171875, "learning_rate": 7.1832226987017575e-06, "loss": 0.1431, "step": 9590 }, { "epoch": 0.28197145039064797, "grad_norm": 180929.671875, "learning_rate": 7.1802854960935205e-06, "loss": 0.1411, "step": 9600 }, { "epoch": 0.28226517065147155, "grad_norm": 301007.125, "learning_rate": 7.177348293485286e-06, "loss": 0.1606, "step": 9610 }, { "epoch": 0.28255889091229514, "grad_norm": 216585.9375, "learning_rate": 7.174411090877049e-06, "loss": 0.159, "step": 9620 }, { "epoch": 0.2828526111731187, "grad_norm": 188050.71875, "learning_rate": 7.171473888268813e-06, "loss": 0.1526, "step": 9630 }, { "epoch": 0.2831463314339423, "grad_norm": 416414.625, "learning_rate": 7.168536685660577e-06, "loss": 0.1571, "step": 9640 }, { "epoch": 0.2834400516947659, "grad_norm": 216491.609375, "learning_rate": 7.165599483052341e-06, "loss": 0.1504, "step": 9650 }, { "epoch": 0.2837337719555895, "grad_norm": 128507.2265625, "learning_rate": 7.162662280444106e-06, "loss": 0.1493, "step": 9660 }, { "epoch": 0.2840274922164131, "grad_norm": 228220.15625, "learning_rate": 7.1597250778358695e-06, "loss": 0.1419, "step": 9670 }, { "epoch": 0.2843212124772367, "grad_norm": 286682.78125, "learning_rate": 7.156787875227634e-06, "loss": 0.1604, "step": 9680 }, { "epoch": 0.28461493273806027, "grad_norm": 190767.859375, "learning_rate": 7.153850672619398e-06, "loss": 0.1372, "step": 9690 }, { "epoch": 0.28490865299888385, "grad_norm": 140734.703125, "learning_rate": 7.150913470011162e-06, "loss": 0.1615, "step": 9700 }, { "epoch": 0.28520237325970743, "grad_norm": 191781.09375, "learning_rate": 7.147976267402926e-06, "loss": 0.1429, "step": 9710 }, { "epoch": 0.28549609352053107, "grad_norm": 112984.2578125, "learning_rate": 7.14503906479469e-06, "loss": 0.1493, "step": 9720 }, { "epoch": 0.28578981378135465, "grad_norm": 271734.34375, "learning_rate": 7.142101862186455e-06, "loss": 0.1684, "step": 9730 }, { "epoch": 0.28608353404217823, "grad_norm": 150235.0, "learning_rate": 7.1391646595782186e-06, "loss": 0.152, "step": 9740 }, { "epoch": 0.2863772543030018, "grad_norm": 304303.40625, "learning_rate": 7.1362274569699815e-06, "loss": 0.1526, "step": 9750 }, { "epoch": 0.2866709745638254, "grad_norm": 126028.4140625, "learning_rate": 7.133290254361747e-06, "loss": 0.172, "step": 9760 }, { "epoch": 0.286964694824649, "grad_norm": 140292.09375, "learning_rate": 7.13035305175351e-06, "loss": 0.1674, "step": 9770 }, { "epoch": 0.2872584150854726, "grad_norm": 153587.859375, "learning_rate": 7.1274158491452746e-06, "loss": 0.163, "step": 9780 }, { "epoch": 0.2875521353462962, "grad_norm": 220247.09375, "learning_rate": 7.124478646537038e-06, "loss": 0.1449, "step": 9790 }, { "epoch": 0.2878458556071198, "grad_norm": 191597.5, "learning_rate": 7.121541443928803e-06, "loss": 0.1655, "step": 9800 }, { "epoch": 0.28813957586794336, "grad_norm": 152525.203125, "learning_rate": 7.118604241320567e-06, "loss": 0.1532, "step": 9810 }, { "epoch": 0.28843329612876695, "grad_norm": 141101.859375, "learning_rate": 7.1156670387123305e-06, "loss": 0.149, "step": 9820 }, { "epoch": 0.28872701638959053, "grad_norm": 281665.8125, "learning_rate": 7.112729836104095e-06, "loss": 0.1641, "step": 9830 }, { "epoch": 0.28902073665041417, "grad_norm": 233816.796875, "learning_rate": 7.109792633495859e-06, "loss": 0.1559, "step": 9840 }, { "epoch": 0.28931445691123775, "grad_norm": 177241.421875, "learning_rate": 7.106855430887624e-06, "loss": 0.1578, "step": 9850 }, { "epoch": 0.28960817717206133, "grad_norm": 168700.03125, "learning_rate": 7.103918228279387e-06, "loss": 0.1537, "step": 9860 }, { "epoch": 0.2899018974328849, "grad_norm": 144514.5, "learning_rate": 7.100981025671151e-06, "loss": 0.1455, "step": 9870 }, { "epoch": 0.2901956176937085, "grad_norm": 120298.1171875, "learning_rate": 7.098043823062916e-06, "loss": 0.1584, "step": 9880 }, { "epoch": 0.2904893379545321, "grad_norm": 331113.28125, "learning_rate": 7.09510662045468e-06, "loss": 0.1613, "step": 9890 }, { "epoch": 0.2907830582153557, "grad_norm": 254908.765625, "learning_rate": 7.092169417846444e-06, "loss": 0.1567, "step": 9900 }, { "epoch": 0.2910767784761793, "grad_norm": 463953.65625, "learning_rate": 7.089232215238208e-06, "loss": 0.1299, "step": 9910 }, { "epoch": 0.2913704987370029, "grad_norm": 151708.796875, "learning_rate": 7.086295012629973e-06, "loss": 0.1558, "step": 9920 }, { "epoch": 0.29166421899782646, "grad_norm": 225228.921875, "learning_rate": 7.083357810021736e-06, "loss": 0.1423, "step": 9930 }, { "epoch": 0.29195793925865005, "grad_norm": 155666.34375, "learning_rate": 7.080420607413499e-06, "loss": 0.1522, "step": 9940 }, { "epoch": 0.2922516595194736, "grad_norm": 213255.828125, "learning_rate": 7.077483404805264e-06, "loss": 0.1487, "step": 9950 }, { "epoch": 0.29254537978029727, "grad_norm": 192078.546875, "learning_rate": 7.074546202197028e-06, "loss": 0.1374, "step": 9960 }, { "epoch": 0.29283910004112085, "grad_norm": 126610.8359375, "learning_rate": 7.071608999588792e-06, "loss": 0.1479, "step": 9970 }, { "epoch": 0.29313282030194443, "grad_norm": 213245.5625, "learning_rate": 7.068671796980556e-06, "loss": 0.1676, "step": 9980 }, { "epoch": 0.293426540562768, "grad_norm": 231540.890625, "learning_rate": 7.06573459437232e-06, "loss": 0.1562, "step": 9990 }, { "epoch": 0.2937202608235916, "grad_norm": 167814.4375, "learning_rate": 7.062797391764085e-06, "loss": 0.1472, "step": 10000 }, { "epoch": 0.2940139810844152, "grad_norm": 164546.65625, "learning_rate": 7.059860189155848e-06, "loss": 0.1455, "step": 10010 }, { "epoch": 0.2943077013452388, "grad_norm": 259815.015625, "learning_rate": 7.056922986547613e-06, "loss": 0.1598, "step": 10020 }, { "epoch": 0.2946014216060624, "grad_norm": 138814.546875, "learning_rate": 7.053985783939377e-06, "loss": 0.1374, "step": 10030 }, { "epoch": 0.294895141866886, "grad_norm": 405065.125, "learning_rate": 7.0510485813311415e-06, "loss": 0.1505, "step": 10040 }, { "epoch": 0.29518886212770956, "grad_norm": 158367.90625, "learning_rate": 7.048111378722905e-06, "loss": 0.1498, "step": 10050 }, { "epoch": 0.29548258238853314, "grad_norm": 246234.859375, "learning_rate": 7.045174176114668e-06, "loss": 0.1527, "step": 10060 }, { "epoch": 0.2957763026493567, "grad_norm": 158356.53125, "learning_rate": 7.042236973506434e-06, "loss": 0.1577, "step": 10070 }, { "epoch": 0.29607002291018036, "grad_norm": 140241.25, "learning_rate": 7.039299770898197e-06, "loss": 0.147, "step": 10080 }, { "epoch": 0.29636374317100395, "grad_norm": 276679.25, "learning_rate": 7.036362568289962e-06, "loss": 0.1483, "step": 10090 }, { "epoch": 0.29665746343182753, "grad_norm": 193811.671875, "learning_rate": 7.033425365681725e-06, "loss": 0.1555, "step": 10100 }, { "epoch": 0.2969511836926511, "grad_norm": 137235.203125, "learning_rate": 7.030488163073489e-06, "loss": 0.1514, "step": 10110 }, { "epoch": 0.2972449039534747, "grad_norm": 125308.15625, "learning_rate": 7.0275509604652534e-06, "loss": 0.1548, "step": 10120 }, { "epoch": 0.2975386242142983, "grad_norm": 161952.875, "learning_rate": 7.024613757857017e-06, "loss": 0.1535, "step": 10130 }, { "epoch": 0.2978323444751219, "grad_norm": 162677.40625, "learning_rate": 7.021676555248782e-06, "loss": 0.1525, "step": 10140 }, { "epoch": 0.2981260647359455, "grad_norm": 187843.421875, "learning_rate": 7.018739352640546e-06, "loss": 0.1541, "step": 10150 }, { "epoch": 0.2984197849967691, "grad_norm": 88582.734375, "learning_rate": 7.01580215003231e-06, "loss": 0.155, "step": 10160 }, { "epoch": 0.29871350525759266, "grad_norm": 425999.8125, "learning_rate": 7.012864947424074e-06, "loss": 0.1484, "step": 10170 }, { "epoch": 0.29900722551841624, "grad_norm": 225384.171875, "learning_rate": 7.009927744815838e-06, "loss": 0.1586, "step": 10180 }, { "epoch": 0.2993009457792398, "grad_norm": 154466.015625, "learning_rate": 7.0069905422076025e-06, "loss": 0.1579, "step": 10190 }, { "epoch": 0.29959466604006346, "grad_norm": 262384.34375, "learning_rate": 7.004053339599366e-06, "loss": 0.1549, "step": 10200 }, { "epoch": 0.29988838630088704, "grad_norm": 178974.046875, "learning_rate": 7.001116136991131e-06, "loss": 0.1557, "step": 10210 }, { "epoch": 0.3001821065617106, "grad_norm": 139811.28125, "learning_rate": 6.998178934382895e-06, "loss": 0.1581, "step": 10220 }, { "epoch": 0.3004758268225342, "grad_norm": 199970.921875, "learning_rate": 6.995241731774658e-06, "loss": 0.1536, "step": 10230 }, { "epoch": 0.3007695470833578, "grad_norm": 168264.34375, "learning_rate": 6.992304529166422e-06, "loss": 0.1503, "step": 10240 }, { "epoch": 0.30106326734418143, "grad_norm": 184171.75, "learning_rate": 6.989367326558186e-06, "loss": 0.1572, "step": 10250 }, { "epoch": 0.301356987605005, "grad_norm": 94177.234375, "learning_rate": 6.986430123949951e-06, "loss": 0.1505, "step": 10260 }, { "epoch": 0.3016507078658286, "grad_norm": 160894.515625, "learning_rate": 6.9834929213417145e-06, "loss": 0.1616, "step": 10270 }, { "epoch": 0.3019444281266522, "grad_norm": 182437.765625, "learning_rate": 6.980555718733478e-06, "loss": 0.1514, "step": 10280 }, { "epoch": 0.30223814838747576, "grad_norm": 128492.140625, "learning_rate": 6.977618516125243e-06, "loss": 0.1511, "step": 10290 }, { "epoch": 0.30253186864829934, "grad_norm": 230882.3125, "learning_rate": 6.974681313517007e-06, "loss": 0.1502, "step": 10300 }, { "epoch": 0.302825588909123, "grad_norm": 177313.609375, "learning_rate": 6.971744110908771e-06, "loss": 0.1651, "step": 10310 }, { "epoch": 0.30311930916994656, "grad_norm": 201054.0625, "learning_rate": 6.968806908300535e-06, "loss": 0.149, "step": 10320 }, { "epoch": 0.30341302943077014, "grad_norm": 191494.921875, "learning_rate": 6.9658697056923e-06, "loss": 0.1522, "step": 10330 }, { "epoch": 0.3037067496915937, "grad_norm": 532481.3125, "learning_rate": 6.9629325030840635e-06, "loss": 0.1592, "step": 10340 }, { "epoch": 0.3040004699524173, "grad_norm": 187130.0, "learning_rate": 6.959995300475827e-06, "loss": 0.137, "step": 10350 }, { "epoch": 0.3042941902132409, "grad_norm": 149610.96875, "learning_rate": 6.957058097867592e-06, "loss": 0.1392, "step": 10360 }, { "epoch": 0.3045879104740645, "grad_norm": 141893.640625, "learning_rate": 6.954120895259356e-06, "loss": 0.1554, "step": 10370 }, { "epoch": 0.3048816307348881, "grad_norm": 87010.71875, "learning_rate": 6.95118369265112e-06, "loss": 0.155, "step": 10380 }, { "epoch": 0.3051753509957117, "grad_norm": 281970.53125, "learning_rate": 6.948246490042883e-06, "loss": 0.151, "step": 10390 }, { "epoch": 0.3054690712565353, "grad_norm": 152031.6875, "learning_rate": 6.945309287434647e-06, "loss": 0.1558, "step": 10400 }, { "epoch": 0.30576279151735886, "grad_norm": 200066.921875, "learning_rate": 6.942372084826412e-06, "loss": 0.157, "step": 10410 }, { "epoch": 0.30605651177818244, "grad_norm": 174318.84375, "learning_rate": 6.9394348822181755e-06, "loss": 0.1522, "step": 10420 }, { "epoch": 0.3063502320390061, "grad_norm": 322872.90625, "learning_rate": 6.93649767960994e-06, "loss": 0.1526, "step": 10430 }, { "epoch": 0.30664395229982966, "grad_norm": 185732.71875, "learning_rate": 6.933560477001704e-06, "loss": 0.1521, "step": 10440 }, { "epoch": 0.30693767256065324, "grad_norm": 180725.015625, "learning_rate": 6.9306232743934685e-06, "loss": 0.152, "step": 10450 }, { "epoch": 0.3072313928214768, "grad_norm": 255375.34375, "learning_rate": 6.927686071785232e-06, "loss": 0.1621, "step": 10460 }, { "epoch": 0.3075251130823004, "grad_norm": 151086.734375, "learning_rate": 6.924748869176996e-06, "loss": 0.1534, "step": 10470 }, { "epoch": 0.307818833343124, "grad_norm": 218746.921875, "learning_rate": 6.921811666568761e-06, "loss": 0.1582, "step": 10480 }, { "epoch": 0.3081125536039476, "grad_norm": 272242.40625, "learning_rate": 6.9188744639605245e-06, "loss": 0.1614, "step": 10490 }, { "epoch": 0.3084062738647712, "grad_norm": 144106.921875, "learning_rate": 6.915937261352289e-06, "loss": 0.1548, "step": 10500 }, { "epoch": 0.3086999941255948, "grad_norm": 131605.921875, "learning_rate": 6.913000058744053e-06, "loss": 0.1456, "step": 10510 }, { "epoch": 0.30899371438641837, "grad_norm": 273912.53125, "learning_rate": 6.910062856135816e-06, "loss": 0.1558, "step": 10520 }, { "epoch": 0.30928743464724195, "grad_norm": 181289.375, "learning_rate": 6.907125653527581e-06, "loss": 0.1589, "step": 10530 }, { "epoch": 0.30958115490806554, "grad_norm": 129364.96875, "learning_rate": 6.904188450919344e-06, "loss": 0.1409, "step": 10540 }, { "epoch": 0.3098748751688892, "grad_norm": 125193.265625, "learning_rate": 6.90125124831111e-06, "loss": 0.1388, "step": 10550 }, { "epoch": 0.31016859542971276, "grad_norm": 164930.796875, "learning_rate": 6.898314045702873e-06, "loss": 0.1443, "step": 10560 }, { "epoch": 0.31046231569053634, "grad_norm": 331792.84375, "learning_rate": 6.895376843094637e-06, "loss": 0.1648, "step": 10570 }, { "epoch": 0.3107560359513599, "grad_norm": 270686.53125, "learning_rate": 6.892439640486401e-06, "loss": 0.1642, "step": 10580 }, { "epoch": 0.3110497562121835, "grad_norm": 314514.15625, "learning_rate": 6.889502437878165e-06, "loss": 0.1688, "step": 10590 }, { "epoch": 0.3113434764730071, "grad_norm": 146337.0, "learning_rate": 6.8865652352699295e-06, "loss": 0.1476, "step": 10600 }, { "epoch": 0.3116371967338307, "grad_norm": 153272.265625, "learning_rate": 6.883628032661693e-06, "loss": 0.1562, "step": 10610 }, { "epoch": 0.3119309169946543, "grad_norm": 152075.203125, "learning_rate": 6.880690830053458e-06, "loss": 0.1531, "step": 10620 }, { "epoch": 0.3122246372554779, "grad_norm": 263352.15625, "learning_rate": 6.877753627445222e-06, "loss": 0.1491, "step": 10630 }, { "epoch": 0.31251835751630147, "grad_norm": 187299.234375, "learning_rate": 6.8748164248369855e-06, "loss": 0.138, "step": 10640 }, { "epoch": 0.31281207777712505, "grad_norm": 167285.84375, "learning_rate": 6.87187922222875e-06, "loss": 0.1572, "step": 10650 }, { "epoch": 0.31310579803794863, "grad_norm": 244426.203125, "learning_rate": 6.868942019620514e-06, "loss": 0.1491, "step": 10660 }, { "epoch": 0.3133995182987723, "grad_norm": 151857.328125, "learning_rate": 6.866004817012279e-06, "loss": 0.1529, "step": 10670 }, { "epoch": 0.31369323855959586, "grad_norm": 178937.796875, "learning_rate": 6.863067614404042e-06, "loss": 0.1448, "step": 10680 }, { "epoch": 0.31398695882041944, "grad_norm": 234868.5625, "learning_rate": 6.860130411795807e-06, "loss": 0.1546, "step": 10690 }, { "epoch": 0.314280679081243, "grad_norm": 164417.125, "learning_rate": 6.85719320918757e-06, "loss": 0.1634, "step": 10700 }, { "epoch": 0.3145743993420666, "grad_norm": 144326.625, "learning_rate": 6.854256006579334e-06, "loss": 0.1623, "step": 10710 }, { "epoch": 0.3148681196028902, "grad_norm": 165597.03125, "learning_rate": 6.851318803971098e-06, "loss": 0.1627, "step": 10720 }, { "epoch": 0.3151618398637138, "grad_norm": 143849.28125, "learning_rate": 6.848381601362862e-06, "loss": 0.1406, "step": 10730 }, { "epoch": 0.3154555601245374, "grad_norm": 219509.640625, "learning_rate": 6.845444398754627e-06, "loss": 0.1506, "step": 10740 }, { "epoch": 0.315749280385361, "grad_norm": 174706.421875, "learning_rate": 6.8425071961463906e-06, "loss": 0.1732, "step": 10750 }, { "epoch": 0.31604300064618457, "grad_norm": 124074.8359375, "learning_rate": 6.839569993538154e-06, "loss": 0.1471, "step": 10760 }, { "epoch": 0.31633672090700815, "grad_norm": 171619.46875, "learning_rate": 6.836632790929919e-06, "loss": 0.146, "step": 10770 }, { "epoch": 0.31663044116783173, "grad_norm": 141738.171875, "learning_rate": 6.833695588321683e-06, "loss": 0.1469, "step": 10780 }, { "epoch": 0.31692416142865537, "grad_norm": 151984.46875, "learning_rate": 6.830758385713447e-06, "loss": 0.1512, "step": 10790 }, { "epoch": 0.31721788168947895, "grad_norm": 359193.3125, "learning_rate": 6.827821183105211e-06, "loss": 0.139, "step": 10800 }, { "epoch": 0.31751160195030254, "grad_norm": 128346.5234375, "learning_rate": 6.824883980496976e-06, "loss": 0.1425, "step": 10810 }, { "epoch": 0.3178053222111261, "grad_norm": 313775.03125, "learning_rate": 6.82194677788874e-06, "loss": 0.168, "step": 10820 }, { "epoch": 0.3180990424719497, "grad_norm": 217189.359375, "learning_rate": 6.819009575280503e-06, "loss": 0.1433, "step": 10830 }, { "epoch": 0.3183927627327733, "grad_norm": 327896.71875, "learning_rate": 6.816072372672268e-06, "loss": 0.1603, "step": 10840 }, { "epoch": 0.3186864829935969, "grad_norm": 253684.484375, "learning_rate": 6.813135170064031e-06, "loss": 0.1691, "step": 10850 }, { "epoch": 0.3189802032544205, "grad_norm": 147877.046875, "learning_rate": 6.8101979674557964e-06, "loss": 0.1429, "step": 10860 }, { "epoch": 0.3192739235152441, "grad_norm": 207623.265625, "learning_rate": 6.807260764847559e-06, "loss": 0.1564, "step": 10870 }, { "epoch": 0.31956764377606767, "grad_norm": 271787.46875, "learning_rate": 6.804323562239323e-06, "loss": 0.1546, "step": 10880 }, { "epoch": 0.31986136403689125, "grad_norm": 181126.8125, "learning_rate": 6.801386359631088e-06, "loss": 0.149, "step": 10890 }, { "epoch": 0.32015508429771483, "grad_norm": 221490.171875, "learning_rate": 6.798449157022852e-06, "loss": 0.1598, "step": 10900 }, { "epoch": 0.32044880455853847, "grad_norm": 250727.8125, "learning_rate": 6.795511954414616e-06, "loss": 0.15, "step": 10910 }, { "epoch": 0.32074252481936205, "grad_norm": 254771.671875, "learning_rate": 6.79257475180638e-06, "loss": 0.1438, "step": 10920 }, { "epoch": 0.32103624508018563, "grad_norm": 182754.25, "learning_rate": 6.789637549198145e-06, "loss": 0.1576, "step": 10930 }, { "epoch": 0.3213299653410092, "grad_norm": 168632.703125, "learning_rate": 6.786700346589908e-06, "loss": 0.1533, "step": 10940 }, { "epoch": 0.3216236856018328, "grad_norm": 186513.84375, "learning_rate": 6.783763143981672e-06, "loss": 0.1491, "step": 10950 }, { "epoch": 0.3219174058626564, "grad_norm": 148535.09375, "learning_rate": 6.780825941373437e-06, "loss": 0.1287, "step": 10960 }, { "epoch": 0.32221112612348, "grad_norm": 187303.0, "learning_rate": 6.777888738765201e-06, "loss": 0.1509, "step": 10970 }, { "epoch": 0.3225048463843036, "grad_norm": 208010.921875, "learning_rate": 6.774951536156965e-06, "loss": 0.1447, "step": 10980 }, { "epoch": 0.3227985666451272, "grad_norm": 196356.84375, "learning_rate": 6.772014333548729e-06, "loss": 0.1515, "step": 10990 }, { "epoch": 0.32309228690595077, "grad_norm": 200648.25, "learning_rate": 6.769077130940492e-06, "loss": 0.1567, "step": 11000 }, { "epoch": 0.32338600716677435, "grad_norm": 201878.078125, "learning_rate": 6.7661399283322575e-06, "loss": 0.1577, "step": 11010 }, { "epoch": 0.32367972742759793, "grad_norm": 294728.40625, "learning_rate": 6.76320272572402e-06, "loss": 0.1584, "step": 11020 }, { "epoch": 0.32397344768842157, "grad_norm": 193918.15625, "learning_rate": 6.760265523115785e-06, "loss": 0.1518, "step": 11030 }, { "epoch": 0.32426716794924515, "grad_norm": 174517.5, "learning_rate": 6.757328320507549e-06, "loss": 0.159, "step": 11040 }, { "epoch": 0.32456088821006873, "grad_norm": 166722.09375, "learning_rate": 6.7543911178993135e-06, "loss": 0.1511, "step": 11050 }, { "epoch": 0.3248546084708923, "grad_norm": 195251.09375, "learning_rate": 6.751453915291077e-06, "loss": 0.1593, "step": 11060 }, { "epoch": 0.3251483287317159, "grad_norm": 249898.875, "learning_rate": 6.748516712682841e-06, "loss": 0.1406, "step": 11070 }, { "epoch": 0.3254420489925395, "grad_norm": 177800.53125, "learning_rate": 6.745579510074606e-06, "loss": 0.1526, "step": 11080 }, { "epoch": 0.3257357692533631, "grad_norm": 186737.453125, "learning_rate": 6.7426423074663694e-06, "loss": 0.1606, "step": 11090 }, { "epoch": 0.3260294895141867, "grad_norm": 190413.796875, "learning_rate": 6.739705104858134e-06, "loss": 0.1567, "step": 11100 }, { "epoch": 0.3263232097750103, "grad_norm": 192582.25, "learning_rate": 6.736767902249898e-06, "loss": 0.1534, "step": 11110 }, { "epoch": 0.32661693003583386, "grad_norm": 166084.703125, "learning_rate": 6.733830699641662e-06, "loss": 0.1645, "step": 11120 }, { "epoch": 0.32691065029665745, "grad_norm": 85138.7578125, "learning_rate": 6.730893497033426e-06, "loss": 0.1518, "step": 11130 }, { "epoch": 0.32720437055748103, "grad_norm": 163285.234375, "learning_rate": 6.72795629442519e-06, "loss": 0.1509, "step": 11140 }, { "epoch": 0.32749809081830467, "grad_norm": 187227.296875, "learning_rate": 6.725019091816955e-06, "loss": 0.1474, "step": 11150 }, { "epoch": 0.32779181107912825, "grad_norm": 199900.71875, "learning_rate": 6.7220818892087185e-06, "loss": 0.1457, "step": 11160 }, { "epoch": 0.32808553133995183, "grad_norm": 230438.6875, "learning_rate": 6.719144686600483e-06, "loss": 0.1528, "step": 11170 }, { "epoch": 0.3283792516007754, "grad_norm": 169018.78125, "learning_rate": 6.716207483992246e-06, "loss": 0.1552, "step": 11180 }, { "epoch": 0.328672971861599, "grad_norm": 184904.8125, "learning_rate": 6.71327028138401e-06, "loss": 0.1509, "step": 11190 }, { "epoch": 0.3289666921224226, "grad_norm": 176543.03125, "learning_rate": 6.7103330787757745e-06, "loss": 0.144, "step": 11200 }, { "epoch": 0.3292604123832462, "grad_norm": 236225.984375, "learning_rate": 6.707395876167538e-06, "loss": 0.1392, "step": 11210 }, { "epoch": 0.3295541326440698, "grad_norm": 107893.7109375, "learning_rate": 6.704458673559303e-06, "loss": 0.1587, "step": 11220 }, { "epoch": 0.3298478529048934, "grad_norm": 131778.078125, "learning_rate": 6.701521470951067e-06, "loss": 0.1431, "step": 11230 }, { "epoch": 0.33014157316571696, "grad_norm": 394511.5625, "learning_rate": 6.6985842683428305e-06, "loss": 0.1558, "step": 11240 }, { "epoch": 0.33043529342654054, "grad_norm": 167207.5625, "learning_rate": 6.695647065734595e-06, "loss": 0.1561, "step": 11250 }, { "epoch": 0.3307290136873642, "grad_norm": 629207.9375, "learning_rate": 6.692709863126359e-06, "loss": 0.1572, "step": 11260 }, { "epoch": 0.33102273394818776, "grad_norm": 235014.515625, "learning_rate": 6.6897726605181235e-06, "loss": 0.1436, "step": 11270 }, { "epoch": 0.33131645420901135, "grad_norm": 135123.78125, "learning_rate": 6.686835457909887e-06, "loss": 0.1371, "step": 11280 }, { "epoch": 0.33161017446983493, "grad_norm": 195838.09375, "learning_rate": 6.683898255301651e-06, "loss": 0.1512, "step": 11290 }, { "epoch": 0.3319038947306585, "grad_norm": 130388.296875, "learning_rate": 6.680961052693416e-06, "loss": 0.1422, "step": 11300 }, { "epoch": 0.3321976149914821, "grad_norm": 462908.09375, "learning_rate": 6.678023850085179e-06, "loss": 0.1686, "step": 11310 }, { "epoch": 0.33249133525230573, "grad_norm": 191809.96875, "learning_rate": 6.675086647476944e-06, "loss": 0.1518, "step": 11320 }, { "epoch": 0.3327850555131293, "grad_norm": 197299.203125, "learning_rate": 6.672149444868707e-06, "loss": 0.1505, "step": 11330 }, { "epoch": 0.3330787757739529, "grad_norm": 278034.15625, "learning_rate": 6.6692122422604726e-06, "loss": 0.1493, "step": 11340 }, { "epoch": 0.3333724960347765, "grad_norm": 135180.78125, "learning_rate": 6.6662750396522355e-06, "loss": 0.1478, "step": 11350 }, { "epoch": 0.33366621629560006, "grad_norm": 250813.71875, "learning_rate": 6.663337837043999e-06, "loss": 0.1564, "step": 11360 }, { "epoch": 0.33395993655642364, "grad_norm": 118590.859375, "learning_rate": 6.660400634435764e-06, "loss": 0.1608, "step": 11370 }, { "epoch": 0.3342536568172473, "grad_norm": 210824.515625, "learning_rate": 6.657463431827528e-06, "loss": 0.1517, "step": 11380 }, { "epoch": 0.33454737707807086, "grad_norm": 113880.28125, "learning_rate": 6.654526229219292e-06, "loss": 0.1599, "step": 11390 }, { "epoch": 0.33484109733889444, "grad_norm": 168469.953125, "learning_rate": 6.651589026611056e-06, "loss": 0.1419, "step": 11400 }, { "epoch": 0.335134817599718, "grad_norm": 186152.90625, "learning_rate": 6.64865182400282e-06, "loss": 0.1407, "step": 11410 }, { "epoch": 0.3354285378605416, "grad_norm": 180142.484375, "learning_rate": 6.6457146213945845e-06, "loss": 0.1422, "step": 11420 }, { "epoch": 0.3357222581213652, "grad_norm": 159466.625, "learning_rate": 6.642777418786348e-06, "loss": 0.1519, "step": 11430 }, { "epoch": 0.33601597838218883, "grad_norm": 129229.921875, "learning_rate": 6.639840216178113e-06, "loss": 0.1577, "step": 11440 }, { "epoch": 0.3363096986430124, "grad_norm": 250213.890625, "learning_rate": 6.636903013569877e-06, "loss": 0.1513, "step": 11450 }, { "epoch": 0.336603418903836, "grad_norm": 235136.5, "learning_rate": 6.633965810961641e-06, "loss": 0.1489, "step": 11460 }, { "epoch": 0.3368971391646596, "grad_norm": 217147.84375, "learning_rate": 6.631028608353405e-06, "loss": 0.1563, "step": 11470 }, { "epoch": 0.33719085942548316, "grad_norm": 163570.15625, "learning_rate": 6.628091405745168e-06, "loss": 0.1437, "step": 11480 }, { "epoch": 0.33748457968630674, "grad_norm": 215439.21875, "learning_rate": 6.625154203136933e-06, "loss": 0.1455, "step": 11490 }, { "epoch": 0.3377782999471304, "grad_norm": 285324.21875, "learning_rate": 6.6222170005286965e-06, "loss": 0.1456, "step": 11500 }, { "epoch": 0.33807202020795396, "grad_norm": 111998.609375, "learning_rate": 6.619279797920461e-06, "loss": 0.1357, "step": 11510 }, { "epoch": 0.33836574046877754, "grad_norm": 155274.78125, "learning_rate": 6.616342595312225e-06, "loss": 0.1413, "step": 11520 }, { "epoch": 0.3386594607296011, "grad_norm": 330166.46875, "learning_rate": 6.613405392703989e-06, "loss": 0.1515, "step": 11530 }, { "epoch": 0.3389531809904247, "grad_norm": 149721.03125, "learning_rate": 6.610468190095753e-06, "loss": 0.1397, "step": 11540 }, { "epoch": 0.3392469012512483, "grad_norm": 377898.625, "learning_rate": 6.607530987487517e-06, "loss": 0.1566, "step": 11550 }, { "epoch": 0.3395406215120719, "grad_norm": 247632.265625, "learning_rate": 6.604593784879282e-06, "loss": 0.1351, "step": 11560 }, { "epoch": 0.3398343417728955, "grad_norm": 286133.625, "learning_rate": 6.6016565822710456e-06, "loss": 0.1497, "step": 11570 }, { "epoch": 0.3401280620337191, "grad_norm": 189199.8125, "learning_rate": 6.59871937966281e-06, "loss": 0.1546, "step": 11580 }, { "epoch": 0.3404217822945427, "grad_norm": 348327.875, "learning_rate": 6.595782177054574e-06, "loss": 0.145, "step": 11590 }, { "epoch": 0.34071550255536626, "grad_norm": 175171.4375, "learning_rate": 6.592844974446338e-06, "loss": 0.1415, "step": 11600 }, { "epoch": 0.34100922281618984, "grad_norm": 164789.28125, "learning_rate": 6.589907771838102e-06, "loss": 0.1579, "step": 11610 }, { "epoch": 0.3413029430770135, "grad_norm": 134184.890625, "learning_rate": 6.586970569229866e-06, "loss": 0.1434, "step": 11620 }, { "epoch": 0.34159666333783706, "grad_norm": 127069.0859375, "learning_rate": 6.584033366621631e-06, "loss": 0.1304, "step": 11630 }, { "epoch": 0.34189038359866064, "grad_norm": 192855.15625, "learning_rate": 6.581096164013394e-06, "loss": 0.1491, "step": 11640 }, { "epoch": 0.3421841038594842, "grad_norm": 135585.859375, "learning_rate": 6.5781589614051575e-06, "loss": 0.1456, "step": 11650 }, { "epoch": 0.3424778241203078, "grad_norm": 185143.453125, "learning_rate": 6.575221758796922e-06, "loss": 0.1437, "step": 11660 }, { "epoch": 0.3427715443811314, "grad_norm": 315043.15625, "learning_rate": 6.572284556188686e-06, "loss": 0.1636, "step": 11670 }, { "epoch": 0.343065264641955, "grad_norm": 237661.265625, "learning_rate": 6.569347353580451e-06, "loss": 0.1304, "step": 11680 }, { "epoch": 0.3433589849027786, "grad_norm": 263983.75, "learning_rate": 6.566410150972214e-06, "loss": 0.1525, "step": 11690 }, { "epoch": 0.3436527051636022, "grad_norm": 197496.578125, "learning_rate": 6.563472948363979e-06, "loss": 0.1555, "step": 11700 }, { "epoch": 0.3439464254244258, "grad_norm": 146370.125, "learning_rate": 6.560535745755743e-06, "loss": 0.1505, "step": 11710 }, { "epoch": 0.34424014568524935, "grad_norm": 190524.640625, "learning_rate": 6.5575985431475066e-06, "loss": 0.1414, "step": 11720 }, { "epoch": 0.34453386594607294, "grad_norm": 204665.46875, "learning_rate": 6.554661340539271e-06, "loss": 0.1529, "step": 11730 }, { "epoch": 0.3448275862068966, "grad_norm": 172809.390625, "learning_rate": 6.551724137931035e-06, "loss": 0.1491, "step": 11740 }, { "epoch": 0.34512130646772016, "grad_norm": 128774.953125, "learning_rate": 6.5487869353228e-06, "loss": 0.1335, "step": 11750 }, { "epoch": 0.34541502672854374, "grad_norm": 187821.828125, "learning_rate": 6.545849732714563e-06, "loss": 0.1598, "step": 11760 }, { "epoch": 0.3457087469893673, "grad_norm": 234735.796875, "learning_rate": 6.542912530106326e-06, "loss": 0.1577, "step": 11770 }, { "epoch": 0.3460024672501909, "grad_norm": 154827.875, "learning_rate": 6.539975327498092e-06, "loss": 0.1404, "step": 11780 }, { "epoch": 0.3462961875110145, "grad_norm": 154395.390625, "learning_rate": 6.537038124889855e-06, "loss": 0.1533, "step": 11790 }, { "epoch": 0.3465899077718381, "grad_norm": 240757.40625, "learning_rate": 6.53410092228162e-06, "loss": 0.1578, "step": 11800 }, { "epoch": 0.3468836280326617, "grad_norm": 210273.171875, "learning_rate": 6.531163719673383e-06, "loss": 0.1493, "step": 11810 }, { "epoch": 0.3471773482934853, "grad_norm": 298581.4375, "learning_rate": 6.528226517065148e-06, "loss": 0.1544, "step": 11820 }, { "epoch": 0.34747106855430887, "grad_norm": 166957.8125, "learning_rate": 6.525289314456912e-06, "loss": 0.1703, "step": 11830 }, { "epoch": 0.34776478881513245, "grad_norm": 126136.671875, "learning_rate": 6.522352111848675e-06, "loss": 0.1465, "step": 11840 }, { "epoch": 0.34805850907595604, "grad_norm": 187047.1875, "learning_rate": 6.51941490924044e-06, "loss": 0.1508, "step": 11850 }, { "epoch": 0.3483522293367797, "grad_norm": 189789.734375, "learning_rate": 6.516477706632204e-06, "loss": 0.1459, "step": 11860 }, { "epoch": 0.34864594959760326, "grad_norm": 251101.046875, "learning_rate": 6.5135405040239684e-06, "loss": 0.1613, "step": 11870 }, { "epoch": 0.34893966985842684, "grad_norm": 312133.84375, "learning_rate": 6.510603301415732e-06, "loss": 0.1528, "step": 11880 }, { "epoch": 0.3492333901192504, "grad_norm": 140083.171875, "learning_rate": 6.507666098807496e-06, "loss": 0.1546, "step": 11890 }, { "epoch": 0.349527110380074, "grad_norm": 190751.265625, "learning_rate": 6.504728896199261e-06, "loss": 0.1421, "step": 11900 }, { "epoch": 0.3498208306408976, "grad_norm": 287677.6875, "learning_rate": 6.5017916935910244e-06, "loss": 0.1576, "step": 11910 }, { "epoch": 0.3501145509017212, "grad_norm": 162402.859375, "learning_rate": 6.498854490982789e-06, "loss": 0.157, "step": 11920 }, { "epoch": 0.3504082711625448, "grad_norm": 158008.546875, "learning_rate": 6.495917288374553e-06, "loss": 0.1618, "step": 11930 }, { "epoch": 0.3507019914233684, "grad_norm": 145390.9375, "learning_rate": 6.4929800857663175e-06, "loss": 0.1493, "step": 11940 }, { "epoch": 0.35099571168419197, "grad_norm": 146415.46875, "learning_rate": 6.490042883158081e-06, "loss": 0.1576, "step": 11950 }, { "epoch": 0.35128943194501555, "grad_norm": 218967.078125, "learning_rate": 6.487105680549844e-06, "loss": 0.1468, "step": 11960 }, { "epoch": 0.35158315220583913, "grad_norm": 152845.734375, "learning_rate": 6.484168477941609e-06, "loss": 0.1479, "step": 11970 }, { "epoch": 0.35187687246666277, "grad_norm": 145103.6875, "learning_rate": 6.481231275333373e-06, "loss": 0.1474, "step": 11980 }, { "epoch": 0.35217059272748635, "grad_norm": 190792.453125, "learning_rate": 6.478294072725137e-06, "loss": 0.1504, "step": 11990 }, { "epoch": 0.35246431298830994, "grad_norm": 113986.5859375, "learning_rate": 6.475356870116901e-06, "loss": 0.1522, "step": 12000 }, { "epoch": 0.3527580332491335, "grad_norm": 221479.015625, "learning_rate": 6.472419667508665e-06, "loss": 0.1662, "step": 12010 }, { "epoch": 0.3530517535099571, "grad_norm": 140832.5625, "learning_rate": 6.4694824649004295e-06, "loss": 0.1464, "step": 12020 }, { "epoch": 0.3533454737707807, "grad_norm": 177911.578125, "learning_rate": 6.466545262292193e-06, "loss": 0.1475, "step": 12030 }, { "epoch": 0.3536391940316043, "grad_norm": 185594.921875, "learning_rate": 6.463608059683958e-06, "loss": 0.1543, "step": 12040 }, { "epoch": 0.3539329142924279, "grad_norm": 151495.09375, "learning_rate": 6.460670857075722e-06, "loss": 0.142, "step": 12050 }, { "epoch": 0.3542266345532515, "grad_norm": 124062.3046875, "learning_rate": 6.457733654467486e-06, "loss": 0.155, "step": 12060 }, { "epoch": 0.35452035481407507, "grad_norm": 172662.5625, "learning_rate": 6.45479645185925e-06, "loss": 0.1433, "step": 12070 }, { "epoch": 0.35481407507489865, "grad_norm": 227279.328125, "learning_rate": 6.451859249251014e-06, "loss": 0.132, "step": 12080 }, { "epoch": 0.35510779533572223, "grad_norm": 144244.578125, "learning_rate": 6.4489220466427785e-06, "loss": 0.1523, "step": 12090 }, { "epoch": 0.35540151559654587, "grad_norm": 302641.125, "learning_rate": 6.4459848440345414e-06, "loss": 0.1514, "step": 12100 }, { "epoch": 0.35569523585736945, "grad_norm": 182856.0625, "learning_rate": 6.443047641426307e-06, "loss": 0.1527, "step": 12110 }, { "epoch": 0.35598895611819303, "grad_norm": 155488.640625, "learning_rate": 6.44011043881807e-06, "loss": 0.1559, "step": 12120 }, { "epoch": 0.3562826763790166, "grad_norm": 144641.3125, "learning_rate": 6.437173236209834e-06, "loss": 0.1495, "step": 12130 }, { "epoch": 0.3565763966398402, "grad_norm": 186452.578125, "learning_rate": 6.434236033601598e-06, "loss": 0.1487, "step": 12140 }, { "epoch": 0.3568701169006638, "grad_norm": 183476.0625, "learning_rate": 6.431298830993362e-06, "loss": 0.1591, "step": 12150 }, { "epoch": 0.3571638371614874, "grad_norm": 191542.9375, "learning_rate": 6.428361628385127e-06, "loss": 0.1427, "step": 12160 }, { "epoch": 0.357457557422311, "grad_norm": 239172.0625, "learning_rate": 6.4254244257768905e-06, "loss": 0.128, "step": 12170 }, { "epoch": 0.3577512776831346, "grad_norm": 118575.0625, "learning_rate": 6.422487223168655e-06, "loss": 0.1585, "step": 12180 }, { "epoch": 0.35804499794395817, "grad_norm": 120360.71875, "learning_rate": 6.419550020560419e-06, "loss": 0.1459, "step": 12190 }, { "epoch": 0.35833871820478175, "grad_norm": 119855.921875, "learning_rate": 6.416612817952183e-06, "loss": 0.1416, "step": 12200 }, { "epoch": 0.35863243846560533, "grad_norm": 142540.28125, "learning_rate": 6.413675615343947e-06, "loss": 0.1318, "step": 12210 }, { "epoch": 0.35892615872642897, "grad_norm": 188657.328125, "learning_rate": 6.410738412735711e-06, "loss": 0.1538, "step": 12220 }, { "epoch": 0.35921987898725255, "grad_norm": 172549.34375, "learning_rate": 6.407801210127476e-06, "loss": 0.1454, "step": 12230 }, { "epoch": 0.35951359924807613, "grad_norm": 164581.59375, "learning_rate": 6.4048640075192395e-06, "loss": 0.1386, "step": 12240 }, { "epoch": 0.3598073195088997, "grad_norm": 184888.265625, "learning_rate": 6.4019268049110025e-06, "loss": 0.1347, "step": 12250 }, { "epoch": 0.3601010397697233, "grad_norm": 177656.5, "learning_rate": 6.398989602302768e-06, "loss": 0.1526, "step": 12260 }, { "epoch": 0.36039476003054693, "grad_norm": 235143.546875, "learning_rate": 6.396052399694531e-06, "loss": 0.1284, "step": 12270 }, { "epoch": 0.3606884802913705, "grad_norm": 225060.796875, "learning_rate": 6.3931151970862955e-06, "loss": 0.1582, "step": 12280 }, { "epoch": 0.3609822005521941, "grad_norm": 274509.40625, "learning_rate": 6.390177994478059e-06, "loss": 0.1491, "step": 12290 }, { "epoch": 0.3612759208130177, "grad_norm": 304344.5625, "learning_rate": 6.387240791869823e-06, "loss": 0.1605, "step": 12300 }, { "epoch": 0.36156964107384126, "grad_norm": 148449.1875, "learning_rate": 6.384303589261588e-06, "loss": 0.1398, "step": 12310 }, { "epoch": 0.36186336133466485, "grad_norm": 277225.28125, "learning_rate": 6.3813663866533515e-06, "loss": 0.1448, "step": 12320 }, { "epoch": 0.3621570815954885, "grad_norm": 189766.046875, "learning_rate": 6.378429184045116e-06, "loss": 0.1481, "step": 12330 }, { "epoch": 0.36245080185631207, "grad_norm": 160213.265625, "learning_rate": 6.37549198143688e-06, "loss": 0.1492, "step": 12340 }, { "epoch": 0.36274452211713565, "grad_norm": 122334.8046875, "learning_rate": 6.3725547788286445e-06, "loss": 0.1413, "step": 12350 }, { "epoch": 0.36303824237795923, "grad_norm": 198967.109375, "learning_rate": 6.369617576220408e-06, "loss": 0.1506, "step": 12360 }, { "epoch": 0.3633319626387828, "grad_norm": 215571.546875, "learning_rate": 6.366680373612172e-06, "loss": 0.1348, "step": 12370 }, { "epoch": 0.3636256828996064, "grad_norm": 174385.484375, "learning_rate": 6.363743171003937e-06, "loss": 0.1522, "step": 12380 }, { "epoch": 0.36391940316043003, "grad_norm": 298367.9375, "learning_rate": 6.3608059683957005e-06, "loss": 0.1288, "step": 12390 }, { "epoch": 0.3642131234212536, "grad_norm": 164543.015625, "learning_rate": 6.357868765787465e-06, "loss": 0.1626, "step": 12400 }, { "epoch": 0.3645068436820772, "grad_norm": 97389.9375, "learning_rate": 6.354931563179229e-06, "loss": 0.15, "step": 12410 }, { "epoch": 0.3648005639429008, "grad_norm": 118250.5859375, "learning_rate": 6.351994360570992e-06, "loss": 0.1384, "step": 12420 }, { "epoch": 0.36509428420372436, "grad_norm": 139542.78125, "learning_rate": 6.3490571579627565e-06, "loss": 0.1576, "step": 12430 }, { "epoch": 0.36538800446454794, "grad_norm": 186971.0625, "learning_rate": 6.34611995535452e-06, "loss": 0.1528, "step": 12440 }, { "epoch": 0.3656817247253716, "grad_norm": 109889.3671875, "learning_rate": 6.343182752746285e-06, "loss": 0.1444, "step": 12450 }, { "epoch": 0.36597544498619516, "grad_norm": 295200.40625, "learning_rate": 6.340245550138049e-06, "loss": 0.151, "step": 12460 }, { "epoch": 0.36626916524701875, "grad_norm": 339499.90625, "learning_rate": 6.337308347529813e-06, "loss": 0.1487, "step": 12470 }, { "epoch": 0.36656288550784233, "grad_norm": 170429.453125, "learning_rate": 6.334371144921577e-06, "loss": 0.1555, "step": 12480 }, { "epoch": 0.3668566057686659, "grad_norm": 200499.921875, "learning_rate": 6.331433942313341e-06, "loss": 0.1468, "step": 12490 }, { "epoch": 0.3671503260294895, "grad_norm": 163329.4375, "learning_rate": 6.3284967397051056e-06, "loss": 0.1497, "step": 12500 }, { "epoch": 0.36744404629031313, "grad_norm": 219345.46875, "learning_rate": 6.325559537096869e-06, "loss": 0.1544, "step": 12510 }, { "epoch": 0.3677377665511367, "grad_norm": 118375.2265625, "learning_rate": 6.322622334488634e-06, "loss": 0.1453, "step": 12520 }, { "epoch": 0.3680314868119603, "grad_norm": 112055.0390625, "learning_rate": 6.319685131880398e-06, "loss": 0.1334, "step": 12530 }, { "epoch": 0.3683252070727839, "grad_norm": 469340.34375, "learning_rate": 6.3167479292721616e-06, "loss": 0.1452, "step": 12540 }, { "epoch": 0.36861892733360746, "grad_norm": 147614.796875, "learning_rate": 6.313810726663926e-06, "loss": 0.1497, "step": 12550 }, { "epoch": 0.36891264759443104, "grad_norm": 138620.421875, "learning_rate": 6.310873524055689e-06, "loss": 0.1462, "step": 12560 }, { "epoch": 0.3692063678552547, "grad_norm": 124841.9140625, "learning_rate": 6.307936321447455e-06, "loss": 0.1484, "step": 12570 }, { "epoch": 0.36950008811607826, "grad_norm": 223746.78125, "learning_rate": 6.3049991188392175e-06, "loss": 0.1444, "step": 12580 }, { "epoch": 0.36979380837690184, "grad_norm": 264213.375, "learning_rate": 6.302061916230983e-06, "loss": 0.1548, "step": 12590 }, { "epoch": 0.3700875286377254, "grad_norm": 172677.234375, "learning_rate": 6.299124713622746e-06, "loss": 0.1431, "step": 12600 }, { "epoch": 0.370381248898549, "grad_norm": 166656.78125, "learning_rate": 6.29618751101451e-06, "loss": 0.1445, "step": 12610 }, { "epoch": 0.3706749691593726, "grad_norm": 268525.25, "learning_rate": 6.293250308406274e-06, "loss": 0.1462, "step": 12620 }, { "epoch": 0.37096868942019623, "grad_norm": 195054.359375, "learning_rate": 6.290313105798038e-06, "loss": 0.1418, "step": 12630 }, { "epoch": 0.3712624096810198, "grad_norm": 222696.59375, "learning_rate": 6.287375903189803e-06, "loss": 0.1232, "step": 12640 }, { "epoch": 0.3715561299418434, "grad_norm": 203631.390625, "learning_rate": 6.284438700581567e-06, "loss": 0.1318, "step": 12650 }, { "epoch": 0.371849850202667, "grad_norm": 159174.78125, "learning_rate": 6.28150149797333e-06, "loss": 0.135, "step": 12660 }, { "epoch": 0.37214357046349056, "grad_norm": 140163.921875, "learning_rate": 6.278564295365095e-06, "loss": 0.1533, "step": 12670 }, { "epoch": 0.37243729072431414, "grad_norm": 280767.09375, "learning_rate": 6.275627092756859e-06, "loss": 0.1439, "step": 12680 }, { "epoch": 0.3727310109851378, "grad_norm": 115417.96875, "learning_rate": 6.2726898901486234e-06, "loss": 0.1457, "step": 12690 }, { "epoch": 0.37302473124596136, "grad_norm": 153825.484375, "learning_rate": 6.269752687540387e-06, "loss": 0.1457, "step": 12700 }, { "epoch": 0.37331845150678494, "grad_norm": 177306.8125, "learning_rate": 6.266815484932152e-06, "loss": 0.1534, "step": 12710 }, { "epoch": 0.3736121717676085, "grad_norm": 192261.734375, "learning_rate": 6.263878282323916e-06, "loss": 0.1395, "step": 12720 }, { "epoch": 0.3739058920284321, "grad_norm": 180766.359375, "learning_rate": 6.2609410797156786e-06, "loss": 0.1508, "step": 12730 }, { "epoch": 0.3741996122892557, "grad_norm": 223022.8125, "learning_rate": 6.258003877107443e-06, "loss": 0.1473, "step": 12740 }, { "epoch": 0.3744933325500793, "grad_norm": 235017.828125, "learning_rate": 6.255066674499207e-06, "loss": 0.1456, "step": 12750 }, { "epoch": 0.3747870528109029, "grad_norm": 112224.6875, "learning_rate": 6.252129471890972e-06, "loss": 0.1404, "step": 12760 }, { "epoch": 0.3750807730717265, "grad_norm": 151964.453125, "learning_rate": 6.249192269282735e-06, "loss": 0.1576, "step": 12770 }, { "epoch": 0.3753744933325501, "grad_norm": 137613.671875, "learning_rate": 6.246255066674499e-06, "loss": 0.1527, "step": 12780 }, { "epoch": 0.37566821359337366, "grad_norm": 395470.75, "learning_rate": 6.243317864066264e-06, "loss": 0.1395, "step": 12790 }, { "epoch": 0.37596193385419724, "grad_norm": 144767.921875, "learning_rate": 6.240380661458028e-06, "loss": 0.1495, "step": 12800 }, { "epoch": 0.3762556541150209, "grad_norm": 239347.0, "learning_rate": 6.237443458849792e-06, "loss": 0.1499, "step": 12810 }, { "epoch": 0.37654937437584446, "grad_norm": 248679.0, "learning_rate": 6.234506256241556e-06, "loss": 0.1549, "step": 12820 }, { "epoch": 0.37684309463666804, "grad_norm": 188198.0625, "learning_rate": 6.231569053633321e-06, "loss": 0.1392, "step": 12830 }, { "epoch": 0.3771368148974916, "grad_norm": 139922.71875, "learning_rate": 6.2286318510250844e-06, "loss": 0.1473, "step": 12840 }, { "epoch": 0.3774305351583152, "grad_norm": 143881.859375, "learning_rate": 6.225694648416848e-06, "loss": 0.1413, "step": 12850 }, { "epoch": 0.3777242554191388, "grad_norm": 186203.609375, "learning_rate": 6.222757445808613e-06, "loss": 0.1517, "step": 12860 }, { "epoch": 0.3780179756799624, "grad_norm": 231715.90625, "learning_rate": 6.219820243200377e-06, "loss": 0.1437, "step": 12870 }, { "epoch": 0.378311695940786, "grad_norm": 125479.921875, "learning_rate": 6.216883040592141e-06, "loss": 0.1466, "step": 12880 }, { "epoch": 0.3786054162016096, "grad_norm": 204049.046875, "learning_rate": 6.213945837983904e-06, "loss": 0.15, "step": 12890 }, { "epoch": 0.3788991364624332, "grad_norm": 179720.28125, "learning_rate": 6.211008635375668e-06, "loss": 0.1425, "step": 12900 }, { "epoch": 0.37919285672325675, "grad_norm": 182713.421875, "learning_rate": 6.208071432767433e-06, "loss": 0.1501, "step": 12910 }, { "epoch": 0.37948657698408034, "grad_norm": 169826.390625, "learning_rate": 6.2051342301591964e-06, "loss": 0.1614, "step": 12920 }, { "epoch": 0.379780297244904, "grad_norm": 218687.296875, "learning_rate": 6.202197027550961e-06, "loss": 0.1441, "step": 12930 }, { "epoch": 0.38007401750572756, "grad_norm": 178172.734375, "learning_rate": 6.199259824942725e-06, "loss": 0.1643, "step": 12940 }, { "epoch": 0.38036773776655114, "grad_norm": 98917.1796875, "learning_rate": 6.1963226223344895e-06, "loss": 0.1236, "step": 12950 }, { "epoch": 0.3806614580273747, "grad_norm": 169950.859375, "learning_rate": 6.193385419726253e-06, "loss": 0.1406, "step": 12960 }, { "epoch": 0.3809551782881983, "grad_norm": 213210.328125, "learning_rate": 6.190448217118017e-06, "loss": 0.1506, "step": 12970 }, { "epoch": 0.3812488985490219, "grad_norm": 128506.078125, "learning_rate": 6.187511014509782e-06, "loss": 0.1546, "step": 12980 }, { "epoch": 0.3815426188098455, "grad_norm": 172540.15625, "learning_rate": 6.1845738119015455e-06, "loss": 0.1454, "step": 12990 }, { "epoch": 0.3818363390706691, "grad_norm": 257621.390625, "learning_rate": 6.18163660929331e-06, "loss": 0.1505, "step": 13000 }, { "epoch": 0.3821300593314927, "grad_norm": 153605.34375, "learning_rate": 6.178699406685074e-06, "loss": 0.1629, "step": 13010 }, { "epoch": 0.38242377959231627, "grad_norm": 213047.625, "learning_rate": 6.175762204076837e-06, "loss": 0.1434, "step": 13020 }, { "epoch": 0.38271749985313985, "grad_norm": 113518.453125, "learning_rate": 6.172825001468602e-06, "loss": 0.1394, "step": 13030 }, { "epoch": 0.38301122011396344, "grad_norm": 299530.625, "learning_rate": 6.169887798860365e-06, "loss": 0.1512, "step": 13040 }, { "epoch": 0.3833049403747871, "grad_norm": 139068.171875, "learning_rate": 6.166950596252131e-06, "loss": 0.1396, "step": 13050 }, { "epoch": 0.38359866063561066, "grad_norm": 283712.8125, "learning_rate": 6.164013393643894e-06, "loss": 0.159, "step": 13060 }, { "epoch": 0.38389238089643424, "grad_norm": 194810.265625, "learning_rate": 6.161076191035658e-06, "loss": 0.1324, "step": 13070 }, { "epoch": 0.3841861011572578, "grad_norm": 233170.546875, "learning_rate": 6.158138988427422e-06, "loss": 0.1491, "step": 13080 }, { "epoch": 0.3844798214180814, "grad_norm": 156581.109375, "learning_rate": 6.155201785819186e-06, "loss": 0.1454, "step": 13090 }, { "epoch": 0.384773541678905, "grad_norm": 246566.796875, "learning_rate": 6.1522645832109505e-06, "loss": 0.1553, "step": 13100 }, { "epoch": 0.3850672619397286, "grad_norm": 142250.734375, "learning_rate": 6.149327380602714e-06, "loss": 0.1466, "step": 13110 }, { "epoch": 0.3853609822005522, "grad_norm": 116698.3828125, "learning_rate": 6.146390177994479e-06, "loss": 0.1312, "step": 13120 }, { "epoch": 0.3856547024613758, "grad_norm": 250620.546875, "learning_rate": 6.143452975386243e-06, "loss": 0.1318, "step": 13130 }, { "epoch": 0.38594842272219937, "grad_norm": 123446.8046875, "learning_rate": 6.1405157727780065e-06, "loss": 0.1424, "step": 13140 }, { "epoch": 0.38624214298302295, "grad_norm": 166204.75, "learning_rate": 6.137578570169771e-06, "loss": 0.1491, "step": 13150 }, { "epoch": 0.38653586324384653, "grad_norm": 119673.0703125, "learning_rate": 6.134641367561535e-06, "loss": 0.1438, "step": 13160 }, { "epoch": 0.38682958350467017, "grad_norm": 134339.421875, "learning_rate": 6.1317041649532995e-06, "loss": 0.1479, "step": 13170 }, { "epoch": 0.38712330376549375, "grad_norm": 368867.59375, "learning_rate": 6.128766962345063e-06, "loss": 0.141, "step": 13180 }, { "epoch": 0.38741702402631734, "grad_norm": 190245.84375, "learning_rate": 6.125829759736828e-06, "loss": 0.1316, "step": 13190 }, { "epoch": 0.3877107442871409, "grad_norm": 105126.9609375, "learning_rate": 6.122892557128592e-06, "loss": 0.1392, "step": 13200 }, { "epoch": 0.3880044645479645, "grad_norm": 204328.25, "learning_rate": 6.119955354520355e-06, "loss": 0.1417, "step": 13210 }, { "epoch": 0.3882981848087881, "grad_norm": 228466.953125, "learning_rate": 6.117018151912119e-06, "loss": 0.1446, "step": 13220 }, { "epoch": 0.3885919050696117, "grad_norm": 167021.171875, "learning_rate": 6.114080949303883e-06, "loss": 0.144, "step": 13230 }, { "epoch": 0.3888856253304353, "grad_norm": 195113.953125, "learning_rate": 6.111143746695648e-06, "loss": 0.155, "step": 13240 }, { "epoch": 0.3891793455912589, "grad_norm": 162193.546875, "learning_rate": 6.1082065440874115e-06, "loss": 0.1517, "step": 13250 }, { "epoch": 0.38947306585208247, "grad_norm": 128742.8125, "learning_rate": 6.105269341479175e-06, "loss": 0.145, "step": 13260 }, { "epoch": 0.38976678611290605, "grad_norm": 158160.171875, "learning_rate": 6.10233213887094e-06, "loss": 0.1399, "step": 13270 }, { "epoch": 0.3900605063737297, "grad_norm": 147707.5, "learning_rate": 6.099394936262704e-06, "loss": 0.1357, "step": 13280 }, { "epoch": 0.39035422663455327, "grad_norm": 179734.46875, "learning_rate": 6.096457733654468e-06, "loss": 0.1376, "step": 13290 }, { "epoch": 0.39064794689537685, "grad_norm": 126575.2265625, "learning_rate": 6.093520531046232e-06, "loss": 0.1493, "step": 13300 }, { "epoch": 0.39094166715620043, "grad_norm": 271722.21875, "learning_rate": 6.090583328437996e-06, "loss": 0.1474, "step": 13310 }, { "epoch": 0.391235387417024, "grad_norm": 131755.96875, "learning_rate": 6.0876461258297606e-06, "loss": 0.144, "step": 13320 }, { "epoch": 0.3915291076778476, "grad_norm": 134933.921875, "learning_rate": 6.084708923221524e-06, "loss": 0.1314, "step": 13330 }, { "epoch": 0.39182282793867124, "grad_norm": 139939.59375, "learning_rate": 6.081771720613289e-06, "loss": 0.1487, "step": 13340 }, { "epoch": 0.3921165481994948, "grad_norm": 179411.15625, "learning_rate": 6.078834518005052e-06, "loss": 0.1456, "step": 13350 }, { "epoch": 0.3924102684603184, "grad_norm": 206660.53125, "learning_rate": 6.075897315396817e-06, "loss": 0.1395, "step": 13360 }, { "epoch": 0.392703988721142, "grad_norm": 217986.609375, "learning_rate": 6.07296011278858e-06, "loss": 0.142, "step": 13370 }, { "epoch": 0.39299770898196557, "grad_norm": 251145.453125, "learning_rate": 6.070022910180344e-06, "loss": 0.1453, "step": 13380 }, { "epoch": 0.39329142924278915, "grad_norm": 116760.0078125, "learning_rate": 6.067085707572109e-06, "loss": 0.1388, "step": 13390 }, { "epoch": 0.3935851495036128, "grad_norm": 172560.359375, "learning_rate": 6.0641485049638725e-06, "loss": 0.1438, "step": 13400 }, { "epoch": 0.39387886976443637, "grad_norm": 284277.25, "learning_rate": 6.061211302355637e-06, "loss": 0.1512, "step": 13410 }, { "epoch": 0.39417259002525995, "grad_norm": 134297.8125, "learning_rate": 6.058274099747401e-06, "loss": 0.147, "step": 13420 }, { "epoch": 0.39446631028608353, "grad_norm": 148698.609375, "learning_rate": 6.055336897139165e-06, "loss": 0.1422, "step": 13430 }, { "epoch": 0.3947600305469071, "grad_norm": 200046.03125, "learning_rate": 6.052399694530929e-06, "loss": 0.1525, "step": 13440 }, { "epoch": 0.3950537508077307, "grad_norm": 132165.0, "learning_rate": 6.049462491922693e-06, "loss": 0.1486, "step": 13450 }, { "epoch": 0.39534747106855433, "grad_norm": 172687.671875, "learning_rate": 6.046525289314458e-06, "loss": 0.1447, "step": 13460 }, { "epoch": 0.3956411913293779, "grad_norm": 104336.0234375, "learning_rate": 6.0435880867062216e-06, "loss": 0.1478, "step": 13470 }, { "epoch": 0.3959349115902015, "grad_norm": 137444.34375, "learning_rate": 6.040650884097986e-06, "loss": 0.1518, "step": 13480 }, { "epoch": 0.3962286318510251, "grad_norm": 169980.40625, "learning_rate": 6.03771368148975e-06, "loss": 0.1515, "step": 13490 }, { "epoch": 0.39652235211184866, "grad_norm": 133237.75, "learning_rate": 6.034776478881513e-06, "loss": 0.1416, "step": 13500 }, { "epoch": 0.39681607237267225, "grad_norm": 170778.578125, "learning_rate": 6.031839276273278e-06, "loss": 0.1515, "step": 13510 }, { "epoch": 0.3971097926334959, "grad_norm": 232748.71875, "learning_rate": 6.028902073665041e-06, "loss": 0.139, "step": 13520 }, { "epoch": 0.39740351289431947, "grad_norm": 477701.15625, "learning_rate": 6.025964871056806e-06, "loss": 0.1363, "step": 13530 }, { "epoch": 0.39769723315514305, "grad_norm": 198940.96875, "learning_rate": 6.02302766844857e-06, "loss": 0.1539, "step": 13540 }, { "epoch": 0.39799095341596663, "grad_norm": 254407.40625, "learning_rate": 6.0200904658403336e-06, "loss": 0.1536, "step": 13550 }, { "epoch": 0.3982846736767902, "grad_norm": 216257.53125, "learning_rate": 6.017153263232098e-06, "loss": 0.1402, "step": 13560 }, { "epoch": 0.3985783939376138, "grad_norm": 173638.703125, "learning_rate": 6.014216060623862e-06, "loss": 0.1481, "step": 13570 }, { "epoch": 0.39887211419843743, "grad_norm": 131054.2734375, "learning_rate": 6.011278858015627e-06, "loss": 0.1451, "step": 13580 }, { "epoch": 0.399165834459261, "grad_norm": 145474.171875, "learning_rate": 6.00834165540739e-06, "loss": 0.148, "step": 13590 }, { "epoch": 0.3994595547200846, "grad_norm": 155756.28125, "learning_rate": 6.005404452799155e-06, "loss": 0.142, "step": 13600 }, { "epoch": 0.3997532749809082, "grad_norm": 167225.515625, "learning_rate": 6.002467250190919e-06, "loss": 0.1306, "step": 13610 }, { "epoch": 0.40004699524173176, "grad_norm": 260030.5625, "learning_rate": 5.999530047582683e-06, "loss": 0.1528, "step": 13620 }, { "epoch": 0.40034071550255534, "grad_norm": 148043.1875, "learning_rate": 5.996592844974447e-06, "loss": 0.1438, "step": 13630 }, { "epoch": 0.400634435763379, "grad_norm": 359218.5625, "learning_rate": 5.993655642366211e-06, "loss": 0.149, "step": 13640 }, { "epoch": 0.40092815602420256, "grad_norm": 97202.640625, "learning_rate": 5.990718439757976e-06, "loss": 0.1244, "step": 13650 }, { "epoch": 0.40122187628502615, "grad_norm": 262908.09375, "learning_rate": 5.9877812371497394e-06, "loss": 0.1455, "step": 13660 }, { "epoch": 0.40151559654584973, "grad_norm": 215175.75, "learning_rate": 5.984844034541502e-06, "loss": 0.1434, "step": 13670 }, { "epoch": 0.4018093168066733, "grad_norm": 137397.65625, "learning_rate": 5.981906831933267e-06, "loss": 0.1467, "step": 13680 }, { "epoch": 0.4021030370674969, "grad_norm": 205308.625, "learning_rate": 5.978969629325031e-06, "loss": 0.1562, "step": 13690 }, { "epoch": 0.40239675732832053, "grad_norm": 130505.734375, "learning_rate": 5.976032426716795e-06, "loss": 0.143, "step": 13700 }, { "epoch": 0.4026904775891441, "grad_norm": 157003.28125, "learning_rate": 5.973095224108559e-06, "loss": 0.1445, "step": 13710 }, { "epoch": 0.4029841978499677, "grad_norm": 244154.0625, "learning_rate": 5.970158021500324e-06, "loss": 0.145, "step": 13720 }, { "epoch": 0.4032779181107913, "grad_norm": 172163.390625, "learning_rate": 5.967220818892088e-06, "loss": 0.1453, "step": 13730 }, { "epoch": 0.40357163837161486, "grad_norm": 253048.1875, "learning_rate": 5.964283616283851e-06, "loss": 0.1576, "step": 13740 }, { "epoch": 0.40386535863243844, "grad_norm": 107073.7421875, "learning_rate": 5.961346413675616e-06, "loss": 0.1411, "step": 13750 }, { "epoch": 0.4041590788932621, "grad_norm": 161997.046875, "learning_rate": 5.95840921106738e-06, "loss": 0.1475, "step": 13760 }, { "epoch": 0.40445279915408566, "grad_norm": 328203.625, "learning_rate": 5.9554720084591445e-06, "loss": 0.1452, "step": 13770 }, { "epoch": 0.40474651941490924, "grad_norm": 166164.6875, "learning_rate": 5.952534805850908e-06, "loss": 0.1522, "step": 13780 }, { "epoch": 0.4050402396757328, "grad_norm": 210009.859375, "learning_rate": 5.949597603242672e-06, "loss": 0.1544, "step": 13790 }, { "epoch": 0.4053339599365564, "grad_norm": 145416.015625, "learning_rate": 5.946660400634437e-06, "loss": 0.1351, "step": 13800 }, { "epoch": 0.40562768019738, "grad_norm": 349677.0, "learning_rate": 5.9437231980262e-06, "loss": 0.1478, "step": 13810 }, { "epoch": 0.40592140045820363, "grad_norm": 146098.546875, "learning_rate": 5.940785995417965e-06, "loss": 0.1451, "step": 13820 }, { "epoch": 0.4062151207190272, "grad_norm": 233271.15625, "learning_rate": 5.937848792809728e-06, "loss": 0.15, "step": 13830 }, { "epoch": 0.4065088409798508, "grad_norm": 155964.90625, "learning_rate": 5.9349115902014935e-06, "loss": 0.1478, "step": 13840 }, { "epoch": 0.4068025612406744, "grad_norm": 206375.484375, "learning_rate": 5.9319743875932564e-06, "loss": 0.1458, "step": 13850 }, { "epoch": 0.40709628150149796, "grad_norm": 225432.390625, "learning_rate": 5.92903718498502e-06, "loss": 0.1481, "step": 13860 }, { "epoch": 0.40739000176232154, "grad_norm": 169586.625, "learning_rate": 5.926099982376785e-06, "loss": 0.1432, "step": 13870 }, { "epoch": 0.4076837220231452, "grad_norm": 166294.046875, "learning_rate": 5.923162779768549e-06, "loss": 0.1448, "step": 13880 }, { "epoch": 0.40797744228396876, "grad_norm": 168468.953125, "learning_rate": 5.920225577160313e-06, "loss": 0.1364, "step": 13890 }, { "epoch": 0.40827116254479234, "grad_norm": 196459.125, "learning_rate": 5.917288374552077e-06, "loss": 0.1464, "step": 13900 }, { "epoch": 0.4085648828056159, "grad_norm": 122774.5703125, "learning_rate": 5.914351171943841e-06, "loss": 0.1506, "step": 13910 }, { "epoch": 0.4088586030664395, "grad_norm": 356559.90625, "learning_rate": 5.9114139693356055e-06, "loss": 0.1392, "step": 13920 }, { "epoch": 0.4091523233272631, "grad_norm": 159804.96875, "learning_rate": 5.908476766727369e-06, "loss": 0.1475, "step": 13930 }, { "epoch": 0.4094460435880867, "grad_norm": 196579.671875, "learning_rate": 5.905539564119134e-06, "loss": 0.1492, "step": 13940 }, { "epoch": 0.4097397638489103, "grad_norm": 211365.0, "learning_rate": 5.902602361510898e-06, "loss": 0.1555, "step": 13950 }, { "epoch": 0.4100334841097339, "grad_norm": 192269.5, "learning_rate": 5.899665158902662e-06, "loss": 0.1435, "step": 13960 }, { "epoch": 0.4103272043705575, "grad_norm": 230383.703125, "learning_rate": 5.896727956294426e-06, "loss": 0.1547, "step": 13970 }, { "epoch": 0.41062092463138106, "grad_norm": 117761.90625, "learning_rate": 5.893790753686189e-06, "loss": 0.1297, "step": 13980 }, { "epoch": 0.41091464489220464, "grad_norm": 213055.34375, "learning_rate": 5.890853551077954e-06, "loss": 0.15, "step": 13990 }, { "epoch": 0.4112083651530283, "grad_norm": 281667.75, "learning_rate": 5.8879163484697175e-06, "loss": 0.1525, "step": 14000 }, { "epoch": 0.41150208541385186, "grad_norm": 193386.9375, "learning_rate": 5.884979145861482e-06, "loss": 0.1427, "step": 14010 }, { "epoch": 0.41179580567467544, "grad_norm": 240664.3125, "learning_rate": 5.882041943253246e-06, "loss": 0.1589, "step": 14020 }, { "epoch": 0.412089525935499, "grad_norm": 98052.875, "learning_rate": 5.87910474064501e-06, "loss": 0.1457, "step": 14030 }, { "epoch": 0.4123832461963226, "grad_norm": 207681.828125, "learning_rate": 5.876167538036774e-06, "loss": 0.1604, "step": 14040 }, { "epoch": 0.4126769664571462, "grad_norm": 147493.578125, "learning_rate": 5.873230335428538e-06, "loss": 0.1444, "step": 14050 }, { "epoch": 0.4129706867179698, "grad_norm": 114289.84375, "learning_rate": 5.870293132820303e-06, "loss": 0.1456, "step": 14060 }, { "epoch": 0.4132644069787934, "grad_norm": 196512.75, "learning_rate": 5.8673559302120665e-06, "loss": 0.1483, "step": 14070 }, { "epoch": 0.413558127239617, "grad_norm": 109689.4296875, "learning_rate": 5.864418727603831e-06, "loss": 0.1391, "step": 14080 }, { "epoch": 0.4138518475004406, "grad_norm": 174217.046875, "learning_rate": 5.861481524995595e-06, "loss": 0.1516, "step": 14090 }, { "epoch": 0.41414556776126416, "grad_norm": 293492.25, "learning_rate": 5.858544322387359e-06, "loss": 0.1429, "step": 14100 }, { "epoch": 0.41443928802208774, "grad_norm": 181409.375, "learning_rate": 5.855607119779123e-06, "loss": 0.1339, "step": 14110 }, { "epoch": 0.4147330082829114, "grad_norm": 137640.328125, "learning_rate": 5.852669917170887e-06, "loss": 0.1509, "step": 14120 }, { "epoch": 0.41502672854373496, "grad_norm": 147290.453125, "learning_rate": 5.849732714562652e-06, "loss": 0.1411, "step": 14130 }, { "epoch": 0.41532044880455854, "grad_norm": 168648.640625, "learning_rate": 5.846795511954415e-06, "loss": 0.1546, "step": 14140 }, { "epoch": 0.4156141690653821, "grad_norm": 208746.078125, "learning_rate": 5.8438583093461785e-06, "loss": 0.1557, "step": 14150 }, { "epoch": 0.4159078893262057, "grad_norm": 166517.953125, "learning_rate": 5.840921106737943e-06, "loss": 0.1422, "step": 14160 }, { "epoch": 0.4162016095870293, "grad_norm": 267690.25, "learning_rate": 5.837983904129707e-06, "loss": 0.1331, "step": 14170 }, { "epoch": 0.4164953298478529, "grad_norm": 140064.625, "learning_rate": 5.8350467015214715e-06, "loss": 0.1345, "step": 14180 }, { "epoch": 0.4167890501086765, "grad_norm": 143400.609375, "learning_rate": 5.832109498913235e-06, "loss": 0.1341, "step": 14190 }, { "epoch": 0.4170827703695001, "grad_norm": 164792.875, "learning_rate": 5.829172296305e-06, "loss": 0.1324, "step": 14200 }, { "epoch": 0.41737649063032367, "grad_norm": 119442.890625, "learning_rate": 5.826235093696764e-06, "loss": 0.1369, "step": 14210 }, { "epoch": 0.41767021089114725, "grad_norm": 153290.609375, "learning_rate": 5.8232978910885275e-06, "loss": 0.1432, "step": 14220 }, { "epoch": 0.41796393115197084, "grad_norm": 431084.15625, "learning_rate": 5.820360688480292e-06, "loss": 0.1344, "step": 14230 }, { "epoch": 0.4182576514127945, "grad_norm": 148966.21875, "learning_rate": 5.817423485872056e-06, "loss": 0.1379, "step": 14240 }, { "epoch": 0.41855137167361806, "grad_norm": 191970.1875, "learning_rate": 5.8144862832638206e-06, "loss": 0.1342, "step": 14250 }, { "epoch": 0.41884509193444164, "grad_norm": 179703.921875, "learning_rate": 5.811549080655584e-06, "loss": 0.1322, "step": 14260 }, { "epoch": 0.4191388121952652, "grad_norm": 311960.0625, "learning_rate": 5.808611878047347e-06, "loss": 0.1431, "step": 14270 }, { "epoch": 0.4194325324560888, "grad_norm": 95266.59375, "learning_rate": 5.805674675439113e-06, "loss": 0.1373, "step": 14280 }, { "epoch": 0.41972625271691244, "grad_norm": 136490.875, "learning_rate": 5.802737472830876e-06, "loss": 0.1422, "step": 14290 }, { "epoch": 0.420019972977736, "grad_norm": 299180.34375, "learning_rate": 5.799800270222641e-06, "loss": 0.1516, "step": 14300 }, { "epoch": 0.4203136932385596, "grad_norm": 176842.078125, "learning_rate": 5.796863067614404e-06, "loss": 0.1452, "step": 14310 }, { "epoch": 0.4206074134993832, "grad_norm": 132097.015625, "learning_rate": 5.793925865006168e-06, "loss": 0.1439, "step": 14320 }, { "epoch": 0.42090113376020677, "grad_norm": 188803.46875, "learning_rate": 5.7909886623979326e-06, "loss": 0.1392, "step": 14330 }, { "epoch": 0.42119485402103035, "grad_norm": 139232.296875, "learning_rate": 5.788051459789696e-06, "loss": 0.1483, "step": 14340 }, { "epoch": 0.421488574281854, "grad_norm": 130225.8984375, "learning_rate": 5.785114257181461e-06, "loss": 0.148, "step": 14350 }, { "epoch": 0.42178229454267757, "grad_norm": 207499.21875, "learning_rate": 5.782177054573225e-06, "loss": 0.1405, "step": 14360 }, { "epoch": 0.42207601480350115, "grad_norm": 126696.296875, "learning_rate": 5.779239851964989e-06, "loss": 0.1428, "step": 14370 }, { "epoch": 0.42236973506432474, "grad_norm": 155559.25, "learning_rate": 5.776302649356753e-06, "loss": 0.1304, "step": 14380 }, { "epoch": 0.4226634553251483, "grad_norm": 190373.09375, "learning_rate": 5.773365446748517e-06, "loss": 0.1537, "step": 14390 }, { "epoch": 0.4229571755859719, "grad_norm": 120256.9140625, "learning_rate": 5.770428244140282e-06, "loss": 0.1416, "step": 14400 }, { "epoch": 0.42325089584679554, "grad_norm": 398999.375, "learning_rate": 5.767491041532045e-06, "loss": 0.1375, "step": 14410 }, { "epoch": 0.4235446161076191, "grad_norm": 159277.0625, "learning_rate": 5.76455383892381e-06, "loss": 0.1414, "step": 14420 }, { "epoch": 0.4238383363684427, "grad_norm": 168966.546875, "learning_rate": 5.761616636315574e-06, "loss": 0.1433, "step": 14430 }, { "epoch": 0.4241320566292663, "grad_norm": 153324.546875, "learning_rate": 5.758679433707337e-06, "loss": 0.1471, "step": 14440 }, { "epoch": 0.42442577689008987, "grad_norm": 247973.828125, "learning_rate": 5.755742231099102e-06, "loss": 0.1481, "step": 14450 }, { "epoch": 0.42471949715091345, "grad_norm": 138343.265625, "learning_rate": 5.752805028490865e-06, "loss": 0.1423, "step": 14460 }, { "epoch": 0.4250132174117371, "grad_norm": 134588.46875, "learning_rate": 5.74986782588263e-06, "loss": 0.1528, "step": 14470 }, { "epoch": 0.42530693767256067, "grad_norm": 125572.0859375, "learning_rate": 5.7469306232743936e-06, "loss": 0.1515, "step": 14480 }, { "epoch": 0.42560065793338425, "grad_norm": 183071.328125, "learning_rate": 5.743993420666158e-06, "loss": 0.1468, "step": 14490 }, { "epoch": 0.42589437819420783, "grad_norm": 145161.671875, "learning_rate": 5.741056218057922e-06, "loss": 0.1355, "step": 14500 }, { "epoch": 0.4261880984550314, "grad_norm": 227798.765625, "learning_rate": 5.738119015449686e-06, "loss": 0.1569, "step": 14510 }, { "epoch": 0.426481818715855, "grad_norm": 142342.640625, "learning_rate": 5.73518181284145e-06, "loss": 0.14, "step": 14520 }, { "epoch": 0.42677553897667864, "grad_norm": 331310.84375, "learning_rate": 5.732244610233214e-06, "loss": 0.1494, "step": 14530 }, { "epoch": 0.4270692592375022, "grad_norm": 135011.671875, "learning_rate": 5.729307407624979e-06, "loss": 0.1416, "step": 14540 }, { "epoch": 0.4273629794983258, "grad_norm": 145969.453125, "learning_rate": 5.726370205016743e-06, "loss": 0.1383, "step": 14550 }, { "epoch": 0.4276566997591494, "grad_norm": 165225.953125, "learning_rate": 5.723433002408506e-06, "loss": 0.1543, "step": 14560 }, { "epoch": 0.42795042001997297, "grad_norm": 234370.296875, "learning_rate": 5.720495799800271e-06, "loss": 0.137, "step": 14570 }, { "epoch": 0.42824414028079655, "grad_norm": 128997.8515625, "learning_rate": 5.717558597192035e-06, "loss": 0.1391, "step": 14580 }, { "epoch": 0.4285378605416202, "grad_norm": 226582.578125, "learning_rate": 5.7146213945837994e-06, "loss": 0.1464, "step": 14590 }, { "epoch": 0.42883158080244377, "grad_norm": 150861.828125, "learning_rate": 5.711684191975562e-06, "loss": 0.1523, "step": 14600 }, { "epoch": 0.42912530106326735, "grad_norm": 176946.984375, "learning_rate": 5.708746989367328e-06, "loss": 0.1376, "step": 14610 }, { "epoch": 0.42941902132409093, "grad_norm": 230648.671875, "learning_rate": 5.705809786759091e-06, "loss": 0.1329, "step": 14620 }, { "epoch": 0.4297127415849145, "grad_norm": 201357.5, "learning_rate": 5.702872584150855e-06, "loss": 0.1432, "step": 14630 }, { "epoch": 0.4300064618457381, "grad_norm": 186205.40625, "learning_rate": 5.699935381542619e-06, "loss": 0.1398, "step": 14640 }, { "epoch": 0.43030018210656173, "grad_norm": 173441.953125, "learning_rate": 5.696998178934383e-06, "loss": 0.1459, "step": 14650 }, { "epoch": 0.4305939023673853, "grad_norm": 231133.140625, "learning_rate": 5.694060976326148e-06, "loss": 0.1451, "step": 14660 }, { "epoch": 0.4308876226282089, "grad_norm": 162924.453125, "learning_rate": 5.6911237737179114e-06, "loss": 0.1389, "step": 14670 }, { "epoch": 0.4311813428890325, "grad_norm": 203468.703125, "learning_rate": 5.688186571109675e-06, "loss": 0.1398, "step": 14680 }, { "epoch": 0.43147506314985606, "grad_norm": 196564.65625, "learning_rate": 5.68524936850144e-06, "loss": 0.1395, "step": 14690 }, { "epoch": 0.43176878341067965, "grad_norm": 133717.484375, "learning_rate": 5.682312165893204e-06, "loss": 0.1419, "step": 14700 }, { "epoch": 0.4320625036715033, "grad_norm": 187129.59375, "learning_rate": 5.679374963284968e-06, "loss": 0.1364, "step": 14710 }, { "epoch": 0.43235622393232687, "grad_norm": 267627.59375, "learning_rate": 5.676437760676732e-06, "loss": 0.1424, "step": 14720 }, { "epoch": 0.43264994419315045, "grad_norm": 210124.015625, "learning_rate": 5.673500558068497e-06, "loss": 0.1364, "step": 14730 }, { "epoch": 0.43294366445397403, "grad_norm": 85799.0234375, "learning_rate": 5.6705633554602605e-06, "loss": 0.1397, "step": 14740 }, { "epoch": 0.4332373847147976, "grad_norm": 149093.359375, "learning_rate": 5.667626152852023e-06, "loss": 0.1466, "step": 14750 }, { "epoch": 0.4335311049756212, "grad_norm": 219140.03125, "learning_rate": 5.664688950243789e-06, "loss": 0.1328, "step": 14760 }, { "epoch": 0.43382482523644483, "grad_norm": 151151.03125, "learning_rate": 5.661751747635552e-06, "loss": 0.138, "step": 14770 }, { "epoch": 0.4341185454972684, "grad_norm": 149474.53125, "learning_rate": 5.6588145450273165e-06, "loss": 0.1538, "step": 14780 }, { "epoch": 0.434412265758092, "grad_norm": 301110.96875, "learning_rate": 5.65587734241908e-06, "loss": 0.1289, "step": 14790 }, { "epoch": 0.4347059860189156, "grad_norm": 244205.671875, "learning_rate": 5.652940139810844e-06, "loss": 0.1418, "step": 14800 }, { "epoch": 0.43499970627973916, "grad_norm": 187228.90625, "learning_rate": 5.650002937202609e-06, "loss": 0.1386, "step": 14810 }, { "epoch": 0.43529342654056274, "grad_norm": 215439.359375, "learning_rate": 5.6470657345943724e-06, "loss": 0.153, "step": 14820 }, { "epoch": 0.4355871468013864, "grad_norm": 177774.40625, "learning_rate": 5.644128531986137e-06, "loss": 0.1383, "step": 14830 }, { "epoch": 0.43588086706220996, "grad_norm": 179718.359375, "learning_rate": 5.641191329377901e-06, "loss": 0.1334, "step": 14840 }, { "epoch": 0.43617458732303355, "grad_norm": 159344.796875, "learning_rate": 5.6382541267696655e-06, "loss": 0.1519, "step": 14850 }, { "epoch": 0.43646830758385713, "grad_norm": 198206.328125, "learning_rate": 5.635316924161429e-06, "loss": 0.1419, "step": 14860 }, { "epoch": 0.4367620278446807, "grad_norm": 220178.40625, "learning_rate": 5.632379721553193e-06, "loss": 0.1431, "step": 14870 }, { "epoch": 0.4370557481055043, "grad_norm": 128212.7578125, "learning_rate": 5.629442518944958e-06, "loss": 0.1465, "step": 14880 }, { "epoch": 0.43734946836632793, "grad_norm": 158241.71875, "learning_rate": 5.6265053163367215e-06, "loss": 0.1536, "step": 14890 }, { "epoch": 0.4376431886271515, "grad_norm": 301046.125, "learning_rate": 5.623568113728486e-06, "loss": 0.1475, "step": 14900 }, { "epoch": 0.4379369088879751, "grad_norm": 176395.96875, "learning_rate": 5.62063091112025e-06, "loss": 0.1396, "step": 14910 }, { "epoch": 0.4382306291487987, "grad_norm": 277156.15625, "learning_rate": 5.617693708512013e-06, "loss": 0.1273, "step": 14920 }, { "epoch": 0.43852434940962226, "grad_norm": 157772.9375, "learning_rate": 5.6147565059037775e-06, "loss": 0.1389, "step": 14930 }, { "epoch": 0.43881806967044584, "grad_norm": 126531.7890625, "learning_rate": 5.611819303295541e-06, "loss": 0.1363, "step": 14940 }, { "epoch": 0.4391117899312695, "grad_norm": 109837.03125, "learning_rate": 5.608882100687306e-06, "loss": 0.1373, "step": 14950 }, { "epoch": 0.43940551019209306, "grad_norm": 149402.734375, "learning_rate": 5.60594489807907e-06, "loss": 0.144, "step": 14960 }, { "epoch": 0.43969923045291665, "grad_norm": 271410.84375, "learning_rate": 5.603007695470834e-06, "loss": 0.1433, "step": 14970 }, { "epoch": 0.4399929507137402, "grad_norm": 124755.3984375, "learning_rate": 5.600070492862598e-06, "loss": 0.1545, "step": 14980 }, { "epoch": 0.4402866709745638, "grad_norm": 123504.6328125, "learning_rate": 5.597133290254362e-06, "loss": 0.1515, "step": 14990 }, { "epoch": 0.4405803912353874, "grad_norm": 116067.25, "learning_rate": 5.5941960876461265e-06, "loss": 0.145, "step": 15000 }, { "epoch": 0.44087411149621103, "grad_norm": 141068.03125, "learning_rate": 5.59125888503789e-06, "loss": 0.1427, "step": 15010 }, { "epoch": 0.4411678317570346, "grad_norm": 186764.0625, "learning_rate": 5.588321682429655e-06, "loss": 0.1488, "step": 15020 }, { "epoch": 0.4414615520178582, "grad_norm": 202034.796875, "learning_rate": 5.585384479821419e-06, "loss": 0.1505, "step": 15030 }, { "epoch": 0.4417552722786818, "grad_norm": 157175.484375, "learning_rate": 5.5824472772131825e-06, "loss": 0.1296, "step": 15040 }, { "epoch": 0.44204899253950536, "grad_norm": 153630.90625, "learning_rate": 5.579510074604947e-06, "loss": 0.1443, "step": 15050 }, { "epoch": 0.44234271280032894, "grad_norm": 298057.75, "learning_rate": 5.57657287199671e-06, "loss": 0.1385, "step": 15060 }, { "epoch": 0.4426364330611526, "grad_norm": 236720.703125, "learning_rate": 5.5736356693884756e-06, "loss": 0.1415, "step": 15070 }, { "epoch": 0.44293015332197616, "grad_norm": 133707.53125, "learning_rate": 5.5706984667802385e-06, "loss": 0.1521, "step": 15080 }, { "epoch": 0.44322387358279974, "grad_norm": 125614.328125, "learning_rate": 5.567761264172004e-06, "loss": 0.1401, "step": 15090 }, { "epoch": 0.4435175938436233, "grad_norm": 178335.375, "learning_rate": 5.564824061563767e-06, "loss": 0.1469, "step": 15100 }, { "epoch": 0.4438113141044469, "grad_norm": 229934.25, "learning_rate": 5.561886858955531e-06, "loss": 0.1406, "step": 15110 }, { "epoch": 0.4441050343652705, "grad_norm": 103534.5859375, "learning_rate": 5.558949656347295e-06, "loss": 0.138, "step": 15120 }, { "epoch": 0.44439875462609413, "grad_norm": 212464.375, "learning_rate": 5.556012453739059e-06, "loss": 0.1379, "step": 15130 }, { "epoch": 0.4446924748869177, "grad_norm": 243842.453125, "learning_rate": 5.553075251130824e-06, "loss": 0.1466, "step": 15140 }, { "epoch": 0.4449861951477413, "grad_norm": 286225.875, "learning_rate": 5.5501380485225875e-06, "loss": 0.1439, "step": 15150 }, { "epoch": 0.4452799154085649, "grad_norm": 166135.1875, "learning_rate": 5.547200845914351e-06, "loss": 0.1396, "step": 15160 }, { "epoch": 0.44557363566938846, "grad_norm": 134351.390625, "learning_rate": 5.544263643306116e-06, "loss": 0.1191, "step": 15170 }, { "epoch": 0.44586735593021204, "grad_norm": 202930.5, "learning_rate": 5.54132644069788e-06, "loss": 0.1504, "step": 15180 }, { "epoch": 0.4461610761910357, "grad_norm": 143812.28125, "learning_rate": 5.538389238089644e-06, "loss": 0.1411, "step": 15190 }, { "epoch": 0.44645479645185926, "grad_norm": 174069.1875, "learning_rate": 5.535452035481408e-06, "loss": 0.1417, "step": 15200 }, { "epoch": 0.44674851671268284, "grad_norm": 284775.0625, "learning_rate": 5.532514832873173e-06, "loss": 0.1382, "step": 15210 }, { "epoch": 0.4470422369735064, "grad_norm": 196887.046875, "learning_rate": 5.529577630264937e-06, "loss": 0.1559, "step": 15220 }, { "epoch": 0.44733595723433, "grad_norm": 180257.171875, "learning_rate": 5.5266404276566995e-06, "loss": 0.1465, "step": 15230 }, { "epoch": 0.4476296774951536, "grad_norm": 429212.5, "learning_rate": 5.523703225048464e-06, "loss": 0.1401, "step": 15240 }, { "epoch": 0.4479233977559772, "grad_norm": 147377.5, "learning_rate": 5.520766022440228e-06, "loss": 0.1504, "step": 15250 }, { "epoch": 0.4482171180168008, "grad_norm": 228239.1875, "learning_rate": 5.5178288198319926e-06, "loss": 0.1356, "step": 15260 }, { "epoch": 0.4485108382776244, "grad_norm": 185121.125, "learning_rate": 5.514891617223756e-06, "loss": 0.1384, "step": 15270 }, { "epoch": 0.448804558538448, "grad_norm": 140824.78125, "learning_rate": 5.51195441461552e-06, "loss": 0.1456, "step": 15280 }, { "epoch": 0.44909827879927156, "grad_norm": 101525.8828125, "learning_rate": 5.509017212007285e-06, "loss": 0.1486, "step": 15290 }, { "epoch": 0.4493919990600952, "grad_norm": 151479.96875, "learning_rate": 5.5060800093990486e-06, "loss": 0.1575, "step": 15300 }, { "epoch": 0.4496857193209188, "grad_norm": 301916.90625, "learning_rate": 5.503142806790813e-06, "loss": 0.1457, "step": 15310 }, { "epoch": 0.44997943958174236, "grad_norm": 197181.71875, "learning_rate": 5.500205604182577e-06, "loss": 0.1395, "step": 15320 }, { "epoch": 0.45027315984256594, "grad_norm": 224294.625, "learning_rate": 5.497268401574341e-06, "loss": 0.132, "step": 15330 }, { "epoch": 0.4505668801033895, "grad_norm": 200269.828125, "learning_rate": 5.494331198966105e-06, "loss": 0.1417, "step": 15340 }, { "epoch": 0.4508606003642131, "grad_norm": 228039.90625, "learning_rate": 5.491393996357869e-06, "loss": 0.1385, "step": 15350 }, { "epoch": 0.45115432062503674, "grad_norm": 197326.4375, "learning_rate": 5.488456793749634e-06, "loss": 0.1451, "step": 15360 }, { "epoch": 0.4514480408858603, "grad_norm": 225809.40625, "learning_rate": 5.485519591141398e-06, "loss": 0.1471, "step": 15370 }, { "epoch": 0.4517417611466839, "grad_norm": 140418.453125, "learning_rate": 5.482582388533162e-06, "loss": 0.14, "step": 15380 }, { "epoch": 0.4520354814075075, "grad_norm": 158583.78125, "learning_rate": 5.479645185924925e-06, "loss": 0.1392, "step": 15390 }, { "epoch": 0.45232920166833107, "grad_norm": 108601.6640625, "learning_rate": 5.476707983316689e-06, "loss": 0.1456, "step": 15400 }, { "epoch": 0.45262292192915465, "grad_norm": 133921.109375, "learning_rate": 5.473770780708454e-06, "loss": 0.1423, "step": 15410 }, { "epoch": 0.4529166421899783, "grad_norm": 147635.796875, "learning_rate": 5.470833578100217e-06, "loss": 0.1361, "step": 15420 }, { "epoch": 0.4532103624508019, "grad_norm": 158667.015625, "learning_rate": 5.467896375491982e-06, "loss": 0.1405, "step": 15430 }, { "epoch": 0.45350408271162546, "grad_norm": 125217.1171875, "learning_rate": 5.464959172883746e-06, "loss": 0.1391, "step": 15440 }, { "epoch": 0.45379780297244904, "grad_norm": 235240.046875, "learning_rate": 5.46202197027551e-06, "loss": 0.1472, "step": 15450 }, { "epoch": 0.4540915232332726, "grad_norm": 160917.875, "learning_rate": 5.459084767667274e-06, "loss": 0.1495, "step": 15460 }, { "epoch": 0.4543852434940962, "grad_norm": 215709.0625, "learning_rate": 5.456147565059038e-06, "loss": 0.1468, "step": 15470 }, { "epoch": 0.45467896375491984, "grad_norm": 249227.140625, "learning_rate": 5.453210362450803e-06, "loss": 0.1489, "step": 15480 }, { "epoch": 0.4549726840157434, "grad_norm": 135117.46875, "learning_rate": 5.450273159842566e-06, "loss": 0.1528, "step": 15490 }, { "epoch": 0.455266404276567, "grad_norm": 316493.21875, "learning_rate": 5.447335957234331e-06, "loss": 0.1405, "step": 15500 }, { "epoch": 0.4555601245373906, "grad_norm": 174509.90625, "learning_rate": 5.444398754626095e-06, "loss": 0.1427, "step": 15510 }, { "epoch": 0.45585384479821417, "grad_norm": 364155.53125, "learning_rate": 5.441461552017859e-06, "loss": 0.1467, "step": 15520 }, { "epoch": 0.45614756505903775, "grad_norm": 195497.34375, "learning_rate": 5.438524349409623e-06, "loss": 0.1416, "step": 15530 }, { "epoch": 0.4564412853198614, "grad_norm": 157727.0, "learning_rate": 5.435587146801386e-06, "loss": 0.1451, "step": 15540 }, { "epoch": 0.45673500558068497, "grad_norm": 202579.265625, "learning_rate": 5.432649944193152e-06, "loss": 0.1502, "step": 15550 }, { "epoch": 0.45702872584150855, "grad_norm": 291132.9375, "learning_rate": 5.429712741584915e-06, "loss": 0.1266, "step": 15560 }, { "epoch": 0.45732244610233214, "grad_norm": 150382.734375, "learning_rate": 5.426775538976678e-06, "loss": 0.1309, "step": 15570 }, { "epoch": 0.4576161663631557, "grad_norm": 355642.0625, "learning_rate": 5.423838336368443e-06, "loss": 0.1347, "step": 15580 }, { "epoch": 0.4579098866239793, "grad_norm": 154378.5625, "learning_rate": 5.420901133760207e-06, "loss": 0.1385, "step": 15590 }, { "epoch": 0.45820360688480294, "grad_norm": 199555.59375, "learning_rate": 5.4179639311519714e-06, "loss": 0.143, "step": 15600 }, { "epoch": 0.4584973271456265, "grad_norm": 223453.171875, "learning_rate": 5.415026728543735e-06, "loss": 0.1374, "step": 15610 }, { "epoch": 0.4587910474064501, "grad_norm": 151338.265625, "learning_rate": 5.4120895259355e-06, "loss": 0.132, "step": 15620 }, { "epoch": 0.4590847676672737, "grad_norm": 162508.375, "learning_rate": 5.409152323327264e-06, "loss": 0.1377, "step": 15630 }, { "epoch": 0.45937848792809727, "grad_norm": 118692.640625, "learning_rate": 5.4062151207190274e-06, "loss": 0.1291, "step": 15640 }, { "epoch": 0.45967220818892085, "grad_norm": 211990.390625, "learning_rate": 5.403277918110792e-06, "loss": 0.1389, "step": 15650 }, { "epoch": 0.4599659284497445, "grad_norm": 198654.171875, "learning_rate": 5.400340715502556e-06, "loss": 0.1328, "step": 15660 }, { "epoch": 0.46025964871056807, "grad_norm": 148621.890625, "learning_rate": 5.3974035128943205e-06, "loss": 0.124, "step": 15670 }, { "epoch": 0.46055336897139165, "grad_norm": 170214.21875, "learning_rate": 5.394466310286084e-06, "loss": 0.1302, "step": 15680 }, { "epoch": 0.46084708923221523, "grad_norm": 182282.65625, "learning_rate": 5.391529107677847e-06, "loss": 0.1439, "step": 15690 }, { "epoch": 0.4611408094930388, "grad_norm": 200166.734375, "learning_rate": 5.388591905069613e-06, "loss": 0.1388, "step": 15700 }, { "epoch": 0.4614345297538624, "grad_norm": 87715.9765625, "learning_rate": 5.385654702461376e-06, "loss": 0.1302, "step": 15710 }, { "epoch": 0.46172825001468604, "grad_norm": 124504.1328125, "learning_rate": 5.38271749985314e-06, "loss": 0.1359, "step": 15720 }, { "epoch": 0.4620219702755096, "grad_norm": 108195.125, "learning_rate": 5.379780297244904e-06, "loss": 0.1405, "step": 15730 }, { "epoch": 0.4623156905363332, "grad_norm": 405387.9375, "learning_rate": 5.376843094636669e-06, "loss": 0.1359, "step": 15740 }, { "epoch": 0.4626094107971568, "grad_norm": 218572.171875, "learning_rate": 5.3739058920284325e-06, "loss": 0.1315, "step": 15750 }, { "epoch": 0.46290313105798037, "grad_norm": 194429.296875, "learning_rate": 5.370968689420196e-06, "loss": 0.1435, "step": 15760 }, { "epoch": 0.46319685131880395, "grad_norm": 225333.484375, "learning_rate": 5.368031486811961e-06, "loss": 0.1376, "step": 15770 }, { "epoch": 0.4634905715796276, "grad_norm": 118082.1484375, "learning_rate": 5.365094284203725e-06, "loss": 0.1384, "step": 15780 }, { "epoch": 0.46378429184045117, "grad_norm": 168539.90625, "learning_rate": 5.362157081595489e-06, "loss": 0.1426, "step": 15790 }, { "epoch": 0.46407801210127475, "grad_norm": 176555.125, "learning_rate": 5.359219878987253e-06, "loss": 0.1477, "step": 15800 }, { "epoch": 0.46437173236209833, "grad_norm": 84990.0, "learning_rate": 5.356282676379017e-06, "loss": 0.1354, "step": 15810 }, { "epoch": 0.4646654526229219, "grad_norm": 241017.765625, "learning_rate": 5.3533454737707815e-06, "loss": 0.1325, "step": 15820 }, { "epoch": 0.4649591728837455, "grad_norm": 209981.15625, "learning_rate": 5.350408271162545e-06, "loss": 0.1349, "step": 15830 }, { "epoch": 0.46525289314456914, "grad_norm": 163274.96875, "learning_rate": 5.34747106855431e-06, "loss": 0.1445, "step": 15840 }, { "epoch": 0.4655466134053927, "grad_norm": 176720.109375, "learning_rate": 5.344533865946073e-06, "loss": 0.1439, "step": 15850 }, { "epoch": 0.4658403336662163, "grad_norm": 138519.5, "learning_rate": 5.341596663337838e-06, "loss": 0.1302, "step": 15860 }, { "epoch": 0.4661340539270399, "grad_norm": 222276.6875, "learning_rate": 5.338659460729601e-06, "loss": 0.1498, "step": 15870 }, { "epoch": 0.46642777418786346, "grad_norm": 170252.390625, "learning_rate": 5.335722258121365e-06, "loss": 0.1283, "step": 15880 }, { "epoch": 0.46672149444868705, "grad_norm": 218772.5, "learning_rate": 5.33278505551313e-06, "loss": 0.1418, "step": 15890 }, { "epoch": 0.4670152147095107, "grad_norm": 308925.875, "learning_rate": 5.3298478529048935e-06, "loss": 0.1455, "step": 15900 }, { "epoch": 0.46730893497033427, "grad_norm": 117497.125, "learning_rate": 5.326910650296658e-06, "loss": 0.138, "step": 15910 }, { "epoch": 0.46760265523115785, "grad_norm": 164478.359375, "learning_rate": 5.323973447688422e-06, "loss": 0.1511, "step": 15920 }, { "epoch": 0.46789637549198143, "grad_norm": 167570.75, "learning_rate": 5.321036245080186e-06, "loss": 0.1168, "step": 15930 }, { "epoch": 0.468190095752805, "grad_norm": 164593.375, "learning_rate": 5.31809904247195e-06, "loss": 0.136, "step": 15940 }, { "epoch": 0.4684838160136286, "grad_norm": 177238.65625, "learning_rate": 5.315161839863714e-06, "loss": 0.142, "step": 15950 }, { "epoch": 0.46877753627445223, "grad_norm": 172288.375, "learning_rate": 5.312224637255479e-06, "loss": 0.1473, "step": 15960 }, { "epoch": 0.4690712565352758, "grad_norm": 164748.875, "learning_rate": 5.3092874346472425e-06, "loss": 0.1404, "step": 15970 }, { "epoch": 0.4693649767960994, "grad_norm": 174954.765625, "learning_rate": 5.306350232039007e-06, "loss": 0.1246, "step": 15980 }, { "epoch": 0.469658697056923, "grad_norm": 168307.453125, "learning_rate": 5.303413029430771e-06, "loss": 0.1456, "step": 15990 }, { "epoch": 0.46995241731774656, "grad_norm": 194710.8125, "learning_rate": 5.300475826822534e-06, "loss": 0.1508, "step": 16000 }, { "epoch": 0.47024613757857014, "grad_norm": 131202.796875, "learning_rate": 5.297538624214299e-06, "loss": 0.1328, "step": 16010 }, { "epoch": 0.4705398578393938, "grad_norm": 284565.0625, "learning_rate": 5.294601421606062e-06, "loss": 0.1457, "step": 16020 }, { "epoch": 0.47083357810021736, "grad_norm": 169318.28125, "learning_rate": 5.291664218997827e-06, "loss": 0.1488, "step": 16030 }, { "epoch": 0.47112729836104095, "grad_norm": 129956.84375, "learning_rate": 5.288727016389591e-06, "loss": 0.1398, "step": 16040 }, { "epoch": 0.47142101862186453, "grad_norm": 147247.25, "learning_rate": 5.2857898137813545e-06, "loss": 0.1401, "step": 16050 }, { "epoch": 0.4717147388826881, "grad_norm": 169177.0, "learning_rate": 5.282852611173119e-06, "loss": 0.1391, "step": 16060 }, { "epoch": 0.4720084591435117, "grad_norm": 255577.625, "learning_rate": 5.279915408564883e-06, "loss": 0.1521, "step": 16070 }, { "epoch": 0.47230217940433533, "grad_norm": 144562.703125, "learning_rate": 5.2769782059566476e-06, "loss": 0.131, "step": 16080 }, { "epoch": 0.4725958996651589, "grad_norm": 115584.4140625, "learning_rate": 5.274041003348411e-06, "loss": 0.117, "step": 16090 }, { "epoch": 0.4728896199259825, "grad_norm": 114572.5078125, "learning_rate": 5.271103800740176e-06, "loss": 0.1392, "step": 16100 }, { "epoch": 0.4731833401868061, "grad_norm": 237809.59375, "learning_rate": 5.26816659813194e-06, "loss": 0.141, "step": 16110 }, { "epoch": 0.47347706044762966, "grad_norm": 141721.515625, "learning_rate": 5.2652293955237035e-06, "loss": 0.1417, "step": 16120 }, { "epoch": 0.47377078070845324, "grad_norm": 217641.609375, "learning_rate": 5.262292192915468e-06, "loss": 0.1362, "step": 16130 }, { "epoch": 0.4740645009692769, "grad_norm": 416683.71875, "learning_rate": 5.259354990307232e-06, "loss": 0.1332, "step": 16140 }, { "epoch": 0.47435822123010046, "grad_norm": 253494.390625, "learning_rate": 5.256417787698997e-06, "loss": 0.1288, "step": 16150 }, { "epoch": 0.47465194149092405, "grad_norm": 153454.15625, "learning_rate": 5.25348058509076e-06, "loss": 0.1369, "step": 16160 }, { "epoch": 0.4749456617517476, "grad_norm": 225912.140625, "learning_rate": 5.250543382482523e-06, "loss": 0.1294, "step": 16170 }, { "epoch": 0.4752393820125712, "grad_norm": 115728.1171875, "learning_rate": 5.247606179874288e-06, "loss": 0.1363, "step": 16180 }, { "epoch": 0.4755331022733948, "grad_norm": 92925.0703125, "learning_rate": 5.244668977266052e-06, "loss": 0.1419, "step": 16190 }, { "epoch": 0.47582682253421843, "grad_norm": 128054.4140625, "learning_rate": 5.241731774657816e-06, "loss": 0.1422, "step": 16200 }, { "epoch": 0.476120542795042, "grad_norm": 219703.203125, "learning_rate": 5.23879457204958e-06, "loss": 0.14, "step": 16210 }, { "epoch": 0.4764142630558656, "grad_norm": 191585.5, "learning_rate": 5.235857369441345e-06, "loss": 0.1462, "step": 16220 }, { "epoch": 0.4767079833166892, "grad_norm": 152733.609375, "learning_rate": 5.2329201668331086e-06, "loss": 0.1389, "step": 16230 }, { "epoch": 0.47700170357751276, "grad_norm": 133746.15625, "learning_rate": 5.229982964224872e-06, "loss": 0.1361, "step": 16240 }, { "epoch": 0.47729542383833634, "grad_norm": 154618.140625, "learning_rate": 5.227045761616637e-06, "loss": 0.1456, "step": 16250 }, { "epoch": 0.47758914409916, "grad_norm": 194602.703125, "learning_rate": 5.224108559008401e-06, "loss": 0.1389, "step": 16260 }, { "epoch": 0.47788286435998356, "grad_norm": 155923.484375, "learning_rate": 5.221171356400165e-06, "loss": 0.1363, "step": 16270 }, { "epoch": 0.47817658462080714, "grad_norm": 249862.625, "learning_rate": 5.218234153791929e-06, "loss": 0.15, "step": 16280 }, { "epoch": 0.4784703048816307, "grad_norm": 110570.390625, "learning_rate": 5.215296951183693e-06, "loss": 0.1421, "step": 16290 }, { "epoch": 0.4787640251424543, "grad_norm": 215226.53125, "learning_rate": 5.212359748575458e-06, "loss": 0.128, "step": 16300 }, { "epoch": 0.47905774540327795, "grad_norm": 263185.46875, "learning_rate": 5.2094225459672206e-06, "loss": 0.1347, "step": 16310 }, { "epoch": 0.47935146566410153, "grad_norm": 93575.1484375, "learning_rate": 5.206485343358986e-06, "loss": 0.141, "step": 16320 }, { "epoch": 0.4796451859249251, "grad_norm": 149112.0625, "learning_rate": 5.203548140750749e-06, "loss": 0.1366, "step": 16330 }, { "epoch": 0.4799389061857487, "grad_norm": 152597.28125, "learning_rate": 5.200610938142513e-06, "loss": 0.1464, "step": 16340 }, { "epoch": 0.4802326264465723, "grad_norm": 235806.515625, "learning_rate": 5.197673735534277e-06, "loss": 0.1414, "step": 16350 }, { "epoch": 0.48052634670739586, "grad_norm": 121250.75, "learning_rate": 5.194736532926041e-06, "loss": 0.1434, "step": 16360 }, { "epoch": 0.4808200669682195, "grad_norm": 109814.78125, "learning_rate": 5.191799330317806e-06, "loss": 0.1388, "step": 16370 }, { "epoch": 0.4811137872290431, "grad_norm": 227238.359375, "learning_rate": 5.18886212770957e-06, "loss": 0.1402, "step": 16380 }, { "epoch": 0.48140750748986666, "grad_norm": 106323.5, "learning_rate": 5.185924925101334e-06, "loss": 0.1561, "step": 16390 }, { "epoch": 0.48170122775069024, "grad_norm": 107270.703125, "learning_rate": 5.182987722493098e-06, "loss": 0.1369, "step": 16400 }, { "epoch": 0.4819949480115138, "grad_norm": 157627.375, "learning_rate": 5.180050519884862e-06, "loss": 0.1409, "step": 16410 }, { "epoch": 0.4822886682723374, "grad_norm": 230790.8125, "learning_rate": 5.1771133172766264e-06, "loss": 0.1329, "step": 16420 }, { "epoch": 0.48258238853316104, "grad_norm": 314450.4375, "learning_rate": 5.17417611466839e-06, "loss": 0.1325, "step": 16430 }, { "epoch": 0.4828761087939846, "grad_norm": 195402.0625, "learning_rate": 5.171238912060155e-06, "loss": 0.1362, "step": 16440 }, { "epoch": 0.4831698290548082, "grad_norm": 160965.484375, "learning_rate": 5.168301709451919e-06, "loss": 0.1316, "step": 16450 }, { "epoch": 0.4834635493156318, "grad_norm": 226564.34375, "learning_rate": 5.1653645068436816e-06, "loss": 0.1357, "step": 16460 }, { "epoch": 0.4837572695764554, "grad_norm": 182464.875, "learning_rate": 5.162427304235447e-06, "loss": 0.14, "step": 16470 }, { "epoch": 0.48405098983727896, "grad_norm": 157130.984375, "learning_rate": 5.15949010162721e-06, "loss": 0.1315, "step": 16480 }, { "epoch": 0.4843447100981026, "grad_norm": 150366.328125, "learning_rate": 5.156552899018975e-06, "loss": 0.1296, "step": 16490 }, { "epoch": 0.4846384303589262, "grad_norm": 212733.59375, "learning_rate": 5.153615696410738e-06, "loss": 0.1426, "step": 16500 }, { "epoch": 0.48493215061974976, "grad_norm": 174483.859375, "learning_rate": 5.150678493802503e-06, "loss": 0.1404, "step": 16510 }, { "epoch": 0.48522587088057334, "grad_norm": 193396.84375, "learning_rate": 5.147741291194267e-06, "loss": 0.1388, "step": 16520 }, { "epoch": 0.4855195911413969, "grad_norm": 177526.09375, "learning_rate": 5.144804088586031e-06, "loss": 0.1563, "step": 16530 }, { "epoch": 0.4858133114022205, "grad_norm": 92876.859375, "learning_rate": 5.141866885977795e-06, "loss": 0.1497, "step": 16540 }, { "epoch": 0.48610703166304414, "grad_norm": 152260.734375, "learning_rate": 5.138929683369559e-06, "loss": 0.1411, "step": 16550 }, { "epoch": 0.4864007519238677, "grad_norm": 148989.28125, "learning_rate": 5.135992480761324e-06, "loss": 0.1272, "step": 16560 }, { "epoch": 0.4866944721846913, "grad_norm": 291311.75, "learning_rate": 5.1330552781530875e-06, "loss": 0.1351, "step": 16570 }, { "epoch": 0.4869881924455149, "grad_norm": 327421.875, "learning_rate": 5.130118075544851e-06, "loss": 0.1489, "step": 16580 }, { "epoch": 0.48728191270633847, "grad_norm": 170894.046875, "learning_rate": 5.127180872936616e-06, "loss": 0.1407, "step": 16590 }, { "epoch": 0.48757563296716205, "grad_norm": 240988.375, "learning_rate": 5.12424367032838e-06, "loss": 0.1484, "step": 16600 }, { "epoch": 0.4878693532279857, "grad_norm": 167596.65625, "learning_rate": 5.121306467720144e-06, "loss": 0.1444, "step": 16610 }, { "epoch": 0.4881630734888093, "grad_norm": 264655.3125, "learning_rate": 5.118369265111908e-06, "loss": 0.1368, "step": 16620 }, { "epoch": 0.48845679374963286, "grad_norm": 172498.515625, "learning_rate": 5.115432062503673e-06, "loss": 0.1391, "step": 16630 }, { "epoch": 0.48875051401045644, "grad_norm": 188654.9375, "learning_rate": 5.112494859895436e-06, "loss": 0.1394, "step": 16640 }, { "epoch": 0.48904423427128, "grad_norm": 198219.25, "learning_rate": 5.1095576572871994e-06, "loss": 0.139, "step": 16650 }, { "epoch": 0.4893379545321036, "grad_norm": 205222.578125, "learning_rate": 5.106620454678964e-06, "loss": 0.1379, "step": 16660 }, { "epoch": 0.48963167479292724, "grad_norm": 202724.84375, "learning_rate": 5.103683252070728e-06, "loss": 0.1301, "step": 16670 }, { "epoch": 0.4899253950537508, "grad_norm": 140039.140625, "learning_rate": 5.1007460494624925e-06, "loss": 0.1396, "step": 16680 }, { "epoch": 0.4902191153145744, "grad_norm": 112549.453125, "learning_rate": 5.097808846854256e-06, "loss": 0.1445, "step": 16690 }, { "epoch": 0.490512835575398, "grad_norm": 215928.328125, "learning_rate": 5.09487164424602e-06, "loss": 0.1511, "step": 16700 }, { "epoch": 0.49080655583622157, "grad_norm": 202370.28125, "learning_rate": 5.091934441637785e-06, "loss": 0.132, "step": 16710 }, { "epoch": 0.49110027609704515, "grad_norm": 142816.8125, "learning_rate": 5.0889972390295485e-06, "loss": 0.1344, "step": 16720 }, { "epoch": 0.4913939963578688, "grad_norm": 185454.21875, "learning_rate": 5.086060036421313e-06, "loss": 0.146, "step": 16730 }, { "epoch": 0.49168771661869237, "grad_norm": 124930.1875, "learning_rate": 5.083122833813077e-06, "loss": 0.1314, "step": 16740 }, { "epoch": 0.49198143687951595, "grad_norm": 111389.2421875, "learning_rate": 5.0801856312048415e-06, "loss": 0.1445, "step": 16750 }, { "epoch": 0.49227515714033954, "grad_norm": 245179.28125, "learning_rate": 5.077248428596605e-06, "loss": 0.1419, "step": 16760 }, { "epoch": 0.4925688774011631, "grad_norm": 308087.0625, "learning_rate": 5.074311225988369e-06, "loss": 0.1366, "step": 16770 }, { "epoch": 0.4928625976619867, "grad_norm": 168501.796875, "learning_rate": 5.071374023380134e-06, "loss": 0.1474, "step": 16780 }, { "epoch": 0.49315631792281034, "grad_norm": 124130.59375, "learning_rate": 5.068436820771897e-06, "loss": 0.1507, "step": 16790 }, { "epoch": 0.4934500381836339, "grad_norm": 225632.8125, "learning_rate": 5.065499618163662e-06, "loss": 0.1279, "step": 16800 }, { "epoch": 0.4937437584444575, "grad_norm": 223519.453125, "learning_rate": 5.062562415555425e-06, "loss": 0.1367, "step": 16810 }, { "epoch": 0.4940374787052811, "grad_norm": 217740.890625, "learning_rate": 5.059625212947189e-06, "loss": 0.1439, "step": 16820 }, { "epoch": 0.49433119896610467, "grad_norm": 149758.515625, "learning_rate": 5.0566880103389535e-06, "loss": 0.1602, "step": 16830 }, { "epoch": 0.49462491922692825, "grad_norm": 299349.96875, "learning_rate": 5.053750807730717e-06, "loss": 0.1356, "step": 16840 }, { "epoch": 0.4949186394877519, "grad_norm": 128984.6171875, "learning_rate": 5.050813605122482e-06, "loss": 0.1338, "step": 16850 }, { "epoch": 0.49521235974857547, "grad_norm": 162096.15625, "learning_rate": 5.047876402514246e-06, "loss": 0.1338, "step": 16860 }, { "epoch": 0.49550608000939905, "grad_norm": 115929.296875, "learning_rate": 5.04493919990601e-06, "loss": 0.1435, "step": 16870 }, { "epoch": 0.49579980027022263, "grad_norm": 174818.59375, "learning_rate": 5.042001997297774e-06, "loss": 0.1406, "step": 16880 }, { "epoch": 0.4960935205310462, "grad_norm": 147203.171875, "learning_rate": 5.039064794689538e-06, "loss": 0.1404, "step": 16890 }, { "epoch": 0.4963872407918698, "grad_norm": 159659.515625, "learning_rate": 5.0361275920813025e-06, "loss": 0.1404, "step": 16900 }, { "epoch": 0.49668096105269344, "grad_norm": 147019.75, "learning_rate": 5.033190389473066e-06, "loss": 0.1385, "step": 16910 }, { "epoch": 0.496974681313517, "grad_norm": 211734.75, "learning_rate": 5.030253186864831e-06, "loss": 0.1235, "step": 16920 }, { "epoch": 0.4972684015743406, "grad_norm": 184320.53125, "learning_rate": 5.027315984256595e-06, "loss": 0.1468, "step": 16930 }, { "epoch": 0.4975621218351642, "grad_norm": 138728.9375, "learning_rate": 5.024378781648358e-06, "loss": 0.1416, "step": 16940 }, { "epoch": 0.49785584209598777, "grad_norm": 223885.171875, "learning_rate": 5.021441579040123e-06, "loss": 0.1385, "step": 16950 }, { "epoch": 0.49814956235681135, "grad_norm": 128255.9140625, "learning_rate": 5.018504376431886e-06, "loss": 0.1483, "step": 16960 }, { "epoch": 0.498443282617635, "grad_norm": 232754.75, "learning_rate": 5.015567173823651e-06, "loss": 0.1351, "step": 16970 }, { "epoch": 0.49873700287845857, "grad_norm": 103317.734375, "learning_rate": 5.0126299712154145e-06, "loss": 0.1354, "step": 16980 }, { "epoch": 0.49903072313928215, "grad_norm": 127699.3046875, "learning_rate": 5.009692768607179e-06, "loss": 0.143, "step": 16990 }, { "epoch": 0.49932444340010573, "grad_norm": 144912.140625, "learning_rate": 5.006755565998943e-06, "loss": 0.1362, "step": 17000 }, { "epoch": 0.4996181636609293, "grad_norm": 242816.859375, "learning_rate": 5.003818363390707e-06, "loss": 0.1355, "step": 17010 }, { "epoch": 0.4999118839217529, "grad_norm": 92177.3046875, "learning_rate": 5.000881160782471e-06, "loss": 0.1302, "step": 17020 }, { "epoch": 0.5002056041825765, "grad_norm": 213820.265625, "learning_rate": 4.997943958174235e-06, "loss": 0.1388, "step": 17030 }, { "epoch": 0.5004993244434001, "grad_norm": 244130.375, "learning_rate": 4.995006755565999e-06, "loss": 0.1432, "step": 17040 }, { "epoch": 0.5007930447042237, "grad_norm": 176100.296875, "learning_rate": 4.9920695529577636e-06, "loss": 0.1383, "step": 17050 }, { "epoch": 0.5010867649650473, "grad_norm": 118049.21875, "learning_rate": 4.989132350349527e-06, "loss": 0.1479, "step": 17060 }, { "epoch": 0.5013804852258709, "grad_norm": 140879.96875, "learning_rate": 4.986195147741292e-06, "loss": 0.1422, "step": 17070 }, { "epoch": 0.5016742054866945, "grad_norm": 293127.0, "learning_rate": 4.983257945133056e-06, "loss": 0.1395, "step": 17080 }, { "epoch": 0.501967925747518, "grad_norm": 182533.578125, "learning_rate": 4.9803207425248196e-06, "loss": 0.1374, "step": 17090 }, { "epoch": 0.5022616460083417, "grad_norm": 84066.9765625, "learning_rate": 4.977383539916583e-06, "loss": 0.1299, "step": 17100 }, { "epoch": 0.5025553662691652, "grad_norm": 173638.796875, "learning_rate": 4.974446337308348e-06, "loss": 0.1351, "step": 17110 }, { "epoch": 0.5028490865299888, "grad_norm": 175291.59375, "learning_rate": 4.971509134700112e-06, "loss": 0.1364, "step": 17120 }, { "epoch": 0.5031428067908125, "grad_norm": 154014.734375, "learning_rate": 4.968571932091876e-06, "loss": 0.1331, "step": 17130 }, { "epoch": 0.503436527051636, "grad_norm": 274103.625, "learning_rate": 4.96563472948364e-06, "loss": 0.1336, "step": 17140 }, { "epoch": 0.5037302473124596, "grad_norm": 190735.828125, "learning_rate": 4.962697526875404e-06, "loss": 0.1353, "step": 17150 }, { "epoch": 0.5040239675732832, "grad_norm": 93992.96875, "learning_rate": 4.959760324267169e-06, "loss": 0.1336, "step": 17160 }, { "epoch": 0.5043176878341068, "grad_norm": 122731.890625, "learning_rate": 4.956823121658932e-06, "loss": 0.1422, "step": 17170 }, { "epoch": 0.5046114080949304, "grad_norm": 133700.046875, "learning_rate": 4.953885919050697e-06, "loss": 0.1309, "step": 17180 }, { "epoch": 0.504905128355754, "grad_norm": 196856.328125, "learning_rate": 4.950948716442461e-06, "loss": 0.1433, "step": 17190 }, { "epoch": 0.5051988486165776, "grad_norm": 154393.078125, "learning_rate": 4.948011513834225e-06, "loss": 0.1342, "step": 17200 }, { "epoch": 0.5054925688774011, "grad_norm": 72770.6015625, "learning_rate": 4.945074311225988e-06, "loss": 0.1193, "step": 17210 }, { "epoch": 0.5057862891382248, "grad_norm": 103036.7109375, "learning_rate": 4.942137108617753e-06, "loss": 0.1303, "step": 17220 }, { "epoch": 0.5060800093990483, "grad_norm": 205044.21875, "learning_rate": 4.939199906009517e-06, "loss": 0.1351, "step": 17230 }, { "epoch": 0.5063737296598719, "grad_norm": 138215.125, "learning_rate": 4.936262703401281e-06, "loss": 0.116, "step": 17240 }, { "epoch": 0.5066674499206956, "grad_norm": 183347.46875, "learning_rate": 4.933325500793045e-06, "loss": 0.1271, "step": 17250 }, { "epoch": 0.5069611701815191, "grad_norm": 153643.09375, "learning_rate": 4.93038829818481e-06, "loss": 0.1438, "step": 17260 }, { "epoch": 0.5072548904423427, "grad_norm": 206823.921875, "learning_rate": 4.927451095576573e-06, "loss": 0.1432, "step": 17270 }, { "epoch": 0.5075486107031663, "grad_norm": 100375.3515625, "learning_rate": 4.924513892968337e-06, "loss": 0.1274, "step": 17280 }, { "epoch": 0.5078423309639899, "grad_norm": 265129.0625, "learning_rate": 4.921576690360101e-06, "loss": 0.1362, "step": 17290 }, { "epoch": 0.5081360512248135, "grad_norm": 130878.4921875, "learning_rate": 4.918639487751866e-06, "loss": 0.1361, "step": 17300 }, { "epoch": 0.5084297714856371, "grad_norm": 244602.953125, "learning_rate": 4.91570228514363e-06, "loss": 0.1354, "step": 17310 }, { "epoch": 0.5087234917464607, "grad_norm": 105623.21875, "learning_rate": 4.912765082535394e-06, "loss": 0.1306, "step": 17320 }, { "epoch": 0.5090172120072842, "grad_norm": 161471.515625, "learning_rate": 4.909827879927157e-06, "loss": 0.1353, "step": 17330 }, { "epoch": 0.5093109322681079, "grad_norm": 141315.40625, "learning_rate": 4.906890677318922e-06, "loss": 0.1358, "step": 17340 }, { "epoch": 0.5096046525289315, "grad_norm": 255060.59375, "learning_rate": 4.903953474710686e-06, "loss": 0.1305, "step": 17350 }, { "epoch": 0.509898372789755, "grad_norm": 94877.2734375, "learning_rate": 4.90101627210245e-06, "loss": 0.135, "step": 17360 }, { "epoch": 0.5101920930505787, "grad_norm": 159647.015625, "learning_rate": 4.898079069494214e-06, "loss": 0.1406, "step": 17370 }, { "epoch": 0.5104858133114022, "grad_norm": 154976.828125, "learning_rate": 4.895141866885978e-06, "loss": 0.1311, "step": 17380 }, { "epoch": 0.5107795335722258, "grad_norm": 144364.96875, "learning_rate": 4.8922046642777424e-06, "loss": 0.1278, "step": 17390 }, { "epoch": 0.5110732538330494, "grad_norm": 417644.0, "learning_rate": 4.889267461669506e-06, "loss": 0.1313, "step": 17400 }, { "epoch": 0.511366974093873, "grad_norm": 228450.9375, "learning_rate": 4.886330259061271e-06, "loss": 0.14, "step": 17410 }, { "epoch": 0.5116606943546966, "grad_norm": 123531.6875, "learning_rate": 4.883393056453035e-06, "loss": 0.1344, "step": 17420 }, { "epoch": 0.5119544146155202, "grad_norm": 416566.03125, "learning_rate": 4.8804558538447984e-06, "loss": 0.1418, "step": 17430 }, { "epoch": 0.5122481348763438, "grad_norm": 178808.484375, "learning_rate": 4.877518651236562e-06, "loss": 0.1341, "step": 17440 }, { "epoch": 0.5125418551371673, "grad_norm": 229653.96875, "learning_rate": 4.874581448628327e-06, "loss": 0.1431, "step": 17450 }, { "epoch": 0.512835575397991, "grad_norm": 142433.65625, "learning_rate": 4.871644246020091e-06, "loss": 0.123, "step": 17460 }, { "epoch": 0.5131292956588146, "grad_norm": 154178.796875, "learning_rate": 4.868707043411855e-06, "loss": 0.157, "step": 17470 }, { "epoch": 0.5134230159196381, "grad_norm": 124316.0234375, "learning_rate": 4.865769840803619e-06, "loss": 0.1341, "step": 17480 }, { "epoch": 0.5137167361804618, "grad_norm": 252071.125, "learning_rate": 4.862832638195384e-06, "loss": 0.1301, "step": 17490 }, { "epoch": 0.5140104564412853, "grad_norm": 721314.875, "learning_rate": 4.859895435587147e-06, "loss": 0.1258, "step": 17500 }, { "epoch": 0.5143041767021089, "grad_norm": 163484.59375, "learning_rate": 4.856958232978911e-06, "loss": 0.1353, "step": 17510 }, { "epoch": 0.5145978969629325, "grad_norm": 123508.2265625, "learning_rate": 4.854021030370675e-06, "loss": 0.1259, "step": 17520 }, { "epoch": 0.5148916172237561, "grad_norm": 145774.75, "learning_rate": 4.85108382776244e-06, "loss": 0.1401, "step": 17530 }, { "epoch": 0.5151853374845797, "grad_norm": 282168.34375, "learning_rate": 4.8481466251542035e-06, "loss": 0.1547, "step": 17540 }, { "epoch": 0.5154790577454033, "grad_norm": 218900.59375, "learning_rate": 4.845209422545968e-06, "loss": 0.1276, "step": 17550 }, { "epoch": 0.5157727780062269, "grad_norm": 180704.265625, "learning_rate": 4.842272219937731e-06, "loss": 0.1425, "step": 17560 }, { "epoch": 0.5160664982670504, "grad_norm": 141613.78125, "learning_rate": 4.839335017329496e-06, "loss": 0.1414, "step": 17570 }, { "epoch": 0.5163602185278741, "grad_norm": 120430.984375, "learning_rate": 4.8363978147212594e-06, "loss": 0.1461, "step": 17580 }, { "epoch": 0.5166539387886977, "grad_norm": 135613.125, "learning_rate": 4.833460612113024e-06, "loss": 0.1245, "step": 17590 }, { "epoch": 0.5169476590495212, "grad_norm": 120904.625, "learning_rate": 4.830523409504788e-06, "loss": 0.1356, "step": 17600 }, { "epoch": 0.5172413793103449, "grad_norm": 97275.65625, "learning_rate": 4.8275862068965525e-06, "loss": 0.1266, "step": 17610 }, { "epoch": 0.5175350995711684, "grad_norm": 291885.6875, "learning_rate": 4.824649004288316e-06, "loss": 0.1297, "step": 17620 }, { "epoch": 0.517828819831992, "grad_norm": 287584.15625, "learning_rate": 4.82171180168008e-06, "loss": 0.1478, "step": 17630 }, { "epoch": 0.5181225400928156, "grad_norm": 129057.75, "learning_rate": 4.818774599071845e-06, "loss": 0.1391, "step": 17640 }, { "epoch": 0.5184162603536392, "grad_norm": 195824.875, "learning_rate": 4.8158373964636085e-06, "loss": 0.1352, "step": 17650 }, { "epoch": 0.5187099806144628, "grad_norm": 112177.6171875, "learning_rate": 4.812900193855372e-06, "loss": 0.1298, "step": 17660 }, { "epoch": 0.5190037008752864, "grad_norm": 149930.71875, "learning_rate": 4.809962991247137e-06, "loss": 0.1388, "step": 17670 }, { "epoch": 0.51929742113611, "grad_norm": 129142.0390625, "learning_rate": 4.807025788638901e-06, "loss": 0.1406, "step": 17680 }, { "epoch": 0.5195911413969335, "grad_norm": 236448.71875, "learning_rate": 4.8040885860306645e-06, "loss": 0.1289, "step": 17690 }, { "epoch": 0.5198848616577572, "grad_norm": 131664.40625, "learning_rate": 4.801151383422429e-06, "loss": 0.148, "step": 17700 }, { "epoch": 0.5201785819185808, "grad_norm": 156222.703125, "learning_rate": 4.798214180814193e-06, "loss": 0.1319, "step": 17710 }, { "epoch": 0.5204723021794043, "grad_norm": 125466.90625, "learning_rate": 4.7952769782059575e-06, "loss": 0.141, "step": 17720 }, { "epoch": 0.520766022440228, "grad_norm": 213040.40625, "learning_rate": 4.792339775597721e-06, "loss": 0.1342, "step": 17730 }, { "epoch": 0.5210597427010515, "grad_norm": 216308.765625, "learning_rate": 4.789402572989485e-06, "loss": 0.1317, "step": 17740 }, { "epoch": 0.5213534629618751, "grad_norm": 128859.15625, "learning_rate": 4.786465370381249e-06, "loss": 0.1451, "step": 17750 }, { "epoch": 0.5216471832226987, "grad_norm": 136018.84375, "learning_rate": 4.7835281677730135e-06, "loss": 0.1273, "step": 17760 }, { "epoch": 0.5219409034835223, "grad_norm": 258841.359375, "learning_rate": 4.780590965164777e-06, "loss": 0.13, "step": 17770 }, { "epoch": 0.5222346237443459, "grad_norm": 97800.3359375, "learning_rate": 4.777653762556542e-06, "loss": 0.1429, "step": 17780 }, { "epoch": 0.5225283440051695, "grad_norm": 190724.21875, "learning_rate": 4.774716559948306e-06, "loss": 0.1332, "step": 17790 }, { "epoch": 0.5228220642659931, "grad_norm": 158274.3125, "learning_rate": 4.7717793573400695e-06, "loss": 0.1408, "step": 17800 }, { "epoch": 0.5231157845268166, "grad_norm": 99132.21875, "learning_rate": 4.768842154731833e-06, "loss": 0.1311, "step": 17810 }, { "epoch": 0.5234095047876403, "grad_norm": 320006.53125, "learning_rate": 4.765904952123598e-06, "loss": 0.1385, "step": 17820 }, { "epoch": 0.5237032250484639, "grad_norm": 240046.953125, "learning_rate": 4.762967749515362e-06, "loss": 0.1238, "step": 17830 }, { "epoch": 0.5239969453092874, "grad_norm": 195379.859375, "learning_rate": 4.760030546907126e-06, "loss": 0.1304, "step": 17840 }, { "epoch": 0.5242906655701111, "grad_norm": 248273.046875, "learning_rate": 4.75709334429889e-06, "loss": 0.1354, "step": 17850 }, { "epoch": 0.5245843858309346, "grad_norm": 111151.890625, "learning_rate": 4.754156141690654e-06, "loss": 0.147, "step": 17860 }, { "epoch": 0.5248781060917582, "grad_norm": 129149.9921875, "learning_rate": 4.7512189390824185e-06, "loss": 0.1402, "step": 17870 }, { "epoch": 0.5251718263525817, "grad_norm": 130479.7109375, "learning_rate": 4.748281736474182e-06, "loss": 0.1287, "step": 17880 }, { "epoch": 0.5254655466134054, "grad_norm": 176159.578125, "learning_rate": 4.745344533865946e-06, "loss": 0.1312, "step": 17890 }, { "epoch": 0.525759266874229, "grad_norm": 122890.375, "learning_rate": 4.742407331257711e-06, "loss": 0.1305, "step": 17900 }, { "epoch": 0.5260529871350526, "grad_norm": 166212.625, "learning_rate": 4.7394701286494745e-06, "loss": 0.1377, "step": 17910 }, { "epoch": 0.5263467073958762, "grad_norm": 246541.171875, "learning_rate": 4.736532926041238e-06, "loss": 0.1322, "step": 17920 }, { "epoch": 0.5266404276566997, "grad_norm": 120976.359375, "learning_rate": 4.733595723433003e-06, "loss": 0.1249, "step": 17930 }, { "epoch": 0.5269341479175234, "grad_norm": 157943.4375, "learning_rate": 4.730658520824767e-06, "loss": 0.1456, "step": 17940 }, { "epoch": 0.527227868178347, "grad_norm": 118608.9296875, "learning_rate": 4.727721318216531e-06, "loss": 0.1362, "step": 17950 }, { "epoch": 0.5275215884391705, "grad_norm": 342388.75, "learning_rate": 4.724784115608295e-06, "loss": 0.1357, "step": 17960 }, { "epoch": 0.5278153086999942, "grad_norm": 176172.53125, "learning_rate": 4.72184691300006e-06, "loss": 0.1346, "step": 17970 }, { "epoch": 0.5281090289608177, "grad_norm": 103153.28125, "learning_rate": 4.718909710391823e-06, "loss": 0.1399, "step": 17980 }, { "epoch": 0.5284027492216413, "grad_norm": 334384.53125, "learning_rate": 4.715972507783587e-06, "loss": 0.133, "step": 17990 }, { "epoch": 0.5286964694824648, "grad_norm": 147494.890625, "learning_rate": 4.713035305175351e-06, "loss": 0.1237, "step": 18000 }, { "epoch": 0.5289901897432885, "grad_norm": 200110.703125, "learning_rate": 4.710098102567116e-06, "loss": 0.1351, "step": 18010 }, { "epoch": 0.5292839100041121, "grad_norm": 206358.125, "learning_rate": 4.7071608999588796e-06, "loss": 0.1435, "step": 18020 }, { "epoch": 0.5295776302649357, "grad_norm": 192771.640625, "learning_rate": 4.704223697350644e-06, "loss": 0.1363, "step": 18030 }, { "epoch": 0.5298713505257593, "grad_norm": 502267.0625, "learning_rate": 4.701286494742407e-06, "loss": 0.1441, "step": 18040 }, { "epoch": 0.5301650707865828, "grad_norm": 160996.296875, "learning_rate": 4.698349292134172e-06, "loss": 0.1406, "step": 18050 }, { "epoch": 0.5304587910474065, "grad_norm": 114702.046875, "learning_rate": 4.6954120895259356e-06, "loss": 0.143, "step": 18060 }, { "epoch": 0.5307525113082301, "grad_norm": 121672.9375, "learning_rate": 4.6924748869177e-06, "loss": 0.1408, "step": 18070 }, { "epoch": 0.5310462315690536, "grad_norm": 130263.1171875, "learning_rate": 4.689537684309464e-06, "loss": 0.1367, "step": 18080 }, { "epoch": 0.5313399518298773, "grad_norm": 171215.109375, "learning_rate": 4.686600481701229e-06, "loss": 0.1398, "step": 18090 }, { "epoch": 0.5316336720907008, "grad_norm": 195877.609375, "learning_rate": 4.683663279092992e-06, "loss": 0.1279, "step": 18100 }, { "epoch": 0.5319273923515244, "grad_norm": 340489.625, "learning_rate": 4.680726076484756e-06, "loss": 0.142, "step": 18110 }, { "epoch": 0.532221112612348, "grad_norm": 112586.8359375, "learning_rate": 4.67778887387652e-06, "loss": 0.1349, "step": 18120 }, { "epoch": 0.5325148328731716, "grad_norm": 164396.0, "learning_rate": 4.674851671268285e-06, "loss": 0.1315, "step": 18130 }, { "epoch": 0.5328085531339952, "grad_norm": 217985.84375, "learning_rate": 4.671914468660048e-06, "loss": 0.1459, "step": 18140 }, { "epoch": 0.5331022733948187, "grad_norm": 143936.984375, "learning_rate": 4.668977266051813e-06, "loss": 0.1329, "step": 18150 }, { "epoch": 0.5333959936556424, "grad_norm": 138513.34375, "learning_rate": 4.666040063443577e-06, "loss": 0.1417, "step": 18160 }, { "epoch": 0.5336897139164659, "grad_norm": 162557.15625, "learning_rate": 4.663102860835341e-06, "loss": 0.1321, "step": 18170 }, { "epoch": 0.5339834341772896, "grad_norm": 120594.5625, "learning_rate": 4.660165658227105e-06, "loss": 0.1424, "step": 18180 }, { "epoch": 0.5342771544381132, "grad_norm": 172631.296875, "learning_rate": 4.657228455618869e-06, "loss": 0.1345, "step": 18190 }, { "epoch": 0.5345708746989367, "grad_norm": 164896.515625, "learning_rate": 4.654291253010634e-06, "loss": 0.133, "step": 18200 }, { "epoch": 0.5348645949597604, "grad_norm": 160794.046875, "learning_rate": 4.6513540504023974e-06, "loss": 0.1535, "step": 18210 }, { "epoch": 0.5351583152205839, "grad_norm": 127096.1171875, "learning_rate": 4.648416847794161e-06, "loss": 0.1286, "step": 18220 }, { "epoch": 0.5354520354814075, "grad_norm": 161992.703125, "learning_rate": 4.645479645185925e-06, "loss": 0.1242, "step": 18230 }, { "epoch": 0.535745755742231, "grad_norm": 156928.890625, "learning_rate": 4.64254244257769e-06, "loss": 0.1453, "step": 18240 }, { "epoch": 0.5360394760030547, "grad_norm": 206046.671875, "learning_rate": 4.639605239969453e-06, "loss": 0.1519, "step": 18250 }, { "epoch": 0.5363331962638783, "grad_norm": 255910.9375, "learning_rate": 4.636668037361218e-06, "loss": 0.1306, "step": 18260 }, { "epoch": 0.5366269165247018, "grad_norm": 211129.828125, "learning_rate": 4.633730834752982e-06, "loss": 0.1398, "step": 18270 }, { "epoch": 0.5369206367855255, "grad_norm": 142060.875, "learning_rate": 4.630793632144746e-06, "loss": 0.1399, "step": 18280 }, { "epoch": 0.537214357046349, "grad_norm": 106899.0625, "learning_rate": 4.627856429536509e-06, "loss": 0.1487, "step": 18290 }, { "epoch": 0.5375080773071726, "grad_norm": 123904.5859375, "learning_rate": 4.624919226928274e-06, "loss": 0.1316, "step": 18300 }, { "epoch": 0.5378017975679963, "grad_norm": 160047.984375, "learning_rate": 4.621982024320038e-06, "loss": 0.1329, "step": 18310 }, { "epoch": 0.5380955178288198, "grad_norm": 178315.578125, "learning_rate": 4.6190448217118025e-06, "loss": 0.1308, "step": 18320 }, { "epoch": 0.5383892380896435, "grad_norm": 204004.5625, "learning_rate": 4.616107619103566e-06, "loss": 0.1366, "step": 18330 }, { "epoch": 0.538682958350467, "grad_norm": 162894.71875, "learning_rate": 4.61317041649533e-06, "loss": 0.1323, "step": 18340 }, { "epoch": 0.5389766786112906, "grad_norm": 159911.53125, "learning_rate": 4.610233213887094e-06, "loss": 0.1276, "step": 18350 }, { "epoch": 0.5392703988721143, "grad_norm": 178332.71875, "learning_rate": 4.6072960112788584e-06, "loss": 0.1348, "step": 18360 }, { "epoch": 0.5395641191329378, "grad_norm": 172871.09375, "learning_rate": 4.604358808670622e-06, "loss": 0.1381, "step": 18370 }, { "epoch": 0.5398578393937614, "grad_norm": 188662.109375, "learning_rate": 4.601421606062387e-06, "loss": 0.142, "step": 18380 }, { "epoch": 0.540151559654585, "grad_norm": 283906.40625, "learning_rate": 4.598484403454151e-06, "loss": 0.1327, "step": 18390 }, { "epoch": 0.5404452799154086, "grad_norm": 178640.90625, "learning_rate": 4.5955472008459144e-06, "loss": 0.1368, "step": 18400 }, { "epoch": 0.5407390001762321, "grad_norm": 137402.171875, "learning_rate": 4.592609998237679e-06, "loss": 0.142, "step": 18410 }, { "epoch": 0.5410327204370557, "grad_norm": 162366.375, "learning_rate": 4.589672795629443e-06, "loss": 0.1334, "step": 18420 }, { "epoch": 0.5413264406978794, "grad_norm": 158749.203125, "learning_rate": 4.5867355930212075e-06, "loss": 0.1482, "step": 18430 }, { "epoch": 0.5416201609587029, "grad_norm": 181459.25, "learning_rate": 4.583798390412971e-06, "loss": 0.1442, "step": 18440 }, { "epoch": 0.5419138812195265, "grad_norm": 198206.828125, "learning_rate": 4.580861187804735e-06, "loss": 0.1341, "step": 18450 }, { "epoch": 0.5422076014803501, "grad_norm": 143788.171875, "learning_rate": 4.577923985196499e-06, "loss": 0.1432, "step": 18460 }, { "epoch": 0.5425013217411737, "grad_norm": 186862.140625, "learning_rate": 4.5749867825882635e-06, "loss": 0.1366, "step": 18470 }, { "epoch": 0.5427950420019974, "grad_norm": 269474.3125, "learning_rate": 4.572049579980027e-06, "loss": 0.1452, "step": 18480 }, { "epoch": 0.5430887622628209, "grad_norm": 170677.328125, "learning_rate": 4.569112377371792e-06, "loss": 0.1387, "step": 18490 }, { "epoch": 0.5433824825236445, "grad_norm": 222676.921875, "learning_rate": 4.566175174763556e-06, "loss": 0.1328, "step": 18500 }, { "epoch": 0.543676202784468, "grad_norm": 148424.734375, "learning_rate": 4.5632379721553195e-06, "loss": 0.1372, "step": 18510 }, { "epoch": 0.5439699230452917, "grad_norm": 132838.0625, "learning_rate": 4.560300769547083e-06, "loss": 0.1421, "step": 18520 }, { "epoch": 0.5442636433061152, "grad_norm": 137668.0625, "learning_rate": 4.557363566938848e-06, "loss": 0.1382, "step": 18530 }, { "epoch": 0.5445573635669388, "grad_norm": 120155.859375, "learning_rate": 4.554426364330612e-06, "loss": 0.1351, "step": 18540 }, { "epoch": 0.5448510838277625, "grad_norm": 129854.453125, "learning_rate": 4.551489161722376e-06, "loss": 0.1317, "step": 18550 }, { "epoch": 0.545144804088586, "grad_norm": 129500.2578125, "learning_rate": 4.54855195911414e-06, "loss": 0.147, "step": 18560 }, { "epoch": 0.5454385243494096, "grad_norm": 171172.3125, "learning_rate": 4.545614756505904e-06, "loss": 0.1414, "step": 18570 }, { "epoch": 0.5457322446102332, "grad_norm": 155834.1875, "learning_rate": 4.542677553897668e-06, "loss": 0.1244, "step": 18580 }, { "epoch": 0.5460259648710568, "grad_norm": 311486.0625, "learning_rate": 4.539740351289432e-06, "loss": 0.1404, "step": 18590 }, { "epoch": 0.5463196851318805, "grad_norm": 198402.0, "learning_rate": 4.536803148681196e-06, "loss": 0.1398, "step": 18600 }, { "epoch": 0.546613405392704, "grad_norm": 177368.546875, "learning_rate": 4.533865946072961e-06, "loss": 0.1274, "step": 18610 }, { "epoch": 0.5469071256535276, "grad_norm": 95346.6171875, "learning_rate": 4.5309287434647245e-06, "loss": 0.14, "step": 18620 }, { "epoch": 0.5472008459143511, "grad_norm": 214108.90625, "learning_rate": 4.527991540856488e-06, "loss": 0.1318, "step": 18630 }, { "epoch": 0.5474945661751748, "grad_norm": 156706.671875, "learning_rate": 4.525054338248253e-06, "loss": 0.1421, "step": 18640 }, { "epoch": 0.5477882864359983, "grad_norm": 228830.59375, "learning_rate": 4.522117135640017e-06, "loss": 0.1316, "step": 18650 }, { "epoch": 0.5480820066968219, "grad_norm": 152049.5625, "learning_rate": 4.519179933031781e-06, "loss": 0.1217, "step": 18660 }, { "epoch": 0.5483757269576456, "grad_norm": 188287.15625, "learning_rate": 4.516242730423545e-06, "loss": 0.1333, "step": 18670 }, { "epoch": 0.5486694472184691, "grad_norm": 211761.015625, "learning_rate": 4.513305527815309e-06, "loss": 0.1223, "step": 18680 }, { "epoch": 0.5489631674792927, "grad_norm": 152822.890625, "learning_rate": 4.510368325207073e-06, "loss": 0.1203, "step": 18690 }, { "epoch": 0.5492568877401163, "grad_norm": 201808.0625, "learning_rate": 4.507431122598837e-06, "loss": 0.1435, "step": 18700 }, { "epoch": 0.5495506080009399, "grad_norm": 115478.65625, "learning_rate": 4.504493919990601e-06, "loss": 0.1364, "step": 18710 }, { "epoch": 0.5498443282617635, "grad_norm": 222998.0, "learning_rate": 4.501556717382366e-06, "loss": 0.1268, "step": 18720 }, { "epoch": 0.5501380485225871, "grad_norm": 157721.84375, "learning_rate": 4.4986195147741295e-06, "loss": 0.1314, "step": 18730 }, { "epoch": 0.5504317687834107, "grad_norm": 121984.7578125, "learning_rate": 4.495682312165894e-06, "loss": 0.1339, "step": 18740 }, { "epoch": 0.5507254890442342, "grad_norm": 130417.515625, "learning_rate": 4.492745109557657e-06, "loss": 0.127, "step": 18750 }, { "epoch": 0.5510192093050579, "grad_norm": 200088.828125, "learning_rate": 4.489807906949422e-06, "loss": 0.119, "step": 18760 }, { "epoch": 0.5513129295658814, "grad_norm": 147127.0, "learning_rate": 4.4868707043411855e-06, "loss": 0.1396, "step": 18770 }, { "epoch": 0.551606649826705, "grad_norm": 168878.671875, "learning_rate": 4.48393350173295e-06, "loss": 0.1249, "step": 18780 }, { "epoch": 0.5519003700875287, "grad_norm": 145807.015625, "learning_rate": 4.480996299124714e-06, "loss": 0.1278, "step": 18790 }, { "epoch": 0.5521940903483522, "grad_norm": 114974.421875, "learning_rate": 4.4780590965164786e-06, "loss": 0.1412, "step": 18800 }, { "epoch": 0.5524878106091758, "grad_norm": 143474.78125, "learning_rate": 4.4751218939082415e-06, "loss": 0.1596, "step": 18810 }, { "epoch": 0.5527815308699994, "grad_norm": 100106.5078125, "learning_rate": 4.472184691300006e-06, "loss": 0.1359, "step": 18820 }, { "epoch": 0.553075251130823, "grad_norm": 197502.4375, "learning_rate": 4.46924748869177e-06, "loss": 0.1313, "step": 18830 }, { "epoch": 0.5533689713916466, "grad_norm": 185706.5, "learning_rate": 4.4663102860835346e-06, "loss": 0.125, "step": 18840 }, { "epoch": 0.5536626916524702, "grad_norm": 156661.34375, "learning_rate": 4.463373083475298e-06, "loss": 0.1262, "step": 18850 }, { "epoch": 0.5539564119132938, "grad_norm": 201340.453125, "learning_rate": 4.460435880867063e-06, "loss": 0.1431, "step": 18860 }, { "epoch": 0.5542501321741173, "grad_norm": 371819.59375, "learning_rate": 4.457498678258827e-06, "loss": 0.1298, "step": 18870 }, { "epoch": 0.554543852434941, "grad_norm": 147432.359375, "learning_rate": 4.4545614756505905e-06, "loss": 0.1383, "step": 18880 }, { "epoch": 0.5548375726957645, "grad_norm": 162979.265625, "learning_rate": 4.451624273042355e-06, "loss": 0.1335, "step": 18890 }, { "epoch": 0.5551312929565881, "grad_norm": 143561.46875, "learning_rate": 4.448687070434119e-06, "loss": 0.133, "step": 18900 }, { "epoch": 0.5554250132174118, "grad_norm": 118938.0078125, "learning_rate": 4.445749867825883e-06, "loss": 0.1471, "step": 18910 }, { "epoch": 0.5557187334782353, "grad_norm": 160810.0, "learning_rate": 4.442812665217647e-06, "loss": 0.1309, "step": 18920 }, { "epoch": 0.5560124537390589, "grad_norm": 110208.03125, "learning_rate": 4.439875462609411e-06, "loss": 0.1311, "step": 18930 }, { "epoch": 0.5563061739998825, "grad_norm": 232020.90625, "learning_rate": 4.436938260001175e-06, "loss": 0.131, "step": 18940 }, { "epoch": 0.5565998942607061, "grad_norm": 153478.34375, "learning_rate": 4.43400105739294e-06, "loss": 0.1299, "step": 18950 }, { "epoch": 0.5568936145215297, "grad_norm": 215265.65625, "learning_rate": 4.431063854784703e-06, "loss": 0.1204, "step": 18960 }, { "epoch": 0.5571873347823533, "grad_norm": 151193.515625, "learning_rate": 4.428126652176468e-06, "loss": 0.1391, "step": 18970 }, { "epoch": 0.5574810550431769, "grad_norm": 148156.703125, "learning_rate": 4.425189449568232e-06, "loss": 0.1401, "step": 18980 }, { "epoch": 0.5577747753040004, "grad_norm": 469397.9375, "learning_rate": 4.4222522469599956e-06, "loss": 0.1325, "step": 18990 }, { "epoch": 0.5580684955648241, "grad_norm": 245037.25, "learning_rate": 4.419315044351759e-06, "loss": 0.1426, "step": 19000 }, { "epoch": 0.5583622158256476, "grad_norm": 126274.2421875, "learning_rate": 4.416377841743524e-06, "loss": 0.1293, "step": 19010 }, { "epoch": 0.5586559360864712, "grad_norm": 252202.578125, "learning_rate": 4.413440639135288e-06, "loss": 0.1444, "step": 19020 }, { "epoch": 0.5589496563472949, "grad_norm": 163657.796875, "learning_rate": 4.410503436527052e-06, "loss": 0.1329, "step": 19030 }, { "epoch": 0.5592433766081184, "grad_norm": 162087.0625, "learning_rate": 4.407566233918816e-06, "loss": 0.143, "step": 19040 }, { "epoch": 0.559537096868942, "grad_norm": 143252.03125, "learning_rate": 4.40462903131058e-06, "loss": 0.1359, "step": 19050 }, { "epoch": 0.5598308171297656, "grad_norm": 208670.90625, "learning_rate": 4.401691828702344e-06, "loss": 0.1336, "step": 19060 }, { "epoch": 0.5601245373905892, "grad_norm": 152675.875, "learning_rate": 4.398754626094108e-06, "loss": 0.1244, "step": 19070 }, { "epoch": 0.5604182576514128, "grad_norm": 118949.171875, "learning_rate": 4.395817423485872e-06, "loss": 0.129, "step": 19080 }, { "epoch": 0.5607119779122364, "grad_norm": 122906.203125, "learning_rate": 4.392880220877637e-06, "loss": 0.1306, "step": 19090 }, { "epoch": 0.56100569817306, "grad_norm": 143880.96875, "learning_rate": 4.389943018269401e-06, "loss": 0.1399, "step": 19100 }, { "epoch": 0.5612994184338835, "grad_norm": 228097.890625, "learning_rate": 4.387005815661164e-06, "loss": 0.148, "step": 19110 }, { "epoch": 0.5615931386947072, "grad_norm": 234658.953125, "learning_rate": 4.384068613052929e-06, "loss": 0.1312, "step": 19120 }, { "epoch": 0.5618868589555307, "grad_norm": 209854.875, "learning_rate": 4.381131410444693e-06, "loss": 0.1309, "step": 19130 }, { "epoch": 0.5621805792163543, "grad_norm": 186674.6875, "learning_rate": 4.378194207836457e-06, "loss": 0.1432, "step": 19140 }, { "epoch": 0.562474299477178, "grad_norm": 174023.8125, "learning_rate": 4.375257005228221e-06, "loss": 0.1115, "step": 19150 }, { "epoch": 0.5627680197380015, "grad_norm": 126978.6015625, "learning_rate": 4.372319802619985e-06, "loss": 0.1351, "step": 19160 }, { "epoch": 0.5630617399988251, "grad_norm": 247551.84375, "learning_rate": 4.369382600011749e-06, "loss": 0.1253, "step": 19170 }, { "epoch": 0.5633554602596487, "grad_norm": 107736.6875, "learning_rate": 4.3664453974035134e-06, "loss": 0.1283, "step": 19180 }, { "epoch": 0.5636491805204723, "grad_norm": 130190.9765625, "learning_rate": 4.363508194795277e-06, "loss": 0.1313, "step": 19190 }, { "epoch": 0.5639429007812959, "grad_norm": 168582.203125, "learning_rate": 4.360570992187042e-06, "loss": 0.1335, "step": 19200 }, { "epoch": 0.5642366210421195, "grad_norm": 130095.5078125, "learning_rate": 4.357633789578806e-06, "loss": 0.1237, "step": 19210 }, { "epoch": 0.5645303413029431, "grad_norm": 218665.171875, "learning_rate": 4.35469658697057e-06, "loss": 0.1401, "step": 19220 }, { "epoch": 0.5648240615637666, "grad_norm": 111713.4375, "learning_rate": 4.351759384362333e-06, "loss": 0.132, "step": 19230 }, { "epoch": 0.5651177818245903, "grad_norm": 144644.640625, "learning_rate": 4.348822181754098e-06, "loss": 0.1487, "step": 19240 }, { "epoch": 0.5654115020854138, "grad_norm": 289668.65625, "learning_rate": 4.345884979145862e-06, "loss": 0.1347, "step": 19250 }, { "epoch": 0.5657052223462374, "grad_norm": 178716.640625, "learning_rate": 4.342947776537626e-06, "loss": 0.1339, "step": 19260 }, { "epoch": 0.5659989426070611, "grad_norm": 205225.703125, "learning_rate": 4.34001057392939e-06, "loss": 0.1284, "step": 19270 }, { "epoch": 0.5662926628678846, "grad_norm": 132035.359375, "learning_rate": 4.337073371321155e-06, "loss": 0.1352, "step": 19280 }, { "epoch": 0.5665863831287082, "grad_norm": 141069.046875, "learning_rate": 4.334136168712918e-06, "loss": 0.1322, "step": 19290 }, { "epoch": 0.5668801033895318, "grad_norm": 145988.53125, "learning_rate": 4.331198966104682e-06, "loss": 0.1278, "step": 19300 }, { "epoch": 0.5671738236503554, "grad_norm": 133740.828125, "learning_rate": 4.328261763496446e-06, "loss": 0.1473, "step": 19310 }, { "epoch": 0.567467543911179, "grad_norm": 158371.203125, "learning_rate": 4.325324560888211e-06, "loss": 0.1285, "step": 19320 }, { "epoch": 0.5677612641720026, "grad_norm": 168471.09375, "learning_rate": 4.3223873582799745e-06, "loss": 0.1343, "step": 19330 }, { "epoch": 0.5680549844328262, "grad_norm": 105714.1796875, "learning_rate": 4.319450155671739e-06, "loss": 0.1311, "step": 19340 }, { "epoch": 0.5683487046936497, "grad_norm": 170467.125, "learning_rate": 4.316512953063503e-06, "loss": 0.1346, "step": 19350 }, { "epoch": 0.5686424249544734, "grad_norm": 376429.125, "learning_rate": 4.313575750455267e-06, "loss": 0.1328, "step": 19360 }, { "epoch": 0.568936145215297, "grad_norm": 215678.03125, "learning_rate": 4.3106385478470304e-06, "loss": 0.129, "step": 19370 }, { "epoch": 0.5692298654761205, "grad_norm": 180557.71875, "learning_rate": 4.307701345238795e-06, "loss": 0.1349, "step": 19380 }, { "epoch": 0.5695235857369442, "grad_norm": 133490.609375, "learning_rate": 4.304764142630559e-06, "loss": 0.1415, "step": 19390 }, { "epoch": 0.5698173059977677, "grad_norm": 180409.078125, "learning_rate": 4.301826940022323e-06, "loss": 0.1321, "step": 19400 }, { "epoch": 0.5701110262585913, "grad_norm": 192105.578125, "learning_rate": 4.298889737414087e-06, "loss": 0.1241, "step": 19410 }, { "epoch": 0.5704047465194149, "grad_norm": 203302.46875, "learning_rate": 4.295952534805851e-06, "loss": 0.142, "step": 19420 }, { "epoch": 0.5706984667802385, "grad_norm": 186819.734375, "learning_rate": 4.293015332197616e-06, "loss": 0.1301, "step": 19430 }, { "epoch": 0.5709921870410621, "grad_norm": 206158.28125, "learning_rate": 4.2900781295893795e-06, "loss": 0.137, "step": 19440 }, { "epoch": 0.5712859073018857, "grad_norm": 173238.421875, "learning_rate": 4.287140926981144e-06, "loss": 0.132, "step": 19450 }, { "epoch": 0.5715796275627093, "grad_norm": 148299.5, "learning_rate": 4.284203724372907e-06, "loss": 0.1205, "step": 19460 }, { "epoch": 0.5718733478235328, "grad_norm": 201469.375, "learning_rate": 4.281266521764672e-06, "loss": 0.1306, "step": 19470 }, { "epoch": 0.5721670680843565, "grad_norm": 200679.78125, "learning_rate": 4.2783293191564355e-06, "loss": 0.1332, "step": 19480 }, { "epoch": 0.5724607883451801, "grad_norm": 195174.6875, "learning_rate": 4.2753921165482e-06, "loss": 0.1406, "step": 19490 }, { "epoch": 0.5727545086060036, "grad_norm": 109344.4765625, "learning_rate": 4.272454913939964e-06, "loss": 0.1394, "step": 19500 }, { "epoch": 0.5730482288668273, "grad_norm": 154944.09375, "learning_rate": 4.2695177113317285e-06, "loss": 0.1362, "step": 19510 }, { "epoch": 0.5733419491276508, "grad_norm": 136383.3125, "learning_rate": 4.2665805087234915e-06, "loss": 0.1229, "step": 19520 }, { "epoch": 0.5736356693884744, "grad_norm": 141830.46875, "learning_rate": 4.263643306115256e-06, "loss": 0.1289, "step": 19530 }, { "epoch": 0.573929389649298, "grad_norm": 197130.8125, "learning_rate": 4.26070610350702e-06, "loss": 0.1475, "step": 19540 }, { "epoch": 0.5742231099101216, "grad_norm": 468539.75, "learning_rate": 4.2577689008987845e-06, "loss": 0.1329, "step": 19550 }, { "epoch": 0.5745168301709452, "grad_norm": 114585.9609375, "learning_rate": 4.254831698290548e-06, "loss": 0.1414, "step": 19560 }, { "epoch": 0.5748105504317688, "grad_norm": 172483.296875, "learning_rate": 4.251894495682313e-06, "loss": 0.1278, "step": 19570 }, { "epoch": 0.5751042706925924, "grad_norm": 154103.984375, "learning_rate": 4.248957293074077e-06, "loss": 0.1343, "step": 19580 }, { "epoch": 0.5753979909534159, "grad_norm": 279139.9375, "learning_rate": 4.2460200904658405e-06, "loss": 0.137, "step": 19590 }, { "epoch": 0.5756917112142396, "grad_norm": 93788.046875, "learning_rate": 4.243082887857604e-06, "loss": 0.1416, "step": 19600 }, { "epoch": 0.5759854314750632, "grad_norm": 178302.421875, "learning_rate": 4.240145685249369e-06, "loss": 0.1446, "step": 19610 }, { "epoch": 0.5762791517358867, "grad_norm": 186002.171875, "learning_rate": 4.237208482641133e-06, "loss": 0.1197, "step": 19620 }, { "epoch": 0.5765728719967104, "grad_norm": 231235.125, "learning_rate": 4.234271280032897e-06, "loss": 0.1383, "step": 19630 }, { "epoch": 0.5768665922575339, "grad_norm": 125342.859375, "learning_rate": 4.231334077424661e-06, "loss": 0.1232, "step": 19640 }, { "epoch": 0.5771603125183575, "grad_norm": 349492.875, "learning_rate": 4.228396874816425e-06, "loss": 0.1273, "step": 19650 }, { "epoch": 0.5774540327791811, "grad_norm": 177517.15625, "learning_rate": 4.2254596722081895e-06, "loss": 0.1348, "step": 19660 }, { "epoch": 0.5777477530400047, "grad_norm": 213086.390625, "learning_rate": 4.222522469599953e-06, "loss": 0.1289, "step": 19670 }, { "epoch": 0.5780414733008283, "grad_norm": 268785.8125, "learning_rate": 4.219585266991718e-06, "loss": 0.1521, "step": 19680 }, { "epoch": 0.5783351935616519, "grad_norm": 140197.0625, "learning_rate": 4.216648064383482e-06, "loss": 0.1336, "step": 19690 }, { "epoch": 0.5786289138224755, "grad_norm": 113738.0234375, "learning_rate": 4.2137108617752455e-06, "loss": 0.1222, "step": 19700 }, { "epoch": 0.578922634083299, "grad_norm": 172504.984375, "learning_rate": 4.210773659167009e-06, "loss": 0.1145, "step": 19710 }, { "epoch": 0.5792163543441227, "grad_norm": 220625.078125, "learning_rate": 4.207836456558774e-06, "loss": 0.1263, "step": 19720 }, { "epoch": 0.5795100746049463, "grad_norm": 228362.671875, "learning_rate": 4.204899253950538e-06, "loss": 0.1321, "step": 19730 }, { "epoch": 0.5798037948657698, "grad_norm": 266373.0625, "learning_rate": 4.201962051342302e-06, "loss": 0.1394, "step": 19740 }, { "epoch": 0.5800975151265935, "grad_norm": 240315.453125, "learning_rate": 4.199024848734066e-06, "loss": 0.1333, "step": 19750 }, { "epoch": 0.580391235387417, "grad_norm": 87485.6328125, "learning_rate": 4.19608764612583e-06, "loss": 0.132, "step": 19760 }, { "epoch": 0.5806849556482406, "grad_norm": 244759.21875, "learning_rate": 4.193150443517594e-06, "loss": 0.1226, "step": 19770 }, { "epoch": 0.5809786759090642, "grad_norm": 196821.9375, "learning_rate": 4.190213240909358e-06, "loss": 0.1323, "step": 19780 }, { "epoch": 0.5812723961698878, "grad_norm": 196409.40625, "learning_rate": 4.187276038301122e-06, "loss": 0.1337, "step": 19790 }, { "epoch": 0.5815661164307114, "grad_norm": 171972.21875, "learning_rate": 4.184338835692887e-06, "loss": 0.1327, "step": 19800 }, { "epoch": 0.581859836691535, "grad_norm": 256576.1875, "learning_rate": 4.1814016330846506e-06, "loss": 0.131, "step": 19810 }, { "epoch": 0.5821535569523586, "grad_norm": 161864.21875, "learning_rate": 4.178464430476414e-06, "loss": 0.1155, "step": 19820 }, { "epoch": 0.5824472772131821, "grad_norm": 94650.0625, "learning_rate": 4.175527227868178e-06, "loss": 0.1426, "step": 19830 }, { "epoch": 0.5827409974740058, "grad_norm": 288930.5625, "learning_rate": 4.172590025259943e-06, "loss": 0.1424, "step": 19840 }, { "epoch": 0.5830347177348294, "grad_norm": 236698.703125, "learning_rate": 4.1696528226517066e-06, "loss": 0.14, "step": 19850 }, { "epoch": 0.5833284379956529, "grad_norm": 107895.7734375, "learning_rate": 4.166715620043471e-06, "loss": 0.1347, "step": 19860 }, { "epoch": 0.5836221582564766, "grad_norm": 229393.984375, "learning_rate": 4.163778417435235e-06, "loss": 0.1328, "step": 19870 }, { "epoch": 0.5839158785173001, "grad_norm": 195969.96875, "learning_rate": 4.160841214826999e-06, "loss": 0.1372, "step": 19880 }, { "epoch": 0.5842095987781237, "grad_norm": 322127.0, "learning_rate": 4.157904012218763e-06, "loss": 0.1358, "step": 19890 }, { "epoch": 0.5845033190389473, "grad_norm": 187826.0625, "learning_rate": 4.154966809610527e-06, "loss": 0.1287, "step": 19900 }, { "epoch": 0.5847970392997709, "grad_norm": 121977.9453125, "learning_rate": 4.152029607002292e-06, "loss": 0.128, "step": 19910 }, { "epoch": 0.5850907595605945, "grad_norm": 262632.90625, "learning_rate": 4.149092404394056e-06, "loss": 0.1289, "step": 19920 }, { "epoch": 0.5853844798214181, "grad_norm": 136459.671875, "learning_rate": 4.146155201785819e-06, "loss": 0.135, "step": 19930 }, { "epoch": 0.5856782000822417, "grad_norm": 109030.8046875, "learning_rate": 4.143217999177583e-06, "loss": 0.1349, "step": 19940 }, { "epoch": 0.5859719203430652, "grad_norm": 135564.71875, "learning_rate": 4.140280796569348e-06, "loss": 0.1284, "step": 19950 }, { "epoch": 0.5862656406038889, "grad_norm": 197721.5, "learning_rate": 4.137343593961112e-06, "loss": 0.1397, "step": 19960 }, { "epoch": 0.5865593608647125, "grad_norm": 188785.078125, "learning_rate": 4.134406391352876e-06, "loss": 0.1418, "step": 19970 }, { "epoch": 0.586853081125536, "grad_norm": 129679.6484375, "learning_rate": 4.13146918874464e-06, "loss": 0.1343, "step": 19980 }, { "epoch": 0.5871468013863597, "grad_norm": 161517.46875, "learning_rate": 4.128531986136405e-06, "loss": 0.1384, "step": 19990 }, { "epoch": 0.5874405216471832, "grad_norm": 124163.0390625, "learning_rate": 4.1255947835281676e-06, "loss": 0.1297, "step": 20000 }, { "epoch": 0.5877342419080068, "grad_norm": 194213.265625, "learning_rate": 4.122657580919932e-06, "loss": 0.1291, "step": 20010 }, { "epoch": 0.5880279621688304, "grad_norm": 126513.59375, "learning_rate": 4.119720378311696e-06, "loss": 0.1405, "step": 20020 }, { "epoch": 0.588321682429654, "grad_norm": 196932.328125, "learning_rate": 4.116783175703461e-06, "loss": 0.1294, "step": 20030 }, { "epoch": 0.5886154026904776, "grad_norm": 153861.53125, "learning_rate": 4.113845973095224e-06, "loss": 0.1203, "step": 20040 }, { "epoch": 0.5889091229513012, "grad_norm": 204548.53125, "learning_rate": 4.110908770486989e-06, "loss": 0.1303, "step": 20050 }, { "epoch": 0.5892028432121248, "grad_norm": 146011.71875, "learning_rate": 4.107971567878752e-06, "loss": 0.1392, "step": 20060 }, { "epoch": 0.5894965634729483, "grad_norm": 155864.421875, "learning_rate": 4.105034365270517e-06, "loss": 0.1332, "step": 20070 }, { "epoch": 0.589790283733772, "grad_norm": 101985.71875, "learning_rate": 4.10209716266228e-06, "loss": 0.1236, "step": 20080 }, { "epoch": 0.5900840039945956, "grad_norm": 146565.984375, "learning_rate": 4.099159960054045e-06, "loss": 0.1327, "step": 20090 }, { "epoch": 0.5903777242554191, "grad_norm": 162339.125, "learning_rate": 4.096222757445809e-06, "loss": 0.147, "step": 20100 }, { "epoch": 0.5906714445162428, "grad_norm": 162467.046875, "learning_rate": 4.0932855548375734e-06, "loss": 0.1343, "step": 20110 }, { "epoch": 0.5909651647770663, "grad_norm": 207741.046875, "learning_rate": 4.090348352229337e-06, "loss": 0.139, "step": 20120 }, { "epoch": 0.5912588850378899, "grad_norm": 133799.09375, "learning_rate": 4.087411149621101e-06, "loss": 0.1329, "step": 20130 }, { "epoch": 0.5915526052987135, "grad_norm": 256967.125, "learning_rate": 4.084473947012866e-06, "loss": 0.1396, "step": 20140 }, { "epoch": 0.5918463255595371, "grad_norm": 205315.203125, "learning_rate": 4.0815367444046294e-06, "loss": 0.1247, "step": 20150 }, { "epoch": 0.5921400458203607, "grad_norm": 199664.109375, "learning_rate": 4.078599541796393e-06, "loss": 0.133, "step": 20160 }, { "epoch": 0.5924337660811843, "grad_norm": 137491.203125, "learning_rate": 4.075662339188158e-06, "loss": 0.1286, "step": 20170 }, { "epoch": 0.5927274863420079, "grad_norm": 160388.75, "learning_rate": 4.072725136579922e-06, "loss": 0.1372, "step": 20180 }, { "epoch": 0.5930212066028314, "grad_norm": 196758.984375, "learning_rate": 4.0697879339716854e-06, "loss": 0.1277, "step": 20190 }, { "epoch": 0.5933149268636551, "grad_norm": 176526.0625, "learning_rate": 4.06685073136345e-06, "loss": 0.1332, "step": 20200 }, { "epoch": 0.5936086471244787, "grad_norm": 238992.171875, "learning_rate": 4.063913528755214e-06, "loss": 0.1282, "step": 20210 }, { "epoch": 0.5939023673853022, "grad_norm": 190986.484375, "learning_rate": 4.0609763261469785e-06, "loss": 0.131, "step": 20220 }, { "epoch": 0.5941960876461259, "grad_norm": 167066.921875, "learning_rate": 4.058039123538742e-06, "loss": 0.1248, "step": 20230 }, { "epoch": 0.5944898079069494, "grad_norm": 177155.90625, "learning_rate": 4.055101920930506e-06, "loss": 0.1321, "step": 20240 }, { "epoch": 0.594783528167773, "grad_norm": 89911.796875, "learning_rate": 4.05216471832227e-06, "loss": 0.1277, "step": 20250 }, { "epoch": 0.5950772484285966, "grad_norm": 166302.65625, "learning_rate": 4.0492275157140345e-06, "loss": 0.1256, "step": 20260 }, { "epoch": 0.5953709686894202, "grad_norm": 235511.40625, "learning_rate": 4.046290313105798e-06, "loss": 0.1364, "step": 20270 }, { "epoch": 0.5956646889502438, "grad_norm": 251708.125, "learning_rate": 4.043353110497563e-06, "loss": 0.1341, "step": 20280 }, { "epoch": 0.5959584092110674, "grad_norm": 165072.3125, "learning_rate": 4.040415907889327e-06, "loss": 0.1254, "step": 20290 }, { "epoch": 0.596252129471891, "grad_norm": 203821.21875, "learning_rate": 4.0374787052810905e-06, "loss": 0.1512, "step": 20300 }, { "epoch": 0.5965458497327145, "grad_norm": 140605.0625, "learning_rate": 4.034541502672854e-06, "loss": 0.1222, "step": 20310 }, { "epoch": 0.5968395699935382, "grad_norm": 170799.796875, "learning_rate": 4.031604300064619e-06, "loss": 0.1245, "step": 20320 }, { "epoch": 0.5971332902543618, "grad_norm": 642723.8125, "learning_rate": 4.028667097456383e-06, "loss": 0.1344, "step": 20330 }, { "epoch": 0.5974270105151853, "grad_norm": 191665.765625, "learning_rate": 4.025729894848147e-06, "loss": 0.1418, "step": 20340 }, { "epoch": 0.597720730776009, "grad_norm": 143950.6875, "learning_rate": 4.022792692239911e-06, "loss": 0.1474, "step": 20350 }, { "epoch": 0.5980144510368325, "grad_norm": 115630.0703125, "learning_rate": 4.019855489631675e-06, "loss": 0.1247, "step": 20360 }, { "epoch": 0.5983081712976561, "grad_norm": 175138.171875, "learning_rate": 4.0169182870234395e-06, "loss": 0.1352, "step": 20370 }, { "epoch": 0.5986018915584796, "grad_norm": 169551.765625, "learning_rate": 4.013981084415203e-06, "loss": 0.1197, "step": 20380 }, { "epoch": 0.5988956118193033, "grad_norm": 165891.8125, "learning_rate": 4.011043881806967e-06, "loss": 0.1478, "step": 20390 }, { "epoch": 0.5991893320801269, "grad_norm": 196062.71875, "learning_rate": 4.008106679198732e-06, "loss": 0.1304, "step": 20400 }, { "epoch": 0.5994830523409505, "grad_norm": 142650.6875, "learning_rate": 4.0051694765904955e-06, "loss": 0.1417, "step": 20410 }, { "epoch": 0.5997767726017741, "grad_norm": 112190.1328125, "learning_rate": 4.002232273982259e-06, "loss": 0.1374, "step": 20420 }, { "epoch": 0.6000704928625976, "grad_norm": 119580.4296875, "learning_rate": 3.999295071374024e-06, "loss": 0.1269, "step": 20430 }, { "epoch": 0.6003642131234213, "grad_norm": 125104.03125, "learning_rate": 3.996357868765788e-06, "loss": 0.1274, "step": 20440 }, { "epoch": 0.6006579333842449, "grad_norm": 182104.125, "learning_rate": 3.993420666157552e-06, "loss": 0.1258, "step": 20450 }, { "epoch": 0.6009516536450684, "grad_norm": 142056.796875, "learning_rate": 3.990483463549316e-06, "loss": 0.1329, "step": 20460 }, { "epoch": 0.601245373905892, "grad_norm": 119513.09375, "learning_rate": 3.98754626094108e-06, "loss": 0.1268, "step": 20470 }, { "epoch": 0.6015390941667156, "grad_norm": 139123.53125, "learning_rate": 3.984609058332844e-06, "loss": 0.1281, "step": 20480 }, { "epoch": 0.6018328144275392, "grad_norm": 268848.6875, "learning_rate": 3.981671855724608e-06, "loss": 0.1328, "step": 20490 }, { "epoch": 0.6021265346883629, "grad_norm": 146035.75, "learning_rate": 3.978734653116372e-06, "loss": 0.1274, "step": 20500 }, { "epoch": 0.6024202549491864, "grad_norm": 148400.5, "learning_rate": 3.975797450508137e-06, "loss": 0.1239, "step": 20510 }, { "epoch": 0.60271397521001, "grad_norm": 163948.640625, "learning_rate": 3.9728602478999005e-06, "loss": 0.1324, "step": 20520 }, { "epoch": 0.6030076954708335, "grad_norm": 148222.828125, "learning_rate": 3.969923045291664e-06, "loss": 0.1245, "step": 20530 }, { "epoch": 0.6033014157316572, "grad_norm": 144470.890625, "learning_rate": 3.966985842683428e-06, "loss": 0.1374, "step": 20540 }, { "epoch": 0.6035951359924807, "grad_norm": 134027.953125, "learning_rate": 3.964048640075193e-06, "loss": 0.1162, "step": 20550 }, { "epoch": 0.6038888562533044, "grad_norm": 171985.609375, "learning_rate": 3.9611114374669565e-06, "loss": 0.1243, "step": 20560 }, { "epoch": 0.604182576514128, "grad_norm": 323818.0, "learning_rate": 3.958174234858721e-06, "loss": 0.1257, "step": 20570 }, { "epoch": 0.6044762967749515, "grad_norm": 224496.953125, "learning_rate": 3.955237032250485e-06, "loss": 0.1326, "step": 20580 }, { "epoch": 0.6047700170357752, "grad_norm": 122852.015625, "learning_rate": 3.952299829642249e-06, "loss": 0.1319, "step": 20590 }, { "epoch": 0.6050637372965987, "grad_norm": 141083.84375, "learning_rate": 3.949362627034013e-06, "loss": 0.1244, "step": 20600 }, { "epoch": 0.6053574575574223, "grad_norm": 327487.90625, "learning_rate": 3.946425424425777e-06, "loss": 0.1323, "step": 20610 }, { "epoch": 0.605651177818246, "grad_norm": 211256.40625, "learning_rate": 3.943488221817541e-06, "loss": 0.1357, "step": 20620 }, { "epoch": 0.6059448980790695, "grad_norm": 164892.25, "learning_rate": 3.9405510192093055e-06, "loss": 0.1242, "step": 20630 }, { "epoch": 0.6062386183398931, "grad_norm": 188943.59375, "learning_rate": 3.937613816601069e-06, "loss": 0.1362, "step": 20640 }, { "epoch": 0.6065323386007166, "grad_norm": 198671.859375, "learning_rate": 3.934676613992833e-06, "loss": 0.1167, "step": 20650 }, { "epoch": 0.6068260588615403, "grad_norm": 224887.109375, "learning_rate": 3.931739411384598e-06, "loss": 0.1308, "step": 20660 }, { "epoch": 0.6071197791223638, "grad_norm": 201912.203125, "learning_rate": 3.9288022087763615e-06, "loss": 0.1226, "step": 20670 }, { "epoch": 0.6074134993831874, "grad_norm": 139801.421875, "learning_rate": 3.925865006168126e-06, "loss": 0.1329, "step": 20680 }, { "epoch": 0.6077072196440111, "grad_norm": 133613.796875, "learning_rate": 3.92292780355989e-06, "loss": 0.1341, "step": 20690 }, { "epoch": 0.6080009399048346, "grad_norm": 121561.28125, "learning_rate": 3.919990600951655e-06, "loss": 0.1328, "step": 20700 }, { "epoch": 0.6082946601656583, "grad_norm": 243071.5, "learning_rate": 3.9170533983434175e-06, "loss": 0.1414, "step": 20710 }, { "epoch": 0.6085883804264818, "grad_norm": 124730.9453125, "learning_rate": 3.914116195735182e-06, "loss": 0.1334, "step": 20720 }, { "epoch": 0.6088821006873054, "grad_norm": 171567.1875, "learning_rate": 3.911178993126946e-06, "loss": 0.1353, "step": 20730 }, { "epoch": 0.609175820948129, "grad_norm": 158724.390625, "learning_rate": 3.908241790518711e-06, "loss": 0.1331, "step": 20740 }, { "epoch": 0.6094695412089526, "grad_norm": 159401.9375, "learning_rate": 3.905304587910474e-06, "loss": 0.1394, "step": 20750 }, { "epoch": 0.6097632614697762, "grad_norm": 201227.765625, "learning_rate": 3.902367385302239e-06, "loss": 0.1387, "step": 20760 }, { "epoch": 0.6100569817305997, "grad_norm": 108653.9296875, "learning_rate": 3.899430182694002e-06, "loss": 0.1234, "step": 20770 }, { "epoch": 0.6103507019914234, "grad_norm": 118811.1484375, "learning_rate": 3.8964929800857666e-06, "loss": 0.143, "step": 20780 }, { "epoch": 0.6106444222522469, "grad_norm": 140939.140625, "learning_rate": 3.89355577747753e-06, "loss": 0.1301, "step": 20790 }, { "epoch": 0.6109381425130705, "grad_norm": 164571.671875, "learning_rate": 3.890618574869295e-06, "loss": 0.1351, "step": 20800 }, { "epoch": 0.6112318627738942, "grad_norm": 124400.4609375, "learning_rate": 3.887681372261059e-06, "loss": 0.1178, "step": 20810 }, { "epoch": 0.6115255830347177, "grad_norm": 101890.859375, "learning_rate": 3.884744169652823e-06, "loss": 0.131, "step": 20820 }, { "epoch": 0.6118193032955414, "grad_norm": 146497.234375, "learning_rate": 3.881806967044587e-06, "loss": 0.1374, "step": 20830 }, { "epoch": 0.6121130235563649, "grad_norm": 129567.171875, "learning_rate": 3.878869764436351e-06, "loss": 0.1207, "step": 20840 }, { "epoch": 0.6124067438171885, "grad_norm": 167691.46875, "learning_rate": 3.875932561828115e-06, "loss": 0.1364, "step": 20850 }, { "epoch": 0.6127004640780122, "grad_norm": 175912.46875, "learning_rate": 3.872995359219879e-06, "loss": 0.1396, "step": 20860 }, { "epoch": 0.6129941843388357, "grad_norm": 105786.625, "learning_rate": 3.870058156611643e-06, "loss": 0.1382, "step": 20870 }, { "epoch": 0.6132879045996593, "grad_norm": 117097.2109375, "learning_rate": 3.867120954003408e-06, "loss": 0.1314, "step": 20880 }, { "epoch": 0.6135816248604828, "grad_norm": 166827.53125, "learning_rate": 3.864183751395172e-06, "loss": 0.136, "step": 20890 }, { "epoch": 0.6138753451213065, "grad_norm": 133331.390625, "learning_rate": 3.861246548786935e-06, "loss": 0.1263, "step": 20900 }, { "epoch": 0.61416906538213, "grad_norm": 167747.625, "learning_rate": 3.8583093461787e-06, "loss": 0.1327, "step": 20910 }, { "epoch": 0.6144627856429536, "grad_norm": 127530.515625, "learning_rate": 3.855372143570464e-06, "loss": 0.1237, "step": 20920 }, { "epoch": 0.6147565059037773, "grad_norm": 290102.75, "learning_rate": 3.8524349409622284e-06, "loss": 0.1383, "step": 20930 }, { "epoch": 0.6150502261646008, "grad_norm": 202433.90625, "learning_rate": 3.849497738353992e-06, "loss": 0.1141, "step": 20940 }, { "epoch": 0.6153439464254244, "grad_norm": 187627.875, "learning_rate": 3.846560535745756e-06, "loss": 0.1367, "step": 20950 }, { "epoch": 0.615637666686248, "grad_norm": 104463.1171875, "learning_rate": 3.84362333313752e-06, "loss": 0.1262, "step": 20960 }, { "epoch": 0.6159313869470716, "grad_norm": 244454.921875, "learning_rate": 3.8406861305292844e-06, "loss": 0.1346, "step": 20970 }, { "epoch": 0.6162251072078953, "grad_norm": 142700.0625, "learning_rate": 3.837748927921048e-06, "loss": 0.1233, "step": 20980 }, { "epoch": 0.6165188274687188, "grad_norm": 251673.5, "learning_rate": 3.834811725312813e-06, "loss": 0.1303, "step": 20990 }, { "epoch": 0.6168125477295424, "grad_norm": 201747.015625, "learning_rate": 3.831874522704577e-06, "loss": 0.127, "step": 21000 }, { "epoch": 0.6171062679903659, "grad_norm": 179689.859375, "learning_rate": 3.82893732009634e-06, "loss": 0.1315, "step": 21010 }, { "epoch": 0.6173999882511896, "grad_norm": 175809.90625, "learning_rate": 3.826000117488104e-06, "loss": 0.1217, "step": 21020 }, { "epoch": 0.6176937085120131, "grad_norm": 300751.5625, "learning_rate": 3.823062914879869e-06, "loss": 0.1299, "step": 21030 }, { "epoch": 0.6179874287728367, "grad_norm": 347428.0625, "learning_rate": 3.820125712271633e-06, "loss": 0.1213, "step": 21040 }, { "epoch": 0.6182811490336604, "grad_norm": 166948.53125, "learning_rate": 3.817188509663397e-06, "loss": 0.1328, "step": 21050 }, { "epoch": 0.6185748692944839, "grad_norm": 120875.7734375, "learning_rate": 3.814251307055161e-06, "loss": 0.1317, "step": 21060 }, { "epoch": 0.6188685895553075, "grad_norm": 141389.484375, "learning_rate": 3.811314104446925e-06, "loss": 0.1253, "step": 21070 }, { "epoch": 0.6191623098161311, "grad_norm": 166199.859375, "learning_rate": 3.808376901838689e-06, "loss": 0.1243, "step": 21080 }, { "epoch": 0.6194560300769547, "grad_norm": 186381.828125, "learning_rate": 3.8054396992304532e-06, "loss": 0.1412, "step": 21090 }, { "epoch": 0.6197497503377783, "grad_norm": 163043.59375, "learning_rate": 3.8025024966222175e-06, "loss": 0.1452, "step": 21100 }, { "epoch": 0.6200434705986019, "grad_norm": 112956.0859375, "learning_rate": 3.7995652940139817e-06, "loss": 0.1319, "step": 21110 }, { "epoch": 0.6203371908594255, "grad_norm": 117550.375, "learning_rate": 3.796628091405746e-06, "loss": 0.1259, "step": 21120 }, { "epoch": 0.620630911120249, "grad_norm": 169067.359375, "learning_rate": 3.7936908887975092e-06, "loss": 0.1278, "step": 21130 }, { "epoch": 0.6209246313810727, "grad_norm": 152867.34375, "learning_rate": 3.7907536861892734e-06, "loss": 0.1294, "step": 21140 }, { "epoch": 0.6212183516418962, "grad_norm": 153915.640625, "learning_rate": 3.7878164835810376e-06, "loss": 0.1324, "step": 21150 }, { "epoch": 0.6215120719027198, "grad_norm": 206597.46875, "learning_rate": 3.784879280972802e-06, "loss": 0.1275, "step": 21160 }, { "epoch": 0.6218057921635435, "grad_norm": 103259.6796875, "learning_rate": 3.781942078364566e-06, "loss": 0.1279, "step": 21170 }, { "epoch": 0.622099512424367, "grad_norm": 122185.484375, "learning_rate": 3.7790048757563303e-06, "loss": 0.1338, "step": 21180 }, { "epoch": 0.6223932326851906, "grad_norm": 185931.1875, "learning_rate": 3.776067673148094e-06, "loss": 0.1249, "step": 21190 }, { "epoch": 0.6226869529460142, "grad_norm": 204115.1875, "learning_rate": 3.773130470539858e-06, "loss": 0.1262, "step": 21200 }, { "epoch": 0.6229806732068378, "grad_norm": 150617.40625, "learning_rate": 3.770193267931622e-06, "loss": 0.1285, "step": 21210 }, { "epoch": 0.6232743934676614, "grad_norm": 133204.21875, "learning_rate": 3.7672560653233863e-06, "loss": 0.1286, "step": 21220 }, { "epoch": 0.623568113728485, "grad_norm": 124998.5390625, "learning_rate": 3.7643188627151505e-06, "loss": 0.1229, "step": 21230 }, { "epoch": 0.6238618339893086, "grad_norm": 248133.90625, "learning_rate": 3.7613816601069147e-06, "loss": 0.1429, "step": 21240 }, { "epoch": 0.6241555542501321, "grad_norm": 107600.546875, "learning_rate": 3.7584444574986785e-06, "loss": 0.1279, "step": 21250 }, { "epoch": 0.6244492745109558, "grad_norm": 158450.4375, "learning_rate": 3.7555072548904427e-06, "loss": 0.1227, "step": 21260 }, { "epoch": 0.6247429947717793, "grad_norm": 217900.671875, "learning_rate": 3.752570052282207e-06, "loss": 0.1132, "step": 21270 }, { "epoch": 0.6250367150326029, "grad_norm": 222517.671875, "learning_rate": 3.749632849673971e-06, "loss": 0.1297, "step": 21280 }, { "epoch": 0.6253304352934266, "grad_norm": 144930.390625, "learning_rate": 3.746695647065735e-06, "loss": 0.1349, "step": 21290 }, { "epoch": 0.6256241555542501, "grad_norm": 145129.578125, "learning_rate": 3.743758444457499e-06, "loss": 0.1346, "step": 21300 }, { "epoch": 0.6259178758150737, "grad_norm": 151930.234375, "learning_rate": 3.740821241849263e-06, "loss": 0.1331, "step": 21310 }, { "epoch": 0.6262115960758973, "grad_norm": 125537.5, "learning_rate": 3.737884039241027e-06, "loss": 0.128, "step": 21320 }, { "epoch": 0.6265053163367209, "grad_norm": 161082.0, "learning_rate": 3.7349468366327913e-06, "loss": 0.1347, "step": 21330 }, { "epoch": 0.6267990365975445, "grad_norm": 133662.625, "learning_rate": 3.7320096340245555e-06, "loss": 0.1217, "step": 21340 }, { "epoch": 0.6270927568583681, "grad_norm": 149386.546875, "learning_rate": 3.7290724314163197e-06, "loss": 0.1362, "step": 21350 }, { "epoch": 0.6273864771191917, "grad_norm": 117846.875, "learning_rate": 3.726135228808084e-06, "loss": 0.1331, "step": 21360 }, { "epoch": 0.6276801973800152, "grad_norm": 154157.875, "learning_rate": 3.7231980261998473e-06, "loss": 0.1259, "step": 21370 }, { "epoch": 0.6279739176408389, "grad_norm": 205964.84375, "learning_rate": 3.7202608235916115e-06, "loss": 0.1218, "step": 21380 }, { "epoch": 0.6282676379016624, "grad_norm": 127863.203125, "learning_rate": 3.7173236209833757e-06, "loss": 0.1204, "step": 21390 }, { "epoch": 0.628561358162486, "grad_norm": 133021.625, "learning_rate": 3.71438641837514e-06, "loss": 0.1307, "step": 21400 }, { "epoch": 0.6288550784233097, "grad_norm": 159931.953125, "learning_rate": 3.711449215766904e-06, "loss": 0.1189, "step": 21410 }, { "epoch": 0.6291487986841332, "grad_norm": 132633.9375, "learning_rate": 3.708512013158668e-06, "loss": 0.1378, "step": 21420 }, { "epoch": 0.6294425189449568, "grad_norm": 166524.015625, "learning_rate": 3.7055748105504317e-06, "loss": 0.1262, "step": 21430 }, { "epoch": 0.6297362392057804, "grad_norm": 158015.59375, "learning_rate": 3.702637607942196e-06, "loss": 0.1354, "step": 21440 }, { "epoch": 0.630029959466604, "grad_norm": 112782.3046875, "learning_rate": 3.69970040533396e-06, "loss": 0.1315, "step": 21450 }, { "epoch": 0.6303236797274276, "grad_norm": 249521.71875, "learning_rate": 3.6967632027257243e-06, "loss": 0.1289, "step": 21460 }, { "epoch": 0.6306173999882512, "grad_norm": 228606.484375, "learning_rate": 3.6938260001174885e-06, "loss": 0.1347, "step": 21470 }, { "epoch": 0.6309111202490748, "grad_norm": 194494.765625, "learning_rate": 3.6908887975092523e-06, "loss": 0.1118, "step": 21480 }, { "epoch": 0.6312048405098983, "grad_norm": 168272.015625, "learning_rate": 3.6879515949010165e-06, "loss": 0.1253, "step": 21490 }, { "epoch": 0.631498560770722, "grad_norm": 213560.234375, "learning_rate": 3.6850143922927807e-06, "loss": 0.1202, "step": 21500 }, { "epoch": 0.6317922810315456, "grad_norm": 134400.453125, "learning_rate": 3.682077189684545e-06, "loss": 0.1265, "step": 21510 }, { "epoch": 0.6320860012923691, "grad_norm": 177740.734375, "learning_rate": 3.6791399870763087e-06, "loss": 0.1315, "step": 21520 }, { "epoch": 0.6323797215531928, "grad_norm": 160834.53125, "learning_rate": 3.676202784468073e-06, "loss": 0.1327, "step": 21530 }, { "epoch": 0.6326734418140163, "grad_norm": 148452.265625, "learning_rate": 3.6732655818598367e-06, "loss": 0.1375, "step": 21540 }, { "epoch": 0.6329671620748399, "grad_norm": 111941.296875, "learning_rate": 3.670328379251601e-06, "loss": 0.1303, "step": 21550 }, { "epoch": 0.6332608823356635, "grad_norm": 184742.578125, "learning_rate": 3.667391176643365e-06, "loss": 0.1252, "step": 21560 }, { "epoch": 0.6335546025964871, "grad_norm": 262210.8125, "learning_rate": 3.6644539740351294e-06, "loss": 0.1285, "step": 21570 }, { "epoch": 0.6338483228573107, "grad_norm": 196200.046875, "learning_rate": 3.6615167714268936e-06, "loss": 0.1421, "step": 21580 }, { "epoch": 0.6341420431181343, "grad_norm": 211069.421875, "learning_rate": 3.6585795688186578e-06, "loss": 0.1343, "step": 21590 }, { "epoch": 0.6344357633789579, "grad_norm": 146797.78125, "learning_rate": 3.655642366210421e-06, "loss": 0.1298, "step": 21600 }, { "epoch": 0.6347294836397814, "grad_norm": 71267.140625, "learning_rate": 3.6527051636021853e-06, "loss": 0.1252, "step": 21610 }, { "epoch": 0.6350232039006051, "grad_norm": 159670.671875, "learning_rate": 3.6497679609939496e-06, "loss": 0.1305, "step": 21620 }, { "epoch": 0.6353169241614287, "grad_norm": 100782.96875, "learning_rate": 3.6468307583857138e-06, "loss": 0.1222, "step": 21630 }, { "epoch": 0.6356106444222522, "grad_norm": 165235.859375, "learning_rate": 3.643893555777478e-06, "loss": 0.1271, "step": 21640 }, { "epoch": 0.6359043646830759, "grad_norm": 497018.3125, "learning_rate": 3.640956353169242e-06, "loss": 0.1253, "step": 21650 }, { "epoch": 0.6361980849438994, "grad_norm": 159420.03125, "learning_rate": 3.6380191505610055e-06, "loss": 0.1181, "step": 21660 }, { "epoch": 0.636491805204723, "grad_norm": 144907.5625, "learning_rate": 3.6350819479527697e-06, "loss": 0.1216, "step": 21670 }, { "epoch": 0.6367855254655466, "grad_norm": 150227.75, "learning_rate": 3.632144745344534e-06, "loss": 0.1245, "step": 21680 }, { "epoch": 0.6370792457263702, "grad_norm": 270763.84375, "learning_rate": 3.629207542736298e-06, "loss": 0.1343, "step": 21690 }, { "epoch": 0.6373729659871938, "grad_norm": 269547.03125, "learning_rate": 3.6262703401280624e-06, "loss": 0.1225, "step": 21700 }, { "epoch": 0.6376666862480174, "grad_norm": 192482.09375, "learning_rate": 3.6233331375198266e-06, "loss": 0.123, "step": 21710 }, { "epoch": 0.637960406508841, "grad_norm": 154225.484375, "learning_rate": 3.6203959349115904e-06, "loss": 0.1267, "step": 21720 }, { "epoch": 0.6382541267696645, "grad_norm": 129114.1796875, "learning_rate": 3.6174587323033546e-06, "loss": 0.1321, "step": 21730 }, { "epoch": 0.6385478470304882, "grad_norm": 96326.015625, "learning_rate": 3.614521529695119e-06, "loss": 0.1304, "step": 21740 }, { "epoch": 0.6388415672913118, "grad_norm": 121756.0546875, "learning_rate": 3.6115843270868826e-06, "loss": 0.1315, "step": 21750 }, { "epoch": 0.6391352875521353, "grad_norm": 129517.9765625, "learning_rate": 3.6086471244786468e-06, "loss": 0.1371, "step": 21760 }, { "epoch": 0.639429007812959, "grad_norm": 135876.96875, "learning_rate": 3.605709921870411e-06, "loss": 0.1282, "step": 21770 }, { "epoch": 0.6397227280737825, "grad_norm": 103555.140625, "learning_rate": 3.6027727192621748e-06, "loss": 0.1242, "step": 21780 }, { "epoch": 0.6400164483346061, "grad_norm": 141509.96875, "learning_rate": 3.599835516653939e-06, "loss": 0.1291, "step": 21790 }, { "epoch": 0.6403101685954297, "grad_norm": 135166.1875, "learning_rate": 3.596898314045703e-06, "loss": 0.1384, "step": 21800 }, { "epoch": 0.6406038888562533, "grad_norm": 110812.5703125, "learning_rate": 3.5939611114374674e-06, "loss": 0.1296, "step": 21810 }, { "epoch": 0.6408976091170769, "grad_norm": 117062.578125, "learning_rate": 3.5910239088292316e-06, "loss": 0.1301, "step": 21820 }, { "epoch": 0.6411913293779005, "grad_norm": 180458.6875, "learning_rate": 3.588086706220996e-06, "loss": 0.1353, "step": 21830 }, { "epoch": 0.6414850496387241, "grad_norm": 136964.296875, "learning_rate": 3.585149503612759e-06, "loss": 0.1331, "step": 21840 }, { "epoch": 0.6417787698995476, "grad_norm": 134571.125, "learning_rate": 3.5822123010045234e-06, "loss": 0.1237, "step": 21850 }, { "epoch": 0.6420724901603713, "grad_norm": 193002.328125, "learning_rate": 3.5792750983962876e-06, "loss": 0.1151, "step": 21860 }, { "epoch": 0.6423662104211949, "grad_norm": 146783.296875, "learning_rate": 3.576337895788052e-06, "loss": 0.1283, "step": 21870 }, { "epoch": 0.6426599306820184, "grad_norm": 249630.078125, "learning_rate": 3.573400693179816e-06, "loss": 0.1171, "step": 21880 }, { "epoch": 0.6429536509428421, "grad_norm": 131438.5, "learning_rate": 3.5704634905715802e-06, "loss": 0.1306, "step": 21890 }, { "epoch": 0.6432473712036656, "grad_norm": 134157.875, "learning_rate": 3.5675262879633436e-06, "loss": 0.1363, "step": 21900 }, { "epoch": 0.6435410914644892, "grad_norm": 170148.890625, "learning_rate": 3.564589085355108e-06, "loss": 0.1184, "step": 21910 }, { "epoch": 0.6438348117253128, "grad_norm": 133147.484375, "learning_rate": 3.561651882746872e-06, "loss": 0.1185, "step": 21920 }, { "epoch": 0.6441285319861364, "grad_norm": 163496.8125, "learning_rate": 3.5587146801386362e-06, "loss": 0.1318, "step": 21930 }, { "epoch": 0.64442225224696, "grad_norm": 266560.625, "learning_rate": 3.5557774775304004e-06, "loss": 0.1351, "step": 21940 }, { "epoch": 0.6447159725077836, "grad_norm": 133070.046875, "learning_rate": 3.5528402749221646e-06, "loss": 0.1388, "step": 21950 }, { "epoch": 0.6450096927686072, "grad_norm": 166592.703125, "learning_rate": 3.5499030723139284e-06, "loss": 0.1273, "step": 21960 }, { "epoch": 0.6453034130294307, "grad_norm": 161609.8125, "learning_rate": 3.5469658697056926e-06, "loss": 0.1274, "step": 21970 }, { "epoch": 0.6455971332902544, "grad_norm": 175073.609375, "learning_rate": 3.5440286670974564e-06, "loss": 0.1218, "step": 21980 }, { "epoch": 0.645890853551078, "grad_norm": 188345.9375, "learning_rate": 3.5410914644892206e-06, "loss": 0.1291, "step": 21990 }, { "epoch": 0.6461845738119015, "grad_norm": 159775.546875, "learning_rate": 3.538154261880985e-06, "loss": 0.1251, "step": 22000 }, { "epoch": 0.6464782940727252, "grad_norm": 131506.859375, "learning_rate": 3.535217059272749e-06, "loss": 0.1245, "step": 22010 }, { "epoch": 0.6467720143335487, "grad_norm": 190087.40625, "learning_rate": 3.532279856664513e-06, "loss": 0.1374, "step": 22020 }, { "epoch": 0.6470657345943723, "grad_norm": 141657.203125, "learning_rate": 3.529342654056277e-06, "loss": 0.1287, "step": 22030 }, { "epoch": 0.6473594548551959, "grad_norm": 143593.484375, "learning_rate": 3.5264054514480413e-06, "loss": 0.1371, "step": 22040 }, { "epoch": 0.6476531751160195, "grad_norm": 110353.90625, "learning_rate": 3.5234682488398055e-06, "loss": 0.1365, "step": 22050 }, { "epoch": 0.6479468953768431, "grad_norm": 211723.265625, "learning_rate": 3.5205310462315697e-06, "loss": 0.1405, "step": 22060 }, { "epoch": 0.6482406156376667, "grad_norm": 162270.203125, "learning_rate": 3.517593843623334e-06, "loss": 0.1244, "step": 22070 }, { "epoch": 0.6485343358984903, "grad_norm": 150695.28125, "learning_rate": 3.5146566410150972e-06, "loss": 0.132, "step": 22080 }, { "epoch": 0.6488280561593138, "grad_norm": 172381.21875, "learning_rate": 3.5117194384068615e-06, "loss": 0.134, "step": 22090 }, { "epoch": 0.6491217764201375, "grad_norm": 346378.0625, "learning_rate": 3.5087822357986257e-06, "loss": 0.1226, "step": 22100 }, { "epoch": 0.6494154966809611, "grad_norm": 131885.84375, "learning_rate": 3.50584503319039e-06, "loss": 0.1222, "step": 22110 }, { "epoch": 0.6497092169417846, "grad_norm": 175249.546875, "learning_rate": 3.502907830582154e-06, "loss": 0.1236, "step": 22120 }, { "epoch": 0.6500029372026083, "grad_norm": 172564.375, "learning_rate": 3.4999706279739183e-06, "loss": 0.1211, "step": 22130 }, { "epoch": 0.6502966574634318, "grad_norm": 161418.59375, "learning_rate": 3.4970334253656817e-06, "loss": 0.1272, "step": 22140 }, { "epoch": 0.6505903777242554, "grad_norm": 110097.234375, "learning_rate": 3.494096222757446e-06, "loss": 0.1222, "step": 22150 }, { "epoch": 0.650884097985079, "grad_norm": 180978.8125, "learning_rate": 3.49115902014921e-06, "loss": 0.1284, "step": 22160 }, { "epoch": 0.6511778182459026, "grad_norm": 146224.171875, "learning_rate": 3.4882218175409743e-06, "loss": 0.1262, "step": 22170 }, { "epoch": 0.6514715385067262, "grad_norm": 116405.9296875, "learning_rate": 3.4852846149327385e-06, "loss": 0.135, "step": 22180 }, { "epoch": 0.6517652587675498, "grad_norm": 225208.46875, "learning_rate": 3.4823474123245027e-06, "loss": 0.126, "step": 22190 }, { "epoch": 0.6520589790283734, "grad_norm": 168396.375, "learning_rate": 3.4794102097162665e-06, "loss": 0.135, "step": 22200 }, { "epoch": 0.6523526992891969, "grad_norm": 155975.984375, "learning_rate": 3.4764730071080307e-06, "loss": 0.1218, "step": 22210 }, { "epoch": 0.6526464195500206, "grad_norm": 125064.25, "learning_rate": 3.4735358044997945e-06, "loss": 0.1271, "step": 22220 }, { "epoch": 0.6529401398108442, "grad_norm": 147245.15625, "learning_rate": 3.4705986018915587e-06, "loss": 0.1246, "step": 22230 }, { "epoch": 0.6532338600716677, "grad_norm": 121026.5703125, "learning_rate": 3.467661399283323e-06, "loss": 0.1275, "step": 22240 }, { "epoch": 0.6535275803324914, "grad_norm": 149524.59375, "learning_rate": 3.464724196675087e-06, "loss": 0.1221, "step": 22250 }, { "epoch": 0.6538213005933149, "grad_norm": 176694.046875, "learning_rate": 3.461786994066851e-06, "loss": 0.12, "step": 22260 }, { "epoch": 0.6541150208541385, "grad_norm": 158084.828125, "learning_rate": 3.458849791458615e-06, "loss": 0.1151, "step": 22270 }, { "epoch": 0.6544087411149621, "grad_norm": 203627.75, "learning_rate": 3.4559125888503793e-06, "loss": 0.1177, "step": 22280 }, { "epoch": 0.6547024613757857, "grad_norm": 202748.953125, "learning_rate": 3.4529753862421435e-06, "loss": 0.1341, "step": 22290 }, { "epoch": 0.6549961816366093, "grad_norm": 222123.484375, "learning_rate": 3.4500381836339077e-06, "loss": 0.1349, "step": 22300 }, { "epoch": 0.6552899018974329, "grad_norm": 236087.953125, "learning_rate": 3.4471009810256715e-06, "loss": 0.1206, "step": 22310 }, { "epoch": 0.6555836221582565, "grad_norm": 131744.65625, "learning_rate": 3.4441637784174353e-06, "loss": 0.1264, "step": 22320 }, { "epoch": 0.65587734241908, "grad_norm": 119662.9453125, "learning_rate": 3.4412265758091995e-06, "loss": 0.1254, "step": 22330 }, { "epoch": 0.6561710626799037, "grad_norm": 479802.9375, "learning_rate": 3.4382893732009637e-06, "loss": 0.1181, "step": 22340 }, { "epoch": 0.6564647829407273, "grad_norm": 168894.546875, "learning_rate": 3.435352170592728e-06, "loss": 0.1285, "step": 22350 }, { "epoch": 0.6567585032015508, "grad_norm": 116427.953125, "learning_rate": 3.432414967984492e-06, "loss": 0.1235, "step": 22360 }, { "epoch": 0.6570522234623745, "grad_norm": 124697.984375, "learning_rate": 3.4294777653762563e-06, "loss": 0.1205, "step": 22370 }, { "epoch": 0.657345943723198, "grad_norm": 151503.828125, "learning_rate": 3.4265405627680197e-06, "loss": 0.1186, "step": 22380 }, { "epoch": 0.6576396639840216, "grad_norm": 189847.1875, "learning_rate": 3.423603360159784e-06, "loss": 0.1255, "step": 22390 }, { "epoch": 0.6579333842448452, "grad_norm": 172281.546875, "learning_rate": 3.420666157551548e-06, "loss": 0.125, "step": 22400 }, { "epoch": 0.6582271045056688, "grad_norm": 94609.21875, "learning_rate": 3.4177289549433123e-06, "loss": 0.1251, "step": 22410 }, { "epoch": 0.6585208247664924, "grad_norm": 137817.6875, "learning_rate": 3.4147917523350765e-06, "loss": 0.1264, "step": 22420 }, { "epoch": 0.658814545027316, "grad_norm": 133201.59375, "learning_rate": 3.4118545497268403e-06, "loss": 0.1184, "step": 22430 }, { "epoch": 0.6591082652881396, "grad_norm": 102397.0234375, "learning_rate": 3.4089173471186045e-06, "loss": 0.1202, "step": 22440 }, { "epoch": 0.6594019855489631, "grad_norm": 286333.25, "learning_rate": 3.4059801445103683e-06, "loss": 0.1341, "step": 22450 }, { "epoch": 0.6596957058097868, "grad_norm": 200486.46875, "learning_rate": 3.4030429419021325e-06, "loss": 0.1208, "step": 22460 }, { "epoch": 0.6599894260706104, "grad_norm": 130478.1640625, "learning_rate": 3.4001057392938967e-06, "loss": 0.1325, "step": 22470 }, { "epoch": 0.6602831463314339, "grad_norm": 146848.71875, "learning_rate": 3.397168536685661e-06, "loss": 0.1372, "step": 22480 }, { "epoch": 0.6605768665922576, "grad_norm": 145916.046875, "learning_rate": 3.3942313340774247e-06, "loss": 0.1159, "step": 22490 }, { "epoch": 0.6608705868530811, "grad_norm": 96215.25, "learning_rate": 3.391294131469189e-06, "loss": 0.1138, "step": 22500 }, { "epoch": 0.6611643071139047, "grad_norm": 189628.65625, "learning_rate": 3.388356928860953e-06, "loss": 0.1307, "step": 22510 }, { "epoch": 0.6614580273747284, "grad_norm": 200940.1875, "learning_rate": 3.3854197262527174e-06, "loss": 0.1137, "step": 22520 }, { "epoch": 0.6617517476355519, "grad_norm": 126350.671875, "learning_rate": 3.3824825236444816e-06, "loss": 0.1147, "step": 22530 }, { "epoch": 0.6620454678963755, "grad_norm": 122247.765625, "learning_rate": 3.3795453210362454e-06, "loss": 0.1285, "step": 22540 }, { "epoch": 0.662339188157199, "grad_norm": 270397.9375, "learning_rate": 3.376608118428009e-06, "loss": 0.1229, "step": 22550 }, { "epoch": 0.6626329084180227, "grad_norm": 212195.71875, "learning_rate": 3.3736709158197734e-06, "loss": 0.1171, "step": 22560 }, { "epoch": 0.6629266286788462, "grad_norm": 134662.859375, "learning_rate": 3.3707337132115376e-06, "loss": 0.1358, "step": 22570 }, { "epoch": 0.6632203489396699, "grad_norm": 303568.375, "learning_rate": 3.3677965106033018e-06, "loss": 0.1222, "step": 22580 }, { "epoch": 0.6635140692004935, "grad_norm": 378332.5625, "learning_rate": 3.364859307995066e-06, "loss": 0.1418, "step": 22590 }, { "epoch": 0.663807789461317, "grad_norm": 152971.265625, "learning_rate": 3.36192210538683e-06, "loss": 0.1335, "step": 22600 }, { "epoch": 0.6641015097221407, "grad_norm": 178732.640625, "learning_rate": 3.3589849027785936e-06, "loss": 0.1339, "step": 22610 }, { "epoch": 0.6643952299829642, "grad_norm": 184873.0, "learning_rate": 3.3560477001703578e-06, "loss": 0.1272, "step": 22620 }, { "epoch": 0.6646889502437878, "grad_norm": 359615.15625, "learning_rate": 3.353110497562122e-06, "loss": 0.1411, "step": 22630 }, { "epoch": 0.6649826705046115, "grad_norm": 270113.75, "learning_rate": 3.350173294953886e-06, "loss": 0.1277, "step": 22640 }, { "epoch": 0.665276390765435, "grad_norm": 118175.5546875, "learning_rate": 3.3472360923456504e-06, "loss": 0.1358, "step": 22650 }, { "epoch": 0.6655701110262586, "grad_norm": 166060.453125, "learning_rate": 3.3442988897374146e-06, "loss": 0.1193, "step": 22660 }, { "epoch": 0.6658638312870822, "grad_norm": 194796.375, "learning_rate": 3.3413616871291784e-06, "loss": 0.119, "step": 22670 }, { "epoch": 0.6661575515479058, "grad_norm": 129049.9609375, "learning_rate": 3.338424484520942e-06, "loss": 0.1302, "step": 22680 }, { "epoch": 0.6664512718087293, "grad_norm": 182270.65625, "learning_rate": 3.3354872819127064e-06, "loss": 0.1088, "step": 22690 }, { "epoch": 0.666744992069553, "grad_norm": 143190.734375, "learning_rate": 3.3325500793044706e-06, "loss": 0.1316, "step": 22700 }, { "epoch": 0.6670387123303766, "grad_norm": 194144.375, "learning_rate": 3.329612876696235e-06, "loss": 0.1238, "step": 22710 }, { "epoch": 0.6673324325912001, "grad_norm": 139211.515625, "learning_rate": 3.326675674087999e-06, "loss": 0.115, "step": 22720 }, { "epoch": 0.6676261528520238, "grad_norm": 147356.625, "learning_rate": 3.323738471479763e-06, "loss": 0.1257, "step": 22730 }, { "epoch": 0.6679198731128473, "grad_norm": 202991.390625, "learning_rate": 3.320801268871527e-06, "loss": 0.121, "step": 22740 }, { "epoch": 0.6682135933736709, "grad_norm": 129617.96875, "learning_rate": 3.317864066263291e-06, "loss": 0.132, "step": 22750 }, { "epoch": 0.6685073136344946, "grad_norm": 120040.75, "learning_rate": 3.3149268636550554e-06, "loss": 0.1349, "step": 22760 }, { "epoch": 0.6688010338953181, "grad_norm": 322707.53125, "learning_rate": 3.311989661046819e-06, "loss": 0.1216, "step": 22770 }, { "epoch": 0.6690947541561417, "grad_norm": 184062.125, "learning_rate": 3.3090524584385834e-06, "loss": 0.1223, "step": 22780 }, { "epoch": 0.6693884744169653, "grad_norm": 160626.09375, "learning_rate": 3.306115255830347e-06, "loss": 0.1343, "step": 22790 }, { "epoch": 0.6696821946777889, "grad_norm": 139621.78125, "learning_rate": 3.3031780532221114e-06, "loss": 0.12, "step": 22800 }, { "epoch": 0.6699759149386124, "grad_norm": 124742.890625, "learning_rate": 3.3002408506138756e-06, "loss": 0.1262, "step": 22810 }, { "epoch": 0.670269635199436, "grad_norm": 187124.390625, "learning_rate": 3.29730364800564e-06, "loss": 0.1366, "step": 22820 }, { "epoch": 0.6705633554602597, "grad_norm": 244901.453125, "learning_rate": 3.294366445397404e-06, "loss": 0.136, "step": 22830 }, { "epoch": 0.6708570757210832, "grad_norm": 172507.5, "learning_rate": 3.2914292427891682e-06, "loss": 0.1193, "step": 22840 }, { "epoch": 0.6711507959819069, "grad_norm": 137984.03125, "learning_rate": 3.2884920401809316e-06, "loss": 0.1339, "step": 22850 }, { "epoch": 0.6714445162427304, "grad_norm": 165137.28125, "learning_rate": 3.285554837572696e-06, "loss": 0.1201, "step": 22860 }, { "epoch": 0.671738236503554, "grad_norm": 430119.875, "learning_rate": 3.28261763496446e-06, "loss": 0.1376, "step": 22870 }, { "epoch": 0.6720319567643777, "grad_norm": 131288.296875, "learning_rate": 3.2796804323562242e-06, "loss": 0.1256, "step": 22880 }, { "epoch": 0.6723256770252012, "grad_norm": 251740.765625, "learning_rate": 3.2767432297479884e-06, "loss": 0.1137, "step": 22890 }, { "epoch": 0.6726193972860248, "grad_norm": 256072.359375, "learning_rate": 3.2738060271397527e-06, "loss": 0.127, "step": 22900 }, { "epoch": 0.6729131175468483, "grad_norm": 178338.34375, "learning_rate": 3.270868824531516e-06, "loss": 0.1399, "step": 22910 }, { "epoch": 0.673206837807672, "grad_norm": 147900.515625, "learning_rate": 3.2679316219232802e-06, "loss": 0.1204, "step": 22920 }, { "epoch": 0.6735005580684955, "grad_norm": 243357.28125, "learning_rate": 3.2649944193150444e-06, "loss": 0.1174, "step": 22930 }, { "epoch": 0.6737942783293192, "grad_norm": 212985.109375, "learning_rate": 3.2620572167068086e-06, "loss": 0.1271, "step": 22940 }, { "epoch": 0.6740879985901428, "grad_norm": 147234.90625, "learning_rate": 3.259120014098573e-06, "loss": 0.1354, "step": 22950 }, { "epoch": 0.6743817188509663, "grad_norm": 140408.875, "learning_rate": 3.256182811490337e-06, "loss": 0.1288, "step": 22960 }, { "epoch": 0.67467543911179, "grad_norm": 284922.96875, "learning_rate": 3.253245608882101e-06, "loss": 0.123, "step": 22970 }, { "epoch": 0.6749691593726135, "grad_norm": 181407.25, "learning_rate": 3.250308406273865e-06, "loss": 0.1176, "step": 22980 }, { "epoch": 0.6752628796334371, "grad_norm": 111288.1953125, "learning_rate": 3.2473712036656293e-06, "loss": 0.135, "step": 22990 }, { "epoch": 0.6755565998942608, "grad_norm": 124249.546875, "learning_rate": 3.244434001057393e-06, "loss": 0.1329, "step": 23000 }, { "epoch": 0.6758503201550843, "grad_norm": 190688.703125, "learning_rate": 3.2414967984491573e-06, "loss": 0.1288, "step": 23010 }, { "epoch": 0.6761440404159079, "grad_norm": 113874.15625, "learning_rate": 3.2385595958409215e-06, "loss": 0.1213, "step": 23020 }, { "epoch": 0.6764377606767314, "grad_norm": 291767.46875, "learning_rate": 3.2356223932326853e-06, "loss": 0.129, "step": 23030 }, { "epoch": 0.6767314809375551, "grad_norm": 169406.34375, "learning_rate": 3.2326851906244495e-06, "loss": 0.1284, "step": 23040 }, { "epoch": 0.6770252011983786, "grad_norm": 179564.84375, "learning_rate": 3.2297479880162137e-06, "loss": 0.1215, "step": 23050 }, { "epoch": 0.6773189214592023, "grad_norm": 182552.4375, "learning_rate": 3.226810785407978e-06, "loss": 0.1237, "step": 23060 }, { "epoch": 0.6776126417200259, "grad_norm": 252231.28125, "learning_rate": 3.223873582799742e-06, "loss": 0.1291, "step": 23070 }, { "epoch": 0.6779063619808494, "grad_norm": 155154.328125, "learning_rate": 3.2209363801915063e-06, "loss": 0.1266, "step": 23080 }, { "epoch": 0.678200082241673, "grad_norm": 188145.125, "learning_rate": 3.2179991775832697e-06, "loss": 0.1271, "step": 23090 }, { "epoch": 0.6784938025024966, "grad_norm": 233014.75, "learning_rate": 3.215061974975034e-06, "loss": 0.1273, "step": 23100 }, { "epoch": 0.6787875227633202, "grad_norm": 148426.015625, "learning_rate": 3.212124772366798e-06, "loss": 0.1184, "step": 23110 }, { "epoch": 0.6790812430241439, "grad_norm": 149555.625, "learning_rate": 3.2091875697585623e-06, "loss": 0.1247, "step": 23120 }, { "epoch": 0.6793749632849674, "grad_norm": 152986.59375, "learning_rate": 3.2062503671503265e-06, "loss": 0.1302, "step": 23130 }, { "epoch": 0.679668683545791, "grad_norm": 141555.0, "learning_rate": 3.2033131645420907e-06, "loss": 0.1248, "step": 23140 }, { "epoch": 0.6799624038066145, "grad_norm": 196569.5625, "learning_rate": 3.200375961933854e-06, "loss": 0.1257, "step": 23150 }, { "epoch": 0.6802561240674382, "grad_norm": 132318.421875, "learning_rate": 3.1974387593256183e-06, "loss": 0.1313, "step": 23160 }, { "epoch": 0.6805498443282617, "grad_norm": 253923.90625, "learning_rate": 3.1945015567173825e-06, "loss": 0.1244, "step": 23170 }, { "epoch": 0.6808435645890853, "grad_norm": 172882.828125, "learning_rate": 3.1915643541091467e-06, "loss": 0.1292, "step": 23180 }, { "epoch": 0.681137284849909, "grad_norm": 110752.5703125, "learning_rate": 3.188627151500911e-06, "loss": 0.1204, "step": 23190 }, { "epoch": 0.6814310051107325, "grad_norm": 111513.53125, "learning_rate": 3.185689948892675e-06, "loss": 0.1301, "step": 23200 }, { "epoch": 0.6817247253715562, "grad_norm": 189574.3125, "learning_rate": 3.182752746284439e-06, "loss": 0.1227, "step": 23210 }, { "epoch": 0.6820184456323797, "grad_norm": 113356.1796875, "learning_rate": 3.179815543676203e-06, "loss": 0.1183, "step": 23220 }, { "epoch": 0.6823121658932033, "grad_norm": 169834.53125, "learning_rate": 3.1768783410679673e-06, "loss": 0.1297, "step": 23230 }, { "epoch": 0.682605886154027, "grad_norm": 177538.171875, "learning_rate": 3.173941138459731e-06, "loss": 0.1227, "step": 23240 }, { "epoch": 0.6828996064148505, "grad_norm": 191539.96875, "learning_rate": 3.1710039358514953e-06, "loss": 0.1104, "step": 23250 }, { "epoch": 0.6831933266756741, "grad_norm": 251628.9375, "learning_rate": 3.1680667332432595e-06, "loss": 0.1287, "step": 23260 }, { "epoch": 0.6834870469364976, "grad_norm": 161610.5625, "learning_rate": 3.1651295306350233e-06, "loss": 0.1285, "step": 23270 }, { "epoch": 0.6837807671973213, "grad_norm": 136294.234375, "learning_rate": 3.1621923280267875e-06, "loss": 0.1252, "step": 23280 }, { "epoch": 0.6840744874581448, "grad_norm": 289802.09375, "learning_rate": 3.1592551254185517e-06, "loss": 0.1218, "step": 23290 }, { "epoch": 0.6843682077189684, "grad_norm": 195036.859375, "learning_rate": 3.156317922810316e-06, "loss": 0.1341, "step": 23300 }, { "epoch": 0.6846619279797921, "grad_norm": 181082.3125, "learning_rate": 3.15338072020208e-06, "loss": 0.1182, "step": 23310 }, { "epoch": 0.6849556482406156, "grad_norm": 254649.203125, "learning_rate": 3.1504435175938444e-06, "loss": 0.1205, "step": 23320 }, { "epoch": 0.6852493685014392, "grad_norm": 493018.59375, "learning_rate": 3.1475063149856077e-06, "loss": 0.1195, "step": 23330 }, { "epoch": 0.6855430887622628, "grad_norm": 199705.71875, "learning_rate": 3.144569112377372e-06, "loss": 0.1163, "step": 23340 }, { "epoch": 0.6858368090230864, "grad_norm": 194040.390625, "learning_rate": 3.141631909769136e-06, "loss": 0.1223, "step": 23350 }, { "epoch": 0.68613052928391, "grad_norm": 177180.234375, "learning_rate": 3.1386947071609003e-06, "loss": 0.1227, "step": 23360 }, { "epoch": 0.6864242495447336, "grad_norm": 128016.53125, "learning_rate": 3.1357575045526646e-06, "loss": 0.1193, "step": 23370 }, { "epoch": 0.6867179698055572, "grad_norm": 200678.953125, "learning_rate": 3.1328203019444288e-06, "loss": 0.1192, "step": 23380 }, { "epoch": 0.6870116900663807, "grad_norm": 192149.5625, "learning_rate": 3.129883099336192e-06, "loss": 0.1194, "step": 23390 }, { "epoch": 0.6873054103272044, "grad_norm": 204724.53125, "learning_rate": 3.1269458967279563e-06, "loss": 0.1285, "step": 23400 }, { "epoch": 0.6875991305880279, "grad_norm": 212432.578125, "learning_rate": 3.1240086941197205e-06, "loss": 0.1212, "step": 23410 }, { "epoch": 0.6878928508488515, "grad_norm": 219049.71875, "learning_rate": 3.1210714915114848e-06, "loss": 0.1191, "step": 23420 }, { "epoch": 0.6881865711096752, "grad_norm": 144314.140625, "learning_rate": 3.118134288903249e-06, "loss": 0.1305, "step": 23430 }, { "epoch": 0.6884802913704987, "grad_norm": 82240.3671875, "learning_rate": 3.115197086295013e-06, "loss": 0.1221, "step": 23440 }, { "epoch": 0.6887740116313223, "grad_norm": 118152.2109375, "learning_rate": 3.112259883686777e-06, "loss": 0.1223, "step": 23450 }, { "epoch": 0.6890677318921459, "grad_norm": 307595.90625, "learning_rate": 3.109322681078541e-06, "loss": 0.1276, "step": 23460 }, { "epoch": 0.6893614521529695, "grad_norm": 129597.5546875, "learning_rate": 3.106385478470305e-06, "loss": 0.1258, "step": 23470 }, { "epoch": 0.6896551724137931, "grad_norm": 135171.921875, "learning_rate": 3.103448275862069e-06, "loss": 0.1246, "step": 23480 }, { "epoch": 0.6899488926746167, "grad_norm": 191814.03125, "learning_rate": 3.1005110732538334e-06, "loss": 0.1272, "step": 23490 }, { "epoch": 0.6902426129354403, "grad_norm": 182678.84375, "learning_rate": 3.097573870645597e-06, "loss": 0.1232, "step": 23500 }, { "epoch": 0.6905363331962638, "grad_norm": 178368.609375, "learning_rate": 3.0946366680373614e-06, "loss": 0.1223, "step": 23510 }, { "epoch": 0.6908300534570875, "grad_norm": 224704.25, "learning_rate": 3.0916994654291256e-06, "loss": 0.1143, "step": 23520 }, { "epoch": 0.6911237737179111, "grad_norm": 208133.9375, "learning_rate": 3.0887622628208898e-06, "loss": 0.1176, "step": 23530 }, { "epoch": 0.6914174939787346, "grad_norm": 122765.0625, "learning_rate": 3.085825060212654e-06, "loss": 0.1128, "step": 23540 }, { "epoch": 0.6917112142395583, "grad_norm": 83180.671875, "learning_rate": 3.082887857604418e-06, "loss": 0.1007, "step": 23550 }, { "epoch": 0.6920049345003818, "grad_norm": 200645.5625, "learning_rate": 3.0799506549961816e-06, "loss": 0.1276, "step": 23560 }, { "epoch": 0.6922986547612054, "grad_norm": 173914.59375, "learning_rate": 3.0770134523879458e-06, "loss": 0.1197, "step": 23570 }, { "epoch": 0.692592375022029, "grad_norm": 139099.234375, "learning_rate": 3.07407624977971e-06, "loss": 0.1216, "step": 23580 }, { "epoch": 0.6928860952828526, "grad_norm": 217990.53125, "learning_rate": 3.071139047171474e-06, "loss": 0.1271, "step": 23590 }, { "epoch": 0.6931798155436762, "grad_norm": 139766.875, "learning_rate": 3.0682018445632384e-06, "loss": 0.1256, "step": 23600 }, { "epoch": 0.6934735358044998, "grad_norm": 205950.8125, "learning_rate": 3.0652646419550026e-06, "loss": 0.1216, "step": 23610 }, { "epoch": 0.6937672560653234, "grad_norm": 183630.84375, "learning_rate": 3.062327439346766e-06, "loss": 0.1246, "step": 23620 }, { "epoch": 0.6940609763261469, "grad_norm": 245849.359375, "learning_rate": 3.05939023673853e-06, "loss": 0.1286, "step": 23630 }, { "epoch": 0.6943546965869706, "grad_norm": 207429.640625, "learning_rate": 3.0564530341302944e-06, "loss": 0.1249, "step": 23640 }, { "epoch": 0.6946484168477942, "grad_norm": 243279.9375, "learning_rate": 3.0535158315220586e-06, "loss": 0.1335, "step": 23650 }, { "epoch": 0.6949421371086177, "grad_norm": 161678.53125, "learning_rate": 3.050578628913823e-06, "loss": 0.1114, "step": 23660 }, { "epoch": 0.6952358573694414, "grad_norm": 196916.484375, "learning_rate": 3.047641426305587e-06, "loss": 0.1249, "step": 23670 }, { "epoch": 0.6955295776302649, "grad_norm": 137035.78125, "learning_rate": 3.044704223697351e-06, "loss": 0.1224, "step": 23680 }, { "epoch": 0.6958232978910885, "grad_norm": 171629.3125, "learning_rate": 3.041767021089115e-06, "loss": 0.1226, "step": 23690 }, { "epoch": 0.6961170181519121, "grad_norm": 129786.3359375, "learning_rate": 3.038829818480879e-06, "loss": 0.1216, "step": 23700 }, { "epoch": 0.6964107384127357, "grad_norm": 131921.265625, "learning_rate": 3.035892615872643e-06, "loss": 0.1308, "step": 23710 }, { "epoch": 0.6967044586735593, "grad_norm": 216769.1875, "learning_rate": 3.0329554132644072e-06, "loss": 0.1372, "step": 23720 }, { "epoch": 0.6969981789343829, "grad_norm": 225094.265625, "learning_rate": 3.0300182106561714e-06, "loss": 0.106, "step": 23730 }, { "epoch": 0.6972918991952065, "grad_norm": 152619.625, "learning_rate": 3.027081008047935e-06, "loss": 0.1341, "step": 23740 }, { "epoch": 0.69758561945603, "grad_norm": 195728.515625, "learning_rate": 3.0241438054396994e-06, "loss": 0.1247, "step": 23750 }, { "epoch": 0.6978793397168537, "grad_norm": 187530.796875, "learning_rate": 3.0212066028314636e-06, "loss": 0.1156, "step": 23760 }, { "epoch": 0.6981730599776773, "grad_norm": 105534.2265625, "learning_rate": 3.018269400223228e-06, "loss": 0.1228, "step": 23770 }, { "epoch": 0.6984667802385008, "grad_norm": 161200.53125, "learning_rate": 3.015332197614992e-06, "loss": 0.1212, "step": 23780 }, { "epoch": 0.6987605004993245, "grad_norm": 186010.421875, "learning_rate": 3.012394995006756e-06, "loss": 0.1361, "step": 23790 }, { "epoch": 0.699054220760148, "grad_norm": 134779.53125, "learning_rate": 3.0094577923985196e-06, "loss": 0.1307, "step": 23800 }, { "epoch": 0.6993479410209716, "grad_norm": 315874.90625, "learning_rate": 3.006520589790284e-06, "loss": 0.1161, "step": 23810 }, { "epoch": 0.6996416612817952, "grad_norm": 175491.0625, "learning_rate": 3.003583387182048e-06, "loss": 0.1326, "step": 23820 }, { "epoch": 0.6999353815426188, "grad_norm": 131584.578125, "learning_rate": 3.0006461845738122e-06, "loss": 0.1141, "step": 23830 }, { "epoch": 0.7002291018034424, "grad_norm": 149024.09375, "learning_rate": 2.9977089819655765e-06, "loss": 0.1195, "step": 23840 }, { "epoch": 0.700522822064266, "grad_norm": 265987.3125, "learning_rate": 2.9947717793573407e-06, "loss": 0.1375, "step": 23850 }, { "epoch": 0.7008165423250896, "grad_norm": 163986.03125, "learning_rate": 2.991834576749104e-06, "loss": 0.1269, "step": 23860 }, { "epoch": 0.7011102625859131, "grad_norm": 143668.203125, "learning_rate": 2.9888973741408682e-06, "loss": 0.1247, "step": 23870 }, { "epoch": 0.7014039828467368, "grad_norm": 316949.375, "learning_rate": 2.9859601715326324e-06, "loss": 0.1437, "step": 23880 }, { "epoch": 0.7016977031075604, "grad_norm": 162409.4375, "learning_rate": 2.9830229689243967e-06, "loss": 0.1255, "step": 23890 }, { "epoch": 0.7019914233683839, "grad_norm": 496977.71875, "learning_rate": 2.980085766316161e-06, "loss": 0.1219, "step": 23900 }, { "epoch": 0.7022851436292076, "grad_norm": 172880.390625, "learning_rate": 2.977148563707925e-06, "loss": 0.1324, "step": 23910 }, { "epoch": 0.7025788638900311, "grad_norm": 142604.953125, "learning_rate": 2.974211361099689e-06, "loss": 0.1202, "step": 23920 }, { "epoch": 0.7028725841508547, "grad_norm": 143251.421875, "learning_rate": 2.9712741584914526e-06, "loss": 0.1183, "step": 23930 }, { "epoch": 0.7031663044116783, "grad_norm": 115055.21875, "learning_rate": 2.968336955883217e-06, "loss": 0.1314, "step": 23940 }, { "epoch": 0.7034600246725019, "grad_norm": 144167.859375, "learning_rate": 2.965399753274981e-06, "loss": 0.1344, "step": 23950 }, { "epoch": 0.7037537449333255, "grad_norm": 119091.796875, "learning_rate": 2.9624625506667453e-06, "loss": 0.1289, "step": 23960 }, { "epoch": 0.7040474651941491, "grad_norm": 123711.0859375, "learning_rate": 2.9595253480585095e-06, "loss": 0.1243, "step": 23970 }, { "epoch": 0.7043411854549727, "grad_norm": 178743.15625, "learning_rate": 2.9565881454502733e-06, "loss": 0.1347, "step": 23980 }, { "epoch": 0.7046349057157962, "grad_norm": 191809.9375, "learning_rate": 2.9536509428420375e-06, "loss": 0.1159, "step": 23990 }, { "epoch": 0.7049286259766199, "grad_norm": 256818.765625, "learning_rate": 2.9507137402338017e-06, "loss": 0.1327, "step": 24000 }, { "epoch": 0.7052223462374435, "grad_norm": 187993.46875, "learning_rate": 2.947776537625566e-06, "loss": 0.1114, "step": 24010 }, { "epoch": 0.705516066498267, "grad_norm": 190516.203125, "learning_rate": 2.9448393350173297e-06, "loss": 0.1327, "step": 24020 }, { "epoch": 0.7058097867590907, "grad_norm": 193995.3125, "learning_rate": 2.941902132409094e-06, "loss": 0.119, "step": 24030 }, { "epoch": 0.7061035070199142, "grad_norm": 169897.34375, "learning_rate": 2.9389649298008577e-06, "loss": 0.1221, "step": 24040 }, { "epoch": 0.7063972272807378, "grad_norm": 122391.546875, "learning_rate": 2.936027727192622e-06, "loss": 0.1356, "step": 24050 }, { "epoch": 0.7066909475415614, "grad_norm": 162383.796875, "learning_rate": 2.933090524584386e-06, "loss": 0.1307, "step": 24060 }, { "epoch": 0.706984667802385, "grad_norm": 174549.84375, "learning_rate": 2.9301533219761503e-06, "loss": 0.1148, "step": 24070 }, { "epoch": 0.7072783880632086, "grad_norm": 342677.15625, "learning_rate": 2.9272161193679145e-06, "loss": 0.1198, "step": 24080 }, { "epoch": 0.7075721083240322, "grad_norm": 193647.875, "learning_rate": 2.9242789167596787e-06, "loss": 0.1181, "step": 24090 }, { "epoch": 0.7078658285848558, "grad_norm": 142687.40625, "learning_rate": 2.921341714151442e-06, "loss": 0.1228, "step": 24100 }, { "epoch": 0.7081595488456793, "grad_norm": 207669.046875, "learning_rate": 2.9184045115432063e-06, "loss": 0.1096, "step": 24110 }, { "epoch": 0.708453269106503, "grad_norm": 205675.859375, "learning_rate": 2.9154673089349705e-06, "loss": 0.1301, "step": 24120 }, { "epoch": 0.7087469893673266, "grad_norm": 252757.734375, "learning_rate": 2.9125301063267347e-06, "loss": 0.1172, "step": 24130 }, { "epoch": 0.7090407096281501, "grad_norm": 125747.1015625, "learning_rate": 2.909592903718499e-06, "loss": 0.1221, "step": 24140 }, { "epoch": 0.7093344298889738, "grad_norm": 226948.34375, "learning_rate": 2.906655701110263e-06, "loss": 0.117, "step": 24150 }, { "epoch": 0.7096281501497973, "grad_norm": 167790.859375, "learning_rate": 2.9037184985020265e-06, "loss": 0.1181, "step": 24160 }, { "epoch": 0.7099218704106209, "grad_norm": 197507.03125, "learning_rate": 2.9007812958937907e-06, "loss": 0.1277, "step": 24170 }, { "epoch": 0.7102155906714445, "grad_norm": 125973.234375, "learning_rate": 2.897844093285555e-06, "loss": 0.1066, "step": 24180 }, { "epoch": 0.7105093109322681, "grad_norm": 142368.84375, "learning_rate": 2.894906890677319e-06, "loss": 0.1336, "step": 24190 }, { "epoch": 0.7108030311930917, "grad_norm": 161594.0, "learning_rate": 2.8919696880690833e-06, "loss": 0.131, "step": 24200 }, { "epoch": 0.7110967514539153, "grad_norm": 114692.0, "learning_rate": 2.8890324854608475e-06, "loss": 0.1268, "step": 24210 }, { "epoch": 0.7113904717147389, "grad_norm": 113139.96875, "learning_rate": 2.8860952828526113e-06, "loss": 0.1232, "step": 24220 }, { "epoch": 0.7116841919755624, "grad_norm": 187384.828125, "learning_rate": 2.8831580802443755e-06, "loss": 0.1155, "step": 24230 }, { "epoch": 0.7119779122363861, "grad_norm": 109700.40625, "learning_rate": 2.8802208776361397e-06, "loss": 0.1191, "step": 24240 }, { "epoch": 0.7122716324972097, "grad_norm": 197717.765625, "learning_rate": 2.8772836750279035e-06, "loss": 0.102, "step": 24250 }, { "epoch": 0.7125653527580332, "grad_norm": 163935.453125, "learning_rate": 2.8743464724196677e-06, "loss": 0.1299, "step": 24260 }, { "epoch": 0.7128590730188569, "grad_norm": 145076.046875, "learning_rate": 2.871409269811432e-06, "loss": 0.1266, "step": 24270 }, { "epoch": 0.7131527932796804, "grad_norm": 143122.765625, "learning_rate": 2.8684720672031957e-06, "loss": 0.1158, "step": 24280 }, { "epoch": 0.713446513540504, "grad_norm": 158235.59375, "learning_rate": 2.86553486459496e-06, "loss": 0.1156, "step": 24290 }, { "epoch": 0.7137402338013276, "grad_norm": 152984.578125, "learning_rate": 2.862597661986724e-06, "loss": 0.1179, "step": 24300 }, { "epoch": 0.7140339540621512, "grad_norm": 207497.953125, "learning_rate": 2.8596604593784884e-06, "loss": 0.1206, "step": 24310 }, { "epoch": 0.7143276743229748, "grad_norm": 223292.640625, "learning_rate": 2.8567232567702526e-06, "loss": 0.1217, "step": 24320 }, { "epoch": 0.7146213945837984, "grad_norm": 138753.9375, "learning_rate": 2.8537860541620168e-06, "loss": 0.1241, "step": 24330 }, { "epoch": 0.714915114844622, "grad_norm": 142772.28125, "learning_rate": 2.85084885155378e-06, "loss": 0.1201, "step": 24340 }, { "epoch": 0.7152088351054455, "grad_norm": 151796.59375, "learning_rate": 2.8479116489455443e-06, "loss": 0.1207, "step": 24350 }, { "epoch": 0.7155025553662692, "grad_norm": 166451.109375, "learning_rate": 2.8449744463373086e-06, "loss": 0.1189, "step": 24360 }, { "epoch": 0.7157962756270928, "grad_norm": 155269.625, "learning_rate": 2.8420372437290728e-06, "loss": 0.1163, "step": 24370 }, { "epoch": 0.7160899958879163, "grad_norm": 208385.578125, "learning_rate": 2.839100041120837e-06, "loss": 0.1268, "step": 24380 }, { "epoch": 0.71638371614874, "grad_norm": 195035.9375, "learning_rate": 2.836162838512601e-06, "loss": 0.1196, "step": 24390 }, { "epoch": 0.7166774364095635, "grad_norm": 157522.5625, "learning_rate": 2.8332256359043645e-06, "loss": 0.1354, "step": 24400 }, { "epoch": 0.7169711566703871, "grad_norm": 144180.953125, "learning_rate": 2.8302884332961288e-06, "loss": 0.1166, "step": 24410 }, { "epoch": 0.7172648769312107, "grad_norm": 102094.703125, "learning_rate": 2.827351230687893e-06, "loss": 0.1211, "step": 24420 }, { "epoch": 0.7175585971920343, "grad_norm": 133269.796875, "learning_rate": 2.824414028079657e-06, "loss": 0.1203, "step": 24430 }, { "epoch": 0.7178523174528579, "grad_norm": 179035.203125, "learning_rate": 2.8214768254714214e-06, "loss": 0.1182, "step": 24440 }, { "epoch": 0.7181460377136815, "grad_norm": 151602.328125, "learning_rate": 2.8185396228631856e-06, "loss": 0.1258, "step": 24450 }, { "epoch": 0.7184397579745051, "grad_norm": 110294.4921875, "learning_rate": 2.8156024202549494e-06, "loss": 0.1262, "step": 24460 }, { "epoch": 0.7187334782353286, "grad_norm": 287776.78125, "learning_rate": 2.8126652176467136e-06, "loss": 0.128, "step": 24470 }, { "epoch": 0.7190271984961523, "grad_norm": 114059.5859375, "learning_rate": 2.809728015038478e-06, "loss": 0.1336, "step": 24480 }, { "epoch": 0.7193209187569759, "grad_norm": 167837.8125, "learning_rate": 2.8067908124302416e-06, "loss": 0.1182, "step": 24490 }, { "epoch": 0.7196146390177994, "grad_norm": 126604.8046875, "learning_rate": 2.803853609822006e-06, "loss": 0.1309, "step": 24500 }, { "epoch": 0.7199083592786231, "grad_norm": 178925.390625, "learning_rate": 2.8009164072137696e-06, "loss": 0.1099, "step": 24510 }, { "epoch": 0.7202020795394466, "grad_norm": 300823.59375, "learning_rate": 2.7979792046055338e-06, "loss": 0.1044, "step": 24520 }, { "epoch": 0.7204957998002702, "grad_norm": 229313.859375, "learning_rate": 2.795042001997298e-06, "loss": 0.1319, "step": 24530 }, { "epoch": 0.7207895200610939, "grad_norm": 287399.65625, "learning_rate": 2.792104799389062e-06, "loss": 0.112, "step": 24540 }, { "epoch": 0.7210832403219174, "grad_norm": 167578.53125, "learning_rate": 2.7891675967808264e-06, "loss": 0.1234, "step": 24550 }, { "epoch": 0.721376960582741, "grad_norm": 195642.65625, "learning_rate": 2.7862303941725906e-06, "loss": 0.1208, "step": 24560 }, { "epoch": 0.7216706808435646, "grad_norm": 146982.453125, "learning_rate": 2.783293191564354e-06, "loss": 0.1217, "step": 24570 }, { "epoch": 0.7219644011043882, "grad_norm": 246565.46875, "learning_rate": 2.780355988956118e-06, "loss": 0.121, "step": 24580 }, { "epoch": 0.7222581213652117, "grad_norm": 187169.546875, "learning_rate": 2.7774187863478824e-06, "loss": 0.1212, "step": 24590 }, { "epoch": 0.7225518416260354, "grad_norm": 145104.25, "learning_rate": 2.7744815837396466e-06, "loss": 0.1312, "step": 24600 }, { "epoch": 0.722845561886859, "grad_norm": 203803.703125, "learning_rate": 2.771544381131411e-06, "loss": 0.1204, "step": 24610 }, { "epoch": 0.7231392821476825, "grad_norm": 149926.4375, "learning_rate": 2.768607178523175e-06, "loss": 0.1189, "step": 24620 }, { "epoch": 0.7234330024085062, "grad_norm": 162387.890625, "learning_rate": 2.7656699759149384e-06, "loss": 0.1233, "step": 24630 }, { "epoch": 0.7237267226693297, "grad_norm": 154282.4375, "learning_rate": 2.7627327733067026e-06, "loss": 0.1232, "step": 24640 }, { "epoch": 0.7240204429301533, "grad_norm": 217680.640625, "learning_rate": 2.759795570698467e-06, "loss": 0.1177, "step": 24650 }, { "epoch": 0.724314163190977, "grad_norm": 134617.8125, "learning_rate": 2.756858368090231e-06, "loss": 0.1305, "step": 24660 }, { "epoch": 0.7246078834518005, "grad_norm": 138280.5, "learning_rate": 2.7539211654819952e-06, "loss": 0.117, "step": 24670 }, { "epoch": 0.7249016037126241, "grad_norm": 271527.5, "learning_rate": 2.7509839628737594e-06, "loss": 0.1202, "step": 24680 }, { "epoch": 0.7251953239734477, "grad_norm": 115356.8828125, "learning_rate": 2.7480467602655232e-06, "loss": 0.1205, "step": 24690 }, { "epoch": 0.7254890442342713, "grad_norm": 134128.296875, "learning_rate": 2.7451095576572874e-06, "loss": 0.1285, "step": 24700 }, { "epoch": 0.7257827644950948, "grad_norm": 305759.46875, "learning_rate": 2.7421723550490516e-06, "loss": 0.1257, "step": 24710 }, { "epoch": 0.7260764847559185, "grad_norm": 127932.9921875, "learning_rate": 2.7392351524408154e-06, "loss": 0.1196, "step": 24720 }, { "epoch": 0.7263702050167421, "grad_norm": 114992.125, "learning_rate": 2.7362979498325796e-06, "loss": 0.1194, "step": 24730 }, { "epoch": 0.7266639252775656, "grad_norm": 216979.09375, "learning_rate": 2.733360747224344e-06, "loss": 0.1051, "step": 24740 }, { "epoch": 0.7269576455383893, "grad_norm": 355554.5, "learning_rate": 2.7304235446161076e-06, "loss": 0.1318, "step": 24750 }, { "epoch": 0.7272513657992128, "grad_norm": 205115.46875, "learning_rate": 2.727486342007872e-06, "loss": 0.133, "step": 24760 }, { "epoch": 0.7275450860600364, "grad_norm": 194160.15625, "learning_rate": 2.724549139399636e-06, "loss": 0.1189, "step": 24770 }, { "epoch": 0.7278388063208601, "grad_norm": 169111.09375, "learning_rate": 2.7216119367914003e-06, "loss": 0.1101, "step": 24780 }, { "epoch": 0.7281325265816836, "grad_norm": 170897.671875, "learning_rate": 2.7186747341831645e-06, "loss": 0.1129, "step": 24790 }, { "epoch": 0.7284262468425072, "grad_norm": 279027.25, "learning_rate": 2.7157375315749287e-06, "loss": 0.1256, "step": 24800 }, { "epoch": 0.7287199671033308, "grad_norm": 142723.71875, "learning_rate": 2.712800328966692e-06, "loss": 0.1238, "step": 24810 }, { "epoch": 0.7290136873641544, "grad_norm": 118820.4765625, "learning_rate": 2.7098631263584562e-06, "loss": 0.1177, "step": 24820 }, { "epoch": 0.7293074076249779, "grad_norm": 344553.65625, "learning_rate": 2.7069259237502205e-06, "loss": 0.1295, "step": 24830 }, { "epoch": 0.7296011278858016, "grad_norm": 96117.4609375, "learning_rate": 2.7039887211419847e-06, "loss": 0.1183, "step": 24840 }, { "epoch": 0.7298948481466252, "grad_norm": 156803.890625, "learning_rate": 2.701051518533749e-06, "loss": 0.1117, "step": 24850 }, { "epoch": 0.7301885684074487, "grad_norm": 166847.234375, "learning_rate": 2.698114315925513e-06, "loss": 0.1108, "step": 24860 }, { "epoch": 0.7304822886682724, "grad_norm": 214024.546875, "learning_rate": 2.6951771133172764e-06, "loss": 0.1356, "step": 24870 }, { "epoch": 0.7307760089290959, "grad_norm": 176908.15625, "learning_rate": 2.6922399107090407e-06, "loss": 0.1253, "step": 24880 }, { "epoch": 0.7310697291899195, "grad_norm": 128578.65625, "learning_rate": 2.689302708100805e-06, "loss": 0.1247, "step": 24890 }, { "epoch": 0.7313634494507432, "grad_norm": 192330.328125, "learning_rate": 2.686365505492569e-06, "loss": 0.1202, "step": 24900 }, { "epoch": 0.7316571697115667, "grad_norm": 124802.1171875, "learning_rate": 2.6834283028843333e-06, "loss": 0.1234, "step": 24910 }, { "epoch": 0.7319508899723903, "grad_norm": 155451.375, "learning_rate": 2.6804911002760975e-06, "loss": 0.1295, "step": 24920 }, { "epoch": 0.7322446102332139, "grad_norm": 246621.046875, "learning_rate": 2.6775538976678613e-06, "loss": 0.1061, "step": 24930 }, { "epoch": 0.7325383304940375, "grad_norm": 410684.25, "learning_rate": 2.6746166950596255e-06, "loss": 0.1262, "step": 24940 }, { "epoch": 0.732832050754861, "grad_norm": 228199.859375, "learning_rate": 2.6716794924513893e-06, "loss": 0.1149, "step": 24950 }, { "epoch": 0.7331257710156847, "grad_norm": 184926.5, "learning_rate": 2.6687422898431535e-06, "loss": 0.1208, "step": 24960 }, { "epoch": 0.7334194912765083, "grad_norm": 182161.515625, "learning_rate": 2.6658050872349177e-06, "loss": 0.1314, "step": 24970 }, { "epoch": 0.7337132115373318, "grad_norm": 192233.21875, "learning_rate": 2.662867884626682e-06, "loss": 0.1204, "step": 24980 }, { "epoch": 0.7340069317981555, "grad_norm": 186681.921875, "learning_rate": 2.6599306820184457e-06, "loss": 0.1128, "step": 24990 }, { "epoch": 0.734300652058979, "grad_norm": 120924.125, "learning_rate": 2.65699347941021e-06, "loss": 0.1171, "step": 25000 }, { "epoch": 0.7345943723198026, "grad_norm": 117526.0390625, "learning_rate": 2.654056276801974e-06, "loss": 0.1135, "step": 25010 }, { "epoch": 0.7348880925806263, "grad_norm": 94019.0625, "learning_rate": 2.6511190741937383e-06, "loss": 0.1173, "step": 25020 }, { "epoch": 0.7351818128414498, "grad_norm": 160830.609375, "learning_rate": 2.6481818715855025e-06, "loss": 0.1317, "step": 25030 }, { "epoch": 0.7354755331022734, "grad_norm": 127589.078125, "learning_rate": 2.6452446689772663e-06, "loss": 0.116, "step": 25040 }, { "epoch": 0.735769253363097, "grad_norm": 221977.796875, "learning_rate": 2.64230746636903e-06, "loss": 0.1218, "step": 25050 }, { "epoch": 0.7360629736239206, "grad_norm": 167554.96875, "learning_rate": 2.6393702637607943e-06, "loss": 0.1071, "step": 25060 }, { "epoch": 0.7363566938847441, "grad_norm": 110573.5, "learning_rate": 2.6364330611525585e-06, "loss": 0.1271, "step": 25070 }, { "epoch": 0.7366504141455678, "grad_norm": 240960.515625, "learning_rate": 2.6334958585443227e-06, "loss": 0.1286, "step": 25080 }, { "epoch": 0.7369441344063914, "grad_norm": 174359.625, "learning_rate": 2.630558655936087e-06, "loss": 0.1197, "step": 25090 }, { "epoch": 0.7372378546672149, "grad_norm": 124844.921875, "learning_rate": 2.627621453327851e-06, "loss": 0.1208, "step": 25100 }, { "epoch": 0.7375315749280386, "grad_norm": 269268.4375, "learning_rate": 2.6246842507196145e-06, "loss": 0.1153, "step": 25110 }, { "epoch": 0.7378252951888621, "grad_norm": 211259.890625, "learning_rate": 2.6217470481113787e-06, "loss": 0.1343, "step": 25120 }, { "epoch": 0.7381190154496857, "grad_norm": 227640.59375, "learning_rate": 2.618809845503143e-06, "loss": 0.1182, "step": 25130 }, { "epoch": 0.7384127357105094, "grad_norm": 146829.859375, "learning_rate": 2.615872642894907e-06, "loss": 0.1208, "step": 25140 }, { "epoch": 0.7387064559713329, "grad_norm": 281092.8125, "learning_rate": 2.6129354402866713e-06, "loss": 0.1157, "step": 25150 }, { "epoch": 0.7390001762321565, "grad_norm": 230210.640625, "learning_rate": 2.6099982376784355e-06, "loss": 0.1245, "step": 25160 }, { "epoch": 0.73929389649298, "grad_norm": 117088.171875, "learning_rate": 2.6070610350701993e-06, "loss": 0.1201, "step": 25170 }, { "epoch": 0.7395876167538037, "grad_norm": 171895.671875, "learning_rate": 2.604123832461963e-06, "loss": 0.1305, "step": 25180 }, { "epoch": 0.7398813370146272, "grad_norm": 152405.0625, "learning_rate": 2.6011866298537273e-06, "loss": 0.1129, "step": 25190 }, { "epoch": 0.7401750572754509, "grad_norm": 284435.5625, "learning_rate": 2.5982494272454915e-06, "loss": 0.1102, "step": 25200 }, { "epoch": 0.7404687775362745, "grad_norm": 335025.34375, "learning_rate": 2.5953122246372557e-06, "loss": 0.1213, "step": 25210 }, { "epoch": 0.740762497797098, "grad_norm": 157936.046875, "learning_rate": 2.59237502202902e-06, "loss": 0.1152, "step": 25220 }, { "epoch": 0.7410562180579217, "grad_norm": 138446.984375, "learning_rate": 2.5894378194207837e-06, "loss": 0.1157, "step": 25230 }, { "epoch": 0.7413499383187452, "grad_norm": 184848.5, "learning_rate": 2.586500616812548e-06, "loss": 0.1189, "step": 25240 }, { "epoch": 0.7416436585795688, "grad_norm": 152943.796875, "learning_rate": 2.583563414204312e-06, "loss": 0.1162, "step": 25250 }, { "epoch": 0.7419373788403925, "grad_norm": 145209.78125, "learning_rate": 2.5806262115960764e-06, "loss": 0.1166, "step": 25260 }, { "epoch": 0.742231099101216, "grad_norm": 179295.796875, "learning_rate": 2.57768900898784e-06, "loss": 0.1249, "step": 25270 }, { "epoch": 0.7425248193620396, "grad_norm": 139434.0, "learning_rate": 2.5747518063796044e-06, "loss": 0.1289, "step": 25280 }, { "epoch": 0.7428185396228631, "grad_norm": 184712.78125, "learning_rate": 2.571814603771368e-06, "loss": 0.1174, "step": 25290 }, { "epoch": 0.7431122598836868, "grad_norm": 228575.3125, "learning_rate": 2.5688774011631324e-06, "loss": 0.1093, "step": 25300 }, { "epoch": 0.7434059801445103, "grad_norm": 164864.578125, "learning_rate": 2.5659401985548966e-06, "loss": 0.1161, "step": 25310 }, { "epoch": 0.743699700405334, "grad_norm": 149956.875, "learning_rate": 2.5630029959466608e-06, "loss": 0.1136, "step": 25320 }, { "epoch": 0.7439934206661576, "grad_norm": 100769.7109375, "learning_rate": 2.560065793338425e-06, "loss": 0.1226, "step": 25330 }, { "epoch": 0.7442871409269811, "grad_norm": 177496.625, "learning_rate": 2.557128590730189e-06, "loss": 0.1202, "step": 25340 }, { "epoch": 0.7445808611878048, "grad_norm": 244941.90625, "learning_rate": 2.5541913881219526e-06, "loss": 0.12, "step": 25350 }, { "epoch": 0.7448745814486283, "grad_norm": 246567.671875, "learning_rate": 2.5512541855137168e-06, "loss": 0.1178, "step": 25360 }, { "epoch": 0.7451683017094519, "grad_norm": 296481.125, "learning_rate": 2.548316982905481e-06, "loss": 0.1263, "step": 25370 }, { "epoch": 0.7454620219702756, "grad_norm": 118981.4140625, "learning_rate": 2.545379780297245e-06, "loss": 0.1109, "step": 25380 }, { "epoch": 0.7457557422310991, "grad_norm": 188260.703125, "learning_rate": 2.5424425776890094e-06, "loss": 0.1195, "step": 25390 }, { "epoch": 0.7460494624919227, "grad_norm": 199914.125, "learning_rate": 2.5395053750807736e-06, "loss": 0.1178, "step": 25400 }, { "epoch": 0.7463431827527462, "grad_norm": 149023.453125, "learning_rate": 2.536568172472537e-06, "loss": 0.1189, "step": 25410 }, { "epoch": 0.7466369030135699, "grad_norm": 625422.75, "learning_rate": 2.533630969864301e-06, "loss": 0.1044, "step": 25420 }, { "epoch": 0.7469306232743934, "grad_norm": 148014.828125, "learning_rate": 2.5306937672560654e-06, "loss": 0.1135, "step": 25430 }, { "epoch": 0.747224343535217, "grad_norm": 191885.28125, "learning_rate": 2.5277565646478296e-06, "loss": 0.1188, "step": 25440 }, { "epoch": 0.7475180637960407, "grad_norm": 130971.828125, "learning_rate": 2.524819362039594e-06, "loss": 0.1296, "step": 25450 }, { "epoch": 0.7478117840568642, "grad_norm": 169436.265625, "learning_rate": 2.521882159431358e-06, "loss": 0.1211, "step": 25460 }, { "epoch": 0.7481055043176879, "grad_norm": 138491.0625, "learning_rate": 2.518944956823122e-06, "loss": 0.1109, "step": 25470 }, { "epoch": 0.7483992245785114, "grad_norm": 185888.71875, "learning_rate": 2.516007754214886e-06, "loss": 0.1246, "step": 25480 }, { "epoch": 0.748692944839335, "grad_norm": 171212.890625, "learning_rate": 2.5130705516066502e-06, "loss": 0.1229, "step": 25490 }, { "epoch": 0.7489866651001587, "grad_norm": 287351.1875, "learning_rate": 2.510133348998414e-06, "loss": 0.1199, "step": 25500 }, { "epoch": 0.7492803853609822, "grad_norm": 135918.90625, "learning_rate": 2.507196146390178e-06, "loss": 0.1193, "step": 25510 }, { "epoch": 0.7495741056218058, "grad_norm": 166727.890625, "learning_rate": 2.504258943781942e-06, "loss": 0.1172, "step": 25520 }, { "epoch": 0.7498678258826293, "grad_norm": 326288.96875, "learning_rate": 2.501321741173706e-06, "loss": 0.1201, "step": 25530 }, { "epoch": 0.750161546143453, "grad_norm": 206013.765625, "learning_rate": 2.4983845385654704e-06, "loss": 0.1188, "step": 25540 }, { "epoch": 0.7504552664042766, "grad_norm": 474280.1875, "learning_rate": 2.4954473359572346e-06, "loss": 0.1243, "step": 25550 }, { "epoch": 0.7507489866651001, "grad_norm": 170151.5625, "learning_rate": 2.492510133348999e-06, "loss": 0.1175, "step": 25560 }, { "epoch": 0.7510427069259238, "grad_norm": 129460.4609375, "learning_rate": 2.4895729307407626e-06, "loss": 0.1245, "step": 25570 }, { "epoch": 0.7513364271867473, "grad_norm": 263831.6875, "learning_rate": 2.486635728132527e-06, "loss": 0.1238, "step": 25580 }, { "epoch": 0.751630147447571, "grad_norm": 269447.15625, "learning_rate": 2.483698525524291e-06, "loss": 0.1284, "step": 25590 }, { "epoch": 0.7519238677083945, "grad_norm": 340302.71875, "learning_rate": 2.480761322916055e-06, "loss": 0.1259, "step": 25600 }, { "epoch": 0.7522175879692181, "grad_norm": 169364.59375, "learning_rate": 2.477824120307819e-06, "loss": 0.1185, "step": 25610 }, { "epoch": 0.7525113082300418, "grad_norm": 176307.015625, "learning_rate": 2.4748869176995832e-06, "loss": 0.1197, "step": 25620 }, { "epoch": 0.7528050284908653, "grad_norm": 228709.6875, "learning_rate": 2.471949715091347e-06, "loss": 0.1015, "step": 25630 }, { "epoch": 0.7530987487516889, "grad_norm": 266566.6875, "learning_rate": 2.4690125124831112e-06, "loss": 0.1132, "step": 25640 }, { "epoch": 0.7533924690125124, "grad_norm": 132856.25, "learning_rate": 2.4660753098748754e-06, "loss": 0.1153, "step": 25650 }, { "epoch": 0.7536861892733361, "grad_norm": 183502.15625, "learning_rate": 2.4631381072666392e-06, "loss": 0.1311, "step": 25660 }, { "epoch": 0.7539799095341597, "grad_norm": 229616.796875, "learning_rate": 2.4602009046584034e-06, "loss": 0.1245, "step": 25670 }, { "epoch": 0.7542736297949832, "grad_norm": 165611.4375, "learning_rate": 2.4572637020501676e-06, "loss": 0.1272, "step": 25680 }, { "epoch": 0.7545673500558069, "grad_norm": 139136.703125, "learning_rate": 2.4543264994419314e-06, "loss": 0.1212, "step": 25690 }, { "epoch": 0.7548610703166304, "grad_norm": 161449.78125, "learning_rate": 2.4513892968336956e-06, "loss": 0.1175, "step": 25700 }, { "epoch": 0.755154790577454, "grad_norm": 203603.0625, "learning_rate": 2.44845209422546e-06, "loss": 0.1212, "step": 25710 }, { "epoch": 0.7554485108382776, "grad_norm": 139211.65625, "learning_rate": 2.445514891617224e-06, "loss": 0.1201, "step": 25720 }, { "epoch": 0.7557422310991012, "grad_norm": 128458.390625, "learning_rate": 2.4425776890089883e-06, "loss": 0.114, "step": 25730 }, { "epoch": 0.7560359513599249, "grad_norm": 162019.6875, "learning_rate": 2.439640486400752e-06, "loss": 0.1168, "step": 25740 }, { "epoch": 0.7563296716207484, "grad_norm": 167393.703125, "learning_rate": 2.4367032837925163e-06, "loss": 0.1202, "step": 25750 }, { "epoch": 0.756623391881572, "grad_norm": 178086.828125, "learning_rate": 2.4337660811842805e-06, "loss": 0.1253, "step": 25760 }, { "epoch": 0.7569171121423955, "grad_norm": 151353.40625, "learning_rate": 2.4308288785760447e-06, "loss": 0.1213, "step": 25770 }, { "epoch": 0.7572108324032192, "grad_norm": 155261.3125, "learning_rate": 2.4278916759678085e-06, "loss": 0.1252, "step": 25780 }, { "epoch": 0.7575045526640428, "grad_norm": 199817.5, "learning_rate": 2.4249544733595727e-06, "loss": 0.1221, "step": 25790 }, { "epoch": 0.7577982729248663, "grad_norm": 183704.171875, "learning_rate": 2.422017270751337e-06, "loss": 0.1248, "step": 25800 }, { "epoch": 0.75809199318569, "grad_norm": 168149.578125, "learning_rate": 2.4190800681431007e-06, "loss": 0.1197, "step": 25810 }, { "epoch": 0.7583857134465135, "grad_norm": 148806.59375, "learning_rate": 2.416142865534865e-06, "loss": 0.1145, "step": 25820 }, { "epoch": 0.7586794337073371, "grad_norm": 161965.546875, "learning_rate": 2.413205662926629e-06, "loss": 0.1207, "step": 25830 }, { "epoch": 0.7589731539681607, "grad_norm": 231353.046875, "learning_rate": 2.410268460318393e-06, "loss": 0.124, "step": 25840 }, { "epoch": 0.7592668742289843, "grad_norm": 232845.484375, "learning_rate": 2.407331257710157e-06, "loss": 0.1139, "step": 25850 }, { "epoch": 0.759560594489808, "grad_norm": 135321.515625, "learning_rate": 2.4043940551019213e-06, "loss": 0.1259, "step": 25860 }, { "epoch": 0.7598543147506315, "grad_norm": 193068.359375, "learning_rate": 2.401456852493685e-06, "loss": 0.1179, "step": 25870 }, { "epoch": 0.7601480350114551, "grad_norm": 302036.15625, "learning_rate": 2.3985196498854493e-06, "loss": 0.1075, "step": 25880 }, { "epoch": 0.7604417552722786, "grad_norm": 127785.2578125, "learning_rate": 2.3955824472772135e-06, "loss": 0.1082, "step": 25890 }, { "epoch": 0.7607354755331023, "grad_norm": 190778.15625, "learning_rate": 2.3926452446689773e-06, "loss": 0.125, "step": 25900 }, { "epoch": 0.7610291957939259, "grad_norm": 224608.609375, "learning_rate": 2.3897080420607415e-06, "loss": 0.1247, "step": 25910 }, { "epoch": 0.7613229160547494, "grad_norm": 209608.828125, "learning_rate": 2.3867708394525057e-06, "loss": 0.1247, "step": 25920 }, { "epoch": 0.7616166363155731, "grad_norm": 188213.828125, "learning_rate": 2.3838336368442695e-06, "loss": 0.1072, "step": 25930 }, { "epoch": 0.7619103565763966, "grad_norm": 149043.875, "learning_rate": 2.3808964342360337e-06, "loss": 0.1053, "step": 25940 }, { "epoch": 0.7622040768372202, "grad_norm": 266837.625, "learning_rate": 2.377959231627798e-06, "loss": 0.1084, "step": 25950 }, { "epoch": 0.7624977970980438, "grad_norm": 162924.109375, "learning_rate": 2.375022029019562e-06, "loss": 0.1249, "step": 25960 }, { "epoch": 0.7627915173588674, "grad_norm": 237460.1875, "learning_rate": 2.372084826411326e-06, "loss": 0.1141, "step": 25970 }, { "epoch": 0.763085237619691, "grad_norm": 155155.515625, "learning_rate": 2.36914762380309e-06, "loss": 0.12, "step": 25980 }, { "epoch": 0.7633789578805146, "grad_norm": 145958.09375, "learning_rate": 2.3662104211948543e-06, "loss": 0.1198, "step": 25990 }, { "epoch": 0.7636726781413382, "grad_norm": 202063.9375, "learning_rate": 2.3632732185866185e-06, "loss": 0.1135, "step": 26000 }, { "epoch": 0.7639663984021617, "grad_norm": 257020.546875, "learning_rate": 2.3603360159783823e-06, "loss": 0.1204, "step": 26010 }, { "epoch": 0.7642601186629854, "grad_norm": 243660.140625, "learning_rate": 2.3573988133701465e-06, "loss": 0.1265, "step": 26020 }, { "epoch": 0.764553838923809, "grad_norm": 115389.1484375, "learning_rate": 2.3544616107619107e-06, "loss": 0.1067, "step": 26030 }, { "epoch": 0.7648475591846325, "grad_norm": 165378.25, "learning_rate": 2.3515244081536745e-06, "loss": 0.106, "step": 26040 }, { "epoch": 0.7651412794454562, "grad_norm": 206303.109375, "learning_rate": 2.3485872055454387e-06, "loss": 0.1184, "step": 26050 }, { "epoch": 0.7654349997062797, "grad_norm": 277581.96875, "learning_rate": 2.345650002937203e-06, "loss": 0.1145, "step": 26060 }, { "epoch": 0.7657287199671033, "grad_norm": 195396.21875, "learning_rate": 2.3427128003289667e-06, "loss": 0.1118, "step": 26070 }, { "epoch": 0.7660224402279269, "grad_norm": 205071.59375, "learning_rate": 2.339775597720731e-06, "loss": 0.114, "step": 26080 }, { "epoch": 0.7663161604887505, "grad_norm": 198328.78125, "learning_rate": 2.336838395112495e-06, "loss": 0.1271, "step": 26090 }, { "epoch": 0.7666098807495741, "grad_norm": 122764.8671875, "learning_rate": 2.333901192504259e-06, "loss": 0.1172, "step": 26100 }, { "epoch": 0.7669036010103977, "grad_norm": 132897.125, "learning_rate": 2.330963989896023e-06, "loss": 0.1175, "step": 26110 }, { "epoch": 0.7671973212712213, "grad_norm": 155887.375, "learning_rate": 2.3280267872877873e-06, "loss": 0.129, "step": 26120 }, { "epoch": 0.7674910415320448, "grad_norm": 181487.1875, "learning_rate": 2.325089584679551e-06, "loss": 0.1193, "step": 26130 }, { "epoch": 0.7677847617928685, "grad_norm": 201728.875, "learning_rate": 2.3221523820713153e-06, "loss": 0.1194, "step": 26140 }, { "epoch": 0.7680784820536921, "grad_norm": 179552.25, "learning_rate": 2.3192151794630795e-06, "loss": 0.13, "step": 26150 }, { "epoch": 0.7683722023145156, "grad_norm": 165075.328125, "learning_rate": 2.3162779768548433e-06, "loss": 0.1069, "step": 26160 }, { "epoch": 0.7686659225753393, "grad_norm": 153044.703125, "learning_rate": 2.3133407742466075e-06, "loss": 0.1097, "step": 26170 }, { "epoch": 0.7689596428361628, "grad_norm": 263906.5, "learning_rate": 2.3104035716383718e-06, "loss": 0.1198, "step": 26180 }, { "epoch": 0.7692533630969864, "grad_norm": 169107.28125, "learning_rate": 2.307466369030136e-06, "loss": 0.1112, "step": 26190 }, { "epoch": 0.76954708335781, "grad_norm": 105174.4609375, "learning_rate": 2.3045291664218997e-06, "loss": 0.1085, "step": 26200 }, { "epoch": 0.7698408036186336, "grad_norm": 194425.78125, "learning_rate": 2.301591963813664e-06, "loss": 0.1233, "step": 26210 }, { "epoch": 0.7701345238794572, "grad_norm": 142723.046875, "learning_rate": 2.298654761205428e-06, "loss": 0.1191, "step": 26220 }, { "epoch": 0.7704282441402808, "grad_norm": 188108.1875, "learning_rate": 2.2957175585971924e-06, "loss": 0.1158, "step": 26230 }, { "epoch": 0.7707219644011044, "grad_norm": 194522.078125, "learning_rate": 2.2927803559889566e-06, "loss": 0.1208, "step": 26240 }, { "epoch": 0.7710156846619279, "grad_norm": 214518.25, "learning_rate": 2.2898431533807204e-06, "loss": 0.1251, "step": 26250 }, { "epoch": 0.7713094049227516, "grad_norm": 151408.515625, "learning_rate": 2.2869059507724846e-06, "loss": 0.126, "step": 26260 }, { "epoch": 0.7716031251835752, "grad_norm": 134856.375, "learning_rate": 2.283968748164249e-06, "loss": 0.1168, "step": 26270 }, { "epoch": 0.7718968454443987, "grad_norm": 216513.875, "learning_rate": 2.2810315455560126e-06, "loss": 0.119, "step": 26280 }, { "epoch": 0.7721905657052224, "grad_norm": 169240.359375, "learning_rate": 2.2780943429477768e-06, "loss": 0.1181, "step": 26290 }, { "epoch": 0.7724842859660459, "grad_norm": 236674.703125, "learning_rate": 2.275157140339541e-06, "loss": 0.1266, "step": 26300 }, { "epoch": 0.7727780062268695, "grad_norm": 208063.046875, "learning_rate": 2.2722199377313048e-06, "loss": 0.1227, "step": 26310 }, { "epoch": 0.7730717264876931, "grad_norm": 179248.078125, "learning_rate": 2.269282735123069e-06, "loss": 0.1168, "step": 26320 }, { "epoch": 0.7733654467485167, "grad_norm": 164122.6875, "learning_rate": 2.266345532514833e-06, "loss": 0.1178, "step": 26330 }, { "epoch": 0.7736591670093403, "grad_norm": 207180.203125, "learning_rate": 2.263408329906597e-06, "loss": 0.1115, "step": 26340 }, { "epoch": 0.7739528872701639, "grad_norm": 264635.40625, "learning_rate": 2.260471127298361e-06, "loss": 0.1156, "step": 26350 }, { "epoch": 0.7742466075309875, "grad_norm": 364145.0, "learning_rate": 2.2575339246901254e-06, "loss": 0.1232, "step": 26360 }, { "epoch": 0.774540327791811, "grad_norm": 148215.15625, "learning_rate": 2.254596722081889e-06, "loss": 0.1124, "step": 26370 }, { "epoch": 0.7748340480526347, "grad_norm": 161168.140625, "learning_rate": 2.2516595194736534e-06, "loss": 0.117, "step": 26380 }, { "epoch": 0.7751277683134583, "grad_norm": 181772.78125, "learning_rate": 2.2487223168654176e-06, "loss": 0.1207, "step": 26390 }, { "epoch": 0.7754214885742818, "grad_norm": 175143.046875, "learning_rate": 2.2457851142571814e-06, "loss": 0.1209, "step": 26400 }, { "epoch": 0.7757152088351055, "grad_norm": 113961.5, "learning_rate": 2.2428479116489456e-06, "loss": 0.1185, "step": 26410 }, { "epoch": 0.776008929095929, "grad_norm": 145810.296875, "learning_rate": 2.23991070904071e-06, "loss": 0.1139, "step": 26420 }, { "epoch": 0.7763026493567526, "grad_norm": 203291.328125, "learning_rate": 2.2369735064324736e-06, "loss": 0.1146, "step": 26430 }, { "epoch": 0.7765963696175762, "grad_norm": 251485.9375, "learning_rate": 2.234036303824238e-06, "loss": 0.1193, "step": 26440 }, { "epoch": 0.7768900898783998, "grad_norm": 262067.4375, "learning_rate": 2.231099101216002e-06, "loss": 0.1081, "step": 26450 }, { "epoch": 0.7771838101392234, "grad_norm": 332108.625, "learning_rate": 2.2281618986077662e-06, "loss": 0.1249, "step": 26460 }, { "epoch": 0.777477530400047, "grad_norm": 197788.328125, "learning_rate": 2.2252246959995304e-06, "loss": 0.1201, "step": 26470 }, { "epoch": 0.7777712506608706, "grad_norm": 152662.375, "learning_rate": 2.2222874933912942e-06, "loss": 0.1277, "step": 26480 }, { "epoch": 0.7780649709216941, "grad_norm": 162678.671875, "learning_rate": 2.2193502907830584e-06, "loss": 0.1078, "step": 26490 }, { "epoch": 0.7783586911825178, "grad_norm": 220589.6875, "learning_rate": 2.2164130881748226e-06, "loss": 0.1098, "step": 26500 }, { "epoch": 0.7786524114433414, "grad_norm": 184769.140625, "learning_rate": 2.213475885566587e-06, "loss": 0.1133, "step": 26510 }, { "epoch": 0.7789461317041649, "grad_norm": 265959.40625, "learning_rate": 2.2105386829583506e-06, "loss": 0.115, "step": 26520 }, { "epoch": 0.7792398519649886, "grad_norm": 404105.84375, "learning_rate": 2.207601480350115e-06, "loss": 0.1261, "step": 26530 }, { "epoch": 0.7795335722258121, "grad_norm": 213968.0625, "learning_rate": 2.204664277741879e-06, "loss": 0.1335, "step": 26540 }, { "epoch": 0.7798272924866357, "grad_norm": 143235.421875, "learning_rate": 2.201727075133643e-06, "loss": 0.1115, "step": 26550 }, { "epoch": 0.7801210127474594, "grad_norm": 165685.703125, "learning_rate": 2.198789872525407e-06, "loss": 0.1001, "step": 26560 }, { "epoch": 0.7804147330082829, "grad_norm": 162693.984375, "learning_rate": 2.1958526699171713e-06, "loss": 0.1225, "step": 26570 }, { "epoch": 0.7807084532691065, "grad_norm": 169003.109375, "learning_rate": 2.192915467308935e-06, "loss": 0.1196, "step": 26580 }, { "epoch": 0.7810021735299301, "grad_norm": 148495.9375, "learning_rate": 2.1899782647006992e-06, "loss": 0.1129, "step": 26590 }, { "epoch": 0.7812958937907537, "grad_norm": 164002.1875, "learning_rate": 2.1870410620924635e-06, "loss": 0.1092, "step": 26600 }, { "epoch": 0.7815896140515772, "grad_norm": 169507.640625, "learning_rate": 2.1841038594842272e-06, "loss": 0.1135, "step": 26610 }, { "epoch": 0.7818833343124009, "grad_norm": 182172.65625, "learning_rate": 2.1811666568759915e-06, "loss": 0.1153, "step": 26620 }, { "epoch": 0.7821770545732245, "grad_norm": 208195.0625, "learning_rate": 2.1782294542677557e-06, "loss": 0.1158, "step": 26630 }, { "epoch": 0.782470774834048, "grad_norm": 194285.84375, "learning_rate": 2.1752922516595194e-06, "loss": 0.1312, "step": 26640 }, { "epoch": 0.7827644950948717, "grad_norm": 275925.1875, "learning_rate": 2.1723550490512837e-06, "loss": 0.1192, "step": 26650 }, { "epoch": 0.7830582153556952, "grad_norm": 230753.734375, "learning_rate": 2.169417846443048e-06, "loss": 0.1192, "step": 26660 }, { "epoch": 0.7833519356165188, "grad_norm": 244829.625, "learning_rate": 2.1664806438348116e-06, "loss": 0.1186, "step": 26670 }, { "epoch": 0.7836456558773425, "grad_norm": 135014.5, "learning_rate": 2.163543441226576e-06, "loss": 0.1216, "step": 26680 }, { "epoch": 0.783939376138166, "grad_norm": 203124.09375, "learning_rate": 2.16060623861834e-06, "loss": 0.1168, "step": 26690 }, { "epoch": 0.7842330963989896, "grad_norm": 172622.734375, "learning_rate": 2.1576690360101043e-06, "loss": 0.1213, "step": 26700 }, { "epoch": 0.7845268166598132, "grad_norm": 184437.46875, "learning_rate": 2.154731833401868e-06, "loss": 0.1155, "step": 26710 }, { "epoch": 0.7848205369206368, "grad_norm": 179892.53125, "learning_rate": 2.1517946307936323e-06, "loss": 0.1066, "step": 26720 }, { "epoch": 0.7851142571814603, "grad_norm": 158513.71875, "learning_rate": 2.1488574281853965e-06, "loss": 0.1239, "step": 26730 }, { "epoch": 0.785407977442284, "grad_norm": 137577.125, "learning_rate": 2.1459202255771607e-06, "loss": 0.112, "step": 26740 }, { "epoch": 0.7857016977031076, "grad_norm": 124564.78125, "learning_rate": 2.1429830229689245e-06, "loss": 0.1152, "step": 26750 }, { "epoch": 0.7859954179639311, "grad_norm": 479449.3125, "learning_rate": 2.1400458203606887e-06, "loss": 0.1186, "step": 26760 }, { "epoch": 0.7862891382247548, "grad_norm": 206515.453125, "learning_rate": 2.137108617752453e-06, "loss": 0.1103, "step": 26770 }, { "epoch": 0.7865828584855783, "grad_norm": 130641.9375, "learning_rate": 2.134171415144217e-06, "loss": 0.1132, "step": 26780 }, { "epoch": 0.7868765787464019, "grad_norm": 126372.84375, "learning_rate": 2.131234212535981e-06, "loss": 0.1101, "step": 26790 }, { "epoch": 0.7871702990072256, "grad_norm": 139446.390625, "learning_rate": 2.128297009927745e-06, "loss": 0.1176, "step": 26800 }, { "epoch": 0.7874640192680491, "grad_norm": 231313.671875, "learning_rate": 2.1253598073195093e-06, "loss": 0.1213, "step": 26810 }, { "epoch": 0.7877577395288727, "grad_norm": 274819.0, "learning_rate": 2.122422604711273e-06, "loss": 0.0984, "step": 26820 }, { "epoch": 0.7880514597896963, "grad_norm": 168578.484375, "learning_rate": 2.1194854021030373e-06, "loss": 0.1214, "step": 26830 }, { "epoch": 0.7883451800505199, "grad_norm": 262083.40625, "learning_rate": 2.1165481994948015e-06, "loss": 0.1168, "step": 26840 }, { "epoch": 0.7886389003113434, "grad_norm": 261776.125, "learning_rate": 2.1136109968865653e-06, "loss": 0.1179, "step": 26850 }, { "epoch": 0.7889326205721671, "grad_norm": 163355.6875, "learning_rate": 2.1106737942783295e-06, "loss": 0.1192, "step": 26860 }, { "epoch": 0.7892263408329907, "grad_norm": 338837.375, "learning_rate": 2.1077365916700937e-06, "loss": 0.1256, "step": 26870 }, { "epoch": 0.7895200610938142, "grad_norm": 177715.125, "learning_rate": 2.1047993890618575e-06, "loss": 0.1078, "step": 26880 }, { "epoch": 0.7898137813546379, "grad_norm": 151379.765625, "learning_rate": 2.1018621864536217e-06, "loss": 0.1211, "step": 26890 }, { "epoch": 0.7901075016154614, "grad_norm": 170741.625, "learning_rate": 2.098924983845386e-06, "loss": 0.1108, "step": 26900 }, { "epoch": 0.790401221876285, "grad_norm": 153456.21875, "learning_rate": 2.0959877812371497e-06, "loss": 0.1165, "step": 26910 }, { "epoch": 0.7906949421371087, "grad_norm": 238320.96875, "learning_rate": 2.093050578628914e-06, "loss": 0.1081, "step": 26920 }, { "epoch": 0.7909886623979322, "grad_norm": 213691.84375, "learning_rate": 2.090113376020678e-06, "loss": 0.1129, "step": 26930 }, { "epoch": 0.7912823826587558, "grad_norm": 235233.203125, "learning_rate": 2.087176173412442e-06, "loss": 0.1074, "step": 26940 }, { "epoch": 0.7915761029195794, "grad_norm": 211036.984375, "learning_rate": 2.084238970804206e-06, "loss": 0.114, "step": 26950 }, { "epoch": 0.791869823180403, "grad_norm": 169818.3125, "learning_rate": 2.0813017681959703e-06, "loss": 0.1142, "step": 26960 }, { "epoch": 0.7921635434412265, "grad_norm": 202162.34375, "learning_rate": 2.0783645655877345e-06, "loss": 0.1119, "step": 26970 }, { "epoch": 0.7924572637020502, "grad_norm": 273083.125, "learning_rate": 2.0754273629794987e-06, "loss": 0.1271, "step": 26980 }, { "epoch": 0.7927509839628738, "grad_norm": 173229.46875, "learning_rate": 2.0724901603712625e-06, "loss": 0.124, "step": 26990 }, { "epoch": 0.7930447042236973, "grad_norm": 172363.859375, "learning_rate": 2.0695529577630267e-06, "loss": 0.1082, "step": 27000 }, { "epoch": 0.793338424484521, "grad_norm": 273020.375, "learning_rate": 2.066615755154791e-06, "loss": 0.1137, "step": 27010 }, { "epoch": 0.7936321447453445, "grad_norm": 141774.296875, "learning_rate": 2.0636785525465547e-06, "loss": 0.1182, "step": 27020 }, { "epoch": 0.7939258650061681, "grad_norm": 186953.03125, "learning_rate": 2.060741349938319e-06, "loss": 0.1128, "step": 27030 }, { "epoch": 0.7942195852669918, "grad_norm": 143658.359375, "learning_rate": 2.057804147330083e-06, "loss": 0.1171, "step": 27040 }, { "epoch": 0.7945133055278153, "grad_norm": 124814.8984375, "learning_rate": 2.054866944721847e-06, "loss": 0.1037, "step": 27050 }, { "epoch": 0.7948070257886389, "grad_norm": 131915.40625, "learning_rate": 2.051929742113611e-06, "loss": 0.1155, "step": 27060 }, { "epoch": 0.7951007460494625, "grad_norm": 163746.875, "learning_rate": 2.0489925395053754e-06, "loss": 0.1176, "step": 27070 }, { "epoch": 0.7953944663102861, "grad_norm": 142317.8125, "learning_rate": 2.046055336897139e-06, "loss": 0.1207, "step": 27080 }, { "epoch": 0.7956881865711096, "grad_norm": 209084.125, "learning_rate": 2.0431181342889034e-06, "loss": 0.12, "step": 27090 }, { "epoch": 0.7959819068319333, "grad_norm": 156782.0, "learning_rate": 2.0401809316806676e-06, "loss": 0.1206, "step": 27100 }, { "epoch": 0.7962756270927569, "grad_norm": 158553.90625, "learning_rate": 2.0372437290724313e-06, "loss": 0.1154, "step": 27110 }, { "epoch": 0.7965693473535804, "grad_norm": 171578.109375, "learning_rate": 2.0343065264641956e-06, "loss": 0.1117, "step": 27120 }, { "epoch": 0.7968630676144041, "grad_norm": 172856.359375, "learning_rate": 2.0313693238559598e-06, "loss": 0.1139, "step": 27130 }, { "epoch": 0.7971567878752276, "grad_norm": 388703.6875, "learning_rate": 2.0284321212477236e-06, "loss": 0.1231, "step": 27140 }, { "epoch": 0.7974505081360512, "grad_norm": 190363.5, "learning_rate": 2.0254949186394878e-06, "loss": 0.0989, "step": 27150 }, { "epoch": 0.7977442283968749, "grad_norm": 179383.765625, "learning_rate": 2.022557716031252e-06, "loss": 0.115, "step": 27160 }, { "epoch": 0.7980379486576984, "grad_norm": 170238.265625, "learning_rate": 2.0196205134230158e-06, "loss": 0.1073, "step": 27170 }, { "epoch": 0.798331668918522, "grad_norm": 253299.125, "learning_rate": 2.01668331081478e-06, "loss": 0.1253, "step": 27180 }, { "epoch": 0.7986253891793456, "grad_norm": 276214.0625, "learning_rate": 2.013746108206544e-06, "loss": 0.1103, "step": 27190 }, { "epoch": 0.7989191094401692, "grad_norm": 210705.765625, "learning_rate": 2.0108089055983084e-06, "loss": 0.1083, "step": 27200 }, { "epoch": 0.7992128297009927, "grad_norm": 150741.109375, "learning_rate": 2.0078717029900726e-06, "loss": 0.1093, "step": 27210 }, { "epoch": 0.7995065499618164, "grad_norm": 227413.46875, "learning_rate": 2.0049345003818364e-06, "loss": 0.1248, "step": 27220 }, { "epoch": 0.79980027022264, "grad_norm": 160776.0625, "learning_rate": 2.0019972977736006e-06, "loss": 0.1065, "step": 27230 }, { "epoch": 0.8000939904834635, "grad_norm": 229915.40625, "learning_rate": 1.999060095165365e-06, "loss": 0.1196, "step": 27240 }, { "epoch": 0.8003877107442872, "grad_norm": 237403.296875, "learning_rate": 1.996122892557129e-06, "loss": 0.1138, "step": 27250 }, { "epoch": 0.8006814310051107, "grad_norm": 158606.03125, "learning_rate": 1.993185689948893e-06, "loss": 0.1173, "step": 27260 }, { "epoch": 0.8009751512659343, "grad_norm": 265077.46875, "learning_rate": 1.990248487340657e-06, "loss": 0.1136, "step": 27270 }, { "epoch": 0.801268871526758, "grad_norm": 152428.984375, "learning_rate": 1.987311284732421e-06, "loss": 0.1145, "step": 27280 }, { "epoch": 0.8015625917875815, "grad_norm": 145692.9375, "learning_rate": 1.984374082124185e-06, "loss": 0.1088, "step": 27290 }, { "epoch": 0.8018563120484051, "grad_norm": 264290.0625, "learning_rate": 1.981436879515949e-06, "loss": 0.1155, "step": 27300 }, { "epoch": 0.8021500323092287, "grad_norm": 219483.21875, "learning_rate": 1.9784996769077134e-06, "loss": 0.1018, "step": 27310 }, { "epoch": 0.8024437525700523, "grad_norm": 184102.640625, "learning_rate": 1.975562474299477e-06, "loss": 0.1175, "step": 27320 }, { "epoch": 0.8027374728308758, "grad_norm": 182575.125, "learning_rate": 1.9726252716912414e-06, "loss": 0.108, "step": 27330 }, { "epoch": 0.8030311930916995, "grad_norm": 243109.265625, "learning_rate": 1.9696880690830056e-06, "loss": 0.1235, "step": 27340 }, { "epoch": 0.8033249133525231, "grad_norm": 222126.78125, "learning_rate": 1.9667508664747694e-06, "loss": 0.1158, "step": 27350 }, { "epoch": 0.8036186336133466, "grad_norm": 214465.296875, "learning_rate": 1.9638136638665336e-06, "loss": 0.1135, "step": 27360 }, { "epoch": 0.8039123538741703, "grad_norm": 128064.265625, "learning_rate": 1.960876461258298e-06, "loss": 0.1134, "step": 27370 }, { "epoch": 0.8042060741349938, "grad_norm": 145561.59375, "learning_rate": 1.9579392586500616e-06, "loss": 0.123, "step": 27380 }, { "epoch": 0.8044997943958174, "grad_norm": 141528.859375, "learning_rate": 1.955002056041826e-06, "loss": 0.1084, "step": 27390 }, { "epoch": 0.8047935146566411, "grad_norm": 200213.09375, "learning_rate": 1.95206485343359e-06, "loss": 0.115, "step": 27400 }, { "epoch": 0.8050872349174646, "grad_norm": 134089.4375, "learning_rate": 1.949127650825354e-06, "loss": 0.1215, "step": 27410 }, { "epoch": 0.8053809551782882, "grad_norm": 218461.734375, "learning_rate": 1.946190448217118e-06, "loss": 0.1212, "step": 27420 }, { "epoch": 0.8056746754391118, "grad_norm": 135406.796875, "learning_rate": 1.9432532456088822e-06, "loss": 0.1087, "step": 27430 }, { "epoch": 0.8059683956999354, "grad_norm": 226242.15625, "learning_rate": 1.9403160430006464e-06, "loss": 0.1127, "step": 27440 }, { "epoch": 0.8062621159607589, "grad_norm": 230709.921875, "learning_rate": 1.9373788403924102e-06, "loss": 0.1122, "step": 27450 }, { "epoch": 0.8065558362215826, "grad_norm": 189805.5, "learning_rate": 1.9344416377841744e-06, "loss": 0.1042, "step": 27460 }, { "epoch": 0.8068495564824062, "grad_norm": 165059.078125, "learning_rate": 1.9315044351759386e-06, "loss": 0.1175, "step": 27470 }, { "epoch": 0.8071432767432297, "grad_norm": 188931.984375, "learning_rate": 1.928567232567703e-06, "loss": 0.1124, "step": 27480 }, { "epoch": 0.8074369970040534, "grad_norm": 160278.015625, "learning_rate": 1.925630029959467e-06, "loss": 0.1172, "step": 27490 }, { "epoch": 0.8077307172648769, "grad_norm": 523682.3125, "learning_rate": 1.922692827351231e-06, "loss": 0.1262, "step": 27500 }, { "epoch": 0.8080244375257005, "grad_norm": 187523.78125, "learning_rate": 1.919755624742995e-06, "loss": 0.1257, "step": 27510 }, { "epoch": 0.8083181577865242, "grad_norm": 203493.4375, "learning_rate": 1.9168184221347593e-06, "loss": 0.1151, "step": 27520 }, { "epoch": 0.8086118780473477, "grad_norm": 180036.875, "learning_rate": 1.913881219526523e-06, "loss": 0.1077, "step": 27530 }, { "epoch": 0.8089055983081713, "grad_norm": 218194.859375, "learning_rate": 1.9109440169182873e-06, "loss": 0.1275, "step": 27540 }, { "epoch": 0.8091993185689949, "grad_norm": 119250.953125, "learning_rate": 1.9080068143100515e-06, "loss": 0.1133, "step": 27550 }, { "epoch": 0.8094930388298185, "grad_norm": 289209.15625, "learning_rate": 1.9050696117018153e-06, "loss": 0.1242, "step": 27560 }, { "epoch": 0.8097867590906421, "grad_norm": 135805.03125, "learning_rate": 1.9021324090935795e-06, "loss": 0.1185, "step": 27570 }, { "epoch": 0.8100804793514657, "grad_norm": 177595.9375, "learning_rate": 1.8991952064853437e-06, "loss": 0.1146, "step": 27580 }, { "epoch": 0.8103741996122893, "grad_norm": 245202.546875, "learning_rate": 1.8962580038771075e-06, "loss": 0.116, "step": 27590 }, { "epoch": 0.8106679198731128, "grad_norm": 133776.734375, "learning_rate": 1.8933208012688717e-06, "loss": 0.12, "step": 27600 }, { "epoch": 0.8109616401339365, "grad_norm": 177775.5625, "learning_rate": 1.8903835986606359e-06, "loss": 0.1246, "step": 27610 }, { "epoch": 0.81125536039476, "grad_norm": 181830.796875, "learning_rate": 1.8874463960523997e-06, "loss": 0.1052, "step": 27620 }, { "epoch": 0.8115490806555836, "grad_norm": 253829.359375, "learning_rate": 1.8845091934441639e-06, "loss": 0.1135, "step": 27630 }, { "epoch": 0.8118428009164073, "grad_norm": 161068.046875, "learning_rate": 1.881571990835928e-06, "loss": 0.1236, "step": 27640 }, { "epoch": 0.8121365211772308, "grad_norm": 175127.578125, "learning_rate": 1.878634788227692e-06, "loss": 0.1143, "step": 27650 }, { "epoch": 0.8124302414380544, "grad_norm": 170564.6875, "learning_rate": 1.8756975856194563e-06, "loss": 0.1159, "step": 27660 }, { "epoch": 0.812723961698878, "grad_norm": 98747.2265625, "learning_rate": 1.8727603830112203e-06, "loss": 0.1173, "step": 27670 }, { "epoch": 0.8130176819597016, "grad_norm": 137887.640625, "learning_rate": 1.8698231804029843e-06, "loss": 0.1148, "step": 27680 }, { "epoch": 0.8133114022205252, "grad_norm": 223936.390625, "learning_rate": 1.8668859777947485e-06, "loss": 0.1228, "step": 27690 }, { "epoch": 0.8136051224813488, "grad_norm": 199416.203125, "learning_rate": 1.8639487751865127e-06, "loss": 0.1177, "step": 27700 }, { "epoch": 0.8138988427421724, "grad_norm": 190696.328125, "learning_rate": 1.8610115725782765e-06, "loss": 0.1168, "step": 27710 }, { "epoch": 0.8141925630029959, "grad_norm": 218024.421875, "learning_rate": 1.8580743699700407e-06, "loss": 0.1192, "step": 27720 }, { "epoch": 0.8144862832638196, "grad_norm": 177251.78125, "learning_rate": 1.855137167361805e-06, "loss": 0.1166, "step": 27730 }, { "epoch": 0.8147800035246431, "grad_norm": 139056.359375, "learning_rate": 1.8521999647535687e-06, "loss": 0.1199, "step": 27740 }, { "epoch": 0.8150737237854667, "grad_norm": 150722.109375, "learning_rate": 1.849262762145333e-06, "loss": 0.0994, "step": 27750 }, { "epoch": 0.8153674440462904, "grad_norm": 272079.59375, "learning_rate": 1.8463255595370971e-06, "loss": 0.116, "step": 27760 }, { "epoch": 0.8156611643071139, "grad_norm": 145554.90625, "learning_rate": 1.843388356928861e-06, "loss": 0.1152, "step": 27770 }, { "epoch": 0.8159548845679375, "grad_norm": 164698.03125, "learning_rate": 1.8404511543206253e-06, "loss": 0.1037, "step": 27780 }, { "epoch": 0.816248604828761, "grad_norm": 315569.125, "learning_rate": 1.8375139517123893e-06, "loss": 0.1162, "step": 27790 }, { "epoch": 0.8165423250895847, "grad_norm": 160905.703125, "learning_rate": 1.8345767491041533e-06, "loss": 0.1122, "step": 27800 }, { "epoch": 0.8168360453504083, "grad_norm": 198813.421875, "learning_rate": 1.8316395464959175e-06, "loss": 0.1146, "step": 27810 }, { "epoch": 0.8171297656112319, "grad_norm": 272145.0, "learning_rate": 1.8287023438876817e-06, "loss": 0.1153, "step": 27820 }, { "epoch": 0.8174234858720555, "grad_norm": 193610.921875, "learning_rate": 1.8257651412794455e-06, "loss": 0.1147, "step": 27830 }, { "epoch": 0.817717206132879, "grad_norm": 121240.421875, "learning_rate": 1.8228279386712097e-06, "loss": 0.1156, "step": 27840 }, { "epoch": 0.8180109263937027, "grad_norm": 151174.265625, "learning_rate": 1.819890736062974e-06, "loss": 0.1063, "step": 27850 }, { "epoch": 0.8183046466545262, "grad_norm": 215001.78125, "learning_rate": 1.8169535334547377e-06, "loss": 0.1225, "step": 27860 }, { "epoch": 0.8185983669153498, "grad_norm": 177882.109375, "learning_rate": 1.814016330846502e-06, "loss": 0.112, "step": 27870 }, { "epoch": 0.8188920871761735, "grad_norm": 204134.359375, "learning_rate": 1.8110791282382661e-06, "loss": 0.1183, "step": 27880 }, { "epoch": 0.819185807436997, "grad_norm": 201591.15625, "learning_rate": 1.8081419256300301e-06, "loss": 0.1111, "step": 27890 }, { "epoch": 0.8194795276978206, "grad_norm": 179000.75, "learning_rate": 1.8052047230217941e-06, "loss": 0.1254, "step": 27900 }, { "epoch": 0.8197732479586441, "grad_norm": 196845.515625, "learning_rate": 1.8022675204135583e-06, "loss": 0.11, "step": 27910 }, { "epoch": 0.8200669682194678, "grad_norm": 172538.484375, "learning_rate": 1.7993303178053223e-06, "loss": 0.1156, "step": 27920 }, { "epoch": 0.8203606884802914, "grad_norm": 198944.0625, "learning_rate": 1.7963931151970865e-06, "loss": 0.131, "step": 27930 }, { "epoch": 0.820654408741115, "grad_norm": 164185.078125, "learning_rate": 1.7934559125888508e-06, "loss": 0.1092, "step": 27940 }, { "epoch": 0.8209481290019386, "grad_norm": 220186.609375, "learning_rate": 1.7905187099806145e-06, "loss": 0.1144, "step": 27950 }, { "epoch": 0.8212418492627621, "grad_norm": 276335.0, "learning_rate": 1.7875815073723787e-06, "loss": 0.1174, "step": 27960 }, { "epoch": 0.8215355695235858, "grad_norm": 219303.59375, "learning_rate": 1.784644304764143e-06, "loss": 0.1193, "step": 27970 }, { "epoch": 0.8218292897844093, "grad_norm": 169154.921875, "learning_rate": 1.7817071021559067e-06, "loss": 0.0976, "step": 27980 }, { "epoch": 0.8221230100452329, "grad_norm": 244849.703125, "learning_rate": 1.778769899547671e-06, "loss": 0.101, "step": 27990 }, { "epoch": 0.8224167303060566, "grad_norm": 223639.515625, "learning_rate": 1.7758326969394352e-06, "loss": 0.1164, "step": 28000 }, { "epoch": 0.8227104505668801, "grad_norm": 228385.375, "learning_rate": 1.7728954943311992e-06, "loss": 0.1186, "step": 28010 }, { "epoch": 0.8230041708277037, "grad_norm": 182263.828125, "learning_rate": 1.7699582917229632e-06, "loss": 0.1219, "step": 28020 }, { "epoch": 0.8232978910885272, "grad_norm": 221114.484375, "learning_rate": 1.7670210891147274e-06, "loss": 0.1165, "step": 28030 }, { "epoch": 0.8235916113493509, "grad_norm": 212769.34375, "learning_rate": 1.7640838865064914e-06, "loss": 0.1211, "step": 28040 }, { "epoch": 0.8238853316101745, "grad_norm": 170714.28125, "learning_rate": 1.7611466838982556e-06, "loss": 0.1194, "step": 28050 }, { "epoch": 0.824179051870998, "grad_norm": 249175.0, "learning_rate": 1.7582094812900194e-06, "loss": 0.1271, "step": 28060 }, { "epoch": 0.8244727721318217, "grad_norm": 157598.75, "learning_rate": 1.7552722786817836e-06, "loss": 0.1156, "step": 28070 }, { "epoch": 0.8247664923926452, "grad_norm": 215363.78125, "learning_rate": 1.7523350760735478e-06, "loss": 0.1158, "step": 28080 }, { "epoch": 0.8250602126534688, "grad_norm": 184777.15625, "learning_rate": 1.7493978734653116e-06, "loss": 0.1211, "step": 28090 }, { "epoch": 0.8253539329142924, "grad_norm": 187580.46875, "learning_rate": 1.7464606708570758e-06, "loss": 0.1093, "step": 28100 }, { "epoch": 0.825647653175116, "grad_norm": 204877.875, "learning_rate": 1.74352346824884e-06, "loss": 0.1142, "step": 28110 }, { "epoch": 0.8259413734359397, "grad_norm": 177419.859375, "learning_rate": 1.740586265640604e-06, "loss": 0.1136, "step": 28120 }, { "epoch": 0.8262350936967632, "grad_norm": 148655.65625, "learning_rate": 1.737649063032368e-06, "loss": 0.1088, "step": 28130 }, { "epoch": 0.8265288139575868, "grad_norm": 161278.171875, "learning_rate": 1.7347118604241322e-06, "loss": 0.1025, "step": 28140 }, { "epoch": 0.8268225342184103, "grad_norm": 172056.796875, "learning_rate": 1.7317746578158962e-06, "loss": 0.1178, "step": 28150 }, { "epoch": 0.827116254479234, "grad_norm": 186397.015625, "learning_rate": 1.7288374552076604e-06, "loss": 0.1134, "step": 28160 }, { "epoch": 0.8274099747400576, "grad_norm": 166996.015625, "learning_rate": 1.7259002525994246e-06, "loss": 0.1235, "step": 28170 }, { "epoch": 0.8277036950008811, "grad_norm": 178211.109375, "learning_rate": 1.7229630499911884e-06, "loss": 0.116, "step": 28180 }, { "epoch": 0.8279974152617048, "grad_norm": 304139.71875, "learning_rate": 1.7200258473829526e-06, "loss": 0.1161, "step": 28190 }, { "epoch": 0.8282911355225283, "grad_norm": 504701.375, "learning_rate": 1.7170886447747168e-06, "loss": 0.1136, "step": 28200 }, { "epoch": 0.828584855783352, "grad_norm": 209573.828125, "learning_rate": 1.7141514421664806e-06, "loss": 0.1052, "step": 28210 }, { "epoch": 0.8288785760441755, "grad_norm": 160252.4375, "learning_rate": 1.7112142395582448e-06, "loss": 0.111, "step": 28220 }, { "epoch": 0.8291722963049991, "grad_norm": 233859.0625, "learning_rate": 1.708277036950009e-06, "loss": 0.1209, "step": 28230 }, { "epoch": 0.8294660165658228, "grad_norm": 144380.0, "learning_rate": 1.705339834341773e-06, "loss": 0.106, "step": 28240 }, { "epoch": 0.8297597368266463, "grad_norm": 155994.1875, "learning_rate": 1.702402631733537e-06, "loss": 0.1091, "step": 28250 }, { "epoch": 0.8300534570874699, "grad_norm": 164680.265625, "learning_rate": 1.6994654291253012e-06, "loss": 0.1245, "step": 28260 }, { "epoch": 0.8303471773482934, "grad_norm": 271525.71875, "learning_rate": 1.6965282265170652e-06, "loss": 0.1249, "step": 28270 }, { "epoch": 0.8306408976091171, "grad_norm": 230666.90625, "learning_rate": 1.6935910239088294e-06, "loss": 0.1118, "step": 28280 }, { "epoch": 0.8309346178699407, "grad_norm": 204987.34375, "learning_rate": 1.6906538213005936e-06, "loss": 0.1099, "step": 28290 }, { "epoch": 0.8312283381307642, "grad_norm": 197383.953125, "learning_rate": 1.6877166186923574e-06, "loss": 0.1126, "step": 28300 }, { "epoch": 0.8315220583915879, "grad_norm": 168853.53125, "learning_rate": 1.6847794160841216e-06, "loss": 0.1125, "step": 28310 }, { "epoch": 0.8318157786524114, "grad_norm": 190823.6875, "learning_rate": 1.6818422134758858e-06, "loss": 0.1081, "step": 28320 }, { "epoch": 0.832109498913235, "grad_norm": 164764.078125, "learning_rate": 1.6789050108676496e-06, "loss": 0.1102, "step": 28330 }, { "epoch": 0.8324032191740586, "grad_norm": 132922.859375, "learning_rate": 1.6759678082594138e-06, "loss": 0.1161, "step": 28340 }, { "epoch": 0.8326969394348822, "grad_norm": 143376.828125, "learning_rate": 1.673030605651178e-06, "loss": 0.1064, "step": 28350 }, { "epoch": 0.8329906596957058, "grad_norm": 183298.171875, "learning_rate": 1.670093403042942e-06, "loss": 0.1088, "step": 28360 }, { "epoch": 0.8332843799565294, "grad_norm": 205743.0625, "learning_rate": 1.667156200434706e-06, "loss": 0.1067, "step": 28370 }, { "epoch": 0.833578100217353, "grad_norm": 146323.90625, "learning_rate": 1.6642189978264702e-06, "loss": 0.1191, "step": 28380 }, { "epoch": 0.8338718204781765, "grad_norm": 290345.125, "learning_rate": 1.6612817952182342e-06, "loss": 0.1114, "step": 28390 }, { "epoch": 0.8341655407390002, "grad_norm": 198691.234375, "learning_rate": 1.6583445926099984e-06, "loss": 0.1202, "step": 28400 }, { "epoch": 0.8344592609998238, "grad_norm": 113169.9296875, "learning_rate": 1.6554073900017624e-06, "loss": 0.1093, "step": 28410 }, { "epoch": 0.8347529812606473, "grad_norm": 176092.828125, "learning_rate": 1.6524701873935264e-06, "loss": 0.1186, "step": 28420 }, { "epoch": 0.835046701521471, "grad_norm": 206197.921875, "learning_rate": 1.6495329847852907e-06, "loss": 0.1091, "step": 28430 }, { "epoch": 0.8353404217822945, "grad_norm": 226274.3125, "learning_rate": 1.6465957821770549e-06, "loss": 0.1101, "step": 28440 }, { "epoch": 0.8356341420431181, "grad_norm": 160644.765625, "learning_rate": 1.6436585795688186e-06, "loss": 0.1102, "step": 28450 }, { "epoch": 0.8359278623039417, "grad_norm": 193600.796875, "learning_rate": 1.6407213769605829e-06, "loss": 0.1059, "step": 28460 }, { "epoch": 0.8362215825647653, "grad_norm": 277807.375, "learning_rate": 1.637784174352347e-06, "loss": 0.1114, "step": 28470 }, { "epoch": 0.836515302825589, "grad_norm": 190419.90625, "learning_rate": 1.6348469717441108e-06, "loss": 0.1254, "step": 28480 }, { "epoch": 0.8368090230864125, "grad_norm": 190578.3125, "learning_rate": 1.631909769135875e-06, "loss": 0.1116, "step": 28490 }, { "epoch": 0.8371027433472361, "grad_norm": 180978.21875, "learning_rate": 1.6289725665276393e-06, "loss": 0.1118, "step": 28500 }, { "epoch": 0.8373964636080596, "grad_norm": 176582.796875, "learning_rate": 1.6260353639194033e-06, "loss": 0.1094, "step": 28510 }, { "epoch": 0.8376901838688833, "grad_norm": 243653.703125, "learning_rate": 1.6230981613111675e-06, "loss": 0.1091, "step": 28520 }, { "epoch": 0.8379839041297069, "grad_norm": 157460.65625, "learning_rate": 1.6201609587029315e-06, "loss": 0.1081, "step": 28530 }, { "epoch": 0.8382776243905304, "grad_norm": 299643.75, "learning_rate": 1.6172237560946955e-06, "loss": 0.1215, "step": 28540 }, { "epoch": 0.8385713446513541, "grad_norm": 225684.296875, "learning_rate": 1.6142865534864597e-06, "loss": 0.1046, "step": 28550 }, { "epoch": 0.8388650649121776, "grad_norm": 132222.265625, "learning_rate": 1.6113493508782239e-06, "loss": 0.113, "step": 28560 }, { "epoch": 0.8391587851730012, "grad_norm": 260264.890625, "learning_rate": 1.6084121482699877e-06, "loss": 0.1074, "step": 28570 }, { "epoch": 0.8394525054338249, "grad_norm": 147743.375, "learning_rate": 1.6054749456617519e-06, "loss": 0.0844, "step": 28580 }, { "epoch": 0.8397462256946484, "grad_norm": 183485.421875, "learning_rate": 1.602537743053516e-06, "loss": 0.1137, "step": 28590 }, { "epoch": 0.840039945955472, "grad_norm": 237634.9375, "learning_rate": 1.5996005404452799e-06, "loss": 0.1117, "step": 28600 }, { "epoch": 0.8403336662162956, "grad_norm": 186034.34375, "learning_rate": 1.596663337837044e-06, "loss": 0.1076, "step": 28610 }, { "epoch": 0.8406273864771192, "grad_norm": 178297.78125, "learning_rate": 1.5937261352288083e-06, "loss": 0.1, "step": 28620 }, { "epoch": 0.8409211067379427, "grad_norm": 350507.875, "learning_rate": 1.5907889326205723e-06, "loss": 0.1091, "step": 28630 }, { "epoch": 0.8412148269987664, "grad_norm": 129400.71875, "learning_rate": 1.5878517300123363e-06, "loss": 0.1254, "step": 28640 }, { "epoch": 0.84150854725959, "grad_norm": 380112.8125, "learning_rate": 1.5849145274041005e-06, "loss": 0.1158, "step": 28650 }, { "epoch": 0.8418022675204135, "grad_norm": 216410.359375, "learning_rate": 1.5819773247958645e-06, "loss": 0.1158, "step": 28660 }, { "epoch": 0.8420959877812372, "grad_norm": 232997.296875, "learning_rate": 1.5790401221876287e-06, "loss": 0.1178, "step": 28670 }, { "epoch": 0.8423897080420607, "grad_norm": 315855.25, "learning_rate": 1.576102919579393e-06, "loss": 0.1099, "step": 28680 }, { "epoch": 0.8426834283028843, "grad_norm": 224950.046875, "learning_rate": 1.5731657169711567e-06, "loss": 0.1114, "step": 28690 }, { "epoch": 0.842977148563708, "grad_norm": 349500.5, "learning_rate": 1.570228514362921e-06, "loss": 0.1086, "step": 28700 }, { "epoch": 0.8432708688245315, "grad_norm": 180505.40625, "learning_rate": 1.5672913117546851e-06, "loss": 0.1044, "step": 28710 }, { "epoch": 0.8435645890853551, "grad_norm": 194974.484375, "learning_rate": 1.564354109146449e-06, "loss": 0.1123, "step": 28720 }, { "epoch": 0.8438583093461787, "grad_norm": 192900.203125, "learning_rate": 1.5614169065382131e-06, "loss": 0.0985, "step": 28730 }, { "epoch": 0.8441520296070023, "grad_norm": 174587.34375, "learning_rate": 1.5584797039299773e-06, "loss": 0.1061, "step": 28740 }, { "epoch": 0.8444457498678258, "grad_norm": 229919.3125, "learning_rate": 1.5555425013217413e-06, "loss": 0.1092, "step": 28750 }, { "epoch": 0.8447394701286495, "grad_norm": 199961.5, "learning_rate": 1.5526052987135053e-06, "loss": 0.1011, "step": 28760 }, { "epoch": 0.8450331903894731, "grad_norm": 232308.96875, "learning_rate": 1.5496680961052695e-06, "loss": 0.1095, "step": 28770 }, { "epoch": 0.8453269106502966, "grad_norm": 177829.078125, "learning_rate": 1.5467308934970335e-06, "loss": 0.1044, "step": 28780 }, { "epoch": 0.8456206309111203, "grad_norm": 209982.234375, "learning_rate": 1.5437936908887977e-06, "loss": 0.1058, "step": 28790 }, { "epoch": 0.8459143511719438, "grad_norm": 134262.453125, "learning_rate": 1.5408564882805617e-06, "loss": 0.1075, "step": 28800 }, { "epoch": 0.8462080714327674, "grad_norm": 217588.046875, "learning_rate": 1.5379192856723257e-06, "loss": 0.1152, "step": 28810 }, { "epoch": 0.8465017916935911, "grad_norm": 189201.921875, "learning_rate": 1.53498208306409e-06, "loss": 0.1076, "step": 28820 }, { "epoch": 0.8467955119544146, "grad_norm": 304100.71875, "learning_rate": 1.5320448804558541e-06, "loss": 0.1234, "step": 28830 }, { "epoch": 0.8470892322152382, "grad_norm": 165239.984375, "learning_rate": 1.529107677847618e-06, "loss": 0.1039, "step": 28840 }, { "epoch": 0.8473829524760618, "grad_norm": 232097.6875, "learning_rate": 1.5261704752393821e-06, "loss": 0.1175, "step": 28850 }, { "epoch": 0.8476766727368854, "grad_norm": 230194.578125, "learning_rate": 1.5232332726311464e-06, "loss": 0.1135, "step": 28860 }, { "epoch": 0.8479703929977089, "grad_norm": 229419.515625, "learning_rate": 1.5202960700229101e-06, "loss": 0.1189, "step": 28870 }, { "epoch": 0.8482641132585326, "grad_norm": 270640.84375, "learning_rate": 1.5173588674146743e-06, "loss": 0.1051, "step": 28880 }, { "epoch": 0.8485578335193562, "grad_norm": 260878.078125, "learning_rate": 1.5144216648064386e-06, "loss": 0.1169, "step": 28890 }, { "epoch": 0.8488515537801797, "grad_norm": 236307.203125, "learning_rate": 1.5114844621982026e-06, "loss": 0.1209, "step": 28900 }, { "epoch": 0.8491452740410034, "grad_norm": 215207.375, "learning_rate": 1.5085472595899668e-06, "loss": 0.1043, "step": 28910 }, { "epoch": 0.8494389943018269, "grad_norm": 128040.109375, "learning_rate": 1.5056100569817308e-06, "loss": 0.1105, "step": 28920 }, { "epoch": 0.8497327145626505, "grad_norm": 173334.515625, "learning_rate": 1.5026728543734948e-06, "loss": 0.094, "step": 28930 }, { "epoch": 0.8500264348234742, "grad_norm": 205235.921875, "learning_rate": 1.499735651765259e-06, "loss": 0.113, "step": 28940 }, { "epoch": 0.8503201550842977, "grad_norm": 173720.109375, "learning_rate": 1.4967984491570232e-06, "loss": 0.1105, "step": 28950 }, { "epoch": 0.8506138753451213, "grad_norm": 903878.375, "learning_rate": 1.493861246548787e-06, "loss": 0.1048, "step": 28960 }, { "epoch": 0.8509075956059449, "grad_norm": 137942.90625, "learning_rate": 1.4909240439405512e-06, "loss": 0.1079, "step": 28970 }, { "epoch": 0.8512013158667685, "grad_norm": 190479.84375, "learning_rate": 1.4879868413323154e-06, "loss": 0.1115, "step": 28980 }, { "epoch": 0.851495036127592, "grad_norm": 196279.28125, "learning_rate": 1.4850496387240792e-06, "loss": 0.1216, "step": 28990 }, { "epoch": 0.8517887563884157, "grad_norm": 138359.4375, "learning_rate": 1.4821124361158434e-06, "loss": 0.1117, "step": 29000 }, { "epoch": 0.8520824766492393, "grad_norm": 200605.015625, "learning_rate": 1.4791752335076076e-06, "loss": 0.1198, "step": 29010 }, { "epoch": 0.8523761969100628, "grad_norm": 210129.03125, "learning_rate": 1.4762380308993716e-06, "loss": 0.1125, "step": 29020 }, { "epoch": 0.8526699171708865, "grad_norm": 168677.671875, "learning_rate": 1.4733008282911358e-06, "loss": 0.1049, "step": 29030 }, { "epoch": 0.85296363743171, "grad_norm": 236035.640625, "learning_rate": 1.4703636256828998e-06, "loss": 0.1219, "step": 29040 }, { "epoch": 0.8532573576925336, "grad_norm": 279565.09375, "learning_rate": 1.4674264230746638e-06, "loss": 0.1061, "step": 29050 }, { "epoch": 0.8535510779533573, "grad_norm": 133646.734375, "learning_rate": 1.464489220466428e-06, "loss": 0.1122, "step": 29060 }, { "epoch": 0.8538447982141808, "grad_norm": 134161.109375, "learning_rate": 1.4615520178581918e-06, "loss": 0.1231, "step": 29070 }, { "epoch": 0.8541385184750044, "grad_norm": 275794.1875, "learning_rate": 1.458614815249956e-06, "loss": 0.1098, "step": 29080 }, { "epoch": 0.854432238735828, "grad_norm": 161704.84375, "learning_rate": 1.4556776126417202e-06, "loss": 0.1166, "step": 29090 }, { "epoch": 0.8547259589966516, "grad_norm": 221354.15625, "learning_rate": 1.4527404100334842e-06, "loss": 0.1087, "step": 29100 }, { "epoch": 0.8550196792574751, "grad_norm": 164069.046875, "learning_rate": 1.4498032074252482e-06, "loss": 0.1196, "step": 29110 }, { "epoch": 0.8553133995182988, "grad_norm": 175114.578125, "learning_rate": 1.4468660048170124e-06, "loss": 0.108, "step": 29120 }, { "epoch": 0.8556071197791224, "grad_norm": 217924.453125, "learning_rate": 1.4439288022087764e-06, "loss": 0.0961, "step": 29130 }, { "epoch": 0.8559008400399459, "grad_norm": 204516.671875, "learning_rate": 1.4409915996005406e-06, "loss": 0.1068, "step": 29140 }, { "epoch": 0.8561945603007696, "grad_norm": 248817.59375, "learning_rate": 1.4380543969923046e-06, "loss": 0.1139, "step": 29150 }, { "epoch": 0.8564882805615931, "grad_norm": 278720.6875, "learning_rate": 1.4351171943840686e-06, "loss": 0.1047, "step": 29160 }, { "epoch": 0.8567820008224167, "grad_norm": 221577.640625, "learning_rate": 1.4321799917758328e-06, "loss": 0.1069, "step": 29170 }, { "epoch": 0.8570757210832404, "grad_norm": 176772.71875, "learning_rate": 1.429242789167597e-06, "loss": 0.1184, "step": 29180 }, { "epoch": 0.8573694413440639, "grad_norm": 163188.0625, "learning_rate": 1.4263055865593608e-06, "loss": 0.1038, "step": 29190 }, { "epoch": 0.8576631616048875, "grad_norm": 247339.609375, "learning_rate": 1.423368383951125e-06, "loss": 0.1118, "step": 29200 }, { "epoch": 0.8579568818657111, "grad_norm": 194174.0625, "learning_rate": 1.4204311813428892e-06, "loss": 0.1068, "step": 29210 }, { "epoch": 0.8582506021265347, "grad_norm": 231852.203125, "learning_rate": 1.417493978734653e-06, "loss": 0.1146, "step": 29220 }, { "epoch": 0.8585443223873582, "grad_norm": 123287.4609375, "learning_rate": 1.4145567761264172e-06, "loss": 0.0998, "step": 29230 }, { "epoch": 0.8588380426481819, "grad_norm": 242176.59375, "learning_rate": 1.4116195735181814e-06, "loss": 0.123, "step": 29240 }, { "epoch": 0.8591317629090055, "grad_norm": 167381.0, "learning_rate": 1.4086823709099454e-06, "loss": 0.1067, "step": 29250 }, { "epoch": 0.859425483169829, "grad_norm": 209588.953125, "learning_rate": 1.4057451683017096e-06, "loss": 0.1096, "step": 29260 }, { "epoch": 0.8597192034306527, "grad_norm": 198062.6875, "learning_rate": 1.4028079656934736e-06, "loss": 0.102, "step": 29270 }, { "epoch": 0.8600129236914762, "grad_norm": 141557.65625, "learning_rate": 1.3998707630852376e-06, "loss": 0.1075, "step": 29280 }, { "epoch": 0.8603066439522998, "grad_norm": 163113.0625, "learning_rate": 1.3969335604770018e-06, "loss": 0.1018, "step": 29290 }, { "epoch": 0.8606003642131235, "grad_norm": 292049.78125, "learning_rate": 1.393996357868766e-06, "loss": 0.101, "step": 29300 }, { "epoch": 0.860894084473947, "grad_norm": 171639.828125, "learning_rate": 1.3910591552605298e-06, "loss": 0.1067, "step": 29310 }, { "epoch": 0.8611878047347706, "grad_norm": 297069.5, "learning_rate": 1.388121952652294e-06, "loss": 0.1165, "step": 29320 }, { "epoch": 0.8614815249955942, "grad_norm": 206203.046875, "learning_rate": 1.3851847500440583e-06, "loss": 0.1008, "step": 29330 }, { "epoch": 0.8617752452564178, "grad_norm": 208322.25, "learning_rate": 1.382247547435822e-06, "loss": 0.118, "step": 29340 }, { "epoch": 0.8620689655172413, "grad_norm": 247562.859375, "learning_rate": 1.3793103448275862e-06, "loss": 0.1122, "step": 29350 }, { "epoch": 0.862362685778065, "grad_norm": 206287.921875, "learning_rate": 1.3763731422193505e-06, "loss": 0.1202, "step": 29360 }, { "epoch": 0.8626564060388886, "grad_norm": 153206.265625, "learning_rate": 1.3734359396111145e-06, "loss": 0.1116, "step": 29370 }, { "epoch": 0.8629501262997121, "grad_norm": 200385.5, "learning_rate": 1.3704987370028785e-06, "loss": 0.1083, "step": 29380 }, { "epoch": 0.8632438465605358, "grad_norm": 236098.5625, "learning_rate": 1.3675615343946427e-06, "loss": 0.1069, "step": 29390 }, { "epoch": 0.8635375668213593, "grad_norm": 175364.1875, "learning_rate": 1.3646243317864067e-06, "loss": 0.1158, "step": 29400 }, { "epoch": 0.8638312870821829, "grad_norm": 241788.65625, "learning_rate": 1.3616871291781709e-06, "loss": 0.0974, "step": 29410 }, { "epoch": 0.8641250073430066, "grad_norm": 295731.84375, "learning_rate": 1.358749926569935e-06, "loss": 0.1064, "step": 29420 }, { "epoch": 0.8644187276038301, "grad_norm": 330391.34375, "learning_rate": 1.3558127239616989e-06, "loss": 0.1061, "step": 29430 }, { "epoch": 0.8647124478646537, "grad_norm": 200177.5625, "learning_rate": 1.352875521353463e-06, "loss": 0.1107, "step": 29440 }, { "epoch": 0.8650061681254773, "grad_norm": 286203.125, "learning_rate": 1.3499383187452273e-06, "loss": 0.1129, "step": 29450 }, { "epoch": 0.8652998883863009, "grad_norm": 197170.578125, "learning_rate": 1.347001116136991e-06, "loss": 0.1123, "step": 29460 }, { "epoch": 0.8655936086471244, "grad_norm": 202622.90625, "learning_rate": 1.3440639135287553e-06, "loss": 0.1123, "step": 29470 }, { "epoch": 0.8658873289079481, "grad_norm": 195685.046875, "learning_rate": 1.3411267109205195e-06, "loss": 0.1096, "step": 29480 }, { "epoch": 0.8661810491687717, "grad_norm": 222190.515625, "learning_rate": 1.3381895083122835e-06, "loss": 0.1153, "step": 29490 }, { "epoch": 0.8664747694295952, "grad_norm": 144493.515625, "learning_rate": 1.3352523057040475e-06, "loss": 0.1078, "step": 29500 }, { "epoch": 0.8667684896904189, "grad_norm": 204864.15625, "learning_rate": 1.3323151030958117e-06, "loss": 0.1132, "step": 29510 }, { "epoch": 0.8670622099512424, "grad_norm": 203113.96875, "learning_rate": 1.3293779004875757e-06, "loss": 0.0995, "step": 29520 }, { "epoch": 0.867355930212066, "grad_norm": 209767.921875, "learning_rate": 1.3264406978793399e-06, "loss": 0.1123, "step": 29530 }, { "epoch": 0.8676496504728897, "grad_norm": 183247.90625, "learning_rate": 1.323503495271104e-06, "loss": 0.1048, "step": 29540 }, { "epoch": 0.8679433707337132, "grad_norm": 269562.3125, "learning_rate": 1.3205662926628679e-06, "loss": 0.1074, "step": 29550 }, { "epoch": 0.8682370909945368, "grad_norm": 179921.015625, "learning_rate": 1.317629090054632e-06, "loss": 0.1095, "step": 29560 }, { "epoch": 0.8685308112553604, "grad_norm": 170599.265625, "learning_rate": 1.3146918874463963e-06, "loss": 0.1122, "step": 29570 }, { "epoch": 0.868824531516184, "grad_norm": 196366.734375, "learning_rate": 1.31175468483816e-06, "loss": 0.1051, "step": 29580 }, { "epoch": 0.8691182517770076, "grad_norm": 822291.75, "learning_rate": 1.3088174822299243e-06, "loss": 0.1193, "step": 29590 }, { "epoch": 0.8694119720378312, "grad_norm": 240729.75, "learning_rate": 1.3058802796216885e-06, "loss": 0.1173, "step": 29600 }, { "epoch": 0.8697056922986548, "grad_norm": 191027.40625, "learning_rate": 1.3029430770134525e-06, "loss": 0.1054, "step": 29610 }, { "epoch": 0.8699994125594783, "grad_norm": 214924.484375, "learning_rate": 1.3000058744052165e-06, "loss": 0.1165, "step": 29620 }, { "epoch": 0.870293132820302, "grad_norm": 182364.796875, "learning_rate": 1.2970686717969807e-06, "loss": 0.1101, "step": 29630 }, { "epoch": 0.8705868530811255, "grad_norm": 188996.4375, "learning_rate": 1.2941314691887447e-06, "loss": 0.1075, "step": 29640 }, { "epoch": 0.8708805733419491, "grad_norm": 210653.890625, "learning_rate": 1.291194266580509e-06, "loss": 0.1156, "step": 29650 }, { "epoch": 0.8711742936027728, "grad_norm": 149384.203125, "learning_rate": 1.288257063972273e-06, "loss": 0.1023, "step": 29660 }, { "epoch": 0.8714680138635963, "grad_norm": 215161.65625, "learning_rate": 1.285319861364037e-06, "loss": 0.1024, "step": 29670 }, { "epoch": 0.8717617341244199, "grad_norm": 196629.6875, "learning_rate": 1.2823826587558011e-06, "loss": 0.1123, "step": 29680 }, { "epoch": 0.8720554543852435, "grad_norm": 195365.890625, "learning_rate": 1.2794454561475653e-06, "loss": 0.106, "step": 29690 }, { "epoch": 0.8723491746460671, "grad_norm": 198202.21875, "learning_rate": 1.2765082535393291e-06, "loss": 0.1127, "step": 29700 }, { "epoch": 0.8726428949068907, "grad_norm": 212255.296875, "learning_rate": 1.2735710509310933e-06, "loss": 0.098, "step": 29710 }, { "epoch": 0.8729366151677143, "grad_norm": 733699.5625, "learning_rate": 1.2706338483228575e-06, "loss": 0.113, "step": 29720 }, { "epoch": 0.8732303354285379, "grad_norm": 216793.796875, "learning_rate": 1.2676966457146213e-06, "loss": 0.1065, "step": 29730 }, { "epoch": 0.8735240556893614, "grad_norm": 196900.078125, "learning_rate": 1.2647594431063855e-06, "loss": 0.1096, "step": 29740 }, { "epoch": 0.8738177759501851, "grad_norm": 730908.625, "learning_rate": 1.2618222404981497e-06, "loss": 0.108, "step": 29750 }, { "epoch": 0.8741114962110086, "grad_norm": 156176.53125, "learning_rate": 1.2588850378899137e-06, "loss": 0.1115, "step": 29760 }, { "epoch": 0.8744052164718322, "grad_norm": 191988.875, "learning_rate": 1.255947835281678e-06, "loss": 0.109, "step": 29770 }, { "epoch": 0.8746989367326559, "grad_norm": 177288.71875, "learning_rate": 1.253010632673442e-06, "loss": 0.1064, "step": 29780 }, { "epoch": 0.8749926569934794, "grad_norm": 187456.5, "learning_rate": 1.250073430065206e-06, "loss": 0.1092, "step": 29790 }, { "epoch": 0.875286377254303, "grad_norm": 194590.28125, "learning_rate": 1.2471362274569702e-06, "loss": 0.1062, "step": 29800 }, { "epoch": 0.8755800975151266, "grad_norm": 376005.6875, "learning_rate": 1.2441990248487342e-06, "loss": 0.1112, "step": 29810 }, { "epoch": 0.8758738177759502, "grad_norm": 172325.109375, "learning_rate": 1.2412618222404981e-06, "loss": 0.1193, "step": 29820 }, { "epoch": 0.8761675380367738, "grad_norm": 252895.1875, "learning_rate": 1.2383246196322624e-06, "loss": 0.1065, "step": 29830 }, { "epoch": 0.8764612582975974, "grad_norm": 240583.421875, "learning_rate": 1.2353874170240264e-06, "loss": 0.1077, "step": 29840 }, { "epoch": 0.876754978558421, "grad_norm": 187752.578125, "learning_rate": 1.2324502144157904e-06, "loss": 0.0878, "step": 29850 }, { "epoch": 0.8770486988192445, "grad_norm": 182182.03125, "learning_rate": 1.2295130118075546e-06, "loss": 0.1065, "step": 29860 }, { "epoch": 0.8773424190800682, "grad_norm": 236515.765625, "learning_rate": 1.2265758091993186e-06, "loss": 0.1171, "step": 29870 }, { "epoch": 0.8776361393408917, "grad_norm": 156151.515625, "learning_rate": 1.2236386065910828e-06, "loss": 0.1093, "step": 29880 }, { "epoch": 0.8779298596017153, "grad_norm": 162030.90625, "learning_rate": 1.2207014039828468e-06, "loss": 0.1145, "step": 29890 }, { "epoch": 0.878223579862539, "grad_norm": 294108.875, "learning_rate": 1.217764201374611e-06, "loss": 0.103, "step": 29900 }, { "epoch": 0.8785173001233625, "grad_norm": 226430.9375, "learning_rate": 1.214826998766375e-06, "loss": 0.1055, "step": 29910 }, { "epoch": 0.8788110203841861, "grad_norm": 186796.890625, "learning_rate": 1.2118897961581392e-06, "loss": 0.1116, "step": 29920 }, { "epoch": 0.8791047406450097, "grad_norm": 166362.953125, "learning_rate": 1.2089525935499032e-06, "loss": 0.1219, "step": 29930 }, { "epoch": 0.8793984609058333, "grad_norm": 144186.109375, "learning_rate": 1.2060153909416672e-06, "loss": 0.1077, "step": 29940 }, { "epoch": 0.8796921811666569, "grad_norm": 223514.890625, "learning_rate": 1.2030781883334314e-06, "loss": 0.1095, "step": 29950 }, { "epoch": 0.8799859014274805, "grad_norm": 225448.484375, "learning_rate": 1.2001409857251954e-06, "loss": 0.1095, "step": 29960 }, { "epoch": 0.8802796216883041, "grad_norm": 194504.515625, "learning_rate": 1.1972037831169594e-06, "loss": 0.0959, "step": 29970 }, { "epoch": 0.8805733419491276, "grad_norm": 194019.828125, "learning_rate": 1.1942665805087236e-06, "loss": 0.1018, "step": 29980 }, { "epoch": 0.8808670622099513, "grad_norm": 235392.4375, "learning_rate": 1.1913293779004876e-06, "loss": 0.1052, "step": 29990 }, { "epoch": 0.8811607824707748, "grad_norm": 178688.1875, "learning_rate": 1.1883921752922518e-06, "loss": 0.1073, "step": 30000 }, { "epoch": 0.8814545027315984, "grad_norm": 287730.4375, "learning_rate": 1.1854549726840158e-06, "loss": 0.1196, "step": 30010 }, { "epoch": 0.8817482229924221, "grad_norm": 234346.875, "learning_rate": 1.18251777007578e-06, "loss": 0.1059, "step": 30020 }, { "epoch": 0.8820419432532456, "grad_norm": 172542.75, "learning_rate": 1.179580567467544e-06, "loss": 0.104, "step": 30030 }, { "epoch": 0.8823356635140692, "grad_norm": 258098.6875, "learning_rate": 1.1766433648593082e-06, "loss": 0.1104, "step": 30040 }, { "epoch": 0.8826293837748928, "grad_norm": 157345.015625, "learning_rate": 1.1737061622510722e-06, "loss": 0.1034, "step": 30050 }, { "epoch": 0.8829231040357164, "grad_norm": 187092.296875, "learning_rate": 1.1707689596428362e-06, "loss": 0.1047, "step": 30060 }, { "epoch": 0.88321682429654, "grad_norm": 208838.78125, "learning_rate": 1.1678317570346004e-06, "loss": 0.1238, "step": 30070 }, { "epoch": 0.8835105445573636, "grad_norm": 323236.96875, "learning_rate": 1.1648945544263644e-06, "loss": 0.107, "step": 30080 }, { "epoch": 0.8838042648181872, "grad_norm": 167006.875, "learning_rate": 1.1619573518181284e-06, "loss": 0.1022, "step": 30090 }, { "epoch": 0.8840979850790107, "grad_norm": 141526.265625, "learning_rate": 1.1590201492098926e-06, "loss": 0.1104, "step": 30100 }, { "epoch": 0.8843917053398344, "grad_norm": 144463.28125, "learning_rate": 1.1560829466016566e-06, "loss": 0.112, "step": 30110 }, { "epoch": 0.8846854256006579, "grad_norm": 293282.5, "learning_rate": 1.1531457439934208e-06, "loss": 0.105, "step": 30120 }, { "epoch": 0.8849791458614815, "grad_norm": 153171.4375, "learning_rate": 1.1502085413851848e-06, "loss": 0.1076, "step": 30130 }, { "epoch": 0.8852728661223052, "grad_norm": 260931.765625, "learning_rate": 1.147271338776949e-06, "loss": 0.105, "step": 30140 }, { "epoch": 0.8855665863831287, "grad_norm": 179499.890625, "learning_rate": 1.144334136168713e-06, "loss": 0.1085, "step": 30150 }, { "epoch": 0.8858603066439523, "grad_norm": 234585.890625, "learning_rate": 1.1413969335604772e-06, "loss": 0.1083, "step": 30160 }, { "epoch": 0.8861540269047758, "grad_norm": 228379.015625, "learning_rate": 1.1384597309522412e-06, "loss": 0.1057, "step": 30170 }, { "epoch": 0.8864477471655995, "grad_norm": 209093.265625, "learning_rate": 1.1355225283440052e-06, "loss": 0.1098, "step": 30180 }, { "epoch": 0.8867414674264231, "grad_norm": 188282.859375, "learning_rate": 1.1325853257357694e-06, "loss": 0.1092, "step": 30190 }, { "epoch": 0.8870351876872467, "grad_norm": 272489.0625, "learning_rate": 1.1296481231275334e-06, "loss": 0.1091, "step": 30200 }, { "epoch": 0.8873289079480703, "grad_norm": 178743.9375, "learning_rate": 1.1267109205192974e-06, "loss": 0.1017, "step": 30210 }, { "epoch": 0.8876226282088938, "grad_norm": 154176.484375, "learning_rate": 1.1237737179110616e-06, "loss": 0.1058, "step": 30220 }, { "epoch": 0.8879163484697175, "grad_norm": 154746.4375, "learning_rate": 1.1208365153028256e-06, "loss": 0.1131, "step": 30230 }, { "epoch": 0.888210068730541, "grad_norm": 379608.71875, "learning_rate": 1.1178993126945896e-06, "loss": 0.1075, "step": 30240 }, { "epoch": 0.8885037889913646, "grad_norm": 289364.59375, "learning_rate": 1.1149621100863538e-06, "loss": 0.1096, "step": 30250 }, { "epoch": 0.8887975092521883, "grad_norm": 203852.21875, "learning_rate": 1.1120249074781178e-06, "loss": 0.0979, "step": 30260 }, { "epoch": 0.8890912295130118, "grad_norm": 180420.40625, "learning_rate": 1.109087704869882e-06, "loss": 0.1173, "step": 30270 }, { "epoch": 0.8893849497738354, "grad_norm": 154938.03125, "learning_rate": 1.1061505022616463e-06, "loss": 0.1013, "step": 30280 }, { "epoch": 0.889678670034659, "grad_norm": 185752.828125, "learning_rate": 1.1032132996534103e-06, "loss": 0.1137, "step": 30290 }, { "epoch": 0.8899723902954826, "grad_norm": 349233.28125, "learning_rate": 1.1002760970451743e-06, "loss": 0.1128, "step": 30300 }, { "epoch": 0.8902661105563062, "grad_norm": 214000.78125, "learning_rate": 1.0973388944369385e-06, "loss": 0.112, "step": 30310 }, { "epoch": 0.8905598308171297, "grad_norm": 236679.828125, "learning_rate": 1.0944016918287025e-06, "loss": 0.1077, "step": 30320 }, { "epoch": 0.8908535510779534, "grad_norm": 237283.6875, "learning_rate": 1.0914644892204665e-06, "loss": 0.116, "step": 30330 }, { "epoch": 0.8911472713387769, "grad_norm": 289281.90625, "learning_rate": 1.0885272866122305e-06, "loss": 0.1025, "step": 30340 }, { "epoch": 0.8914409915996006, "grad_norm": 127043.8125, "learning_rate": 1.0855900840039947e-06, "loss": 0.1059, "step": 30350 }, { "epoch": 0.8917347118604241, "grad_norm": 166583.484375, "learning_rate": 1.0826528813957587e-06, "loss": 0.1006, "step": 30360 }, { "epoch": 0.8920284321212477, "grad_norm": 201240.875, "learning_rate": 1.0797156787875229e-06, "loss": 0.1069, "step": 30370 }, { "epoch": 0.8923221523820714, "grad_norm": 189538.65625, "learning_rate": 1.0767784761792869e-06, "loss": 0.1022, "step": 30380 }, { "epoch": 0.8926158726428949, "grad_norm": 198855.09375, "learning_rate": 1.073841273571051e-06, "loss": 0.1077, "step": 30390 }, { "epoch": 0.8929095929037185, "grad_norm": 183571.59375, "learning_rate": 1.070904070962815e-06, "loss": 0.1099, "step": 30400 }, { "epoch": 0.893203313164542, "grad_norm": 231892.203125, "learning_rate": 1.0679668683545793e-06, "loss": 0.102, "step": 30410 }, { "epoch": 0.8934970334253657, "grad_norm": 225336.359375, "learning_rate": 1.0650296657463433e-06, "loss": 0.1115, "step": 30420 }, { "epoch": 0.8937907536861893, "grad_norm": 150597.640625, "learning_rate": 1.0620924631381073e-06, "loss": 0.101, "step": 30430 }, { "epoch": 0.8940844739470128, "grad_norm": 292334.0, "learning_rate": 1.0591552605298715e-06, "loss": 0.1026, "step": 30440 }, { "epoch": 0.8943781942078365, "grad_norm": 199593.21875, "learning_rate": 1.0562180579216355e-06, "loss": 0.102, "step": 30450 }, { "epoch": 0.89467191446866, "grad_norm": 233461.953125, "learning_rate": 1.0532808553133995e-06, "loss": 0.112, "step": 30460 }, { "epoch": 0.8949656347294837, "grad_norm": 163784.984375, "learning_rate": 1.0503436527051637e-06, "loss": 0.1088, "step": 30470 }, { "epoch": 0.8952593549903072, "grad_norm": 236529.0, "learning_rate": 1.0474064500969277e-06, "loss": 0.11, "step": 30480 }, { "epoch": 0.8955530752511308, "grad_norm": 197680.0, "learning_rate": 1.044469247488692e-06, "loss": 0.1216, "step": 30490 }, { "epoch": 0.8958467955119545, "grad_norm": 240445.296875, "learning_rate": 1.041532044880456e-06, "loss": 0.1073, "step": 30500 }, { "epoch": 0.896140515772778, "grad_norm": 226538.0625, "learning_rate": 1.0385948422722201e-06, "loss": 0.1207, "step": 30510 }, { "epoch": 0.8964342360336016, "grad_norm": 195872.25, "learning_rate": 1.0356576396639841e-06, "loss": 0.1052, "step": 30520 }, { "epoch": 0.8967279562944251, "grad_norm": 220755.40625, "learning_rate": 1.0327204370557483e-06, "loss": 0.1073, "step": 30530 }, { "epoch": 0.8970216765552488, "grad_norm": 174512.546875, "learning_rate": 1.0297832344475123e-06, "loss": 0.1093, "step": 30540 }, { "epoch": 0.8973153968160724, "grad_norm": 578259.6875, "learning_rate": 1.0268460318392763e-06, "loss": 0.1152, "step": 30550 }, { "epoch": 0.897609117076896, "grad_norm": 255680.9375, "learning_rate": 1.0239088292310405e-06, "loss": 0.1087, "step": 30560 }, { "epoch": 0.8979028373377196, "grad_norm": 239722.921875, "learning_rate": 1.0209716266228045e-06, "loss": 0.1079, "step": 30570 }, { "epoch": 0.8981965575985431, "grad_norm": 227656.453125, "learning_rate": 1.0180344240145685e-06, "loss": 0.107, "step": 30580 }, { "epoch": 0.8984902778593667, "grad_norm": 176225.140625, "learning_rate": 1.0150972214063327e-06, "loss": 0.1074, "step": 30590 }, { "epoch": 0.8987839981201904, "grad_norm": 187623.734375, "learning_rate": 1.0121600187980967e-06, "loss": 0.1036, "step": 30600 }, { "epoch": 0.8990777183810139, "grad_norm": 369449.625, "learning_rate": 1.0092228161898607e-06, "loss": 0.1029, "step": 30610 }, { "epoch": 0.8993714386418376, "grad_norm": 237600.171875, "learning_rate": 1.006285613581625e-06, "loss": 0.1128, "step": 30620 }, { "epoch": 0.8996651589026611, "grad_norm": 246344.640625, "learning_rate": 1.003348410973389e-06, "loss": 0.1168, "step": 30630 }, { "epoch": 0.8999588791634847, "grad_norm": 154846.140625, "learning_rate": 1.0004112083651531e-06, "loss": 0.1105, "step": 30640 }, { "epoch": 0.9002525994243082, "grad_norm": 156150.953125, "learning_rate": 9.974740057569173e-07, "loss": 0.0994, "step": 30650 }, { "epoch": 0.9005463196851319, "grad_norm": 244532.109375, "learning_rate": 9.945368031486813e-07, "loss": 0.1266, "step": 30660 }, { "epoch": 0.9008400399459555, "grad_norm": 185154.265625, "learning_rate": 9.915996005404453e-07, "loss": 0.1101, "step": 30670 }, { "epoch": 0.901133760206779, "grad_norm": 168864.28125, "learning_rate": 9.886623979322095e-07, "loss": 0.1087, "step": 30680 }, { "epoch": 0.9014274804676027, "grad_norm": 190931.96875, "learning_rate": 9.857251953239735e-07, "loss": 0.1041, "step": 30690 }, { "epoch": 0.9017212007284262, "grad_norm": 187037.3125, "learning_rate": 9.827879927157375e-07, "loss": 0.0987, "step": 30700 }, { "epoch": 0.9020149209892498, "grad_norm": 139614.21875, "learning_rate": 9.798507901075018e-07, "loss": 0.1166, "step": 30710 }, { "epoch": 0.9023086412500735, "grad_norm": 239515.78125, "learning_rate": 9.769135874992657e-07, "loss": 0.1217, "step": 30720 }, { "epoch": 0.902602361510897, "grad_norm": 193703.40625, "learning_rate": 9.739763848910297e-07, "loss": 0.1101, "step": 30730 }, { "epoch": 0.9028960817717206, "grad_norm": 208664.125, "learning_rate": 9.71039182282794e-07, "loss": 0.1108, "step": 30740 }, { "epoch": 0.9031898020325442, "grad_norm": 235685.03125, "learning_rate": 9.68101979674558e-07, "loss": 0.1096, "step": 30750 }, { "epoch": 0.9034835222933678, "grad_norm": 292386.9375, "learning_rate": 9.651647770663222e-07, "loss": 0.1161, "step": 30760 }, { "epoch": 0.9037772425541913, "grad_norm": 350266.84375, "learning_rate": 9.622275744580862e-07, "loss": 0.1073, "step": 30770 }, { "epoch": 0.904070962815015, "grad_norm": 294879.8125, "learning_rate": 9.592903718498504e-07, "loss": 0.1127, "step": 30780 }, { "epoch": 0.9043646830758386, "grad_norm": 215931.296875, "learning_rate": 9.563531692416144e-07, "loss": 0.1071, "step": 30790 }, { "epoch": 0.9046584033366621, "grad_norm": 219124.453125, "learning_rate": 9.534159666333785e-07, "loss": 0.1032, "step": 30800 }, { "epoch": 0.9049521235974858, "grad_norm": 163980.0, "learning_rate": 9.504787640251426e-07, "loss": 0.1044, "step": 30810 }, { "epoch": 0.9052458438583093, "grad_norm": 176048.4375, "learning_rate": 9.475415614169066e-07, "loss": 0.1026, "step": 30820 }, { "epoch": 0.905539564119133, "grad_norm": 229829.0625, "learning_rate": 9.446043588086708e-07, "loss": 0.1154, "step": 30830 }, { "epoch": 0.9058332843799566, "grad_norm": 231529.875, "learning_rate": 9.416671562004348e-07, "loss": 0.0988, "step": 30840 }, { "epoch": 0.9061270046407801, "grad_norm": 223470.875, "learning_rate": 9.387299535921989e-07, "loss": 0.1135, "step": 30850 }, { "epoch": 0.9064207249016037, "grad_norm": 282425.9375, "learning_rate": 9.357927509839629e-07, "loss": 0.1142, "step": 30860 }, { "epoch": 0.9067144451624273, "grad_norm": 240407.859375, "learning_rate": 9.328555483757271e-07, "loss": 0.1007, "step": 30870 }, { "epoch": 0.9070081654232509, "grad_norm": 258440.890625, "learning_rate": 9.299183457674911e-07, "loss": 0.1102, "step": 30880 }, { "epoch": 0.9073018856840744, "grad_norm": 276613.6875, "learning_rate": 9.269811431592552e-07, "loss": 0.1082, "step": 30890 }, { "epoch": 0.9075956059448981, "grad_norm": 208577.0625, "learning_rate": 9.240439405510193e-07, "loss": 0.1011, "step": 30900 }, { "epoch": 0.9078893262057217, "grad_norm": 283644.84375, "learning_rate": 9.211067379427834e-07, "loss": 0.1051, "step": 30910 }, { "epoch": 0.9081830464665452, "grad_norm": 371911.4375, "learning_rate": 9.181695353345474e-07, "loss": 0.1119, "step": 30920 }, { "epoch": 0.9084767667273689, "grad_norm": 167519.84375, "learning_rate": 9.152323327263116e-07, "loss": 0.1081, "step": 30930 }, { "epoch": 0.9087704869881924, "grad_norm": 321890.53125, "learning_rate": 9.122951301180756e-07, "loss": 0.1088, "step": 30940 }, { "epoch": 0.909064207249016, "grad_norm": 261685.28125, "learning_rate": 9.093579275098396e-07, "loss": 0.0981, "step": 30950 }, { "epoch": 0.9093579275098397, "grad_norm": 197887.78125, "learning_rate": 9.064207249016038e-07, "loss": 0.1177, "step": 30960 }, { "epoch": 0.9096516477706632, "grad_norm": 202088.640625, "learning_rate": 9.034835222933679e-07, "loss": 0.111, "step": 30970 }, { "epoch": 0.9099453680314868, "grad_norm": 218941.46875, "learning_rate": 9.005463196851319e-07, "loss": 0.1101, "step": 30980 }, { "epoch": 0.9102390882923104, "grad_norm": 335938.46875, "learning_rate": 8.976091170768961e-07, "loss": 0.1095, "step": 30990 }, { "epoch": 0.910532808553134, "grad_norm": 248076.265625, "learning_rate": 8.946719144686601e-07, "loss": 0.1121, "step": 31000 }, { "epoch": 0.9108265288139575, "grad_norm": 190697.03125, "learning_rate": 8.917347118604241e-07, "loss": 0.1062, "step": 31010 }, { "epoch": 0.9111202490747812, "grad_norm": 248143.21875, "learning_rate": 8.887975092521883e-07, "loss": 0.1082, "step": 31020 }, { "epoch": 0.9114139693356048, "grad_norm": 214770.1875, "learning_rate": 8.858603066439523e-07, "loss": 0.1013, "step": 31030 }, { "epoch": 0.9117076895964283, "grad_norm": 286369.5625, "learning_rate": 8.829231040357164e-07, "loss": 0.104, "step": 31040 }, { "epoch": 0.912001409857252, "grad_norm": 165609.78125, "learning_rate": 8.799859014274806e-07, "loss": 0.1158, "step": 31050 }, { "epoch": 0.9122951301180755, "grad_norm": 225256.28125, "learning_rate": 8.770486988192446e-07, "loss": 0.1163, "step": 31060 }, { "epoch": 0.9125888503788991, "grad_norm": 127722.203125, "learning_rate": 8.741114962110086e-07, "loss": 0.092, "step": 31070 }, { "epoch": 0.9128825706397228, "grad_norm": 177179.84375, "learning_rate": 8.711742936027728e-07, "loss": 0.1047, "step": 31080 }, { "epoch": 0.9131762909005463, "grad_norm": 197157.796875, "learning_rate": 8.682370909945368e-07, "loss": 0.1123, "step": 31090 }, { "epoch": 0.9134700111613699, "grad_norm": 215888.21875, "learning_rate": 8.652998883863009e-07, "loss": 0.1119, "step": 31100 }, { "epoch": 0.9137637314221935, "grad_norm": 192655.21875, "learning_rate": 8.623626857780651e-07, "loss": 0.1034, "step": 31110 }, { "epoch": 0.9140574516830171, "grad_norm": 225764.546875, "learning_rate": 8.594254831698291e-07, "loss": 0.1017, "step": 31120 }, { "epoch": 0.9143511719438406, "grad_norm": 144555.859375, "learning_rate": 8.564882805615931e-07, "loss": 0.1108, "step": 31130 }, { "epoch": 0.9146448922046643, "grad_norm": 194178.375, "learning_rate": 8.535510779533573e-07, "loss": 0.1081, "step": 31140 }, { "epoch": 0.9149386124654879, "grad_norm": 200283.953125, "learning_rate": 8.506138753451213e-07, "loss": 0.1088, "step": 31150 }, { "epoch": 0.9152323327263114, "grad_norm": 249154.328125, "learning_rate": 8.476766727368854e-07, "loss": 0.1031, "step": 31160 }, { "epoch": 0.9155260529871351, "grad_norm": 252716.734375, "learning_rate": 8.447394701286496e-07, "loss": 0.1133, "step": 31170 }, { "epoch": 0.9158197732479586, "grad_norm": 252982.328125, "learning_rate": 8.418022675204137e-07, "loss": 0.1035, "step": 31180 }, { "epoch": 0.9161134935087822, "grad_norm": 221692.625, "learning_rate": 8.388650649121777e-07, "loss": 0.1033, "step": 31190 }, { "epoch": 0.9164072137696059, "grad_norm": 185507.09375, "learning_rate": 8.359278623039419e-07, "loss": 0.0998, "step": 31200 }, { "epoch": 0.9167009340304294, "grad_norm": 146931.453125, "learning_rate": 8.329906596957059e-07, "loss": 0.0979, "step": 31210 }, { "epoch": 0.916994654291253, "grad_norm": 253142.1875, "learning_rate": 8.3005345708747e-07, "loss": 0.1111, "step": 31220 }, { "epoch": 0.9172883745520766, "grad_norm": 244678.921875, "learning_rate": 8.271162544792341e-07, "loss": 0.0975, "step": 31230 }, { "epoch": 0.9175820948129002, "grad_norm": 176981.15625, "learning_rate": 8.241790518709982e-07, "loss": 0.1024, "step": 31240 }, { "epoch": 0.9178758150737237, "grad_norm": 193780.15625, "learning_rate": 8.212418492627622e-07, "loss": 0.1034, "step": 31250 }, { "epoch": 0.9181695353345474, "grad_norm": 310933.59375, "learning_rate": 8.183046466545264e-07, "loss": 0.1133, "step": 31260 }, { "epoch": 0.918463255595371, "grad_norm": 221911.53125, "learning_rate": 8.153674440462904e-07, "loss": 0.111, "step": 31270 }, { "epoch": 0.9187569758561945, "grad_norm": 158537.390625, "learning_rate": 8.124302414380545e-07, "loss": 0.1042, "step": 31280 }, { "epoch": 0.9190506961170182, "grad_norm": 297996.8125, "learning_rate": 8.094930388298186e-07, "loss": 0.1083, "step": 31290 }, { "epoch": 0.9193444163778417, "grad_norm": 259850.21875, "learning_rate": 8.065558362215827e-07, "loss": 0.103, "step": 31300 }, { "epoch": 0.9196381366386653, "grad_norm": 232684.96875, "learning_rate": 8.036186336133467e-07, "loss": 0.1027, "step": 31310 }, { "epoch": 0.919931856899489, "grad_norm": 166803.296875, "learning_rate": 8.006814310051109e-07, "loss": 0.1037, "step": 31320 }, { "epoch": 0.9202255771603125, "grad_norm": 215691.921875, "learning_rate": 7.977442283968749e-07, "loss": 0.0977, "step": 31330 }, { "epoch": 0.9205192974211361, "grad_norm": 261310.953125, "learning_rate": 7.94807025788639e-07, "loss": 0.1208, "step": 31340 }, { "epoch": 0.9208130176819597, "grad_norm": 163626.109375, "learning_rate": 7.91869823180403e-07, "loss": 0.0932, "step": 31350 }, { "epoch": 0.9211067379427833, "grad_norm": 212001.859375, "learning_rate": 7.889326205721672e-07, "loss": 0.1188, "step": 31360 }, { "epoch": 0.9214004582036068, "grad_norm": 135615.125, "learning_rate": 7.859954179639312e-07, "loss": 0.1087, "step": 31370 }, { "epoch": 0.9216941784644305, "grad_norm": 271894.3125, "learning_rate": 7.830582153556952e-07, "loss": 0.106, "step": 31380 }, { "epoch": 0.9219878987252541, "grad_norm": 222865.734375, "learning_rate": 7.801210127474594e-07, "loss": 0.1093, "step": 31390 }, { "epoch": 0.9222816189860776, "grad_norm": 216319.234375, "learning_rate": 7.771838101392235e-07, "loss": 0.1053, "step": 31400 }, { "epoch": 0.9225753392469013, "grad_norm": 228484.078125, "learning_rate": 7.742466075309875e-07, "loss": 0.1104, "step": 31410 }, { "epoch": 0.9228690595077248, "grad_norm": 219236.75, "learning_rate": 7.713094049227517e-07, "loss": 0.1143, "step": 31420 }, { "epoch": 0.9231627797685484, "grad_norm": 239364.953125, "learning_rate": 7.683722023145157e-07, "loss": 0.1066, "step": 31430 }, { "epoch": 0.9234565000293721, "grad_norm": 253768.875, "learning_rate": 7.654349997062797e-07, "loss": 0.0951, "step": 31440 }, { "epoch": 0.9237502202901956, "grad_norm": 169699.734375, "learning_rate": 7.624977970980439e-07, "loss": 0.1039, "step": 31450 }, { "epoch": 0.9240439405510192, "grad_norm": 208143.953125, "learning_rate": 7.595605944898079e-07, "loss": 0.1121, "step": 31460 }, { "epoch": 0.9243376608118428, "grad_norm": 710585.875, "learning_rate": 7.56623391881572e-07, "loss": 0.1154, "step": 31470 }, { "epoch": 0.9246313810726664, "grad_norm": 253480.078125, "learning_rate": 7.536861892733362e-07, "loss": 0.1035, "step": 31480 }, { "epoch": 0.9249251013334899, "grad_norm": 191526.65625, "learning_rate": 7.507489866651002e-07, "loss": 0.1144, "step": 31490 }, { "epoch": 0.9252188215943136, "grad_norm": 265941.15625, "learning_rate": 7.478117840568642e-07, "loss": 0.1094, "step": 31500 }, { "epoch": 0.9255125418551372, "grad_norm": 378189.0, "learning_rate": 7.448745814486284e-07, "loss": 0.112, "step": 31510 }, { "epoch": 0.9258062621159607, "grad_norm": 265851.28125, "learning_rate": 7.419373788403924e-07, "loss": 0.112, "step": 31520 }, { "epoch": 0.9260999823767844, "grad_norm": 169582.59375, "learning_rate": 7.390001762321565e-07, "loss": 0.0927, "step": 31530 }, { "epoch": 0.9263937026376079, "grad_norm": 187023.984375, "learning_rate": 7.360629736239206e-07, "loss": 0.1171, "step": 31540 }, { "epoch": 0.9266874228984315, "grad_norm": 195870.875, "learning_rate": 7.331257710156847e-07, "loss": 0.1035, "step": 31550 }, { "epoch": 0.9269811431592552, "grad_norm": 242149.09375, "learning_rate": 7.301885684074487e-07, "loss": 0.1094, "step": 31560 }, { "epoch": 0.9272748634200787, "grad_norm": 306258.25, "learning_rate": 7.272513657992129e-07, "loss": 0.1138, "step": 31570 }, { "epoch": 0.9275685836809023, "grad_norm": 325606.375, "learning_rate": 7.243141631909769e-07, "loss": 0.1113, "step": 31580 }, { "epoch": 0.9278623039417259, "grad_norm": 243928.046875, "learning_rate": 7.21376960582741e-07, "loss": 0.0913, "step": 31590 }, { "epoch": 0.9281560242025495, "grad_norm": 260561.5625, "learning_rate": 7.184397579745051e-07, "loss": 0.1046, "step": 31600 }, { "epoch": 0.9284497444633731, "grad_norm": 154495.546875, "learning_rate": 7.155025553662692e-07, "loss": 0.1107, "step": 31610 }, { "epoch": 0.9287434647241967, "grad_norm": 419437.75, "learning_rate": 7.125653527580332e-07, "loss": 0.1071, "step": 31620 }, { "epoch": 0.9290371849850203, "grad_norm": 305030.96875, "learning_rate": 7.096281501497975e-07, "loss": 0.1084, "step": 31630 }, { "epoch": 0.9293309052458438, "grad_norm": 221663.84375, "learning_rate": 7.066909475415615e-07, "loss": 0.1018, "step": 31640 }, { "epoch": 0.9296246255066675, "grad_norm": 344650.8125, "learning_rate": 7.037537449333256e-07, "loss": 0.0978, "step": 31650 }, { "epoch": 0.929918345767491, "grad_norm": 195080.53125, "learning_rate": 7.008165423250897e-07, "loss": 0.112, "step": 31660 }, { "epoch": 0.9302120660283146, "grad_norm": 252234.6875, "learning_rate": 6.978793397168538e-07, "loss": 0.1145, "step": 31670 }, { "epoch": 0.9305057862891383, "grad_norm": 187894.0, "learning_rate": 6.949421371086178e-07, "loss": 0.1027, "step": 31680 }, { "epoch": 0.9307995065499618, "grad_norm": 182597.28125, "learning_rate": 6.92004934500382e-07, "loss": 0.1096, "step": 31690 }, { "epoch": 0.9310932268107854, "grad_norm": 172854.6875, "learning_rate": 6.89067731892146e-07, "loss": 0.1029, "step": 31700 }, { "epoch": 0.931386947071609, "grad_norm": 160632.265625, "learning_rate": 6.861305292839101e-07, "loss": 0.1, "step": 31710 }, { "epoch": 0.9316806673324326, "grad_norm": 178080.984375, "learning_rate": 6.831933266756742e-07, "loss": 0.0936, "step": 31720 }, { "epoch": 0.9319743875932562, "grad_norm": 255551.609375, "learning_rate": 6.802561240674383e-07, "loss": 0.1, "step": 31730 }, { "epoch": 0.9322681078540798, "grad_norm": 213992.140625, "learning_rate": 6.773189214592023e-07, "loss": 0.1025, "step": 31740 }, { "epoch": 0.9325618281149034, "grad_norm": 196835.375, "learning_rate": 6.743817188509665e-07, "loss": 0.1072, "step": 31750 }, { "epoch": 0.9328555483757269, "grad_norm": 333363.75, "learning_rate": 6.714445162427305e-07, "loss": 0.1123, "step": 31760 }, { "epoch": 0.9331492686365506, "grad_norm": 227404.625, "learning_rate": 6.685073136344946e-07, "loss": 0.1162, "step": 31770 }, { "epoch": 0.9334429888973741, "grad_norm": 167229.203125, "learning_rate": 6.655701110262587e-07, "loss": 0.1097, "step": 31780 }, { "epoch": 0.9337367091581977, "grad_norm": 129989.921875, "learning_rate": 6.626329084180228e-07, "loss": 0.0961, "step": 31790 }, { "epoch": 0.9340304294190214, "grad_norm": 188959.796875, "learning_rate": 6.596957058097868e-07, "loss": 0.098, "step": 31800 }, { "epoch": 0.9343241496798449, "grad_norm": 183996.15625, "learning_rate": 6.56758503201551e-07, "loss": 0.1037, "step": 31810 }, { "epoch": 0.9346178699406685, "grad_norm": 171346.84375, "learning_rate": 6.53821300593315e-07, "loss": 0.1084, "step": 31820 }, { "epoch": 0.9349115902014921, "grad_norm": 211266.125, "learning_rate": 6.50884097985079e-07, "loss": 0.1011, "step": 31830 }, { "epoch": 0.9352053104623157, "grad_norm": 223795.140625, "learning_rate": 6.479468953768432e-07, "loss": 0.1112, "step": 31840 }, { "epoch": 0.9354990307231393, "grad_norm": 176452.46875, "learning_rate": 6.450096927686073e-07, "loss": 0.1091, "step": 31850 }, { "epoch": 0.9357927509839629, "grad_norm": 146030.890625, "learning_rate": 6.420724901603713e-07, "loss": 0.0961, "step": 31860 }, { "epoch": 0.9360864712447865, "grad_norm": 357883.15625, "learning_rate": 6.391352875521353e-07, "loss": 0.106, "step": 31870 }, { "epoch": 0.93638019150561, "grad_norm": 199097.421875, "learning_rate": 6.361980849438995e-07, "loss": 0.1094, "step": 31880 }, { "epoch": 0.9366739117664337, "grad_norm": 237670.078125, "learning_rate": 6.332608823356635e-07, "loss": 0.109, "step": 31890 }, { "epoch": 0.9369676320272572, "grad_norm": 124938.53125, "learning_rate": 6.303236797274276e-07, "loss": 0.1063, "step": 31900 }, { "epoch": 0.9372613522880808, "grad_norm": 161646.4375, "learning_rate": 6.273864771191917e-07, "loss": 0.1103, "step": 31910 }, { "epoch": 0.9375550725489045, "grad_norm": 241741.28125, "learning_rate": 6.244492745109558e-07, "loss": 0.1068, "step": 31920 }, { "epoch": 0.937848792809728, "grad_norm": 184621.671875, "learning_rate": 6.215120719027199e-07, "loss": 0.0981, "step": 31930 }, { "epoch": 0.9381425130705516, "grad_norm": 421708.21875, "learning_rate": 6.18574869294484e-07, "loss": 0.1011, "step": 31940 }, { "epoch": 0.9384362333313752, "grad_norm": 193987.890625, "learning_rate": 6.15637666686248e-07, "loss": 0.1107, "step": 31950 }, { "epoch": 0.9387299535921988, "grad_norm": 353717.8125, "learning_rate": 6.127004640780121e-07, "loss": 0.1085, "step": 31960 }, { "epoch": 0.9390236738530224, "grad_norm": 281090.625, "learning_rate": 6.097632614697762e-07, "loss": 0.1015, "step": 31970 }, { "epoch": 0.939317394113846, "grad_norm": 202872.59375, "learning_rate": 6.068260588615403e-07, "loss": 0.1104, "step": 31980 }, { "epoch": 0.9396111143746696, "grad_norm": 250266.953125, "learning_rate": 6.038888562533044e-07, "loss": 0.113, "step": 31990 }, { "epoch": 0.9399048346354931, "grad_norm": 179762.796875, "learning_rate": 6.009516536450684e-07, "loss": 0.1122, "step": 32000 }, { "epoch": 0.9401985548963168, "grad_norm": 165452.359375, "learning_rate": 5.980144510368325e-07, "loss": 0.0972, "step": 32010 }, { "epoch": 0.9404922751571403, "grad_norm": 237929.53125, "learning_rate": 5.950772484285966e-07, "loss": 0.1061, "step": 32020 }, { "epoch": 0.9407859954179639, "grad_norm": 200812.484375, "learning_rate": 5.921400458203607e-07, "loss": 0.1038, "step": 32030 }, { "epoch": 0.9410797156787876, "grad_norm": 282003.78125, "learning_rate": 5.892028432121248e-07, "loss": 0.1073, "step": 32040 }, { "epoch": 0.9413734359396111, "grad_norm": 211212.171875, "learning_rate": 5.862656406038889e-07, "loss": 0.1097, "step": 32050 }, { "epoch": 0.9416671562004347, "grad_norm": 174662.359375, "learning_rate": 5.833284379956529e-07, "loss": 0.1071, "step": 32060 }, { "epoch": 0.9419608764612583, "grad_norm": 239921.515625, "learning_rate": 5.80391235387417e-07, "loss": 0.0892, "step": 32070 }, { "epoch": 0.9422545967220819, "grad_norm": 184081.71875, "learning_rate": 5.774540327791811e-07, "loss": 0.1072, "step": 32080 }, { "epoch": 0.9425483169829055, "grad_norm": 195370.75, "learning_rate": 5.745168301709453e-07, "loss": 0.1097, "step": 32090 }, { "epoch": 0.9428420372437291, "grad_norm": 181489.4375, "learning_rate": 5.715796275627094e-07, "loss": 0.1191, "step": 32100 }, { "epoch": 0.9431357575045527, "grad_norm": 205051.59375, "learning_rate": 5.686424249544735e-07, "loss": 0.1066, "step": 32110 }, { "epoch": 0.9434294777653762, "grad_norm": 190375.234375, "learning_rate": 5.657052223462375e-07, "loss": 0.1179, "step": 32120 }, { "epoch": 0.9437231980261999, "grad_norm": 295989.78125, "learning_rate": 5.627680197380016e-07, "loss": 0.1081, "step": 32130 }, { "epoch": 0.9440169182870234, "grad_norm": 232592.140625, "learning_rate": 5.598308171297657e-07, "loss": 0.1033, "step": 32140 }, { "epoch": 0.944310638547847, "grad_norm": 203624.171875, "learning_rate": 5.568936145215298e-07, "loss": 0.1152, "step": 32150 }, { "epoch": 0.9446043588086707, "grad_norm": 259397.21875, "learning_rate": 5.539564119132939e-07, "loss": 0.105, "step": 32160 }, { "epoch": 0.9448980790694942, "grad_norm": 257231.890625, "learning_rate": 5.51019209305058e-07, "loss": 0.1129, "step": 32170 }, { "epoch": 0.9451917993303178, "grad_norm": 163911.984375, "learning_rate": 5.48082006696822e-07, "loss": 0.1008, "step": 32180 }, { "epoch": 0.9454855195911414, "grad_norm": 178127.015625, "learning_rate": 5.451448040885861e-07, "loss": 0.1056, "step": 32190 }, { "epoch": 0.945779239851965, "grad_norm": 332964.78125, "learning_rate": 5.422076014803502e-07, "loss": 0.1144, "step": 32200 }, { "epoch": 0.9460729601127886, "grad_norm": 208783.015625, "learning_rate": 5.392703988721143e-07, "loss": 0.0982, "step": 32210 }, { "epoch": 0.9463666803736122, "grad_norm": 198685.671875, "learning_rate": 5.363331962638784e-07, "loss": 0.1096, "step": 32220 }, { "epoch": 0.9466604006344358, "grad_norm": 165793.234375, "learning_rate": 5.333959936556425e-07, "loss": 0.1089, "step": 32230 }, { "epoch": 0.9469541208952593, "grad_norm": 171279.34375, "learning_rate": 5.304587910474065e-07, "loss": 0.1067, "step": 32240 }, { "epoch": 0.947247841156083, "grad_norm": 169712.109375, "learning_rate": 5.275215884391706e-07, "loss": 0.1027, "step": 32250 }, { "epoch": 0.9475415614169065, "grad_norm": 186874.109375, "learning_rate": 5.245843858309346e-07, "loss": 0.0963, "step": 32260 }, { "epoch": 0.9478352816777301, "grad_norm": 196027.375, "learning_rate": 5.216471832226987e-07, "loss": 0.1035, "step": 32270 }, { "epoch": 0.9481290019385538, "grad_norm": 218183.90625, "learning_rate": 5.187099806144628e-07, "loss": 0.1163, "step": 32280 }, { "epoch": 0.9484227221993773, "grad_norm": 204202.65625, "learning_rate": 5.157727780062269e-07, "loss": 0.1013, "step": 32290 }, { "epoch": 0.9487164424602009, "grad_norm": 188785.625, "learning_rate": 5.12835575397991e-07, "loss": 0.0933, "step": 32300 }, { "epoch": 0.9490101627210245, "grad_norm": 206233.25, "learning_rate": 5.098983727897551e-07, "loss": 0.1052, "step": 32310 }, { "epoch": 0.9493038829818481, "grad_norm": 241111.578125, "learning_rate": 5.069611701815191e-07, "loss": 0.0927, "step": 32320 }, { "epoch": 0.9495976032426717, "grad_norm": 132312.8125, "learning_rate": 5.040239675732832e-07, "loss": 0.104, "step": 32330 }, { "epoch": 0.9498913235034953, "grad_norm": 249602.3125, "learning_rate": 5.010867649650473e-07, "loss": 0.0987, "step": 32340 }, { "epoch": 0.9501850437643189, "grad_norm": 177563.828125, "learning_rate": 4.981495623568114e-07, "loss": 0.0972, "step": 32350 }, { "epoch": 0.9504787640251424, "grad_norm": 204344.734375, "learning_rate": 4.952123597485755e-07, "loss": 0.1067, "step": 32360 }, { "epoch": 0.9507724842859661, "grad_norm": 219151.515625, "learning_rate": 4.922751571403396e-07, "loss": 0.1152, "step": 32370 }, { "epoch": 0.9510662045467896, "grad_norm": 187519.921875, "learning_rate": 4.893379545321036e-07, "loss": 0.0947, "step": 32380 }, { "epoch": 0.9513599248076132, "grad_norm": 159222.359375, "learning_rate": 4.864007519238677e-07, "loss": 0.0928, "step": 32390 }, { "epoch": 0.9516536450684369, "grad_norm": 228226.625, "learning_rate": 4.834635493156318e-07, "loss": 0.0988, "step": 32400 }, { "epoch": 0.9519473653292604, "grad_norm": 275595.3125, "learning_rate": 4.805263467073959e-07, "loss": 0.1133, "step": 32410 }, { "epoch": 0.952241085590084, "grad_norm": 175004.265625, "learning_rate": 4.7758914409916e-07, "loss": 0.0998, "step": 32420 }, { "epoch": 0.9525348058509076, "grad_norm": 250343.40625, "learning_rate": 4.7465194149092413e-07, "loss": 0.0906, "step": 32430 }, { "epoch": 0.9528285261117312, "grad_norm": 158198.0, "learning_rate": 4.717147388826881e-07, "loss": 0.1006, "step": 32440 }, { "epoch": 0.9531222463725548, "grad_norm": 230795.390625, "learning_rate": 4.687775362744523e-07, "loss": 0.0949, "step": 32450 }, { "epoch": 0.9534159666333784, "grad_norm": 152086.703125, "learning_rate": 4.658403336662164e-07, "loss": 0.0875, "step": 32460 }, { "epoch": 0.953709686894202, "grad_norm": 159533.375, "learning_rate": 4.629031310579804e-07, "loss": 0.104, "step": 32470 }, { "epoch": 0.9540034071550255, "grad_norm": 179286.390625, "learning_rate": 4.599659284497445e-07, "loss": 0.1118, "step": 32480 }, { "epoch": 0.9542971274158492, "grad_norm": 266295.28125, "learning_rate": 4.5702872584150864e-07, "loss": 0.1117, "step": 32490 }, { "epoch": 0.9545908476766727, "grad_norm": 154850.21875, "learning_rate": 4.5409152323327264e-07, "loss": 0.0946, "step": 32500 }, { "epoch": 0.9548845679374963, "grad_norm": 215472.265625, "learning_rate": 4.5115432062503674e-07, "loss": 0.1005, "step": 32510 }, { "epoch": 0.95517828819832, "grad_norm": 261068.1875, "learning_rate": 4.482171180168008e-07, "loss": 0.1025, "step": 32520 }, { "epoch": 0.9554720084591435, "grad_norm": 216821.265625, "learning_rate": 4.452799154085649e-07, "loss": 0.0994, "step": 32530 }, { "epoch": 0.9557657287199671, "grad_norm": 196158.09375, "learning_rate": 4.42342712800329e-07, "loss": 0.1058, "step": 32540 }, { "epoch": 0.9560594489807906, "grad_norm": 170745.015625, "learning_rate": 4.3940551019209305e-07, "loss": 0.1031, "step": 32550 }, { "epoch": 0.9563531692416143, "grad_norm": 227021.96875, "learning_rate": 4.3646830758385715e-07, "loss": 0.1, "step": 32560 }, { "epoch": 0.9566468895024379, "grad_norm": 184391.921875, "learning_rate": 4.3353110497562126e-07, "loss": 0.1018, "step": 32570 }, { "epoch": 0.9569406097632615, "grad_norm": 241544.53125, "learning_rate": 4.305939023673853e-07, "loss": 0.1089, "step": 32580 }, { "epoch": 0.9572343300240851, "grad_norm": 180414.671875, "learning_rate": 4.276566997591494e-07, "loss": 0.0899, "step": 32590 }, { "epoch": 0.9575280502849086, "grad_norm": 263073.6875, "learning_rate": 4.247194971509135e-07, "loss": 0.1072, "step": 32600 }, { "epoch": 0.9578217705457323, "grad_norm": 248049.03125, "learning_rate": 4.2178229454267756e-07, "loss": 0.1065, "step": 32610 }, { "epoch": 0.9581154908065559, "grad_norm": 234650.375, "learning_rate": 4.1884509193444167e-07, "loss": 0.1086, "step": 32620 }, { "epoch": 0.9584092110673794, "grad_norm": 172111.40625, "learning_rate": 4.1590788932620577e-07, "loss": 0.1149, "step": 32630 }, { "epoch": 0.9587029313282031, "grad_norm": 154759.140625, "learning_rate": 4.129706867179698e-07, "loss": 0.0984, "step": 32640 }, { "epoch": 0.9589966515890266, "grad_norm": 210516.578125, "learning_rate": 4.100334841097339e-07, "loss": 0.1051, "step": 32650 }, { "epoch": 0.9592903718498502, "grad_norm": 225150.140625, "learning_rate": 4.0709628150149803e-07, "loss": 0.0988, "step": 32660 }, { "epoch": 0.9595840921106737, "grad_norm": 242676.921875, "learning_rate": 4.041590788932621e-07, "loss": 0.1087, "step": 32670 }, { "epoch": 0.9598778123714974, "grad_norm": 152935.09375, "learning_rate": 4.012218762850262e-07, "loss": 0.1007, "step": 32680 }, { "epoch": 0.960171532632321, "grad_norm": 222827.09375, "learning_rate": 3.982846736767903e-07, "loss": 0.1221, "step": 32690 }, { "epoch": 0.9604652528931445, "grad_norm": 211663.40625, "learning_rate": 3.9534747106855433e-07, "loss": 0.1079, "step": 32700 }, { "epoch": 0.9607589731539682, "grad_norm": 168491.515625, "learning_rate": 3.9241026846031844e-07, "loss": 0.105, "step": 32710 }, { "epoch": 0.9610526934147917, "grad_norm": 192430.0625, "learning_rate": 3.8947306585208254e-07, "loss": 0.1007, "step": 32720 }, { "epoch": 0.9613464136756154, "grad_norm": 169525.515625, "learning_rate": 3.865358632438466e-07, "loss": 0.1118, "step": 32730 }, { "epoch": 0.961640133936439, "grad_norm": 215391.84375, "learning_rate": 3.835986606356107e-07, "loss": 0.0992, "step": 32740 }, { "epoch": 0.9619338541972625, "grad_norm": 190734.828125, "learning_rate": 3.8066145802737475e-07, "loss": 0.1043, "step": 32750 }, { "epoch": 0.9622275744580862, "grad_norm": 193281.203125, "learning_rate": 3.7772425541913885e-07, "loss": 0.0974, "step": 32760 }, { "epoch": 0.9625212947189097, "grad_norm": 228398.8125, "learning_rate": 3.7478705281090295e-07, "loss": 0.1013, "step": 32770 }, { "epoch": 0.9628150149797333, "grad_norm": 322533.0, "learning_rate": 3.71849850202667e-07, "loss": 0.0972, "step": 32780 }, { "epoch": 0.9631087352405568, "grad_norm": 302921.15625, "learning_rate": 3.689126475944311e-07, "loss": 0.1158, "step": 32790 }, { "epoch": 0.9634024555013805, "grad_norm": 224529.53125, "learning_rate": 3.659754449861952e-07, "loss": 0.0926, "step": 32800 }, { "epoch": 0.9636961757622041, "grad_norm": 169848.6875, "learning_rate": 3.630382423779592e-07, "loss": 0.0967, "step": 32810 }, { "epoch": 0.9639898960230276, "grad_norm": 173673.015625, "learning_rate": 3.6010103976972336e-07, "loss": 0.0982, "step": 32820 }, { "epoch": 0.9642836162838513, "grad_norm": 190520.328125, "learning_rate": 3.5716383716148747e-07, "loss": 0.1007, "step": 32830 }, { "epoch": 0.9645773365446748, "grad_norm": 212232.296875, "learning_rate": 3.5422663455325146e-07, "loss": 0.0948, "step": 32840 }, { "epoch": 0.9648710568054985, "grad_norm": 238677.140625, "learning_rate": 3.512894319450156e-07, "loss": 0.1039, "step": 32850 }, { "epoch": 0.9651647770663221, "grad_norm": 274947.96875, "learning_rate": 3.483522293367797e-07, "loss": 0.1148, "step": 32860 }, { "epoch": 0.9654584973271456, "grad_norm": 192057.09375, "learning_rate": 3.454150267285437e-07, "loss": 0.0927, "step": 32870 }, { "epoch": 0.9657522175879693, "grad_norm": 230316.140625, "learning_rate": 3.424778241203078e-07, "loss": 0.1036, "step": 32880 }, { "epoch": 0.9660459378487928, "grad_norm": 240906.921875, "learning_rate": 3.39540621512072e-07, "loss": 0.1054, "step": 32890 }, { "epoch": 0.9663396581096164, "grad_norm": 232282.65625, "learning_rate": 3.36603418903836e-07, "loss": 0.1025, "step": 32900 }, { "epoch": 0.9666333783704399, "grad_norm": 409313.25, "learning_rate": 3.336662162956001e-07, "loss": 0.1017, "step": 32910 }, { "epoch": 0.9669270986312636, "grad_norm": 199234.53125, "learning_rate": 3.307290136873642e-07, "loss": 0.1021, "step": 32920 }, { "epoch": 0.9672208188920872, "grad_norm": 225653.84375, "learning_rate": 3.2779181107912823e-07, "loss": 0.1106, "step": 32930 }, { "epoch": 0.9675145391529107, "grad_norm": 209870.796875, "learning_rate": 3.2485460847089234e-07, "loss": 0.1041, "step": 32940 }, { "epoch": 0.9678082594137344, "grad_norm": 189534.671875, "learning_rate": 3.2191740586265644e-07, "loss": 0.1089, "step": 32950 }, { "epoch": 0.9681019796745579, "grad_norm": 196876.625, "learning_rate": 3.189802032544205e-07, "loss": 0.1053, "step": 32960 }, { "epoch": 0.9683956999353815, "grad_norm": 216982.46875, "learning_rate": 3.160430006461846e-07, "loss": 0.1077, "step": 32970 }, { "epoch": 0.9686894201962052, "grad_norm": 276085.65625, "learning_rate": 3.131057980379487e-07, "loss": 0.0938, "step": 32980 }, { "epoch": 0.9689831404570287, "grad_norm": 178878.78125, "learning_rate": 3.1016859542971275e-07, "loss": 0.107, "step": 32990 }, { "epoch": 0.9692768607178524, "grad_norm": 171031.015625, "learning_rate": 3.0723139282147685e-07, "loss": 0.0995, "step": 33000 }, { "epoch": 0.9695705809786759, "grad_norm": 172628.0625, "learning_rate": 3.0429419021324095e-07, "loss": 0.1006, "step": 33010 }, { "epoch": 0.9698643012394995, "grad_norm": 228133.515625, "learning_rate": 3.01356987605005e-07, "loss": 0.1084, "step": 33020 }, { "epoch": 0.970158021500323, "grad_norm": 281896.3125, "learning_rate": 2.984197849967691e-07, "loss": 0.0998, "step": 33030 }, { "epoch": 0.9704517417611467, "grad_norm": 283343.0, "learning_rate": 2.954825823885332e-07, "loss": 0.1014, "step": 33040 }, { "epoch": 0.9707454620219703, "grad_norm": 205182.46875, "learning_rate": 2.9254537978029726e-07, "loss": 0.1001, "step": 33050 }, { "epoch": 0.9710391822827938, "grad_norm": 181874.0625, "learning_rate": 2.8960817717206136e-07, "loss": 0.1027, "step": 33060 }, { "epoch": 0.9713329025436175, "grad_norm": 260625.96875, "learning_rate": 2.8667097456382547e-07, "loss": 0.1039, "step": 33070 }, { "epoch": 0.971626622804441, "grad_norm": 211794.53125, "learning_rate": 2.837337719555895e-07, "loss": 0.1081, "step": 33080 }, { "epoch": 0.9719203430652646, "grad_norm": 210478.578125, "learning_rate": 2.8079656934735357e-07, "loss": 0.1043, "step": 33090 }, { "epoch": 0.9722140633260883, "grad_norm": 283088.28125, "learning_rate": 2.7785936673911767e-07, "loss": 0.1071, "step": 33100 }, { "epoch": 0.9725077835869118, "grad_norm": 206048.578125, "learning_rate": 2.749221641308818e-07, "loss": 0.0932, "step": 33110 }, { "epoch": 0.9728015038477354, "grad_norm": 226760.734375, "learning_rate": 2.719849615226458e-07, "loss": 0.0997, "step": 33120 }, { "epoch": 0.973095224108559, "grad_norm": 216545.171875, "learning_rate": 2.6904775891440993e-07, "loss": 0.1068, "step": 33130 }, { "epoch": 0.9733889443693826, "grad_norm": 187308.359375, "learning_rate": 2.6611055630617403e-07, "loss": 0.1042, "step": 33140 }, { "epoch": 0.9736826646302061, "grad_norm": 224180.96875, "learning_rate": 2.631733536979381e-07, "loss": 0.1082, "step": 33150 }, { "epoch": 0.9739763848910298, "grad_norm": 265727.25, "learning_rate": 2.602361510897022e-07, "loss": 0.1024, "step": 33160 }, { "epoch": 0.9742701051518534, "grad_norm": 180460.75, "learning_rate": 2.572989484814663e-07, "loss": 0.1158, "step": 33170 }, { "epoch": 0.9745638254126769, "grad_norm": 204853.578125, "learning_rate": 2.5436174587323034e-07, "loss": 0.1043, "step": 33180 }, { "epoch": 0.9748575456735006, "grad_norm": 199292.625, "learning_rate": 2.5142454326499444e-07, "loss": 0.1075, "step": 33190 }, { "epoch": 0.9751512659343241, "grad_norm": 188737.65625, "learning_rate": 2.4848734065675855e-07, "loss": 0.1003, "step": 33200 }, { "epoch": 0.9754449861951477, "grad_norm": 159166.03125, "learning_rate": 2.455501380485226e-07, "loss": 0.0997, "step": 33210 }, { "epoch": 0.9757387064559714, "grad_norm": 197897.71875, "learning_rate": 2.426129354402867e-07, "loss": 0.1106, "step": 33220 }, { "epoch": 0.9760324267167949, "grad_norm": 273173.40625, "learning_rate": 2.396757328320508e-07, "loss": 0.1045, "step": 33230 }, { "epoch": 0.9763261469776185, "grad_norm": 237938.5625, "learning_rate": 2.3673853022381485e-07, "loss": 0.113, "step": 33240 }, { "epoch": 0.9766198672384421, "grad_norm": 217068.828125, "learning_rate": 2.3380132761557893e-07, "loss": 0.1131, "step": 33250 }, { "epoch": 0.9769135874992657, "grad_norm": 207630.984375, "learning_rate": 2.3086412500734303e-07, "loss": 0.0898, "step": 33260 }, { "epoch": 0.9772073077600892, "grad_norm": 373682.625, "learning_rate": 2.279269223991071e-07, "loss": 0.1109, "step": 33270 }, { "epoch": 0.9775010280209129, "grad_norm": 186703.109375, "learning_rate": 2.249897197908712e-07, "loss": 0.1069, "step": 33280 }, { "epoch": 0.9777947482817365, "grad_norm": 226360.1875, "learning_rate": 2.220525171826353e-07, "loss": 0.1036, "step": 33290 }, { "epoch": 0.97808846854256, "grad_norm": 359631.4375, "learning_rate": 2.1911531457439937e-07, "loss": 0.1054, "step": 33300 }, { "epoch": 0.9783821888033837, "grad_norm": 172875.984375, "learning_rate": 2.1617811196616344e-07, "loss": 0.0881, "step": 33310 }, { "epoch": 0.9786759090642072, "grad_norm": 196348.0, "learning_rate": 2.1324090935792752e-07, "loss": 0.1036, "step": 33320 }, { "epoch": 0.9789696293250308, "grad_norm": 199160.96875, "learning_rate": 2.1030370674969162e-07, "loss": 0.1012, "step": 33330 }, { "epoch": 0.9792633495858545, "grad_norm": 284381.65625, "learning_rate": 2.073665041414557e-07, "loss": 0.1072, "step": 33340 }, { "epoch": 0.979557069846678, "grad_norm": 175920.203125, "learning_rate": 2.0442930153321975e-07, "loss": 0.1125, "step": 33350 }, { "epoch": 0.9798507901075016, "grad_norm": 232810.546875, "learning_rate": 2.0149209892498388e-07, "loss": 0.1147, "step": 33360 }, { "epoch": 0.9801445103683252, "grad_norm": 201325.84375, "learning_rate": 1.9855489631674796e-07, "loss": 0.0956, "step": 33370 }, { "epoch": 0.9804382306291488, "grad_norm": 184511.515625, "learning_rate": 1.95617693708512e-07, "loss": 0.0982, "step": 33380 }, { "epoch": 0.9807319508899723, "grad_norm": 282978.9375, "learning_rate": 1.9268049110027614e-07, "loss": 0.1101, "step": 33390 }, { "epoch": 0.981025671150796, "grad_norm": 215749.4375, "learning_rate": 1.897432884920402e-07, "loss": 0.1161, "step": 33400 }, { "epoch": 0.9813193914116196, "grad_norm": 250598.796875, "learning_rate": 1.8680608588380426e-07, "loss": 0.1117, "step": 33410 }, { "epoch": 0.9816131116724431, "grad_norm": 179392.65625, "learning_rate": 1.8386888327556837e-07, "loss": 0.0962, "step": 33420 }, { "epoch": 0.9819068319332668, "grad_norm": 202555.828125, "learning_rate": 1.8093168066733244e-07, "loss": 0.1171, "step": 33430 }, { "epoch": 0.9822005521940903, "grad_norm": 321450.15625, "learning_rate": 1.7799447805909652e-07, "loss": 0.101, "step": 33440 }, { "epoch": 0.9824942724549139, "grad_norm": 168368.796875, "learning_rate": 1.750572754508606e-07, "loss": 0.1081, "step": 33450 }, { "epoch": 0.9827879927157376, "grad_norm": 246603.140625, "learning_rate": 1.721200728426247e-07, "loss": 0.1011, "step": 33460 }, { "epoch": 0.9830817129765611, "grad_norm": 296334.53125, "learning_rate": 1.6918287023438878e-07, "loss": 0.0984, "step": 33470 }, { "epoch": 0.9833754332373847, "grad_norm": 213755.40625, "learning_rate": 1.6624566762615286e-07, "loss": 0.0975, "step": 33480 }, { "epoch": 0.9836691534982083, "grad_norm": 234598.125, "learning_rate": 1.6330846501791696e-07, "loss": 0.096, "step": 33490 }, { "epoch": 0.9839628737590319, "grad_norm": 227136.796875, "learning_rate": 1.6037126240968104e-07, "loss": 0.1047, "step": 33500 }, { "epoch": 0.9842565940198554, "grad_norm": 215299.328125, "learning_rate": 1.574340598014451e-07, "loss": 0.1048, "step": 33510 }, { "epoch": 0.9845503142806791, "grad_norm": 163650.609375, "learning_rate": 1.544968571932092e-07, "loss": 0.0928, "step": 33520 }, { "epoch": 0.9848440345415027, "grad_norm": 201340.4375, "learning_rate": 1.515596545849733e-07, "loss": 0.102, "step": 33530 }, { "epoch": 0.9851377548023262, "grad_norm": 184692.265625, "learning_rate": 1.4862245197673737e-07, "loss": 0.0898, "step": 33540 }, { "epoch": 0.9854314750631499, "grad_norm": 190677.9375, "learning_rate": 1.4568524936850145e-07, "loss": 0.1095, "step": 33550 }, { "epoch": 0.9857251953239734, "grad_norm": 265387.46875, "learning_rate": 1.4274804676026555e-07, "loss": 0.1051, "step": 33560 }, { "epoch": 0.986018915584797, "grad_norm": 171414.9375, "learning_rate": 1.398108441520296e-07, "loss": 0.0976, "step": 33570 }, { "epoch": 0.9863126358456207, "grad_norm": 180341.171875, "learning_rate": 1.368736415437937e-07, "loss": 0.101, "step": 33580 }, { "epoch": 0.9866063561064442, "grad_norm": 160201.828125, "learning_rate": 1.3393643893555778e-07, "loss": 0.1054, "step": 33590 }, { "epoch": 0.9869000763672678, "grad_norm": 213433.40625, "learning_rate": 1.3099923632732186e-07, "loss": 0.0962, "step": 33600 }, { "epoch": 0.9871937966280914, "grad_norm": 264138.71875, "learning_rate": 1.2806203371908596e-07, "loss": 0.1028, "step": 33610 }, { "epoch": 0.987487516888915, "grad_norm": 178577.28125, "learning_rate": 1.2512483111085004e-07, "loss": 0.1015, "step": 33620 }, { "epoch": 0.9877812371497385, "grad_norm": 437110.4375, "learning_rate": 1.2218762850261411e-07, "loss": 0.1115, "step": 33630 }, { "epoch": 0.9880749574105622, "grad_norm": 207703.046875, "learning_rate": 1.1925042589437822e-07, "loss": 0.1114, "step": 33640 }, { "epoch": 0.9883686776713858, "grad_norm": 176473.984375, "learning_rate": 1.1631322328614228e-07, "loss": 0.1042, "step": 33650 }, { "epoch": 0.9886623979322093, "grad_norm": 235263.84375, "learning_rate": 1.1337602067790637e-07, "loss": 0.1101, "step": 33660 }, { "epoch": 0.988956118193033, "grad_norm": 233040.546875, "learning_rate": 1.1043881806967046e-07, "loss": 0.092, "step": 33670 }, { "epoch": 0.9892498384538565, "grad_norm": 187564.921875, "learning_rate": 1.0750161546143454e-07, "loss": 0.1017, "step": 33680 }, { "epoch": 0.9895435587146801, "grad_norm": 257444.1875, "learning_rate": 1.0456441285319863e-07, "loss": 0.1036, "step": 33690 }, { "epoch": 0.9898372789755038, "grad_norm": 186813.640625, "learning_rate": 1.016272102449627e-07, "loss": 0.1032, "step": 33700 }, { "epoch": 0.9901309992363273, "grad_norm": 326229.5, "learning_rate": 9.86900076367268e-08, "loss": 0.1078, "step": 33710 }, { "epoch": 0.9904247194971509, "grad_norm": 143506.15625, "learning_rate": 9.575280502849088e-08, "loss": 0.1025, "step": 33720 }, { "epoch": 0.9907184397579745, "grad_norm": 192545.671875, "learning_rate": 9.281560242025495e-08, "loss": 0.0936, "step": 33730 }, { "epoch": 0.9910121600187981, "grad_norm": 256515.78125, "learning_rate": 8.987839981201904e-08, "loss": 0.1014, "step": 33740 }, { "epoch": 0.9913058802796217, "grad_norm": 256223.265625, "learning_rate": 8.694119720378311e-08, "loss": 0.0951, "step": 33750 }, { "epoch": 0.9915996005404453, "grad_norm": 210804.46875, "learning_rate": 8.40039945955472e-08, "loss": 0.0991, "step": 33760 }, { "epoch": 0.9918933208012689, "grad_norm": 169832.078125, "learning_rate": 8.10667919873113e-08, "loss": 0.1018, "step": 33770 }, { "epoch": 0.9921870410620924, "grad_norm": 222814.265625, "learning_rate": 7.812958937907537e-08, "loss": 0.0977, "step": 33780 }, { "epoch": 0.9924807613229161, "grad_norm": 226384.09375, "learning_rate": 7.519238677083946e-08, "loss": 0.1023, "step": 33790 }, { "epoch": 0.9927744815837396, "grad_norm": 244384.515625, "learning_rate": 7.225518416260354e-08, "loss": 0.1072, "step": 33800 }, { "epoch": 0.9930682018445632, "grad_norm": 295552.375, "learning_rate": 6.931798155436763e-08, "loss": 0.1022, "step": 33810 }, { "epoch": 0.9933619221053869, "grad_norm": 246645.734375, "learning_rate": 6.63807789461317e-08, "loss": 0.1044, "step": 33820 }, { "epoch": 0.9936556423662104, "grad_norm": 369934.0, "learning_rate": 6.34435763378958e-08, "loss": 0.1123, "step": 33830 }, { "epoch": 0.993949362627034, "grad_norm": 166507.40625, "learning_rate": 6.050637372965987e-08, "loss": 0.0896, "step": 33840 }, { "epoch": 0.9942430828878576, "grad_norm": 202733.90625, "learning_rate": 5.756917112142396e-08, "loss": 0.1085, "step": 33850 }, { "epoch": 0.9945368031486812, "grad_norm": 182501.265625, "learning_rate": 5.463196851318804e-08, "loss": 0.1077, "step": 33860 }, { "epoch": 0.9948305234095048, "grad_norm": 198021.09375, "learning_rate": 5.169476590495213e-08, "loss": 0.1049, "step": 33870 }, { "epoch": 0.9951242436703284, "grad_norm": 212111.234375, "learning_rate": 4.875756329671621e-08, "loss": 0.1066, "step": 33880 }, { "epoch": 0.995417963931152, "grad_norm": 241318.796875, "learning_rate": 4.5820360688480296e-08, "loss": 0.1044, "step": 33890 }, { "epoch": 0.9957116841919755, "grad_norm": 237178.640625, "learning_rate": 4.288315808024438e-08, "loss": 0.1068, "step": 33900 }, { "epoch": 0.9960054044527992, "grad_norm": 237952.328125, "learning_rate": 3.9945955472008456e-08, "loss": 0.098, "step": 33910 }, { "epoch": 0.9962991247136227, "grad_norm": 300814.78125, "learning_rate": 3.7008752863772546e-08, "loss": 0.0972, "step": 33920 }, { "epoch": 0.9965928449744463, "grad_norm": 228021.390625, "learning_rate": 3.407155025553663e-08, "loss": 0.0982, "step": 33930 }, { "epoch": 0.99688656523527, "grad_norm": 249992.15625, "learning_rate": 3.113434764730071e-08, "loss": 0.1167, "step": 33940 }, { "epoch": 0.9971802854960935, "grad_norm": 175068.03125, "learning_rate": 2.8197145039064796e-08, "loss": 0.1041, "step": 33950 }, { "epoch": 0.9974740057569171, "grad_norm": 124420.75, "learning_rate": 2.525994243082888e-08, "loss": 0.1022, "step": 33960 }, { "epoch": 0.9977677260177407, "grad_norm": 312982.4375, "learning_rate": 2.2322739822592967e-08, "loss": 0.1056, "step": 33970 }, { "epoch": 0.9980614462785643, "grad_norm": 169339.078125, "learning_rate": 1.9385537214357047e-08, "loss": 0.1081, "step": 33980 }, { "epoch": 0.9983551665393879, "grad_norm": 246406.75, "learning_rate": 1.644833460612113e-08, "loss": 0.0985, "step": 33990 }, { "epoch": 0.9986488868002115, "grad_norm": 224328.25, "learning_rate": 1.3511131997885215e-08, "loss": 0.0938, "step": 34000 }, { "epoch": 0.9989426070610351, "grad_norm": 356246.90625, "learning_rate": 1.0573929389649299e-08, "loss": 0.0995, "step": 34010 }, { "epoch": 0.9992363273218586, "grad_norm": 159079.046875, "learning_rate": 7.636726781413382e-09, "loss": 0.1014, "step": 34020 }, { "epoch": 0.9995300475826823, "grad_norm": 223452.921875, "learning_rate": 4.699524173177466e-09, "loss": 0.1023, "step": 34030 }, { "epoch": 0.9998237678435058, "grad_norm": 250883.703125, "learning_rate": 1.7623215649415498e-09, "loss": 0.1075, "step": 34040 } ], "logging_steps": 10, "max_steps": 34046, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.807932687428354e+18, "train_batch_size": 200, "trial_name": null, "trial_params": null }