{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 46153, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00010833531948085715, "grad_norm": 2.6521787643432617, "learning_rate": 1.9999999420824903e-05, "loss": 0.3348, "step": 5 }, { "epoch": 0.0002166706389617143, "grad_norm": 1.742197871208191, "learning_rate": 1.999999768329967e-05, "loss": 0.3096, "step": 10 }, { "epoch": 0.00032500595844257147, "grad_norm": 1.5409448146820068, "learning_rate": 1.9999994787424503e-05, "loss": 0.1622, "step": 15 }, { "epoch": 0.0004333412779234286, "grad_norm": 1.746706485748291, "learning_rate": 1.9999990733199743e-05, "loss": 0.2691, "step": 20 }, { "epoch": 0.0005416765974042857, "grad_norm": 1.7288819551467896, "learning_rate": 1.9999985520625857e-05, "loss": 0.2862, "step": 25 }, { "epoch": 0.0006500119168851429, "grad_norm": 1.5951204299926758, "learning_rate": 1.9999979149703445e-05, "loss": 0.209, "step": 30 }, { "epoch": 0.000758347236366, "grad_norm": 1.3798725605010986, "learning_rate": 1.9999971620433252e-05, "loss": 0.2321, "step": 35 }, { "epoch": 0.0008666825558468572, "grad_norm": 1.489790916442871, "learning_rate": 1.9999962932816142e-05, "loss": 0.2176, "step": 40 }, { "epoch": 0.0009750178753277143, "grad_norm": 1.979129433631897, "learning_rate": 1.999995308685313e-05, "loss": 0.1856, "step": 45 }, { "epoch": 0.0010833531948085714, "grad_norm": 1.4201934337615967, "learning_rate": 1.999994208254535e-05, "loss": 0.304, "step": 50 }, { "epoch": 0.0011916885142894287, "grad_norm": 2.0393104553222656, "learning_rate": 1.999992991989408e-05, "loss": 0.1762, "step": 55 }, { "epoch": 0.0013000238337702859, "grad_norm": 1.8333830833435059, "learning_rate": 1.9999916598900726e-05, "loss": 0.2214, "step": 60 }, { "epoch": 0.001408359153251143, "grad_norm": 1.7429314851760864, "learning_rate": 1.9999902119566836e-05, "loss": 0.1788, "step": 65 }, { "epoch": 0.001516694472732, "grad_norm": 1.4592255353927612, "learning_rate": 1.999988648189408e-05, "loss": 0.2672, "step": 70 }, { "epoch": 0.0016250297922128572, "grad_norm": 2.6618306636810303, "learning_rate": 1.999986968588427e-05, "loss": 0.2475, "step": 75 }, { "epoch": 0.0017333651116937143, "grad_norm": 2.186831474304199, "learning_rate": 1.9999851731539365e-05, "loss": 0.2824, "step": 80 }, { "epoch": 0.0018417004311745715, "grad_norm": 1.7947226762771606, "learning_rate": 1.9999832618861426e-05, "loss": 0.1965, "step": 85 }, { "epoch": 0.0019500357506554286, "grad_norm": 2.5878849029541016, "learning_rate": 1.999981234785268e-05, "loss": 0.2303, "step": 90 }, { "epoch": 0.002058371070136286, "grad_norm": 1.705706000328064, "learning_rate": 1.999979091851547e-05, "loss": 0.2217, "step": 95 }, { "epoch": 0.002166706389617143, "grad_norm": 1.6379587650299072, "learning_rate": 1.999976833085228e-05, "loss": 0.1999, "step": 100 }, { "epoch": 0.002275041709098, "grad_norm": 2.413855791091919, "learning_rate": 1.9999744584865724e-05, "loss": 0.2552, "step": 105 }, { "epoch": 0.0023833770285788575, "grad_norm": 1.4965903759002686, "learning_rate": 1.9999719680558555e-05, "loss": 0.2161, "step": 110 }, { "epoch": 0.0024917123480597144, "grad_norm": 1.3956011533737183, "learning_rate": 1.9999693617933654e-05, "loss": 0.1724, "step": 115 }, { "epoch": 0.0026000476675405717, "grad_norm": 1.5313043594360352, "learning_rate": 1.9999666396994044e-05, "loss": 0.1958, "step": 120 }, { "epoch": 0.0027083829870214286, "grad_norm": 1.752564549446106, "learning_rate": 1.999963801774288e-05, "loss": 0.3177, "step": 125 }, { "epoch": 0.002816718306502286, "grad_norm": 1.7295702695846558, "learning_rate": 1.9999608480183445e-05, "loss": 0.2154, "step": 130 }, { "epoch": 0.002925053625983143, "grad_norm": 1.550844669342041, "learning_rate": 1.9999577784319158e-05, "loss": 0.2133, "step": 135 }, { "epoch": 0.003033388945464, "grad_norm": 2.676785707473755, "learning_rate": 1.999954593015358e-05, "loss": 0.2613, "step": 140 }, { "epoch": 0.0031417242649448575, "grad_norm": 1.4622076749801636, "learning_rate": 1.99995129176904e-05, "loss": 0.2355, "step": 145 }, { "epoch": 0.0032500595844257144, "grad_norm": 1.2596291303634644, "learning_rate": 1.9999478746933443e-05, "loss": 0.1772, "step": 150 }, { "epoch": 0.0033583949039065718, "grad_norm": 1.3076326847076416, "learning_rate": 1.999944341788666e-05, "loss": 0.2201, "step": 155 }, { "epoch": 0.0034667302233874287, "grad_norm": 1.4971131086349487, "learning_rate": 1.9999406930554153e-05, "loss": 0.2488, "step": 160 }, { "epoch": 0.003575065542868286, "grad_norm": 0.8197855353355408, "learning_rate": 1.9999369284940144e-05, "loss": 0.2288, "step": 165 }, { "epoch": 0.003683400862349143, "grad_norm": 1.7882953882217407, "learning_rate": 1.9999330481048993e-05, "loss": 0.2289, "step": 170 }, { "epoch": 0.0037917361818300003, "grad_norm": 1.2168915271759033, "learning_rate": 1.9999290518885197e-05, "loss": 0.2603, "step": 175 }, { "epoch": 0.003900071501310857, "grad_norm": 1.867053747177124, "learning_rate": 1.9999249398453383e-05, "loss": 0.2872, "step": 180 }, { "epoch": 0.004008406820791715, "grad_norm": 1.0832598209381104, "learning_rate": 1.9999207119758315e-05, "loss": 0.1751, "step": 185 }, { "epoch": 0.004116742140272572, "grad_norm": 1.6589012145996094, "learning_rate": 1.999916368280489e-05, "loss": 0.2325, "step": 190 }, { "epoch": 0.004225077459753429, "grad_norm": 1.6422487497329712, "learning_rate": 1.9999119087598137e-05, "loss": 0.228, "step": 195 }, { "epoch": 0.004333412779234286, "grad_norm": 1.790552020072937, "learning_rate": 1.9999073334143228e-05, "loss": 0.3411, "step": 200 }, { "epoch": 0.004441748098715143, "grad_norm": 1.8768460750579834, "learning_rate": 1.999902642244546e-05, "loss": 0.2131, "step": 205 }, { "epoch": 0.004550083418196, "grad_norm": 1.8717234134674072, "learning_rate": 1.9998978352510267e-05, "loss": 0.318, "step": 210 }, { "epoch": 0.004658418737676857, "grad_norm": 1.768964171409607, "learning_rate": 1.9998929124343212e-05, "loss": 0.2042, "step": 215 }, { "epoch": 0.004766754057157715, "grad_norm": 1.7787110805511475, "learning_rate": 1.9998878737950004e-05, "loss": 0.276, "step": 220 }, { "epoch": 0.004875089376638572, "grad_norm": 2.0831949710845947, "learning_rate": 1.999882719333648e-05, "loss": 0.2183, "step": 225 }, { "epoch": 0.004983424696119429, "grad_norm": 1.0717750787734985, "learning_rate": 1.9998774490508605e-05, "loss": 0.1388, "step": 230 }, { "epoch": 0.005091760015600286, "grad_norm": 1.531943678855896, "learning_rate": 1.9998720629472488e-05, "loss": 0.2575, "step": 235 }, { "epoch": 0.0052000953350811435, "grad_norm": 1.2452030181884766, "learning_rate": 1.9998665610234366e-05, "loss": 0.1576, "step": 240 }, { "epoch": 0.005308430654562, "grad_norm": 1.683838963508606, "learning_rate": 1.9998609432800612e-05, "loss": 0.3028, "step": 245 }, { "epoch": 0.005416765974042857, "grad_norm": 1.141945719718933, "learning_rate": 1.999855209717774e-05, "loss": 0.2524, "step": 250 }, { "epoch": 0.005525101293523715, "grad_norm": 1.9700487852096558, "learning_rate": 1.9998493603372383e-05, "loss": 0.3414, "step": 255 }, { "epoch": 0.005633436613004572, "grad_norm": 1.8511676788330078, "learning_rate": 1.9998433951391315e-05, "loss": 0.2253, "step": 260 }, { "epoch": 0.005741771932485429, "grad_norm": 1.4467716217041016, "learning_rate": 1.9998373141241454e-05, "loss": 0.2389, "step": 265 }, { "epoch": 0.005850107251966286, "grad_norm": 1.7981977462768555, "learning_rate": 1.999831117292984e-05, "loss": 0.24, "step": 270 }, { "epoch": 0.0059584425714471435, "grad_norm": 1.524837851524353, "learning_rate": 1.999824804646365e-05, "loss": 0.296, "step": 275 }, { "epoch": 0.006066777890928, "grad_norm": 1.2462334632873535, "learning_rate": 1.99981837618502e-05, "loss": 0.1733, "step": 280 }, { "epoch": 0.006175113210408857, "grad_norm": 2.347177267074585, "learning_rate": 1.9998118319096935e-05, "loss": 0.1833, "step": 285 }, { "epoch": 0.006283448529889715, "grad_norm": 2.1643190383911133, "learning_rate": 1.999805171821143e-05, "loss": 0.2043, "step": 290 }, { "epoch": 0.006391783849370572, "grad_norm": 1.688205361366272, "learning_rate": 1.9997983959201408e-05, "loss": 0.2913, "step": 295 }, { "epoch": 0.006500119168851429, "grad_norm": 1.1432108879089355, "learning_rate": 1.9997915042074713e-05, "loss": 0.253, "step": 300 }, { "epoch": 0.006608454488332286, "grad_norm": 1.6888271570205688, "learning_rate": 1.9997844966839328e-05, "loss": 0.2514, "step": 305 }, { "epoch": 0.0067167898078131436, "grad_norm": 1.8303322792053223, "learning_rate": 1.9997773733503373e-05, "loss": 0.1851, "step": 310 }, { "epoch": 0.0068251251272940005, "grad_norm": 1.8913966417312622, "learning_rate": 1.9997701342075095e-05, "loss": 0.2832, "step": 315 }, { "epoch": 0.006933460446774857, "grad_norm": 1.5696371793746948, "learning_rate": 1.9997627792562885e-05, "loss": 0.1757, "step": 320 }, { "epoch": 0.007041795766255714, "grad_norm": 1.294601559638977, "learning_rate": 1.9997553084975258e-05, "loss": 0.2829, "step": 325 }, { "epoch": 0.007150131085736572, "grad_norm": 1.72998046875, "learning_rate": 1.9997477219320872e-05, "loss": 0.2484, "step": 330 }, { "epoch": 0.007258466405217429, "grad_norm": 1.8302903175354004, "learning_rate": 1.999740019560851e-05, "loss": 0.2596, "step": 335 }, { "epoch": 0.007366801724698286, "grad_norm": 2.273862838745117, "learning_rate": 1.9997322013847097e-05, "loss": 0.3158, "step": 340 }, { "epoch": 0.007475137044179144, "grad_norm": 2.0528604984283447, "learning_rate": 1.9997242674045686e-05, "loss": 0.2392, "step": 345 }, { "epoch": 0.0075834723636600005, "grad_norm": 1.493494987487793, "learning_rate": 1.9997162176213473e-05, "loss": 0.1561, "step": 350 }, { "epoch": 0.007691807683140857, "grad_norm": 2.112382173538208, "learning_rate": 1.999708052035978e-05, "loss": 0.2115, "step": 355 }, { "epoch": 0.007800143002621714, "grad_norm": 1.4557229280471802, "learning_rate": 1.9996997706494064e-05, "loss": 0.2373, "step": 360 }, { "epoch": 0.007908478322102572, "grad_norm": 2.6282355785369873, "learning_rate": 1.9996913734625916e-05, "loss": 0.2591, "step": 365 }, { "epoch": 0.00801681364158343, "grad_norm": 1.3133039474487305, "learning_rate": 1.999682860476507e-05, "loss": 0.157, "step": 370 }, { "epoch": 0.008125148961064286, "grad_norm": 1.9293413162231445, "learning_rate": 1.9996742316921378e-05, "loss": 0.2579, "step": 375 }, { "epoch": 0.008233484280545144, "grad_norm": 1.8038097620010376, "learning_rate": 1.9996654871104843e-05, "loss": 0.2903, "step": 380 }, { "epoch": 0.008341819600026, "grad_norm": 1.3703402280807495, "learning_rate": 1.999656626732559e-05, "loss": 0.1735, "step": 385 }, { "epoch": 0.008450154919506857, "grad_norm": 1.8002214431762695, "learning_rate": 1.9996476505593883e-05, "loss": 0.254, "step": 390 }, { "epoch": 0.008558490238987715, "grad_norm": 2.2936806678771973, "learning_rate": 1.9996385585920123e-05, "loss": 0.3762, "step": 395 }, { "epoch": 0.008666825558468571, "grad_norm": 1.383238673210144, "learning_rate": 1.9996293508314837e-05, "loss": 0.1921, "step": 400 }, { "epoch": 0.008775160877949429, "grad_norm": 1.6505705118179321, "learning_rate": 1.9996200272788692e-05, "loss": 0.2999, "step": 405 }, { "epoch": 0.008883496197430287, "grad_norm": 1.893597960472107, "learning_rate": 1.999610587935249e-05, "loss": 0.2756, "step": 410 }, { "epoch": 0.008991831516911143, "grad_norm": 1.6049712896347046, "learning_rate": 1.9996010328017158e-05, "loss": 0.2276, "step": 415 }, { "epoch": 0.009100166836392, "grad_norm": 2.058567762374878, "learning_rate": 1.9995913618793775e-05, "loss": 0.2845, "step": 420 }, { "epoch": 0.009208502155872858, "grad_norm": 1.6972384452819824, "learning_rate": 1.9995815751693538e-05, "loss": 0.2354, "step": 425 }, { "epoch": 0.009316837475353714, "grad_norm": 1.5970351696014404, "learning_rate": 1.999571672672778e-05, "loss": 0.2305, "step": 430 }, { "epoch": 0.009425172794834572, "grad_norm": 2.047508955001831, "learning_rate": 1.999561654390798e-05, "loss": 0.2297, "step": 435 }, { "epoch": 0.00953350811431543, "grad_norm": 1.445166826248169, "learning_rate": 1.999551520324573e-05, "loss": 0.2589, "step": 440 }, { "epoch": 0.009641843433796286, "grad_norm": 1.6712799072265625, "learning_rate": 1.9995412704752786e-05, "loss": 0.2634, "step": 445 }, { "epoch": 0.009750178753277144, "grad_norm": 1.9628418684005737, "learning_rate": 1.9995309048441007e-05, "loss": 0.2563, "step": 450 }, { "epoch": 0.009858514072758, "grad_norm": 1.4756723642349243, "learning_rate": 1.9995204234322404e-05, "loss": 0.2797, "step": 455 }, { "epoch": 0.009966849392238858, "grad_norm": 1.635072112083435, "learning_rate": 1.999509826240912e-05, "loss": 0.2863, "step": 460 }, { "epoch": 0.010075184711719715, "grad_norm": 1.8329048156738281, "learning_rate": 1.9994991132713427e-05, "loss": 0.2865, "step": 465 }, { "epoch": 0.010183520031200571, "grad_norm": 1.3737131357192993, "learning_rate": 1.9994882845247735e-05, "loss": 0.178, "step": 470 }, { "epoch": 0.01029185535068143, "grad_norm": 1.5462311506271362, "learning_rate": 1.999477340002459e-05, "loss": 0.1811, "step": 475 }, { "epoch": 0.010400190670162287, "grad_norm": 1.4888426065444946, "learning_rate": 1.999466279705667e-05, "loss": 0.1824, "step": 480 }, { "epoch": 0.010508525989643143, "grad_norm": 1.8794163465499878, "learning_rate": 1.9994551036356788e-05, "loss": 0.2559, "step": 485 }, { "epoch": 0.010616861309124, "grad_norm": 1.4744220972061157, "learning_rate": 1.9994438117937883e-05, "loss": 0.1373, "step": 490 }, { "epoch": 0.010725196628604858, "grad_norm": 1.6283189058303833, "learning_rate": 1.9994324041813038e-05, "loss": 0.2044, "step": 495 }, { "epoch": 0.010833531948085715, "grad_norm": 2.260960340499878, "learning_rate": 1.9994208807995466e-05, "loss": 0.1986, "step": 500 }, { "epoch": 0.010941867267566572, "grad_norm": 1.4087194204330444, "learning_rate": 1.9994092416498518e-05, "loss": 0.2512, "step": 505 }, { "epoch": 0.01105020258704743, "grad_norm": 1.8002055883407593, "learning_rate": 1.999397486733568e-05, "loss": 0.1353, "step": 510 }, { "epoch": 0.011158537906528286, "grad_norm": 2.1779873371124268, "learning_rate": 1.9993856160520558e-05, "loss": 0.2365, "step": 515 }, { "epoch": 0.011266873226009144, "grad_norm": 1.6223790645599365, "learning_rate": 1.999373629606691e-05, "loss": 0.163, "step": 520 }, { "epoch": 0.01137520854549, "grad_norm": 1.3313829898834229, "learning_rate": 1.9993615273988615e-05, "loss": 0.2268, "step": 525 }, { "epoch": 0.011483543864970858, "grad_norm": 1.6336294412612915, "learning_rate": 1.99934930942997e-05, "loss": 0.1994, "step": 530 }, { "epoch": 0.011591879184451715, "grad_norm": 1.5389949083328247, "learning_rate": 1.999336975701431e-05, "loss": 0.246, "step": 535 }, { "epoch": 0.011700214503932571, "grad_norm": 1.4158190488815308, "learning_rate": 1.9993245262146733e-05, "loss": 0.2082, "step": 540 }, { "epoch": 0.01180854982341343, "grad_norm": 1.962289571762085, "learning_rate": 1.9993119609711394e-05, "loss": 0.2454, "step": 545 }, { "epoch": 0.011916885142894287, "grad_norm": 1.6048647165298462, "learning_rate": 1.9992992799722845e-05, "loss": 0.237, "step": 550 }, { "epoch": 0.012025220462375143, "grad_norm": 1.507602334022522, "learning_rate": 1.999286483219577e-05, "loss": 0.308, "step": 555 }, { "epoch": 0.012133555781856, "grad_norm": 1.6354038715362549, "learning_rate": 1.9992735707145004e-05, "loss": 0.219, "step": 560 }, { "epoch": 0.012241891101336859, "grad_norm": 1.9862715005874634, "learning_rate": 1.9992605424585493e-05, "loss": 0.2404, "step": 565 }, { "epoch": 0.012350226420817715, "grad_norm": 1.9061675071716309, "learning_rate": 1.9992473984532336e-05, "loss": 0.1263, "step": 570 }, { "epoch": 0.012458561740298572, "grad_norm": 1.515169620513916, "learning_rate": 1.999234138700075e-05, "loss": 0.1725, "step": 575 }, { "epoch": 0.01256689705977943, "grad_norm": 1.3728933334350586, "learning_rate": 1.9992207632006106e-05, "loss": 0.2033, "step": 580 }, { "epoch": 0.012675232379260286, "grad_norm": 3.4557700157165527, "learning_rate": 1.999207271956389e-05, "loss": 0.312, "step": 585 }, { "epoch": 0.012783567698741144, "grad_norm": 1.8126544952392578, "learning_rate": 1.9991936649689726e-05, "loss": 0.2633, "step": 590 }, { "epoch": 0.012891903018222, "grad_norm": 1.796362280845642, "learning_rate": 1.9991799422399384e-05, "loss": 0.2796, "step": 595 }, { "epoch": 0.013000238337702858, "grad_norm": 1.8216372728347778, "learning_rate": 1.9991661037708755e-05, "loss": 0.1806, "step": 600 }, { "epoch": 0.013108573657183716, "grad_norm": 2.094696283340454, "learning_rate": 1.9991521495633873e-05, "loss": 0.1974, "step": 605 }, { "epoch": 0.013216908976664572, "grad_norm": 1.825470209121704, "learning_rate": 1.9991380796190894e-05, "loss": 0.3378, "step": 610 }, { "epoch": 0.01332524429614543, "grad_norm": 1.131395697593689, "learning_rate": 1.9991238939396123e-05, "loss": 0.2313, "step": 615 }, { "epoch": 0.013433579615626287, "grad_norm": 1.9503051042556763, "learning_rate": 1.999109592526599e-05, "loss": 0.2501, "step": 620 }, { "epoch": 0.013541914935107143, "grad_norm": 1.7290587425231934, "learning_rate": 1.9990951753817058e-05, "loss": 0.2507, "step": 625 }, { "epoch": 0.013650250254588001, "grad_norm": 0.8380336761474609, "learning_rate": 1.999080642506603e-05, "loss": 0.1303, "step": 630 }, { "epoch": 0.013758585574068859, "grad_norm": 2.201349973678589, "learning_rate": 1.9990659939029744e-05, "loss": 0.2186, "step": 635 }, { "epoch": 0.013866920893549715, "grad_norm": 1.8932205438613892, "learning_rate": 1.999051229572516e-05, "loss": 0.2543, "step": 640 }, { "epoch": 0.013975256213030572, "grad_norm": 1.603873372077942, "learning_rate": 1.9990363495169386e-05, "loss": 0.1591, "step": 645 }, { "epoch": 0.014083591532511429, "grad_norm": 1.3565785884857178, "learning_rate": 1.9990213537379656e-05, "loss": 0.2415, "step": 650 }, { "epoch": 0.014191926851992286, "grad_norm": 1.5821009874343872, "learning_rate": 1.9990062422373345e-05, "loss": 0.2105, "step": 655 }, { "epoch": 0.014300262171473144, "grad_norm": 1.1075721979141235, "learning_rate": 1.9989910150167948e-05, "loss": 0.2046, "step": 660 }, { "epoch": 0.014408597490954, "grad_norm": 1.2521450519561768, "learning_rate": 1.9989756720781114e-05, "loss": 0.2515, "step": 665 }, { "epoch": 0.014516932810434858, "grad_norm": 1.961208462715149, "learning_rate": 1.9989602134230607e-05, "loss": 0.2933, "step": 670 }, { "epoch": 0.014625268129915716, "grad_norm": 1.934740662574768, "learning_rate": 1.9989446390534337e-05, "loss": 0.2025, "step": 675 }, { "epoch": 0.014733603449396572, "grad_norm": 2.0839850902557373, "learning_rate": 1.9989289489710345e-05, "loss": 0.2724, "step": 680 }, { "epoch": 0.01484193876887743, "grad_norm": 1.8542290925979614, "learning_rate": 1.9989131431776806e-05, "loss": 0.242, "step": 685 }, { "epoch": 0.014950274088358287, "grad_norm": 1.5249747037887573, "learning_rate": 1.9988972216752026e-05, "loss": 0.1333, "step": 690 }, { "epoch": 0.015058609407839143, "grad_norm": 2.2680087089538574, "learning_rate": 1.9988811844654453e-05, "loss": 0.2664, "step": 695 }, { "epoch": 0.015166944727320001, "grad_norm": 1.5897984504699707, "learning_rate": 1.9988650315502656e-05, "loss": 0.31, "step": 700 }, { "epoch": 0.015275280046800859, "grad_norm": 1.2968108654022217, "learning_rate": 1.998848762931535e-05, "loss": 0.1919, "step": 705 }, { "epoch": 0.015383615366281715, "grad_norm": 1.804995059967041, "learning_rate": 1.9988323786111383e-05, "loss": 0.3417, "step": 710 }, { "epoch": 0.015491950685762573, "grad_norm": 1.4559403657913208, "learning_rate": 1.998815878590973e-05, "loss": 0.2015, "step": 715 }, { "epoch": 0.015600286005243429, "grad_norm": 2.2961347103118896, "learning_rate": 1.99879926287295e-05, "loss": 0.2872, "step": 720 }, { "epoch": 0.015708621324724288, "grad_norm": 2.2544925212860107, "learning_rate": 1.998782531458995e-05, "loss": 0.3223, "step": 725 }, { "epoch": 0.015816956644205144, "grad_norm": 2.1344618797302246, "learning_rate": 1.9987656843510454e-05, "loss": 0.1847, "step": 730 }, { "epoch": 0.015925291963686, "grad_norm": 2.1203818321228027, "learning_rate": 1.9987487215510524e-05, "loss": 0.1582, "step": 735 }, { "epoch": 0.01603362728316686, "grad_norm": 2.0289828777313232, "learning_rate": 1.9987316430609817e-05, "loss": 0.2324, "step": 740 }, { "epoch": 0.016141962602647716, "grad_norm": 1.7644481658935547, "learning_rate": 1.998714448882811e-05, "loss": 0.2559, "step": 745 }, { "epoch": 0.016250297922128572, "grad_norm": 1.2906551361083984, "learning_rate": 1.9986971390185318e-05, "loss": 0.1828, "step": 750 }, { "epoch": 0.016358633241609428, "grad_norm": 1.6500940322875977, "learning_rate": 1.99867971347015e-05, "loss": 0.2499, "step": 755 }, { "epoch": 0.016466968561090287, "grad_norm": 1.7500755786895752, "learning_rate": 1.998662172239683e-05, "loss": 0.2271, "step": 760 }, { "epoch": 0.016575303880571143, "grad_norm": 1.535461664199829, "learning_rate": 1.998644515329164e-05, "loss": 0.1691, "step": 765 }, { "epoch": 0.016683639200052, "grad_norm": 1.210924506187439, "learning_rate": 1.9986267427406373e-05, "loss": 0.227, "step": 770 }, { "epoch": 0.01679197451953286, "grad_norm": 2.8434038162231445, "learning_rate": 1.998608854476162e-05, "loss": 0.2055, "step": 775 }, { "epoch": 0.016900309839013715, "grad_norm": 1.8732430934906006, "learning_rate": 1.9985908505378098e-05, "loss": 0.2275, "step": 780 }, { "epoch": 0.01700864515849457, "grad_norm": 1.8578499555587769, "learning_rate": 1.9985727309276667e-05, "loss": 0.2296, "step": 785 }, { "epoch": 0.01711698047797543, "grad_norm": 1.3183101415634155, "learning_rate": 1.9985544956478312e-05, "loss": 0.158, "step": 790 }, { "epoch": 0.017225315797456286, "grad_norm": 2.243936061859131, "learning_rate": 1.9985361447004155e-05, "loss": 0.2067, "step": 795 }, { "epoch": 0.017333651116937143, "grad_norm": 2.5474531650543213, "learning_rate": 1.998517678087546e-05, "loss": 0.2998, "step": 800 }, { "epoch": 0.017441986436418002, "grad_norm": 2.1101415157318115, "learning_rate": 1.998499095811361e-05, "loss": 0.2079, "step": 805 }, { "epoch": 0.017550321755898858, "grad_norm": 2.5708842277526855, "learning_rate": 1.9984803978740133e-05, "loss": 0.2004, "step": 810 }, { "epoch": 0.017658657075379714, "grad_norm": 3.5689611434936523, "learning_rate": 1.9984615842776687e-05, "loss": 0.3049, "step": 815 }, { "epoch": 0.017766992394860574, "grad_norm": 1.6031129360198975, "learning_rate": 1.9984426550245067e-05, "loss": 0.3145, "step": 820 }, { "epoch": 0.01787532771434143, "grad_norm": 1.9609709978103638, "learning_rate": 1.9984236101167195e-05, "loss": 0.1414, "step": 825 }, { "epoch": 0.017983663033822286, "grad_norm": 1.5163270235061646, "learning_rate": 1.9984044495565138e-05, "loss": 0.2826, "step": 830 }, { "epoch": 0.018091998353303145, "grad_norm": 2.450927257537842, "learning_rate": 1.9983851733461085e-05, "loss": 0.336, "step": 835 }, { "epoch": 0.018200333672784, "grad_norm": 1.8311290740966797, "learning_rate": 1.9983657814877366e-05, "loss": 0.2746, "step": 840 }, { "epoch": 0.018308668992264857, "grad_norm": 1.4729841947555542, "learning_rate": 1.9983462739836444e-05, "loss": 0.193, "step": 845 }, { "epoch": 0.018417004311745717, "grad_norm": 2.1966664791107178, "learning_rate": 1.998326650836092e-05, "loss": 0.1956, "step": 850 }, { "epoch": 0.018525339631226573, "grad_norm": 1.365146517753601, "learning_rate": 1.9983069120473517e-05, "loss": 0.2709, "step": 855 }, { "epoch": 0.01863367495070743, "grad_norm": 1.0250957012176514, "learning_rate": 1.9982870576197103e-05, "loss": 0.1948, "step": 860 }, { "epoch": 0.01874201027018829, "grad_norm": 1.7017226219177246, "learning_rate": 1.9982670875554677e-05, "loss": 0.1797, "step": 865 }, { "epoch": 0.018850345589669144, "grad_norm": 1.188413143157959, "learning_rate": 1.998247001856937e-05, "loss": 0.2057, "step": 870 }, { "epoch": 0.01895868090915, "grad_norm": 2.106862783432007, "learning_rate": 1.9982268005264448e-05, "loss": 0.2334, "step": 875 }, { "epoch": 0.01906701622863086, "grad_norm": 1.524610161781311, "learning_rate": 1.9982064835663313e-05, "loss": 0.1503, "step": 880 }, { "epoch": 0.019175351548111716, "grad_norm": 1.8483318090438843, "learning_rate": 1.99818605097895e-05, "loss": 0.1756, "step": 885 }, { "epoch": 0.019283686867592572, "grad_norm": 1.63271164894104, "learning_rate": 1.9981655027666673e-05, "loss": 0.1247, "step": 890 }, { "epoch": 0.019392022187073428, "grad_norm": 1.3422389030456543, "learning_rate": 1.9981448389318636e-05, "loss": 0.1684, "step": 895 }, { "epoch": 0.019500357506554288, "grad_norm": 1.8058654069900513, "learning_rate": 1.9981240594769324e-05, "loss": 0.2805, "step": 900 }, { "epoch": 0.019608692826035144, "grad_norm": 1.4573131799697876, "learning_rate": 1.998103164404281e-05, "loss": 0.223, "step": 905 }, { "epoch": 0.019717028145516, "grad_norm": 2.9336905479431152, "learning_rate": 1.9980821537163295e-05, "loss": 0.2825, "step": 910 }, { "epoch": 0.01982536346499686, "grad_norm": 1.8208544254302979, "learning_rate": 1.998061027415512e-05, "loss": 0.2567, "step": 915 }, { "epoch": 0.019933698784477715, "grad_norm": 1.8420718908309937, "learning_rate": 1.998039785504275e-05, "loss": 0.2259, "step": 920 }, { "epoch": 0.02004203410395857, "grad_norm": 2.2099032402038574, "learning_rate": 1.9980184279850797e-05, "loss": 0.2134, "step": 925 }, { "epoch": 0.02015036942343943, "grad_norm": 1.372920036315918, "learning_rate": 1.9979969548604e-05, "loss": 0.1999, "step": 930 }, { "epoch": 0.020258704742920287, "grad_norm": 3.472062826156616, "learning_rate": 1.9979753661327228e-05, "loss": 0.1959, "step": 935 }, { "epoch": 0.020367040062401143, "grad_norm": 1.7562220096588135, "learning_rate": 1.9979536618045494e-05, "loss": 0.1788, "step": 940 }, { "epoch": 0.020475375381882002, "grad_norm": 2.0572431087493896, "learning_rate": 1.9979318418783934e-05, "loss": 0.1973, "step": 945 }, { "epoch": 0.02058371070136286, "grad_norm": 2.070707321166992, "learning_rate": 1.9979099063567828e-05, "loss": 0.2693, "step": 950 }, { "epoch": 0.020692046020843714, "grad_norm": 1.5137685537338257, "learning_rate": 1.9978878552422575e-05, "loss": 0.2333, "step": 955 }, { "epoch": 0.020800381340324574, "grad_norm": 1.9519044160842896, "learning_rate": 1.9978656885373733e-05, "loss": 0.2137, "step": 960 }, { "epoch": 0.02090871665980543, "grad_norm": 1.7894543409347534, "learning_rate": 1.9978434062446967e-05, "loss": 0.2202, "step": 965 }, { "epoch": 0.021017051979286286, "grad_norm": 2.2815663814544678, "learning_rate": 1.9978210083668094e-05, "loss": 0.2027, "step": 970 }, { "epoch": 0.021125387298767145, "grad_norm": 2.0153818130493164, "learning_rate": 1.9977984949063057e-05, "loss": 0.2499, "step": 975 }, { "epoch": 0.021233722618248, "grad_norm": 2.456092596054077, "learning_rate": 1.9977758658657935e-05, "loss": 0.2287, "step": 980 }, { "epoch": 0.021342057937728857, "grad_norm": 2.0086989402770996, "learning_rate": 1.9977531212478934e-05, "loss": 0.291, "step": 985 }, { "epoch": 0.021450393257209717, "grad_norm": 2.419847249984741, "learning_rate": 1.997730261055241e-05, "loss": 0.1809, "step": 990 }, { "epoch": 0.021558728576690573, "grad_norm": 1.2933825254440308, "learning_rate": 1.9977072852904836e-05, "loss": 0.2133, "step": 995 }, { "epoch": 0.02166706389617143, "grad_norm": 2.5177388191223145, "learning_rate": 1.9976841939562827e-05, "loss": 0.2481, "step": 1000 }, { "epoch": 0.02177539921565229, "grad_norm": 1.8977940082550049, "learning_rate": 1.9976609870553137e-05, "loss": 0.281, "step": 1005 }, { "epoch": 0.021883734535133145, "grad_norm": 1.7671210765838623, "learning_rate": 1.997637664590264e-05, "loss": 0.2406, "step": 1010 }, { "epoch": 0.021992069854614, "grad_norm": 1.1880853176116943, "learning_rate": 1.9976142265638357e-05, "loss": 0.2378, "step": 1015 }, { "epoch": 0.02210040517409486, "grad_norm": 1.7973101139068604, "learning_rate": 1.9975906729787432e-05, "loss": 0.2796, "step": 1020 }, { "epoch": 0.022208740493575716, "grad_norm": 1.6292130947113037, "learning_rate": 1.9975670038377152e-05, "loss": 0.2493, "step": 1025 }, { "epoch": 0.022317075813056572, "grad_norm": 2.1482033729553223, "learning_rate": 1.9975432191434934e-05, "loss": 0.187, "step": 1030 }, { "epoch": 0.022425411132537428, "grad_norm": 1.769181489944458, "learning_rate": 1.9975193188988333e-05, "loss": 0.2515, "step": 1035 }, { "epoch": 0.022533746452018288, "grad_norm": 2.1935577392578125, "learning_rate": 1.9974953031065024e-05, "loss": 0.2793, "step": 1040 }, { "epoch": 0.022642081771499144, "grad_norm": 2.401862859725952, "learning_rate": 1.9974711717692833e-05, "loss": 0.253, "step": 1045 }, { "epoch": 0.02275041709098, "grad_norm": 1.9109768867492676, "learning_rate": 1.9974469248899712e-05, "loss": 0.2581, "step": 1050 }, { "epoch": 0.02285875241046086, "grad_norm": 2.3280282020568848, "learning_rate": 1.9974225624713742e-05, "loss": 0.2641, "step": 1055 }, { "epoch": 0.022967087729941715, "grad_norm": 1.0294650793075562, "learning_rate": 1.997398084516315e-05, "loss": 0.2784, "step": 1060 }, { "epoch": 0.02307542304942257, "grad_norm": 1.8445751667022705, "learning_rate": 1.997373491027629e-05, "loss": 0.2156, "step": 1065 }, { "epoch": 0.02318375836890343, "grad_norm": 2.0539259910583496, "learning_rate": 1.9973487820081642e-05, "loss": 0.1936, "step": 1070 }, { "epoch": 0.023292093688384287, "grad_norm": 1.2544612884521484, "learning_rate": 1.9973239574607833e-05, "loss": 0.2748, "step": 1075 }, { "epoch": 0.023400429007865143, "grad_norm": 2.5687389373779297, "learning_rate": 1.9972990173883625e-05, "loss": 0.1819, "step": 1080 }, { "epoch": 0.023508764327346002, "grad_norm": 2.0241103172302246, "learning_rate": 1.9972739617937894e-05, "loss": 0.2118, "step": 1085 }, { "epoch": 0.02361709964682686, "grad_norm": 1.760931372642517, "learning_rate": 1.9972487906799673e-05, "loss": 0.2157, "step": 1090 }, { "epoch": 0.023725434966307715, "grad_norm": 1.4993475675582886, "learning_rate": 1.9972235040498116e-05, "loss": 0.2262, "step": 1095 }, { "epoch": 0.023833770285788574, "grad_norm": 1.8170698881149292, "learning_rate": 1.9971981019062513e-05, "loss": 0.2667, "step": 1100 }, { "epoch": 0.02394210560526943, "grad_norm": 2.2465713024139404, "learning_rate": 1.997172584252229e-05, "loss": 0.2161, "step": 1105 }, { "epoch": 0.024050440924750286, "grad_norm": 2.142155885696411, "learning_rate": 1.9971469510907003e-05, "loss": 0.2223, "step": 1110 }, { "epoch": 0.024158776244231146, "grad_norm": 1.762592077255249, "learning_rate": 1.9971212024246344e-05, "loss": 0.1969, "step": 1115 }, { "epoch": 0.024267111563712, "grad_norm": 2.0139317512512207, "learning_rate": 1.9970953382570143e-05, "loss": 0.2726, "step": 1120 }, { "epoch": 0.024375446883192858, "grad_norm": 2.201205015182495, "learning_rate": 1.997069358590836e-05, "loss": 0.2766, "step": 1125 }, { "epoch": 0.024483782202673717, "grad_norm": 1.7144205570220947, "learning_rate": 1.997043263429108e-05, "loss": 0.2367, "step": 1130 }, { "epoch": 0.024592117522154573, "grad_norm": 1.6792316436767578, "learning_rate": 1.9970170527748542e-05, "loss": 0.2984, "step": 1135 }, { "epoch": 0.02470045284163543, "grad_norm": 1.600451111793518, "learning_rate": 1.9969907266311098e-05, "loss": 0.2864, "step": 1140 }, { "epoch": 0.02480878816111629, "grad_norm": 1.8225024938583374, "learning_rate": 1.996964285000925e-05, "loss": 0.2754, "step": 1145 }, { "epoch": 0.024917123480597145, "grad_norm": 1.9770939350128174, "learning_rate": 1.996937727887362e-05, "loss": 0.2443, "step": 1150 }, { "epoch": 0.025025458800078, "grad_norm": 2.1396515369415283, "learning_rate": 1.9969110552934975e-05, "loss": 0.1409, "step": 1155 }, { "epoch": 0.02513379411955886, "grad_norm": 1.489656686782837, "learning_rate": 1.9968842672224208e-05, "loss": 0.1747, "step": 1160 }, { "epoch": 0.025242129439039716, "grad_norm": 2.2572667598724365, "learning_rate": 1.9968573636772356e-05, "loss": 0.2889, "step": 1165 }, { "epoch": 0.025350464758520572, "grad_norm": 1.387824296951294, "learning_rate": 1.996830344661057e-05, "loss": 0.1598, "step": 1170 }, { "epoch": 0.02545880007800143, "grad_norm": 2.364814281463623, "learning_rate": 1.996803210177016e-05, "loss": 0.2349, "step": 1175 }, { "epoch": 0.025567135397482288, "grad_norm": 1.0829187631607056, "learning_rate": 1.9967759602282547e-05, "loss": 0.2391, "step": 1180 }, { "epoch": 0.025675470716963144, "grad_norm": 2.113699436187744, "learning_rate": 1.9967485948179307e-05, "loss": 0.2859, "step": 1185 }, { "epoch": 0.025783806036444, "grad_norm": 2.1771605014801025, "learning_rate": 1.9967211139492128e-05, "loss": 0.2201, "step": 1190 }, { "epoch": 0.02589214135592486, "grad_norm": 2.230182409286499, "learning_rate": 1.996693517625285e-05, "loss": 0.3342, "step": 1195 }, { "epoch": 0.026000476675405716, "grad_norm": 2.1255228519439697, "learning_rate": 1.9966658058493437e-05, "loss": 0.2764, "step": 1200 }, { "epoch": 0.02610881199488657, "grad_norm": 1.7681760787963867, "learning_rate": 1.9966379786245985e-05, "loss": 0.3466, "step": 1205 }, { "epoch": 0.02621714731436743, "grad_norm": 1.1652841567993164, "learning_rate": 1.9966100359542737e-05, "loss": 0.2527, "step": 1210 }, { "epoch": 0.026325482633848287, "grad_norm": 1.985944390296936, "learning_rate": 1.996581977841605e-05, "loss": 0.2132, "step": 1215 }, { "epoch": 0.026433817953329143, "grad_norm": 1.129623293876648, "learning_rate": 1.9965538042898432e-05, "loss": 0.2541, "step": 1220 }, { "epoch": 0.026542153272810003, "grad_norm": 2.0854952335357666, "learning_rate": 1.9965255153022513e-05, "loss": 0.3175, "step": 1225 }, { "epoch": 0.02665048859229086, "grad_norm": 0.852756142616272, "learning_rate": 1.9964971108821064e-05, "loss": 0.1815, "step": 1230 }, { "epoch": 0.026758823911771715, "grad_norm": 1.715214490890503, "learning_rate": 1.996468591032699e-05, "loss": 0.1351, "step": 1235 }, { "epoch": 0.026867159231252574, "grad_norm": 3.104651689529419, "learning_rate": 1.996439955757332e-05, "loss": 0.2188, "step": 1240 }, { "epoch": 0.02697549455073343, "grad_norm": 1.8865550756454468, "learning_rate": 1.9964112050593234e-05, "loss": 0.2015, "step": 1245 }, { "epoch": 0.027083829870214286, "grad_norm": 1.9253597259521484, "learning_rate": 1.9963823389420026e-05, "loss": 0.2749, "step": 1250 }, { "epoch": 0.027192165189695146, "grad_norm": 1.5005290508270264, "learning_rate": 1.9963533574087137e-05, "loss": 0.1952, "step": 1255 }, { "epoch": 0.027300500509176002, "grad_norm": 2.074547290802002, "learning_rate": 1.9963242604628137e-05, "loss": 0.2043, "step": 1260 }, { "epoch": 0.027408835828656858, "grad_norm": 2.009446859359741, "learning_rate": 1.996295048107673e-05, "loss": 0.271, "step": 1265 }, { "epoch": 0.027517171148137717, "grad_norm": 1.9132442474365234, "learning_rate": 1.9962657203466757e-05, "loss": 0.2745, "step": 1270 }, { "epoch": 0.027625506467618573, "grad_norm": 2.1632556915283203, "learning_rate": 1.9962362771832183e-05, "loss": 0.189, "step": 1275 }, { "epoch": 0.02773384178709943, "grad_norm": 2.5707435607910156, "learning_rate": 1.996206718620712e-05, "loss": 0.2828, "step": 1280 }, { "epoch": 0.02784217710658029, "grad_norm": 1.9074193239212036, "learning_rate": 1.9961770446625806e-05, "loss": 0.2156, "step": 1285 }, { "epoch": 0.027950512426061145, "grad_norm": 1.7028558254241943, "learning_rate": 1.9961472553122613e-05, "loss": 0.2218, "step": 1290 }, { "epoch": 0.028058847745542, "grad_norm": 1.8417338132858276, "learning_rate": 1.9961173505732047e-05, "loss": 0.2426, "step": 1295 }, { "epoch": 0.028167183065022857, "grad_norm": 1.1753895282745361, "learning_rate": 1.996087330448875e-05, "loss": 0.2811, "step": 1300 }, { "epoch": 0.028275518384503717, "grad_norm": 1.3819504976272583, "learning_rate": 1.9960571949427495e-05, "loss": 0.3104, "step": 1305 }, { "epoch": 0.028383853703984573, "grad_norm": 2.5825283527374268, "learning_rate": 1.996026944058319e-05, "loss": 0.3094, "step": 1310 }, { "epoch": 0.02849218902346543, "grad_norm": 1.5192958116531372, "learning_rate": 1.995996577799087e-05, "loss": 0.2458, "step": 1315 }, { "epoch": 0.028600524342946288, "grad_norm": 1.6004291772842407, "learning_rate": 1.995966096168572e-05, "loss": 0.2016, "step": 1320 }, { "epoch": 0.028708859662427144, "grad_norm": 2.0647666454315186, "learning_rate": 1.9959354991703042e-05, "loss": 0.2383, "step": 1325 }, { "epoch": 0.028817194981908, "grad_norm": 1.6642508506774902, "learning_rate": 1.9959047868078275e-05, "loss": 0.2818, "step": 1330 }, { "epoch": 0.02892553030138886, "grad_norm": 2.7128703594207764, "learning_rate": 1.9958739590847e-05, "loss": 0.2554, "step": 1335 }, { "epoch": 0.029033865620869716, "grad_norm": 1.3758823871612549, "learning_rate": 1.995843016004493e-05, "loss": 0.2165, "step": 1340 }, { "epoch": 0.029142200940350572, "grad_norm": 1.3142551183700562, "learning_rate": 1.9958119575707904e-05, "loss": 0.2088, "step": 1345 }, { "epoch": 0.02925053625983143, "grad_norm": 1.877597689628601, "learning_rate": 1.9957807837871893e-05, "loss": 0.3066, "step": 1350 }, { "epoch": 0.029358871579312287, "grad_norm": 1.6082719564437866, "learning_rate": 1.9957494946573014e-05, "loss": 0.2586, "step": 1355 }, { "epoch": 0.029467206898793143, "grad_norm": 0.5531467199325562, "learning_rate": 1.995718090184751e-05, "loss": 0.1951, "step": 1360 }, { "epoch": 0.029575542218274003, "grad_norm": 1.5399668216705322, "learning_rate": 1.9956865703731757e-05, "loss": 0.2002, "step": 1365 }, { "epoch": 0.02968387753775486, "grad_norm": 1.7544203996658325, "learning_rate": 1.9956549352262265e-05, "loss": 0.3405, "step": 1370 }, { "epoch": 0.029792212857235715, "grad_norm": 2.6023194789886475, "learning_rate": 1.9956231847475676e-05, "loss": 0.2382, "step": 1375 }, { "epoch": 0.029900548176716574, "grad_norm": 2.0200066566467285, "learning_rate": 1.9955913189408777e-05, "loss": 0.2353, "step": 1380 }, { "epoch": 0.03000888349619743, "grad_norm": 1.5594794750213623, "learning_rate": 1.9955593378098472e-05, "loss": 0.1673, "step": 1385 }, { "epoch": 0.030117218815678286, "grad_norm": 1.6159828901290894, "learning_rate": 1.995527241358181e-05, "loss": 0.2869, "step": 1390 }, { "epoch": 0.030225554135159146, "grad_norm": 1.4383025169372559, "learning_rate": 1.995495029589597e-05, "loss": 0.2399, "step": 1395 }, { "epoch": 0.030333889454640002, "grad_norm": 2.2891042232513428, "learning_rate": 1.995462702507826e-05, "loss": 0.1377, "step": 1400 }, { "epoch": 0.030442224774120858, "grad_norm": 2.3495635986328125, "learning_rate": 1.9954302601166132e-05, "loss": 0.2639, "step": 1405 }, { "epoch": 0.030550560093601718, "grad_norm": 1.7356265783309937, "learning_rate": 1.9953977024197163e-05, "loss": 0.2495, "step": 1410 }, { "epoch": 0.030658895413082574, "grad_norm": 1.296764612197876, "learning_rate": 1.9953650294209063e-05, "loss": 0.2235, "step": 1415 }, { "epoch": 0.03076723073256343, "grad_norm": 2.4589600563049316, "learning_rate": 1.9953322411239688e-05, "loss": 0.1705, "step": 1420 }, { "epoch": 0.03087556605204429, "grad_norm": 2.7593789100646973, "learning_rate": 1.995299337532701e-05, "loss": 0.2807, "step": 1425 }, { "epoch": 0.030983901371525145, "grad_norm": 2.037738561630249, "learning_rate": 1.9952663186509142e-05, "loss": 0.2291, "step": 1430 }, { "epoch": 0.031092236691006, "grad_norm": 2.0819168090820312, "learning_rate": 1.9952331844824336e-05, "loss": 0.2071, "step": 1435 }, { "epoch": 0.031200572010486857, "grad_norm": 2.867840051651001, "learning_rate": 1.9951999350310976e-05, "loss": 0.311, "step": 1440 }, { "epoch": 0.03130890732996772, "grad_norm": 1.7799327373504639, "learning_rate": 1.9951665703007566e-05, "loss": 0.3163, "step": 1445 }, { "epoch": 0.031417242649448576, "grad_norm": 1.1071172952651978, "learning_rate": 1.9951330902952763e-05, "loss": 0.193, "step": 1450 }, { "epoch": 0.03152557796892943, "grad_norm": 1.7083449363708496, "learning_rate": 1.995099495018535e-05, "loss": 0.3597, "step": 1455 }, { "epoch": 0.03163391328841029, "grad_norm": 2.551522970199585, "learning_rate": 1.995065784474423e-05, "loss": 0.2866, "step": 1460 }, { "epoch": 0.03174224860789115, "grad_norm": 1.9973173141479492, "learning_rate": 1.9950319586668466e-05, "loss": 0.1835, "step": 1465 }, { "epoch": 0.031850583927372, "grad_norm": 2.646312952041626, "learning_rate": 1.994998017599723e-05, "loss": 0.2428, "step": 1470 }, { "epoch": 0.03195891924685286, "grad_norm": 1.8218218088150024, "learning_rate": 1.9949639612769844e-05, "loss": 0.228, "step": 1475 }, { "epoch": 0.03206725456633372, "grad_norm": 1.9403351545333862, "learning_rate": 1.9949297897025754e-05, "loss": 0.1961, "step": 1480 }, { "epoch": 0.03217558988581457, "grad_norm": 1.7759265899658203, "learning_rate": 1.9948955028804543e-05, "loss": 0.2351, "step": 1485 }, { "epoch": 0.03228392520529543, "grad_norm": 2.506230115890503, "learning_rate": 1.9948611008145927e-05, "loss": 0.2508, "step": 1490 }, { "epoch": 0.03239226052477629, "grad_norm": 2.03023624420166, "learning_rate": 1.9948265835089753e-05, "loss": 0.2628, "step": 1495 }, { "epoch": 0.032500595844257144, "grad_norm": 1.6411176919937134, "learning_rate": 1.994791950967601e-05, "loss": 0.2367, "step": 1500 }, { "epoch": 0.032608931163738, "grad_norm": 1.582498550415039, "learning_rate": 1.994757203194481e-05, "loss": 0.2725, "step": 1505 }, { "epoch": 0.032717266483218856, "grad_norm": 0.8866876363754272, "learning_rate": 1.9947223401936406e-05, "loss": 0.1859, "step": 1510 }, { "epoch": 0.032825601802699715, "grad_norm": 1.635899305343628, "learning_rate": 1.994687361969118e-05, "loss": 0.1507, "step": 1515 }, { "epoch": 0.032933937122180575, "grad_norm": 2.5366458892822266, "learning_rate": 1.994652268524965e-05, "loss": 0.2258, "step": 1520 }, { "epoch": 0.03304227244166143, "grad_norm": 1.79954195022583, "learning_rate": 1.9946170598652465e-05, "loss": 0.2246, "step": 1525 }, { "epoch": 0.03315060776114229, "grad_norm": 1.7249542474746704, "learning_rate": 1.9945817359940408e-05, "loss": 0.202, "step": 1530 }, { "epoch": 0.033258943080623146, "grad_norm": 1.6779597997665405, "learning_rate": 1.99454629691544e-05, "loss": 0.3107, "step": 1535 }, { "epoch": 0.033367278400104, "grad_norm": 2.2703070640563965, "learning_rate": 1.994510742633549e-05, "loss": 0.2843, "step": 1540 }, { "epoch": 0.03347561371958486, "grad_norm": 1.903990626335144, "learning_rate": 1.9944750731524866e-05, "loss": 0.2097, "step": 1545 }, { "epoch": 0.03358394903906572, "grad_norm": 1.2967238426208496, "learning_rate": 1.9944392884763837e-05, "loss": 0.2185, "step": 1550 }, { "epoch": 0.03369228435854657, "grad_norm": 1.6734142303466797, "learning_rate": 1.9944033886093863e-05, "loss": 0.2577, "step": 1555 }, { "epoch": 0.03380061967802743, "grad_norm": 1.6814600229263306, "learning_rate": 1.9943673735556523e-05, "loss": 0.2507, "step": 1560 }, { "epoch": 0.03390895499750829, "grad_norm": 1.7679237127304077, "learning_rate": 1.9943312433193536e-05, "loss": 0.2977, "step": 1565 }, { "epoch": 0.03401729031698914, "grad_norm": 1.7885723114013672, "learning_rate": 1.9942949979046755e-05, "loss": 0.1945, "step": 1570 }, { "epoch": 0.03412562563647, "grad_norm": 2.400825023651123, "learning_rate": 1.9942586373158162e-05, "loss": 0.21, "step": 1575 }, { "epoch": 0.03423396095595086, "grad_norm": 1.3808958530426025, "learning_rate": 1.9942221615569882e-05, "loss": 0.1561, "step": 1580 }, { "epoch": 0.034342296275431713, "grad_norm": 2.0554537773132324, "learning_rate": 1.994185570632416e-05, "loss": 0.2288, "step": 1585 }, { "epoch": 0.03445063159491257, "grad_norm": 1.7263554334640503, "learning_rate": 1.994148864546338e-05, "loss": 0.2325, "step": 1590 }, { "epoch": 0.03455896691439343, "grad_norm": 1.7419400215148926, "learning_rate": 1.994112043303007e-05, "loss": 0.2033, "step": 1595 }, { "epoch": 0.034667302233874285, "grad_norm": 1.923648715019226, "learning_rate": 1.9940751069066873e-05, "loss": 0.1593, "step": 1600 }, { "epoch": 0.034775637553355145, "grad_norm": 1.7419142723083496, "learning_rate": 1.994038055361658e-05, "loss": 0.3001, "step": 1605 }, { "epoch": 0.034883972872836004, "grad_norm": 1.6638823747634888, "learning_rate": 1.9940008886722103e-05, "loss": 0.2256, "step": 1610 }, { "epoch": 0.03499230819231686, "grad_norm": 1.8571423292160034, "learning_rate": 1.99396360684265e-05, "loss": 0.246, "step": 1615 }, { "epoch": 0.035100643511797716, "grad_norm": 1.5621343851089478, "learning_rate": 1.9939262098772952e-05, "loss": 0.207, "step": 1620 }, { "epoch": 0.035208978831278576, "grad_norm": 1.9511545896530151, "learning_rate": 1.9938886977804783e-05, "loss": 0.2421, "step": 1625 }, { "epoch": 0.03531731415075943, "grad_norm": 1.628841519355774, "learning_rate": 1.993851070556544e-05, "loss": 0.2706, "step": 1630 }, { "epoch": 0.03542564947024029, "grad_norm": 1.517113208770752, "learning_rate": 1.9938133282098514e-05, "loss": 0.3053, "step": 1635 }, { "epoch": 0.03553398478972115, "grad_norm": 2.0385334491729736, "learning_rate": 1.9937754707447716e-05, "loss": 0.2638, "step": 1640 }, { "epoch": 0.035642320109202, "grad_norm": 1.7749601602554321, "learning_rate": 1.9937374981656907e-05, "loss": 0.2456, "step": 1645 }, { "epoch": 0.03575065542868286, "grad_norm": 1.584962010383606, "learning_rate": 1.9936994104770064e-05, "loss": 0.1986, "step": 1650 }, { "epoch": 0.03585899074816372, "grad_norm": 1.9385052919387817, "learning_rate": 1.993661207683131e-05, "loss": 0.1624, "step": 1655 }, { "epoch": 0.03596732606764457, "grad_norm": 1.797092080116272, "learning_rate": 1.99362288978849e-05, "loss": 0.2509, "step": 1660 }, { "epoch": 0.03607566138712543, "grad_norm": 1.8901883363723755, "learning_rate": 1.9935844567975215e-05, "loss": 0.2446, "step": 1665 }, { "epoch": 0.03618399670660629, "grad_norm": 1.356101155281067, "learning_rate": 1.9935459087146775e-05, "loss": 0.2397, "step": 1670 }, { "epoch": 0.03629233202608714, "grad_norm": 1.2080035209655762, "learning_rate": 1.9935072455444237e-05, "loss": 0.1847, "step": 1675 }, { "epoch": 0.036400667345568, "grad_norm": 1.6870701313018799, "learning_rate": 1.993468467291238e-05, "loss": 0.2008, "step": 1680 }, { "epoch": 0.03650900266504886, "grad_norm": 2.324667453765869, "learning_rate": 1.9934295739596125e-05, "loss": 0.2826, "step": 1685 }, { "epoch": 0.036617337984529715, "grad_norm": 1.8784936666488647, "learning_rate": 1.9933905655540525e-05, "loss": 0.233, "step": 1690 }, { "epoch": 0.036725673304010574, "grad_norm": 1.878878116607666, "learning_rate": 1.9933514420790762e-05, "loss": 0.2288, "step": 1695 }, { "epoch": 0.036834008623491434, "grad_norm": 2.2853152751922607, "learning_rate": 1.993312203539216e-05, "loss": 0.2413, "step": 1700 }, { "epoch": 0.036942343942972286, "grad_norm": 1.7912126779556274, "learning_rate": 1.9932728499390168e-05, "loss": 0.2241, "step": 1705 }, { "epoch": 0.037050679262453146, "grad_norm": 2.2068419456481934, "learning_rate": 1.9932333812830373e-05, "loss": 0.2759, "step": 1710 }, { "epoch": 0.037159014581934005, "grad_norm": 2.366373300552368, "learning_rate": 1.993193797575849e-05, "loss": 0.2402, "step": 1715 }, { "epoch": 0.03726734990141486, "grad_norm": 2.6763951778411865, "learning_rate": 1.9931540988220373e-05, "loss": 0.1796, "step": 1720 }, { "epoch": 0.03737568522089572, "grad_norm": 1.5586973428726196, "learning_rate": 1.9931142850262007e-05, "loss": 0.1651, "step": 1725 }, { "epoch": 0.03748402054037658, "grad_norm": 2.1950485706329346, "learning_rate": 1.9930743561929514e-05, "loss": 0.3085, "step": 1730 }, { "epoch": 0.03759235585985743, "grad_norm": 1.4935556650161743, "learning_rate": 1.9930343123269137e-05, "loss": 0.2585, "step": 1735 }, { "epoch": 0.03770069117933829, "grad_norm": 1.441215991973877, "learning_rate": 1.9929941534327268e-05, "loss": 0.2591, "step": 1740 }, { "epoch": 0.03780902649881915, "grad_norm": 1.725001573562622, "learning_rate": 1.9929538795150424e-05, "loss": 0.323, "step": 1745 }, { "epoch": 0.0379173618183, "grad_norm": 2.1027770042419434, "learning_rate": 1.992913490578525e-05, "loss": 0.2358, "step": 1750 }, { "epoch": 0.03802569713778086, "grad_norm": 2.5493128299713135, "learning_rate": 1.992872986627854e-05, "loss": 0.3258, "step": 1755 }, { "epoch": 0.03813403245726172, "grad_norm": 2.369302272796631, "learning_rate": 1.992832367667721e-05, "loss": 0.1917, "step": 1760 }, { "epoch": 0.03824236777674257, "grad_norm": 1.4066193103790283, "learning_rate": 1.9927916337028304e-05, "loss": 0.2256, "step": 1765 }, { "epoch": 0.03835070309622343, "grad_norm": 1.7154139280319214, "learning_rate": 1.9927507847379012e-05, "loss": 0.2834, "step": 1770 }, { "epoch": 0.03845903841570429, "grad_norm": 2.1489479541778564, "learning_rate": 1.992709820777665e-05, "loss": 0.2606, "step": 1775 }, { "epoch": 0.038567373735185144, "grad_norm": 1.737362027168274, "learning_rate": 1.9926687418268665e-05, "loss": 0.2565, "step": 1780 }, { "epoch": 0.038675709054666, "grad_norm": 1.3819221258163452, "learning_rate": 1.9926275478902644e-05, "loss": 0.2881, "step": 1785 }, { "epoch": 0.038784044374146856, "grad_norm": 1.7133891582489014, "learning_rate": 1.9925862389726308e-05, "loss": 0.2211, "step": 1790 }, { "epoch": 0.038892379693627716, "grad_norm": 1.7115354537963867, "learning_rate": 1.9925448150787506e-05, "loss": 0.2317, "step": 1795 }, { "epoch": 0.039000715013108575, "grad_norm": 1.7718231678009033, "learning_rate": 1.9925032762134216e-05, "loss": 0.2348, "step": 1800 }, { "epoch": 0.03910905033258943, "grad_norm": 1.6288777589797974, "learning_rate": 1.9924616223814558e-05, "loss": 0.2209, "step": 1805 }, { "epoch": 0.03921738565207029, "grad_norm": 1.1104488372802734, "learning_rate": 1.992419853587678e-05, "loss": 0.2137, "step": 1810 }, { "epoch": 0.03932572097155115, "grad_norm": 2.4533531665802, "learning_rate": 1.9923779698369266e-05, "loss": 0.2039, "step": 1815 }, { "epoch": 0.039434056291032, "grad_norm": 2.172729253768921, "learning_rate": 1.9923359711340533e-05, "loss": 0.2537, "step": 1820 }, { "epoch": 0.03954239161051286, "grad_norm": 1.6598730087280273, "learning_rate": 1.9922938574839227e-05, "loss": 0.2617, "step": 1825 }, { "epoch": 0.03965072692999372, "grad_norm": 1.0563995838165283, "learning_rate": 1.9922516288914137e-05, "loss": 0.2187, "step": 1830 }, { "epoch": 0.03975906224947457, "grad_norm": 1.4337453842163086, "learning_rate": 1.992209285361417e-05, "loss": 0.2126, "step": 1835 }, { "epoch": 0.03986739756895543, "grad_norm": 2.420957565307617, "learning_rate": 1.9921668268988382e-05, "loss": 0.2171, "step": 1840 }, { "epoch": 0.03997573288843629, "grad_norm": 1.2719870805740356, "learning_rate": 1.9921242535085952e-05, "loss": 0.1584, "step": 1845 }, { "epoch": 0.04008406820791714, "grad_norm": 1.8894264698028564, "learning_rate": 1.9920815651956193e-05, "loss": 0.2232, "step": 1850 }, { "epoch": 0.040192403527398, "grad_norm": 1.6906014680862427, "learning_rate": 1.9920387619648553e-05, "loss": 0.1578, "step": 1855 }, { "epoch": 0.04030073884687886, "grad_norm": 2.173361301422119, "learning_rate": 1.9919958438212618e-05, "loss": 0.1975, "step": 1860 }, { "epoch": 0.040409074166359714, "grad_norm": 2.1866798400878906, "learning_rate": 1.9919528107698096e-05, "loss": 0.3131, "step": 1865 }, { "epoch": 0.04051740948584057, "grad_norm": 1.3253365755081177, "learning_rate": 1.991909662815484e-05, "loss": 0.1783, "step": 1870 }, { "epoch": 0.04062574480532143, "grad_norm": 1.3264597654342651, "learning_rate": 1.9918663999632826e-05, "loss": 0.2382, "step": 1875 }, { "epoch": 0.040734080124802285, "grad_norm": 2.614783525466919, "learning_rate": 1.991823022218217e-05, "loss": 0.3667, "step": 1880 }, { "epoch": 0.040842415444283145, "grad_norm": 1.7612636089324951, "learning_rate": 1.9917795295853115e-05, "loss": 0.2251, "step": 1885 }, { "epoch": 0.040950750763764004, "grad_norm": 1.157195806503296, "learning_rate": 1.9917359220696045e-05, "loss": 0.2361, "step": 1890 }, { "epoch": 0.04105908608324486, "grad_norm": 1.2468265295028687, "learning_rate": 1.9916921996761474e-05, "loss": 0.1155, "step": 1895 }, { "epoch": 0.04116742140272572, "grad_norm": 1.2751070261001587, "learning_rate": 1.9916483624100044e-05, "loss": 0.2011, "step": 1900 }, { "epoch": 0.041275756722206576, "grad_norm": 1.8879942893981934, "learning_rate": 1.9916044102762533e-05, "loss": 0.2246, "step": 1905 }, { "epoch": 0.04138409204168743, "grad_norm": 2.467174530029297, "learning_rate": 1.9915603432799858e-05, "loss": 0.3629, "step": 1910 }, { "epoch": 0.04149242736116829, "grad_norm": 1.5791605710983276, "learning_rate": 1.9915161614263057e-05, "loss": 0.2857, "step": 1915 }, { "epoch": 0.04160076268064915, "grad_norm": 3.3022336959838867, "learning_rate": 1.9914718647203316e-05, "loss": 0.3467, "step": 1920 }, { "epoch": 0.04170909800013, "grad_norm": 1.9906675815582275, "learning_rate": 1.991427453167194e-05, "loss": 0.2237, "step": 1925 }, { "epoch": 0.04181743331961086, "grad_norm": 1.5464974641799927, "learning_rate": 1.9913829267720377e-05, "loss": 0.2484, "step": 1930 }, { "epoch": 0.04192576863909172, "grad_norm": 2.2862541675567627, "learning_rate": 1.9913382855400202e-05, "loss": 0.2014, "step": 1935 }, { "epoch": 0.04203410395857257, "grad_norm": 2.6669397354125977, "learning_rate": 1.9912935294763125e-05, "loss": 0.2453, "step": 1940 }, { "epoch": 0.04214243927805343, "grad_norm": 1.7905025482177734, "learning_rate": 1.9912486585860988e-05, "loss": 0.2152, "step": 1945 }, { "epoch": 0.04225077459753429, "grad_norm": 1.4796963930130005, "learning_rate": 1.9912036728745772e-05, "loss": 0.1464, "step": 1950 }, { "epoch": 0.04235910991701514, "grad_norm": 2.185830593109131, "learning_rate": 1.9911585723469583e-05, "loss": 0.2844, "step": 1955 }, { "epoch": 0.042467445236496, "grad_norm": 1.8466300964355469, "learning_rate": 1.9911133570084663e-05, "loss": 0.1853, "step": 1960 }, { "epoch": 0.04257578055597686, "grad_norm": 1.9819483757019043, "learning_rate": 1.9910680268643388e-05, "loss": 0.2212, "step": 1965 }, { "epoch": 0.042684115875457715, "grad_norm": 1.7353750467300415, "learning_rate": 1.9910225819198265e-05, "loss": 0.2174, "step": 1970 }, { "epoch": 0.042792451194938574, "grad_norm": 0.7839465141296387, "learning_rate": 1.9909770221801935e-05, "loss": 0.1915, "step": 1975 }, { "epoch": 0.042900786514419434, "grad_norm": 1.5201692581176758, "learning_rate": 1.9909313476507175e-05, "loss": 0.2181, "step": 1980 }, { "epoch": 0.043009121833900286, "grad_norm": 1.845149278640747, "learning_rate": 1.990885558336689e-05, "loss": 0.2375, "step": 1985 }, { "epoch": 0.043117457153381146, "grad_norm": 1.4549031257629395, "learning_rate": 1.9908396542434122e-05, "loss": 0.2836, "step": 1990 }, { "epoch": 0.043225792472862005, "grad_norm": 2.0876598358154297, "learning_rate": 1.9907936353762038e-05, "loss": 0.3213, "step": 1995 }, { "epoch": 0.04333412779234286, "grad_norm": 1.8857722282409668, "learning_rate": 1.9907475017403952e-05, "loss": 0.2871, "step": 2000 }, { "epoch": 0.04344246311182372, "grad_norm": 1.9452725648880005, "learning_rate": 1.99070125334133e-05, "loss": 0.1995, "step": 2005 }, { "epoch": 0.04355079843130458, "grad_norm": 1.323633074760437, "learning_rate": 1.9906548901843648e-05, "loss": 0.203, "step": 2010 }, { "epoch": 0.04365913375078543, "grad_norm": 1.7141679525375366, "learning_rate": 1.990608412274871e-05, "loss": 0.2135, "step": 2015 }, { "epoch": 0.04376746907026629, "grad_norm": 1.8232243061065674, "learning_rate": 1.9905618196182318e-05, "loss": 0.2194, "step": 2020 }, { "epoch": 0.04387580438974715, "grad_norm": 1.6287857294082642, "learning_rate": 1.9905151122198445e-05, "loss": 0.214, "step": 2025 }, { "epoch": 0.043984139709228, "grad_norm": 1.5031906366348267, "learning_rate": 1.9904682900851195e-05, "loss": 0.2279, "step": 2030 }, { "epoch": 0.04409247502870886, "grad_norm": 1.5844337940216064, "learning_rate": 1.99042135321948e-05, "loss": 0.1697, "step": 2035 }, { "epoch": 0.04420081034818972, "grad_norm": 1.8986237049102783, "learning_rate": 1.9903743016283634e-05, "loss": 0.2184, "step": 2040 }, { "epoch": 0.04430914566767057, "grad_norm": 1.116105079650879, "learning_rate": 1.9903271353172197e-05, "loss": 0.253, "step": 2045 }, { "epoch": 0.04441748098715143, "grad_norm": 2.18108868598938, "learning_rate": 1.9902798542915125e-05, "loss": 0.3344, "step": 2050 }, { "epoch": 0.044525816306632285, "grad_norm": 1.2250146865844727, "learning_rate": 1.9902324585567185e-05, "loss": 0.1914, "step": 2055 }, { "epoch": 0.044634151626113144, "grad_norm": 2.3511312007904053, "learning_rate": 1.9901849481183277e-05, "loss": 0.2238, "step": 2060 }, { "epoch": 0.044742486945594004, "grad_norm": 1.4698543548583984, "learning_rate": 1.9901373229818438e-05, "loss": 0.2894, "step": 2065 }, { "epoch": 0.044850822265074856, "grad_norm": 2.3651790618896484, "learning_rate": 1.9900895831527837e-05, "loss": 0.2615, "step": 2070 }, { "epoch": 0.044959157584555716, "grad_norm": 2.47879695892334, "learning_rate": 1.9900417286366766e-05, "loss": 0.2403, "step": 2075 }, { "epoch": 0.045067492904036575, "grad_norm": 2.03841495513916, "learning_rate": 1.989993759439066e-05, "loss": 0.324, "step": 2080 }, { "epoch": 0.04517582822351743, "grad_norm": 1.8500248193740845, "learning_rate": 1.9899456755655085e-05, "loss": 0.2411, "step": 2085 }, { "epoch": 0.04528416354299829, "grad_norm": 2.279881000518799, "learning_rate": 1.989897477021574e-05, "loss": 0.2407, "step": 2090 }, { "epoch": 0.04539249886247915, "grad_norm": 1.6610267162322998, "learning_rate": 1.989849163812845e-05, "loss": 0.2302, "step": 2095 }, { "epoch": 0.04550083418196, "grad_norm": 1.2934659719467163, "learning_rate": 1.9898007359449186e-05, "loss": 0.2214, "step": 2100 }, { "epoch": 0.04560916950144086, "grad_norm": 2.2501916885375977, "learning_rate": 1.9897521934234042e-05, "loss": 0.2826, "step": 2105 }, { "epoch": 0.04571750482092172, "grad_norm": 1.671309232711792, "learning_rate": 1.989703536253925e-05, "loss": 0.2347, "step": 2110 }, { "epoch": 0.04582584014040257, "grad_norm": 1.8444362878799438, "learning_rate": 1.9896547644421162e-05, "loss": 0.1539, "step": 2115 }, { "epoch": 0.04593417545988343, "grad_norm": 1.9706395864486694, "learning_rate": 1.9896058779936285e-05, "loss": 0.2375, "step": 2120 }, { "epoch": 0.04604251077936429, "grad_norm": 1.6940301656723022, "learning_rate": 1.989556876914124e-05, "loss": 0.2965, "step": 2125 }, { "epoch": 0.04615084609884514, "grad_norm": 2.574518918991089, "learning_rate": 1.9895077612092787e-05, "loss": 0.1238, "step": 2130 }, { "epoch": 0.046259181418326, "grad_norm": 1.5209705829620361, "learning_rate": 1.9894585308847826e-05, "loss": 0.2902, "step": 2135 }, { "epoch": 0.04636751673780686, "grad_norm": 2.2368481159210205, "learning_rate": 1.989409185946337e-05, "loss": 0.3123, "step": 2140 }, { "epoch": 0.046475852057287714, "grad_norm": 2.225602388381958, "learning_rate": 1.9893597263996592e-05, "loss": 0.2393, "step": 2145 }, { "epoch": 0.046584187376768574, "grad_norm": 1.650266408920288, "learning_rate": 1.9893101522504773e-05, "loss": 0.2082, "step": 2150 }, { "epoch": 0.04669252269624943, "grad_norm": 2.4030184745788574, "learning_rate": 1.989260463504534e-05, "loss": 0.2924, "step": 2155 }, { "epoch": 0.046800858015730286, "grad_norm": 2.0732359886169434, "learning_rate": 1.9892106601675856e-05, "loss": 0.2414, "step": 2160 }, { "epoch": 0.046909193335211145, "grad_norm": 2.2081105709075928, "learning_rate": 1.9891607422454003e-05, "loss": 0.2875, "step": 2165 }, { "epoch": 0.047017528654692005, "grad_norm": 2.0362186431884766, "learning_rate": 1.989110709743761e-05, "loss": 0.1846, "step": 2170 }, { "epoch": 0.04712586397417286, "grad_norm": 3.1812100410461426, "learning_rate": 1.9890605626684623e-05, "loss": 0.2319, "step": 2175 }, { "epoch": 0.04723419929365372, "grad_norm": 1.1762529611587524, "learning_rate": 1.989010301025314e-05, "loss": 0.1856, "step": 2180 }, { "epoch": 0.047342534613134576, "grad_norm": 1.6591203212738037, "learning_rate": 1.9889599248201376e-05, "loss": 0.2668, "step": 2185 }, { "epoch": 0.04745086993261543, "grad_norm": 3.4553403854370117, "learning_rate": 1.988909434058769e-05, "loss": 0.2291, "step": 2190 }, { "epoch": 0.04755920525209629, "grad_norm": 2.285890579223633, "learning_rate": 1.9888588287470553e-05, "loss": 0.1644, "step": 2195 }, { "epoch": 0.04766754057157715, "grad_norm": 1.532900094985962, "learning_rate": 1.9888081088908598e-05, "loss": 0.2965, "step": 2200 }, { "epoch": 0.047775875891058, "grad_norm": 2.3953723907470703, "learning_rate": 1.9887572744960574e-05, "loss": 0.1839, "step": 2205 }, { "epoch": 0.04788421121053886, "grad_norm": 2.287987470626831, "learning_rate": 1.9887063255685362e-05, "loss": 0.2353, "step": 2210 }, { "epoch": 0.04799254653001972, "grad_norm": 1.9728647470474243, "learning_rate": 1.988655262114198e-05, "loss": 0.2546, "step": 2215 }, { "epoch": 0.04810088184950057, "grad_norm": 1.6002538204193115, "learning_rate": 1.988604084138958e-05, "loss": 0.2873, "step": 2220 }, { "epoch": 0.04820921716898143, "grad_norm": 2.193291187286377, "learning_rate": 1.9885527916487438e-05, "loss": 0.1799, "step": 2225 }, { "epoch": 0.04831755248846229, "grad_norm": 1.5331041812896729, "learning_rate": 1.9885013846494975e-05, "loss": 0.3175, "step": 2230 }, { "epoch": 0.048425887807943144, "grad_norm": 1.6369857788085938, "learning_rate": 1.9884498631471733e-05, "loss": 0.2898, "step": 2235 }, { "epoch": 0.048534223127424, "grad_norm": 1.8686329126358032, "learning_rate": 1.9883982271477396e-05, "loss": 0.2786, "step": 2240 }, { "epoch": 0.04864255844690486, "grad_norm": 1.724786400794983, "learning_rate": 1.9883464766571775e-05, "loss": 0.2168, "step": 2245 }, { "epoch": 0.048750893766385715, "grad_norm": 1.869757056236267, "learning_rate": 1.9882946116814813e-05, "loss": 0.2347, "step": 2250 }, { "epoch": 0.048859229085866575, "grad_norm": 1.3603947162628174, "learning_rate": 1.988242632226659e-05, "loss": 0.283, "step": 2255 }, { "epoch": 0.048967564405347434, "grad_norm": 1.4756799936294556, "learning_rate": 1.988190538298732e-05, "loss": 0.2784, "step": 2260 }, { "epoch": 0.04907589972482829, "grad_norm": 1.5361484289169312, "learning_rate": 1.9881383299037334e-05, "loss": 0.2863, "step": 2265 }, { "epoch": 0.049184235044309146, "grad_norm": 2.7092573642730713, "learning_rate": 1.9880860070477124e-05, "loss": 0.2217, "step": 2270 }, { "epoch": 0.049292570363790006, "grad_norm": 0.7973708510398865, "learning_rate": 1.9880335697367286e-05, "loss": 0.1593, "step": 2275 }, { "epoch": 0.04940090568327086, "grad_norm": 2.0661513805389404, "learning_rate": 1.987981017976857e-05, "loss": 0.2205, "step": 2280 }, { "epoch": 0.04950924100275172, "grad_norm": 2.033930778503418, "learning_rate": 1.987928351774184e-05, "loss": 0.1995, "step": 2285 }, { "epoch": 0.04961757632223258, "grad_norm": 1.5549722909927368, "learning_rate": 1.9878755711348105e-05, "loss": 0.1623, "step": 2290 }, { "epoch": 0.04972591164171343, "grad_norm": 1.5209996700286865, "learning_rate": 1.987822676064851e-05, "loss": 0.2281, "step": 2295 }, { "epoch": 0.04983424696119429, "grad_norm": 2.146106719970703, "learning_rate": 1.9877696665704315e-05, "loss": 0.3011, "step": 2300 }, { "epoch": 0.04994258228067515, "grad_norm": 1.4894360303878784, "learning_rate": 1.987716542657693e-05, "loss": 0.2645, "step": 2305 }, { "epoch": 0.050050917600156, "grad_norm": 1.1804884672164917, "learning_rate": 1.9876633043327892e-05, "loss": 0.2022, "step": 2310 }, { "epoch": 0.05015925291963686, "grad_norm": 1.6027973890304565, "learning_rate": 1.9876099516018866e-05, "loss": 0.2586, "step": 2315 }, { "epoch": 0.05026758823911772, "grad_norm": 1.9692716598510742, "learning_rate": 1.987556484471166e-05, "loss": 0.18, "step": 2320 }, { "epoch": 0.05037592355859857, "grad_norm": 1.9545280933380127, "learning_rate": 1.98750290294682e-05, "loss": 0.2247, "step": 2325 }, { "epoch": 0.05048425887807943, "grad_norm": 1.5799639225006104, "learning_rate": 1.9874492070350554e-05, "loss": 0.1639, "step": 2330 }, { "epoch": 0.050592594197560285, "grad_norm": 1.730843424797058, "learning_rate": 1.987395396742092e-05, "loss": 0.2716, "step": 2335 }, { "epoch": 0.050700929517041145, "grad_norm": 1.4645315408706665, "learning_rate": 1.9873414720741633e-05, "loss": 0.1984, "step": 2340 }, { "epoch": 0.050809264836522004, "grad_norm": 2.1212048530578613, "learning_rate": 1.987287433037515e-05, "loss": 0.3014, "step": 2345 }, { "epoch": 0.05091760015600286, "grad_norm": 1.4734554290771484, "learning_rate": 1.9872332796384077e-05, "loss": 0.206, "step": 2350 }, { "epoch": 0.051025935475483716, "grad_norm": 2.0100786685943604, "learning_rate": 1.9871790118831134e-05, "loss": 0.2811, "step": 2355 }, { "epoch": 0.051134270794964576, "grad_norm": 1.633396029472351, "learning_rate": 1.987124629777919e-05, "loss": 0.2083, "step": 2360 }, { "epoch": 0.05124260611444543, "grad_norm": 1.590112566947937, "learning_rate": 1.9870701333291228e-05, "loss": 0.1932, "step": 2365 }, { "epoch": 0.05135094143392629, "grad_norm": 2.363271474838257, "learning_rate": 1.987015522543038e-05, "loss": 0.2247, "step": 2370 }, { "epoch": 0.05145927675340715, "grad_norm": 1.448240041732788, "learning_rate": 1.9869607974259904e-05, "loss": 0.1816, "step": 2375 }, { "epoch": 0.051567612072888, "grad_norm": 1.4498873949050903, "learning_rate": 1.9869059579843194e-05, "loss": 0.3048, "step": 2380 }, { "epoch": 0.05167594739236886, "grad_norm": 2.491941213607788, "learning_rate": 1.9868510042243768e-05, "loss": 0.2474, "step": 2385 }, { "epoch": 0.05178428271184972, "grad_norm": 1.916978359222412, "learning_rate": 1.9867959361525288e-05, "loss": 0.2123, "step": 2390 }, { "epoch": 0.05189261803133057, "grad_norm": 1.7712520360946655, "learning_rate": 1.9867407537751533e-05, "loss": 0.1749, "step": 2395 }, { "epoch": 0.05200095335081143, "grad_norm": 1.657105803489685, "learning_rate": 1.986685457098643e-05, "loss": 0.29, "step": 2400 }, { "epoch": 0.05210928867029229, "grad_norm": 1.8053605556488037, "learning_rate": 1.986630046129403e-05, "loss": 0.2575, "step": 2405 }, { "epoch": 0.05221762398977314, "grad_norm": 1.657568097114563, "learning_rate": 1.986574520873852e-05, "loss": 0.1866, "step": 2410 }, { "epoch": 0.052325959309254, "grad_norm": 1.1673707962036133, "learning_rate": 1.9865188813384212e-05, "loss": 0.2206, "step": 2415 }, { "epoch": 0.05243429462873486, "grad_norm": 1.8761707544326782, "learning_rate": 1.986463127529557e-05, "loss": 0.2683, "step": 2420 }, { "epoch": 0.052542629948215715, "grad_norm": 1.8689488172531128, "learning_rate": 1.9864072594537157e-05, "loss": 0.2289, "step": 2425 }, { "epoch": 0.052650965267696574, "grad_norm": 1.9775108098983765, "learning_rate": 1.9863512771173703e-05, "loss": 0.2173, "step": 2430 }, { "epoch": 0.052759300587177434, "grad_norm": 2.2740554809570312, "learning_rate": 1.9862951805270047e-05, "loss": 0.3309, "step": 2435 }, { "epoch": 0.052867635906658286, "grad_norm": 2.119534969329834, "learning_rate": 1.9862389696891175e-05, "loss": 0.2563, "step": 2440 }, { "epoch": 0.052975971226139146, "grad_norm": 1.9183987379074097, "learning_rate": 1.9861826446102192e-05, "loss": 0.1704, "step": 2445 }, { "epoch": 0.053084306545620005, "grad_norm": 1.4222071170806885, "learning_rate": 1.9861262052968347e-05, "loss": 0.1828, "step": 2450 }, { "epoch": 0.05319264186510086, "grad_norm": 2.447317123413086, "learning_rate": 1.9860696517555013e-05, "loss": 0.2158, "step": 2455 }, { "epoch": 0.05330097718458172, "grad_norm": 2.033207893371582, "learning_rate": 1.9860129839927702e-05, "loss": 0.2458, "step": 2460 }, { "epoch": 0.05340931250406258, "grad_norm": 2.4073686599731445, "learning_rate": 1.9859562020152054e-05, "loss": 0.2239, "step": 2465 }, { "epoch": 0.05351764782354343, "grad_norm": 1.092110276222229, "learning_rate": 1.9858993058293842e-05, "loss": 0.3157, "step": 2470 }, { "epoch": 0.05362598314302429, "grad_norm": 1.8795700073242188, "learning_rate": 1.985842295441897e-05, "loss": 0.2134, "step": 2475 }, { "epoch": 0.05373431846250515, "grad_norm": 1.2525506019592285, "learning_rate": 1.9857851708593484e-05, "loss": 0.2355, "step": 2480 }, { "epoch": 0.053842653781986, "grad_norm": 2.7610669136047363, "learning_rate": 1.9857279320883544e-05, "loss": 0.3088, "step": 2485 }, { "epoch": 0.05395098910146686, "grad_norm": 2.1813924312591553, "learning_rate": 1.985670579135546e-05, "loss": 0.2483, "step": 2490 }, { "epoch": 0.05405932442094772, "grad_norm": 3.147660493850708, "learning_rate": 1.9856131120075654e-05, "loss": 0.1666, "step": 2495 }, { "epoch": 0.05416765974042857, "grad_norm": 1.8246792554855347, "learning_rate": 1.985555530711071e-05, "loss": 0.2163, "step": 2500 }, { "epoch": 0.05427599505990943, "grad_norm": 2.1287143230438232, "learning_rate": 1.9854978352527322e-05, "loss": 0.2558, "step": 2505 }, { "epoch": 0.05438433037939029, "grad_norm": 2.316995620727539, "learning_rate": 1.9854400256392314e-05, "loss": 0.3117, "step": 2510 }, { "epoch": 0.054492665698871144, "grad_norm": 2.239253044128418, "learning_rate": 1.9853821018772656e-05, "loss": 0.3234, "step": 2515 }, { "epoch": 0.054601001018352004, "grad_norm": 1.9239948987960815, "learning_rate": 1.9853240639735448e-05, "loss": 0.3127, "step": 2520 }, { "epoch": 0.05470933633783286, "grad_norm": 1.5289472341537476, "learning_rate": 1.9852659119347906e-05, "loss": 0.2252, "step": 2525 }, { "epoch": 0.054817671657313716, "grad_norm": 1.678780198097229, "learning_rate": 1.9852076457677404e-05, "loss": 0.2231, "step": 2530 }, { "epoch": 0.054926006976794575, "grad_norm": 1.6351174116134644, "learning_rate": 1.9851492654791426e-05, "loss": 0.2382, "step": 2535 }, { "epoch": 0.055034342296275435, "grad_norm": 2.1683671474456787, "learning_rate": 1.98509077107576e-05, "loss": 0.216, "step": 2540 }, { "epoch": 0.05514267761575629, "grad_norm": 1.9686200618743896, "learning_rate": 1.985032162564368e-05, "loss": 0.3055, "step": 2545 }, { "epoch": 0.05525101293523715, "grad_norm": 1.262071132659912, "learning_rate": 1.984973439951756e-05, "loss": 0.1908, "step": 2550 }, { "epoch": 0.055359348254718006, "grad_norm": 2.0208816528320312, "learning_rate": 1.9849146032447256e-05, "loss": 0.2393, "step": 2555 }, { "epoch": 0.05546768357419886, "grad_norm": 2.162884473800659, "learning_rate": 1.984855652450093e-05, "loss": 0.2099, "step": 2560 }, { "epoch": 0.05557601889367972, "grad_norm": 2.4069664478302, "learning_rate": 1.9847965875746857e-05, "loss": 0.2357, "step": 2565 }, { "epoch": 0.05568435421316058, "grad_norm": 2.356454849243164, "learning_rate": 1.984737408625346e-05, "loss": 0.2373, "step": 2570 }, { "epoch": 0.05579268953264143, "grad_norm": 1.4640111923217773, "learning_rate": 1.9846781156089293e-05, "loss": 0.2266, "step": 2575 }, { "epoch": 0.05590102485212229, "grad_norm": 1.7396844625473022, "learning_rate": 1.984618708532303e-05, "loss": 0.2492, "step": 2580 }, { "epoch": 0.05600936017160315, "grad_norm": 1.7953095436096191, "learning_rate": 1.984559187402349e-05, "loss": 0.2649, "step": 2585 }, { "epoch": 0.056117695491084, "grad_norm": 2.1704416275024414, "learning_rate": 1.9844995522259618e-05, "loss": 0.1795, "step": 2590 }, { "epoch": 0.05622603081056486, "grad_norm": 2.199700355529785, "learning_rate": 1.9844398030100492e-05, "loss": 0.1993, "step": 2595 }, { "epoch": 0.056334366130045714, "grad_norm": 1.7486543655395508, "learning_rate": 1.9843799397615323e-05, "loss": 0.3054, "step": 2600 }, { "epoch": 0.056442701449526574, "grad_norm": 1.6034950017929077, "learning_rate": 1.9843199624873458e-05, "loss": 0.1823, "step": 2605 }, { "epoch": 0.05655103676900743, "grad_norm": 2.236668825149536, "learning_rate": 1.9842598711944365e-05, "loss": 0.2057, "step": 2610 }, { "epoch": 0.056659372088488286, "grad_norm": 1.3103013038635254, "learning_rate": 1.9841996658897653e-05, "loss": 0.2014, "step": 2615 }, { "epoch": 0.056767707407969145, "grad_norm": 1.5845485925674438, "learning_rate": 1.9841393465803062e-05, "loss": 0.1545, "step": 2620 }, { "epoch": 0.056876042727450005, "grad_norm": 2.4249606132507324, "learning_rate": 1.984078913273046e-05, "loss": 0.2912, "step": 2625 }, { "epoch": 0.05698437804693086, "grad_norm": 1.4510772228240967, "learning_rate": 1.9840183659749853e-05, "loss": 0.312, "step": 2630 }, { "epoch": 0.05709271336641172, "grad_norm": 2.5385146141052246, "learning_rate": 1.9839577046931374e-05, "loss": 0.2318, "step": 2635 }, { "epoch": 0.057201048685892576, "grad_norm": 1.4803879261016846, "learning_rate": 1.9838969294345297e-05, "loss": 0.2247, "step": 2640 }, { "epoch": 0.05730938400537343, "grad_norm": 1.696042537689209, "learning_rate": 1.983836040206201e-05, "loss": 0.2436, "step": 2645 }, { "epoch": 0.05741771932485429, "grad_norm": 2.250556230545044, "learning_rate": 1.9837750370152048e-05, "loss": 0.2255, "step": 2650 }, { "epoch": 0.05752605464433515, "grad_norm": 2.3141355514526367, "learning_rate": 1.9837139198686076e-05, "loss": 0.2306, "step": 2655 }, { "epoch": 0.057634389963816, "grad_norm": 1.8193001747131348, "learning_rate": 1.983652688773489e-05, "loss": 0.2285, "step": 2660 }, { "epoch": 0.05774272528329686, "grad_norm": 1.182133674621582, "learning_rate": 1.9835913437369413e-05, "loss": 0.1977, "step": 2665 }, { "epoch": 0.05785106060277772, "grad_norm": 2.2842888832092285, "learning_rate": 1.9835298847660708e-05, "loss": 0.2092, "step": 2670 }, { "epoch": 0.05795939592225857, "grad_norm": 2.286410093307495, "learning_rate": 1.9834683118679963e-05, "loss": 0.3182, "step": 2675 }, { "epoch": 0.05806773124173943, "grad_norm": 2.1265032291412354, "learning_rate": 1.9834066250498502e-05, "loss": 0.2815, "step": 2680 }, { "epoch": 0.05817606656122029, "grad_norm": 1.8158446550369263, "learning_rate": 1.983344824318778e-05, "loss": 0.2667, "step": 2685 }, { "epoch": 0.058284401880701144, "grad_norm": 1.8817071914672852, "learning_rate": 1.9832829096819386e-05, "loss": 0.2007, "step": 2690 }, { "epoch": 0.058392737200182, "grad_norm": 2.117081642150879, "learning_rate": 1.9832208811465036e-05, "loss": 0.306, "step": 2695 }, { "epoch": 0.05850107251966286, "grad_norm": 2.0368809700012207, "learning_rate": 1.983158738719658e-05, "loss": 0.2123, "step": 2700 }, { "epoch": 0.058609407839143715, "grad_norm": 2.48071026802063, "learning_rate": 1.9830964824086007e-05, "loss": 0.2648, "step": 2705 }, { "epoch": 0.058717743158624575, "grad_norm": 1.7533595561981201, "learning_rate": 1.9830341122205422e-05, "loss": 0.188, "step": 2710 }, { "epoch": 0.058826078478105434, "grad_norm": 1.9877156019210815, "learning_rate": 1.982971628162708e-05, "loss": 0.2628, "step": 2715 }, { "epoch": 0.05893441379758629, "grad_norm": 1.2910488843917847, "learning_rate": 1.982909030242335e-05, "loss": 0.2188, "step": 2720 }, { "epoch": 0.059042749117067146, "grad_norm": 2.063624143600464, "learning_rate": 1.9828463184666756e-05, "loss": 0.3018, "step": 2725 }, { "epoch": 0.059151084436548006, "grad_norm": 1.8045001029968262, "learning_rate": 1.9827834928429927e-05, "loss": 0.2695, "step": 2730 }, { "epoch": 0.05925941975602886, "grad_norm": 1.6600291728973389, "learning_rate": 1.9827205533785644e-05, "loss": 0.195, "step": 2735 }, { "epoch": 0.05936775507550972, "grad_norm": 2.3409950733184814, "learning_rate": 1.982657500080681e-05, "loss": 0.1758, "step": 2740 }, { "epoch": 0.05947609039499058, "grad_norm": 1.9491496086120605, "learning_rate": 1.982594332956647e-05, "loss": 0.2174, "step": 2745 }, { "epoch": 0.05958442571447143, "grad_norm": 1.7419601678848267, "learning_rate": 1.982531052013778e-05, "loss": 0.2266, "step": 2750 }, { "epoch": 0.05969276103395229, "grad_norm": 1.3443435430526733, "learning_rate": 1.9824676572594053e-05, "loss": 0.2663, "step": 2755 }, { "epoch": 0.05980109635343315, "grad_norm": 1.4742388725280762, "learning_rate": 1.982404148700872e-05, "loss": 0.2013, "step": 2760 }, { "epoch": 0.059909431672914, "grad_norm": 1.2738547325134277, "learning_rate": 1.9823405263455345e-05, "loss": 0.2679, "step": 2765 }, { "epoch": 0.06001776699239486, "grad_norm": 1.550423502922058, "learning_rate": 1.9822767902007624e-05, "loss": 0.182, "step": 2770 }, { "epoch": 0.06012610231187572, "grad_norm": 1.3568631410598755, "learning_rate": 1.9822129402739386e-05, "loss": 0.2231, "step": 2775 }, { "epoch": 0.06023443763135657, "grad_norm": 2.116978645324707, "learning_rate": 1.9821489765724594e-05, "loss": 0.2604, "step": 2780 }, { "epoch": 0.06034277295083743, "grad_norm": 2.44274640083313, "learning_rate": 1.9820848991037337e-05, "loss": 0.3183, "step": 2785 }, { "epoch": 0.06045110827031829, "grad_norm": 1.977244257926941, "learning_rate": 1.982020707875184e-05, "loss": 0.3166, "step": 2790 }, { "epoch": 0.060559443589799145, "grad_norm": 1.195888876914978, "learning_rate": 1.981956402894246e-05, "loss": 0.2651, "step": 2795 }, { "epoch": 0.060667778909280004, "grad_norm": 1.6300307512283325, "learning_rate": 1.9818919841683686e-05, "loss": 0.3021, "step": 2800 }, { "epoch": 0.060776114228760864, "grad_norm": 1.698995590209961, "learning_rate": 1.9818274517050136e-05, "loss": 0.1588, "step": 2805 }, { "epoch": 0.060884449548241716, "grad_norm": 1.9589048624038696, "learning_rate": 1.9817628055116557e-05, "loss": 0.2071, "step": 2810 }, { "epoch": 0.060992784867722576, "grad_norm": 2.5690643787384033, "learning_rate": 1.981698045595784e-05, "loss": 0.2844, "step": 2815 }, { "epoch": 0.061101120187203435, "grad_norm": 2.6523241996765137, "learning_rate": 1.9816331719648994e-05, "loss": 0.2568, "step": 2820 }, { "epoch": 0.06120945550668429, "grad_norm": 1.664703130722046, "learning_rate": 1.9815681846265168e-05, "loss": 0.2649, "step": 2825 }, { "epoch": 0.06131779082616515, "grad_norm": 2.1780145168304443, "learning_rate": 1.9815030835881634e-05, "loss": 0.2625, "step": 2830 }, { "epoch": 0.06142612614564601, "grad_norm": 1.450741171836853, "learning_rate": 1.981437868857381e-05, "loss": 0.2638, "step": 2835 }, { "epoch": 0.06153446146512686, "grad_norm": 1.6818523406982422, "learning_rate": 1.9813725404417237e-05, "loss": 0.205, "step": 2840 }, { "epoch": 0.06164279678460772, "grad_norm": 1.8246043920516968, "learning_rate": 1.9813070983487583e-05, "loss": 0.1727, "step": 2845 }, { "epoch": 0.06175113210408858, "grad_norm": 1.302671194076538, "learning_rate": 1.9812415425860652e-05, "loss": 0.2226, "step": 2850 }, { "epoch": 0.06185946742356943, "grad_norm": 1.9659546613693237, "learning_rate": 1.9811758731612393e-05, "loss": 0.2506, "step": 2855 }, { "epoch": 0.06196780274305029, "grad_norm": 1.9631831645965576, "learning_rate": 1.981110090081886e-05, "loss": 0.2449, "step": 2860 }, { "epoch": 0.06207613806253115, "grad_norm": 2.061121702194214, "learning_rate": 1.9810441933556256e-05, "loss": 0.3285, "step": 2865 }, { "epoch": 0.062184473382012, "grad_norm": 2.019064426422119, "learning_rate": 1.9809781829900916e-05, "loss": 0.2295, "step": 2870 }, { "epoch": 0.06229280870149286, "grad_norm": 1.9226566553115845, "learning_rate": 1.98091205899293e-05, "loss": 0.1834, "step": 2875 }, { "epoch": 0.062401144020973714, "grad_norm": 2.2942562103271484, "learning_rate": 1.9808458213718007e-05, "loss": 0.3162, "step": 2880 }, { "epoch": 0.06250947934045457, "grad_norm": 1.7093404531478882, "learning_rate": 1.980779470134376e-05, "loss": 0.2345, "step": 2885 }, { "epoch": 0.06261781465993543, "grad_norm": 0.9167007207870483, "learning_rate": 1.9807130052883415e-05, "loss": 0.1743, "step": 2890 }, { "epoch": 0.06272614997941629, "grad_norm": 2.231410026550293, "learning_rate": 1.9806464268413966e-05, "loss": 0.1855, "step": 2895 }, { "epoch": 0.06283448529889715, "grad_norm": 1.578213095664978, "learning_rate": 1.9805797348012534e-05, "loss": 0.2701, "step": 2900 }, { "epoch": 0.062942820618378, "grad_norm": 1.6162304878234863, "learning_rate": 1.980512929175637e-05, "loss": 0.2532, "step": 2905 }, { "epoch": 0.06305115593785886, "grad_norm": 2.3388516902923584, "learning_rate": 1.9804460099722856e-05, "loss": 0.2137, "step": 2910 }, { "epoch": 0.06315949125733972, "grad_norm": 1.563902735710144, "learning_rate": 1.9803789771989513e-05, "loss": 0.253, "step": 2915 }, { "epoch": 0.06326782657682058, "grad_norm": 2.1047160625457764, "learning_rate": 1.980311830863398e-05, "loss": 0.2098, "step": 2920 }, { "epoch": 0.06337616189630144, "grad_norm": 2.2954585552215576, "learning_rate": 1.9802445709734045e-05, "loss": 0.2512, "step": 2925 }, { "epoch": 0.0634844972157823, "grad_norm": 2.006682872772217, "learning_rate": 1.980177197536762e-05, "loss": 0.26, "step": 2930 }, { "epoch": 0.06359283253526314, "grad_norm": 1.3665778636932373, "learning_rate": 1.9801097105612738e-05, "loss": 0.2666, "step": 2935 }, { "epoch": 0.063701167854744, "grad_norm": 1.7003076076507568, "learning_rate": 1.9800421100547576e-05, "loss": 0.2575, "step": 2940 }, { "epoch": 0.06380950317422486, "grad_norm": 2.5453264713287354, "learning_rate": 1.9799743960250438e-05, "loss": 0.2881, "step": 2945 }, { "epoch": 0.06391783849370572, "grad_norm": 2.49572491645813, "learning_rate": 1.9799065684799767e-05, "loss": 0.16, "step": 2950 }, { "epoch": 0.06402617381318658, "grad_norm": 2.013604164123535, "learning_rate": 1.9798386274274125e-05, "loss": 0.2588, "step": 2955 }, { "epoch": 0.06413450913266744, "grad_norm": 2.1841654777526855, "learning_rate": 1.979770572875221e-05, "loss": 0.2746, "step": 2960 }, { "epoch": 0.06424284445214828, "grad_norm": 1.335001826286316, "learning_rate": 1.9797024048312858e-05, "loss": 0.2489, "step": 2965 }, { "epoch": 0.06435117977162914, "grad_norm": 1.2738784551620483, "learning_rate": 1.979634123303503e-05, "loss": 0.1625, "step": 2970 }, { "epoch": 0.06445951509111, "grad_norm": 2.1630687713623047, "learning_rate": 1.979565728299782e-05, "loss": 0.2664, "step": 2975 }, { "epoch": 0.06456785041059086, "grad_norm": 3.2392303943634033, "learning_rate": 1.979497219828045e-05, "loss": 0.2825, "step": 2980 }, { "epoch": 0.06467618573007172, "grad_norm": 2.2043404579162598, "learning_rate": 1.9794285978962283e-05, "loss": 0.1998, "step": 2985 }, { "epoch": 0.06478452104955258, "grad_norm": 1.572678804397583, "learning_rate": 1.97935986251228e-05, "loss": 0.3189, "step": 2990 }, { "epoch": 0.06489285636903343, "grad_norm": 1.9458906650543213, "learning_rate": 1.9792910136841627e-05, "loss": 0.2571, "step": 2995 }, { "epoch": 0.06500119168851429, "grad_norm": 1.8588043451309204, "learning_rate": 1.979222051419851e-05, "loss": 0.2365, "step": 3000 }, { "epoch": 0.06510952700799515, "grad_norm": 2.963841199874878, "learning_rate": 1.9791529757273338e-05, "loss": 0.2593, "step": 3005 }, { "epoch": 0.065217862327476, "grad_norm": 1.0791171789169312, "learning_rate": 1.979083786614612e-05, "loss": 0.2699, "step": 3010 }, { "epoch": 0.06532619764695687, "grad_norm": 2.693938970565796, "learning_rate": 1.9790144840897e-05, "loss": 0.1887, "step": 3015 }, { "epoch": 0.06543453296643771, "grad_norm": 1.9051679372787476, "learning_rate": 1.978945068160625e-05, "loss": 0.2153, "step": 3020 }, { "epoch": 0.06554286828591857, "grad_norm": 1.066339135169983, "learning_rate": 1.9788755388354296e-05, "loss": 0.2133, "step": 3025 }, { "epoch": 0.06565120360539943, "grad_norm": 2.306446075439453, "learning_rate": 1.978805896122166e-05, "loss": 0.2958, "step": 3030 }, { "epoch": 0.06575953892488029, "grad_norm": 2.0741586685180664, "learning_rate": 1.978736140028902e-05, "loss": 0.2099, "step": 3035 }, { "epoch": 0.06586787424436115, "grad_norm": 1.8937491178512573, "learning_rate": 1.9786662705637172e-05, "loss": 0.2874, "step": 3040 }, { "epoch": 0.06597620956384201, "grad_norm": 1.2779574394226074, "learning_rate": 1.978596287734706e-05, "loss": 0.1689, "step": 3045 }, { "epoch": 0.06608454488332285, "grad_norm": 2.1557865142822266, "learning_rate": 1.9785261915499744e-05, "loss": 0.3184, "step": 3050 }, { "epoch": 0.06619288020280371, "grad_norm": 1.8658050298690796, "learning_rate": 1.9784559820176414e-05, "loss": 0.2222, "step": 3055 }, { "epoch": 0.06630121552228457, "grad_norm": 2.09879994392395, "learning_rate": 1.9783856591458403e-05, "loss": 0.2446, "step": 3060 }, { "epoch": 0.06640955084176543, "grad_norm": 2.2757208347320557, "learning_rate": 1.9783152229427164e-05, "loss": 0.21, "step": 3065 }, { "epoch": 0.06651788616124629, "grad_norm": 1.9638715982437134, "learning_rate": 1.9782446734164294e-05, "loss": 0.2006, "step": 3070 }, { "epoch": 0.06662622148072715, "grad_norm": 1.7379367351531982, "learning_rate": 1.978174010575151e-05, "loss": 0.2707, "step": 3075 }, { "epoch": 0.066734556800208, "grad_norm": 2.275560140609741, "learning_rate": 1.9781032344270666e-05, "loss": 0.2504, "step": 3080 }, { "epoch": 0.06684289211968886, "grad_norm": 1.5352935791015625, "learning_rate": 1.9780323449803745e-05, "loss": 0.2194, "step": 3085 }, { "epoch": 0.06695122743916972, "grad_norm": 2.5290777683258057, "learning_rate": 1.977961342243286e-05, "loss": 0.2266, "step": 3090 }, { "epoch": 0.06705956275865058, "grad_norm": 2.1098263263702393, "learning_rate": 1.977890226224026e-05, "loss": 0.3511, "step": 3095 }, { "epoch": 0.06716789807813144, "grad_norm": 1.8988572359085083, "learning_rate": 1.9778189969308323e-05, "loss": 0.3499, "step": 3100 }, { "epoch": 0.0672762333976123, "grad_norm": 2.192338228225708, "learning_rate": 1.9777476543719552e-05, "loss": 0.3294, "step": 3105 }, { "epoch": 0.06738456871709314, "grad_norm": 1.5799708366394043, "learning_rate": 1.977676198555659e-05, "loss": 0.2553, "step": 3110 }, { "epoch": 0.067492904036574, "grad_norm": 2.663888931274414, "learning_rate": 1.977604629490221e-05, "loss": 0.2537, "step": 3115 }, { "epoch": 0.06760123935605486, "grad_norm": 1.4971915483474731, "learning_rate": 1.977532947183931e-05, "loss": 0.1956, "step": 3120 }, { "epoch": 0.06770957467553572, "grad_norm": 1.7423458099365234, "learning_rate": 1.9774611516450925e-05, "loss": 0.2628, "step": 3125 }, { "epoch": 0.06781790999501658, "grad_norm": 2.0324697494506836, "learning_rate": 1.9773892428820223e-05, "loss": 0.2192, "step": 3130 }, { "epoch": 0.06792624531449744, "grad_norm": 1.7469598054885864, "learning_rate": 1.977317220903049e-05, "loss": 0.2828, "step": 3135 }, { "epoch": 0.06803458063397828, "grad_norm": 1.372169852256775, "learning_rate": 1.9772450857165163e-05, "loss": 0.2425, "step": 3140 }, { "epoch": 0.06814291595345914, "grad_norm": 1.212729573249817, "learning_rate": 1.9771728373307795e-05, "loss": 0.2145, "step": 3145 }, { "epoch": 0.06825125127294, "grad_norm": 2.236246347427368, "learning_rate": 1.9771004757542075e-05, "loss": 0.2306, "step": 3150 }, { "epoch": 0.06835958659242086, "grad_norm": 2.2510881423950195, "learning_rate": 1.9770280009951822e-05, "loss": 0.2525, "step": 3155 }, { "epoch": 0.06846792191190172, "grad_norm": 2.4277663230895996, "learning_rate": 1.976955413062099e-05, "loss": 0.2023, "step": 3160 }, { "epoch": 0.06857625723138258, "grad_norm": 2.110821485519409, "learning_rate": 1.9768827119633663e-05, "loss": 0.3519, "step": 3165 }, { "epoch": 0.06868459255086343, "grad_norm": 1.5640466213226318, "learning_rate": 1.976809897707405e-05, "loss": 0.2238, "step": 3170 }, { "epoch": 0.06879292787034429, "grad_norm": 1.3093199729919434, "learning_rate": 1.9767369703026492e-05, "loss": 0.1803, "step": 3175 }, { "epoch": 0.06890126318982515, "grad_norm": 2.2643799781799316, "learning_rate": 1.9766639297575473e-05, "loss": 0.2308, "step": 3180 }, { "epoch": 0.069009598509306, "grad_norm": 2.5918045043945312, "learning_rate": 1.9765907760805595e-05, "loss": 0.1598, "step": 3185 }, { "epoch": 0.06911793382878686, "grad_norm": 1.6010665893554688, "learning_rate": 1.9765175092801594e-05, "loss": 0.1846, "step": 3190 }, { "epoch": 0.06922626914826772, "grad_norm": 2.060706377029419, "learning_rate": 1.9764441293648344e-05, "loss": 0.2309, "step": 3195 }, { "epoch": 0.06933460446774857, "grad_norm": 2.1347570419311523, "learning_rate": 1.9763706363430838e-05, "loss": 0.2847, "step": 3200 }, { "epoch": 0.06944293978722943, "grad_norm": 1.7144050598144531, "learning_rate": 1.9762970302234215e-05, "loss": 0.2538, "step": 3205 }, { "epoch": 0.06955127510671029, "grad_norm": 1.803955316543579, "learning_rate": 1.9762233110143728e-05, "loss": 0.2409, "step": 3210 }, { "epoch": 0.06965961042619115, "grad_norm": 2.246629476547241, "learning_rate": 1.9761494787244775e-05, "loss": 0.2384, "step": 3215 }, { "epoch": 0.06976794574567201, "grad_norm": 1.9237195253372192, "learning_rate": 1.9760755333622875e-05, "loss": 0.221, "step": 3220 }, { "epoch": 0.06987628106515287, "grad_norm": 2.2896955013275146, "learning_rate": 1.9760014749363688e-05, "loss": 0.2665, "step": 3225 }, { "epoch": 0.06998461638463371, "grad_norm": 1.99830162525177, "learning_rate": 1.9759273034552997e-05, "loss": 0.2397, "step": 3230 }, { "epoch": 0.07009295170411457, "grad_norm": 2.347290515899658, "learning_rate": 1.975853018927672e-05, "loss": 0.2082, "step": 3235 }, { "epoch": 0.07020128702359543, "grad_norm": 2.1156601905822754, "learning_rate": 1.9757786213620904e-05, "loss": 0.237, "step": 3240 }, { "epoch": 0.07030962234307629, "grad_norm": 2.501328229904175, "learning_rate": 1.9757041107671724e-05, "loss": 0.2203, "step": 3245 }, { "epoch": 0.07041795766255715, "grad_norm": 2.5540120601654053, "learning_rate": 1.9756294871515496e-05, "loss": 0.296, "step": 3250 }, { "epoch": 0.07052629298203801, "grad_norm": 1.9040573835372925, "learning_rate": 1.9755547505238652e-05, "loss": 0.2889, "step": 3255 }, { "epoch": 0.07063462830151886, "grad_norm": 1.3102972507476807, "learning_rate": 1.975479900892777e-05, "loss": 0.2329, "step": 3260 }, { "epoch": 0.07074296362099972, "grad_norm": 1.7544326782226562, "learning_rate": 1.9754049382669548e-05, "loss": 0.2688, "step": 3265 }, { "epoch": 0.07085129894048058, "grad_norm": 2.7944231033325195, "learning_rate": 1.9753298626550824e-05, "loss": 0.2236, "step": 3270 }, { "epoch": 0.07095963425996143, "grad_norm": 1.4624367952346802, "learning_rate": 1.9752546740658555e-05, "loss": 0.2552, "step": 3275 }, { "epoch": 0.0710679695794423, "grad_norm": 3.424142360687256, "learning_rate": 1.975179372507984e-05, "loss": 0.212, "step": 3280 }, { "epoch": 0.07117630489892314, "grad_norm": 1.4042150974273682, "learning_rate": 1.9751039579901908e-05, "loss": 0.1899, "step": 3285 }, { "epoch": 0.071284640218404, "grad_norm": 1.8535763025283813, "learning_rate": 1.9750284305212103e-05, "loss": 0.3245, "step": 3290 }, { "epoch": 0.07139297553788486, "grad_norm": 2.341346263885498, "learning_rate": 1.974952790109793e-05, "loss": 0.2327, "step": 3295 }, { "epoch": 0.07150131085736572, "grad_norm": 2.3846192359924316, "learning_rate": 1.974877036764699e-05, "loss": 0.2673, "step": 3300 }, { "epoch": 0.07160964617684658, "grad_norm": 1.7157741785049438, "learning_rate": 1.9748011704947044e-05, "loss": 0.1729, "step": 3305 }, { "epoch": 0.07171798149632744, "grad_norm": 1.76881742477417, "learning_rate": 1.9747251913085965e-05, "loss": 0.2797, "step": 3310 }, { "epoch": 0.07182631681580828, "grad_norm": 1.7319427728652954, "learning_rate": 1.9746490992151766e-05, "loss": 0.2585, "step": 3315 }, { "epoch": 0.07193465213528914, "grad_norm": 2.082321882247925, "learning_rate": 1.974572894223259e-05, "loss": 0.2519, "step": 3320 }, { "epoch": 0.07204298745477, "grad_norm": 1.6412683725357056, "learning_rate": 1.9744965763416703e-05, "loss": 0.2507, "step": 3325 }, { "epoch": 0.07215132277425086, "grad_norm": 2.2817442417144775, "learning_rate": 1.9744201455792514e-05, "loss": 0.2717, "step": 3330 }, { "epoch": 0.07225965809373172, "grad_norm": 1.5904638767242432, "learning_rate": 1.9743436019448554e-05, "loss": 0.3017, "step": 3335 }, { "epoch": 0.07236799341321258, "grad_norm": 1.4526845216751099, "learning_rate": 1.974266945447349e-05, "loss": 0.215, "step": 3340 }, { "epoch": 0.07247632873269343, "grad_norm": 1.5245006084442139, "learning_rate": 1.974190176095611e-05, "loss": 0.3106, "step": 3345 }, { "epoch": 0.07258466405217429, "grad_norm": 2.0811872482299805, "learning_rate": 1.974113293898535e-05, "loss": 0.3444, "step": 3350 }, { "epoch": 0.07269299937165515, "grad_norm": 1.820644497871399, "learning_rate": 1.974036298865026e-05, "loss": 0.2574, "step": 3355 }, { "epoch": 0.072801334691136, "grad_norm": 2.122102975845337, "learning_rate": 1.9739591910040027e-05, "loss": 0.246, "step": 3360 }, { "epoch": 0.07290967001061686, "grad_norm": 1.8308725357055664, "learning_rate": 1.973881970324397e-05, "loss": 0.2654, "step": 3365 }, { "epoch": 0.07301800533009772, "grad_norm": 2.78593373298645, "learning_rate": 1.973804636835154e-05, "loss": 0.3452, "step": 3370 }, { "epoch": 0.07312634064957857, "grad_norm": 1.4992820024490356, "learning_rate": 1.973727190545231e-05, "loss": 0.2173, "step": 3375 }, { "epoch": 0.07323467596905943, "grad_norm": 2.580909252166748, "learning_rate": 1.9736496314635998e-05, "loss": 0.1809, "step": 3380 }, { "epoch": 0.07334301128854029, "grad_norm": 1.6747525930404663, "learning_rate": 1.973571959599244e-05, "loss": 0.2173, "step": 3385 }, { "epoch": 0.07345134660802115, "grad_norm": 1.5327948331832886, "learning_rate": 1.973494174961161e-05, "loss": 0.247, "step": 3390 }, { "epoch": 0.07355968192750201, "grad_norm": 1.6421509981155396, "learning_rate": 1.97341627755836e-05, "loss": 0.3038, "step": 3395 }, { "epoch": 0.07366801724698287, "grad_norm": 1.6315008401870728, "learning_rate": 1.973338267399866e-05, "loss": 0.2681, "step": 3400 }, { "epoch": 0.07377635256646371, "grad_norm": 1.888870358467102, "learning_rate": 1.9732601444947136e-05, "loss": 0.2312, "step": 3405 }, { "epoch": 0.07388468788594457, "grad_norm": 1.9270414113998413, "learning_rate": 1.9731819088519532e-05, "loss": 0.311, "step": 3410 }, { "epoch": 0.07399302320542543, "grad_norm": 2.709501028060913, "learning_rate": 1.973103560480647e-05, "loss": 0.3204, "step": 3415 }, { "epoch": 0.07410135852490629, "grad_norm": 1.5353747606277466, "learning_rate": 1.9730250993898702e-05, "loss": 0.2477, "step": 3420 }, { "epoch": 0.07420969384438715, "grad_norm": 2.382619619369507, "learning_rate": 1.9729465255887118e-05, "loss": 0.2085, "step": 3425 }, { "epoch": 0.07431802916386801, "grad_norm": 1.971044659614563, "learning_rate": 1.972867839086273e-05, "loss": 0.2155, "step": 3430 }, { "epoch": 0.07442636448334886, "grad_norm": 2.9511077404022217, "learning_rate": 1.9727890398916686e-05, "loss": 0.2353, "step": 3435 }, { "epoch": 0.07453469980282972, "grad_norm": 1.89426851272583, "learning_rate": 1.9727101280140263e-05, "loss": 0.2314, "step": 3440 }, { "epoch": 0.07464303512231057, "grad_norm": 1.4741225242614746, "learning_rate": 1.972631103462487e-05, "loss": 0.2809, "step": 3445 }, { "epoch": 0.07475137044179143, "grad_norm": 2.2105629444122314, "learning_rate": 1.972551966246204e-05, "loss": 0.2332, "step": 3450 }, { "epoch": 0.0748597057612723, "grad_norm": 1.7792187929153442, "learning_rate": 1.9724727163743447e-05, "loss": 0.2668, "step": 3455 }, { "epoch": 0.07496804108075315, "grad_norm": 1.809885859489441, "learning_rate": 1.972393353856089e-05, "loss": 0.2332, "step": 3460 }, { "epoch": 0.075076376400234, "grad_norm": 1.5225903987884521, "learning_rate": 1.97231387870063e-05, "loss": 0.2294, "step": 3465 }, { "epoch": 0.07518471171971486, "grad_norm": 2.117460250854492, "learning_rate": 1.972234290917173e-05, "loss": 0.2519, "step": 3470 }, { "epoch": 0.07529304703919572, "grad_norm": 2.19724440574646, "learning_rate": 1.9721545905149373e-05, "loss": 0.1767, "step": 3475 }, { "epoch": 0.07540138235867658, "grad_norm": 1.6327821016311646, "learning_rate": 1.9720747775031553e-05, "loss": 0.1895, "step": 3480 }, { "epoch": 0.07550971767815744, "grad_norm": 1.753485918045044, "learning_rate": 1.9719948518910722e-05, "loss": 0.2154, "step": 3485 }, { "epoch": 0.0756180529976383, "grad_norm": 1.8035093545913696, "learning_rate": 1.9719148136879457e-05, "loss": 0.1488, "step": 3490 }, { "epoch": 0.07572638831711914, "grad_norm": 2.1067516803741455, "learning_rate": 1.9718346629030475e-05, "loss": 0.318, "step": 3495 }, { "epoch": 0.0758347236366, "grad_norm": 2.1622562408447266, "learning_rate": 1.971754399545662e-05, "loss": 0.2811, "step": 3500 }, { "epoch": 0.07594305895608086, "grad_norm": 1.9296119213104248, "learning_rate": 1.971674023625086e-05, "loss": 0.266, "step": 3505 }, { "epoch": 0.07605139427556172, "grad_norm": 2.156327486038208, "learning_rate": 1.9715935351506297e-05, "loss": 0.2537, "step": 3510 }, { "epoch": 0.07615972959504258, "grad_norm": 1.239894986152649, "learning_rate": 1.9715129341316168e-05, "loss": 0.1935, "step": 3515 }, { "epoch": 0.07626806491452344, "grad_norm": 1.7825442552566528, "learning_rate": 1.9714322205773843e-05, "loss": 0.2309, "step": 3520 }, { "epoch": 0.07637640023400429, "grad_norm": 1.8066359758377075, "learning_rate": 1.9713513944972808e-05, "loss": 0.232, "step": 3525 }, { "epoch": 0.07648473555348514, "grad_norm": 1.6354200839996338, "learning_rate": 1.971270455900669e-05, "loss": 0.2657, "step": 3530 }, { "epoch": 0.076593070872966, "grad_norm": 1.7982183694839478, "learning_rate": 1.9711894047969245e-05, "loss": 0.2215, "step": 3535 }, { "epoch": 0.07670140619244686, "grad_norm": 1.6945679187774658, "learning_rate": 1.9711082411954358e-05, "loss": 0.2195, "step": 3540 }, { "epoch": 0.07680974151192772, "grad_norm": 1.9245054721832275, "learning_rate": 1.9710269651056047e-05, "loss": 0.2043, "step": 3545 }, { "epoch": 0.07691807683140858, "grad_norm": 2.8967783451080322, "learning_rate": 1.970945576536846e-05, "loss": 0.1941, "step": 3550 }, { "epoch": 0.07702641215088943, "grad_norm": 1.5349349975585938, "learning_rate": 1.9708640754985862e-05, "loss": 0.1613, "step": 3555 }, { "epoch": 0.07713474747037029, "grad_norm": 2.036846399307251, "learning_rate": 1.9707824620002676e-05, "loss": 0.2303, "step": 3560 }, { "epoch": 0.07724308278985115, "grad_norm": 1.6066871881484985, "learning_rate": 1.970700736051343e-05, "loss": 0.2345, "step": 3565 }, { "epoch": 0.077351418109332, "grad_norm": 1.8198641538619995, "learning_rate": 1.9706188976612788e-05, "loss": 0.2811, "step": 3570 }, { "epoch": 0.07745975342881287, "grad_norm": 1.8107492923736572, "learning_rate": 1.9705369468395553e-05, "loss": 0.1815, "step": 3575 }, { "epoch": 0.07756808874829371, "grad_norm": 2.2368483543395996, "learning_rate": 1.9704548835956655e-05, "loss": 0.2256, "step": 3580 }, { "epoch": 0.07767642406777457, "grad_norm": 2.3086516857147217, "learning_rate": 1.9703727079391144e-05, "loss": 0.2845, "step": 3585 }, { "epoch": 0.07778475938725543, "grad_norm": 1.2305593490600586, "learning_rate": 1.9702904198794216e-05, "loss": 0.193, "step": 3590 }, { "epoch": 0.07789309470673629, "grad_norm": 1.2365977764129639, "learning_rate": 1.9702080194261187e-05, "loss": 0.2477, "step": 3595 }, { "epoch": 0.07800143002621715, "grad_norm": 1.8557957410812378, "learning_rate": 1.9701255065887502e-05, "loss": 0.2016, "step": 3600 }, { "epoch": 0.07810976534569801, "grad_norm": 2.3120768070220947, "learning_rate": 1.9700428813768742e-05, "loss": 0.2892, "step": 3605 }, { "epoch": 0.07821810066517886, "grad_norm": 2.360295295715332, "learning_rate": 1.9699601438000618e-05, "loss": 0.2751, "step": 3610 }, { "epoch": 0.07832643598465971, "grad_norm": 1.9063299894332886, "learning_rate": 1.9698772938678966e-05, "loss": 0.2398, "step": 3615 }, { "epoch": 0.07843477130414057, "grad_norm": 1.8181096315383911, "learning_rate": 1.9697943315899756e-05, "loss": 0.2578, "step": 3620 }, { "epoch": 0.07854310662362143, "grad_norm": 1.6403090953826904, "learning_rate": 1.9697112569759088e-05, "loss": 0.229, "step": 3625 }, { "epoch": 0.0786514419431023, "grad_norm": 1.8291351795196533, "learning_rate": 1.9696280700353195e-05, "loss": 0.1684, "step": 3630 }, { "epoch": 0.07875977726258315, "grad_norm": 1.5774483680725098, "learning_rate": 1.969544770777843e-05, "loss": 0.2113, "step": 3635 }, { "epoch": 0.078868112582064, "grad_norm": 2.3660929203033447, "learning_rate": 1.9694613592131285e-05, "loss": 0.1859, "step": 3640 }, { "epoch": 0.07897644790154486, "grad_norm": 1.4094266891479492, "learning_rate": 1.9693778353508385e-05, "loss": 0.1998, "step": 3645 }, { "epoch": 0.07908478322102572, "grad_norm": 1.7274092435836792, "learning_rate": 1.9692941992006473e-05, "loss": 0.2375, "step": 3650 }, { "epoch": 0.07919311854050658, "grad_norm": 2.796656847000122, "learning_rate": 1.9692104507722433e-05, "loss": 0.1746, "step": 3655 }, { "epoch": 0.07930145385998744, "grad_norm": 1.4877265691757202, "learning_rate": 1.969126590075327e-05, "loss": 0.224, "step": 3660 }, { "epoch": 0.0794097891794683, "grad_norm": 1.835390567779541, "learning_rate": 1.9690426171196128e-05, "loss": 0.2079, "step": 3665 }, { "epoch": 0.07951812449894914, "grad_norm": 1.7686192989349365, "learning_rate": 1.968958531914828e-05, "loss": 0.2853, "step": 3670 }, { "epoch": 0.07962645981843, "grad_norm": 2.101621150970459, "learning_rate": 1.9688743344707123e-05, "loss": 0.3467, "step": 3675 }, { "epoch": 0.07973479513791086, "grad_norm": 2.50555157661438, "learning_rate": 1.968790024797018e-05, "loss": 0.1309, "step": 3680 }, { "epoch": 0.07984313045739172, "grad_norm": 2.0748908519744873, "learning_rate": 1.9687056029035127e-05, "loss": 0.2145, "step": 3685 }, { "epoch": 0.07995146577687258, "grad_norm": 2.6731245517730713, "learning_rate": 1.9686210687999742e-05, "loss": 0.3011, "step": 3690 }, { "epoch": 0.08005980109635344, "grad_norm": 1.6343482732772827, "learning_rate": 1.9685364224961943e-05, "loss": 0.1756, "step": 3695 }, { "epoch": 0.08016813641583428, "grad_norm": 2.504192352294922, "learning_rate": 1.9684516640019794e-05, "loss": 0.2017, "step": 3700 }, { "epoch": 0.08027647173531514, "grad_norm": 1.6345866918563843, "learning_rate": 1.968366793327146e-05, "loss": 0.1815, "step": 3705 }, { "epoch": 0.080384807054796, "grad_norm": 2.4433658123016357, "learning_rate": 1.968281810481526e-05, "loss": 0.1966, "step": 3710 }, { "epoch": 0.08049314237427686, "grad_norm": 1.41312575340271, "learning_rate": 1.9681967154749628e-05, "loss": 0.2504, "step": 3715 }, { "epoch": 0.08060147769375772, "grad_norm": 2.8340089321136475, "learning_rate": 1.968111508317314e-05, "loss": 0.2485, "step": 3720 }, { "epoch": 0.08070981301323858, "grad_norm": 1.7927610874176025, "learning_rate": 1.968026189018449e-05, "loss": 0.2125, "step": 3725 }, { "epoch": 0.08081814833271943, "grad_norm": 2.8285012245178223, "learning_rate": 1.9679407575882514e-05, "loss": 0.2183, "step": 3730 }, { "epoch": 0.08092648365220029, "grad_norm": 1.466698169708252, "learning_rate": 1.9678552140366167e-05, "loss": 0.2022, "step": 3735 }, { "epoch": 0.08103481897168115, "grad_norm": 2.1553120613098145, "learning_rate": 1.9677695583734537e-05, "loss": 0.2664, "step": 3740 }, { "epoch": 0.081143154291162, "grad_norm": 1.4989807605743408, "learning_rate": 1.9676837906086847e-05, "loss": 0.3242, "step": 3745 }, { "epoch": 0.08125148961064287, "grad_norm": 1.5748380422592163, "learning_rate": 1.9675979107522446e-05, "loss": 0.2337, "step": 3750 }, { "epoch": 0.08135982493012373, "grad_norm": 1.8182388544082642, "learning_rate": 1.967511918814081e-05, "loss": 0.3322, "step": 3755 }, { "epoch": 0.08146816024960457, "grad_norm": 2.2501368522644043, "learning_rate": 1.967425814804155e-05, "loss": 0.2554, "step": 3760 }, { "epoch": 0.08157649556908543, "grad_norm": 1.4522212743759155, "learning_rate": 1.9673395987324402e-05, "loss": 0.1676, "step": 3765 }, { "epoch": 0.08168483088856629, "grad_norm": 2.646862506866455, "learning_rate": 1.967253270608924e-05, "loss": 0.275, "step": 3770 }, { "epoch": 0.08179316620804715, "grad_norm": 1.7067419290542603, "learning_rate": 1.9671668304436055e-05, "loss": 0.2071, "step": 3775 }, { "epoch": 0.08190150152752801, "grad_norm": 2.1775808334350586, "learning_rate": 1.9670802782464978e-05, "loss": 0.2331, "step": 3780 }, { "epoch": 0.08200983684700887, "grad_norm": 1.991139531135559, "learning_rate": 1.966993614027627e-05, "loss": 0.2694, "step": 3785 }, { "epoch": 0.08211817216648971, "grad_norm": 2.2614188194274902, "learning_rate": 1.9669068377970313e-05, "loss": 0.2801, "step": 3790 }, { "epoch": 0.08222650748597057, "grad_norm": 1.5931991338729858, "learning_rate": 1.9668199495647627e-05, "loss": 0.1788, "step": 3795 }, { "epoch": 0.08233484280545143, "grad_norm": 1.4487007856369019, "learning_rate": 1.9667329493408865e-05, "loss": 0.2434, "step": 3800 }, { "epoch": 0.08244317812493229, "grad_norm": 2.622750759124756, "learning_rate": 1.966645837135479e-05, "loss": 0.2188, "step": 3805 }, { "epoch": 0.08255151344441315, "grad_norm": 1.7593517303466797, "learning_rate": 1.966558612958632e-05, "loss": 0.2442, "step": 3810 }, { "epoch": 0.08265984876389401, "grad_norm": 1.9523718357086182, "learning_rate": 1.9664712768204488e-05, "loss": 0.2376, "step": 3815 }, { "epoch": 0.08276818408337486, "grad_norm": 2.392566680908203, "learning_rate": 1.9663838287310456e-05, "loss": 0.2189, "step": 3820 }, { "epoch": 0.08287651940285572, "grad_norm": 1.9803069829940796, "learning_rate": 1.966296268700553e-05, "loss": 0.2035, "step": 3825 }, { "epoch": 0.08298485472233658, "grad_norm": 2.0452990531921387, "learning_rate": 1.966208596739112e-05, "loss": 0.2135, "step": 3830 }, { "epoch": 0.08309319004181744, "grad_norm": 3.876455307006836, "learning_rate": 1.9661208128568793e-05, "loss": 0.2129, "step": 3835 }, { "epoch": 0.0832015253612983, "grad_norm": 1.9231977462768555, "learning_rate": 1.9660329170640227e-05, "loss": 0.2952, "step": 3840 }, { "epoch": 0.08330986068077914, "grad_norm": 1.4919040203094482, "learning_rate": 1.965944909370724e-05, "loss": 0.2471, "step": 3845 }, { "epoch": 0.08341819600026, "grad_norm": 2.3997082710266113, "learning_rate": 1.965856789787177e-05, "loss": 0.2401, "step": 3850 }, { "epoch": 0.08352653131974086, "grad_norm": 1.4165568351745605, "learning_rate": 1.96576855832359e-05, "loss": 0.1809, "step": 3855 }, { "epoch": 0.08363486663922172, "grad_norm": 1.5290099382400513, "learning_rate": 1.9656802149901826e-05, "loss": 0.2343, "step": 3860 }, { "epoch": 0.08374320195870258, "grad_norm": 1.6090030670166016, "learning_rate": 1.965591759797188e-05, "loss": 0.1844, "step": 3865 }, { "epoch": 0.08385153727818344, "grad_norm": 2.1734120845794678, "learning_rate": 1.9655031927548527e-05, "loss": 0.1516, "step": 3870 }, { "epoch": 0.08395987259766428, "grad_norm": 2.201277256011963, "learning_rate": 1.9654145138734356e-05, "loss": 0.2186, "step": 3875 }, { "epoch": 0.08406820791714514, "grad_norm": 1.87333083152771, "learning_rate": 1.965325723163209e-05, "loss": 0.2439, "step": 3880 }, { "epoch": 0.084176543236626, "grad_norm": 1.2692451477050781, "learning_rate": 1.9652368206344582e-05, "loss": 0.2303, "step": 3885 }, { "epoch": 0.08428487855610686, "grad_norm": 1.5422571897506714, "learning_rate": 1.9651478062974805e-05, "loss": 0.3044, "step": 3890 }, { "epoch": 0.08439321387558772, "grad_norm": 2.5769972801208496, "learning_rate": 1.965058680162588e-05, "loss": 0.2578, "step": 3895 }, { "epoch": 0.08450154919506858, "grad_norm": 1.8943219184875488, "learning_rate": 1.9649694422401036e-05, "loss": 0.2492, "step": 3900 }, { "epoch": 0.08460988451454943, "grad_norm": 1.7564679384231567, "learning_rate": 1.9648800925403645e-05, "loss": 0.2934, "step": 3905 }, { "epoch": 0.08471821983403029, "grad_norm": 1.8257285356521606, "learning_rate": 1.9647906310737206e-05, "loss": 0.1969, "step": 3910 }, { "epoch": 0.08482655515351115, "grad_norm": 2.1018271446228027, "learning_rate": 1.964701057850535e-05, "loss": 0.1893, "step": 3915 }, { "epoch": 0.084934890472992, "grad_norm": 1.8458853960037231, "learning_rate": 1.9646113728811827e-05, "loss": 0.2593, "step": 3920 }, { "epoch": 0.08504322579247287, "grad_norm": 1.637467622756958, "learning_rate": 1.9645215761760528e-05, "loss": 0.2524, "step": 3925 }, { "epoch": 0.08515156111195372, "grad_norm": 1.4934026002883911, "learning_rate": 1.964431667745547e-05, "loss": 0.2008, "step": 3930 }, { "epoch": 0.08525989643143457, "grad_norm": 1.4781917333602905, "learning_rate": 1.9643416476000796e-05, "loss": 0.2025, "step": 3935 }, { "epoch": 0.08536823175091543, "grad_norm": 1.8445274829864502, "learning_rate": 1.964251515750078e-05, "loss": 0.2474, "step": 3940 }, { "epoch": 0.08547656707039629, "grad_norm": 1.5816277265548706, "learning_rate": 1.9641612722059827e-05, "loss": 0.2358, "step": 3945 }, { "epoch": 0.08558490238987715, "grad_norm": 2.1981892585754395, "learning_rate": 1.9640709169782473e-05, "loss": 0.226, "step": 3950 }, { "epoch": 0.08569323770935801, "grad_norm": 1.7624729871749878, "learning_rate": 1.9639804500773382e-05, "loss": 0.263, "step": 3955 }, { "epoch": 0.08580157302883887, "grad_norm": 2.2359025478363037, "learning_rate": 1.963889871513734e-05, "loss": 0.2157, "step": 3960 }, { "epoch": 0.08590990834831971, "grad_norm": 1.8036093711853027, "learning_rate": 1.9637991812979278e-05, "loss": 0.2448, "step": 3965 }, { "epoch": 0.08601824366780057, "grad_norm": 2.2935030460357666, "learning_rate": 1.9637083794404238e-05, "loss": 0.1987, "step": 3970 }, { "epoch": 0.08612657898728143, "grad_norm": 1.679460883140564, "learning_rate": 1.9636174659517402e-05, "loss": 0.2483, "step": 3975 }, { "epoch": 0.08623491430676229, "grad_norm": 6.089076519012451, "learning_rate": 1.9635264408424084e-05, "loss": 0.214, "step": 3980 }, { "epoch": 0.08634324962624315, "grad_norm": 2.3105814456939697, "learning_rate": 1.963435304122972e-05, "loss": 0.2784, "step": 3985 }, { "epoch": 0.08645158494572401, "grad_norm": 2.6269655227661133, "learning_rate": 1.963344055803988e-05, "loss": 0.2626, "step": 3990 }, { "epoch": 0.08655992026520486, "grad_norm": 1.7227693796157837, "learning_rate": 1.9632526958960257e-05, "loss": 0.2798, "step": 3995 }, { "epoch": 0.08666825558468572, "grad_norm": 2.5646049976348877, "learning_rate": 1.9631612244096684e-05, "loss": 0.2507, "step": 4000 }, { "epoch": 0.08677659090416658, "grad_norm": 2.047135591506958, "learning_rate": 1.9630696413555113e-05, "loss": 0.2196, "step": 4005 }, { "epoch": 0.08688492622364744, "grad_norm": 2.1482388973236084, "learning_rate": 1.9629779467441633e-05, "loss": 0.2961, "step": 4010 }, { "epoch": 0.0869932615431283, "grad_norm": 2.1228525638580322, "learning_rate": 1.962886140586245e-05, "loss": 0.2371, "step": 4015 }, { "epoch": 0.08710159686260915, "grad_norm": 2.444570302963257, "learning_rate": 1.962794222892392e-05, "loss": 0.2699, "step": 4020 }, { "epoch": 0.08720993218209, "grad_norm": 2.211974859237671, "learning_rate": 1.96270219367325e-05, "loss": 0.238, "step": 4025 }, { "epoch": 0.08731826750157086, "grad_norm": 2.244317054748535, "learning_rate": 1.962610052939481e-05, "loss": 0.2123, "step": 4030 }, { "epoch": 0.08742660282105172, "grad_norm": 2.402841091156006, "learning_rate": 1.9625178007017573e-05, "loss": 0.2273, "step": 4035 }, { "epoch": 0.08753493814053258, "grad_norm": 1.0087205171585083, "learning_rate": 1.9624254369707644e-05, "loss": 0.1979, "step": 4040 }, { "epoch": 0.08764327346001344, "grad_norm": 1.8760411739349365, "learning_rate": 1.962332961757202e-05, "loss": 0.2785, "step": 4045 }, { "epoch": 0.0877516087794943, "grad_norm": 2.1966068744659424, "learning_rate": 1.962240375071782e-05, "loss": 0.2644, "step": 4050 }, { "epoch": 0.08785994409897514, "grad_norm": 1.8222097158432007, "learning_rate": 1.9621476769252283e-05, "loss": 0.3113, "step": 4055 }, { "epoch": 0.087968279418456, "grad_norm": 2.114047050476074, "learning_rate": 1.9620548673282794e-05, "loss": 0.2108, "step": 4060 }, { "epoch": 0.08807661473793686, "grad_norm": 2.191697597503662, "learning_rate": 1.9619619462916858e-05, "loss": 0.2904, "step": 4065 }, { "epoch": 0.08818495005741772, "grad_norm": 2.003347635269165, "learning_rate": 1.9618689138262112e-05, "loss": 0.1935, "step": 4070 }, { "epoch": 0.08829328537689858, "grad_norm": 1.427628993988037, "learning_rate": 1.9617757699426315e-05, "loss": 0.2747, "step": 4075 }, { "epoch": 0.08840162069637944, "grad_norm": 2.0305981636047363, "learning_rate": 1.961682514651736e-05, "loss": 0.2674, "step": 4080 }, { "epoch": 0.08850995601586029, "grad_norm": 2.023725748062134, "learning_rate": 1.9615891479643274e-05, "loss": 0.2479, "step": 4085 }, { "epoch": 0.08861829133534115, "grad_norm": 1.5950899124145508, "learning_rate": 1.9614956698912205e-05, "loss": 0.2276, "step": 4090 }, { "epoch": 0.088726626654822, "grad_norm": 2.131404161453247, "learning_rate": 1.9614020804432435e-05, "loss": 0.306, "step": 4095 }, { "epoch": 0.08883496197430286, "grad_norm": 2.0052337646484375, "learning_rate": 1.9613083796312374e-05, "loss": 0.2476, "step": 4100 }, { "epoch": 0.08894329729378372, "grad_norm": 1.9502480030059814, "learning_rate": 1.961214567466056e-05, "loss": 0.1956, "step": 4105 }, { "epoch": 0.08905163261326457, "grad_norm": 1.7680630683898926, "learning_rate": 1.9611206439585657e-05, "loss": 0.1951, "step": 4110 }, { "epoch": 0.08915996793274543, "grad_norm": 2.0083017349243164, "learning_rate": 1.9610266091196464e-05, "loss": 0.2067, "step": 4115 }, { "epoch": 0.08926830325222629, "grad_norm": 1.2491459846496582, "learning_rate": 1.9609324629601908e-05, "loss": 0.2731, "step": 4120 }, { "epoch": 0.08937663857170715, "grad_norm": 1.4271960258483887, "learning_rate": 1.9608382054911043e-05, "loss": 0.2927, "step": 4125 }, { "epoch": 0.08948497389118801, "grad_norm": 1.9926037788391113, "learning_rate": 1.9607438367233044e-05, "loss": 0.2419, "step": 4130 }, { "epoch": 0.08959330921066887, "grad_norm": 1.7415533065795898, "learning_rate": 1.9606493566677236e-05, "loss": 0.1945, "step": 4135 }, { "epoch": 0.08970164453014971, "grad_norm": 2.213686466217041, "learning_rate": 1.960554765335305e-05, "loss": 0.3404, "step": 4140 }, { "epoch": 0.08980997984963057, "grad_norm": 2.400777578353882, "learning_rate": 1.960460062737006e-05, "loss": 0.345, "step": 4145 }, { "epoch": 0.08991831516911143, "grad_norm": 1.167728304862976, "learning_rate": 1.9603652488837963e-05, "loss": 0.2326, "step": 4150 }, { "epoch": 0.09002665048859229, "grad_norm": 1.786880612373352, "learning_rate": 1.960270323786659e-05, "loss": 0.2764, "step": 4155 }, { "epoch": 0.09013498580807315, "grad_norm": 1.3520445823669434, "learning_rate": 1.9601752874565897e-05, "loss": 0.1797, "step": 4160 }, { "epoch": 0.09024332112755401, "grad_norm": 1.4225924015045166, "learning_rate": 1.9600801399045966e-05, "loss": 0.2575, "step": 4165 }, { "epoch": 0.09035165644703486, "grad_norm": 1.1863045692443848, "learning_rate": 1.959984881141701e-05, "loss": 0.1699, "step": 4170 }, { "epoch": 0.09045999176651572, "grad_norm": 1.537259817123413, "learning_rate": 1.9598895111789378e-05, "loss": 0.3091, "step": 4175 }, { "epoch": 0.09056832708599657, "grad_norm": 1.8017233610153198, "learning_rate": 1.9597940300273536e-05, "loss": 0.2756, "step": 4180 }, { "epoch": 0.09067666240547743, "grad_norm": 2.7120516300201416, "learning_rate": 1.959698437698009e-05, "loss": 0.2021, "step": 4185 }, { "epoch": 0.0907849977249583, "grad_norm": 2.7187750339508057, "learning_rate": 1.959602734201977e-05, "loss": 0.2453, "step": 4190 }, { "epoch": 0.09089333304443915, "grad_norm": 1.591194987297058, "learning_rate": 1.9595069195503424e-05, "loss": 0.177, "step": 4195 }, { "epoch": 0.09100166836392, "grad_norm": 1.6496496200561523, "learning_rate": 1.959410993754205e-05, "loss": 0.1979, "step": 4200 }, { "epoch": 0.09111000368340086, "grad_norm": 2.165963888168335, "learning_rate": 1.959314956824676e-05, "loss": 0.1649, "step": 4205 }, { "epoch": 0.09121833900288172, "grad_norm": 2.0191280841827393, "learning_rate": 1.9592188087728794e-05, "loss": 0.2919, "step": 4210 }, { "epoch": 0.09132667432236258, "grad_norm": 2.221555709838867, "learning_rate": 1.959122549609953e-05, "loss": 0.2854, "step": 4215 }, { "epoch": 0.09143500964184344, "grad_norm": 1.9604482650756836, "learning_rate": 1.9590261793470474e-05, "loss": 0.2695, "step": 4220 }, { "epoch": 0.0915433449613243, "grad_norm": 1.6458537578582764, "learning_rate": 1.9589296979953248e-05, "loss": 0.2834, "step": 4225 }, { "epoch": 0.09165168028080514, "grad_norm": 1.879210114479065, "learning_rate": 1.9588331055659614e-05, "loss": 0.1936, "step": 4230 }, { "epoch": 0.091760015600286, "grad_norm": 1.8881080150604248, "learning_rate": 1.9587364020701458e-05, "loss": 0.1943, "step": 4235 }, { "epoch": 0.09186835091976686, "grad_norm": 1.8217177391052246, "learning_rate": 1.95863958751908e-05, "loss": 0.2823, "step": 4240 }, { "epoch": 0.09197668623924772, "grad_norm": 1.6431145668029785, "learning_rate": 1.958542661923979e-05, "loss": 0.2637, "step": 4245 }, { "epoch": 0.09208502155872858, "grad_norm": 1.7003871202468872, "learning_rate": 1.958445625296069e-05, "loss": 0.2304, "step": 4250 }, { "epoch": 0.09219335687820944, "grad_norm": 1.607290506362915, "learning_rate": 1.958348477646591e-05, "loss": 0.2034, "step": 4255 }, { "epoch": 0.09230169219769029, "grad_norm": 2.0044918060302734, "learning_rate": 1.958251218986798e-05, "loss": 0.1976, "step": 4260 }, { "epoch": 0.09241002751717114, "grad_norm": 2.034909248352051, "learning_rate": 1.9581538493279553e-05, "loss": 0.1471, "step": 4265 }, { "epoch": 0.092518362836652, "grad_norm": 2.0954389572143555, "learning_rate": 1.958056368681343e-05, "loss": 0.2202, "step": 4270 }, { "epoch": 0.09262669815613286, "grad_norm": 2.012875556945801, "learning_rate": 1.9579587770582517e-05, "loss": 0.2734, "step": 4275 }, { "epoch": 0.09273503347561372, "grad_norm": 1.9057338237762451, "learning_rate": 1.9578610744699868e-05, "loss": 0.2627, "step": 4280 }, { "epoch": 0.09284336879509458, "grad_norm": 1.6985410451889038, "learning_rate": 1.9577632609278644e-05, "loss": 0.2793, "step": 4285 }, { "epoch": 0.09295170411457543, "grad_norm": 1.511784553527832, "learning_rate": 1.957665336443216e-05, "loss": 0.2994, "step": 4290 }, { "epoch": 0.09306003943405629, "grad_norm": 1.7003707885742188, "learning_rate": 1.957567301027384e-05, "loss": 0.145, "step": 4295 }, { "epoch": 0.09316837475353715, "grad_norm": 1.4249892234802246, "learning_rate": 1.9574691546917247e-05, "loss": 0.1908, "step": 4300 }, { "epoch": 0.09327671007301801, "grad_norm": 1.9096426963806152, "learning_rate": 1.9573708974476068e-05, "loss": 0.2941, "step": 4305 }, { "epoch": 0.09338504539249887, "grad_norm": 1.4391669034957886, "learning_rate": 1.9572725293064118e-05, "loss": 0.199, "step": 4310 }, { "epoch": 0.09349338071197973, "grad_norm": 2.1198370456695557, "learning_rate": 1.957174050279534e-05, "loss": 0.2975, "step": 4315 }, { "epoch": 0.09360171603146057, "grad_norm": 1.5862140655517578, "learning_rate": 1.957075460378381e-05, "loss": 0.2289, "step": 4320 }, { "epoch": 0.09371005135094143, "grad_norm": 2.2644853591918945, "learning_rate": 1.956976759614373e-05, "loss": 0.2828, "step": 4325 }, { "epoch": 0.09381838667042229, "grad_norm": 1.0962896347045898, "learning_rate": 1.956877947998943e-05, "loss": 0.1641, "step": 4330 }, { "epoch": 0.09392672198990315, "grad_norm": 1.7039878368377686, "learning_rate": 1.9567790255435367e-05, "loss": 0.1736, "step": 4335 }, { "epoch": 0.09403505730938401, "grad_norm": 1.4395534992218018, "learning_rate": 1.9566799922596127e-05, "loss": 0.2262, "step": 4340 }, { "epoch": 0.09414339262886487, "grad_norm": 2.8208065032958984, "learning_rate": 1.9565808481586427e-05, "loss": 0.259, "step": 4345 }, { "epoch": 0.09425172794834571, "grad_norm": 2.0415501594543457, "learning_rate": 1.9564815932521113e-05, "loss": 0.1976, "step": 4350 }, { "epoch": 0.09436006326782657, "grad_norm": 2.132080554962158, "learning_rate": 1.9563822275515153e-05, "loss": 0.2902, "step": 4355 }, { "epoch": 0.09446839858730743, "grad_norm": 1.2803922891616821, "learning_rate": 1.9562827510683644e-05, "loss": 0.2021, "step": 4360 }, { "epoch": 0.0945767339067883, "grad_norm": 1.4983223676681519, "learning_rate": 1.9561831638141823e-05, "loss": 0.2023, "step": 4365 }, { "epoch": 0.09468506922626915, "grad_norm": 1.4862704277038574, "learning_rate": 1.956083465800504e-05, "loss": 0.1962, "step": 4370 }, { "epoch": 0.09479340454575, "grad_norm": 1.4699090719223022, "learning_rate": 1.955983657038879e-05, "loss": 0.1645, "step": 4375 }, { "epoch": 0.09490173986523086, "grad_norm": 2.7896580696105957, "learning_rate": 1.9558837375408673e-05, "loss": 0.2249, "step": 4380 }, { "epoch": 0.09501007518471172, "grad_norm": 1.7890442609786987, "learning_rate": 1.955783707318044e-05, "loss": 0.1891, "step": 4385 }, { "epoch": 0.09511841050419258, "grad_norm": 2.3354225158691406, "learning_rate": 1.9556835663819956e-05, "loss": 0.243, "step": 4390 }, { "epoch": 0.09522674582367344, "grad_norm": 1.5206886529922485, "learning_rate": 1.9555833147443225e-05, "loss": 0.2287, "step": 4395 }, { "epoch": 0.0953350811431543, "grad_norm": 1.6618541479110718, "learning_rate": 1.955482952416637e-05, "loss": 0.2116, "step": 4400 }, { "epoch": 0.09544341646263514, "grad_norm": 2.746255874633789, "learning_rate": 1.9553824794105644e-05, "loss": 0.313, "step": 4405 }, { "epoch": 0.095551751782116, "grad_norm": 2.089911937713623, "learning_rate": 1.9552818957377433e-05, "loss": 0.2971, "step": 4410 }, { "epoch": 0.09566008710159686, "grad_norm": 1.716422438621521, "learning_rate": 1.9551812014098244e-05, "loss": 0.2595, "step": 4415 }, { "epoch": 0.09576842242107772, "grad_norm": 1.7489256858825684, "learning_rate": 1.9550803964384725e-05, "loss": 0.2554, "step": 4420 }, { "epoch": 0.09587675774055858, "grad_norm": 2.0086517333984375, "learning_rate": 1.9549794808353636e-05, "loss": 0.2939, "step": 4425 }, { "epoch": 0.09598509306003944, "grad_norm": 2.097663402557373, "learning_rate": 1.954878454612187e-05, "loss": 0.2889, "step": 4430 }, { "epoch": 0.09609342837952028, "grad_norm": 1.8270446062088013, "learning_rate": 1.9547773177806458e-05, "loss": 0.2169, "step": 4435 }, { "epoch": 0.09620176369900114, "grad_norm": 1.7345998287200928, "learning_rate": 1.954676070352455e-05, "loss": 0.267, "step": 4440 }, { "epoch": 0.096310099018482, "grad_norm": 1.9243344068527222, "learning_rate": 1.954574712339343e-05, "loss": 0.1647, "step": 4445 }, { "epoch": 0.09641843433796286, "grad_norm": 1.521324872970581, "learning_rate": 1.9544732437530492e-05, "loss": 0.3034, "step": 4450 }, { "epoch": 0.09652676965744372, "grad_norm": 1.3503543138504028, "learning_rate": 1.9543716646053284e-05, "loss": 0.2803, "step": 4455 }, { "epoch": 0.09663510497692458, "grad_norm": 0.9564811587333679, "learning_rate": 1.954269974907947e-05, "loss": 0.1664, "step": 4460 }, { "epoch": 0.09674344029640543, "grad_norm": 1.9321684837341309, "learning_rate": 1.9541681746726836e-05, "loss": 0.2702, "step": 4465 }, { "epoch": 0.09685177561588629, "grad_norm": 1.7572699785232544, "learning_rate": 1.9540662639113308e-05, "loss": 0.1243, "step": 4470 }, { "epoch": 0.09696011093536715, "grad_norm": 1.9000691175460815, "learning_rate": 1.953964242635693e-05, "loss": 0.3047, "step": 4475 }, { "epoch": 0.097068446254848, "grad_norm": 2.1372039318084717, "learning_rate": 1.9538621108575885e-05, "loss": 0.2578, "step": 4480 }, { "epoch": 0.09717678157432887, "grad_norm": 2.051020860671997, "learning_rate": 1.953759868588847e-05, "loss": 0.2882, "step": 4485 }, { "epoch": 0.09728511689380973, "grad_norm": 1.2584060430526733, "learning_rate": 1.9536575158413122e-05, "loss": 0.2519, "step": 4490 }, { "epoch": 0.09739345221329057, "grad_norm": 2.133814573287964, "learning_rate": 1.95355505262684e-05, "loss": 0.2459, "step": 4495 }, { "epoch": 0.09750178753277143, "grad_norm": 0.7904610633850098, "learning_rate": 1.953452478957299e-05, "loss": 0.1948, "step": 4500 }, { "epoch": 0.09761012285225229, "grad_norm": 1.5458906888961792, "learning_rate": 1.9533497948445712e-05, "loss": 0.203, "step": 4505 }, { "epoch": 0.09771845817173315, "grad_norm": 2.260592222213745, "learning_rate": 1.9532470003005508e-05, "loss": 0.2383, "step": 4510 }, { "epoch": 0.09782679349121401, "grad_norm": 1.438579797744751, "learning_rate": 1.953144095337145e-05, "loss": 0.2651, "step": 4515 }, { "epoch": 0.09793512881069487, "grad_norm": 1.5491383075714111, "learning_rate": 1.953041079966274e-05, "loss": 0.2513, "step": 4520 }, { "epoch": 0.09804346413017571, "grad_norm": 2.306760787963867, "learning_rate": 1.9529379541998703e-05, "loss": 0.2125, "step": 4525 }, { "epoch": 0.09815179944965657, "grad_norm": 1.8859531879425049, "learning_rate": 1.95283471804988e-05, "loss": 0.2193, "step": 4530 }, { "epoch": 0.09826013476913743, "grad_norm": 1.5477559566497803, "learning_rate": 1.9527313715282604e-05, "loss": 0.1585, "step": 4535 }, { "epoch": 0.09836847008861829, "grad_norm": 2.7212345600128174, "learning_rate": 1.9526279146469837e-05, "loss": 0.2441, "step": 4540 }, { "epoch": 0.09847680540809915, "grad_norm": 2.9736382961273193, "learning_rate": 1.9525243474180335e-05, "loss": 0.2161, "step": 4545 }, { "epoch": 0.09858514072758001, "grad_norm": 2.1352150440216064, "learning_rate": 1.9524206698534063e-05, "loss": 0.3012, "step": 4550 }, { "epoch": 0.09869347604706086, "grad_norm": 1.7064807415008545, "learning_rate": 1.952316881965112e-05, "loss": 0.3125, "step": 4555 }, { "epoch": 0.09880181136654172, "grad_norm": 2.604336738586426, "learning_rate": 1.9522129837651725e-05, "loss": 0.1961, "step": 4560 }, { "epoch": 0.09891014668602258, "grad_norm": 1.8105396032333374, "learning_rate": 1.952108975265623e-05, "loss": 0.2352, "step": 4565 }, { "epoch": 0.09901848200550344, "grad_norm": 1.6036890745162964, "learning_rate": 1.952004856478511e-05, "loss": 0.2093, "step": 4570 }, { "epoch": 0.0991268173249843, "grad_norm": 1.188741683959961, "learning_rate": 1.9519006274158978e-05, "loss": 0.261, "step": 4575 }, { "epoch": 0.09923515264446515, "grad_norm": 1.1476854085922241, "learning_rate": 1.9517962880898562e-05, "loss": 0.2953, "step": 4580 }, { "epoch": 0.099343487963946, "grad_norm": 2.1903626918792725, "learning_rate": 1.9516918385124724e-05, "loss": 0.2957, "step": 4585 }, { "epoch": 0.09945182328342686, "grad_norm": 2.084172248840332, "learning_rate": 1.9515872786958458e-05, "loss": 0.3301, "step": 4590 }, { "epoch": 0.09956015860290772, "grad_norm": 1.8193485736846924, "learning_rate": 1.9514826086520874e-05, "loss": 0.2221, "step": 4595 }, { "epoch": 0.09966849392238858, "grad_norm": 1.6388713121414185, "learning_rate": 1.9513778283933226e-05, "loss": 0.2347, "step": 4600 }, { "epoch": 0.09977682924186944, "grad_norm": 1.35920250415802, "learning_rate": 1.9512729379316875e-05, "loss": 0.212, "step": 4605 }, { "epoch": 0.0998851645613503, "grad_norm": 1.6679922342300415, "learning_rate": 1.9511679372793326e-05, "loss": 0.2287, "step": 4610 }, { "epoch": 0.09999349988083114, "grad_norm": 2.2786998748779297, "learning_rate": 1.9510628264484207e-05, "loss": 0.2665, "step": 4615 }, { "epoch": 0.100101835200312, "grad_norm": 1.7411468029022217, "learning_rate": 1.9509576054511274e-05, "loss": 0.2751, "step": 4620 }, { "epoch": 0.10021017051979286, "grad_norm": 4.406885147094727, "learning_rate": 1.9508522742996408e-05, "loss": 0.1776, "step": 4625 }, { "epoch": 0.10031850583927372, "grad_norm": 2.0615234375, "learning_rate": 1.950746833006162e-05, "loss": 0.3111, "step": 4630 }, { "epoch": 0.10042684115875458, "grad_norm": 2.351184844970703, "learning_rate": 1.950641281582905e-05, "loss": 0.2344, "step": 4635 }, { "epoch": 0.10053517647823544, "grad_norm": 1.2274166345596313, "learning_rate": 1.9505356200420956e-05, "loss": 0.2116, "step": 4640 }, { "epoch": 0.10064351179771629, "grad_norm": 1.2116609811782837, "learning_rate": 1.950429848395974e-05, "loss": 0.1884, "step": 4645 }, { "epoch": 0.10075184711719715, "grad_norm": 2.405745029449463, "learning_rate": 1.9503239666567916e-05, "loss": 0.1942, "step": 4650 }, { "epoch": 0.100860182436678, "grad_norm": 1.2979849576950073, "learning_rate": 1.950217974836814e-05, "loss": 0.2577, "step": 4655 }, { "epoch": 0.10096851775615887, "grad_norm": 1.8236923217773438, "learning_rate": 1.950111872948318e-05, "loss": 0.285, "step": 4660 }, { "epoch": 0.10107685307563972, "grad_norm": 2.10845947265625, "learning_rate": 1.9500056610035944e-05, "loss": 0.218, "step": 4665 }, { "epoch": 0.10118518839512057, "grad_norm": 1.5836045742034912, "learning_rate": 1.949899339014946e-05, "loss": 0.145, "step": 4670 }, { "epoch": 0.10129352371460143, "grad_norm": 1.7591538429260254, "learning_rate": 1.9497929069946884e-05, "loss": 0.1735, "step": 4675 }, { "epoch": 0.10140185903408229, "grad_norm": 1.3886202573776245, "learning_rate": 1.9496863649551507e-05, "loss": 0.1713, "step": 4680 }, { "epoch": 0.10151019435356315, "grad_norm": 2.1730618476867676, "learning_rate": 1.9495797129086736e-05, "loss": 0.2092, "step": 4685 }, { "epoch": 0.10161852967304401, "grad_norm": 2.0523808002471924, "learning_rate": 1.9494729508676117e-05, "loss": 0.1707, "step": 4690 }, { "epoch": 0.10172686499252487, "grad_norm": 2.053034543991089, "learning_rate": 1.9493660788443312e-05, "loss": 0.305, "step": 4695 }, { "epoch": 0.10183520031200571, "grad_norm": 1.6022275686264038, "learning_rate": 1.9492590968512125e-05, "loss": 0.2311, "step": 4700 }, { "epoch": 0.10194353563148657, "grad_norm": 2.21823787689209, "learning_rate": 1.949152004900647e-05, "loss": 0.2646, "step": 4705 }, { "epoch": 0.10205187095096743, "grad_norm": 2.180652618408203, "learning_rate": 1.94904480300504e-05, "loss": 0.2059, "step": 4710 }, { "epoch": 0.10216020627044829, "grad_norm": 2.0722618103027344, "learning_rate": 1.9489374911768097e-05, "loss": 0.2481, "step": 4715 }, { "epoch": 0.10226854158992915, "grad_norm": 1.1645525693893433, "learning_rate": 1.9488300694283858e-05, "loss": 0.3048, "step": 4720 }, { "epoch": 0.10237687690941001, "grad_norm": 2.293919563293457, "learning_rate": 1.948722537772212e-05, "loss": 0.1942, "step": 4725 }, { "epoch": 0.10248521222889086, "grad_norm": 1.6878817081451416, "learning_rate": 1.9486148962207437e-05, "loss": 0.2293, "step": 4730 }, { "epoch": 0.10259354754837172, "grad_norm": 2.889211416244507, "learning_rate": 1.94850714478645e-05, "loss": 0.2663, "step": 4735 }, { "epoch": 0.10270188286785258, "grad_norm": 1.779321551322937, "learning_rate": 1.9483992834818126e-05, "loss": 0.2549, "step": 4740 }, { "epoch": 0.10281021818733344, "grad_norm": 1.5327274799346924, "learning_rate": 1.948291312319325e-05, "loss": 0.2595, "step": 4745 }, { "epoch": 0.1029185535068143, "grad_norm": 1.8245292901992798, "learning_rate": 1.9481832313114943e-05, "loss": 0.3435, "step": 4750 }, { "epoch": 0.10302688882629515, "grad_norm": 1.8814691305160522, "learning_rate": 1.94807504047084e-05, "loss": 0.1585, "step": 4755 }, { "epoch": 0.103135224145776, "grad_norm": 3.4667418003082275, "learning_rate": 1.947966739809894e-05, "loss": 0.2259, "step": 4760 }, { "epoch": 0.10324355946525686, "grad_norm": 2.053940773010254, "learning_rate": 1.9478583293412024e-05, "loss": 0.332, "step": 4765 }, { "epoch": 0.10335189478473772, "grad_norm": 1.8095386028289795, "learning_rate": 1.947749809077322e-05, "loss": 0.1881, "step": 4770 }, { "epoch": 0.10346023010421858, "grad_norm": 2.604734420776367, "learning_rate": 1.9476411790308237e-05, "loss": 0.2074, "step": 4775 }, { "epoch": 0.10356856542369944, "grad_norm": 2.214813709259033, "learning_rate": 1.94753243921429e-05, "loss": 0.1737, "step": 4780 }, { "epoch": 0.1036769007431803, "grad_norm": 2.5291292667388916, "learning_rate": 1.9474235896403177e-05, "loss": 0.2044, "step": 4785 }, { "epoch": 0.10378523606266114, "grad_norm": 2.4027013778686523, "learning_rate": 1.9473146303215146e-05, "loss": 0.2223, "step": 4790 }, { "epoch": 0.103893571382142, "grad_norm": 2.193058729171753, "learning_rate": 1.9472055612705026e-05, "loss": 0.1885, "step": 4795 }, { "epoch": 0.10400190670162286, "grad_norm": 3.190047025680542, "learning_rate": 1.9470963824999153e-05, "loss": 0.252, "step": 4800 }, { "epoch": 0.10411024202110372, "grad_norm": 1.851123571395874, "learning_rate": 1.9469870940224e-05, "loss": 0.2189, "step": 4805 }, { "epoch": 0.10421857734058458, "grad_norm": 1.9947192668914795, "learning_rate": 1.9468776958506152e-05, "loss": 0.1904, "step": 4810 }, { "epoch": 0.10432691266006544, "grad_norm": 1.8086005449295044, "learning_rate": 1.946768187997234e-05, "loss": 0.2344, "step": 4815 }, { "epoch": 0.10443524797954629, "grad_norm": 1.4183101654052734, "learning_rate": 1.9466585704749406e-05, "loss": 0.3387, "step": 4820 }, { "epoch": 0.10454358329902715, "grad_norm": 1.8045018911361694, "learning_rate": 1.9465488432964326e-05, "loss": 0.2141, "step": 4825 }, { "epoch": 0.104651918618508, "grad_norm": 1.8007984161376953, "learning_rate": 1.9464390064744206e-05, "loss": 0.2476, "step": 4830 }, { "epoch": 0.10476025393798886, "grad_norm": 1.7314225435256958, "learning_rate": 1.9463290600216274e-05, "loss": 0.3134, "step": 4835 }, { "epoch": 0.10486858925746972, "grad_norm": 2.1718015670776367, "learning_rate": 1.9462190039507884e-05, "loss": 0.2941, "step": 4840 }, { "epoch": 0.10497692457695058, "grad_norm": 2.2042999267578125, "learning_rate": 1.946108838274652e-05, "loss": 0.2582, "step": 4845 }, { "epoch": 0.10508525989643143, "grad_norm": 2.5267317295074463, "learning_rate": 1.9459985630059795e-05, "loss": 0.2565, "step": 4850 }, { "epoch": 0.10519359521591229, "grad_norm": 2.234807252883911, "learning_rate": 1.945888178157545e-05, "loss": 0.2939, "step": 4855 }, { "epoch": 0.10530193053539315, "grad_norm": 1.5595486164093018, "learning_rate": 1.9457776837421342e-05, "loss": 0.2098, "step": 4860 }, { "epoch": 0.10541026585487401, "grad_norm": 1.7091070413589478, "learning_rate": 1.9456670797725463e-05, "loss": 0.2275, "step": 4865 }, { "epoch": 0.10551860117435487, "grad_norm": 2.0374577045440674, "learning_rate": 1.9455563662615934e-05, "loss": 0.2861, "step": 4870 }, { "epoch": 0.10562693649383573, "grad_norm": 2.2183594703674316, "learning_rate": 1.9454455432221003e-05, "loss": 0.2015, "step": 4875 }, { "epoch": 0.10573527181331657, "grad_norm": 2.718339204788208, "learning_rate": 1.9453346106669032e-05, "loss": 0.2594, "step": 4880 }, { "epoch": 0.10584360713279743, "grad_norm": 2.0788795948028564, "learning_rate": 1.945223568608853e-05, "loss": 0.3736, "step": 4885 }, { "epoch": 0.10595194245227829, "grad_norm": 2.1102993488311768, "learning_rate": 1.9451124170608112e-05, "loss": 0.262, "step": 4890 }, { "epoch": 0.10606027777175915, "grad_norm": 2.3669936656951904, "learning_rate": 1.9450011560356542e-05, "loss": 0.2294, "step": 4895 }, { "epoch": 0.10616861309124001, "grad_norm": 1.7020565271377563, "learning_rate": 1.9448897855462695e-05, "loss": 0.204, "step": 4900 }, { "epoch": 0.10627694841072087, "grad_norm": 1.3402457237243652, "learning_rate": 1.9447783056055574e-05, "loss": 0.2239, "step": 4905 }, { "epoch": 0.10638528373020172, "grad_norm": 2.137190103530884, "learning_rate": 1.9446667162264314e-05, "loss": 0.2153, "step": 4910 }, { "epoch": 0.10649361904968258, "grad_norm": 1.5231975317001343, "learning_rate": 1.9445550174218172e-05, "loss": 0.1761, "step": 4915 }, { "epoch": 0.10660195436916343, "grad_norm": 2.6731460094451904, "learning_rate": 1.944443209204654e-05, "loss": 0.2183, "step": 4920 }, { "epoch": 0.1067102896886443, "grad_norm": 1.6533446311950684, "learning_rate": 1.9443312915878925e-05, "loss": 0.2527, "step": 4925 }, { "epoch": 0.10681862500812515, "grad_norm": 1.6496350765228271, "learning_rate": 1.944219264584497e-05, "loss": 0.2102, "step": 4930 }, { "epoch": 0.106926960327606, "grad_norm": 2.0769472122192383, "learning_rate": 1.9441071282074443e-05, "loss": 0.1977, "step": 4935 }, { "epoch": 0.10703529564708686, "grad_norm": 1.9438214302062988, "learning_rate": 1.9439948824697232e-05, "loss": 0.2739, "step": 4940 }, { "epoch": 0.10714363096656772, "grad_norm": 1.773073434829712, "learning_rate": 1.9438825273843363e-05, "loss": 0.1818, "step": 4945 }, { "epoch": 0.10725196628604858, "grad_norm": 2.5967700481414795, "learning_rate": 1.9437700629642977e-05, "loss": 0.2185, "step": 4950 }, { "epoch": 0.10736030160552944, "grad_norm": 1.8425761461257935, "learning_rate": 1.9436574892226355e-05, "loss": 0.1989, "step": 4955 }, { "epoch": 0.1074686369250103, "grad_norm": 2.0173568725585938, "learning_rate": 1.943544806172389e-05, "loss": 0.257, "step": 4960 }, { "epoch": 0.10757697224449114, "grad_norm": 1.5192949771881104, "learning_rate": 1.9434320138266103e-05, "loss": 0.2905, "step": 4965 }, { "epoch": 0.107685307563972, "grad_norm": 2.2133939266204834, "learning_rate": 1.9433191121983662e-05, "loss": 0.2139, "step": 4970 }, { "epoch": 0.10779364288345286, "grad_norm": 2.099733829498291, "learning_rate": 1.9432061013007333e-05, "loss": 0.2998, "step": 4975 }, { "epoch": 0.10790197820293372, "grad_norm": 1.6056474447250366, "learning_rate": 1.9430929811468033e-05, "loss": 0.1419, "step": 4980 }, { "epoch": 0.10801031352241458, "grad_norm": 2.2511844635009766, "learning_rate": 1.9429797517496785e-05, "loss": 0.284, "step": 4985 }, { "epoch": 0.10811864884189544, "grad_norm": 1.9803236722946167, "learning_rate": 1.9428664131224754e-05, "loss": 0.2441, "step": 4990 }, { "epoch": 0.10822698416137629, "grad_norm": 1.971070647239685, "learning_rate": 1.942752965278323e-05, "loss": 0.3619, "step": 4995 }, { "epoch": 0.10833531948085715, "grad_norm": 2.115551233291626, "learning_rate": 1.9426394082303614e-05, "loss": 0.2042, "step": 5000 }, { "epoch": 0.108443654800338, "grad_norm": 1.665581226348877, "learning_rate": 1.9425257419917455e-05, "loss": 0.2606, "step": 5005 }, { "epoch": 0.10855199011981886, "grad_norm": 2.974658250808716, "learning_rate": 1.9424119665756412e-05, "loss": 0.1755, "step": 5010 }, { "epoch": 0.10866032543929972, "grad_norm": 2.2720234394073486, "learning_rate": 1.942298081995228e-05, "loss": 0.2444, "step": 5015 }, { "epoch": 0.10876866075878058, "grad_norm": 2.1371166706085205, "learning_rate": 1.9421840882636975e-05, "loss": 0.2222, "step": 5020 }, { "epoch": 0.10887699607826143, "grad_norm": 2.414825916290283, "learning_rate": 1.9420699853942543e-05, "loss": 0.1791, "step": 5025 }, { "epoch": 0.10898533139774229, "grad_norm": 2.382214069366455, "learning_rate": 1.9419557734001156e-05, "loss": 0.2188, "step": 5030 }, { "epoch": 0.10909366671722315, "grad_norm": 1.8111759424209595, "learning_rate": 1.941841452294511e-05, "loss": 0.2647, "step": 5035 }, { "epoch": 0.10920200203670401, "grad_norm": 1.7401093244552612, "learning_rate": 1.941727022090683e-05, "loss": 0.2788, "step": 5040 }, { "epoch": 0.10931033735618487, "grad_norm": 2.3472259044647217, "learning_rate": 1.9416124828018864e-05, "loss": 0.2479, "step": 5045 }, { "epoch": 0.10941867267566573, "grad_norm": 2.1000943183898926, "learning_rate": 1.941497834441389e-05, "loss": 0.2428, "step": 5050 }, { "epoch": 0.10952700799514657, "grad_norm": 2.210496664047241, "learning_rate": 1.9413830770224717e-05, "loss": 0.2472, "step": 5055 }, { "epoch": 0.10963534331462743, "grad_norm": 1.0459803342819214, "learning_rate": 1.941268210558426e-05, "loss": 0.2723, "step": 5060 }, { "epoch": 0.10974367863410829, "grad_norm": 1.7438957691192627, "learning_rate": 1.9411532350625588e-05, "loss": 0.2728, "step": 5065 }, { "epoch": 0.10985201395358915, "grad_norm": 1.36753249168396, "learning_rate": 1.9410381505481878e-05, "loss": 0.2173, "step": 5070 }, { "epoch": 0.10996034927307001, "grad_norm": 1.3276971578598022, "learning_rate": 1.9409229570286436e-05, "loss": 0.1898, "step": 5075 }, { "epoch": 0.11006868459255087, "grad_norm": 1.691213607788086, "learning_rate": 1.94080765451727e-05, "loss": 0.1931, "step": 5080 }, { "epoch": 0.11017701991203172, "grad_norm": 2.597869634628296, "learning_rate": 1.940692243027423e-05, "loss": 0.2299, "step": 5085 }, { "epoch": 0.11028535523151257, "grad_norm": 1.5075138807296753, "learning_rate": 1.940576722572471e-05, "loss": 0.2381, "step": 5090 }, { "epoch": 0.11039369055099343, "grad_norm": 1.9195259809494019, "learning_rate": 1.9404610931657957e-05, "loss": 0.1875, "step": 5095 }, { "epoch": 0.1105020258704743, "grad_norm": 1.6254398822784424, "learning_rate": 1.9403453548207905e-05, "loss": 0.252, "step": 5100 }, { "epoch": 0.11061036118995515, "grad_norm": 1.918467402458191, "learning_rate": 1.940229507550863e-05, "loss": 0.1792, "step": 5105 }, { "epoch": 0.11071869650943601, "grad_norm": 2.0262532234191895, "learning_rate": 1.940113551369431e-05, "loss": 0.3173, "step": 5110 }, { "epoch": 0.11082703182891686, "grad_norm": 1.3614825010299683, "learning_rate": 1.9399974862899273e-05, "loss": 0.2423, "step": 5115 }, { "epoch": 0.11093536714839772, "grad_norm": 2.124215602874756, "learning_rate": 1.9398813123257958e-05, "loss": 0.2862, "step": 5120 }, { "epoch": 0.11104370246787858, "grad_norm": 2.983076572418213, "learning_rate": 1.9397650294904938e-05, "loss": 0.2397, "step": 5125 }, { "epoch": 0.11115203778735944, "grad_norm": 2.367041826248169, "learning_rate": 1.9396486377974912e-05, "loss": 0.2448, "step": 5130 }, { "epoch": 0.1112603731068403, "grad_norm": 2.40126633644104, "learning_rate": 1.9395321372602693e-05, "loss": 0.2619, "step": 5135 }, { "epoch": 0.11136870842632116, "grad_norm": 1.6886483430862427, "learning_rate": 1.939415527892324e-05, "loss": 0.2407, "step": 5140 }, { "epoch": 0.111477043745802, "grad_norm": 2.0159740447998047, "learning_rate": 1.9392988097071615e-05, "loss": 0.2814, "step": 5145 }, { "epoch": 0.11158537906528286, "grad_norm": 1.8577842712402344, "learning_rate": 1.939181982718303e-05, "loss": 0.2808, "step": 5150 }, { "epoch": 0.11169371438476372, "grad_norm": 1.8393385410308838, "learning_rate": 1.939065046939281e-05, "loss": 0.2245, "step": 5155 }, { "epoch": 0.11180204970424458, "grad_norm": 2.8599042892456055, "learning_rate": 1.93894800238364e-05, "loss": 0.2622, "step": 5160 }, { "epoch": 0.11191038502372544, "grad_norm": 1.6907846927642822, "learning_rate": 1.938830849064939e-05, "loss": 0.1474, "step": 5165 }, { "epoch": 0.1120187203432063, "grad_norm": 3.5273046493530273, "learning_rate": 1.938713586996747e-05, "loss": 0.2155, "step": 5170 }, { "epoch": 0.11212705566268714, "grad_norm": 1.6213750839233398, "learning_rate": 1.9385962161926485e-05, "loss": 0.2026, "step": 5175 }, { "epoch": 0.112235390982168, "grad_norm": 2.2302846908569336, "learning_rate": 1.9384787366662384e-05, "loss": 0.3461, "step": 5180 }, { "epoch": 0.11234372630164886, "grad_norm": 2.183605432510376, "learning_rate": 1.938361148431125e-05, "loss": 0.2235, "step": 5185 }, { "epoch": 0.11245206162112972, "grad_norm": 2.9655051231384277, "learning_rate": 1.9382434515009292e-05, "loss": 0.2789, "step": 5190 }, { "epoch": 0.11256039694061058, "grad_norm": 2.34713077545166, "learning_rate": 1.9381256458892842e-05, "loss": 0.2167, "step": 5195 }, { "epoch": 0.11266873226009143, "grad_norm": 1.712239146232605, "learning_rate": 1.9380077316098366e-05, "loss": 0.2024, "step": 5200 }, { "epoch": 0.11277706757957229, "grad_norm": 1.6155084371566772, "learning_rate": 1.9378897086762446e-05, "loss": 0.2093, "step": 5205 }, { "epoch": 0.11288540289905315, "grad_norm": 2.075010299682617, "learning_rate": 1.9377715771021793e-05, "loss": 0.255, "step": 5210 }, { "epoch": 0.112993738218534, "grad_norm": 2.281651020050049, "learning_rate": 1.9376533369013248e-05, "loss": 0.2963, "step": 5215 }, { "epoch": 0.11310207353801487, "grad_norm": 2.692662000656128, "learning_rate": 1.9375349880873768e-05, "loss": 0.2503, "step": 5220 }, { "epoch": 0.11321040885749573, "grad_norm": 2.3433640003204346, "learning_rate": 1.9374165306740452e-05, "loss": 0.2572, "step": 5225 }, { "epoch": 0.11331874417697657, "grad_norm": 1.8082655668258667, "learning_rate": 1.937297964675051e-05, "loss": 0.2038, "step": 5230 }, { "epoch": 0.11342707949645743, "grad_norm": 1.8943202495574951, "learning_rate": 1.937179290104128e-05, "loss": 0.2179, "step": 5235 }, { "epoch": 0.11353541481593829, "grad_norm": 2.516770601272583, "learning_rate": 1.9370605069750233e-05, "loss": 0.2074, "step": 5240 }, { "epoch": 0.11364375013541915, "grad_norm": 1.6063692569732666, "learning_rate": 1.9369416153014962e-05, "loss": 0.2531, "step": 5245 }, { "epoch": 0.11375208545490001, "grad_norm": 1.7355687618255615, "learning_rate": 1.9368226150973184e-05, "loss": 0.2809, "step": 5250 }, { "epoch": 0.11386042077438087, "grad_norm": 2.2973432540893555, "learning_rate": 1.936703506376274e-05, "loss": 0.2413, "step": 5255 }, { "epoch": 0.11396875609386171, "grad_norm": 2.1573472023010254, "learning_rate": 1.9365842891521603e-05, "loss": 0.2513, "step": 5260 }, { "epoch": 0.11407709141334257, "grad_norm": 2.3615307807922363, "learning_rate": 1.9364649634387865e-05, "loss": 0.2687, "step": 5265 }, { "epoch": 0.11418542673282343, "grad_norm": 1.190590262413025, "learning_rate": 1.9363455292499754e-05, "loss": 0.2102, "step": 5270 }, { "epoch": 0.11429376205230429, "grad_norm": 2.138840913772583, "learning_rate": 1.936225986599561e-05, "loss": 0.2284, "step": 5275 }, { "epoch": 0.11440209737178515, "grad_norm": 2.0433390140533447, "learning_rate": 1.9361063355013906e-05, "loss": 0.3464, "step": 5280 }, { "epoch": 0.11451043269126601, "grad_norm": 0.9699363708496094, "learning_rate": 1.935986575969324e-05, "loss": 0.1775, "step": 5285 }, { "epoch": 0.11461876801074686, "grad_norm": 1.9195996522903442, "learning_rate": 1.935866708017234e-05, "loss": 0.2236, "step": 5290 }, { "epoch": 0.11472710333022772, "grad_norm": 2.0660178661346436, "learning_rate": 1.9357467316590052e-05, "loss": 0.2875, "step": 5295 }, { "epoch": 0.11483543864970858, "grad_norm": 1.2638980150222778, "learning_rate": 1.9356266469085345e-05, "loss": 0.2113, "step": 5300 }, { "epoch": 0.11494377396918944, "grad_norm": 1.844720482826233, "learning_rate": 1.9355064537797327e-05, "loss": 0.2534, "step": 5305 }, { "epoch": 0.1150521092886703, "grad_norm": 2.1356723308563232, "learning_rate": 1.9353861522865223e-05, "loss": 0.186, "step": 5310 }, { "epoch": 0.11516044460815116, "grad_norm": 1.8878830671310425, "learning_rate": 1.9352657424428383e-05, "loss": 0.2447, "step": 5315 }, { "epoch": 0.115268779927632, "grad_norm": 1.684645652770996, "learning_rate": 1.935145224262628e-05, "loss": 0.2412, "step": 5320 }, { "epoch": 0.11537711524711286, "grad_norm": 1.8491203784942627, "learning_rate": 1.9350245977598522e-05, "loss": 0.2089, "step": 5325 }, { "epoch": 0.11548545056659372, "grad_norm": 1.7873774766921997, "learning_rate": 1.934903862948483e-05, "loss": 0.2883, "step": 5330 }, { "epoch": 0.11559378588607458, "grad_norm": 1.5792508125305176, "learning_rate": 1.9347830198425064e-05, "loss": 0.2549, "step": 5335 }, { "epoch": 0.11570212120555544, "grad_norm": 1.4112211465835571, "learning_rate": 1.93466206845592e-05, "loss": 0.1829, "step": 5340 }, { "epoch": 0.1158104565250363, "grad_norm": 2.078742027282715, "learning_rate": 1.9345410088027344e-05, "loss": 0.2613, "step": 5345 }, { "epoch": 0.11591879184451714, "grad_norm": 2.0313913822174072, "learning_rate": 1.934419840896972e-05, "loss": 0.2451, "step": 5350 }, { "epoch": 0.116027127163998, "grad_norm": 2.542510986328125, "learning_rate": 1.934298564752669e-05, "loss": 0.24, "step": 5355 }, { "epoch": 0.11613546248347886, "grad_norm": 2.575371026992798, "learning_rate": 1.9341771803838728e-05, "loss": 0.2677, "step": 5360 }, { "epoch": 0.11624379780295972, "grad_norm": 1.234513759613037, "learning_rate": 1.9340556878046444e-05, "loss": 0.2377, "step": 5365 }, { "epoch": 0.11635213312244058, "grad_norm": 1.8350012302398682, "learning_rate": 1.9339340870290567e-05, "loss": 0.2445, "step": 5370 }, { "epoch": 0.11646046844192144, "grad_norm": 2.186424970626831, "learning_rate": 1.933812378071195e-05, "loss": 0.3407, "step": 5375 }, { "epoch": 0.11656880376140229, "grad_norm": 2.580674886703491, "learning_rate": 1.9336905609451584e-05, "loss": 0.2516, "step": 5380 }, { "epoch": 0.11667713908088315, "grad_norm": 3.100088357925415, "learning_rate": 1.9335686356650565e-05, "loss": 0.2443, "step": 5385 }, { "epoch": 0.116785474400364, "grad_norm": 2.379143238067627, "learning_rate": 1.933446602245013e-05, "loss": 0.2089, "step": 5390 }, { "epoch": 0.11689380971984487, "grad_norm": 1.6432318687438965, "learning_rate": 1.933324460699164e-05, "loss": 0.2512, "step": 5395 }, { "epoch": 0.11700214503932573, "grad_norm": 1.9126908779144287, "learning_rate": 1.9332022110416573e-05, "loss": 0.2214, "step": 5400 }, { "epoch": 0.11711048035880658, "grad_norm": 1.9562057256698608, "learning_rate": 1.933079853286654e-05, "loss": 0.273, "step": 5405 }, { "epoch": 0.11721881567828743, "grad_norm": 1.552812933921814, "learning_rate": 1.932957387448327e-05, "loss": 0.259, "step": 5410 }, { "epoch": 0.11732715099776829, "grad_norm": 2.014329671859741, "learning_rate": 1.9328348135408626e-05, "loss": 0.1714, "step": 5415 }, { "epoch": 0.11743548631724915, "grad_norm": 1.4477148056030273, "learning_rate": 1.9327121315784587e-05, "loss": 0.2277, "step": 5420 }, { "epoch": 0.11754382163673001, "grad_norm": 1.6945338249206543, "learning_rate": 1.932589341575326e-05, "loss": 0.1856, "step": 5425 }, { "epoch": 0.11765215695621087, "grad_norm": 1.6769777536392212, "learning_rate": 1.932466443545689e-05, "loss": 0.2056, "step": 5430 }, { "epoch": 0.11776049227569173, "grad_norm": 2.108170747756958, "learning_rate": 1.9323434375037825e-05, "loss": 0.2152, "step": 5435 }, { "epoch": 0.11786882759517257, "grad_norm": 2.0694892406463623, "learning_rate": 1.9322203234638553e-05, "loss": 0.1937, "step": 5440 }, { "epoch": 0.11797716291465343, "grad_norm": 2.541313409805298, "learning_rate": 1.9320971014401682e-05, "loss": 0.2471, "step": 5445 }, { "epoch": 0.11808549823413429, "grad_norm": 2.375894546508789, "learning_rate": 1.931973771446995e-05, "loss": 0.2961, "step": 5450 }, { "epoch": 0.11819383355361515, "grad_norm": 1.316866397857666, "learning_rate": 1.931850333498621e-05, "loss": 0.2842, "step": 5455 }, { "epoch": 0.11830216887309601, "grad_norm": 1.2578788995742798, "learning_rate": 1.931726787609345e-05, "loss": 0.2782, "step": 5460 }, { "epoch": 0.11841050419257686, "grad_norm": 2.4494452476501465, "learning_rate": 1.9316031337934783e-05, "loss": 0.1817, "step": 5465 }, { "epoch": 0.11851883951205772, "grad_norm": 3.338794469833374, "learning_rate": 1.9314793720653434e-05, "loss": 0.2642, "step": 5470 }, { "epoch": 0.11862717483153858, "grad_norm": 2.167189836502075, "learning_rate": 1.9313555024392775e-05, "loss": 0.1898, "step": 5475 }, { "epoch": 0.11873551015101944, "grad_norm": 2.0343661308288574, "learning_rate": 1.931231524929628e-05, "loss": 0.2106, "step": 5480 }, { "epoch": 0.1188438454705003, "grad_norm": 1.948531985282898, "learning_rate": 1.9311074395507558e-05, "loss": 0.2327, "step": 5485 }, { "epoch": 0.11895218078998115, "grad_norm": 1.8861794471740723, "learning_rate": 1.9309832463170352e-05, "loss": 0.1824, "step": 5490 }, { "epoch": 0.119060516109462, "grad_norm": 1.540605068206787, "learning_rate": 1.9308589452428513e-05, "loss": 0.3933, "step": 5495 }, { "epoch": 0.11916885142894286, "grad_norm": 1.628213882446289, "learning_rate": 1.9307345363426032e-05, "loss": 0.1771, "step": 5500 }, { "epoch": 0.11927718674842372, "grad_norm": 2.5020086765289307, "learning_rate": 1.9306100196307012e-05, "loss": 0.271, "step": 5505 }, { "epoch": 0.11938552206790458, "grad_norm": 2.5406901836395264, "learning_rate": 1.9304853951215693e-05, "loss": 0.1768, "step": 5510 }, { "epoch": 0.11949385738738544, "grad_norm": 2.123274326324463, "learning_rate": 1.930360662829643e-05, "loss": 0.2928, "step": 5515 }, { "epoch": 0.1196021927068663, "grad_norm": 1.0270226001739502, "learning_rate": 1.9302358227693703e-05, "loss": 0.2018, "step": 5520 }, { "epoch": 0.11971052802634714, "grad_norm": 2.48519229888916, "learning_rate": 1.930110874955213e-05, "loss": 0.3126, "step": 5525 }, { "epoch": 0.119818863345828, "grad_norm": 1.4694494009017944, "learning_rate": 1.9299858194016434e-05, "loss": 0.235, "step": 5530 }, { "epoch": 0.11992719866530886, "grad_norm": 2.1760075092315674, "learning_rate": 1.929860656123148e-05, "loss": 0.176, "step": 5535 }, { "epoch": 0.12003553398478972, "grad_norm": 1.975794792175293, "learning_rate": 1.9297353851342252e-05, "loss": 0.2784, "step": 5540 }, { "epoch": 0.12014386930427058, "grad_norm": 1.4020910263061523, "learning_rate": 1.929610006449385e-05, "loss": 0.2085, "step": 5545 }, { "epoch": 0.12025220462375144, "grad_norm": 2.8598148822784424, "learning_rate": 1.9294845200831512e-05, "loss": 0.2627, "step": 5550 }, { "epoch": 0.12036053994323229, "grad_norm": 2.205887794494629, "learning_rate": 1.9293589260500596e-05, "loss": 0.2374, "step": 5555 }, { "epoch": 0.12046887526271315, "grad_norm": 1.6959855556488037, "learning_rate": 1.929233224364658e-05, "loss": 0.1663, "step": 5560 }, { "epoch": 0.120577210582194, "grad_norm": 2.0952634811401367, "learning_rate": 1.9291074150415075e-05, "loss": 0.24, "step": 5565 }, { "epoch": 0.12068554590167486, "grad_norm": 2.2472264766693115, "learning_rate": 1.9289814980951812e-05, "loss": 0.2874, "step": 5570 }, { "epoch": 0.12079388122115572, "grad_norm": 1.3320192098617554, "learning_rate": 1.928855473540264e-05, "loss": 0.2381, "step": 5575 }, { "epoch": 0.12090221654063658, "grad_norm": 1.8532402515411377, "learning_rate": 1.9287293413913548e-05, "loss": 0.2531, "step": 5580 }, { "epoch": 0.12101055186011743, "grad_norm": 2.0693135261535645, "learning_rate": 1.9286031016630634e-05, "loss": 0.2583, "step": 5585 }, { "epoch": 0.12111888717959829, "grad_norm": 2.1354241371154785, "learning_rate": 1.9284767543700137e-05, "loss": 0.236, "step": 5590 }, { "epoch": 0.12122722249907915, "grad_norm": 1.6206220388412476, "learning_rate": 1.92835029952684e-05, "loss": 0.2295, "step": 5595 }, { "epoch": 0.12133555781856001, "grad_norm": 2.3513641357421875, "learning_rate": 1.9282237371481913e-05, "loss": 0.1767, "step": 5600 }, { "epoch": 0.12144389313804087, "grad_norm": 1.3418782949447632, "learning_rate": 1.928097067248727e-05, "loss": 0.2594, "step": 5605 }, { "epoch": 0.12155222845752173, "grad_norm": 2.4030869007110596, "learning_rate": 1.9279702898431205e-05, "loss": 0.2261, "step": 5610 }, { "epoch": 0.12166056377700257, "grad_norm": 2.3779499530792236, "learning_rate": 1.927843404946057e-05, "loss": 0.3537, "step": 5615 }, { "epoch": 0.12176889909648343, "grad_norm": 1.3077621459960938, "learning_rate": 1.927716412572234e-05, "loss": 0.1863, "step": 5620 }, { "epoch": 0.12187723441596429, "grad_norm": 2.7198381423950195, "learning_rate": 1.9275893127363615e-05, "loss": 0.1385, "step": 5625 }, { "epoch": 0.12198556973544515, "grad_norm": 1.614802598953247, "learning_rate": 1.927462105453163e-05, "loss": 0.2403, "step": 5630 }, { "epoch": 0.12209390505492601, "grad_norm": 1.721243143081665, "learning_rate": 1.9273347907373727e-05, "loss": 0.1906, "step": 5635 }, { "epoch": 0.12220224037440687, "grad_norm": 1.9122257232666016, "learning_rate": 1.9272073686037386e-05, "loss": 0.312, "step": 5640 }, { "epoch": 0.12231057569388772, "grad_norm": 2.0968875885009766, "learning_rate": 1.92707983906702e-05, "loss": 0.1919, "step": 5645 }, { "epoch": 0.12241891101336858, "grad_norm": 2.673261880874634, "learning_rate": 1.92695220214199e-05, "loss": 0.2172, "step": 5650 }, { "epoch": 0.12252724633284943, "grad_norm": 2.3658063411712646, "learning_rate": 1.926824457843433e-05, "loss": 0.2853, "step": 5655 }, { "epoch": 0.1226355816523303, "grad_norm": 3.0978829860687256, "learning_rate": 1.9266966061861466e-05, "loss": 0.2783, "step": 5660 }, { "epoch": 0.12274391697181115, "grad_norm": 2.1315951347351074, "learning_rate": 1.92656864718494e-05, "loss": 0.2672, "step": 5665 }, { "epoch": 0.12285225229129201, "grad_norm": 2.152665853500366, "learning_rate": 1.9264405808546357e-05, "loss": 0.1878, "step": 5670 }, { "epoch": 0.12296058761077286, "grad_norm": 2.0748305320739746, "learning_rate": 1.9263124072100682e-05, "loss": 0.2029, "step": 5675 }, { "epoch": 0.12306892293025372, "grad_norm": 1.7129095792770386, "learning_rate": 1.9261841262660846e-05, "loss": 0.1677, "step": 5680 }, { "epoch": 0.12317725824973458, "grad_norm": 2.059152603149414, "learning_rate": 1.926055738037544e-05, "loss": 0.2054, "step": 5685 }, { "epoch": 0.12328559356921544, "grad_norm": 1.5264393091201782, "learning_rate": 1.9259272425393187e-05, "loss": 0.2306, "step": 5690 }, { "epoch": 0.1233939288886963, "grad_norm": 1.5054854154586792, "learning_rate": 1.9257986397862925e-05, "loss": 0.264, "step": 5695 }, { "epoch": 0.12350226420817716, "grad_norm": 1.8475252389907837, "learning_rate": 1.9256699297933626e-05, "loss": 0.2044, "step": 5700 }, { "epoch": 0.123610599527658, "grad_norm": 1.7437419891357422, "learning_rate": 1.9255411125754376e-05, "loss": 0.2478, "step": 5705 }, { "epoch": 0.12371893484713886, "grad_norm": 2.5648462772369385, "learning_rate": 1.9254121881474394e-05, "loss": 0.251, "step": 5710 }, { "epoch": 0.12382727016661972, "grad_norm": 1.7777198553085327, "learning_rate": 1.925283156524302e-05, "loss": 0.3056, "step": 5715 }, { "epoch": 0.12393560548610058, "grad_norm": 2.0337514877319336, "learning_rate": 1.9251540177209714e-05, "loss": 0.183, "step": 5720 }, { "epoch": 0.12404394080558144, "grad_norm": 1.9895745515823364, "learning_rate": 1.9250247717524072e-05, "loss": 0.249, "step": 5725 }, { "epoch": 0.1241522761250623, "grad_norm": 1.9585154056549072, "learning_rate": 1.9248954186335796e-05, "loss": 0.2659, "step": 5730 }, { "epoch": 0.12426061144454315, "grad_norm": 2.4442365169525146, "learning_rate": 1.9247659583794728e-05, "loss": 0.1767, "step": 5735 }, { "epoch": 0.124368946764024, "grad_norm": 2.0987532138824463, "learning_rate": 1.9246363910050828e-05, "loss": 0.2068, "step": 5740 }, { "epoch": 0.12447728208350486, "grad_norm": 1.378199815750122, "learning_rate": 1.9245067165254176e-05, "loss": 0.3254, "step": 5745 }, { "epoch": 0.12458561740298572, "grad_norm": 2.2947988510131836, "learning_rate": 1.9243769349554985e-05, "loss": 0.2963, "step": 5750 }, { "epoch": 0.12469395272246658, "grad_norm": 2.308746099472046, "learning_rate": 1.924247046310359e-05, "loss": 0.2101, "step": 5755 }, { "epoch": 0.12480228804194743, "grad_norm": 2.0788960456848145, "learning_rate": 1.9241170506050442e-05, "loss": 0.2071, "step": 5760 }, { "epoch": 0.12491062336142829, "grad_norm": 2.0407161712646484, "learning_rate": 1.9239869478546124e-05, "loss": 0.3502, "step": 5765 }, { "epoch": 0.12501895868090915, "grad_norm": 1.6202431917190552, "learning_rate": 1.923856738074134e-05, "loss": 0.3024, "step": 5770 }, { "epoch": 0.12512729400039, "grad_norm": 2.1738009452819824, "learning_rate": 1.923726421278692e-05, "loss": 0.2052, "step": 5775 }, { "epoch": 0.12523562931987087, "grad_norm": 2.7309722900390625, "learning_rate": 1.923595997483381e-05, "loss": 0.1747, "step": 5780 }, { "epoch": 0.1253439646393517, "grad_norm": 2.2645678520202637, "learning_rate": 1.9234654667033098e-05, "loss": 0.2054, "step": 5785 }, { "epoch": 0.12545229995883259, "grad_norm": 2.126547336578369, "learning_rate": 1.9233348289535972e-05, "loss": 0.2844, "step": 5790 }, { "epoch": 0.12556063527831343, "grad_norm": 1.852167010307312, "learning_rate": 1.9232040842493766e-05, "loss": 0.2801, "step": 5795 }, { "epoch": 0.1256689705977943, "grad_norm": 1.9691920280456543, "learning_rate": 1.9230732326057923e-05, "loss": 0.1996, "step": 5800 }, { "epoch": 0.12577730591727515, "grad_norm": 1.7718167304992676, "learning_rate": 1.9229422740380016e-05, "loss": 0.2043, "step": 5805 }, { "epoch": 0.125885641236756, "grad_norm": 2.407479763031006, "learning_rate": 1.9228112085611747e-05, "loss": 0.2212, "step": 5810 }, { "epoch": 0.12599397655623687, "grad_norm": 2.3712332248687744, "learning_rate": 1.9226800361904924e-05, "loss": 0.2779, "step": 5815 }, { "epoch": 0.12610231187571772, "grad_norm": 1.9635727405548096, "learning_rate": 1.9225487569411497e-05, "loss": 0.2807, "step": 5820 }, { "epoch": 0.1262106471951986, "grad_norm": 1.4970802068710327, "learning_rate": 1.9224173708283534e-05, "loss": 0.2502, "step": 5825 }, { "epoch": 0.12631898251467943, "grad_norm": 1.6959232091903687, "learning_rate": 1.9222858778673225e-05, "loss": 0.2464, "step": 5830 }, { "epoch": 0.12642731783416028, "grad_norm": 1.5721925497055054, "learning_rate": 1.9221542780732884e-05, "loss": 0.2272, "step": 5835 }, { "epoch": 0.12653565315364115, "grad_norm": 1.8068166971206665, "learning_rate": 1.9220225714614953e-05, "loss": 0.2373, "step": 5840 }, { "epoch": 0.126643988473122, "grad_norm": 2.579350709915161, "learning_rate": 1.921890758047199e-05, "loss": 0.2752, "step": 5845 }, { "epoch": 0.12675232379260287, "grad_norm": 1.0634396076202393, "learning_rate": 1.9217588378456683e-05, "loss": 0.1985, "step": 5850 }, { "epoch": 0.12686065911208372, "grad_norm": 1.4919064044952393, "learning_rate": 1.921626810872184e-05, "loss": 0.2321, "step": 5855 }, { "epoch": 0.1269689944315646, "grad_norm": 2.1214957237243652, "learning_rate": 1.92149467714204e-05, "loss": 0.2337, "step": 5860 }, { "epoch": 0.12707732975104544, "grad_norm": 2.1259984970092773, "learning_rate": 1.9213624366705416e-05, "loss": 0.1441, "step": 5865 }, { "epoch": 0.12718566507052628, "grad_norm": 1.164818525314331, "learning_rate": 1.9212300894730066e-05, "loss": 0.2334, "step": 5870 }, { "epoch": 0.12729400039000716, "grad_norm": 2.2854578495025635, "learning_rate": 1.9210976355647658e-05, "loss": 0.2462, "step": 5875 }, { "epoch": 0.127402335709488, "grad_norm": 2.5447404384613037, "learning_rate": 1.9209650749611622e-05, "loss": 0.2315, "step": 5880 }, { "epoch": 0.12751067102896887, "grad_norm": 2.379957675933838, "learning_rate": 1.9208324076775506e-05, "loss": 0.2934, "step": 5885 }, { "epoch": 0.12761900634844972, "grad_norm": 2.2686500549316406, "learning_rate": 1.920699633729299e-05, "loss": 0.2904, "step": 5890 }, { "epoch": 0.12772734166793057, "grad_norm": 1.5292398929595947, "learning_rate": 1.9205667531317863e-05, "loss": 0.2791, "step": 5895 }, { "epoch": 0.12783567698741144, "grad_norm": 2.1824867725372314, "learning_rate": 1.9204337659004057e-05, "loss": 0.2219, "step": 5900 }, { "epoch": 0.12794401230689229, "grad_norm": 2.515199899673462, "learning_rate": 1.9203006720505614e-05, "loss": 0.2156, "step": 5905 }, { "epoch": 0.12805234762637316, "grad_norm": 1.9049646854400635, "learning_rate": 1.92016747159767e-05, "loss": 0.1911, "step": 5910 }, { "epoch": 0.128160682945854, "grad_norm": 2.5754003524780273, "learning_rate": 1.9200341645571618e-05, "loss": 0.2656, "step": 5915 }, { "epoch": 0.12826901826533488, "grad_norm": 1.985369324684143, "learning_rate": 1.919900750944477e-05, "loss": 0.2736, "step": 5920 }, { "epoch": 0.12837735358481572, "grad_norm": 2.1147751808166504, "learning_rate": 1.919767230775071e-05, "loss": 0.2612, "step": 5925 }, { "epoch": 0.12848568890429657, "grad_norm": 1.9530847072601318, "learning_rate": 1.919633604064409e-05, "loss": 0.1659, "step": 5930 }, { "epoch": 0.12859402422377744, "grad_norm": 2.2213563919067383, "learning_rate": 1.91949987082797e-05, "loss": 0.1659, "step": 5935 }, { "epoch": 0.1287023595432583, "grad_norm": 1.9354043006896973, "learning_rate": 1.9193660310812454e-05, "loss": 0.2772, "step": 5940 }, { "epoch": 0.12881069486273916, "grad_norm": 2.158846139907837, "learning_rate": 1.9192320848397382e-05, "loss": 0.2905, "step": 5945 }, { "epoch": 0.12891903018222, "grad_norm": 1.5806999206542969, "learning_rate": 1.9190980321189637e-05, "loss": 0.2335, "step": 5950 }, { "epoch": 0.12902736550170085, "grad_norm": 2.4584362506866455, "learning_rate": 1.9189638729344506e-05, "loss": 0.3599, "step": 5955 }, { "epoch": 0.12913570082118173, "grad_norm": 1.8795117139816284, "learning_rate": 1.918829607301739e-05, "loss": 0.272, "step": 5960 }, { "epoch": 0.12924403614066257, "grad_norm": 1.9970215559005737, "learning_rate": 1.918695235236381e-05, "loss": 0.2453, "step": 5965 }, { "epoch": 0.12935237146014344, "grad_norm": 3.0897979736328125, "learning_rate": 1.9185607567539424e-05, "loss": 0.2544, "step": 5970 }, { "epoch": 0.1294607067796243, "grad_norm": 2.2669663429260254, "learning_rate": 1.91842617187e-05, "loss": 0.178, "step": 5975 }, { "epoch": 0.12956904209910516, "grad_norm": 2.131404161453247, "learning_rate": 1.918291480600144e-05, "loss": 0.2528, "step": 5980 }, { "epoch": 0.129677377418586, "grad_norm": 1.8619890213012695, "learning_rate": 1.9181566829599755e-05, "loss": 0.2013, "step": 5985 }, { "epoch": 0.12978571273806686, "grad_norm": 2.004185914993286, "learning_rate": 1.9180217789651093e-05, "loss": 0.2354, "step": 5990 }, { "epoch": 0.12989404805754773, "grad_norm": 2.4280598163604736, "learning_rate": 1.917886768631172e-05, "loss": 0.2497, "step": 5995 }, { "epoch": 0.13000238337702857, "grad_norm": 1.9758033752441406, "learning_rate": 1.9177516519738025e-05, "loss": 0.1993, "step": 6000 }, { "epoch": 0.13011071869650945, "grad_norm": 3.045149326324463, "learning_rate": 1.9176164290086524e-05, "loss": 0.2386, "step": 6005 }, { "epoch": 0.1302190540159903, "grad_norm": 2.3786587715148926, "learning_rate": 1.9174810997513845e-05, "loss": 0.2558, "step": 6010 }, { "epoch": 0.13032738933547114, "grad_norm": 1.1072437763214111, "learning_rate": 1.9173456642176753e-05, "loss": 0.1713, "step": 6015 }, { "epoch": 0.130435724654952, "grad_norm": 1.7824573516845703, "learning_rate": 1.9172101224232123e-05, "loss": 0.156, "step": 6020 }, { "epoch": 0.13054405997443286, "grad_norm": 1.8626525402069092, "learning_rate": 1.9170744743836968e-05, "loss": 0.2575, "step": 6025 }, { "epoch": 0.13065239529391373, "grad_norm": 1.5402023792266846, "learning_rate": 1.916938720114841e-05, "loss": 0.2597, "step": 6030 }, { "epoch": 0.13076073061339458, "grad_norm": 1.818926215171814, "learning_rate": 1.9168028596323707e-05, "loss": 0.2513, "step": 6035 }, { "epoch": 0.13086906593287542, "grad_norm": 1.9038466215133667, "learning_rate": 1.9166668929520224e-05, "loss": 0.251, "step": 6040 }, { "epoch": 0.1309774012523563, "grad_norm": 1.8458222150802612, "learning_rate": 1.9165308200895466e-05, "loss": 0.196, "step": 6045 }, { "epoch": 0.13108573657183714, "grad_norm": 2.4295198917388916, "learning_rate": 1.9163946410607045e-05, "loss": 0.241, "step": 6050 }, { "epoch": 0.13119407189131801, "grad_norm": 2.732072114944458, "learning_rate": 1.916258355881271e-05, "loss": 0.2551, "step": 6055 }, { "epoch": 0.13130240721079886, "grad_norm": 1.2279810905456543, "learning_rate": 1.9161219645670326e-05, "loss": 0.1985, "step": 6060 }, { "epoch": 0.13141074253027973, "grad_norm": 1.3953328132629395, "learning_rate": 1.9159854671337877e-05, "loss": 0.1534, "step": 6065 }, { "epoch": 0.13151907784976058, "grad_norm": 1.7711433172225952, "learning_rate": 1.9158488635973486e-05, "loss": 0.2835, "step": 6070 }, { "epoch": 0.13162741316924143, "grad_norm": 1.2446315288543701, "learning_rate": 1.915712153973537e-05, "loss": 0.2387, "step": 6075 }, { "epoch": 0.1317357484887223, "grad_norm": 1.6395525932312012, "learning_rate": 1.915575338278191e-05, "loss": 0.2779, "step": 6080 }, { "epoch": 0.13184408380820314, "grad_norm": 1.2614686489105225, "learning_rate": 1.9154384165271565e-05, "loss": 0.168, "step": 6085 }, { "epoch": 0.13195241912768402, "grad_norm": 2.2730278968811035, "learning_rate": 1.915301388736295e-05, "loss": 0.2015, "step": 6090 }, { "epoch": 0.13206075444716486, "grad_norm": 1.2318071126937866, "learning_rate": 1.9151642549214787e-05, "loss": 0.2675, "step": 6095 }, { "epoch": 0.1321690897666457, "grad_norm": 2.128174304962158, "learning_rate": 1.915027015098593e-05, "loss": 0.1565, "step": 6100 }, { "epoch": 0.13227742508612658, "grad_norm": 2.007162570953369, "learning_rate": 1.9148896692835344e-05, "loss": 0.2611, "step": 6105 }, { "epoch": 0.13238576040560743, "grad_norm": 1.9797041416168213, "learning_rate": 1.9147522174922125e-05, "loss": 0.2554, "step": 6110 }, { "epoch": 0.1324940957250883, "grad_norm": 1.5691521167755127, "learning_rate": 1.9146146597405496e-05, "loss": 0.2125, "step": 6115 }, { "epoch": 0.13260243104456915, "grad_norm": 2.2827794551849365, "learning_rate": 1.9144769960444793e-05, "loss": 0.1769, "step": 6120 }, { "epoch": 0.13271076636405002, "grad_norm": 2.165787696838379, "learning_rate": 1.9143392264199475e-05, "loss": 0.259, "step": 6125 }, { "epoch": 0.13281910168353087, "grad_norm": 2.637681484222412, "learning_rate": 1.914201350882913e-05, "loss": 0.186, "step": 6130 }, { "epoch": 0.1329274370030117, "grad_norm": 1.652620553970337, "learning_rate": 1.9140633694493472e-05, "loss": 0.2155, "step": 6135 }, { "epoch": 0.13303577232249258, "grad_norm": 1.6098228693008423, "learning_rate": 1.9139252821352325e-05, "loss": 0.2839, "step": 6140 }, { "epoch": 0.13314410764197343, "grad_norm": 1.5553045272827148, "learning_rate": 1.9137870889565646e-05, "loss": 0.2386, "step": 6145 }, { "epoch": 0.1332524429614543, "grad_norm": 2.5384984016418457, "learning_rate": 1.9136487899293508e-05, "loss": 0.2147, "step": 6150 }, { "epoch": 0.13336077828093515, "grad_norm": 1.989781379699707, "learning_rate": 1.913510385069611e-05, "loss": 0.2623, "step": 6155 }, { "epoch": 0.133469113600416, "grad_norm": 1.5406235456466675, "learning_rate": 1.913371874393378e-05, "loss": 0.229, "step": 6160 }, { "epoch": 0.13357744891989687, "grad_norm": 1.4445576667785645, "learning_rate": 1.9132332579166954e-05, "loss": 0.1823, "step": 6165 }, { "epoch": 0.13368578423937771, "grad_norm": 2.051750898361206, "learning_rate": 1.9130945356556198e-05, "loss": 0.3053, "step": 6170 }, { "epoch": 0.1337941195588586, "grad_norm": 2.0833096504211426, "learning_rate": 1.9129557076262208e-05, "loss": 0.2444, "step": 6175 }, { "epoch": 0.13390245487833943, "grad_norm": 1.8162593841552734, "learning_rate": 1.912816773844579e-05, "loss": 0.19, "step": 6180 }, { "epoch": 0.1340107901978203, "grad_norm": 2.0521721839904785, "learning_rate": 1.912677734326788e-05, "loss": 0.2425, "step": 6185 }, { "epoch": 0.13411912551730115, "grad_norm": 2.3316092491149902, "learning_rate": 1.9125385890889536e-05, "loss": 0.2659, "step": 6190 }, { "epoch": 0.134227460836782, "grad_norm": 1.8926292657852173, "learning_rate": 1.912399338147193e-05, "loss": 0.2352, "step": 6195 }, { "epoch": 0.13433579615626287, "grad_norm": 1.7374072074890137, "learning_rate": 1.912259981517637e-05, "loss": 0.2368, "step": 6200 }, { "epoch": 0.13444413147574372, "grad_norm": 1.898661732673645, "learning_rate": 1.9121205192164277e-05, "loss": 0.2088, "step": 6205 }, { "epoch": 0.1345524667952246, "grad_norm": 2.3010478019714355, "learning_rate": 1.9119809512597198e-05, "loss": 0.2727, "step": 6210 }, { "epoch": 0.13466080211470544, "grad_norm": 1.4714999198913574, "learning_rate": 1.9118412776636804e-05, "loss": 0.1273, "step": 6215 }, { "epoch": 0.13476913743418628, "grad_norm": 1.8241432905197144, "learning_rate": 1.9117014984444885e-05, "loss": 0.1567, "step": 6220 }, { "epoch": 0.13487747275366715, "grad_norm": 1.4623886346817017, "learning_rate": 1.911561613618335e-05, "loss": 0.2348, "step": 6225 }, { "epoch": 0.134985808073148, "grad_norm": 1.9154495000839233, "learning_rate": 1.9114216232014238e-05, "loss": 0.2417, "step": 6230 }, { "epoch": 0.13509414339262887, "grad_norm": 1.7473241090774536, "learning_rate": 1.9112815272099707e-05, "loss": 0.2591, "step": 6235 }, { "epoch": 0.13520247871210972, "grad_norm": 2.5144453048706055, "learning_rate": 1.9111413256602035e-05, "loss": 0.205, "step": 6240 }, { "epoch": 0.1353108140315906, "grad_norm": 2.866926670074463, "learning_rate": 1.9110010185683628e-05, "loss": 0.2317, "step": 6245 }, { "epoch": 0.13541914935107144, "grad_norm": 2.3263070583343506, "learning_rate": 1.9108606059507007e-05, "loss": 0.2158, "step": 6250 }, { "epoch": 0.13552748467055228, "grad_norm": 2.232409954071045, "learning_rate": 1.9107200878234824e-05, "loss": 0.1837, "step": 6255 }, { "epoch": 0.13563581999003316, "grad_norm": 1.5938193798065186, "learning_rate": 1.9105794642029845e-05, "loss": 0.1924, "step": 6260 }, { "epoch": 0.135744155309514, "grad_norm": 1.8094645738601685, "learning_rate": 1.9104387351054957e-05, "loss": 0.2153, "step": 6265 }, { "epoch": 0.13585249062899488, "grad_norm": 2.377709150314331, "learning_rate": 1.9102979005473184e-05, "loss": 0.2866, "step": 6270 }, { "epoch": 0.13596082594847572, "grad_norm": 2.4387714862823486, "learning_rate": 1.9101569605447653e-05, "loss": 0.2602, "step": 6275 }, { "epoch": 0.13606916126795657, "grad_norm": 2.209859848022461, "learning_rate": 1.9100159151141623e-05, "loss": 0.336, "step": 6280 }, { "epoch": 0.13617749658743744, "grad_norm": 2.621647834777832, "learning_rate": 1.909874764271848e-05, "loss": 0.1521, "step": 6285 }, { "epoch": 0.1362858319069183, "grad_norm": 2.0416970252990723, "learning_rate": 1.909733508034172e-05, "loss": 0.2857, "step": 6290 }, { "epoch": 0.13639416722639916, "grad_norm": 1.9046870470046997, "learning_rate": 1.9095921464174965e-05, "loss": 0.3685, "step": 6295 }, { "epoch": 0.13650250254588, "grad_norm": 2.0454211235046387, "learning_rate": 1.909450679438197e-05, "loss": 0.245, "step": 6300 }, { "epoch": 0.13661083786536085, "grad_norm": 2.2539222240448, "learning_rate": 1.9093091071126597e-05, "loss": 0.3078, "step": 6305 }, { "epoch": 0.13671917318484172, "grad_norm": 2.1958065032958984, "learning_rate": 1.909167429457284e-05, "loss": 0.2006, "step": 6310 }, { "epoch": 0.13682750850432257, "grad_norm": 2.0573461055755615, "learning_rate": 1.9090256464884806e-05, "loss": 0.2551, "step": 6315 }, { "epoch": 0.13693584382380344, "grad_norm": 1.9745163917541504, "learning_rate": 1.908883758222673e-05, "loss": 0.3294, "step": 6320 }, { "epoch": 0.1370441791432843, "grad_norm": 1.8476139307022095, "learning_rate": 1.9087417646762977e-05, "loss": 0.2292, "step": 6325 }, { "epoch": 0.13715251446276516, "grad_norm": 1.6464594602584839, "learning_rate": 1.9085996658658016e-05, "loss": 0.3694, "step": 6330 }, { "epoch": 0.137260849782246, "grad_norm": 2.213870048522949, "learning_rate": 1.908457461807645e-05, "loss": 0.3092, "step": 6335 }, { "epoch": 0.13736918510172685, "grad_norm": 1.9181833267211914, "learning_rate": 1.9083151525183002e-05, "loss": 0.2853, "step": 6340 }, { "epoch": 0.13747752042120773, "grad_norm": 1.3631584644317627, "learning_rate": 1.908172738014252e-05, "loss": 0.2539, "step": 6345 }, { "epoch": 0.13758585574068857, "grad_norm": 2.0648627281188965, "learning_rate": 1.9080302183119958e-05, "loss": 0.3266, "step": 6350 }, { "epoch": 0.13769419106016945, "grad_norm": 1.6812644004821777, "learning_rate": 1.9078875934280417e-05, "loss": 0.2053, "step": 6355 }, { "epoch": 0.1378025263796503, "grad_norm": 1.9811606407165527, "learning_rate": 1.90774486337891e-05, "loss": 0.2688, "step": 6360 }, { "epoch": 0.13791086169913114, "grad_norm": 1.7803359031677246, "learning_rate": 1.9076020281811334e-05, "loss": 0.2318, "step": 6365 }, { "epoch": 0.138019197018612, "grad_norm": 2.3274104595184326, "learning_rate": 1.907459087851258e-05, "loss": 0.2458, "step": 6370 }, { "epoch": 0.13812753233809286, "grad_norm": 1.3331897258758545, "learning_rate": 1.907316042405841e-05, "loss": 0.2351, "step": 6375 }, { "epoch": 0.13823586765757373, "grad_norm": 1.8676708936691284, "learning_rate": 1.907172891861452e-05, "loss": 0.2537, "step": 6380 }, { "epoch": 0.13834420297705458, "grad_norm": 1.623095989227295, "learning_rate": 1.907029636234673e-05, "loss": 0.1701, "step": 6385 }, { "epoch": 0.13845253829653545, "grad_norm": 2.1545965671539307, "learning_rate": 1.906886275542098e-05, "loss": 0.2652, "step": 6390 }, { "epoch": 0.1385608736160163, "grad_norm": 1.8293204307556152, "learning_rate": 1.906742809800333e-05, "loss": 0.1205, "step": 6395 }, { "epoch": 0.13866920893549714, "grad_norm": 2.909670114517212, "learning_rate": 1.906599239025997e-05, "loss": 0.3001, "step": 6400 }, { "epoch": 0.138777544254978, "grad_norm": 1.5498813390731812, "learning_rate": 1.9064555632357194e-05, "loss": 0.2431, "step": 6405 }, { "epoch": 0.13888587957445886, "grad_norm": 1.8792990446090698, "learning_rate": 1.9063117824461437e-05, "loss": 0.1837, "step": 6410 }, { "epoch": 0.13899421489393973, "grad_norm": 2.1655900478363037, "learning_rate": 1.9061678966739246e-05, "loss": 0.2853, "step": 6415 }, { "epoch": 0.13910255021342058, "grad_norm": 1.96344792842865, "learning_rate": 1.9060239059357288e-05, "loss": 0.2221, "step": 6420 }, { "epoch": 0.13921088553290142, "grad_norm": 1.7099121809005737, "learning_rate": 1.905879810248236e-05, "loss": 0.2337, "step": 6425 }, { "epoch": 0.1393192208523823, "grad_norm": 2.3802273273468018, "learning_rate": 1.9057356096281372e-05, "loss": 0.2069, "step": 6430 }, { "epoch": 0.13942755617186314, "grad_norm": 1.9593852758407593, "learning_rate": 1.9055913040921358e-05, "loss": 0.2637, "step": 6435 }, { "epoch": 0.13953589149134402, "grad_norm": 1.6254006624221802, "learning_rate": 1.9054468936569477e-05, "loss": 0.2035, "step": 6440 }, { "epoch": 0.13964422681082486, "grad_norm": 2.180650234222412, "learning_rate": 1.905302378339301e-05, "loss": 0.1828, "step": 6445 }, { "epoch": 0.13975256213030574, "grad_norm": 1.5127300024032593, "learning_rate": 1.9051577581559346e-05, "loss": 0.2674, "step": 6450 }, { "epoch": 0.13986089744978658, "grad_norm": 2.344956874847412, "learning_rate": 1.9050130331236013e-05, "loss": 0.1974, "step": 6455 }, { "epoch": 0.13996923276926743, "grad_norm": 1.5283770561218262, "learning_rate": 1.9048682032590653e-05, "loss": 0.3239, "step": 6460 }, { "epoch": 0.1400775680887483, "grad_norm": 2.184600591659546, "learning_rate": 1.9047232685791027e-05, "loss": 0.1878, "step": 6465 }, { "epoch": 0.14018590340822915, "grad_norm": 2.418858766555786, "learning_rate": 1.9045782291005027e-05, "loss": 0.2396, "step": 6470 }, { "epoch": 0.14029423872771002, "grad_norm": 1.8018673658370972, "learning_rate": 1.904433084840065e-05, "loss": 0.1851, "step": 6475 }, { "epoch": 0.14040257404719086, "grad_norm": 1.7141180038452148, "learning_rate": 1.904287835814603e-05, "loss": 0.1687, "step": 6480 }, { "epoch": 0.1405109093666717, "grad_norm": 1.2463717460632324, "learning_rate": 1.9041424820409414e-05, "loss": 0.2398, "step": 6485 }, { "epoch": 0.14061924468615258, "grad_norm": 2.0712032318115234, "learning_rate": 1.9039970235359172e-05, "loss": 0.3011, "step": 6490 }, { "epoch": 0.14072758000563343, "grad_norm": 2.0978267192840576, "learning_rate": 1.90385146031638e-05, "loss": 0.2487, "step": 6495 }, { "epoch": 0.1408359153251143, "grad_norm": 2.0609049797058105, "learning_rate": 1.90370579239919e-05, "loss": 0.2755, "step": 6500 }, { "epoch": 0.14094425064459515, "grad_norm": 1.4568543434143066, "learning_rate": 1.903560019801222e-05, "loss": 0.2038, "step": 6505 }, { "epoch": 0.14105258596407602, "grad_norm": 2.730052947998047, "learning_rate": 1.9034141425393614e-05, "loss": 0.2119, "step": 6510 }, { "epoch": 0.14116092128355687, "grad_norm": 1.356967568397522, "learning_rate": 1.903268160630505e-05, "loss": 0.2761, "step": 6515 }, { "epoch": 0.1412692566030377, "grad_norm": 1.4277021884918213, "learning_rate": 1.9031220740915636e-05, "loss": 0.1769, "step": 6520 }, { "epoch": 0.1413775919225186, "grad_norm": 1.5722142457962036, "learning_rate": 1.902975882939458e-05, "loss": 0.1948, "step": 6525 }, { "epoch": 0.14148592724199943, "grad_norm": 1.921502947807312, "learning_rate": 1.902829587191124e-05, "loss": 0.3294, "step": 6530 }, { "epoch": 0.1415942625614803, "grad_norm": 1.5140877962112427, "learning_rate": 1.902683186863506e-05, "loss": 0.2552, "step": 6535 }, { "epoch": 0.14170259788096115, "grad_norm": 1.8969886302947998, "learning_rate": 1.902536681973563e-05, "loss": 0.2527, "step": 6540 }, { "epoch": 0.141810933200442, "grad_norm": 2.509241819381714, "learning_rate": 1.9023900725382654e-05, "loss": 0.3054, "step": 6545 }, { "epoch": 0.14191926851992287, "grad_norm": 1.9637895822525024, "learning_rate": 1.902243358574596e-05, "loss": 0.2447, "step": 6550 }, { "epoch": 0.14202760383940372, "grad_norm": 2.0612597465515137, "learning_rate": 1.9020965400995484e-05, "loss": 0.2754, "step": 6555 }, { "epoch": 0.1421359391588846, "grad_norm": 2.372276782989502, "learning_rate": 1.901949617130131e-05, "loss": 0.2051, "step": 6560 }, { "epoch": 0.14224427447836543, "grad_norm": 2.2435500621795654, "learning_rate": 1.901802589683361e-05, "loss": 0.2316, "step": 6565 }, { "epoch": 0.14235260979784628, "grad_norm": 2.445258140563965, "learning_rate": 1.9016554577762702e-05, "loss": 0.1919, "step": 6570 }, { "epoch": 0.14246094511732715, "grad_norm": 2.1797351837158203, "learning_rate": 1.9015082214259015e-05, "loss": 0.3238, "step": 6575 }, { "epoch": 0.142569280436808, "grad_norm": 1.5350013971328735, "learning_rate": 1.90136088064931e-05, "loss": 0.3132, "step": 6580 }, { "epoch": 0.14267761575628887, "grad_norm": 2.3635504245758057, "learning_rate": 1.9012134354635626e-05, "loss": 0.2198, "step": 6585 }, { "epoch": 0.14278595107576972, "grad_norm": 2.203130006790161, "learning_rate": 1.9010658858857387e-05, "loss": 0.1868, "step": 6590 }, { "epoch": 0.1428942863952506, "grad_norm": 1.4693704843521118, "learning_rate": 1.9009182319329302e-05, "loss": 0.2407, "step": 6595 }, { "epoch": 0.14300262171473144, "grad_norm": 1.7431607246398926, "learning_rate": 1.90077047362224e-05, "loss": 0.245, "step": 6600 }, { "epoch": 0.14311095703421228, "grad_norm": 1.792871356010437, "learning_rate": 1.9006226109707845e-05, "loss": 0.202, "step": 6605 }, { "epoch": 0.14321929235369316, "grad_norm": 1.2719570398330688, "learning_rate": 1.9004746439956906e-05, "loss": 0.3158, "step": 6610 }, { "epoch": 0.143327627673174, "grad_norm": 2.4135382175445557, "learning_rate": 1.9003265727140982e-05, "loss": 0.1846, "step": 6615 }, { "epoch": 0.14343596299265488, "grad_norm": 2.5675456523895264, "learning_rate": 1.9001783971431592e-05, "loss": 0.1582, "step": 6620 }, { "epoch": 0.14354429831213572, "grad_norm": 2.156721591949463, "learning_rate": 1.900030117300038e-05, "loss": 0.2614, "step": 6625 }, { "epoch": 0.14365263363161657, "grad_norm": 1.5008848905563354, "learning_rate": 1.89988173320191e-05, "loss": 0.2664, "step": 6630 }, { "epoch": 0.14376096895109744, "grad_norm": 2.230438470840454, "learning_rate": 1.899733244865963e-05, "loss": 0.3677, "step": 6635 }, { "epoch": 0.14386930427057829, "grad_norm": 1.8636854887008667, "learning_rate": 1.8995846523093976e-05, "loss": 0.2296, "step": 6640 }, { "epoch": 0.14397763959005916, "grad_norm": 1.74302339553833, "learning_rate": 1.8994359555494263e-05, "loss": 0.2981, "step": 6645 }, { "epoch": 0.14408597490954, "grad_norm": 1.5902702808380127, "learning_rate": 1.899287154603273e-05, "loss": 0.1833, "step": 6650 }, { "epoch": 0.14419431022902088, "grad_norm": 3.227970600128174, "learning_rate": 1.8991382494881744e-05, "loss": 0.2733, "step": 6655 }, { "epoch": 0.14430264554850172, "grad_norm": 1.6212246417999268, "learning_rate": 1.8989892402213785e-05, "loss": 0.1736, "step": 6660 }, { "epoch": 0.14441098086798257, "grad_norm": 2.700592517852783, "learning_rate": 1.898840126820146e-05, "loss": 0.2812, "step": 6665 }, { "epoch": 0.14451931618746344, "grad_norm": 1.9179885387420654, "learning_rate": 1.8986909093017493e-05, "loss": 0.1957, "step": 6670 }, { "epoch": 0.1446276515069443, "grad_norm": 2.155524969100952, "learning_rate": 1.8985415876834735e-05, "loss": 0.2353, "step": 6675 }, { "epoch": 0.14473598682642516, "grad_norm": 2.0280251502990723, "learning_rate": 1.8983921619826144e-05, "loss": 0.2784, "step": 6680 }, { "epoch": 0.144844322145906, "grad_norm": 1.5020653009414673, "learning_rate": 1.898242632216482e-05, "loss": 0.3031, "step": 6685 }, { "epoch": 0.14495265746538685, "grad_norm": 1.3113372325897217, "learning_rate": 1.8980929984023958e-05, "loss": 0.1554, "step": 6690 }, { "epoch": 0.14506099278486773, "grad_norm": 2.2569148540496826, "learning_rate": 1.897943260557689e-05, "loss": 0.3658, "step": 6695 }, { "epoch": 0.14516932810434857, "grad_norm": 1.8125605583190918, "learning_rate": 1.8977934186997068e-05, "loss": 0.193, "step": 6700 }, { "epoch": 0.14527766342382945, "grad_norm": 2.9291927814483643, "learning_rate": 1.8976434728458062e-05, "loss": 0.3211, "step": 6705 }, { "epoch": 0.1453859987433103, "grad_norm": 1.9100985527038574, "learning_rate": 1.8974934230133556e-05, "loss": 0.2934, "step": 6710 }, { "epoch": 0.14549433406279116, "grad_norm": 1.3302006721496582, "learning_rate": 1.8973432692197365e-05, "loss": 0.2766, "step": 6715 }, { "epoch": 0.145602669382272, "grad_norm": 1.343506932258606, "learning_rate": 1.8971930114823423e-05, "loss": 0.2811, "step": 6720 }, { "epoch": 0.14571100470175286, "grad_norm": 1.9257136583328247, "learning_rate": 1.8970426498185774e-05, "loss": 0.2341, "step": 6725 }, { "epoch": 0.14581934002123373, "grad_norm": 1.7961543798446655, "learning_rate": 1.896892184245859e-05, "loss": 0.2284, "step": 6730 }, { "epoch": 0.14592767534071457, "grad_norm": 2.573248863220215, "learning_rate": 1.8967416147816168e-05, "loss": 0.2376, "step": 6735 }, { "epoch": 0.14603601066019545, "grad_norm": 1.8679293394088745, "learning_rate": 1.8965909414432918e-05, "loss": 0.233, "step": 6740 }, { "epoch": 0.1461443459796763, "grad_norm": 2.075688362121582, "learning_rate": 1.8964401642483368e-05, "loss": 0.2021, "step": 6745 }, { "epoch": 0.14625268129915714, "grad_norm": 1.6224440336227417, "learning_rate": 1.8962892832142175e-05, "loss": 0.3232, "step": 6750 }, { "epoch": 0.146361016618638, "grad_norm": 2.1507582664489746, "learning_rate": 1.8961382983584113e-05, "loss": 0.2578, "step": 6755 }, { "epoch": 0.14646935193811886, "grad_norm": 1.6358908414840698, "learning_rate": 1.8959872096984074e-05, "loss": 0.2292, "step": 6760 }, { "epoch": 0.14657768725759973, "grad_norm": 2.196354627609253, "learning_rate": 1.895836017251707e-05, "loss": 0.2122, "step": 6765 }, { "epoch": 0.14668602257708058, "grad_norm": 2.212364912033081, "learning_rate": 1.8956847210358237e-05, "loss": 0.2665, "step": 6770 }, { "epoch": 0.14679435789656145, "grad_norm": 1.5492684841156006, "learning_rate": 1.895533321068283e-05, "loss": 0.2088, "step": 6775 }, { "epoch": 0.1469026932160423, "grad_norm": 1.854406714439392, "learning_rate": 1.8953818173666217e-05, "loss": 0.2684, "step": 6780 }, { "epoch": 0.14701102853552314, "grad_norm": 1.9901548624038696, "learning_rate": 1.89523020994839e-05, "loss": 0.2328, "step": 6785 }, { "epoch": 0.14711936385500402, "grad_norm": 1.7219979763031006, "learning_rate": 1.895078498831149e-05, "loss": 0.2467, "step": 6790 }, { "epoch": 0.14722769917448486, "grad_norm": 1.9309531450271606, "learning_rate": 1.8949266840324723e-05, "loss": 0.1733, "step": 6795 }, { "epoch": 0.14733603449396573, "grad_norm": 1.1257708072662354, "learning_rate": 1.894774765569945e-05, "loss": 0.1926, "step": 6800 }, { "epoch": 0.14744436981344658, "grad_norm": 1.576271414756775, "learning_rate": 1.8946227434611652e-05, "loss": 0.2359, "step": 6805 }, { "epoch": 0.14755270513292743, "grad_norm": 2.377138376235962, "learning_rate": 1.8944706177237417e-05, "loss": 0.2763, "step": 6810 }, { "epoch": 0.1476610404524083, "grad_norm": 3.5043842792510986, "learning_rate": 1.8943183883752963e-05, "loss": 0.2499, "step": 6815 }, { "epoch": 0.14776937577188914, "grad_norm": 2.0083353519439697, "learning_rate": 1.8941660554334626e-05, "loss": 0.287, "step": 6820 }, { "epoch": 0.14787771109137002, "grad_norm": 2.5620410442352295, "learning_rate": 1.894013618915886e-05, "loss": 0.2129, "step": 6825 }, { "epoch": 0.14798604641085086, "grad_norm": 2.580990791320801, "learning_rate": 1.893861078840224e-05, "loss": 0.3937, "step": 6830 }, { "epoch": 0.1480943817303317, "grad_norm": 1.9951525926589966, "learning_rate": 1.8937084352241458e-05, "loss": 0.2471, "step": 6835 }, { "epoch": 0.14820271704981258, "grad_norm": 1.7964844703674316, "learning_rate": 1.8935556880853335e-05, "loss": 0.2324, "step": 6840 }, { "epoch": 0.14831105236929343, "grad_norm": 2.0680174827575684, "learning_rate": 1.89340283744148e-05, "loss": 0.2541, "step": 6845 }, { "epoch": 0.1484193876887743, "grad_norm": 1.9189774990081787, "learning_rate": 1.8932498833102908e-05, "loss": 0.2052, "step": 6850 }, { "epoch": 0.14852772300825515, "grad_norm": 1.705143928527832, "learning_rate": 1.8930968257094836e-05, "loss": 0.2168, "step": 6855 }, { "epoch": 0.14863605832773602, "grad_norm": 1.9686213731765747, "learning_rate": 1.892943664656788e-05, "loss": 0.3137, "step": 6860 }, { "epoch": 0.14874439364721687, "grad_norm": 2.0319631099700928, "learning_rate": 1.892790400169945e-05, "loss": 0.3161, "step": 6865 }, { "epoch": 0.1488527289666977, "grad_norm": 2.280566692352295, "learning_rate": 1.892637032266708e-05, "loss": 0.1751, "step": 6870 }, { "epoch": 0.14896106428617858, "grad_norm": 1.9003353118896484, "learning_rate": 1.8924835609648428e-05, "loss": 0.2153, "step": 6875 }, { "epoch": 0.14906939960565943, "grad_norm": 1.874180793762207, "learning_rate": 1.892329986282126e-05, "loss": 0.3618, "step": 6880 }, { "epoch": 0.1491777349251403, "grad_norm": 2.0633621215820312, "learning_rate": 1.892176308236348e-05, "loss": 0.1774, "step": 6885 }, { "epoch": 0.14928607024462115, "grad_norm": 2.043466567993164, "learning_rate": 1.892022526845309e-05, "loss": 0.1661, "step": 6890 }, { "epoch": 0.149394405564102, "grad_norm": 1.9737950563430786, "learning_rate": 1.891868642126823e-05, "loss": 0.2398, "step": 6895 }, { "epoch": 0.14950274088358287, "grad_norm": 2.4094343185424805, "learning_rate": 1.8917146540987148e-05, "loss": 0.2829, "step": 6900 }, { "epoch": 0.14961107620306371, "grad_norm": 1.953717827796936, "learning_rate": 1.891560562778822e-05, "loss": 0.325, "step": 6905 }, { "epoch": 0.1497194115225446, "grad_norm": 1.772063136100769, "learning_rate": 1.8914063681849933e-05, "loss": 0.2744, "step": 6910 }, { "epoch": 0.14982774684202543, "grad_norm": 2.368319272994995, "learning_rate": 1.8912520703350905e-05, "loss": 0.2339, "step": 6915 }, { "epoch": 0.1499360821615063, "grad_norm": 2.4118688106536865, "learning_rate": 1.891097669246986e-05, "loss": 0.3513, "step": 6920 }, { "epoch": 0.15004441748098715, "grad_norm": 2.0398504734039307, "learning_rate": 1.8909431649385653e-05, "loss": 0.2618, "step": 6925 }, { "epoch": 0.150152752800468, "grad_norm": 1.6074535846710205, "learning_rate": 1.890788557427725e-05, "loss": 0.2292, "step": 6930 }, { "epoch": 0.15026108811994887, "grad_norm": 1.6350433826446533, "learning_rate": 1.8906338467323747e-05, "loss": 0.2668, "step": 6935 }, { "epoch": 0.15036942343942972, "grad_norm": 1.2958611249923706, "learning_rate": 1.890479032870435e-05, "loss": 0.2008, "step": 6940 }, { "epoch": 0.1504777587589106, "grad_norm": 1.715288519859314, "learning_rate": 1.8903241158598386e-05, "loss": 0.2736, "step": 6945 }, { "epoch": 0.15058609407839144, "grad_norm": 2.243346929550171, "learning_rate": 1.8901690957185304e-05, "loss": 0.1793, "step": 6950 }, { "epoch": 0.15069442939787228, "grad_norm": 1.9539793729782104, "learning_rate": 1.890013972464467e-05, "loss": 0.2242, "step": 6955 }, { "epoch": 0.15080276471735315, "grad_norm": 1.5324827432632446, "learning_rate": 1.8898587461156175e-05, "loss": 0.2407, "step": 6960 }, { "epoch": 0.150911100036834, "grad_norm": 1.7505486011505127, "learning_rate": 1.8897034166899625e-05, "loss": 0.2118, "step": 6965 }, { "epoch": 0.15101943535631487, "grad_norm": 1.7223596572875977, "learning_rate": 1.889547984205494e-05, "loss": 0.2933, "step": 6970 }, { "epoch": 0.15112777067579572, "grad_norm": 2.029836654663086, "learning_rate": 1.8893924486802172e-05, "loss": 0.197, "step": 6975 }, { "epoch": 0.1512361059952766, "grad_norm": 2.005953073501587, "learning_rate": 1.8892368101321486e-05, "loss": 0.2595, "step": 6980 }, { "epoch": 0.15134444131475744, "grad_norm": 2.012397050857544, "learning_rate": 1.8890810685793158e-05, "loss": 0.2067, "step": 6985 }, { "epoch": 0.15145277663423828, "grad_norm": 1.9213310480117798, "learning_rate": 1.88892522403976e-05, "loss": 0.2003, "step": 6990 }, { "epoch": 0.15156111195371916, "grad_norm": 1.4572951793670654, "learning_rate": 1.888769276531533e-05, "loss": 0.2328, "step": 6995 }, { "epoch": 0.1516694472732, "grad_norm": 1.861324429512024, "learning_rate": 1.888613226072699e-05, "loss": 0.3237, "step": 7000 }, { "epoch": 0.15177778259268088, "grad_norm": 2.042560338973999, "learning_rate": 1.8884570726813344e-05, "loss": 0.2553, "step": 7005 }, { "epoch": 0.15188611791216172, "grad_norm": 2.261017084121704, "learning_rate": 1.8883008163755264e-05, "loss": 0.2846, "step": 7010 }, { "epoch": 0.15199445323164257, "grad_norm": 2.177990436553955, "learning_rate": 1.8881444571733764e-05, "loss": 0.2328, "step": 7015 }, { "epoch": 0.15210278855112344, "grad_norm": 1.501004695892334, "learning_rate": 1.887987995092995e-05, "loss": 0.1743, "step": 7020 }, { "epoch": 0.1522111238706043, "grad_norm": 1.3992029428482056, "learning_rate": 1.8878314301525064e-05, "loss": 0.2984, "step": 7025 }, { "epoch": 0.15231945919008516, "grad_norm": 1.4134145975112915, "learning_rate": 1.8876747623700463e-05, "loss": 0.1878, "step": 7030 }, { "epoch": 0.152427794509566, "grad_norm": 1.4173469543457031, "learning_rate": 1.8875179917637627e-05, "loss": 0.2144, "step": 7035 }, { "epoch": 0.15253612982904688, "grad_norm": 1.6234710216522217, "learning_rate": 1.8873611183518143e-05, "loss": 0.2873, "step": 7040 }, { "epoch": 0.15264446514852772, "grad_norm": 1.4763742685317993, "learning_rate": 1.887204142152373e-05, "loss": 0.2449, "step": 7045 }, { "epoch": 0.15275280046800857, "grad_norm": 1.7328417301177979, "learning_rate": 1.8870470631836227e-05, "loss": 0.2129, "step": 7050 }, { "epoch": 0.15286113578748944, "grad_norm": 2.3772950172424316, "learning_rate": 1.886889881463758e-05, "loss": 0.199, "step": 7055 }, { "epoch": 0.1529694711069703, "grad_norm": 2.228024482727051, "learning_rate": 1.8867325970109857e-05, "loss": 0.2961, "step": 7060 }, { "epoch": 0.15307780642645116, "grad_norm": 1.5735865831375122, "learning_rate": 1.8865752098435254e-05, "loss": 0.2898, "step": 7065 }, { "epoch": 0.153186141745932, "grad_norm": 1.8416467905044556, "learning_rate": 1.8864177199796084e-05, "loss": 0.241, "step": 7070 }, { "epoch": 0.15329447706541285, "grad_norm": 1.4999743700027466, "learning_rate": 1.8862601274374765e-05, "loss": 0.2684, "step": 7075 }, { "epoch": 0.15340281238489373, "grad_norm": 1.5543876886367798, "learning_rate": 1.8861024322353855e-05, "loss": 0.174, "step": 7080 }, { "epoch": 0.15351114770437457, "grad_norm": 2.5021610260009766, "learning_rate": 1.885944634391601e-05, "loss": 0.2171, "step": 7085 }, { "epoch": 0.15361948302385545, "grad_norm": 1.8856207132339478, "learning_rate": 1.8857867339244025e-05, "loss": 0.1605, "step": 7090 }, { "epoch": 0.1537278183433363, "grad_norm": 2.016648054122925, "learning_rate": 1.8856287308520796e-05, "loss": 0.2483, "step": 7095 }, { "epoch": 0.15383615366281717, "grad_norm": 1.7189048528671265, "learning_rate": 1.885470625192935e-05, "loss": 0.1924, "step": 7100 }, { "epoch": 0.153944488982298, "grad_norm": 1.9022774696350098, "learning_rate": 1.8853124169652832e-05, "loss": 0.2189, "step": 7105 }, { "epoch": 0.15405282430177886, "grad_norm": 2.1516871452331543, "learning_rate": 1.8851541061874496e-05, "loss": 0.2975, "step": 7110 }, { "epoch": 0.15416115962125973, "grad_norm": 1.4451422691345215, "learning_rate": 1.8849956928777726e-05, "loss": 0.2925, "step": 7115 }, { "epoch": 0.15426949494074058, "grad_norm": 1.998767375946045, "learning_rate": 1.8848371770546016e-05, "loss": 0.2938, "step": 7120 }, { "epoch": 0.15437783026022145, "grad_norm": 1.4478622674942017, "learning_rate": 1.8846785587362986e-05, "loss": 0.2259, "step": 7125 }, { "epoch": 0.1544861655797023, "grad_norm": 2.121471643447876, "learning_rate": 1.884519837941237e-05, "loss": 0.215, "step": 7130 }, { "epoch": 0.15459450089918314, "grad_norm": 1.9281808137893677, "learning_rate": 1.8843610146878025e-05, "loss": 0.2156, "step": 7135 }, { "epoch": 0.154702836218664, "grad_norm": 2.450521945953369, "learning_rate": 1.8842020889943916e-05, "loss": 0.231, "step": 7140 }, { "epoch": 0.15481117153814486, "grad_norm": 2.327965259552002, "learning_rate": 1.8840430608794145e-05, "loss": 0.2396, "step": 7145 }, { "epoch": 0.15491950685762573, "grad_norm": 2.114917755126953, "learning_rate": 1.8838839303612915e-05, "loss": 0.2756, "step": 7150 }, { "epoch": 0.15502784217710658, "grad_norm": 1.470449686050415, "learning_rate": 1.883724697458456e-05, "loss": 0.2122, "step": 7155 }, { "epoch": 0.15513617749658742, "grad_norm": 1.6705865859985352, "learning_rate": 1.8835653621893526e-05, "loss": 0.2533, "step": 7160 }, { "epoch": 0.1552445128160683, "grad_norm": 2.500072956085205, "learning_rate": 1.8834059245724378e-05, "loss": 0.2612, "step": 7165 }, { "epoch": 0.15535284813554914, "grad_norm": 1.1996541023254395, "learning_rate": 1.8832463846261797e-05, "loss": 0.3252, "step": 7170 }, { "epoch": 0.15546118345503002, "grad_norm": 1.6236094236373901, "learning_rate": 1.8830867423690592e-05, "loss": 0.3452, "step": 7175 }, { "epoch": 0.15556951877451086, "grad_norm": 1.7397435903549194, "learning_rate": 1.8829269978195682e-05, "loss": 0.1893, "step": 7180 }, { "epoch": 0.15567785409399174, "grad_norm": 2.194816827774048, "learning_rate": 1.8827671509962105e-05, "loss": 0.2273, "step": 7185 }, { "epoch": 0.15578618941347258, "grad_norm": 1.6177679300308228, "learning_rate": 1.8826072019175026e-05, "loss": 0.1906, "step": 7190 }, { "epoch": 0.15589452473295343, "grad_norm": 0.9299748539924622, "learning_rate": 1.8824471506019715e-05, "loss": 0.197, "step": 7195 }, { "epoch": 0.1560028600524343, "grad_norm": 2.3317935466766357, "learning_rate": 1.882286997068157e-05, "loss": 0.2506, "step": 7200 }, { "epoch": 0.15611119537191515, "grad_norm": 1.9417763948440552, "learning_rate": 1.882126741334611e-05, "loss": 0.1937, "step": 7205 }, { "epoch": 0.15621953069139602, "grad_norm": 2.2233803272247314, "learning_rate": 1.881966383419896e-05, "loss": 0.1857, "step": 7210 }, { "epoch": 0.15632786601087686, "grad_norm": 2.366950511932373, "learning_rate": 1.8818059233425875e-05, "loss": 0.3161, "step": 7215 }, { "epoch": 0.1564362013303577, "grad_norm": 1.4881922006607056, "learning_rate": 1.881645361121272e-05, "loss": 0.1609, "step": 7220 }, { "epoch": 0.15654453664983858, "grad_norm": 2.01842999458313, "learning_rate": 1.8814846967745484e-05, "loss": 0.1808, "step": 7225 }, { "epoch": 0.15665287196931943, "grad_norm": 2.270875930786133, "learning_rate": 1.8813239303210275e-05, "loss": 0.2861, "step": 7230 }, { "epoch": 0.1567612072888003, "grad_norm": 2.0481300354003906, "learning_rate": 1.8811630617793316e-05, "loss": 0.3592, "step": 7235 }, { "epoch": 0.15686954260828115, "grad_norm": 1.4414328336715698, "learning_rate": 1.8810020911680947e-05, "loss": 0.2208, "step": 7240 }, { "epoch": 0.15697787792776202, "grad_norm": 1.9458004236221313, "learning_rate": 1.8808410185059628e-05, "loss": 0.1819, "step": 7245 }, { "epoch": 0.15708621324724287, "grad_norm": 2.0386404991149902, "learning_rate": 1.8806798438115942e-05, "loss": 0.3313, "step": 7250 }, { "epoch": 0.1571945485667237, "grad_norm": 1.7227306365966797, "learning_rate": 1.880518567103658e-05, "loss": 0.2096, "step": 7255 }, { "epoch": 0.1573028838862046, "grad_norm": 1.9741950035095215, "learning_rate": 1.880357188400836e-05, "loss": 0.2081, "step": 7260 }, { "epoch": 0.15741121920568543, "grad_norm": 1.4884973764419556, "learning_rate": 1.8801957077218217e-05, "loss": 0.2118, "step": 7265 }, { "epoch": 0.1575195545251663, "grad_norm": 2.162717580795288, "learning_rate": 1.8800341250853197e-05, "loss": 0.2208, "step": 7270 }, { "epoch": 0.15762788984464715, "grad_norm": 1.597316026687622, "learning_rate": 1.8798724405100475e-05, "loss": 0.2589, "step": 7275 }, { "epoch": 0.157736225164128, "grad_norm": 2.2724640369415283, "learning_rate": 1.8797106540147333e-05, "loss": 0.2076, "step": 7280 }, { "epoch": 0.15784456048360887, "grad_norm": 2.307460069656372, "learning_rate": 1.8795487656181182e-05, "loss": 0.2963, "step": 7285 }, { "epoch": 0.15795289580308972, "grad_norm": 1.3904763460159302, "learning_rate": 1.8793867753389545e-05, "loss": 0.2609, "step": 7290 }, { "epoch": 0.1580612311225706, "grad_norm": 1.3883566856384277, "learning_rate": 1.8792246831960052e-05, "loss": 0.2496, "step": 7295 }, { "epoch": 0.15816956644205143, "grad_norm": 3.036832571029663, "learning_rate": 1.879062489208048e-05, "loss": 0.2645, "step": 7300 }, { "epoch": 0.1582779017615323, "grad_norm": 1.738028645515442, "learning_rate": 1.8789001933938692e-05, "loss": 0.2686, "step": 7305 }, { "epoch": 0.15838623708101315, "grad_norm": 1.826801061630249, "learning_rate": 1.878737795772269e-05, "loss": 0.2255, "step": 7310 }, { "epoch": 0.158494572400494, "grad_norm": 2.1043994426727295, "learning_rate": 1.8785752963620586e-05, "loss": 0.2659, "step": 7315 }, { "epoch": 0.15860290771997487, "grad_norm": 1.505274772644043, "learning_rate": 1.8784126951820613e-05, "loss": 0.2036, "step": 7320 }, { "epoch": 0.15871124303945572, "grad_norm": 1.945326328277588, "learning_rate": 1.878249992251112e-05, "loss": 0.1901, "step": 7325 }, { "epoch": 0.1588195783589366, "grad_norm": 2.286639928817749, "learning_rate": 1.878087187588057e-05, "loss": 0.1955, "step": 7330 }, { "epoch": 0.15892791367841744, "grad_norm": 1.9036917686462402, "learning_rate": 1.8779242812117554e-05, "loss": 0.3445, "step": 7335 }, { "epoch": 0.15903624899789828, "grad_norm": 1.4002735614776611, "learning_rate": 1.877761273141077e-05, "loss": 0.1736, "step": 7340 }, { "epoch": 0.15914458431737916, "grad_norm": 1.7866960763931274, "learning_rate": 1.877598163394904e-05, "loss": 0.2774, "step": 7345 }, { "epoch": 0.15925291963686, "grad_norm": 2.8542065620422363, "learning_rate": 1.8774349519921303e-05, "loss": 0.2579, "step": 7350 }, { "epoch": 0.15936125495634088, "grad_norm": 2.484022617340088, "learning_rate": 1.877271638951661e-05, "loss": 0.2392, "step": 7355 }, { "epoch": 0.15946959027582172, "grad_norm": 2.248830795288086, "learning_rate": 1.877108224292414e-05, "loss": 0.2692, "step": 7360 }, { "epoch": 0.1595779255953026, "grad_norm": 1.7605586051940918, "learning_rate": 1.8769447080333186e-05, "loss": 0.2028, "step": 7365 }, { "epoch": 0.15968626091478344, "grad_norm": 1.8549453020095825, "learning_rate": 1.876781090193315e-05, "loss": 0.2056, "step": 7370 }, { "epoch": 0.15979459623426429, "grad_norm": 1.811745047569275, "learning_rate": 1.8766173707913565e-05, "loss": 0.2868, "step": 7375 }, { "epoch": 0.15990293155374516, "grad_norm": 1.5685205459594727, "learning_rate": 1.8764535498464075e-05, "loss": 0.2845, "step": 7380 }, { "epoch": 0.160011266873226, "grad_norm": 2.1914002895355225, "learning_rate": 1.8762896273774437e-05, "loss": 0.2146, "step": 7385 }, { "epoch": 0.16011960219270688, "grad_norm": 1.6003553867340088, "learning_rate": 1.8761256034034535e-05, "loss": 0.2348, "step": 7390 }, { "epoch": 0.16022793751218772, "grad_norm": 2.028677225112915, "learning_rate": 1.8759614779434362e-05, "loss": 0.2552, "step": 7395 }, { "epoch": 0.16033627283166857, "grad_norm": 1.6864057779312134, "learning_rate": 1.8757972510164043e-05, "loss": 0.221, "step": 7400 }, { "epoch": 0.16044460815114944, "grad_norm": 1.4919428825378418, "learning_rate": 1.87563292264138e-05, "loss": 0.2142, "step": 7405 }, { "epoch": 0.1605529434706303, "grad_norm": 2.5898935794830322, "learning_rate": 1.8754684928373983e-05, "loss": 0.2336, "step": 7410 }, { "epoch": 0.16066127879011116, "grad_norm": 1.1548819541931152, "learning_rate": 1.8753039616235065e-05, "loss": 0.2136, "step": 7415 }, { "epoch": 0.160769614109592, "grad_norm": 1.8739814758300781, "learning_rate": 1.8751393290187625e-05, "loss": 0.2534, "step": 7420 }, { "epoch": 0.16087794942907285, "grad_norm": 1.9018938541412354, "learning_rate": 1.8749745950422372e-05, "loss": 0.2082, "step": 7425 }, { "epoch": 0.16098628474855373, "grad_norm": 2.2382524013519287, "learning_rate": 1.874809759713012e-05, "loss": 0.3292, "step": 7430 }, { "epoch": 0.16109462006803457, "grad_norm": 1.3410561084747314, "learning_rate": 1.8746448230501807e-05, "loss": 0.218, "step": 7435 }, { "epoch": 0.16120295538751545, "grad_norm": 2.1969871520996094, "learning_rate": 1.874479785072849e-05, "loss": 0.2389, "step": 7440 }, { "epoch": 0.1613112907069963, "grad_norm": 1.9149751663208008, "learning_rate": 1.874314645800134e-05, "loss": 0.2554, "step": 7445 }, { "epoch": 0.16141962602647716, "grad_norm": 1.5347901582717896, "learning_rate": 1.874149405251164e-05, "loss": 0.2259, "step": 7450 }, { "epoch": 0.161527961345958, "grad_norm": 1.8409425020217896, "learning_rate": 1.8739840634450804e-05, "loss": 0.2258, "step": 7455 }, { "epoch": 0.16163629666543886, "grad_norm": 2.1587235927581787, "learning_rate": 1.8738186204010357e-05, "loss": 0.2709, "step": 7460 }, { "epoch": 0.16174463198491973, "grad_norm": 1.2438814640045166, "learning_rate": 1.8736530761381932e-05, "loss": 0.232, "step": 7465 }, { "epoch": 0.16185296730440057, "grad_norm": 1.8260360956192017, "learning_rate": 1.8734874306757293e-05, "loss": 0.2625, "step": 7470 }, { "epoch": 0.16196130262388145, "grad_norm": 2.4881999492645264, "learning_rate": 1.8733216840328318e-05, "loss": 0.2876, "step": 7475 }, { "epoch": 0.1620696379433623, "grad_norm": 2.445984363555908, "learning_rate": 1.873155836228699e-05, "loss": 0.1996, "step": 7480 }, { "epoch": 0.16217797326284314, "grad_norm": 2.456756830215454, "learning_rate": 1.8729898872825425e-05, "loss": 0.2199, "step": 7485 }, { "epoch": 0.162286308582324, "grad_norm": 1.4862449169158936, "learning_rate": 1.8728238372135853e-05, "loss": 0.1966, "step": 7490 }, { "epoch": 0.16239464390180486, "grad_norm": 1.757616639137268, "learning_rate": 1.8726576860410612e-05, "loss": 0.2955, "step": 7495 }, { "epoch": 0.16250297922128573, "grad_norm": 1.6295934915542603, "learning_rate": 1.8724914337842163e-05, "loss": 0.221, "step": 7500 }, { "epoch": 0.16261131454076658, "grad_norm": 1.4621484279632568, "learning_rate": 1.872325080462309e-05, "loss": 0.1849, "step": 7505 }, { "epoch": 0.16271964986024745, "grad_norm": 1.6185513734817505, "learning_rate": 1.8721586260946086e-05, "loss": 0.1685, "step": 7510 }, { "epoch": 0.1628279851797283, "grad_norm": 1.9684244394302368, "learning_rate": 1.8719920707003966e-05, "loss": 0.2265, "step": 7515 }, { "epoch": 0.16293632049920914, "grad_norm": 1.6423174142837524, "learning_rate": 1.8718254142989652e-05, "loss": 0.2813, "step": 7520 }, { "epoch": 0.16304465581869002, "grad_norm": 2.1696298122406006, "learning_rate": 1.8716586569096197e-05, "loss": 0.258, "step": 7525 }, { "epoch": 0.16315299113817086, "grad_norm": 2.3071837425231934, "learning_rate": 1.8714917985516762e-05, "loss": 0.2194, "step": 7530 }, { "epoch": 0.16326132645765173, "grad_norm": 2.7303919792175293, "learning_rate": 1.8713248392444625e-05, "loss": 0.2245, "step": 7535 }, { "epoch": 0.16336966177713258, "grad_norm": 2.289010524749756, "learning_rate": 1.871157779007319e-05, "loss": 0.1946, "step": 7540 }, { "epoch": 0.16347799709661343, "grad_norm": 1.6526108980178833, "learning_rate": 1.8709906178595967e-05, "loss": 0.2297, "step": 7545 }, { "epoch": 0.1635863324160943, "grad_norm": 1.478575587272644, "learning_rate": 1.8708233558206586e-05, "loss": 0.2087, "step": 7550 }, { "epoch": 0.16369466773557514, "grad_norm": 2.337507486343384, "learning_rate": 1.87065599290988e-05, "loss": 0.1451, "step": 7555 }, { "epoch": 0.16380300305505602, "grad_norm": 1.6807830333709717, "learning_rate": 1.8704885291466467e-05, "loss": 0.1932, "step": 7560 }, { "epoch": 0.16391133837453686, "grad_norm": 1.8580845594406128, "learning_rate": 1.870320964550357e-05, "loss": 0.2695, "step": 7565 }, { "epoch": 0.16401967369401774, "grad_norm": 1.6739237308502197, "learning_rate": 1.8701532991404215e-05, "loss": 0.2223, "step": 7570 }, { "epoch": 0.16412800901349858, "grad_norm": 2.1147139072418213, "learning_rate": 1.869985532936261e-05, "loss": 0.2409, "step": 7575 }, { "epoch": 0.16423634433297943, "grad_norm": 2.018535852432251, "learning_rate": 1.8698176659573088e-05, "loss": 0.2183, "step": 7580 }, { "epoch": 0.1643446796524603, "grad_norm": 2.287212371826172, "learning_rate": 1.86964969822301e-05, "loss": 0.2816, "step": 7585 }, { "epoch": 0.16445301497194115, "grad_norm": 2.411543846130371, "learning_rate": 1.869481629752821e-05, "loss": 0.2454, "step": 7590 }, { "epoch": 0.16456135029142202, "grad_norm": 1.277469515800476, "learning_rate": 1.86931346056621e-05, "loss": 0.198, "step": 7595 }, { "epoch": 0.16466968561090287, "grad_norm": 1.6917434930801392, "learning_rate": 1.8691451906826566e-05, "loss": 0.2176, "step": 7600 }, { "epoch": 0.1647780209303837, "grad_norm": 1.9225574731826782, "learning_rate": 1.868976820121653e-05, "loss": 0.2504, "step": 7605 }, { "epoch": 0.16488635624986459, "grad_norm": 1.7345271110534668, "learning_rate": 1.868808348902702e-05, "loss": 0.1936, "step": 7610 }, { "epoch": 0.16499469156934543, "grad_norm": 1.5734237432479858, "learning_rate": 1.8686397770453183e-05, "loss": 0.1249, "step": 7615 }, { "epoch": 0.1651030268888263, "grad_norm": 1.8095988035202026, "learning_rate": 1.8684711045690293e-05, "loss": 0.1612, "step": 7620 }, { "epoch": 0.16521136220830715, "grad_norm": 1.5652248859405518, "learning_rate": 1.8683023314933718e-05, "loss": 0.2135, "step": 7625 }, { "epoch": 0.16531969752778802, "grad_norm": 1.396380066871643, "learning_rate": 1.868133457837897e-05, "loss": 0.2626, "step": 7630 }, { "epoch": 0.16542803284726887, "grad_norm": 2.263293743133545, "learning_rate": 1.8679644836221653e-05, "loss": 0.1936, "step": 7635 }, { "epoch": 0.16553636816674971, "grad_norm": 2.15812611579895, "learning_rate": 1.8677954088657503e-05, "loss": 0.1987, "step": 7640 }, { "epoch": 0.1656447034862306, "grad_norm": 1.9588531255722046, "learning_rate": 1.867626233588237e-05, "loss": 0.2883, "step": 7645 }, { "epoch": 0.16575303880571143, "grad_norm": 2.0637624263763428, "learning_rate": 1.867456957809221e-05, "loss": 0.2043, "step": 7650 }, { "epoch": 0.1658613741251923, "grad_norm": 1.7962692975997925, "learning_rate": 1.8672875815483115e-05, "loss": 0.2352, "step": 7655 }, { "epoch": 0.16596970944467315, "grad_norm": 1.2384270429611206, "learning_rate": 1.8671181048251276e-05, "loss": 0.2401, "step": 7660 }, { "epoch": 0.166078044764154, "grad_norm": 1.493970513343811, "learning_rate": 1.8669485276593005e-05, "loss": 0.2666, "step": 7665 }, { "epoch": 0.16618638008363487, "grad_norm": 1.9134248495101929, "learning_rate": 1.8667788500704734e-05, "loss": 0.2525, "step": 7670 }, { "epoch": 0.16629471540311572, "grad_norm": 2.378805160522461, "learning_rate": 1.8666090720783012e-05, "loss": 0.2922, "step": 7675 }, { "epoch": 0.1664030507225966, "grad_norm": 1.761687994003296, "learning_rate": 1.8664391937024495e-05, "loss": 0.2657, "step": 7680 }, { "epoch": 0.16651138604207744, "grad_norm": 2.2972989082336426, "learning_rate": 1.8662692149625963e-05, "loss": 0.2454, "step": 7685 }, { "epoch": 0.16661972136155828, "grad_norm": 2.4967994689941406, "learning_rate": 1.8660991358784313e-05, "loss": 0.2354, "step": 7690 }, { "epoch": 0.16672805668103916, "grad_norm": 1.5510003566741943, "learning_rate": 1.865928956469656e-05, "loss": 0.2321, "step": 7695 }, { "epoch": 0.16683639200052, "grad_norm": 2.073686122894287, "learning_rate": 1.8657586767559824e-05, "loss": 0.269, "step": 7700 }, { "epoch": 0.16694472732000087, "grad_norm": 1.6299054622650146, "learning_rate": 1.8655882967571353e-05, "loss": 0.2261, "step": 7705 }, { "epoch": 0.16705306263948172, "grad_norm": 2.1953482627868652, "learning_rate": 1.8654178164928504e-05, "loss": 0.2295, "step": 7710 }, { "epoch": 0.1671613979589626, "grad_norm": 1.6750905513763428, "learning_rate": 1.8652472359828752e-05, "loss": 0.2277, "step": 7715 }, { "epoch": 0.16726973327844344, "grad_norm": 1.706132173538208, "learning_rate": 1.8650765552469696e-05, "loss": 0.2333, "step": 7720 }, { "epoch": 0.16737806859792428, "grad_norm": 1.6644601821899414, "learning_rate": 1.8649057743049038e-05, "loss": 0.3256, "step": 7725 }, { "epoch": 0.16748640391740516, "grad_norm": 2.585972785949707, "learning_rate": 1.86473489317646e-05, "loss": 0.1989, "step": 7730 }, { "epoch": 0.167594739236886, "grad_norm": 1.499753713607788, "learning_rate": 1.8645639118814328e-05, "loss": 0.1412, "step": 7735 }, { "epoch": 0.16770307455636688, "grad_norm": 2.0203609466552734, "learning_rate": 1.8643928304396275e-05, "loss": 0.2304, "step": 7740 }, { "epoch": 0.16781140987584772, "grad_norm": 2.193629026412964, "learning_rate": 1.8642216488708615e-05, "loss": 0.1789, "step": 7745 }, { "epoch": 0.16791974519532857, "grad_norm": 1.3718138933181763, "learning_rate": 1.8640503671949635e-05, "loss": 0.3152, "step": 7750 }, { "epoch": 0.16802808051480944, "grad_norm": 1.6787463426589966, "learning_rate": 1.8638789854317737e-05, "loss": 0.2607, "step": 7755 }, { "epoch": 0.1681364158342903, "grad_norm": 1.4180150032043457, "learning_rate": 1.8637075036011446e-05, "loss": 0.1986, "step": 7760 }, { "epoch": 0.16824475115377116, "grad_norm": 1.9895223379135132, "learning_rate": 1.8635359217229398e-05, "loss": 0.2447, "step": 7765 }, { "epoch": 0.168353086473252, "grad_norm": 1.9296784400939941, "learning_rate": 1.8633642398170336e-05, "loss": 0.2034, "step": 7770 }, { "epoch": 0.16846142179273288, "grad_norm": 1.1986918449401855, "learning_rate": 1.8631924579033138e-05, "loss": 0.2393, "step": 7775 }, { "epoch": 0.16856975711221373, "grad_norm": 3.661975145339966, "learning_rate": 1.8630205760016782e-05, "loss": 0.2881, "step": 7780 }, { "epoch": 0.16867809243169457, "grad_norm": 1.6049669981002808, "learning_rate": 1.862848594132037e-05, "loss": 0.2327, "step": 7785 }, { "epoch": 0.16878642775117544, "grad_norm": 1.7876249551773071, "learning_rate": 1.8626765123143117e-05, "loss": 0.2819, "step": 7790 }, { "epoch": 0.1688947630706563, "grad_norm": 1.94196355342865, "learning_rate": 1.8625043305684355e-05, "loss": 0.2815, "step": 7795 }, { "epoch": 0.16900309839013716, "grad_norm": 2.248824119567871, "learning_rate": 1.8623320489143526e-05, "loss": 0.3805, "step": 7800 }, { "epoch": 0.169111433709618, "grad_norm": 2.050692558288574, "learning_rate": 1.8621596673720198e-05, "loss": 0.2602, "step": 7805 }, { "epoch": 0.16921976902909885, "grad_norm": 1.656665563583374, "learning_rate": 1.8619871859614045e-05, "loss": 0.1772, "step": 7810 }, { "epoch": 0.16932810434857973, "grad_norm": 3.663846492767334, "learning_rate": 1.8618146047024863e-05, "loss": 0.2323, "step": 7815 }, { "epoch": 0.16943643966806057, "grad_norm": 1.6137163639068604, "learning_rate": 1.861641923615256e-05, "loss": 0.1655, "step": 7820 }, { "epoch": 0.16954477498754145, "grad_norm": 1.7723805904388428, "learning_rate": 1.8614691427197166e-05, "loss": 0.2616, "step": 7825 }, { "epoch": 0.1696531103070223, "grad_norm": 1.857920527458191, "learning_rate": 1.8612962620358815e-05, "loss": 0.2195, "step": 7830 }, { "epoch": 0.16976144562650317, "grad_norm": 1.6110632419586182, "learning_rate": 1.861123281583777e-05, "loss": 0.3172, "step": 7835 }, { "epoch": 0.169869780945984, "grad_norm": 1.5980123281478882, "learning_rate": 1.8609502013834397e-05, "loss": 0.1896, "step": 7840 }, { "epoch": 0.16997811626546486, "grad_norm": 1.1404036283493042, "learning_rate": 1.8607770214549186e-05, "loss": 0.1756, "step": 7845 }, { "epoch": 0.17008645158494573, "grad_norm": 2.2979936599731445, "learning_rate": 1.8606037418182743e-05, "loss": 0.2414, "step": 7850 }, { "epoch": 0.17019478690442658, "grad_norm": 1.8177646398544312, "learning_rate": 1.860430362493578e-05, "loss": 0.236, "step": 7855 }, { "epoch": 0.17030312222390745, "grad_norm": 2.2493321895599365, "learning_rate": 1.8602568835009135e-05, "loss": 0.1833, "step": 7860 }, { "epoch": 0.1704114575433883, "grad_norm": 1.671073317527771, "learning_rate": 1.8600833048603757e-05, "loss": 0.2131, "step": 7865 }, { "epoch": 0.17051979286286914, "grad_norm": 2.461411952972412, "learning_rate": 1.8599096265920714e-05, "loss": 0.2118, "step": 7870 }, { "epoch": 0.17062812818235001, "grad_norm": 1.5457204580307007, "learning_rate": 1.859735848716118e-05, "loss": 0.228, "step": 7875 }, { "epoch": 0.17073646350183086, "grad_norm": 1.7371647357940674, "learning_rate": 1.8595619712526454e-05, "loss": 0.2645, "step": 7880 }, { "epoch": 0.17084479882131173, "grad_norm": 1.7760967016220093, "learning_rate": 1.8593879942217944e-05, "loss": 0.1805, "step": 7885 }, { "epoch": 0.17095313414079258, "grad_norm": 2.023266077041626, "learning_rate": 1.8592139176437184e-05, "loss": 0.2305, "step": 7890 }, { "epoch": 0.17106146946027345, "grad_norm": 2.854799509048462, "learning_rate": 1.8590397415385807e-05, "loss": 0.2714, "step": 7895 }, { "epoch": 0.1711698047797543, "grad_norm": 1.8773669004440308, "learning_rate": 1.8588654659265578e-05, "loss": 0.1612, "step": 7900 }, { "epoch": 0.17127814009923514, "grad_norm": 2.0690195560455322, "learning_rate": 1.858691090827836e-05, "loss": 0.2072, "step": 7905 }, { "epoch": 0.17138647541871602, "grad_norm": 1.4853204488754272, "learning_rate": 1.8585166162626148e-05, "loss": 0.1986, "step": 7910 }, { "epoch": 0.17149481073819686, "grad_norm": 1.4536348581314087, "learning_rate": 1.858342042251104e-05, "loss": 0.2736, "step": 7915 }, { "epoch": 0.17160314605767774, "grad_norm": 1.9203976392745972, "learning_rate": 1.8581673688135257e-05, "loss": 0.1967, "step": 7920 }, { "epoch": 0.17171148137715858, "grad_norm": 2.480496644973755, "learning_rate": 1.857992595970113e-05, "loss": 0.2043, "step": 7925 }, { "epoch": 0.17181981669663943, "grad_norm": 2.183863401412964, "learning_rate": 1.857817723741111e-05, "loss": 0.2428, "step": 7930 }, { "epoch": 0.1719281520161203, "grad_norm": 1.895209550857544, "learning_rate": 1.8576427521467754e-05, "loss": 0.2856, "step": 7935 }, { "epoch": 0.17203648733560115, "grad_norm": 1.8026962280273438, "learning_rate": 1.8574676812073747e-05, "loss": 0.2836, "step": 7940 }, { "epoch": 0.17214482265508202, "grad_norm": 1.9970437288284302, "learning_rate": 1.8572925109431878e-05, "loss": 0.2097, "step": 7945 }, { "epoch": 0.17225315797456286, "grad_norm": 2.404254913330078, "learning_rate": 1.857117241374506e-05, "loss": 0.2954, "step": 7950 }, { "epoch": 0.1723614932940437, "grad_norm": 1.4281176328659058, "learning_rate": 1.8569418725216316e-05, "loss": 0.1554, "step": 7955 }, { "epoch": 0.17246982861352458, "grad_norm": 3.0810399055480957, "learning_rate": 1.8567664044048776e-05, "loss": 0.274, "step": 7960 }, { "epoch": 0.17257816393300543, "grad_norm": 1.7732694149017334, "learning_rate": 1.8565908370445705e-05, "loss": 0.2345, "step": 7965 }, { "epoch": 0.1726864992524863, "grad_norm": 1.4771907329559326, "learning_rate": 1.8564151704610466e-05, "loss": 0.1199, "step": 7970 }, { "epoch": 0.17279483457196715, "grad_norm": 2.6685450077056885, "learning_rate": 1.8562394046746543e-05, "loss": 0.2455, "step": 7975 }, { "epoch": 0.17290316989144802, "grad_norm": 2.5300252437591553, "learning_rate": 1.8560635397057535e-05, "loss": 0.3425, "step": 7980 }, { "epoch": 0.17301150521092887, "grad_norm": 1.7729792594909668, "learning_rate": 1.855887575574715e-05, "loss": 0.2611, "step": 7985 }, { "epoch": 0.1731198405304097, "grad_norm": 2.168807029724121, "learning_rate": 1.8557115123019226e-05, "loss": 0.3363, "step": 7990 }, { "epoch": 0.1732281758498906, "grad_norm": 1.1095056533813477, "learning_rate": 1.8555353499077698e-05, "loss": 0.1975, "step": 7995 }, { "epoch": 0.17333651116937143, "grad_norm": 1.5936354398727417, "learning_rate": 1.8553590884126628e-05, "loss": 0.2619, "step": 8000 }, { "epoch": 0.1734448464888523, "grad_norm": 2.0337908267974854, "learning_rate": 1.8551827278370183e-05, "loss": 0.1968, "step": 8005 }, { "epoch": 0.17355318180833315, "grad_norm": 2.038078546524048, "learning_rate": 1.8550062682012657e-05, "loss": 0.2801, "step": 8010 }, { "epoch": 0.173661517127814, "grad_norm": 1.926713466644287, "learning_rate": 1.854829709525845e-05, "loss": 0.2657, "step": 8015 }, { "epoch": 0.17376985244729487, "grad_norm": 2.057431936264038, "learning_rate": 1.854653051831208e-05, "loss": 0.2206, "step": 8020 }, { "epoch": 0.17387818776677572, "grad_norm": 3.3038339614868164, "learning_rate": 1.854476295137817e-05, "loss": 0.2203, "step": 8025 }, { "epoch": 0.1739865230862566, "grad_norm": 1.3957219123840332, "learning_rate": 1.854299439466148e-05, "loss": 0.1866, "step": 8030 }, { "epoch": 0.17409485840573743, "grad_norm": 2.6516878604888916, "learning_rate": 1.854122484836686e-05, "loss": 0.2552, "step": 8035 }, { "epoch": 0.1742031937252183, "grad_norm": 2.288039445877075, "learning_rate": 1.8539454312699287e-05, "loss": 0.2188, "step": 8040 }, { "epoch": 0.17431152904469915, "grad_norm": 1.4758068323135376, "learning_rate": 1.853768278786385e-05, "loss": 0.2825, "step": 8045 }, { "epoch": 0.17441986436418, "grad_norm": 1.6654034852981567, "learning_rate": 1.8535910274065764e-05, "loss": 0.181, "step": 8050 }, { "epoch": 0.17452819968366087, "grad_norm": 1.9030779600143433, "learning_rate": 1.853413677151034e-05, "loss": 0.2483, "step": 8055 }, { "epoch": 0.17463653500314172, "grad_norm": 0.8847962617874146, "learning_rate": 1.8532362280403008e-05, "loss": 0.253, "step": 8060 }, { "epoch": 0.1747448703226226, "grad_norm": 3.804615020751953, "learning_rate": 1.853058680094932e-05, "loss": 0.2412, "step": 8065 }, { "epoch": 0.17485320564210344, "grad_norm": 1.8681361675262451, "learning_rate": 1.8528810333354945e-05, "loss": 0.2152, "step": 8070 }, { "epoch": 0.17496154096158428, "grad_norm": 1.906689167022705, "learning_rate": 1.852703287782565e-05, "loss": 0.3099, "step": 8075 }, { "epoch": 0.17506987628106516, "grad_norm": 1.9805079698562622, "learning_rate": 1.852525443456733e-05, "loss": 0.241, "step": 8080 }, { "epoch": 0.175178211600546, "grad_norm": 1.7293744087219238, "learning_rate": 1.8523475003785995e-05, "loss": 0.1455, "step": 8085 }, { "epoch": 0.17528654692002688, "grad_norm": 2.085986614227295, "learning_rate": 1.8521694585687765e-05, "loss": 0.24, "step": 8090 }, { "epoch": 0.17539488223950772, "grad_norm": 2.7012124061584473, "learning_rate": 1.851991318047887e-05, "loss": 0.2543, "step": 8095 }, { "epoch": 0.1755032175589886, "grad_norm": 1.771894097328186, "learning_rate": 1.8518130788365657e-05, "loss": 0.2092, "step": 8100 }, { "epoch": 0.17561155287846944, "grad_norm": 1.6055774688720703, "learning_rate": 1.85163474095546e-05, "loss": 0.1906, "step": 8105 }, { "epoch": 0.17571988819795029, "grad_norm": 1.9784015417099, "learning_rate": 1.8514563044252267e-05, "loss": 0.2367, "step": 8110 }, { "epoch": 0.17582822351743116, "grad_norm": 2.047469139099121, "learning_rate": 1.8512777692665354e-05, "loss": 0.347, "step": 8115 }, { "epoch": 0.175936558836912, "grad_norm": 1.3222391605377197, "learning_rate": 1.8510991355000664e-05, "loss": 0.297, "step": 8120 }, { "epoch": 0.17604489415639288, "grad_norm": 2.2148969173431396, "learning_rate": 1.8509204031465126e-05, "loss": 0.2824, "step": 8125 }, { "epoch": 0.17615322947587372, "grad_norm": 2.1240246295928955, "learning_rate": 1.8507415722265766e-05, "loss": 0.2062, "step": 8130 }, { "epoch": 0.17626156479535457, "grad_norm": 1.9908820390701294, "learning_rate": 1.8505626427609736e-05, "loss": 0.1448, "step": 8135 }, { "epoch": 0.17636990011483544, "grad_norm": 1.5393606424331665, "learning_rate": 1.8503836147704297e-05, "loss": 0.1783, "step": 8140 }, { "epoch": 0.1764782354343163, "grad_norm": 1.7570366859436035, "learning_rate": 1.850204488275683e-05, "loss": 0.2041, "step": 8145 }, { "epoch": 0.17658657075379716, "grad_norm": 1.5610723495483398, "learning_rate": 1.850025263297482e-05, "loss": 0.2971, "step": 8150 }, { "epoch": 0.176694906073278, "grad_norm": 1.2627445459365845, "learning_rate": 1.849845939856588e-05, "loss": 0.2935, "step": 8155 }, { "epoch": 0.17680324139275888, "grad_norm": 2.617718458175659, "learning_rate": 1.8496665179737724e-05, "loss": 0.2578, "step": 8160 }, { "epoch": 0.17691157671223973, "grad_norm": 1.6039032936096191, "learning_rate": 1.8494869976698187e-05, "loss": 0.1562, "step": 8165 }, { "epoch": 0.17701991203172057, "grad_norm": 2.0080220699310303, "learning_rate": 1.8493073789655217e-05, "loss": 0.1687, "step": 8170 }, { "epoch": 0.17712824735120145, "grad_norm": 2.2742605209350586, "learning_rate": 1.8491276618816875e-05, "loss": 0.3399, "step": 8175 }, { "epoch": 0.1772365826706823, "grad_norm": 2.488569736480713, "learning_rate": 1.8489478464391336e-05, "loss": 0.2207, "step": 8180 }, { "epoch": 0.17734491799016316, "grad_norm": 1.7235757112503052, "learning_rate": 1.848767932658689e-05, "loss": 0.2446, "step": 8185 }, { "epoch": 0.177453253309644, "grad_norm": 2.090036153793335, "learning_rate": 1.848587920561194e-05, "loss": 0.2642, "step": 8190 }, { "epoch": 0.17756158862912486, "grad_norm": 2.569579839706421, "learning_rate": 1.8484078101675e-05, "loss": 0.1559, "step": 8195 }, { "epoch": 0.17766992394860573, "grad_norm": 1.9990912675857544, "learning_rate": 1.8482276014984703e-05, "loss": 0.1995, "step": 8200 }, { "epoch": 0.17777825926808657, "grad_norm": 2.3390986919403076, "learning_rate": 1.8480472945749796e-05, "loss": 0.2343, "step": 8205 }, { "epoch": 0.17788659458756745, "grad_norm": 1.9576784372329712, "learning_rate": 1.8478668894179135e-05, "loss": 0.1983, "step": 8210 }, { "epoch": 0.1779949299070483, "grad_norm": 3.346883773803711, "learning_rate": 1.8476863860481694e-05, "loss": 0.2242, "step": 8215 }, { "epoch": 0.17810326522652914, "grad_norm": 2.5199756622314453, "learning_rate": 1.8475057844866557e-05, "loss": 0.2905, "step": 8220 }, { "epoch": 0.17821160054601, "grad_norm": 2.2331466674804688, "learning_rate": 1.8473250847542923e-05, "loss": 0.2283, "step": 8225 }, { "epoch": 0.17831993586549086, "grad_norm": 1.7052206993103027, "learning_rate": 1.8471442868720113e-05, "loss": 0.1627, "step": 8230 }, { "epoch": 0.17842827118497173, "grad_norm": 2.370326042175293, "learning_rate": 1.8469633908607547e-05, "loss": 0.1449, "step": 8235 }, { "epoch": 0.17853660650445258, "grad_norm": 1.6235361099243164, "learning_rate": 1.8467823967414763e-05, "loss": 0.2683, "step": 8240 }, { "epoch": 0.17864494182393345, "grad_norm": 2.3507723808288574, "learning_rate": 1.8466013045351426e-05, "loss": 0.2624, "step": 8245 }, { "epoch": 0.1787532771434143, "grad_norm": 1.5916398763656616, "learning_rate": 1.84642011426273e-05, "loss": 0.2201, "step": 8250 }, { "epoch": 0.17886161246289514, "grad_norm": 1.8062294721603394, "learning_rate": 1.846238825945226e-05, "loss": 0.2766, "step": 8255 }, { "epoch": 0.17896994778237602, "grad_norm": 1.4571620225906372, "learning_rate": 1.846057439603631e-05, "loss": 0.2466, "step": 8260 }, { "epoch": 0.17907828310185686, "grad_norm": 1.3280025720596313, "learning_rate": 1.845875955258955e-05, "loss": 0.2543, "step": 8265 }, { "epoch": 0.17918661842133773, "grad_norm": 1.6345996856689453, "learning_rate": 1.8456943729322216e-05, "loss": 0.1781, "step": 8270 }, { "epoch": 0.17929495374081858, "grad_norm": 1.7738362550735474, "learning_rate": 1.845512692644463e-05, "loss": 0.193, "step": 8275 }, { "epoch": 0.17940328906029943, "grad_norm": 1.590777039527893, "learning_rate": 1.845330914416725e-05, "loss": 0.1959, "step": 8280 }, { "epoch": 0.1795116243797803, "grad_norm": 3.2599239349365234, "learning_rate": 1.8451490382700636e-05, "loss": 0.2495, "step": 8285 }, { "epoch": 0.17961995969926114, "grad_norm": 1.9147851467132568, "learning_rate": 1.8449670642255463e-05, "loss": 0.2654, "step": 8290 }, { "epoch": 0.17972829501874202, "grad_norm": 1.7608028650283813, "learning_rate": 1.8447849923042523e-05, "loss": 0.1648, "step": 8295 }, { "epoch": 0.17983663033822286, "grad_norm": 1.3933390378952026, "learning_rate": 1.8446028225272725e-05, "loss": 0.1923, "step": 8300 }, { "epoch": 0.17994496565770374, "grad_norm": 1.4252830743789673, "learning_rate": 1.844420554915707e-05, "loss": 0.2454, "step": 8305 }, { "epoch": 0.18005330097718458, "grad_norm": 1.1658154726028442, "learning_rate": 1.8442381894906697e-05, "loss": 0.2383, "step": 8310 }, { "epoch": 0.18016163629666543, "grad_norm": 1.4016915559768677, "learning_rate": 1.8440557262732852e-05, "loss": 0.2204, "step": 8315 }, { "epoch": 0.1802699716161463, "grad_norm": 2.5752968788146973, "learning_rate": 1.8438731652846885e-05, "loss": 0.2104, "step": 8320 }, { "epoch": 0.18037830693562715, "grad_norm": 1.9181230068206787, "learning_rate": 1.8436905065460268e-05, "loss": 0.2276, "step": 8325 }, { "epoch": 0.18048664225510802, "grad_norm": 1.5714794397354126, "learning_rate": 1.8435077500784584e-05, "loss": 0.269, "step": 8330 }, { "epoch": 0.18059497757458887, "grad_norm": 1.5561503171920776, "learning_rate": 1.8433248959031533e-05, "loss": 0.2075, "step": 8335 }, { "epoch": 0.1807033128940697, "grad_norm": 2.2466001510620117, "learning_rate": 1.8431419440412917e-05, "loss": 0.2593, "step": 8340 }, { "epoch": 0.18081164821355059, "grad_norm": 1.8981821537017822, "learning_rate": 1.8429588945140658e-05, "loss": 0.25, "step": 8345 }, { "epoch": 0.18091998353303143, "grad_norm": 1.4752658605575562, "learning_rate": 1.8427757473426798e-05, "loss": 0.2542, "step": 8350 }, { "epoch": 0.1810283188525123, "grad_norm": 2.075434923171997, "learning_rate": 1.8425925025483485e-05, "loss": 0.1612, "step": 8355 }, { "epoch": 0.18113665417199315, "grad_norm": 1.6220848560333252, "learning_rate": 1.8424091601522976e-05, "loss": 0.2295, "step": 8360 }, { "epoch": 0.18124498949147402, "grad_norm": 2.055027723312378, "learning_rate": 1.8422257201757648e-05, "loss": 0.2589, "step": 8365 }, { "epoch": 0.18135332481095487, "grad_norm": 1.0688756704330444, "learning_rate": 1.842042182639999e-05, "loss": 0.2226, "step": 8370 }, { "epoch": 0.18146166013043571, "grad_norm": 2.2585182189941406, "learning_rate": 1.84185854756626e-05, "loss": 0.207, "step": 8375 }, { "epoch": 0.1815699954499166, "grad_norm": 2.027921438217163, "learning_rate": 1.8416748149758194e-05, "loss": 0.3221, "step": 8380 }, { "epoch": 0.18167833076939743, "grad_norm": 1.359515905380249, "learning_rate": 1.8414909848899595e-05, "loss": 0.2243, "step": 8385 }, { "epoch": 0.1817866660888783, "grad_norm": 2.5287821292877197, "learning_rate": 1.8413070573299745e-05, "loss": 0.2112, "step": 8390 }, { "epoch": 0.18189500140835915, "grad_norm": 1.9935896396636963, "learning_rate": 1.8411230323171702e-05, "loss": 0.2485, "step": 8395 }, { "epoch": 0.18200333672784, "grad_norm": 2.1095056533813477, "learning_rate": 1.840938909872862e-05, "loss": 0.1588, "step": 8400 }, { "epoch": 0.18211167204732087, "grad_norm": 1.934486746788025, "learning_rate": 1.8407546900183786e-05, "loss": 0.2035, "step": 8405 }, { "epoch": 0.18222000736680172, "grad_norm": 2.113560676574707, "learning_rate": 1.840570372775059e-05, "loss": 0.2182, "step": 8410 }, { "epoch": 0.1823283426862826, "grad_norm": 1.6196739673614502, "learning_rate": 1.8403859581642532e-05, "loss": 0.2107, "step": 8415 }, { "epoch": 0.18243667800576344, "grad_norm": 1.8191289901733398, "learning_rate": 1.8402014462073236e-05, "loss": 0.1966, "step": 8420 }, { "epoch": 0.1825450133252443, "grad_norm": 2.4888741970062256, "learning_rate": 1.840016836925642e-05, "loss": 0.1859, "step": 8425 }, { "epoch": 0.18265334864472516, "grad_norm": 1.5374271869659424, "learning_rate": 1.839832130340594e-05, "loss": 0.2113, "step": 8430 }, { "epoch": 0.182761683964206, "grad_norm": 1.7008228302001953, "learning_rate": 1.839647326473574e-05, "loss": 0.2745, "step": 8435 }, { "epoch": 0.18287001928368687, "grad_norm": 1.8808457851409912, "learning_rate": 1.8394624253459896e-05, "loss": 0.2845, "step": 8440 }, { "epoch": 0.18297835460316772, "grad_norm": 1.762873649597168, "learning_rate": 1.8392774269792577e-05, "loss": 0.1506, "step": 8445 }, { "epoch": 0.1830866899226486, "grad_norm": 1.8351657390594482, "learning_rate": 1.8390923313948086e-05, "loss": 0.2215, "step": 8450 }, { "epoch": 0.18319502524212944, "grad_norm": 1.634067416191101, "learning_rate": 1.8389071386140823e-05, "loss": 0.1677, "step": 8455 }, { "epoch": 0.18330336056161028, "grad_norm": 2.590745449066162, "learning_rate": 1.8387218486585312e-05, "loss": 0.3187, "step": 8460 }, { "epoch": 0.18341169588109116, "grad_norm": 1.8655256032943726, "learning_rate": 1.8385364615496176e-05, "loss": 0.2658, "step": 8465 }, { "epoch": 0.183520031200572, "grad_norm": 1.388429880142212, "learning_rate": 1.8383509773088163e-05, "loss": 0.2659, "step": 8470 }, { "epoch": 0.18362836652005288, "grad_norm": 2.0100257396698, "learning_rate": 1.838165395957613e-05, "loss": 0.237, "step": 8475 }, { "epoch": 0.18373670183953372, "grad_norm": 1.773439645767212, "learning_rate": 1.837979717517504e-05, "loss": 0.3619, "step": 8480 }, { "epoch": 0.18384503715901457, "grad_norm": 1.6438117027282715, "learning_rate": 1.837793942009998e-05, "loss": 0.1868, "step": 8485 }, { "epoch": 0.18395337247849544, "grad_norm": 1.0147454738616943, "learning_rate": 1.8376080694566136e-05, "loss": 0.2387, "step": 8490 }, { "epoch": 0.1840617077979763, "grad_norm": 1.7114872932434082, "learning_rate": 1.837422099878882e-05, "loss": 0.2793, "step": 8495 }, { "epoch": 0.18417004311745716, "grad_norm": 2.061903953552246, "learning_rate": 1.8372360332983445e-05, "loss": 0.2129, "step": 8500 }, { "epoch": 0.184278378436938, "grad_norm": 1.8595091104507446, "learning_rate": 1.8370498697365543e-05, "loss": 0.207, "step": 8505 }, { "epoch": 0.18438671375641888, "grad_norm": 2.016465902328491, "learning_rate": 1.836863609215076e-05, "loss": 0.2959, "step": 8510 }, { "epoch": 0.18449504907589973, "grad_norm": 1.981092929840088, "learning_rate": 1.8366772517554846e-05, "loss": 0.2171, "step": 8515 }, { "epoch": 0.18460338439538057, "grad_norm": 1.6817995309829712, "learning_rate": 1.836490797379367e-05, "loss": 0.2245, "step": 8520 }, { "epoch": 0.18471171971486144, "grad_norm": 1.1438032388687134, "learning_rate": 1.836304246108321e-05, "loss": 0.2455, "step": 8525 }, { "epoch": 0.1848200550343423, "grad_norm": 1.8453320264816284, "learning_rate": 1.8361175979639565e-05, "loss": 0.2936, "step": 8530 }, { "epoch": 0.18492839035382316, "grad_norm": 1.4769134521484375, "learning_rate": 1.835930852967893e-05, "loss": 0.2678, "step": 8535 }, { "epoch": 0.185036725673304, "grad_norm": 2.1382267475128174, "learning_rate": 1.835744011141762e-05, "loss": 0.3585, "step": 8540 }, { "epoch": 0.18514506099278485, "grad_norm": 2.796433210372925, "learning_rate": 1.835557072507207e-05, "loss": 0.2053, "step": 8545 }, { "epoch": 0.18525339631226573, "grad_norm": 1.6074655055999756, "learning_rate": 1.8353700370858824e-05, "loss": 0.3043, "step": 8550 }, { "epoch": 0.18536173163174657, "grad_norm": 1.6708790063858032, "learning_rate": 1.835182904899452e-05, "loss": 0.1212, "step": 8555 }, { "epoch": 0.18547006695122745, "grad_norm": 1.8687069416046143, "learning_rate": 1.8349956759695934e-05, "loss": 0.2966, "step": 8560 }, { "epoch": 0.1855784022707083, "grad_norm": 2.0159847736358643, "learning_rate": 1.834808350317994e-05, "loss": 0.3007, "step": 8565 }, { "epoch": 0.18568673759018917, "grad_norm": 1.5059723854064941, "learning_rate": 1.834620927966353e-05, "loss": 0.1552, "step": 8570 }, { "epoch": 0.18579507290967, "grad_norm": 1.7373120784759521, "learning_rate": 1.8344334089363798e-05, "loss": 0.1962, "step": 8575 }, { "epoch": 0.18590340822915086, "grad_norm": 2.1653010845184326, "learning_rate": 1.834245793249796e-05, "loss": 0.3123, "step": 8580 }, { "epoch": 0.18601174354863173, "grad_norm": 2.3544228076934814, "learning_rate": 1.8340580809283335e-05, "loss": 0.3111, "step": 8585 }, { "epoch": 0.18612007886811258, "grad_norm": 1.9255664348602295, "learning_rate": 1.8338702719937375e-05, "loss": 0.2005, "step": 8590 }, { "epoch": 0.18622841418759345, "grad_norm": 2.1542460918426514, "learning_rate": 1.8336823664677613e-05, "loss": 0.2234, "step": 8595 }, { "epoch": 0.1863367495070743, "grad_norm": 1.9572583436965942, "learning_rate": 1.8334943643721717e-05, "loss": 0.2421, "step": 8600 }, { "epoch": 0.18644508482655514, "grad_norm": 1.5250073671340942, "learning_rate": 1.833306265728746e-05, "loss": 0.2057, "step": 8605 }, { "epoch": 0.18655342014603601, "grad_norm": 1.7846856117248535, "learning_rate": 1.8331180705592716e-05, "loss": 0.2048, "step": 8610 }, { "epoch": 0.18666175546551686, "grad_norm": 1.5758588314056396, "learning_rate": 1.832929778885549e-05, "loss": 0.2744, "step": 8615 }, { "epoch": 0.18677009078499773, "grad_norm": 2.3877835273742676, "learning_rate": 1.8327413907293892e-05, "loss": 0.2768, "step": 8620 }, { "epoch": 0.18687842610447858, "grad_norm": 1.8720277547836304, "learning_rate": 1.8325529061126138e-05, "loss": 0.2431, "step": 8625 }, { "epoch": 0.18698676142395945, "grad_norm": 1.6180672645568848, "learning_rate": 1.8323643250570553e-05, "loss": 0.1477, "step": 8630 }, { "epoch": 0.1870950967434403, "grad_norm": 2.352593421936035, "learning_rate": 1.832175647584559e-05, "loss": 0.2386, "step": 8635 }, { "epoch": 0.18720343206292114, "grad_norm": 2.147777557373047, "learning_rate": 1.83198687371698e-05, "loss": 0.3307, "step": 8640 }, { "epoch": 0.18731176738240202, "grad_norm": 1.7921454906463623, "learning_rate": 1.8317980034761844e-05, "loss": 0.2016, "step": 8645 }, { "epoch": 0.18742010270188286, "grad_norm": 2.097470998764038, "learning_rate": 1.831609036884051e-05, "loss": 0.1977, "step": 8650 }, { "epoch": 0.18752843802136374, "grad_norm": 2.018691301345825, "learning_rate": 1.8314199739624676e-05, "loss": 0.1659, "step": 8655 }, { "epoch": 0.18763677334084458, "grad_norm": 1.3829762935638428, "learning_rate": 1.8312308147333354e-05, "loss": 0.2534, "step": 8660 }, { "epoch": 0.18774510866032543, "grad_norm": 1.910223126411438, "learning_rate": 1.8310415592185647e-05, "loss": 0.2492, "step": 8665 }, { "epoch": 0.1878534439798063, "grad_norm": 1.8315904140472412, "learning_rate": 1.830852207440079e-05, "loss": 0.2579, "step": 8670 }, { "epoch": 0.18796177929928715, "grad_norm": 2.036158323287964, "learning_rate": 1.8306627594198104e-05, "loss": 0.2198, "step": 8675 }, { "epoch": 0.18807011461876802, "grad_norm": 1.9097542762756348, "learning_rate": 1.8304732151797048e-05, "loss": 0.2858, "step": 8680 }, { "epoch": 0.18817844993824887, "grad_norm": 2.9772064685821533, "learning_rate": 1.830283574741718e-05, "loss": 0.1964, "step": 8685 }, { "epoch": 0.18828678525772974, "grad_norm": 1.6149227619171143, "learning_rate": 1.8300938381278163e-05, "loss": 0.1823, "step": 8690 }, { "epoch": 0.18839512057721058, "grad_norm": 1.6694883108139038, "learning_rate": 1.8299040053599786e-05, "loss": 0.2271, "step": 8695 }, { "epoch": 0.18850345589669143, "grad_norm": 2.5210678577423096, "learning_rate": 1.8297140764601934e-05, "loss": 0.2879, "step": 8700 }, { "epoch": 0.1886117912161723, "grad_norm": 1.6159617900848389, "learning_rate": 1.829524051450462e-05, "loss": 0.2173, "step": 8705 }, { "epoch": 0.18872012653565315, "grad_norm": 2.230532646179199, "learning_rate": 1.8293339303527955e-05, "loss": 0.2899, "step": 8710 }, { "epoch": 0.18882846185513402, "grad_norm": 2.1863889694213867, "learning_rate": 1.8291437131892165e-05, "loss": 0.2363, "step": 8715 }, { "epoch": 0.18893679717461487, "grad_norm": 1.3983858823776245, "learning_rate": 1.8289533999817588e-05, "loss": 0.2298, "step": 8720 }, { "epoch": 0.1890451324940957, "grad_norm": 2.1253209114074707, "learning_rate": 1.8287629907524673e-05, "loss": 0.2718, "step": 8725 }, { "epoch": 0.1891534678135766, "grad_norm": 2.0926361083984375, "learning_rate": 1.8285724855233984e-05, "loss": 0.297, "step": 8730 }, { "epoch": 0.18926180313305743, "grad_norm": 1.5034738779067993, "learning_rate": 1.828381884316619e-05, "loss": 0.2235, "step": 8735 }, { "epoch": 0.1893701384525383, "grad_norm": 1.6526416540145874, "learning_rate": 1.8281911871542075e-05, "loss": 0.2993, "step": 8740 }, { "epoch": 0.18947847377201915, "grad_norm": 1.8494867086410522, "learning_rate": 1.828000394058253e-05, "loss": 0.3474, "step": 8745 }, { "epoch": 0.1895868090915, "grad_norm": 2.2829842567443848, "learning_rate": 1.8278095050508568e-05, "loss": 0.2647, "step": 8750 }, { "epoch": 0.18969514441098087, "grad_norm": 2.3453445434570312, "learning_rate": 1.82761852015413e-05, "loss": 0.3126, "step": 8755 }, { "epoch": 0.18980347973046172, "grad_norm": 2.1737141609191895, "learning_rate": 1.827427439390195e-05, "loss": 0.2521, "step": 8760 }, { "epoch": 0.1899118150499426, "grad_norm": 1.7016100883483887, "learning_rate": 1.827236262781186e-05, "loss": 0.269, "step": 8765 }, { "epoch": 0.19002015036942344, "grad_norm": 1.8975030183792114, "learning_rate": 1.8270449903492482e-05, "loss": 0.3179, "step": 8770 }, { "epoch": 0.1901284856889043, "grad_norm": 1.7488764524459839, "learning_rate": 1.8268536221165373e-05, "loss": 0.3469, "step": 8775 }, { "epoch": 0.19023682100838515, "grad_norm": 2.773061752319336, "learning_rate": 1.8266621581052204e-05, "loss": 0.2227, "step": 8780 }, { "epoch": 0.190345156327866, "grad_norm": 1.8072088956832886, "learning_rate": 1.826470598337476e-05, "loss": 0.2957, "step": 8785 }, { "epoch": 0.19045349164734687, "grad_norm": 2.6144826412200928, "learning_rate": 1.8262789428354937e-05, "loss": 0.1777, "step": 8790 }, { "epoch": 0.19056182696682772, "grad_norm": 2.0642220973968506, "learning_rate": 1.826087191621473e-05, "loss": 0.3205, "step": 8795 }, { "epoch": 0.1906701622863086, "grad_norm": 1.7987383604049683, "learning_rate": 1.8258953447176263e-05, "loss": 0.3273, "step": 8800 }, { "epoch": 0.19077849760578944, "grad_norm": 1.8837794065475464, "learning_rate": 1.8257034021461756e-05, "loss": 0.3228, "step": 8805 }, { "epoch": 0.19088683292527028, "grad_norm": 1.4928518533706665, "learning_rate": 1.8255113639293546e-05, "loss": 0.2122, "step": 8810 }, { "epoch": 0.19099516824475116, "grad_norm": 2.153987407684326, "learning_rate": 1.8253192300894084e-05, "loss": 0.3356, "step": 8815 }, { "epoch": 0.191103503564232, "grad_norm": 2.0758979320526123, "learning_rate": 1.825127000648593e-05, "loss": 0.278, "step": 8820 }, { "epoch": 0.19121183888371288, "grad_norm": 1.8728705644607544, "learning_rate": 1.824934675629175e-05, "loss": 0.2646, "step": 8825 }, { "epoch": 0.19132017420319372, "grad_norm": 1.4525476694107056, "learning_rate": 1.8247422550534317e-05, "loss": 0.2596, "step": 8830 }, { "epoch": 0.1914285095226746, "grad_norm": 1.3276370763778687, "learning_rate": 1.8245497389436532e-05, "loss": 0.2882, "step": 8835 }, { "epoch": 0.19153684484215544, "grad_norm": 1.1183146238327026, "learning_rate": 1.8243571273221394e-05, "loss": 0.2059, "step": 8840 }, { "epoch": 0.19164518016163629, "grad_norm": 1.652922511100769, "learning_rate": 1.8241644202112007e-05, "loss": 0.2263, "step": 8845 }, { "epoch": 0.19175351548111716, "grad_norm": 2.181920051574707, "learning_rate": 1.8239716176331604e-05, "loss": 0.3035, "step": 8850 }, { "epoch": 0.191861850800598, "grad_norm": 2.2790744304656982, "learning_rate": 1.823778719610351e-05, "loss": 0.2357, "step": 8855 }, { "epoch": 0.19197018612007888, "grad_norm": 3.0147647857666016, "learning_rate": 1.8235857261651176e-05, "loss": 0.2273, "step": 8860 }, { "epoch": 0.19207852143955972, "grad_norm": 3.151327133178711, "learning_rate": 1.8233926373198145e-05, "loss": 0.2499, "step": 8865 }, { "epoch": 0.19218685675904057, "grad_norm": 2.529648542404175, "learning_rate": 1.8231994530968093e-05, "loss": 0.2583, "step": 8870 }, { "epoch": 0.19229519207852144, "grad_norm": 1.4268088340759277, "learning_rate": 1.8230061735184788e-05, "loss": 0.2096, "step": 8875 }, { "epoch": 0.1924035273980023, "grad_norm": 2.1276042461395264, "learning_rate": 1.8228127986072114e-05, "loss": 0.2884, "step": 8880 }, { "epoch": 0.19251186271748316, "grad_norm": 2.1410300731658936, "learning_rate": 1.8226193283854076e-05, "loss": 0.2627, "step": 8885 }, { "epoch": 0.192620198036964, "grad_norm": 1.9997820854187012, "learning_rate": 1.8224257628754773e-05, "loss": 0.3018, "step": 8890 }, { "epoch": 0.19272853335644488, "grad_norm": 1.9604012966156006, "learning_rate": 1.8222321020998422e-05, "loss": 0.2444, "step": 8895 }, { "epoch": 0.19283686867592573, "grad_norm": 1.4487924575805664, "learning_rate": 1.8220383460809348e-05, "loss": 0.1562, "step": 8900 }, { "epoch": 0.19294520399540657, "grad_norm": 1.9850802421569824, "learning_rate": 1.8218444948411995e-05, "loss": 0.2074, "step": 8905 }, { "epoch": 0.19305353931488745, "grad_norm": 2.0008010864257812, "learning_rate": 1.8216505484030907e-05, "loss": 0.2975, "step": 8910 }, { "epoch": 0.1931618746343683, "grad_norm": 1.6665109395980835, "learning_rate": 1.821456506789074e-05, "loss": 0.2336, "step": 8915 }, { "epoch": 0.19327020995384916, "grad_norm": 1.2810604572296143, "learning_rate": 1.8212623700216264e-05, "loss": 0.28, "step": 8920 }, { "epoch": 0.19337854527333, "grad_norm": 1.65047025680542, "learning_rate": 1.821068138123236e-05, "loss": 0.2232, "step": 8925 }, { "epoch": 0.19348688059281086, "grad_norm": 1.4241520166397095, "learning_rate": 1.8208738111164016e-05, "loss": 0.1742, "step": 8930 }, { "epoch": 0.19359521591229173, "grad_norm": 2.0878701210021973, "learning_rate": 1.8206793890236324e-05, "loss": 0.2335, "step": 8935 }, { "epoch": 0.19370355123177257, "grad_norm": 1.9241973161697388, "learning_rate": 1.82048487186745e-05, "loss": 0.2515, "step": 8940 }, { "epoch": 0.19381188655125345, "grad_norm": 2.8639566898345947, "learning_rate": 1.8202902596703856e-05, "loss": 0.2876, "step": 8945 }, { "epoch": 0.1939202218707343, "grad_norm": 1.651173710823059, "learning_rate": 1.820095552454983e-05, "loss": 0.235, "step": 8950 }, { "epoch": 0.19402855719021517, "grad_norm": 1.9255871772766113, "learning_rate": 1.8199007502437958e-05, "loss": 0.2637, "step": 8955 }, { "epoch": 0.194136892509696, "grad_norm": 1.696498990058899, "learning_rate": 1.8197058530593884e-05, "loss": 0.2659, "step": 8960 }, { "epoch": 0.19424522782917686, "grad_norm": 2.361424446105957, "learning_rate": 1.8195108609243375e-05, "loss": 0.2234, "step": 8965 }, { "epoch": 0.19435356314865773, "grad_norm": 2.915388584136963, "learning_rate": 1.8193157738612293e-05, "loss": 0.2051, "step": 8970 }, { "epoch": 0.19446189846813858, "grad_norm": 1.6178172826766968, "learning_rate": 1.8191205918926624e-05, "loss": 0.2581, "step": 8975 }, { "epoch": 0.19457023378761945, "grad_norm": 2.258880138397217, "learning_rate": 1.818925315041245e-05, "loss": 0.2835, "step": 8980 }, { "epoch": 0.1946785691071003, "grad_norm": 2.3192484378814697, "learning_rate": 1.8187299433295976e-05, "loss": 0.172, "step": 8985 }, { "epoch": 0.19478690442658114, "grad_norm": 2.01814341545105, "learning_rate": 1.8185344767803505e-05, "loss": 0.2338, "step": 8990 }, { "epoch": 0.19489523974606202, "grad_norm": 1.831773042678833, "learning_rate": 1.8183389154161463e-05, "loss": 0.2535, "step": 8995 }, { "epoch": 0.19500357506554286, "grad_norm": 1.7647877931594849, "learning_rate": 1.8181432592596372e-05, "loss": 0.2097, "step": 9000 }, { "epoch": 0.19511191038502373, "grad_norm": 3.0379703044891357, "learning_rate": 1.8179475083334875e-05, "loss": 0.2816, "step": 9005 }, { "epoch": 0.19522024570450458, "grad_norm": 1.7395471334457397, "learning_rate": 1.8177516626603716e-05, "loss": 0.2919, "step": 9010 }, { "epoch": 0.19532858102398543, "grad_norm": 2.1579458713531494, "learning_rate": 1.8175557222629757e-05, "loss": 0.2356, "step": 9015 }, { "epoch": 0.1954369163434663, "grad_norm": 2.1208672523498535, "learning_rate": 1.8173596871639963e-05, "loss": 0.2304, "step": 9020 }, { "epoch": 0.19554525166294714, "grad_norm": 2.24867844581604, "learning_rate": 1.8171635573861413e-05, "loss": 0.3572, "step": 9025 }, { "epoch": 0.19565358698242802, "grad_norm": 2.052788019180298, "learning_rate": 1.816967332952129e-05, "loss": 0.197, "step": 9030 }, { "epoch": 0.19576192230190886, "grad_norm": 1.402293086051941, "learning_rate": 1.8167710138846897e-05, "loss": 0.2538, "step": 9035 }, { "epoch": 0.19587025762138974, "grad_norm": 1.9557771682739258, "learning_rate": 1.8165746002065633e-05, "loss": 0.2595, "step": 9040 }, { "epoch": 0.19597859294087058, "grad_norm": 2.33656907081604, "learning_rate": 1.816378091940502e-05, "loss": 0.2397, "step": 9045 }, { "epoch": 0.19608692826035143, "grad_norm": 1.4047679901123047, "learning_rate": 1.8161814891092682e-05, "loss": 0.1387, "step": 9050 }, { "epoch": 0.1961952635798323, "grad_norm": 1.5043069124221802, "learning_rate": 1.8159847917356347e-05, "loss": 0.2014, "step": 9055 }, { "epoch": 0.19630359889931315, "grad_norm": 1.9788732528686523, "learning_rate": 1.815787999842387e-05, "loss": 0.2601, "step": 9060 }, { "epoch": 0.19641193421879402, "grad_norm": 1.6597152948379517, "learning_rate": 1.81559111345232e-05, "loss": 0.3191, "step": 9065 }, { "epoch": 0.19652026953827487, "grad_norm": 2.79023814201355, "learning_rate": 1.81539413258824e-05, "loss": 0.2295, "step": 9070 }, { "epoch": 0.1966286048577557, "grad_norm": 1.156213641166687, "learning_rate": 1.8151970572729645e-05, "loss": 0.2105, "step": 9075 }, { "epoch": 0.19673694017723659, "grad_norm": 1.821634292602539, "learning_rate": 1.8149998875293214e-05, "loss": 0.2591, "step": 9080 }, { "epoch": 0.19684527549671743, "grad_norm": 1.7714136838912964, "learning_rate": 1.81480262338015e-05, "loss": 0.2732, "step": 9085 }, { "epoch": 0.1969536108161983, "grad_norm": 1.3465383052825928, "learning_rate": 1.8146052648483004e-05, "loss": 0.252, "step": 9090 }, { "epoch": 0.19706194613567915, "grad_norm": 1.7086254358291626, "learning_rate": 1.814407811956634e-05, "loss": 0.2297, "step": 9095 }, { "epoch": 0.19717028145516002, "grad_norm": 1.800577998161316, "learning_rate": 1.814210264728022e-05, "loss": 0.221, "step": 9100 }, { "epoch": 0.19727861677464087, "grad_norm": 1.8198038339614868, "learning_rate": 1.8140126231853477e-05, "loss": 0.2005, "step": 9105 }, { "epoch": 0.19738695209412171, "grad_norm": 2.5595216751098633, "learning_rate": 1.8138148873515053e-05, "loss": 0.2323, "step": 9110 }, { "epoch": 0.1974952874136026, "grad_norm": 2.07646107673645, "learning_rate": 1.813617057249399e-05, "loss": 0.1775, "step": 9115 }, { "epoch": 0.19760362273308343, "grad_norm": 1.7302916049957275, "learning_rate": 1.8134191329019444e-05, "loss": 0.1958, "step": 9120 }, { "epoch": 0.1977119580525643, "grad_norm": 2.0247080326080322, "learning_rate": 1.8132211143320684e-05, "loss": 0.2422, "step": 9125 }, { "epoch": 0.19782029337204515, "grad_norm": 1.197278380393982, "learning_rate": 1.813023001562708e-05, "loss": 0.2161, "step": 9130 }, { "epoch": 0.197928628691526, "grad_norm": 1.3880797624588013, "learning_rate": 1.8128247946168124e-05, "loss": 0.2235, "step": 9135 }, { "epoch": 0.19803696401100687, "grad_norm": 1.4864047765731812, "learning_rate": 1.8126264935173405e-05, "loss": 0.1952, "step": 9140 }, { "epoch": 0.19814529933048772, "grad_norm": 1.151265263557434, "learning_rate": 1.8124280982872624e-05, "loss": 0.2236, "step": 9145 }, { "epoch": 0.1982536346499686, "grad_norm": 1.6044278144836426, "learning_rate": 1.8122296089495594e-05, "loss": 0.1755, "step": 9150 }, { "epoch": 0.19836196996944944, "grad_norm": 1.8486298322677612, "learning_rate": 1.8120310255272227e-05, "loss": 0.2652, "step": 9155 }, { "epoch": 0.1984703052889303, "grad_norm": 0.9308688044548035, "learning_rate": 1.8118323480432566e-05, "loss": 0.1489, "step": 9160 }, { "epoch": 0.19857864060841116, "grad_norm": 1.8588229417800903, "learning_rate": 1.811633576520674e-05, "loss": 0.2424, "step": 9165 }, { "epoch": 0.198686975927892, "grad_norm": 2.0817711353302, "learning_rate": 1.8114347109825e-05, "loss": 0.252, "step": 9170 }, { "epoch": 0.19879531124737287, "grad_norm": 1.8784358501434326, "learning_rate": 1.8112357514517697e-05, "loss": 0.2549, "step": 9175 }, { "epoch": 0.19890364656685372, "grad_norm": 1.6468122005462646, "learning_rate": 1.8110366979515303e-05, "loss": 0.2811, "step": 9180 }, { "epoch": 0.1990119818863346, "grad_norm": 1.8065681457519531, "learning_rate": 1.8108375505048385e-05, "loss": 0.2659, "step": 9185 }, { "epoch": 0.19912031720581544, "grad_norm": 1.8494912385940552, "learning_rate": 1.810638309134763e-05, "loss": 0.2378, "step": 9190 }, { "epoch": 0.19922865252529628, "grad_norm": 2.219125986099243, "learning_rate": 1.8104389738643825e-05, "loss": 0.2274, "step": 9195 }, { "epoch": 0.19933698784477716, "grad_norm": 2.3860929012298584, "learning_rate": 1.8102395447167874e-05, "loss": 0.21, "step": 9200 }, { "epoch": 0.199445323164258, "grad_norm": 1.8623318672180176, "learning_rate": 1.8100400217150788e-05, "loss": 0.1339, "step": 9205 }, { "epoch": 0.19955365848373888, "grad_norm": 2.8262577056884766, "learning_rate": 1.8098404048823674e-05, "loss": 0.2376, "step": 9210 }, { "epoch": 0.19966199380321972, "grad_norm": 2.1003501415252686, "learning_rate": 1.809640694241777e-05, "loss": 0.1981, "step": 9215 }, { "epoch": 0.1997703291227006, "grad_norm": 1.6956311464309692, "learning_rate": 1.8094408898164402e-05, "loss": 0.2753, "step": 9220 }, { "epoch": 0.19987866444218144, "grad_norm": 1.3600330352783203, "learning_rate": 1.8092409916295022e-05, "loss": 0.2835, "step": 9225 }, { "epoch": 0.1999869997616623, "grad_norm": 2.678321599960327, "learning_rate": 1.8090409997041174e-05, "loss": 0.2759, "step": 9230 }, { "epoch": 0.20009533508114316, "grad_norm": 2.0113136768341064, "learning_rate": 1.8088409140634523e-05, "loss": 0.357, "step": 9235 }, { "epoch": 0.200203670400624, "grad_norm": 1.9382045269012451, "learning_rate": 1.808640734730684e-05, "loss": 0.2118, "step": 9240 }, { "epoch": 0.20031200572010488, "grad_norm": 2.0002188682556152, "learning_rate": 1.8084404617289995e-05, "loss": 0.2666, "step": 9245 }, { "epoch": 0.20042034103958573, "grad_norm": 1.9307124614715576, "learning_rate": 1.8082400950815983e-05, "loss": 0.2102, "step": 9250 }, { "epoch": 0.20052867635906657, "grad_norm": 2.6050570011138916, "learning_rate": 1.8080396348116894e-05, "loss": 0.265, "step": 9255 }, { "epoch": 0.20063701167854744, "grad_norm": 2.5237503051757812, "learning_rate": 1.8078390809424934e-05, "loss": 0.2602, "step": 9260 }, { "epoch": 0.2007453469980283, "grad_norm": 1.7996548414230347, "learning_rate": 1.807638433497241e-05, "loss": 0.2127, "step": 9265 }, { "epoch": 0.20085368231750916, "grad_norm": 1.1467217206954956, "learning_rate": 1.8074376924991748e-05, "loss": 0.2389, "step": 9270 }, { "epoch": 0.20096201763699, "grad_norm": 2.51582407951355, "learning_rate": 1.807236857971547e-05, "loss": 0.3465, "step": 9275 }, { "epoch": 0.20107035295647088, "grad_norm": 2.9178216457366943, "learning_rate": 1.807035929937622e-05, "loss": 0.2393, "step": 9280 }, { "epoch": 0.20117868827595173, "grad_norm": 1.9995671510696411, "learning_rate": 1.806834908420674e-05, "loss": 0.2882, "step": 9285 }, { "epoch": 0.20128702359543257, "grad_norm": 1.459763526916504, "learning_rate": 1.8066337934439878e-05, "loss": 0.2239, "step": 9290 }, { "epoch": 0.20139535891491345, "grad_norm": 1.7000337839126587, "learning_rate": 1.80643258503086e-05, "loss": 0.2399, "step": 9295 }, { "epoch": 0.2015036942343943, "grad_norm": 2.255579710006714, "learning_rate": 1.806231283204598e-05, "loss": 0.2785, "step": 9300 }, { "epoch": 0.20161202955387517, "grad_norm": 1.8963652849197388, "learning_rate": 1.806029887988519e-05, "loss": 0.2326, "step": 9305 }, { "epoch": 0.201720364873356, "grad_norm": 1.6848448514938354, "learning_rate": 1.8058283994059516e-05, "loss": 0.224, "step": 9310 }, { "epoch": 0.20182870019283686, "grad_norm": 2.6495256423950195, "learning_rate": 1.8056268174802356e-05, "loss": 0.2726, "step": 9315 }, { "epoch": 0.20193703551231773, "grad_norm": 1.7186658382415771, "learning_rate": 1.8054251422347213e-05, "loss": 0.1933, "step": 9320 }, { "epoch": 0.20204537083179858, "grad_norm": 1.6817262172698975, "learning_rate": 1.805223373692769e-05, "loss": 0.2848, "step": 9325 }, { "epoch": 0.20215370615127945, "grad_norm": 2.2894630432128906, "learning_rate": 1.8050215118777516e-05, "loss": 0.2594, "step": 9330 }, { "epoch": 0.2022620414707603, "grad_norm": 1.6995577812194824, "learning_rate": 1.804819556813051e-05, "loss": 0.2469, "step": 9335 }, { "epoch": 0.20237037679024114, "grad_norm": 1.4856315851211548, "learning_rate": 1.804617508522061e-05, "loss": 0.1675, "step": 9340 }, { "epoch": 0.20247871210972201, "grad_norm": 1.8602029085159302, "learning_rate": 1.8044153670281858e-05, "loss": 0.2426, "step": 9345 }, { "epoch": 0.20258704742920286, "grad_norm": 1.493239402770996, "learning_rate": 1.8042131323548408e-05, "loss": 0.2856, "step": 9350 }, { "epoch": 0.20269538274868373, "grad_norm": 2.2108542919158936, "learning_rate": 1.8040108045254513e-05, "loss": 0.2189, "step": 9355 }, { "epoch": 0.20280371806816458, "grad_norm": 1.3134450912475586, "learning_rate": 1.803808383563454e-05, "loss": 0.2082, "step": 9360 }, { "epoch": 0.20291205338764545, "grad_norm": 0.7481719255447388, "learning_rate": 1.8036058694922967e-05, "loss": 0.1516, "step": 9365 }, { "epoch": 0.2030203887071263, "grad_norm": 2.200680732727051, "learning_rate": 1.8034032623354373e-05, "loss": 0.2875, "step": 9370 }, { "epoch": 0.20312872402660714, "grad_norm": 2.347028970718384, "learning_rate": 1.803200562116345e-05, "loss": 0.2129, "step": 9375 }, { "epoch": 0.20323705934608802, "grad_norm": 2.0814740657806396, "learning_rate": 1.8029977688584998e-05, "loss": 0.1265, "step": 9380 }, { "epoch": 0.20334539466556886, "grad_norm": 2.060615301132202, "learning_rate": 1.8027948825853917e-05, "loss": 0.2487, "step": 9385 }, { "epoch": 0.20345372998504974, "grad_norm": 1.925205945968628, "learning_rate": 1.802591903320522e-05, "loss": 0.1794, "step": 9390 }, { "epoch": 0.20356206530453058, "grad_norm": 1.6366618871688843, "learning_rate": 1.8023888310874037e-05, "loss": 0.2783, "step": 9395 }, { "epoch": 0.20367040062401143, "grad_norm": 2.938725709915161, "learning_rate": 1.8021856659095588e-05, "loss": 0.2303, "step": 9400 }, { "epoch": 0.2037787359434923, "grad_norm": 1.980095386505127, "learning_rate": 1.8019824078105212e-05, "loss": 0.1497, "step": 9405 }, { "epoch": 0.20388707126297315, "grad_norm": 2.1199352741241455, "learning_rate": 1.8017790568138352e-05, "loss": 0.2498, "step": 9410 }, { "epoch": 0.20399540658245402, "grad_norm": 1.9812626838684082, "learning_rate": 1.8015756129430565e-05, "loss": 0.2413, "step": 9415 }, { "epoch": 0.20410374190193487, "grad_norm": 1.5217387676239014, "learning_rate": 1.8013720762217507e-05, "loss": 0.1687, "step": 9420 }, { "epoch": 0.20421207722141574, "grad_norm": 3.492187976837158, "learning_rate": 1.8011684466734943e-05, "loss": 0.1643, "step": 9425 }, { "epoch": 0.20432041254089658, "grad_norm": 1.8812798261642456, "learning_rate": 1.8009647243218748e-05, "loss": 0.216, "step": 9430 }, { "epoch": 0.20442874786037743, "grad_norm": 2.489347457885742, "learning_rate": 1.8007609091904906e-05, "loss": 0.2287, "step": 9435 }, { "epoch": 0.2045370831798583, "grad_norm": 1.9548838138580322, "learning_rate": 1.8005570013029502e-05, "loss": 0.2284, "step": 9440 }, { "epoch": 0.20464541849933915, "grad_norm": 2.717435598373413, "learning_rate": 1.8003530006828736e-05, "loss": 0.2265, "step": 9445 }, { "epoch": 0.20475375381882002, "grad_norm": 2.736449956893921, "learning_rate": 1.8001489073538913e-05, "loss": 0.2145, "step": 9450 }, { "epoch": 0.20486208913830087, "grad_norm": 2.3662068843841553, "learning_rate": 1.799944721339644e-05, "loss": 0.1856, "step": 9455 }, { "epoch": 0.2049704244577817, "grad_norm": 1.7388263940811157, "learning_rate": 1.7997404426637843e-05, "loss": 0.1776, "step": 9460 }, { "epoch": 0.2050787597772626, "grad_norm": 1.9887199401855469, "learning_rate": 1.7995360713499742e-05, "loss": 0.2332, "step": 9465 }, { "epoch": 0.20518709509674343, "grad_norm": 2.2277698516845703, "learning_rate": 1.7993316074218873e-05, "loss": 0.1953, "step": 9470 }, { "epoch": 0.2052954304162243, "grad_norm": 2.0185294151306152, "learning_rate": 1.7991270509032076e-05, "loss": 0.2283, "step": 9475 }, { "epoch": 0.20540376573570515, "grad_norm": 1.583186149597168, "learning_rate": 1.79892240181763e-05, "loss": 0.2443, "step": 9480 }, { "epoch": 0.20551210105518603, "grad_norm": 1.2814081907272339, "learning_rate": 1.7987176601888602e-05, "loss": 0.1914, "step": 9485 }, { "epoch": 0.20562043637466687, "grad_norm": 2.027348041534424, "learning_rate": 1.7985128260406143e-05, "loss": 0.2188, "step": 9490 }, { "epoch": 0.20572877169414772, "grad_norm": 1.5702438354492188, "learning_rate": 1.798307899396619e-05, "loss": 0.2328, "step": 9495 }, { "epoch": 0.2058371070136286, "grad_norm": 1.7143665552139282, "learning_rate": 1.798102880280612e-05, "loss": 0.2288, "step": 9500 }, { "epoch": 0.20594544233310944, "grad_norm": 1.1564536094665527, "learning_rate": 1.7978977687163426e-05, "loss": 0.2333, "step": 9505 }, { "epoch": 0.2060537776525903, "grad_norm": 2.1369850635528564, "learning_rate": 1.7976925647275686e-05, "loss": 0.1961, "step": 9510 }, { "epoch": 0.20616211297207115, "grad_norm": 2.197690725326538, "learning_rate": 1.7974872683380608e-05, "loss": 0.2551, "step": 9515 }, { "epoch": 0.206270448291552, "grad_norm": 3.106978178024292, "learning_rate": 1.7972818795715986e-05, "loss": 0.2045, "step": 9520 }, { "epoch": 0.20637878361103287, "grad_norm": 1.5762053728103638, "learning_rate": 1.7970763984519747e-05, "loss": 0.2157, "step": 9525 }, { "epoch": 0.20648711893051372, "grad_norm": 2.1071438789367676, "learning_rate": 1.79687082500299e-05, "loss": 0.2934, "step": 9530 }, { "epoch": 0.2065954542499946, "grad_norm": 2.0333657264709473, "learning_rate": 1.7966651592484572e-05, "loss": 0.2709, "step": 9535 }, { "epoch": 0.20670378956947544, "grad_norm": 2.662667989730835, "learning_rate": 1.7964594012122e-05, "loss": 0.2802, "step": 9540 }, { "epoch": 0.2068121248889563, "grad_norm": 2.587235450744629, "learning_rate": 1.796253550918052e-05, "loss": 0.2453, "step": 9545 }, { "epoch": 0.20692046020843716, "grad_norm": 3.9278979301452637, "learning_rate": 1.796047608389858e-05, "loss": 0.1843, "step": 9550 }, { "epoch": 0.207028795527918, "grad_norm": 1.2565028667449951, "learning_rate": 1.7958415736514733e-05, "loss": 0.1382, "step": 9555 }, { "epoch": 0.20713713084739888, "grad_norm": 1.906050443649292, "learning_rate": 1.795635446726764e-05, "loss": 0.2587, "step": 9560 }, { "epoch": 0.20724546616687972, "grad_norm": 2.415365695953369, "learning_rate": 1.7954292276396073e-05, "loss": 0.2459, "step": 9565 }, { "epoch": 0.2073538014863606, "grad_norm": 2.3140714168548584, "learning_rate": 1.7952229164138895e-05, "loss": 0.2101, "step": 9570 }, { "epoch": 0.20746213680584144, "grad_norm": 1.878907561302185, "learning_rate": 1.7950165130735094e-05, "loss": 0.2837, "step": 9575 }, { "epoch": 0.2075704721253223, "grad_norm": 1.37799870967865, "learning_rate": 1.7948100176423758e-05, "loss": 0.2883, "step": 9580 }, { "epoch": 0.20767880744480316, "grad_norm": 2.178032875061035, "learning_rate": 1.7946034301444078e-05, "loss": 0.1711, "step": 9585 }, { "epoch": 0.207787142764284, "grad_norm": 1.9754008054733276, "learning_rate": 1.7943967506035354e-05, "loss": 0.221, "step": 9590 }, { "epoch": 0.20789547808376488, "grad_norm": 1.8354995250701904, "learning_rate": 1.7941899790436997e-05, "loss": 0.1711, "step": 9595 }, { "epoch": 0.20800381340324572, "grad_norm": 2.5123252868652344, "learning_rate": 1.7939831154888518e-05, "loss": 0.1999, "step": 9600 }, { "epoch": 0.20811214872272657, "grad_norm": 1.782740592956543, "learning_rate": 1.793776159962954e-05, "loss": 0.2427, "step": 9605 }, { "epoch": 0.20822048404220744, "grad_norm": 2.2083511352539062, "learning_rate": 1.7935691124899786e-05, "loss": 0.1858, "step": 9610 }, { "epoch": 0.2083288193616883, "grad_norm": 2.1114261150360107, "learning_rate": 1.7933619730939095e-05, "loss": 0.3684, "step": 9615 }, { "epoch": 0.20843715468116916, "grad_norm": 2.525428056716919, "learning_rate": 1.79315474179874e-05, "loss": 0.2812, "step": 9620 }, { "epoch": 0.20854549000065, "grad_norm": 2.08176851272583, "learning_rate": 1.7929474186284755e-05, "loss": 0.2834, "step": 9625 }, { "epoch": 0.20865382532013088, "grad_norm": 1.5758028030395508, "learning_rate": 1.7927400036071305e-05, "loss": 0.2903, "step": 9630 }, { "epoch": 0.20876216063961173, "grad_norm": 1.549129843711853, "learning_rate": 1.7925324967587316e-05, "loss": 0.1769, "step": 9635 }, { "epoch": 0.20887049595909257, "grad_norm": 1.6997283697128296, "learning_rate": 1.7923248981073153e-05, "loss": 0.2573, "step": 9640 }, { "epoch": 0.20897883127857345, "grad_norm": 2.030553102493286, "learning_rate": 1.792117207676928e-05, "loss": 0.3259, "step": 9645 }, { "epoch": 0.2090871665980543, "grad_norm": 2.0757508277893066, "learning_rate": 1.7919094254916286e-05, "loss": 0.1921, "step": 9650 }, { "epoch": 0.20919550191753516, "grad_norm": 1.9520487785339355, "learning_rate": 1.7917015515754847e-05, "loss": 0.294, "step": 9655 }, { "epoch": 0.209303837237016, "grad_norm": 2.004873752593994, "learning_rate": 1.791493585952576e-05, "loss": 0.213, "step": 9660 }, { "epoch": 0.20941217255649686, "grad_norm": 2.0481455326080322, "learning_rate": 1.791285528646992e-05, "loss": 0.2867, "step": 9665 }, { "epoch": 0.20952050787597773, "grad_norm": 2.1476192474365234, "learning_rate": 1.7910773796828326e-05, "loss": 0.1615, "step": 9670 }, { "epoch": 0.20962884319545858, "grad_norm": 2.0390872955322266, "learning_rate": 1.7908691390842095e-05, "loss": 0.1705, "step": 9675 }, { "epoch": 0.20973717851493945, "grad_norm": 2.5919013023376465, "learning_rate": 1.7906608068752435e-05, "loss": 0.172, "step": 9680 }, { "epoch": 0.2098455138344203, "grad_norm": 4.929595947265625, "learning_rate": 1.7904523830800673e-05, "loss": 0.25, "step": 9685 }, { "epoch": 0.20995384915390117, "grad_norm": 1.46424400806427, "learning_rate": 1.7902438677228233e-05, "loss": 0.1608, "step": 9690 }, { "epoch": 0.210062184473382, "grad_norm": 0.9332728981971741, "learning_rate": 1.7900352608276654e-05, "loss": 0.1691, "step": 9695 }, { "epoch": 0.21017051979286286, "grad_norm": 1.1870920658111572, "learning_rate": 1.7898265624187573e-05, "loss": 0.2071, "step": 9700 }, { "epoch": 0.21027885511234373, "grad_norm": 1.8550817966461182, "learning_rate": 1.789617772520273e-05, "loss": 0.2049, "step": 9705 }, { "epoch": 0.21038719043182458, "grad_norm": 2.4140567779541016, "learning_rate": 1.789408891156399e-05, "loss": 0.2505, "step": 9710 }, { "epoch": 0.21049552575130545, "grad_norm": 2.409695863723755, "learning_rate": 1.7891999183513298e-05, "loss": 0.2274, "step": 9715 }, { "epoch": 0.2106038610707863, "grad_norm": 1.2218377590179443, "learning_rate": 1.7889908541292724e-05, "loss": 0.1839, "step": 9720 }, { "epoch": 0.21071219639026714, "grad_norm": 1.7677817344665527, "learning_rate": 1.7887816985144436e-05, "loss": 0.1192, "step": 9725 }, { "epoch": 0.21082053170974802, "grad_norm": 2.2373945713043213, "learning_rate": 1.7885724515310708e-05, "loss": 0.2398, "step": 9730 }, { "epoch": 0.21092886702922886, "grad_norm": 2.353170871734619, "learning_rate": 1.7883631132033925e-05, "loss": 0.1896, "step": 9735 }, { "epoch": 0.21103720234870973, "grad_norm": 1.7801687717437744, "learning_rate": 1.7881536835556572e-05, "loss": 0.2437, "step": 9740 }, { "epoch": 0.21114553766819058, "grad_norm": 1.7285305261611938, "learning_rate": 1.7879441626121245e-05, "loss": 0.1756, "step": 9745 }, { "epoch": 0.21125387298767145, "grad_norm": 1.6032384634017944, "learning_rate": 1.7877345503970633e-05, "loss": 0.3128, "step": 9750 }, { "epoch": 0.2113622083071523, "grad_norm": 2.0973992347717285, "learning_rate": 1.7875248469347552e-05, "loss": 0.2875, "step": 9755 }, { "epoch": 0.21147054362663315, "grad_norm": 2.6195220947265625, "learning_rate": 1.7873150522494906e-05, "loss": 0.1621, "step": 9760 }, { "epoch": 0.21157887894611402, "grad_norm": 1.9989840984344482, "learning_rate": 1.7871051663655713e-05, "loss": 0.2167, "step": 9765 }, { "epoch": 0.21168721426559486, "grad_norm": 1.5157629251480103, "learning_rate": 1.786895189307309e-05, "loss": 0.2458, "step": 9770 }, { "epoch": 0.21179554958507574, "grad_norm": 1.876967191696167, "learning_rate": 1.786685121099027e-05, "loss": 0.1742, "step": 9775 }, { "epoch": 0.21190388490455658, "grad_norm": 2.324904680252075, "learning_rate": 1.786474961765058e-05, "loss": 0.307, "step": 9780 }, { "epoch": 0.21201222022403743, "grad_norm": 1.7622851133346558, "learning_rate": 1.7862647113297463e-05, "loss": 0.1881, "step": 9785 }, { "epoch": 0.2121205555435183, "grad_norm": 1.7323144674301147, "learning_rate": 1.7860543698174456e-05, "loss": 0.2469, "step": 9790 }, { "epoch": 0.21222889086299915, "grad_norm": 1.9509493112564087, "learning_rate": 1.7858439372525217e-05, "loss": 0.2176, "step": 9795 }, { "epoch": 0.21233722618248002, "grad_norm": 2.307518482208252, "learning_rate": 1.7856334136593495e-05, "loss": 0.2283, "step": 9800 }, { "epoch": 0.21244556150196087, "grad_norm": 1.9631471633911133, "learning_rate": 1.785422799062315e-05, "loss": 0.243, "step": 9805 }, { "epoch": 0.21255389682144174, "grad_norm": 2.1028172969818115, "learning_rate": 1.7852120934858154e-05, "loss": 0.2131, "step": 9810 }, { "epoch": 0.21266223214092259, "grad_norm": 2.448533058166504, "learning_rate": 1.7850012969542565e-05, "loss": 0.1777, "step": 9815 }, { "epoch": 0.21277056746040343, "grad_norm": 1.7979869842529297, "learning_rate": 1.784790409492057e-05, "loss": 0.1287, "step": 9820 }, { "epoch": 0.2128789027798843, "grad_norm": 2.265268325805664, "learning_rate": 1.7845794311236447e-05, "loss": 0.1681, "step": 9825 }, { "epoch": 0.21298723809936515, "grad_norm": 2.867924928665161, "learning_rate": 1.7843683618734583e-05, "loss": 0.3577, "step": 9830 }, { "epoch": 0.21309557341884602, "grad_norm": 1.7269723415374756, "learning_rate": 1.7841572017659474e-05, "loss": 0.2325, "step": 9835 }, { "epoch": 0.21320390873832687, "grad_norm": 2.020253896713257, "learning_rate": 1.7839459508255705e-05, "loss": 0.2802, "step": 9840 }, { "epoch": 0.21331224405780772, "grad_norm": 1.4229507446289062, "learning_rate": 1.783734609076799e-05, "loss": 0.2051, "step": 9845 }, { "epoch": 0.2134205793772886, "grad_norm": 1.9194260835647583, "learning_rate": 1.783523176544114e-05, "loss": 0.1882, "step": 9850 }, { "epoch": 0.21352891469676943, "grad_norm": 1.4208967685699463, "learning_rate": 1.7833116532520057e-05, "loss": 0.2503, "step": 9855 }, { "epoch": 0.2136372500162503, "grad_norm": 2.7451791763305664, "learning_rate": 1.7831000392249763e-05, "loss": 0.2901, "step": 9860 }, { "epoch": 0.21374558533573115, "grad_norm": 1.2985527515411377, "learning_rate": 1.7828883344875385e-05, "loss": 0.2168, "step": 9865 }, { "epoch": 0.213853920655212, "grad_norm": 1.6416298151016235, "learning_rate": 1.7826765390642147e-05, "loss": 0.1806, "step": 9870 }, { "epoch": 0.21396225597469287, "grad_norm": 1.8657121658325195, "learning_rate": 1.7824646529795383e-05, "loss": 0.2366, "step": 9875 }, { "epoch": 0.21407059129417372, "grad_norm": 1.5361382961273193, "learning_rate": 1.782252676258053e-05, "loss": 0.1415, "step": 9880 }, { "epoch": 0.2141789266136546, "grad_norm": 1.9040659666061401, "learning_rate": 1.7820406089243133e-05, "loss": 0.2129, "step": 9885 }, { "epoch": 0.21428726193313544, "grad_norm": 2.202582836151123, "learning_rate": 1.7818284510028842e-05, "loss": 0.2573, "step": 9890 }, { "epoch": 0.2143955972526163, "grad_norm": 1.8485374450683594, "learning_rate": 1.7816162025183408e-05, "loss": 0.2237, "step": 9895 }, { "epoch": 0.21450393257209716, "grad_norm": 1.6406925916671753, "learning_rate": 1.781403863495269e-05, "loss": 0.1418, "step": 9900 }, { "epoch": 0.214612267891578, "grad_norm": 2.1884572505950928, "learning_rate": 1.781191433958265e-05, "loss": 0.1417, "step": 9905 }, { "epoch": 0.21472060321105887, "grad_norm": 1.4908313751220703, "learning_rate": 1.7809789139319356e-05, "loss": 0.2274, "step": 9910 }, { "epoch": 0.21482893853053972, "grad_norm": 1.68463933467865, "learning_rate": 1.7807663034408983e-05, "loss": 0.2332, "step": 9915 }, { "epoch": 0.2149372738500206, "grad_norm": 2.432039737701416, "learning_rate": 1.7805536025097802e-05, "loss": 0.3453, "step": 9920 }, { "epoch": 0.21504560916950144, "grad_norm": 2.3509585857391357, "learning_rate": 1.78034081116322e-05, "loss": 0.1668, "step": 9925 }, { "epoch": 0.21515394448898228, "grad_norm": 1.810692310333252, "learning_rate": 1.7801279294258668e-05, "loss": 0.2992, "step": 9930 }, { "epoch": 0.21526227980846316, "grad_norm": 1.924435019493103, "learning_rate": 1.779914957322379e-05, "loss": 0.2956, "step": 9935 }, { "epoch": 0.215370615127944, "grad_norm": 1.4780534505844116, "learning_rate": 1.7797018948774264e-05, "loss": 0.2114, "step": 9940 }, { "epoch": 0.21547895044742488, "grad_norm": 1.5437250137329102, "learning_rate": 1.7794887421156893e-05, "loss": 0.2966, "step": 9945 }, { "epoch": 0.21558728576690572, "grad_norm": 2.57647967338562, "learning_rate": 1.7792754990618585e-05, "loss": 0.19, "step": 9950 }, { "epoch": 0.2156956210863866, "grad_norm": 1.7738043069839478, "learning_rate": 1.779062165740634e-05, "loss": 0.2094, "step": 9955 }, { "epoch": 0.21580395640586744, "grad_norm": 2.347167491912842, "learning_rate": 1.7788487421767285e-05, "loss": 0.2496, "step": 9960 }, { "epoch": 0.2159122917253483, "grad_norm": 1.8999868631362915, "learning_rate": 1.778635228394863e-05, "loss": 0.1907, "step": 9965 }, { "epoch": 0.21602062704482916, "grad_norm": 1.7902700901031494, "learning_rate": 1.7784216244197707e-05, "loss": 0.1779, "step": 9970 }, { "epoch": 0.21612896236431, "grad_norm": 1.6270583868026733, "learning_rate": 1.7782079302761935e-05, "loss": 0.225, "step": 9975 }, { "epoch": 0.21623729768379088, "grad_norm": 2.3050320148468018, "learning_rate": 1.7779941459888852e-05, "loss": 0.3643, "step": 9980 }, { "epoch": 0.21634563300327173, "grad_norm": 1.9302351474761963, "learning_rate": 1.7777802715826097e-05, "loss": 0.2451, "step": 9985 }, { "epoch": 0.21645396832275257, "grad_norm": 2.3350377082824707, "learning_rate": 1.777566307082141e-05, "loss": 0.2819, "step": 9990 }, { "epoch": 0.21656230364223344, "grad_norm": 1.9805548191070557, "learning_rate": 1.777352252512263e-05, "loss": 0.2467, "step": 9995 }, { "epoch": 0.2166706389617143, "grad_norm": 2.531308174133301, "learning_rate": 1.7771381078977717e-05, "loss": 0.1789, "step": 10000 }, { "epoch": 0.21677897428119516, "grad_norm": 1.1472384929656982, "learning_rate": 1.776923873263472e-05, "loss": 0.2523, "step": 10005 }, { "epoch": 0.216887309600676, "grad_norm": 1.5246566534042358, "learning_rate": 1.77670954863418e-05, "loss": 0.1958, "step": 10010 }, { "epoch": 0.21699564492015688, "grad_norm": 1.7883135080337524, "learning_rate": 1.776495134034722e-05, "loss": 0.2391, "step": 10015 }, { "epoch": 0.21710398023963773, "grad_norm": 1.7573590278625488, "learning_rate": 1.7762806294899344e-05, "loss": 0.1463, "step": 10020 }, { "epoch": 0.21721231555911857, "grad_norm": 2.5348947048187256, "learning_rate": 1.7760660350246645e-05, "loss": 0.2843, "step": 10025 }, { "epoch": 0.21732065087859945, "grad_norm": 2.1144955158233643, "learning_rate": 1.77585135066377e-05, "loss": 0.2899, "step": 10030 }, { "epoch": 0.2174289861980803, "grad_norm": 1.5888878107070923, "learning_rate": 1.775636576432119e-05, "loss": 0.1588, "step": 10035 }, { "epoch": 0.21753732151756117, "grad_norm": 2.029433012008667, "learning_rate": 1.7754217123545895e-05, "loss": 0.2346, "step": 10040 }, { "epoch": 0.217645656837042, "grad_norm": 2.2095983028411865, "learning_rate": 1.7752067584560705e-05, "loss": 0.2958, "step": 10045 }, { "epoch": 0.21775399215652286, "grad_norm": 1.7187986373901367, "learning_rate": 1.7749917147614608e-05, "loss": 0.2545, "step": 10050 }, { "epoch": 0.21786232747600373, "grad_norm": 1.626128077507019, "learning_rate": 1.7747765812956704e-05, "loss": 0.1967, "step": 10055 }, { "epoch": 0.21797066279548458, "grad_norm": 2.394015312194824, "learning_rate": 1.7745613580836195e-05, "loss": 0.2414, "step": 10060 }, { "epoch": 0.21807899811496545, "grad_norm": 2.638504981994629, "learning_rate": 1.774346045150238e-05, "loss": 0.2681, "step": 10065 }, { "epoch": 0.2181873334344463, "grad_norm": 1.9802109003067017, "learning_rate": 1.774130642520467e-05, "loss": 0.3028, "step": 10070 }, { "epoch": 0.21829566875392717, "grad_norm": 1.7301287651062012, "learning_rate": 1.7739151502192574e-05, "loss": 0.213, "step": 10075 }, { "epoch": 0.21840400407340801, "grad_norm": 2.039672374725342, "learning_rate": 1.773699568271571e-05, "loss": 0.2534, "step": 10080 }, { "epoch": 0.21851233939288886, "grad_norm": 1.9794929027557373, "learning_rate": 1.7734838967023796e-05, "loss": 0.2574, "step": 10085 }, { "epoch": 0.21862067471236973, "grad_norm": 2.762233257293701, "learning_rate": 1.7732681355366654e-05, "loss": 0.247, "step": 10090 }, { "epoch": 0.21872901003185058, "grad_norm": 1.8712530136108398, "learning_rate": 1.7730522847994214e-05, "loss": 0.223, "step": 10095 }, { "epoch": 0.21883734535133145, "grad_norm": 1.1173123121261597, "learning_rate": 1.7728363445156505e-05, "loss": 0.2212, "step": 10100 }, { "epoch": 0.2189456806708123, "grad_norm": 2.1827828884124756, "learning_rate": 1.7726203147103664e-05, "loss": 0.1951, "step": 10105 }, { "epoch": 0.21905401599029314, "grad_norm": 3.0588157176971436, "learning_rate": 1.7724041954085925e-05, "loss": 0.301, "step": 10110 }, { "epoch": 0.21916235130977402, "grad_norm": 1.9564989805221558, "learning_rate": 1.772187986635363e-05, "loss": 0.2812, "step": 10115 }, { "epoch": 0.21927068662925486, "grad_norm": 2.8906850814819336, "learning_rate": 1.771971688415723e-05, "loss": 0.2675, "step": 10120 }, { "epoch": 0.21937902194873574, "grad_norm": 1.3681234121322632, "learning_rate": 1.7717553007747268e-05, "loss": 0.1972, "step": 10125 }, { "epoch": 0.21948735726821658, "grad_norm": 2.186417579650879, "learning_rate": 1.7715388237374397e-05, "loss": 0.2658, "step": 10130 }, { "epoch": 0.21959569258769743, "grad_norm": 2.3141732215881348, "learning_rate": 1.771322257328938e-05, "loss": 0.2554, "step": 10135 }, { "epoch": 0.2197040279071783, "grad_norm": 1.6795035600662231, "learning_rate": 1.771105601574307e-05, "loss": 0.2525, "step": 10140 }, { "epoch": 0.21981236322665915, "grad_norm": 2.5045218467712402, "learning_rate": 1.7708888564986432e-05, "loss": 0.2075, "step": 10145 }, { "epoch": 0.21992069854614002, "grad_norm": 1.9158786535263062, "learning_rate": 1.770672022127053e-05, "loss": 0.2572, "step": 10150 }, { "epoch": 0.22002903386562087, "grad_norm": 2.495215654373169, "learning_rate": 1.770455098484654e-05, "loss": 0.2561, "step": 10155 }, { "epoch": 0.22013736918510174, "grad_norm": 2.0567686557769775, "learning_rate": 1.7702380855965733e-05, "loss": 0.2379, "step": 10160 }, { "epoch": 0.22024570450458258, "grad_norm": 1.2452102899551392, "learning_rate": 1.7700209834879486e-05, "loss": 0.1654, "step": 10165 }, { "epoch": 0.22035403982406343, "grad_norm": 2.143044948577881, "learning_rate": 1.7698037921839275e-05, "loss": 0.2253, "step": 10170 }, { "epoch": 0.2204623751435443, "grad_norm": 1.9156180620193481, "learning_rate": 1.769586511709669e-05, "loss": 0.1978, "step": 10175 }, { "epoch": 0.22057071046302515, "grad_norm": 1.7740519046783447, "learning_rate": 1.7693691420903416e-05, "loss": 0.2243, "step": 10180 }, { "epoch": 0.22067904578250602, "grad_norm": 1.4174156188964844, "learning_rate": 1.769151683351124e-05, "loss": 0.2417, "step": 10185 }, { "epoch": 0.22078738110198687, "grad_norm": 2.3079867362976074, "learning_rate": 1.768934135517206e-05, "loss": 0.2911, "step": 10190 }, { "epoch": 0.2208957164214677, "grad_norm": 2.2223453521728516, "learning_rate": 1.768716498613787e-05, "loss": 0.2027, "step": 10195 }, { "epoch": 0.2210040517409486, "grad_norm": 1.4806184768676758, "learning_rate": 1.768498772666077e-05, "loss": 0.2353, "step": 10200 }, { "epoch": 0.22111238706042943, "grad_norm": 1.8589372634887695, "learning_rate": 1.7682809576992963e-05, "loss": 0.223, "step": 10205 }, { "epoch": 0.2212207223799103, "grad_norm": 1.8406555652618408, "learning_rate": 1.7680630537386755e-05, "loss": 0.1543, "step": 10210 }, { "epoch": 0.22132905769939115, "grad_norm": 1.64943265914917, "learning_rate": 1.767845060809455e-05, "loss": 0.2176, "step": 10215 }, { "epoch": 0.22143739301887203, "grad_norm": 2.1940174102783203, "learning_rate": 1.7676269789368873e-05, "loss": 0.3198, "step": 10220 }, { "epoch": 0.22154572833835287, "grad_norm": 1.5044643878936768, "learning_rate": 1.7674088081462328e-05, "loss": 0.1955, "step": 10225 }, { "epoch": 0.22165406365783372, "grad_norm": 1.8315116167068481, "learning_rate": 1.7671905484627637e-05, "loss": 0.2149, "step": 10230 }, { "epoch": 0.2217623989773146, "grad_norm": 1.4113349914550781, "learning_rate": 1.766972199911762e-05, "loss": 0.2126, "step": 10235 }, { "epoch": 0.22187073429679544, "grad_norm": 1.3496758937835693, "learning_rate": 1.7667537625185204e-05, "loss": 0.1912, "step": 10240 }, { "epoch": 0.2219790696162763, "grad_norm": 3.0787503719329834, "learning_rate": 1.766535236308341e-05, "loss": 0.2126, "step": 10245 }, { "epoch": 0.22208740493575715, "grad_norm": 1.8840701580047607, "learning_rate": 1.7663166213065376e-05, "loss": 0.2704, "step": 10250 }, { "epoch": 0.222195740255238, "grad_norm": 1.7415785789489746, "learning_rate": 1.7660979175384326e-05, "loss": 0.2885, "step": 10255 }, { "epoch": 0.22230407557471887, "grad_norm": 2.8832967281341553, "learning_rate": 1.7658791250293604e-05, "loss": 0.1894, "step": 10260 }, { "epoch": 0.22241241089419972, "grad_norm": 2.498044729232788, "learning_rate": 1.765660243804664e-05, "loss": 0.2427, "step": 10265 }, { "epoch": 0.2225207462136806, "grad_norm": 1.9526681900024414, "learning_rate": 1.7654412738896985e-05, "loss": 0.289, "step": 10270 }, { "epoch": 0.22262908153316144, "grad_norm": 2.100358247756958, "learning_rate": 1.7652222153098275e-05, "loss": 0.2774, "step": 10275 }, { "epoch": 0.2227374168526423, "grad_norm": 1.7372220754623413, "learning_rate": 1.7650030680904254e-05, "loss": 0.1556, "step": 10280 }, { "epoch": 0.22284575217212316, "grad_norm": 1.5553903579711914, "learning_rate": 1.7647838322568786e-05, "loss": 0.1896, "step": 10285 }, { "epoch": 0.222954087491604, "grad_norm": 2.0304136276245117, "learning_rate": 1.7645645078345807e-05, "loss": 0.2928, "step": 10290 }, { "epoch": 0.22306242281108488, "grad_norm": 2.350355386734009, "learning_rate": 1.7643450948489376e-05, "loss": 0.2837, "step": 10295 }, { "epoch": 0.22317075813056572, "grad_norm": 1.2835121154785156, "learning_rate": 1.7641255933253654e-05, "loss": 0.173, "step": 10300 }, { "epoch": 0.2232790934500466, "grad_norm": 2.03757381439209, "learning_rate": 1.7639060032892897e-05, "loss": 0.1623, "step": 10305 }, { "epoch": 0.22338742876952744, "grad_norm": 3.251444101333618, "learning_rate": 1.763686324766147e-05, "loss": 0.3084, "step": 10310 }, { "epoch": 0.2234957640890083, "grad_norm": 2.416841983795166, "learning_rate": 1.763466557781383e-05, "loss": 0.1979, "step": 10315 }, { "epoch": 0.22360409940848916, "grad_norm": 2.0626089572906494, "learning_rate": 1.763246702360456e-05, "loss": 0.3202, "step": 10320 }, { "epoch": 0.22371243472797, "grad_norm": 1.7547746896743774, "learning_rate": 1.763026758528831e-05, "loss": 0.2831, "step": 10325 }, { "epoch": 0.22382077004745088, "grad_norm": 1.9141414165496826, "learning_rate": 1.7628067263119866e-05, "loss": 0.1985, "step": 10330 }, { "epoch": 0.22392910536693172, "grad_norm": 2.7238850593566895, "learning_rate": 1.7625866057354102e-05, "loss": 0.2468, "step": 10335 }, { "epoch": 0.2240374406864126, "grad_norm": 1.6206272840499878, "learning_rate": 1.7623663968245982e-05, "loss": 0.2384, "step": 10340 }, { "epoch": 0.22414577600589344, "grad_norm": 2.03643536567688, "learning_rate": 1.76214609960506e-05, "loss": 0.2784, "step": 10345 }, { "epoch": 0.2242541113253743, "grad_norm": 2.5952541828155518, "learning_rate": 1.761925714102313e-05, "loss": 0.2809, "step": 10350 }, { "epoch": 0.22436244664485516, "grad_norm": 1.8163155317306519, "learning_rate": 1.761705240341886e-05, "loss": 0.3279, "step": 10355 }, { "epoch": 0.224470781964336, "grad_norm": 1.5642229318618774, "learning_rate": 1.7614846783493166e-05, "loss": 0.2231, "step": 10360 }, { "epoch": 0.22457911728381688, "grad_norm": 1.6720824241638184, "learning_rate": 1.7612640281501545e-05, "loss": 0.2439, "step": 10365 }, { "epoch": 0.22468745260329773, "grad_norm": 1.6503769159317017, "learning_rate": 1.7610432897699586e-05, "loss": 0.2031, "step": 10370 }, { "epoch": 0.22479578792277857, "grad_norm": 1.9656314849853516, "learning_rate": 1.7608224632342978e-05, "loss": 0.2532, "step": 10375 }, { "epoch": 0.22490412324225945, "grad_norm": 1.1954536437988281, "learning_rate": 1.7606015485687518e-05, "loss": 0.22, "step": 10380 }, { "epoch": 0.2250124585617403, "grad_norm": 1.9508414268493652, "learning_rate": 1.76038054579891e-05, "loss": 0.2243, "step": 10385 }, { "epoch": 0.22512079388122117, "grad_norm": 1.3339678049087524, "learning_rate": 1.7601594549503732e-05, "loss": 0.2226, "step": 10390 }, { "epoch": 0.225229129200702, "grad_norm": 2.322477340698242, "learning_rate": 1.7599382760487502e-05, "loss": 0.1999, "step": 10395 }, { "epoch": 0.22533746452018286, "grad_norm": 2.267859697341919, "learning_rate": 1.7597170091196618e-05, "loss": 0.1879, "step": 10400 }, { "epoch": 0.22544579983966373, "grad_norm": 2.5094709396362305, "learning_rate": 1.7594956541887386e-05, "loss": 0.3095, "step": 10405 }, { "epoch": 0.22555413515914458, "grad_norm": 1.7297706604003906, "learning_rate": 1.7592742112816213e-05, "loss": 0.2189, "step": 10410 }, { "epoch": 0.22566247047862545, "grad_norm": 1.9549282789230347, "learning_rate": 1.75905268042396e-05, "loss": 0.1905, "step": 10415 }, { "epoch": 0.2257708057981063, "grad_norm": 1.6827666759490967, "learning_rate": 1.7588310616414166e-05, "loss": 0.1582, "step": 10420 }, { "epoch": 0.22587914111758717, "grad_norm": 1.9104433059692383, "learning_rate": 1.758609354959662e-05, "loss": 0.2225, "step": 10425 }, { "epoch": 0.225987476437068, "grad_norm": 2.1889843940734863, "learning_rate": 1.7583875604043777e-05, "loss": 0.2056, "step": 10430 }, { "epoch": 0.22609581175654886, "grad_norm": 2.436312198638916, "learning_rate": 1.7581656780012547e-05, "loss": 0.2873, "step": 10435 }, { "epoch": 0.22620414707602973, "grad_norm": 2.4795100688934326, "learning_rate": 1.7579437077759957e-05, "loss": 0.2048, "step": 10440 }, { "epoch": 0.22631248239551058, "grad_norm": 1.649898648262024, "learning_rate": 1.757721649754312e-05, "loss": 0.1531, "step": 10445 }, { "epoch": 0.22642081771499145, "grad_norm": 1.0058046579360962, "learning_rate": 1.7574995039619258e-05, "loss": 0.2149, "step": 10450 }, { "epoch": 0.2265291530344723, "grad_norm": 2.097806453704834, "learning_rate": 1.7572772704245695e-05, "loss": 0.2945, "step": 10455 }, { "epoch": 0.22663748835395314, "grad_norm": 1.4524166584014893, "learning_rate": 1.7570549491679852e-05, "loss": 0.1464, "step": 10460 }, { "epoch": 0.22674582367343402, "grad_norm": 2.220776081085205, "learning_rate": 1.7568325402179264e-05, "loss": 0.3114, "step": 10465 }, { "epoch": 0.22685415899291486, "grad_norm": 2.1936395168304443, "learning_rate": 1.7566100436001544e-05, "loss": 0.2492, "step": 10470 }, { "epoch": 0.22696249431239574, "grad_norm": 1.8979463577270508, "learning_rate": 1.7563874593404434e-05, "loss": 0.259, "step": 10475 }, { "epoch": 0.22707082963187658, "grad_norm": 2.8482778072357178, "learning_rate": 1.7561647874645754e-05, "loss": 0.2485, "step": 10480 }, { "epoch": 0.22717916495135745, "grad_norm": 1.2618547677993774, "learning_rate": 1.7559420279983442e-05, "loss": 0.1525, "step": 10485 }, { "epoch": 0.2272875002708383, "grad_norm": 1.735243797302246, "learning_rate": 1.7557191809675536e-05, "loss": 0.2765, "step": 10490 }, { "epoch": 0.22739583559031915, "grad_norm": 1.7780343294143677, "learning_rate": 1.7554962463980162e-05, "loss": 0.2358, "step": 10495 }, { "epoch": 0.22750417090980002, "grad_norm": 1.3695361614227295, "learning_rate": 1.755273224315556e-05, "loss": 0.2372, "step": 10500 }, { "epoch": 0.22761250622928086, "grad_norm": 2.24372935295105, "learning_rate": 1.7550501147460068e-05, "loss": 0.1943, "step": 10505 }, { "epoch": 0.22772084154876174, "grad_norm": 1.0342111587524414, "learning_rate": 1.7548269177152124e-05, "loss": 0.1938, "step": 10510 }, { "epoch": 0.22782917686824258, "grad_norm": 1.613816261291504, "learning_rate": 1.7546036332490267e-05, "loss": 0.2361, "step": 10515 }, { "epoch": 0.22793751218772343, "grad_norm": 1.6656458377838135, "learning_rate": 1.7543802613733143e-05, "loss": 0.2979, "step": 10520 }, { "epoch": 0.2280458475072043, "grad_norm": 2.35919189453125, "learning_rate": 1.7541568021139493e-05, "loss": 0.2187, "step": 10525 }, { "epoch": 0.22815418282668515, "grad_norm": 1.9373161792755127, "learning_rate": 1.7539332554968158e-05, "loss": 0.2623, "step": 10530 }, { "epoch": 0.22826251814616602, "grad_norm": 1.9140820503234863, "learning_rate": 1.7537096215478088e-05, "loss": 0.2404, "step": 10535 }, { "epoch": 0.22837085346564687, "grad_norm": 1.8936142921447754, "learning_rate": 1.7534859002928323e-05, "loss": 0.2125, "step": 10540 }, { "epoch": 0.22847918878512774, "grad_norm": 2.2147376537323, "learning_rate": 1.7532620917578022e-05, "loss": 0.3081, "step": 10545 }, { "epoch": 0.22858752410460859, "grad_norm": 1.6499325037002563, "learning_rate": 1.753038195968642e-05, "loss": 0.2341, "step": 10550 }, { "epoch": 0.22869585942408943, "grad_norm": 1.825117588043213, "learning_rate": 1.7528142129512877e-05, "loss": 0.1933, "step": 10555 }, { "epoch": 0.2288041947435703, "grad_norm": 1.8027338981628418, "learning_rate": 1.7525901427316836e-05, "loss": 0.2292, "step": 10560 }, { "epoch": 0.22891253006305115, "grad_norm": 2.1231236457824707, "learning_rate": 1.7523659853357855e-05, "loss": 0.3044, "step": 10565 }, { "epoch": 0.22902086538253202, "grad_norm": 2.1746203899383545, "learning_rate": 1.7521417407895585e-05, "loss": 0.2401, "step": 10570 }, { "epoch": 0.22912920070201287, "grad_norm": 1.3497225046157837, "learning_rate": 1.7519174091189777e-05, "loss": 0.2234, "step": 10575 }, { "epoch": 0.22923753602149372, "grad_norm": 2.177114248275757, "learning_rate": 1.751692990350029e-05, "loss": 0.2058, "step": 10580 }, { "epoch": 0.2293458713409746, "grad_norm": 1.885765552520752, "learning_rate": 1.7514684845087075e-05, "loss": 0.2974, "step": 10585 }, { "epoch": 0.22945420666045543, "grad_norm": 1.6633955240249634, "learning_rate": 1.7512438916210195e-05, "loss": 0.2247, "step": 10590 }, { "epoch": 0.2295625419799363, "grad_norm": 1.4057459831237793, "learning_rate": 1.7510192117129798e-05, "loss": 0.2046, "step": 10595 }, { "epoch": 0.22967087729941715, "grad_norm": 2.51033091545105, "learning_rate": 1.7507944448106148e-05, "loss": 0.3299, "step": 10600 }, { "epoch": 0.22977921261889803, "grad_norm": 1.7946678400039673, "learning_rate": 1.7505695909399604e-05, "loss": 0.2749, "step": 10605 }, { "epoch": 0.22988754793837887, "grad_norm": 2.7940685749053955, "learning_rate": 1.7503446501270624e-05, "loss": 0.1634, "step": 10610 }, { "epoch": 0.22999588325785972, "grad_norm": 2.1103386878967285, "learning_rate": 1.7501196223979767e-05, "loss": 0.1607, "step": 10615 }, { "epoch": 0.2301042185773406, "grad_norm": 1.2816002368927002, "learning_rate": 1.7498945077787697e-05, "loss": 0.1917, "step": 10620 }, { "epoch": 0.23021255389682144, "grad_norm": 1.8748022317886353, "learning_rate": 1.7496693062955174e-05, "loss": 0.2294, "step": 10625 }, { "epoch": 0.2303208892163023, "grad_norm": 1.9261901378631592, "learning_rate": 1.749444017974306e-05, "loss": 0.2139, "step": 10630 }, { "epoch": 0.23042922453578316, "grad_norm": 2.068681478500366, "learning_rate": 1.7492186428412317e-05, "loss": 0.2124, "step": 10635 }, { "epoch": 0.230537559855264, "grad_norm": 1.268027901649475, "learning_rate": 1.748993180922401e-05, "loss": 0.1764, "step": 10640 }, { "epoch": 0.23064589517474487, "grad_norm": 1.936034083366394, "learning_rate": 1.7487676322439303e-05, "loss": 0.266, "step": 10645 }, { "epoch": 0.23075423049422572, "grad_norm": 3.1297824382781982, "learning_rate": 1.748541996831946e-05, "loss": 0.2755, "step": 10650 }, { "epoch": 0.2308625658137066, "grad_norm": 2.1766247749328613, "learning_rate": 1.748316274712584e-05, "loss": 0.2306, "step": 10655 }, { "epoch": 0.23097090113318744, "grad_norm": 2.251570224761963, "learning_rate": 1.7480904659119916e-05, "loss": 0.2413, "step": 10660 }, { "epoch": 0.23107923645266829, "grad_norm": 2.7114546298980713, "learning_rate": 1.7478645704563254e-05, "loss": 0.1975, "step": 10665 }, { "epoch": 0.23118757177214916, "grad_norm": 1.6647120714187622, "learning_rate": 1.7476385883717514e-05, "loss": 0.3388, "step": 10670 }, { "epoch": 0.23129590709163, "grad_norm": 1.4352656602859497, "learning_rate": 1.7474125196844468e-05, "loss": 0.2067, "step": 10675 }, { "epoch": 0.23140424241111088, "grad_norm": 1.6876044273376465, "learning_rate": 1.7471863644205978e-05, "loss": 0.2213, "step": 10680 }, { "epoch": 0.23151257773059172, "grad_norm": 1.5819050073623657, "learning_rate": 1.7469601226064016e-05, "loss": 0.2879, "step": 10685 }, { "epoch": 0.2316209130500726, "grad_norm": 1.3463819026947021, "learning_rate": 1.7467337942680645e-05, "loss": 0.1741, "step": 10690 }, { "epoch": 0.23172924836955344, "grad_norm": 2.10538911819458, "learning_rate": 1.7465073794318034e-05, "loss": 0.2289, "step": 10695 }, { "epoch": 0.2318375836890343, "grad_norm": 1.9373494386672974, "learning_rate": 1.746280878123845e-05, "loss": 0.2961, "step": 10700 }, { "epoch": 0.23194591900851516, "grad_norm": 1.4096088409423828, "learning_rate": 1.7460542903704264e-05, "loss": 0.1195, "step": 10705 }, { "epoch": 0.232054254327996, "grad_norm": 2.178590774536133, "learning_rate": 1.745827616197794e-05, "loss": 0.3473, "step": 10710 }, { "epoch": 0.23216258964747688, "grad_norm": 2.1845643520355225, "learning_rate": 1.7456008556322047e-05, "loss": 0.1952, "step": 10715 }, { "epoch": 0.23227092496695773, "grad_norm": 2.0847854614257812, "learning_rate": 1.7453740086999253e-05, "loss": 0.2382, "step": 10720 }, { "epoch": 0.23237926028643857, "grad_norm": 2.497804641723633, "learning_rate": 1.7451470754272328e-05, "loss": 0.3216, "step": 10725 }, { "epoch": 0.23248759560591944, "grad_norm": 2.041177749633789, "learning_rate": 1.7449200558404142e-05, "loss": 0.2193, "step": 10730 }, { "epoch": 0.2325959309254003, "grad_norm": 1.7857587337493896, "learning_rate": 1.7446929499657658e-05, "loss": 0.1967, "step": 10735 }, { "epoch": 0.23270426624488116, "grad_norm": 1.2913081645965576, "learning_rate": 1.7444657578295944e-05, "loss": 0.2228, "step": 10740 }, { "epoch": 0.232812601564362, "grad_norm": 2.2870731353759766, "learning_rate": 1.7442384794582174e-05, "loss": 0.1475, "step": 10745 }, { "epoch": 0.23292093688384288, "grad_norm": 1.6040552854537964, "learning_rate": 1.7440111148779612e-05, "loss": 0.2275, "step": 10750 }, { "epoch": 0.23302927220332373, "grad_norm": 1.6174405813217163, "learning_rate": 1.7437836641151624e-05, "loss": 0.221, "step": 10755 }, { "epoch": 0.23313760752280457, "grad_norm": 1.0531604290008545, "learning_rate": 1.7435561271961682e-05, "loss": 0.2037, "step": 10760 }, { "epoch": 0.23324594284228545, "grad_norm": 1.8064829111099243, "learning_rate": 1.7433285041473352e-05, "loss": 0.2446, "step": 10765 }, { "epoch": 0.2333542781617663, "grad_norm": 1.0103496313095093, "learning_rate": 1.74310079499503e-05, "loss": 0.2231, "step": 10770 }, { "epoch": 0.23346261348124717, "grad_norm": 2.602618932723999, "learning_rate": 1.742872999765629e-05, "loss": 0.245, "step": 10775 }, { "epoch": 0.233570948800728, "grad_norm": 1.2509827613830566, "learning_rate": 1.7426451184855196e-05, "loss": 0.2203, "step": 10780 }, { "epoch": 0.23367928412020886, "grad_norm": 1.7939027547836304, "learning_rate": 1.742417151181098e-05, "loss": 0.1458, "step": 10785 }, { "epoch": 0.23378761943968973, "grad_norm": 2.1216487884521484, "learning_rate": 1.742189097878771e-05, "loss": 0.2907, "step": 10790 }, { "epoch": 0.23389595475917058, "grad_norm": 1.8353973627090454, "learning_rate": 1.7419609586049543e-05, "loss": 0.2857, "step": 10795 }, { "epoch": 0.23400429007865145, "grad_norm": 3.035806179046631, "learning_rate": 1.7417327333860757e-05, "loss": 0.2643, "step": 10800 }, { "epoch": 0.2341126253981323, "grad_norm": 2.944119691848755, "learning_rate": 1.741504422248571e-05, "loss": 0.2027, "step": 10805 }, { "epoch": 0.23422096071761317, "grad_norm": 3.354097366333008, "learning_rate": 1.741276025218887e-05, "loss": 0.2799, "step": 10810 }, { "epoch": 0.23432929603709401, "grad_norm": 1.2612823247909546, "learning_rate": 1.7410475423234795e-05, "loss": 0.2063, "step": 10815 }, { "epoch": 0.23443763135657486, "grad_norm": 1.8627065420150757, "learning_rate": 1.740818973588815e-05, "loss": 0.2542, "step": 10820 }, { "epoch": 0.23454596667605573, "grad_norm": 2.112384796142578, "learning_rate": 1.74059031904137e-05, "loss": 0.219, "step": 10825 }, { "epoch": 0.23465430199553658, "grad_norm": 2.8599777221679688, "learning_rate": 1.7403615787076308e-05, "loss": 0.2875, "step": 10830 }, { "epoch": 0.23476263731501745, "grad_norm": 1.7843679189682007, "learning_rate": 1.740132752614093e-05, "loss": 0.2806, "step": 10835 }, { "epoch": 0.2348709726344983, "grad_norm": 2.594106435775757, "learning_rate": 1.739903840787263e-05, "loss": 0.1868, "step": 10840 }, { "epoch": 0.23497930795397914, "grad_norm": 2.8797719478607178, "learning_rate": 1.739674843253657e-05, "loss": 0.2068, "step": 10845 }, { "epoch": 0.23508764327346002, "grad_norm": 2.2237977981567383, "learning_rate": 1.7394457600398003e-05, "loss": 0.2788, "step": 10850 }, { "epoch": 0.23519597859294086, "grad_norm": 1.4306341409683228, "learning_rate": 1.7392165911722297e-05, "loss": 0.2242, "step": 10855 }, { "epoch": 0.23530431391242174, "grad_norm": 1.8208105564117432, "learning_rate": 1.7389873366774902e-05, "loss": 0.2971, "step": 10860 }, { "epoch": 0.23541264923190258, "grad_norm": 2.277268171310425, "learning_rate": 1.7387579965821378e-05, "loss": 0.2355, "step": 10865 }, { "epoch": 0.23552098455138346, "grad_norm": 1.2910120487213135, "learning_rate": 1.7385285709127388e-05, "loss": 0.1972, "step": 10870 }, { "epoch": 0.2356293198708643, "grad_norm": 1.9340442419052124, "learning_rate": 1.7382990596958672e-05, "loss": 0.1771, "step": 10875 }, { "epoch": 0.23573765519034515, "grad_norm": 1.745797872543335, "learning_rate": 1.7380694629581095e-05, "loss": 0.17, "step": 10880 }, { "epoch": 0.23584599050982602, "grad_norm": 2.003828763961792, "learning_rate": 1.7378397807260605e-05, "loss": 0.186, "step": 10885 }, { "epoch": 0.23595432582930687, "grad_norm": 2.5177254676818848, "learning_rate": 1.737610013026326e-05, "loss": 0.1925, "step": 10890 }, { "epoch": 0.23606266114878774, "grad_norm": 2.3835501670837402, "learning_rate": 1.737380159885521e-05, "loss": 0.162, "step": 10895 }, { "epoch": 0.23617099646826858, "grad_norm": 1.49726140499115, "learning_rate": 1.7371502213302703e-05, "loss": 0.1557, "step": 10900 }, { "epoch": 0.23627933178774943, "grad_norm": 2.092001438140869, "learning_rate": 1.7369201973872088e-05, "loss": 0.2447, "step": 10905 }, { "epoch": 0.2363876671072303, "grad_norm": 1.4684088230133057, "learning_rate": 1.7366900880829817e-05, "loss": 0.2118, "step": 10910 }, { "epoch": 0.23649600242671115, "grad_norm": 1.7973731756210327, "learning_rate": 1.7364598934442435e-05, "loss": 0.1466, "step": 10915 }, { "epoch": 0.23660433774619202, "grad_norm": 1.7491710186004639, "learning_rate": 1.7362296134976588e-05, "loss": 0.2471, "step": 10920 }, { "epoch": 0.23671267306567287, "grad_norm": 2.424468994140625, "learning_rate": 1.7359992482699023e-05, "loss": 0.3064, "step": 10925 }, { "epoch": 0.23682100838515371, "grad_norm": 2.0234482288360596, "learning_rate": 1.735768797787658e-05, "loss": 0.2132, "step": 10930 }, { "epoch": 0.2369293437046346, "grad_norm": 1.878676176071167, "learning_rate": 1.73553826207762e-05, "loss": 0.3185, "step": 10935 }, { "epoch": 0.23703767902411543, "grad_norm": 1.33259916305542, "learning_rate": 1.735307641166493e-05, "loss": 0.2148, "step": 10940 }, { "epoch": 0.2371460143435963, "grad_norm": 1.2959368228912354, "learning_rate": 1.7350769350809908e-05, "loss": 0.2044, "step": 10945 }, { "epoch": 0.23725434966307715, "grad_norm": 1.5142754316329956, "learning_rate": 1.734846143847837e-05, "loss": 0.1236, "step": 10950 }, { "epoch": 0.23736268498255803, "grad_norm": 1.4217064380645752, "learning_rate": 1.7346152674937654e-05, "loss": 0.199, "step": 10955 }, { "epoch": 0.23747102030203887, "grad_norm": 1.2696309089660645, "learning_rate": 1.7343843060455194e-05, "loss": 0.2347, "step": 10960 }, { "epoch": 0.23757935562151972, "grad_norm": 1.6021784543991089, "learning_rate": 1.734153259529853e-05, "loss": 0.2513, "step": 10965 }, { "epoch": 0.2376876909410006, "grad_norm": 2.8316762447357178, "learning_rate": 1.7339221279735286e-05, "loss": 0.1157, "step": 10970 }, { "epoch": 0.23779602626048144, "grad_norm": 2.312892436981201, "learning_rate": 1.73369091140332e-05, "loss": 0.2068, "step": 10975 }, { "epoch": 0.2379043615799623, "grad_norm": 1.9547010660171509, "learning_rate": 1.7334596098460103e-05, "loss": 0.2058, "step": 10980 }, { "epoch": 0.23801269689944315, "grad_norm": 1.1741863489151, "learning_rate": 1.733228223328392e-05, "loss": 0.2136, "step": 10985 }, { "epoch": 0.238121032218924, "grad_norm": 1.753952980041504, "learning_rate": 1.7329967518772676e-05, "loss": 0.2421, "step": 10990 }, { "epoch": 0.23822936753840487, "grad_norm": 2.8638429641723633, "learning_rate": 1.7327651955194497e-05, "loss": 0.3031, "step": 10995 }, { "epoch": 0.23833770285788572, "grad_norm": 1.7919232845306396, "learning_rate": 1.732533554281761e-05, "loss": 0.2637, "step": 11000 }, { "epoch": 0.2384460381773666, "grad_norm": 1.3466973304748535, "learning_rate": 1.732301828191033e-05, "loss": 0.2625, "step": 11005 }, { "epoch": 0.23855437349684744, "grad_norm": 1.9144351482391357, "learning_rate": 1.7320700172741082e-05, "loss": 0.228, "step": 11010 }, { "epoch": 0.2386627088163283, "grad_norm": 1.4073257446289062, "learning_rate": 1.7318381215578387e-05, "loss": 0.1827, "step": 11015 }, { "epoch": 0.23877104413580916, "grad_norm": 2.4683961868286133, "learning_rate": 1.7316061410690855e-05, "loss": 0.1763, "step": 11020 }, { "epoch": 0.23887937945529, "grad_norm": 1.8474236726760864, "learning_rate": 1.73137407583472e-05, "loss": 0.2461, "step": 11025 }, { "epoch": 0.23898771477477088, "grad_norm": 2.5638461112976074, "learning_rate": 1.7311419258816243e-05, "loss": 0.2676, "step": 11030 }, { "epoch": 0.23909605009425172, "grad_norm": 1.421076774597168, "learning_rate": 1.7309096912366888e-05, "loss": 0.2677, "step": 11035 }, { "epoch": 0.2392043854137326, "grad_norm": 1.17887282371521, "learning_rate": 1.7306773719268145e-05, "loss": 0.2565, "step": 11040 }, { "epoch": 0.23931272073321344, "grad_norm": 1.7126809358596802, "learning_rate": 1.7304449679789125e-05, "loss": 0.2356, "step": 11045 }, { "epoch": 0.2394210560526943, "grad_norm": 2.501052141189575, "learning_rate": 1.7302124794199027e-05, "loss": 0.2226, "step": 11050 }, { "epoch": 0.23952939137217516, "grad_norm": 1.5721336603164673, "learning_rate": 1.7299799062767158e-05, "loss": 0.1913, "step": 11055 }, { "epoch": 0.239637726691656, "grad_norm": 2.13153076171875, "learning_rate": 1.7297472485762918e-05, "loss": 0.2402, "step": 11060 }, { "epoch": 0.23974606201113688, "grad_norm": 1.70826256275177, "learning_rate": 1.7295145063455808e-05, "loss": 0.2053, "step": 11065 }, { "epoch": 0.23985439733061772, "grad_norm": 1.709778904914856, "learning_rate": 1.7292816796115427e-05, "loss": 0.1881, "step": 11070 }, { "epoch": 0.2399627326500986, "grad_norm": 1.6457194089889526, "learning_rate": 1.7290487684011463e-05, "loss": 0.1483, "step": 11075 }, { "epoch": 0.24007106796957944, "grad_norm": 4.039981842041016, "learning_rate": 1.728815772741371e-05, "loss": 0.2359, "step": 11080 }, { "epoch": 0.2401794032890603, "grad_norm": 2.488529682159424, "learning_rate": 1.7285826926592063e-05, "loss": 0.2862, "step": 11085 }, { "epoch": 0.24028773860854116, "grad_norm": 1.8482961654663086, "learning_rate": 1.7283495281816506e-05, "loss": 0.2074, "step": 11090 }, { "epoch": 0.240396073928022, "grad_norm": 1.678737759590149, "learning_rate": 1.7281162793357133e-05, "loss": 0.2293, "step": 11095 }, { "epoch": 0.24050440924750288, "grad_norm": 1.6731938123703003, "learning_rate": 1.727882946148412e-05, "loss": 0.1628, "step": 11100 }, { "epoch": 0.24061274456698373, "grad_norm": 2.055112600326538, "learning_rate": 1.7276495286467748e-05, "loss": 0.2159, "step": 11105 }, { "epoch": 0.24072107988646457, "grad_norm": 1.49240243434906, "learning_rate": 1.7274160268578398e-05, "loss": 0.2512, "step": 11110 }, { "epoch": 0.24082941520594545, "grad_norm": 1.5463886260986328, "learning_rate": 1.7271824408086554e-05, "loss": 0.2851, "step": 11115 }, { "epoch": 0.2409377505254263, "grad_norm": 1.3883006572723389, "learning_rate": 1.726948770526278e-05, "loss": 0.1714, "step": 11120 }, { "epoch": 0.24104608584490717, "grad_norm": 2.322866678237915, "learning_rate": 1.7267150160377753e-05, "loss": 0.2565, "step": 11125 }, { "epoch": 0.241154421164388, "grad_norm": 2.253284215927124, "learning_rate": 1.726481177370224e-05, "loss": 0.293, "step": 11130 }, { "epoch": 0.24126275648386888, "grad_norm": 2.8630456924438477, "learning_rate": 1.726247254550711e-05, "loss": 0.199, "step": 11135 }, { "epoch": 0.24137109180334973, "grad_norm": 2.0768392086029053, "learning_rate": 1.726013247606333e-05, "loss": 0.2538, "step": 11140 }, { "epoch": 0.24147942712283058, "grad_norm": 1.549655556678772, "learning_rate": 1.7257791565641963e-05, "loss": 0.171, "step": 11145 }, { "epoch": 0.24158776244231145, "grad_norm": 1.8877158164978027, "learning_rate": 1.7255449814514155e-05, "loss": 0.2112, "step": 11150 }, { "epoch": 0.2416960977617923, "grad_norm": 2.0702309608459473, "learning_rate": 1.725310722295118e-05, "loss": 0.2531, "step": 11155 }, { "epoch": 0.24180443308127317, "grad_norm": 2.094169855117798, "learning_rate": 1.7250763791224382e-05, "loss": 0.2307, "step": 11160 }, { "epoch": 0.241912768400754, "grad_norm": 2.3841311931610107, "learning_rate": 1.7248419519605217e-05, "loss": 0.2273, "step": 11165 }, { "epoch": 0.24202110372023486, "grad_norm": 2.690852403640747, "learning_rate": 1.7246074408365232e-05, "loss": 0.2544, "step": 11170 }, { "epoch": 0.24212943903971573, "grad_norm": 1.3615187406539917, "learning_rate": 1.724372845777607e-05, "loss": 0.1773, "step": 11175 }, { "epoch": 0.24223777435919658, "grad_norm": 2.1411983966827393, "learning_rate": 1.724138166810948e-05, "loss": 0.1684, "step": 11180 }, { "epoch": 0.24234610967867745, "grad_norm": 3.3295724391937256, "learning_rate": 1.7239034039637298e-05, "loss": 0.2114, "step": 11185 }, { "epoch": 0.2424544449981583, "grad_norm": 1.9267624616622925, "learning_rate": 1.7236685572631463e-05, "loss": 0.2684, "step": 11190 }, { "epoch": 0.24256278031763917, "grad_norm": 1.0913684368133545, "learning_rate": 1.7234336267364012e-05, "loss": 0.2013, "step": 11195 }, { "epoch": 0.24267111563712002, "grad_norm": 3.294583559036255, "learning_rate": 1.7231986124107076e-05, "loss": 0.2185, "step": 11200 }, { "epoch": 0.24277945095660086, "grad_norm": 2.58733868598938, "learning_rate": 1.722963514313288e-05, "loss": 0.3005, "step": 11205 }, { "epoch": 0.24288778627608174, "grad_norm": 1.546453833580017, "learning_rate": 1.722728332471375e-05, "loss": 0.2646, "step": 11210 }, { "epoch": 0.24299612159556258, "grad_norm": 1.1810158491134644, "learning_rate": 1.7224930669122117e-05, "loss": 0.2052, "step": 11215 }, { "epoch": 0.24310445691504345, "grad_norm": 2.7895755767822266, "learning_rate": 1.7222577176630493e-05, "loss": 0.3005, "step": 11220 }, { "epoch": 0.2432127922345243, "grad_norm": 1.3823699951171875, "learning_rate": 1.7220222847511496e-05, "loss": 0.2792, "step": 11225 }, { "epoch": 0.24332112755400515, "grad_norm": 2.4897220134735107, "learning_rate": 1.7217867682037844e-05, "loss": 0.233, "step": 11230 }, { "epoch": 0.24342946287348602, "grad_norm": 1.9921386241912842, "learning_rate": 1.721551168048234e-05, "loss": 0.1563, "step": 11235 }, { "epoch": 0.24353779819296686, "grad_norm": 2.081753969192505, "learning_rate": 1.72131548431179e-05, "loss": 0.2374, "step": 11240 }, { "epoch": 0.24364613351244774, "grad_norm": 1.7324841022491455, "learning_rate": 1.7210797170217525e-05, "loss": 0.2291, "step": 11245 }, { "epoch": 0.24375446883192858, "grad_norm": 1.7678170204162598, "learning_rate": 1.7208438662054314e-05, "loss": 0.1654, "step": 11250 }, { "epoch": 0.24386280415140943, "grad_norm": 1.962127447128296, "learning_rate": 1.7206079318901468e-05, "loss": 0.2385, "step": 11255 }, { "epoch": 0.2439711394708903, "grad_norm": 2.025867223739624, "learning_rate": 1.7203719141032275e-05, "loss": 0.2472, "step": 11260 }, { "epoch": 0.24407947479037115, "grad_norm": 2.2809574604034424, "learning_rate": 1.7201358128720137e-05, "loss": 0.2628, "step": 11265 }, { "epoch": 0.24418781010985202, "grad_norm": 1.9886963367462158, "learning_rate": 1.7198996282238535e-05, "loss": 0.1369, "step": 11270 }, { "epoch": 0.24429614542933287, "grad_norm": 1.4288212060928345, "learning_rate": 1.7196633601861054e-05, "loss": 0.2511, "step": 11275 }, { "epoch": 0.24440448074881374, "grad_norm": 1.2690013647079468, "learning_rate": 1.7194270087861373e-05, "loss": 0.1903, "step": 11280 }, { "epoch": 0.2445128160682946, "grad_norm": 1.3826864957809448, "learning_rate": 1.7191905740513276e-05, "loss": 0.269, "step": 11285 }, { "epoch": 0.24462115138777543, "grad_norm": 1.7598220109939575, "learning_rate": 1.718954056009063e-05, "loss": 0.2524, "step": 11290 }, { "epoch": 0.2447294867072563, "grad_norm": 1.6942685842514038, "learning_rate": 1.7187174546867414e-05, "loss": 0.2207, "step": 11295 }, { "epoch": 0.24483782202673715, "grad_norm": 1.9755018949508667, "learning_rate": 1.718480770111769e-05, "loss": 0.2474, "step": 11300 }, { "epoch": 0.24494615734621802, "grad_norm": 1.8392343521118164, "learning_rate": 1.718244002311562e-05, "loss": 0.1707, "step": 11305 }, { "epoch": 0.24505449266569887, "grad_norm": 1.414913535118103, "learning_rate": 1.718007151313547e-05, "loss": 0.155, "step": 11310 }, { "epoch": 0.24516282798517972, "grad_norm": 1.289547324180603, "learning_rate": 1.7177702171451587e-05, "loss": 0.1859, "step": 11315 }, { "epoch": 0.2452711633046606, "grad_norm": 1.787584662437439, "learning_rate": 1.7175331998338432e-05, "loss": 0.2247, "step": 11320 }, { "epoch": 0.24537949862414143, "grad_norm": 1.6759681701660156, "learning_rate": 1.7172960994070552e-05, "loss": 0.2482, "step": 11325 }, { "epoch": 0.2454878339436223, "grad_norm": 1.4071475267410278, "learning_rate": 1.717058915892259e-05, "loss": 0.1811, "step": 11330 }, { "epoch": 0.24559616926310315, "grad_norm": 2.0843100547790527, "learning_rate": 1.716821649316929e-05, "loss": 0.2995, "step": 11335 }, { "epoch": 0.24570450458258403, "grad_norm": 1.8004368543624878, "learning_rate": 1.716584299708549e-05, "loss": 0.2185, "step": 11340 }, { "epoch": 0.24581283990206487, "grad_norm": 2.0880589485168457, "learning_rate": 1.7163468670946122e-05, "loss": 0.2415, "step": 11345 }, { "epoch": 0.24592117522154572, "grad_norm": 2.3007099628448486, "learning_rate": 1.7161093515026222e-05, "loss": 0.2068, "step": 11350 }, { "epoch": 0.2460295105410266, "grad_norm": 2.195582389831543, "learning_rate": 1.7158717529600905e-05, "loss": 0.3099, "step": 11355 }, { "epoch": 0.24613784586050744, "grad_norm": 1.5400208234786987, "learning_rate": 1.7156340714945403e-05, "loss": 0.1959, "step": 11360 }, { "epoch": 0.2462461811799883, "grad_norm": 2.4817347526550293, "learning_rate": 1.715396307133503e-05, "loss": 0.1931, "step": 11365 }, { "epoch": 0.24635451649946916, "grad_norm": 1.8195922374725342, "learning_rate": 1.7151584599045204e-05, "loss": 0.1929, "step": 11370 }, { "epoch": 0.24646285181895, "grad_norm": 2.1801223754882812, "learning_rate": 1.7149205298351434e-05, "loss": 0.2045, "step": 11375 }, { "epoch": 0.24657118713843088, "grad_norm": 1.5778888463974, "learning_rate": 1.714682516952932e-05, "loss": 0.201, "step": 11380 }, { "epoch": 0.24667952245791172, "grad_norm": 1.9974315166473389, "learning_rate": 1.714444421285457e-05, "loss": 0.1469, "step": 11385 }, { "epoch": 0.2467878577773926, "grad_norm": 3.045602798461914, "learning_rate": 1.7142062428602984e-05, "loss": 0.3096, "step": 11390 }, { "epoch": 0.24689619309687344, "grad_norm": 2.67854380607605, "learning_rate": 1.7139679817050455e-05, "loss": 0.2596, "step": 11395 }, { "epoch": 0.2470045284163543, "grad_norm": 1.9276854991912842, "learning_rate": 1.7137296378472968e-05, "loss": 0.1888, "step": 11400 }, { "epoch": 0.24711286373583516, "grad_norm": 1.8858342170715332, "learning_rate": 1.7134912113146614e-05, "loss": 0.237, "step": 11405 }, { "epoch": 0.247221199055316, "grad_norm": 1.6271095275878906, "learning_rate": 1.7132527021347574e-05, "loss": 0.2722, "step": 11410 }, { "epoch": 0.24732953437479688, "grad_norm": 1.850957989692688, "learning_rate": 1.7130141103352124e-05, "loss": 0.217, "step": 11415 }, { "epoch": 0.24743786969427772, "grad_norm": 1.3179609775543213, "learning_rate": 1.7127754359436632e-05, "loss": 0.1957, "step": 11420 }, { "epoch": 0.2475462050137586, "grad_norm": 1.8553516864776611, "learning_rate": 1.7125366789877577e-05, "loss": 0.176, "step": 11425 }, { "epoch": 0.24765454033323944, "grad_norm": 2.155275821685791, "learning_rate": 1.7122978394951512e-05, "loss": 0.2887, "step": 11430 }, { "epoch": 0.2477628756527203, "grad_norm": 2.2380309104919434, "learning_rate": 1.7120589174935106e-05, "loss": 0.2517, "step": 11435 }, { "epoch": 0.24787121097220116, "grad_norm": 1.4787144660949707, "learning_rate": 1.711819913010511e-05, "loss": 0.2586, "step": 11440 }, { "epoch": 0.247979546291682, "grad_norm": 1.7501507997512817, "learning_rate": 1.7115808260738376e-05, "loss": 0.1265, "step": 11445 }, { "epoch": 0.24808788161116288, "grad_norm": 1.634001612663269, "learning_rate": 1.7113416567111846e-05, "loss": 0.2797, "step": 11450 }, { "epoch": 0.24819621693064373, "grad_norm": 1.8458455801010132, "learning_rate": 1.7111024049502572e-05, "loss": 0.1924, "step": 11455 }, { "epoch": 0.2483045522501246, "grad_norm": 2.1664652824401855, "learning_rate": 1.7108630708187682e-05, "loss": 0.2906, "step": 11460 }, { "epoch": 0.24841288756960545, "grad_norm": 1.7224514484405518, "learning_rate": 1.710623654344441e-05, "loss": 0.1912, "step": 11465 }, { "epoch": 0.2485212228890863, "grad_norm": 1.7863414287567139, "learning_rate": 1.7103841555550088e-05, "loss": 0.1919, "step": 11470 }, { "epoch": 0.24862955820856716, "grad_norm": 2.385786294937134, "learning_rate": 1.7101445744782137e-05, "loss": 0.2151, "step": 11475 }, { "epoch": 0.248737893528048, "grad_norm": 1.9745818376541138, "learning_rate": 1.7099049111418074e-05, "loss": 0.2487, "step": 11480 }, { "epoch": 0.24884622884752888, "grad_norm": 1.4676640033721924, "learning_rate": 1.7096651655735517e-05, "loss": 0.191, "step": 11485 }, { "epoch": 0.24895456416700973, "grad_norm": 1.2384154796600342, "learning_rate": 1.7094253378012174e-05, "loss": 0.1544, "step": 11490 }, { "epoch": 0.24906289948649057, "grad_norm": 1.8719629049301147, "learning_rate": 1.709185427852585e-05, "loss": 0.2055, "step": 11495 }, { "epoch": 0.24917123480597145, "grad_norm": 1.4775221347808838, "learning_rate": 1.7089454357554437e-05, "loss": 0.2222, "step": 11500 }, { "epoch": 0.2492795701254523, "grad_norm": 1.9558796882629395, "learning_rate": 1.7087053615375944e-05, "loss": 0.2984, "step": 11505 }, { "epoch": 0.24938790544493317, "grad_norm": 3.82356858253479, "learning_rate": 1.708465205226845e-05, "loss": 0.2414, "step": 11510 }, { "epoch": 0.249496240764414, "grad_norm": 1.7846888303756714, "learning_rate": 1.708224966851015e-05, "loss": 0.1951, "step": 11515 }, { "epoch": 0.24960457608389486, "grad_norm": 2.104614496231079, "learning_rate": 1.707984646437931e-05, "loss": 0.1864, "step": 11520 }, { "epoch": 0.24971291140337573, "grad_norm": 1.7064871788024902, "learning_rate": 1.7077442440154317e-05, "loss": 0.1972, "step": 11525 }, { "epoch": 0.24982124672285658, "grad_norm": 1.7380928993225098, "learning_rate": 1.7075037596113636e-05, "loss": 0.2491, "step": 11530 }, { "epoch": 0.24992958204233745, "grad_norm": 2.861114501953125, "learning_rate": 1.7072631932535836e-05, "loss": 0.3018, "step": 11535 }, { "epoch": 0.2500379173618183, "grad_norm": 1.3781408071517944, "learning_rate": 1.707022544969957e-05, "loss": 0.1958, "step": 11540 }, { "epoch": 0.25014625268129914, "grad_norm": 2.2474381923675537, "learning_rate": 1.7067818147883603e-05, "loss": 0.2379, "step": 11545 }, { "epoch": 0.25025458800078, "grad_norm": 2.391714572906494, "learning_rate": 1.706541002736678e-05, "loss": 0.2451, "step": 11550 }, { "epoch": 0.2503629233202609, "grad_norm": 1.7643376588821411, "learning_rate": 1.706300108842804e-05, "loss": 0.206, "step": 11555 }, { "epoch": 0.25047125863974173, "grad_norm": 2.546778440475464, "learning_rate": 1.706059133134643e-05, "loss": 0.1994, "step": 11560 }, { "epoch": 0.2505795939592226, "grad_norm": 2.2810702323913574, "learning_rate": 1.705818075640108e-05, "loss": 0.2787, "step": 11565 }, { "epoch": 0.2506879292787034, "grad_norm": 1.601022720336914, "learning_rate": 1.7055769363871224e-05, "loss": 0.1872, "step": 11570 }, { "epoch": 0.2507962645981843, "grad_norm": 2.0978283882141113, "learning_rate": 1.705335715403618e-05, "loss": 0.3252, "step": 11575 }, { "epoch": 0.25090459991766517, "grad_norm": 1.4451158046722412, "learning_rate": 1.7050944127175373e-05, "loss": 0.2655, "step": 11580 }, { "epoch": 0.251012935237146, "grad_norm": 2.1346118450164795, "learning_rate": 1.7048530283568308e-05, "loss": 0.1472, "step": 11585 }, { "epoch": 0.25112127055662686, "grad_norm": 2.1138620376586914, "learning_rate": 1.70461156234946e-05, "loss": 0.2822, "step": 11590 }, { "epoch": 0.2512296058761077, "grad_norm": 1.4506478309631348, "learning_rate": 1.7043700147233944e-05, "loss": 0.227, "step": 11595 }, { "epoch": 0.2513379411955886, "grad_norm": 1.9335927963256836, "learning_rate": 1.7041283855066142e-05, "loss": 0.1981, "step": 11600 }, { "epoch": 0.25144627651506946, "grad_norm": 1.5585851669311523, "learning_rate": 1.7038866747271085e-05, "loss": 0.2136, "step": 11605 }, { "epoch": 0.2515546118345503, "grad_norm": 1.57645583152771, "learning_rate": 1.7036448824128756e-05, "loss": 0.1831, "step": 11610 }, { "epoch": 0.25166294715403115, "grad_norm": 2.4733235836029053, "learning_rate": 1.703403008591924e-05, "loss": 0.277, "step": 11615 }, { "epoch": 0.251771282473512, "grad_norm": 2.2498905658721924, "learning_rate": 1.7031610532922704e-05, "loss": 0.2712, "step": 11620 }, { "epoch": 0.2518796177929929, "grad_norm": 2.6805150508880615, "learning_rate": 1.702919016541942e-05, "loss": 0.2679, "step": 11625 }, { "epoch": 0.25198795311247374, "grad_norm": 2.0753002166748047, "learning_rate": 1.702676898368976e-05, "loss": 0.1782, "step": 11630 }, { "epoch": 0.2520962884319546, "grad_norm": 2.246875762939453, "learning_rate": 1.702434698801417e-05, "loss": 0.1949, "step": 11635 }, { "epoch": 0.25220462375143543, "grad_norm": 2.231006622314453, "learning_rate": 1.7021924178673207e-05, "loss": 0.2626, "step": 11640 }, { "epoch": 0.2523129590709163, "grad_norm": 1.5517759323120117, "learning_rate": 1.7019500555947515e-05, "loss": 0.2597, "step": 11645 }, { "epoch": 0.2524212943903972, "grad_norm": 1.9685752391815186, "learning_rate": 1.7017076120117835e-05, "loss": 0.1606, "step": 11650 }, { "epoch": 0.252529629709878, "grad_norm": 1.9589954614639282, "learning_rate": 1.7014650871465004e-05, "loss": 0.2132, "step": 11655 }, { "epoch": 0.25263796502935887, "grad_norm": 1.806522011756897, "learning_rate": 1.7012224810269947e-05, "loss": 0.1974, "step": 11660 }, { "epoch": 0.2527463003488397, "grad_norm": 1.918008804321289, "learning_rate": 1.7009797936813692e-05, "loss": 0.23, "step": 11665 }, { "epoch": 0.25285463566832056, "grad_norm": 2.117475986480713, "learning_rate": 1.7007370251377353e-05, "loss": 0.1463, "step": 11670 }, { "epoch": 0.25296297098780146, "grad_norm": 1.239650011062622, "learning_rate": 1.7004941754242137e-05, "loss": 0.2444, "step": 11675 }, { "epoch": 0.2530713063072823, "grad_norm": 2.4351930618286133, "learning_rate": 1.7002512445689355e-05, "loss": 0.2393, "step": 11680 }, { "epoch": 0.25317964162676315, "grad_norm": 1.7672038078308105, "learning_rate": 1.7000082326000404e-05, "loss": 0.1908, "step": 11685 }, { "epoch": 0.253287976946244, "grad_norm": 1.7752989530563354, "learning_rate": 1.6997651395456777e-05, "loss": 0.1497, "step": 11690 }, { "epoch": 0.2533963122657249, "grad_norm": 2.5995326042175293, "learning_rate": 1.6995219654340055e-05, "loss": 0.279, "step": 11695 }, { "epoch": 0.25350464758520574, "grad_norm": 1.8781708478927612, "learning_rate": 1.6992787102931934e-05, "loss": 0.2674, "step": 11700 }, { "epoch": 0.2536129829046866, "grad_norm": 3.5312232971191406, "learning_rate": 1.6990353741514174e-05, "loss": 0.311, "step": 11705 }, { "epoch": 0.25372131822416744, "grad_norm": 2.831644058227539, "learning_rate": 1.6987919570368648e-05, "loss": 0.3561, "step": 11710 }, { "epoch": 0.2538296535436483, "grad_norm": 1.7271143198013306, "learning_rate": 1.698548458977732e-05, "loss": 0.195, "step": 11715 }, { "epoch": 0.2539379888631292, "grad_norm": 2.0338361263275146, "learning_rate": 1.6983048800022244e-05, "loss": 0.2904, "step": 11720 }, { "epoch": 0.25404632418261003, "grad_norm": 1.2017263174057007, "learning_rate": 1.698061220138557e-05, "loss": 0.2523, "step": 11725 }, { "epoch": 0.2541546595020909, "grad_norm": 2.4775004386901855, "learning_rate": 1.697817479414954e-05, "loss": 0.2578, "step": 11730 }, { "epoch": 0.2542629948215717, "grad_norm": 1.7666728496551514, "learning_rate": 1.69757365785965e-05, "loss": 0.2111, "step": 11735 }, { "epoch": 0.25437133014105257, "grad_norm": 1.855082631111145, "learning_rate": 1.6973297555008873e-05, "loss": 0.1683, "step": 11740 }, { "epoch": 0.25447966546053347, "grad_norm": 2.5297932624816895, "learning_rate": 1.6970857723669178e-05, "loss": 0.202, "step": 11745 }, { "epoch": 0.2545880007800143, "grad_norm": 1.8676683902740479, "learning_rate": 1.696841708486004e-05, "loss": 0.199, "step": 11750 }, { "epoch": 0.25469633609949516, "grad_norm": 1.9058337211608887, "learning_rate": 1.6965975638864173e-05, "loss": 0.2332, "step": 11755 }, { "epoch": 0.254804671418976, "grad_norm": 1.4448645114898682, "learning_rate": 1.696353338596438e-05, "loss": 0.2645, "step": 11760 }, { "epoch": 0.25491300673845685, "grad_norm": 2.5072214603424072, "learning_rate": 1.6961090326443556e-05, "loss": 0.2024, "step": 11765 }, { "epoch": 0.25502134205793775, "grad_norm": 3.1057705879211426, "learning_rate": 1.6958646460584695e-05, "loss": 0.2405, "step": 11770 }, { "epoch": 0.2551296773774186, "grad_norm": 1.6115193367004395, "learning_rate": 1.695620178867088e-05, "loss": 0.2787, "step": 11775 }, { "epoch": 0.25523801269689944, "grad_norm": 1.7722103595733643, "learning_rate": 1.6953756310985294e-05, "loss": 0.2736, "step": 11780 }, { "epoch": 0.2553463480163803, "grad_norm": 1.6368645429611206, "learning_rate": 1.6951310027811208e-05, "loss": 0.2575, "step": 11785 }, { "epoch": 0.25545468333586113, "grad_norm": 2.4537947177886963, "learning_rate": 1.6948862939431983e-05, "loss": 0.198, "step": 11790 }, { "epoch": 0.25556301865534203, "grad_norm": 2.2223434448242188, "learning_rate": 1.694641504613108e-05, "loss": 0.2453, "step": 11795 }, { "epoch": 0.2556713539748229, "grad_norm": 1.5275118350982666, "learning_rate": 1.6943966348192052e-05, "loss": 0.1742, "step": 11800 }, { "epoch": 0.2557796892943037, "grad_norm": 1.3718397617340088, "learning_rate": 1.6941516845898544e-05, "loss": 0.2419, "step": 11805 }, { "epoch": 0.25588802461378457, "grad_norm": 2.4568898677825928, "learning_rate": 1.6939066539534294e-05, "loss": 0.201, "step": 11810 }, { "epoch": 0.2559963599332654, "grad_norm": 2.8242242336273193, "learning_rate": 1.6936615429383133e-05, "loss": 0.2128, "step": 11815 }, { "epoch": 0.2561046952527463, "grad_norm": 1.9933531284332275, "learning_rate": 1.693416351572898e-05, "loss": 0.1916, "step": 11820 }, { "epoch": 0.25621303057222716, "grad_norm": 2.5041050910949707, "learning_rate": 1.693171079885586e-05, "loss": 0.2562, "step": 11825 }, { "epoch": 0.256321365891708, "grad_norm": 1.9089016914367676, "learning_rate": 1.6929257279047882e-05, "loss": 0.2182, "step": 11830 }, { "epoch": 0.25642970121118885, "grad_norm": 1.2791916131973267, "learning_rate": 1.692680295658925e-05, "loss": 0.2692, "step": 11835 }, { "epoch": 0.25653803653066976, "grad_norm": 2.079775094985962, "learning_rate": 1.6924347831764255e-05, "loss": 0.2689, "step": 11840 }, { "epoch": 0.2566463718501506, "grad_norm": 2.2777907848358154, "learning_rate": 1.6921891904857295e-05, "loss": 0.1585, "step": 11845 }, { "epoch": 0.25675470716963145, "grad_norm": 1.7127537727355957, "learning_rate": 1.6919435176152846e-05, "loss": 0.2653, "step": 11850 }, { "epoch": 0.2568630424891123, "grad_norm": 1.296994924545288, "learning_rate": 1.6916977645935485e-05, "loss": 0.2131, "step": 11855 }, { "epoch": 0.25697137780859314, "grad_norm": 1.7490321397781372, "learning_rate": 1.691451931448988e-05, "loss": 0.2434, "step": 11860 }, { "epoch": 0.25707971312807404, "grad_norm": 1.5782251358032227, "learning_rate": 1.6912060182100792e-05, "loss": 0.2792, "step": 11865 }, { "epoch": 0.2571880484475549, "grad_norm": 2.2292661666870117, "learning_rate": 1.6909600249053072e-05, "loss": 0.1752, "step": 11870 }, { "epoch": 0.25729638376703573, "grad_norm": 1.818871021270752, "learning_rate": 1.6907139515631672e-05, "loss": 0.2872, "step": 11875 }, { "epoch": 0.2574047190865166, "grad_norm": 2.1911814212799072, "learning_rate": 1.6904677982121626e-05, "loss": 0.1942, "step": 11880 }, { "epoch": 0.2575130544059974, "grad_norm": 1.2934635877609253, "learning_rate": 1.6902215648808067e-05, "loss": 0.169, "step": 11885 }, { "epoch": 0.2576213897254783, "grad_norm": 1.6277027130126953, "learning_rate": 1.6899752515976224e-05, "loss": 0.1488, "step": 11890 }, { "epoch": 0.25772972504495917, "grad_norm": 2.53818678855896, "learning_rate": 1.6897288583911407e-05, "loss": 0.1402, "step": 11895 }, { "epoch": 0.25783806036444, "grad_norm": 1.4921270608901978, "learning_rate": 1.6894823852899032e-05, "loss": 0.198, "step": 11900 }, { "epoch": 0.25794639568392086, "grad_norm": 1.8270591497421265, "learning_rate": 1.6892358323224594e-05, "loss": 0.2088, "step": 11905 }, { "epoch": 0.2580547310034017, "grad_norm": 2.451838731765747, "learning_rate": 1.6889891995173698e-05, "loss": 0.3407, "step": 11910 }, { "epoch": 0.2581630663228826, "grad_norm": 1.5984808206558228, "learning_rate": 1.6887424869032022e-05, "loss": 0.2844, "step": 11915 }, { "epoch": 0.25827140164236345, "grad_norm": 1.584338665008545, "learning_rate": 1.6884956945085347e-05, "loss": 0.2416, "step": 11920 }, { "epoch": 0.2583797369618443, "grad_norm": 1.2905439138412476, "learning_rate": 1.6882488223619548e-05, "loss": 0.1571, "step": 11925 }, { "epoch": 0.25848807228132514, "grad_norm": 2.383042573928833, "learning_rate": 1.6880018704920585e-05, "loss": 0.2553, "step": 11930 }, { "epoch": 0.258596407600806, "grad_norm": 2.070230722427368, "learning_rate": 1.687754838927452e-05, "loss": 0.232, "step": 11935 }, { "epoch": 0.2587047429202869, "grad_norm": 2.027698278427124, "learning_rate": 1.68750772769675e-05, "loss": 0.3005, "step": 11940 }, { "epoch": 0.25881307823976774, "grad_norm": 1.9989681243896484, "learning_rate": 1.6872605368285767e-05, "loss": 0.2281, "step": 11945 }, { "epoch": 0.2589214135592486, "grad_norm": 1.4276399612426758, "learning_rate": 1.6870132663515653e-05, "loss": 0.3103, "step": 11950 }, { "epoch": 0.2590297488787294, "grad_norm": 1.7173782587051392, "learning_rate": 1.6867659162943585e-05, "loss": 0.2454, "step": 11955 }, { "epoch": 0.25913808419821033, "grad_norm": 2.0630338191986084, "learning_rate": 1.6865184866856078e-05, "loss": 0.2468, "step": 11960 }, { "epoch": 0.2592464195176912, "grad_norm": 1.8007808923721313, "learning_rate": 1.6862709775539746e-05, "loss": 0.2, "step": 11965 }, { "epoch": 0.259354754837172, "grad_norm": 1.8437550067901611, "learning_rate": 1.686023388928129e-05, "loss": 0.1627, "step": 11970 }, { "epoch": 0.25946309015665286, "grad_norm": 2.078751802444458, "learning_rate": 1.6857757208367505e-05, "loss": 0.2374, "step": 11975 }, { "epoch": 0.2595714254761337, "grad_norm": 2.2578847408294678, "learning_rate": 1.6855279733085278e-05, "loss": 0.3137, "step": 11980 }, { "epoch": 0.2596797607956146, "grad_norm": 1.9021979570388794, "learning_rate": 1.6852801463721582e-05, "loss": 0.2363, "step": 11985 }, { "epoch": 0.25978809611509546, "grad_norm": 1.7476214170455933, "learning_rate": 1.6850322400563494e-05, "loss": 0.2922, "step": 11990 }, { "epoch": 0.2598964314345763, "grad_norm": 2.256361246109009, "learning_rate": 1.6847842543898168e-05, "loss": 0.291, "step": 11995 }, { "epoch": 0.26000476675405715, "grad_norm": 2.1663105487823486, "learning_rate": 1.6845361894012874e-05, "loss": 0.2305, "step": 12000 }, { "epoch": 0.260113102073538, "grad_norm": 1.385014533996582, "learning_rate": 1.6842880451194942e-05, "loss": 0.1902, "step": 12005 }, { "epoch": 0.2602214373930189, "grad_norm": 2.7258076667785645, "learning_rate": 1.6840398215731814e-05, "loss": 0.2105, "step": 12010 }, { "epoch": 0.26032977271249974, "grad_norm": 1.639404535293579, "learning_rate": 1.6837915187911028e-05, "loss": 0.2128, "step": 12015 }, { "epoch": 0.2604381080319806, "grad_norm": 1.5505534410476685, "learning_rate": 1.6835431368020196e-05, "loss": 0.298, "step": 12020 }, { "epoch": 0.26054644335146143, "grad_norm": 1.6340745687484741, "learning_rate": 1.6832946756347034e-05, "loss": 0.2759, "step": 12025 }, { "epoch": 0.2606547786709423, "grad_norm": 1.7651172876358032, "learning_rate": 1.6830461353179353e-05, "loss": 0.2036, "step": 12030 }, { "epoch": 0.2607631139904232, "grad_norm": 2.156705856323242, "learning_rate": 1.6827975158805037e-05, "loss": 0.2396, "step": 12035 }, { "epoch": 0.260871449309904, "grad_norm": 1.9076249599456787, "learning_rate": 1.6825488173512088e-05, "loss": 0.1777, "step": 12040 }, { "epoch": 0.26097978462938487, "grad_norm": 1.630133032798767, "learning_rate": 1.682300039758858e-05, "loss": 0.2428, "step": 12045 }, { "epoch": 0.2610881199488657, "grad_norm": 1.8495723009109497, "learning_rate": 1.6820511831322678e-05, "loss": 0.2439, "step": 12050 }, { "epoch": 0.26119645526834656, "grad_norm": 1.3828357458114624, "learning_rate": 1.6818022475002654e-05, "loss": 0.2323, "step": 12055 }, { "epoch": 0.26130479058782746, "grad_norm": 2.460367202758789, "learning_rate": 1.6815532328916862e-05, "loss": 0.1315, "step": 12060 }, { "epoch": 0.2614131259073083, "grad_norm": 1.721638798713684, "learning_rate": 1.6813041393353748e-05, "loss": 0.3062, "step": 12065 }, { "epoch": 0.26152146122678915, "grad_norm": 1.716455340385437, "learning_rate": 1.6810549668601847e-05, "loss": 0.2224, "step": 12070 }, { "epoch": 0.26162979654627, "grad_norm": 2.839078426361084, "learning_rate": 1.680805715494979e-05, "loss": 0.2366, "step": 12075 }, { "epoch": 0.26173813186575084, "grad_norm": 1.506135106086731, "learning_rate": 1.6805563852686292e-05, "loss": 0.2134, "step": 12080 }, { "epoch": 0.26184646718523175, "grad_norm": 1.5930664539337158, "learning_rate": 1.680306976210017e-05, "loss": 0.2329, "step": 12085 }, { "epoch": 0.2619548025047126, "grad_norm": 1.4225807189941406, "learning_rate": 1.6800574883480326e-05, "loss": 0.163, "step": 12090 }, { "epoch": 0.26206313782419344, "grad_norm": 2.497204542160034, "learning_rate": 1.6798079217115758e-05, "loss": 0.2218, "step": 12095 }, { "epoch": 0.2621714731436743, "grad_norm": 2.6246016025543213, "learning_rate": 1.6795582763295548e-05, "loss": 0.315, "step": 12100 }, { "epoch": 0.2622798084631552, "grad_norm": 1.904488444328308, "learning_rate": 1.6793085522308866e-05, "loss": 0.1898, "step": 12105 }, { "epoch": 0.26238814378263603, "grad_norm": 1.9679028987884521, "learning_rate": 1.6790587494444993e-05, "loss": 0.2132, "step": 12110 }, { "epoch": 0.2624964791021169, "grad_norm": 1.5198431015014648, "learning_rate": 1.6788088679993276e-05, "loss": 0.2001, "step": 12115 }, { "epoch": 0.2626048144215977, "grad_norm": 2.356079578399658, "learning_rate": 1.678558907924318e-05, "loss": 0.3202, "step": 12120 }, { "epoch": 0.26271314974107857, "grad_norm": 1.5605658292770386, "learning_rate": 1.678308869248423e-05, "loss": 0.2834, "step": 12125 }, { "epoch": 0.26282148506055947, "grad_norm": 1.6188931465148926, "learning_rate": 1.6780587520006067e-05, "loss": 0.2569, "step": 12130 }, { "epoch": 0.2629298203800403, "grad_norm": 2.092587947845459, "learning_rate": 1.6778085562098413e-05, "loss": 0.2634, "step": 12135 }, { "epoch": 0.26303815569952116, "grad_norm": 2.2456936836242676, "learning_rate": 1.6775582819051085e-05, "loss": 0.2722, "step": 12140 }, { "epoch": 0.263146491019002, "grad_norm": 1.6598068475723267, "learning_rate": 1.6773079291153985e-05, "loss": 0.2301, "step": 12145 }, { "epoch": 0.26325482633848285, "grad_norm": 1.6021183729171753, "learning_rate": 1.6770574978697104e-05, "loss": 0.2125, "step": 12150 }, { "epoch": 0.26336316165796375, "grad_norm": 1.5187952518463135, "learning_rate": 1.676806988197054e-05, "loss": 0.2037, "step": 12155 }, { "epoch": 0.2634714969774446, "grad_norm": 1.3862016201019287, "learning_rate": 1.6765564001264463e-05, "loss": 0.2324, "step": 12160 }, { "epoch": 0.26357983229692544, "grad_norm": 1.370478630065918, "learning_rate": 1.676305733686915e-05, "loss": 0.1738, "step": 12165 }, { "epoch": 0.2636881676164063, "grad_norm": 1.686584711074829, "learning_rate": 1.6760549889074954e-05, "loss": 0.1731, "step": 12170 }, { "epoch": 0.26379650293588713, "grad_norm": 1.7057178020477295, "learning_rate": 1.6758041658172325e-05, "loss": 0.1894, "step": 12175 }, { "epoch": 0.26390483825536803, "grad_norm": 2.626192331314087, "learning_rate": 1.67555326444518e-05, "loss": 0.2856, "step": 12180 }, { "epoch": 0.2640131735748489, "grad_norm": 2.028991460800171, "learning_rate": 1.6753022848204025e-05, "loss": 0.2338, "step": 12185 }, { "epoch": 0.2641215088943297, "grad_norm": 1.491821527481079, "learning_rate": 1.6750512269719706e-05, "loss": 0.268, "step": 12190 }, { "epoch": 0.26422984421381057, "grad_norm": 2.280662775039673, "learning_rate": 1.6748000909289665e-05, "loss": 0.1643, "step": 12195 }, { "epoch": 0.2643381795332914, "grad_norm": 1.1503077745437622, "learning_rate": 1.6745488767204806e-05, "loss": 0.1455, "step": 12200 }, { "epoch": 0.2644465148527723, "grad_norm": 2.841505765914917, "learning_rate": 1.674297584375612e-05, "loss": 0.282, "step": 12205 }, { "epoch": 0.26455485017225316, "grad_norm": 1.6013654470443726, "learning_rate": 1.6740462139234693e-05, "loss": 0.2755, "step": 12210 }, { "epoch": 0.264663185491734, "grad_norm": 1.7957099676132202, "learning_rate": 1.67379476539317e-05, "loss": 0.1691, "step": 12215 }, { "epoch": 0.26477152081121486, "grad_norm": 1.635355830192566, "learning_rate": 1.67354323881384e-05, "loss": 0.2572, "step": 12220 }, { "epoch": 0.26487985613069576, "grad_norm": 1.8173223733901978, "learning_rate": 1.673291634214616e-05, "loss": 0.2085, "step": 12225 }, { "epoch": 0.2649881914501766, "grad_norm": 2.214052677154541, "learning_rate": 1.673039951624642e-05, "loss": 0.273, "step": 12230 }, { "epoch": 0.26509652676965745, "grad_norm": 1.8818804025650024, "learning_rate": 1.6727881910730714e-05, "loss": 0.2682, "step": 12235 }, { "epoch": 0.2652048620891383, "grad_norm": 1.3158917427062988, "learning_rate": 1.6725363525890678e-05, "loss": 0.1663, "step": 12240 }, { "epoch": 0.26531319740861914, "grad_norm": 1.2026188373565674, "learning_rate": 1.672284436201802e-05, "loss": 0.2476, "step": 12245 }, { "epoch": 0.26542153272810004, "grad_norm": 2.3106396198272705, "learning_rate": 1.672032441940455e-05, "loss": 0.246, "step": 12250 }, { "epoch": 0.2655298680475809, "grad_norm": 1.8808586597442627, "learning_rate": 1.6717803698342167e-05, "loss": 0.2111, "step": 12255 }, { "epoch": 0.26563820336706173, "grad_norm": 1.532265305519104, "learning_rate": 1.6715282199122863e-05, "loss": 0.2252, "step": 12260 }, { "epoch": 0.2657465386865426, "grad_norm": 2.047991991043091, "learning_rate": 1.6712759922038706e-05, "loss": 0.2494, "step": 12265 }, { "epoch": 0.2658548740060234, "grad_norm": 1.4829646348953247, "learning_rate": 1.671023686738187e-05, "loss": 0.2322, "step": 12270 }, { "epoch": 0.2659632093255043, "grad_norm": 1.2438640594482422, "learning_rate": 1.6707713035444614e-05, "loss": 0.2032, "step": 12275 }, { "epoch": 0.26607154464498517, "grad_norm": 1.3935405015945435, "learning_rate": 1.6705188426519284e-05, "loss": 0.1761, "step": 12280 }, { "epoch": 0.266179879964466, "grad_norm": 1.407711148262024, "learning_rate": 1.6702663040898316e-05, "loss": 0.1824, "step": 12285 }, { "epoch": 0.26628821528394686, "grad_norm": 2.0799779891967773, "learning_rate": 1.6700136878874242e-05, "loss": 0.2254, "step": 12290 }, { "epoch": 0.2663965506034277, "grad_norm": 1.6582187414169312, "learning_rate": 1.669760994073968e-05, "loss": 0.2042, "step": 12295 }, { "epoch": 0.2665048859229086, "grad_norm": 3.1236045360565186, "learning_rate": 1.6695082226787335e-05, "loss": 0.2502, "step": 12300 }, { "epoch": 0.26661322124238945, "grad_norm": 1.39210844039917, "learning_rate": 1.6692553737310005e-05, "loss": 0.2598, "step": 12305 }, { "epoch": 0.2667215565618703, "grad_norm": 1.555472731590271, "learning_rate": 1.6690024472600583e-05, "loss": 0.2023, "step": 12310 }, { "epoch": 0.26682989188135114, "grad_norm": 1.7811228036880493, "learning_rate": 1.6687494432952037e-05, "loss": 0.2478, "step": 12315 }, { "epoch": 0.266938227200832, "grad_norm": 2.4793782234191895, "learning_rate": 1.6684963618657444e-05, "loss": 0.1733, "step": 12320 }, { "epoch": 0.2670465625203129, "grad_norm": 1.3817542791366577, "learning_rate": 1.6682432030009956e-05, "loss": 0.2644, "step": 12325 }, { "epoch": 0.26715489783979374, "grad_norm": 1.397183895111084, "learning_rate": 1.667989966730282e-05, "loss": 0.2657, "step": 12330 }, { "epoch": 0.2672632331592746, "grad_norm": 2.068371057510376, "learning_rate": 1.6677366530829372e-05, "loss": 0.2689, "step": 12335 }, { "epoch": 0.26737156847875543, "grad_norm": 1.929335594177246, "learning_rate": 1.667483262088304e-05, "loss": 0.2176, "step": 12340 }, { "epoch": 0.2674799037982363, "grad_norm": 3.0669353008270264, "learning_rate": 1.6672297937757336e-05, "loss": 0.1912, "step": 12345 }, { "epoch": 0.2675882391177172, "grad_norm": 2.10042667388916, "learning_rate": 1.6669762481745862e-05, "loss": 0.2301, "step": 12350 }, { "epoch": 0.267696574437198, "grad_norm": 2.0651063919067383, "learning_rate": 1.6667226253142325e-05, "loss": 0.2518, "step": 12355 }, { "epoch": 0.26780490975667887, "grad_norm": 1.438330054283142, "learning_rate": 1.66646892522405e-05, "loss": 0.3024, "step": 12360 }, { "epoch": 0.2679132450761597, "grad_norm": 1.5618128776550293, "learning_rate": 1.666215147933426e-05, "loss": 0.2265, "step": 12365 }, { "epoch": 0.2680215803956406, "grad_norm": 2.3096306324005127, "learning_rate": 1.665961293471757e-05, "loss": 0.2678, "step": 12370 }, { "epoch": 0.26812991571512146, "grad_norm": 1.94977867603302, "learning_rate": 1.6657073618684486e-05, "loss": 0.249, "step": 12375 }, { "epoch": 0.2682382510346023, "grad_norm": 2.0194694995880127, "learning_rate": 1.6654533531529144e-05, "loss": 0.2252, "step": 12380 }, { "epoch": 0.26834658635408315, "grad_norm": 1.9397650957107544, "learning_rate": 1.6651992673545777e-05, "loss": 0.2426, "step": 12385 }, { "epoch": 0.268454921673564, "grad_norm": 2.209484577178955, "learning_rate": 1.6649451045028708e-05, "loss": 0.2424, "step": 12390 }, { "epoch": 0.2685632569930449, "grad_norm": 1.1650910377502441, "learning_rate": 1.6646908646272342e-05, "loss": 0.1171, "step": 12395 }, { "epoch": 0.26867159231252574, "grad_norm": 1.1354072093963623, "learning_rate": 1.664436547757118e-05, "loss": 0.2324, "step": 12400 }, { "epoch": 0.2687799276320066, "grad_norm": 1.5801208019256592, "learning_rate": 1.664182153921981e-05, "loss": 0.1914, "step": 12405 }, { "epoch": 0.26888826295148743, "grad_norm": 1.7207589149475098, "learning_rate": 1.663927683151291e-05, "loss": 0.2806, "step": 12410 }, { "epoch": 0.2689965982709683, "grad_norm": 1.86616849899292, "learning_rate": 1.6636731354745243e-05, "loss": 0.265, "step": 12415 }, { "epoch": 0.2691049335904492, "grad_norm": 1.7564817667007446, "learning_rate": 1.6634185109211668e-05, "loss": 0.1797, "step": 12420 }, { "epoch": 0.26921326890993, "grad_norm": 3.130563259124756, "learning_rate": 1.663163809520713e-05, "loss": 0.2465, "step": 12425 }, { "epoch": 0.26932160422941087, "grad_norm": 1.903921365737915, "learning_rate": 1.662909031302666e-05, "loss": 0.3336, "step": 12430 }, { "epoch": 0.2694299395488917, "grad_norm": 1.2504708766937256, "learning_rate": 1.662654176296538e-05, "loss": 0.166, "step": 12435 }, { "epoch": 0.26953827486837256, "grad_norm": 1.9524176120758057, "learning_rate": 1.6623992445318506e-05, "loss": 0.1308, "step": 12440 }, { "epoch": 0.26964661018785346, "grad_norm": 1.5678969621658325, "learning_rate": 1.662144236038133e-05, "loss": 0.2329, "step": 12445 }, { "epoch": 0.2697549455073343, "grad_norm": 1.146704912185669, "learning_rate": 1.6618891508449246e-05, "loss": 0.1916, "step": 12450 }, { "epoch": 0.26986328082681516, "grad_norm": 2.5948145389556885, "learning_rate": 1.6616339889817732e-05, "loss": 0.2622, "step": 12455 }, { "epoch": 0.269971616146296, "grad_norm": 2.1821701526641846, "learning_rate": 1.6613787504782357e-05, "loss": 0.2531, "step": 12460 }, { "epoch": 0.27007995146577685, "grad_norm": 2.2774388790130615, "learning_rate": 1.6611234353638774e-05, "loss": 0.2167, "step": 12465 }, { "epoch": 0.27018828678525775, "grad_norm": 2.2775909900665283, "learning_rate": 1.6608680436682724e-05, "loss": 0.2586, "step": 12470 }, { "epoch": 0.2702966221047386, "grad_norm": 2.643120765686035, "learning_rate": 1.6606125754210044e-05, "loss": 0.2641, "step": 12475 }, { "epoch": 0.27040495742421944, "grad_norm": 1.8256027698516846, "learning_rate": 1.6603570306516656e-05, "loss": 0.2083, "step": 12480 }, { "epoch": 0.2705132927437003, "grad_norm": 1.2894082069396973, "learning_rate": 1.6601014093898573e-05, "loss": 0.2568, "step": 12485 }, { "epoch": 0.2706216280631812, "grad_norm": 1.4517230987548828, "learning_rate": 1.6598457116651886e-05, "loss": 0.2651, "step": 12490 }, { "epoch": 0.27072996338266203, "grad_norm": 1.4328526258468628, "learning_rate": 1.6595899375072788e-05, "loss": 0.2346, "step": 12495 }, { "epoch": 0.2708382987021429, "grad_norm": 2.08671236038208, "learning_rate": 1.6593340869457554e-05, "loss": 0.2465, "step": 12500 }, { "epoch": 0.2709466340216237, "grad_norm": 1.7158926725387573, "learning_rate": 1.659078160010255e-05, "loss": 0.2463, "step": 12505 }, { "epoch": 0.27105496934110457, "grad_norm": 2.1734354496002197, "learning_rate": 1.658822156730423e-05, "loss": 0.2803, "step": 12510 }, { "epoch": 0.27116330466058547, "grad_norm": 2.4677345752716064, "learning_rate": 1.6585660771359125e-05, "loss": 0.2371, "step": 12515 }, { "epoch": 0.2712716399800663, "grad_norm": 1.677243947982788, "learning_rate": 1.6583099212563878e-05, "loss": 0.2517, "step": 12520 }, { "epoch": 0.27137997529954716, "grad_norm": 1.1378209590911865, "learning_rate": 1.6580536891215202e-05, "loss": 0.2135, "step": 12525 }, { "epoch": 0.271488310619028, "grad_norm": 1.7041677236557007, "learning_rate": 1.6577973807609902e-05, "loss": 0.1578, "step": 12530 }, { "epoch": 0.27159664593850885, "grad_norm": 1.7305283546447754, "learning_rate": 1.6575409962044875e-05, "loss": 0.2178, "step": 12535 }, { "epoch": 0.27170498125798975, "grad_norm": 1.8704955577850342, "learning_rate": 1.6572845354817106e-05, "loss": 0.2185, "step": 12540 }, { "epoch": 0.2718133165774706, "grad_norm": 1.9082196950912476, "learning_rate": 1.6570279986223658e-05, "loss": 0.2611, "step": 12545 }, { "epoch": 0.27192165189695144, "grad_norm": 1.581024408340454, "learning_rate": 1.65677138565617e-05, "loss": 0.2424, "step": 12550 }, { "epoch": 0.2720299872164323, "grad_norm": 2.4191184043884277, "learning_rate": 1.6565146966128477e-05, "loss": 0.2534, "step": 12555 }, { "epoch": 0.27213832253591314, "grad_norm": 2.10929274559021, "learning_rate": 1.6562579315221325e-05, "loss": 0.1831, "step": 12560 }, { "epoch": 0.27224665785539404, "grad_norm": 1.4816910028457642, "learning_rate": 1.6560010904137664e-05, "loss": 0.1988, "step": 12565 }, { "epoch": 0.2723549931748749, "grad_norm": 2.34885311126709, "learning_rate": 1.655744173317501e-05, "loss": 0.279, "step": 12570 }, { "epoch": 0.2724633284943557, "grad_norm": 2.2215445041656494, "learning_rate": 1.655487180263096e-05, "loss": 0.2058, "step": 12575 }, { "epoch": 0.2725716638138366, "grad_norm": 1.1472136974334717, "learning_rate": 1.6552301112803203e-05, "loss": 0.2366, "step": 12580 }, { "epoch": 0.2726799991333174, "grad_norm": 2.0262839794158936, "learning_rate": 1.6549729663989518e-05, "loss": 0.3425, "step": 12585 }, { "epoch": 0.2727883344527983, "grad_norm": 1.9201045036315918, "learning_rate": 1.6547157456487765e-05, "loss": 0.1838, "step": 12590 }, { "epoch": 0.27289666977227917, "grad_norm": 2.8431153297424316, "learning_rate": 1.65445844905959e-05, "loss": 0.1819, "step": 12595 }, { "epoch": 0.27300500509176, "grad_norm": 1.9347097873687744, "learning_rate": 1.6542010766611956e-05, "loss": 0.1643, "step": 12600 }, { "epoch": 0.27311334041124086, "grad_norm": 1.5622884035110474, "learning_rate": 1.6539436284834068e-05, "loss": 0.2047, "step": 12605 }, { "epoch": 0.2732216757307217, "grad_norm": 1.8113480806350708, "learning_rate": 1.6536861045560443e-05, "loss": 0.221, "step": 12610 }, { "epoch": 0.2733300110502026, "grad_norm": 2.4058949947357178, "learning_rate": 1.6534285049089385e-05, "loss": 0.2652, "step": 12615 }, { "epoch": 0.27343834636968345, "grad_norm": 1.4518359899520874, "learning_rate": 1.6531708295719295e-05, "loss": 0.2208, "step": 12620 }, { "epoch": 0.2735466816891643, "grad_norm": 1.7210776805877686, "learning_rate": 1.6529130785748642e-05, "loss": 0.2024, "step": 12625 }, { "epoch": 0.27365501700864514, "grad_norm": 2.2852468490600586, "learning_rate": 1.6526552519475993e-05, "loss": 0.2187, "step": 12630 }, { "epoch": 0.27376335232812604, "grad_norm": 2.605451822280884, "learning_rate": 1.65239734972e-05, "loss": 0.1976, "step": 12635 }, { "epoch": 0.2738716876476069, "grad_norm": 1.4619876146316528, "learning_rate": 1.652139371921941e-05, "loss": 0.2618, "step": 12640 }, { "epoch": 0.27398002296708773, "grad_norm": 2.283212900161743, "learning_rate": 1.6518813185833046e-05, "loss": 0.2469, "step": 12645 }, { "epoch": 0.2740883582865686, "grad_norm": 1.5754977464675903, "learning_rate": 1.651623189733983e-05, "loss": 0.2625, "step": 12650 }, { "epoch": 0.2741966936060494, "grad_norm": 1.6437385082244873, "learning_rate": 1.6513649854038758e-05, "loss": 0.3, "step": 12655 }, { "epoch": 0.2743050289255303, "grad_norm": 1.83260178565979, "learning_rate": 1.6511067056228925e-05, "loss": 0.2644, "step": 12660 }, { "epoch": 0.27441336424501117, "grad_norm": 1.929547905921936, "learning_rate": 1.650848350420951e-05, "loss": 0.1812, "step": 12665 }, { "epoch": 0.274521699564492, "grad_norm": 2.7591042518615723, "learning_rate": 1.650589919827978e-05, "loss": 0.2015, "step": 12670 }, { "epoch": 0.27463003488397286, "grad_norm": 2.054130792617798, "learning_rate": 1.6503314138739084e-05, "loss": 0.1694, "step": 12675 }, { "epoch": 0.2747383702034537, "grad_norm": 1.7659425735473633, "learning_rate": 1.6500728325886866e-05, "loss": 0.1576, "step": 12680 }, { "epoch": 0.2748467055229346, "grad_norm": 2.0051655769348145, "learning_rate": 1.649814176002265e-05, "loss": 0.2726, "step": 12685 }, { "epoch": 0.27495504084241545, "grad_norm": 2.209055185317993, "learning_rate": 1.6495554441446056e-05, "loss": 0.2267, "step": 12690 }, { "epoch": 0.2750633761618963, "grad_norm": 1.3380017280578613, "learning_rate": 1.649296637045678e-05, "loss": 0.1764, "step": 12695 }, { "epoch": 0.27517171148137715, "grad_norm": 1.1486074924468994, "learning_rate": 1.6490377547354616e-05, "loss": 0.2735, "step": 12700 }, { "epoch": 0.275280046800858, "grad_norm": 1.8018193244934082, "learning_rate": 1.648778797243944e-05, "loss": 0.1892, "step": 12705 }, { "epoch": 0.2753883821203389, "grad_norm": 1.6319540739059448, "learning_rate": 1.6485197646011217e-05, "loss": 0.2881, "step": 12710 }, { "epoch": 0.27549671743981974, "grad_norm": 2.2323246002197266, "learning_rate": 1.648260656836999e-05, "loss": 0.1929, "step": 12715 }, { "epoch": 0.2756050527593006, "grad_norm": 1.8123085498809814, "learning_rate": 1.6480014739815903e-05, "loss": 0.2446, "step": 12720 }, { "epoch": 0.27571338807878143, "grad_norm": 3.9986398220062256, "learning_rate": 1.6477422160649183e-05, "loss": 0.2053, "step": 12725 }, { "epoch": 0.2758217233982623, "grad_norm": 2.227452278137207, "learning_rate": 1.6474828831170134e-05, "loss": 0.1863, "step": 12730 }, { "epoch": 0.2759300587177432, "grad_norm": 1.6437984704971313, "learning_rate": 1.6472234751679156e-05, "loss": 0.2121, "step": 12735 }, { "epoch": 0.276038394037224, "grad_norm": 1.6501349210739136, "learning_rate": 1.6469639922476738e-05, "loss": 0.2024, "step": 12740 }, { "epoch": 0.27614672935670487, "grad_norm": 1.741479516029358, "learning_rate": 1.646704434386345e-05, "loss": 0.2952, "step": 12745 }, { "epoch": 0.2762550646761857, "grad_norm": 2.4118599891662598, "learning_rate": 1.6464448016139954e-05, "loss": 0.2725, "step": 12750 }, { "epoch": 0.2763633999956666, "grad_norm": 2.3054044246673584, "learning_rate": 1.646185093960699e-05, "loss": 0.1846, "step": 12755 }, { "epoch": 0.27647173531514746, "grad_norm": 1.0472253561019897, "learning_rate": 1.645925311456539e-05, "loss": 0.3502, "step": 12760 }, { "epoch": 0.2765800706346283, "grad_norm": 2.1410417556762695, "learning_rate": 1.6456654541316082e-05, "loss": 0.2105, "step": 12765 }, { "epoch": 0.27668840595410915, "grad_norm": 2.0404107570648193, "learning_rate": 1.6454055220160064e-05, "loss": 0.1721, "step": 12770 }, { "epoch": 0.27679674127359, "grad_norm": 1.8801498413085938, "learning_rate": 1.645145515139843e-05, "loss": 0.2064, "step": 12775 }, { "epoch": 0.2769050765930709, "grad_norm": 1.5738521814346313, "learning_rate": 1.644885433533236e-05, "loss": 0.2397, "step": 12780 }, { "epoch": 0.27701341191255174, "grad_norm": 1.4628483057022095, "learning_rate": 1.6446252772263117e-05, "loss": 0.1786, "step": 12785 }, { "epoch": 0.2771217472320326, "grad_norm": 1.7568849325180054, "learning_rate": 1.6443650462492054e-05, "loss": 0.2198, "step": 12790 }, { "epoch": 0.27723008255151343, "grad_norm": 5.291439056396484, "learning_rate": 1.6441047406320616e-05, "loss": 0.2806, "step": 12795 }, { "epoch": 0.2773384178709943, "grad_norm": 3.1913106441497803, "learning_rate": 1.6438443604050324e-05, "loss": 0.2366, "step": 12800 }, { "epoch": 0.2774467531904752, "grad_norm": 2.0224151611328125, "learning_rate": 1.6435839055982782e-05, "loss": 0.2582, "step": 12805 }, { "epoch": 0.277555088509956, "grad_norm": 2.190181255340576, "learning_rate": 1.64332337624197e-05, "loss": 0.163, "step": 12810 }, { "epoch": 0.2776634238294369, "grad_norm": 1.757277011871338, "learning_rate": 1.643062772366285e-05, "loss": 0.2379, "step": 12815 }, { "epoch": 0.2777717591489177, "grad_norm": 1.2464911937713623, "learning_rate": 1.6428020940014113e-05, "loss": 0.1731, "step": 12820 }, { "epoch": 0.27788009446839856, "grad_norm": 1.510176181793213, "learning_rate": 1.642541341177544e-05, "loss": 0.2764, "step": 12825 }, { "epoch": 0.27798842978787947, "grad_norm": 1.2414144277572632, "learning_rate": 1.6422805139248878e-05, "loss": 0.2284, "step": 12830 }, { "epoch": 0.2780967651073603, "grad_norm": 1.9829838275909424, "learning_rate": 1.6420196122736553e-05, "loss": 0.2604, "step": 12835 }, { "epoch": 0.27820510042684116, "grad_norm": 1.9884593486785889, "learning_rate": 1.6417586362540684e-05, "loss": 0.2708, "step": 12840 }, { "epoch": 0.278313435746322, "grad_norm": 2.4092564582824707, "learning_rate": 1.641497585896357e-05, "loss": 0.1922, "step": 12845 }, { "epoch": 0.27842177106580285, "grad_norm": 1.184828758239746, "learning_rate": 1.6412364612307596e-05, "loss": 0.211, "step": 12850 }, { "epoch": 0.27853010638528375, "grad_norm": 1.1769131422042847, "learning_rate": 1.6409752622875238e-05, "loss": 0.1832, "step": 12855 }, { "epoch": 0.2786384417047646, "grad_norm": 1.8960915803909302, "learning_rate": 1.6407139890969062e-05, "loss": 0.2217, "step": 12860 }, { "epoch": 0.27874677702424544, "grad_norm": 2.2009410858154297, "learning_rate": 1.6404526416891707e-05, "loss": 0.2988, "step": 12865 }, { "epoch": 0.2788551123437263, "grad_norm": 1.6452525854110718, "learning_rate": 1.6401912200945904e-05, "loss": 0.2859, "step": 12870 }, { "epoch": 0.27896344766320713, "grad_norm": 1.7510628700256348, "learning_rate": 1.6399297243434476e-05, "loss": 0.3056, "step": 12875 }, { "epoch": 0.27907178298268803, "grad_norm": 1.9082872867584229, "learning_rate": 1.639668154466032e-05, "loss": 0.1985, "step": 12880 }, { "epoch": 0.2791801183021689, "grad_norm": 2.450263023376465, "learning_rate": 1.6394065104926434e-05, "loss": 0.3226, "step": 12885 }, { "epoch": 0.2792884536216497, "grad_norm": 1.5881435871124268, "learning_rate": 1.6391447924535885e-05, "loss": 0.1921, "step": 12890 }, { "epoch": 0.27939678894113057, "grad_norm": 1.5332649946212769, "learning_rate": 1.638883000379184e-05, "loss": 0.2353, "step": 12895 }, { "epoch": 0.27950512426061147, "grad_norm": 2.256431818008423, "learning_rate": 1.638621134299754e-05, "loss": 0.197, "step": 12900 }, { "epoch": 0.2796134595800923, "grad_norm": 1.967344045639038, "learning_rate": 1.6383591942456327e-05, "loss": 0.1684, "step": 12905 }, { "epoch": 0.27972179489957316, "grad_norm": 1.2590556144714355, "learning_rate": 1.638097180247161e-05, "loss": 0.2017, "step": 12910 }, { "epoch": 0.279830130219054, "grad_norm": 1.5854548215866089, "learning_rate": 1.6378350923346898e-05, "loss": 0.2276, "step": 12915 }, { "epoch": 0.27993846553853485, "grad_norm": 1.7111212015151978, "learning_rate": 1.6375729305385778e-05, "loss": 0.2425, "step": 12920 }, { "epoch": 0.28004680085801575, "grad_norm": 2.2602527141571045, "learning_rate": 1.637310694889193e-05, "loss": 0.2981, "step": 12925 }, { "epoch": 0.2801551361774966, "grad_norm": 2.1023752689361572, "learning_rate": 1.6370483854169107e-05, "loss": 0.2485, "step": 12930 }, { "epoch": 0.28026347149697745, "grad_norm": 1.7337002754211426, "learning_rate": 1.636786002152116e-05, "loss": 0.1492, "step": 12935 }, { "epoch": 0.2803718068164583, "grad_norm": 1.6455192565917969, "learning_rate": 1.6365235451252018e-05, "loss": 0.1662, "step": 12940 }, { "epoch": 0.28048014213593914, "grad_norm": 1.5979835987091064, "learning_rate": 1.6362610143665704e-05, "loss": 0.1986, "step": 12945 }, { "epoch": 0.28058847745542004, "grad_norm": 3.09911847114563, "learning_rate": 1.6359984099066316e-05, "loss": 0.2893, "step": 12950 }, { "epoch": 0.2806968127749009, "grad_norm": 1.0637627840042114, "learning_rate": 1.6357357317758043e-05, "loss": 0.2849, "step": 12955 }, { "epoch": 0.28080514809438173, "grad_norm": 2.2074623107910156, "learning_rate": 1.6354729800045157e-05, "loss": 0.2064, "step": 12960 }, { "epoch": 0.2809134834138626, "grad_norm": 2.188585042953491, "learning_rate": 1.6352101546232017e-05, "loss": 0.2173, "step": 12965 }, { "epoch": 0.2810218187333434, "grad_norm": 2.101789951324463, "learning_rate": 1.634947255662307e-05, "loss": 0.24, "step": 12970 }, { "epoch": 0.2811301540528243, "grad_norm": 1.7284148931503296, "learning_rate": 1.634684283152284e-05, "loss": 0.2099, "step": 12975 }, { "epoch": 0.28123848937230517, "grad_norm": 1.511340618133545, "learning_rate": 1.6344212371235945e-05, "loss": 0.1833, "step": 12980 }, { "epoch": 0.281346824691786, "grad_norm": 2.3559813499450684, "learning_rate": 1.634158117606708e-05, "loss": 0.2714, "step": 12985 }, { "epoch": 0.28145516001126686, "grad_norm": 2.145200490951538, "learning_rate": 1.6338949246321038e-05, "loss": 0.2729, "step": 12990 }, { "epoch": 0.2815634953307477, "grad_norm": 1.9375032186508179, "learning_rate": 1.6336316582302678e-05, "loss": 0.227, "step": 12995 }, { "epoch": 0.2816718306502286, "grad_norm": 2.24454927444458, "learning_rate": 1.633368318431696e-05, "loss": 0.1456, "step": 13000 }, { "epoch": 0.28178016596970945, "grad_norm": 1.969693660736084, "learning_rate": 1.633104905266893e-05, "loss": 0.1662, "step": 13005 }, { "epoch": 0.2818885012891903, "grad_norm": 2.730487108230591, "learning_rate": 1.6328414187663703e-05, "loss": 0.2112, "step": 13010 }, { "epoch": 0.28199683660867114, "grad_norm": 1.922775149345398, "learning_rate": 1.632577858960649e-05, "loss": 0.2637, "step": 13015 }, { "epoch": 0.28210517192815204, "grad_norm": 1.2734466791152954, "learning_rate": 1.6323142258802587e-05, "loss": 0.1672, "step": 13020 }, { "epoch": 0.2822135072476329, "grad_norm": 2.5713658332824707, "learning_rate": 1.6320505195557374e-05, "loss": 0.2679, "step": 13025 }, { "epoch": 0.28232184256711373, "grad_norm": 2.2841014862060547, "learning_rate": 1.6317867400176316e-05, "loss": 0.2179, "step": 13030 }, { "epoch": 0.2824301778865946, "grad_norm": 2.425589084625244, "learning_rate": 1.6315228872964962e-05, "loss": 0.244, "step": 13035 }, { "epoch": 0.2825385132060754, "grad_norm": 1.6580888032913208, "learning_rate": 1.6312589614228947e-05, "loss": 0.2207, "step": 13040 }, { "epoch": 0.2826468485255563, "grad_norm": 2.190131664276123, "learning_rate": 1.6309949624273987e-05, "loss": 0.2539, "step": 13045 }, { "epoch": 0.2827551838450372, "grad_norm": 2.167526960372925, "learning_rate": 1.6307308903405884e-05, "loss": 0.3025, "step": 13050 }, { "epoch": 0.282863519164518, "grad_norm": 1.9737542867660522, "learning_rate": 1.630466745193053e-05, "loss": 0.2084, "step": 13055 }, { "epoch": 0.28297185448399886, "grad_norm": 2.1031689643859863, "learning_rate": 1.6302025270153894e-05, "loss": 0.2168, "step": 13060 }, { "epoch": 0.2830801898034797, "grad_norm": 1.8798470497131348, "learning_rate": 1.6299382358382038e-05, "loss": 0.2923, "step": 13065 }, { "epoch": 0.2831885251229606, "grad_norm": 1.5021733045578003, "learning_rate": 1.6296738716921097e-05, "loss": 0.2683, "step": 13070 }, { "epoch": 0.28329686044244146, "grad_norm": 2.6031136512756348, "learning_rate": 1.6294094346077304e-05, "loss": 0.1771, "step": 13075 }, { "epoch": 0.2834051957619223, "grad_norm": 2.170121431350708, "learning_rate": 1.629144924615696e-05, "loss": 0.185, "step": 13080 }, { "epoch": 0.28351353108140315, "grad_norm": 2.0954155921936035, "learning_rate": 1.6288803417466474e-05, "loss": 0.2775, "step": 13085 }, { "epoch": 0.283621866400884, "grad_norm": 1.944502592086792, "learning_rate": 1.6286156860312316e-05, "loss": 0.1937, "step": 13090 }, { "epoch": 0.2837302017203649, "grad_norm": 1.9706709384918213, "learning_rate": 1.628350957500105e-05, "loss": 0.3343, "step": 13095 }, { "epoch": 0.28383853703984574, "grad_norm": 2.4019031524658203, "learning_rate": 1.628086156183933e-05, "loss": 0.271, "step": 13100 }, { "epoch": 0.2839468723593266, "grad_norm": 1.8498684167861938, "learning_rate": 1.6278212821133884e-05, "loss": 0.2083, "step": 13105 }, { "epoch": 0.28405520767880743, "grad_norm": 1.2708085775375366, "learning_rate": 1.6275563353191532e-05, "loss": 0.2087, "step": 13110 }, { "epoch": 0.2841635429982883, "grad_norm": 1.764910340309143, "learning_rate": 1.627291315831917e-05, "loss": 0.1975, "step": 13115 }, { "epoch": 0.2842718783177692, "grad_norm": 2.4578940868377686, "learning_rate": 1.6270262236823787e-05, "loss": 0.1676, "step": 13120 }, { "epoch": 0.28438021363725, "grad_norm": 1.3371089696884155, "learning_rate": 1.6267610589012455e-05, "loss": 0.2527, "step": 13125 }, { "epoch": 0.28448854895673087, "grad_norm": 2.603684902191162, "learning_rate": 1.6264958215192325e-05, "loss": 0.2365, "step": 13130 }, { "epoch": 0.2845968842762117, "grad_norm": 1.6551300287246704, "learning_rate": 1.6262305115670635e-05, "loss": 0.2582, "step": 13135 }, { "epoch": 0.28470521959569256, "grad_norm": 0.8817737698554993, "learning_rate": 1.6259651290754703e-05, "loss": 0.1812, "step": 13140 }, { "epoch": 0.28481355491517346, "grad_norm": 1.8562999963760376, "learning_rate": 1.625699674075194e-05, "loss": 0.1811, "step": 13145 }, { "epoch": 0.2849218902346543, "grad_norm": 1.8463873863220215, "learning_rate": 1.625434146596984e-05, "loss": 0.2438, "step": 13150 }, { "epoch": 0.28503022555413515, "grad_norm": 1.0937844514846802, "learning_rate": 1.6251685466715965e-05, "loss": 0.1866, "step": 13155 }, { "epoch": 0.285138560873616, "grad_norm": 4.536230087280273, "learning_rate": 1.624902874329798e-05, "loss": 0.2031, "step": 13160 }, { "epoch": 0.2852468961930969, "grad_norm": 2.7859599590301514, "learning_rate": 1.6246371296023627e-05, "loss": 0.1708, "step": 13165 }, { "epoch": 0.28535523151257774, "grad_norm": 1.6615980863571167, "learning_rate": 1.624371312520073e-05, "loss": 0.144, "step": 13170 }, { "epoch": 0.2854635668320586, "grad_norm": 2.4569649696350098, "learning_rate": 1.6241054231137196e-05, "loss": 0.2219, "step": 13175 }, { "epoch": 0.28557190215153944, "grad_norm": 2.1883482933044434, "learning_rate": 1.6238394614141022e-05, "loss": 0.2367, "step": 13180 }, { "epoch": 0.2856802374710203, "grad_norm": 1.5525898933410645, "learning_rate": 1.6235734274520282e-05, "loss": 0.2426, "step": 13185 }, { "epoch": 0.2857885727905012, "grad_norm": 2.0413618087768555, "learning_rate": 1.623307321258314e-05, "loss": 0.1868, "step": 13190 }, { "epoch": 0.28589690810998203, "grad_norm": 1.909819483757019, "learning_rate": 1.6230411428637834e-05, "loss": 0.2446, "step": 13195 }, { "epoch": 0.2860052434294629, "grad_norm": 1.7952117919921875, "learning_rate": 1.62277489229927e-05, "loss": 0.2008, "step": 13200 }, { "epoch": 0.2861135787489437, "grad_norm": 1.8918819427490234, "learning_rate": 1.622508569595614e-05, "loss": 0.1946, "step": 13205 }, { "epoch": 0.28622191406842457, "grad_norm": 1.9460506439208984, "learning_rate": 1.6222421747836658e-05, "loss": 0.2332, "step": 13210 }, { "epoch": 0.28633024938790547, "grad_norm": 2.224653959274292, "learning_rate": 1.6219757078942826e-05, "loss": 0.1992, "step": 13215 }, { "epoch": 0.2864385847073863, "grad_norm": 0.8548656702041626, "learning_rate": 1.6217091689583312e-05, "loss": 0.1495, "step": 13220 }, { "epoch": 0.28654692002686716, "grad_norm": 1.4365549087524414, "learning_rate": 1.621442558006685e-05, "loss": 0.2619, "step": 13225 }, { "epoch": 0.286655255346348, "grad_norm": 1.8801711797714233, "learning_rate": 1.6211758750702284e-05, "loss": 0.2612, "step": 13230 }, { "epoch": 0.28676359066582885, "grad_norm": 1.0663892030715942, "learning_rate": 1.6209091201798518e-05, "loss": 0.0865, "step": 13235 }, { "epoch": 0.28687192598530975, "grad_norm": 2.089509963989258, "learning_rate": 1.6206422933664544e-05, "loss": 0.2954, "step": 13240 }, { "epoch": 0.2869802613047906, "grad_norm": 1.511747121810913, "learning_rate": 1.620375394660945e-05, "loss": 0.2042, "step": 13245 }, { "epoch": 0.28708859662427144, "grad_norm": 2.038036584854126, "learning_rate": 1.6201084240942394e-05, "loss": 0.2923, "step": 13250 }, { "epoch": 0.2871969319437523, "grad_norm": 1.237716555595398, "learning_rate": 1.6198413816972618e-05, "loss": 0.3186, "step": 13255 }, { "epoch": 0.28730526726323313, "grad_norm": 1.6987382173538208, "learning_rate": 1.6195742675009456e-05, "loss": 0.1668, "step": 13260 }, { "epoch": 0.28741360258271403, "grad_norm": 1.5444583892822266, "learning_rate": 1.619307081536231e-05, "loss": 0.2402, "step": 13265 }, { "epoch": 0.2875219379021949, "grad_norm": 2.016413688659668, "learning_rate": 1.6190398238340693e-05, "loss": 0.2358, "step": 13270 }, { "epoch": 0.2876302732216757, "grad_norm": 1.80903160572052, "learning_rate": 1.6187724944254166e-05, "loss": 0.3313, "step": 13275 }, { "epoch": 0.28773860854115657, "grad_norm": 2.0937271118164062, "learning_rate": 1.61850509334124e-05, "loss": 0.1403, "step": 13280 }, { "epoch": 0.28784694386063747, "grad_norm": 2.7698421478271484, "learning_rate": 1.618237620612513e-05, "loss": 0.1522, "step": 13285 }, { "epoch": 0.2879552791801183, "grad_norm": 1.9832546710968018, "learning_rate": 1.617970076270219e-05, "loss": 0.2048, "step": 13290 }, { "epoch": 0.28806361449959916, "grad_norm": 2.6842353343963623, "learning_rate": 1.6177024603453492e-05, "loss": 0.2249, "step": 13295 }, { "epoch": 0.28817194981908, "grad_norm": 2.17331600189209, "learning_rate": 1.6174347728689025e-05, "loss": 0.2442, "step": 13300 }, { "epoch": 0.28828028513856085, "grad_norm": 2.355865001678467, "learning_rate": 1.617167013871886e-05, "loss": 0.2034, "step": 13305 }, { "epoch": 0.28838862045804176, "grad_norm": 2.900618553161621, "learning_rate": 1.6168991833853168e-05, "loss": 0.2327, "step": 13310 }, { "epoch": 0.2884969557775226, "grad_norm": 2.0839316844940186, "learning_rate": 1.616631281440218e-05, "loss": 0.2723, "step": 13315 }, { "epoch": 0.28860529109700345, "grad_norm": 2.0379602909088135, "learning_rate": 1.6163633080676225e-05, "loss": 0.263, "step": 13320 }, { "epoch": 0.2887136264164843, "grad_norm": 3.2156729698181152, "learning_rate": 1.6160952632985708e-05, "loss": 0.2159, "step": 13325 }, { "epoch": 0.28882196173596514, "grad_norm": 2.549137592315674, "learning_rate": 1.615827147164112e-05, "loss": 0.2447, "step": 13330 }, { "epoch": 0.28893029705544604, "grad_norm": 1.6740878820419312, "learning_rate": 1.615558959695303e-05, "loss": 0.2792, "step": 13335 }, { "epoch": 0.2890386323749269, "grad_norm": 2.2466869354248047, "learning_rate": 1.61529070092321e-05, "loss": 0.2523, "step": 13340 }, { "epoch": 0.28914696769440773, "grad_norm": 3.257378101348877, "learning_rate": 1.6150223708789062e-05, "loss": 0.2531, "step": 13345 }, { "epoch": 0.2892553030138886, "grad_norm": 2.02641224861145, "learning_rate": 1.614753969593474e-05, "loss": 0.1826, "step": 13350 }, { "epoch": 0.2893636383333694, "grad_norm": 1.8288441896438599, "learning_rate": 1.614485497098003e-05, "loss": 0.2023, "step": 13355 }, { "epoch": 0.2894719736528503, "grad_norm": 2.014397144317627, "learning_rate": 1.6142169534235922e-05, "loss": 0.3337, "step": 13360 }, { "epoch": 0.28958030897233117, "grad_norm": 2.093625545501709, "learning_rate": 1.6139483386013487e-05, "loss": 0.2288, "step": 13365 }, { "epoch": 0.289688644291812, "grad_norm": 2.6670875549316406, "learning_rate": 1.6136796526623867e-05, "loss": 0.2626, "step": 13370 }, { "epoch": 0.28979697961129286, "grad_norm": 1.8787888288497925, "learning_rate": 1.61341089563783e-05, "loss": 0.1883, "step": 13375 }, { "epoch": 0.2899053149307737, "grad_norm": 2.0617518424987793, "learning_rate": 1.61314206755881e-05, "loss": 0.2679, "step": 13380 }, { "epoch": 0.2900136502502546, "grad_norm": 3.598884105682373, "learning_rate": 1.6128731684564664e-05, "loss": 0.2147, "step": 13385 }, { "epoch": 0.29012198556973545, "grad_norm": 1.166772484779358, "learning_rate": 1.612604198361947e-05, "loss": 0.2148, "step": 13390 }, { "epoch": 0.2902303208892163, "grad_norm": 1.533359408378601, "learning_rate": 1.612335157306408e-05, "loss": 0.2393, "step": 13395 }, { "epoch": 0.29033865620869714, "grad_norm": 1.3348809480667114, "learning_rate": 1.612066045321014e-05, "loss": 0.2283, "step": 13400 }, { "epoch": 0.290446991528178, "grad_norm": 1.9959553480148315, "learning_rate": 1.611796862436937e-05, "loss": 0.1438, "step": 13405 }, { "epoch": 0.2905553268476589, "grad_norm": 2.505516529083252, "learning_rate": 1.6115276086853585e-05, "loss": 0.229, "step": 13410 }, { "epoch": 0.29066366216713974, "grad_norm": 1.908064603805542, "learning_rate": 1.6112582840974672e-05, "loss": 0.2092, "step": 13415 }, { "epoch": 0.2907719974866206, "grad_norm": 1.1594945192337036, "learning_rate": 1.6109888887044602e-05, "loss": 0.1909, "step": 13420 }, { "epoch": 0.2908803328061014, "grad_norm": 2.8367204666137695, "learning_rate": 1.6107194225375434e-05, "loss": 0.2814, "step": 13425 }, { "epoch": 0.29098866812558233, "grad_norm": 2.0364959239959717, "learning_rate": 1.6104498856279297e-05, "loss": 0.2717, "step": 13430 }, { "epoch": 0.2910970034450632, "grad_norm": 2.4714386463165283, "learning_rate": 1.6101802780068414e-05, "loss": 0.2004, "step": 13435 }, { "epoch": 0.291205338764544, "grad_norm": 2.0714831352233887, "learning_rate": 1.6099105997055083e-05, "loss": 0.2129, "step": 13440 }, { "epoch": 0.29131367408402487, "grad_norm": 1.8711638450622559, "learning_rate": 1.609640850755169e-05, "loss": 0.2581, "step": 13445 }, { "epoch": 0.2914220094035057, "grad_norm": 1.9782137870788574, "learning_rate": 1.6093710311870693e-05, "loss": 0.1967, "step": 13450 }, { "epoch": 0.2915303447229866, "grad_norm": 1.6224918365478516, "learning_rate": 1.609101141032464e-05, "loss": 0.2879, "step": 13455 }, { "epoch": 0.29163868004246746, "grad_norm": 2.5772221088409424, "learning_rate": 1.6088311803226158e-05, "loss": 0.2446, "step": 13460 }, { "epoch": 0.2917470153619483, "grad_norm": 2.061042070388794, "learning_rate": 1.6085611490887957e-05, "loss": 0.1708, "step": 13465 }, { "epoch": 0.29185535068142915, "grad_norm": 1.42851984500885, "learning_rate": 1.608291047362283e-05, "loss": 0.2559, "step": 13470 }, { "epoch": 0.29196368600091, "grad_norm": 2.419224500656128, "learning_rate": 1.6080208751743646e-05, "loss": 0.2805, "step": 13475 }, { "epoch": 0.2920720213203909, "grad_norm": 1.999448537826538, "learning_rate": 1.6077506325563354e-05, "loss": 0.1702, "step": 13480 }, { "epoch": 0.29218035663987174, "grad_norm": 2.171661615371704, "learning_rate": 1.6074803195395e-05, "loss": 0.2319, "step": 13485 }, { "epoch": 0.2922886919593526, "grad_norm": 1.5648380517959595, "learning_rate": 1.6072099361551696e-05, "loss": 0.2298, "step": 13490 }, { "epoch": 0.29239702727883343, "grad_norm": 2.6486570835113525, "learning_rate": 1.606939482434664e-05, "loss": 0.2682, "step": 13495 }, { "epoch": 0.2925053625983143, "grad_norm": 1.7558616399765015, "learning_rate": 1.6066689584093117e-05, "loss": 0.2318, "step": 13500 }, { "epoch": 0.2926136979177952, "grad_norm": 1.1135566234588623, "learning_rate": 1.6063983641104477e-05, "loss": 0.116, "step": 13505 }, { "epoch": 0.292722033237276, "grad_norm": 1.5300376415252686, "learning_rate": 1.6061276995694178e-05, "loss": 0.2139, "step": 13510 }, { "epoch": 0.29283036855675687, "grad_norm": 1.3619003295898438, "learning_rate": 1.605856964817573e-05, "loss": 0.1482, "step": 13515 }, { "epoch": 0.2929387038762377, "grad_norm": 2.428462505340576, "learning_rate": 1.6055861598862753e-05, "loss": 0.3069, "step": 13520 }, { "epoch": 0.29304703919571856, "grad_norm": 1.4396811723709106, "learning_rate": 1.6053152848068926e-05, "loss": 0.1917, "step": 13525 }, { "epoch": 0.29315537451519946, "grad_norm": 2.3488693237304688, "learning_rate": 1.6050443396108014e-05, "loss": 0.1886, "step": 13530 }, { "epoch": 0.2932637098346803, "grad_norm": 2.37436842918396, "learning_rate": 1.604773324329387e-05, "loss": 0.2413, "step": 13535 }, { "epoch": 0.29337204515416115, "grad_norm": 2.2661941051483154, "learning_rate": 1.6045022389940426e-05, "loss": 0.1431, "step": 13540 }, { "epoch": 0.293480380473642, "grad_norm": 1.4994630813598633, "learning_rate": 1.6042310836361692e-05, "loss": 0.2323, "step": 13545 }, { "epoch": 0.2935887157931229, "grad_norm": 2.0666286945343018, "learning_rate": 1.6039598582871763e-05, "loss": 0.2788, "step": 13550 }, { "epoch": 0.29369705111260375, "grad_norm": 1.955644130706787, "learning_rate": 1.603688562978481e-05, "loss": 0.2574, "step": 13555 }, { "epoch": 0.2938053864320846, "grad_norm": 1.9564732313156128, "learning_rate": 1.603417197741509e-05, "loss": 0.2414, "step": 13560 }, { "epoch": 0.29391372175156544, "grad_norm": 1.520156979560852, "learning_rate": 1.6031457626076935e-05, "loss": 0.18, "step": 13565 }, { "epoch": 0.2940220570710463, "grad_norm": 1.2116302251815796, "learning_rate": 1.602874257608477e-05, "loss": 0.1606, "step": 13570 }, { "epoch": 0.2941303923905272, "grad_norm": 1.8628902435302734, "learning_rate": 1.6026026827753085e-05, "loss": 0.1844, "step": 13575 }, { "epoch": 0.29423872771000803, "grad_norm": 2.5242385864257812, "learning_rate": 1.6023310381396463e-05, "loss": 0.1786, "step": 13580 }, { "epoch": 0.2943470630294889, "grad_norm": 2.3233141899108887, "learning_rate": 1.6020593237329563e-05, "loss": 0.2256, "step": 13585 }, { "epoch": 0.2944553983489697, "grad_norm": 1.7127983570098877, "learning_rate": 1.6017875395867126e-05, "loss": 0.1954, "step": 13590 }, { "epoch": 0.29456373366845057, "grad_norm": 1.8853511810302734, "learning_rate": 1.6015156857323972e-05, "loss": 0.2414, "step": 13595 }, { "epoch": 0.29467206898793147, "grad_norm": 1.6108322143554688, "learning_rate": 1.6012437622015e-05, "loss": 0.2594, "step": 13600 }, { "epoch": 0.2947804043074123, "grad_norm": 2.3106369972229004, "learning_rate": 1.60097176902552e-05, "loss": 0.2229, "step": 13605 }, { "epoch": 0.29488873962689316, "grad_norm": 1.4125146865844727, "learning_rate": 1.600699706235963e-05, "loss": 0.2541, "step": 13610 }, { "epoch": 0.294997074946374, "grad_norm": 1.9259415864944458, "learning_rate": 1.600427573864343e-05, "loss": 0.1103, "step": 13615 }, { "epoch": 0.29510541026585485, "grad_norm": 1.4473294019699097, "learning_rate": 1.6001553719421837e-05, "loss": 0.2066, "step": 13620 }, { "epoch": 0.29521374558533575, "grad_norm": 1.4208077192306519, "learning_rate": 1.5998831005010144e-05, "loss": 0.1856, "step": 13625 }, { "epoch": 0.2953220809048166, "grad_norm": 2.2072107791900635, "learning_rate": 1.5996107595723744e-05, "loss": 0.2512, "step": 13630 }, { "epoch": 0.29543041622429744, "grad_norm": 2.1634521484375, "learning_rate": 1.59933834918781e-05, "loss": 0.2405, "step": 13635 }, { "epoch": 0.2955387515437783, "grad_norm": 1.7348723411560059, "learning_rate": 1.5990658693788757e-05, "loss": 0.3054, "step": 13640 }, { "epoch": 0.29564708686325913, "grad_norm": 2.5298829078674316, "learning_rate": 1.598793320177135e-05, "loss": 0.2605, "step": 13645 }, { "epoch": 0.29575542218274004, "grad_norm": 2.095710277557373, "learning_rate": 1.5985207016141575e-05, "loss": 0.23, "step": 13650 }, { "epoch": 0.2958637575022209, "grad_norm": 1.5771914720535278, "learning_rate": 1.5982480137215228e-05, "loss": 0.1811, "step": 13655 }, { "epoch": 0.2959720928217017, "grad_norm": 1.5940991640090942, "learning_rate": 1.5979752565308174e-05, "loss": 0.3408, "step": 13660 }, { "epoch": 0.29608042814118257, "grad_norm": 2.012815237045288, "learning_rate": 1.5977024300736363e-05, "loss": 0.1815, "step": 13665 }, { "epoch": 0.2961887634606634, "grad_norm": 2.7248220443725586, "learning_rate": 1.5974295343815823e-05, "loss": 0.162, "step": 13670 }, { "epoch": 0.2962970987801443, "grad_norm": 1.3886926174163818, "learning_rate": 1.5971565694862664e-05, "loss": 0.1431, "step": 13675 }, { "epoch": 0.29640543409962516, "grad_norm": 1.8792616128921509, "learning_rate": 1.596883535419307e-05, "loss": 0.2734, "step": 13680 }, { "epoch": 0.296513769419106, "grad_norm": 2.578324556350708, "learning_rate": 1.5966104322123313e-05, "loss": 0.3035, "step": 13685 }, { "epoch": 0.29662210473858686, "grad_norm": 1.8763560056686401, "learning_rate": 1.5963372598969744e-05, "loss": 0.2357, "step": 13690 }, { "epoch": 0.29673044005806776, "grad_norm": 1.7580012083053589, "learning_rate": 1.596064018504879e-05, "loss": 0.1944, "step": 13695 }, { "epoch": 0.2968387753775486, "grad_norm": 1.9273715019226074, "learning_rate": 1.5957907080676962e-05, "loss": 0.2232, "step": 13700 }, { "epoch": 0.29694711069702945, "grad_norm": 1.6487919092178345, "learning_rate": 1.595517328617085e-05, "loss": 0.1963, "step": 13705 }, { "epoch": 0.2970554460165103, "grad_norm": 1.6516304016113281, "learning_rate": 1.5952438801847118e-05, "loss": 0.2406, "step": 13710 }, { "epoch": 0.29716378133599114, "grad_norm": 2.4647343158721924, "learning_rate": 1.594970362802252e-05, "loss": 0.2397, "step": 13715 }, { "epoch": 0.29727211665547204, "grad_norm": 1.6646089553833008, "learning_rate": 1.594696776501388e-05, "loss": 0.1598, "step": 13720 }, { "epoch": 0.2973804519749529, "grad_norm": 2.0371012687683105, "learning_rate": 1.594423121313811e-05, "loss": 0.2332, "step": 13725 }, { "epoch": 0.29748878729443373, "grad_norm": 1.2967956066131592, "learning_rate": 1.5941493972712203e-05, "loss": 0.1379, "step": 13730 }, { "epoch": 0.2975971226139146, "grad_norm": 1.4616131782531738, "learning_rate": 1.593875604405322e-05, "loss": 0.1212, "step": 13735 }, { "epoch": 0.2977054579333954, "grad_norm": 2.056068181991577, "learning_rate": 1.5936017427478315e-05, "loss": 0.2098, "step": 13740 }, { "epoch": 0.2978137932528763, "grad_norm": 2.2236759662628174, "learning_rate": 1.593327812330471e-05, "loss": 0.2644, "step": 13745 }, { "epoch": 0.29792212857235717, "grad_norm": 1.0465307235717773, "learning_rate": 1.5930538131849714e-05, "loss": 0.1557, "step": 13750 }, { "epoch": 0.298030463891838, "grad_norm": 1.9830994606018066, "learning_rate": 1.5927797453430718e-05, "loss": 0.2231, "step": 13755 }, { "epoch": 0.29813879921131886, "grad_norm": 2.0693323612213135, "learning_rate": 1.592505608836518e-05, "loss": 0.2149, "step": 13760 }, { "epoch": 0.2982471345307997, "grad_norm": 1.086814522743225, "learning_rate": 1.5922314036970657e-05, "loss": 0.169, "step": 13765 }, { "epoch": 0.2983554698502806, "grad_norm": 2.1726016998291016, "learning_rate": 1.5919571299564765e-05, "loss": 0.2284, "step": 13770 }, { "epoch": 0.29846380516976145, "grad_norm": 2.403935194015503, "learning_rate": 1.5916827876465218e-05, "loss": 0.2194, "step": 13775 }, { "epoch": 0.2985721404892423, "grad_norm": 1.336096167564392, "learning_rate": 1.5914083767989792e-05, "loss": 0.2746, "step": 13780 }, { "epoch": 0.29868047580872314, "grad_norm": 1.9775007963180542, "learning_rate": 1.5911338974456357e-05, "loss": 0.2291, "step": 13785 }, { "epoch": 0.298788811128204, "grad_norm": 2.0305373668670654, "learning_rate": 1.590859349618285e-05, "loss": 0.2398, "step": 13790 }, { "epoch": 0.2988971464476849, "grad_norm": 1.8244982957839966, "learning_rate": 1.59058473334873e-05, "loss": 0.22, "step": 13795 }, { "epoch": 0.29900548176716574, "grad_norm": 1.8946293592453003, "learning_rate": 1.5903100486687805e-05, "loss": 0.2208, "step": 13800 }, { "epoch": 0.2991138170866466, "grad_norm": 1.6732590198516846, "learning_rate": 1.5900352956102547e-05, "loss": 0.2089, "step": 13805 }, { "epoch": 0.29922215240612743, "grad_norm": 1.5538212060928345, "learning_rate": 1.5897604742049786e-05, "loss": 0.2776, "step": 13810 }, { "epoch": 0.29933048772560833, "grad_norm": 1.6088151931762695, "learning_rate": 1.5894855844847863e-05, "loss": 0.1927, "step": 13815 }, { "epoch": 0.2994388230450892, "grad_norm": 1.6281962394714355, "learning_rate": 1.589210626481519e-05, "loss": 0.2051, "step": 13820 }, { "epoch": 0.29954715836457, "grad_norm": 1.3285565376281738, "learning_rate": 1.588935600227028e-05, "loss": 0.1796, "step": 13825 }, { "epoch": 0.29965549368405087, "grad_norm": 1.5981171131134033, "learning_rate": 1.5886605057531692e-05, "loss": 0.2933, "step": 13830 }, { "epoch": 0.2997638290035317, "grad_norm": 1.882897138595581, "learning_rate": 1.5883853430918095e-05, "loss": 0.1793, "step": 13835 }, { "epoch": 0.2998721643230126, "grad_norm": 1.330929160118103, "learning_rate": 1.588110112274821e-05, "loss": 0.155, "step": 13840 }, { "epoch": 0.29998049964249346, "grad_norm": 2.22928786277771, "learning_rate": 1.5878348133340863e-05, "loss": 0.3571, "step": 13845 }, { "epoch": 0.3000888349619743, "grad_norm": 1.7556861639022827, "learning_rate": 1.5875594463014946e-05, "loss": 0.1691, "step": 13850 }, { "epoch": 0.30019717028145515, "grad_norm": 1.5357091426849365, "learning_rate": 1.5872840112089423e-05, "loss": 0.2411, "step": 13855 }, { "epoch": 0.300305505600936, "grad_norm": 2.034278631210327, "learning_rate": 1.587008508088335e-05, "loss": 0.3182, "step": 13860 }, { "epoch": 0.3004138409204169, "grad_norm": 2.6812708377838135, "learning_rate": 1.586732936971585e-05, "loss": 0.1335, "step": 13865 }, { "epoch": 0.30052217623989774, "grad_norm": 1.8746672868728638, "learning_rate": 1.5864572978906142e-05, "loss": 0.186, "step": 13870 }, { "epoch": 0.3006305115593786, "grad_norm": 1.9789725542068481, "learning_rate": 1.5861815908773503e-05, "loss": 0.2431, "step": 13875 }, { "epoch": 0.30073884687885943, "grad_norm": 1.8477191925048828, "learning_rate": 1.5859058159637298e-05, "loss": 0.2029, "step": 13880 }, { "epoch": 0.3008471821983403, "grad_norm": 3.551687002182007, "learning_rate": 1.5856299731816974e-05, "loss": 0.2743, "step": 13885 }, { "epoch": 0.3009555175178212, "grad_norm": 2.1000161170959473, "learning_rate": 1.5853540625632056e-05, "loss": 0.16, "step": 13890 }, { "epoch": 0.301063852837302, "grad_norm": 2.2657973766326904, "learning_rate": 1.5850780841402143e-05, "loss": 0.2384, "step": 13895 }, { "epoch": 0.30117218815678287, "grad_norm": 2.0027689933776855, "learning_rate": 1.5848020379446914e-05, "loss": 0.2182, "step": 13900 }, { "epoch": 0.3012805234762637, "grad_norm": 1.6793452501296997, "learning_rate": 1.5845259240086126e-05, "loss": 0.1966, "step": 13905 }, { "epoch": 0.30138885879574456, "grad_norm": 2.213371753692627, "learning_rate": 1.5842497423639617e-05, "loss": 0.181, "step": 13910 }, { "epoch": 0.30149719411522546, "grad_norm": 1.6913573741912842, "learning_rate": 1.58397349304273e-05, "loss": 0.2146, "step": 13915 }, { "epoch": 0.3016055294347063, "grad_norm": 2.403630495071411, "learning_rate": 1.5836971760769176e-05, "loss": 0.2687, "step": 13920 }, { "epoch": 0.30171386475418716, "grad_norm": 2.3849756717681885, "learning_rate": 1.5834207914985306e-05, "loss": 0.2294, "step": 13925 }, { "epoch": 0.301822200073668, "grad_norm": 1.4650161266326904, "learning_rate": 1.583144339339585e-05, "loss": 0.23, "step": 13930 }, { "epoch": 0.30193053539314885, "grad_norm": 0.931613028049469, "learning_rate": 1.582867819632103e-05, "loss": 0.1791, "step": 13935 }, { "epoch": 0.30203887071262975, "grad_norm": 2.723107099533081, "learning_rate": 1.5825912324081155e-05, "loss": 0.134, "step": 13940 }, { "epoch": 0.3021472060321106, "grad_norm": 1.7937469482421875, "learning_rate": 1.5823145776996608e-05, "loss": 0.1886, "step": 13945 }, { "epoch": 0.30225554135159144, "grad_norm": 2.842087984085083, "learning_rate": 1.5820378555387853e-05, "loss": 0.2869, "step": 13950 }, { "epoch": 0.3023638766710723, "grad_norm": 1.9545873403549194, "learning_rate": 1.5817610659575435e-05, "loss": 0.1368, "step": 13955 }, { "epoch": 0.3024722119905532, "grad_norm": 2.57452392578125, "learning_rate": 1.5814842089879965e-05, "loss": 0.2694, "step": 13960 }, { "epoch": 0.30258054731003403, "grad_norm": 1.4238537549972534, "learning_rate": 1.5812072846622147e-05, "loss": 0.2812, "step": 13965 }, { "epoch": 0.3026888826295149, "grad_norm": 1.4099435806274414, "learning_rate": 1.580930293012276e-05, "loss": 0.2, "step": 13970 }, { "epoch": 0.3027972179489957, "grad_norm": 1.9638926982879639, "learning_rate": 1.5806532340702645e-05, "loss": 0.2355, "step": 13975 }, { "epoch": 0.30290555326847657, "grad_norm": 2.0325207710266113, "learning_rate": 1.5803761078682743e-05, "loss": 0.1539, "step": 13980 }, { "epoch": 0.30301388858795747, "grad_norm": 1.8874484300613403, "learning_rate": 1.580098914438406e-05, "loss": 0.1772, "step": 13985 }, { "epoch": 0.3031222239074383, "grad_norm": 1.6702306270599365, "learning_rate": 1.5798216538127683e-05, "loss": 0.1709, "step": 13990 }, { "epoch": 0.30323055922691916, "grad_norm": 1.514100432395935, "learning_rate": 1.5795443260234778e-05, "loss": 0.1811, "step": 13995 }, { "epoch": 0.3033388945464, "grad_norm": 2.000685691833496, "learning_rate": 1.5792669311026586e-05, "loss": 0.2918, "step": 14000 }, { "epoch": 0.30344722986588085, "grad_norm": 2.121569871902466, "learning_rate": 1.5789894690824432e-05, "loss": 0.1852, "step": 14005 }, { "epoch": 0.30355556518536175, "grad_norm": 2.132641553878784, "learning_rate": 1.5787119399949705e-05, "loss": 0.291, "step": 14010 }, { "epoch": 0.3036639005048426, "grad_norm": 1.750887155532837, "learning_rate": 1.578434343872389e-05, "loss": 0.2069, "step": 14015 }, { "epoch": 0.30377223582432344, "grad_norm": 2.342440366744995, "learning_rate": 1.5781566807468538e-05, "loss": 0.225, "step": 14020 }, { "epoch": 0.3038805711438043, "grad_norm": 1.8764790296554565, "learning_rate": 1.5778789506505277e-05, "loss": 0.2424, "step": 14025 }, { "epoch": 0.30398890646328514, "grad_norm": 2.1267290115356445, "learning_rate": 1.577601153615582e-05, "loss": 0.1936, "step": 14030 }, { "epoch": 0.30409724178276604, "grad_norm": 1.1598209142684937, "learning_rate": 1.5773232896741947e-05, "loss": 0.2252, "step": 14035 }, { "epoch": 0.3042055771022469, "grad_norm": 1.3566831350326538, "learning_rate": 1.577045358858553e-05, "loss": 0.2124, "step": 14040 }, { "epoch": 0.30431391242172773, "grad_norm": 1.5566827058792114, "learning_rate": 1.5767673612008505e-05, "loss": 0.2224, "step": 14045 }, { "epoch": 0.3044222477412086, "grad_norm": 2.4469268321990967, "learning_rate": 1.5764892967332893e-05, "loss": 0.1893, "step": 14050 }, { "epoch": 0.3045305830606894, "grad_norm": 1.7019712924957275, "learning_rate": 1.576211165488079e-05, "loss": 0.2143, "step": 14055 }, { "epoch": 0.3046389183801703, "grad_norm": 1.1385821104049683, "learning_rate": 1.5759329674974365e-05, "loss": 0.1596, "step": 14060 }, { "epoch": 0.30474725369965117, "grad_norm": 3.445718288421631, "learning_rate": 1.575654702793587e-05, "loss": 0.2817, "step": 14065 }, { "epoch": 0.304855589019132, "grad_norm": 2.254990816116333, "learning_rate": 1.5753763714087637e-05, "loss": 0.191, "step": 14070 }, { "epoch": 0.30496392433861286, "grad_norm": 2.1223630905151367, "learning_rate": 1.5750979733752073e-05, "loss": 0.1739, "step": 14075 }, { "epoch": 0.30507225965809376, "grad_norm": 2.216616630554199, "learning_rate": 1.574819508725165e-05, "loss": 0.2207, "step": 14080 }, { "epoch": 0.3051805949775746, "grad_norm": 1.2723345756530762, "learning_rate": 1.574540977490894e-05, "loss": 0.2902, "step": 14085 }, { "epoch": 0.30528893029705545, "grad_norm": 1.7536426782608032, "learning_rate": 1.574262379704657e-05, "loss": 0.2278, "step": 14090 }, { "epoch": 0.3053972656165363, "grad_norm": 2.5345308780670166, "learning_rate": 1.573983715398726e-05, "loss": 0.1526, "step": 14095 }, { "epoch": 0.30550560093601714, "grad_norm": 2.4778995513916016, "learning_rate": 1.5737049846053797e-05, "loss": 0.2194, "step": 14100 }, { "epoch": 0.30561393625549804, "grad_norm": 2.3907058238983154, "learning_rate": 1.573426187356905e-05, "loss": 0.2212, "step": 14105 }, { "epoch": 0.3057222715749789, "grad_norm": 2.191375732421875, "learning_rate": 1.573147323685596e-05, "loss": 0.306, "step": 14110 }, { "epoch": 0.30583060689445973, "grad_norm": 2.9607081413269043, "learning_rate": 1.5728683936237562e-05, "loss": 0.2099, "step": 14115 }, { "epoch": 0.3059389422139406, "grad_norm": 2.011141300201416, "learning_rate": 1.5725893972036944e-05, "loss": 0.2784, "step": 14120 }, { "epoch": 0.3060472775334214, "grad_norm": 1.5449641942977905, "learning_rate": 1.572310334457728e-05, "loss": 0.2092, "step": 14125 }, { "epoch": 0.3061556128529023, "grad_norm": 2.25266170501709, "learning_rate": 1.5720312054181827e-05, "loss": 0.274, "step": 14130 }, { "epoch": 0.30626394817238317, "grad_norm": 1.59257972240448, "learning_rate": 1.571752010117391e-05, "loss": 0.1808, "step": 14135 }, { "epoch": 0.306372283491864, "grad_norm": 1.6266041994094849, "learning_rate": 1.571472748587694e-05, "loss": 0.1964, "step": 14140 }, { "epoch": 0.30648061881134486, "grad_norm": 1.864446997642517, "learning_rate": 1.5711934208614397e-05, "loss": 0.2571, "step": 14145 }, { "epoch": 0.3065889541308257, "grad_norm": 1.0349246263504028, "learning_rate": 1.570914026970984e-05, "loss": 0.1889, "step": 14150 }, { "epoch": 0.3066972894503066, "grad_norm": 0.9608047604560852, "learning_rate": 1.5706345669486905e-05, "loss": 0.2079, "step": 14155 }, { "epoch": 0.30680562476978746, "grad_norm": 2.446375608444214, "learning_rate": 1.570355040826931e-05, "loss": 0.2131, "step": 14160 }, { "epoch": 0.3069139600892683, "grad_norm": 3.1575675010681152, "learning_rate": 1.5700754486380834e-05, "loss": 0.3149, "step": 14165 }, { "epoch": 0.30702229540874915, "grad_norm": 1.6652542352676392, "learning_rate": 1.569795790414535e-05, "loss": 0.2318, "step": 14170 }, { "epoch": 0.30713063072823, "grad_norm": 1.5505132675170898, "learning_rate": 1.56951606618868e-05, "loss": 0.2477, "step": 14175 }, { "epoch": 0.3072389660477109, "grad_norm": 2.0733187198638916, "learning_rate": 1.5692362759929197e-05, "loss": 0.2494, "step": 14180 }, { "epoch": 0.30734730136719174, "grad_norm": 1.734971523284912, "learning_rate": 1.5689564198596644e-05, "loss": 0.2812, "step": 14185 }, { "epoch": 0.3074556366866726, "grad_norm": 2.5222585201263428, "learning_rate": 1.5686764978213304e-05, "loss": 0.2205, "step": 14190 }, { "epoch": 0.30756397200615343, "grad_norm": 1.7317814826965332, "learning_rate": 1.5683965099103433e-05, "loss": 0.3048, "step": 14195 }, { "epoch": 0.30767230732563433, "grad_norm": 1.5433467626571655, "learning_rate": 1.5681164561591348e-05, "loss": 0.1403, "step": 14200 }, { "epoch": 0.3077806426451152, "grad_norm": 1.2512880563735962, "learning_rate": 1.5678363366001453e-05, "loss": 0.1944, "step": 14205 }, { "epoch": 0.307888977964596, "grad_norm": 2.7119064331054688, "learning_rate": 1.5675561512658227e-05, "loss": 0.1961, "step": 14210 }, { "epoch": 0.30799731328407687, "grad_norm": 1.5647631883621216, "learning_rate": 1.567275900188622e-05, "loss": 0.2326, "step": 14215 }, { "epoch": 0.3081056486035577, "grad_norm": 1.6565624475479126, "learning_rate": 1.5669955834010057e-05, "loss": 0.1999, "step": 14220 }, { "epoch": 0.3082139839230386, "grad_norm": 1.537829875946045, "learning_rate": 1.5667152009354446e-05, "loss": 0.2598, "step": 14225 }, { "epoch": 0.30832231924251946, "grad_norm": 1.955127477645874, "learning_rate": 1.566434752824417e-05, "loss": 0.2848, "step": 14230 }, { "epoch": 0.3084306545620003, "grad_norm": 1.4578559398651123, "learning_rate": 1.5661542391004087e-05, "loss": 0.2178, "step": 14235 }, { "epoch": 0.30853898988148115, "grad_norm": 1.921678066253662, "learning_rate": 1.5658736597959126e-05, "loss": 0.1662, "step": 14240 }, { "epoch": 0.308647325200962, "grad_norm": 1.5905295610427856, "learning_rate": 1.5655930149434294e-05, "loss": 0.2135, "step": 14245 }, { "epoch": 0.3087556605204429, "grad_norm": 1.7913674116134644, "learning_rate": 1.5653123045754684e-05, "loss": 0.2087, "step": 14250 }, { "epoch": 0.30886399583992374, "grad_norm": 2.691958427429199, "learning_rate": 1.5650315287245453e-05, "loss": 0.2105, "step": 14255 }, { "epoch": 0.3089723311594046, "grad_norm": 1.2438665628433228, "learning_rate": 1.5647506874231838e-05, "loss": 0.1544, "step": 14260 }, { "epoch": 0.30908066647888544, "grad_norm": 1.6019604206085205, "learning_rate": 1.5644697807039153e-05, "loss": 0.1612, "step": 14265 }, { "epoch": 0.3091890017983663, "grad_norm": 1.9886807203292847, "learning_rate": 1.564188808599278e-05, "loss": 0.23, "step": 14270 }, { "epoch": 0.3092973371178472, "grad_norm": 1.584162712097168, "learning_rate": 1.563907771141819e-05, "loss": 0.2009, "step": 14275 }, { "epoch": 0.309405672437328, "grad_norm": 2.2618792057037354, "learning_rate": 1.563626668364092e-05, "loss": 0.2133, "step": 14280 }, { "epoch": 0.3095140077568089, "grad_norm": 1.9245693683624268, "learning_rate": 1.563345500298659e-05, "loss": 0.2434, "step": 14285 }, { "epoch": 0.3096223430762897, "grad_norm": 1.8085323572158813, "learning_rate": 1.563064266978088e-05, "loss": 0.2236, "step": 14290 }, { "epoch": 0.30973067839577056, "grad_norm": 1.5915236473083496, "learning_rate": 1.562782968434957e-05, "loss": 0.1407, "step": 14295 }, { "epoch": 0.30983901371525147, "grad_norm": 2.4446775913238525, "learning_rate": 1.5625016047018495e-05, "loss": 0.172, "step": 14300 }, { "epoch": 0.3099473490347323, "grad_norm": 1.412638545036316, "learning_rate": 1.562220175811357e-05, "loss": 0.12, "step": 14305 }, { "epoch": 0.31005568435421316, "grad_norm": 1.481162190437317, "learning_rate": 1.5619386817960794e-05, "loss": 0.24, "step": 14310 }, { "epoch": 0.310164019673694, "grad_norm": 2.460644245147705, "learning_rate": 1.561657122688623e-05, "loss": 0.2674, "step": 14315 }, { "epoch": 0.31027235499317485, "grad_norm": 1.9435791969299316, "learning_rate": 1.5613754985216032e-05, "loss": 0.169, "step": 14320 }, { "epoch": 0.31038069031265575, "grad_norm": 2.3343465328216553, "learning_rate": 1.5610938093276407e-05, "loss": 0.2785, "step": 14325 }, { "epoch": 0.3104890256321366, "grad_norm": 2.4004154205322266, "learning_rate": 1.560812055139366e-05, "loss": 0.1434, "step": 14330 }, { "epoch": 0.31059736095161744, "grad_norm": 2.330333709716797, "learning_rate": 1.5605302359894155e-05, "loss": 0.2042, "step": 14335 }, { "epoch": 0.3107056962710983, "grad_norm": 3.293260335922241, "learning_rate": 1.5602483519104344e-05, "loss": 0.1706, "step": 14340 }, { "epoch": 0.3108140315905792, "grad_norm": 1.222713828086853, "learning_rate": 1.5599664029350732e-05, "loss": 0.1262, "step": 14345 }, { "epoch": 0.31092236691006003, "grad_norm": 1.2967153787612915, "learning_rate": 1.559684389095993e-05, "loss": 0.2046, "step": 14350 }, { "epoch": 0.3110307022295409, "grad_norm": 1.334507942199707, "learning_rate": 1.5594023104258612e-05, "loss": 0.1386, "step": 14355 }, { "epoch": 0.3111390375490217, "grad_norm": 1.398955225944519, "learning_rate": 1.5591201669573507e-05, "loss": 0.3004, "step": 14360 }, { "epoch": 0.31124737286850257, "grad_norm": 1.325086236000061, "learning_rate": 1.5588379587231446e-05, "loss": 0.1964, "step": 14365 }, { "epoch": 0.31135570818798347, "grad_norm": 1.8591892719268799, "learning_rate": 1.5585556857559322e-05, "loss": 0.2473, "step": 14370 }, { "epoch": 0.3114640435074643, "grad_norm": 2.1450188159942627, "learning_rate": 1.5582733480884114e-05, "loss": 0.2541, "step": 14375 }, { "epoch": 0.31157237882694516, "grad_norm": 1.8907015323638916, "learning_rate": 1.5579909457532857e-05, "loss": 0.3206, "step": 14380 }, { "epoch": 0.311680714146426, "grad_norm": 1.2811510562896729, "learning_rate": 1.5577084787832676e-05, "loss": 0.1947, "step": 14385 }, { "epoch": 0.31178904946590685, "grad_norm": 2.882749557495117, "learning_rate": 1.557425947211077e-05, "loss": 0.2496, "step": 14390 }, { "epoch": 0.31189738478538775, "grad_norm": 2.085770845413208, "learning_rate": 1.5571433510694404e-05, "loss": 0.1874, "step": 14395 }, { "epoch": 0.3120057201048686, "grad_norm": 2.3735203742980957, "learning_rate": 1.5568606903910927e-05, "loss": 0.2508, "step": 14400 }, { "epoch": 0.31211405542434945, "grad_norm": 2.1283791065216064, "learning_rate": 1.5565779652087757e-05, "loss": 0.2463, "step": 14405 }, { "epoch": 0.3122223907438303, "grad_norm": 1.836049199104309, "learning_rate": 1.556295175555239e-05, "loss": 0.2527, "step": 14410 }, { "epoch": 0.31233072606331114, "grad_norm": 1.7334076166152954, "learning_rate": 1.5560123214632396e-05, "loss": 0.3056, "step": 14415 }, { "epoch": 0.31243906138279204, "grad_norm": 2.428988456726074, "learning_rate": 1.5557294029655418e-05, "loss": 0.2227, "step": 14420 }, { "epoch": 0.3125473967022729, "grad_norm": 1.553356647491455, "learning_rate": 1.5554464200949175e-05, "loss": 0.1938, "step": 14425 }, { "epoch": 0.31265573202175373, "grad_norm": 2.0951011180877686, "learning_rate": 1.555163372884146e-05, "loss": 0.2159, "step": 14430 }, { "epoch": 0.3127640673412346, "grad_norm": 1.9005095958709717, "learning_rate": 1.554880261366014e-05, "loss": 0.2346, "step": 14435 }, { "epoch": 0.3128724026607154, "grad_norm": 1.733616828918457, "learning_rate": 1.554597085573316e-05, "loss": 0.1953, "step": 14440 }, { "epoch": 0.3129807379801963, "grad_norm": 1.7909687757492065, "learning_rate": 1.5543138455388532e-05, "loss": 0.2342, "step": 14445 }, { "epoch": 0.31308907329967717, "grad_norm": 1.9493224620819092, "learning_rate": 1.5540305412954354e-05, "loss": 0.2399, "step": 14450 }, { "epoch": 0.313197408619158, "grad_norm": 1.488526463508606, "learning_rate": 1.553747172875878e-05, "loss": 0.2707, "step": 14455 }, { "epoch": 0.31330574393863886, "grad_norm": 1.3563624620437622, "learning_rate": 1.5534637403130068e-05, "loss": 0.267, "step": 14460 }, { "epoch": 0.31341407925811976, "grad_norm": 1.0176537036895752, "learning_rate": 1.5531802436396516e-05, "loss": 0.1437, "step": 14465 }, { "epoch": 0.3135224145776006, "grad_norm": 1.8597745895385742, "learning_rate": 1.5528966828886517e-05, "loss": 0.2636, "step": 14470 }, { "epoch": 0.31363074989708145, "grad_norm": 1.78824782371521, "learning_rate": 1.5526130580928537e-05, "loss": 0.1674, "step": 14475 }, { "epoch": 0.3137390852165623, "grad_norm": 2.8783814907073975, "learning_rate": 1.5523293692851113e-05, "loss": 0.2324, "step": 14480 }, { "epoch": 0.31384742053604314, "grad_norm": 1.311141848564148, "learning_rate": 1.5520456164982853e-05, "loss": 0.2204, "step": 14485 }, { "epoch": 0.31395575585552404, "grad_norm": 1.7328202724456787, "learning_rate": 1.551761799765244e-05, "loss": 0.1819, "step": 14490 }, { "epoch": 0.3140640911750049, "grad_norm": 1.7123901844024658, "learning_rate": 1.5514779191188636e-05, "loss": 0.2415, "step": 14495 }, { "epoch": 0.31417242649448573, "grad_norm": 1.131519079208374, "learning_rate": 1.5511939745920276e-05, "loss": 0.279, "step": 14500 }, { "epoch": 0.3142807618139666, "grad_norm": 1.4613232612609863, "learning_rate": 1.5509099662176264e-05, "loss": 0.2358, "step": 14505 }, { "epoch": 0.3143890971334474, "grad_norm": 1.852067470550537, "learning_rate": 1.5506258940285582e-05, "loss": 0.1928, "step": 14510 }, { "epoch": 0.3144974324529283, "grad_norm": 1.2439936399459839, "learning_rate": 1.5503417580577285e-05, "loss": 0.133, "step": 14515 }, { "epoch": 0.3146057677724092, "grad_norm": 2.7452900409698486, "learning_rate": 1.5500575583380505e-05, "loss": 0.2246, "step": 14520 }, { "epoch": 0.31471410309189, "grad_norm": 1.7645128965377808, "learning_rate": 1.549773294902444e-05, "loss": 0.2388, "step": 14525 }, { "epoch": 0.31482243841137086, "grad_norm": 1.5503212213516235, "learning_rate": 1.549488967783837e-05, "loss": 0.2137, "step": 14530 }, { "epoch": 0.3149307737308517, "grad_norm": 1.7385711669921875, "learning_rate": 1.5492045770151642e-05, "loss": 0.2261, "step": 14535 }, { "epoch": 0.3150391090503326, "grad_norm": 1.851948857307434, "learning_rate": 1.5489201226293685e-05, "loss": 0.2435, "step": 14540 }, { "epoch": 0.31514744436981346, "grad_norm": 2.0075535774230957, "learning_rate": 1.5486356046593996e-05, "loss": 0.1505, "step": 14545 }, { "epoch": 0.3152557796892943, "grad_norm": 1.8603477478027344, "learning_rate": 1.548351023138214e-05, "loss": 0.2549, "step": 14550 }, { "epoch": 0.31536411500877515, "grad_norm": 1.3702895641326904, "learning_rate": 1.5480663780987767e-05, "loss": 0.1581, "step": 14555 }, { "epoch": 0.315472450328256, "grad_norm": 2.3968474864959717, "learning_rate": 1.54778166957406e-05, "loss": 0.2158, "step": 14560 }, { "epoch": 0.3155807856477369, "grad_norm": 1.7409279346466064, "learning_rate": 1.5474968975970423e-05, "loss": 0.1976, "step": 14565 }, { "epoch": 0.31568912096721774, "grad_norm": 1.4510633945465088, "learning_rate": 1.5472120622007107e-05, "loss": 0.2309, "step": 14570 }, { "epoch": 0.3157974562866986, "grad_norm": 1.5833570957183838, "learning_rate": 1.5469271634180586e-05, "loss": 0.2236, "step": 14575 }, { "epoch": 0.31590579160617943, "grad_norm": 1.8660279512405396, "learning_rate": 1.546642201282088e-05, "loss": 0.2115, "step": 14580 }, { "epoch": 0.3160141269256603, "grad_norm": 2.3111746311187744, "learning_rate": 1.546357175825807e-05, "loss": 0.2342, "step": 14585 }, { "epoch": 0.3161224622451412, "grad_norm": 1.5607606172561646, "learning_rate": 1.5460720870822312e-05, "loss": 0.1495, "step": 14590 }, { "epoch": 0.316230797564622, "grad_norm": 1.9660574197769165, "learning_rate": 1.5457869350843847e-05, "loss": 0.2532, "step": 14595 }, { "epoch": 0.31633913288410287, "grad_norm": 1.4750168323516846, "learning_rate": 1.5455017198652974e-05, "loss": 0.2116, "step": 14600 }, { "epoch": 0.3164474682035837, "grad_norm": 1.1422812938690186, "learning_rate": 1.545216441458008e-05, "loss": 0.2109, "step": 14605 }, { "epoch": 0.3165558035230646, "grad_norm": 1.8954709768295288, "learning_rate": 1.5449310998955603e-05, "loss": 0.2218, "step": 14610 }, { "epoch": 0.31666413884254546, "grad_norm": 1.8380374908447266, "learning_rate": 1.5446456952110086e-05, "loss": 0.2455, "step": 14615 }, { "epoch": 0.3167724741620263, "grad_norm": 1.9742908477783203, "learning_rate": 1.5443602274374115e-05, "loss": 0.2522, "step": 14620 }, { "epoch": 0.31688080948150715, "grad_norm": 1.1864469051361084, "learning_rate": 1.5440746966078365e-05, "loss": 0.1576, "step": 14625 }, { "epoch": 0.316989144800988, "grad_norm": 2.128899574279785, "learning_rate": 1.543789102755358e-05, "loss": 0.2892, "step": 14630 }, { "epoch": 0.3170974801204689, "grad_norm": 1.3251636028289795, "learning_rate": 1.5435034459130584e-05, "loss": 0.1412, "step": 14635 }, { "epoch": 0.31720581543994975, "grad_norm": 2.2373147010803223, "learning_rate": 1.543217726114026e-05, "loss": 0.2836, "step": 14640 }, { "epoch": 0.3173141507594306, "grad_norm": 2.2428019046783447, "learning_rate": 1.5429319433913573e-05, "loss": 0.2117, "step": 14645 }, { "epoch": 0.31742248607891144, "grad_norm": 1.4921127557754517, "learning_rate": 1.5426460977781562e-05, "loss": 0.2092, "step": 14650 }, { "epoch": 0.3175308213983923, "grad_norm": 1.7283852100372314, "learning_rate": 1.5423601893075336e-05, "loss": 0.2234, "step": 14655 }, { "epoch": 0.3176391567178732, "grad_norm": 1.7876777648925781, "learning_rate": 1.5420742180126077e-05, "loss": 0.246, "step": 14660 }, { "epoch": 0.31774749203735403, "grad_norm": 2.0788705348968506, "learning_rate": 1.5417881839265037e-05, "loss": 0.2663, "step": 14665 }, { "epoch": 0.3178558273568349, "grad_norm": 2.074110746383667, "learning_rate": 1.5415020870823547e-05, "loss": 0.2151, "step": 14670 }, { "epoch": 0.3179641626763157, "grad_norm": 2.2724738121032715, "learning_rate": 1.5412159275133004e-05, "loss": 0.3684, "step": 14675 }, { "epoch": 0.31807249799579657, "grad_norm": 1.6706606149673462, "learning_rate": 1.5409297052524886e-05, "loss": 0.2643, "step": 14680 }, { "epoch": 0.31818083331527747, "grad_norm": 2.354757070541382, "learning_rate": 1.5406434203330735e-05, "loss": 0.275, "step": 14685 }, { "epoch": 0.3182891686347583, "grad_norm": 2.3203580379486084, "learning_rate": 1.5403570727882168e-05, "loss": 0.3004, "step": 14690 }, { "epoch": 0.31839750395423916, "grad_norm": 2.414137840270996, "learning_rate": 1.540070662651088e-05, "loss": 0.1653, "step": 14695 }, { "epoch": 0.31850583927372, "grad_norm": 2.1497857570648193, "learning_rate": 1.539784189954863e-05, "loss": 0.2032, "step": 14700 }, { "epoch": 0.31861417459320085, "grad_norm": 1.522763967514038, "learning_rate": 1.5394976547327258e-05, "loss": 0.2795, "step": 14705 }, { "epoch": 0.31872250991268175, "grad_norm": 2.23423171043396, "learning_rate": 1.5392110570178665e-05, "loss": 0.1822, "step": 14710 }, { "epoch": 0.3188308452321626, "grad_norm": 1.4043549299240112, "learning_rate": 1.538924396843484e-05, "loss": 0.187, "step": 14715 }, { "epoch": 0.31893918055164344, "grad_norm": 2.9397406578063965, "learning_rate": 1.5386376742427834e-05, "loss": 0.2395, "step": 14720 }, { "epoch": 0.3190475158711243, "grad_norm": 3.195239782333374, "learning_rate": 1.5383508892489768e-05, "loss": 0.136, "step": 14725 }, { "epoch": 0.3191558511906052, "grad_norm": 2.494755268096924, "learning_rate": 1.5380640418952842e-05, "loss": 0.2565, "step": 14730 }, { "epoch": 0.31926418651008603, "grad_norm": 1.996914267539978, "learning_rate": 1.5377771322149328e-05, "loss": 0.1926, "step": 14735 }, { "epoch": 0.3193725218295669, "grad_norm": 2.9855401515960693, "learning_rate": 1.537490160241156e-05, "loss": 0.163, "step": 14740 }, { "epoch": 0.3194808571490477, "grad_norm": 1.3966645002365112, "learning_rate": 1.537203126007196e-05, "loss": 0.3387, "step": 14745 }, { "epoch": 0.31958919246852857, "grad_norm": 1.8826476335525513, "learning_rate": 1.536916029546301e-05, "loss": 0.1317, "step": 14750 }, { "epoch": 0.3196975277880095, "grad_norm": 2.2612993717193604, "learning_rate": 1.5366288708917272e-05, "loss": 0.2527, "step": 14755 }, { "epoch": 0.3198058631074903, "grad_norm": 2.3325750827789307, "learning_rate": 1.5363416500767372e-05, "loss": 0.28, "step": 14760 }, { "epoch": 0.31991419842697116, "grad_norm": 1.6101890802383423, "learning_rate": 1.5360543671346016e-05, "loss": 0.1592, "step": 14765 }, { "epoch": 0.320022533746452, "grad_norm": 1.6723392009735107, "learning_rate": 1.5357670220985978e-05, "loss": 0.2524, "step": 14770 }, { "epoch": 0.32013086906593285, "grad_norm": 1.552077054977417, "learning_rate": 1.5354796150020102e-05, "loss": 0.2698, "step": 14775 }, { "epoch": 0.32023920438541376, "grad_norm": 1.7886745929718018, "learning_rate": 1.5351921458781303e-05, "loss": 0.1825, "step": 14780 }, { "epoch": 0.3203475397048946, "grad_norm": 1.8440614938735962, "learning_rate": 1.534904614760258e-05, "loss": 0.263, "step": 14785 }, { "epoch": 0.32045587502437545, "grad_norm": 1.6687666177749634, "learning_rate": 1.5346170216816985e-05, "loss": 0.1886, "step": 14790 }, { "epoch": 0.3205642103438563, "grad_norm": 1.7046895027160645, "learning_rate": 1.5343293666757658e-05, "loss": 0.2861, "step": 14795 }, { "epoch": 0.32067254566333714, "grad_norm": 2.0351297855377197, "learning_rate": 1.5340416497757804e-05, "loss": 0.1952, "step": 14800 }, { "epoch": 0.32078088098281804, "grad_norm": 1.3784339427947998, "learning_rate": 1.53375387101507e-05, "loss": 0.2797, "step": 14805 }, { "epoch": 0.3208892163022989, "grad_norm": 1.9419840574264526, "learning_rate": 1.533466030426969e-05, "loss": 0.2525, "step": 14810 }, { "epoch": 0.32099755162177973, "grad_norm": 2.005676746368408, "learning_rate": 1.5331781280448193e-05, "loss": 0.2735, "step": 14815 }, { "epoch": 0.3211058869412606, "grad_norm": 1.587806224822998, "learning_rate": 1.532890163901971e-05, "loss": 0.1673, "step": 14820 }, { "epoch": 0.3212142222607414, "grad_norm": 0.8968108296394348, "learning_rate": 1.5326021380317796e-05, "loss": 0.1744, "step": 14825 }, { "epoch": 0.3213225575802223, "grad_norm": 2.1774749755859375, "learning_rate": 1.532314050467609e-05, "loss": 0.2052, "step": 14830 }, { "epoch": 0.32143089289970317, "grad_norm": 1.4070422649383545, "learning_rate": 1.5320259012428295e-05, "loss": 0.2466, "step": 14835 }, { "epoch": 0.321539228219184, "grad_norm": 2.2576653957366943, "learning_rate": 1.5317376903908195e-05, "loss": 0.1787, "step": 14840 }, { "epoch": 0.32164756353866486, "grad_norm": 1.6554542779922485, "learning_rate": 1.5314494179449633e-05, "loss": 0.1197, "step": 14845 }, { "epoch": 0.3217558988581457, "grad_norm": 2.1720268726348877, "learning_rate": 1.5311610839386532e-05, "loss": 0.2748, "step": 14850 }, { "epoch": 0.3218642341776266, "grad_norm": 2.790147542953491, "learning_rate": 1.5308726884052884e-05, "loss": 0.3336, "step": 14855 }, { "epoch": 0.32197256949710745, "grad_norm": 1.5244284868240356, "learning_rate": 1.530584231378275e-05, "loss": 0.218, "step": 14860 }, { "epoch": 0.3220809048165883, "grad_norm": 1.6125026941299438, "learning_rate": 1.5302957128910264e-05, "loss": 0.154, "step": 14865 }, { "epoch": 0.32218924013606914, "grad_norm": 1.888965129852295, "learning_rate": 1.5300071329769632e-05, "loss": 0.2709, "step": 14870 }, { "epoch": 0.32229757545555004, "grad_norm": 2.657344341278076, "learning_rate": 1.5297184916695135e-05, "loss": 0.2245, "step": 14875 }, { "epoch": 0.3224059107750309, "grad_norm": 1.3080216646194458, "learning_rate": 1.5294297890021115e-05, "loss": 0.1928, "step": 14880 }, { "epoch": 0.32251424609451174, "grad_norm": 1.739601969718933, "learning_rate": 1.5291410250081997e-05, "loss": 0.1871, "step": 14885 }, { "epoch": 0.3226225814139926, "grad_norm": 1.1482915878295898, "learning_rate": 1.5288521997212263e-05, "loss": 0.2638, "step": 14890 }, { "epoch": 0.3227309167334734, "grad_norm": 3.3481414318084717, "learning_rate": 1.5285633131746476e-05, "loss": 0.1602, "step": 14895 }, { "epoch": 0.32283925205295433, "grad_norm": 1.8293688297271729, "learning_rate": 1.528274365401927e-05, "loss": 0.2108, "step": 14900 }, { "epoch": 0.3229475873724352, "grad_norm": 2.0583646297454834, "learning_rate": 1.527985356436535e-05, "loss": 0.1348, "step": 14905 }, { "epoch": 0.323055922691916, "grad_norm": 2.695082902908325, "learning_rate": 1.5276962863119488e-05, "loss": 0.2428, "step": 14910 }, { "epoch": 0.32316425801139687, "grad_norm": 1.4165359735488892, "learning_rate": 1.5274071550616526e-05, "loss": 0.1619, "step": 14915 }, { "epoch": 0.3232725933308777, "grad_norm": 1.7764513492584229, "learning_rate": 1.527117962719138e-05, "loss": 0.2089, "step": 14920 }, { "epoch": 0.3233809286503586, "grad_norm": 1.6158745288848877, "learning_rate": 1.5268287093179034e-05, "loss": 0.199, "step": 14925 }, { "epoch": 0.32348926396983946, "grad_norm": 1.8566803932189941, "learning_rate": 1.5265393948914553e-05, "loss": 0.1316, "step": 14930 }, { "epoch": 0.3235975992893203, "grad_norm": 1.378032922744751, "learning_rate": 1.5262500194733056e-05, "loss": 0.2228, "step": 14935 }, { "epoch": 0.32370593460880115, "grad_norm": 2.4024362564086914, "learning_rate": 1.525960583096974e-05, "loss": 0.2327, "step": 14940 }, { "epoch": 0.323814269928282, "grad_norm": 0.9057410359382629, "learning_rate": 1.5256710857959882e-05, "loss": 0.265, "step": 14945 }, { "epoch": 0.3239226052477629, "grad_norm": 1.5942014455795288, "learning_rate": 1.525381527603881e-05, "loss": 0.3113, "step": 14950 }, { "epoch": 0.32403094056724374, "grad_norm": 0.9287599325180054, "learning_rate": 1.5250919085541946e-05, "loss": 0.171, "step": 14955 }, { "epoch": 0.3241392758867246, "grad_norm": 2.2726287841796875, "learning_rate": 1.5248022286804765e-05, "loss": 0.2001, "step": 14960 }, { "epoch": 0.32424761120620543, "grad_norm": 1.3104428052902222, "learning_rate": 1.5245124880162816e-05, "loss": 0.2386, "step": 14965 }, { "epoch": 0.3243559465256863, "grad_norm": 2.298351287841797, "learning_rate": 1.5242226865951724e-05, "loss": 0.1848, "step": 14970 }, { "epoch": 0.3244642818451672, "grad_norm": 1.9949513673782349, "learning_rate": 1.5239328244507175e-05, "loss": 0.1884, "step": 14975 }, { "epoch": 0.324572617164648, "grad_norm": 1.5848839282989502, "learning_rate": 1.5236429016164932e-05, "loss": 0.2072, "step": 14980 }, { "epoch": 0.32468095248412887, "grad_norm": 1.7805147171020508, "learning_rate": 1.5233529181260833e-05, "loss": 0.1975, "step": 14985 }, { "epoch": 0.3247892878036097, "grad_norm": 2.145887613296509, "learning_rate": 1.5230628740130777e-05, "loss": 0.2012, "step": 14990 }, { "epoch": 0.3248976231230906, "grad_norm": 1.5535612106323242, "learning_rate": 1.5227727693110734e-05, "loss": 0.1691, "step": 14995 }, { "epoch": 0.32500595844257146, "grad_norm": 1.5684388875961304, "learning_rate": 1.5224826040536749e-05, "loss": 0.2092, "step": 15000 }, { "epoch": 0.3251142937620523, "grad_norm": 2.296330451965332, "learning_rate": 1.5221923782744936e-05, "loss": 0.2766, "step": 15005 }, { "epoch": 0.32522262908153315, "grad_norm": 2.5763320922851562, "learning_rate": 1.521902092007148e-05, "loss": 0.278, "step": 15010 }, { "epoch": 0.325330964401014, "grad_norm": 2.5125086307525635, "learning_rate": 1.521611745285263e-05, "loss": 0.2152, "step": 15015 }, { "epoch": 0.3254392997204949, "grad_norm": 1.5628105401992798, "learning_rate": 1.5213213381424705e-05, "loss": 0.2217, "step": 15020 }, { "epoch": 0.32554763503997575, "grad_norm": 2.2340879440307617, "learning_rate": 1.5210308706124108e-05, "loss": 0.2595, "step": 15025 }, { "epoch": 0.3256559703594566, "grad_norm": 2.4243781566619873, "learning_rate": 1.52074034272873e-05, "loss": 0.2061, "step": 15030 }, { "epoch": 0.32576430567893744, "grad_norm": 1.8561378717422485, "learning_rate": 1.5204497545250809e-05, "loss": 0.2212, "step": 15035 }, { "epoch": 0.3258726409984183, "grad_norm": 1.7748347520828247, "learning_rate": 1.5201591060351242e-05, "loss": 0.213, "step": 15040 }, { "epoch": 0.3259809763178992, "grad_norm": 2.1319432258605957, "learning_rate": 1.5198683972925268e-05, "loss": 0.2969, "step": 15045 }, { "epoch": 0.32608931163738003, "grad_norm": 1.448140263557434, "learning_rate": 1.5195776283309636e-05, "loss": 0.2379, "step": 15050 }, { "epoch": 0.3261976469568609, "grad_norm": 1.9653023481369019, "learning_rate": 1.5192867991841152e-05, "loss": 0.201, "step": 15055 }, { "epoch": 0.3263059822763417, "grad_norm": 1.7508336305618286, "learning_rate": 1.5189959098856698e-05, "loss": 0.1634, "step": 15060 }, { "epoch": 0.32641431759582257, "grad_norm": 1.8149222135543823, "learning_rate": 1.5187049604693234e-05, "loss": 0.1733, "step": 15065 }, { "epoch": 0.32652265291530347, "grad_norm": 2.1920576095581055, "learning_rate": 1.518413950968777e-05, "loss": 0.224, "step": 15070 }, { "epoch": 0.3266309882347843, "grad_norm": 1.747759222984314, "learning_rate": 1.5181228814177403e-05, "loss": 0.3051, "step": 15075 }, { "epoch": 0.32673932355426516, "grad_norm": 1.6981147527694702, "learning_rate": 1.5178317518499292e-05, "loss": 0.2969, "step": 15080 }, { "epoch": 0.326847658873746, "grad_norm": 2.233778476715088, "learning_rate": 1.5175405622990672e-05, "loss": 0.2086, "step": 15085 }, { "epoch": 0.32695599419322685, "grad_norm": 1.379204511642456, "learning_rate": 1.5172493127988835e-05, "loss": 0.1793, "step": 15090 }, { "epoch": 0.32706432951270775, "grad_norm": 1.9188494682312012, "learning_rate": 1.5169580033831155e-05, "loss": 0.1694, "step": 15095 }, { "epoch": 0.3271726648321886, "grad_norm": 1.545575499534607, "learning_rate": 1.5166666340855066e-05, "loss": 0.1283, "step": 15100 }, { "epoch": 0.32728100015166944, "grad_norm": 2.3675646781921387, "learning_rate": 1.516375204939808e-05, "loss": 0.1918, "step": 15105 }, { "epoch": 0.3273893354711503, "grad_norm": 2.0503342151641846, "learning_rate": 1.516083715979777e-05, "loss": 0.1735, "step": 15110 }, { "epoch": 0.32749767079063113, "grad_norm": 1.3050837516784668, "learning_rate": 1.5157921672391784e-05, "loss": 0.2511, "step": 15115 }, { "epoch": 0.32760600611011204, "grad_norm": 1.131915807723999, "learning_rate": 1.515500558751784e-05, "loss": 0.1726, "step": 15120 }, { "epoch": 0.3277143414295929, "grad_norm": 2.7964558601379395, "learning_rate": 1.5152088905513717e-05, "loss": 0.3334, "step": 15125 }, { "epoch": 0.3278226767490737, "grad_norm": 2.2200746536254883, "learning_rate": 1.5149171626717278e-05, "loss": 0.2755, "step": 15130 }, { "epoch": 0.3279310120685546, "grad_norm": 2.8347339630126953, "learning_rate": 1.514625375146644e-05, "loss": 0.2632, "step": 15135 }, { "epoch": 0.3280393473880355, "grad_norm": 2.262327194213867, "learning_rate": 1.5143335280099191e-05, "loss": 0.2133, "step": 15140 }, { "epoch": 0.3281476827075163, "grad_norm": 1.8919304609298706, "learning_rate": 1.5140416212953602e-05, "loss": 0.2301, "step": 15145 }, { "epoch": 0.32825601802699717, "grad_norm": 1.7730576992034912, "learning_rate": 1.5137496550367793e-05, "loss": 0.1753, "step": 15150 }, { "epoch": 0.328364353346478, "grad_norm": 2.0000431537628174, "learning_rate": 1.5134576292679975e-05, "loss": 0.2171, "step": 15155 }, { "epoch": 0.32847268866595886, "grad_norm": 2.8562138080596924, "learning_rate": 1.5131655440228406e-05, "loss": 0.2125, "step": 15160 }, { "epoch": 0.32858102398543976, "grad_norm": 1.6151725053787231, "learning_rate": 1.5128733993351423e-05, "loss": 0.1834, "step": 15165 }, { "epoch": 0.3286893593049206, "grad_norm": 1.270536184310913, "learning_rate": 1.512581195238744e-05, "loss": 0.1205, "step": 15170 }, { "epoch": 0.32879769462440145, "grad_norm": 2.3834872245788574, "learning_rate": 1.5122889317674927e-05, "loss": 0.1398, "step": 15175 }, { "epoch": 0.3289060299438823, "grad_norm": 1.384761095046997, "learning_rate": 1.5119966089552427e-05, "loss": 0.206, "step": 15180 }, { "epoch": 0.32901436526336314, "grad_norm": 1.5613328218460083, "learning_rate": 1.511704226835855e-05, "loss": 0.2165, "step": 15185 }, { "epoch": 0.32912270058284404, "grad_norm": 1.9736576080322266, "learning_rate": 1.511411785443198e-05, "loss": 0.2105, "step": 15190 }, { "epoch": 0.3292310359023249, "grad_norm": 2.355787992477417, "learning_rate": 1.5111192848111466e-05, "loss": 0.1286, "step": 15195 }, { "epoch": 0.32933937122180573, "grad_norm": 1.6901196241378784, "learning_rate": 1.5108267249735828e-05, "loss": 0.195, "step": 15200 }, { "epoch": 0.3294477065412866, "grad_norm": 2.053196907043457, "learning_rate": 1.5105341059643952e-05, "loss": 0.1991, "step": 15205 }, { "epoch": 0.3295560418607674, "grad_norm": 2.035844326019287, "learning_rate": 1.5102414278174791e-05, "loss": 0.2857, "step": 15210 }, { "epoch": 0.3296643771802483, "grad_norm": 1.0856698751449585, "learning_rate": 1.5099486905667368e-05, "loss": 0.2018, "step": 15215 }, { "epoch": 0.32977271249972917, "grad_norm": 1.9325988292694092, "learning_rate": 1.5096558942460782e-05, "loss": 0.1817, "step": 15220 }, { "epoch": 0.32988104781921, "grad_norm": 2.1536576747894287, "learning_rate": 1.5093630388894184e-05, "loss": 0.2306, "step": 15225 }, { "epoch": 0.32998938313869086, "grad_norm": 1.5899940729141235, "learning_rate": 1.5090701245306808e-05, "loss": 0.167, "step": 15230 }, { "epoch": 0.3300977184581717, "grad_norm": 1.638210415840149, "learning_rate": 1.5087771512037956e-05, "loss": 0.206, "step": 15235 }, { "epoch": 0.3302060537776526, "grad_norm": 1.2096341848373413, "learning_rate": 1.5084841189426984e-05, "loss": 0.19, "step": 15240 }, { "epoch": 0.33031438909713345, "grad_norm": 1.909605860710144, "learning_rate": 1.5081910277813335e-05, "loss": 0.1972, "step": 15245 }, { "epoch": 0.3304227244166143, "grad_norm": 1.0610971450805664, "learning_rate": 1.5078978777536507e-05, "loss": 0.1442, "step": 15250 }, { "epoch": 0.33053105973609515, "grad_norm": 2.404114007949829, "learning_rate": 1.507604668893607e-05, "loss": 0.2897, "step": 15255 }, { "epoch": 0.33063939505557605, "grad_norm": 1.8389209508895874, "learning_rate": 1.5073114012351661e-05, "loss": 0.223, "step": 15260 }, { "epoch": 0.3307477303750569, "grad_norm": 1.7681808471679688, "learning_rate": 1.5070180748122991e-05, "loss": 0.2228, "step": 15265 }, { "epoch": 0.33085606569453774, "grad_norm": 1.6307345628738403, "learning_rate": 1.506724689658983e-05, "loss": 0.2556, "step": 15270 }, { "epoch": 0.3309644010140186, "grad_norm": 2.185744285583496, "learning_rate": 1.506431245809203e-05, "loss": 0.1938, "step": 15275 }, { "epoch": 0.33107273633349943, "grad_norm": 2.4708120822906494, "learning_rate": 1.5061377432969488e-05, "loss": 0.1769, "step": 15280 }, { "epoch": 0.33118107165298033, "grad_norm": 1.5702651739120483, "learning_rate": 1.5058441821562192e-05, "loss": 0.254, "step": 15285 }, { "epoch": 0.3312894069724612, "grad_norm": 2.3954904079437256, "learning_rate": 1.5055505624210189e-05, "loss": 0.2103, "step": 15290 }, { "epoch": 0.331397742291942, "grad_norm": 1.3959828615188599, "learning_rate": 1.505256884125359e-05, "loss": 0.2465, "step": 15295 }, { "epoch": 0.33150607761142287, "grad_norm": 1.8047194480895996, "learning_rate": 1.5049631473032577e-05, "loss": 0.3155, "step": 15300 }, { "epoch": 0.3316144129309037, "grad_norm": 1.6763101816177368, "learning_rate": 1.5046693519887404e-05, "loss": 0.283, "step": 15305 }, { "epoch": 0.3317227482503846, "grad_norm": 1.4159234762191772, "learning_rate": 1.5043754982158381e-05, "loss": 0.1874, "step": 15310 }, { "epoch": 0.33183108356986546, "grad_norm": 1.373852252960205, "learning_rate": 1.50408158601859e-05, "loss": 0.2361, "step": 15315 }, { "epoch": 0.3319394188893463, "grad_norm": 2.678845167160034, "learning_rate": 1.5037876154310416e-05, "loss": 0.2212, "step": 15320 }, { "epoch": 0.33204775420882715, "grad_norm": 1.799472451210022, "learning_rate": 1.5034935864872443e-05, "loss": 0.1743, "step": 15325 }, { "epoch": 0.332156089528308, "grad_norm": 2.4561214447021484, "learning_rate": 1.5031994992212578e-05, "loss": 0.2994, "step": 15330 }, { "epoch": 0.3322644248477889, "grad_norm": 2.9009556770324707, "learning_rate": 1.5029053536671469e-05, "loss": 0.1923, "step": 15335 }, { "epoch": 0.33237276016726974, "grad_norm": 1.1972944736480713, "learning_rate": 1.5026111498589846e-05, "loss": 0.2079, "step": 15340 }, { "epoch": 0.3324810954867506, "grad_norm": 1.8656902313232422, "learning_rate": 1.5023168878308493e-05, "loss": 0.2867, "step": 15345 }, { "epoch": 0.33258943080623143, "grad_norm": 1.8218876123428345, "learning_rate": 1.5020225676168276e-05, "loss": 0.319, "step": 15350 }, { "epoch": 0.3326977661257123, "grad_norm": 1.3105915784835815, "learning_rate": 1.5017281892510118e-05, "loss": 0.1681, "step": 15355 }, { "epoch": 0.3328061014451932, "grad_norm": 1.5885618925094604, "learning_rate": 1.501433752767501e-05, "loss": 0.2094, "step": 15360 }, { "epoch": 0.332914436764674, "grad_norm": 1.6557625532150269, "learning_rate": 1.5011392582004012e-05, "loss": 0.1472, "step": 15365 }, { "epoch": 0.33302277208415487, "grad_norm": 1.8599320650100708, "learning_rate": 1.5008447055838255e-05, "loss": 0.2795, "step": 15370 }, { "epoch": 0.3331311074036357, "grad_norm": 1.7995314598083496, "learning_rate": 1.5005500949518937e-05, "loss": 0.2487, "step": 15375 }, { "epoch": 0.33323944272311656, "grad_norm": 1.5511081218719482, "learning_rate": 1.5002554263387314e-05, "loss": 0.287, "step": 15380 }, { "epoch": 0.33334777804259746, "grad_norm": 1.6802148818969727, "learning_rate": 1.4999606997784714e-05, "loss": 0.1523, "step": 15385 }, { "epoch": 0.3334561133620783, "grad_norm": 1.5256456136703491, "learning_rate": 1.4996659153052543e-05, "loss": 0.1837, "step": 15390 }, { "epoch": 0.33356444868155916, "grad_norm": 1.2835779190063477, "learning_rate": 1.4993710729532258e-05, "loss": 0.1216, "step": 15395 }, { "epoch": 0.33367278400104, "grad_norm": 2.7881476879119873, "learning_rate": 1.4990761727565388e-05, "loss": 0.308, "step": 15400 }, { "epoch": 0.3337811193205209, "grad_norm": 2.0394322872161865, "learning_rate": 1.4987812147493534e-05, "loss": 0.193, "step": 15405 }, { "epoch": 0.33388945464000175, "grad_norm": 1.396162509918213, "learning_rate": 1.4984861989658362e-05, "loss": 0.2099, "step": 15410 }, { "epoch": 0.3339977899594826, "grad_norm": 1.4975184202194214, "learning_rate": 1.4981911254401604e-05, "loss": 0.1557, "step": 15415 }, { "epoch": 0.33410612527896344, "grad_norm": 2.3160064220428467, "learning_rate": 1.4978959942065053e-05, "loss": 0.2978, "step": 15420 }, { "epoch": 0.3342144605984443, "grad_norm": 1.5050506591796875, "learning_rate": 1.4976008052990576e-05, "loss": 0.1858, "step": 15425 }, { "epoch": 0.3343227959179252, "grad_norm": 2.3044753074645996, "learning_rate": 1.4973055587520108e-05, "loss": 0.1974, "step": 15430 }, { "epoch": 0.33443113123740603, "grad_norm": 1.9111295938491821, "learning_rate": 1.4970102545995647e-05, "loss": 0.1916, "step": 15435 }, { "epoch": 0.3345394665568869, "grad_norm": 2.070918083190918, "learning_rate": 1.4967148928759259e-05, "loss": 0.2744, "step": 15440 }, { "epoch": 0.3346478018763677, "grad_norm": 2.244844913482666, "learning_rate": 1.4964194736153075e-05, "loss": 0.2982, "step": 15445 }, { "epoch": 0.33475613719584857, "grad_norm": 1.5282964706420898, "learning_rate": 1.4961239968519295e-05, "loss": 0.2326, "step": 15450 }, { "epoch": 0.33486447251532947, "grad_norm": 2.0586469173431396, "learning_rate": 1.495828462620018e-05, "loss": 0.1349, "step": 15455 }, { "epoch": 0.3349728078348103, "grad_norm": 1.730150818824768, "learning_rate": 1.495532870953807e-05, "loss": 0.2487, "step": 15460 }, { "epoch": 0.33508114315429116, "grad_norm": 1.752886176109314, "learning_rate": 1.495237221887536e-05, "loss": 0.2021, "step": 15465 }, { "epoch": 0.335189478473772, "grad_norm": 1.8700110912322998, "learning_rate": 1.4949415154554514e-05, "loss": 0.1807, "step": 15470 }, { "epoch": 0.33529781379325285, "grad_norm": 3.0619027614593506, "learning_rate": 1.4946457516918066e-05, "loss": 0.1728, "step": 15475 }, { "epoch": 0.33540614911273375, "grad_norm": 2.322248697280884, "learning_rate": 1.4943499306308609e-05, "loss": 0.2396, "step": 15480 }, { "epoch": 0.3355144844322146, "grad_norm": 1.6414151191711426, "learning_rate": 1.4940540523068813e-05, "loss": 0.2595, "step": 15485 }, { "epoch": 0.33562281975169544, "grad_norm": 1.9887186288833618, "learning_rate": 1.4937581167541406e-05, "loss": 0.171, "step": 15490 }, { "epoch": 0.3357311550711763, "grad_norm": 2.0903778076171875, "learning_rate": 1.4934621240069187e-05, "loss": 0.2365, "step": 15495 }, { "epoch": 0.33583949039065714, "grad_norm": 1.177736759185791, "learning_rate": 1.493166074099502e-05, "loss": 0.1481, "step": 15500 }, { "epoch": 0.33594782571013804, "grad_norm": 2.606887102127075, "learning_rate": 1.4928699670661828e-05, "loss": 0.2785, "step": 15505 }, { "epoch": 0.3360561610296189, "grad_norm": 2.2362022399902344, "learning_rate": 1.4925738029412613e-05, "loss": 0.3228, "step": 15510 }, { "epoch": 0.33616449634909973, "grad_norm": 1.5481576919555664, "learning_rate": 1.4922775817590437e-05, "loss": 0.2798, "step": 15515 }, { "epoch": 0.3362728316685806, "grad_norm": 1.3022876977920532, "learning_rate": 1.4919813035538422e-05, "loss": 0.1242, "step": 15520 }, { "epoch": 0.3363811669880615, "grad_norm": 1.5416380167007446, "learning_rate": 1.4916849683599766e-05, "loss": 0.1322, "step": 15525 }, { "epoch": 0.3364895023075423, "grad_norm": 1.0179752111434937, "learning_rate": 1.491388576211773e-05, "loss": 0.1841, "step": 15530 }, { "epoch": 0.33659783762702317, "grad_norm": 1.0873278379440308, "learning_rate": 1.491092127143564e-05, "loss": 0.1517, "step": 15535 }, { "epoch": 0.336706172946504, "grad_norm": 2.179431676864624, "learning_rate": 1.4907956211896886e-05, "loss": 0.1652, "step": 15540 }, { "epoch": 0.33681450826598486, "grad_norm": 1.9463918209075928, "learning_rate": 1.4904990583844923e-05, "loss": 0.2694, "step": 15545 }, { "epoch": 0.33692284358546576, "grad_norm": 1.706889271736145, "learning_rate": 1.490202438762328e-05, "loss": 0.1582, "step": 15550 }, { "epoch": 0.3370311789049466, "grad_norm": 1.5294557809829712, "learning_rate": 1.489905762357554e-05, "loss": 0.1963, "step": 15555 }, { "epoch": 0.33713951422442745, "grad_norm": 2.2283856868743896, "learning_rate": 1.4896090292045367e-05, "loss": 0.2789, "step": 15560 }, { "epoch": 0.3372478495439083, "grad_norm": 1.8152482509613037, "learning_rate": 1.4893122393376476e-05, "loss": 0.1927, "step": 15565 }, { "epoch": 0.33735618486338914, "grad_norm": 2.011565685272217, "learning_rate": 1.4890153927912654e-05, "loss": 0.1844, "step": 15570 }, { "epoch": 0.33746452018287004, "grad_norm": 1.6516958475112915, "learning_rate": 1.4887184895997755e-05, "loss": 0.2054, "step": 15575 }, { "epoch": 0.3375728555023509, "grad_norm": 2.0871384143829346, "learning_rate": 1.4884215297975694e-05, "loss": 0.2568, "step": 15580 }, { "epoch": 0.33768119082183173, "grad_norm": 2.8119113445281982, "learning_rate": 1.4881245134190458e-05, "loss": 0.1755, "step": 15585 }, { "epoch": 0.3377895261413126, "grad_norm": 2.1218419075012207, "learning_rate": 1.4878274404986095e-05, "loss": 0.2045, "step": 15590 }, { "epoch": 0.3378978614607934, "grad_norm": 1.712370753288269, "learning_rate": 1.4875303110706716e-05, "loss": 0.2003, "step": 15595 }, { "epoch": 0.3380061967802743, "grad_norm": 2.140299081802368, "learning_rate": 1.4872331251696504e-05, "loss": 0.182, "step": 15600 }, { "epoch": 0.33811453209975517, "grad_norm": 2.4266510009765625, "learning_rate": 1.4869358828299704e-05, "loss": 0.3478, "step": 15605 }, { "epoch": 0.338222867419236, "grad_norm": 1.8068751096725464, "learning_rate": 1.486638584086063e-05, "loss": 0.213, "step": 15610 }, { "epoch": 0.33833120273871686, "grad_norm": 1.9387575387954712, "learning_rate": 1.486341228972365e-05, "loss": 0.2447, "step": 15615 }, { "epoch": 0.3384395380581977, "grad_norm": 1.8388831615447998, "learning_rate": 1.4860438175233215e-05, "loss": 0.2339, "step": 15620 }, { "epoch": 0.3385478733776786, "grad_norm": 2.2363381385803223, "learning_rate": 1.4857463497733822e-05, "loss": 0.2225, "step": 15625 }, { "epoch": 0.33865620869715946, "grad_norm": 1.9986774921417236, "learning_rate": 1.485448825757005e-05, "loss": 0.168, "step": 15630 }, { "epoch": 0.3387645440166403, "grad_norm": 1.0578649044036865, "learning_rate": 1.4851512455086535e-05, "loss": 0.2111, "step": 15635 }, { "epoch": 0.33887287933612115, "grad_norm": 2.2973854541778564, "learning_rate": 1.4848536090627975e-05, "loss": 0.2309, "step": 15640 }, { "epoch": 0.338981214655602, "grad_norm": 1.818691372871399, "learning_rate": 1.4845559164539144e-05, "loss": 0.1838, "step": 15645 }, { "epoch": 0.3390895499750829, "grad_norm": 1.4645838737487793, "learning_rate": 1.4842581677164864e-05, "loss": 0.2313, "step": 15650 }, { "epoch": 0.33919788529456374, "grad_norm": 1.5915101766586304, "learning_rate": 1.4839603628850043e-05, "loss": 0.1279, "step": 15655 }, { "epoch": 0.3393062206140446, "grad_norm": 1.037771224975586, "learning_rate": 1.483662501993964e-05, "loss": 0.1898, "step": 15660 }, { "epoch": 0.33941455593352543, "grad_norm": 1.8157217502593994, "learning_rate": 1.4833645850778677e-05, "loss": 0.2754, "step": 15665 }, { "epoch": 0.33952289125300633, "grad_norm": 1.2016284465789795, "learning_rate": 1.4830666121712252e-05, "loss": 0.297, "step": 15670 }, { "epoch": 0.3396312265724872, "grad_norm": 2.375385046005249, "learning_rate": 1.4827685833085519e-05, "loss": 0.1789, "step": 15675 }, { "epoch": 0.339739561891968, "grad_norm": 2.2038166522979736, "learning_rate": 1.4824704985243703e-05, "loss": 0.2254, "step": 15680 }, { "epoch": 0.33984789721144887, "grad_norm": 2.442315101623535, "learning_rate": 1.4821723578532087e-05, "loss": 0.2583, "step": 15685 }, { "epoch": 0.3399562325309297, "grad_norm": 5.3440070152282715, "learning_rate": 1.4818741613296026e-05, "loss": 0.2163, "step": 15690 }, { "epoch": 0.3400645678504106, "grad_norm": 1.3906580209732056, "learning_rate": 1.4815759089880932e-05, "loss": 0.1768, "step": 15695 }, { "epoch": 0.34017290316989146, "grad_norm": 1.272980809211731, "learning_rate": 1.481277600863229e-05, "loss": 0.22, "step": 15700 }, { "epoch": 0.3402812384893723, "grad_norm": 2.839505195617676, "learning_rate": 1.480979236989564e-05, "loss": 0.2036, "step": 15705 }, { "epoch": 0.34038957380885315, "grad_norm": 1.0160205364227295, "learning_rate": 1.4806808174016596e-05, "loss": 0.1799, "step": 15710 }, { "epoch": 0.340497909128334, "grad_norm": 2.0321502685546875, "learning_rate": 1.480382342134083e-05, "loss": 0.1392, "step": 15715 }, { "epoch": 0.3406062444478149, "grad_norm": 2.6618499755859375, "learning_rate": 1.4800838112214079e-05, "loss": 0.3114, "step": 15720 }, { "epoch": 0.34071457976729574, "grad_norm": 1.8576725721359253, "learning_rate": 1.4797852246982154e-05, "loss": 0.1525, "step": 15725 }, { "epoch": 0.3408229150867766, "grad_norm": 3.711806535720825, "learning_rate": 1.4794865825990918e-05, "loss": 0.2042, "step": 15730 }, { "epoch": 0.34093125040625744, "grad_norm": 2.079113483428955, "learning_rate": 1.47918788495863e-05, "loss": 0.2253, "step": 15735 }, { "epoch": 0.3410395857257383, "grad_norm": 2.352710008621216, "learning_rate": 1.4788891318114305e-05, "loss": 0.197, "step": 15740 }, { "epoch": 0.3411479210452192, "grad_norm": 2.085097551345825, "learning_rate": 1.4785903231920982e-05, "loss": 0.2303, "step": 15745 }, { "epoch": 0.34125625636470003, "grad_norm": 1.7999598979949951, "learning_rate": 1.4782914591352466e-05, "loss": 0.1701, "step": 15750 }, { "epoch": 0.3413645916841809, "grad_norm": 1.3487509489059448, "learning_rate": 1.4779925396754941e-05, "loss": 0.2287, "step": 15755 }, { "epoch": 0.3414729270036617, "grad_norm": 2.323197364807129, "learning_rate": 1.4776935648474663e-05, "loss": 0.2681, "step": 15760 }, { "epoch": 0.34158126232314256, "grad_norm": 2.176417350769043, "learning_rate": 1.477394534685795e-05, "loss": 0.3186, "step": 15765 }, { "epoch": 0.34168959764262347, "grad_norm": 1.591259479522705, "learning_rate": 1.477095449225118e-05, "loss": 0.2781, "step": 15770 }, { "epoch": 0.3417979329621043, "grad_norm": 2.7956299781799316, "learning_rate": 1.4767963085000802e-05, "loss": 0.2241, "step": 15775 }, { "epoch": 0.34190626828158516, "grad_norm": 2.295281410217285, "learning_rate": 1.4764971125453324e-05, "loss": 0.256, "step": 15780 }, { "epoch": 0.342014603601066, "grad_norm": 1.8056915998458862, "learning_rate": 1.4761978613955323e-05, "loss": 0.2336, "step": 15785 }, { "epoch": 0.3421229389205469, "grad_norm": 2.0728602409362793, "learning_rate": 1.4758985550853428e-05, "loss": 0.2238, "step": 15790 }, { "epoch": 0.34223127424002775, "grad_norm": 1.142870545387268, "learning_rate": 1.4755991936494352e-05, "loss": 0.2158, "step": 15795 }, { "epoch": 0.3423396095595086, "grad_norm": 1.6583644151687622, "learning_rate": 1.4752997771224853e-05, "loss": 0.1754, "step": 15800 }, { "epoch": 0.34244794487898944, "grad_norm": 1.6810423135757446, "learning_rate": 1.4750003055391765e-05, "loss": 0.1561, "step": 15805 }, { "epoch": 0.3425562801984703, "grad_norm": 1.749264121055603, "learning_rate": 1.4747007789341974e-05, "loss": 0.2038, "step": 15810 }, { "epoch": 0.3426646155179512, "grad_norm": 1.4664204120635986, "learning_rate": 1.4744011973422441e-05, "loss": 0.1749, "step": 15815 }, { "epoch": 0.34277295083743203, "grad_norm": 2.086442232131958, "learning_rate": 1.474101560798019e-05, "loss": 0.1663, "step": 15820 }, { "epoch": 0.3428812861569129, "grad_norm": 1.8279842138290405, "learning_rate": 1.4738018693362296e-05, "loss": 0.2529, "step": 15825 }, { "epoch": 0.3429896214763937, "grad_norm": 1.9686055183410645, "learning_rate": 1.4735021229915916e-05, "loss": 0.1836, "step": 15830 }, { "epoch": 0.34309795679587457, "grad_norm": 1.8279647827148438, "learning_rate": 1.4732023217988256e-05, "loss": 0.2011, "step": 15835 }, { "epoch": 0.34320629211535547, "grad_norm": 2.203742265701294, "learning_rate": 1.4729024657926589e-05, "loss": 0.3086, "step": 15840 }, { "epoch": 0.3433146274348363, "grad_norm": 2.0538370609283447, "learning_rate": 1.4726025550078257e-05, "loss": 0.2008, "step": 15845 }, { "epoch": 0.34342296275431716, "grad_norm": 1.5192116498947144, "learning_rate": 1.4723025894790665e-05, "loss": 0.2197, "step": 15850 }, { "epoch": 0.343531298073798, "grad_norm": 1.7838120460510254, "learning_rate": 1.472002569241127e-05, "loss": 0.2697, "step": 15855 }, { "epoch": 0.34363963339327885, "grad_norm": 1.9677250385284424, "learning_rate": 1.471702494328761e-05, "loss": 0.2215, "step": 15860 }, { "epoch": 0.34374796871275975, "grad_norm": 1.731826901435852, "learning_rate": 1.4714023647767265e-05, "loss": 0.1434, "step": 15865 }, { "epoch": 0.3438563040322406, "grad_norm": 2.0830440521240234, "learning_rate": 1.47110218061979e-05, "loss": 0.2321, "step": 15870 }, { "epoch": 0.34396463935172145, "grad_norm": 1.3075186014175415, "learning_rate": 1.4708019418927228e-05, "loss": 0.1363, "step": 15875 }, { "epoch": 0.3440729746712023, "grad_norm": 2.0934829711914062, "learning_rate": 1.4705016486303034e-05, "loss": 0.2028, "step": 15880 }, { "epoch": 0.34418130999068314, "grad_norm": 1.2974474430084229, "learning_rate": 1.470201300867316e-05, "loss": 0.2675, "step": 15885 }, { "epoch": 0.34428964531016404, "grad_norm": 1.9624452590942383, "learning_rate": 1.4699008986385515e-05, "loss": 0.2802, "step": 15890 }, { "epoch": 0.3443979806296449, "grad_norm": 1.7916702032089233, "learning_rate": 1.4696004419788072e-05, "loss": 0.1573, "step": 15895 }, { "epoch": 0.34450631594912573, "grad_norm": 2.5479965209960938, "learning_rate": 1.4692999309228861e-05, "loss": 0.1929, "step": 15900 }, { "epoch": 0.3446146512686066, "grad_norm": 2.295414686203003, "learning_rate": 1.4689993655055983e-05, "loss": 0.238, "step": 15905 }, { "epoch": 0.3447229865880874, "grad_norm": 1.876362681388855, "learning_rate": 1.4686987457617594e-05, "loss": 0.2144, "step": 15910 }, { "epoch": 0.3448313219075683, "grad_norm": 2.094738006591797, "learning_rate": 1.4683980717261918e-05, "loss": 0.1852, "step": 15915 }, { "epoch": 0.34493965722704917, "grad_norm": 1.3280586004257202, "learning_rate": 1.4680973434337245e-05, "loss": 0.1598, "step": 15920 }, { "epoch": 0.34504799254653, "grad_norm": 2.0957753658294678, "learning_rate": 1.467796560919192e-05, "loss": 0.3034, "step": 15925 }, { "epoch": 0.34515632786601086, "grad_norm": 1.4789437055587769, "learning_rate": 1.4674957242174355e-05, "loss": 0.1756, "step": 15930 }, { "epoch": 0.34526466318549176, "grad_norm": 1.6434253454208374, "learning_rate": 1.4671948333633024e-05, "loss": 0.0947, "step": 15935 }, { "epoch": 0.3453729985049726, "grad_norm": 1.3445769548416138, "learning_rate": 1.4668938883916463e-05, "loss": 0.1868, "step": 15940 }, { "epoch": 0.34548133382445345, "grad_norm": 1.3801053762435913, "learning_rate": 1.4665928893373276e-05, "loss": 0.1945, "step": 15945 }, { "epoch": 0.3455896691439343, "grad_norm": 2.003896474838257, "learning_rate": 1.466291836235212e-05, "loss": 0.1918, "step": 15950 }, { "epoch": 0.34569800446341514, "grad_norm": 1.9896851778030396, "learning_rate": 1.4659907291201725e-05, "loss": 0.1655, "step": 15955 }, { "epoch": 0.34580633978289604, "grad_norm": 1.2389754056930542, "learning_rate": 1.465689568027087e-05, "loss": 0.3811, "step": 15960 }, { "epoch": 0.3459146751023769, "grad_norm": 1.6124751567840576, "learning_rate": 1.4653883529908415e-05, "loss": 0.1661, "step": 15965 }, { "epoch": 0.34602301042185774, "grad_norm": 1.7534297704696655, "learning_rate": 1.465087084046327e-05, "loss": 0.2206, "step": 15970 }, { "epoch": 0.3461313457413386, "grad_norm": 1.7160191535949707, "learning_rate": 1.4647857612284405e-05, "loss": 0.2193, "step": 15975 }, { "epoch": 0.3462396810608194, "grad_norm": 1.9252064228057861, "learning_rate": 1.4644843845720861e-05, "loss": 0.1887, "step": 15980 }, { "epoch": 0.3463480163803003, "grad_norm": 1.1280481815338135, "learning_rate": 1.4641829541121739e-05, "loss": 0.2478, "step": 15985 }, { "epoch": 0.3464563516997812, "grad_norm": 2.3981213569641113, "learning_rate": 1.4638814698836196e-05, "loss": 0.2115, "step": 15990 }, { "epoch": 0.346564687019262, "grad_norm": 1.7565580606460571, "learning_rate": 1.4635799319213462e-05, "loss": 0.3022, "step": 15995 }, { "epoch": 0.34667302233874286, "grad_norm": 1.4944944381713867, "learning_rate": 1.4632783402602822e-05, "loss": 0.1689, "step": 16000 }, { "epoch": 0.3467813576582237, "grad_norm": 1.7455041408538818, "learning_rate": 1.4629766949353621e-05, "loss": 0.1859, "step": 16005 }, { "epoch": 0.3468896929777046, "grad_norm": 2.1084647178649902, "learning_rate": 1.4626749959815275e-05, "loss": 0.2472, "step": 16010 }, { "epoch": 0.34699802829718546, "grad_norm": 2.977743625640869, "learning_rate": 1.4623732434337253e-05, "loss": 0.2101, "step": 16015 }, { "epoch": 0.3471063636166663, "grad_norm": 1.2965917587280273, "learning_rate": 1.4620714373269096e-05, "loss": 0.1977, "step": 16020 }, { "epoch": 0.34721469893614715, "grad_norm": 1.2756582498550415, "learning_rate": 1.4617695776960394e-05, "loss": 0.2645, "step": 16025 }, { "epoch": 0.347323034255628, "grad_norm": 2.0041234493255615, "learning_rate": 1.461467664576081e-05, "loss": 0.2211, "step": 16030 }, { "epoch": 0.3474313695751089, "grad_norm": 1.783522605895996, "learning_rate": 1.4611656980020062e-05, "loss": 0.1861, "step": 16035 }, { "epoch": 0.34753970489458974, "grad_norm": 1.7868845462799072, "learning_rate": 1.4608636780087937e-05, "loss": 0.2189, "step": 16040 }, { "epoch": 0.3476480402140706, "grad_norm": 1.6443296670913696, "learning_rate": 1.460561604631428e-05, "loss": 0.2293, "step": 16045 }, { "epoch": 0.34775637553355143, "grad_norm": 1.5248733758926392, "learning_rate": 1.4602594779048995e-05, "loss": 0.1722, "step": 16050 }, { "epoch": 0.34786471085303233, "grad_norm": 2.0964856147766113, "learning_rate": 1.4599572978642052e-05, "loss": 0.1635, "step": 16055 }, { "epoch": 0.3479730461725132, "grad_norm": 1.4121421575546265, "learning_rate": 1.4596550645443482e-05, "loss": 0.1986, "step": 16060 }, { "epoch": 0.348081381491994, "grad_norm": 2.519805669784546, "learning_rate": 1.4593527779803374e-05, "loss": 0.2798, "step": 16065 }, { "epoch": 0.34818971681147487, "grad_norm": 1.559249758720398, "learning_rate": 1.4590504382071885e-05, "loss": 0.231, "step": 16070 }, { "epoch": 0.3482980521309557, "grad_norm": 2.9146728515625, "learning_rate": 1.458748045259923e-05, "loss": 0.2498, "step": 16075 }, { "epoch": 0.3484063874504366, "grad_norm": 1.7728729248046875, "learning_rate": 1.458445599173568e-05, "loss": 0.2271, "step": 16080 }, { "epoch": 0.34851472276991746, "grad_norm": 1.6134612560272217, "learning_rate": 1.4581430999831583e-05, "loss": 0.1781, "step": 16085 }, { "epoch": 0.3486230580893983, "grad_norm": 2.3181746006011963, "learning_rate": 1.4578405477237334e-05, "loss": 0.2704, "step": 16090 }, { "epoch": 0.34873139340887915, "grad_norm": 1.3158106803894043, "learning_rate": 1.4575379424303395e-05, "loss": 0.2012, "step": 16095 }, { "epoch": 0.34883972872836, "grad_norm": 1.4170711040496826, "learning_rate": 1.4572352841380288e-05, "loss": 0.17, "step": 16100 }, { "epoch": 0.3489480640478409, "grad_norm": 2.1973719596862793, "learning_rate": 1.45693257288186e-05, "loss": 0.2178, "step": 16105 }, { "epoch": 0.34905639936732175, "grad_norm": 2.0337469577789307, "learning_rate": 1.4566298086968973e-05, "loss": 0.2377, "step": 16110 }, { "epoch": 0.3491647346868026, "grad_norm": 2.541823148727417, "learning_rate": 1.4563269916182117e-05, "loss": 0.2225, "step": 16115 }, { "epoch": 0.34927307000628344, "grad_norm": 1.1933866739273071, "learning_rate": 1.45602412168088e-05, "loss": 0.1397, "step": 16120 }, { "epoch": 0.3493814053257643, "grad_norm": 1.7703380584716797, "learning_rate": 1.4557211989199847e-05, "loss": 0.2389, "step": 16125 }, { "epoch": 0.3494897406452452, "grad_norm": 1.890524983406067, "learning_rate": 1.4554182233706154e-05, "loss": 0.174, "step": 16130 }, { "epoch": 0.34959807596472603, "grad_norm": 2.4532904624938965, "learning_rate": 1.455115195067867e-05, "loss": 0.2536, "step": 16135 }, { "epoch": 0.3497064112842069, "grad_norm": 1.8317806720733643, "learning_rate": 1.454812114046841e-05, "loss": 0.2346, "step": 16140 }, { "epoch": 0.3498147466036877, "grad_norm": 2.4970178604125977, "learning_rate": 1.4545089803426447e-05, "loss": 0.2199, "step": 16145 }, { "epoch": 0.34992308192316857, "grad_norm": 2.365018129348755, "learning_rate": 1.4542057939903915e-05, "loss": 0.2403, "step": 16150 }, { "epoch": 0.35003141724264947, "grad_norm": 1.9780700206756592, "learning_rate": 1.4539025550252009e-05, "loss": 0.1821, "step": 16155 }, { "epoch": 0.3501397525621303, "grad_norm": 2.0824387073516846, "learning_rate": 1.4535992634821989e-05, "loss": 0.1943, "step": 16160 }, { "epoch": 0.35024808788161116, "grad_norm": 1.8408211469650269, "learning_rate": 1.4532959193965172e-05, "loss": 0.1863, "step": 16165 }, { "epoch": 0.350356423201092, "grad_norm": 1.7170772552490234, "learning_rate": 1.4529925228032938e-05, "loss": 0.2837, "step": 16170 }, { "epoch": 0.35046475852057285, "grad_norm": 1.380959391593933, "learning_rate": 1.452689073737672e-05, "loss": 0.2351, "step": 16175 }, { "epoch": 0.35057309384005375, "grad_norm": 2.016590118408203, "learning_rate": 1.4523855722348026e-05, "loss": 0.1575, "step": 16180 }, { "epoch": 0.3506814291595346, "grad_norm": 1.6139734983444214, "learning_rate": 1.4520820183298414e-05, "loss": 0.1995, "step": 16185 }, { "epoch": 0.35078976447901544, "grad_norm": 2.153280019760132, "learning_rate": 1.4517784120579505e-05, "loss": 0.1805, "step": 16190 }, { "epoch": 0.3508980997984963, "grad_norm": 1.9874204397201538, "learning_rate": 1.4514747534542981e-05, "loss": 0.1823, "step": 16195 }, { "epoch": 0.3510064351179772, "grad_norm": 1.765692114830017, "learning_rate": 1.4511710425540585e-05, "loss": 0.2473, "step": 16200 }, { "epoch": 0.35111477043745803, "grad_norm": 1.4270234107971191, "learning_rate": 1.4508672793924123e-05, "loss": 0.2471, "step": 16205 }, { "epoch": 0.3512231057569389, "grad_norm": 1.6723833084106445, "learning_rate": 1.4505634640045458e-05, "loss": 0.2645, "step": 16210 }, { "epoch": 0.3513314410764197, "grad_norm": 2.657459020614624, "learning_rate": 1.4502595964256515e-05, "loss": 0.2124, "step": 16215 }, { "epoch": 0.35143977639590057, "grad_norm": 1.2996262311935425, "learning_rate": 1.4499556766909279e-05, "loss": 0.2218, "step": 16220 }, { "epoch": 0.3515481117153815, "grad_norm": 2.01252818107605, "learning_rate": 1.4496517048355794e-05, "loss": 0.1782, "step": 16225 }, { "epoch": 0.3516564470348623, "grad_norm": 1.8185789585113525, "learning_rate": 1.4493476808948168e-05, "loss": 0.2651, "step": 16230 }, { "epoch": 0.35176478235434316, "grad_norm": 1.9229177236557007, "learning_rate": 1.4490436049038565e-05, "loss": 0.2141, "step": 16235 }, { "epoch": 0.351873117673824, "grad_norm": 1.9024420976638794, "learning_rate": 1.448739476897921e-05, "loss": 0.244, "step": 16240 }, { "epoch": 0.35198145299330486, "grad_norm": 1.7461750507354736, "learning_rate": 1.4484352969122398e-05, "loss": 0.189, "step": 16245 }, { "epoch": 0.35208978831278576, "grad_norm": 1.429639458656311, "learning_rate": 1.4481310649820462e-05, "loss": 0.1934, "step": 16250 }, { "epoch": 0.3521981236322666, "grad_norm": 2.223825216293335, "learning_rate": 1.4478267811425823e-05, "loss": 0.1864, "step": 16255 }, { "epoch": 0.35230645895174745, "grad_norm": 2.307648181915283, "learning_rate": 1.4475224454290943e-05, "loss": 0.1851, "step": 16260 }, { "epoch": 0.3524147942712283, "grad_norm": 1.973677635192871, "learning_rate": 1.4472180578768347e-05, "loss": 0.25, "step": 16265 }, { "epoch": 0.35252312959070914, "grad_norm": 2.398192882537842, "learning_rate": 1.4469136185210624e-05, "loss": 0.2515, "step": 16270 }, { "epoch": 0.35263146491019004, "grad_norm": 1.3809813261032104, "learning_rate": 1.4466091273970419e-05, "loss": 0.2492, "step": 16275 }, { "epoch": 0.3527398002296709, "grad_norm": 1.6477237939834595, "learning_rate": 1.4463045845400445e-05, "loss": 0.147, "step": 16280 }, { "epoch": 0.35284813554915173, "grad_norm": 2.152745485305786, "learning_rate": 1.4459999899853467e-05, "loss": 0.1694, "step": 16285 }, { "epoch": 0.3529564708686326, "grad_norm": 1.7155814170837402, "learning_rate": 1.445695343768231e-05, "loss": 0.1586, "step": 16290 }, { "epoch": 0.3530648061881134, "grad_norm": 1.6494520902633667, "learning_rate": 1.4453906459239863e-05, "loss": 0.2205, "step": 16295 }, { "epoch": 0.3531731415075943, "grad_norm": 2.344989061355591, "learning_rate": 1.4450858964879073e-05, "loss": 0.2846, "step": 16300 }, { "epoch": 0.35328147682707517, "grad_norm": 1.5048540830612183, "learning_rate": 1.4447810954952946e-05, "loss": 0.2305, "step": 16305 }, { "epoch": 0.353389812146556, "grad_norm": 1.6044279336929321, "learning_rate": 1.4444762429814545e-05, "loss": 0.3061, "step": 16310 }, { "epoch": 0.35349814746603686, "grad_norm": 2.3960487842559814, "learning_rate": 1.4441713389817002e-05, "loss": 0.1791, "step": 16315 }, { "epoch": 0.35360648278551776, "grad_norm": 1.5685083866119385, "learning_rate": 1.4438663835313498e-05, "loss": 0.2418, "step": 16320 }, { "epoch": 0.3537148181049986, "grad_norm": 1.7616521120071411, "learning_rate": 1.443561376665728e-05, "loss": 0.2551, "step": 16325 }, { "epoch": 0.35382315342447945, "grad_norm": 1.607089638710022, "learning_rate": 1.4432563184201653e-05, "loss": 0.165, "step": 16330 }, { "epoch": 0.3539314887439603, "grad_norm": 1.6312578916549683, "learning_rate": 1.4429512088299982e-05, "loss": 0.2366, "step": 16335 }, { "epoch": 0.35403982406344114, "grad_norm": 1.7385650873184204, "learning_rate": 1.4426460479305688e-05, "loss": 0.1759, "step": 16340 }, { "epoch": 0.35414815938292205, "grad_norm": 2.2979748249053955, "learning_rate": 1.4423408357572255e-05, "loss": 0.169, "step": 16345 }, { "epoch": 0.3542564947024029, "grad_norm": 1.3316103219985962, "learning_rate": 1.442035572345323e-05, "loss": 0.2158, "step": 16350 }, { "epoch": 0.35436483002188374, "grad_norm": 2.3800711631774902, "learning_rate": 1.4417302577302207e-05, "loss": 0.1518, "step": 16355 }, { "epoch": 0.3544731653413646, "grad_norm": 1.8499126434326172, "learning_rate": 1.4414248919472855e-05, "loss": 0.1398, "step": 16360 }, { "epoch": 0.35458150066084543, "grad_norm": 1.853880524635315, "learning_rate": 1.441119475031889e-05, "loss": 0.1737, "step": 16365 }, { "epoch": 0.35468983598032633, "grad_norm": 0.9665327072143555, "learning_rate": 1.4408140070194093e-05, "loss": 0.1796, "step": 16370 }, { "epoch": 0.3547981712998072, "grad_norm": 1.8021571636199951, "learning_rate": 1.44050848794523e-05, "loss": 0.2503, "step": 16375 }, { "epoch": 0.354906506619288, "grad_norm": 2.226574659347534, "learning_rate": 1.4402029178447419e-05, "loss": 0.2226, "step": 16380 }, { "epoch": 0.35501484193876887, "grad_norm": 1.2485706806182861, "learning_rate": 1.4398972967533395e-05, "loss": 0.2533, "step": 16385 }, { "epoch": 0.3551231772582497, "grad_norm": 2.3458759784698486, "learning_rate": 1.439591624706425e-05, "loss": 0.2012, "step": 16390 }, { "epoch": 0.3552315125777306, "grad_norm": 1.886288046836853, "learning_rate": 1.4392859017394056e-05, "loss": 0.1687, "step": 16395 }, { "epoch": 0.35533984789721146, "grad_norm": 1.7543731927871704, "learning_rate": 1.4389801278876953e-05, "loss": 0.2212, "step": 16400 }, { "epoch": 0.3554481832166923, "grad_norm": 2.014305353164673, "learning_rate": 1.438674303186713e-05, "loss": 0.1912, "step": 16405 }, { "epoch": 0.35555651853617315, "grad_norm": 1.6904191970825195, "learning_rate": 1.438368427671884e-05, "loss": 0.1898, "step": 16410 }, { "epoch": 0.355664853855654, "grad_norm": 4.081460475921631, "learning_rate": 1.4380625013786396e-05, "loss": 0.211, "step": 16415 }, { "epoch": 0.3557731891751349, "grad_norm": 2.1739957332611084, "learning_rate": 1.4377565243424166e-05, "loss": 0.2621, "step": 16420 }, { "epoch": 0.35588152449461574, "grad_norm": 1.9480650424957275, "learning_rate": 1.4374504965986575e-05, "loss": 0.2273, "step": 16425 }, { "epoch": 0.3559898598140966, "grad_norm": 1.9045460224151611, "learning_rate": 1.4371444181828117e-05, "loss": 0.2455, "step": 16430 }, { "epoch": 0.35609819513357743, "grad_norm": 1.7916266918182373, "learning_rate": 1.4368382891303335e-05, "loss": 0.1908, "step": 16435 }, { "epoch": 0.3562065304530583, "grad_norm": 2.087545156478882, "learning_rate": 1.436532109476683e-05, "loss": 0.1486, "step": 16440 }, { "epoch": 0.3563148657725392, "grad_norm": 2.0620317459106445, "learning_rate": 1.4362258792573269e-05, "loss": 0.2218, "step": 16445 }, { "epoch": 0.35642320109202, "grad_norm": 1.5721389055252075, "learning_rate": 1.4359195985077377e-05, "loss": 0.2199, "step": 16450 }, { "epoch": 0.35653153641150087, "grad_norm": 2.3285672664642334, "learning_rate": 1.435613267263393e-05, "loss": 0.2112, "step": 16455 }, { "epoch": 0.3566398717309817, "grad_norm": 1.425586462020874, "learning_rate": 1.4353068855597766e-05, "loss": 0.2211, "step": 16460 }, { "epoch": 0.3567482070504626, "grad_norm": 1.6877063512802124, "learning_rate": 1.4350004534323785e-05, "loss": 0.1366, "step": 16465 }, { "epoch": 0.35685654236994346, "grad_norm": 1.8912030458450317, "learning_rate": 1.4346939709166942e-05, "loss": 0.1792, "step": 16470 }, { "epoch": 0.3569648776894243, "grad_norm": 3.027107000350952, "learning_rate": 1.434387438048225e-05, "loss": 0.2165, "step": 16475 }, { "epoch": 0.35707321300890515, "grad_norm": 1.2095658779144287, "learning_rate": 1.4340808548624785e-05, "loss": 0.1587, "step": 16480 }, { "epoch": 0.357181548328386, "grad_norm": 2.4074723720550537, "learning_rate": 1.433774221394967e-05, "loss": 0.1448, "step": 16485 }, { "epoch": 0.3572898836478669, "grad_norm": 1.6898397207260132, "learning_rate": 1.4334675376812102e-05, "loss": 0.2681, "step": 16490 }, { "epoch": 0.35739821896734775, "grad_norm": 1.5591439008712769, "learning_rate": 1.4331608037567324e-05, "loss": 0.2257, "step": 16495 }, { "epoch": 0.3575065542868286, "grad_norm": 1.9151735305786133, "learning_rate": 1.4328540196570644e-05, "loss": 0.2019, "step": 16500 }, { "epoch": 0.35761488960630944, "grad_norm": 1.3414801359176636, "learning_rate": 1.4325471854177424e-05, "loss": 0.1649, "step": 16505 }, { "epoch": 0.3577232249257903, "grad_norm": 1.8558109998703003, "learning_rate": 1.4322403010743085e-05, "loss": 0.232, "step": 16510 }, { "epoch": 0.3578315602452712, "grad_norm": 1.7858588695526123, "learning_rate": 1.4319333666623104e-05, "loss": 0.1423, "step": 16515 }, { "epoch": 0.35793989556475203, "grad_norm": 1.8715921640396118, "learning_rate": 1.4316263822173022e-05, "loss": 0.2126, "step": 16520 }, { "epoch": 0.3580482308842329, "grad_norm": 1.1934635639190674, "learning_rate": 1.4313193477748435e-05, "loss": 0.1716, "step": 16525 }, { "epoch": 0.3581565662037137, "grad_norm": 1.7199139595031738, "learning_rate": 1.4310122633704996e-05, "loss": 0.2704, "step": 16530 }, { "epoch": 0.35826490152319457, "grad_norm": 2.1004550457000732, "learning_rate": 1.4307051290398415e-05, "loss": 0.251, "step": 16535 }, { "epoch": 0.35837323684267547, "grad_norm": 1.5500274896621704, "learning_rate": 1.4303979448184461e-05, "loss": 0.1703, "step": 16540 }, { "epoch": 0.3584815721621563, "grad_norm": 1.5864309072494507, "learning_rate": 1.4300907107418961e-05, "loss": 0.2266, "step": 16545 }, { "epoch": 0.35858990748163716, "grad_norm": 1.8449493646621704, "learning_rate": 1.4297834268457803e-05, "loss": 0.2151, "step": 16550 }, { "epoch": 0.358698242801118, "grad_norm": 1.887195110321045, "learning_rate": 1.4294760931656924e-05, "loss": 0.1396, "step": 16555 }, { "epoch": 0.35880657812059885, "grad_norm": 1.930528163909912, "learning_rate": 1.429168709737233e-05, "loss": 0.231, "step": 16560 }, { "epoch": 0.35891491344007975, "grad_norm": 1.7346237897872925, "learning_rate": 1.4288612765960068e-05, "loss": 0.2736, "step": 16565 }, { "epoch": 0.3590232487595606, "grad_norm": 2.456427574157715, "learning_rate": 1.4285537937776266e-05, "loss": 0.2453, "step": 16570 }, { "epoch": 0.35913158407904144, "grad_norm": 1.8923609256744385, "learning_rate": 1.428246261317709e-05, "loss": 0.1534, "step": 16575 }, { "epoch": 0.3592399193985223, "grad_norm": 1.8328025341033936, "learning_rate": 1.4279386792518772e-05, "loss": 0.2783, "step": 16580 }, { "epoch": 0.3593482547180032, "grad_norm": 2.566894292831421, "learning_rate": 1.42763104761576e-05, "loss": 0.2351, "step": 16585 }, { "epoch": 0.35945659003748404, "grad_norm": 2.77567195892334, "learning_rate": 1.4273233664449918e-05, "loss": 0.2897, "step": 16590 }, { "epoch": 0.3595649253569649, "grad_norm": 2.245323419570923, "learning_rate": 1.4270156357752128e-05, "loss": 0.1923, "step": 16595 }, { "epoch": 0.3596732606764457, "grad_norm": 2.1541080474853516, "learning_rate": 1.426707855642069e-05, "loss": 0.19, "step": 16600 }, { "epoch": 0.3597815959959266, "grad_norm": 2.9091031551361084, "learning_rate": 1.4264000260812125e-05, "loss": 0.3043, "step": 16605 }, { "epoch": 0.3598899313154075, "grad_norm": 0.7991570234298706, "learning_rate": 1.4260921471283e-05, "loss": 0.1244, "step": 16610 }, { "epoch": 0.3599982666348883, "grad_norm": 2.3114430904388428, "learning_rate": 1.4257842188189954e-05, "loss": 0.233, "step": 16615 }, { "epoch": 0.36010660195436917, "grad_norm": 1.84245765209198, "learning_rate": 1.4254762411889673e-05, "loss": 0.138, "step": 16620 }, { "epoch": 0.36021493727385, "grad_norm": 1.71779203414917, "learning_rate": 1.4251682142738906e-05, "loss": 0.2779, "step": 16625 }, { "epoch": 0.36032327259333086, "grad_norm": 1.9271316528320312, "learning_rate": 1.4248601381094452e-05, "loss": 0.2939, "step": 16630 }, { "epoch": 0.36043160791281176, "grad_norm": 1.84297776222229, "learning_rate": 1.4245520127313166e-05, "loss": 0.2653, "step": 16635 }, { "epoch": 0.3605399432322926, "grad_norm": 1.998805046081543, "learning_rate": 1.4242438381751976e-05, "loss": 0.2037, "step": 16640 }, { "epoch": 0.36064827855177345, "grad_norm": 1.67025887966156, "learning_rate": 1.423935614476785e-05, "loss": 0.1494, "step": 16645 }, { "epoch": 0.3607566138712543, "grad_norm": 2.065426826477051, "learning_rate": 1.423627341671782e-05, "loss": 0.2429, "step": 16650 }, { "epoch": 0.36086494919073514, "grad_norm": 2.4929792881011963, "learning_rate": 1.4233190197958976e-05, "loss": 0.2248, "step": 16655 }, { "epoch": 0.36097328451021604, "grad_norm": 2.000175952911377, "learning_rate": 1.4230106488848461e-05, "loss": 0.2314, "step": 16660 }, { "epoch": 0.3610816198296969, "grad_norm": 2.6011791229248047, "learning_rate": 1.4227022289743472e-05, "loss": 0.2298, "step": 16665 }, { "epoch": 0.36118995514917773, "grad_norm": 1.5000661611557007, "learning_rate": 1.4223937601001276e-05, "loss": 0.2479, "step": 16670 }, { "epoch": 0.3612982904686586, "grad_norm": 1.596360206604004, "learning_rate": 1.422085242297918e-05, "loss": 0.2198, "step": 16675 }, { "epoch": 0.3614066257881394, "grad_norm": 3.091912269592285, "learning_rate": 1.4217766756034563e-05, "loss": 0.2386, "step": 16680 }, { "epoch": 0.3615149611076203, "grad_norm": 1.519888997077942, "learning_rate": 1.4214680600524843e-05, "loss": 0.258, "step": 16685 }, { "epoch": 0.36162329642710117, "grad_norm": 1.8947137594223022, "learning_rate": 1.4211593956807516e-05, "loss": 0.1993, "step": 16690 }, { "epoch": 0.361731631746582, "grad_norm": 1.6109607219696045, "learning_rate": 1.420850682524012e-05, "loss": 0.2191, "step": 16695 }, { "epoch": 0.36183996706606286, "grad_norm": 2.2447710037231445, "learning_rate": 1.420541920618025e-05, "loss": 0.1926, "step": 16700 }, { "epoch": 0.3619483023855437, "grad_norm": 2.0970826148986816, "learning_rate": 1.4202331099985562e-05, "loss": 0.2572, "step": 16705 }, { "epoch": 0.3620566377050246, "grad_norm": 1.694889783859253, "learning_rate": 1.4199242507013769e-05, "loss": 0.2053, "step": 16710 }, { "epoch": 0.36216497302450545, "grad_norm": 2.0337889194488525, "learning_rate": 1.4196153427622635e-05, "loss": 0.2585, "step": 16715 }, { "epoch": 0.3622733083439863, "grad_norm": 1.703412652015686, "learning_rate": 1.4193063862169982e-05, "loss": 0.1787, "step": 16720 }, { "epoch": 0.36238164366346715, "grad_norm": 2.4961771965026855, "learning_rate": 1.4189973811013695e-05, "loss": 0.2497, "step": 16725 }, { "epoch": 0.36248997898294805, "grad_norm": 2.1724202632904053, "learning_rate": 1.4186883274511708e-05, "loss": 0.2306, "step": 16730 }, { "epoch": 0.3625983143024289, "grad_norm": 1.6453161239624023, "learning_rate": 1.4183792253022012e-05, "loss": 0.1567, "step": 16735 }, { "epoch": 0.36270664962190974, "grad_norm": 1.9732192754745483, "learning_rate": 1.4180700746902658e-05, "loss": 0.2278, "step": 16740 }, { "epoch": 0.3628149849413906, "grad_norm": 1.6632282733917236, "learning_rate": 1.4177608756511752e-05, "loss": 0.2143, "step": 16745 }, { "epoch": 0.36292332026087143, "grad_norm": 1.9031721353530884, "learning_rate": 1.4174516282207446e-05, "loss": 0.277, "step": 16750 }, { "epoch": 0.36303165558035233, "grad_norm": 1.444787621498108, "learning_rate": 1.4171423324347966e-05, "loss": 0.2377, "step": 16755 }, { "epoch": 0.3631399908998332, "grad_norm": 1.7541612386703491, "learning_rate": 1.416832988329158e-05, "loss": 0.2232, "step": 16760 }, { "epoch": 0.363248326219314, "grad_norm": 1.923957347869873, "learning_rate": 1.4165235959396622e-05, "loss": 0.3352, "step": 16765 }, { "epoch": 0.36335666153879487, "grad_norm": 1.4658126831054688, "learning_rate": 1.4162141553021472e-05, "loss": 0.2651, "step": 16770 }, { "epoch": 0.3634649968582757, "grad_norm": 1.7136421203613281, "learning_rate": 1.4159046664524571e-05, "loss": 0.2145, "step": 16775 }, { "epoch": 0.3635733321777566, "grad_norm": 1.6197030544281006, "learning_rate": 1.4155951294264418e-05, "loss": 0.2357, "step": 16780 }, { "epoch": 0.36368166749723746, "grad_norm": 2.910484790802002, "learning_rate": 1.4152855442599562e-05, "loss": 0.2535, "step": 16785 }, { "epoch": 0.3637900028167183, "grad_norm": 1.2756881713867188, "learning_rate": 1.4149759109888614e-05, "loss": 0.2013, "step": 16790 }, { "epoch": 0.36389833813619915, "grad_norm": 2.0630617141723633, "learning_rate": 1.4146662296490236e-05, "loss": 0.1511, "step": 16795 }, { "epoch": 0.36400667345568, "grad_norm": 2.8010501861572266, "learning_rate": 1.4143565002763149e-05, "loss": 0.185, "step": 16800 }, { "epoch": 0.3641150087751609, "grad_norm": 2.107789993286133, "learning_rate": 1.4140467229066124e-05, "loss": 0.2321, "step": 16805 }, { "epoch": 0.36422334409464174, "grad_norm": 1.0180689096450806, "learning_rate": 1.4137368975757996e-05, "loss": 0.1506, "step": 16810 }, { "epoch": 0.3643316794141226, "grad_norm": 2.304908275604248, "learning_rate": 1.4134270243197653e-05, "loss": 0.2841, "step": 16815 }, { "epoch": 0.36444001473360343, "grad_norm": 1.306046962738037, "learning_rate": 1.413117103174403e-05, "loss": 0.2317, "step": 16820 }, { "epoch": 0.3645483500530843, "grad_norm": 1.549363374710083, "learning_rate": 1.4128071341756129e-05, "loss": 0.2048, "step": 16825 }, { "epoch": 0.3646566853725652, "grad_norm": 1.29304838180542, "learning_rate": 1.4124971173593002e-05, "loss": 0.1845, "step": 16830 }, { "epoch": 0.364765020692046, "grad_norm": 2.8610405921936035, "learning_rate": 1.4121870527613757e-05, "loss": 0.2017, "step": 16835 }, { "epoch": 0.3648733560115269, "grad_norm": 1.9925799369812012, "learning_rate": 1.4118769404177557e-05, "loss": 0.2705, "step": 16840 }, { "epoch": 0.3649816913310077, "grad_norm": 2.239644765853882, "learning_rate": 1.411566780364362e-05, "loss": 0.2271, "step": 16845 }, { "epoch": 0.3650900266504886, "grad_norm": 2.1727755069732666, "learning_rate": 1.4112565726371219e-05, "loss": 0.1267, "step": 16850 }, { "epoch": 0.36519836196996946, "grad_norm": 1.5525175333023071, "learning_rate": 1.4109463172719686e-05, "loss": 0.1904, "step": 16855 }, { "epoch": 0.3653066972894503, "grad_norm": 1.5898340940475464, "learning_rate": 1.4106360143048405e-05, "loss": 0.2687, "step": 16860 }, { "epoch": 0.36541503260893116, "grad_norm": 1.5559816360473633, "learning_rate": 1.4103256637716817e-05, "loss": 0.291, "step": 16865 }, { "epoch": 0.365523367928412, "grad_norm": 2.39084792137146, "learning_rate": 1.4100152657084411e-05, "loss": 0.1932, "step": 16870 }, { "epoch": 0.3656317032478929, "grad_norm": 1.5148868560791016, "learning_rate": 1.409704820151074e-05, "loss": 0.2105, "step": 16875 }, { "epoch": 0.36574003856737375, "grad_norm": 1.5108177661895752, "learning_rate": 1.409394327135541e-05, "loss": 0.1977, "step": 16880 }, { "epoch": 0.3658483738868546, "grad_norm": 2.2490928173065186, "learning_rate": 1.4090837866978078e-05, "loss": 0.2958, "step": 16885 }, { "epoch": 0.36595670920633544, "grad_norm": 1.5649296045303345, "learning_rate": 1.408773198873846e-05, "loss": 0.1498, "step": 16890 }, { "epoch": 0.3660650445258163, "grad_norm": 1.7197535037994385, "learning_rate": 1.4084625636996328e-05, "loss": 0.2378, "step": 16895 }, { "epoch": 0.3661733798452972, "grad_norm": 1.9194011688232422, "learning_rate": 1.4081518812111502e-05, "loss": 0.2793, "step": 16900 }, { "epoch": 0.36628171516477803, "grad_norm": 1.6907966136932373, "learning_rate": 1.4078411514443862e-05, "loss": 0.1749, "step": 16905 }, { "epoch": 0.3663900504842589, "grad_norm": 1.7937589883804321, "learning_rate": 1.4075303744353344e-05, "loss": 0.1824, "step": 16910 }, { "epoch": 0.3664983858037397, "grad_norm": 1.2333357334136963, "learning_rate": 1.4072195502199933e-05, "loss": 0.1377, "step": 16915 }, { "epoch": 0.36660672112322057, "grad_norm": 1.4482353925704956, "learning_rate": 1.4069086788343675e-05, "loss": 0.2403, "step": 16920 }, { "epoch": 0.36671505644270147, "grad_norm": 2.0242953300476074, "learning_rate": 1.4065977603144666e-05, "loss": 0.3285, "step": 16925 }, { "epoch": 0.3668233917621823, "grad_norm": 2.4564027786254883, "learning_rate": 1.4062867946963064e-05, "loss": 0.2078, "step": 16930 }, { "epoch": 0.36693172708166316, "grad_norm": 1.369009017944336, "learning_rate": 1.405975782015907e-05, "loss": 0.1948, "step": 16935 }, { "epoch": 0.367040062401144, "grad_norm": 1.2382476329803467, "learning_rate": 1.4056647223092948e-05, "loss": 0.1754, "step": 16940 }, { "epoch": 0.36714839772062485, "grad_norm": 1.7679224014282227, "learning_rate": 1.4053536156125013e-05, "loss": 0.1535, "step": 16945 }, { "epoch": 0.36725673304010575, "grad_norm": 1.6370786428451538, "learning_rate": 1.4050424619615637e-05, "loss": 0.2141, "step": 16950 }, { "epoch": 0.3673650683595866, "grad_norm": 1.7225226163864136, "learning_rate": 1.4047312613925246e-05, "loss": 0.2287, "step": 16955 }, { "epoch": 0.36747340367906745, "grad_norm": 2.0302610397338867, "learning_rate": 1.4044200139414317e-05, "loss": 0.1945, "step": 16960 }, { "epoch": 0.3675817389985483, "grad_norm": 1.8278611898422241, "learning_rate": 1.4041087196443381e-05, "loss": 0.1425, "step": 16965 }, { "epoch": 0.36769007431802914, "grad_norm": 1.5496978759765625, "learning_rate": 1.4037973785373031e-05, "loss": 0.2065, "step": 16970 }, { "epoch": 0.36779840963751004, "grad_norm": 1.8819550275802612, "learning_rate": 1.4034859906563905e-05, "loss": 0.2757, "step": 16975 }, { "epoch": 0.3679067449569909, "grad_norm": 1.52733314037323, "learning_rate": 1.4031745560376701e-05, "loss": 0.1414, "step": 16980 }, { "epoch": 0.36801508027647173, "grad_norm": 1.8592435121536255, "learning_rate": 1.4028630747172171e-05, "loss": 0.2401, "step": 16985 }, { "epoch": 0.3681234155959526, "grad_norm": 1.6777690649032593, "learning_rate": 1.4025515467311119e-05, "loss": 0.2278, "step": 16990 }, { "epoch": 0.3682317509154335, "grad_norm": 2.019904375076294, "learning_rate": 1.40223997211544e-05, "loss": 0.3254, "step": 16995 }, { "epoch": 0.3683400862349143, "grad_norm": 2.510012149810791, "learning_rate": 1.4019283509062929e-05, "loss": 0.2516, "step": 17000 }, { "epoch": 0.36844842155439517, "grad_norm": 2.1923670768737793, "learning_rate": 1.4016166831397673e-05, "loss": 0.134, "step": 17005 }, { "epoch": 0.368556756873876, "grad_norm": 1.924729824066162, "learning_rate": 1.4013049688519654e-05, "loss": 0.2857, "step": 17010 }, { "epoch": 0.36866509219335686, "grad_norm": 2.2961623668670654, "learning_rate": 1.400993208078994e-05, "loss": 0.2475, "step": 17015 }, { "epoch": 0.36877342751283776, "grad_norm": 1.718163251876831, "learning_rate": 1.4006814008569664e-05, "loss": 0.2075, "step": 17020 }, { "epoch": 0.3688817628323186, "grad_norm": 1.7111949920654297, "learning_rate": 1.4003695472220012e-05, "loss": 0.2537, "step": 17025 }, { "epoch": 0.36899009815179945, "grad_norm": 2.5131306648254395, "learning_rate": 1.400057647210221e-05, "loss": 0.1708, "step": 17030 }, { "epoch": 0.3690984334712803, "grad_norm": 1.889119029045105, "learning_rate": 1.3997457008577554e-05, "loss": 0.2924, "step": 17035 }, { "epoch": 0.36920676879076114, "grad_norm": 1.9303550720214844, "learning_rate": 1.3994337082007382e-05, "loss": 0.2488, "step": 17040 }, { "epoch": 0.36931510411024204, "grad_norm": 2.040526866912842, "learning_rate": 1.3991216692753099e-05, "loss": 0.2548, "step": 17045 }, { "epoch": 0.3694234394297229, "grad_norm": 2.128519296646118, "learning_rate": 1.3988095841176149e-05, "loss": 0.2947, "step": 17050 }, { "epoch": 0.36953177474920373, "grad_norm": 1.5670924186706543, "learning_rate": 1.3984974527638036e-05, "loss": 0.2326, "step": 17055 }, { "epoch": 0.3696401100686846, "grad_norm": 2.024207592010498, "learning_rate": 1.398185275250032e-05, "loss": 0.2751, "step": 17060 }, { "epoch": 0.3697484453881654, "grad_norm": 2.059225082397461, "learning_rate": 1.397873051612461e-05, "loss": 0.1582, "step": 17065 }, { "epoch": 0.3698567807076463, "grad_norm": 2.4299228191375732, "learning_rate": 1.3975607818872573e-05, "loss": 0.221, "step": 17070 }, { "epoch": 0.36996511602712717, "grad_norm": 2.0352797508239746, "learning_rate": 1.3972484661105922e-05, "loss": 0.2187, "step": 17075 }, { "epoch": 0.370073451346608, "grad_norm": 1.3031632900238037, "learning_rate": 1.3969361043186433e-05, "loss": 0.238, "step": 17080 }, { "epoch": 0.37018178666608886, "grad_norm": 1.946915864944458, "learning_rate": 1.396623696547593e-05, "loss": 0.2138, "step": 17085 }, { "epoch": 0.3702901219855697, "grad_norm": 2.5428717136383057, "learning_rate": 1.3963112428336286e-05, "loss": 0.1579, "step": 17090 }, { "epoch": 0.3703984573050506, "grad_norm": 1.9177037477493286, "learning_rate": 1.3959987432129435e-05, "loss": 0.1313, "step": 17095 }, { "epoch": 0.37050679262453146, "grad_norm": 1.5387428998947144, "learning_rate": 1.395686197721736e-05, "loss": 0.2514, "step": 17100 }, { "epoch": 0.3706151279440123, "grad_norm": 1.4029978513717651, "learning_rate": 1.3953736063962097e-05, "loss": 0.2238, "step": 17105 }, { "epoch": 0.37072346326349315, "grad_norm": 0.8995121121406555, "learning_rate": 1.3950609692725743e-05, "loss": 0.227, "step": 17110 }, { "epoch": 0.37083179858297405, "grad_norm": 1.20826256275177, "learning_rate": 1.3947482863870431e-05, "loss": 0.2238, "step": 17115 }, { "epoch": 0.3709401339024549, "grad_norm": 1.7248642444610596, "learning_rate": 1.3944355577758366e-05, "loss": 0.2103, "step": 17120 }, { "epoch": 0.37104846922193574, "grad_norm": 2.6439106464385986, "learning_rate": 1.3941227834751793e-05, "loss": 0.1671, "step": 17125 }, { "epoch": 0.3711568045414166, "grad_norm": 2.466679573059082, "learning_rate": 1.3938099635213016e-05, "loss": 0.2433, "step": 17130 }, { "epoch": 0.37126513986089743, "grad_norm": 1.7576196193695068, "learning_rate": 1.3934970979504388e-05, "loss": 0.2913, "step": 17135 }, { "epoch": 0.37137347518037833, "grad_norm": 1.0882834196090698, "learning_rate": 1.3931841867988313e-05, "loss": 0.2257, "step": 17140 }, { "epoch": 0.3714818104998592, "grad_norm": 2.0759425163269043, "learning_rate": 1.3928712301027265e-05, "loss": 0.1813, "step": 17145 }, { "epoch": 0.37159014581934, "grad_norm": 1.713004469871521, "learning_rate": 1.3925582278983745e-05, "loss": 0.2532, "step": 17150 }, { "epoch": 0.37169848113882087, "grad_norm": 1.2845443487167358, "learning_rate": 1.3922451802220326e-05, "loss": 0.1374, "step": 17155 }, { "epoch": 0.3718068164583017, "grad_norm": 1.8296653032302856, "learning_rate": 1.391932087109962e-05, "loss": 0.1632, "step": 17160 }, { "epoch": 0.3719151517777826, "grad_norm": 1.4291419982910156, "learning_rate": 1.3916189485984302e-05, "loss": 0.1696, "step": 17165 }, { "epoch": 0.37202348709726346, "grad_norm": 1.848149299621582, "learning_rate": 1.3913057647237099e-05, "loss": 0.2404, "step": 17170 }, { "epoch": 0.3721318224167443, "grad_norm": 1.5216972827911377, "learning_rate": 1.3909925355220784e-05, "loss": 0.183, "step": 17175 }, { "epoch": 0.37224015773622515, "grad_norm": 1.2773526906967163, "learning_rate": 1.3906792610298187e-05, "loss": 0.2519, "step": 17180 }, { "epoch": 0.372348493055706, "grad_norm": 2.736193895339966, "learning_rate": 1.3903659412832191e-05, "loss": 0.1377, "step": 17185 }, { "epoch": 0.3724568283751869, "grad_norm": 0.8216122388839722, "learning_rate": 1.390052576318573e-05, "loss": 0.2448, "step": 17190 }, { "epoch": 0.37256516369466774, "grad_norm": 1.9271172285079956, "learning_rate": 1.3897391661721788e-05, "loss": 0.2315, "step": 17195 }, { "epoch": 0.3726734990141486, "grad_norm": 1.1292169094085693, "learning_rate": 1.3894257108803406e-05, "loss": 0.2572, "step": 17200 }, { "epoch": 0.37278183433362944, "grad_norm": 1.7225532531738281, "learning_rate": 1.3891122104793673e-05, "loss": 0.3163, "step": 17205 }, { "epoch": 0.3728901696531103, "grad_norm": 1.631009817123413, "learning_rate": 1.3887986650055732e-05, "loss": 0.2285, "step": 17210 }, { "epoch": 0.3729985049725912, "grad_norm": 1.5292301177978516, "learning_rate": 1.3884850744952778e-05, "loss": 0.3024, "step": 17215 }, { "epoch": 0.37310684029207203, "grad_norm": 1.6479532718658447, "learning_rate": 1.3881714389848064e-05, "loss": 0.1334, "step": 17220 }, { "epoch": 0.3732151756115529, "grad_norm": 1.4605307579040527, "learning_rate": 1.3878577585104885e-05, "loss": 0.218, "step": 17225 }, { "epoch": 0.3733235109310337, "grad_norm": 3.224294900894165, "learning_rate": 1.3875440331086596e-05, "loss": 0.211, "step": 17230 }, { "epoch": 0.37343184625051457, "grad_norm": 1.9424418210983276, "learning_rate": 1.3872302628156596e-05, "loss": 0.1795, "step": 17235 }, { "epoch": 0.37354018156999547, "grad_norm": 1.946175456047058, "learning_rate": 1.3869164476678343e-05, "loss": 0.1799, "step": 17240 }, { "epoch": 0.3736485168894763, "grad_norm": 1.3620997667312622, "learning_rate": 1.3866025877015346e-05, "loss": 0.1334, "step": 17245 }, { "epoch": 0.37375685220895716, "grad_norm": 2.040567636489868, "learning_rate": 1.3862886829531164e-05, "loss": 0.1695, "step": 17250 }, { "epoch": 0.373865187528438, "grad_norm": 2.424323320388794, "learning_rate": 1.3859747334589411e-05, "loss": 0.2434, "step": 17255 }, { "epoch": 0.3739735228479189, "grad_norm": 1.2381294965744019, "learning_rate": 1.3856607392553745e-05, "loss": 0.2432, "step": 17260 }, { "epoch": 0.37408185816739975, "grad_norm": 1.1658399105072021, "learning_rate": 1.3853467003787887e-05, "loss": 0.1613, "step": 17265 }, { "epoch": 0.3741901934868806, "grad_norm": 1.3804194927215576, "learning_rate": 1.3850326168655601e-05, "loss": 0.2236, "step": 17270 }, { "epoch": 0.37429852880636144, "grad_norm": 1.1181120872497559, "learning_rate": 1.3847184887520707e-05, "loss": 0.183, "step": 17275 }, { "epoch": 0.3744068641258423, "grad_norm": 2.11354398727417, "learning_rate": 1.3844043160747074e-05, "loss": 0.1661, "step": 17280 }, { "epoch": 0.3745151994453232, "grad_norm": 1.714139699935913, "learning_rate": 1.3840900988698623e-05, "loss": 0.2418, "step": 17285 }, { "epoch": 0.37462353476480403, "grad_norm": 2.5067451000213623, "learning_rate": 1.383775837173933e-05, "loss": 0.1521, "step": 17290 }, { "epoch": 0.3747318700842849, "grad_norm": 1.6973841190338135, "learning_rate": 1.3834615310233219e-05, "loss": 0.2075, "step": 17295 }, { "epoch": 0.3748402054037657, "grad_norm": 1.5358152389526367, "learning_rate": 1.3831471804544369e-05, "loss": 0.2055, "step": 17300 }, { "epoch": 0.37494854072324657, "grad_norm": 2.2860708236694336, "learning_rate": 1.3828327855036902e-05, "loss": 0.1767, "step": 17305 }, { "epoch": 0.37505687604272747, "grad_norm": 1.531667947769165, "learning_rate": 1.3825183462075007e-05, "loss": 0.2054, "step": 17310 }, { "epoch": 0.3751652113622083, "grad_norm": 2.0898191928863525, "learning_rate": 1.3822038626022905e-05, "loss": 0.2288, "step": 17315 }, { "epoch": 0.37527354668168916, "grad_norm": 1.7174354791641235, "learning_rate": 1.3818893347244884e-05, "loss": 0.2751, "step": 17320 }, { "epoch": 0.37538188200117, "grad_norm": 1.545420527458191, "learning_rate": 1.3815747626105273e-05, "loss": 0.2312, "step": 17325 }, { "epoch": 0.37549021732065085, "grad_norm": 1.827208161354065, "learning_rate": 1.3812601462968463e-05, "loss": 0.201, "step": 17330 }, { "epoch": 0.37559855264013176, "grad_norm": 2.5357658863067627, "learning_rate": 1.3809454858198883e-05, "loss": 0.1585, "step": 17335 }, { "epoch": 0.3757068879596126, "grad_norm": 3.0548365116119385, "learning_rate": 1.3806307812161025e-05, "loss": 0.2785, "step": 17340 }, { "epoch": 0.37581522327909345, "grad_norm": 1.943721055984497, "learning_rate": 1.3803160325219427e-05, "loss": 0.1655, "step": 17345 }, { "epoch": 0.3759235585985743, "grad_norm": 1.8016180992126465, "learning_rate": 1.3800012397738675e-05, "loss": 0.3549, "step": 17350 }, { "epoch": 0.37603189391805514, "grad_norm": 1.418182611465454, "learning_rate": 1.3796864030083412e-05, "loss": 0.3012, "step": 17355 }, { "epoch": 0.37614022923753604, "grad_norm": 3.668025016784668, "learning_rate": 1.3793715222618325e-05, "loss": 0.1561, "step": 17360 }, { "epoch": 0.3762485645570169, "grad_norm": 1.7821242809295654, "learning_rate": 1.3790565975708164e-05, "loss": 0.1876, "step": 17365 }, { "epoch": 0.37635689987649773, "grad_norm": 2.8102669715881348, "learning_rate": 1.3787416289717714e-05, "loss": 0.2198, "step": 17370 }, { "epoch": 0.3764652351959786, "grad_norm": 2.031449794769287, "learning_rate": 1.3784266165011823e-05, "loss": 0.2371, "step": 17375 }, { "epoch": 0.3765735705154595, "grad_norm": 1.897780418395996, "learning_rate": 1.3781115601955382e-05, "loss": 0.1846, "step": 17380 }, { "epoch": 0.3766819058349403, "grad_norm": 1.115036964416504, "learning_rate": 1.3777964600913344e-05, "loss": 0.1695, "step": 17385 }, { "epoch": 0.37679024115442117, "grad_norm": 1.5630228519439697, "learning_rate": 1.3774813162250702e-05, "loss": 0.2562, "step": 17390 }, { "epoch": 0.376898576473902, "grad_norm": 2.0945193767547607, "learning_rate": 1.3771661286332499e-05, "loss": 0.1434, "step": 17395 }, { "epoch": 0.37700691179338286, "grad_norm": 1.5595744848251343, "learning_rate": 1.3768508973523837e-05, "loss": 0.2261, "step": 17400 }, { "epoch": 0.37711524711286376, "grad_norm": 1.3451687097549438, "learning_rate": 1.3765356224189858e-05, "loss": 0.2289, "step": 17405 }, { "epoch": 0.3772235824323446, "grad_norm": 1.4249101877212524, "learning_rate": 1.3762203038695772e-05, "loss": 0.287, "step": 17410 }, { "epoch": 0.37733191775182545, "grad_norm": 1.70997953414917, "learning_rate": 1.3759049417406817e-05, "loss": 0.2132, "step": 17415 }, { "epoch": 0.3774402530713063, "grad_norm": 2.181854486465454, "learning_rate": 1.37558953606883e-05, "loss": 0.2293, "step": 17420 }, { "epoch": 0.37754858839078714, "grad_norm": 2.4701132774353027, "learning_rate": 1.375274086890557e-05, "loss": 0.194, "step": 17425 }, { "epoch": 0.37765692371026804, "grad_norm": 1.9311017990112305, "learning_rate": 1.3749585942424024e-05, "loss": 0.1531, "step": 17430 }, { "epoch": 0.3777652590297489, "grad_norm": 1.3879507780075073, "learning_rate": 1.3746430581609117e-05, "loss": 0.243, "step": 17435 }, { "epoch": 0.37787359434922974, "grad_norm": 1.8124011754989624, "learning_rate": 1.3743274786826348e-05, "loss": 0.1534, "step": 17440 }, { "epoch": 0.3779819296687106, "grad_norm": 1.6171979904174805, "learning_rate": 1.3740118558441268e-05, "loss": 0.2572, "step": 17445 }, { "epoch": 0.3780902649881914, "grad_norm": 2.523426055908203, "learning_rate": 1.3736961896819483e-05, "loss": 0.2278, "step": 17450 }, { "epoch": 0.37819860030767233, "grad_norm": 1.9738572835922241, "learning_rate": 1.3733804802326636e-05, "loss": 0.2587, "step": 17455 }, { "epoch": 0.3783069356271532, "grad_norm": 1.5068035125732422, "learning_rate": 1.373064727532844e-05, "loss": 0.2117, "step": 17460 }, { "epoch": 0.378415270946634, "grad_norm": 2.4762017726898193, "learning_rate": 1.372748931619064e-05, "loss": 0.2583, "step": 17465 }, { "epoch": 0.37852360626611486, "grad_norm": 1.5580506324768066, "learning_rate": 1.3724330925279046e-05, "loss": 0.2136, "step": 17470 }, { "epoch": 0.3786319415855957, "grad_norm": 1.2147750854492188, "learning_rate": 1.37211721029595e-05, "loss": 0.1817, "step": 17475 }, { "epoch": 0.3787402769050766, "grad_norm": 1.7240586280822754, "learning_rate": 1.3718012849597906e-05, "loss": 0.2129, "step": 17480 }, { "epoch": 0.37884861222455746, "grad_norm": 1.1986019611358643, "learning_rate": 1.3714853165560223e-05, "loss": 0.3742, "step": 17485 }, { "epoch": 0.3789569475440383, "grad_norm": 1.5966825485229492, "learning_rate": 1.371169305121245e-05, "loss": 0.192, "step": 17490 }, { "epoch": 0.37906528286351915, "grad_norm": 1.5514410734176636, "learning_rate": 1.3708532506920637e-05, "loss": 0.2281, "step": 17495 }, { "epoch": 0.379173618183, "grad_norm": 1.4100415706634521, "learning_rate": 1.3705371533050888e-05, "loss": 0.2132, "step": 17500 }, { "epoch": 0.3792819535024809, "grad_norm": 1.8283413648605347, "learning_rate": 1.3702210129969348e-05, "loss": 0.2011, "step": 17505 }, { "epoch": 0.37939028882196174, "grad_norm": 1.9498120546340942, "learning_rate": 1.3699048298042232e-05, "loss": 0.2255, "step": 17510 }, { "epoch": 0.3794986241414426, "grad_norm": 1.6281710863113403, "learning_rate": 1.3695886037635777e-05, "loss": 0.2266, "step": 17515 }, { "epoch": 0.37960695946092343, "grad_norm": 3.106297254562378, "learning_rate": 1.3692723349116293e-05, "loss": 0.1498, "step": 17520 }, { "epoch": 0.37971529478040433, "grad_norm": 1.8727648258209229, "learning_rate": 1.368956023285012e-05, "loss": 0.29, "step": 17525 }, { "epoch": 0.3798236300998852, "grad_norm": 1.7557874917984009, "learning_rate": 1.3686396689203667e-05, "loss": 0.2098, "step": 17530 }, { "epoch": 0.379931965419366, "grad_norm": 1.5760020017623901, "learning_rate": 1.368323271854338e-05, "loss": 0.2083, "step": 17535 }, { "epoch": 0.38004030073884687, "grad_norm": 3.7441327571868896, "learning_rate": 1.3680068321235758e-05, "loss": 0.2147, "step": 17540 }, { "epoch": 0.3801486360583277, "grad_norm": 1.9624344110488892, "learning_rate": 1.3676903497647346e-05, "loss": 0.325, "step": 17545 }, { "epoch": 0.3802569713778086, "grad_norm": 1.910313367843628, "learning_rate": 1.3673738248144746e-05, "loss": 0.2812, "step": 17550 }, { "epoch": 0.38036530669728946, "grad_norm": 1.310538649559021, "learning_rate": 1.36705725730946e-05, "loss": 0.1158, "step": 17555 }, { "epoch": 0.3804736420167703, "grad_norm": 2.7863917350769043, "learning_rate": 1.366740647286361e-05, "loss": 0.2432, "step": 17560 }, { "epoch": 0.38058197733625115, "grad_norm": 2.518340826034546, "learning_rate": 1.3664239947818516e-05, "loss": 0.1514, "step": 17565 }, { "epoch": 0.380690312655732, "grad_norm": 1.7150615453720093, "learning_rate": 1.3661072998326115e-05, "loss": 0.178, "step": 17570 }, { "epoch": 0.3807986479752129, "grad_norm": 1.5533887147903442, "learning_rate": 1.3657905624753247e-05, "loss": 0.2861, "step": 17575 }, { "epoch": 0.38090698329469375, "grad_norm": 2.5467121601104736, "learning_rate": 1.3654737827466811e-05, "loss": 0.1674, "step": 17580 }, { "epoch": 0.3810153186141746, "grad_norm": 1.6544362306594849, "learning_rate": 1.3651569606833746e-05, "loss": 0.2668, "step": 17585 }, { "epoch": 0.38112365393365544, "grad_norm": 1.480867862701416, "learning_rate": 1.3648400963221042e-05, "loss": 0.1654, "step": 17590 }, { "epoch": 0.3812319892531363, "grad_norm": 1.8613991737365723, "learning_rate": 1.364523189699574e-05, "loss": 0.2176, "step": 17595 }, { "epoch": 0.3813403245726172, "grad_norm": 2.069481372833252, "learning_rate": 1.3642062408524926e-05, "loss": 0.2606, "step": 17600 }, { "epoch": 0.38144865989209803, "grad_norm": 1.89919114112854, "learning_rate": 1.3638892498175742e-05, "loss": 0.1565, "step": 17605 }, { "epoch": 0.3815569952115789, "grad_norm": 2.1204519271850586, "learning_rate": 1.3635722166315372e-05, "loss": 0.1737, "step": 17610 }, { "epoch": 0.3816653305310597, "grad_norm": 1.653235912322998, "learning_rate": 1.3632551413311053e-05, "loss": 0.3013, "step": 17615 }, { "epoch": 0.38177366585054057, "grad_norm": 1.747495174407959, "learning_rate": 1.3629380239530066e-05, "loss": 0.2336, "step": 17620 }, { "epoch": 0.38188200117002147, "grad_norm": 1.6250617504119873, "learning_rate": 1.3626208645339747e-05, "loss": 0.2898, "step": 17625 }, { "epoch": 0.3819903364895023, "grad_norm": 1.5616604089736938, "learning_rate": 1.362303663110748e-05, "loss": 0.1611, "step": 17630 }, { "epoch": 0.38209867180898316, "grad_norm": 1.4112309217453003, "learning_rate": 1.361986419720069e-05, "loss": 0.2456, "step": 17635 }, { "epoch": 0.382207007128464, "grad_norm": 1.620361566543579, "learning_rate": 1.3616691343986861e-05, "loss": 0.2195, "step": 17640 }, { "epoch": 0.3823153424479449, "grad_norm": 1.898438811302185, "learning_rate": 1.3613518071833512e-05, "loss": 0.1482, "step": 17645 }, { "epoch": 0.38242367776742575, "grad_norm": 1.6403217315673828, "learning_rate": 1.3610344381108229e-05, "loss": 0.1632, "step": 17650 }, { "epoch": 0.3825320130869066, "grad_norm": 2.14387583732605, "learning_rate": 1.3607170272178632e-05, "loss": 0.2281, "step": 17655 }, { "epoch": 0.38264034840638744, "grad_norm": 1.4229074716567993, "learning_rate": 1.3603995745412392e-05, "loss": 0.274, "step": 17660 }, { "epoch": 0.3827486837258683, "grad_norm": 1.3117235898971558, "learning_rate": 1.3600820801177236e-05, "loss": 0.1978, "step": 17665 }, { "epoch": 0.3828570190453492, "grad_norm": 1.0469154119491577, "learning_rate": 1.3597645439840931e-05, "loss": 0.1627, "step": 17670 }, { "epoch": 0.38296535436483004, "grad_norm": 2.462810516357422, "learning_rate": 1.3594469661771292e-05, "loss": 0.252, "step": 17675 }, { "epoch": 0.3830736896843109, "grad_norm": 1.4403692483901978, "learning_rate": 1.359129346733619e-05, "loss": 0.31, "step": 17680 }, { "epoch": 0.3831820250037917, "grad_norm": 1.5874103307724, "learning_rate": 1.3588116856903537e-05, "loss": 0.2557, "step": 17685 }, { "epoch": 0.38329036032327257, "grad_norm": 1.736517071723938, "learning_rate": 1.3584939830841295e-05, "loss": 0.1684, "step": 17690 }, { "epoch": 0.3833986956427535, "grad_norm": 2.1822707653045654, "learning_rate": 1.3581762389517474e-05, "loss": 0.2763, "step": 17695 }, { "epoch": 0.3835070309622343, "grad_norm": 1.6100445985794067, "learning_rate": 1.3578584533300136e-05, "loss": 0.1837, "step": 17700 }, { "epoch": 0.38361536628171516, "grad_norm": 1.744348406791687, "learning_rate": 1.357540626255739e-05, "loss": 0.2399, "step": 17705 }, { "epoch": 0.383723701601196, "grad_norm": 1.807657241821289, "learning_rate": 1.3572227577657387e-05, "loss": 0.2079, "step": 17710 }, { "epoch": 0.38383203692067686, "grad_norm": 1.3625874519348145, "learning_rate": 1.3569048478968332e-05, "loss": 0.134, "step": 17715 }, { "epoch": 0.38394037224015776, "grad_norm": 1.7777341604232788, "learning_rate": 1.356586896685847e-05, "loss": 0.2934, "step": 17720 }, { "epoch": 0.3840487075596386, "grad_norm": 1.4231129884719849, "learning_rate": 1.356268904169611e-05, "loss": 0.1849, "step": 17725 }, { "epoch": 0.38415704287911945, "grad_norm": 1.449781060218811, "learning_rate": 1.3559508703849593e-05, "loss": 0.2362, "step": 17730 }, { "epoch": 0.3842653781986003, "grad_norm": 2.4949557781219482, "learning_rate": 1.3556327953687312e-05, "loss": 0.2007, "step": 17735 }, { "epoch": 0.38437371351808114, "grad_norm": 2.381202459335327, "learning_rate": 1.3553146791577714e-05, "loss": 0.1696, "step": 17740 }, { "epoch": 0.38448204883756204, "grad_norm": 2.009526014328003, "learning_rate": 1.354996521788928e-05, "loss": 0.1625, "step": 17745 }, { "epoch": 0.3845903841570429, "grad_norm": 1.625480055809021, "learning_rate": 1.3546783232990563e-05, "loss": 0.1762, "step": 17750 }, { "epoch": 0.38469871947652373, "grad_norm": 2.293713092803955, "learning_rate": 1.3543600837250135e-05, "loss": 0.2187, "step": 17755 }, { "epoch": 0.3848070547960046, "grad_norm": 1.2324877977371216, "learning_rate": 1.3540418031036633e-05, "loss": 0.201, "step": 17760 }, { "epoch": 0.3849153901154854, "grad_norm": 2.727675676345825, "learning_rate": 1.3537234814718738e-05, "loss": 0.2348, "step": 17765 }, { "epoch": 0.3850237254349663, "grad_norm": 1.5309126377105713, "learning_rate": 1.3534051188665177e-05, "loss": 0.2502, "step": 17770 }, { "epoch": 0.38513206075444717, "grad_norm": 1.8462779521942139, "learning_rate": 1.3530867153244724e-05, "loss": 0.2359, "step": 17775 }, { "epoch": 0.385240396073928, "grad_norm": 1.4766619205474854, "learning_rate": 1.3527682708826207e-05, "loss": 0.2359, "step": 17780 }, { "epoch": 0.38534873139340886, "grad_norm": 2.8587653636932373, "learning_rate": 1.352449785577849e-05, "loss": 0.1889, "step": 17785 }, { "epoch": 0.38545706671288976, "grad_norm": 1.6460638046264648, "learning_rate": 1.3521312594470497e-05, "loss": 0.2403, "step": 17790 }, { "epoch": 0.3855654020323706, "grad_norm": 1.635430097579956, "learning_rate": 1.3518126925271186e-05, "loss": 0.2313, "step": 17795 }, { "epoch": 0.38567373735185145, "grad_norm": 1.3797860145568848, "learning_rate": 1.3514940848549574e-05, "loss": 0.1689, "step": 17800 }, { "epoch": 0.3857820726713323, "grad_norm": 2.3262875080108643, "learning_rate": 1.3511754364674719e-05, "loss": 0.2356, "step": 17805 }, { "epoch": 0.38589040799081314, "grad_norm": 2.4941742420196533, "learning_rate": 1.3508567474015725e-05, "loss": 0.2552, "step": 17810 }, { "epoch": 0.38599874331029405, "grad_norm": 1.4786666631698608, "learning_rate": 1.3505380176941747e-05, "loss": 0.1177, "step": 17815 }, { "epoch": 0.3861070786297749, "grad_norm": 1.566679835319519, "learning_rate": 1.3502192473821986e-05, "loss": 0.155, "step": 17820 }, { "epoch": 0.38621541394925574, "grad_norm": 2.1277201175689697, "learning_rate": 1.3499004365025693e-05, "loss": 0.2843, "step": 17825 }, { "epoch": 0.3863237492687366, "grad_norm": 1.4256260395050049, "learning_rate": 1.3495815850922158e-05, "loss": 0.1997, "step": 17830 }, { "epoch": 0.38643208458821743, "grad_norm": 1.8932440280914307, "learning_rate": 1.3492626931880725e-05, "loss": 0.2939, "step": 17835 }, { "epoch": 0.38654041990769833, "grad_norm": 2.0586373805999756, "learning_rate": 1.348943760827078e-05, "loss": 0.1468, "step": 17840 }, { "epoch": 0.3866487552271792, "grad_norm": 1.972344994544983, "learning_rate": 1.3486247880461758e-05, "loss": 0.3047, "step": 17845 }, { "epoch": 0.38675709054666, "grad_norm": 2.3495664596557617, "learning_rate": 1.3483057748823145e-05, "loss": 0.1928, "step": 17850 }, { "epoch": 0.38686542586614087, "grad_norm": 1.8635801076889038, "learning_rate": 1.3479867213724469e-05, "loss": 0.2158, "step": 17855 }, { "epoch": 0.3869737611856217, "grad_norm": 1.4120322465896606, "learning_rate": 1.3476676275535303e-05, "loss": 0.2753, "step": 17860 }, { "epoch": 0.3870820965051026, "grad_norm": 1.558695673942566, "learning_rate": 1.347348493462527e-05, "loss": 0.2201, "step": 17865 }, { "epoch": 0.38719043182458346, "grad_norm": 2.9818196296691895, "learning_rate": 1.3470293191364046e-05, "loss": 0.2162, "step": 17870 }, { "epoch": 0.3872987671440643, "grad_norm": 1.7214031219482422, "learning_rate": 1.3467101046121337e-05, "loss": 0.1931, "step": 17875 }, { "epoch": 0.38740710246354515, "grad_norm": 2.359260320663452, "learning_rate": 1.3463908499266906e-05, "loss": 0.1923, "step": 17880 }, { "epoch": 0.387515437783026, "grad_norm": 1.6554138660430908, "learning_rate": 1.3460715551170569e-05, "loss": 0.1489, "step": 17885 }, { "epoch": 0.3876237731025069, "grad_norm": 1.9619277715682983, "learning_rate": 1.3457522202202173e-05, "loss": 0.2028, "step": 17890 }, { "epoch": 0.38773210842198774, "grad_norm": 2.2099475860595703, "learning_rate": 1.3454328452731625e-05, "loss": 0.1994, "step": 17895 }, { "epoch": 0.3878404437414686, "grad_norm": 2.5965631008148193, "learning_rate": 1.3451134303128872e-05, "loss": 0.1651, "step": 17900 }, { "epoch": 0.38794877906094943, "grad_norm": 1.4046962261199951, "learning_rate": 1.344793975376391e-05, "loss": 0.115, "step": 17905 }, { "epoch": 0.38805711438043033, "grad_norm": 1.7806110382080078, "learning_rate": 1.3444744805006775e-05, "loss": 0.1661, "step": 17910 }, { "epoch": 0.3881654496999112, "grad_norm": 1.3122895956039429, "learning_rate": 1.3441549457227559e-05, "loss": 0.2041, "step": 17915 }, { "epoch": 0.388273785019392, "grad_norm": 2.067930221557617, "learning_rate": 1.343835371079639e-05, "loss": 0.2849, "step": 17920 }, { "epoch": 0.38838212033887287, "grad_norm": 1.6159058809280396, "learning_rate": 1.3435157566083452e-05, "loss": 0.228, "step": 17925 }, { "epoch": 0.3884904556583537, "grad_norm": 2.023395538330078, "learning_rate": 1.343196102345897e-05, "loss": 0.2819, "step": 17930 }, { "epoch": 0.3885987909778346, "grad_norm": 1.557128667831421, "learning_rate": 1.342876408329321e-05, "loss": 0.2052, "step": 17935 }, { "epoch": 0.38870712629731546, "grad_norm": 2.015014171600342, "learning_rate": 1.3425566745956495e-05, "loss": 0.1843, "step": 17940 }, { "epoch": 0.3888154616167963, "grad_norm": 1.817903757095337, "learning_rate": 1.342236901181919e-05, "loss": 0.2711, "step": 17945 }, { "epoch": 0.38892379693627716, "grad_norm": 2.070464849472046, "learning_rate": 1.3419170881251699e-05, "loss": 0.1294, "step": 17950 }, { "epoch": 0.389032132255758, "grad_norm": 1.9078969955444336, "learning_rate": 1.3415972354624487e-05, "loss": 0.1934, "step": 17955 }, { "epoch": 0.3891404675752389, "grad_norm": 1.1446818113327026, "learning_rate": 1.3412773432308041e-05, "loss": 0.1915, "step": 17960 }, { "epoch": 0.38924880289471975, "grad_norm": 1.6628893613815308, "learning_rate": 1.3409574114672921e-05, "loss": 0.1777, "step": 17965 }, { "epoch": 0.3893571382142006, "grad_norm": 1.5055512189865112, "learning_rate": 1.3406374402089713e-05, "loss": 0.2011, "step": 17970 }, { "epoch": 0.38946547353368144, "grad_norm": 1.8115735054016113, "learning_rate": 1.340317429492906e-05, "loss": 0.2201, "step": 17975 }, { "epoch": 0.3895738088531623, "grad_norm": 2.7826218605041504, "learning_rate": 1.3399973793561643e-05, "loss": 0.223, "step": 17980 }, { "epoch": 0.3896821441726432, "grad_norm": 1.846871256828308, "learning_rate": 1.339677289835819e-05, "loss": 0.2893, "step": 17985 }, { "epoch": 0.38979047949212403, "grad_norm": 1.6116102933883667, "learning_rate": 1.3393571609689488e-05, "loss": 0.1211, "step": 17990 }, { "epoch": 0.3898988148116049, "grad_norm": 1.5731701850891113, "learning_rate": 1.3390369927926347e-05, "loss": 0.2122, "step": 17995 }, { "epoch": 0.3900071501310857, "grad_norm": 2.222768545150757, "learning_rate": 1.3387167853439638e-05, "loss": 0.118, "step": 18000 }, { "epoch": 0.39011548545056657, "grad_norm": 2.2808279991149902, "learning_rate": 1.3383965386600272e-05, "loss": 0.1855, "step": 18005 }, { "epoch": 0.39022382077004747, "grad_norm": 1.6751041412353516, "learning_rate": 1.3380762527779206e-05, "loss": 0.1975, "step": 18010 }, { "epoch": 0.3903321560895283, "grad_norm": 2.1770877838134766, "learning_rate": 1.3377559277347447e-05, "loss": 0.2052, "step": 18015 }, { "epoch": 0.39044049140900916, "grad_norm": 1.4747976064682007, "learning_rate": 1.3374355635676044e-05, "loss": 0.2061, "step": 18020 }, { "epoch": 0.39054882672849, "grad_norm": 1.5060774087905884, "learning_rate": 1.3371151603136086e-05, "loss": 0.2364, "step": 18025 }, { "epoch": 0.39065716204797085, "grad_norm": 1.1667413711547852, "learning_rate": 1.3367947180098715e-05, "loss": 0.2087, "step": 18030 }, { "epoch": 0.39076549736745175, "grad_norm": 2.3859634399414062, "learning_rate": 1.3364742366935116e-05, "loss": 0.1688, "step": 18035 }, { "epoch": 0.3908738326869326, "grad_norm": 1.7592318058013916, "learning_rate": 1.3361537164016514e-05, "loss": 0.2214, "step": 18040 }, { "epoch": 0.39098216800641344, "grad_norm": 1.3873140811920166, "learning_rate": 1.3358331571714192e-05, "loss": 0.1851, "step": 18045 }, { "epoch": 0.3910905033258943, "grad_norm": 1.356471061706543, "learning_rate": 1.3355125590399463e-05, "loss": 0.2863, "step": 18050 }, { "epoch": 0.3911988386453752, "grad_norm": 1.857057809829712, "learning_rate": 1.3351919220443693e-05, "loss": 0.2314, "step": 18055 }, { "epoch": 0.39130717396485604, "grad_norm": 2.0366532802581787, "learning_rate": 1.3348712462218294e-05, "loss": 0.2085, "step": 18060 }, { "epoch": 0.3914155092843369, "grad_norm": 1.4481568336486816, "learning_rate": 1.334550531609472e-05, "loss": 0.1413, "step": 18065 }, { "epoch": 0.39152384460381773, "grad_norm": 1.8043303489685059, "learning_rate": 1.334229778244447e-05, "loss": 0.2597, "step": 18070 }, { "epoch": 0.3916321799232986, "grad_norm": 1.262267827987671, "learning_rate": 1.3339089861639093e-05, "loss": 0.208, "step": 18075 }, { "epoch": 0.3917405152427795, "grad_norm": 2.0723323822021484, "learning_rate": 1.3335881554050172e-05, "loss": 0.174, "step": 18080 }, { "epoch": 0.3918488505622603, "grad_norm": 2.108059883117676, "learning_rate": 1.3332672860049345e-05, "loss": 0.2082, "step": 18085 }, { "epoch": 0.39195718588174117, "grad_norm": 1.6587629318237305, "learning_rate": 1.3329463780008291e-05, "loss": 0.2129, "step": 18090 }, { "epoch": 0.392065521201222, "grad_norm": 1.4491779804229736, "learning_rate": 1.3326254314298731e-05, "loss": 0.1552, "step": 18095 }, { "epoch": 0.39217385652070286, "grad_norm": 1.88791823387146, "learning_rate": 1.3323044463292441e-05, "loss": 0.2403, "step": 18100 }, { "epoch": 0.39228219184018376, "grad_norm": 1.6245412826538086, "learning_rate": 1.3319834227361223e-05, "loss": 0.1886, "step": 18105 }, { "epoch": 0.3923905271596646, "grad_norm": 2.095489978790283, "learning_rate": 1.3316623606876943e-05, "loss": 0.1858, "step": 18110 }, { "epoch": 0.39249886247914545, "grad_norm": 1.2456902265548706, "learning_rate": 1.3313412602211506e-05, "loss": 0.1699, "step": 18115 }, { "epoch": 0.3926071977986263, "grad_norm": 1.4593961238861084, "learning_rate": 1.3310201213736851e-05, "loss": 0.2141, "step": 18120 }, { "epoch": 0.39271553311810714, "grad_norm": 1.2082394361495972, "learning_rate": 1.3306989441824972e-05, "loss": 0.2296, "step": 18125 }, { "epoch": 0.39282386843758804, "grad_norm": 2.792753219604492, "learning_rate": 1.33037772868479e-05, "loss": 0.2015, "step": 18130 }, { "epoch": 0.3929322037570689, "grad_norm": 1.387868881225586, "learning_rate": 1.3300564749177727e-05, "loss": 0.1806, "step": 18135 }, { "epoch": 0.39304053907654973, "grad_norm": 1.4484760761260986, "learning_rate": 1.3297351829186566e-05, "loss": 0.1539, "step": 18140 }, { "epoch": 0.3931488743960306, "grad_norm": 1.9654322862625122, "learning_rate": 1.3294138527246592e-05, "loss": 0.2283, "step": 18145 }, { "epoch": 0.3932572097155114, "grad_norm": 2.5661325454711914, "learning_rate": 1.3290924843730016e-05, "loss": 0.1519, "step": 18150 }, { "epoch": 0.3933655450349923, "grad_norm": 1.3359410762786865, "learning_rate": 1.3287710779009096e-05, "loss": 0.1439, "step": 18155 }, { "epoch": 0.39347388035447317, "grad_norm": 2.8794190883636475, "learning_rate": 1.328449633345613e-05, "loss": 0.1318, "step": 18160 }, { "epoch": 0.393582215673954, "grad_norm": 1.1602258682250977, "learning_rate": 1.3281281507443463e-05, "loss": 0.1869, "step": 18165 }, { "epoch": 0.39369055099343486, "grad_norm": 1.1592888832092285, "learning_rate": 1.327806630134349e-05, "loss": 0.1974, "step": 18170 }, { "epoch": 0.39379888631291576, "grad_norm": 2.3740787506103516, "learning_rate": 1.327485071552864e-05, "loss": 0.1837, "step": 18175 }, { "epoch": 0.3939072216323966, "grad_norm": 2.2842354774475098, "learning_rate": 1.3271634750371394e-05, "loss": 0.2586, "step": 18180 }, { "epoch": 0.39401555695187745, "grad_norm": 1.5606942176818848, "learning_rate": 1.326841840624427e-05, "loss": 0.2609, "step": 18185 }, { "epoch": 0.3941238922713583, "grad_norm": 1.9964654445648193, "learning_rate": 1.3265201683519834e-05, "loss": 0.2078, "step": 18190 }, { "epoch": 0.39423222759083915, "grad_norm": 2.5023691654205322, "learning_rate": 1.3261984582570697e-05, "loss": 0.2435, "step": 18195 }, { "epoch": 0.39434056291032005, "grad_norm": 2.040872097015381, "learning_rate": 1.3258767103769512e-05, "loss": 0.195, "step": 18200 }, { "epoch": 0.3944488982298009, "grad_norm": 1.429844617843628, "learning_rate": 1.3255549247488973e-05, "loss": 0.2176, "step": 18205 }, { "epoch": 0.39455723354928174, "grad_norm": 2.1068668365478516, "learning_rate": 1.325233101410182e-05, "loss": 0.2126, "step": 18210 }, { "epoch": 0.3946655688687626, "grad_norm": 1.2959089279174805, "learning_rate": 1.3249112403980842e-05, "loss": 0.2574, "step": 18215 }, { "epoch": 0.39477390418824343, "grad_norm": 1.9259480237960815, "learning_rate": 1.324589341749886e-05, "loss": 0.2617, "step": 18220 }, { "epoch": 0.39488223950772433, "grad_norm": 1.3122972249984741, "learning_rate": 1.324267405502875e-05, "loss": 0.1236, "step": 18225 }, { "epoch": 0.3949905748272052, "grad_norm": 2.297987222671509, "learning_rate": 1.3239454316943427e-05, "loss": 0.1533, "step": 18230 }, { "epoch": 0.395098910146686, "grad_norm": 2.1452372074127197, "learning_rate": 1.3236234203615851e-05, "loss": 0.1846, "step": 18235 }, { "epoch": 0.39520724546616687, "grad_norm": 1.8027197122573853, "learning_rate": 1.3233013715419019e-05, "loss": 0.2017, "step": 18240 }, { "epoch": 0.3953155807856477, "grad_norm": 2.7879772186279297, "learning_rate": 1.3229792852725977e-05, "loss": 0.2311, "step": 18245 }, { "epoch": 0.3954239161051286, "grad_norm": 1.6670814752578735, "learning_rate": 1.3226571615909816e-05, "loss": 0.2641, "step": 18250 }, { "epoch": 0.39553225142460946, "grad_norm": 1.8655595779418945, "learning_rate": 1.3223350005343668e-05, "loss": 0.1991, "step": 18255 }, { "epoch": 0.3956405867440903, "grad_norm": 2.0373122692108154, "learning_rate": 1.3220128021400708e-05, "loss": 0.2423, "step": 18260 }, { "epoch": 0.39574892206357115, "grad_norm": 2.5269577503204346, "learning_rate": 1.3216905664454154e-05, "loss": 0.3037, "step": 18265 }, { "epoch": 0.395857257383052, "grad_norm": 1.929080843925476, "learning_rate": 1.3213682934877268e-05, "loss": 0.2894, "step": 18270 }, { "epoch": 0.3959655927025329, "grad_norm": 1.3424235582351685, "learning_rate": 1.3210459833043354e-05, "loss": 0.3232, "step": 18275 }, { "epoch": 0.39607392802201374, "grad_norm": 1.6531063318252563, "learning_rate": 1.3207236359325764e-05, "loss": 0.2079, "step": 18280 }, { "epoch": 0.3961822633414946, "grad_norm": 1.6614043712615967, "learning_rate": 1.3204012514097884e-05, "loss": 0.1881, "step": 18285 }, { "epoch": 0.39629059866097543, "grad_norm": 1.7453978061676025, "learning_rate": 1.320078829773315e-05, "loss": 0.3092, "step": 18290 }, { "epoch": 0.39639893398045634, "grad_norm": 0.5203931331634521, "learning_rate": 1.3197563710605038e-05, "loss": 0.1351, "step": 18295 }, { "epoch": 0.3965072692999372, "grad_norm": 2.390334367752075, "learning_rate": 1.319433875308707e-05, "loss": 0.2741, "step": 18300 }, { "epoch": 0.396615604619418, "grad_norm": 1.528495192527771, "learning_rate": 1.319111342555281e-05, "loss": 0.1342, "step": 18305 }, { "epoch": 0.3967239399388989, "grad_norm": 2.0106616020202637, "learning_rate": 1.3187887728375865e-05, "loss": 0.2194, "step": 18310 }, { "epoch": 0.3968322752583797, "grad_norm": 1.9254074096679688, "learning_rate": 1.3184661661929878e-05, "loss": 0.1386, "step": 18315 }, { "epoch": 0.3969406105778606, "grad_norm": 1.7639533281326294, "learning_rate": 1.3181435226588547e-05, "loss": 0.2045, "step": 18320 }, { "epoch": 0.39704894589734147, "grad_norm": 1.5680968761444092, "learning_rate": 1.3178208422725598e-05, "loss": 0.1997, "step": 18325 }, { "epoch": 0.3971572812168223, "grad_norm": 1.8448176383972168, "learning_rate": 1.3174981250714815e-05, "loss": 0.2294, "step": 18330 }, { "epoch": 0.39726561653630316, "grad_norm": 1.7525336742401123, "learning_rate": 1.3171753710930017e-05, "loss": 0.2583, "step": 18335 }, { "epoch": 0.397373951855784, "grad_norm": 1.0468100309371948, "learning_rate": 1.3168525803745062e-05, "loss": 0.1857, "step": 18340 }, { "epoch": 0.3974822871752649, "grad_norm": 1.7366663217544556, "learning_rate": 1.3165297529533858e-05, "loss": 0.1903, "step": 18345 }, { "epoch": 0.39759062249474575, "grad_norm": 1.8203898668289185, "learning_rate": 1.3162068888670352e-05, "loss": 0.2619, "step": 18350 }, { "epoch": 0.3976989578142266, "grad_norm": 0.9496480226516724, "learning_rate": 1.3158839881528534e-05, "loss": 0.209, "step": 18355 }, { "epoch": 0.39780729313370744, "grad_norm": 1.7885260581970215, "learning_rate": 1.3155610508482436e-05, "loss": 0.2584, "step": 18360 }, { "epoch": 0.3979156284531883, "grad_norm": 1.643243670463562, "learning_rate": 1.3152380769906128e-05, "loss": 0.1165, "step": 18365 }, { "epoch": 0.3980239637726692, "grad_norm": 1.5622109174728394, "learning_rate": 1.314915066617373e-05, "loss": 0.2159, "step": 18370 }, { "epoch": 0.39813229909215003, "grad_norm": 2.4855761528015137, "learning_rate": 1.3145920197659403e-05, "loss": 0.2144, "step": 18375 }, { "epoch": 0.3982406344116309, "grad_norm": 1.6052199602127075, "learning_rate": 1.314268936473735e-05, "loss": 0.1968, "step": 18380 }, { "epoch": 0.3983489697311117, "grad_norm": 1.6908031702041626, "learning_rate": 1.3139458167781806e-05, "loss": 0.2195, "step": 18385 }, { "epoch": 0.39845730505059257, "grad_norm": 1.7838135957717896, "learning_rate": 1.3136226607167066e-05, "loss": 0.2155, "step": 18390 }, { "epoch": 0.39856564037007347, "grad_norm": 1.6658015251159668, "learning_rate": 1.3132994683267455e-05, "loss": 0.164, "step": 18395 }, { "epoch": 0.3986739756895543, "grad_norm": 0.6379249095916748, "learning_rate": 1.3129762396457339e-05, "loss": 0.1434, "step": 18400 }, { "epoch": 0.39878231100903516, "grad_norm": 2.2853291034698486, "learning_rate": 1.3126529747111137e-05, "loss": 0.27, "step": 18405 }, { "epoch": 0.398890646328516, "grad_norm": 2.633092164993286, "learning_rate": 1.3123296735603297e-05, "loss": 0.1821, "step": 18410 }, { "epoch": 0.39899898164799685, "grad_norm": 1.7289009094238281, "learning_rate": 1.3120063362308318e-05, "loss": 0.1792, "step": 18415 }, { "epoch": 0.39910731696747775, "grad_norm": 1.59949791431427, "learning_rate": 1.3116829627600733e-05, "loss": 0.1803, "step": 18420 }, { "epoch": 0.3992156522869586, "grad_norm": 2.018223762512207, "learning_rate": 1.311359553185513e-05, "loss": 0.1577, "step": 18425 }, { "epoch": 0.39932398760643945, "grad_norm": 1.860070824623108, "learning_rate": 1.3110361075446125e-05, "loss": 0.1672, "step": 18430 }, { "epoch": 0.3994323229259203, "grad_norm": 1.9978244304656982, "learning_rate": 1.3107126258748384e-05, "loss": 0.2244, "step": 18435 }, { "epoch": 0.3995406582454012, "grad_norm": 0.9515318870544434, "learning_rate": 1.310389108213661e-05, "loss": 0.2415, "step": 18440 }, { "epoch": 0.39964899356488204, "grad_norm": 2.3336963653564453, "learning_rate": 1.310065554598555e-05, "loss": 0.1859, "step": 18445 }, { "epoch": 0.3997573288843629, "grad_norm": 1.5087542533874512, "learning_rate": 1.3097419650669993e-05, "loss": 0.1435, "step": 18450 }, { "epoch": 0.39986566420384373, "grad_norm": 2.531679391860962, "learning_rate": 1.3094183396564769e-05, "loss": 0.2598, "step": 18455 }, { "epoch": 0.3999739995233246, "grad_norm": 2.3087542057037354, "learning_rate": 1.309094678404475e-05, "loss": 0.1987, "step": 18460 }, { "epoch": 0.4000823348428055, "grad_norm": 1.75264310836792, "learning_rate": 1.3087709813484843e-05, "loss": 0.1657, "step": 18465 }, { "epoch": 0.4001906701622863, "grad_norm": 2.1722495555877686, "learning_rate": 1.3084472485260014e-05, "loss": 0.2123, "step": 18470 }, { "epoch": 0.40029900548176717, "grad_norm": 2.141340494155884, "learning_rate": 1.3081234799745255e-05, "loss": 0.2328, "step": 18475 }, { "epoch": 0.400407340801248, "grad_norm": 1.9742228984832764, "learning_rate": 1.3077996757315599e-05, "loss": 0.1533, "step": 18480 }, { "epoch": 0.40051567612072886, "grad_norm": 1.9627233743667603, "learning_rate": 1.3074758358346125e-05, "loss": 0.2046, "step": 18485 }, { "epoch": 0.40062401144020976, "grad_norm": 2.711481809616089, "learning_rate": 1.3071519603211952e-05, "loss": 0.1643, "step": 18490 }, { "epoch": 0.4007323467596906, "grad_norm": 1.8062124252319336, "learning_rate": 1.306828049228825e-05, "loss": 0.1902, "step": 18495 }, { "epoch": 0.40084068207917145, "grad_norm": 2.399662733078003, "learning_rate": 1.3065041025950213e-05, "loss": 0.2332, "step": 18500 }, { "epoch": 0.4009490173986523, "grad_norm": 1.9874348640441895, "learning_rate": 1.306180120457309e-05, "loss": 0.1578, "step": 18505 }, { "epoch": 0.40105735271813314, "grad_norm": 1.146345615386963, "learning_rate": 1.3058561028532161e-05, "loss": 0.1702, "step": 18510 }, { "epoch": 0.40116568803761404, "grad_norm": 2.511016845703125, "learning_rate": 1.3055320498202757e-05, "loss": 0.2192, "step": 18515 }, { "epoch": 0.4012740233570949, "grad_norm": 2.2319557666778564, "learning_rate": 1.305207961396024e-05, "loss": 0.2209, "step": 18520 }, { "epoch": 0.40138235867657573, "grad_norm": 1.5437498092651367, "learning_rate": 1.304883837618002e-05, "loss": 0.201, "step": 18525 }, { "epoch": 0.4014906939960566, "grad_norm": 1.4907782077789307, "learning_rate": 1.3045596785237545e-05, "loss": 0.1904, "step": 18530 }, { "epoch": 0.4015990293155374, "grad_norm": 2.237368106842041, "learning_rate": 1.3042354841508307e-05, "loss": 0.2942, "step": 18535 }, { "epoch": 0.4017073646350183, "grad_norm": 2.0825788974761963, "learning_rate": 1.3039112545367831e-05, "loss": 0.2376, "step": 18540 }, { "epoch": 0.4018156999544992, "grad_norm": 2.1128406524658203, "learning_rate": 1.3035869897191696e-05, "loss": 0.1676, "step": 18545 }, { "epoch": 0.40192403527398, "grad_norm": 1.938315510749817, "learning_rate": 1.303262689735551e-05, "loss": 0.3013, "step": 18550 }, { "epoch": 0.40203237059346086, "grad_norm": 1.6363016366958618, "learning_rate": 1.3029383546234928e-05, "loss": 0.2316, "step": 18555 }, { "epoch": 0.40214070591294176, "grad_norm": 1.2784234285354614, "learning_rate": 1.3026139844205642e-05, "loss": 0.3006, "step": 18560 }, { "epoch": 0.4022490412324226, "grad_norm": 2.299520969390869, "learning_rate": 1.3022895791643384e-05, "loss": 0.1807, "step": 18565 }, { "epoch": 0.40235737655190346, "grad_norm": 1.305992841720581, "learning_rate": 1.3019651388923935e-05, "loss": 0.2071, "step": 18570 }, { "epoch": 0.4024657118713843, "grad_norm": 1.8124518394470215, "learning_rate": 1.3016406636423105e-05, "loss": 0.1885, "step": 18575 }, { "epoch": 0.40257404719086515, "grad_norm": 2.6074411869049072, "learning_rate": 1.3013161534516754e-05, "loss": 0.2184, "step": 18580 }, { "epoch": 0.40268238251034605, "grad_norm": 2.061293125152588, "learning_rate": 1.3009916083580772e-05, "loss": 0.2254, "step": 18585 }, { "epoch": 0.4027907178298269, "grad_norm": 1.3785089254379272, "learning_rate": 1.3006670283991102e-05, "loss": 0.2491, "step": 18590 }, { "epoch": 0.40289905314930774, "grad_norm": 3.10444974899292, "learning_rate": 1.3003424136123721e-05, "loss": 0.2798, "step": 18595 }, { "epoch": 0.4030073884687886, "grad_norm": 2.0588982105255127, "learning_rate": 1.3000177640354648e-05, "loss": 0.174, "step": 18600 }, { "epoch": 0.40311572378826943, "grad_norm": 2.834601402282715, "learning_rate": 1.2996930797059934e-05, "loss": 0.2174, "step": 18605 }, { "epoch": 0.40322405910775033, "grad_norm": 2.3393423557281494, "learning_rate": 1.2993683606615682e-05, "loss": 0.182, "step": 18610 }, { "epoch": 0.4033323944272312, "grad_norm": 1.4468321800231934, "learning_rate": 1.2990436069398026e-05, "loss": 0.1871, "step": 18615 }, { "epoch": 0.403440729746712, "grad_norm": 2.2140488624572754, "learning_rate": 1.2987188185783152e-05, "loss": 0.2467, "step": 18620 }, { "epoch": 0.40354906506619287, "grad_norm": 1.6285862922668457, "learning_rate": 1.2983939956147275e-05, "loss": 0.1532, "step": 18625 }, { "epoch": 0.4036574003856737, "grad_norm": 1.3247203826904297, "learning_rate": 1.2980691380866655e-05, "loss": 0.1434, "step": 18630 }, { "epoch": 0.4037657357051546, "grad_norm": 2.6026763916015625, "learning_rate": 1.2977442460317586e-05, "loss": 0.2781, "step": 18635 }, { "epoch": 0.40387407102463546, "grad_norm": 1.722639799118042, "learning_rate": 1.297419319487641e-05, "loss": 0.232, "step": 18640 }, { "epoch": 0.4039824063441163, "grad_norm": 1.2573719024658203, "learning_rate": 1.2970943584919509e-05, "loss": 0.2439, "step": 18645 }, { "epoch": 0.40409074166359715, "grad_norm": 1.0805267095565796, "learning_rate": 1.2967693630823295e-05, "loss": 0.1592, "step": 18650 }, { "epoch": 0.404199076983078, "grad_norm": 1.429016351699829, "learning_rate": 1.2964443332964232e-05, "loss": 0.1354, "step": 18655 }, { "epoch": 0.4043074123025589, "grad_norm": 1.5904433727264404, "learning_rate": 1.2961192691718813e-05, "loss": 0.1894, "step": 18660 }, { "epoch": 0.40441574762203975, "grad_norm": 1.4441325664520264, "learning_rate": 1.2957941707463584e-05, "loss": 0.2003, "step": 18665 }, { "epoch": 0.4045240829415206, "grad_norm": 1.7117916345596313, "learning_rate": 1.2954690380575114e-05, "loss": 0.1882, "step": 18670 }, { "epoch": 0.40463241826100144, "grad_norm": 1.7708110809326172, "learning_rate": 1.2951438711430026e-05, "loss": 0.2062, "step": 18675 }, { "epoch": 0.4047407535804823, "grad_norm": 1.8140599727630615, "learning_rate": 1.2948186700404979e-05, "loss": 0.1238, "step": 18680 }, { "epoch": 0.4048490888999632, "grad_norm": 1.9157350063323975, "learning_rate": 1.2944934347876664e-05, "loss": 0.3081, "step": 18685 }, { "epoch": 0.40495742421944403, "grad_norm": 1.7394486665725708, "learning_rate": 1.2941681654221817e-05, "loss": 0.1811, "step": 18690 }, { "epoch": 0.4050657595389249, "grad_norm": 0.8404908180236816, "learning_rate": 1.2938428619817223e-05, "loss": 0.1797, "step": 18695 }, { "epoch": 0.4051740948584057, "grad_norm": 1.7909718751907349, "learning_rate": 1.2935175245039689e-05, "loss": 0.1921, "step": 18700 }, { "epoch": 0.4052824301778866, "grad_norm": 1.7627934217453003, "learning_rate": 1.2931921530266067e-05, "loss": 0.1606, "step": 18705 }, { "epoch": 0.40539076549736747, "grad_norm": 2.5382885932922363, "learning_rate": 1.292866747587326e-05, "loss": 0.2201, "step": 18710 }, { "epoch": 0.4054991008168483, "grad_norm": 1.645346760749817, "learning_rate": 1.2925413082238196e-05, "loss": 0.1834, "step": 18715 }, { "epoch": 0.40560743613632916, "grad_norm": 2.009915351867676, "learning_rate": 1.2922158349737852e-05, "loss": 0.1729, "step": 18720 }, { "epoch": 0.40571577145581, "grad_norm": 1.2443779706954956, "learning_rate": 1.2918903278749235e-05, "loss": 0.25, "step": 18725 }, { "epoch": 0.4058241067752909, "grad_norm": 1.9370100498199463, "learning_rate": 1.2915647869649397e-05, "loss": 0.2171, "step": 18730 }, { "epoch": 0.40593244209477175, "grad_norm": 1.481436014175415, "learning_rate": 1.2912392122815432e-05, "loss": 0.2715, "step": 18735 }, { "epoch": 0.4060407774142526, "grad_norm": 2.0970091819763184, "learning_rate": 1.2909136038624465e-05, "loss": 0.26, "step": 18740 }, { "epoch": 0.40614911273373344, "grad_norm": 1.6242060661315918, "learning_rate": 1.290587961745367e-05, "loss": 0.1343, "step": 18745 }, { "epoch": 0.4062574480532143, "grad_norm": 1.5530223846435547, "learning_rate": 1.2902622859680248e-05, "loss": 0.1139, "step": 18750 }, { "epoch": 0.4063657833726952, "grad_norm": 2.304253339767456, "learning_rate": 1.2899365765681451e-05, "loss": 0.27, "step": 18755 }, { "epoch": 0.40647411869217603, "grad_norm": 2.316948890686035, "learning_rate": 1.2896108335834562e-05, "loss": 0.1641, "step": 18760 }, { "epoch": 0.4065824540116569, "grad_norm": 2.4852724075317383, "learning_rate": 1.2892850570516907e-05, "loss": 0.2028, "step": 18765 }, { "epoch": 0.4066907893311377, "grad_norm": 2.236621618270874, "learning_rate": 1.2889592470105848e-05, "loss": 0.2163, "step": 18770 }, { "epoch": 0.40679912465061857, "grad_norm": 2.449044942855835, "learning_rate": 1.2886334034978785e-05, "loss": 0.1582, "step": 18775 }, { "epoch": 0.40690745997009947, "grad_norm": 1.8244373798370361, "learning_rate": 1.288307526551316e-05, "loss": 0.206, "step": 18780 }, { "epoch": 0.4070157952895803, "grad_norm": 1.5126440525054932, "learning_rate": 1.2879816162086458e-05, "loss": 0.1618, "step": 18785 }, { "epoch": 0.40712413060906116, "grad_norm": 1.9944019317626953, "learning_rate": 1.2876556725076192e-05, "loss": 0.2417, "step": 18790 }, { "epoch": 0.407232465928542, "grad_norm": 1.9302622079849243, "learning_rate": 1.2873296954859921e-05, "loss": 0.194, "step": 18795 }, { "epoch": 0.40734080124802285, "grad_norm": 2.0038275718688965, "learning_rate": 1.287003685181524e-05, "loss": 0.1778, "step": 18800 }, { "epoch": 0.40744913656750376, "grad_norm": 1.8318531513214111, "learning_rate": 1.2866776416319781e-05, "loss": 0.1954, "step": 18805 }, { "epoch": 0.4075574718869846, "grad_norm": 1.3484257459640503, "learning_rate": 1.286351564875122e-05, "loss": 0.3017, "step": 18810 }, { "epoch": 0.40766580720646545, "grad_norm": 1.6174581050872803, "learning_rate": 1.2860254549487268e-05, "loss": 0.192, "step": 18815 }, { "epoch": 0.4077741425259463, "grad_norm": 1.4937713146209717, "learning_rate": 1.285699311890567e-05, "loss": 0.2757, "step": 18820 }, { "epoch": 0.4078824778454272, "grad_norm": 1.514501690864563, "learning_rate": 1.2853731357384215e-05, "loss": 0.1883, "step": 18825 }, { "epoch": 0.40799081316490804, "grad_norm": 2.2800445556640625, "learning_rate": 1.2850469265300736e-05, "loss": 0.172, "step": 18830 }, { "epoch": 0.4080991484843889, "grad_norm": 1.5346004962921143, "learning_rate": 1.2847206843033091e-05, "loss": 0.1799, "step": 18835 }, { "epoch": 0.40820748380386973, "grad_norm": 1.8201093673706055, "learning_rate": 1.2843944090959186e-05, "loss": 0.2109, "step": 18840 }, { "epoch": 0.4083158191233506, "grad_norm": 2.13057017326355, "learning_rate": 1.2840681009456959e-05, "loss": 0.1844, "step": 18845 }, { "epoch": 0.4084241544428315, "grad_norm": 2.101426601409912, "learning_rate": 1.2837417598904386e-05, "loss": 0.1776, "step": 18850 }, { "epoch": 0.4085324897623123, "grad_norm": 1.9584834575653076, "learning_rate": 1.2834153859679494e-05, "loss": 0.2109, "step": 18855 }, { "epoch": 0.40864082508179317, "grad_norm": 2.031186103820801, "learning_rate": 1.2830889792160333e-05, "loss": 0.2121, "step": 18860 }, { "epoch": 0.408749160401274, "grad_norm": 1.754419207572937, "learning_rate": 1.2827625396724995e-05, "loss": 0.1734, "step": 18865 }, { "epoch": 0.40885749572075486, "grad_norm": 2.6725902557373047, "learning_rate": 1.2824360673751612e-05, "loss": 0.2254, "step": 18870 }, { "epoch": 0.40896583104023576, "grad_norm": 1.1956855058670044, "learning_rate": 1.2821095623618356e-05, "loss": 0.1599, "step": 18875 }, { "epoch": 0.4090741663597166, "grad_norm": 2.0716729164123535, "learning_rate": 1.2817830246703431e-05, "loss": 0.1496, "step": 18880 }, { "epoch": 0.40918250167919745, "grad_norm": 1.9555786848068237, "learning_rate": 1.2814564543385082e-05, "loss": 0.238, "step": 18885 }, { "epoch": 0.4092908369986783, "grad_norm": 2.082925319671631, "learning_rate": 1.2811298514041592e-05, "loss": 0.1767, "step": 18890 }, { "epoch": 0.40939917231815914, "grad_norm": 2.1155505180358887, "learning_rate": 1.2808032159051284e-05, "loss": 0.1809, "step": 18895 }, { "epoch": 0.40950750763764004, "grad_norm": 2.036790132522583, "learning_rate": 1.2804765478792513e-05, "loss": 0.2244, "step": 18900 }, { "epoch": 0.4096158429571209, "grad_norm": 1.3411670923233032, "learning_rate": 1.2801498473643679e-05, "loss": 0.2374, "step": 18905 }, { "epoch": 0.40972417827660174, "grad_norm": 1.2599098682403564, "learning_rate": 1.2798231143983211e-05, "loss": 0.2929, "step": 18910 }, { "epoch": 0.4098325135960826, "grad_norm": 1.9325147867202759, "learning_rate": 1.2794963490189585e-05, "loss": 0.2288, "step": 18915 }, { "epoch": 0.4099408489155634, "grad_norm": 1.6268292665481567, "learning_rate": 1.2791695512641307e-05, "loss": 0.2604, "step": 18920 }, { "epoch": 0.41004918423504433, "grad_norm": 1.9335658550262451, "learning_rate": 1.2788427211716924e-05, "loss": 0.1767, "step": 18925 }, { "epoch": 0.4101575195545252, "grad_norm": 1.9807798862457275, "learning_rate": 1.2785158587795015e-05, "loss": 0.228, "step": 18930 }, { "epoch": 0.410265854874006, "grad_norm": 2.9276621341705322, "learning_rate": 1.2781889641254208e-05, "loss": 0.184, "step": 18935 }, { "epoch": 0.41037419019348687, "grad_norm": 2.260694980621338, "learning_rate": 1.277862037247316e-05, "loss": 0.1862, "step": 18940 }, { "epoch": 0.4104825255129677, "grad_norm": 2.0211851596832275, "learning_rate": 1.2775350781830565e-05, "loss": 0.1967, "step": 18945 }, { "epoch": 0.4105908608324486, "grad_norm": 2.403899669647217, "learning_rate": 1.2772080869705152e-05, "loss": 0.1438, "step": 18950 }, { "epoch": 0.41069919615192946, "grad_norm": 1.98488187789917, "learning_rate": 1.2768810636475701e-05, "loss": 0.2294, "step": 18955 }, { "epoch": 0.4108075314714103, "grad_norm": 1.403430461883545, "learning_rate": 1.2765540082521017e-05, "loss": 0.1604, "step": 18960 }, { "epoch": 0.41091586679089115, "grad_norm": 1.7159619331359863, "learning_rate": 1.2762269208219938e-05, "loss": 0.2349, "step": 18965 }, { "epoch": 0.41102420211037205, "grad_norm": 1.5472557544708252, "learning_rate": 1.275899801395135e-05, "loss": 0.1349, "step": 18970 }, { "epoch": 0.4111325374298529, "grad_norm": 2.4287140369415283, "learning_rate": 1.2755726500094173e-05, "loss": 0.2087, "step": 18975 }, { "epoch": 0.41124087274933374, "grad_norm": 3.0362982749938965, "learning_rate": 1.2752454667027361e-05, "loss": 0.264, "step": 18980 }, { "epoch": 0.4113492080688146, "grad_norm": 2.0058960914611816, "learning_rate": 1.2749182515129908e-05, "loss": 0.2393, "step": 18985 }, { "epoch": 0.41145754338829543, "grad_norm": 1.5689131021499634, "learning_rate": 1.2745910044780843e-05, "loss": 0.2034, "step": 18990 }, { "epoch": 0.41156587870777633, "grad_norm": 1.829154372215271, "learning_rate": 1.2742637256359235e-05, "loss": 0.285, "step": 18995 }, { "epoch": 0.4116742140272572, "grad_norm": 1.8529828786849976, "learning_rate": 1.2739364150244182e-05, "loss": 0.1835, "step": 19000 }, { "epoch": 0.411782549346738, "grad_norm": 2.72247052192688, "learning_rate": 1.273609072681483e-05, "loss": 0.2045, "step": 19005 }, { "epoch": 0.41189088466621887, "grad_norm": 1.6711159944534302, "learning_rate": 1.2732816986450354e-05, "loss": 0.1698, "step": 19010 }, { "epoch": 0.4119992199856997, "grad_norm": 1.7295435667037964, "learning_rate": 1.2729542929529967e-05, "loss": 0.2103, "step": 19015 }, { "epoch": 0.4121075553051806, "grad_norm": 2.0472664833068848, "learning_rate": 1.2726268556432919e-05, "loss": 0.1497, "step": 19020 }, { "epoch": 0.41221589062466146, "grad_norm": 2.4138245582580566, "learning_rate": 1.27229938675385e-05, "loss": 0.2239, "step": 19025 }, { "epoch": 0.4123242259441423, "grad_norm": 1.2216686010360718, "learning_rate": 1.2719718863226032e-05, "loss": 0.1474, "step": 19030 }, { "epoch": 0.41243256126362315, "grad_norm": 1.6840879917144775, "learning_rate": 1.2716443543874877e-05, "loss": 0.2166, "step": 19035 }, { "epoch": 0.412540896583104, "grad_norm": 1.4159494638442993, "learning_rate": 1.2713167909864425e-05, "loss": 0.2374, "step": 19040 }, { "epoch": 0.4126492319025849, "grad_norm": 1.2385677099227905, "learning_rate": 1.270989196157412e-05, "loss": 0.151, "step": 19045 }, { "epoch": 0.41275756722206575, "grad_norm": 3.0261411666870117, "learning_rate": 1.270661569938342e-05, "loss": 0.1561, "step": 19050 }, { "epoch": 0.4128659025415466, "grad_norm": 2.007096529006958, "learning_rate": 1.2703339123671839e-05, "loss": 0.1992, "step": 19055 }, { "epoch": 0.41297423786102744, "grad_norm": 2.5309951305389404, "learning_rate": 1.2700062234818915e-05, "loss": 0.1723, "step": 19060 }, { "epoch": 0.4130825731805083, "grad_norm": 2.450978994369507, "learning_rate": 1.2696785033204231e-05, "loss": 0.206, "step": 19065 }, { "epoch": 0.4131909084999892, "grad_norm": 2.6431069374084473, "learning_rate": 1.2693507519207394e-05, "loss": 0.1537, "step": 19070 }, { "epoch": 0.41329924381947003, "grad_norm": 2.435396194458008, "learning_rate": 1.2690229693208061e-05, "loss": 0.2878, "step": 19075 }, { "epoch": 0.4134075791389509, "grad_norm": 2.0002613067626953, "learning_rate": 1.268695155558592e-05, "loss": 0.2004, "step": 19080 }, { "epoch": 0.4135159144584317, "grad_norm": 1.6848421096801758, "learning_rate": 1.2683673106720693e-05, "loss": 0.2865, "step": 19085 }, { "epoch": 0.4136242497779126, "grad_norm": 3.982630729675293, "learning_rate": 1.2680394346992132e-05, "loss": 0.1904, "step": 19090 }, { "epoch": 0.41373258509739347, "grad_norm": 2.36617112159729, "learning_rate": 1.2677115276780044e-05, "loss": 0.316, "step": 19095 }, { "epoch": 0.4138409204168743, "grad_norm": 1.8227136135101318, "learning_rate": 1.2673835896464253e-05, "loss": 0.2831, "step": 19100 }, { "epoch": 0.41394925573635516, "grad_norm": 2.3358652591705322, "learning_rate": 1.2670556206424624e-05, "loss": 0.1914, "step": 19105 }, { "epoch": 0.414057591055836, "grad_norm": 2.090517282485962, "learning_rate": 1.2667276207041069e-05, "loss": 0.3038, "step": 19110 }, { "epoch": 0.4141659263753169, "grad_norm": 2.8342578411102295, "learning_rate": 1.2663995898693515e-05, "loss": 0.355, "step": 19115 }, { "epoch": 0.41427426169479775, "grad_norm": 2.183173656463623, "learning_rate": 1.2660715281761949e-05, "loss": 0.2493, "step": 19120 }, { "epoch": 0.4143825970142786, "grad_norm": 1.8797773122787476, "learning_rate": 1.2657434356626373e-05, "loss": 0.2597, "step": 19125 }, { "epoch": 0.41449093233375944, "grad_norm": 1.5929274559020996, "learning_rate": 1.2654153123666832e-05, "loss": 0.2365, "step": 19130 }, { "epoch": 0.4145992676532403, "grad_norm": 2.3208770751953125, "learning_rate": 1.2650871583263415e-05, "loss": 0.27, "step": 19135 }, { "epoch": 0.4147076029727212, "grad_norm": 2.4975674152374268, "learning_rate": 1.2647589735796233e-05, "loss": 0.2314, "step": 19140 }, { "epoch": 0.41481593829220204, "grad_norm": 1.0864452123641968, "learning_rate": 1.264430758164544e-05, "loss": 0.1106, "step": 19145 }, { "epoch": 0.4149242736116829, "grad_norm": 2.6578369140625, "learning_rate": 1.2641025121191226e-05, "loss": 0.2125, "step": 19150 }, { "epoch": 0.4150326089311637, "grad_norm": 1.7660125494003296, "learning_rate": 1.2637742354813815e-05, "loss": 0.2638, "step": 19155 }, { "epoch": 0.4151409442506446, "grad_norm": 1.2701709270477295, "learning_rate": 1.2634459282893468e-05, "loss": 0.2213, "step": 19160 }, { "epoch": 0.4152492795701255, "grad_norm": 1.8459036350250244, "learning_rate": 1.2631175905810476e-05, "loss": 0.145, "step": 19165 }, { "epoch": 0.4153576148896063, "grad_norm": 2.0724546909332275, "learning_rate": 1.2627892223945169e-05, "loss": 0.269, "step": 19170 }, { "epoch": 0.41546595020908716, "grad_norm": 1.5041601657867432, "learning_rate": 1.2624608237677914e-05, "loss": 0.1843, "step": 19175 }, { "epoch": 0.415574285528568, "grad_norm": 1.4203650951385498, "learning_rate": 1.262132394738911e-05, "loss": 0.2502, "step": 19180 }, { "epoch": 0.41568262084804886, "grad_norm": 1.573593020439148, "learning_rate": 1.2618039353459195e-05, "loss": 0.192, "step": 19185 }, { "epoch": 0.41579095616752976, "grad_norm": 1.8381518125534058, "learning_rate": 1.2614754456268638e-05, "loss": 0.1891, "step": 19190 }, { "epoch": 0.4158992914870106, "grad_norm": 1.6231989860534668, "learning_rate": 1.2611469256197947e-05, "loss": 0.2481, "step": 19195 }, { "epoch": 0.41600762680649145, "grad_norm": 1.594157099723816, "learning_rate": 1.2608183753627664e-05, "loss": 0.1653, "step": 19200 }, { "epoch": 0.4161159621259723, "grad_norm": 0.40097776055336, "learning_rate": 1.2604897948938367e-05, "loss": 0.1904, "step": 19205 }, { "epoch": 0.41622429744545314, "grad_norm": 2.5584094524383545, "learning_rate": 1.2601611842510657e-05, "loss": 0.16, "step": 19210 }, { "epoch": 0.41633263276493404, "grad_norm": 1.2443146705627441, "learning_rate": 1.2598325434725189e-05, "loss": 0.1877, "step": 19215 }, { "epoch": 0.4164409680844149, "grad_norm": 1.749605655670166, "learning_rate": 1.2595038725962643e-05, "loss": 0.2234, "step": 19220 }, { "epoch": 0.41654930340389573, "grad_norm": 2.281348705291748, "learning_rate": 1.2591751716603735e-05, "loss": 0.2615, "step": 19225 }, { "epoch": 0.4166576387233766, "grad_norm": 1.12075936794281, "learning_rate": 1.2588464407029216e-05, "loss": 0.1545, "step": 19230 }, { "epoch": 0.4167659740428575, "grad_norm": 2.0064804553985596, "learning_rate": 1.2585176797619866e-05, "loss": 0.3049, "step": 19235 }, { "epoch": 0.4168743093623383, "grad_norm": 1.5621356964111328, "learning_rate": 1.2581888888756517e-05, "loss": 0.2026, "step": 19240 }, { "epoch": 0.41698264468181917, "grad_norm": 2.072059154510498, "learning_rate": 1.2578600680820014e-05, "loss": 0.183, "step": 19245 }, { "epoch": 0.4170909800013, "grad_norm": 1.495431900024414, "learning_rate": 1.2575312174191247e-05, "loss": 0.1793, "step": 19250 }, { "epoch": 0.41719931532078086, "grad_norm": 1.7303322553634644, "learning_rate": 1.2572023369251146e-05, "loss": 0.1948, "step": 19255 }, { "epoch": 0.41730765064026176, "grad_norm": 1.667259931564331, "learning_rate": 1.2568734266380664e-05, "loss": 0.214, "step": 19260 }, { "epoch": 0.4174159859597426, "grad_norm": 1.3807955980300903, "learning_rate": 1.2565444865960798e-05, "loss": 0.2435, "step": 19265 }, { "epoch": 0.41752432127922345, "grad_norm": 1.8910068273544312, "learning_rate": 1.2562155168372574e-05, "loss": 0.2078, "step": 19270 }, { "epoch": 0.4176326565987043, "grad_norm": 1.059820294380188, "learning_rate": 1.2558865173997058e-05, "loss": 0.1604, "step": 19275 }, { "epoch": 0.41774099191818514, "grad_norm": 1.52948796749115, "learning_rate": 1.255557488321534e-05, "loss": 0.1657, "step": 19280 }, { "epoch": 0.41784932723766605, "grad_norm": 1.2504738569259644, "learning_rate": 1.2552284296408554e-05, "loss": 0.1907, "step": 19285 }, { "epoch": 0.4179576625571469, "grad_norm": 1.8957849740982056, "learning_rate": 1.2548993413957868e-05, "loss": 0.207, "step": 19290 }, { "epoch": 0.41806599787662774, "grad_norm": 1.225531816482544, "learning_rate": 1.2545702236244477e-05, "loss": 0.2723, "step": 19295 }, { "epoch": 0.4181743331961086, "grad_norm": 2.0121910572052, "learning_rate": 1.2542410763649617e-05, "loss": 0.2221, "step": 19300 }, { "epoch": 0.41828266851558943, "grad_norm": 1.281807780265808, "learning_rate": 1.2539118996554555e-05, "loss": 0.2399, "step": 19305 }, { "epoch": 0.41839100383507033, "grad_norm": 1.903159260749817, "learning_rate": 1.2535826935340593e-05, "loss": 0.2262, "step": 19310 }, { "epoch": 0.4184993391545512, "grad_norm": 1.6703320741653442, "learning_rate": 1.2532534580389068e-05, "loss": 0.2329, "step": 19315 }, { "epoch": 0.418607674474032, "grad_norm": 2.8176724910736084, "learning_rate": 1.252924193208135e-05, "loss": 0.2764, "step": 19320 }, { "epoch": 0.41871600979351287, "grad_norm": 1.461043357849121, "learning_rate": 1.2525948990798843e-05, "loss": 0.1415, "step": 19325 }, { "epoch": 0.4188243451129937, "grad_norm": 2.1538286209106445, "learning_rate": 1.2522655756922983e-05, "loss": 0.2949, "step": 19330 }, { "epoch": 0.4189326804324746, "grad_norm": 1.6489096879959106, "learning_rate": 1.2519362230835238e-05, "loss": 0.2034, "step": 19335 }, { "epoch": 0.41904101575195546, "grad_norm": 1.893416404724121, "learning_rate": 1.2516068412917126e-05, "loss": 0.2169, "step": 19340 }, { "epoch": 0.4191493510714363, "grad_norm": 1.7192623615264893, "learning_rate": 1.2512774303550175e-05, "loss": 0.2033, "step": 19345 }, { "epoch": 0.41925768639091715, "grad_norm": 1.423453688621521, "learning_rate": 1.2509479903115965e-05, "loss": 0.2237, "step": 19350 }, { "epoch": 0.41936602171039805, "grad_norm": 2.426544427871704, "learning_rate": 1.2506185211996095e-05, "loss": 0.1385, "step": 19355 }, { "epoch": 0.4194743570298789, "grad_norm": 2.072495698928833, "learning_rate": 1.2502890230572217e-05, "loss": 0.1936, "step": 19360 }, { "epoch": 0.41958269234935974, "grad_norm": 1.5006871223449707, "learning_rate": 1.2499594959225998e-05, "loss": 0.2257, "step": 19365 }, { "epoch": 0.4196910276688406, "grad_norm": 1.20123291015625, "learning_rate": 1.2496299398339144e-05, "loss": 0.1593, "step": 19370 }, { "epoch": 0.41979936298832143, "grad_norm": 2.523118734359741, "learning_rate": 1.24930035482934e-05, "loss": 0.1603, "step": 19375 }, { "epoch": 0.41990769830780234, "grad_norm": 1.5226221084594727, "learning_rate": 1.2489707409470539e-05, "loss": 0.1817, "step": 19380 }, { "epoch": 0.4200160336272832, "grad_norm": 1.7378253936767578, "learning_rate": 1.2486410982252374e-05, "loss": 0.167, "step": 19385 }, { "epoch": 0.420124368946764, "grad_norm": 1.780158281326294, "learning_rate": 1.2483114267020742e-05, "loss": 0.1998, "step": 19390 }, { "epoch": 0.42023270426624487, "grad_norm": 1.4526805877685547, "learning_rate": 1.247981726415752e-05, "loss": 0.2155, "step": 19395 }, { "epoch": 0.4203410395857257, "grad_norm": 1.3889706134796143, "learning_rate": 1.2476519974044611e-05, "loss": 0.1576, "step": 19400 }, { "epoch": 0.4204493749052066, "grad_norm": 1.7574130296707153, "learning_rate": 1.2473222397063966e-05, "loss": 0.166, "step": 19405 }, { "epoch": 0.42055771022468746, "grad_norm": 1.7617456912994385, "learning_rate": 1.2469924533597554e-05, "loss": 0.181, "step": 19410 }, { "epoch": 0.4206660455441683, "grad_norm": 2.227126359939575, "learning_rate": 1.2466626384027386e-05, "loss": 0.2021, "step": 19415 }, { "epoch": 0.42077438086364916, "grad_norm": 1.9006779193878174, "learning_rate": 1.2463327948735497e-05, "loss": 0.2232, "step": 19420 }, { "epoch": 0.42088271618313, "grad_norm": 1.1434710025787354, "learning_rate": 1.2460029228103969e-05, "loss": 0.2137, "step": 19425 }, { "epoch": 0.4209910515026109, "grad_norm": 1.8096281290054321, "learning_rate": 1.2456730222514904e-05, "loss": 0.2538, "step": 19430 }, { "epoch": 0.42109938682209175, "grad_norm": 1.9885205030441284, "learning_rate": 1.2453430932350445e-05, "loss": 0.1812, "step": 19435 }, { "epoch": 0.4212077221415726, "grad_norm": 2.1387088298797607, "learning_rate": 1.2450131357992767e-05, "loss": 0.1807, "step": 19440 }, { "epoch": 0.42131605746105344, "grad_norm": 0.9287348389625549, "learning_rate": 1.2446831499824075e-05, "loss": 0.1946, "step": 19445 }, { "epoch": 0.4214243927805343, "grad_norm": 1.8827530145645142, "learning_rate": 1.2443531358226607e-05, "loss": 0.243, "step": 19450 }, { "epoch": 0.4215327281000152, "grad_norm": 1.6459087133407593, "learning_rate": 1.2440230933582632e-05, "loss": 0.1893, "step": 19455 }, { "epoch": 0.42164106341949603, "grad_norm": 1.8803497552871704, "learning_rate": 1.2436930226274457e-05, "loss": 0.2785, "step": 19460 }, { "epoch": 0.4217493987389769, "grad_norm": 1.0922930240631104, "learning_rate": 1.2433629236684424e-05, "loss": 0.1535, "step": 19465 }, { "epoch": 0.4218577340584577, "grad_norm": 1.869428277015686, "learning_rate": 1.24303279651949e-05, "loss": 0.2368, "step": 19470 }, { "epoch": 0.42196606937793857, "grad_norm": 1.7059810161590576, "learning_rate": 1.2427026412188285e-05, "loss": 0.1895, "step": 19475 }, { "epoch": 0.42207440469741947, "grad_norm": 2.2533280849456787, "learning_rate": 1.2423724578047018e-05, "loss": 0.1884, "step": 19480 }, { "epoch": 0.4221827400169003, "grad_norm": 2.2418410778045654, "learning_rate": 1.2420422463153567e-05, "loss": 0.1774, "step": 19485 }, { "epoch": 0.42229107533638116, "grad_norm": 2.4098594188690186, "learning_rate": 1.241712006789043e-05, "loss": 0.2082, "step": 19490 }, { "epoch": 0.422399410655862, "grad_norm": 2.3132543563842773, "learning_rate": 1.2413817392640143e-05, "loss": 0.2366, "step": 19495 }, { "epoch": 0.4225077459753429, "grad_norm": 1.5473709106445312, "learning_rate": 1.2410514437785268e-05, "loss": 0.205, "step": 19500 }, { "epoch": 0.42261608129482375, "grad_norm": 1.3135632276535034, "learning_rate": 1.2407211203708407e-05, "loss": 0.2371, "step": 19505 }, { "epoch": 0.4227244166143046, "grad_norm": 1.2404985427856445, "learning_rate": 1.2403907690792188e-05, "loss": 0.1976, "step": 19510 }, { "epoch": 0.42283275193378544, "grad_norm": 1.6287492513656616, "learning_rate": 1.2400603899419273e-05, "loss": 0.2382, "step": 19515 }, { "epoch": 0.4229410872532663, "grad_norm": 1.486284613609314, "learning_rate": 1.2397299829972357e-05, "loss": 0.2058, "step": 19520 }, { "epoch": 0.4230494225727472, "grad_norm": 3.062760591506958, "learning_rate": 1.2393995482834167e-05, "loss": 0.1607, "step": 19525 }, { "epoch": 0.42315775789222804, "grad_norm": 1.1994547843933105, "learning_rate": 1.2390690858387462e-05, "loss": 0.2525, "step": 19530 }, { "epoch": 0.4232660932117089, "grad_norm": 1.3438076972961426, "learning_rate": 1.2387385957015034e-05, "loss": 0.2446, "step": 19535 }, { "epoch": 0.42337442853118973, "grad_norm": 2.6476595401763916, "learning_rate": 1.2384080779099708e-05, "loss": 0.2171, "step": 19540 }, { "epoch": 0.4234827638506706, "grad_norm": 1.651848554611206, "learning_rate": 1.2380775325024336e-05, "loss": 0.1703, "step": 19545 }, { "epoch": 0.4235910991701515, "grad_norm": 2.680250406265259, "learning_rate": 1.2377469595171805e-05, "loss": 0.1874, "step": 19550 }, { "epoch": 0.4236994344896323, "grad_norm": 1.5440455675125122, "learning_rate": 1.2374163589925037e-05, "loss": 0.1395, "step": 19555 }, { "epoch": 0.42380776980911317, "grad_norm": 2.259976625442505, "learning_rate": 1.2370857309666981e-05, "loss": 0.221, "step": 19560 }, { "epoch": 0.423916105128594, "grad_norm": 1.8104342222213745, "learning_rate": 1.2367550754780625e-05, "loss": 0.1806, "step": 19565 }, { "epoch": 0.42402444044807486, "grad_norm": 1.435248851776123, "learning_rate": 1.2364243925648977e-05, "loss": 0.1984, "step": 19570 }, { "epoch": 0.42413277576755576, "grad_norm": 1.7851003408432007, "learning_rate": 1.2360936822655086e-05, "loss": 0.19, "step": 19575 }, { "epoch": 0.4242411110870366, "grad_norm": 1.9511967897415161, "learning_rate": 1.2357629446182031e-05, "loss": 0.2351, "step": 19580 }, { "epoch": 0.42434944640651745, "grad_norm": 1.9863018989562988, "learning_rate": 1.2354321796612925e-05, "loss": 0.1755, "step": 19585 }, { "epoch": 0.4244577817259983, "grad_norm": 1.5575060844421387, "learning_rate": 1.2351013874330905e-05, "loss": 0.2681, "step": 19590 }, { "epoch": 0.42456611704547914, "grad_norm": 1.446662425994873, "learning_rate": 1.2347705679719145e-05, "loss": 0.2801, "step": 19595 }, { "epoch": 0.42467445236496004, "grad_norm": 1.9757710695266724, "learning_rate": 1.2344397213160849e-05, "loss": 0.21, "step": 19600 }, { "epoch": 0.4247827876844409, "grad_norm": 1.8977495431900024, "learning_rate": 1.2341088475039258e-05, "loss": 0.1531, "step": 19605 }, { "epoch": 0.42489112300392173, "grad_norm": 1.3322116136550903, "learning_rate": 1.2337779465737635e-05, "loss": 0.2426, "step": 19610 }, { "epoch": 0.4249994583234026, "grad_norm": 2.3956985473632812, "learning_rate": 1.2334470185639283e-05, "loss": 0.125, "step": 19615 }, { "epoch": 0.4251077936428835, "grad_norm": 1.8123438358306885, "learning_rate": 1.2331160635127525e-05, "loss": 0.1769, "step": 19620 }, { "epoch": 0.4252161289623643, "grad_norm": 1.6269689798355103, "learning_rate": 1.2327850814585732e-05, "loss": 0.2329, "step": 19625 }, { "epoch": 0.42532446428184517, "grad_norm": 2.4392714500427246, "learning_rate": 1.2324540724397294e-05, "loss": 0.139, "step": 19630 }, { "epoch": 0.425432799601326, "grad_norm": 1.568558692932129, "learning_rate": 1.2321230364945633e-05, "loss": 0.1246, "step": 19635 }, { "epoch": 0.42554113492080686, "grad_norm": 1.9332282543182373, "learning_rate": 1.2317919736614208e-05, "loss": 0.2282, "step": 19640 }, { "epoch": 0.42564947024028776, "grad_norm": 2.3660364151000977, "learning_rate": 1.2314608839786503e-05, "loss": 0.2458, "step": 19645 }, { "epoch": 0.4257578055597686, "grad_norm": 1.2987819910049438, "learning_rate": 1.2311297674846037e-05, "loss": 0.1998, "step": 19650 }, { "epoch": 0.42586614087924946, "grad_norm": 1.8170074224472046, "learning_rate": 1.230798624217636e-05, "loss": 0.2487, "step": 19655 }, { "epoch": 0.4259744761987303, "grad_norm": 1.8668867349624634, "learning_rate": 1.2304674542161048e-05, "loss": 0.2481, "step": 19660 }, { "epoch": 0.42608281151821115, "grad_norm": 1.411389708518982, "learning_rate": 1.2301362575183715e-05, "loss": 0.138, "step": 19665 }, { "epoch": 0.42619114683769205, "grad_norm": 1.8089029788970947, "learning_rate": 1.2298050341628e-05, "loss": 0.1814, "step": 19670 }, { "epoch": 0.4262994821571729, "grad_norm": 1.2997183799743652, "learning_rate": 1.229473784187758e-05, "loss": 0.2761, "step": 19675 }, { "epoch": 0.42640781747665374, "grad_norm": 1.77358078956604, "learning_rate": 1.2291425076316156e-05, "loss": 0.2284, "step": 19680 }, { "epoch": 0.4265161527961346, "grad_norm": 2.477631092071533, "learning_rate": 1.2288112045327463e-05, "loss": 0.2263, "step": 19685 }, { "epoch": 0.42662448811561543, "grad_norm": 1.2823928594589233, "learning_rate": 1.2284798749295268e-05, "loss": 0.1852, "step": 19690 }, { "epoch": 0.42673282343509633, "grad_norm": 1.3958908319473267, "learning_rate": 1.228148518860336e-05, "loss": 0.234, "step": 19695 }, { "epoch": 0.4268411587545772, "grad_norm": 1.441460371017456, "learning_rate": 1.2278171363635569e-05, "loss": 0.1394, "step": 19700 }, { "epoch": 0.426949494074058, "grad_norm": 1.9139498472213745, "learning_rate": 1.2274857274775756e-05, "loss": 0.2298, "step": 19705 }, { "epoch": 0.42705782939353887, "grad_norm": 2.0364067554473877, "learning_rate": 1.2271542922407801e-05, "loss": 0.2077, "step": 19710 }, { "epoch": 0.4271661647130197, "grad_norm": 2.0662293434143066, "learning_rate": 1.2268228306915631e-05, "loss": 0.1933, "step": 19715 }, { "epoch": 0.4272745000325006, "grad_norm": 1.2729918956756592, "learning_rate": 1.2264913428683186e-05, "loss": 0.1475, "step": 19720 }, { "epoch": 0.42738283535198146, "grad_norm": 1.980011224746704, "learning_rate": 1.2261598288094451e-05, "loss": 0.2207, "step": 19725 }, { "epoch": 0.4274911706714623, "grad_norm": 2.802891969680786, "learning_rate": 1.2258282885533432e-05, "loss": 0.2888, "step": 19730 }, { "epoch": 0.42759950599094315, "grad_norm": 2.0369925498962402, "learning_rate": 1.225496722138417e-05, "loss": 0.2628, "step": 19735 }, { "epoch": 0.427707841310424, "grad_norm": 1.7554205656051636, "learning_rate": 1.2251651296030735e-05, "loss": 0.2431, "step": 19740 }, { "epoch": 0.4278161766299049, "grad_norm": 1.7796269655227661, "learning_rate": 1.2248335109857226e-05, "loss": 0.2576, "step": 19745 }, { "epoch": 0.42792451194938574, "grad_norm": 1.5089939832687378, "learning_rate": 1.2245018663247773e-05, "loss": 0.216, "step": 19750 }, { "epoch": 0.4280328472688666, "grad_norm": 1.6897679567337036, "learning_rate": 1.2241701956586543e-05, "loss": 0.196, "step": 19755 }, { "epoch": 0.42814118258834744, "grad_norm": 1.1313492059707642, "learning_rate": 1.2238384990257719e-05, "loss": 0.1599, "step": 19760 }, { "epoch": 0.42824951790782834, "grad_norm": 1.7058725357055664, "learning_rate": 1.2235067764645526e-05, "loss": 0.1726, "step": 19765 }, { "epoch": 0.4283578532273092, "grad_norm": 1.7514588832855225, "learning_rate": 1.2231750280134213e-05, "loss": 0.2358, "step": 19770 }, { "epoch": 0.42846618854679, "grad_norm": 1.276754379272461, "learning_rate": 1.2228432537108061e-05, "loss": 0.1869, "step": 19775 }, { "epoch": 0.4285745238662709, "grad_norm": 1.3653554916381836, "learning_rate": 1.2225114535951384e-05, "loss": 0.1936, "step": 19780 }, { "epoch": 0.4286828591857517, "grad_norm": 1.9028406143188477, "learning_rate": 1.2221796277048518e-05, "loss": 0.2446, "step": 19785 }, { "epoch": 0.4287911945052326, "grad_norm": 1.9747511148452759, "learning_rate": 1.2218477760783836e-05, "loss": 0.3125, "step": 19790 }, { "epoch": 0.42889952982471347, "grad_norm": 1.6494489908218384, "learning_rate": 1.2215158987541737e-05, "loss": 0.2776, "step": 19795 }, { "epoch": 0.4290078651441943, "grad_norm": 1.9327178001403809, "learning_rate": 1.2211839957706656e-05, "loss": 0.2335, "step": 19800 }, { "epoch": 0.42911620046367516, "grad_norm": 1.0387592315673828, "learning_rate": 1.2208520671663047e-05, "loss": 0.1505, "step": 19805 }, { "epoch": 0.429224535783156, "grad_norm": 1.7549306154251099, "learning_rate": 1.2205201129795403e-05, "loss": 0.2681, "step": 19810 }, { "epoch": 0.4293328711026369, "grad_norm": 1.5042643547058105, "learning_rate": 1.2201881332488239e-05, "loss": 0.1618, "step": 19815 }, { "epoch": 0.42944120642211775, "grad_norm": 3.342789888381958, "learning_rate": 1.2198561280126109e-05, "loss": 0.2011, "step": 19820 }, { "epoch": 0.4295495417415986, "grad_norm": 1.4707221984863281, "learning_rate": 1.2195240973093589e-05, "loss": 0.2335, "step": 19825 }, { "epoch": 0.42965787706107944, "grad_norm": 1.7656526565551758, "learning_rate": 1.2191920411775288e-05, "loss": 0.2251, "step": 19830 }, { "epoch": 0.4297662123805603, "grad_norm": 1.6359623670578003, "learning_rate": 1.2188599596555841e-05, "loss": 0.1887, "step": 19835 }, { "epoch": 0.4298745477000412, "grad_norm": 1.7972577810287476, "learning_rate": 1.2185278527819914e-05, "loss": 0.2069, "step": 19840 }, { "epoch": 0.42998288301952203, "grad_norm": 2.5485548973083496, "learning_rate": 1.218195720595221e-05, "loss": 0.3347, "step": 19845 }, { "epoch": 0.4300912183390029, "grad_norm": 2.9659698009490967, "learning_rate": 1.2178635631337447e-05, "loss": 0.2542, "step": 19850 }, { "epoch": 0.4301995536584837, "grad_norm": 1.3273561000823975, "learning_rate": 1.2175313804360382e-05, "loss": 0.1581, "step": 19855 }, { "epoch": 0.43030788897796457, "grad_norm": 1.897822380065918, "learning_rate": 1.2171991725405799e-05, "loss": 0.2415, "step": 19860 }, { "epoch": 0.43041622429744547, "grad_norm": 1.2836167812347412, "learning_rate": 1.2168669394858507e-05, "loss": 0.1406, "step": 19865 }, { "epoch": 0.4305245596169263, "grad_norm": 1.82919180393219, "learning_rate": 1.2165346813103358e-05, "loss": 0.2025, "step": 19870 }, { "epoch": 0.43063289493640716, "grad_norm": 1.61642324924469, "learning_rate": 1.2162023980525215e-05, "loss": 0.2254, "step": 19875 }, { "epoch": 0.430741230255888, "grad_norm": 1.9750821590423584, "learning_rate": 1.2158700897508982e-05, "loss": 0.1965, "step": 19880 }, { "epoch": 0.4308495655753689, "grad_norm": 2.255953311920166, "learning_rate": 1.2155377564439587e-05, "loss": 0.2257, "step": 19885 }, { "epoch": 0.43095790089484975, "grad_norm": 1.4561115503311157, "learning_rate": 1.215205398170199e-05, "loss": 0.2775, "step": 19890 }, { "epoch": 0.4310662362143306, "grad_norm": 1.4412860870361328, "learning_rate": 1.2148730149681176e-05, "loss": 0.1693, "step": 19895 }, { "epoch": 0.43117457153381145, "grad_norm": 1.7427726984024048, "learning_rate": 1.214540606876216e-05, "loss": 0.2094, "step": 19900 }, { "epoch": 0.4312829068532923, "grad_norm": 2.029054641723633, "learning_rate": 1.2142081739329992e-05, "loss": 0.2573, "step": 19905 }, { "epoch": 0.4313912421727732, "grad_norm": 1.8159520626068115, "learning_rate": 1.213875716176974e-05, "loss": 0.1753, "step": 19910 }, { "epoch": 0.43149957749225404, "grad_norm": 1.9840266704559326, "learning_rate": 1.2135432336466511e-05, "loss": 0.1463, "step": 19915 }, { "epoch": 0.4316079128117349, "grad_norm": 1.4658859968185425, "learning_rate": 1.2132107263805434e-05, "loss": 0.1614, "step": 19920 }, { "epoch": 0.43171624813121573, "grad_norm": 2.073751449584961, "learning_rate": 1.2128781944171673e-05, "loss": 0.2085, "step": 19925 }, { "epoch": 0.4318245834506966, "grad_norm": 0.906789243221283, "learning_rate": 1.2125456377950411e-05, "loss": 0.3032, "step": 19930 }, { "epoch": 0.4319329187701775, "grad_norm": 1.540758490562439, "learning_rate": 1.2122130565526864e-05, "loss": 0.3288, "step": 19935 }, { "epoch": 0.4320412540896583, "grad_norm": 1.9312158823013306, "learning_rate": 1.2118804507286283e-05, "loss": 0.181, "step": 19940 }, { "epoch": 0.43214958940913917, "grad_norm": 2.7037410736083984, "learning_rate": 1.211547820361394e-05, "loss": 0.2365, "step": 19945 }, { "epoch": 0.43225792472862, "grad_norm": 1.864251971244812, "learning_rate": 1.2112151654895134e-05, "loss": 0.2618, "step": 19950 }, { "epoch": 0.43236626004810086, "grad_norm": 1.793407678604126, "learning_rate": 1.2108824861515202e-05, "loss": 0.299, "step": 19955 }, { "epoch": 0.43247459536758176, "grad_norm": 2.206414222717285, "learning_rate": 1.2105497823859498e-05, "loss": 0.2295, "step": 19960 }, { "epoch": 0.4325829306870626, "grad_norm": 5.3804707527160645, "learning_rate": 1.2102170542313414e-05, "loss": 0.1604, "step": 19965 }, { "epoch": 0.43269126600654345, "grad_norm": 3.5956804752349854, "learning_rate": 1.2098843017262363e-05, "loss": 0.2103, "step": 19970 }, { "epoch": 0.4327996013260243, "grad_norm": 1.6720399856567383, "learning_rate": 1.2095515249091787e-05, "loss": 0.2339, "step": 19975 }, { "epoch": 0.43290793664550514, "grad_norm": 2.2577338218688965, "learning_rate": 1.2092187238187161e-05, "loss": 0.1988, "step": 19980 }, { "epoch": 0.43301627196498604, "grad_norm": 1.7940833568572998, "learning_rate": 1.2088858984933982e-05, "loss": 0.1551, "step": 19985 }, { "epoch": 0.4331246072844669, "grad_norm": 2.441965103149414, "learning_rate": 1.2085530489717782e-05, "loss": 0.2033, "step": 19990 }, { "epoch": 0.43323294260394773, "grad_norm": 1.5197027921676636, "learning_rate": 1.208220175292412e-05, "loss": 0.2177, "step": 19995 }, { "epoch": 0.4333412779234286, "grad_norm": 2.4936091899871826, "learning_rate": 1.2078872774938572e-05, "loss": 0.1871, "step": 20000 }, { "epoch": 0.4334496132429094, "grad_norm": 2.056100606918335, "learning_rate": 1.2075543556146757e-05, "loss": 0.1969, "step": 20005 }, { "epoch": 0.4335579485623903, "grad_norm": 1.6864780187606812, "learning_rate": 1.207221409693431e-05, "loss": 0.1848, "step": 20010 }, { "epoch": 0.4336662838818712, "grad_norm": 2.148376941680908, "learning_rate": 1.2068884397686904e-05, "loss": 0.2029, "step": 20015 }, { "epoch": 0.433774619201352, "grad_norm": 1.605687141418457, "learning_rate": 1.2065554458790232e-05, "loss": 0.2029, "step": 20020 }, { "epoch": 0.43388295452083286, "grad_norm": 2.885127067565918, "learning_rate": 1.2062224280630017e-05, "loss": 0.2808, "step": 20025 }, { "epoch": 0.43399128984031377, "grad_norm": 0.8161144852638245, "learning_rate": 1.205889386359201e-05, "loss": 0.1727, "step": 20030 }, { "epoch": 0.4340996251597946, "grad_norm": 1.8550636768341064, "learning_rate": 1.2055563208061994e-05, "loss": 0.2689, "step": 20035 }, { "epoch": 0.43420796047927546, "grad_norm": 1.94329833984375, "learning_rate": 1.205223231442577e-05, "loss": 0.2689, "step": 20040 }, { "epoch": 0.4343162957987563, "grad_norm": 1.2260628938674927, "learning_rate": 1.2048901183069177e-05, "loss": 0.1795, "step": 20045 }, { "epoch": 0.43442463111823715, "grad_norm": 2.0296812057495117, "learning_rate": 1.2045569814378076e-05, "loss": 0.1963, "step": 20050 }, { "epoch": 0.43453296643771805, "grad_norm": 1.8177610635757446, "learning_rate": 1.2042238208738351e-05, "loss": 0.2052, "step": 20055 }, { "epoch": 0.4346413017571989, "grad_norm": 1.3666012287139893, "learning_rate": 1.2038906366535922e-05, "loss": 0.1871, "step": 20060 }, { "epoch": 0.43474963707667974, "grad_norm": 2.6165995597839355, "learning_rate": 1.2035574288156737e-05, "loss": 0.2877, "step": 20065 }, { "epoch": 0.4348579723961606, "grad_norm": 1.829862356185913, "learning_rate": 1.203224197398676e-05, "loss": 0.3148, "step": 20070 }, { "epoch": 0.43496630771564143, "grad_norm": 1.712570309638977, "learning_rate": 1.2028909424411996e-05, "loss": 0.1692, "step": 20075 }, { "epoch": 0.43507464303512233, "grad_norm": 1.364941120147705, "learning_rate": 1.2025576639818466e-05, "loss": 0.1654, "step": 20080 }, { "epoch": 0.4351829783546032, "grad_norm": 1.6015592813491821, "learning_rate": 1.2022243620592225e-05, "loss": 0.1414, "step": 20085 }, { "epoch": 0.435291313674084, "grad_norm": 3.3082706928253174, "learning_rate": 1.2018910367119358e-05, "loss": 0.2969, "step": 20090 }, { "epoch": 0.43539964899356487, "grad_norm": 1.2037678956985474, "learning_rate": 1.2015576879785966e-05, "loss": 0.164, "step": 20095 }, { "epoch": 0.4355079843130457, "grad_norm": 1.8348630666732788, "learning_rate": 1.2012243158978185e-05, "loss": 0.2151, "step": 20100 }, { "epoch": 0.4356163196325266, "grad_norm": 1.8862426280975342, "learning_rate": 1.2008909205082175e-05, "loss": 0.2156, "step": 20105 }, { "epoch": 0.43572465495200746, "grad_norm": 0.5748264789581299, "learning_rate": 1.200557501848413e-05, "loss": 0.1494, "step": 20110 }, { "epoch": 0.4358329902714883, "grad_norm": 2.5396440029144287, "learning_rate": 1.2002240599570264e-05, "loss": 0.2393, "step": 20115 }, { "epoch": 0.43594132559096915, "grad_norm": 1.5276098251342773, "learning_rate": 1.1998905948726815e-05, "loss": 0.1861, "step": 20120 }, { "epoch": 0.43604966091045, "grad_norm": 4.038533687591553, "learning_rate": 1.1995571066340059e-05, "loss": 0.1784, "step": 20125 }, { "epoch": 0.4361579962299309, "grad_norm": 2.0718421936035156, "learning_rate": 1.1992235952796287e-05, "loss": 0.1373, "step": 20130 }, { "epoch": 0.43626633154941175, "grad_norm": 1.5339237451553345, "learning_rate": 1.1988900608481821e-05, "loss": 0.2068, "step": 20135 }, { "epoch": 0.4363746668688926, "grad_norm": 1.1479941606521606, "learning_rate": 1.1985565033783018e-05, "loss": 0.1835, "step": 20140 }, { "epoch": 0.43648300218837344, "grad_norm": 1.215248942375183, "learning_rate": 1.1982229229086247e-05, "loss": 0.2348, "step": 20145 }, { "epoch": 0.43659133750785434, "grad_norm": 1.4911991357803345, "learning_rate": 1.197889319477791e-05, "loss": 0.2268, "step": 20150 }, { "epoch": 0.4366996728273352, "grad_norm": 1.4582377672195435, "learning_rate": 1.1975556931244445e-05, "loss": 0.1519, "step": 20155 }, { "epoch": 0.43680800814681603, "grad_norm": 1.183151125907898, "learning_rate": 1.1972220438872302e-05, "loss": 0.1788, "step": 20160 }, { "epoch": 0.4369163434662969, "grad_norm": 2.4155266284942627, "learning_rate": 1.1968883718047966e-05, "loss": 0.2822, "step": 20165 }, { "epoch": 0.4370246787857777, "grad_norm": 1.5792371034622192, "learning_rate": 1.1965546769157946e-05, "loss": 0.1449, "step": 20170 }, { "epoch": 0.4371330141052586, "grad_norm": 1.3853508234024048, "learning_rate": 1.1962209592588773e-05, "loss": 0.2005, "step": 20175 }, { "epoch": 0.43724134942473947, "grad_norm": 1.6856000423431396, "learning_rate": 1.1958872188727015e-05, "loss": 0.2238, "step": 20180 }, { "epoch": 0.4373496847442203, "grad_norm": 1.7077282667160034, "learning_rate": 1.1955534557959258e-05, "loss": 0.1638, "step": 20185 }, { "epoch": 0.43745802006370116, "grad_norm": 1.1223148107528687, "learning_rate": 1.1952196700672116e-05, "loss": 0.1825, "step": 20190 }, { "epoch": 0.437566355383182, "grad_norm": 1.3643436431884766, "learning_rate": 1.194885861725223e-05, "loss": 0.2328, "step": 20195 }, { "epoch": 0.4376746907026629, "grad_norm": 1.80772864818573, "learning_rate": 1.1945520308086265e-05, "loss": 0.1493, "step": 20200 }, { "epoch": 0.43778302602214375, "grad_norm": 1.5059130191802979, "learning_rate": 1.1942181773560918e-05, "loss": 0.1781, "step": 20205 }, { "epoch": 0.4378913613416246, "grad_norm": 1.054976463317871, "learning_rate": 1.1938843014062908e-05, "loss": 0.1415, "step": 20210 }, { "epoch": 0.43799969666110544, "grad_norm": 1.2244094610214233, "learning_rate": 1.193550402997898e-05, "loss": 0.1762, "step": 20215 }, { "epoch": 0.4381080319805863, "grad_norm": 1.4193874597549438, "learning_rate": 1.19321648216959e-05, "loss": 0.2771, "step": 20220 }, { "epoch": 0.4382163673000672, "grad_norm": 1.4511209726333618, "learning_rate": 1.192882538960047e-05, "loss": 0.1538, "step": 20225 }, { "epoch": 0.43832470261954803, "grad_norm": 1.3396269083023071, "learning_rate": 1.1925485734079512e-05, "loss": 0.2146, "step": 20230 }, { "epoch": 0.4384330379390289, "grad_norm": 1.4593241214752197, "learning_rate": 1.1922145855519876e-05, "loss": 0.2437, "step": 20235 }, { "epoch": 0.4385413732585097, "grad_norm": 1.608056902885437, "learning_rate": 1.1918805754308437e-05, "loss": 0.1479, "step": 20240 }, { "epoch": 0.43864970857799057, "grad_norm": 1.2263691425323486, "learning_rate": 1.1915465430832096e-05, "loss": 0.1955, "step": 20245 }, { "epoch": 0.4387580438974715, "grad_norm": 2.17169451713562, "learning_rate": 1.1912124885477777e-05, "loss": 0.262, "step": 20250 }, { "epoch": 0.4388663792169523, "grad_norm": 1.4909868240356445, "learning_rate": 1.1908784118632432e-05, "loss": 0.1756, "step": 20255 }, { "epoch": 0.43897471453643316, "grad_norm": 1.7252763509750366, "learning_rate": 1.1905443130683046e-05, "loss": 0.228, "step": 20260 }, { "epoch": 0.439083049855914, "grad_norm": 3.2144837379455566, "learning_rate": 1.1902101922016612e-05, "loss": 0.1973, "step": 20265 }, { "epoch": 0.43919138517539486, "grad_norm": 1.595503330230713, "learning_rate": 1.1898760493020165e-05, "loss": 0.2448, "step": 20270 }, { "epoch": 0.43929972049487576, "grad_norm": 1.580978274345398, "learning_rate": 1.1895418844080757e-05, "loss": 0.2015, "step": 20275 }, { "epoch": 0.4394080558143566, "grad_norm": 1.8329122066497803, "learning_rate": 1.1892076975585471e-05, "loss": 0.1787, "step": 20280 }, { "epoch": 0.43951639113383745, "grad_norm": 1.624977469444275, "learning_rate": 1.1888734887921411e-05, "loss": 0.1402, "step": 20285 }, { "epoch": 0.4396247264533183, "grad_norm": 0.8628082871437073, "learning_rate": 1.1885392581475705e-05, "loss": 0.0884, "step": 20290 }, { "epoch": 0.4397330617727992, "grad_norm": 2.1755850315093994, "learning_rate": 1.1882050056635514e-05, "loss": 0.1815, "step": 20295 }, { "epoch": 0.43984139709228004, "grad_norm": 2.208278179168701, "learning_rate": 1.1878707313788019e-05, "loss": 0.1473, "step": 20300 }, { "epoch": 0.4399497324117609, "grad_norm": 1.3800852298736572, "learning_rate": 1.1875364353320422e-05, "loss": 0.1288, "step": 20305 }, { "epoch": 0.44005806773124173, "grad_norm": 2.212777614593506, "learning_rate": 1.1872021175619957e-05, "loss": 0.1956, "step": 20310 }, { "epoch": 0.4401664030507226, "grad_norm": 1.6843987703323364, "learning_rate": 1.1868677781073883e-05, "loss": 0.1555, "step": 20315 }, { "epoch": 0.4402747383702035, "grad_norm": 1.493508219718933, "learning_rate": 1.1865334170069479e-05, "loss": 0.2591, "step": 20320 }, { "epoch": 0.4403830736896843, "grad_norm": 2.47109055519104, "learning_rate": 1.1861990342994054e-05, "loss": 0.1828, "step": 20325 }, { "epoch": 0.44049140900916517, "grad_norm": 1.3200337886810303, "learning_rate": 1.1858646300234946e-05, "loss": 0.1462, "step": 20330 }, { "epoch": 0.440599744328646, "grad_norm": 1.6965845823287964, "learning_rate": 1.1855302042179502e-05, "loss": 0.2438, "step": 20335 }, { "epoch": 0.44070807964812686, "grad_norm": 1.9859976768493652, "learning_rate": 1.185195756921511e-05, "loss": 0.1755, "step": 20340 }, { "epoch": 0.44081641496760776, "grad_norm": 1.7462725639343262, "learning_rate": 1.1848612881729173e-05, "loss": 0.211, "step": 20345 }, { "epoch": 0.4409247502870886, "grad_norm": 1.5685266256332397, "learning_rate": 1.184526798010913e-05, "loss": 0.1879, "step": 20350 }, { "epoch": 0.44103308560656945, "grad_norm": 1.7387324571609497, "learning_rate": 1.184192286474243e-05, "loss": 0.2013, "step": 20355 }, { "epoch": 0.4411414209260503, "grad_norm": 1.5898654460906982, "learning_rate": 1.183857753601656e-05, "loss": 0.1793, "step": 20360 }, { "epoch": 0.44124975624553114, "grad_norm": 2.3190431594848633, "learning_rate": 1.1835231994319025e-05, "loss": 0.1983, "step": 20365 }, { "epoch": 0.44135809156501205, "grad_norm": 2.0547032356262207, "learning_rate": 1.1831886240037356e-05, "loss": 0.2515, "step": 20370 }, { "epoch": 0.4414664268844929, "grad_norm": 1.4889311790466309, "learning_rate": 1.1828540273559105e-05, "loss": 0.1894, "step": 20375 }, { "epoch": 0.44157476220397374, "grad_norm": 1.703307032585144, "learning_rate": 1.1825194095271857e-05, "loss": 0.1315, "step": 20380 }, { "epoch": 0.4416830975234546, "grad_norm": 2.2177724838256836, "learning_rate": 1.1821847705563215e-05, "loss": 0.307, "step": 20385 }, { "epoch": 0.4417914328429354, "grad_norm": 1.3883459568023682, "learning_rate": 1.1818501104820805e-05, "loss": 0.1504, "step": 20390 }, { "epoch": 0.44189976816241633, "grad_norm": 1.6876698732376099, "learning_rate": 1.1815154293432283e-05, "loss": 0.2414, "step": 20395 }, { "epoch": 0.4420081034818972, "grad_norm": 2.1913321018218994, "learning_rate": 1.1811807271785327e-05, "loss": 0.2631, "step": 20400 }, { "epoch": 0.442116438801378, "grad_norm": 1.7267348766326904, "learning_rate": 1.1808460040267642e-05, "loss": 0.2061, "step": 20405 }, { "epoch": 0.44222477412085887, "grad_norm": 1.2879917621612549, "learning_rate": 1.180511259926695e-05, "loss": 0.2479, "step": 20410 }, { "epoch": 0.44233310944033977, "grad_norm": 1.835518479347229, "learning_rate": 1.1801764949171007e-05, "loss": 0.1593, "step": 20415 }, { "epoch": 0.4424414447598206, "grad_norm": 1.7329027652740479, "learning_rate": 1.1798417090367582e-05, "loss": 0.2093, "step": 20420 }, { "epoch": 0.44254978007930146, "grad_norm": 1.7503489255905151, "learning_rate": 1.1795069023244478e-05, "loss": 0.1722, "step": 20425 }, { "epoch": 0.4426581153987823, "grad_norm": 1.5234854221343994, "learning_rate": 1.1791720748189519e-05, "loss": 0.1892, "step": 20430 }, { "epoch": 0.44276645071826315, "grad_norm": 1.3826451301574707, "learning_rate": 1.178837226559055e-05, "loss": 0.1983, "step": 20435 }, { "epoch": 0.44287478603774405, "grad_norm": 1.812329649925232, "learning_rate": 1.1785023575835443e-05, "loss": 0.1699, "step": 20440 }, { "epoch": 0.4429831213572249, "grad_norm": 1.4988502264022827, "learning_rate": 1.1781674679312096e-05, "loss": 0.2313, "step": 20445 }, { "epoch": 0.44309145667670574, "grad_norm": 2.2059121131896973, "learning_rate": 1.177832557640843e-05, "loss": 0.1488, "step": 20450 }, { "epoch": 0.4431997919961866, "grad_norm": 1.7620501518249512, "learning_rate": 1.177497626751238e-05, "loss": 0.2012, "step": 20455 }, { "epoch": 0.44330812731566743, "grad_norm": 1.8194246292114258, "learning_rate": 1.177162675301192e-05, "loss": 0.1677, "step": 20460 }, { "epoch": 0.44341646263514833, "grad_norm": 1.9542169570922852, "learning_rate": 1.176827703329504e-05, "loss": 0.153, "step": 20465 }, { "epoch": 0.4435247979546292, "grad_norm": 2.3664519786834717, "learning_rate": 1.1764927108749752e-05, "loss": 0.2239, "step": 20470 }, { "epoch": 0.44363313327411, "grad_norm": 3.0212831497192383, "learning_rate": 1.1761576979764101e-05, "loss": 0.2667, "step": 20475 }, { "epoch": 0.44374146859359087, "grad_norm": 1.7209237813949585, "learning_rate": 1.1758226646726145e-05, "loss": 0.2157, "step": 20480 }, { "epoch": 0.4438498039130717, "grad_norm": 2.264922857284546, "learning_rate": 1.175487611002397e-05, "loss": 0.2337, "step": 20485 }, { "epoch": 0.4439581392325526, "grad_norm": 1.2458019256591797, "learning_rate": 1.1751525370045685e-05, "loss": 0.1529, "step": 20490 }, { "epoch": 0.44406647455203346, "grad_norm": 1.483271598815918, "learning_rate": 1.1748174427179421e-05, "loss": 0.2327, "step": 20495 }, { "epoch": 0.4441748098715143, "grad_norm": 1.7489019632339478, "learning_rate": 1.1744823281813343e-05, "loss": 0.1751, "step": 20500 }, { "epoch": 0.44428314519099515, "grad_norm": 1.5055030584335327, "learning_rate": 1.174147193433562e-05, "loss": 0.2447, "step": 20505 }, { "epoch": 0.444391480510476, "grad_norm": 2.0124306678771973, "learning_rate": 1.1738120385134463e-05, "loss": 0.1939, "step": 20510 }, { "epoch": 0.4444998158299569, "grad_norm": 2.167490243911743, "learning_rate": 1.1734768634598094e-05, "loss": 0.1959, "step": 20515 }, { "epoch": 0.44460815114943775, "grad_norm": 2.1856930255889893, "learning_rate": 1.1731416683114768e-05, "loss": 0.1796, "step": 20520 }, { "epoch": 0.4447164864689186, "grad_norm": 1.5308562517166138, "learning_rate": 1.1728064531072757e-05, "loss": 0.1353, "step": 20525 }, { "epoch": 0.44482482178839944, "grad_norm": 2.024794578552246, "learning_rate": 1.1724712178860354e-05, "loss": 0.1595, "step": 20530 }, { "epoch": 0.4449331571078803, "grad_norm": 1.8053535223007202, "learning_rate": 1.1721359626865885e-05, "loss": 0.1733, "step": 20535 }, { "epoch": 0.4450414924273612, "grad_norm": 1.4772188663482666, "learning_rate": 1.1718006875477682e-05, "loss": 0.2583, "step": 20540 }, { "epoch": 0.44514982774684203, "grad_norm": 1.5408952236175537, "learning_rate": 1.1714653925084126e-05, "loss": 0.1888, "step": 20545 }, { "epoch": 0.4452581630663229, "grad_norm": 2.077342987060547, "learning_rate": 1.1711300776073595e-05, "loss": 0.2871, "step": 20550 }, { "epoch": 0.4453664983858037, "grad_norm": 1.5805248022079468, "learning_rate": 1.1707947428834503e-05, "loss": 0.1129, "step": 20555 }, { "epoch": 0.4454748337052846, "grad_norm": 1.8052772283554077, "learning_rate": 1.1704593883755287e-05, "loss": 0.1873, "step": 20560 }, { "epoch": 0.44558316902476547, "grad_norm": 1.7725156545639038, "learning_rate": 1.1701240141224406e-05, "loss": 0.2652, "step": 20565 }, { "epoch": 0.4456915043442463, "grad_norm": 1.4446715116500854, "learning_rate": 1.169788620163034e-05, "loss": 0.1733, "step": 20570 }, { "epoch": 0.44579983966372716, "grad_norm": 1.2935880422592163, "learning_rate": 1.1694532065361591e-05, "loss": 0.1726, "step": 20575 }, { "epoch": 0.445908174983208, "grad_norm": 1.7252732515335083, "learning_rate": 1.1691177732806685e-05, "loss": 0.2252, "step": 20580 }, { "epoch": 0.4460165103026889, "grad_norm": 0.8340908288955688, "learning_rate": 1.1687823204354172e-05, "loss": 0.2247, "step": 20585 }, { "epoch": 0.44612484562216975, "grad_norm": 1.7543375492095947, "learning_rate": 1.1684468480392624e-05, "loss": 0.1764, "step": 20590 }, { "epoch": 0.4462331809416506, "grad_norm": 1.5077452659606934, "learning_rate": 1.1681113561310638e-05, "loss": 0.2664, "step": 20595 }, { "epoch": 0.44634151626113144, "grad_norm": 1.5589981079101562, "learning_rate": 1.1677758447496827e-05, "loss": 0.238, "step": 20600 }, { "epoch": 0.4464498515806123, "grad_norm": 1.773184895515442, "learning_rate": 1.1674403139339836e-05, "loss": 0.193, "step": 20605 }, { "epoch": 0.4465581869000932, "grad_norm": 2.47342586517334, "learning_rate": 1.1671047637228322e-05, "loss": 0.1584, "step": 20610 }, { "epoch": 0.44666652221957404, "grad_norm": 1.7155311107635498, "learning_rate": 1.166769194155097e-05, "loss": 0.2042, "step": 20615 }, { "epoch": 0.4467748575390549, "grad_norm": 2.551389694213867, "learning_rate": 1.1664336052696489e-05, "loss": 0.1473, "step": 20620 }, { "epoch": 0.4468831928585357, "grad_norm": 1.408864140510559, "learning_rate": 1.1660979971053612e-05, "loss": 0.1326, "step": 20625 }, { "epoch": 0.4469915281780166, "grad_norm": 1.8348827362060547, "learning_rate": 1.1657623697011082e-05, "loss": 0.2968, "step": 20630 }, { "epoch": 0.4470998634974975, "grad_norm": 1.5034610033035278, "learning_rate": 1.165426723095768e-05, "loss": 0.2275, "step": 20635 }, { "epoch": 0.4472081988169783, "grad_norm": 1.2782686948776245, "learning_rate": 1.16509105732822e-05, "loss": 0.2129, "step": 20640 }, { "epoch": 0.44731653413645917, "grad_norm": 1.3724900484085083, "learning_rate": 1.1647553724373459e-05, "loss": 0.2343, "step": 20645 }, { "epoch": 0.44742486945594, "grad_norm": 1.7736705541610718, "learning_rate": 1.1644196684620302e-05, "loss": 0.157, "step": 20650 }, { "epoch": 0.44753320477542086, "grad_norm": 1.9843846559524536, "learning_rate": 1.164083945441159e-05, "loss": 0.1851, "step": 20655 }, { "epoch": 0.44764154009490176, "grad_norm": 1.4447063207626343, "learning_rate": 1.1637482034136203e-05, "loss": 0.0789, "step": 20660 }, { "epoch": 0.4477498754143826, "grad_norm": 1.205124855041504, "learning_rate": 1.1634124424183055e-05, "loss": 0.1614, "step": 20665 }, { "epoch": 0.44785821073386345, "grad_norm": 1.037827730178833, "learning_rate": 1.163076662494107e-05, "loss": 0.2325, "step": 20670 }, { "epoch": 0.4479665460533443, "grad_norm": 2.2822518348693848, "learning_rate": 1.1627408636799202e-05, "loss": 0.2128, "step": 20675 }, { "epoch": 0.4480748813728252, "grad_norm": 1.7075526714324951, "learning_rate": 1.162405046014642e-05, "loss": 0.1097, "step": 20680 }, { "epoch": 0.44818321669230604, "grad_norm": 3.1957757472991943, "learning_rate": 1.1620692095371719e-05, "loss": 0.2944, "step": 20685 }, { "epoch": 0.4482915520117869, "grad_norm": 1.9181411266326904, "learning_rate": 1.161733354286412e-05, "loss": 0.2013, "step": 20690 }, { "epoch": 0.44839988733126773, "grad_norm": 1.5084621906280518, "learning_rate": 1.161397480301266e-05, "loss": 0.1158, "step": 20695 }, { "epoch": 0.4485082226507486, "grad_norm": 1.6036548614501953, "learning_rate": 1.1610615876206395e-05, "loss": 0.2077, "step": 20700 }, { "epoch": 0.4486165579702295, "grad_norm": 3.055976390838623, "learning_rate": 1.1607256762834402e-05, "loss": 0.1661, "step": 20705 }, { "epoch": 0.4487248932897103, "grad_norm": 1.8013625144958496, "learning_rate": 1.1603897463285793e-05, "loss": 0.2367, "step": 20710 }, { "epoch": 0.44883322860919117, "grad_norm": 2.790752649307251, "learning_rate": 1.1600537977949694e-05, "loss": 0.1835, "step": 20715 }, { "epoch": 0.448941563928672, "grad_norm": 1.8943699598312378, "learning_rate": 1.159717830721524e-05, "loss": 0.1501, "step": 20720 }, { "epoch": 0.44904989924815286, "grad_norm": 1.6857153177261353, "learning_rate": 1.1593818451471608e-05, "loss": 0.2335, "step": 20725 }, { "epoch": 0.44915823456763376, "grad_norm": 2.167797327041626, "learning_rate": 1.1590458411107983e-05, "loss": 0.1331, "step": 20730 }, { "epoch": 0.4492665698871146, "grad_norm": 1.4917776584625244, "learning_rate": 1.1587098186513576e-05, "loss": 0.1574, "step": 20735 }, { "epoch": 0.44937490520659545, "grad_norm": 1.776200771331787, "learning_rate": 1.158373777807762e-05, "loss": 0.2027, "step": 20740 }, { "epoch": 0.4494832405260763, "grad_norm": 1.7672903537750244, "learning_rate": 1.1580377186189367e-05, "loss": 0.2333, "step": 20745 }, { "epoch": 0.44959157584555715, "grad_norm": 2.1205878257751465, "learning_rate": 1.1577016411238089e-05, "loss": 0.1931, "step": 20750 }, { "epoch": 0.44969991116503805, "grad_norm": 1.7292938232421875, "learning_rate": 1.1573655453613082e-05, "loss": 0.2239, "step": 20755 }, { "epoch": 0.4498082464845189, "grad_norm": 1.6356947422027588, "learning_rate": 1.1570294313703667e-05, "loss": 0.1122, "step": 20760 }, { "epoch": 0.44991658180399974, "grad_norm": 1.694224238395691, "learning_rate": 1.1566932991899178e-05, "loss": 0.2393, "step": 20765 }, { "epoch": 0.4500249171234806, "grad_norm": 0.9965943694114685, "learning_rate": 1.1563571488588975e-05, "loss": 0.1769, "step": 20770 }, { "epoch": 0.45013325244296143, "grad_norm": 1.2934879064559937, "learning_rate": 1.1560209804162437e-05, "loss": 0.1938, "step": 20775 }, { "epoch": 0.45024158776244233, "grad_norm": 1.9346857070922852, "learning_rate": 1.1556847939008966e-05, "loss": 0.1793, "step": 20780 }, { "epoch": 0.4503499230819232, "grad_norm": 1.3295103311538696, "learning_rate": 1.1553485893517981e-05, "loss": 0.1303, "step": 20785 }, { "epoch": 0.450458258401404, "grad_norm": 1.0927420854568481, "learning_rate": 1.1550123668078927e-05, "loss": 0.1272, "step": 20790 }, { "epoch": 0.45056659372088487, "grad_norm": 1.20215904712677, "learning_rate": 1.1546761263081267e-05, "loss": 0.2224, "step": 20795 }, { "epoch": 0.4506749290403657, "grad_norm": 1.744379997253418, "learning_rate": 1.1543398678914488e-05, "loss": 0.1467, "step": 20800 }, { "epoch": 0.4507832643598466, "grad_norm": 2.0303456783294678, "learning_rate": 1.1540035915968087e-05, "loss": 0.167, "step": 20805 }, { "epoch": 0.45089159967932746, "grad_norm": 1.2924267053604126, "learning_rate": 1.1536672974631597e-05, "loss": 0.1646, "step": 20810 }, { "epoch": 0.4509999349988083, "grad_norm": 1.585565209388733, "learning_rate": 1.1533309855294566e-05, "loss": 0.1603, "step": 20815 }, { "epoch": 0.45110827031828915, "grad_norm": 2.4723379611968994, "learning_rate": 1.1529946558346553e-05, "loss": 0.1801, "step": 20820 }, { "epoch": 0.45121660563777005, "grad_norm": 1.955614447593689, "learning_rate": 1.152658308417715e-05, "loss": 0.1791, "step": 20825 }, { "epoch": 0.4513249409572509, "grad_norm": 2.0126171112060547, "learning_rate": 1.1523219433175965e-05, "loss": 0.2193, "step": 20830 }, { "epoch": 0.45143327627673174, "grad_norm": 2.6805100440979004, "learning_rate": 1.1519855605732629e-05, "loss": 0.2938, "step": 20835 }, { "epoch": 0.4515416115962126, "grad_norm": 1.2801319360733032, "learning_rate": 1.1516491602236786e-05, "loss": 0.2585, "step": 20840 }, { "epoch": 0.45164994691569343, "grad_norm": 1.8311606645584106, "learning_rate": 1.1513127423078107e-05, "loss": 0.1542, "step": 20845 }, { "epoch": 0.45175828223517434, "grad_norm": 1.5442231893539429, "learning_rate": 1.1509763068646288e-05, "loss": 0.2537, "step": 20850 }, { "epoch": 0.4518666175546552, "grad_norm": 2.2030704021453857, "learning_rate": 1.150639853933103e-05, "loss": 0.2731, "step": 20855 }, { "epoch": 0.451974952874136, "grad_norm": 0.8808423280715942, "learning_rate": 1.1503033835522066e-05, "loss": 0.2457, "step": 20860 }, { "epoch": 0.4520832881936169, "grad_norm": 2.264357566833496, "learning_rate": 1.149966895760915e-05, "loss": 0.2752, "step": 20865 }, { "epoch": 0.4521916235130977, "grad_norm": 1.4502320289611816, "learning_rate": 1.1496303905982046e-05, "loss": 0.1458, "step": 20870 }, { "epoch": 0.4522999588325786, "grad_norm": 0.9883337616920471, "learning_rate": 1.1492938681030551e-05, "loss": 0.1899, "step": 20875 }, { "epoch": 0.45240829415205946, "grad_norm": 1.7882416248321533, "learning_rate": 1.1489573283144477e-05, "loss": 0.1933, "step": 20880 }, { "epoch": 0.4525166294715403, "grad_norm": 1.916590690612793, "learning_rate": 1.148620771271365e-05, "loss": 0.1586, "step": 20885 }, { "epoch": 0.45262496479102116, "grad_norm": 0.545600950717926, "learning_rate": 1.1482841970127922e-05, "loss": 0.1372, "step": 20890 }, { "epoch": 0.452733300110502, "grad_norm": 1.3925776481628418, "learning_rate": 1.1479476055777167e-05, "loss": 0.1912, "step": 20895 }, { "epoch": 0.4528416354299829, "grad_norm": 1.6945562362670898, "learning_rate": 1.1476109970051272e-05, "loss": 0.1828, "step": 20900 }, { "epoch": 0.45294997074946375, "grad_norm": 1.2364338636398315, "learning_rate": 1.147274371334015e-05, "loss": 0.202, "step": 20905 }, { "epoch": 0.4530583060689446, "grad_norm": 2.688868761062622, "learning_rate": 1.1469377286033729e-05, "loss": 0.2252, "step": 20910 }, { "epoch": 0.45316664138842544, "grad_norm": 1.5407127141952515, "learning_rate": 1.1466010688521962e-05, "loss": 0.1734, "step": 20915 }, { "epoch": 0.4532749767079063, "grad_norm": 1.0944278240203857, "learning_rate": 1.1462643921194819e-05, "loss": 0.2712, "step": 20920 }, { "epoch": 0.4533833120273872, "grad_norm": 1.9530049562454224, "learning_rate": 1.1459276984442283e-05, "loss": 0.2092, "step": 20925 }, { "epoch": 0.45349164734686803, "grad_norm": 1.2829689979553223, "learning_rate": 1.1455909878654372e-05, "loss": 0.1877, "step": 20930 }, { "epoch": 0.4535999826663489, "grad_norm": 2.2383954524993896, "learning_rate": 1.1452542604221113e-05, "loss": 0.2296, "step": 20935 }, { "epoch": 0.4537083179858297, "grad_norm": 1.1870620250701904, "learning_rate": 1.1449175161532551e-05, "loss": 0.1451, "step": 20940 }, { "epoch": 0.4538166533053106, "grad_norm": 2.785367250442505, "learning_rate": 1.1445807550978751e-05, "loss": 0.1805, "step": 20945 }, { "epoch": 0.45392498862479147, "grad_norm": 1.3865042924880981, "learning_rate": 1.144243977294981e-05, "loss": 0.1657, "step": 20950 }, { "epoch": 0.4540333239442723, "grad_norm": 0.7453527450561523, "learning_rate": 1.1439071827835826e-05, "loss": 0.1643, "step": 20955 }, { "epoch": 0.45414165926375316, "grad_norm": 0.656039297580719, "learning_rate": 1.1435703716026926e-05, "loss": 0.1185, "step": 20960 }, { "epoch": 0.454249994583234, "grad_norm": 1.9340317249298096, "learning_rate": 1.1432335437913262e-05, "loss": 0.1843, "step": 20965 }, { "epoch": 0.4543583299027149, "grad_norm": 1.2050212621688843, "learning_rate": 1.1428966993884991e-05, "loss": 0.1414, "step": 20970 }, { "epoch": 0.45446666522219575, "grad_norm": 0.9291523098945618, "learning_rate": 1.1425598384332302e-05, "loss": 0.2132, "step": 20975 }, { "epoch": 0.4545750005416766, "grad_norm": 2.0301706790924072, "learning_rate": 1.1422229609645394e-05, "loss": 0.196, "step": 20980 }, { "epoch": 0.45468333586115744, "grad_norm": 1.9810656309127808, "learning_rate": 1.1418860670214492e-05, "loss": 0.2291, "step": 20985 }, { "epoch": 0.4547916711806383, "grad_norm": 3.6370654106140137, "learning_rate": 1.1415491566429836e-05, "loss": 0.2109, "step": 20990 }, { "epoch": 0.4549000065001192, "grad_norm": 1.7237741947174072, "learning_rate": 1.1412122298681683e-05, "loss": 0.1841, "step": 20995 }, { "epoch": 0.45500834181960004, "grad_norm": 1.7542388439178467, "learning_rate": 1.1408752867360315e-05, "loss": 0.1764, "step": 21000 }, { "epoch": 0.4551166771390809, "grad_norm": 1.0871284008026123, "learning_rate": 1.1405383272856034e-05, "loss": 0.1397, "step": 21005 }, { "epoch": 0.45522501245856173, "grad_norm": 1.525464653968811, "learning_rate": 1.1402013515559154e-05, "loss": 0.192, "step": 21010 }, { "epoch": 0.4553333477780426, "grad_norm": 1.9893203973770142, "learning_rate": 1.1398643595860008e-05, "loss": 0.1892, "step": 21015 }, { "epoch": 0.4554416830975235, "grad_norm": 1.1673219203948975, "learning_rate": 1.1395273514148952e-05, "loss": 0.2194, "step": 21020 }, { "epoch": 0.4555500184170043, "grad_norm": 1.8803025484085083, "learning_rate": 1.1391903270816364e-05, "loss": 0.2107, "step": 21025 }, { "epoch": 0.45565835373648517, "grad_norm": 1.199450135231018, "learning_rate": 1.138853286625263e-05, "loss": 0.1915, "step": 21030 }, { "epoch": 0.455766689055966, "grad_norm": 1.5225549936294556, "learning_rate": 1.1385162300848164e-05, "loss": 0.1858, "step": 21035 }, { "epoch": 0.45587502437544686, "grad_norm": 1.5759103298187256, "learning_rate": 1.1381791574993397e-05, "loss": 0.2394, "step": 21040 }, { "epoch": 0.45598335969492776, "grad_norm": 1.7782617807388306, "learning_rate": 1.1378420689078773e-05, "loss": 0.1438, "step": 21045 }, { "epoch": 0.4560916950144086, "grad_norm": 1.856531023979187, "learning_rate": 1.1375049643494761e-05, "loss": 0.1664, "step": 21050 }, { "epoch": 0.45620003033388945, "grad_norm": 2.2554919719696045, "learning_rate": 1.137167843863185e-05, "loss": 0.2519, "step": 21055 }, { "epoch": 0.4563083656533703, "grad_norm": 1.938092827796936, "learning_rate": 1.1368307074880538e-05, "loss": 0.2181, "step": 21060 }, { "epoch": 0.45641670097285114, "grad_norm": 1.6878124475479126, "learning_rate": 1.1364935552631347e-05, "loss": 0.2057, "step": 21065 }, { "epoch": 0.45652503629233204, "grad_norm": 2.2105042934417725, "learning_rate": 1.1361563872274817e-05, "loss": 0.207, "step": 21070 }, { "epoch": 0.4566333716118129, "grad_norm": 0.9644080400466919, "learning_rate": 1.1358192034201512e-05, "loss": 0.1414, "step": 21075 }, { "epoch": 0.45674170693129373, "grad_norm": 1.5774930715560913, "learning_rate": 1.1354820038802003e-05, "loss": 0.1795, "step": 21080 }, { "epoch": 0.4568500422507746, "grad_norm": 1.1412333250045776, "learning_rate": 1.1351447886466891e-05, "loss": 0.1756, "step": 21085 }, { "epoch": 0.4569583775702555, "grad_norm": 2.161813974380493, "learning_rate": 1.1348075577586783e-05, "loss": 0.2273, "step": 21090 }, { "epoch": 0.4570667128897363, "grad_norm": 1.4544124603271484, "learning_rate": 1.1344703112552315e-05, "loss": 0.2724, "step": 21095 }, { "epoch": 0.45717504820921717, "grad_norm": 1.1730619668960571, "learning_rate": 1.1341330491754137e-05, "loss": 0.1574, "step": 21100 }, { "epoch": 0.457283383528698, "grad_norm": 0.9932999610900879, "learning_rate": 1.1337957715582912e-05, "loss": 0.128, "step": 21105 }, { "epoch": 0.45739171884817886, "grad_norm": 1.9285485744476318, "learning_rate": 1.1334584784429328e-05, "loss": 0.2388, "step": 21110 }, { "epoch": 0.45750005416765976, "grad_norm": 1.4442461729049683, "learning_rate": 1.1331211698684087e-05, "loss": 0.2382, "step": 21115 }, { "epoch": 0.4576083894871406, "grad_norm": 1.2633204460144043, "learning_rate": 1.1327838458737917e-05, "loss": 0.1316, "step": 21120 }, { "epoch": 0.45771672480662146, "grad_norm": 1.5461007356643677, "learning_rate": 1.132446506498155e-05, "loss": 0.1874, "step": 21125 }, { "epoch": 0.4578250601261023, "grad_norm": 1.69553804397583, "learning_rate": 1.1321091517805746e-05, "loss": 0.1466, "step": 21130 }, { "epoch": 0.45793339544558315, "grad_norm": 1.768660545349121, "learning_rate": 1.131771781760128e-05, "loss": 0.2743, "step": 21135 }, { "epoch": 0.45804173076506405, "grad_norm": 1.6407160758972168, "learning_rate": 1.1314343964758945e-05, "loss": 0.1869, "step": 21140 }, { "epoch": 0.4581500660845449, "grad_norm": 1.4282878637313843, "learning_rate": 1.1310969959669548e-05, "loss": 0.2294, "step": 21145 }, { "epoch": 0.45825840140402574, "grad_norm": 1.823672890663147, "learning_rate": 1.1307595802723922e-05, "loss": 0.178, "step": 21150 }, { "epoch": 0.4583667367235066, "grad_norm": 2.4801578521728516, "learning_rate": 1.1304221494312909e-05, "loss": 0.1766, "step": 21155 }, { "epoch": 0.45847507204298743, "grad_norm": 1.5190640687942505, "learning_rate": 1.1300847034827373e-05, "loss": 0.2407, "step": 21160 }, { "epoch": 0.45858340736246833, "grad_norm": 1.333453893661499, "learning_rate": 1.1297472424658194e-05, "loss": 0.1515, "step": 21165 }, { "epoch": 0.4586917426819492, "grad_norm": 2.3642923831939697, "learning_rate": 1.129409766419627e-05, "loss": 0.1726, "step": 21170 }, { "epoch": 0.45880007800143, "grad_norm": 1.6944255828857422, "learning_rate": 1.129072275383252e-05, "loss": 0.2108, "step": 21175 }, { "epoch": 0.45890841332091087, "grad_norm": 1.1311440467834473, "learning_rate": 1.1287347693957874e-05, "loss": 0.1154, "step": 21180 }, { "epoch": 0.4590167486403917, "grad_norm": 1.652513027191162, "learning_rate": 1.1283972484963282e-05, "loss": 0.237, "step": 21185 }, { "epoch": 0.4591250839598726, "grad_norm": 1.7295969724655151, "learning_rate": 1.1280597127239707e-05, "loss": 0.3139, "step": 21190 }, { "epoch": 0.45923341927935346, "grad_norm": 1.5405172109603882, "learning_rate": 1.1277221621178143e-05, "loss": 0.2486, "step": 21195 }, { "epoch": 0.4593417545988343, "grad_norm": 1.4487258195877075, "learning_rate": 1.1273845967169585e-05, "loss": 0.1835, "step": 21200 }, { "epoch": 0.45945008991831515, "grad_norm": 2.642927885055542, "learning_rate": 1.1270470165605054e-05, "loss": 0.1933, "step": 21205 }, { "epoch": 0.45955842523779605, "grad_norm": 1.8407763242721558, "learning_rate": 1.1267094216875584e-05, "loss": 0.1649, "step": 21210 }, { "epoch": 0.4596667605572769, "grad_norm": 3.043302059173584, "learning_rate": 1.1263718121372236e-05, "loss": 0.246, "step": 21215 }, { "epoch": 0.45977509587675774, "grad_norm": 1.3158797025680542, "learning_rate": 1.1260341879486071e-05, "loss": 0.2189, "step": 21220 }, { "epoch": 0.4598834311962386, "grad_norm": 1.3888906240463257, "learning_rate": 1.1256965491608178e-05, "loss": 0.2117, "step": 21225 }, { "epoch": 0.45999176651571944, "grad_norm": 1.619282841682434, "learning_rate": 1.1253588958129664e-05, "loss": 0.137, "step": 21230 }, { "epoch": 0.46010010183520034, "grad_norm": 2.878323554992676, "learning_rate": 1.1250212279441643e-05, "loss": 0.3217, "step": 21235 }, { "epoch": 0.4602084371546812, "grad_norm": 2.4732863903045654, "learning_rate": 1.1246835455935263e-05, "loss": 0.1576, "step": 21240 }, { "epoch": 0.46031677247416203, "grad_norm": 1.1930230855941772, "learning_rate": 1.1243458488001673e-05, "loss": 0.2706, "step": 21245 }, { "epoch": 0.4604251077936429, "grad_norm": 1.8859766721725464, "learning_rate": 1.1240081376032041e-05, "loss": 0.2242, "step": 21250 }, { "epoch": 0.4605334431131237, "grad_norm": 2.6833856105804443, "learning_rate": 1.1236704120417561e-05, "loss": 0.2173, "step": 21255 }, { "epoch": 0.4606417784326046, "grad_norm": 2.1588187217712402, "learning_rate": 1.1233326721549433e-05, "loss": 0.0928, "step": 21260 }, { "epoch": 0.46075011375208547, "grad_norm": 1.4173226356506348, "learning_rate": 1.122994917981888e-05, "loss": 0.2366, "step": 21265 }, { "epoch": 0.4608584490715663, "grad_norm": 1.3535534143447876, "learning_rate": 1.1226571495617139e-05, "loss": 0.1158, "step": 21270 }, { "epoch": 0.46096678439104716, "grad_norm": 1.2257355451583862, "learning_rate": 1.1223193669335464e-05, "loss": 0.1378, "step": 21275 }, { "epoch": 0.461075119710528, "grad_norm": 1.8373135328292847, "learning_rate": 1.1219815701365127e-05, "loss": 0.1551, "step": 21280 }, { "epoch": 0.4611834550300089, "grad_norm": 1.2847483158111572, "learning_rate": 1.121643759209741e-05, "loss": 0.1906, "step": 21285 }, { "epoch": 0.46129179034948975, "grad_norm": 1.745548129081726, "learning_rate": 1.1213059341923622e-05, "loss": 0.1705, "step": 21290 }, { "epoch": 0.4614001256689706, "grad_norm": 1.7455939054489136, "learning_rate": 1.1209680951235082e-05, "loss": 0.1687, "step": 21295 }, { "epoch": 0.46150846098845144, "grad_norm": 1.1872531175613403, "learning_rate": 1.1206302420423128e-05, "loss": 0.2053, "step": 21300 }, { "epoch": 0.4616167963079323, "grad_norm": 2.1036298274993896, "learning_rate": 1.1202923749879107e-05, "loss": 0.2276, "step": 21305 }, { "epoch": 0.4617251316274132, "grad_norm": 2.0850629806518555, "learning_rate": 1.1199544939994385e-05, "loss": 0.2136, "step": 21310 }, { "epoch": 0.46183346694689403, "grad_norm": 1.3017090559005737, "learning_rate": 1.1196165991160356e-05, "loss": 0.2454, "step": 21315 }, { "epoch": 0.4619418022663749, "grad_norm": 1.8469003438949585, "learning_rate": 1.119278690376841e-05, "loss": 0.2097, "step": 21320 }, { "epoch": 0.4620501375858557, "grad_norm": 1.3358296155929565, "learning_rate": 1.1189407678209974e-05, "loss": 0.2531, "step": 21325 }, { "epoch": 0.46215847290533657, "grad_norm": 1.6220604181289673, "learning_rate": 1.1186028314876476e-05, "loss": 0.1824, "step": 21330 }, { "epoch": 0.46226680822481747, "grad_norm": 1.6692126989364624, "learning_rate": 1.1182648814159363e-05, "loss": 0.1424, "step": 21335 }, { "epoch": 0.4623751435442983, "grad_norm": 1.6495980024337769, "learning_rate": 1.11792691764501e-05, "loss": 0.2469, "step": 21340 }, { "epoch": 0.46248347886377916, "grad_norm": 1.388160228729248, "learning_rate": 1.117588940214017e-05, "loss": 0.2365, "step": 21345 }, { "epoch": 0.46259181418326, "grad_norm": 1.6817206144332886, "learning_rate": 1.1172509491621067e-05, "loss": 0.2288, "step": 21350 }, { "epoch": 0.4627001495027409, "grad_norm": 1.023118495941162, "learning_rate": 1.1169129445284301e-05, "loss": 0.2579, "step": 21355 }, { "epoch": 0.46280848482222176, "grad_norm": 1.9010976552963257, "learning_rate": 1.1165749263521404e-05, "loss": 0.1774, "step": 21360 }, { "epoch": 0.4629168201417026, "grad_norm": 1.7458302974700928, "learning_rate": 1.1162368946723918e-05, "loss": 0.2144, "step": 21365 }, { "epoch": 0.46302515546118345, "grad_norm": 2.425935983657837, "learning_rate": 1.1158988495283403e-05, "loss": 0.1744, "step": 21370 }, { "epoch": 0.4631334907806643, "grad_norm": 1.3700650930404663, "learning_rate": 1.1155607909591432e-05, "loss": 0.1683, "step": 21375 }, { "epoch": 0.4632418261001452, "grad_norm": 1.7498817443847656, "learning_rate": 1.1152227190039596e-05, "loss": 0.139, "step": 21380 }, { "epoch": 0.46335016141962604, "grad_norm": 1.8391778469085693, "learning_rate": 1.1148846337019498e-05, "loss": 0.1925, "step": 21385 }, { "epoch": 0.4634584967391069, "grad_norm": 1.2004238367080688, "learning_rate": 1.1145465350922765e-05, "loss": 0.1048, "step": 21390 }, { "epoch": 0.46356683205858773, "grad_norm": 2.1099801063537598, "learning_rate": 1.1142084232141028e-05, "loss": 0.1369, "step": 21395 }, { "epoch": 0.4636751673780686, "grad_norm": 1.0603758096694946, "learning_rate": 1.1138702981065941e-05, "loss": 0.2468, "step": 21400 }, { "epoch": 0.4637835026975495, "grad_norm": 2.4194276332855225, "learning_rate": 1.113532159808917e-05, "loss": 0.2063, "step": 21405 }, { "epoch": 0.4638918380170303, "grad_norm": 1.6563427448272705, "learning_rate": 1.1131940083602401e-05, "loss": 0.3129, "step": 21410 }, { "epoch": 0.46400017333651117, "grad_norm": 1.637131690979004, "learning_rate": 1.112855843799733e-05, "loss": 0.1836, "step": 21415 }, { "epoch": 0.464108508655992, "grad_norm": 2.276238203048706, "learning_rate": 1.1125176661665673e-05, "loss": 0.1681, "step": 21420 }, { "epoch": 0.46421684397547286, "grad_norm": 1.848861813545227, "learning_rate": 1.1121794754999151e-05, "loss": 0.1782, "step": 21425 }, { "epoch": 0.46432517929495376, "grad_norm": 1.052605152130127, "learning_rate": 1.1118412718389511e-05, "loss": 0.2261, "step": 21430 }, { "epoch": 0.4644335146144346, "grad_norm": 1.2876272201538086, "learning_rate": 1.1115030552228512e-05, "loss": 0.145, "step": 21435 }, { "epoch": 0.46454184993391545, "grad_norm": 1.5573643445968628, "learning_rate": 1.1111648256907925e-05, "loss": 0.257, "step": 21440 }, { "epoch": 0.4646501852533963, "grad_norm": 1.405585765838623, "learning_rate": 1.1108265832819542e-05, "loss": 0.2528, "step": 21445 }, { "epoch": 0.46475852057287714, "grad_norm": 2.236433267593384, "learning_rate": 1.1104883280355164e-05, "loss": 0.156, "step": 21450 }, { "epoch": 0.46486685589235804, "grad_norm": 1.9464596509933472, "learning_rate": 1.110150059990661e-05, "loss": 0.2225, "step": 21455 }, { "epoch": 0.4649751912118389, "grad_norm": 2.166733503341675, "learning_rate": 1.109811779186571e-05, "loss": 0.2653, "step": 21460 }, { "epoch": 0.46508352653131974, "grad_norm": 1.883748173713684, "learning_rate": 1.1094734856624313e-05, "loss": 0.2364, "step": 21465 }, { "epoch": 0.4651918618508006, "grad_norm": 1.5165224075317383, "learning_rate": 1.1091351794574285e-05, "loss": 0.1053, "step": 21470 }, { "epoch": 0.4653001971702815, "grad_norm": 1.0724684000015259, "learning_rate": 1.1087968606107495e-05, "loss": 0.1215, "step": 21475 }, { "epoch": 0.4654085324897623, "grad_norm": 1.8814160823822021, "learning_rate": 1.1084585291615844e-05, "loss": 0.245, "step": 21480 }, { "epoch": 0.4655168678092432, "grad_norm": 2.0914459228515625, "learning_rate": 1.1081201851491235e-05, "loss": 0.2429, "step": 21485 }, { "epoch": 0.465625203128724, "grad_norm": 1.9619704484939575, "learning_rate": 1.1077818286125585e-05, "loss": 0.1466, "step": 21490 }, { "epoch": 0.46573353844820486, "grad_norm": 1.8678829669952393, "learning_rate": 1.1074434595910833e-05, "loss": 0.2328, "step": 21495 }, { "epoch": 0.46584187376768577, "grad_norm": 2.0553994178771973, "learning_rate": 1.1071050781238931e-05, "loss": 0.1886, "step": 21500 }, { "epoch": 0.4659502090871666, "grad_norm": 1.3086072206497192, "learning_rate": 1.1067666842501837e-05, "loss": 0.1815, "step": 21505 }, { "epoch": 0.46605854440664746, "grad_norm": 1.8405039310455322, "learning_rate": 1.1064282780091533e-05, "loss": 0.2009, "step": 21510 }, { "epoch": 0.4661668797261283, "grad_norm": 2.1866514682769775, "learning_rate": 1.1060898594400013e-05, "loss": 0.1925, "step": 21515 }, { "epoch": 0.46627521504560915, "grad_norm": 1.2898224592208862, "learning_rate": 1.1057514285819281e-05, "loss": 0.2019, "step": 21520 }, { "epoch": 0.46638355036509005, "grad_norm": 1.405748963356018, "learning_rate": 1.1054129854741363e-05, "loss": 0.1417, "step": 21525 }, { "epoch": 0.4664918856845709, "grad_norm": 1.243212342262268, "learning_rate": 1.105074530155829e-05, "loss": 0.2345, "step": 21530 }, { "epoch": 0.46660022100405174, "grad_norm": 1.2003512382507324, "learning_rate": 1.1047360626662116e-05, "loss": 0.1443, "step": 21535 }, { "epoch": 0.4667085563235326, "grad_norm": 2.099292516708374, "learning_rate": 1.1043975830444904e-05, "loss": 0.2803, "step": 21540 }, { "epoch": 0.46681689164301343, "grad_norm": 1.9396634101867676, "learning_rate": 1.104059091329873e-05, "loss": 0.178, "step": 21545 }, { "epoch": 0.46692522696249433, "grad_norm": 1.700528860092163, "learning_rate": 1.1037205875615685e-05, "loss": 0.2834, "step": 21550 }, { "epoch": 0.4670335622819752, "grad_norm": 1.950739860534668, "learning_rate": 1.1033820717787875e-05, "loss": 0.1722, "step": 21555 }, { "epoch": 0.467141897601456, "grad_norm": 1.7964059114456177, "learning_rate": 1.1030435440207425e-05, "loss": 0.2668, "step": 21560 }, { "epoch": 0.46725023292093687, "grad_norm": 2.465181589126587, "learning_rate": 1.1027050043266465e-05, "loss": 0.2151, "step": 21565 }, { "epoch": 0.4673585682404177, "grad_norm": 2.100797414779663, "learning_rate": 1.102366452735714e-05, "loss": 0.2037, "step": 21570 }, { "epoch": 0.4674669035598986, "grad_norm": 2.2423908710479736, "learning_rate": 1.1020278892871616e-05, "loss": 0.1829, "step": 21575 }, { "epoch": 0.46757523887937946, "grad_norm": 3.006462812423706, "learning_rate": 1.1016893140202068e-05, "loss": 0.1937, "step": 21580 }, { "epoch": 0.4676835741988603, "grad_norm": 1.5795437097549438, "learning_rate": 1.101350726974068e-05, "loss": 0.1961, "step": 21585 }, { "epoch": 0.46779190951834115, "grad_norm": 2.047828435897827, "learning_rate": 1.1010121281879658e-05, "loss": 0.2433, "step": 21590 }, { "epoch": 0.467900244837822, "grad_norm": 1.1860957145690918, "learning_rate": 1.1006735177011216e-05, "loss": 0.2088, "step": 21595 }, { "epoch": 0.4680085801573029, "grad_norm": 1.7393074035644531, "learning_rate": 1.1003348955527585e-05, "loss": 0.3087, "step": 21600 }, { "epoch": 0.46811691547678375, "grad_norm": 2.60239577293396, "learning_rate": 1.0999962617821008e-05, "loss": 0.1936, "step": 21605 }, { "epoch": 0.4682252507962646, "grad_norm": 1.4511635303497314, "learning_rate": 1.0996576164283741e-05, "loss": 0.1476, "step": 21610 }, { "epoch": 0.46833358611574544, "grad_norm": 1.6920284032821655, "learning_rate": 1.0993189595308055e-05, "loss": 0.1486, "step": 21615 }, { "epoch": 0.46844192143522634, "grad_norm": 2.715566396713257, "learning_rate": 1.0989802911286232e-05, "loss": 0.2249, "step": 21620 }, { "epoch": 0.4685502567547072, "grad_norm": 1.9261205196380615, "learning_rate": 1.098641611261057e-05, "loss": 0.1454, "step": 21625 }, { "epoch": 0.46865859207418803, "grad_norm": 1.779605746269226, "learning_rate": 1.0983029199673376e-05, "loss": 0.1504, "step": 21630 }, { "epoch": 0.4687669273936689, "grad_norm": 2.556809663772583, "learning_rate": 1.0979642172866975e-05, "loss": 0.2177, "step": 21635 }, { "epoch": 0.4688752627131497, "grad_norm": 1.0354288816452026, "learning_rate": 1.0976255032583705e-05, "loss": 0.2139, "step": 21640 }, { "epoch": 0.4689835980326306, "grad_norm": 1.9916706085205078, "learning_rate": 1.097286777921591e-05, "loss": 0.3181, "step": 21645 }, { "epoch": 0.46909193335211147, "grad_norm": 1.68819260597229, "learning_rate": 1.0969480413155959e-05, "loss": 0.2511, "step": 21650 }, { "epoch": 0.4692002686715923, "grad_norm": 1.571518898010254, "learning_rate": 1.0966092934796226e-05, "loss": 0.2471, "step": 21655 }, { "epoch": 0.46930860399107316, "grad_norm": 1.4677278995513916, "learning_rate": 1.0962705344529099e-05, "loss": 0.1563, "step": 21660 }, { "epoch": 0.469416939310554, "grad_norm": 1.6224406957626343, "learning_rate": 1.0959317642746978e-05, "loss": 0.2479, "step": 21665 }, { "epoch": 0.4695252746300349, "grad_norm": 1.7730857133865356, "learning_rate": 1.0955929829842275e-05, "loss": 0.1607, "step": 21670 }, { "epoch": 0.46963360994951575, "grad_norm": 2.3292593955993652, "learning_rate": 1.0952541906207426e-05, "loss": 0.227, "step": 21675 }, { "epoch": 0.4697419452689966, "grad_norm": 1.5159144401550293, "learning_rate": 1.0949153872234866e-05, "loss": 0.225, "step": 21680 }, { "epoch": 0.46985028058847744, "grad_norm": 2.6304805278778076, "learning_rate": 1.0945765728317047e-05, "loss": 0.2033, "step": 21685 }, { "epoch": 0.4699586159079583, "grad_norm": 1.7923510074615479, "learning_rate": 1.0942377474846436e-05, "loss": 0.2225, "step": 21690 }, { "epoch": 0.4700669512274392, "grad_norm": 1.7915005683898926, "learning_rate": 1.0938989112215507e-05, "loss": 0.2554, "step": 21695 }, { "epoch": 0.47017528654692003, "grad_norm": 1.1725881099700928, "learning_rate": 1.0935600640816763e-05, "loss": 0.1742, "step": 21700 }, { "epoch": 0.4702836218664009, "grad_norm": 1.8635276556015015, "learning_rate": 1.0932212061042698e-05, "loss": 0.2828, "step": 21705 }, { "epoch": 0.4703919571858817, "grad_norm": 2.076226234436035, "learning_rate": 1.0928823373285828e-05, "loss": 0.1682, "step": 21710 }, { "epoch": 0.47050029250536257, "grad_norm": 2.644501209259033, "learning_rate": 1.0925434577938684e-05, "loss": 0.2245, "step": 21715 }, { "epoch": 0.4706086278248435, "grad_norm": 1.3276981115341187, "learning_rate": 1.0922045675393807e-05, "loss": 0.146, "step": 21720 }, { "epoch": 0.4707169631443243, "grad_norm": 2.0974795818328857, "learning_rate": 1.0918656666043752e-05, "loss": 0.2569, "step": 21725 }, { "epoch": 0.47082529846380516, "grad_norm": 1.9771851301193237, "learning_rate": 1.0915267550281083e-05, "loss": 0.1942, "step": 21730 }, { "epoch": 0.470933633783286, "grad_norm": 1.4033089876174927, "learning_rate": 1.091187832849838e-05, "loss": 0.2173, "step": 21735 }, { "epoch": 0.4710419691027669, "grad_norm": 1.4854484796524048, "learning_rate": 1.0908489001088235e-05, "loss": 0.1296, "step": 21740 }, { "epoch": 0.47115030442224776, "grad_norm": 1.3836315870285034, "learning_rate": 1.0905099568443242e-05, "loss": 0.2176, "step": 21745 }, { "epoch": 0.4712586397417286, "grad_norm": 1.6934869289398193, "learning_rate": 1.0901710030956027e-05, "loss": 0.2426, "step": 21750 }, { "epoch": 0.47136697506120945, "grad_norm": 3.0213449001312256, "learning_rate": 1.0898320389019212e-05, "loss": 0.1919, "step": 21755 }, { "epoch": 0.4714753103806903, "grad_norm": 1.9994758367538452, "learning_rate": 1.0894930643025436e-05, "loss": 0.2096, "step": 21760 }, { "epoch": 0.4715836457001712, "grad_norm": 0.9842442870140076, "learning_rate": 1.089154079336735e-05, "loss": 0.1961, "step": 21765 }, { "epoch": 0.47169198101965204, "grad_norm": 1.0074145793914795, "learning_rate": 1.0888150840437618e-05, "loss": 0.1496, "step": 21770 }, { "epoch": 0.4718003163391329, "grad_norm": 1.780756950378418, "learning_rate": 1.0884760784628918e-05, "loss": 0.2548, "step": 21775 }, { "epoch": 0.47190865165861373, "grad_norm": 2.0495660305023193, "learning_rate": 1.0881370626333936e-05, "loss": 0.2554, "step": 21780 }, { "epoch": 0.4720169869780946, "grad_norm": 1.3457400798797607, "learning_rate": 1.0877980365945369e-05, "loss": 0.1723, "step": 21785 }, { "epoch": 0.4721253222975755, "grad_norm": 1.3256614208221436, "learning_rate": 1.0874590003855925e-05, "loss": 0.191, "step": 21790 }, { "epoch": 0.4722336576170563, "grad_norm": 2.1248812675476074, "learning_rate": 1.0871199540458334e-05, "loss": 0.1858, "step": 21795 }, { "epoch": 0.47234199293653717, "grad_norm": 1.5204960107803345, "learning_rate": 1.0867808976145327e-05, "loss": 0.2576, "step": 21800 }, { "epoch": 0.472450328256018, "grad_norm": 1.2593671083450317, "learning_rate": 1.0864418311309648e-05, "loss": 0.2409, "step": 21805 }, { "epoch": 0.47255866357549886, "grad_norm": 1.235636830329895, "learning_rate": 1.0861027546344058e-05, "loss": 0.1744, "step": 21810 }, { "epoch": 0.47266699889497976, "grad_norm": 2.0251524448394775, "learning_rate": 1.0857636681641322e-05, "loss": 0.2787, "step": 21815 }, { "epoch": 0.4727753342144606, "grad_norm": 1.8833696842193604, "learning_rate": 1.0854245717594229e-05, "loss": 0.1916, "step": 21820 }, { "epoch": 0.47288366953394145, "grad_norm": 1.5607441663742065, "learning_rate": 1.0850854654595565e-05, "loss": 0.191, "step": 21825 }, { "epoch": 0.4729920048534223, "grad_norm": 1.9521145820617676, "learning_rate": 1.0847463493038132e-05, "loss": 0.2857, "step": 21830 }, { "epoch": 0.47310034017290314, "grad_norm": 2.4766457080841064, "learning_rate": 1.0844072233314751e-05, "loss": 0.1645, "step": 21835 }, { "epoch": 0.47320867549238405, "grad_norm": 1.6299034357070923, "learning_rate": 1.0840680875818242e-05, "loss": 0.2594, "step": 21840 }, { "epoch": 0.4733170108118649, "grad_norm": 1.9453566074371338, "learning_rate": 1.083728942094145e-05, "loss": 0.1916, "step": 21845 }, { "epoch": 0.47342534613134574, "grad_norm": 2.106499433517456, "learning_rate": 1.0833897869077222e-05, "loss": 0.1658, "step": 21850 }, { "epoch": 0.4735336814508266, "grad_norm": 1.7864142656326294, "learning_rate": 1.0830506220618415e-05, "loss": 0.2949, "step": 21855 }, { "epoch": 0.47364201677030743, "grad_norm": 2.092763662338257, "learning_rate": 1.0827114475957905e-05, "loss": 0.1611, "step": 21860 }, { "epoch": 0.47375035208978833, "grad_norm": 1.5083411931991577, "learning_rate": 1.0823722635488574e-05, "loss": 0.1609, "step": 21865 }, { "epoch": 0.4738586874092692, "grad_norm": 0.9410499930381775, "learning_rate": 1.0820330699603315e-05, "loss": 0.1838, "step": 21870 }, { "epoch": 0.47396702272875, "grad_norm": 1.415337085723877, "learning_rate": 1.0816938668695031e-05, "loss": 0.1826, "step": 21875 }, { "epoch": 0.47407535804823087, "grad_norm": 1.314255952835083, "learning_rate": 1.0813546543156642e-05, "loss": 0.1905, "step": 21880 }, { "epoch": 0.47418369336771177, "grad_norm": 2.400757074356079, "learning_rate": 1.0810154323381068e-05, "loss": 0.1513, "step": 21885 }, { "epoch": 0.4742920286871926, "grad_norm": 1.7405714988708496, "learning_rate": 1.0806762009761256e-05, "loss": 0.1694, "step": 21890 }, { "epoch": 0.47440036400667346, "grad_norm": 1.5440833568572998, "learning_rate": 1.080336960269015e-05, "loss": 0.2079, "step": 21895 }, { "epoch": 0.4745086993261543, "grad_norm": 1.9471030235290527, "learning_rate": 1.079997710256071e-05, "loss": 0.1538, "step": 21900 }, { "epoch": 0.47461703464563515, "grad_norm": 1.8528331518173218, "learning_rate": 1.079658450976591e-05, "loss": 0.2777, "step": 21905 }, { "epoch": 0.47472536996511605, "grad_norm": 2.5002760887145996, "learning_rate": 1.079319182469872e-05, "loss": 0.182, "step": 21910 }, { "epoch": 0.4748337052845969, "grad_norm": 1.8201277256011963, "learning_rate": 1.0789799047752145e-05, "loss": 0.1733, "step": 21915 }, { "epoch": 0.47494204060407774, "grad_norm": 1.8881051540374756, "learning_rate": 1.078640617931918e-05, "loss": 0.1094, "step": 21920 }, { "epoch": 0.4750503759235586, "grad_norm": 1.6921287775039673, "learning_rate": 1.078301321979284e-05, "loss": 0.1537, "step": 21925 }, { "epoch": 0.47515871124303943, "grad_norm": 2.054672956466675, "learning_rate": 1.0779620169566146e-05, "loss": 0.2875, "step": 21930 }, { "epoch": 0.47526704656252033, "grad_norm": 1.5124660730361938, "learning_rate": 1.0776227029032133e-05, "loss": 0.22, "step": 21935 }, { "epoch": 0.4753753818820012, "grad_norm": 1.6920498609542847, "learning_rate": 1.077283379858385e-05, "loss": 0.2162, "step": 21940 }, { "epoch": 0.475483717201482, "grad_norm": 1.7214100360870361, "learning_rate": 1.0769440478614347e-05, "loss": 0.1686, "step": 21945 }, { "epoch": 0.47559205252096287, "grad_norm": 1.6676005125045776, "learning_rate": 1.0766047069516692e-05, "loss": 0.197, "step": 21950 }, { "epoch": 0.4757003878404437, "grad_norm": 2.056166410446167, "learning_rate": 1.0762653571683958e-05, "loss": 0.2318, "step": 21955 }, { "epoch": 0.4758087231599246, "grad_norm": 2.3275153636932373, "learning_rate": 1.0759259985509232e-05, "loss": 0.1382, "step": 21960 }, { "epoch": 0.47591705847940546, "grad_norm": 1.8077178001403809, "learning_rate": 1.075586631138561e-05, "loss": 0.2146, "step": 21965 }, { "epoch": 0.4760253937988863, "grad_norm": 2.2927186489105225, "learning_rate": 1.0752472549706201e-05, "loss": 0.1915, "step": 21970 }, { "epoch": 0.47613372911836715, "grad_norm": 1.7620185613632202, "learning_rate": 1.0749078700864117e-05, "loss": 0.2264, "step": 21975 }, { "epoch": 0.476242064437848, "grad_norm": 0.7588160634040833, "learning_rate": 1.074568476525249e-05, "loss": 0.1039, "step": 21980 }, { "epoch": 0.4763503997573289, "grad_norm": 1.0885462760925293, "learning_rate": 1.074229074326445e-05, "loss": 0.1809, "step": 21985 }, { "epoch": 0.47645873507680975, "grad_norm": 1.371910572052002, "learning_rate": 1.0738896635293146e-05, "loss": 0.215, "step": 21990 }, { "epoch": 0.4765670703962906, "grad_norm": 0.9645897150039673, "learning_rate": 1.0735502441731737e-05, "loss": 0.1833, "step": 21995 }, { "epoch": 0.47667540571577144, "grad_norm": 1.2859383821487427, "learning_rate": 1.073210816297339e-05, "loss": 0.2224, "step": 22000 }, { "epoch": 0.47678374103525234, "grad_norm": 1.9038275480270386, "learning_rate": 1.0728713799411274e-05, "loss": 0.1961, "step": 22005 }, { "epoch": 0.4768920763547332, "grad_norm": 1.5870914459228516, "learning_rate": 1.0725319351438585e-05, "loss": 0.1567, "step": 22010 }, { "epoch": 0.47700041167421403, "grad_norm": 0.4360608160495758, "learning_rate": 1.0721924819448513e-05, "loss": 0.1718, "step": 22015 }, { "epoch": 0.4771087469936949, "grad_norm": 2.4375953674316406, "learning_rate": 1.0718530203834265e-05, "loss": 0.1737, "step": 22020 }, { "epoch": 0.4772170823131757, "grad_norm": 2.1380701065063477, "learning_rate": 1.071513550498906e-05, "loss": 0.1295, "step": 22025 }, { "epoch": 0.4773254176326566, "grad_norm": 1.2288352251052856, "learning_rate": 1.0711740723306115e-05, "loss": 0.2129, "step": 22030 }, { "epoch": 0.47743375295213747, "grad_norm": 2.3529019355773926, "learning_rate": 1.070834585917867e-05, "loss": 0.2172, "step": 22035 }, { "epoch": 0.4775420882716183, "grad_norm": 2.4015073776245117, "learning_rate": 1.070495091299997e-05, "loss": 0.2225, "step": 22040 }, { "epoch": 0.47765042359109916, "grad_norm": 1.6925874948501587, "learning_rate": 1.0701555885163263e-05, "loss": 0.1987, "step": 22045 }, { "epoch": 0.47775875891058, "grad_norm": 1.6988122463226318, "learning_rate": 1.069816077606182e-05, "loss": 0.2195, "step": 22050 }, { "epoch": 0.4778670942300609, "grad_norm": 1.87235426902771, "learning_rate": 1.0694765586088907e-05, "loss": 0.2753, "step": 22055 }, { "epoch": 0.47797542954954175, "grad_norm": 1.6849141120910645, "learning_rate": 1.069137031563781e-05, "loss": 0.2588, "step": 22060 }, { "epoch": 0.4780837648690226, "grad_norm": 1.1536140441894531, "learning_rate": 1.0687974965101819e-05, "loss": 0.2324, "step": 22065 }, { "epoch": 0.47819210018850344, "grad_norm": 2.8024516105651855, "learning_rate": 1.0684579534874234e-05, "loss": 0.2052, "step": 22070 }, { "epoch": 0.4783004355079843, "grad_norm": 1.8390246629714966, "learning_rate": 1.0681184025348364e-05, "loss": 0.2894, "step": 22075 }, { "epoch": 0.4784087708274652, "grad_norm": 1.888909935951233, "learning_rate": 1.0677788436917526e-05, "loss": 0.167, "step": 22080 }, { "epoch": 0.47851710614694604, "grad_norm": 1.566853404045105, "learning_rate": 1.0674392769975055e-05, "loss": 0.1726, "step": 22085 }, { "epoch": 0.4786254414664269, "grad_norm": 2.416740655899048, "learning_rate": 1.0670997024914282e-05, "loss": 0.1858, "step": 22090 }, { "epoch": 0.4787337767859077, "grad_norm": 1.7742063999176025, "learning_rate": 1.0667601202128557e-05, "loss": 0.1901, "step": 22095 }, { "epoch": 0.4788421121053886, "grad_norm": 1.9463857412338257, "learning_rate": 1.0664205302011233e-05, "loss": 0.154, "step": 22100 }, { "epoch": 0.4789504474248695, "grad_norm": 2.7909319400787354, "learning_rate": 1.0660809324955675e-05, "loss": 0.1378, "step": 22105 }, { "epoch": 0.4790587827443503, "grad_norm": 1.7233439683914185, "learning_rate": 1.0657413271355254e-05, "loss": 0.1859, "step": 22110 }, { "epoch": 0.47916711806383117, "grad_norm": 2.058258295059204, "learning_rate": 1.0654017141603354e-05, "loss": 0.2121, "step": 22115 }, { "epoch": 0.479275453383312, "grad_norm": 1.1712837219238281, "learning_rate": 1.0650620936093365e-05, "loss": 0.1678, "step": 22120 }, { "epoch": 0.47938378870279286, "grad_norm": 2.0081357955932617, "learning_rate": 1.0647224655218688e-05, "loss": 0.2443, "step": 22125 }, { "epoch": 0.47949212402227376, "grad_norm": 1.4261858463287354, "learning_rate": 1.0643828299372729e-05, "loss": 0.0976, "step": 22130 }, { "epoch": 0.4796004593417546, "grad_norm": 1.3040066957473755, "learning_rate": 1.0640431868948905e-05, "loss": 0.1997, "step": 22135 }, { "epoch": 0.47970879466123545, "grad_norm": 1.8219774961471558, "learning_rate": 1.0637035364340647e-05, "loss": 0.1971, "step": 22140 }, { "epoch": 0.4798171299807163, "grad_norm": 1.548091173171997, "learning_rate": 1.0633638785941382e-05, "loss": 0.2339, "step": 22145 }, { "epoch": 0.4799254653001972, "grad_norm": 1.5909487009048462, "learning_rate": 1.0630242134144556e-05, "loss": 0.2109, "step": 22150 }, { "epoch": 0.48003380061967804, "grad_norm": 1.1618311405181885, "learning_rate": 1.0626845409343618e-05, "loss": 0.234, "step": 22155 }, { "epoch": 0.4801421359391589, "grad_norm": 2.0501174926757812, "learning_rate": 1.0623448611932033e-05, "loss": 0.1613, "step": 22160 }, { "epoch": 0.48025047125863973, "grad_norm": 1.969367265701294, "learning_rate": 1.062005174230326e-05, "loss": 0.282, "step": 22165 }, { "epoch": 0.4803588065781206, "grad_norm": 1.2322684526443481, "learning_rate": 1.0616654800850787e-05, "loss": 0.1796, "step": 22170 }, { "epoch": 0.4804671418976015, "grad_norm": 1.6516668796539307, "learning_rate": 1.0613257787968087e-05, "loss": 0.1757, "step": 22175 }, { "epoch": 0.4805754772170823, "grad_norm": 2.331975221633911, "learning_rate": 1.0609860704048662e-05, "loss": 0.1788, "step": 22180 }, { "epoch": 0.48068381253656317, "grad_norm": 1.2895749807357788, "learning_rate": 1.0606463549486015e-05, "loss": 0.2109, "step": 22185 }, { "epoch": 0.480792147856044, "grad_norm": 1.2284672260284424, "learning_rate": 1.0603066324673645e-05, "loss": 0.1834, "step": 22190 }, { "epoch": 0.48090048317552486, "grad_norm": 1.7239446640014648, "learning_rate": 1.0599669030005078e-05, "loss": 0.1759, "step": 22195 }, { "epoch": 0.48100881849500576, "grad_norm": 1.010333776473999, "learning_rate": 1.0596271665873831e-05, "loss": 0.1947, "step": 22200 }, { "epoch": 0.4811171538144866, "grad_norm": 1.733668327331543, "learning_rate": 1.059287423267345e-05, "loss": 0.1655, "step": 22205 }, { "epoch": 0.48122548913396745, "grad_norm": 1.2040826082229614, "learning_rate": 1.0589476730797467e-05, "loss": 0.2101, "step": 22210 }, { "epoch": 0.4813338244534483, "grad_norm": 2.0567150115966797, "learning_rate": 1.0586079160639435e-05, "loss": 0.17, "step": 22215 }, { "epoch": 0.48144215977292915, "grad_norm": 1.8121678829193115, "learning_rate": 1.0582681522592915e-05, "loss": 0.2476, "step": 22220 }, { "epoch": 0.48155049509241005, "grad_norm": 1.904815435409546, "learning_rate": 1.0579283817051466e-05, "loss": 0.2028, "step": 22225 }, { "epoch": 0.4816588304118909, "grad_norm": 1.9382078647613525, "learning_rate": 1.0575886044408665e-05, "loss": 0.1913, "step": 22230 }, { "epoch": 0.48176716573137174, "grad_norm": 2.116909980773926, "learning_rate": 1.0572488205058094e-05, "loss": 0.2078, "step": 22235 }, { "epoch": 0.4818755010508526, "grad_norm": 1.647963047027588, "learning_rate": 1.056909029939334e-05, "loss": 0.1537, "step": 22240 }, { "epoch": 0.48198383637033343, "grad_norm": 1.7502458095550537, "learning_rate": 1.0565692327808e-05, "loss": 0.1972, "step": 22245 }, { "epoch": 0.48209217168981433, "grad_norm": 2.1316845417022705, "learning_rate": 1.0562294290695674e-05, "loss": 0.184, "step": 22250 }, { "epoch": 0.4822005070092952, "grad_norm": 1.6599348783493042, "learning_rate": 1.0558896188449981e-05, "loss": 0.2507, "step": 22255 }, { "epoch": 0.482308842328776, "grad_norm": 3.6444613933563232, "learning_rate": 1.0555498021464537e-05, "loss": 0.2206, "step": 22260 }, { "epoch": 0.48241717764825687, "grad_norm": 0.7236661314964294, "learning_rate": 1.0552099790132968e-05, "loss": 0.1234, "step": 22265 }, { "epoch": 0.48252551296773777, "grad_norm": 2.129220485687256, "learning_rate": 1.054870149484891e-05, "loss": 0.1627, "step": 22270 }, { "epoch": 0.4826338482872186, "grad_norm": 2.041130542755127, "learning_rate": 1.0545303136006002e-05, "loss": 0.2377, "step": 22275 }, { "epoch": 0.48274218360669946, "grad_norm": 1.379555583000183, "learning_rate": 1.0541904713997894e-05, "loss": 0.1742, "step": 22280 }, { "epoch": 0.4828505189261803, "grad_norm": 1.8738446235656738, "learning_rate": 1.0538506229218244e-05, "loss": 0.1843, "step": 22285 }, { "epoch": 0.48295885424566115, "grad_norm": 1.50621497631073, "learning_rate": 1.0535107682060712e-05, "loss": 0.1983, "step": 22290 }, { "epoch": 0.48306718956514205, "grad_norm": 1.4693461656570435, "learning_rate": 1.0531709072918972e-05, "loss": 0.1682, "step": 22295 }, { "epoch": 0.4831755248846229, "grad_norm": 2.3393685817718506, "learning_rate": 1.0528310402186701e-05, "loss": 0.2143, "step": 22300 }, { "epoch": 0.48328386020410374, "grad_norm": 2.238175392150879, "learning_rate": 1.0524911670257588e-05, "loss": 0.1828, "step": 22305 }, { "epoch": 0.4833921955235846, "grad_norm": 1.1034862995147705, "learning_rate": 1.0521512877525315e-05, "loss": 0.2104, "step": 22310 }, { "epoch": 0.48350053084306543, "grad_norm": 1.9716942310333252, "learning_rate": 1.051811402438359e-05, "loss": 0.2111, "step": 22315 }, { "epoch": 0.48360886616254634, "grad_norm": 1.292386770248413, "learning_rate": 1.0514715111226114e-05, "loss": 0.2565, "step": 22320 }, { "epoch": 0.4837172014820272, "grad_norm": 2.0832252502441406, "learning_rate": 1.0511316138446608e-05, "loss": 0.2854, "step": 22325 }, { "epoch": 0.483825536801508, "grad_norm": 1.680895209312439, "learning_rate": 1.0507917106438783e-05, "loss": 0.1606, "step": 22330 }, { "epoch": 0.4839338721209889, "grad_norm": 1.613797664642334, "learning_rate": 1.050451801559637e-05, "loss": 0.1829, "step": 22335 }, { "epoch": 0.4840422074404697, "grad_norm": 3.43261981010437, "learning_rate": 1.0501118866313105e-05, "loss": 0.2527, "step": 22340 }, { "epoch": 0.4841505427599506, "grad_norm": 2.0129079818725586, "learning_rate": 1.0497719658982724e-05, "loss": 0.2313, "step": 22345 }, { "epoch": 0.48425887807943147, "grad_norm": 1.5929834842681885, "learning_rate": 1.0494320393998978e-05, "loss": 0.212, "step": 22350 }, { "epoch": 0.4843672133989123, "grad_norm": 1.8365428447723389, "learning_rate": 1.0490921071755617e-05, "loss": 0.208, "step": 22355 }, { "epoch": 0.48447554871839316, "grad_norm": 1.8844553232192993, "learning_rate": 1.0487521692646405e-05, "loss": 0.2222, "step": 22360 }, { "epoch": 0.484583884037874, "grad_norm": 2.1152822971343994, "learning_rate": 1.0484122257065108e-05, "loss": 0.2103, "step": 22365 }, { "epoch": 0.4846922193573549, "grad_norm": 1.1774643659591675, "learning_rate": 1.0480722765405498e-05, "loss": 0.0847, "step": 22370 }, { "epoch": 0.48480055467683575, "grad_norm": 1.779504656791687, "learning_rate": 1.047732321806136e-05, "loss": 0.1457, "step": 22375 }, { "epoch": 0.4849088899963166, "grad_norm": 1.249984622001648, "learning_rate": 1.0473923615426476e-05, "loss": 0.241, "step": 22380 }, { "epoch": 0.48501722531579744, "grad_norm": 2.3128647804260254, "learning_rate": 1.047052395789464e-05, "loss": 0.1321, "step": 22385 }, { "epoch": 0.48512556063527834, "grad_norm": 1.2234331369400024, "learning_rate": 1.0467124245859652e-05, "loss": 0.2805, "step": 22390 }, { "epoch": 0.4852338959547592, "grad_norm": 1.4549853801727295, "learning_rate": 1.0463724479715317e-05, "loss": 0.2254, "step": 22395 }, { "epoch": 0.48534223127424003, "grad_norm": 1.7760450839996338, "learning_rate": 1.0460324659855452e-05, "loss": 0.2969, "step": 22400 }, { "epoch": 0.4854505665937209, "grad_norm": 2.1622061729431152, "learning_rate": 1.0456924786673868e-05, "loss": 0.214, "step": 22405 }, { "epoch": 0.4855589019132017, "grad_norm": 1.1111218929290771, "learning_rate": 1.045352486056439e-05, "loss": 0.1764, "step": 22410 }, { "epoch": 0.4856672372326826, "grad_norm": 1.231149435043335, "learning_rate": 1.0450124881920851e-05, "loss": 0.1996, "step": 22415 }, { "epoch": 0.48577557255216347, "grad_norm": 1.9663528203964233, "learning_rate": 1.044672485113709e-05, "loss": 0.202, "step": 22420 }, { "epoch": 0.4858839078716443, "grad_norm": 1.381089210510254, "learning_rate": 1.0443324768606945e-05, "loss": 0.1892, "step": 22425 }, { "epoch": 0.48599224319112516, "grad_norm": 1.7325493097305298, "learning_rate": 1.043992463472427e-05, "loss": 0.1666, "step": 22430 }, { "epoch": 0.486100578510606, "grad_norm": 2.025830030441284, "learning_rate": 1.0436524449882912e-05, "loss": 0.1597, "step": 22435 }, { "epoch": 0.4862089138300869, "grad_norm": 2.756441593170166, "learning_rate": 1.0433124214476736e-05, "loss": 0.2048, "step": 22440 }, { "epoch": 0.48631724914956775, "grad_norm": 1.0835278034210205, "learning_rate": 1.0429723928899609e-05, "loss": 0.1711, "step": 22445 }, { "epoch": 0.4864255844690486, "grad_norm": 1.4319376945495605, "learning_rate": 1.0426323593545402e-05, "loss": 0.1752, "step": 22450 }, { "epoch": 0.48653391978852945, "grad_norm": 1.727724313735962, "learning_rate": 1.0422923208807993e-05, "loss": 0.1877, "step": 22455 }, { "epoch": 0.4866422551080103, "grad_norm": 1.6971145868301392, "learning_rate": 1.0419522775081265e-05, "loss": 0.2762, "step": 22460 }, { "epoch": 0.4867505904274912, "grad_norm": 1.6109561920166016, "learning_rate": 1.041612229275911e-05, "loss": 0.2167, "step": 22465 }, { "epoch": 0.48685892574697204, "grad_norm": 1.3032362461090088, "learning_rate": 1.041272176223542e-05, "loss": 0.1584, "step": 22470 }, { "epoch": 0.4869672610664529, "grad_norm": 1.8459477424621582, "learning_rate": 1.0409321183904095e-05, "loss": 0.1344, "step": 22475 }, { "epoch": 0.48707559638593373, "grad_norm": 1.2267658710479736, "learning_rate": 1.0405920558159043e-05, "loss": 0.1746, "step": 22480 }, { "epoch": 0.4871839317054146, "grad_norm": 1.7203946113586426, "learning_rate": 1.0402519885394178e-05, "loss": 0.2649, "step": 22485 }, { "epoch": 0.4872922670248955, "grad_norm": 1.349678635597229, "learning_rate": 1.0399119166003408e-05, "loss": 0.1316, "step": 22490 }, { "epoch": 0.4874006023443763, "grad_norm": 1.2869411706924438, "learning_rate": 1.0395718400380665e-05, "loss": 0.1713, "step": 22495 }, { "epoch": 0.48750893766385717, "grad_norm": 2.260047674179077, "learning_rate": 1.0392317588919874e-05, "loss": 0.2059, "step": 22500 }, { "epoch": 0.487617272983338, "grad_norm": 1.6728969812393188, "learning_rate": 1.0388916732014967e-05, "loss": 0.1939, "step": 22505 }, { "epoch": 0.48772560830281886, "grad_norm": 1.701924204826355, "learning_rate": 1.0385515830059884e-05, "loss": 0.1979, "step": 22510 }, { "epoch": 0.48783394362229976, "grad_norm": 1.5867770910263062, "learning_rate": 1.0382114883448562e-05, "loss": 0.2001, "step": 22515 }, { "epoch": 0.4879422789417806, "grad_norm": 1.8361749649047852, "learning_rate": 1.037871389257496e-05, "loss": 0.1683, "step": 22520 }, { "epoch": 0.48805061426126145, "grad_norm": 1.533612847328186, "learning_rate": 1.0375312857833024e-05, "loss": 0.1692, "step": 22525 }, { "epoch": 0.4881589495807423, "grad_norm": 2.470554828643799, "learning_rate": 1.0371911779616717e-05, "loss": 0.1441, "step": 22530 }, { "epoch": 0.4882672849002232, "grad_norm": 2.3189756870269775, "learning_rate": 1.0368510658319997e-05, "loss": 0.1826, "step": 22535 }, { "epoch": 0.48837562021970404, "grad_norm": 2.8117194175720215, "learning_rate": 1.0365109494336842e-05, "loss": 0.1396, "step": 22540 }, { "epoch": 0.4884839555391849, "grad_norm": 0.875709056854248, "learning_rate": 1.0361708288061223e-05, "loss": 0.1553, "step": 22545 }, { "epoch": 0.48859229085866573, "grad_norm": 1.0789878368377686, "learning_rate": 1.0358307039887115e-05, "loss": 0.2396, "step": 22550 }, { "epoch": 0.4887006261781466, "grad_norm": 1.8923543691635132, "learning_rate": 1.0354905750208504e-05, "loss": 0.1631, "step": 22555 }, { "epoch": 0.4888089614976275, "grad_norm": 1.5340265035629272, "learning_rate": 1.0351504419419375e-05, "loss": 0.1319, "step": 22560 }, { "epoch": 0.4889172968171083, "grad_norm": 1.660526990890503, "learning_rate": 1.034810304791373e-05, "loss": 0.1256, "step": 22565 }, { "epoch": 0.4890256321365892, "grad_norm": 2.0326404571533203, "learning_rate": 1.0344701636085559e-05, "loss": 0.1573, "step": 22570 }, { "epoch": 0.48913396745607, "grad_norm": 1.6882953643798828, "learning_rate": 1.0341300184328866e-05, "loss": 0.1822, "step": 22575 }, { "epoch": 0.48924230277555086, "grad_norm": 1.8754994869232178, "learning_rate": 1.033789869303766e-05, "loss": 0.1505, "step": 22580 }, { "epoch": 0.48935063809503176, "grad_norm": 2.803679943084717, "learning_rate": 1.0334497162605954e-05, "loss": 0.1649, "step": 22585 }, { "epoch": 0.4894589734145126, "grad_norm": 1.194326400756836, "learning_rate": 1.033109559342776e-05, "loss": 0.1558, "step": 22590 }, { "epoch": 0.48956730873399346, "grad_norm": 2.86250901222229, "learning_rate": 1.0327693985897103e-05, "loss": 0.2694, "step": 22595 }, { "epoch": 0.4896756440534743, "grad_norm": 2.6025993824005127, "learning_rate": 1.0324292340408007e-05, "loss": 0.1849, "step": 22600 }, { "epoch": 0.48978397937295515, "grad_norm": 1.7637579441070557, "learning_rate": 1.03208906573545e-05, "loss": 0.162, "step": 22605 }, { "epoch": 0.48989231469243605, "grad_norm": 1.122198224067688, "learning_rate": 1.0317488937130615e-05, "loss": 0.1966, "step": 22610 }, { "epoch": 0.4900006500119169, "grad_norm": 2.6704981327056885, "learning_rate": 1.0314087180130397e-05, "loss": 0.1735, "step": 22615 }, { "epoch": 0.49010898533139774, "grad_norm": 1.6129522323608398, "learning_rate": 1.0310685386747881e-05, "loss": 0.2264, "step": 22620 }, { "epoch": 0.4902173206508786, "grad_norm": 1.5959663391113281, "learning_rate": 1.030728355737712e-05, "loss": 0.1797, "step": 22625 }, { "epoch": 0.49032565597035943, "grad_norm": 1.9813058376312256, "learning_rate": 1.0303881692412161e-05, "loss": 0.222, "step": 22630 }, { "epoch": 0.49043399128984033, "grad_norm": 1.8760731220245361, "learning_rate": 1.0300479792247058e-05, "loss": 0.2345, "step": 22635 }, { "epoch": 0.4905423266093212, "grad_norm": 2.0036532878875732, "learning_rate": 1.0297077857275875e-05, "loss": 0.1783, "step": 22640 }, { "epoch": 0.490650661928802, "grad_norm": 1.9865187406539917, "learning_rate": 1.029367588789267e-05, "loss": 0.16, "step": 22645 }, { "epoch": 0.49075899724828287, "grad_norm": 1.5315865278244019, "learning_rate": 1.0290273884491516e-05, "loss": 0.1176, "step": 22650 }, { "epoch": 0.49086733256776377, "grad_norm": 1.9760966300964355, "learning_rate": 1.0286871847466476e-05, "loss": 0.1184, "step": 22655 }, { "epoch": 0.4909756678872446, "grad_norm": 2.6699142456054688, "learning_rate": 1.0283469777211633e-05, "loss": 0.2615, "step": 22660 }, { "epoch": 0.49108400320672546, "grad_norm": 1.939475417137146, "learning_rate": 1.0280067674121064e-05, "loss": 0.2404, "step": 22665 }, { "epoch": 0.4911923385262063, "grad_norm": 1.2077370882034302, "learning_rate": 1.0276665538588847e-05, "loss": 0.148, "step": 22670 }, { "epoch": 0.49130067384568715, "grad_norm": 1.8094977140426636, "learning_rate": 1.0273263371009075e-05, "loss": 0.1814, "step": 22675 }, { "epoch": 0.49140900916516805, "grad_norm": 2.608785629272461, "learning_rate": 1.026986117177583e-05, "loss": 0.1799, "step": 22680 }, { "epoch": 0.4915173444846489, "grad_norm": 1.683432698249817, "learning_rate": 1.0266458941283216e-05, "loss": 0.2734, "step": 22685 }, { "epoch": 0.49162567980412974, "grad_norm": 1.0406728982925415, "learning_rate": 1.0263056679925323e-05, "loss": 0.155, "step": 22690 }, { "epoch": 0.4917340151236106, "grad_norm": 1.593238353729248, "learning_rate": 1.0259654388096255e-05, "loss": 0.1866, "step": 22695 }, { "epoch": 0.49184235044309144, "grad_norm": 2.022376775741577, "learning_rate": 1.0256252066190113e-05, "loss": 0.2192, "step": 22700 }, { "epoch": 0.49195068576257234, "grad_norm": 1.839226245880127, "learning_rate": 1.0252849714601011e-05, "loss": 0.2535, "step": 22705 }, { "epoch": 0.4920590210820532, "grad_norm": 1.2572882175445557, "learning_rate": 1.0249447333723057e-05, "loss": 0.182, "step": 22710 }, { "epoch": 0.49216735640153403, "grad_norm": 1.3781447410583496, "learning_rate": 1.0246044923950364e-05, "loss": 0.1776, "step": 22715 }, { "epoch": 0.4922756917210149, "grad_norm": 1.9659606218338013, "learning_rate": 1.0242642485677054e-05, "loss": 0.2219, "step": 22720 }, { "epoch": 0.4923840270404957, "grad_norm": 1.8240206241607666, "learning_rate": 1.0239240019297248e-05, "loss": 0.1997, "step": 22725 }, { "epoch": 0.4924923623599766, "grad_norm": 1.1564836502075195, "learning_rate": 1.0235837525205065e-05, "loss": 0.1581, "step": 22730 }, { "epoch": 0.49260069767945747, "grad_norm": 2.6450035572052, "learning_rate": 1.0232435003794641e-05, "loss": 0.1763, "step": 22735 }, { "epoch": 0.4927090329989383, "grad_norm": 2.2979867458343506, "learning_rate": 1.0229032455460104e-05, "loss": 0.2027, "step": 22740 }, { "epoch": 0.49281736831841916, "grad_norm": 1.619971752166748, "learning_rate": 1.0225629880595586e-05, "loss": 0.1905, "step": 22745 }, { "epoch": 0.4929257036379, "grad_norm": 2.215609550476074, "learning_rate": 1.0222227279595231e-05, "loss": 0.1612, "step": 22750 }, { "epoch": 0.4930340389573809, "grad_norm": 1.7437222003936768, "learning_rate": 1.021882465285317e-05, "loss": 0.2131, "step": 22755 }, { "epoch": 0.49314237427686175, "grad_norm": 1.7817409038543701, "learning_rate": 1.0215422000763553e-05, "loss": 0.2467, "step": 22760 }, { "epoch": 0.4932507095963426, "grad_norm": 1.3479028940200806, "learning_rate": 1.0212019323720524e-05, "loss": 0.1158, "step": 22765 }, { "epoch": 0.49335904491582344, "grad_norm": 1.2815543413162231, "learning_rate": 1.020861662211823e-05, "loss": 0.1996, "step": 22770 }, { "epoch": 0.4934673802353043, "grad_norm": 1.298487663269043, "learning_rate": 1.0205213896350828e-05, "loss": 0.2097, "step": 22775 }, { "epoch": 0.4935757155547852, "grad_norm": 0.9871394634246826, "learning_rate": 1.0201811146812466e-05, "loss": 0.1951, "step": 22780 }, { "epoch": 0.49368405087426603, "grad_norm": 1.51167893409729, "learning_rate": 1.019840837389731e-05, "loss": 0.1868, "step": 22785 }, { "epoch": 0.4937923861937469, "grad_norm": 2.1787800788879395, "learning_rate": 1.0195005577999517e-05, "loss": 0.2436, "step": 22790 }, { "epoch": 0.4939007215132277, "grad_norm": 1.740513563156128, "learning_rate": 1.0191602759513249e-05, "loss": 0.2044, "step": 22795 }, { "epoch": 0.4940090568327086, "grad_norm": 1.4662233591079712, "learning_rate": 1.0188199918832666e-05, "loss": 0.16, "step": 22800 }, { "epoch": 0.49411739215218947, "grad_norm": 1.9385325908660889, "learning_rate": 1.0184797056351945e-05, "loss": 0.2247, "step": 22805 }, { "epoch": 0.4942257274716703, "grad_norm": 2.4456627368927, "learning_rate": 1.0181394172465255e-05, "loss": 0.1421, "step": 22810 }, { "epoch": 0.49433406279115116, "grad_norm": 1.5870155096054077, "learning_rate": 1.0177991267566766e-05, "loss": 0.2425, "step": 22815 }, { "epoch": 0.494442398110632, "grad_norm": 2.345708131790161, "learning_rate": 1.0174588342050655e-05, "loss": 0.2093, "step": 22820 }, { "epoch": 0.4945507334301129, "grad_norm": 1.6447169780731201, "learning_rate": 1.0171185396311101e-05, "loss": 0.2258, "step": 22825 }, { "epoch": 0.49465906874959376, "grad_norm": 1.3324614763259888, "learning_rate": 1.0167782430742282e-05, "loss": 0.3034, "step": 22830 }, { "epoch": 0.4947674040690746, "grad_norm": 1.5796035528182983, "learning_rate": 1.0164379445738381e-05, "loss": 0.1497, "step": 22835 }, { "epoch": 0.49487573938855545, "grad_norm": 1.789042592048645, "learning_rate": 1.0160976441693585e-05, "loss": 0.2383, "step": 22840 }, { "epoch": 0.4949840747080363, "grad_norm": 1.356807827949524, "learning_rate": 1.0157573419002078e-05, "loss": 0.2548, "step": 22845 }, { "epoch": 0.4950924100275172, "grad_norm": 1.5131869316101074, "learning_rate": 1.015417037805805e-05, "loss": 0.1802, "step": 22850 }, { "epoch": 0.49520074534699804, "grad_norm": 1.6202031373977661, "learning_rate": 1.0150767319255697e-05, "loss": 0.266, "step": 22855 }, { "epoch": 0.4953090806664789, "grad_norm": 1.940898060798645, "learning_rate": 1.0147364242989206e-05, "loss": 0.2051, "step": 22860 }, { "epoch": 0.49541741598595973, "grad_norm": 2.029148578643799, "learning_rate": 1.0143961149652776e-05, "loss": 0.2828, "step": 22865 }, { "epoch": 0.4955257513054406, "grad_norm": 0.9385043382644653, "learning_rate": 1.0140558039640602e-05, "loss": 0.0931, "step": 22870 }, { "epoch": 0.4956340866249215, "grad_norm": 2.531959056854248, "learning_rate": 1.0137154913346887e-05, "loss": 0.2124, "step": 22875 }, { "epoch": 0.4957424219444023, "grad_norm": 2.1515064239501953, "learning_rate": 1.013375177116583e-05, "loss": 0.1681, "step": 22880 }, { "epoch": 0.49585075726388317, "grad_norm": 1.456650733947754, "learning_rate": 1.0130348613491632e-05, "loss": 0.2763, "step": 22885 }, { "epoch": 0.495959092583364, "grad_norm": 1.6946351528167725, "learning_rate": 1.0126945440718499e-05, "loss": 0.1923, "step": 22890 }, { "epoch": 0.49606742790284486, "grad_norm": 1.4878426790237427, "learning_rate": 1.012354225324064e-05, "loss": 0.2152, "step": 22895 }, { "epoch": 0.49617576322232576, "grad_norm": 1.4302513599395752, "learning_rate": 1.012013905145226e-05, "loss": 0.2232, "step": 22900 }, { "epoch": 0.4962840985418066, "grad_norm": 1.0278911590576172, "learning_rate": 1.0116735835747572e-05, "loss": 0.1781, "step": 22905 }, { "epoch": 0.49639243386128745, "grad_norm": 1.4716304540634155, "learning_rate": 1.0113332606520787e-05, "loss": 0.2327, "step": 22910 }, { "epoch": 0.4965007691807683, "grad_norm": 1.334284782409668, "learning_rate": 1.0109929364166117e-05, "loss": 0.2123, "step": 22915 }, { "epoch": 0.4966091045002492, "grad_norm": 1.7819454669952393, "learning_rate": 1.0106526109077774e-05, "loss": 0.2133, "step": 22920 }, { "epoch": 0.49671743981973004, "grad_norm": 1.2816592454910278, "learning_rate": 1.010312284164998e-05, "loss": 0.2417, "step": 22925 }, { "epoch": 0.4968257751392109, "grad_norm": 1.6358642578125, "learning_rate": 1.0099719562276948e-05, "loss": 0.2202, "step": 22930 }, { "epoch": 0.49693411045869174, "grad_norm": 1.58152174949646, "learning_rate": 1.0096316271352899e-05, "loss": 0.2444, "step": 22935 }, { "epoch": 0.4970424457781726, "grad_norm": 1.4229447841644287, "learning_rate": 1.0092912969272052e-05, "loss": 0.296, "step": 22940 }, { "epoch": 0.4971507810976535, "grad_norm": 1.6725727319717407, "learning_rate": 1.0089509656428632e-05, "loss": 0.1737, "step": 22945 }, { "epoch": 0.49725911641713433, "grad_norm": 1.4749383926391602, "learning_rate": 1.0086106333216856e-05, "loss": 0.2428, "step": 22950 }, { "epoch": 0.4973674517366152, "grad_norm": 2.7297987937927246, "learning_rate": 1.0082703000030953e-05, "loss": 0.171, "step": 22955 }, { "epoch": 0.497475787056096, "grad_norm": 1.4509048461914062, "learning_rate": 1.0079299657265147e-05, "loss": 0.1811, "step": 22960 }, { "epoch": 0.49758412237557686, "grad_norm": 1.977044701576233, "learning_rate": 1.0075896305313661e-05, "loss": 0.1292, "step": 22965 }, { "epoch": 0.49769245769505777, "grad_norm": 2.0405590534210205, "learning_rate": 1.0072492944570726e-05, "loss": 0.1599, "step": 22970 }, { "epoch": 0.4978007930145386, "grad_norm": 1.2068692445755005, "learning_rate": 1.006908957543057e-05, "loss": 0.1087, "step": 22975 }, { "epoch": 0.49790912833401946, "grad_norm": 1.3906766176223755, "learning_rate": 1.006568619828742e-05, "loss": 0.1324, "step": 22980 }, { "epoch": 0.4980174636535003, "grad_norm": 1.8643832206726074, "learning_rate": 1.006228281353551e-05, "loss": 0.2036, "step": 22985 }, { "epoch": 0.49812579897298115, "grad_norm": 2.138298749923706, "learning_rate": 1.005887942156907e-05, "loss": 0.1436, "step": 22990 }, { "epoch": 0.49823413429246205, "grad_norm": 1.6326138973236084, "learning_rate": 1.0055476022782328e-05, "loss": 0.2082, "step": 22995 }, { "epoch": 0.4983424696119429, "grad_norm": 1.517207145690918, "learning_rate": 1.0052072617569525e-05, "loss": 0.166, "step": 23000 }, { "epoch": 0.49845080493142374, "grad_norm": 1.9152028560638428, "learning_rate": 1.0048669206324884e-05, "loss": 0.2034, "step": 23005 }, { "epoch": 0.4985591402509046, "grad_norm": 1.3320069313049316, "learning_rate": 1.0045265789442646e-05, "loss": 0.2, "step": 23010 }, { "epoch": 0.49866747557038543, "grad_norm": 1.6978535652160645, "learning_rate": 1.0041862367317043e-05, "loss": 0.1029, "step": 23015 }, { "epoch": 0.49877581088986633, "grad_norm": 1.5062154531478882, "learning_rate": 1.0038458940342313e-05, "loss": 0.2211, "step": 23020 }, { "epoch": 0.4988841462093472, "grad_norm": 0.8048402667045593, "learning_rate": 1.003505550891269e-05, "loss": 0.1432, "step": 23025 }, { "epoch": 0.498992481528828, "grad_norm": 1.8756102323532104, "learning_rate": 1.0031652073422415e-05, "loss": 0.2172, "step": 23030 }, { "epoch": 0.49910081684830887, "grad_norm": 1.223048448562622, "learning_rate": 1.0028248634265721e-05, "loss": 0.1816, "step": 23035 }, { "epoch": 0.4992091521677897, "grad_norm": 1.8914763927459717, "learning_rate": 1.0024845191836843e-05, "loss": 0.1803, "step": 23040 }, { "epoch": 0.4993174874872706, "grad_norm": 1.1108278036117554, "learning_rate": 1.002144174653002e-05, "loss": 0.1542, "step": 23045 }, { "epoch": 0.49942582280675146, "grad_norm": 1.4402962923049927, "learning_rate": 1.0018038298739495e-05, "loss": 0.1617, "step": 23050 }, { "epoch": 0.4995341581262323, "grad_norm": 1.7302007675170898, "learning_rate": 1.0014634848859503e-05, "loss": 0.2021, "step": 23055 }, { "epoch": 0.49964249344571315, "grad_norm": 1.0758613348007202, "learning_rate": 1.0011231397284281e-05, "loss": 0.2137, "step": 23060 }, { "epoch": 0.49975082876519406, "grad_norm": 1.7073034048080444, "learning_rate": 1.0007827944408073e-05, "loss": 0.2784, "step": 23065 }, { "epoch": 0.4998591640846749, "grad_norm": 1.1407901048660278, "learning_rate": 1.0004424490625115e-05, "loss": 0.2476, "step": 23070 }, { "epoch": 0.49996749940415575, "grad_norm": 2.2810122966766357, "learning_rate": 1.0001021036329643e-05, "loss": 0.1687, "step": 23075 }, { "epoch": 0.5000758347236366, "grad_norm": 1.602537751197815, "learning_rate": 9.997617581915901e-06, "loss": 0.2558, "step": 23080 }, { "epoch": 0.5001841700431174, "grad_norm": 1.5084447860717773, "learning_rate": 9.994214127778129e-06, "loss": 0.255, "step": 23085 }, { "epoch": 0.5002925053625983, "grad_norm": 1.5253750085830688, "learning_rate": 9.99081067431056e-06, "loss": 0.2615, "step": 23090 }, { "epoch": 0.5004008406820791, "grad_norm": 1.8634432554244995, "learning_rate": 9.987407221907438e-06, "loss": 0.1208, "step": 23095 }, { "epoch": 0.50050917600156, "grad_norm": 2.230072021484375, "learning_rate": 9.984003770963e-06, "loss": 0.2502, "step": 23100 }, { "epoch": 0.5006175113210409, "grad_norm": 2.175935745239258, "learning_rate": 9.980600321871484e-06, "loss": 0.2283, "step": 23105 }, { "epoch": 0.5007258466405218, "grad_norm": 1.182102084159851, "learning_rate": 9.977196875027133e-06, "loss": 0.207, "step": 23110 }, { "epoch": 0.5008341819600026, "grad_norm": 2.8575220108032227, "learning_rate": 9.97379343082418e-06, "loss": 0.1962, "step": 23115 }, { "epoch": 0.5009425172794835, "grad_norm": 1.5606744289398193, "learning_rate": 9.970389989656867e-06, "loss": 0.101, "step": 23120 }, { "epoch": 0.5010508525989643, "grad_norm": 1.7078931331634521, "learning_rate": 9.96698655191943e-06, "loss": 0.2102, "step": 23125 }, { "epoch": 0.5011591879184452, "grad_norm": 2.132974147796631, "learning_rate": 9.963583118006107e-06, "loss": 0.16, "step": 23130 }, { "epoch": 0.501267523237926, "grad_norm": 2.703505754470825, "learning_rate": 9.960179688311134e-06, "loss": 0.2036, "step": 23135 }, { "epoch": 0.5013758585574069, "grad_norm": 1.7154319286346436, "learning_rate": 9.956776263228746e-06, "loss": 0.2363, "step": 23140 }, { "epoch": 0.5014841938768877, "grad_norm": 2.2381856441497803, "learning_rate": 9.953372843153183e-06, "loss": 0.1762, "step": 23145 }, { "epoch": 0.5015925291963687, "grad_norm": 1.767531394958496, "learning_rate": 9.949969428478674e-06, "loss": 0.1845, "step": 23150 }, { "epoch": 0.5017008645158495, "grad_norm": 1.4189702272415161, "learning_rate": 9.946566019599459e-06, "loss": 0.1492, "step": 23155 }, { "epoch": 0.5018091998353303, "grad_norm": 2.1792120933532715, "learning_rate": 9.943162616909772e-06, "loss": 0.2533, "step": 23160 }, { "epoch": 0.5019175351548112, "grad_norm": 2.0264856815338135, "learning_rate": 9.939759220803843e-06, "loss": 0.1546, "step": 23165 }, { "epoch": 0.502025870474292, "grad_norm": 2.2954044342041016, "learning_rate": 9.936355831675905e-06, "loss": 0.2406, "step": 23170 }, { "epoch": 0.5021342057937729, "grad_norm": 2.0994322299957275, "learning_rate": 9.932952449920193e-06, "loss": 0.1588, "step": 23175 }, { "epoch": 0.5022425411132537, "grad_norm": 1.6152287721633911, "learning_rate": 9.929549075930933e-06, "loss": 0.1751, "step": 23180 }, { "epoch": 0.5023508764327346, "grad_norm": 0.598036527633667, "learning_rate": 9.926145710102358e-06, "loss": 0.2273, "step": 23185 }, { "epoch": 0.5024592117522154, "grad_norm": 2.162415027618408, "learning_rate": 9.922742352828697e-06, "loss": 0.1565, "step": 23190 }, { "epoch": 0.5025675470716963, "grad_norm": 3.2628486156463623, "learning_rate": 9.919339004504176e-06, "loss": 0.207, "step": 23195 }, { "epoch": 0.5026758823911772, "grad_norm": 0.953778088092804, "learning_rate": 9.915935665523023e-06, "loss": 0.1877, "step": 23200 }, { "epoch": 0.5027842177106581, "grad_norm": 1.2217798233032227, "learning_rate": 9.912532336279464e-06, "loss": 0.1967, "step": 23205 }, { "epoch": 0.5028925530301389, "grad_norm": 1.7719453573226929, "learning_rate": 9.909129017167724e-06, "loss": 0.1978, "step": 23210 }, { "epoch": 0.5030008883496198, "grad_norm": 1.328766107559204, "learning_rate": 9.905725708582025e-06, "loss": 0.1855, "step": 23215 }, { "epoch": 0.5031092236691006, "grad_norm": 3.1879770755767822, "learning_rate": 9.902322410916591e-06, "loss": 0.1552, "step": 23220 }, { "epoch": 0.5032175589885814, "grad_norm": 2.277345657348633, "learning_rate": 9.898919124565644e-06, "loss": 0.2244, "step": 23225 }, { "epoch": 0.5033258943080623, "grad_norm": 2.3906049728393555, "learning_rate": 9.895515849923401e-06, "loss": 0.1813, "step": 23230 }, { "epoch": 0.5034342296275431, "grad_norm": 1.4825360774993896, "learning_rate": 9.892112587384083e-06, "loss": 0.1744, "step": 23235 }, { "epoch": 0.503542564947024, "grad_norm": 2.0713205337524414, "learning_rate": 9.888709337341905e-06, "loss": 0.2428, "step": 23240 }, { "epoch": 0.5036509002665048, "grad_norm": 1.8840688467025757, "learning_rate": 9.885306100191082e-06, "loss": 0.2876, "step": 23245 }, { "epoch": 0.5037592355859858, "grad_norm": 1.5778074264526367, "learning_rate": 9.881902876325825e-06, "loss": 0.1817, "step": 23250 }, { "epoch": 0.5038675709054666, "grad_norm": 1.5657516717910767, "learning_rate": 9.878499666140357e-06, "loss": 0.2243, "step": 23255 }, { "epoch": 0.5039759062249475, "grad_norm": 1.736602544784546, "learning_rate": 9.87509647002888e-06, "loss": 0.1152, "step": 23260 }, { "epoch": 0.5040842415444283, "grad_norm": 0.6698989868164062, "learning_rate": 9.871693288385606e-06, "loss": 0.1246, "step": 23265 }, { "epoch": 0.5041925768639092, "grad_norm": 1.4610137939453125, "learning_rate": 9.868290121604745e-06, "loss": 0.2236, "step": 23270 }, { "epoch": 0.50430091218339, "grad_norm": 1.9102040529251099, "learning_rate": 9.864886970080501e-06, "loss": 0.1629, "step": 23275 }, { "epoch": 0.5044092475028709, "grad_norm": 2.391772747039795, "learning_rate": 9.861483834207073e-06, "loss": 0.1859, "step": 23280 }, { "epoch": 0.5045175828223517, "grad_norm": 0.9163898229598999, "learning_rate": 9.858080714378672e-06, "loss": 0.2118, "step": 23285 }, { "epoch": 0.5046259181418326, "grad_norm": 1.778236746788025, "learning_rate": 9.854677610989491e-06, "loss": 0.2032, "step": 23290 }, { "epoch": 0.5047342534613135, "grad_norm": 2.018115758895874, "learning_rate": 9.85127452443373e-06, "loss": 0.2238, "step": 23295 }, { "epoch": 0.5048425887807944, "grad_norm": 1.8119155168533325, "learning_rate": 9.847871455105592e-06, "loss": 0.2371, "step": 23300 }, { "epoch": 0.5049509241002752, "grad_norm": 1.716844916343689, "learning_rate": 9.844468403399264e-06, "loss": 0.1705, "step": 23305 }, { "epoch": 0.505059259419756, "grad_norm": 1.4768776893615723, "learning_rate": 9.841065369708945e-06, "loss": 0.1982, "step": 23310 }, { "epoch": 0.5051675947392369, "grad_norm": 0.9656436443328857, "learning_rate": 9.837662354428821e-06, "loss": 0.1199, "step": 23315 }, { "epoch": 0.5052759300587177, "grad_norm": 1.4687546491622925, "learning_rate": 9.83425935795308e-06, "loss": 0.1543, "step": 23320 }, { "epoch": 0.5053842653781986, "grad_norm": 1.6691994667053223, "learning_rate": 9.830856380675911e-06, "loss": 0.1651, "step": 23325 }, { "epoch": 0.5054926006976794, "grad_norm": 1.3171274662017822, "learning_rate": 9.827453422991496e-06, "loss": 0.2013, "step": 23330 }, { "epoch": 0.5056009360171603, "grad_norm": 1.1908565759658813, "learning_rate": 9.824050485294018e-06, "loss": 0.1655, "step": 23335 }, { "epoch": 0.5057092713366411, "grad_norm": 1.7045658826828003, "learning_rate": 9.820647567977655e-06, "loss": 0.23, "step": 23340 }, { "epoch": 0.5058176066561221, "grad_norm": 2.172590732574463, "learning_rate": 9.817244671436582e-06, "loss": 0.2135, "step": 23345 }, { "epoch": 0.5059259419756029, "grad_norm": 1.3603674173355103, "learning_rate": 9.813841796064977e-06, "loss": 0.1419, "step": 23350 }, { "epoch": 0.5060342772950838, "grad_norm": 1.483699083328247, "learning_rate": 9.810438942257014e-06, "loss": 0.1835, "step": 23355 }, { "epoch": 0.5061426126145646, "grad_norm": 1.716740608215332, "learning_rate": 9.807036110406858e-06, "loss": 0.1761, "step": 23360 }, { "epoch": 0.5062509479340455, "grad_norm": 1.6723849773406982, "learning_rate": 9.803633300908679e-06, "loss": 0.1549, "step": 23365 }, { "epoch": 0.5063592832535263, "grad_norm": 1.1604340076446533, "learning_rate": 9.80023051415664e-06, "loss": 0.2257, "step": 23370 }, { "epoch": 0.5064676185730072, "grad_norm": 1.2274303436279297, "learning_rate": 9.796827750544903e-06, "loss": 0.2393, "step": 23375 }, { "epoch": 0.506575953892488, "grad_norm": 1.1963406801223755, "learning_rate": 9.793425010467628e-06, "loss": 0.1948, "step": 23380 }, { "epoch": 0.5066842892119688, "grad_norm": 1.355798363685608, "learning_rate": 9.790022294318971e-06, "loss": 0.1398, "step": 23385 }, { "epoch": 0.5067926245314498, "grad_norm": 0.6675195693969727, "learning_rate": 9.786619602493084e-06, "loss": 0.124, "step": 23390 }, { "epoch": 0.5069009598509306, "grad_norm": 1.7216472625732422, "learning_rate": 9.783216935384122e-06, "loss": 0.1614, "step": 23395 }, { "epoch": 0.5070092951704115, "grad_norm": 1.2270907163619995, "learning_rate": 9.77981429338623e-06, "loss": 0.1262, "step": 23400 }, { "epoch": 0.5071176304898923, "grad_norm": 1.027746319770813, "learning_rate": 9.776411676893554e-06, "loss": 0.1017, "step": 23405 }, { "epoch": 0.5072259658093732, "grad_norm": 1.6711199283599854, "learning_rate": 9.773009086300235e-06, "loss": 0.2011, "step": 23410 }, { "epoch": 0.507334301128854, "grad_norm": 1.1110224723815918, "learning_rate": 9.769606522000414e-06, "loss": 0.231, "step": 23415 }, { "epoch": 0.5074426364483349, "grad_norm": 1.733496904373169, "learning_rate": 9.766203984388225e-06, "loss": 0.1434, "step": 23420 }, { "epoch": 0.5075509717678157, "grad_norm": 1.5671712160110474, "learning_rate": 9.762801473857803e-06, "loss": 0.2308, "step": 23425 }, { "epoch": 0.5076593070872966, "grad_norm": 2.6082942485809326, "learning_rate": 9.759398990803278e-06, "loss": 0.3022, "step": 23430 }, { "epoch": 0.5077676424067774, "grad_norm": 1.8834255933761597, "learning_rate": 9.755996535618775e-06, "loss": 0.2272, "step": 23435 }, { "epoch": 0.5078759777262584, "grad_norm": 1.7464288473129272, "learning_rate": 9.752594108698416e-06, "loss": 0.1884, "step": 23440 }, { "epoch": 0.5079843130457392, "grad_norm": 2.402513027191162, "learning_rate": 9.749191710436325e-06, "loss": 0.2201, "step": 23445 }, { "epoch": 0.5080926483652201, "grad_norm": 1.98440420627594, "learning_rate": 9.745789341226617e-06, "loss": 0.1565, "step": 23450 }, { "epoch": 0.5082009836847009, "grad_norm": 0.8317252993583679, "learning_rate": 9.742387001463406e-06, "loss": 0.2154, "step": 23455 }, { "epoch": 0.5083093190041817, "grad_norm": 1.5147782564163208, "learning_rate": 9.738984691540802e-06, "loss": 0.1789, "step": 23460 }, { "epoch": 0.5084176543236626, "grad_norm": 1.8862055540084839, "learning_rate": 9.735582411852912e-06, "loss": 0.1745, "step": 23465 }, { "epoch": 0.5085259896431434, "grad_norm": 2.1934592723846436, "learning_rate": 9.732180162793839e-06, "loss": 0.1574, "step": 23470 }, { "epoch": 0.5086343249626243, "grad_norm": 2.276426315307617, "learning_rate": 9.72877794475768e-06, "loss": 0.2193, "step": 23475 }, { "epoch": 0.5087426602821051, "grad_norm": 1.041351318359375, "learning_rate": 9.725375758138537e-06, "loss": 0.2944, "step": 23480 }, { "epoch": 0.508850995601586, "grad_norm": 2.027637481689453, "learning_rate": 9.721973603330496e-06, "loss": 0.2065, "step": 23485 }, { "epoch": 0.5089593309210669, "grad_norm": 1.4539328813552856, "learning_rate": 9.718571480727645e-06, "loss": 0.1657, "step": 23490 }, { "epoch": 0.5090676662405478, "grad_norm": 2.511780261993408, "learning_rate": 9.715169390724076e-06, "loss": 0.271, "step": 23495 }, { "epoch": 0.5091760015600286, "grad_norm": 1.6555407047271729, "learning_rate": 9.711767333713865e-06, "loss": 0.2887, "step": 23500 }, { "epoch": 0.5092843368795095, "grad_norm": 2.7752270698547363, "learning_rate": 9.70836531009109e-06, "loss": 0.2396, "step": 23505 }, { "epoch": 0.5093926721989903, "grad_norm": 1.8181841373443604, "learning_rate": 9.704963320249827e-06, "loss": 0.2327, "step": 23510 }, { "epoch": 0.5095010075184712, "grad_norm": 2.241577625274658, "learning_rate": 9.701561364584147e-06, "loss": 0.1381, "step": 23515 }, { "epoch": 0.509609342837952, "grad_norm": 2.446228265762329, "learning_rate": 9.698159443488107e-06, "loss": 0.1904, "step": 23520 }, { "epoch": 0.5097176781574329, "grad_norm": 1.092598795890808, "learning_rate": 9.694757557355777e-06, "loss": 0.2314, "step": 23525 }, { "epoch": 0.5098260134769137, "grad_norm": 1.2160922288894653, "learning_rate": 9.691355706581209e-06, "loss": 0.2269, "step": 23530 }, { "epoch": 0.5099343487963947, "grad_norm": 2.296792984008789, "learning_rate": 9.687953891558456e-06, "loss": 0.2445, "step": 23535 }, { "epoch": 0.5100426841158755, "grad_norm": 1.4734362363815308, "learning_rate": 9.684552112681573e-06, "loss": 0.1793, "step": 23540 }, { "epoch": 0.5101510194353563, "grad_norm": 1.9334232807159424, "learning_rate": 9.681150370344603e-06, "loss": 0.2476, "step": 23545 }, { "epoch": 0.5102593547548372, "grad_norm": 2.472046136856079, "learning_rate": 9.67774866494159e-06, "loss": 0.2805, "step": 23550 }, { "epoch": 0.510367690074318, "grad_norm": 0.8746871948242188, "learning_rate": 9.674346996866564e-06, "loss": 0.1661, "step": 23555 }, { "epoch": 0.5104760253937989, "grad_norm": 1.935158371925354, "learning_rate": 9.670945366513559e-06, "loss": 0.218, "step": 23560 }, { "epoch": 0.5105843607132797, "grad_norm": 1.3759758472442627, "learning_rate": 9.667543774276606e-06, "loss": 0.1376, "step": 23565 }, { "epoch": 0.5106926960327606, "grad_norm": 1.3414403200149536, "learning_rate": 9.664142220549723e-06, "loss": 0.1543, "step": 23570 }, { "epoch": 0.5108010313522414, "grad_norm": 2.738903045654297, "learning_rate": 9.660740705726937e-06, "loss": 0.298, "step": 23575 }, { "epoch": 0.5109093666717223, "grad_norm": 0.9751942157745361, "learning_rate": 9.657339230202255e-06, "loss": 0.1648, "step": 23580 }, { "epoch": 0.5110177019912032, "grad_norm": 1.950277328491211, "learning_rate": 9.653937794369689e-06, "loss": 0.1294, "step": 23585 }, { "epoch": 0.5111260373106841, "grad_norm": 2.1280105113983154, "learning_rate": 9.650536398623248e-06, "loss": 0.2219, "step": 23590 }, { "epoch": 0.5112343726301649, "grad_norm": 1.7968817949295044, "learning_rate": 9.64713504335693e-06, "loss": 0.2281, "step": 23595 }, { "epoch": 0.5113427079496458, "grad_norm": 0.9896278381347656, "learning_rate": 9.64373372896473e-06, "loss": 0.1897, "step": 23600 }, { "epoch": 0.5114510432691266, "grad_norm": 1.7075973749160767, "learning_rate": 9.640332455840642e-06, "loss": 0.1486, "step": 23605 }, { "epoch": 0.5115593785886074, "grad_norm": 1.6087957620620728, "learning_rate": 9.636931224378652e-06, "loss": 0.2176, "step": 23610 }, { "epoch": 0.5116677139080883, "grad_norm": 1.697937250137329, "learning_rate": 9.633530034972739e-06, "loss": 0.1831, "step": 23615 }, { "epoch": 0.5117760492275691, "grad_norm": 1.6197504997253418, "learning_rate": 9.630128888016883e-06, "loss": 0.1716, "step": 23620 }, { "epoch": 0.51188438454705, "grad_norm": 1.5136504173278809, "learning_rate": 9.626727783905055e-06, "loss": 0.1852, "step": 23625 }, { "epoch": 0.5119927198665308, "grad_norm": 1.4976146221160889, "learning_rate": 9.62332672303122e-06, "loss": 0.2117, "step": 23630 }, { "epoch": 0.5121010551860118, "grad_norm": 1.9031174182891846, "learning_rate": 9.61992570578934e-06, "loss": 0.1885, "step": 23635 }, { "epoch": 0.5122093905054926, "grad_norm": 1.5504791736602783, "learning_rate": 9.616524732573376e-06, "loss": 0.1498, "step": 23640 }, { "epoch": 0.5123177258249735, "grad_norm": 1.9283276796340942, "learning_rate": 9.613123803777278e-06, "loss": 0.2457, "step": 23645 }, { "epoch": 0.5124260611444543, "grad_norm": 1.6156703233718872, "learning_rate": 9.60972291979499e-06, "loss": 0.1646, "step": 23650 }, { "epoch": 0.5125343964639352, "grad_norm": 1.2636700868606567, "learning_rate": 9.606322081020456e-06, "loss": 0.1991, "step": 23655 }, { "epoch": 0.512642731783416, "grad_norm": 1.2542424201965332, "learning_rate": 9.602921287847613e-06, "loss": 0.1385, "step": 23660 }, { "epoch": 0.5127510671028969, "grad_norm": 3.267608165740967, "learning_rate": 9.59952054067039e-06, "loss": 0.212, "step": 23665 }, { "epoch": 0.5128594024223777, "grad_norm": 1.521600365638733, "learning_rate": 9.596119839882713e-06, "loss": 0.143, "step": 23670 }, { "epoch": 0.5129677377418586, "grad_norm": 1.82282555103302, "learning_rate": 9.592719185878501e-06, "loss": 0.236, "step": 23675 }, { "epoch": 0.5130760730613395, "grad_norm": 1.2110888957977295, "learning_rate": 9.589318579051671e-06, "loss": 0.1737, "step": 23680 }, { "epoch": 0.5131844083808204, "grad_norm": 1.4504836797714233, "learning_rate": 9.58591801979613e-06, "loss": 0.2249, "step": 23685 }, { "epoch": 0.5132927437003012, "grad_norm": 0.9778319001197815, "learning_rate": 9.582517508505788e-06, "loss": 0.1777, "step": 23690 }, { "epoch": 0.513401079019782, "grad_norm": 1.8080899715423584, "learning_rate": 9.579117045574536e-06, "loss": 0.2415, "step": 23695 }, { "epoch": 0.5135094143392629, "grad_norm": 0.9596489667892456, "learning_rate": 9.57571663139627e-06, "loss": 0.1479, "step": 23700 }, { "epoch": 0.5136177496587437, "grad_norm": 1.9050532579421997, "learning_rate": 9.572316266364876e-06, "loss": 0.2252, "step": 23705 }, { "epoch": 0.5137260849782246, "grad_norm": 1.0765469074249268, "learning_rate": 9.568915950874235e-06, "loss": 0.1576, "step": 23710 }, { "epoch": 0.5138344202977054, "grad_norm": 1.924372673034668, "learning_rate": 9.565515685318225e-06, "loss": 0.1947, "step": 23715 }, { "epoch": 0.5139427556171863, "grad_norm": 1.180634617805481, "learning_rate": 9.562115470090716e-06, "loss": 0.1798, "step": 23720 }, { "epoch": 0.5140510909366671, "grad_norm": 1.0643247365951538, "learning_rate": 9.558715305585569e-06, "loss": 0.1893, "step": 23725 }, { "epoch": 0.5141594262561481, "grad_norm": 1.266739010810852, "learning_rate": 9.555315192196639e-06, "loss": 0.0755, "step": 23730 }, { "epoch": 0.5142677615756289, "grad_norm": 2.0852205753326416, "learning_rate": 9.551915130317784e-06, "loss": 0.1901, "step": 23735 }, { "epoch": 0.5143760968951098, "grad_norm": 1.6497913599014282, "learning_rate": 9.548515120342851e-06, "loss": 0.1428, "step": 23740 }, { "epoch": 0.5144844322145906, "grad_norm": 1.6890296936035156, "learning_rate": 9.54511516266568e-06, "loss": 0.1848, "step": 23745 }, { "epoch": 0.5145927675340715, "grad_norm": 1.2503491640090942, "learning_rate": 9.541715257680099e-06, "loss": 0.0887, "step": 23750 }, { "epoch": 0.5147011028535523, "grad_norm": 1.8102434873580933, "learning_rate": 9.538315405779947e-06, "loss": 0.1853, "step": 23755 }, { "epoch": 0.5148094381730332, "grad_norm": 1.8148645162582397, "learning_rate": 9.534915607359034e-06, "loss": 0.2624, "step": 23760 }, { "epoch": 0.514917773492514, "grad_norm": 1.4980638027191162, "learning_rate": 9.531515862811182e-06, "loss": 0.183, "step": 23765 }, { "epoch": 0.5150261088119948, "grad_norm": 2.3685362339019775, "learning_rate": 9.528116172530198e-06, "loss": 0.1972, "step": 23770 }, { "epoch": 0.5151344441314757, "grad_norm": 2.3099751472473145, "learning_rate": 9.524716536909888e-06, "loss": 0.2679, "step": 23775 }, { "epoch": 0.5152427794509566, "grad_norm": 2.1220266819000244, "learning_rate": 9.521316956344043e-06, "loss": 0.2288, "step": 23780 }, { "epoch": 0.5153511147704375, "grad_norm": 1.6513550281524658, "learning_rate": 9.517917431226463e-06, "loss": 0.1445, "step": 23785 }, { "epoch": 0.5154594500899183, "grad_norm": 1.5327764749526978, "learning_rate": 9.514517961950925e-06, "loss": 0.1879, "step": 23790 }, { "epoch": 0.5155677854093992, "grad_norm": 1.7430557012557983, "learning_rate": 9.511118548911213e-06, "loss": 0.1614, "step": 23795 }, { "epoch": 0.51567612072888, "grad_norm": 1.3256361484527588, "learning_rate": 9.50771919250109e-06, "loss": 0.1738, "step": 23800 }, { "epoch": 0.5157844560483609, "grad_norm": 2.3109190464019775, "learning_rate": 9.504319893114325e-06, "loss": 0.3112, "step": 23805 }, { "epoch": 0.5158927913678417, "grad_norm": 1.078415870666504, "learning_rate": 9.500920651144675e-06, "loss": 0.2359, "step": 23810 }, { "epoch": 0.5160011266873226, "grad_norm": 1.8890438079833984, "learning_rate": 9.49752146698589e-06, "loss": 0.2387, "step": 23815 }, { "epoch": 0.5161094620068034, "grad_norm": 1.5819587707519531, "learning_rate": 9.494122341031717e-06, "loss": 0.1921, "step": 23820 }, { "epoch": 0.5162177973262844, "grad_norm": 1.7389047145843506, "learning_rate": 9.490723273675888e-06, "loss": 0.2295, "step": 23825 }, { "epoch": 0.5163261326457652, "grad_norm": 2.4413414001464844, "learning_rate": 9.487324265312146e-06, "loss": 0.1932, "step": 23830 }, { "epoch": 0.5164344679652461, "grad_norm": 1.5650852918624878, "learning_rate": 9.483925316334204e-06, "loss": 0.2685, "step": 23835 }, { "epoch": 0.5165428032847269, "grad_norm": 1.3930710554122925, "learning_rate": 9.480526427135786e-06, "loss": 0.205, "step": 23840 }, { "epoch": 0.5166511386042077, "grad_norm": 2.026191234588623, "learning_rate": 9.477127598110598e-06, "loss": 0.2158, "step": 23845 }, { "epoch": 0.5167594739236886, "grad_norm": 1.0825457572937012, "learning_rate": 9.473728829652345e-06, "loss": 0.1674, "step": 23850 }, { "epoch": 0.5168678092431694, "grad_norm": 1.9524420499801636, "learning_rate": 9.470330122154725e-06, "loss": 0.1973, "step": 23855 }, { "epoch": 0.5169761445626503, "grad_norm": 1.428094744682312, "learning_rate": 9.466931476011426e-06, "loss": 0.1398, "step": 23860 }, { "epoch": 0.5170844798821311, "grad_norm": 1.703904390335083, "learning_rate": 9.463532891616128e-06, "loss": 0.1844, "step": 23865 }, { "epoch": 0.517192815201612, "grad_norm": 2.8646037578582764, "learning_rate": 9.460134369362509e-06, "loss": 0.2462, "step": 23870 }, { "epoch": 0.5173011505210929, "grad_norm": 2.152409553527832, "learning_rate": 9.456735909644235e-06, "loss": 0.2061, "step": 23875 }, { "epoch": 0.5174094858405738, "grad_norm": 1.2135260105133057, "learning_rate": 9.453337512854969e-06, "loss": 0.1657, "step": 23880 }, { "epoch": 0.5175178211600546, "grad_norm": 2.245955228805542, "learning_rate": 9.449939179388364e-06, "loss": 0.109, "step": 23885 }, { "epoch": 0.5176261564795355, "grad_norm": 1.548405408859253, "learning_rate": 9.446540909638063e-06, "loss": 0.186, "step": 23890 }, { "epoch": 0.5177344917990163, "grad_norm": 1.589192271232605, "learning_rate": 9.443142703997708e-06, "loss": 0.16, "step": 23895 }, { "epoch": 0.5178428271184972, "grad_norm": 1.6974332332611084, "learning_rate": 9.439744562860931e-06, "loss": 0.221, "step": 23900 }, { "epoch": 0.517951162437978, "grad_norm": 2.6615958213806152, "learning_rate": 9.436346486621352e-06, "loss": 0.1773, "step": 23905 }, { "epoch": 0.5180594977574589, "grad_norm": 0.8999016284942627, "learning_rate": 9.432948475672589e-06, "loss": 0.1575, "step": 23910 }, { "epoch": 0.5181678330769397, "grad_norm": 1.3910430669784546, "learning_rate": 9.42955053040825e-06, "loss": 0.2396, "step": 23915 }, { "epoch": 0.5182761683964207, "grad_norm": 1.5776647329330444, "learning_rate": 9.426152651221935e-06, "loss": 0.174, "step": 23920 }, { "epoch": 0.5183845037159015, "grad_norm": 1.8958220481872559, "learning_rate": 9.422754838507242e-06, "loss": 0.1521, "step": 23925 }, { "epoch": 0.5184928390353823, "grad_norm": 1.9382867813110352, "learning_rate": 9.419357092657753e-06, "loss": 0.2293, "step": 23930 }, { "epoch": 0.5186011743548632, "grad_norm": 1.9277554750442505, "learning_rate": 9.415959414067049e-06, "loss": 0.2128, "step": 23935 }, { "epoch": 0.518709509674344, "grad_norm": 1.319722294807434, "learning_rate": 9.412561803128694e-06, "loss": 0.1703, "step": 23940 }, { "epoch": 0.5188178449938249, "grad_norm": 1.6666266918182373, "learning_rate": 9.409164260236258e-06, "loss": 0.1615, "step": 23945 }, { "epoch": 0.5189261803133057, "grad_norm": 2.123871088027954, "learning_rate": 9.40576678578329e-06, "loss": 0.1814, "step": 23950 }, { "epoch": 0.5190345156327866, "grad_norm": 3.3636884689331055, "learning_rate": 9.402369380163339e-06, "loss": 0.1951, "step": 23955 }, { "epoch": 0.5191428509522674, "grad_norm": 2.327413320541382, "learning_rate": 9.398972043769942e-06, "loss": 0.2123, "step": 23960 }, { "epoch": 0.5192511862717483, "grad_norm": 2.0929174423217773, "learning_rate": 9.395574776996632e-06, "loss": 0.1692, "step": 23965 }, { "epoch": 0.5193595215912292, "grad_norm": 1.5212329626083374, "learning_rate": 9.392177580236924e-06, "loss": 0.1786, "step": 23970 }, { "epoch": 0.5194678569107101, "grad_norm": 1.715348243713379, "learning_rate": 9.388780453884344e-06, "loss": 0.1228, "step": 23975 }, { "epoch": 0.5195761922301909, "grad_norm": 1.3107565641403198, "learning_rate": 9.38538339833239e-06, "loss": 0.2033, "step": 23980 }, { "epoch": 0.5196845275496718, "grad_norm": 0.7650331258773804, "learning_rate": 9.381986413974564e-06, "loss": 0.1617, "step": 23985 }, { "epoch": 0.5197928628691526, "grad_norm": 1.93569016456604, "learning_rate": 9.378589501204352e-06, "loss": 0.2089, "step": 23990 }, { "epoch": 0.5199011981886335, "grad_norm": 1.700518250465393, "learning_rate": 9.37519266041524e-06, "loss": 0.1966, "step": 23995 }, { "epoch": 0.5200095335081143, "grad_norm": 0.36932116746902466, "learning_rate": 9.371795892000699e-06, "loss": 0.173, "step": 24000 }, { "epoch": 0.5201178688275951, "grad_norm": 2.550532579421997, "learning_rate": 9.368399196354193e-06, "loss": 0.128, "step": 24005 }, { "epoch": 0.520226204147076, "grad_norm": 1.5200412273406982, "learning_rate": 9.365002573869177e-06, "loss": 0.1444, "step": 24010 }, { "epoch": 0.5203345394665568, "grad_norm": 1.8694783449172974, "learning_rate": 9.361606024939101e-06, "loss": 0.2223, "step": 24015 }, { "epoch": 0.5204428747860378, "grad_norm": 1.3260430097579956, "learning_rate": 9.3582095499574e-06, "loss": 0.1884, "step": 24020 }, { "epoch": 0.5205512101055186, "grad_norm": 1.8782380819320679, "learning_rate": 9.354813149317512e-06, "loss": 0.1968, "step": 24025 }, { "epoch": 0.5206595454249995, "grad_norm": 2.2771496772766113, "learning_rate": 9.351416823412856e-06, "loss": 0.1972, "step": 24030 }, { "epoch": 0.5207678807444803, "grad_norm": 1.169626235961914, "learning_rate": 9.348020572636848e-06, "loss": 0.231, "step": 24035 }, { "epoch": 0.5208762160639612, "grad_norm": 1.6889375448226929, "learning_rate": 9.344624397382889e-06, "loss": 0.1992, "step": 24040 }, { "epoch": 0.520984551383442, "grad_norm": 1.0877364873886108, "learning_rate": 9.341228298044375e-06, "loss": 0.1854, "step": 24045 }, { "epoch": 0.5210928867029229, "grad_norm": 1.3777269124984741, "learning_rate": 9.337832275014693e-06, "loss": 0.179, "step": 24050 }, { "epoch": 0.5212012220224037, "grad_norm": 2.3784070014953613, "learning_rate": 9.334436328687224e-06, "loss": 0.2027, "step": 24055 }, { "epoch": 0.5213095573418846, "grad_norm": 2.1262638568878174, "learning_rate": 9.331040459455337e-06, "loss": 0.1793, "step": 24060 }, { "epoch": 0.5214178926613655, "grad_norm": 1.4122427701950073, "learning_rate": 9.327644667712385e-06, "loss": 0.1992, "step": 24065 }, { "epoch": 0.5215262279808464, "grad_norm": 1.9527820348739624, "learning_rate": 9.324248953851734e-06, "loss": 0.2535, "step": 24070 }, { "epoch": 0.5216345633003272, "grad_norm": 1.230173110961914, "learning_rate": 9.320853318266718e-06, "loss": 0.2086, "step": 24075 }, { "epoch": 0.521742898619808, "grad_norm": 1.5780318975448608, "learning_rate": 9.31745776135067e-06, "loss": 0.2305, "step": 24080 }, { "epoch": 0.5218512339392889, "grad_norm": 2.357088565826416, "learning_rate": 9.314062283496917e-06, "loss": 0.2749, "step": 24085 }, { "epoch": 0.5219595692587697, "grad_norm": 2.020580291748047, "learning_rate": 9.310666885098771e-06, "loss": 0.1823, "step": 24090 }, { "epoch": 0.5220679045782506, "grad_norm": 2.800795316696167, "learning_rate": 9.307271566549542e-06, "loss": 0.1791, "step": 24095 }, { "epoch": 0.5221762398977314, "grad_norm": 1.9541146755218506, "learning_rate": 9.303876328242525e-06, "loss": 0.1732, "step": 24100 }, { "epoch": 0.5222845752172123, "grad_norm": 1.5688610076904297, "learning_rate": 9.300481170571007e-06, "loss": 0.1605, "step": 24105 }, { "epoch": 0.5223929105366931, "grad_norm": 0.9369041919708252, "learning_rate": 9.297086093928267e-06, "loss": 0.2385, "step": 24110 }, { "epoch": 0.5225012458561741, "grad_norm": 0.9839863777160645, "learning_rate": 9.29369109870757e-06, "loss": 0.1946, "step": 24115 }, { "epoch": 0.5226095811756549, "grad_norm": 1.658436894416809, "learning_rate": 9.290296185302181e-06, "loss": 0.1866, "step": 24120 }, { "epoch": 0.5227179164951358, "grad_norm": 1.4187672138214111, "learning_rate": 9.28690135410535e-06, "loss": 0.1608, "step": 24125 }, { "epoch": 0.5228262518146166, "grad_norm": 1.3356730937957764, "learning_rate": 9.283506605510311e-06, "loss": 0.0993, "step": 24130 }, { "epoch": 0.5229345871340975, "grad_norm": 1.822002649307251, "learning_rate": 9.280111939910301e-06, "loss": 0.2101, "step": 24135 }, { "epoch": 0.5230429224535783, "grad_norm": 1.6801527738571167, "learning_rate": 9.27671735769854e-06, "loss": 0.1801, "step": 24140 }, { "epoch": 0.5231512577730592, "grad_norm": 1.4417402744293213, "learning_rate": 9.273322859268237e-06, "loss": 0.1881, "step": 24145 }, { "epoch": 0.52325959309254, "grad_norm": 1.4071606397628784, "learning_rate": 9.269928445012595e-06, "loss": 0.1718, "step": 24150 }, { "epoch": 0.5233679284120208, "grad_norm": 2.134406089782715, "learning_rate": 9.266534115324806e-06, "loss": 0.206, "step": 24155 }, { "epoch": 0.5234762637315017, "grad_norm": 1.7839058637619019, "learning_rate": 9.263139870598052e-06, "loss": 0.1415, "step": 24160 }, { "epoch": 0.5235845990509826, "grad_norm": 2.492023229598999, "learning_rate": 9.259745711225506e-06, "loss": 0.1536, "step": 24165 }, { "epoch": 0.5236929343704635, "grad_norm": 1.0282942056655884, "learning_rate": 9.256351637600328e-06, "loss": 0.1656, "step": 24170 }, { "epoch": 0.5238012696899443, "grad_norm": 1.50704824924469, "learning_rate": 9.252957650115676e-06, "loss": 0.1441, "step": 24175 }, { "epoch": 0.5239096050094252, "grad_norm": 1.7197604179382324, "learning_rate": 9.249563749164691e-06, "loss": 0.2119, "step": 24180 }, { "epoch": 0.524017940328906, "grad_norm": 1.6120073795318604, "learning_rate": 9.246169935140503e-06, "loss": 0.0903, "step": 24185 }, { "epoch": 0.5241262756483869, "grad_norm": 1.022861123085022, "learning_rate": 9.242776208436235e-06, "loss": 0.1434, "step": 24190 }, { "epoch": 0.5242346109678677, "grad_norm": 1.7874984741210938, "learning_rate": 9.239382569445e-06, "loss": 0.1487, "step": 24195 }, { "epoch": 0.5243429462873486, "grad_norm": 1.4165189266204834, "learning_rate": 9.2359890185599e-06, "loss": 0.2213, "step": 24200 }, { "epoch": 0.5244512816068294, "grad_norm": 2.2036893367767334, "learning_rate": 9.23259555617403e-06, "loss": 0.2188, "step": 24205 }, { "epoch": 0.5245596169263104, "grad_norm": 1.812599778175354, "learning_rate": 9.229202182680462e-06, "loss": 0.1961, "step": 24210 }, { "epoch": 0.5246679522457912, "grad_norm": 0.825985312461853, "learning_rate": 9.22580889847228e-06, "loss": 0.2164, "step": 24215 }, { "epoch": 0.5247762875652721, "grad_norm": 1.662710428237915, "learning_rate": 9.22241570394254e-06, "loss": 0.1409, "step": 24220 }, { "epoch": 0.5248846228847529, "grad_norm": 0.8258217573165894, "learning_rate": 9.219022599484292e-06, "loss": 0.1985, "step": 24225 }, { "epoch": 0.5249929582042338, "grad_norm": 1.565368413925171, "learning_rate": 9.215629585490576e-06, "loss": 0.1721, "step": 24230 }, { "epoch": 0.5251012935237146, "grad_norm": 1.0390037298202515, "learning_rate": 9.212236662354423e-06, "loss": 0.1989, "step": 24235 }, { "epoch": 0.5252096288431954, "grad_norm": 2.2270686626434326, "learning_rate": 9.208843830468854e-06, "loss": 0.1874, "step": 24240 }, { "epoch": 0.5253179641626763, "grad_norm": 1.7552815675735474, "learning_rate": 9.205451090226874e-06, "loss": 0.2272, "step": 24245 }, { "epoch": 0.5254262994821571, "grad_norm": 1.4403882026672363, "learning_rate": 9.202058442021482e-06, "loss": 0.2056, "step": 24250 }, { "epoch": 0.525534634801638, "grad_norm": 1.534766674041748, "learning_rate": 9.198665886245666e-06, "loss": 0.2076, "step": 24255 }, { "epoch": 0.5256429701211189, "grad_norm": 0.856061577796936, "learning_rate": 9.1952734232924e-06, "loss": 0.1892, "step": 24260 }, { "epoch": 0.5257513054405998, "grad_norm": 1.5521351099014282, "learning_rate": 9.191881053554658e-06, "loss": 0.2565, "step": 24265 }, { "epoch": 0.5258596407600806, "grad_norm": 1.1293466091156006, "learning_rate": 9.18848877742539e-06, "loss": 0.1745, "step": 24270 }, { "epoch": 0.5259679760795615, "grad_norm": 1.9246654510498047, "learning_rate": 9.185096595297539e-06, "loss": 0.1829, "step": 24275 }, { "epoch": 0.5260763113990423, "grad_norm": 1.1774417161941528, "learning_rate": 9.181704507564044e-06, "loss": 0.1929, "step": 24280 }, { "epoch": 0.5261846467185232, "grad_norm": 1.9376842975616455, "learning_rate": 9.178312514617821e-06, "loss": 0.1791, "step": 24285 }, { "epoch": 0.526292982038004, "grad_norm": 1.9672046899795532, "learning_rate": 9.174920616851785e-06, "loss": 0.2094, "step": 24290 }, { "epoch": 0.5264013173574849, "grad_norm": 2.507185220718384, "learning_rate": 9.171528814658835e-06, "loss": 0.2362, "step": 24295 }, { "epoch": 0.5265096526769657, "grad_norm": 1.2149304151535034, "learning_rate": 9.168137108431861e-06, "loss": 0.1566, "step": 24300 }, { "epoch": 0.5266179879964467, "grad_norm": 1.865859866142273, "learning_rate": 9.164745498563739e-06, "loss": 0.1849, "step": 24305 }, { "epoch": 0.5267263233159275, "grad_norm": 1.6342194080352783, "learning_rate": 9.161353985447341e-06, "loss": 0.173, "step": 24310 }, { "epoch": 0.5268346586354083, "grad_norm": 1.2781459093093872, "learning_rate": 9.157962569475525e-06, "loss": 0.1486, "step": 24315 }, { "epoch": 0.5269429939548892, "grad_norm": 2.262179136276245, "learning_rate": 9.154571251041127e-06, "loss": 0.1977, "step": 24320 }, { "epoch": 0.52705132927437, "grad_norm": 1.8123726844787598, "learning_rate": 9.151180030536988e-06, "loss": 0.1571, "step": 24325 }, { "epoch": 0.5271596645938509, "grad_norm": 1.6192481517791748, "learning_rate": 9.147788908355927e-06, "loss": 0.1798, "step": 24330 }, { "epoch": 0.5272679999133317, "grad_norm": 1.9346164464950562, "learning_rate": 9.144397884890753e-06, "loss": 0.2046, "step": 24335 }, { "epoch": 0.5273763352328126, "grad_norm": 1.6860291957855225, "learning_rate": 9.141006960534267e-06, "loss": 0.2307, "step": 24340 }, { "epoch": 0.5274846705522934, "grad_norm": 1.2557300329208374, "learning_rate": 9.137616135679258e-06, "loss": 0.2062, "step": 24345 }, { "epoch": 0.5275930058717743, "grad_norm": 2.383556365966797, "learning_rate": 9.1342254107185e-06, "loss": 0.1003, "step": 24350 }, { "epoch": 0.5277013411912552, "grad_norm": 1.7638128995895386, "learning_rate": 9.130834786044758e-06, "loss": 0.2271, "step": 24355 }, { "epoch": 0.5278096765107361, "grad_norm": 1.5294963121414185, "learning_rate": 9.127444262050788e-06, "loss": 0.1916, "step": 24360 }, { "epoch": 0.5279180118302169, "grad_norm": 1.3557052612304688, "learning_rate": 9.124053839129328e-06, "loss": 0.1763, "step": 24365 }, { "epoch": 0.5280263471496978, "grad_norm": 1.0437817573547363, "learning_rate": 9.120663517673111e-06, "loss": 0.1691, "step": 24370 }, { "epoch": 0.5281346824691786, "grad_norm": 1.880231261253357, "learning_rate": 9.117273298074853e-06, "loss": 0.0974, "step": 24375 }, { "epoch": 0.5282430177886595, "grad_norm": 1.3653892278671265, "learning_rate": 9.113883180727259e-06, "loss": 0.179, "step": 24380 }, { "epoch": 0.5283513531081403, "grad_norm": 1.4767427444458008, "learning_rate": 9.110493166023024e-06, "loss": 0.1319, "step": 24385 }, { "epoch": 0.5284596884276211, "grad_norm": 1.9570269584655762, "learning_rate": 9.107103254354832e-06, "loss": 0.2247, "step": 24390 }, { "epoch": 0.528568023747102, "grad_norm": 2.218301296234131, "learning_rate": 9.103713446115353e-06, "loss": 0.2135, "step": 24395 }, { "epoch": 0.5286763590665828, "grad_norm": 2.5085246562957764, "learning_rate": 9.100323741697242e-06, "loss": 0.2261, "step": 24400 }, { "epoch": 0.5287846943860638, "grad_norm": 1.0236703157424927, "learning_rate": 9.09693414149315e-06, "loss": 0.1631, "step": 24405 }, { "epoch": 0.5288930297055446, "grad_norm": 1.1205668449401855, "learning_rate": 9.093544645895708e-06, "loss": 0.1552, "step": 24410 }, { "epoch": 0.5290013650250255, "grad_norm": 2.2358365058898926, "learning_rate": 9.090155255297543e-06, "loss": 0.2249, "step": 24415 }, { "epoch": 0.5291097003445063, "grad_norm": 1.16229248046875, "learning_rate": 9.086765970091261e-06, "loss": 0.1793, "step": 24420 }, { "epoch": 0.5292180356639872, "grad_norm": 2.0735700130462646, "learning_rate": 9.083376790669462e-06, "loss": 0.145, "step": 24425 }, { "epoch": 0.529326370983468, "grad_norm": 1.499422311782837, "learning_rate": 9.07998771742473e-06, "loss": 0.2057, "step": 24430 }, { "epoch": 0.5294347063029489, "grad_norm": 1.5461522340774536, "learning_rate": 9.07659875074964e-06, "loss": 0.2202, "step": 24435 }, { "epoch": 0.5295430416224297, "grad_norm": 1.659820556640625, "learning_rate": 9.073209891036752e-06, "loss": 0.2124, "step": 24440 }, { "epoch": 0.5296513769419106, "grad_norm": 1.3253929615020752, "learning_rate": 9.069821138678614e-06, "loss": 0.2547, "step": 24445 }, { "epoch": 0.5297597122613915, "grad_norm": 1.8860151767730713, "learning_rate": 9.066432494067761e-06, "loss": 0.1833, "step": 24450 }, { "epoch": 0.5298680475808724, "grad_norm": 1.652764081954956, "learning_rate": 9.06304395759672e-06, "loss": 0.2164, "step": 24455 }, { "epoch": 0.5299763829003532, "grad_norm": 1.545577049255371, "learning_rate": 9.059655529658004e-06, "loss": 0.2518, "step": 24460 }, { "epoch": 0.530084718219834, "grad_norm": 1.3583531379699707, "learning_rate": 9.056267210644105e-06, "loss": 0.2336, "step": 24465 }, { "epoch": 0.5301930535393149, "grad_norm": 1.8624135255813599, "learning_rate": 9.052879000947515e-06, "loss": 0.3166, "step": 24470 }, { "epoch": 0.5303013888587957, "grad_norm": 2.4325029850006104, "learning_rate": 9.049490900960704e-06, "loss": 0.1813, "step": 24475 }, { "epoch": 0.5304097241782766, "grad_norm": 1.5559873580932617, "learning_rate": 9.046102911076133e-06, "loss": 0.2128, "step": 24480 }, { "epoch": 0.5305180594977574, "grad_norm": 1.342016339302063, "learning_rate": 9.042715031686254e-06, "loss": 0.1637, "step": 24485 }, { "epoch": 0.5306263948172383, "grad_norm": 1.4605138301849365, "learning_rate": 9.039327263183495e-06, "loss": 0.2161, "step": 24490 }, { "epoch": 0.5307347301367191, "grad_norm": 1.3872865438461304, "learning_rate": 9.035939605960282e-06, "loss": 0.1933, "step": 24495 }, { "epoch": 0.5308430654562001, "grad_norm": 1.496686577796936, "learning_rate": 9.032552060409019e-06, "loss": 0.1981, "step": 24500 }, { "epoch": 0.5309514007756809, "grad_norm": 1.7142434120178223, "learning_rate": 9.029164626922113e-06, "loss": 0.1566, "step": 24505 }, { "epoch": 0.5310597360951618, "grad_norm": 1.864953875541687, "learning_rate": 9.02577730589194e-06, "loss": 0.1996, "step": 24510 }, { "epoch": 0.5311680714146426, "grad_norm": 1.589272141456604, "learning_rate": 9.022390097710874e-06, "loss": 0.1867, "step": 24515 }, { "epoch": 0.5312764067341235, "grad_norm": 1.4780009984970093, "learning_rate": 9.019003002771273e-06, "loss": 0.1467, "step": 24520 }, { "epoch": 0.5313847420536043, "grad_norm": 1.5984174013137817, "learning_rate": 9.015616021465476e-06, "loss": 0.1638, "step": 24525 }, { "epoch": 0.5314930773730852, "grad_norm": 2.1530399322509766, "learning_rate": 9.012229154185817e-06, "loss": 0.202, "step": 24530 }, { "epoch": 0.531601412692566, "grad_norm": 1.399898886680603, "learning_rate": 9.008842401324613e-06, "loss": 0.1336, "step": 24535 }, { "epoch": 0.5317097480120468, "grad_norm": 1.8537771701812744, "learning_rate": 9.00545576327417e-06, "loss": 0.1683, "step": 24540 }, { "epoch": 0.5318180833315277, "grad_norm": 2.4577388763427734, "learning_rate": 9.002069240426778e-06, "loss": 0.2041, "step": 24545 }, { "epoch": 0.5319264186510086, "grad_norm": 1.949990153312683, "learning_rate": 8.998682833174712e-06, "loss": 0.2119, "step": 24550 }, { "epoch": 0.5320347539704895, "grad_norm": 1.4291913509368896, "learning_rate": 8.995296541910245e-06, "loss": 0.1577, "step": 24555 }, { "epoch": 0.5321430892899703, "grad_norm": 1.060661792755127, "learning_rate": 8.991910367025622e-06, "loss": 0.2202, "step": 24560 }, { "epoch": 0.5322514246094512, "grad_norm": 1.5011506080627441, "learning_rate": 8.988524308913083e-06, "loss": 0.1486, "step": 24565 }, { "epoch": 0.532359759928932, "grad_norm": 1.2915517091751099, "learning_rate": 8.98513836796485e-06, "loss": 0.1999, "step": 24570 }, { "epoch": 0.5324680952484129, "grad_norm": 1.512010931968689, "learning_rate": 8.981752544573133e-06, "loss": 0.1833, "step": 24575 }, { "epoch": 0.5325764305678937, "grad_norm": 1.4183166027069092, "learning_rate": 8.978366839130132e-06, "loss": 0.2164, "step": 24580 }, { "epoch": 0.5326847658873746, "grad_norm": 1.6538327932357788, "learning_rate": 8.974981252028027e-06, "loss": 0.1433, "step": 24585 }, { "epoch": 0.5327931012068554, "grad_norm": 1.1656792163848877, "learning_rate": 8.971595783658991e-06, "loss": 0.2123, "step": 24590 }, { "epoch": 0.5329014365263364, "grad_norm": 1.4338542222976685, "learning_rate": 8.968210434415176e-06, "loss": 0.1551, "step": 24595 }, { "epoch": 0.5330097718458172, "grad_norm": 1.659575343132019, "learning_rate": 8.964825204688728e-06, "loss": 0.1618, "step": 24600 }, { "epoch": 0.5331181071652981, "grad_norm": 1.9801603555679321, "learning_rate": 8.961440094871772e-06, "loss": 0.1663, "step": 24605 }, { "epoch": 0.5332264424847789, "grad_norm": 1.6418744325637817, "learning_rate": 8.958055105356423e-06, "loss": 0.2973, "step": 24610 }, { "epoch": 0.5333347778042598, "grad_norm": 1.7219372987747192, "learning_rate": 8.954670236534783e-06, "loss": 0.1696, "step": 24615 }, { "epoch": 0.5334431131237406, "grad_norm": 1.496558427810669, "learning_rate": 8.951285488798935e-06, "loss": 0.2119, "step": 24620 }, { "epoch": 0.5335514484432214, "grad_norm": 1.4159945249557495, "learning_rate": 8.947900862540956e-06, "loss": 0.1914, "step": 24625 }, { "epoch": 0.5336597837627023, "grad_norm": 1.5488810539245605, "learning_rate": 8.9445163581529e-06, "loss": 0.2254, "step": 24630 }, { "epoch": 0.5337681190821831, "grad_norm": 1.9627089500427246, "learning_rate": 8.941131976026813e-06, "loss": 0.2456, "step": 24635 }, { "epoch": 0.533876454401664, "grad_norm": 1.8864736557006836, "learning_rate": 8.937747716554726e-06, "loss": 0.157, "step": 24640 }, { "epoch": 0.5339847897211449, "grad_norm": 1.9202955961227417, "learning_rate": 8.93436358012865e-06, "loss": 0.25, "step": 24645 }, { "epoch": 0.5340931250406258, "grad_norm": 1.0833181142807007, "learning_rate": 8.930979567140594e-06, "loss": 0.1791, "step": 24650 }, { "epoch": 0.5342014603601066, "grad_norm": 2.045741081237793, "learning_rate": 8.927595677982537e-06, "loss": 0.1797, "step": 24655 }, { "epoch": 0.5343097956795875, "grad_norm": 1.520858883857727, "learning_rate": 8.92421191304646e-06, "loss": 0.1908, "step": 24660 }, { "epoch": 0.5344181309990683, "grad_norm": 1.7372463941574097, "learning_rate": 8.920828272724317e-06, "loss": 0.1578, "step": 24665 }, { "epoch": 0.5345264663185492, "grad_norm": 2.049226760864258, "learning_rate": 8.917444757408051e-06, "loss": 0.2492, "step": 24670 }, { "epoch": 0.53463480163803, "grad_norm": 1.9934412240982056, "learning_rate": 8.914061367489594e-06, "loss": 0.1445, "step": 24675 }, { "epoch": 0.5347431369575109, "grad_norm": 1.859284520149231, "learning_rate": 8.910678103360861e-06, "loss": 0.2117, "step": 24680 }, { "epoch": 0.5348514722769917, "grad_norm": 2.082242727279663, "learning_rate": 8.90729496541375e-06, "loss": 0.1329, "step": 24685 }, { "epoch": 0.5349598075964725, "grad_norm": 1.840087652206421, "learning_rate": 8.903911954040152e-06, "loss": 0.1844, "step": 24690 }, { "epoch": 0.5350681429159535, "grad_norm": 2.00960636138916, "learning_rate": 8.900529069631929e-06, "loss": 0.1624, "step": 24695 }, { "epoch": 0.5351764782354343, "grad_norm": 1.4455653429031372, "learning_rate": 8.897146312580947e-06, "loss": 0.2018, "step": 24700 }, { "epoch": 0.5352848135549152, "grad_norm": 1.4512922763824463, "learning_rate": 8.893763683279042e-06, "loss": 0.2002, "step": 24705 }, { "epoch": 0.535393148874396, "grad_norm": 1.0297291278839111, "learning_rate": 8.890381182118045e-06, "loss": 0.2446, "step": 24710 }, { "epoch": 0.5355014841938769, "grad_norm": 1.953542947769165, "learning_rate": 8.886998809489766e-06, "loss": 0.1587, "step": 24715 }, { "epoch": 0.5356098195133577, "grad_norm": 1.2007042169570923, "learning_rate": 8.883616565786002e-06, "loss": 0.126, "step": 24720 }, { "epoch": 0.5357181548328386, "grad_norm": 2.4039077758789062, "learning_rate": 8.880234451398536e-06, "loss": 0.265, "step": 24725 }, { "epoch": 0.5358264901523194, "grad_norm": 0.8937031626701355, "learning_rate": 8.876852466719135e-06, "loss": 0.1619, "step": 24730 }, { "epoch": 0.5359348254718003, "grad_norm": 2.1910290718078613, "learning_rate": 8.873470612139549e-06, "loss": 0.1858, "step": 24735 }, { "epoch": 0.5360431607912812, "grad_norm": 1.9800642728805542, "learning_rate": 8.870088888051513e-06, "loss": 0.2026, "step": 24740 }, { "epoch": 0.5361514961107621, "grad_norm": 1.0205851793289185, "learning_rate": 8.86670729484676e-06, "loss": 0.156, "step": 24745 }, { "epoch": 0.5362598314302429, "grad_norm": 1.583701252937317, "learning_rate": 8.863325832916988e-06, "loss": 0.2326, "step": 24750 }, { "epoch": 0.5363681667497238, "grad_norm": 1.8588415384292603, "learning_rate": 8.85994450265389e-06, "loss": 0.2357, "step": 24755 }, { "epoch": 0.5364765020692046, "grad_norm": 0.9064070582389832, "learning_rate": 8.856563304449147e-06, "loss": 0.1372, "step": 24760 }, { "epoch": 0.5365848373886855, "grad_norm": 1.7733066082000732, "learning_rate": 8.853182238694414e-06, "loss": 0.1942, "step": 24765 }, { "epoch": 0.5366931727081663, "grad_norm": 1.7191598415374756, "learning_rate": 8.849801305781339e-06, "loss": 0.175, "step": 24770 }, { "epoch": 0.5368015080276471, "grad_norm": 1.2533814907073975, "learning_rate": 8.846420506101553e-06, "loss": 0.1817, "step": 24775 }, { "epoch": 0.536909843347128, "grad_norm": 1.8758450746536255, "learning_rate": 8.84303984004667e-06, "loss": 0.1319, "step": 24780 }, { "epoch": 0.5370181786666088, "grad_norm": 0.9241670370101929, "learning_rate": 8.839659308008292e-06, "loss": 0.2063, "step": 24785 }, { "epoch": 0.5371265139860898, "grad_norm": 1.569069743156433, "learning_rate": 8.836278910377995e-06, "loss": 0.1405, "step": 24790 }, { "epoch": 0.5372348493055706, "grad_norm": 1.2448216676712036, "learning_rate": 8.83289864754736e-06, "loss": 0.2151, "step": 24795 }, { "epoch": 0.5373431846250515, "grad_norm": 1.8283432722091675, "learning_rate": 8.829518519907935e-06, "loss": 0.1722, "step": 24800 }, { "epoch": 0.5374515199445323, "grad_norm": 1.89499032497406, "learning_rate": 8.826138527851252e-06, "loss": 0.2192, "step": 24805 }, { "epoch": 0.5375598552640132, "grad_norm": 1.5487333536148071, "learning_rate": 8.822758671768837e-06, "loss": 0.1737, "step": 24810 }, { "epoch": 0.537668190583494, "grad_norm": 1.7098344564437866, "learning_rate": 8.819378952052196e-06, "loss": 0.2137, "step": 24815 }, { "epoch": 0.5377765259029749, "grad_norm": 1.547386884689331, "learning_rate": 8.815999369092817e-06, "loss": 0.1527, "step": 24820 }, { "epoch": 0.5378848612224557, "grad_norm": 1.9955488443374634, "learning_rate": 8.812619923282173e-06, "loss": 0.2151, "step": 24825 }, { "epoch": 0.5379931965419366, "grad_norm": 2.6691482067108154, "learning_rate": 8.809240615011727e-06, "loss": 0.1946, "step": 24830 }, { "epoch": 0.5381015318614175, "grad_norm": 1.2730637788772583, "learning_rate": 8.805861444672914e-06, "loss": 0.203, "step": 24835 }, { "epoch": 0.5382098671808984, "grad_norm": 2.2840874195098877, "learning_rate": 8.802482412657167e-06, "loss": 0.2441, "step": 24840 }, { "epoch": 0.5383182025003792, "grad_norm": 2.260369062423706, "learning_rate": 8.799103519355894e-06, "loss": 0.2677, "step": 24845 }, { "epoch": 0.53842653781986, "grad_norm": 0.8423546552658081, "learning_rate": 8.795724765160488e-06, "loss": 0.1531, "step": 24850 }, { "epoch": 0.5385348731393409, "grad_norm": 1.4166845083236694, "learning_rate": 8.79234615046233e-06, "loss": 0.1462, "step": 24855 }, { "epoch": 0.5386432084588217, "grad_norm": 1.3478434085845947, "learning_rate": 8.788967675652778e-06, "loss": 0.2702, "step": 24860 }, { "epoch": 0.5387515437783026, "grad_norm": 1.407292127609253, "learning_rate": 8.78558934112318e-06, "loss": 0.2139, "step": 24865 }, { "epoch": 0.5388598790977834, "grad_norm": 1.4257705211639404, "learning_rate": 8.782211147264864e-06, "loss": 0.1793, "step": 24870 }, { "epoch": 0.5389682144172643, "grad_norm": 1.03371262550354, "learning_rate": 8.778833094469144e-06, "loss": 0.1697, "step": 24875 }, { "epoch": 0.5390765497367451, "grad_norm": 1.5604933500289917, "learning_rate": 8.77545518312732e-06, "loss": 0.1269, "step": 24880 }, { "epoch": 0.5391848850562261, "grad_norm": 1.8187017440795898, "learning_rate": 8.772077413630665e-06, "loss": 0.1699, "step": 24885 }, { "epoch": 0.5392932203757069, "grad_norm": 1.9562478065490723, "learning_rate": 8.768699786370448e-06, "loss": 0.1713, "step": 24890 }, { "epoch": 0.5394015556951878, "grad_norm": 1.1397768259048462, "learning_rate": 8.765322301737919e-06, "loss": 0.1574, "step": 24895 }, { "epoch": 0.5395098910146686, "grad_norm": 1.5701178312301636, "learning_rate": 8.761944960124304e-06, "loss": 0.0833, "step": 24900 }, { "epoch": 0.5396182263341495, "grad_norm": 1.2129818201065063, "learning_rate": 8.75856776192082e-06, "loss": 0.1582, "step": 24905 }, { "epoch": 0.5397265616536303, "grad_norm": 1.8442193269729614, "learning_rate": 8.755190707518662e-06, "loss": 0.3157, "step": 24910 }, { "epoch": 0.5398348969731112, "grad_norm": 1.4901819229125977, "learning_rate": 8.751813797309015e-06, "loss": 0.2032, "step": 24915 }, { "epoch": 0.539943232292592, "grad_norm": 1.1710714101791382, "learning_rate": 8.748437031683042e-06, "loss": 0.1577, "step": 24920 }, { "epoch": 0.5400515676120728, "grad_norm": 3.255350351333618, "learning_rate": 8.745060411031892e-06, "loss": 0.2415, "step": 24925 }, { "epoch": 0.5401599029315537, "grad_norm": 1.3721555471420288, "learning_rate": 8.741683935746692e-06, "loss": 0.1729, "step": 24930 }, { "epoch": 0.5402682382510346, "grad_norm": 1.6142499446868896, "learning_rate": 8.738307606218556e-06, "loss": 0.2036, "step": 24935 }, { "epoch": 0.5403765735705155, "grad_norm": 1.9933720827102661, "learning_rate": 8.734931422838588e-06, "loss": 0.2318, "step": 24940 }, { "epoch": 0.5404849088899963, "grad_norm": 2.0403757095336914, "learning_rate": 8.731555385997862e-06, "loss": 0.1898, "step": 24945 }, { "epoch": 0.5405932442094772, "grad_norm": 1.4948482513427734, "learning_rate": 8.728179496087442e-06, "loss": 0.1399, "step": 24950 }, { "epoch": 0.540701579528958, "grad_norm": 1.841347575187683, "learning_rate": 8.72480375349838e-06, "loss": 0.2249, "step": 24955 }, { "epoch": 0.5408099148484389, "grad_norm": 1.5779601335525513, "learning_rate": 8.721428158621698e-06, "loss": 0.1922, "step": 24960 }, { "epoch": 0.5409182501679197, "grad_norm": 2.241159439086914, "learning_rate": 8.71805271184841e-06, "loss": 0.1452, "step": 24965 }, { "epoch": 0.5410265854874006, "grad_norm": 1.3193655014038086, "learning_rate": 8.714677413569515e-06, "loss": 0.1232, "step": 24970 }, { "epoch": 0.5411349208068814, "grad_norm": 1.3999531269073486, "learning_rate": 8.711302264175986e-06, "loss": 0.2093, "step": 24975 }, { "epoch": 0.5412432561263624, "grad_norm": 1.9669537544250488, "learning_rate": 8.70792726405878e-06, "loss": 0.1876, "step": 24980 }, { "epoch": 0.5413515914458432, "grad_norm": 1.083290696144104, "learning_rate": 8.70455241360885e-06, "loss": 0.1709, "step": 24985 }, { "epoch": 0.5414599267653241, "grad_norm": 1.2387351989746094, "learning_rate": 8.701177713217116e-06, "loss": 0.1962, "step": 24990 }, { "epoch": 0.5415682620848049, "grad_norm": 1.3194514513015747, "learning_rate": 8.697803163274487e-06, "loss": 0.1761, "step": 24995 }, { "epoch": 0.5416765974042858, "grad_norm": 1.792014718055725, "learning_rate": 8.694428764171857e-06, "loss": 0.1716, "step": 25000 }, { "epoch": 0.5417849327237666, "grad_norm": 2.3382420539855957, "learning_rate": 8.691054516300098e-06, "loss": 0.2234, "step": 25005 }, { "epoch": 0.5418932680432474, "grad_norm": 1.4567902088165283, "learning_rate": 8.687680420050063e-06, "loss": 0.2148, "step": 25010 }, { "epoch": 0.5420016033627283, "grad_norm": 2.8011434078216553, "learning_rate": 8.684306475812593e-06, "loss": 0.1895, "step": 25015 }, { "epoch": 0.5421099386822091, "grad_norm": 1.4203909635543823, "learning_rate": 8.68093268397851e-06, "loss": 0.1735, "step": 25020 }, { "epoch": 0.54221827400169, "grad_norm": 1.810450792312622, "learning_rate": 8.677559044938614e-06, "loss": 0.1732, "step": 25025 }, { "epoch": 0.5423266093211709, "grad_norm": 1.655503273010254, "learning_rate": 8.674185559083688e-06, "loss": 0.1862, "step": 25030 }, { "epoch": 0.5424349446406518, "grad_norm": 2.098139524459839, "learning_rate": 8.67081222680451e-06, "loss": 0.2069, "step": 25035 }, { "epoch": 0.5425432799601326, "grad_norm": 2.0968072414398193, "learning_rate": 8.667439048491826e-06, "loss": 0.266, "step": 25040 }, { "epoch": 0.5426516152796135, "grad_norm": 1.3416919708251953, "learning_rate": 8.664066024536363e-06, "loss": 0.1706, "step": 25045 }, { "epoch": 0.5427599505990943, "grad_norm": 2.2118821144104004, "learning_rate": 8.660693155328842e-06, "loss": 0.1237, "step": 25050 }, { "epoch": 0.5428682859185752, "grad_norm": 1.3673526048660278, "learning_rate": 8.657320441259953e-06, "loss": 0.1018, "step": 25055 }, { "epoch": 0.542976621238056, "grad_norm": 1.338118314743042, "learning_rate": 8.653947882720379e-06, "loss": 0.1648, "step": 25060 }, { "epoch": 0.5430849565575369, "grad_norm": 1.6189244985580444, "learning_rate": 8.650575480100778e-06, "loss": 0.1913, "step": 25065 }, { "epoch": 0.5431932918770177, "grad_norm": 1.8853340148925781, "learning_rate": 8.647203233791794e-06, "loss": 0.1758, "step": 25070 }, { "epoch": 0.5433016271964985, "grad_norm": 0.7415140271186829, "learning_rate": 8.64383114418405e-06, "loss": 0.1244, "step": 25075 }, { "epoch": 0.5434099625159795, "grad_norm": 2.195084810256958, "learning_rate": 8.64045921166815e-06, "loss": 0.2127, "step": 25080 }, { "epoch": 0.5435182978354604, "grad_norm": 1.4776700735092163, "learning_rate": 8.637087436634688e-06, "loss": 0.1553, "step": 25085 }, { "epoch": 0.5436266331549412, "grad_norm": 2.2768540382385254, "learning_rate": 8.63371581947423e-06, "loss": 0.1528, "step": 25090 }, { "epoch": 0.543734968474422, "grad_norm": 1.1512559652328491, "learning_rate": 8.630344360577326e-06, "loss": 0.1325, "step": 25095 }, { "epoch": 0.5438433037939029, "grad_norm": 1.5696117877960205, "learning_rate": 8.626973060334511e-06, "loss": 0.1898, "step": 25100 }, { "epoch": 0.5439516391133837, "grad_norm": 1.5907909870147705, "learning_rate": 8.6236019191363e-06, "loss": 0.2365, "step": 25105 }, { "epoch": 0.5440599744328646, "grad_norm": 2.137080430984497, "learning_rate": 8.620230937373187e-06, "loss": 0.1311, "step": 25110 }, { "epoch": 0.5441683097523454, "grad_norm": 1.3665575981140137, "learning_rate": 8.616860115435652e-06, "loss": 0.2069, "step": 25115 }, { "epoch": 0.5442766450718263, "grad_norm": 1.4328120946884155, "learning_rate": 8.613489453714152e-06, "loss": 0.207, "step": 25120 }, { "epoch": 0.5443849803913072, "grad_norm": 1.5403848886489868, "learning_rate": 8.610118952599128e-06, "loss": 0.1603, "step": 25125 }, { "epoch": 0.5444933157107881, "grad_norm": 2.434340000152588, "learning_rate": 8.606748612481003e-06, "loss": 0.2029, "step": 25130 }, { "epoch": 0.5446016510302689, "grad_norm": 1.5631275177001953, "learning_rate": 8.603378433750181e-06, "loss": 0.1763, "step": 25135 }, { "epoch": 0.5447099863497498, "grad_norm": 1.8285815715789795, "learning_rate": 8.600008416797047e-06, "loss": 0.203, "step": 25140 }, { "epoch": 0.5448183216692306, "grad_norm": 2.1618399620056152, "learning_rate": 8.596638562011965e-06, "loss": 0.2198, "step": 25145 }, { "epoch": 0.5449266569887115, "grad_norm": 2.678671360015869, "learning_rate": 8.593268869785284e-06, "loss": 0.1623, "step": 25150 }, { "epoch": 0.5450349923081923, "grad_norm": 1.865646243095398, "learning_rate": 8.589899340507332e-06, "loss": 0.2699, "step": 25155 }, { "epoch": 0.5451433276276731, "grad_norm": 1.7429535388946533, "learning_rate": 8.586529974568419e-06, "loss": 0.1707, "step": 25160 }, { "epoch": 0.545251662947154, "grad_norm": 2.197767496109009, "learning_rate": 8.583160772358831e-06, "loss": 0.2397, "step": 25165 }, { "epoch": 0.5453599982666348, "grad_norm": 1.9500749111175537, "learning_rate": 8.579791734268846e-06, "loss": 0.2017, "step": 25170 }, { "epoch": 0.5454683335861158, "grad_norm": 1.9152437448501587, "learning_rate": 8.576422860688712e-06, "loss": 0.1862, "step": 25175 }, { "epoch": 0.5455766689055966, "grad_norm": 1.5354654788970947, "learning_rate": 8.573054152008667e-06, "loss": 0.1995, "step": 25180 }, { "epoch": 0.5456850042250775, "grad_norm": 1.2630589008331299, "learning_rate": 8.56968560861892e-06, "loss": 0.1748, "step": 25185 }, { "epoch": 0.5457933395445583, "grad_norm": 1.6294543743133545, "learning_rate": 8.566317230909672e-06, "loss": 0.1742, "step": 25190 }, { "epoch": 0.5459016748640392, "grad_norm": 1.3850517272949219, "learning_rate": 8.562949019271094e-06, "loss": 0.206, "step": 25195 }, { "epoch": 0.54601001018352, "grad_norm": 1.3823976516723633, "learning_rate": 8.559580974093346e-06, "loss": 0.2667, "step": 25200 }, { "epoch": 0.5461183455030009, "grad_norm": 1.6978224515914917, "learning_rate": 8.556213095766565e-06, "loss": 0.1649, "step": 25205 }, { "epoch": 0.5462266808224817, "grad_norm": 1.4321579933166504, "learning_rate": 8.55284538468087e-06, "loss": 0.1841, "step": 25210 }, { "epoch": 0.5463350161419626, "grad_norm": 1.5324501991271973, "learning_rate": 8.549477841226359e-06, "loss": 0.1745, "step": 25215 }, { "epoch": 0.5464433514614434, "grad_norm": 1.5675550699234009, "learning_rate": 8.54611046579311e-06, "loss": 0.1904, "step": 25220 }, { "epoch": 0.5465516867809244, "grad_norm": 1.5922918319702148, "learning_rate": 8.542743258771179e-06, "loss": 0.1883, "step": 25225 }, { "epoch": 0.5466600221004052, "grad_norm": 0.9175023436546326, "learning_rate": 8.539376220550618e-06, "loss": 0.199, "step": 25230 }, { "epoch": 0.546768357419886, "grad_norm": 1.4530171155929565, "learning_rate": 8.53600935152144e-06, "loss": 0.2038, "step": 25235 }, { "epoch": 0.5468766927393669, "grad_norm": 1.8194423913955688, "learning_rate": 8.532642652073649e-06, "loss": 0.1748, "step": 25240 }, { "epoch": 0.5469850280588477, "grad_norm": 1.6674681901931763, "learning_rate": 8.529276122597227e-06, "loss": 0.2315, "step": 25245 }, { "epoch": 0.5470933633783286, "grad_norm": 1.2587249279022217, "learning_rate": 8.525909763482133e-06, "loss": 0.1745, "step": 25250 }, { "epoch": 0.5472016986978094, "grad_norm": 1.1874064207077026, "learning_rate": 8.522543575118311e-06, "loss": 0.0879, "step": 25255 }, { "epoch": 0.5473100340172903, "grad_norm": 2.239337921142578, "learning_rate": 8.519177557895684e-06, "loss": 0.1741, "step": 25260 }, { "epoch": 0.5474183693367711, "grad_norm": 1.5981087684631348, "learning_rate": 8.515811712204154e-06, "loss": 0.1529, "step": 25265 }, { "epoch": 0.5475267046562521, "grad_norm": 2.2694694995880127, "learning_rate": 8.512446038433599e-06, "loss": 0.3028, "step": 25270 }, { "epoch": 0.5476350399757329, "grad_norm": 1.2029199600219727, "learning_rate": 8.509080536973892e-06, "loss": 0.2004, "step": 25275 }, { "epoch": 0.5477433752952138, "grad_norm": 1.8243896961212158, "learning_rate": 8.505715208214871e-06, "loss": 0.1841, "step": 25280 }, { "epoch": 0.5478517106146946, "grad_norm": 2.0461158752441406, "learning_rate": 8.50235005254636e-06, "loss": 0.2209, "step": 25285 }, { "epoch": 0.5479600459341755, "grad_norm": 1.1597152948379517, "learning_rate": 8.498985070358158e-06, "loss": 0.2365, "step": 25290 }, { "epoch": 0.5480683812536563, "grad_norm": 1.4771448373794556, "learning_rate": 8.495620262040052e-06, "loss": 0.1582, "step": 25295 }, { "epoch": 0.5481767165731372, "grad_norm": 1.503440499305725, "learning_rate": 8.492255627981803e-06, "loss": 0.2239, "step": 25300 }, { "epoch": 0.548285051892618, "grad_norm": 1.7068977355957031, "learning_rate": 8.488891168573153e-06, "loss": 0.1343, "step": 25305 }, { "epoch": 0.5483933872120988, "grad_norm": 1.064829707145691, "learning_rate": 8.485526884203824e-06, "loss": 0.1287, "step": 25310 }, { "epoch": 0.5485017225315797, "grad_norm": 1.4665908813476562, "learning_rate": 8.48216277526352e-06, "loss": 0.1614, "step": 25315 }, { "epoch": 0.5486100578510607, "grad_norm": 1.4610592126846313, "learning_rate": 8.478798842141917e-06, "loss": 0.2109, "step": 25320 }, { "epoch": 0.5487183931705415, "grad_norm": 1.1622087955474854, "learning_rate": 8.475435085228685e-06, "loss": 0.2056, "step": 25325 }, { "epoch": 0.5488267284900223, "grad_norm": 2.014650344848633, "learning_rate": 8.472071504913459e-06, "loss": 0.2128, "step": 25330 }, { "epoch": 0.5489350638095032, "grad_norm": 2.443777084350586, "learning_rate": 8.468708101585862e-06, "loss": 0.191, "step": 25335 }, { "epoch": 0.549043399128984, "grad_norm": 1.4866632223129272, "learning_rate": 8.465344875635492e-06, "loss": 0.1843, "step": 25340 }, { "epoch": 0.5491517344484649, "grad_norm": 1.135105848312378, "learning_rate": 8.461981827451928e-06, "loss": 0.166, "step": 25345 }, { "epoch": 0.5492600697679457, "grad_norm": 2.102090835571289, "learning_rate": 8.458618957424732e-06, "loss": 0.1441, "step": 25350 }, { "epoch": 0.5493684050874266, "grad_norm": 1.2547303438186646, "learning_rate": 8.455256265943437e-06, "loss": 0.2049, "step": 25355 }, { "epoch": 0.5494767404069074, "grad_norm": 1.1391936540603638, "learning_rate": 8.451893753397567e-06, "loss": 0.1729, "step": 25360 }, { "epoch": 0.5495850757263884, "grad_norm": 2.317155122756958, "learning_rate": 8.44853142017661e-06, "loss": 0.1855, "step": 25365 }, { "epoch": 0.5496934110458692, "grad_norm": 2.034170389175415, "learning_rate": 8.44516926667005e-06, "loss": 0.1332, "step": 25370 }, { "epoch": 0.5498017463653501, "grad_norm": 1.6693496704101562, "learning_rate": 8.441807293267338e-06, "loss": 0.1847, "step": 25375 }, { "epoch": 0.5499100816848309, "grad_norm": 2.113393545150757, "learning_rate": 8.438445500357912e-06, "loss": 0.1911, "step": 25380 }, { "epoch": 0.5500184170043118, "grad_norm": 1.7016870975494385, "learning_rate": 8.43508388833118e-06, "loss": 0.2027, "step": 25385 }, { "epoch": 0.5501267523237926, "grad_norm": 1.6528418064117432, "learning_rate": 8.431722457576539e-06, "loss": 0.1481, "step": 25390 }, { "epoch": 0.5502350876432734, "grad_norm": 1.7269233465194702, "learning_rate": 8.428361208483357e-06, "loss": 0.145, "step": 25395 }, { "epoch": 0.5503434229627543, "grad_norm": 1.7007992267608643, "learning_rate": 8.425000141440987e-06, "loss": 0.1677, "step": 25400 }, { "epoch": 0.5504517582822351, "grad_norm": 1.280612826347351, "learning_rate": 8.421639256838756e-06, "loss": 0.1612, "step": 25405 }, { "epoch": 0.550560093601716, "grad_norm": 1.1180630922317505, "learning_rate": 8.418278555065974e-06, "loss": 0.1527, "step": 25410 }, { "epoch": 0.5506684289211969, "grad_norm": 0.9983630776405334, "learning_rate": 8.414918036511925e-06, "loss": 0.1492, "step": 25415 }, { "epoch": 0.5507767642406778, "grad_norm": 1.6162792444229126, "learning_rate": 8.41155770156588e-06, "loss": 0.2175, "step": 25420 }, { "epoch": 0.5508850995601586, "grad_norm": 1.972397804260254, "learning_rate": 8.408197550617078e-06, "loss": 0.1924, "step": 25425 }, { "epoch": 0.5509934348796395, "grad_norm": 1.1354032754898071, "learning_rate": 8.404837584054747e-06, "loss": 0.2156, "step": 25430 }, { "epoch": 0.5511017701991203, "grad_norm": 1.3897532224655151, "learning_rate": 8.401477802268086e-06, "loss": 0.1212, "step": 25435 }, { "epoch": 0.5512101055186012, "grad_norm": 1.9438889026641846, "learning_rate": 8.398118205646275e-06, "loss": 0.1701, "step": 25440 }, { "epoch": 0.551318440838082, "grad_norm": 1.0993337631225586, "learning_rate": 8.394758794578473e-06, "loss": 0.2057, "step": 25445 }, { "epoch": 0.5514267761575629, "grad_norm": 1.2628434896469116, "learning_rate": 8.39139956945382e-06, "loss": 0.1895, "step": 25450 }, { "epoch": 0.5515351114770437, "grad_norm": 1.2004622220993042, "learning_rate": 8.38804053066143e-06, "loss": 0.2063, "step": 25455 }, { "epoch": 0.5516434467965246, "grad_norm": 1.5644676685333252, "learning_rate": 8.384681678590397e-06, "loss": 0.1798, "step": 25460 }, { "epoch": 0.5517517821160055, "grad_norm": 1.2246202230453491, "learning_rate": 8.38132301362979e-06, "loss": 0.1565, "step": 25465 }, { "epoch": 0.5518601174354864, "grad_norm": 1.082503318786621, "learning_rate": 8.377964536168667e-06, "loss": 0.1371, "step": 25470 }, { "epoch": 0.5519684527549672, "grad_norm": 1.8424782752990723, "learning_rate": 8.374606246596054e-06, "loss": 0.1852, "step": 25475 }, { "epoch": 0.552076788074448, "grad_norm": 1.274102807044983, "learning_rate": 8.37124814530096e-06, "loss": 0.1595, "step": 25480 }, { "epoch": 0.5521851233939289, "grad_norm": 1.8995176553726196, "learning_rate": 8.36789023267237e-06, "loss": 0.241, "step": 25485 }, { "epoch": 0.5522934587134097, "grad_norm": 1.61567223072052, "learning_rate": 8.36453250909925e-06, "loss": 0.1844, "step": 25490 }, { "epoch": 0.5524017940328906, "grad_norm": 1.5919044017791748, "learning_rate": 8.361174974970536e-06, "loss": 0.1687, "step": 25495 }, { "epoch": 0.5525101293523714, "grad_norm": 1.2408581972122192, "learning_rate": 8.357817630675152e-06, "loss": 0.1429, "step": 25500 }, { "epoch": 0.5526184646718523, "grad_norm": 1.602074146270752, "learning_rate": 8.354460476601995e-06, "loss": 0.2516, "step": 25505 }, { "epoch": 0.5527267999913332, "grad_norm": 1.4761790037155151, "learning_rate": 8.351103513139939e-06, "loss": 0.2516, "step": 25510 }, { "epoch": 0.5528351353108141, "grad_norm": 0.5182232856750488, "learning_rate": 8.347746740677843e-06, "loss": 0.1408, "step": 25515 }, { "epoch": 0.5529434706302949, "grad_norm": 1.6941555738449097, "learning_rate": 8.344390159604538e-06, "loss": 0.1763, "step": 25520 }, { "epoch": 0.5530518059497758, "grad_norm": 1.7370644807815552, "learning_rate": 8.341033770308832e-06, "loss": 0.2255, "step": 25525 }, { "epoch": 0.5531601412692566, "grad_norm": 1.8434821367263794, "learning_rate": 8.337677573179513e-06, "loss": 0.1899, "step": 25530 }, { "epoch": 0.5532684765887375, "grad_norm": 0.9067831039428711, "learning_rate": 8.334321568605343e-06, "loss": 0.2127, "step": 25535 }, { "epoch": 0.5533768119082183, "grad_norm": 2.1355302333831787, "learning_rate": 8.330965756975069e-06, "loss": 0.1896, "step": 25540 }, { "epoch": 0.5534851472276991, "grad_norm": 1.458899736404419, "learning_rate": 8.32761013867741e-06, "loss": 0.1576, "step": 25545 }, { "epoch": 0.55359348254718, "grad_norm": 2.3502743244171143, "learning_rate": 8.324254714101064e-06, "loss": 0.1502, "step": 25550 }, { "epoch": 0.5537018178666608, "grad_norm": 1.69464910030365, "learning_rate": 8.320899483634706e-06, "loss": 0.2188, "step": 25555 }, { "epoch": 0.5538101531861418, "grad_norm": 1.1039820909500122, "learning_rate": 8.317544447666987e-06, "loss": 0.282, "step": 25560 }, { "epoch": 0.5539184885056226, "grad_norm": 1.2525088787078857, "learning_rate": 8.314189606586544e-06, "loss": 0.203, "step": 25565 }, { "epoch": 0.5540268238251035, "grad_norm": 2.21844482421875, "learning_rate": 8.310834960781982e-06, "loss": 0.1686, "step": 25570 }, { "epoch": 0.5541351591445843, "grad_norm": 1.721098780632019, "learning_rate": 8.307480510641886e-06, "loss": 0.1985, "step": 25575 }, { "epoch": 0.5542434944640652, "grad_norm": 1.0789164304733276, "learning_rate": 8.304126256554818e-06, "loss": 0.1387, "step": 25580 }, { "epoch": 0.554351829783546, "grad_norm": 0.9957844614982605, "learning_rate": 8.30077219890932e-06, "loss": 0.2038, "step": 25585 }, { "epoch": 0.5544601651030269, "grad_norm": 1.7464591264724731, "learning_rate": 8.297418338093906e-06, "loss": 0.1971, "step": 25590 }, { "epoch": 0.5545685004225077, "grad_norm": 1.2989946603775024, "learning_rate": 8.294064674497075e-06, "loss": 0.1374, "step": 25595 }, { "epoch": 0.5546768357419886, "grad_norm": 1.624703288078308, "learning_rate": 8.290711208507296e-06, "loss": 0.1853, "step": 25600 }, { "epoch": 0.5547851710614694, "grad_norm": 1.6137107610702515, "learning_rate": 8.287357940513018e-06, "loss": 0.2133, "step": 25605 }, { "epoch": 0.5548935063809504, "grad_norm": 1.379612684249878, "learning_rate": 8.284004870902666e-06, "loss": 0.203, "step": 25610 }, { "epoch": 0.5550018417004312, "grad_norm": 1.5245414972305298, "learning_rate": 8.280652000064646e-06, "loss": 0.2127, "step": 25615 }, { "epoch": 0.555110177019912, "grad_norm": 2.093230962753296, "learning_rate": 8.277299328387337e-06, "loss": 0.1375, "step": 25620 }, { "epoch": 0.5552185123393929, "grad_norm": 1.5825879573822021, "learning_rate": 8.273946856259092e-06, "loss": 0.1576, "step": 25625 }, { "epoch": 0.5553268476588737, "grad_norm": 1.6283258199691772, "learning_rate": 8.270594584068249e-06, "loss": 0.129, "step": 25630 }, { "epoch": 0.5554351829783546, "grad_norm": 0.4952295124530792, "learning_rate": 8.267242512203118e-06, "loss": 0.1135, "step": 25635 }, { "epoch": 0.5555435182978354, "grad_norm": 1.4404983520507812, "learning_rate": 8.263890641051983e-06, "loss": 0.0985, "step": 25640 }, { "epoch": 0.5556518536173163, "grad_norm": 2.222858190536499, "learning_rate": 8.260538971003111e-06, "loss": 0.2143, "step": 25645 }, { "epoch": 0.5557601889367971, "grad_norm": 2.6701440811157227, "learning_rate": 8.257187502444744e-06, "loss": 0.1992, "step": 25650 }, { "epoch": 0.5558685242562781, "grad_norm": 1.4579838514328003, "learning_rate": 8.253836235765093e-06, "loss": 0.1985, "step": 25655 }, { "epoch": 0.5559768595757589, "grad_norm": 1.2037423849105835, "learning_rate": 8.250485171352359e-06, "loss": 0.1416, "step": 25660 }, { "epoch": 0.5560851948952398, "grad_norm": 1.4234294891357422, "learning_rate": 8.24713430959471e-06, "loss": 0.1707, "step": 25665 }, { "epoch": 0.5561935302147206, "grad_norm": 1.9702942371368408, "learning_rate": 8.243783650880295e-06, "loss": 0.2341, "step": 25670 }, { "epoch": 0.5563018655342015, "grad_norm": 2.0128660202026367, "learning_rate": 8.240433195597235e-06, "loss": 0.1681, "step": 25675 }, { "epoch": 0.5564102008536823, "grad_norm": 1.5482159852981567, "learning_rate": 8.237082944133632e-06, "loss": 0.1276, "step": 25680 }, { "epoch": 0.5565185361731632, "grad_norm": 1.0440856218338013, "learning_rate": 8.23373289687756e-06, "loss": 0.1416, "step": 25685 }, { "epoch": 0.556626871492644, "grad_norm": 1.7885119915008545, "learning_rate": 8.230383054217073e-06, "loss": 0.2182, "step": 25690 }, { "epoch": 0.5567352068121248, "grad_norm": 1.744443655014038, "learning_rate": 8.227033416540203e-06, "loss": 0.1366, "step": 25695 }, { "epoch": 0.5568435421316057, "grad_norm": 1.8295718431472778, "learning_rate": 8.22368398423495e-06, "loss": 0.164, "step": 25700 }, { "epoch": 0.5569518774510867, "grad_norm": 1.8643450736999512, "learning_rate": 8.220334757689294e-06, "loss": 0.1842, "step": 25705 }, { "epoch": 0.5570602127705675, "grad_norm": 1.9237501621246338, "learning_rate": 8.216985737291203e-06, "loss": 0.2141, "step": 25710 }, { "epoch": 0.5571685480900483, "grad_norm": 1.0273411273956299, "learning_rate": 8.213636923428603e-06, "loss": 0.1917, "step": 25715 }, { "epoch": 0.5572768834095292, "grad_norm": 1.3829773664474487, "learning_rate": 8.210288316489406e-06, "loss": 0.1631, "step": 25720 }, { "epoch": 0.55738521872901, "grad_norm": 1.907813310623169, "learning_rate": 8.206939916861499e-06, "loss": 0.1364, "step": 25725 }, { "epoch": 0.5574935540484909, "grad_norm": 0.8112154006958008, "learning_rate": 8.203591724932742e-06, "loss": 0.1812, "step": 25730 }, { "epoch": 0.5576018893679717, "grad_norm": 1.6351914405822754, "learning_rate": 8.200243741090975e-06, "loss": 0.191, "step": 25735 }, { "epoch": 0.5577102246874526, "grad_norm": 1.3445000648498535, "learning_rate": 8.196895965724007e-06, "loss": 0.2047, "step": 25740 }, { "epoch": 0.5578185600069334, "grad_norm": 2.0635931491851807, "learning_rate": 8.193548399219632e-06, "loss": 0.2382, "step": 25745 }, { "epoch": 0.5579268953264143, "grad_norm": 1.1861013174057007, "learning_rate": 8.190201041965615e-06, "loss": 0.167, "step": 25750 }, { "epoch": 0.5580352306458952, "grad_norm": 2.1539418697357178, "learning_rate": 8.186853894349691e-06, "loss": 0.1785, "step": 25755 }, { "epoch": 0.5581435659653761, "grad_norm": 1.8227866888046265, "learning_rate": 8.183506956759588e-06, "loss": 0.1905, "step": 25760 }, { "epoch": 0.5582519012848569, "grad_norm": 0.5775172114372253, "learning_rate": 8.180160229582992e-06, "loss": 0.2225, "step": 25765 }, { "epoch": 0.5583602366043378, "grad_norm": 1.4097291231155396, "learning_rate": 8.176813713207574e-06, "loss": 0.1745, "step": 25770 }, { "epoch": 0.5584685719238186, "grad_norm": 2.365762710571289, "learning_rate": 8.173467408020973e-06, "loss": 0.25, "step": 25775 }, { "epoch": 0.5585769072432994, "grad_norm": 2.013408660888672, "learning_rate": 8.170121314410814e-06, "loss": 0.1917, "step": 25780 }, { "epoch": 0.5586852425627803, "grad_norm": 2.302950859069824, "learning_rate": 8.166775432764687e-06, "loss": 0.1816, "step": 25785 }, { "epoch": 0.5587935778822611, "grad_norm": 1.3680697679519653, "learning_rate": 8.163429763470167e-06, "loss": 0.1708, "step": 25790 }, { "epoch": 0.558901913201742, "grad_norm": 1.747449278831482, "learning_rate": 8.160084306914795e-06, "loss": 0.2058, "step": 25795 }, { "epoch": 0.5590102485212229, "grad_norm": 3.659452199935913, "learning_rate": 8.15673906348609e-06, "loss": 0.1817, "step": 25800 }, { "epoch": 0.5591185838407038, "grad_norm": 1.6433743238449097, "learning_rate": 8.153394033571559e-06, "loss": 0.1614, "step": 25805 }, { "epoch": 0.5592269191601846, "grad_norm": 2.309952735900879, "learning_rate": 8.150049217558665e-06, "loss": 0.1054, "step": 25810 }, { "epoch": 0.5593352544796655, "grad_norm": 2.6285433769226074, "learning_rate": 8.146704615834857e-06, "loss": 0.1438, "step": 25815 }, { "epoch": 0.5594435897991463, "grad_norm": 1.4110182523727417, "learning_rate": 8.143360228787558e-06, "loss": 0.1344, "step": 25820 }, { "epoch": 0.5595519251186272, "grad_norm": 0.48263075947761536, "learning_rate": 8.140016056804161e-06, "loss": 0.2034, "step": 25825 }, { "epoch": 0.559660260438108, "grad_norm": 1.195412516593933, "learning_rate": 8.136672100272043e-06, "loss": 0.1798, "step": 25830 }, { "epoch": 0.5597685957575889, "grad_norm": 1.395740032196045, "learning_rate": 8.13332835957855e-06, "loss": 0.1621, "step": 25835 }, { "epoch": 0.5598769310770697, "grad_norm": 1.1624188423156738, "learning_rate": 8.129984835111004e-06, "loss": 0.1828, "step": 25840 }, { "epoch": 0.5599852663965506, "grad_norm": 1.243133306503296, "learning_rate": 8.126641527256702e-06, "loss": 0.1685, "step": 25845 }, { "epoch": 0.5600936017160315, "grad_norm": 2.377704620361328, "learning_rate": 8.123298436402913e-06, "loss": 0.201, "step": 25850 }, { "epoch": 0.5602019370355124, "grad_norm": 2.246490716934204, "learning_rate": 8.11995556293689e-06, "loss": 0.2445, "step": 25855 }, { "epoch": 0.5603102723549932, "grad_norm": 1.5659605264663696, "learning_rate": 8.11661290724585e-06, "loss": 0.1514, "step": 25860 }, { "epoch": 0.560418607674474, "grad_norm": 1.4105145931243896, "learning_rate": 8.113270469716993e-06, "loss": 0.1912, "step": 25865 }, { "epoch": 0.5605269429939549, "grad_norm": 1.9113036394119263, "learning_rate": 8.10992825073749e-06, "loss": 0.1467, "step": 25870 }, { "epoch": 0.5606352783134357, "grad_norm": 1.4496337175369263, "learning_rate": 8.106586250694486e-06, "loss": 0.1286, "step": 25875 }, { "epoch": 0.5607436136329166, "grad_norm": 1.1118240356445312, "learning_rate": 8.1032444699751e-06, "loss": 0.1779, "step": 25880 }, { "epoch": 0.5608519489523974, "grad_norm": 1.1862027645111084, "learning_rate": 8.09990290896643e-06, "loss": 0.1408, "step": 25885 }, { "epoch": 0.5609602842718783, "grad_norm": 1.754892110824585, "learning_rate": 8.096561568055543e-06, "loss": 0.1435, "step": 25890 }, { "epoch": 0.5610686195913592, "grad_norm": 1.746908187866211, "learning_rate": 8.093220447629484e-06, "loss": 0.1338, "step": 25895 }, { "epoch": 0.5611769549108401, "grad_norm": 2.169235944747925, "learning_rate": 8.089879548075275e-06, "loss": 0.239, "step": 25900 }, { "epoch": 0.5612852902303209, "grad_norm": 1.35885751247406, "learning_rate": 8.086538869779905e-06, "loss": 0.1585, "step": 25905 }, { "epoch": 0.5613936255498018, "grad_norm": 1.5800038576126099, "learning_rate": 8.083198413130344e-06, "loss": 0.2222, "step": 25910 }, { "epoch": 0.5615019608692826, "grad_norm": 2.133129835128784, "learning_rate": 8.079858178513534e-06, "loss": 0.2188, "step": 25915 }, { "epoch": 0.5616102961887635, "grad_norm": 1.3711243867874146, "learning_rate": 8.07651816631639e-06, "loss": 0.2518, "step": 25920 }, { "epoch": 0.5617186315082443, "grad_norm": 1.2870861291885376, "learning_rate": 8.073178376925801e-06, "loss": 0.1395, "step": 25925 }, { "epoch": 0.5618269668277251, "grad_norm": 1.6686906814575195, "learning_rate": 8.069838810728637e-06, "loss": 0.1696, "step": 25930 }, { "epoch": 0.561935302147206, "grad_norm": 2.2310025691986084, "learning_rate": 8.066499468111729e-06, "loss": 0.1968, "step": 25935 }, { "epoch": 0.5620436374666868, "grad_norm": 1.2657655477523804, "learning_rate": 8.063160349461897e-06, "loss": 0.1311, "step": 25940 }, { "epoch": 0.5621519727861678, "grad_norm": 1.2073519229888916, "learning_rate": 8.05982145516592e-06, "loss": 0.1413, "step": 25945 }, { "epoch": 0.5622603081056486, "grad_norm": 1.8587785959243774, "learning_rate": 8.056482785610567e-06, "loss": 0.1463, "step": 25950 }, { "epoch": 0.5623686434251295, "grad_norm": 1.0857759714126587, "learning_rate": 8.053144341182568e-06, "loss": 0.2025, "step": 25955 }, { "epoch": 0.5624769787446103, "grad_norm": 1.7626972198486328, "learning_rate": 8.049806122268635e-06, "loss": 0.1589, "step": 25960 }, { "epoch": 0.5625853140640912, "grad_norm": 1.6879464387893677, "learning_rate": 8.046468129255448e-06, "loss": 0.2139, "step": 25965 }, { "epoch": 0.562693649383572, "grad_norm": 1.7376466989517212, "learning_rate": 8.043130362529664e-06, "loss": 0.2256, "step": 25970 }, { "epoch": 0.5628019847030529, "grad_norm": 1.8908089399337769, "learning_rate": 8.039792822477916e-06, "loss": 0.2181, "step": 25975 }, { "epoch": 0.5629103200225337, "grad_norm": 1.678216814994812, "learning_rate": 8.036455509486805e-06, "loss": 0.1513, "step": 25980 }, { "epoch": 0.5630186553420146, "grad_norm": 2.122131824493408, "learning_rate": 8.033118423942908e-06, "loss": 0.1824, "step": 25985 }, { "epoch": 0.5631269906614954, "grad_norm": 1.999430775642395, "learning_rate": 8.029781566232777e-06, "loss": 0.2265, "step": 25990 }, { "epoch": 0.5632353259809764, "grad_norm": 1.7248677015304565, "learning_rate": 8.026444936742934e-06, "loss": 0.1058, "step": 25995 }, { "epoch": 0.5633436613004572, "grad_norm": 1.9964207410812378, "learning_rate": 8.023108535859885e-06, "loss": 0.2472, "step": 26000 }, { "epoch": 0.5634519966199381, "grad_norm": 2.921823501586914, "learning_rate": 8.019772363970099e-06, "loss": 0.1727, "step": 26005 }, { "epoch": 0.5635603319394189, "grad_norm": 1.4732064008712769, "learning_rate": 8.016436421460023e-06, "loss": 0.166, "step": 26010 }, { "epoch": 0.5636686672588997, "grad_norm": 1.1780295372009277, "learning_rate": 8.01310070871607e-06, "loss": 0.1666, "step": 26015 }, { "epoch": 0.5637770025783806, "grad_norm": 1.3045467138290405, "learning_rate": 8.009765226124639e-06, "loss": 0.1769, "step": 26020 }, { "epoch": 0.5638853378978614, "grad_norm": 1.838767170906067, "learning_rate": 8.006429974072092e-06, "loss": 0.1787, "step": 26025 }, { "epoch": 0.5639936732173423, "grad_norm": 1.0395989418029785, "learning_rate": 8.003094952944768e-06, "loss": 0.1493, "step": 26030 }, { "epoch": 0.5641020085368231, "grad_norm": 1.859514594078064, "learning_rate": 7.99976016312898e-06, "loss": 0.142, "step": 26035 }, { "epoch": 0.5642103438563041, "grad_norm": 1.7054904699325562, "learning_rate": 7.99642560501101e-06, "loss": 0.2061, "step": 26040 }, { "epoch": 0.5643186791757849, "grad_norm": 2.0357778072357178, "learning_rate": 7.993091278977125e-06, "loss": 0.2243, "step": 26045 }, { "epoch": 0.5644270144952658, "grad_norm": 1.3569095134735107, "learning_rate": 7.989757185413552e-06, "loss": 0.1481, "step": 26050 }, { "epoch": 0.5645353498147466, "grad_norm": 1.7465448379516602, "learning_rate": 7.986423324706494e-06, "loss": 0.1987, "step": 26055 }, { "epoch": 0.5646436851342275, "grad_norm": 2.5412261486053467, "learning_rate": 7.98308969724213e-06, "loss": 0.2706, "step": 26060 }, { "epoch": 0.5647520204537083, "grad_norm": 1.9434272050857544, "learning_rate": 7.979756303406613e-06, "loss": 0.2308, "step": 26065 }, { "epoch": 0.5648603557731892, "grad_norm": 2.2102725505828857, "learning_rate": 7.976423143586064e-06, "loss": 0.1762, "step": 26070 }, { "epoch": 0.56496869109267, "grad_norm": 1.6951112747192383, "learning_rate": 7.97309021816658e-06, "loss": 0.1397, "step": 26075 }, { "epoch": 0.5650770264121509, "grad_norm": 1.1929908990859985, "learning_rate": 7.969757527534232e-06, "loss": 0.1547, "step": 26080 }, { "epoch": 0.5651853617316317, "grad_norm": 1.635800838470459, "learning_rate": 7.96642507207506e-06, "loss": 0.2174, "step": 26085 }, { "epoch": 0.5652936970511127, "grad_norm": 1.825469970703125, "learning_rate": 7.96309285217508e-06, "loss": 0.1669, "step": 26090 }, { "epoch": 0.5654020323705935, "grad_norm": 1.7387919425964355, "learning_rate": 7.959760868220284e-06, "loss": 0.1685, "step": 26095 }, { "epoch": 0.5655103676900743, "grad_norm": 1.5635906457901, "learning_rate": 7.956429120596626e-06, "loss": 0.2389, "step": 26100 }, { "epoch": 0.5656187030095552, "grad_norm": 1.0025662183761597, "learning_rate": 7.953097609690043e-06, "loss": 0.1648, "step": 26105 }, { "epoch": 0.565727038329036, "grad_norm": 1.0594804286956787, "learning_rate": 7.949766335886438e-06, "loss": 0.2056, "step": 26110 }, { "epoch": 0.5658353736485169, "grad_norm": 1.3278945684432983, "learning_rate": 7.94643529957169e-06, "loss": 0.1249, "step": 26115 }, { "epoch": 0.5659437089679977, "grad_norm": 0.8274676203727722, "learning_rate": 7.943104501131652e-06, "loss": 0.2374, "step": 26120 }, { "epoch": 0.5660520442874786, "grad_norm": 2.7489871978759766, "learning_rate": 7.939773940952144e-06, "loss": 0.275, "step": 26125 }, { "epoch": 0.5661603796069594, "grad_norm": 1.443816900253296, "learning_rate": 7.936443619418964e-06, "loss": 0.1649, "step": 26130 }, { "epoch": 0.5662687149264403, "grad_norm": 1.6377551555633545, "learning_rate": 7.933113536917877e-06, "loss": 0.1779, "step": 26135 }, { "epoch": 0.5663770502459212, "grad_norm": 1.4699760675430298, "learning_rate": 7.929783693834625e-06, "loss": 0.152, "step": 26140 }, { "epoch": 0.5664853855654021, "grad_norm": 1.7111057043075562, "learning_rate": 7.92645409055492e-06, "loss": 0.2454, "step": 26145 }, { "epoch": 0.5665937208848829, "grad_norm": 1.884020447731018, "learning_rate": 7.923124727464448e-06, "loss": 0.2239, "step": 26150 }, { "epoch": 0.5667020562043638, "grad_norm": 0.8372263312339783, "learning_rate": 7.919795604948864e-06, "loss": 0.1338, "step": 26155 }, { "epoch": 0.5668103915238446, "grad_norm": 2.0349175930023193, "learning_rate": 7.9164667233938e-06, "loss": 0.1779, "step": 26160 }, { "epoch": 0.5669187268433254, "grad_norm": 1.4601576328277588, "learning_rate": 7.913138083184852e-06, "loss": 0.1686, "step": 26165 }, { "epoch": 0.5670270621628063, "grad_norm": 2.328498125076294, "learning_rate": 7.909809684707597e-06, "loss": 0.1707, "step": 26170 }, { "epoch": 0.5671353974822871, "grad_norm": 1.551239252090454, "learning_rate": 7.906481528347578e-06, "loss": 0.2001, "step": 26175 }, { "epoch": 0.567243732801768, "grad_norm": 1.6416668891906738, "learning_rate": 7.903153614490317e-06, "loss": 0.232, "step": 26180 }, { "epoch": 0.5673520681212489, "grad_norm": 1.8718008995056152, "learning_rate": 7.899825943521291e-06, "loss": 0.2135, "step": 26185 }, { "epoch": 0.5674604034407298, "grad_norm": 1.9423664808273315, "learning_rate": 7.896498515825974e-06, "loss": 0.207, "step": 26190 }, { "epoch": 0.5675687387602106, "grad_norm": 1.7571260929107666, "learning_rate": 7.893171331789793e-06, "loss": 0.1944, "step": 26195 }, { "epoch": 0.5676770740796915, "grad_norm": 1.707960605621338, "learning_rate": 7.889844391798153e-06, "loss": 0.1152, "step": 26200 }, { "epoch": 0.5677854093991723, "grad_norm": 1.029252052307129, "learning_rate": 7.88651769623643e-06, "loss": 0.1481, "step": 26205 }, { "epoch": 0.5678937447186532, "grad_norm": 1.1164807081222534, "learning_rate": 7.883191245489973e-06, "loss": 0.1347, "step": 26210 }, { "epoch": 0.568002080038134, "grad_norm": 0.9416565895080566, "learning_rate": 7.879865039944102e-06, "loss": 0.0992, "step": 26215 }, { "epoch": 0.5681104153576149, "grad_norm": 2.5451769828796387, "learning_rate": 7.876539079984103e-06, "loss": 0.1688, "step": 26220 }, { "epoch": 0.5682187506770957, "grad_norm": 1.7951480150222778, "learning_rate": 7.873213365995244e-06, "loss": 0.2629, "step": 26225 }, { "epoch": 0.5683270859965766, "grad_norm": 1.592702031135559, "learning_rate": 7.869887898362756e-06, "loss": 0.2117, "step": 26230 }, { "epoch": 0.5684354213160575, "grad_norm": 2.0264127254486084, "learning_rate": 7.866562677471842e-06, "loss": 0.1182, "step": 26235 }, { "epoch": 0.5685437566355384, "grad_norm": 0.8016126751899719, "learning_rate": 7.863237703707687e-06, "loss": 0.2012, "step": 26240 }, { "epoch": 0.5686520919550192, "grad_norm": 1.032522439956665, "learning_rate": 7.859912977455437e-06, "loss": 0.1962, "step": 26245 }, { "epoch": 0.5687604272745, "grad_norm": 1.7294906377792358, "learning_rate": 7.85658849910021e-06, "loss": 0.212, "step": 26250 }, { "epoch": 0.5688687625939809, "grad_norm": 1.2984663248062134, "learning_rate": 7.853264269027096e-06, "loss": 0.1737, "step": 26255 }, { "epoch": 0.5689770979134617, "grad_norm": 1.4630398750305176, "learning_rate": 7.849940287621159e-06, "loss": 0.1116, "step": 26260 }, { "epoch": 0.5690854332329426, "grad_norm": 1.6285696029663086, "learning_rate": 7.84661655526743e-06, "loss": 0.1196, "step": 26265 }, { "epoch": 0.5691937685524234, "grad_norm": 1.8202883005142212, "learning_rate": 7.843293072350916e-06, "loss": 0.1317, "step": 26270 }, { "epoch": 0.5693021038719043, "grad_norm": 1.0398439168930054, "learning_rate": 7.839969839256593e-06, "loss": 0.1528, "step": 26275 }, { "epoch": 0.5694104391913851, "grad_norm": 1.472583532333374, "learning_rate": 7.836646856369405e-06, "loss": 0.1878, "step": 26280 }, { "epoch": 0.5695187745108661, "grad_norm": 1.8094689846038818, "learning_rate": 7.833324124074268e-06, "loss": 0.2226, "step": 26285 }, { "epoch": 0.5696271098303469, "grad_norm": 1.538610816001892, "learning_rate": 7.830001642756082e-06, "loss": 0.1317, "step": 26290 }, { "epoch": 0.5697354451498278, "grad_norm": 1.1882168054580688, "learning_rate": 7.826679412799698e-06, "loss": 0.1826, "step": 26295 }, { "epoch": 0.5698437804693086, "grad_norm": 1.2899988889694214, "learning_rate": 7.823357434589945e-06, "loss": 0.1517, "step": 26300 }, { "epoch": 0.5699521157887895, "grad_norm": 1.4838553667068481, "learning_rate": 7.820035708511629e-06, "loss": 0.1971, "step": 26305 }, { "epoch": 0.5700604511082703, "grad_norm": 1.718443512916565, "learning_rate": 7.81671423494952e-06, "loss": 0.1415, "step": 26310 }, { "epoch": 0.5701687864277512, "grad_norm": 0.9498315453529358, "learning_rate": 7.81339301428836e-06, "loss": 0.1802, "step": 26315 }, { "epoch": 0.570277121747232, "grad_norm": 1.3560914993286133, "learning_rate": 7.810072046912864e-06, "loss": 0.1537, "step": 26320 }, { "epoch": 0.5703854570667128, "grad_norm": 0.7360680103302002, "learning_rate": 7.806751333207719e-06, "loss": 0.1407, "step": 26325 }, { "epoch": 0.5704937923861938, "grad_norm": 1.5911656618118286, "learning_rate": 7.803430873557571e-06, "loss": 0.2013, "step": 26330 }, { "epoch": 0.5706021277056746, "grad_norm": 2.1311137676239014, "learning_rate": 7.800110668347057e-06, "loss": 0.222, "step": 26335 }, { "epoch": 0.5707104630251555, "grad_norm": 3.0681514739990234, "learning_rate": 7.796790717960766e-06, "loss": 0.1974, "step": 26340 }, { "epoch": 0.5708187983446363, "grad_norm": 1.9856133460998535, "learning_rate": 7.793471022783267e-06, "loss": 0.1837, "step": 26345 }, { "epoch": 0.5709271336641172, "grad_norm": 1.346368670463562, "learning_rate": 7.790151583199096e-06, "loss": 0.201, "step": 26350 }, { "epoch": 0.571035468983598, "grad_norm": 1.5523566007614136, "learning_rate": 7.78683239959276e-06, "loss": 0.1323, "step": 26355 }, { "epoch": 0.5711438043030789, "grad_norm": 1.4727461338043213, "learning_rate": 7.783513472348738e-06, "loss": 0.1913, "step": 26360 }, { "epoch": 0.5712521396225597, "grad_norm": 1.3348439931869507, "learning_rate": 7.780194801851477e-06, "loss": 0.1474, "step": 26365 }, { "epoch": 0.5713604749420406, "grad_norm": 1.7813177108764648, "learning_rate": 7.776876388485398e-06, "loss": 0.1398, "step": 26370 }, { "epoch": 0.5714688102615214, "grad_norm": 1.6775329113006592, "learning_rate": 7.773558232634883e-06, "loss": 0.1682, "step": 26375 }, { "epoch": 0.5715771455810024, "grad_norm": 2.027463436126709, "learning_rate": 7.770240334684293e-06, "loss": 0.2161, "step": 26380 }, { "epoch": 0.5716854809004832, "grad_norm": 1.7468339204788208, "learning_rate": 7.766922695017964e-06, "loss": 0.137, "step": 26385 }, { "epoch": 0.5717938162199641, "grad_norm": 2.1834564208984375, "learning_rate": 7.763605314020186e-06, "loss": 0.1911, "step": 26390 }, { "epoch": 0.5719021515394449, "grad_norm": 2.061643362045288, "learning_rate": 7.760288192075232e-06, "loss": 0.1957, "step": 26395 }, { "epoch": 0.5720104868589257, "grad_norm": 1.895895004272461, "learning_rate": 7.75697132956734e-06, "loss": 0.1504, "step": 26400 }, { "epoch": 0.5721188221784066, "grad_norm": 1.938424825668335, "learning_rate": 7.75365472688072e-06, "loss": 0.1906, "step": 26405 }, { "epoch": 0.5722271574978874, "grad_norm": 1.3713209629058838, "learning_rate": 7.750338384399548e-06, "loss": 0.3088, "step": 26410 }, { "epoch": 0.5723354928173683, "grad_norm": 2.2938859462738037, "learning_rate": 7.747022302507975e-06, "loss": 0.1788, "step": 26415 }, { "epoch": 0.5724438281368491, "grad_norm": 1.470499873161316, "learning_rate": 7.743706481590121e-06, "loss": 0.1945, "step": 26420 }, { "epoch": 0.5725521634563301, "grad_norm": 1.8641506433486938, "learning_rate": 7.740390922030065e-06, "loss": 0.1888, "step": 26425 }, { "epoch": 0.5726604987758109, "grad_norm": 1.5901578664779663, "learning_rate": 7.737075624211876e-06, "loss": 0.2125, "step": 26430 }, { "epoch": 0.5727688340952918, "grad_norm": 2.7553658485412598, "learning_rate": 7.733760588519579e-06, "loss": 0.3316, "step": 26435 }, { "epoch": 0.5728771694147726, "grad_norm": 0.8701195120811462, "learning_rate": 7.730445815337167e-06, "loss": 0.1459, "step": 26440 }, { "epoch": 0.5729855047342535, "grad_norm": 1.3197520971298218, "learning_rate": 7.727131305048612e-06, "loss": 0.1389, "step": 26445 }, { "epoch": 0.5730938400537343, "grad_norm": 2.0189764499664307, "learning_rate": 7.723817058037846e-06, "loss": 0.2235, "step": 26450 }, { "epoch": 0.5732021753732152, "grad_norm": 1.1638542413711548, "learning_rate": 7.720503074688777e-06, "loss": 0.1648, "step": 26455 }, { "epoch": 0.573310510692696, "grad_norm": 1.7229969501495361, "learning_rate": 7.717189355385281e-06, "loss": 0.2413, "step": 26460 }, { "epoch": 0.5734188460121769, "grad_norm": 1.3670318126678467, "learning_rate": 7.713875900511202e-06, "loss": 0.2586, "step": 26465 }, { "epoch": 0.5735271813316577, "grad_norm": 1.1333850622177124, "learning_rate": 7.710562710450351e-06, "loss": 0.1617, "step": 26470 }, { "epoch": 0.5736355166511387, "grad_norm": 1.8341395854949951, "learning_rate": 7.707249785586511e-06, "loss": 0.1808, "step": 26475 }, { "epoch": 0.5737438519706195, "grad_norm": 1.8208259344100952, "learning_rate": 7.703937126303443e-06, "loss": 0.211, "step": 26480 }, { "epoch": 0.5738521872901003, "grad_norm": 1.6758105754852295, "learning_rate": 7.700624732984863e-06, "loss": 0.1239, "step": 26485 }, { "epoch": 0.5739605226095812, "grad_norm": 2.1555750370025635, "learning_rate": 7.697312606014465e-06, "loss": 0.2646, "step": 26490 }, { "epoch": 0.574068857929062, "grad_norm": 1.9037531614303589, "learning_rate": 7.694000745775908e-06, "loss": 0.1962, "step": 26495 }, { "epoch": 0.5741771932485429, "grad_norm": 1.024685025215149, "learning_rate": 7.690689152652817e-06, "loss": 0.1941, "step": 26500 }, { "epoch": 0.5742855285680237, "grad_norm": 1.0995429754257202, "learning_rate": 7.687377827028796e-06, "loss": 0.1803, "step": 26505 }, { "epoch": 0.5743938638875046, "grad_norm": 1.4697977304458618, "learning_rate": 7.68406676928741e-06, "loss": 0.3421, "step": 26510 }, { "epoch": 0.5745021992069854, "grad_norm": 1.6857185363769531, "learning_rate": 7.680755979812197e-06, "loss": 0.1435, "step": 26515 }, { "epoch": 0.5746105345264663, "grad_norm": 1.9640929698944092, "learning_rate": 7.677445458986661e-06, "loss": 0.1746, "step": 26520 }, { "epoch": 0.5747188698459472, "grad_norm": 1.8955482244491577, "learning_rate": 7.674135207194272e-06, "loss": 0.1416, "step": 26525 }, { "epoch": 0.5748272051654281, "grad_norm": 1.742129921913147, "learning_rate": 7.670825224818485e-06, "loss": 0.1511, "step": 26530 }, { "epoch": 0.5749355404849089, "grad_norm": 2.1391663551330566, "learning_rate": 7.6675155122427e-06, "loss": 0.1854, "step": 26535 }, { "epoch": 0.5750438758043898, "grad_norm": 2.020402669906616, "learning_rate": 7.664206069850306e-06, "loss": 0.2593, "step": 26540 }, { "epoch": 0.5751522111238706, "grad_norm": 1.962774634361267, "learning_rate": 7.660896898024646e-06, "loss": 0.1619, "step": 26545 }, { "epoch": 0.5752605464433515, "grad_norm": 1.2577464580535889, "learning_rate": 7.657587997149043e-06, "loss": 0.1907, "step": 26550 }, { "epoch": 0.5753688817628323, "grad_norm": 1.6325008869171143, "learning_rate": 7.65427936760678e-06, "loss": 0.2164, "step": 26555 }, { "epoch": 0.5754772170823131, "grad_norm": 1.51250422000885, "learning_rate": 7.650971009781112e-06, "loss": 0.178, "step": 26560 }, { "epoch": 0.575585552401794, "grad_norm": 2.014726400375366, "learning_rate": 7.647662924055267e-06, "loss": 0.1198, "step": 26565 }, { "epoch": 0.5756938877212749, "grad_norm": 0.785642683506012, "learning_rate": 7.644355110812429e-06, "loss": 0.1527, "step": 26570 }, { "epoch": 0.5758022230407558, "grad_norm": 2.1532845497131348, "learning_rate": 7.641047570435767e-06, "loss": 0.2476, "step": 26575 }, { "epoch": 0.5759105583602366, "grad_norm": 1.7753483057022095, "learning_rate": 7.637740303308408e-06, "loss": 0.1781, "step": 26580 }, { "epoch": 0.5760188936797175, "grad_norm": 1.3010139465332031, "learning_rate": 7.634433309813447e-06, "loss": 0.1655, "step": 26585 }, { "epoch": 0.5761272289991983, "grad_norm": 1.5315701961517334, "learning_rate": 7.631126590333951e-06, "loss": 0.2871, "step": 26590 }, { "epoch": 0.5762355643186792, "grad_norm": 1.1088346242904663, "learning_rate": 7.627820145252954e-06, "loss": 0.125, "step": 26595 }, { "epoch": 0.57634389963816, "grad_norm": 2.8985588550567627, "learning_rate": 7.624513974953458e-06, "loss": 0.2586, "step": 26600 }, { "epoch": 0.5764522349576409, "grad_norm": 1.8180882930755615, "learning_rate": 7.621208079818433e-06, "loss": 0.1692, "step": 26605 }, { "epoch": 0.5765605702771217, "grad_norm": 2.056199789047241, "learning_rate": 7.6179024602308186e-06, "loss": 0.192, "step": 26610 }, { "epoch": 0.5766689055966026, "grad_norm": 1.7750517129898071, "learning_rate": 7.614597116573519e-06, "loss": 0.2877, "step": 26615 }, { "epoch": 0.5767772409160835, "grad_norm": 1.7179920673370361, "learning_rate": 7.6112920492294084e-06, "loss": 0.2289, "step": 26620 }, { "epoch": 0.5768855762355644, "grad_norm": 1.8899842500686646, "learning_rate": 7.607987258581333e-06, "loss": 0.1762, "step": 26625 }, { "epoch": 0.5769939115550452, "grad_norm": 1.5639748573303223, "learning_rate": 7.604682745012102e-06, "loss": 0.2031, "step": 26630 }, { "epoch": 0.577102246874526, "grad_norm": 1.9352879524230957, "learning_rate": 7.601378508904493e-06, "loss": 0.251, "step": 26635 }, { "epoch": 0.5772105821940069, "grad_norm": 1.916297435760498, "learning_rate": 7.598074550641253e-06, "loss": 0.2405, "step": 26640 }, { "epoch": 0.5773189175134877, "grad_norm": 2.1164562702178955, "learning_rate": 7.594770870605094e-06, "loss": 0.2039, "step": 26645 }, { "epoch": 0.5774272528329686, "grad_norm": 1.01029372215271, "learning_rate": 7.591467469178701e-06, "loss": 0.1389, "step": 26650 }, { "epoch": 0.5775355881524494, "grad_norm": 1.5734180212020874, "learning_rate": 7.5881643467447195e-06, "loss": 0.1815, "step": 26655 }, { "epoch": 0.5776439234719303, "grad_norm": 1.7965102195739746, "learning_rate": 7.584861503685771e-06, "loss": 0.2082, "step": 26660 }, { "epoch": 0.5777522587914111, "grad_norm": 1.7569475173950195, "learning_rate": 7.58155894038444e-06, "loss": 0.1937, "step": 26665 }, { "epoch": 0.5778605941108921, "grad_norm": 2.1619837284088135, "learning_rate": 7.578256657223272e-06, "loss": 0.1613, "step": 26670 }, { "epoch": 0.5779689294303729, "grad_norm": 1.605828046798706, "learning_rate": 7.574954654584796e-06, "loss": 0.177, "step": 26675 }, { "epoch": 0.5780772647498538, "grad_norm": 1.0213898420333862, "learning_rate": 7.571652932851497e-06, "loss": 0.2148, "step": 26680 }, { "epoch": 0.5781856000693346, "grad_norm": 1.7093428373336792, "learning_rate": 7.568351492405828e-06, "loss": 0.2021, "step": 26685 }, { "epoch": 0.5782939353888155, "grad_norm": 1.8585116863250732, "learning_rate": 7.565050333630212e-06, "loss": 0.076, "step": 26690 }, { "epoch": 0.5784022707082963, "grad_norm": 1.810219407081604, "learning_rate": 7.561749456907041e-06, "loss": 0.2152, "step": 26695 }, { "epoch": 0.5785106060277772, "grad_norm": 2.261483669281006, "learning_rate": 7.55844886261867e-06, "loss": 0.109, "step": 26700 }, { "epoch": 0.578618941347258, "grad_norm": 2.371628761291504, "learning_rate": 7.555148551147424e-06, "loss": 0.1824, "step": 26705 }, { "epoch": 0.5787272766667388, "grad_norm": 1.7021939754486084, "learning_rate": 7.551848522875595e-06, "loss": 0.1944, "step": 26710 }, { "epoch": 0.5788356119862198, "grad_norm": 1.2312700748443604, "learning_rate": 7.548548778185435e-06, "loss": 0.1839, "step": 26715 }, { "epoch": 0.5789439473057006, "grad_norm": 1.9211665391921997, "learning_rate": 7.545249317459181e-06, "loss": 0.1601, "step": 26720 }, { "epoch": 0.5790522826251815, "grad_norm": 2.1215617656707764, "learning_rate": 7.541950141079022e-06, "loss": 0.1896, "step": 26725 }, { "epoch": 0.5791606179446623, "grad_norm": 1.2997937202453613, "learning_rate": 7.538651249427118e-06, "loss": 0.1941, "step": 26730 }, { "epoch": 0.5792689532641432, "grad_norm": 1.3825589418411255, "learning_rate": 7.535352642885597e-06, "loss": 0.1389, "step": 26735 }, { "epoch": 0.579377288583624, "grad_norm": 1.6223164796829224, "learning_rate": 7.53205432183655e-06, "loss": 0.1519, "step": 26740 }, { "epoch": 0.5794856239031049, "grad_norm": 0.9532009363174438, "learning_rate": 7.52875628666204e-06, "loss": 0.1741, "step": 26745 }, { "epoch": 0.5795939592225857, "grad_norm": 1.0555641651153564, "learning_rate": 7.525458537744096e-06, "loss": 0.1513, "step": 26750 }, { "epoch": 0.5797022945420666, "grad_norm": 0.7412003874778748, "learning_rate": 7.52216107546471e-06, "loss": 0.1544, "step": 26755 }, { "epoch": 0.5798106298615474, "grad_norm": 2.122117757797241, "learning_rate": 7.518863900205846e-06, "loss": 0.1956, "step": 26760 }, { "epoch": 0.5799189651810284, "grad_norm": 1.1964632272720337, "learning_rate": 7.515567012349428e-06, "loss": 0.1592, "step": 26765 }, { "epoch": 0.5800273005005092, "grad_norm": 2.0075652599334717, "learning_rate": 7.512270412277362e-06, "loss": 0.1327, "step": 26770 }, { "epoch": 0.5801356358199901, "grad_norm": 1.7832655906677246, "learning_rate": 7.5089741003714986e-06, "loss": 0.139, "step": 26775 }, { "epoch": 0.5802439711394709, "grad_norm": 1.199450135231018, "learning_rate": 7.50567807701367e-06, "loss": 0.1934, "step": 26780 }, { "epoch": 0.5803523064589517, "grad_norm": 2.1379446983337402, "learning_rate": 7.502382342585673e-06, "loss": 0.2526, "step": 26785 }, { "epoch": 0.5804606417784326, "grad_norm": 1.4826637506484985, "learning_rate": 7.4990868974692665e-06, "loss": 0.243, "step": 26790 }, { "epoch": 0.5805689770979134, "grad_norm": 1.5324031114578247, "learning_rate": 7.495791742046179e-06, "loss": 0.225, "step": 26795 }, { "epoch": 0.5806773124173943, "grad_norm": 1.0256010293960571, "learning_rate": 7.492496876698105e-06, "loss": 0.0917, "step": 26800 }, { "epoch": 0.5807856477368751, "grad_norm": 0.8803384304046631, "learning_rate": 7.4892023018067066e-06, "loss": 0.161, "step": 26805 }, { "epoch": 0.580893983056356, "grad_norm": 1.2065273523330688, "learning_rate": 7.485908017753609e-06, "loss": 0.1829, "step": 26810 }, { "epoch": 0.5810023183758369, "grad_norm": 1.329565405845642, "learning_rate": 7.482614024920404e-06, "loss": 0.166, "step": 26815 }, { "epoch": 0.5811106536953178, "grad_norm": 1.236606240272522, "learning_rate": 7.479320323688655e-06, "loss": 0.1208, "step": 26820 }, { "epoch": 0.5812189890147986, "grad_norm": 1.7381750345230103, "learning_rate": 7.476026914439889e-06, "loss": 0.1569, "step": 26825 }, { "epoch": 0.5813273243342795, "grad_norm": 2.167203426361084, "learning_rate": 7.4727337975555945e-06, "loss": 0.2202, "step": 26830 }, { "epoch": 0.5814356596537603, "grad_norm": 1.794054388999939, "learning_rate": 7.469440973417231e-06, "loss": 0.1649, "step": 26835 }, { "epoch": 0.5815439949732412, "grad_norm": 1.8985021114349365, "learning_rate": 7.466148442406224e-06, "loss": 0.175, "step": 26840 }, { "epoch": 0.581652330292722, "grad_norm": 1.4427400827407837, "learning_rate": 7.462856204903961e-06, "loss": 0.1948, "step": 26845 }, { "epoch": 0.5817606656122029, "grad_norm": 1.2531441450119019, "learning_rate": 7.4595642612918015e-06, "loss": 0.1932, "step": 26850 }, { "epoch": 0.5818690009316837, "grad_norm": 1.5182768106460571, "learning_rate": 7.456272611951067e-06, "loss": 0.2322, "step": 26855 }, { "epoch": 0.5819773362511647, "grad_norm": 1.636420488357544, "learning_rate": 7.452981257263043e-06, "loss": 0.195, "step": 26860 }, { "epoch": 0.5820856715706455, "grad_norm": 1.5709550380706787, "learning_rate": 7.449690197608988e-06, "loss": 0.1754, "step": 26865 }, { "epoch": 0.5821940068901263, "grad_norm": 1.4344770908355713, "learning_rate": 7.44639943337012e-06, "loss": 0.1002, "step": 26870 }, { "epoch": 0.5823023422096072, "grad_norm": 1.775500774383545, "learning_rate": 7.443108964927624e-06, "loss": 0.1876, "step": 26875 }, { "epoch": 0.582410677529088, "grad_norm": 1.091137409210205, "learning_rate": 7.4398187926626515e-06, "loss": 0.1807, "step": 26880 }, { "epoch": 0.5825190128485689, "grad_norm": 1.411527156829834, "learning_rate": 7.436528916956322e-06, "loss": 0.1898, "step": 26885 }, { "epoch": 0.5826273481680497, "grad_norm": 1.8061552047729492, "learning_rate": 7.433239338189717e-06, "loss": 0.1597, "step": 26890 }, { "epoch": 0.5827356834875306, "grad_norm": 1.5870481729507446, "learning_rate": 7.429950056743884e-06, "loss": 0.2794, "step": 26895 }, { "epoch": 0.5828440188070114, "grad_norm": 1.8715424537658691, "learning_rate": 7.426661072999837e-06, "loss": 0.2289, "step": 26900 }, { "epoch": 0.5829523541264923, "grad_norm": 1.4375652074813843, "learning_rate": 7.423372387338559e-06, "loss": 0.1532, "step": 26905 }, { "epoch": 0.5830606894459732, "grad_norm": 2.347280263900757, "learning_rate": 7.420084000140986e-06, "loss": 0.2486, "step": 26910 }, { "epoch": 0.5831690247654541, "grad_norm": 1.2633427381515503, "learning_rate": 7.416795911788037e-06, "loss": 0.2319, "step": 26915 }, { "epoch": 0.5832773600849349, "grad_norm": 1.1679154634475708, "learning_rate": 7.4135081226605865e-06, "loss": 0.1292, "step": 26920 }, { "epoch": 0.5833856954044158, "grad_norm": 1.2538299560546875, "learning_rate": 7.4102206331394746e-06, "loss": 0.1483, "step": 26925 }, { "epoch": 0.5834940307238966, "grad_norm": 0.7507836818695068, "learning_rate": 7.406933443605506e-06, "loss": 0.1874, "step": 26930 }, { "epoch": 0.5836023660433775, "grad_norm": 1.461763858795166, "learning_rate": 7.403646554439456e-06, "loss": 0.1919, "step": 26935 }, { "epoch": 0.5837107013628583, "grad_norm": 0.9728952050209045, "learning_rate": 7.400359966022057e-06, "loss": 0.172, "step": 26940 }, { "epoch": 0.5838190366823391, "grad_norm": 1.1584700345993042, "learning_rate": 7.397073678734017e-06, "loss": 0.1801, "step": 26945 }, { "epoch": 0.58392737200182, "grad_norm": 0.6920061707496643, "learning_rate": 7.393787692955996e-06, "loss": 0.1937, "step": 26950 }, { "epoch": 0.584035707321301, "grad_norm": 1.0614185333251953, "learning_rate": 7.390502009068627e-06, "loss": 0.1283, "step": 26955 }, { "epoch": 0.5841440426407818, "grad_norm": 1.6700598001480103, "learning_rate": 7.387216627452513e-06, "loss": 0.1434, "step": 26960 }, { "epoch": 0.5842523779602626, "grad_norm": 2.030534029006958, "learning_rate": 7.383931548488215e-06, "loss": 0.2002, "step": 26965 }, { "epoch": 0.5843607132797435, "grad_norm": 1.1373047828674316, "learning_rate": 7.380646772556257e-06, "loss": 0.182, "step": 26970 }, { "epoch": 0.5844690485992243, "grad_norm": 1.5463895797729492, "learning_rate": 7.377362300037132e-06, "loss": 0.1765, "step": 26975 }, { "epoch": 0.5845773839187052, "grad_norm": 1.8768833875656128, "learning_rate": 7.3740781313113005e-06, "loss": 0.227, "step": 26980 }, { "epoch": 0.584685719238186, "grad_norm": 2.007871389389038, "learning_rate": 7.370794266759178e-06, "loss": 0.1969, "step": 26985 }, { "epoch": 0.5847940545576669, "grad_norm": 1.8180376291275024, "learning_rate": 7.367510706761156e-06, "loss": 0.1447, "step": 26990 }, { "epoch": 0.5849023898771477, "grad_norm": 1.5308915376663208, "learning_rate": 7.364227451697583e-06, "loss": 0.2036, "step": 26995 }, { "epoch": 0.5850107251966286, "grad_norm": 0.4846397936344147, "learning_rate": 7.360944501948776e-06, "loss": 0.1896, "step": 27000 }, { "epoch": 0.5851190605161095, "grad_norm": 1.721441626548767, "learning_rate": 7.357661857895011e-06, "loss": 0.2076, "step": 27005 }, { "epoch": 0.5852273958355904, "grad_norm": 2.074878454208374, "learning_rate": 7.3543795199165415e-06, "loss": 0.2181, "step": 27010 }, { "epoch": 0.5853357311550712, "grad_norm": 2.3924758434295654, "learning_rate": 7.351097488393575e-06, "loss": 0.1116, "step": 27015 }, { "epoch": 0.585444066474552, "grad_norm": 1.6706969738006592, "learning_rate": 7.347815763706283e-06, "loss": 0.1055, "step": 27020 }, { "epoch": 0.5855524017940329, "grad_norm": 1.4670956134796143, "learning_rate": 7.344534346234804e-06, "loss": 0.1333, "step": 27025 }, { "epoch": 0.5856607371135137, "grad_norm": 1.3248071670532227, "learning_rate": 7.3412532363592425e-06, "loss": 0.1997, "step": 27030 }, { "epoch": 0.5857690724329946, "grad_norm": 1.8168258666992188, "learning_rate": 7.337972434459666e-06, "loss": 0.1795, "step": 27035 }, { "epoch": 0.5858774077524754, "grad_norm": 1.4617801904678345, "learning_rate": 7.334691940916105e-06, "loss": 0.138, "step": 27040 }, { "epoch": 0.5859857430719563, "grad_norm": 1.3801815509796143, "learning_rate": 7.331411756108556e-06, "loss": 0.1667, "step": 27045 }, { "epoch": 0.5860940783914371, "grad_norm": 1.764763355255127, "learning_rate": 7.3281318804169806e-06, "loss": 0.1116, "step": 27050 }, { "epoch": 0.5862024137109181, "grad_norm": 1.4620511531829834, "learning_rate": 7.3248523142213005e-06, "loss": 0.2322, "step": 27055 }, { "epoch": 0.5863107490303989, "grad_norm": 1.4666874408721924, "learning_rate": 7.321573057901408e-06, "loss": 0.2119, "step": 27060 }, { "epoch": 0.5864190843498798, "grad_norm": 1.6688436269760132, "learning_rate": 7.318294111837152e-06, "loss": 0.1389, "step": 27065 }, { "epoch": 0.5865274196693606, "grad_norm": 0.9102423191070557, "learning_rate": 7.315015476408354e-06, "loss": 0.1581, "step": 27070 }, { "epoch": 0.5866357549888415, "grad_norm": 0.574555516242981, "learning_rate": 7.31173715199479e-06, "loss": 0.1706, "step": 27075 }, { "epoch": 0.5867440903083223, "grad_norm": 0.8114798665046692, "learning_rate": 7.308459138976207e-06, "loss": 0.1629, "step": 27080 }, { "epoch": 0.5868524256278032, "grad_norm": 1.7411818504333496, "learning_rate": 7.305181437732314e-06, "loss": 0.2025, "step": 27085 }, { "epoch": 0.586960760947284, "grad_norm": 2.0672056674957275, "learning_rate": 7.301904048642783e-06, "loss": 0.2043, "step": 27090 }, { "epoch": 0.5870690962667648, "grad_norm": 1.1214250326156616, "learning_rate": 7.29862697208725e-06, "loss": 0.1608, "step": 27095 }, { "epoch": 0.5871774315862458, "grad_norm": 1.4518711566925049, "learning_rate": 7.295350208445315e-06, "loss": 0.1551, "step": 27100 }, { "epoch": 0.5872857669057266, "grad_norm": 1.7915611267089844, "learning_rate": 7.292073758096543e-06, "loss": 0.2732, "step": 27105 }, { "epoch": 0.5873941022252075, "grad_norm": 2.063429594039917, "learning_rate": 7.288797621420462e-06, "loss": 0.2004, "step": 27110 }, { "epoch": 0.5875024375446883, "grad_norm": 1.7721549272537231, "learning_rate": 7.285521798796565e-06, "loss": 0.2098, "step": 27115 }, { "epoch": 0.5876107728641692, "grad_norm": 2.0220682621002197, "learning_rate": 7.282246290604302e-06, "loss": 0.1751, "step": 27120 }, { "epoch": 0.58771910818365, "grad_norm": 1.4801428318023682, "learning_rate": 7.278971097223094e-06, "loss": 0.2739, "step": 27125 }, { "epoch": 0.5878274435031309, "grad_norm": 1.0539315938949585, "learning_rate": 7.275696219032324e-06, "loss": 0.1545, "step": 27130 }, { "epoch": 0.5879357788226117, "grad_norm": 1.53062903881073, "learning_rate": 7.2724216564113385e-06, "loss": 0.1774, "step": 27135 }, { "epoch": 0.5880441141420926, "grad_norm": 1.8432034254074097, "learning_rate": 7.269147409739444e-06, "loss": 0.1384, "step": 27140 }, { "epoch": 0.5881524494615734, "grad_norm": 1.886759638786316, "learning_rate": 7.265873479395913e-06, "loss": 0.1978, "step": 27145 }, { "epoch": 0.5882607847810544, "grad_norm": 1.398430347442627, "learning_rate": 7.26259986575998e-06, "loss": 0.1865, "step": 27150 }, { "epoch": 0.5883691201005352, "grad_norm": 2.1379735469818115, "learning_rate": 7.25932656921085e-06, "loss": 0.1246, "step": 27155 }, { "epoch": 0.5884774554200161, "grad_norm": 2.6729989051818848, "learning_rate": 7.25605359012768e-06, "loss": 0.1955, "step": 27160 }, { "epoch": 0.5885857907394969, "grad_norm": 2.218341827392578, "learning_rate": 7.2527809288896e-06, "loss": 0.2142, "step": 27165 }, { "epoch": 0.5886941260589778, "grad_norm": 2.0122182369232178, "learning_rate": 7.249508585875693e-06, "loss": 0.1224, "step": 27170 }, { "epoch": 0.5888024613784586, "grad_norm": 0.9364925622940063, "learning_rate": 7.246236561465016e-06, "loss": 0.08, "step": 27175 }, { "epoch": 0.5889107966979394, "grad_norm": 1.1239275932312012, "learning_rate": 7.24296485603658e-06, "loss": 0.18, "step": 27180 }, { "epoch": 0.5890191320174203, "grad_norm": 1.4652955532073975, "learning_rate": 7.2396934699693685e-06, "loss": 0.1785, "step": 27185 }, { "epoch": 0.5891274673369011, "grad_norm": 1.6808807849884033, "learning_rate": 7.236422403642318e-06, "loss": 0.244, "step": 27190 }, { "epoch": 0.589235802656382, "grad_norm": 1.0052611827850342, "learning_rate": 7.233151657434332e-06, "loss": 0.1347, "step": 27195 }, { "epoch": 0.5893441379758629, "grad_norm": 2.0785086154937744, "learning_rate": 7.229881231724276e-06, "loss": 0.1877, "step": 27200 }, { "epoch": 0.5894524732953438, "grad_norm": 1.8550060987472534, "learning_rate": 7.226611126890986e-06, "loss": 0.1542, "step": 27205 }, { "epoch": 0.5895608086148246, "grad_norm": 1.5124210119247437, "learning_rate": 7.223341343313253e-06, "loss": 0.2605, "step": 27210 }, { "epoch": 0.5896691439343055, "grad_norm": 0.8902127742767334, "learning_rate": 7.220071881369829e-06, "loss": 0.1812, "step": 27215 }, { "epoch": 0.5897774792537863, "grad_norm": 1.5464279651641846, "learning_rate": 7.216802741439437e-06, "loss": 0.2395, "step": 27220 }, { "epoch": 0.5898858145732672, "grad_norm": 1.4502936601638794, "learning_rate": 7.213533923900754e-06, "loss": 0.2281, "step": 27225 }, { "epoch": 0.589994149892748, "grad_norm": 1.4737577438354492, "learning_rate": 7.2102654291324235e-06, "loss": 0.1727, "step": 27230 }, { "epoch": 0.5901024852122289, "grad_norm": 2.164522647857666, "learning_rate": 7.206997257513054e-06, "loss": 0.1684, "step": 27235 }, { "epoch": 0.5902108205317097, "grad_norm": 1.4534564018249512, "learning_rate": 7.203729409421213e-06, "loss": 0.1594, "step": 27240 }, { "epoch": 0.5903191558511907, "grad_norm": 1.7960011959075928, "learning_rate": 7.2004618852354265e-06, "loss": 0.2127, "step": 27245 }, { "epoch": 0.5904274911706715, "grad_norm": 1.839072346687317, "learning_rate": 7.197194685334199e-06, "loss": 0.1559, "step": 27250 }, { "epoch": 0.5905358264901523, "grad_norm": 1.352832555770874, "learning_rate": 7.193927810095982e-06, "loss": 0.2584, "step": 27255 }, { "epoch": 0.5906441618096332, "grad_norm": 2.247654676437378, "learning_rate": 7.190661259899192e-06, "loss": 0.1941, "step": 27260 }, { "epoch": 0.590752497129114, "grad_norm": 1.7974611520767212, "learning_rate": 7.187395035122211e-06, "loss": 0.1722, "step": 27265 }, { "epoch": 0.5908608324485949, "grad_norm": 1.6590211391448975, "learning_rate": 7.184129136143382e-06, "loss": 0.1585, "step": 27270 }, { "epoch": 0.5909691677680757, "grad_norm": 1.379737377166748, "learning_rate": 7.180863563341011e-06, "loss": 0.1868, "step": 27275 }, { "epoch": 0.5910775030875566, "grad_norm": 2.2190492153167725, "learning_rate": 7.177598317093366e-06, "loss": 0.2074, "step": 27280 }, { "epoch": 0.5911858384070374, "grad_norm": 1.627274513244629, "learning_rate": 7.174333397778676e-06, "loss": 0.2822, "step": 27285 }, { "epoch": 0.5912941737265183, "grad_norm": 1.3872151374816895, "learning_rate": 7.171068805775132e-06, "loss": 0.1778, "step": 27290 }, { "epoch": 0.5914025090459992, "grad_norm": 0.4661925435066223, "learning_rate": 7.16780454146089e-06, "loss": 0.1796, "step": 27295 }, { "epoch": 0.5915108443654801, "grad_norm": 2.32619309425354, "learning_rate": 7.164540605214065e-06, "loss": 0.1984, "step": 27300 }, { "epoch": 0.5916191796849609, "grad_norm": 1.323569655418396, "learning_rate": 7.161276997412739e-06, "loss": 0.1614, "step": 27305 }, { "epoch": 0.5917275150044418, "grad_norm": 1.6149629354476929, "learning_rate": 7.158013718434946e-06, "loss": 0.2198, "step": 27310 }, { "epoch": 0.5918358503239226, "grad_norm": 1.643518090248108, "learning_rate": 7.154750768658692e-06, "loss": 0.0676, "step": 27315 }, { "epoch": 0.5919441856434035, "grad_norm": 1.6878600120544434, "learning_rate": 7.151488148461938e-06, "loss": 0.1823, "step": 27320 }, { "epoch": 0.5920525209628843, "grad_norm": 1.9581726789474487, "learning_rate": 7.148225858222613e-06, "loss": 0.1521, "step": 27325 }, { "epoch": 0.5921608562823651, "grad_norm": 1.5891172885894775, "learning_rate": 7.1449638983186024e-06, "loss": 0.2371, "step": 27330 }, { "epoch": 0.592269191601846, "grad_norm": 1.1216052770614624, "learning_rate": 7.141702269127756e-06, "loss": 0.1928, "step": 27335 }, { "epoch": 0.5923775269213268, "grad_norm": 0.7999765872955322, "learning_rate": 7.138440971027885e-06, "loss": 0.1007, "step": 27340 }, { "epoch": 0.5924858622408078, "grad_norm": 1.1930369138717651, "learning_rate": 7.135180004396758e-06, "loss": 0.1182, "step": 27345 }, { "epoch": 0.5925941975602886, "grad_norm": 1.295690655708313, "learning_rate": 7.131919369612115e-06, "loss": 0.2073, "step": 27350 }, { "epoch": 0.5927025328797695, "grad_norm": 1.5103330612182617, "learning_rate": 7.1286590670516486e-06, "loss": 0.1704, "step": 27355 }, { "epoch": 0.5928108681992503, "grad_norm": 1.959656000137329, "learning_rate": 7.125399097093018e-06, "loss": 0.1862, "step": 27360 }, { "epoch": 0.5929192035187312, "grad_norm": 0.6926324367523193, "learning_rate": 7.122139460113838e-06, "loss": 0.2018, "step": 27365 }, { "epoch": 0.593027538838212, "grad_norm": 1.1754511594772339, "learning_rate": 7.1188801564916935e-06, "loss": 0.1683, "step": 27370 }, { "epoch": 0.5931358741576929, "grad_norm": 1.3959871530532837, "learning_rate": 7.115621186604122e-06, "loss": 0.1558, "step": 27375 }, { "epoch": 0.5932442094771737, "grad_norm": 0.9027882218360901, "learning_rate": 7.11236255082863e-06, "loss": 0.1516, "step": 27380 }, { "epoch": 0.5933525447966546, "grad_norm": 1.324855923652649, "learning_rate": 7.109104249542679e-06, "loss": 0.1997, "step": 27385 }, { "epoch": 0.5934608801161355, "grad_norm": 1.2392935752868652, "learning_rate": 7.1058462831236915e-06, "loss": 0.1688, "step": 27390 }, { "epoch": 0.5935692154356164, "grad_norm": 2.213273286819458, "learning_rate": 7.1025886519490615e-06, "loss": 0.1186, "step": 27395 }, { "epoch": 0.5936775507550972, "grad_norm": 2.2347354888916016, "learning_rate": 7.099331356396133e-06, "loss": 0.1988, "step": 27400 }, { "epoch": 0.593785886074578, "grad_norm": 2.056797981262207, "learning_rate": 7.096074396842214e-06, "loss": 0.1335, "step": 27405 }, { "epoch": 0.5938942213940589, "grad_norm": 1.5371675491333008, "learning_rate": 7.092817773664575e-06, "loss": 0.1819, "step": 27410 }, { "epoch": 0.5940025567135397, "grad_norm": 1.472852349281311, "learning_rate": 7.0895614872404485e-06, "loss": 0.1912, "step": 27415 }, { "epoch": 0.5941108920330206, "grad_norm": 2.657205104827881, "learning_rate": 7.086305537947025e-06, "loss": 0.2594, "step": 27420 }, { "epoch": 0.5942192273525014, "grad_norm": 1.9139354228973389, "learning_rate": 7.0830499261614596e-06, "loss": 0.1417, "step": 27425 }, { "epoch": 0.5943275626719823, "grad_norm": 1.470113754272461, "learning_rate": 7.079794652260862e-06, "loss": 0.0989, "step": 27430 }, { "epoch": 0.5944358979914631, "grad_norm": 1.1277674436569214, "learning_rate": 7.07653971662231e-06, "loss": 0.2346, "step": 27435 }, { "epoch": 0.5945442333109441, "grad_norm": 1.9254963397979736, "learning_rate": 7.073285119622832e-06, "loss": 0.2568, "step": 27440 }, { "epoch": 0.5946525686304249, "grad_norm": 1.5601890087127686, "learning_rate": 7.070030861639439e-06, "loss": 0.1968, "step": 27445 }, { "epoch": 0.5947609039499058, "grad_norm": 1.9380483627319336, "learning_rate": 7.066776943049076e-06, "loss": 0.1606, "step": 27450 }, { "epoch": 0.5948692392693866, "grad_norm": 1.4663350582122803, "learning_rate": 7.063523364228666e-06, "loss": 0.2179, "step": 27455 }, { "epoch": 0.5949775745888675, "grad_norm": 1.1119478940963745, "learning_rate": 7.060270125555087e-06, "loss": 0.1056, "step": 27460 }, { "epoch": 0.5950859099083483, "grad_norm": 1.2130731344223022, "learning_rate": 7.057017227405176e-06, "loss": 0.2317, "step": 27465 }, { "epoch": 0.5951942452278292, "grad_norm": 2.402474880218506, "learning_rate": 7.053764670155734e-06, "loss": 0.1169, "step": 27470 }, { "epoch": 0.59530258054731, "grad_norm": 1.08907151222229, "learning_rate": 7.050512454183518e-06, "loss": 0.1164, "step": 27475 }, { "epoch": 0.5954109158667908, "grad_norm": 1.2008236646652222, "learning_rate": 7.047260579865252e-06, "loss": 0.1898, "step": 27480 }, { "epoch": 0.5955192511862718, "grad_norm": 1.2812230587005615, "learning_rate": 7.044009047577612e-06, "loss": 0.2081, "step": 27485 }, { "epoch": 0.5956275865057526, "grad_norm": 1.345220685005188, "learning_rate": 7.0407578576972465e-06, "loss": 0.1351, "step": 27490 }, { "epoch": 0.5957359218252335, "grad_norm": 1.2138588428497314, "learning_rate": 7.0375070106007545e-06, "loss": 0.2066, "step": 27495 }, { "epoch": 0.5958442571447143, "grad_norm": 0.9222223162651062, "learning_rate": 7.0342565066647e-06, "loss": 0.0924, "step": 27500 }, { "epoch": 0.5959525924641952, "grad_norm": 1.9599931240081787, "learning_rate": 7.031006346265598e-06, "loss": 0.1188, "step": 27505 }, { "epoch": 0.596060927783676, "grad_norm": 1.327823281288147, "learning_rate": 7.027756529779937e-06, "loss": 0.1632, "step": 27510 }, { "epoch": 0.5961692631031569, "grad_norm": 1.0766959190368652, "learning_rate": 7.024507057584158e-06, "loss": 0.1336, "step": 27515 }, { "epoch": 0.5962775984226377, "grad_norm": 1.9522560834884644, "learning_rate": 7.021257930054662e-06, "loss": 0.1287, "step": 27520 }, { "epoch": 0.5963859337421186, "grad_norm": 2.156402111053467, "learning_rate": 7.018009147567815e-06, "loss": 0.141, "step": 27525 }, { "epoch": 0.5964942690615994, "grad_norm": 2.1232078075408936, "learning_rate": 7.014760710499937e-06, "loss": 0.1695, "step": 27530 }, { "epoch": 0.5966026043810804, "grad_norm": 0.9435896873474121, "learning_rate": 7.01151261922731e-06, "loss": 0.1734, "step": 27535 }, { "epoch": 0.5967109397005612, "grad_norm": 1.2421936988830566, "learning_rate": 7.00826487412618e-06, "loss": 0.1626, "step": 27540 }, { "epoch": 0.5968192750200421, "grad_norm": 1.810028314590454, "learning_rate": 7.005017475572748e-06, "loss": 0.159, "step": 27545 }, { "epoch": 0.5969276103395229, "grad_norm": 1.255969524383545, "learning_rate": 7.0017704239431775e-06, "loss": 0.2214, "step": 27550 }, { "epoch": 0.5970359456590038, "grad_norm": 1.463921308517456, "learning_rate": 6.99852371961359e-06, "loss": 0.0903, "step": 27555 }, { "epoch": 0.5971442809784846, "grad_norm": 2.6095917224884033, "learning_rate": 6.9952773629600665e-06, "loss": 0.2299, "step": 27560 }, { "epoch": 0.5972526162979654, "grad_norm": 1.1572400331497192, "learning_rate": 6.992031354358651e-06, "loss": 0.1746, "step": 27565 }, { "epoch": 0.5973609516174463, "grad_norm": 1.787436604499817, "learning_rate": 6.9887856941853426e-06, "loss": 0.1998, "step": 27570 }, { "epoch": 0.5974692869369271, "grad_norm": 1.3621906042099, "learning_rate": 6.985540382816104e-06, "loss": 0.1147, "step": 27575 }, { "epoch": 0.597577622256408, "grad_norm": 1.4472849369049072, "learning_rate": 6.982295420626854e-06, "loss": 0.2034, "step": 27580 }, { "epoch": 0.5976859575758889, "grad_norm": 2.7363128662109375, "learning_rate": 6.9790508079934745e-06, "loss": 0.2328, "step": 27585 }, { "epoch": 0.5977942928953698, "grad_norm": 2.1816446781158447, "learning_rate": 6.975806545291807e-06, "loss": 0.1698, "step": 27590 }, { "epoch": 0.5979026282148506, "grad_norm": 1.6004916429519653, "learning_rate": 6.972562632897648e-06, "loss": 0.1849, "step": 27595 }, { "epoch": 0.5980109635343315, "grad_norm": 1.9980331659317017, "learning_rate": 6.969319071186758e-06, "loss": 0.1386, "step": 27600 }, { "epoch": 0.5981192988538123, "grad_norm": 1.3313581943511963, "learning_rate": 6.966075860534852e-06, "loss": 0.1689, "step": 27605 }, { "epoch": 0.5982276341732932, "grad_norm": 2.1657259464263916, "learning_rate": 6.9628330013176105e-06, "loss": 0.1737, "step": 27610 }, { "epoch": 0.598335969492774, "grad_norm": 1.7381948232650757, "learning_rate": 6.959590493910668e-06, "loss": 0.1509, "step": 27615 }, { "epoch": 0.5984443048122549, "grad_norm": 2.1967966556549072, "learning_rate": 6.956348338689623e-06, "loss": 0.2437, "step": 27620 }, { "epoch": 0.5985526401317357, "grad_norm": 1.5763463973999023, "learning_rate": 6.95310653603003e-06, "loss": 0.1486, "step": 27625 }, { "epoch": 0.5986609754512167, "grad_norm": 1.2587569952011108, "learning_rate": 6.949865086307398e-06, "loss": 0.2018, "step": 27630 }, { "epoch": 0.5987693107706975, "grad_norm": 2.233153820037842, "learning_rate": 6.946623989897208e-06, "loss": 0.1831, "step": 27635 }, { "epoch": 0.5988776460901784, "grad_norm": 1.1548421382904053, "learning_rate": 6.943383247174889e-06, "loss": 0.1547, "step": 27640 }, { "epoch": 0.5989859814096592, "grad_norm": 1.2076880931854248, "learning_rate": 6.940142858515833e-06, "loss": 0.0799, "step": 27645 }, { "epoch": 0.59909431672914, "grad_norm": 1.0696165561676025, "learning_rate": 6.936902824295388e-06, "loss": 0.1648, "step": 27650 }, { "epoch": 0.5992026520486209, "grad_norm": 2.143927812576294, "learning_rate": 6.933663144888868e-06, "loss": 0.1556, "step": 27655 }, { "epoch": 0.5993109873681017, "grad_norm": 1.575294017791748, "learning_rate": 6.930423820671539e-06, "loss": 0.2131, "step": 27660 }, { "epoch": 0.5994193226875826, "grad_norm": 1.5771727561950684, "learning_rate": 6.927184852018627e-06, "loss": 0.1548, "step": 27665 }, { "epoch": 0.5995276580070634, "grad_norm": 2.0632736682891846, "learning_rate": 6.923946239305321e-06, "loss": 0.2493, "step": 27670 }, { "epoch": 0.5996359933265443, "grad_norm": 1.01603102684021, "learning_rate": 6.920707982906762e-06, "loss": 0.1648, "step": 27675 }, { "epoch": 0.5997443286460252, "grad_norm": 1.775931715965271, "learning_rate": 6.917470083198053e-06, "loss": 0.1653, "step": 27680 }, { "epoch": 0.5998526639655061, "grad_norm": 1.568021535873413, "learning_rate": 6.91423254055426e-06, "loss": 0.1976, "step": 27685 }, { "epoch": 0.5999609992849869, "grad_norm": 1.1686278581619263, "learning_rate": 6.910995355350405e-06, "loss": 0.19, "step": 27690 }, { "epoch": 0.6000693346044678, "grad_norm": 1.3055914640426636, "learning_rate": 6.907758527961463e-06, "loss": 0.1774, "step": 27695 }, { "epoch": 0.6001776699239486, "grad_norm": 1.8752410411834717, "learning_rate": 6.9045220587623744e-06, "loss": 0.1543, "step": 27700 }, { "epoch": 0.6002860052434295, "grad_norm": 1.6682837009429932, "learning_rate": 6.901285948128037e-06, "loss": 0.1401, "step": 27705 }, { "epoch": 0.6003943405629103, "grad_norm": 1.6561470031738281, "learning_rate": 6.898050196433302e-06, "loss": 0.1992, "step": 27710 }, { "epoch": 0.6005026758823911, "grad_norm": 1.7295154333114624, "learning_rate": 6.894814804052984e-06, "loss": 0.2475, "step": 27715 }, { "epoch": 0.600611011201872, "grad_norm": 1.6960220336914062, "learning_rate": 6.891579771361856e-06, "loss": 0.1632, "step": 27720 }, { "epoch": 0.6007193465213528, "grad_norm": 1.386728048324585, "learning_rate": 6.888345098734646e-06, "loss": 0.2986, "step": 27725 }, { "epoch": 0.6008276818408338, "grad_norm": 1.2176084518432617, "learning_rate": 6.885110786546041e-06, "loss": 0.1606, "step": 27730 }, { "epoch": 0.6009360171603146, "grad_norm": 1.388153314590454, "learning_rate": 6.881876835170693e-06, "loss": 0.1468, "step": 27735 }, { "epoch": 0.6010443524797955, "grad_norm": 1.4082111120224, "learning_rate": 6.878643244983207e-06, "loss": 0.1351, "step": 27740 }, { "epoch": 0.6011526877992763, "grad_norm": 1.1464331150054932, "learning_rate": 6.875410016358142e-06, "loss": 0.2126, "step": 27745 }, { "epoch": 0.6012610231187572, "grad_norm": 1.0573374032974243, "learning_rate": 6.872177149670019e-06, "loss": 0.2619, "step": 27750 }, { "epoch": 0.601369358438238, "grad_norm": 1.1632707118988037, "learning_rate": 6.8689446452933195e-06, "loss": 0.1821, "step": 27755 }, { "epoch": 0.6014776937577189, "grad_norm": 2.1440954208374023, "learning_rate": 6.86571250360248e-06, "loss": 0.2003, "step": 27760 }, { "epoch": 0.6015860290771997, "grad_norm": 1.3055691719055176, "learning_rate": 6.862480724971894e-06, "loss": 0.1407, "step": 27765 }, { "epoch": 0.6016943643966806, "grad_norm": 3.1934056282043457, "learning_rate": 6.8592493097759164e-06, "loss": 0.248, "step": 27770 }, { "epoch": 0.6018026997161615, "grad_norm": 1.727267861366272, "learning_rate": 6.856018258388854e-06, "loss": 0.3325, "step": 27775 }, { "epoch": 0.6019110350356424, "grad_norm": 1.8663091659545898, "learning_rate": 6.852787571184984e-06, "loss": 0.1377, "step": 27780 }, { "epoch": 0.6020193703551232, "grad_norm": 1.2914538383483887, "learning_rate": 6.849557248538529e-06, "loss": 0.1947, "step": 27785 }, { "epoch": 0.602127705674604, "grad_norm": 1.230988621711731, "learning_rate": 6.8463272908236715e-06, "loss": 0.1796, "step": 27790 }, { "epoch": 0.6022360409940849, "grad_norm": 2.2170238494873047, "learning_rate": 6.843097698414556e-06, "loss": 0.2028, "step": 27795 }, { "epoch": 0.6023443763135657, "grad_norm": 1.073268175125122, "learning_rate": 6.839868471685283e-06, "loss": 0.225, "step": 27800 }, { "epoch": 0.6024527116330466, "grad_norm": 1.685591697692871, "learning_rate": 6.836639611009907e-06, "loss": 0.2787, "step": 27805 }, { "epoch": 0.6025610469525274, "grad_norm": 3.021040678024292, "learning_rate": 6.8334111167624454e-06, "loss": 0.2169, "step": 27810 }, { "epoch": 0.6026693822720083, "grad_norm": 1.2846683263778687, "learning_rate": 6.830182989316869e-06, "loss": 0.1237, "step": 27815 }, { "epoch": 0.6027777175914891, "grad_norm": 1.8940963745117188, "learning_rate": 6.826955229047112e-06, "loss": 0.16, "step": 27820 }, { "epoch": 0.6028860529109701, "grad_norm": 1.5876544713974, "learning_rate": 6.8237278363270565e-06, "loss": 0.1575, "step": 27825 }, { "epoch": 0.6029943882304509, "grad_norm": 2.422724723815918, "learning_rate": 6.820500811530552e-06, "loss": 0.2106, "step": 27830 }, { "epoch": 0.6031027235499318, "grad_norm": 2.1327314376831055, "learning_rate": 6.817274155031399e-06, "loss": 0.162, "step": 27835 }, { "epoch": 0.6032110588694126, "grad_norm": 1.2333639860153198, "learning_rate": 6.8140478672033595e-06, "loss": 0.2082, "step": 27840 }, { "epoch": 0.6033193941888935, "grad_norm": 1.5664151906967163, "learning_rate": 6.810821948420149e-06, "loss": 0.1814, "step": 27845 }, { "epoch": 0.6034277295083743, "grad_norm": 1.8922072649002075, "learning_rate": 6.807596399055441e-06, "loss": 0.2103, "step": 27850 }, { "epoch": 0.6035360648278552, "grad_norm": 1.4405583143234253, "learning_rate": 6.804371219482868e-06, "loss": 0.1877, "step": 27855 }, { "epoch": 0.603644400147336, "grad_norm": 1.841373085975647, "learning_rate": 6.801146410076019e-06, "loss": 0.2429, "step": 27860 }, { "epoch": 0.6037527354668168, "grad_norm": 1.3163902759552002, "learning_rate": 6.797921971208441e-06, "loss": 0.2466, "step": 27865 }, { "epoch": 0.6038610707862977, "grad_norm": 1.559907078742981, "learning_rate": 6.794697903253633e-06, "loss": 0.1824, "step": 27870 }, { "epoch": 0.6039694061057787, "grad_norm": 2.1775639057159424, "learning_rate": 6.791474206585057e-06, "loss": 0.1419, "step": 27875 }, { "epoch": 0.6040777414252595, "grad_norm": 1.219000220298767, "learning_rate": 6.788250881576133e-06, "loss": 0.1672, "step": 27880 }, { "epoch": 0.6041860767447403, "grad_norm": 1.2382874488830566, "learning_rate": 6.78502792860023e-06, "loss": 0.204, "step": 27885 }, { "epoch": 0.6042944120642212, "grad_norm": 1.5133366584777832, "learning_rate": 6.781805348030683e-06, "loss": 0.2077, "step": 27890 }, { "epoch": 0.604402747383702, "grad_norm": 1.5632264614105225, "learning_rate": 6.778583140240778e-06, "loss": 0.185, "step": 27895 }, { "epoch": 0.6045110827031829, "grad_norm": 1.3988392353057861, "learning_rate": 6.775361305603758e-06, "loss": 0.1569, "step": 27900 }, { "epoch": 0.6046194180226637, "grad_norm": 1.7274982929229736, "learning_rate": 6.772139844492827e-06, "loss": 0.3277, "step": 27905 }, { "epoch": 0.6047277533421446, "grad_norm": 1.520588994026184, "learning_rate": 6.768918757281144e-06, "loss": 0.1998, "step": 27910 }, { "epoch": 0.6048360886616254, "grad_norm": 0.8577021956443787, "learning_rate": 6.765698044341817e-06, "loss": 0.1077, "step": 27915 }, { "epoch": 0.6049444239811064, "grad_norm": 1.7137457132339478, "learning_rate": 6.762477706047921e-06, "loss": 0.2083, "step": 27920 }, { "epoch": 0.6050527593005872, "grad_norm": 1.3426424264907837, "learning_rate": 6.759257742772486e-06, "loss": 0.1409, "step": 27925 }, { "epoch": 0.6051610946200681, "grad_norm": 2.5320651531219482, "learning_rate": 6.7560381548884955e-06, "loss": 0.1934, "step": 27930 }, { "epoch": 0.6052694299395489, "grad_norm": 1.52970290184021, "learning_rate": 6.752818942768892e-06, "loss": 0.1195, "step": 27935 }, { "epoch": 0.6053777652590298, "grad_norm": 1.6048481464385986, "learning_rate": 6.749600106786569e-06, "loss": 0.2598, "step": 27940 }, { "epoch": 0.6054861005785106, "grad_norm": 1.5760513544082642, "learning_rate": 6.746381647314384e-06, "loss": 0.2142, "step": 27945 }, { "epoch": 0.6055944358979914, "grad_norm": 1.8033761978149414, "learning_rate": 6.743163564725148e-06, "loss": 0.1318, "step": 27950 }, { "epoch": 0.6057027712174723, "grad_norm": 2.2374277114868164, "learning_rate": 6.739945859391623e-06, "loss": 0.1356, "step": 27955 }, { "epoch": 0.6058111065369531, "grad_norm": 1.880859613418579, "learning_rate": 6.736728531686536e-06, "loss": 0.172, "step": 27960 }, { "epoch": 0.605919441856434, "grad_norm": 1.8998353481292725, "learning_rate": 6.733511581982564e-06, "loss": 0.1881, "step": 27965 }, { "epoch": 0.6060277771759149, "grad_norm": 2.7777485847473145, "learning_rate": 6.730295010652338e-06, "loss": 0.2759, "step": 27970 }, { "epoch": 0.6061361124953958, "grad_norm": 1.2487903833389282, "learning_rate": 6.72707881806846e-06, "loss": 0.157, "step": 27975 }, { "epoch": 0.6062444478148766, "grad_norm": 1.3738890886306763, "learning_rate": 6.723863004603472e-06, "loss": 0.1623, "step": 27980 }, { "epoch": 0.6063527831343575, "grad_norm": 1.428309679031372, "learning_rate": 6.720647570629881e-06, "loss": 0.1313, "step": 27985 }, { "epoch": 0.6064611184538383, "grad_norm": 2.0905303955078125, "learning_rate": 6.717432516520142e-06, "loss": 0.1402, "step": 27990 }, { "epoch": 0.6065694537733192, "grad_norm": 1.3528180122375488, "learning_rate": 6.714217842646673e-06, "loss": 0.1537, "step": 27995 }, { "epoch": 0.6066777890928, "grad_norm": 1.7669562101364136, "learning_rate": 6.711003549381847e-06, "loss": 0.1292, "step": 28000 }, { "epoch": 0.6067861244122809, "grad_norm": 0.9815248250961304, "learning_rate": 6.707789637097989e-06, "loss": 0.2039, "step": 28005 }, { "epoch": 0.6068944597317617, "grad_norm": 1.389741063117981, "learning_rate": 6.704576106167384e-06, "loss": 0.2168, "step": 28010 }, { "epoch": 0.6070027950512427, "grad_norm": 2.215442180633545, "learning_rate": 6.70136295696227e-06, "loss": 0.2764, "step": 28015 }, { "epoch": 0.6071111303707235, "grad_norm": 1.3801182508468628, "learning_rate": 6.698150189854849e-06, "loss": 0.2153, "step": 28020 }, { "epoch": 0.6072194656902044, "grad_norm": 1.764917254447937, "learning_rate": 6.694937805217263e-06, "loss": 0.1936, "step": 28025 }, { "epoch": 0.6073278010096852, "grad_norm": 1.6225169897079468, "learning_rate": 6.691725803421624e-06, "loss": 0.1622, "step": 28030 }, { "epoch": 0.607436136329166, "grad_norm": 1.6760377883911133, "learning_rate": 6.688514184839992e-06, "loss": 0.1141, "step": 28035 }, { "epoch": 0.6075444716486469, "grad_norm": 1.7769911289215088, "learning_rate": 6.685302949844386e-06, "loss": 0.1679, "step": 28040 }, { "epoch": 0.6076528069681277, "grad_norm": 1.6419343948364258, "learning_rate": 6.682092098806778e-06, "loss": 0.1266, "step": 28045 }, { "epoch": 0.6077611422876086, "grad_norm": 2.2832605838775635, "learning_rate": 6.6788816320991e-06, "loss": 0.1314, "step": 28050 }, { "epoch": 0.6078694776070894, "grad_norm": 1.4493426084518433, "learning_rate": 6.675671550093234e-06, "loss": 0.2307, "step": 28055 }, { "epoch": 0.6079778129265703, "grad_norm": 2.4119348526000977, "learning_rate": 6.672461853161021e-06, "loss": 0.1189, "step": 28060 }, { "epoch": 0.6080861482460512, "grad_norm": 1.5821311473846436, "learning_rate": 6.669252541674251e-06, "loss": 0.085, "step": 28065 }, { "epoch": 0.6081944835655321, "grad_norm": 1.402620792388916, "learning_rate": 6.666043616004685e-06, "loss": 0.1272, "step": 28070 }, { "epoch": 0.6083028188850129, "grad_norm": 1.7857186794281006, "learning_rate": 6.662835076524021e-06, "loss": 0.2549, "step": 28075 }, { "epoch": 0.6084111542044938, "grad_norm": 1.9238207340240479, "learning_rate": 6.6596269236039236e-06, "loss": 0.1711, "step": 28080 }, { "epoch": 0.6085194895239746, "grad_norm": 1.5561641454696655, "learning_rate": 6.656419157616008e-06, "loss": 0.1903, "step": 28085 }, { "epoch": 0.6086278248434555, "grad_norm": 1.752535104751587, "learning_rate": 6.6532117789318475e-06, "loss": 0.2138, "step": 28090 }, { "epoch": 0.6087361601629363, "grad_norm": 2.190704345703125, "learning_rate": 6.650004787922966e-06, "loss": 0.1782, "step": 28095 }, { "epoch": 0.6088444954824171, "grad_norm": 1.181704044342041, "learning_rate": 6.646798184960848e-06, "loss": 0.179, "step": 28100 }, { "epoch": 0.608952830801898, "grad_norm": 1.4747822284698486, "learning_rate": 6.643591970416928e-06, "loss": 0.1262, "step": 28105 }, { "epoch": 0.6090611661213788, "grad_norm": 1.1042544841766357, "learning_rate": 6.6403861446626005e-06, "loss": 0.154, "step": 28110 }, { "epoch": 0.6091695014408598, "grad_norm": 2.3377721309661865, "learning_rate": 6.637180708069208e-06, "loss": 0.121, "step": 28115 }, { "epoch": 0.6092778367603406, "grad_norm": 2.107616901397705, "learning_rate": 6.633975661008059e-06, "loss": 0.1904, "step": 28120 }, { "epoch": 0.6093861720798215, "grad_norm": 1.4802442789077759, "learning_rate": 6.630771003850405e-06, "loss": 0.2297, "step": 28125 }, { "epoch": 0.6094945073993023, "grad_norm": 1.3732471466064453, "learning_rate": 6.627566736967459e-06, "loss": 0.14, "step": 28130 }, { "epoch": 0.6096028427187832, "grad_norm": 1.3585273027420044, "learning_rate": 6.624362860730389e-06, "loss": 0.1603, "step": 28135 }, { "epoch": 0.609711178038264, "grad_norm": 2.3947343826293945, "learning_rate": 6.6211593755103136e-06, "loss": 0.1866, "step": 28140 }, { "epoch": 0.6098195133577449, "grad_norm": 0.9813748002052307, "learning_rate": 6.617956281678309e-06, "loss": 0.2009, "step": 28145 }, { "epoch": 0.6099278486772257, "grad_norm": 1.733061671257019, "learning_rate": 6.6147535796054075e-06, "loss": 0.2032, "step": 28150 }, { "epoch": 0.6100361839967066, "grad_norm": 1.5827198028564453, "learning_rate": 6.611551269662594e-06, "loss": 0.2111, "step": 28155 }, { "epoch": 0.6101445193161875, "grad_norm": 1.5586143732070923, "learning_rate": 6.6083493522208005e-06, "loss": 0.0993, "step": 28160 }, { "epoch": 0.6102528546356684, "grad_norm": 3.3704123497009277, "learning_rate": 6.605147827650933e-06, "loss": 0.2361, "step": 28165 }, { "epoch": 0.6103611899551492, "grad_norm": 2.3018906116485596, "learning_rate": 6.601946696323833e-06, "loss": 0.1433, "step": 28170 }, { "epoch": 0.61046952527463, "grad_norm": 2.0793092250823975, "learning_rate": 6.598745958610307e-06, "loss": 0.2345, "step": 28175 }, { "epoch": 0.6105778605941109, "grad_norm": 0.8456293940544128, "learning_rate": 6.59554561488111e-06, "loss": 0.1828, "step": 28180 }, { "epoch": 0.6106861959135917, "grad_norm": 1.5321331024169922, "learning_rate": 6.592345665506956e-06, "loss": 0.2126, "step": 28185 }, { "epoch": 0.6107945312330726, "grad_norm": 3.354314088821411, "learning_rate": 6.589146110858511e-06, "loss": 0.1941, "step": 28190 }, { "epoch": 0.6109028665525534, "grad_norm": 1.9531136751174927, "learning_rate": 6.585946951306393e-06, "loss": 0.1562, "step": 28195 }, { "epoch": 0.6110112018720343, "grad_norm": 1.435215950012207, "learning_rate": 6.582748187221177e-06, "loss": 0.2014, "step": 28200 }, { "epoch": 0.6111195371915151, "grad_norm": 1.6225930452346802, "learning_rate": 6.5795498189733945e-06, "loss": 0.1406, "step": 28205 }, { "epoch": 0.6112278725109961, "grad_norm": 1.5738919973373413, "learning_rate": 6.576351846933522e-06, "loss": 0.2137, "step": 28210 }, { "epoch": 0.6113362078304769, "grad_norm": 1.6772360801696777, "learning_rate": 6.573154271472006e-06, "loss": 0.2162, "step": 28215 }, { "epoch": 0.6114445431499578, "grad_norm": 1.379615068435669, "learning_rate": 6.569957092959234e-06, "loss": 0.2173, "step": 28220 }, { "epoch": 0.6115528784694386, "grad_norm": 0.9127633571624756, "learning_rate": 6.566760311765552e-06, "loss": 0.1675, "step": 28225 }, { "epoch": 0.6116612137889195, "grad_norm": 1.8981847763061523, "learning_rate": 6.563563928261256e-06, "loss": 0.1866, "step": 28230 }, { "epoch": 0.6117695491084003, "grad_norm": 1.9680895805358887, "learning_rate": 6.560367942816602e-06, "loss": 0.2574, "step": 28235 }, { "epoch": 0.6118778844278812, "grad_norm": 1.3233458995819092, "learning_rate": 6.557172355801797e-06, "loss": 0.192, "step": 28240 }, { "epoch": 0.611986219747362, "grad_norm": 1.6946830749511719, "learning_rate": 6.553977167586999e-06, "loss": 0.1616, "step": 28245 }, { "epoch": 0.6120945550668428, "grad_norm": 1.8287330865859985, "learning_rate": 6.550782378542325e-06, "loss": 0.1944, "step": 28250 }, { "epoch": 0.6122028903863237, "grad_norm": 2.1389410495758057, "learning_rate": 6.547587989037845e-06, "loss": 0.1728, "step": 28255 }, { "epoch": 0.6123112257058047, "grad_norm": 1.861887812614441, "learning_rate": 6.5443939994435745e-06, "loss": 0.1712, "step": 28260 }, { "epoch": 0.6124195610252855, "grad_norm": 1.977396845817566, "learning_rate": 6.5412004101294975e-06, "loss": 0.1815, "step": 28265 }, { "epoch": 0.6125278963447663, "grad_norm": 0.8570895791053772, "learning_rate": 6.538007221465541e-06, "loss": 0.1124, "step": 28270 }, { "epoch": 0.6126362316642472, "grad_norm": 1.0525957345962524, "learning_rate": 6.534814433821585e-06, "loss": 0.1082, "step": 28275 }, { "epoch": 0.612744566983728, "grad_norm": 1.7660876512527466, "learning_rate": 6.531622047567472e-06, "loss": 0.2195, "step": 28280 }, { "epoch": 0.6128529023032089, "grad_norm": 1.6357207298278809, "learning_rate": 6.528430063072986e-06, "loss": 0.2032, "step": 28285 }, { "epoch": 0.6129612376226897, "grad_norm": 0.7294777035713196, "learning_rate": 6.5252384807078735e-06, "loss": 0.129, "step": 28290 }, { "epoch": 0.6130695729421706, "grad_norm": 1.5503201484680176, "learning_rate": 6.522047300841831e-06, "loss": 0.2217, "step": 28295 }, { "epoch": 0.6131779082616514, "grad_norm": 1.18230140209198, "learning_rate": 6.518856523844509e-06, "loss": 0.1678, "step": 28300 }, { "epoch": 0.6132862435811324, "grad_norm": 2.172349691390991, "learning_rate": 6.515666150085509e-06, "loss": 0.1888, "step": 28305 }, { "epoch": 0.6133945789006132, "grad_norm": 1.637771487236023, "learning_rate": 6.5124761799343925e-06, "loss": 0.1636, "step": 28310 }, { "epoch": 0.6135029142200941, "grad_norm": 2.1168134212493896, "learning_rate": 6.509286613760668e-06, "loss": 0.1904, "step": 28315 }, { "epoch": 0.6136112495395749, "grad_norm": 1.9579684734344482, "learning_rate": 6.5060974519337975e-06, "loss": 0.1153, "step": 28320 }, { "epoch": 0.6137195848590558, "grad_norm": 1.8772938251495361, "learning_rate": 6.502908694823198e-06, "loss": 0.1555, "step": 28325 }, { "epoch": 0.6138279201785366, "grad_norm": 1.2994275093078613, "learning_rate": 6.499720342798241e-06, "loss": 0.2091, "step": 28330 }, { "epoch": 0.6139362554980174, "grad_norm": 1.0893778800964355, "learning_rate": 6.496532396228248e-06, "loss": 0.1776, "step": 28335 }, { "epoch": 0.6140445908174983, "grad_norm": 2.485452651977539, "learning_rate": 6.493344855482495e-06, "loss": 0.2726, "step": 28340 }, { "epoch": 0.6141529261369791, "grad_norm": 1.4605785608291626, "learning_rate": 6.49015772093021e-06, "loss": 0.2317, "step": 28345 }, { "epoch": 0.61426126145646, "grad_norm": 0.9267185926437378, "learning_rate": 6.486970992940576e-06, "loss": 0.1148, "step": 28350 }, { "epoch": 0.6143695967759409, "grad_norm": 1.6858489513397217, "learning_rate": 6.483784671882725e-06, "loss": 0.2085, "step": 28355 }, { "epoch": 0.6144779320954218, "grad_norm": 1.2470563650131226, "learning_rate": 6.480598758125749e-06, "loss": 0.1628, "step": 28360 }, { "epoch": 0.6145862674149026, "grad_norm": 1.2674002647399902, "learning_rate": 6.477413252038687e-06, "loss": 0.1898, "step": 28365 }, { "epoch": 0.6146946027343835, "grad_norm": 1.5575658082962036, "learning_rate": 6.474228153990532e-06, "loss": 0.2422, "step": 28370 }, { "epoch": 0.6148029380538643, "grad_norm": 2.0656042098999023, "learning_rate": 6.471043464350227e-06, "loss": 0.2419, "step": 28375 }, { "epoch": 0.6149112733733452, "grad_norm": 1.5499294996261597, "learning_rate": 6.467859183486673e-06, "loss": 0.1391, "step": 28380 }, { "epoch": 0.615019608692826, "grad_norm": 1.8433127403259277, "learning_rate": 6.464675311768723e-06, "loss": 0.106, "step": 28385 }, { "epoch": 0.6151279440123069, "grad_norm": 1.517484188079834, "learning_rate": 6.461491849565178e-06, "loss": 0.1705, "step": 28390 }, { "epoch": 0.6152362793317877, "grad_norm": 2.3075523376464844, "learning_rate": 6.4583087972447986e-06, "loss": 0.1767, "step": 28395 }, { "epoch": 0.6153446146512687, "grad_norm": 0.5285857319831848, "learning_rate": 6.455126155176283e-06, "loss": 0.1411, "step": 28400 }, { "epoch": 0.6154529499707495, "grad_norm": 1.3328286409378052, "learning_rate": 6.4519439237283045e-06, "loss": 0.1008, "step": 28405 }, { "epoch": 0.6155612852902304, "grad_norm": 1.5517218112945557, "learning_rate": 6.448762103269473e-06, "loss": 0.206, "step": 28410 }, { "epoch": 0.6156696206097112, "grad_norm": 1.2169520854949951, "learning_rate": 6.445580694168354e-06, "loss": 0.1013, "step": 28415 }, { "epoch": 0.615777955929192, "grad_norm": 1.6241286993026733, "learning_rate": 6.442399696793466e-06, "loss": 0.1481, "step": 28420 }, { "epoch": 0.6158862912486729, "grad_norm": 2.3087317943573, "learning_rate": 6.43921911151328e-06, "loss": 0.1637, "step": 28425 }, { "epoch": 0.6159946265681537, "grad_norm": 2.0836448669433594, "learning_rate": 6.436038938696223e-06, "loss": 0.1305, "step": 28430 }, { "epoch": 0.6161029618876346, "grad_norm": 1.4669153690338135, "learning_rate": 6.432859178710663e-06, "loss": 0.2381, "step": 28435 }, { "epoch": 0.6162112972071154, "grad_norm": 0.9354986548423767, "learning_rate": 6.4296798319249335e-06, "loss": 0.1593, "step": 28440 }, { "epoch": 0.6163196325265963, "grad_norm": 1.6515793800354004, "learning_rate": 6.42650089870731e-06, "loss": 0.191, "step": 28445 }, { "epoch": 0.6164279678460772, "grad_norm": 1.80220365524292, "learning_rate": 6.423322379426023e-06, "loss": 0.2158, "step": 28450 }, { "epoch": 0.6165363031655581, "grad_norm": 2.2085912227630615, "learning_rate": 6.420144274449264e-06, "loss": 0.2239, "step": 28455 }, { "epoch": 0.6166446384850389, "grad_norm": 2.117821216583252, "learning_rate": 6.416966584145163e-06, "loss": 0.1798, "step": 28460 }, { "epoch": 0.6167529738045198, "grad_norm": 2.2894182205200195, "learning_rate": 6.413789308881812e-06, "loss": 0.2343, "step": 28465 }, { "epoch": 0.6168613091240006, "grad_norm": 0.7464118003845215, "learning_rate": 6.410612449027249e-06, "loss": 0.2369, "step": 28470 }, { "epoch": 0.6169696444434815, "grad_norm": 1.5582941770553589, "learning_rate": 6.407436004949463e-06, "loss": 0.1631, "step": 28475 }, { "epoch": 0.6170779797629623, "grad_norm": 0.9184187054634094, "learning_rate": 6.404259977016398e-06, "loss": 0.1809, "step": 28480 }, { "epoch": 0.6171863150824431, "grad_norm": 1.8722445964813232, "learning_rate": 6.401084365595952e-06, "loss": 0.2318, "step": 28485 }, { "epoch": 0.617294650401924, "grad_norm": 1.1063392162322998, "learning_rate": 6.397909171055969e-06, "loss": 0.1856, "step": 28490 }, { "epoch": 0.6174029857214048, "grad_norm": 1.4576095342636108, "learning_rate": 6.39473439376425e-06, "loss": 0.1887, "step": 28495 }, { "epoch": 0.6175113210408858, "grad_norm": 0.9869041442871094, "learning_rate": 6.391560034088542e-06, "loss": 0.2036, "step": 28500 }, { "epoch": 0.6176196563603666, "grad_norm": 1.6505151987075806, "learning_rate": 6.388386092396554e-06, "loss": 0.1613, "step": 28505 }, { "epoch": 0.6177279916798475, "grad_norm": 0.8136293292045593, "learning_rate": 6.385212569055934e-06, "loss": 0.1392, "step": 28510 }, { "epoch": 0.6178363269993283, "grad_norm": 1.4170540571212769, "learning_rate": 6.382039464434287e-06, "loss": 0.1884, "step": 28515 }, { "epoch": 0.6179446623188092, "grad_norm": 1.613797664642334, "learning_rate": 6.378866778899173e-06, "loss": 0.1874, "step": 28520 }, { "epoch": 0.61805299763829, "grad_norm": 1.3091692924499512, "learning_rate": 6.375694512818097e-06, "loss": 0.1691, "step": 28525 }, { "epoch": 0.6181613329577709, "grad_norm": 2.297896385192871, "learning_rate": 6.372522666558519e-06, "loss": 0.275, "step": 28530 }, { "epoch": 0.6182696682772517, "grad_norm": 1.6574896574020386, "learning_rate": 6.369351240487851e-06, "loss": 0.2348, "step": 28535 }, { "epoch": 0.6183780035967326, "grad_norm": 1.5327095985412598, "learning_rate": 6.366180234973456e-06, "loss": 0.193, "step": 28540 }, { "epoch": 0.6184863389162135, "grad_norm": 1.7975053787231445, "learning_rate": 6.363009650382642e-06, "loss": 0.127, "step": 28545 }, { "epoch": 0.6185946742356944, "grad_norm": 1.304384469985962, "learning_rate": 6.359839487082682e-06, "loss": 0.1861, "step": 28550 }, { "epoch": 0.6187030095551752, "grad_norm": 1.3849010467529297, "learning_rate": 6.356669745440785e-06, "loss": 0.144, "step": 28555 }, { "epoch": 0.618811344874656, "grad_norm": 1.9361332654953003, "learning_rate": 6.353500425824124e-06, "loss": 0.1493, "step": 28560 }, { "epoch": 0.6189196801941369, "grad_norm": 1.4468334913253784, "learning_rate": 6.350331528599811e-06, "loss": 0.1418, "step": 28565 }, { "epoch": 0.6190280155136177, "grad_norm": 1.2724623680114746, "learning_rate": 6.347163054134921e-06, "loss": 0.1978, "step": 28570 }, { "epoch": 0.6191363508330986, "grad_norm": 2.580726146697998, "learning_rate": 6.34399500279647e-06, "loss": 0.1654, "step": 28575 }, { "epoch": 0.6192446861525794, "grad_norm": 1.31072199344635, "learning_rate": 6.340827374951433e-06, "loss": 0.0958, "step": 28580 }, { "epoch": 0.6193530214720603, "grad_norm": 1.7135854959487915, "learning_rate": 6.3376601709667285e-06, "loss": 0.1405, "step": 28585 }, { "epoch": 0.6194613567915411, "grad_norm": 1.4548667669296265, "learning_rate": 6.334493391209231e-06, "loss": 0.1897, "step": 28590 }, { "epoch": 0.6195696921110221, "grad_norm": 1.5346161127090454, "learning_rate": 6.3313270360457644e-06, "loss": 0.1815, "step": 28595 }, { "epoch": 0.6196780274305029, "grad_norm": 1.0376005172729492, "learning_rate": 6.328161105843105e-06, "loss": 0.109, "step": 28600 }, { "epoch": 0.6197863627499838, "grad_norm": 2.0512948036193848, "learning_rate": 6.324995600967978e-06, "loss": 0.2667, "step": 28605 }, { "epoch": 0.6198946980694646, "grad_norm": 1.0252330303192139, "learning_rate": 6.321830521787058e-06, "loss": 0.206, "step": 28610 }, { "epoch": 0.6200030333889455, "grad_norm": 1.651466965675354, "learning_rate": 6.318665868666974e-06, "loss": 0.1399, "step": 28615 }, { "epoch": 0.6201113687084263, "grad_norm": 2.3500561714172363, "learning_rate": 6.3155016419743025e-06, "loss": 0.2392, "step": 28620 }, { "epoch": 0.6202197040279072, "grad_norm": 1.2757502794265747, "learning_rate": 6.312337842075571e-06, "loss": 0.1914, "step": 28625 }, { "epoch": 0.620328039347388, "grad_norm": 3.0246400833129883, "learning_rate": 6.309174469337261e-06, "loss": 0.1589, "step": 28630 }, { "epoch": 0.6204363746668689, "grad_norm": 1.6636914014816284, "learning_rate": 6.306011524125799e-06, "loss": 0.1261, "step": 28635 }, { "epoch": 0.6205447099863497, "grad_norm": 1.3465626239776611, "learning_rate": 6.302849006807569e-06, "loss": 0.1422, "step": 28640 }, { "epoch": 0.6206530453058307, "grad_norm": 1.5510444641113281, "learning_rate": 6.2996869177488905e-06, "loss": 0.1369, "step": 28645 }, { "epoch": 0.6207613806253115, "grad_norm": 1.507808804512024, "learning_rate": 6.296525257316057e-06, "loss": 0.1132, "step": 28650 }, { "epoch": 0.6208697159447923, "grad_norm": 1.7831127643585205, "learning_rate": 6.293364025875295e-06, "loss": 0.1373, "step": 28655 }, { "epoch": 0.6209780512642732, "grad_norm": 1.4793833494186401, "learning_rate": 6.290203223792785e-06, "loss": 0.1874, "step": 28660 }, { "epoch": 0.621086386583754, "grad_norm": 1.994454264640808, "learning_rate": 6.287042851434657e-06, "loss": 0.2206, "step": 28665 }, { "epoch": 0.6211947219032349, "grad_norm": 1.4613134860992432, "learning_rate": 6.283882909166996e-06, "loss": 0.1205, "step": 28670 }, { "epoch": 0.6213030572227157, "grad_norm": 1.9978312253952026, "learning_rate": 6.280723397355834e-06, "loss": 0.1789, "step": 28675 }, { "epoch": 0.6214113925421966, "grad_norm": 1.725951075553894, "learning_rate": 6.277564316367151e-06, "loss": 0.2065, "step": 28680 }, { "epoch": 0.6215197278616774, "grad_norm": 1.3922927379608154, "learning_rate": 6.27440566656688e-06, "loss": 0.1772, "step": 28685 }, { "epoch": 0.6216280631811584, "grad_norm": 1.6750702857971191, "learning_rate": 6.271247448320897e-06, "loss": 0.1967, "step": 28690 }, { "epoch": 0.6217363985006392, "grad_norm": 1.7476600408554077, "learning_rate": 6.268089661995046e-06, "loss": 0.1924, "step": 28695 }, { "epoch": 0.6218447338201201, "grad_norm": 2.290681838989258, "learning_rate": 6.264932307955103e-06, "loss": 0.1553, "step": 28700 }, { "epoch": 0.6219530691396009, "grad_norm": 0.8397970199584961, "learning_rate": 6.261775386566802e-06, "loss": 0.1305, "step": 28705 }, { "epoch": 0.6220614044590818, "grad_norm": 1.3786271810531616, "learning_rate": 6.258618898195824e-06, "loss": 0.173, "step": 28710 }, { "epoch": 0.6221697397785626, "grad_norm": 0.9098799824714661, "learning_rate": 6.2554628432078006e-06, "loss": 0.233, "step": 28715 }, { "epoch": 0.6222780750980434, "grad_norm": 1.5984183549880981, "learning_rate": 6.252307221968312e-06, "loss": 0.1393, "step": 28720 }, { "epoch": 0.6223864104175243, "grad_norm": 1.7682552337646484, "learning_rate": 6.2491520348428915e-06, "loss": 0.2317, "step": 28725 }, { "epoch": 0.6224947457370051, "grad_norm": 1.85713791847229, "learning_rate": 6.245997282197021e-06, "loss": 0.1393, "step": 28730 }, { "epoch": 0.622603081056486, "grad_norm": 0.9871297478675842, "learning_rate": 6.2428429643961295e-06, "loss": 0.1625, "step": 28735 }, { "epoch": 0.6227114163759669, "grad_norm": 1.2156445980072021, "learning_rate": 6.2396890818055955e-06, "loss": 0.1171, "step": 28740 }, { "epoch": 0.6228197516954478, "grad_norm": 0.9458218812942505, "learning_rate": 6.236535634790757e-06, "loss": 0.2755, "step": 28745 }, { "epoch": 0.6229280870149286, "grad_norm": 1.8319640159606934, "learning_rate": 6.233382623716886e-06, "loss": 0.1727, "step": 28750 }, { "epoch": 0.6230364223344095, "grad_norm": 1.53866708278656, "learning_rate": 6.2302300489492154e-06, "loss": 0.2093, "step": 28755 }, { "epoch": 0.6231447576538903, "grad_norm": 1.9216009378433228, "learning_rate": 6.227077910852921e-06, "loss": 0.2646, "step": 28760 }, { "epoch": 0.6232530929733712, "grad_norm": 1.5539867877960205, "learning_rate": 6.223926209793134e-06, "loss": 0.2257, "step": 28765 }, { "epoch": 0.623361428292852, "grad_norm": 1.3003740310668945, "learning_rate": 6.220774946134928e-06, "loss": 0.1539, "step": 28770 }, { "epoch": 0.6234697636123329, "grad_norm": 1.1760492324829102, "learning_rate": 6.217624120243334e-06, "loss": 0.1169, "step": 28775 }, { "epoch": 0.6235780989318137, "grad_norm": 1.7264872789382935, "learning_rate": 6.214473732483324e-06, "loss": 0.0902, "step": 28780 }, { "epoch": 0.6236864342512946, "grad_norm": 1.1833887100219727, "learning_rate": 6.211323783219826e-06, "loss": 0.115, "step": 28785 }, { "epoch": 0.6237947695707755, "grad_norm": 1.5675561428070068, "learning_rate": 6.208174272817711e-06, "loss": 0.2235, "step": 28790 }, { "epoch": 0.6239031048902564, "grad_norm": 2.0864477157592773, "learning_rate": 6.205025201641806e-06, "loss": 0.1317, "step": 28795 }, { "epoch": 0.6240114402097372, "grad_norm": 1.2066742181777954, "learning_rate": 6.2018765700568836e-06, "loss": 0.2439, "step": 28800 }, { "epoch": 0.624119775529218, "grad_norm": 1.7923930883407593, "learning_rate": 6.198728378427665e-06, "loss": 0.1454, "step": 28805 }, { "epoch": 0.6242281108486989, "grad_norm": 1.7269619703292847, "learning_rate": 6.19558062711882e-06, "loss": 0.1484, "step": 28810 }, { "epoch": 0.6243364461681797, "grad_norm": 1.4918510913848877, "learning_rate": 6.19243331649497e-06, "loss": 0.2136, "step": 28815 }, { "epoch": 0.6244447814876606, "grad_norm": 1.626234769821167, "learning_rate": 6.189286446920681e-06, "loss": 0.1183, "step": 28820 }, { "epoch": 0.6245531168071414, "grad_norm": 1.8640943765640259, "learning_rate": 6.186140018760475e-06, "loss": 0.1472, "step": 28825 }, { "epoch": 0.6246614521266223, "grad_norm": 1.1438379287719727, "learning_rate": 6.1829940323788155e-06, "loss": 0.2213, "step": 28830 }, { "epoch": 0.6247697874461032, "grad_norm": 1.4786195755004883, "learning_rate": 6.179848488140116e-06, "loss": 0.1265, "step": 28835 }, { "epoch": 0.6248781227655841, "grad_norm": 1.7694851160049438, "learning_rate": 6.176703386408744e-06, "loss": 0.2169, "step": 28840 }, { "epoch": 0.6249864580850649, "grad_norm": 1.4732815027236938, "learning_rate": 6.173558727549014e-06, "loss": 0.2108, "step": 28845 }, { "epoch": 0.6250947934045458, "grad_norm": 1.38979971408844, "learning_rate": 6.1704145119251845e-06, "loss": 0.2349, "step": 28850 }, { "epoch": 0.6252031287240266, "grad_norm": 1.7754398584365845, "learning_rate": 6.167270739901468e-06, "loss": 0.1875, "step": 28855 }, { "epoch": 0.6253114640435075, "grad_norm": 1.1087700128555298, "learning_rate": 6.164127411842021e-06, "loss": 0.1793, "step": 28860 }, { "epoch": 0.6254197993629883, "grad_norm": 1.0438055992126465, "learning_rate": 6.16098452811095e-06, "loss": 0.1764, "step": 28865 }, { "epoch": 0.6255281346824692, "grad_norm": 2.063706398010254, "learning_rate": 6.157842089072315e-06, "loss": 0.232, "step": 28870 }, { "epoch": 0.62563647000195, "grad_norm": 1.3816196918487549, "learning_rate": 6.154700095090118e-06, "loss": 0.1868, "step": 28875 }, { "epoch": 0.6257448053214308, "grad_norm": 2.276247262954712, "learning_rate": 6.1515585465283155e-06, "loss": 0.2664, "step": 28880 }, { "epoch": 0.6258531406409118, "grad_norm": 1.54007089138031, "learning_rate": 6.148417443750798e-06, "loss": 0.1773, "step": 28885 }, { "epoch": 0.6259614759603926, "grad_norm": 1.4752815961837769, "learning_rate": 6.145276787121428e-06, "loss": 0.2781, "step": 28890 }, { "epoch": 0.6260698112798735, "grad_norm": 1.854849100112915, "learning_rate": 6.142136577003997e-06, "loss": 0.1745, "step": 28895 }, { "epoch": 0.6261781465993543, "grad_norm": 1.905091643333435, "learning_rate": 6.1389968137622545e-06, "loss": 0.1517, "step": 28900 }, { "epoch": 0.6262864819188352, "grad_norm": 1.031184196472168, "learning_rate": 6.135857497759893e-06, "loss": 0.2416, "step": 28905 }, { "epoch": 0.626394817238316, "grad_norm": 1.653691053390503, "learning_rate": 6.132718629360554e-06, "loss": 0.1292, "step": 28910 }, { "epoch": 0.6265031525577969, "grad_norm": 1.8767961263656616, "learning_rate": 6.129580208927835e-06, "loss": 0.2301, "step": 28915 }, { "epoch": 0.6266114878772777, "grad_norm": 1.7031733989715576, "learning_rate": 6.126442236825265e-06, "loss": 0.1653, "step": 28920 }, { "epoch": 0.6267198231967586, "grad_norm": 0.7530261278152466, "learning_rate": 6.123304713416338e-06, "loss": 0.1565, "step": 28925 }, { "epoch": 0.6268281585162395, "grad_norm": 1.5048444271087646, "learning_rate": 6.1201676390644815e-06, "loss": 0.1817, "step": 28930 }, { "epoch": 0.6269364938357204, "grad_norm": 1.2653576135635376, "learning_rate": 6.117031014133088e-06, "loss": 0.1486, "step": 28935 }, { "epoch": 0.6270448291552012, "grad_norm": 1.5119503736495972, "learning_rate": 6.113894838985486e-06, "loss": 0.1583, "step": 28940 }, { "epoch": 0.6271531644746821, "grad_norm": 1.8382772207260132, "learning_rate": 6.110759113984952e-06, "loss": 0.176, "step": 28945 }, { "epoch": 0.6272614997941629, "grad_norm": 1.172737717628479, "learning_rate": 6.107623839494716e-06, "loss": 0.1292, "step": 28950 }, { "epoch": 0.6273698351136437, "grad_norm": 2.1324501037597656, "learning_rate": 6.104489015877949e-06, "loss": 0.1722, "step": 28955 }, { "epoch": 0.6274781704331246, "grad_norm": 2.0591933727264404, "learning_rate": 6.101354643497775e-06, "loss": 0.2101, "step": 28960 }, { "epoch": 0.6275865057526054, "grad_norm": 1.522961139678955, "learning_rate": 6.098220722717263e-06, "loss": 0.1232, "step": 28965 }, { "epoch": 0.6276948410720863, "grad_norm": 1.0364680290222168, "learning_rate": 6.095087253899432e-06, "loss": 0.1979, "step": 28970 }, { "epoch": 0.6278031763915671, "grad_norm": 1.1444462537765503, "learning_rate": 6.091954237407247e-06, "loss": 0.2766, "step": 28975 }, { "epoch": 0.6279115117110481, "grad_norm": 1.4278292655944824, "learning_rate": 6.088821673603616e-06, "loss": 0.1008, "step": 28980 }, { "epoch": 0.6280198470305289, "grad_norm": 1.835537314414978, "learning_rate": 6.0856895628514105e-06, "loss": 0.1838, "step": 28985 }, { "epoch": 0.6281281823500098, "grad_norm": 1.2963950634002686, "learning_rate": 6.082557905513433e-06, "loss": 0.2621, "step": 28990 }, { "epoch": 0.6282365176694906, "grad_norm": 1.813263177871704, "learning_rate": 6.079426701952438e-06, "loss": 0.1732, "step": 28995 }, { "epoch": 0.6283448529889715, "grad_norm": 1.0586929321289062, "learning_rate": 6.07629595253113e-06, "loss": 0.0947, "step": 29000 }, { "epoch": 0.6284531883084523, "grad_norm": 1.5411187410354614, "learning_rate": 6.0731656576121575e-06, "loss": 0.1388, "step": 29005 }, { "epoch": 0.6285615236279332, "grad_norm": 2.413567543029785, "learning_rate": 6.0700358175581195e-06, "loss": 0.1673, "step": 29010 }, { "epoch": 0.628669858947414, "grad_norm": 1.666019320487976, "learning_rate": 6.066906432731563e-06, "loss": 0.1102, "step": 29015 }, { "epoch": 0.6287781942668949, "grad_norm": 1.9114372730255127, "learning_rate": 6.063777503494976e-06, "loss": 0.1864, "step": 29020 }, { "epoch": 0.6288865295863757, "grad_norm": 1.6920219659805298, "learning_rate": 6.060649030210803e-06, "loss": 0.206, "step": 29025 }, { "epoch": 0.6289948649058567, "grad_norm": 0.8555421829223633, "learning_rate": 6.0575210132414255e-06, "loss": 0.2324, "step": 29030 }, { "epoch": 0.6291032002253375, "grad_norm": 1.5864219665527344, "learning_rate": 6.054393452949181e-06, "loss": 0.1438, "step": 29035 }, { "epoch": 0.6292115355448183, "grad_norm": 1.5715820789337158, "learning_rate": 6.051266349696352e-06, "loss": 0.1797, "step": 29040 }, { "epoch": 0.6293198708642992, "grad_norm": 1.4900308847427368, "learning_rate": 6.048139703845161e-06, "loss": 0.1244, "step": 29045 }, { "epoch": 0.62942820618378, "grad_norm": 0.9089646339416504, "learning_rate": 6.045013515757789e-06, "loss": 0.1626, "step": 29050 }, { "epoch": 0.6295365415032609, "grad_norm": 1.5854030847549438, "learning_rate": 6.041887785796356e-06, "loss": 0.1594, "step": 29055 }, { "epoch": 0.6296448768227417, "grad_norm": 0.4595452547073364, "learning_rate": 6.03876251432293e-06, "loss": 0.1477, "step": 29060 }, { "epoch": 0.6297532121422226, "grad_norm": 2.316542625427246, "learning_rate": 6.035637701699527e-06, "loss": 0.1305, "step": 29065 }, { "epoch": 0.6298615474617034, "grad_norm": 0.8234489560127258, "learning_rate": 6.032513348288111e-06, "loss": 0.0904, "step": 29070 }, { "epoch": 0.6299698827811844, "grad_norm": 1.9957609176635742, "learning_rate": 6.029389454450587e-06, "loss": 0.1185, "step": 29075 }, { "epoch": 0.6300782181006652, "grad_norm": 0.7128311395645142, "learning_rate": 6.026266020548817e-06, "loss": 0.1052, "step": 29080 }, { "epoch": 0.6301865534201461, "grad_norm": 1.7855620384216309, "learning_rate": 6.023143046944604e-06, "loss": 0.1506, "step": 29085 }, { "epoch": 0.6302948887396269, "grad_norm": 2.1106250286102295, "learning_rate": 6.020020533999694e-06, "loss": 0.198, "step": 29090 }, { "epoch": 0.6304032240591078, "grad_norm": 2.2823424339294434, "learning_rate": 6.016898482075786e-06, "loss": 0.2027, "step": 29095 }, { "epoch": 0.6305115593785886, "grad_norm": 0.8557913899421692, "learning_rate": 6.013776891534522e-06, "loss": 0.1531, "step": 29100 }, { "epoch": 0.6306198946980694, "grad_norm": 1.5952662229537964, "learning_rate": 6.01065576273749e-06, "loss": 0.1688, "step": 29105 }, { "epoch": 0.6307282300175503, "grad_norm": 0.8553566336631775, "learning_rate": 6.0075350960462284e-06, "loss": 0.156, "step": 29110 }, { "epoch": 0.6308365653370311, "grad_norm": 1.8649826049804688, "learning_rate": 6.004414891822219e-06, "loss": 0.2352, "step": 29115 }, { "epoch": 0.630944900656512, "grad_norm": 2.2562742233276367, "learning_rate": 6.001295150426891e-06, "loss": 0.1633, "step": 29120 }, { "epoch": 0.6310532359759929, "grad_norm": 1.766412615776062, "learning_rate": 5.998175872221616e-06, "loss": 0.181, "step": 29125 }, { "epoch": 0.6311615712954738, "grad_norm": 2.1244616508483887, "learning_rate": 5.995057057567721e-06, "loss": 0.1864, "step": 29130 }, { "epoch": 0.6312699066149546, "grad_norm": 2.052494764328003, "learning_rate": 5.9919387068264735e-06, "loss": 0.2588, "step": 29135 }, { "epoch": 0.6313782419344355, "grad_norm": 1.2782057523727417, "learning_rate": 5.988820820359085e-06, "loss": 0.1191, "step": 29140 }, { "epoch": 0.6314865772539163, "grad_norm": 1.0520870685577393, "learning_rate": 5.985703398526716e-06, "loss": 0.1512, "step": 29145 }, { "epoch": 0.6315949125733972, "grad_norm": 1.3563135862350464, "learning_rate": 5.982586441690475e-06, "loss": 0.1577, "step": 29150 }, { "epoch": 0.631703247892878, "grad_norm": 0.7075536251068115, "learning_rate": 5.979469950211414e-06, "loss": 0.1925, "step": 29155 }, { "epoch": 0.6318115832123589, "grad_norm": 1.512600302696228, "learning_rate": 5.9763539244505345e-06, "loss": 0.1094, "step": 29160 }, { "epoch": 0.6319199185318397, "grad_norm": 1.6130175590515137, "learning_rate": 5.973238364768776e-06, "loss": 0.1851, "step": 29165 }, { "epoch": 0.6320282538513206, "grad_norm": 1.4594404697418213, "learning_rate": 5.970123271527031e-06, "loss": 0.2075, "step": 29170 }, { "epoch": 0.6321365891708015, "grad_norm": 1.3549127578735352, "learning_rate": 5.9670086450861345e-06, "loss": 0.1878, "step": 29175 }, { "epoch": 0.6322449244902824, "grad_norm": 1.2630616426467896, "learning_rate": 5.963894485806876e-06, "loss": 0.1561, "step": 29180 }, { "epoch": 0.6323532598097632, "grad_norm": 1.9679062366485596, "learning_rate": 5.960780794049981e-06, "loss": 0.1958, "step": 29185 }, { "epoch": 0.632461595129244, "grad_norm": 1.6684678792953491, "learning_rate": 5.957667570176122e-06, "loss": 0.2077, "step": 29190 }, { "epoch": 0.6325699304487249, "grad_norm": 1.6542413234710693, "learning_rate": 5.954554814545925e-06, "loss": 0.2094, "step": 29195 }, { "epoch": 0.6326782657682057, "grad_norm": 1.2694447040557861, "learning_rate": 5.951442527519949e-06, "loss": 0.1064, "step": 29200 }, { "epoch": 0.6327866010876866, "grad_norm": 1.1522942781448364, "learning_rate": 5.948330709458709e-06, "loss": 0.0901, "step": 29205 }, { "epoch": 0.6328949364071674, "grad_norm": 0.9831975698471069, "learning_rate": 5.945219360722662e-06, "loss": 0.1515, "step": 29210 }, { "epoch": 0.6330032717266483, "grad_norm": 1.1941781044006348, "learning_rate": 5.94210848167221e-06, "loss": 0.1374, "step": 29215 }, { "epoch": 0.6331116070461292, "grad_norm": 2.164435386657715, "learning_rate": 5.938998072667701e-06, "loss": 0.2424, "step": 29220 }, { "epoch": 0.6332199423656101, "grad_norm": 1.7550106048583984, "learning_rate": 5.935888134069436e-06, "loss": 0.2474, "step": 29225 }, { "epoch": 0.6333282776850909, "grad_norm": 0.9637218117713928, "learning_rate": 5.932778666237651e-06, "loss": 0.1396, "step": 29230 }, { "epoch": 0.6334366130045718, "grad_norm": 0.972011923789978, "learning_rate": 5.92966966953253e-06, "loss": 0.2538, "step": 29235 }, { "epoch": 0.6335449483240526, "grad_norm": 1.0690603256225586, "learning_rate": 5.9265611443142024e-06, "loss": 0.1204, "step": 29240 }, { "epoch": 0.6336532836435335, "grad_norm": 1.6424556970596313, "learning_rate": 5.923453090942747e-06, "loss": 0.2537, "step": 29245 }, { "epoch": 0.6337616189630143, "grad_norm": 1.4066675901412964, "learning_rate": 5.920345509778183e-06, "loss": 0.2463, "step": 29250 }, { "epoch": 0.6338699542824952, "grad_norm": 1.4341689348220825, "learning_rate": 5.91723840118048e-06, "loss": 0.1982, "step": 29255 }, { "epoch": 0.633978289601976, "grad_norm": 1.447320818901062, "learning_rate": 5.9141317655095485e-06, "loss": 0.1277, "step": 29260 }, { "epoch": 0.6340866249214568, "grad_norm": 1.9675545692443848, "learning_rate": 5.911025603125245e-06, "loss": 0.2443, "step": 29265 }, { "epoch": 0.6341949602409378, "grad_norm": 2.313359260559082, "learning_rate": 5.907919914387371e-06, "loss": 0.1166, "step": 29270 }, { "epoch": 0.6343032955604186, "grad_norm": 2.7407467365264893, "learning_rate": 5.904814699655678e-06, "loss": 0.1886, "step": 29275 }, { "epoch": 0.6344116308798995, "grad_norm": 2.5034217834472656, "learning_rate": 5.9017099592898555e-06, "loss": 0.1383, "step": 29280 }, { "epoch": 0.6345199661993803, "grad_norm": 0.8630160093307495, "learning_rate": 5.898605693649542e-06, "loss": 0.1663, "step": 29285 }, { "epoch": 0.6346283015188612, "grad_norm": 1.8350995779037476, "learning_rate": 5.895501903094319e-06, "loss": 0.181, "step": 29290 }, { "epoch": 0.634736636838342, "grad_norm": 2.018502712249756, "learning_rate": 5.892398587983717e-06, "loss": 0.1849, "step": 29295 }, { "epoch": 0.6348449721578229, "grad_norm": 3.1474449634552, "learning_rate": 5.889295748677206e-06, "loss": 0.1554, "step": 29300 }, { "epoch": 0.6349533074773037, "grad_norm": 1.2424720525741577, "learning_rate": 5.886193385534204e-06, "loss": 0.17, "step": 29305 }, { "epoch": 0.6350616427967846, "grad_norm": 1.0629000663757324, "learning_rate": 5.883091498914073e-06, "loss": 0.1591, "step": 29310 }, { "epoch": 0.6351699781162654, "grad_norm": 2.0964643955230713, "learning_rate": 5.8799900891761206e-06, "loss": 0.1225, "step": 29315 }, { "epoch": 0.6352783134357464, "grad_norm": 2.0215694904327393, "learning_rate": 5.876889156679597e-06, "loss": 0.2303, "step": 29320 }, { "epoch": 0.6353866487552272, "grad_norm": 2.4310691356658936, "learning_rate": 5.873788701783703e-06, "loss": 0.097, "step": 29325 }, { "epoch": 0.6354949840747081, "grad_norm": 2.348125696182251, "learning_rate": 5.8706887248475765e-06, "loss": 0.1683, "step": 29330 }, { "epoch": 0.6356033193941889, "grad_norm": 2.4731009006500244, "learning_rate": 5.867589226230305e-06, "loss": 0.168, "step": 29335 }, { "epoch": 0.6357116547136697, "grad_norm": 2.7514021396636963, "learning_rate": 5.864490206290917e-06, "loss": 0.1679, "step": 29340 }, { "epoch": 0.6358199900331506, "grad_norm": 2.6417815685272217, "learning_rate": 5.861391665388389e-06, "loss": 0.22, "step": 29345 }, { "epoch": 0.6359283253526314, "grad_norm": 1.7304799556732178, "learning_rate": 5.858293603881641e-06, "loss": 0.23, "step": 29350 }, { "epoch": 0.6360366606721123, "grad_norm": 1.7461652755737305, "learning_rate": 5.855196022129535e-06, "loss": 0.1682, "step": 29355 }, { "epoch": 0.6361449959915931, "grad_norm": 1.119220495223999, "learning_rate": 5.852098920490881e-06, "loss": 0.1661, "step": 29360 }, { "epoch": 0.6362533313110741, "grad_norm": 1.4469932317733765, "learning_rate": 5.84900229932443e-06, "loss": 0.1823, "step": 29365 }, { "epoch": 0.6363616666305549, "grad_norm": 1.3290314674377441, "learning_rate": 5.845906158988883e-06, "loss": 0.21, "step": 29370 }, { "epoch": 0.6364700019500358, "grad_norm": 1.4489362239837646, "learning_rate": 5.842810499842878e-06, "loss": 0.2035, "step": 29375 }, { "epoch": 0.6365783372695166, "grad_norm": 1.3629071712493896, "learning_rate": 5.839715322245003e-06, "loss": 0.1349, "step": 29380 }, { "epoch": 0.6366866725889975, "grad_norm": 1.6062484979629517, "learning_rate": 5.836620626553786e-06, "loss": 0.1868, "step": 29385 }, { "epoch": 0.6367950079084783, "grad_norm": 1.046340823173523, "learning_rate": 5.833526413127704e-06, "loss": 0.1452, "step": 29390 }, { "epoch": 0.6369033432279592, "grad_norm": 1.5314905643463135, "learning_rate": 5.83043268232517e-06, "loss": 0.198, "step": 29395 }, { "epoch": 0.63701167854744, "grad_norm": 1.3918513059616089, "learning_rate": 5.827339434504553e-06, "loss": 0.2186, "step": 29400 }, { "epoch": 0.6371200138669209, "grad_norm": 1.851743221282959, "learning_rate": 5.824246670024152e-06, "loss": 0.1893, "step": 29405 }, { "epoch": 0.6372283491864017, "grad_norm": 1.531853437423706, "learning_rate": 5.821154389242226e-06, "loss": 0.1813, "step": 29410 }, { "epoch": 0.6373366845058827, "grad_norm": 1.0077738761901855, "learning_rate": 5.818062592516961e-06, "loss": 0.1367, "step": 29415 }, { "epoch": 0.6374450198253635, "grad_norm": 1.3014050722122192, "learning_rate": 5.814971280206498e-06, "loss": 0.1246, "step": 29420 }, { "epoch": 0.6375533551448443, "grad_norm": 1.1741366386413574, "learning_rate": 5.8118804526689265e-06, "loss": 0.1086, "step": 29425 }, { "epoch": 0.6376616904643252, "grad_norm": 1.6001360416412354, "learning_rate": 5.808790110262261e-06, "loss": 0.1303, "step": 29430 }, { "epoch": 0.637770025783806, "grad_norm": 1.3968733549118042, "learning_rate": 5.805700253344482e-06, "loss": 0.1413, "step": 29435 }, { "epoch": 0.6378783611032869, "grad_norm": 1.6477569341659546, "learning_rate": 5.802610882273494e-06, "loss": 0.1701, "step": 29440 }, { "epoch": 0.6379866964227677, "grad_norm": 2.192568063735962, "learning_rate": 5.7995219974071605e-06, "loss": 0.211, "step": 29445 }, { "epoch": 0.6380950317422486, "grad_norm": 1.5706554651260376, "learning_rate": 5.796433599103278e-06, "loss": 0.2081, "step": 29450 }, { "epoch": 0.6382033670617294, "grad_norm": 1.3077853918075562, "learning_rate": 5.793345687719598e-06, "loss": 0.1331, "step": 29455 }, { "epoch": 0.6383117023812104, "grad_norm": 1.8871803283691406, "learning_rate": 5.790258263613799e-06, "loss": 0.2124, "step": 29460 }, { "epoch": 0.6384200377006912, "grad_norm": 1.2488141059875488, "learning_rate": 5.787171327143519e-06, "loss": 0.2006, "step": 29465 }, { "epoch": 0.6385283730201721, "grad_norm": 1.3138248920440674, "learning_rate": 5.784084878666337e-06, "loss": 0.1606, "step": 29470 }, { "epoch": 0.6386367083396529, "grad_norm": 1.192319393157959, "learning_rate": 5.780998918539763e-06, "loss": 0.2599, "step": 29475 }, { "epoch": 0.6387450436591338, "grad_norm": 1.8011302947998047, "learning_rate": 5.777913447121268e-06, "loss": 0.1417, "step": 29480 }, { "epoch": 0.6388533789786146, "grad_norm": 1.425645351409912, "learning_rate": 5.77482846476825e-06, "loss": 0.1986, "step": 29485 }, { "epoch": 0.6389617142980955, "grad_norm": 1.294122576713562, "learning_rate": 5.771743971838063e-06, "loss": 0.146, "step": 29490 }, { "epoch": 0.6390700496175763, "grad_norm": 1.4321900606155396, "learning_rate": 5.768659968687995e-06, "loss": 0.1776, "step": 29495 }, { "epoch": 0.6391783849370571, "grad_norm": 1.373246669769287, "learning_rate": 5.765576455675288e-06, "loss": 0.1743, "step": 29500 }, { "epoch": 0.639286720256538, "grad_norm": 2.3993310928344727, "learning_rate": 5.762493433157117e-06, "loss": 0.1458, "step": 29505 }, { "epoch": 0.639395055576019, "grad_norm": 1.7877488136291504, "learning_rate": 5.759410901490596e-06, "loss": 0.1949, "step": 29510 }, { "epoch": 0.6395033908954998, "grad_norm": 1.00574791431427, "learning_rate": 5.7563288610328046e-06, "loss": 0.186, "step": 29515 }, { "epoch": 0.6396117262149806, "grad_norm": 1.2418243885040283, "learning_rate": 5.753247312140743e-06, "loss": 0.1656, "step": 29520 }, { "epoch": 0.6397200615344615, "grad_norm": 1.724853515625, "learning_rate": 5.750166255171366e-06, "loss": 0.2204, "step": 29525 }, { "epoch": 0.6398283968539423, "grad_norm": 1.6409556865692139, "learning_rate": 5.747085690481563e-06, "loss": 0.2039, "step": 29530 }, { "epoch": 0.6399367321734232, "grad_norm": 0.7576285004615784, "learning_rate": 5.744005618428178e-06, "loss": 0.1658, "step": 29535 }, { "epoch": 0.640045067492904, "grad_norm": 1.7112491130828857, "learning_rate": 5.740926039367986e-06, "loss": 0.1936, "step": 29540 }, { "epoch": 0.6401534028123849, "grad_norm": 1.2895328998565674, "learning_rate": 5.737846953657708e-06, "loss": 0.2099, "step": 29545 }, { "epoch": 0.6402617381318657, "grad_norm": 1.3592368364334106, "learning_rate": 5.734768361654019e-06, "loss": 0.2061, "step": 29550 }, { "epoch": 0.6403700734513466, "grad_norm": 1.8564897775650024, "learning_rate": 5.7316902637135165e-06, "loss": 0.213, "step": 29555 }, { "epoch": 0.6404784087708275, "grad_norm": 0.8973308205604553, "learning_rate": 5.728612660192759e-06, "loss": 0.1997, "step": 29560 }, { "epoch": 0.6405867440903084, "grad_norm": 1.45340096950531, "learning_rate": 5.725535551448238e-06, "loss": 0.1834, "step": 29565 }, { "epoch": 0.6406950794097892, "grad_norm": 1.8686039447784424, "learning_rate": 5.722458937836396e-06, "loss": 0.1632, "step": 29570 }, { "epoch": 0.64080341472927, "grad_norm": 1.976218581199646, "learning_rate": 5.719382819713608e-06, "loss": 0.3322, "step": 29575 }, { "epoch": 0.6409117500487509, "grad_norm": 1.6368168592453003, "learning_rate": 5.716307197436195e-06, "loss": 0.1704, "step": 29580 }, { "epoch": 0.6410200853682317, "grad_norm": 1.2822144031524658, "learning_rate": 5.713232071360426e-06, "loss": 0.1132, "step": 29585 }, { "epoch": 0.6411284206877126, "grad_norm": 1.2192120552062988, "learning_rate": 5.710157441842501e-06, "loss": 0.105, "step": 29590 }, { "epoch": 0.6412367560071934, "grad_norm": 1.0298413038253784, "learning_rate": 5.707083309238579e-06, "loss": 0.1348, "step": 29595 }, { "epoch": 0.6413450913266743, "grad_norm": 0.8260613679885864, "learning_rate": 5.7040096739047445e-06, "loss": 0.1068, "step": 29600 }, { "epoch": 0.6414534266461552, "grad_norm": 1.7993645668029785, "learning_rate": 5.7009365361970346e-06, "loss": 0.2259, "step": 29605 }, { "epoch": 0.6415617619656361, "grad_norm": 1.1401652097702026, "learning_rate": 5.697863896471432e-06, "loss": 0.1415, "step": 29610 }, { "epoch": 0.6416700972851169, "grad_norm": 1.8668129444122314, "learning_rate": 5.694791755083846e-06, "loss": 0.1325, "step": 29615 }, { "epoch": 0.6417784326045978, "grad_norm": 1.51896333694458, "learning_rate": 5.691720112390147e-06, "loss": 0.1936, "step": 29620 }, { "epoch": 0.6418867679240786, "grad_norm": 1.3666374683380127, "learning_rate": 5.688648968746131e-06, "loss": 0.2606, "step": 29625 }, { "epoch": 0.6419951032435595, "grad_norm": 1.870077133178711, "learning_rate": 5.685578324507552e-06, "loss": 0.1923, "step": 29630 }, { "epoch": 0.6421034385630403, "grad_norm": 1.2090893983840942, "learning_rate": 5.6825081800300915e-06, "loss": 0.1925, "step": 29635 }, { "epoch": 0.6422117738825212, "grad_norm": 1.4237785339355469, "learning_rate": 5.679438535669386e-06, "loss": 0.2189, "step": 29640 }, { "epoch": 0.642320109202002, "grad_norm": 1.9556982517242432, "learning_rate": 5.676369391780998e-06, "loss": 0.1565, "step": 29645 }, { "epoch": 0.6424284445214828, "grad_norm": 1.4726723432540894, "learning_rate": 5.673300748720454e-06, "loss": 0.2178, "step": 29650 }, { "epoch": 0.6425367798409638, "grad_norm": 1.391932725906372, "learning_rate": 5.6702326068432e-06, "loss": 0.1903, "step": 29655 }, { "epoch": 0.6426451151604446, "grad_norm": 1.172602653503418, "learning_rate": 5.667164966504638e-06, "loss": 0.1775, "step": 29660 }, { "epoch": 0.6427534504799255, "grad_norm": 0.9309897422790527, "learning_rate": 5.664097828060113e-06, "loss": 0.1938, "step": 29665 }, { "epoch": 0.6428617857994063, "grad_norm": 1.721915602684021, "learning_rate": 5.661031191864898e-06, "loss": 0.1741, "step": 29670 }, { "epoch": 0.6429701211188872, "grad_norm": 1.31454598903656, "learning_rate": 5.657965058274228e-06, "loss": 0.2275, "step": 29675 }, { "epoch": 0.643078456438368, "grad_norm": 1.9936760663986206, "learning_rate": 5.654899427643257e-06, "loss": 0.1566, "step": 29680 }, { "epoch": 0.6431867917578489, "grad_norm": 2.0553388595581055, "learning_rate": 5.651834300327101e-06, "loss": 0.121, "step": 29685 }, { "epoch": 0.6432951270773297, "grad_norm": 1.6041936874389648, "learning_rate": 5.648769676680802e-06, "loss": 0.2089, "step": 29690 }, { "epoch": 0.6434034623968106, "grad_norm": 0.7896501421928406, "learning_rate": 5.645705557059359e-06, "loss": 0.1901, "step": 29695 }, { "epoch": 0.6435117977162914, "grad_norm": 1.0369333028793335, "learning_rate": 5.6426419418176955e-06, "loss": 0.1122, "step": 29700 }, { "epoch": 0.6436201330357724, "grad_norm": 1.4203130006790161, "learning_rate": 5.63957883131069e-06, "loss": 0.2176, "step": 29705 }, { "epoch": 0.6437284683552532, "grad_norm": 1.764252781867981, "learning_rate": 5.636516225893162e-06, "loss": 0.2548, "step": 29710 }, { "epoch": 0.6438368036747341, "grad_norm": 0.8273146748542786, "learning_rate": 5.633454125919859e-06, "loss": 0.1124, "step": 29715 }, { "epoch": 0.6439451389942149, "grad_norm": 1.7700471878051758, "learning_rate": 5.630392531745491e-06, "loss": 0.2356, "step": 29720 }, { "epoch": 0.6440534743136958, "grad_norm": 2.440617799758911, "learning_rate": 5.627331443724686e-06, "loss": 0.2298, "step": 29725 }, { "epoch": 0.6441618096331766, "grad_norm": 1.2581785917282104, "learning_rate": 5.624270862212035e-06, "loss": 0.2229, "step": 29730 }, { "epoch": 0.6442701449526574, "grad_norm": 1.8937548398971558, "learning_rate": 5.621210787562053e-06, "loss": 0.1229, "step": 29735 }, { "epoch": 0.6443784802721383, "grad_norm": 1.8443468809127808, "learning_rate": 5.6181512201292084e-06, "loss": 0.158, "step": 29740 }, { "epoch": 0.6444868155916191, "grad_norm": 1.0086578130722046, "learning_rate": 5.615092160267907e-06, "loss": 0.1364, "step": 29745 }, { "epoch": 0.6445951509111001, "grad_norm": 1.7137207984924316, "learning_rate": 5.612033608332486e-06, "loss": 0.1968, "step": 29750 }, { "epoch": 0.6447034862305809, "grad_norm": 1.2898937463760376, "learning_rate": 5.608975564677245e-06, "loss": 0.1676, "step": 29755 }, { "epoch": 0.6448118215500618, "grad_norm": 1.841310739517212, "learning_rate": 5.605918029656406e-06, "loss": 0.177, "step": 29760 }, { "epoch": 0.6449201568695426, "grad_norm": 1.556734323501587, "learning_rate": 5.602861003624141e-06, "loss": 0.1736, "step": 29765 }, { "epoch": 0.6450284921890235, "grad_norm": 1.4452015161514282, "learning_rate": 5.5998044869345566e-06, "loss": 0.1591, "step": 29770 }, { "epoch": 0.6451368275085043, "grad_norm": 0.8229280710220337, "learning_rate": 5.596748479941711e-06, "loss": 0.1341, "step": 29775 }, { "epoch": 0.6452451628279852, "grad_norm": 1.020993947982788, "learning_rate": 5.593692982999596e-06, "loss": 0.1856, "step": 29780 }, { "epoch": 0.645353498147466, "grad_norm": 0.8879640698432922, "learning_rate": 5.590637996462136e-06, "loss": 0.1323, "step": 29785 }, { "epoch": 0.6454618334669469, "grad_norm": 1.4281740188598633, "learning_rate": 5.587583520683216e-06, "loss": 0.2256, "step": 29790 }, { "epoch": 0.6455701687864277, "grad_norm": 0.7874500155448914, "learning_rate": 5.584529556016645e-06, "loss": 0.1011, "step": 29795 }, { "epoch": 0.6456785041059087, "grad_norm": 1.3845891952514648, "learning_rate": 5.581476102816179e-06, "loss": 0.1609, "step": 29800 }, { "epoch": 0.6457868394253895, "grad_norm": 0.6344032287597656, "learning_rate": 5.5784231614355175e-06, "loss": 0.1675, "step": 29805 }, { "epoch": 0.6458951747448703, "grad_norm": 1.3132336139678955, "learning_rate": 5.575370732228303e-06, "loss": 0.2241, "step": 29810 }, { "epoch": 0.6460035100643512, "grad_norm": 1.3015390634536743, "learning_rate": 5.5723188155481025e-06, "loss": 0.19, "step": 29815 }, { "epoch": 0.646111845383832, "grad_norm": 1.998063564300537, "learning_rate": 5.569267411748445e-06, "loss": 0.1841, "step": 29820 }, { "epoch": 0.6462201807033129, "grad_norm": 1.5693894624710083, "learning_rate": 5.566216521182786e-06, "loss": 0.1756, "step": 29825 }, { "epoch": 0.6463285160227937, "grad_norm": 1.9154163599014282, "learning_rate": 5.563166144204519e-06, "loss": 0.1992, "step": 29830 }, { "epoch": 0.6464368513422746, "grad_norm": 1.2634947299957275, "learning_rate": 5.560116281166997e-06, "loss": 0.2234, "step": 29835 }, { "epoch": 0.6465451866617554, "grad_norm": 1.197989821434021, "learning_rate": 5.557066932423489e-06, "loss": 0.2053, "step": 29840 }, { "epoch": 0.6466535219812363, "grad_norm": 3.728541374206543, "learning_rate": 5.5540180983272265e-06, "loss": 0.2482, "step": 29845 }, { "epoch": 0.6467618573007172, "grad_norm": 1.0855013132095337, "learning_rate": 5.550969779231359e-06, "loss": 0.1969, "step": 29850 }, { "epoch": 0.6468701926201981, "grad_norm": 1.1531375646591187, "learning_rate": 5.547921975489003e-06, "loss": 0.1603, "step": 29855 }, { "epoch": 0.6469785279396789, "grad_norm": 0.7721705436706543, "learning_rate": 5.544874687453194e-06, "loss": 0.2305, "step": 29860 }, { "epoch": 0.6470868632591598, "grad_norm": 2.0240116119384766, "learning_rate": 5.541827915476911e-06, "loss": 0.1805, "step": 29865 }, { "epoch": 0.6471951985786406, "grad_norm": 1.8165525197982788, "learning_rate": 5.538781659913084e-06, "loss": 0.2345, "step": 29870 }, { "epoch": 0.6473035338981215, "grad_norm": 0.6290788650512695, "learning_rate": 5.535735921114569e-06, "loss": 0.1712, "step": 29875 }, { "epoch": 0.6474118692176023, "grad_norm": 0.7139585614204407, "learning_rate": 5.532690699434178e-06, "loss": 0.1966, "step": 29880 }, { "epoch": 0.6475202045370831, "grad_norm": 2.1556010246276855, "learning_rate": 5.529645995224644e-06, "loss": 0.2414, "step": 29885 }, { "epoch": 0.647628539856564, "grad_norm": 1.0192351341247559, "learning_rate": 5.526601808838659e-06, "loss": 0.139, "step": 29890 }, { "epoch": 0.647736875176045, "grad_norm": 1.6867867708206177, "learning_rate": 5.52355814062884e-06, "loss": 0.2412, "step": 29895 }, { "epoch": 0.6478452104955258, "grad_norm": 1.112552285194397, "learning_rate": 5.520514990947753e-06, "loss": 0.1721, "step": 29900 }, { "epoch": 0.6479535458150066, "grad_norm": 0.77604740858078, "learning_rate": 5.517472360147906e-06, "loss": 0.087, "step": 29905 }, { "epoch": 0.6480618811344875, "grad_norm": 1.1774463653564453, "learning_rate": 5.514430248581733e-06, "loss": 0.2451, "step": 29910 }, { "epoch": 0.6481702164539683, "grad_norm": 1.6611366271972656, "learning_rate": 5.511388656601626e-06, "loss": 0.152, "step": 29915 }, { "epoch": 0.6482785517734492, "grad_norm": 1.948067545890808, "learning_rate": 5.508347584559901e-06, "loss": 0.1596, "step": 29920 }, { "epoch": 0.64838688709293, "grad_norm": 2.121427059173584, "learning_rate": 5.505307032808826e-06, "loss": 0.1, "step": 29925 }, { "epoch": 0.6484952224124109, "grad_norm": 2.0308570861816406, "learning_rate": 5.502267001700596e-06, "loss": 0.2078, "step": 29930 }, { "epoch": 0.6486035577318917, "grad_norm": 1.1261132955551147, "learning_rate": 5.4992274915873645e-06, "loss": 0.1228, "step": 29935 }, { "epoch": 0.6487118930513726, "grad_norm": 2.660261631011963, "learning_rate": 5.4961885028212e-06, "loss": 0.1987, "step": 29940 }, { "epoch": 0.6488202283708535, "grad_norm": 1.1543759107589722, "learning_rate": 5.493150035754132e-06, "loss": 0.1397, "step": 29945 }, { "epoch": 0.6489285636903344, "grad_norm": 1.3161866664886475, "learning_rate": 5.490112090738124e-06, "loss": 0.265, "step": 29950 }, { "epoch": 0.6490368990098152, "grad_norm": 1.8037934303283691, "learning_rate": 5.4870746681250665e-06, "loss": 0.2021, "step": 29955 }, { "epoch": 0.649145234329296, "grad_norm": 0.5900561809539795, "learning_rate": 5.48403776826681e-06, "loss": 0.1271, "step": 29960 }, { "epoch": 0.6492535696487769, "grad_norm": 1.0175544023513794, "learning_rate": 5.481001391515125e-06, "loss": 0.0952, "step": 29965 }, { "epoch": 0.6493619049682577, "grad_norm": 1.5726375579833984, "learning_rate": 5.477965538221738e-06, "loss": 0.2186, "step": 29970 }, { "epoch": 0.6494702402877386, "grad_norm": 2.1533772945404053, "learning_rate": 5.4749302087382995e-06, "loss": 0.1998, "step": 29975 }, { "epoch": 0.6495785756072194, "grad_norm": 2.016754388809204, "learning_rate": 5.471895403416414e-06, "loss": 0.313, "step": 29980 }, { "epoch": 0.6496869109267003, "grad_norm": 1.2564353942871094, "learning_rate": 5.468861122607612e-06, "loss": 0.1919, "step": 29985 }, { "epoch": 0.6497952462461812, "grad_norm": 1.5095605850219727, "learning_rate": 5.465827366663372e-06, "loss": 0.1154, "step": 29990 }, { "epoch": 0.6499035815656621, "grad_norm": 0.9127535223960876, "learning_rate": 5.462794135935114e-06, "loss": 0.1535, "step": 29995 }, { "epoch": 0.6500119168851429, "grad_norm": 1.2283551692962646, "learning_rate": 5.459761430774185e-06, "loss": 0.1165, "step": 30000 }, { "epoch": 0.6501202522046238, "grad_norm": 2.2667911052703857, "learning_rate": 5.456729251531883e-06, "loss": 0.2267, "step": 30005 }, { "epoch": 0.6502285875241046, "grad_norm": 1.8569788932800293, "learning_rate": 5.453697598559436e-06, "loss": 0.1637, "step": 30010 }, { "epoch": 0.6503369228435855, "grad_norm": 1.5493942499160767, "learning_rate": 5.450666472208024e-06, "loss": 0.1619, "step": 30015 }, { "epoch": 0.6504452581630663, "grad_norm": 1.4551804065704346, "learning_rate": 5.447635872828747e-06, "loss": 0.1629, "step": 30020 }, { "epoch": 0.6505535934825472, "grad_norm": 2.7961461544036865, "learning_rate": 5.444605800772663e-06, "loss": 0.1812, "step": 30025 }, { "epoch": 0.650661928802028, "grad_norm": 0.9376404285430908, "learning_rate": 5.441576256390758e-06, "loss": 0.1579, "step": 30030 }, { "epoch": 0.6507702641215088, "grad_norm": 0.9763346314430237, "learning_rate": 5.438547240033954e-06, "loss": 0.2027, "step": 30035 }, { "epoch": 0.6508785994409898, "grad_norm": 1.1546908617019653, "learning_rate": 5.435518752053123e-06, "loss": 0.1763, "step": 30040 }, { "epoch": 0.6509869347604706, "grad_norm": 2.2589659690856934, "learning_rate": 5.432490792799068e-06, "loss": 0.1271, "step": 30045 }, { "epoch": 0.6510952700799515, "grad_norm": 1.75006103515625, "learning_rate": 5.429463362622537e-06, "loss": 0.109, "step": 30050 }, { "epoch": 0.6512036053994323, "grad_norm": 1.9296033382415771, "learning_rate": 5.426436461874205e-06, "loss": 0.1979, "step": 30055 }, { "epoch": 0.6513119407189132, "grad_norm": 0.6621514558792114, "learning_rate": 5.423410090904702e-06, "loss": 0.1397, "step": 30060 }, { "epoch": 0.651420276038394, "grad_norm": 2.0492589473724365, "learning_rate": 5.420384250064581e-06, "loss": 0.1786, "step": 30065 }, { "epoch": 0.6515286113578749, "grad_norm": 1.6143207550048828, "learning_rate": 5.417358939704338e-06, "loss": 0.1354, "step": 30070 }, { "epoch": 0.6516369466773557, "grad_norm": 1.4364700317382812, "learning_rate": 5.4143341601744196e-06, "loss": 0.189, "step": 30075 }, { "epoch": 0.6517452819968366, "grad_norm": 1.3838640451431274, "learning_rate": 5.411309911825189e-06, "loss": 0.1658, "step": 30080 }, { "epoch": 0.6518536173163174, "grad_norm": 1.667296051979065, "learning_rate": 5.408286195006972e-06, "loss": 0.145, "step": 30085 }, { "epoch": 0.6519619526357984, "grad_norm": 1.4591090679168701, "learning_rate": 5.405263010070007e-06, "loss": 0.1671, "step": 30090 }, { "epoch": 0.6520702879552792, "grad_norm": 2.132533073425293, "learning_rate": 5.402240357364502e-06, "loss": 0.2341, "step": 30095 }, { "epoch": 0.6521786232747601, "grad_norm": 2.034188747406006, "learning_rate": 5.399218237240577e-06, "loss": 0.1721, "step": 30100 }, { "epoch": 0.6522869585942409, "grad_norm": 1.5220673084259033, "learning_rate": 5.396196650048296e-06, "loss": 0.1239, "step": 30105 }, { "epoch": 0.6523952939137218, "grad_norm": 0.4569935202598572, "learning_rate": 5.393175596137669e-06, "loss": 0.1515, "step": 30110 }, { "epoch": 0.6525036292332026, "grad_norm": 1.435753345489502, "learning_rate": 5.390155075858638e-06, "loss": 0.1958, "step": 30115 }, { "epoch": 0.6526119645526834, "grad_norm": 1.8458229303359985, "learning_rate": 5.387135089561088e-06, "loss": 0.1703, "step": 30120 }, { "epoch": 0.6527202998721643, "grad_norm": 1.246219515800476, "learning_rate": 5.384115637594835e-06, "loss": 0.1439, "step": 30125 }, { "epoch": 0.6528286351916451, "grad_norm": 1.516432285308838, "learning_rate": 5.381096720309643e-06, "loss": 0.1296, "step": 30130 }, { "epoch": 0.6529369705111261, "grad_norm": 0.8844525814056396, "learning_rate": 5.378078338055201e-06, "loss": 0.1219, "step": 30135 }, { "epoch": 0.6530453058306069, "grad_norm": 1.7053831815719604, "learning_rate": 5.375060491181147e-06, "loss": 0.2028, "step": 30140 }, { "epoch": 0.6531536411500878, "grad_norm": 1.3518816232681274, "learning_rate": 5.372043180037057e-06, "loss": 0.1406, "step": 30145 }, { "epoch": 0.6532619764695686, "grad_norm": 2.2943530082702637, "learning_rate": 5.369026404972434e-06, "loss": 0.1627, "step": 30150 }, { "epoch": 0.6533703117890495, "grad_norm": 1.874754786491394, "learning_rate": 5.366010166336735e-06, "loss": 0.1536, "step": 30155 }, { "epoch": 0.6534786471085303, "grad_norm": 1.562540888786316, "learning_rate": 5.3629944644793355e-06, "loss": 0.0874, "step": 30160 }, { "epoch": 0.6535869824280112, "grad_norm": 1.6170356273651123, "learning_rate": 5.359979299749569e-06, "loss": 0.2669, "step": 30165 }, { "epoch": 0.653695317747492, "grad_norm": 0.3821979761123657, "learning_rate": 5.356964672496689e-06, "loss": 0.1773, "step": 30170 }, { "epoch": 0.6538036530669729, "grad_norm": 1.2325351238250732, "learning_rate": 5.353950583069905e-06, "loss": 0.2977, "step": 30175 }, { "epoch": 0.6539119883864537, "grad_norm": 1.4650589227676392, "learning_rate": 5.3509370318183415e-06, "loss": 0.1821, "step": 30180 }, { "epoch": 0.6540203237059347, "grad_norm": 1.0488766431808472, "learning_rate": 5.3479240190910815e-06, "loss": 0.0813, "step": 30185 }, { "epoch": 0.6541286590254155, "grad_norm": 1.8179765939712524, "learning_rate": 5.3449115452371405e-06, "loss": 0.2075, "step": 30190 }, { "epoch": 0.6542369943448963, "grad_norm": 1.1251779794692993, "learning_rate": 5.341899610605459e-06, "loss": 0.1508, "step": 30195 }, { "epoch": 0.6543453296643772, "grad_norm": 1.7613157033920288, "learning_rate": 5.338888215544933e-06, "loss": 0.1585, "step": 30200 }, { "epoch": 0.654453664983858, "grad_norm": 1.8965187072753906, "learning_rate": 5.33587736040438e-06, "loss": 0.2232, "step": 30205 }, { "epoch": 0.6545620003033389, "grad_norm": 1.3838553428649902, "learning_rate": 5.33286704553257e-06, "loss": 0.1742, "step": 30210 }, { "epoch": 0.6546703356228197, "grad_norm": 1.8194962739944458, "learning_rate": 5.3298572712781945e-06, "loss": 0.1799, "step": 30215 }, { "epoch": 0.6547786709423006, "grad_norm": 1.3908964395523071, "learning_rate": 5.3268480379899e-06, "loss": 0.1814, "step": 30220 }, { "epoch": 0.6548870062617814, "grad_norm": 1.559236764907837, "learning_rate": 5.323839346016253e-06, "loss": 0.2085, "step": 30225 }, { "epoch": 0.6549953415812623, "grad_norm": 1.8515104055404663, "learning_rate": 5.320831195705772e-06, "loss": 0.1423, "step": 30230 }, { "epoch": 0.6551036769007432, "grad_norm": 1.3209242820739746, "learning_rate": 5.3178235874069005e-06, "loss": 0.1906, "step": 30235 }, { "epoch": 0.6552120122202241, "grad_norm": 0.5379090905189514, "learning_rate": 5.314816521468026e-06, "loss": 0.1698, "step": 30240 }, { "epoch": 0.6553203475397049, "grad_norm": 1.1450027227401733, "learning_rate": 5.311809998237478e-06, "loss": 0.0676, "step": 30245 }, { "epoch": 0.6554286828591858, "grad_norm": 1.7917450666427612, "learning_rate": 5.3088040180635095e-06, "loss": 0.2315, "step": 30250 }, { "epoch": 0.6555370181786666, "grad_norm": 1.7520809173583984, "learning_rate": 5.3057985812943235e-06, "loss": 0.1868, "step": 30255 }, { "epoch": 0.6556453534981475, "grad_norm": 1.588504672050476, "learning_rate": 5.30279368827805e-06, "loss": 0.1894, "step": 30260 }, { "epoch": 0.6557536888176283, "grad_norm": 1.9731972217559814, "learning_rate": 5.2997893393627665e-06, "loss": 0.2228, "step": 30265 }, { "epoch": 0.6558620241371091, "grad_norm": 2.464026927947998, "learning_rate": 5.29678553489648e-06, "loss": 0.1891, "step": 30270 }, { "epoch": 0.65597035945659, "grad_norm": 0.8313810229301453, "learning_rate": 5.29378227522713e-06, "loss": 0.1948, "step": 30275 }, { "epoch": 0.656078694776071, "grad_norm": 1.5841526985168457, "learning_rate": 5.290779560702606e-06, "loss": 0.1517, "step": 30280 }, { "epoch": 0.6561870300955518, "grad_norm": 1.511440634727478, "learning_rate": 5.287777391670724e-06, "loss": 0.1604, "step": 30285 }, { "epoch": 0.6562953654150326, "grad_norm": 1.3763409852981567, "learning_rate": 5.284775768479247e-06, "loss": 0.0988, "step": 30290 }, { "epoch": 0.6564037007345135, "grad_norm": 1.7834657430648804, "learning_rate": 5.281774691475859e-06, "loss": 0.1849, "step": 30295 }, { "epoch": 0.6565120360539943, "grad_norm": 1.3510650396347046, "learning_rate": 5.2787741610081975e-06, "loss": 0.2178, "step": 30300 }, { "epoch": 0.6566203713734752, "grad_norm": 0.9645902514457703, "learning_rate": 5.275774177423827e-06, "loss": 0.1533, "step": 30305 }, { "epoch": 0.656728706692956, "grad_norm": 1.5343669652938843, "learning_rate": 5.272774741070243e-06, "loss": 0.1851, "step": 30310 }, { "epoch": 0.6568370420124369, "grad_norm": 0.8590788841247559, "learning_rate": 5.269775852294896e-06, "loss": 0.2371, "step": 30315 }, { "epoch": 0.6569453773319177, "grad_norm": 1.2423226833343506, "learning_rate": 5.266777511445156e-06, "loss": 0.1578, "step": 30320 }, { "epoch": 0.6570537126513986, "grad_norm": 1.9617905616760254, "learning_rate": 5.263779718868339e-06, "loss": 0.2053, "step": 30325 }, { "epoch": 0.6571620479708795, "grad_norm": 1.6397026777267456, "learning_rate": 5.2607824749116855e-06, "loss": 0.2431, "step": 30330 }, { "epoch": 0.6572703832903604, "grad_norm": 0.8532289266586304, "learning_rate": 5.257785779922398e-06, "loss": 0.1496, "step": 30335 }, { "epoch": 0.6573787186098412, "grad_norm": 1.4804693460464478, "learning_rate": 5.254789634247588e-06, "loss": 0.1286, "step": 30340 }, { "epoch": 0.657487053929322, "grad_norm": 1.6920533180236816, "learning_rate": 5.251794038234312e-06, "loss": 0.1849, "step": 30345 }, { "epoch": 0.6575953892488029, "grad_norm": 1.507002592086792, "learning_rate": 5.248798992229573e-06, "loss": 0.1828, "step": 30350 }, { "epoch": 0.6577037245682837, "grad_norm": 1.8315716981887817, "learning_rate": 5.245804496580295e-06, "loss": 0.0942, "step": 30355 }, { "epoch": 0.6578120598877646, "grad_norm": 1.1543865203857422, "learning_rate": 5.24281055163335e-06, "loss": 0.1661, "step": 30360 }, { "epoch": 0.6579203952072454, "grad_norm": 1.5492361783981323, "learning_rate": 5.239817157735537e-06, "loss": 0.1293, "step": 30365 }, { "epoch": 0.6580287305267263, "grad_norm": 1.5426617860794067, "learning_rate": 5.236824315233603e-06, "loss": 0.1533, "step": 30370 }, { "epoch": 0.6581370658462071, "grad_norm": 1.5527747869491577, "learning_rate": 5.233832024474215e-06, "loss": 0.1533, "step": 30375 }, { "epoch": 0.6582454011656881, "grad_norm": 1.8996559381484985, "learning_rate": 5.23084028580399e-06, "loss": 0.1491, "step": 30380 }, { "epoch": 0.6583537364851689, "grad_norm": 1.3055670261383057, "learning_rate": 5.227849099569479e-06, "loss": 0.2106, "step": 30385 }, { "epoch": 0.6584620718046498, "grad_norm": 1.916609525680542, "learning_rate": 5.224858466117161e-06, "loss": 0.155, "step": 30390 }, { "epoch": 0.6585704071241306, "grad_norm": 1.593052864074707, "learning_rate": 5.2218683857934606e-06, "loss": 0.1756, "step": 30395 }, { "epoch": 0.6586787424436115, "grad_norm": 1.5910441875457764, "learning_rate": 5.218878858944728e-06, "loss": 0.1002, "step": 30400 }, { "epoch": 0.6587870777630923, "grad_norm": 2.0577552318573, "learning_rate": 5.215889885917262e-06, "loss": 0.1382, "step": 30405 }, { "epoch": 0.6588954130825732, "grad_norm": 1.6761906147003174, "learning_rate": 5.212901467057283e-06, "loss": 0.2302, "step": 30410 }, { "epoch": 0.659003748402054, "grad_norm": 2.0540761947631836, "learning_rate": 5.209913602710962e-06, "loss": 0.1685, "step": 30415 }, { "epoch": 0.6591120837215348, "grad_norm": 1.3506895303726196, "learning_rate": 5.2069262932243905e-06, "loss": 0.11, "step": 30420 }, { "epoch": 0.6592204190410158, "grad_norm": 1.3179011344909668, "learning_rate": 5.203939538943609e-06, "loss": 0.1526, "step": 30425 }, { "epoch": 0.6593287543604966, "grad_norm": 1.6987391710281372, "learning_rate": 5.2009533402145894e-06, "loss": 0.1585, "step": 30430 }, { "epoch": 0.6594370896799775, "grad_norm": 0.6384521126747131, "learning_rate": 5.1979676973832325e-06, "loss": 0.149, "step": 30435 }, { "epoch": 0.6595454249994583, "grad_norm": 1.973013997077942, "learning_rate": 5.194982610795387e-06, "loss": 0.2046, "step": 30440 }, { "epoch": 0.6596537603189392, "grad_norm": 0.9130323529243469, "learning_rate": 5.191998080796823e-06, "loss": 0.1776, "step": 30445 }, { "epoch": 0.65976209563842, "grad_norm": 1.1954152584075928, "learning_rate": 5.189014107733261e-06, "loss": 0.1626, "step": 30450 }, { "epoch": 0.6598704309579009, "grad_norm": 1.6159508228302002, "learning_rate": 5.1860306919503435e-06, "loss": 0.1274, "step": 30455 }, { "epoch": 0.6599787662773817, "grad_norm": 1.529816746711731, "learning_rate": 5.183047833793659e-06, "loss": 0.1641, "step": 30460 }, { "epoch": 0.6600871015968626, "grad_norm": 2.440843343734741, "learning_rate": 5.180065533608723e-06, "loss": 0.1659, "step": 30465 }, { "epoch": 0.6601954369163434, "grad_norm": 1.3979270458221436, "learning_rate": 5.177083791740995e-06, "loss": 0.1645, "step": 30470 }, { "epoch": 0.6603037722358244, "grad_norm": 0.9783510565757751, "learning_rate": 5.174102608535858e-06, "loss": 0.158, "step": 30475 }, { "epoch": 0.6604121075553052, "grad_norm": 1.6611076593399048, "learning_rate": 5.1711219843386426e-06, "loss": 0.0914, "step": 30480 }, { "epoch": 0.6605204428747861, "grad_norm": 1.2623865604400635, "learning_rate": 5.168141919494614e-06, "loss": 0.162, "step": 30485 }, { "epoch": 0.6606287781942669, "grad_norm": 0.8788455724716187, "learning_rate": 5.165162414348957e-06, "loss": 0.1922, "step": 30490 }, { "epoch": 0.6607371135137478, "grad_norm": 1.4493968486785889, "learning_rate": 5.162183469246813e-06, "loss": 0.0807, "step": 30495 }, { "epoch": 0.6608454488332286, "grad_norm": 1.1336796283721924, "learning_rate": 5.15920508453324e-06, "loss": 0.079, "step": 30500 }, { "epoch": 0.6609537841527094, "grad_norm": 1.7088747024536133, "learning_rate": 5.1562272605532456e-06, "loss": 0.1912, "step": 30505 }, { "epoch": 0.6610621194721903, "grad_norm": 1.3978281021118164, "learning_rate": 5.153249997651765e-06, "loss": 0.1532, "step": 30510 }, { "epoch": 0.6611704547916711, "grad_norm": 1.6564090251922607, "learning_rate": 5.150273296173662e-06, "loss": 0.1441, "step": 30515 }, { "epoch": 0.6612787901111521, "grad_norm": 1.2083163261413574, "learning_rate": 5.147297156463751e-06, "loss": 0.1637, "step": 30520 }, { "epoch": 0.6613871254306329, "grad_norm": 1.560012936592102, "learning_rate": 5.14432157886677e-06, "loss": 0.181, "step": 30525 }, { "epoch": 0.6614954607501138, "grad_norm": 1.3964028358459473, "learning_rate": 5.1413465637273995e-06, "loss": 0.1486, "step": 30530 }, { "epoch": 0.6616037960695946, "grad_norm": 1.0869109630584717, "learning_rate": 5.138372111390244e-06, "loss": 0.0951, "step": 30535 }, { "epoch": 0.6617121313890755, "grad_norm": 1.5032052993774414, "learning_rate": 5.1353982221998546e-06, "loss": 0.2007, "step": 30540 }, { "epoch": 0.6618204667085563, "grad_norm": 1.8937619924545288, "learning_rate": 5.132424896500711e-06, "loss": 0.2283, "step": 30545 }, { "epoch": 0.6619288020280372, "grad_norm": 1.557828664779663, "learning_rate": 5.129452134637223e-06, "loss": 0.1452, "step": 30550 }, { "epoch": 0.662037137347518, "grad_norm": 1.4108150005340576, "learning_rate": 5.126479936953746e-06, "loss": 0.1349, "step": 30555 }, { "epoch": 0.6621454726669989, "grad_norm": 0.9927622675895691, "learning_rate": 5.123508303794561e-06, "loss": 0.1738, "step": 30560 }, { "epoch": 0.6622538079864797, "grad_norm": 0.7957897782325745, "learning_rate": 5.120537235503893e-06, "loss": 0.1077, "step": 30565 }, { "epoch": 0.6623621433059607, "grad_norm": 1.2230068445205688, "learning_rate": 5.117566732425884e-06, "loss": 0.1347, "step": 30570 }, { "epoch": 0.6624704786254415, "grad_norm": 0.97560054063797, "learning_rate": 5.114596794904638e-06, "loss": 0.153, "step": 30575 }, { "epoch": 0.6625788139449224, "grad_norm": 1.0899839401245117, "learning_rate": 5.111627423284169e-06, "loss": 0.1334, "step": 30580 }, { "epoch": 0.6626871492644032, "grad_norm": 1.0396238565444946, "learning_rate": 5.108658617908433e-06, "loss": 0.1906, "step": 30585 }, { "epoch": 0.662795484583884, "grad_norm": 1.660706877708435, "learning_rate": 5.1056903791213265e-06, "loss": 0.1654, "step": 30590 }, { "epoch": 0.6629038199033649, "grad_norm": 1.6156349182128906, "learning_rate": 5.1027227072666694e-06, "loss": 0.1485, "step": 30595 }, { "epoch": 0.6630121552228457, "grad_norm": 1.2271922826766968, "learning_rate": 5.099755602688229e-06, "loss": 0.1873, "step": 30600 }, { "epoch": 0.6631204905423266, "grad_norm": 1.7323147058486938, "learning_rate": 5.096789065729692e-06, "loss": 0.2343, "step": 30605 }, { "epoch": 0.6632288258618074, "grad_norm": 2.1687846183776855, "learning_rate": 5.093823096734697e-06, "loss": 0.1805, "step": 30610 }, { "epoch": 0.6633371611812883, "grad_norm": 0.8406171798706055, "learning_rate": 5.090857696046797e-06, "loss": 0.1403, "step": 30615 }, { "epoch": 0.6634454965007692, "grad_norm": 1.08815336227417, "learning_rate": 5.087892864009494e-06, "loss": 0.1587, "step": 30620 }, { "epoch": 0.6635538318202501, "grad_norm": 2.3212928771972656, "learning_rate": 5.084928600966224e-06, "loss": 0.18, "step": 30625 }, { "epoch": 0.6636621671397309, "grad_norm": 2.047114849090576, "learning_rate": 5.081964907260342e-06, "loss": 0.1257, "step": 30630 }, { "epoch": 0.6637705024592118, "grad_norm": 1.9743729829788208, "learning_rate": 5.079001783235158e-06, "loss": 0.2652, "step": 30635 }, { "epoch": 0.6638788377786926, "grad_norm": 1.0830386877059937, "learning_rate": 5.076039229233898e-06, "loss": 0.2048, "step": 30640 }, { "epoch": 0.6639871730981735, "grad_norm": 1.329552412033081, "learning_rate": 5.073077245599736e-06, "loss": 0.1313, "step": 30645 }, { "epoch": 0.6640955084176543, "grad_norm": 1.768899917602539, "learning_rate": 5.070115832675765e-06, "loss": 0.1603, "step": 30650 }, { "epoch": 0.6642038437371351, "grad_norm": 1.169012427330017, "learning_rate": 5.06715499080503e-06, "loss": 0.1124, "step": 30655 }, { "epoch": 0.664312179056616, "grad_norm": 3.1449973583221436, "learning_rate": 5.064194720330491e-06, "loss": 0.1691, "step": 30660 }, { "epoch": 0.664420514376097, "grad_norm": 1.0931904315948486, "learning_rate": 5.061235021595055e-06, "loss": 0.1471, "step": 30665 }, { "epoch": 0.6645288496955778, "grad_norm": 1.4240703582763672, "learning_rate": 5.058275894941562e-06, "loss": 0.1461, "step": 30670 }, { "epoch": 0.6646371850150586, "grad_norm": 2.0811407566070557, "learning_rate": 5.055317340712778e-06, "loss": 0.2, "step": 30675 }, { "epoch": 0.6647455203345395, "grad_norm": 1.3647111654281616, "learning_rate": 5.052359359251411e-06, "loss": 0.1446, "step": 30680 }, { "epoch": 0.6648538556540203, "grad_norm": 2.290928602218628, "learning_rate": 5.049401950900094e-06, "loss": 0.2123, "step": 30685 }, { "epoch": 0.6649621909735012, "grad_norm": 1.2141867876052856, "learning_rate": 5.046445116001404e-06, "loss": 0.2346, "step": 30690 }, { "epoch": 0.665070526292982, "grad_norm": 0.8648304343223572, "learning_rate": 5.0434888548978385e-06, "loss": 0.1082, "step": 30695 }, { "epoch": 0.6651788616124629, "grad_norm": 0.4948769807815552, "learning_rate": 5.0405331679318455e-06, "loss": 0.1231, "step": 30700 }, { "epoch": 0.6652871969319437, "grad_norm": 1.1759705543518066, "learning_rate": 5.037578055445789e-06, "loss": 0.1676, "step": 30705 }, { "epoch": 0.6653955322514246, "grad_norm": 2.0250465869903564, "learning_rate": 5.0346235177819805e-06, "loss": 0.1622, "step": 30710 }, { "epoch": 0.6655038675709055, "grad_norm": 1.2044339179992676, "learning_rate": 5.031669555282653e-06, "loss": 0.1546, "step": 30715 }, { "epoch": 0.6656122028903864, "grad_norm": 1.7676477432250977, "learning_rate": 5.02871616828998e-06, "loss": 0.1895, "step": 30720 }, { "epoch": 0.6657205382098672, "grad_norm": 1.5955604314804077, "learning_rate": 5.025763357146074e-06, "loss": 0.2446, "step": 30725 }, { "epoch": 0.665828873529348, "grad_norm": 1.34870183467865, "learning_rate": 5.022811122192965e-06, "loss": 0.1098, "step": 30730 }, { "epoch": 0.6659372088488289, "grad_norm": 2.816826105117798, "learning_rate": 5.019859463772634e-06, "loss": 0.2764, "step": 30735 }, { "epoch": 0.6660455441683097, "grad_norm": 1.4575875997543335, "learning_rate": 5.016908382226977e-06, "loss": 0.1628, "step": 30740 }, { "epoch": 0.6661538794877906, "grad_norm": 1.5519009828567505, "learning_rate": 5.0139578778978415e-06, "loss": 0.1657, "step": 30745 }, { "epoch": 0.6662622148072714, "grad_norm": 2.132857322692871, "learning_rate": 5.011007951126996e-06, "loss": 0.139, "step": 30750 }, { "epoch": 0.6663705501267523, "grad_norm": 1.8975750207901, "learning_rate": 5.0080586022561385e-06, "loss": 0.1674, "step": 30755 }, { "epoch": 0.6664788854462331, "grad_norm": 1.7105363607406616, "learning_rate": 5.005109831626917e-06, "loss": 0.1851, "step": 30760 }, { "epoch": 0.6665872207657141, "grad_norm": 1.230176568031311, "learning_rate": 5.002161639580891e-06, "loss": 0.1596, "step": 30765 }, { "epoch": 0.6666955560851949, "grad_norm": 1.1381944417953491, "learning_rate": 4.999214026459578e-06, "loss": 0.2479, "step": 30770 }, { "epoch": 0.6668038914046758, "grad_norm": 1.3161593675613403, "learning_rate": 4.996266992604405e-06, "loss": 0.1778, "step": 30775 }, { "epoch": 0.6669122267241566, "grad_norm": 1.7503145933151245, "learning_rate": 4.99332053835675e-06, "loss": 0.1017, "step": 30780 }, { "epoch": 0.6670205620436375, "grad_norm": 2.0785112380981445, "learning_rate": 4.990374664057908e-06, "loss": 0.2204, "step": 30785 }, { "epoch": 0.6671288973631183, "grad_norm": 2.020639419555664, "learning_rate": 4.987429370049116e-06, "loss": 0.1843, "step": 30790 }, { "epoch": 0.6672372326825992, "grad_norm": 1.4715678691864014, "learning_rate": 4.984484656671545e-06, "loss": 0.2145, "step": 30795 }, { "epoch": 0.66734556800208, "grad_norm": 1.345439076423645, "learning_rate": 4.981540524266292e-06, "loss": 0.1542, "step": 30800 }, { "epoch": 0.6674539033215608, "grad_norm": 0.6899699568748474, "learning_rate": 4.978596973174395e-06, "loss": 0.1687, "step": 30805 }, { "epoch": 0.6675622386410418, "grad_norm": 1.7076705694198608, "learning_rate": 4.975654003736811e-06, "loss": 0.2159, "step": 30810 }, { "epoch": 0.6676705739605227, "grad_norm": 1.955897569656372, "learning_rate": 4.972711616294454e-06, "loss": 0.1965, "step": 30815 }, { "epoch": 0.6677789092800035, "grad_norm": 1.8801995515823364, "learning_rate": 4.969769811188142e-06, "loss": 0.2284, "step": 30820 }, { "epoch": 0.6678872445994843, "grad_norm": 1.1447325944900513, "learning_rate": 4.96682858875865e-06, "loss": 0.1596, "step": 30825 }, { "epoch": 0.6679955799189652, "grad_norm": 1.3779199123382568, "learning_rate": 4.963887949346669e-06, "loss": 0.1527, "step": 30830 }, { "epoch": 0.668103915238446, "grad_norm": 0.8160905838012695, "learning_rate": 4.960947893292824e-06, "loss": 0.12, "step": 30835 }, { "epoch": 0.6682122505579269, "grad_norm": 1.4964215755462646, "learning_rate": 4.9580084209376835e-06, "loss": 0.1469, "step": 30840 }, { "epoch": 0.6683205858774077, "grad_norm": 1.0681170225143433, "learning_rate": 4.955069532621736e-06, "loss": 0.1984, "step": 30845 }, { "epoch": 0.6684289211968886, "grad_norm": 1.7827632427215576, "learning_rate": 4.952131228685413e-06, "loss": 0.1751, "step": 30850 }, { "epoch": 0.6685372565163694, "grad_norm": 1.2046501636505127, "learning_rate": 4.9491935094690666e-06, "loss": 0.1278, "step": 30855 }, { "epoch": 0.6686455918358504, "grad_norm": 1.338572382926941, "learning_rate": 4.946256375312991e-06, "loss": 0.1824, "step": 30860 }, { "epoch": 0.6687539271553312, "grad_norm": 1.4570575952529907, "learning_rate": 4.943319826557413e-06, "loss": 0.2053, "step": 30865 }, { "epoch": 0.6688622624748121, "grad_norm": 1.4989523887634277, "learning_rate": 4.94038386354248e-06, "loss": 0.2059, "step": 30870 }, { "epoch": 0.6689705977942929, "grad_norm": 1.5492600202560425, "learning_rate": 4.9374484866082885e-06, "loss": 0.1536, "step": 30875 }, { "epoch": 0.6690789331137738, "grad_norm": 1.0550957918167114, "learning_rate": 4.934513696094847e-06, "loss": 0.1914, "step": 30880 }, { "epoch": 0.6691872684332546, "grad_norm": 1.961944341659546, "learning_rate": 4.9315794923421175e-06, "loss": 0.1971, "step": 30885 }, { "epoch": 0.6692956037527354, "grad_norm": 1.1772806644439697, "learning_rate": 4.9286458756899755e-06, "loss": 0.1498, "step": 30890 }, { "epoch": 0.6694039390722163, "grad_norm": 1.5821030139923096, "learning_rate": 4.925712846478244e-06, "loss": 0.1984, "step": 30895 }, { "epoch": 0.6695122743916971, "grad_norm": 2.4294750690460205, "learning_rate": 4.922780405046662e-06, "loss": 0.2295, "step": 30900 }, { "epoch": 0.669620609711178, "grad_norm": 2.216599225997925, "learning_rate": 4.919848551734917e-06, "loss": 0.1979, "step": 30905 }, { "epoch": 0.6697289450306589, "grad_norm": 2.9654572010040283, "learning_rate": 4.916917286882613e-06, "loss": 0.1701, "step": 30910 }, { "epoch": 0.6698372803501398, "grad_norm": 1.9800715446472168, "learning_rate": 4.913986610829296e-06, "loss": 0.145, "step": 30915 }, { "epoch": 0.6699456156696206, "grad_norm": 0.771828293800354, "learning_rate": 4.911056523914447e-06, "loss": 0.16, "step": 30920 }, { "epoch": 0.6700539509891015, "grad_norm": 1.7897270917892456, "learning_rate": 4.908127026477462e-06, "loss": 0.1676, "step": 30925 }, { "epoch": 0.6701622863085823, "grad_norm": 2.462796688079834, "learning_rate": 4.905198118857689e-06, "loss": 0.1783, "step": 30930 }, { "epoch": 0.6702706216280632, "grad_norm": 0.8682758808135986, "learning_rate": 4.902269801394392e-06, "loss": 0.1495, "step": 30935 }, { "epoch": 0.670378956947544, "grad_norm": 1.1649905443191528, "learning_rate": 4.899342074426775e-06, "loss": 0.1762, "step": 30940 }, { "epoch": 0.6704872922670249, "grad_norm": 1.5047918558120728, "learning_rate": 4.8964149382939696e-06, "loss": 0.2089, "step": 30945 }, { "epoch": 0.6705956275865057, "grad_norm": 1.390893578529358, "learning_rate": 4.893488393335044e-06, "loss": 0.0577, "step": 30950 }, { "epoch": 0.6707039629059867, "grad_norm": 1.4696696996688843, "learning_rate": 4.890562439888989e-06, "loss": 0.2273, "step": 30955 }, { "epoch": 0.6708122982254675, "grad_norm": 1.3534561395645142, "learning_rate": 4.887637078294737e-06, "loss": 0.1071, "step": 30960 }, { "epoch": 0.6709206335449484, "grad_norm": 1.1974273920059204, "learning_rate": 4.88471230889115e-06, "loss": 0.1694, "step": 30965 }, { "epoch": 0.6710289688644292, "grad_norm": 1.6973230838775635, "learning_rate": 4.88178813201701e-06, "loss": 0.1077, "step": 30970 }, { "epoch": 0.67113730418391, "grad_norm": 1.9710534811019897, "learning_rate": 4.878864548011048e-06, "loss": 0.1857, "step": 30975 }, { "epoch": 0.6712456395033909, "grad_norm": 1.8017486333847046, "learning_rate": 4.875941557211911e-06, "loss": 0.1168, "step": 30980 }, { "epoch": 0.6713539748228717, "grad_norm": 2.1913599967956543, "learning_rate": 4.8730191599581886e-06, "loss": 0.2012, "step": 30985 }, { "epoch": 0.6714623101423526, "grad_norm": 2.0255799293518066, "learning_rate": 4.870097356588391e-06, "loss": 0.1729, "step": 30990 }, { "epoch": 0.6715706454618334, "grad_norm": 2.0055649280548096, "learning_rate": 4.867176147440973e-06, "loss": 0.2034, "step": 30995 }, { "epoch": 0.6716789807813143, "grad_norm": 1.5599733591079712, "learning_rate": 4.864255532854308e-06, "loss": 0.1966, "step": 31000 }, { "epoch": 0.6717873161007952, "grad_norm": 1.2789700031280518, "learning_rate": 4.861335513166699e-06, "loss": 0.1838, "step": 31005 }, { "epoch": 0.6718956514202761, "grad_norm": 0.9168071150779724, "learning_rate": 4.858416088716401e-06, "loss": 0.1232, "step": 31010 }, { "epoch": 0.6720039867397569, "grad_norm": 2.0388808250427246, "learning_rate": 4.855497259841575e-06, "loss": 0.1745, "step": 31015 }, { "epoch": 0.6721123220592378, "grad_norm": 1.1504876613616943, "learning_rate": 4.85257902688033e-06, "loss": 0.113, "step": 31020 }, { "epoch": 0.6722206573787186, "grad_norm": 1.4968111515045166, "learning_rate": 4.849661390170693e-06, "loss": 0.1761, "step": 31025 }, { "epoch": 0.6723289926981995, "grad_norm": 1.2291297912597656, "learning_rate": 4.846744350050635e-06, "loss": 0.1942, "step": 31030 }, { "epoch": 0.6724373280176803, "grad_norm": 1.3159313201904297, "learning_rate": 4.843827906858048e-06, "loss": 0.2329, "step": 31035 }, { "epoch": 0.6725456633371611, "grad_norm": 1.1230846643447876, "learning_rate": 4.840912060930756e-06, "loss": 0.1838, "step": 31040 }, { "epoch": 0.672653998656642, "grad_norm": 1.990231990814209, "learning_rate": 4.837996812606521e-06, "loss": 0.2682, "step": 31045 }, { "epoch": 0.672762333976123, "grad_norm": 1.2677509784698486, "learning_rate": 4.835082162223021e-06, "loss": 0.1564, "step": 31050 }, { "epoch": 0.6728706692956038, "grad_norm": 1.590033769607544, "learning_rate": 4.832168110117891e-06, "loss": 0.1725, "step": 31055 }, { "epoch": 0.6729790046150846, "grad_norm": 1.5040497779846191, "learning_rate": 4.8292546566286665e-06, "loss": 0.1767, "step": 31060 }, { "epoch": 0.6730873399345655, "grad_norm": 2.0966031551361084, "learning_rate": 4.826341802092836e-06, "loss": 0.2253, "step": 31065 }, { "epoch": 0.6731956752540463, "grad_norm": 1.0892565250396729, "learning_rate": 4.823429546847808e-06, "loss": 0.1675, "step": 31070 }, { "epoch": 0.6733040105735272, "grad_norm": 1.9228928089141846, "learning_rate": 4.820517891230916e-06, "loss": 0.1869, "step": 31075 }, { "epoch": 0.673412345893008, "grad_norm": 2.3505465984344482, "learning_rate": 4.817606835579442e-06, "loss": 0.224, "step": 31080 }, { "epoch": 0.6735206812124889, "grad_norm": 1.0952519178390503, "learning_rate": 4.814696380230582e-06, "loss": 0.1625, "step": 31085 }, { "epoch": 0.6736290165319697, "grad_norm": 1.41488778591156, "learning_rate": 4.811786525521471e-06, "loss": 0.1871, "step": 31090 }, { "epoch": 0.6737373518514506, "grad_norm": 1.7280004024505615, "learning_rate": 4.80887727178917e-06, "loss": 0.2985, "step": 31095 }, { "epoch": 0.6738456871709315, "grad_norm": 1.7432055473327637, "learning_rate": 4.8059686193706735e-06, "loss": 0.189, "step": 31100 }, { "epoch": 0.6739540224904124, "grad_norm": 1.0672574043273926, "learning_rate": 4.8030605686029095e-06, "loss": 0.155, "step": 31105 }, { "epoch": 0.6740623578098932, "grad_norm": 1.4605114459991455, "learning_rate": 4.800153119822725e-06, "loss": 0.1271, "step": 31110 }, { "epoch": 0.674170693129374, "grad_norm": 1.8608779907226562, "learning_rate": 4.797246273366911e-06, "loss": 0.1489, "step": 31115 }, { "epoch": 0.6742790284488549, "grad_norm": 1.4089173078536987, "learning_rate": 4.794340029572175e-06, "loss": 0.2507, "step": 31120 }, { "epoch": 0.6743873637683357, "grad_norm": 0.5635206699371338, "learning_rate": 4.791434388775172e-06, "loss": 0.1584, "step": 31125 }, { "epoch": 0.6744956990878166, "grad_norm": 1.48989737033844, "learning_rate": 4.788529351312464e-06, "loss": 0.1479, "step": 31130 }, { "epoch": 0.6746040344072974, "grad_norm": 1.885077714920044, "learning_rate": 4.785624917520568e-06, "loss": 0.1782, "step": 31135 }, { "epoch": 0.6747123697267783, "grad_norm": 3.6144039630889893, "learning_rate": 4.78272108773591e-06, "loss": 0.2351, "step": 31140 }, { "epoch": 0.6748207050462591, "grad_norm": 1.6365430355072021, "learning_rate": 4.779817862294863e-06, "loss": 0.1767, "step": 31145 }, { "epoch": 0.6749290403657401, "grad_norm": 1.7572071552276611, "learning_rate": 4.776915241533715e-06, "loss": 0.1805, "step": 31150 }, { "epoch": 0.6750373756852209, "grad_norm": 1.6677263975143433, "learning_rate": 4.774013225788694e-06, "loss": 0.1836, "step": 31155 }, { "epoch": 0.6751457110047018, "grad_norm": 1.484261393547058, "learning_rate": 4.771111815395959e-06, "loss": 0.1636, "step": 31160 }, { "epoch": 0.6752540463241826, "grad_norm": 1.35662841796875, "learning_rate": 4.768211010691588e-06, "loss": 0.2465, "step": 31165 }, { "epoch": 0.6753623816436635, "grad_norm": 1.8699969053268433, "learning_rate": 4.765310812011602e-06, "loss": 0.1392, "step": 31170 }, { "epoch": 0.6754707169631443, "grad_norm": 1.8849432468414307, "learning_rate": 4.76241121969194e-06, "loss": 0.1957, "step": 31175 }, { "epoch": 0.6755790522826252, "grad_norm": 1.7973172664642334, "learning_rate": 4.759512234068483e-06, "loss": 0.1397, "step": 31180 }, { "epoch": 0.675687387602106, "grad_norm": 1.4039863348007202, "learning_rate": 4.756613855477028e-06, "loss": 0.1701, "step": 31185 }, { "epoch": 0.6757957229215868, "grad_norm": 0.44588425755500793, "learning_rate": 4.753716084253315e-06, "loss": 0.1258, "step": 31190 }, { "epoch": 0.6759040582410678, "grad_norm": 1.0582635402679443, "learning_rate": 4.750818920733001e-06, "loss": 0.1524, "step": 31195 }, { "epoch": 0.6760123935605487, "grad_norm": 1.6966825723648071, "learning_rate": 4.74792236525168e-06, "loss": 0.2277, "step": 31200 }, { "epoch": 0.6761207288800295, "grad_norm": 0.9229334592819214, "learning_rate": 4.7450264181448835e-06, "loss": 0.1365, "step": 31205 }, { "epoch": 0.6762290641995103, "grad_norm": 1.247519612312317, "learning_rate": 4.742131079748052e-06, "loss": 0.125, "step": 31210 }, { "epoch": 0.6763373995189912, "grad_norm": 1.4037162065505981, "learning_rate": 4.739236350396575e-06, "loss": 0.1312, "step": 31215 }, { "epoch": 0.676445734838472, "grad_norm": 1.35813570022583, "learning_rate": 4.736342230425758e-06, "loss": 0.1719, "step": 31220 }, { "epoch": 0.6765540701579529, "grad_norm": 1.51543390750885, "learning_rate": 4.733448720170848e-06, "loss": 0.2101, "step": 31225 }, { "epoch": 0.6766624054774337, "grad_norm": 1.415773630142212, "learning_rate": 4.730555819967007e-06, "loss": 0.1663, "step": 31230 }, { "epoch": 0.6767707407969146, "grad_norm": 2.0944454669952393, "learning_rate": 4.7276635301493405e-06, "loss": 0.1825, "step": 31235 }, { "epoch": 0.6768790761163954, "grad_norm": 0.9850000143051147, "learning_rate": 4.724771851052875e-06, "loss": 0.1753, "step": 31240 }, { "epoch": 0.6769874114358764, "grad_norm": 1.8831993341445923, "learning_rate": 4.721880783012561e-06, "loss": 0.1523, "step": 31245 }, { "epoch": 0.6770957467553572, "grad_norm": 1.9589134454727173, "learning_rate": 4.718990326363298e-06, "loss": 0.1064, "step": 31250 }, { "epoch": 0.6772040820748381, "grad_norm": 1.669472098350525, "learning_rate": 4.716100481439894e-06, "loss": 0.2319, "step": 31255 }, { "epoch": 0.6773124173943189, "grad_norm": 2.337367057800293, "learning_rate": 4.7132112485771e-06, "loss": 0.1508, "step": 31260 }, { "epoch": 0.6774207527137998, "grad_norm": 0.8950393199920654, "learning_rate": 4.710322628109582e-06, "loss": 0.2077, "step": 31265 }, { "epoch": 0.6775290880332806, "grad_norm": 1.6408774852752686, "learning_rate": 4.707434620371952e-06, "loss": 0.1357, "step": 31270 }, { "epoch": 0.6776374233527614, "grad_norm": 1.4826569557189941, "learning_rate": 4.7045472256987405e-06, "loss": 0.193, "step": 31275 }, { "epoch": 0.6777457586722423, "grad_norm": 1.4640495777130127, "learning_rate": 4.701660444424401e-06, "loss": 0.1265, "step": 31280 }, { "epoch": 0.6778540939917231, "grad_norm": 1.4109548330307007, "learning_rate": 4.698774276883334e-06, "loss": 0.1894, "step": 31285 }, { "epoch": 0.677962429311204, "grad_norm": 1.359488844871521, "learning_rate": 4.695888723409851e-06, "loss": 0.1815, "step": 31290 }, { "epoch": 0.6780707646306849, "grad_norm": 2.165471315383911, "learning_rate": 4.693003784338205e-06, "loss": 0.2175, "step": 31295 }, { "epoch": 0.6781790999501658, "grad_norm": 1.4026352167129517, "learning_rate": 4.690119460002571e-06, "loss": 0.1156, "step": 31300 }, { "epoch": 0.6782874352696466, "grad_norm": 0.8730936646461487, "learning_rate": 4.687235750737059e-06, "loss": 0.1705, "step": 31305 }, { "epoch": 0.6783957705891275, "grad_norm": 1.2978886365890503, "learning_rate": 4.684352656875701e-06, "loss": 0.2234, "step": 31310 }, { "epoch": 0.6785041059086083, "grad_norm": 0.7396539449691772, "learning_rate": 4.681470178752455e-06, "loss": 0.1544, "step": 31315 }, { "epoch": 0.6786124412280892, "grad_norm": 1.1268737316131592, "learning_rate": 4.67858831670122e-06, "loss": 0.0944, "step": 31320 }, { "epoch": 0.67872077654757, "grad_norm": 0.7617799043655396, "learning_rate": 4.675707071055812e-06, "loss": 0.155, "step": 31325 }, { "epoch": 0.6788291118670509, "grad_norm": 1.1361474990844727, "learning_rate": 4.6728264421499845e-06, "loss": 0.1609, "step": 31330 }, { "epoch": 0.6789374471865317, "grad_norm": 1.3055274486541748, "learning_rate": 4.6699464303174095e-06, "loss": 0.1452, "step": 31335 }, { "epoch": 0.6790457825060127, "grad_norm": 1.3429101705551147, "learning_rate": 4.667067035891695e-06, "loss": 0.2699, "step": 31340 }, { "epoch": 0.6791541178254935, "grad_norm": 1.7821842432022095, "learning_rate": 4.664188259206381e-06, "loss": 0.2195, "step": 31345 }, { "epoch": 0.6792624531449744, "grad_norm": 0.9331972002983093, "learning_rate": 4.661310100594925e-06, "loss": 0.2448, "step": 31350 }, { "epoch": 0.6793707884644552, "grad_norm": 1.2121020555496216, "learning_rate": 4.6584325603907224e-06, "loss": 0.1667, "step": 31355 }, { "epoch": 0.679479123783936, "grad_norm": 1.7740435600280762, "learning_rate": 4.655555638927087e-06, "loss": 0.1318, "step": 31360 }, { "epoch": 0.6795874591034169, "grad_norm": 1.9844890832901, "learning_rate": 4.6526793365372755e-06, "loss": 0.1567, "step": 31365 }, { "epoch": 0.6796957944228977, "grad_norm": 1.2095969915390015, "learning_rate": 4.6498036535544554e-06, "loss": 0.1244, "step": 31370 }, { "epoch": 0.6798041297423786, "grad_norm": 1.1874462366104126, "learning_rate": 4.646928590311741e-06, "loss": 0.1825, "step": 31375 }, { "epoch": 0.6799124650618594, "grad_norm": 1.273433804512024, "learning_rate": 4.644054147142157e-06, "loss": 0.2038, "step": 31380 }, { "epoch": 0.6800208003813403, "grad_norm": 1.7993870973587036, "learning_rate": 4.64118032437867e-06, "loss": 0.1755, "step": 31385 }, { "epoch": 0.6801291357008212, "grad_norm": 1.5632123947143555, "learning_rate": 4.638307122354164e-06, "loss": 0.1634, "step": 31390 }, { "epoch": 0.6802374710203021, "grad_norm": 1.281166672706604, "learning_rate": 4.63543454140146e-06, "loss": 0.1727, "step": 31395 }, { "epoch": 0.6803458063397829, "grad_norm": 3.129054069519043, "learning_rate": 4.632562581853307e-06, "loss": 0.1781, "step": 31400 }, { "epoch": 0.6804541416592638, "grad_norm": 1.089983344078064, "learning_rate": 4.629691244042371e-06, "loss": 0.1362, "step": 31405 }, { "epoch": 0.6805624769787446, "grad_norm": 1.1399915218353271, "learning_rate": 4.626820528301261e-06, "loss": 0.2025, "step": 31410 }, { "epoch": 0.6806708122982255, "grad_norm": 2.2134554386138916, "learning_rate": 4.6239504349625e-06, "loss": 0.1327, "step": 31415 }, { "epoch": 0.6807791476177063, "grad_norm": 1.3662163019180298, "learning_rate": 4.6210809643585496e-06, "loss": 0.0871, "step": 31420 }, { "epoch": 0.6808874829371871, "grad_norm": 2.747835874557495, "learning_rate": 4.618212116821791e-06, "loss": 0.1826, "step": 31425 }, { "epoch": 0.680995818256668, "grad_norm": 1.0316542387008667, "learning_rate": 4.615343892684542e-06, "loss": 0.1307, "step": 31430 }, { "epoch": 0.6811041535761488, "grad_norm": 1.4296025037765503, "learning_rate": 4.6124762922790375e-06, "loss": 0.2658, "step": 31435 }, { "epoch": 0.6812124888956298, "grad_norm": 2.291621446609497, "learning_rate": 4.60960931593745e-06, "loss": 0.2029, "step": 31440 }, { "epoch": 0.6813208242151106, "grad_norm": 1.5906627178192139, "learning_rate": 4.6067429639918785e-06, "loss": 0.2145, "step": 31445 }, { "epoch": 0.6814291595345915, "grad_norm": 0.7117305397987366, "learning_rate": 4.60387723677434e-06, "loss": 0.1389, "step": 31450 }, { "epoch": 0.6815374948540723, "grad_norm": 1.401376724243164, "learning_rate": 4.601012134616793e-06, "loss": 0.201, "step": 31455 }, { "epoch": 0.6816458301735532, "grad_norm": 1.551045536994934, "learning_rate": 4.598147657851111e-06, "loss": 0.2292, "step": 31460 }, { "epoch": 0.681754165493034, "grad_norm": 1.5077987909317017, "learning_rate": 4.595283806809105e-06, "loss": 0.1831, "step": 31465 }, { "epoch": 0.6818625008125149, "grad_norm": 1.0404939651489258, "learning_rate": 4.592420581822506e-06, "loss": 0.1896, "step": 31470 }, { "epoch": 0.6819708361319957, "grad_norm": 0.8272004127502441, "learning_rate": 4.589557983222979e-06, "loss": 0.107, "step": 31475 }, { "epoch": 0.6820791714514766, "grad_norm": 1.3252570629119873, "learning_rate": 4.586696011342111e-06, "loss": 0.1188, "step": 31480 }, { "epoch": 0.6821875067709575, "grad_norm": 0.634502112865448, "learning_rate": 4.583834666511412e-06, "loss": 0.2531, "step": 31485 }, { "epoch": 0.6822958420904384, "grad_norm": 1.2221810817718506, "learning_rate": 4.580973949062339e-06, "loss": 0.1713, "step": 31490 }, { "epoch": 0.6824041774099192, "grad_norm": 2.280965566635132, "learning_rate": 4.578113859326255e-06, "loss": 0.2199, "step": 31495 }, { "epoch": 0.6825125127294001, "grad_norm": 1.3656903505325317, "learning_rate": 4.575254397634463e-06, "loss": 0.0866, "step": 31500 }, { "epoch": 0.6826208480488809, "grad_norm": 2.799903154373169, "learning_rate": 4.572395564318184e-06, "loss": 0.1336, "step": 31505 }, { "epoch": 0.6827291833683617, "grad_norm": 0.9039177298545837, "learning_rate": 4.569537359708576e-06, "loss": 0.1041, "step": 31510 }, { "epoch": 0.6828375186878426, "grad_norm": 1.4366469383239746, "learning_rate": 4.566679784136717e-06, "loss": 0.1415, "step": 31515 }, { "epoch": 0.6829458540073234, "grad_norm": 1.1505141258239746, "learning_rate": 4.56382283793361e-06, "loss": 0.1305, "step": 31520 }, { "epoch": 0.6830541893268043, "grad_norm": 1.8990837335586548, "learning_rate": 4.560966521430197e-06, "loss": 0.1746, "step": 31525 }, { "epoch": 0.6831625246462851, "grad_norm": 1.0118402242660522, "learning_rate": 4.5581108349573325e-06, "loss": 0.0978, "step": 31530 }, { "epoch": 0.6832708599657661, "grad_norm": 1.7631759643554688, "learning_rate": 4.555255778845807e-06, "loss": 0.2551, "step": 31535 }, { "epoch": 0.6833791952852469, "grad_norm": 1.310581088066101, "learning_rate": 4.552401353426339e-06, "loss": 0.1727, "step": 31540 }, { "epoch": 0.6834875306047278, "grad_norm": 2.742238998413086, "learning_rate": 4.549547559029571e-06, "loss": 0.2171, "step": 31545 }, { "epoch": 0.6835958659242086, "grad_norm": 1.6400312185287476, "learning_rate": 4.546694395986072e-06, "loss": 0.2, "step": 31550 }, { "epoch": 0.6837042012436895, "grad_norm": 1.3848406076431274, "learning_rate": 4.543841864626332e-06, "loss": 0.1414, "step": 31555 }, { "epoch": 0.6838125365631703, "grad_norm": 1.5587974786758423, "learning_rate": 4.540989965280784e-06, "loss": 0.1206, "step": 31560 }, { "epoch": 0.6839208718826512, "grad_norm": 1.9025362730026245, "learning_rate": 4.538138698279767e-06, "loss": 0.1455, "step": 31565 }, { "epoch": 0.684029207202132, "grad_norm": 1.460448980331421, "learning_rate": 4.535288063953568e-06, "loss": 0.1745, "step": 31570 }, { "epoch": 0.6841375425216129, "grad_norm": 1.950728416442871, "learning_rate": 4.5324380626323815e-06, "loss": 0.0946, "step": 31575 }, { "epoch": 0.6842458778410938, "grad_norm": 1.7626079320907593, "learning_rate": 4.529588694646342e-06, "loss": 0.1998, "step": 31580 }, { "epoch": 0.6843542131605747, "grad_norm": 1.677567481994629, "learning_rate": 4.526739960325508e-06, "loss": 0.2279, "step": 31585 }, { "epoch": 0.6844625484800555, "grad_norm": 2.092221975326538, "learning_rate": 4.523891859999857e-06, "loss": 0.2124, "step": 31590 }, { "epoch": 0.6845708837995363, "grad_norm": 1.3344464302062988, "learning_rate": 4.521044393999306e-06, "loss": 0.1973, "step": 31595 }, { "epoch": 0.6846792191190172, "grad_norm": 1.178155541419983, "learning_rate": 4.518197562653682e-06, "loss": 0.107, "step": 31600 }, { "epoch": 0.684787554438498, "grad_norm": 1.2037689685821533, "learning_rate": 4.515351366292758e-06, "loss": 0.3445, "step": 31605 }, { "epoch": 0.6848958897579789, "grad_norm": 1.3782567977905273, "learning_rate": 4.512505805246215e-06, "loss": 0.1876, "step": 31610 }, { "epoch": 0.6850042250774597, "grad_norm": 2.358546495437622, "learning_rate": 4.509660879843674e-06, "loss": 0.2569, "step": 31615 }, { "epoch": 0.6851125603969406, "grad_norm": 1.2803131341934204, "learning_rate": 4.506816590414671e-06, "loss": 0.1869, "step": 31620 }, { "epoch": 0.6852208957164214, "grad_norm": 1.098071575164795, "learning_rate": 4.503972937288683e-06, "loss": 0.1666, "step": 31625 }, { "epoch": 0.6853292310359024, "grad_norm": 1.4021965265274048, "learning_rate": 4.5011299207950955e-06, "loss": 0.141, "step": 31630 }, { "epoch": 0.6854375663553832, "grad_norm": 1.02572762966156, "learning_rate": 4.498287541263234e-06, "loss": 0.2212, "step": 31635 }, { "epoch": 0.6855459016748641, "grad_norm": 1.2525776624679565, "learning_rate": 4.495445799022349e-06, "loss": 0.1187, "step": 31640 }, { "epoch": 0.6856542369943449, "grad_norm": 1.6922647953033447, "learning_rate": 4.492604694401606e-06, "loss": 0.1453, "step": 31645 }, { "epoch": 0.6857625723138258, "grad_norm": 1.2559157609939575, "learning_rate": 4.489764227730112e-06, "loss": 0.169, "step": 31650 }, { "epoch": 0.6858709076333066, "grad_norm": 0.832840621471405, "learning_rate": 4.486924399336885e-06, "loss": 0.2115, "step": 31655 }, { "epoch": 0.6859792429527874, "grad_norm": 1.7578495740890503, "learning_rate": 4.484085209550884e-06, "loss": 0.1483, "step": 31660 }, { "epoch": 0.6860875782722683, "grad_norm": 0.7921032905578613, "learning_rate": 4.48124665870098e-06, "loss": 0.1304, "step": 31665 }, { "epoch": 0.6861959135917491, "grad_norm": 1.9156357049942017, "learning_rate": 4.478408747115983e-06, "loss": 0.2024, "step": 31670 }, { "epoch": 0.68630424891123, "grad_norm": 2.0430407524108887, "learning_rate": 4.475571475124615e-06, "loss": 0.1685, "step": 31675 }, { "epoch": 0.6864125842307109, "grad_norm": 1.4730257987976074, "learning_rate": 4.472734843055536e-06, "loss": 0.1701, "step": 31680 }, { "epoch": 0.6865209195501918, "grad_norm": 1.7668935060501099, "learning_rate": 4.469898851237332e-06, "loss": 0.1523, "step": 31685 }, { "epoch": 0.6866292548696726, "grad_norm": 0.7875220775604248, "learning_rate": 4.4670634999985e-06, "loss": 0.1688, "step": 31690 }, { "epoch": 0.6867375901891535, "grad_norm": 1.5589399337768555, "learning_rate": 4.464228789667482e-06, "loss": 0.1605, "step": 31695 }, { "epoch": 0.6868459255086343, "grad_norm": 2.7213613986968994, "learning_rate": 4.4613947205726295e-06, "loss": 0.1925, "step": 31700 }, { "epoch": 0.6869542608281152, "grad_norm": 1.4757397174835205, "learning_rate": 4.458561293042234e-06, "loss": 0.1965, "step": 31705 }, { "epoch": 0.687062596147596, "grad_norm": 1.4114601612091064, "learning_rate": 4.455728507404499e-06, "loss": 0.264, "step": 31710 }, { "epoch": 0.6871709314670769, "grad_norm": 0.8820356726646423, "learning_rate": 4.452896363987566e-06, "loss": 0.1246, "step": 31715 }, { "epoch": 0.6872792667865577, "grad_norm": 2.091348171234131, "learning_rate": 4.4500648631194936e-06, "loss": 0.1329, "step": 31720 }, { "epoch": 0.6873876021060387, "grad_norm": 2.036198616027832, "learning_rate": 4.447234005128261e-06, "loss": 0.2278, "step": 31725 }, { "epoch": 0.6874959374255195, "grad_norm": 1.6280356645584106, "learning_rate": 4.444403790341797e-06, "loss": 0.1781, "step": 31730 }, { "epoch": 0.6876042727450004, "grad_norm": 1.6264773607254028, "learning_rate": 4.441574219087926e-06, "loss": 0.1718, "step": 31735 }, { "epoch": 0.6877126080644812, "grad_norm": 1.4500190019607544, "learning_rate": 4.438745291694422e-06, "loss": 0.1656, "step": 31740 }, { "epoch": 0.687820943383962, "grad_norm": 1.7671889066696167, "learning_rate": 4.435917008488963e-06, "loss": 0.2346, "step": 31745 }, { "epoch": 0.6879292787034429, "grad_norm": 1.5034608840942383, "learning_rate": 4.433089369799173e-06, "loss": 0.1708, "step": 31750 }, { "epoch": 0.6880376140229237, "grad_norm": 2.3742146492004395, "learning_rate": 4.430262375952588e-06, "loss": 0.1724, "step": 31755 }, { "epoch": 0.6881459493424046, "grad_norm": 1.356520652770996, "learning_rate": 4.427436027276667e-06, "loss": 0.1985, "step": 31760 }, { "epoch": 0.6882542846618854, "grad_norm": 1.4869657754898071, "learning_rate": 4.4246103240988095e-06, "loss": 0.1913, "step": 31765 }, { "epoch": 0.6883626199813663, "grad_norm": 1.4701282978057861, "learning_rate": 4.421785266746323e-06, "loss": 0.1563, "step": 31770 }, { "epoch": 0.6884709553008472, "grad_norm": 1.4962067604064941, "learning_rate": 4.418960855546451e-06, "loss": 0.2103, "step": 31775 }, { "epoch": 0.6885792906203281, "grad_norm": 2.6620776653289795, "learning_rate": 4.4161370908263616e-06, "loss": 0.1538, "step": 31780 }, { "epoch": 0.6886876259398089, "grad_norm": 0.7437504529953003, "learning_rate": 4.413313972913146e-06, "loss": 0.1926, "step": 31785 }, { "epoch": 0.6887959612592898, "grad_norm": 2.4823896884918213, "learning_rate": 4.410491502133819e-06, "loss": 0.1843, "step": 31790 }, { "epoch": 0.6889042965787706, "grad_norm": 1.2599791288375854, "learning_rate": 4.407669678815316e-06, "loss": 0.2222, "step": 31795 }, { "epoch": 0.6890126318982515, "grad_norm": 1.553673267364502, "learning_rate": 4.4048485032845125e-06, "loss": 0.1386, "step": 31800 }, { "epoch": 0.6891209672177323, "grad_norm": 1.9810267686843872, "learning_rate": 4.402027975868191e-06, "loss": 0.1599, "step": 31805 }, { "epoch": 0.6892293025372132, "grad_norm": 1.4815255403518677, "learning_rate": 4.3992080968930725e-06, "loss": 0.1738, "step": 31810 }, { "epoch": 0.689337637856694, "grad_norm": 0.952785313129425, "learning_rate": 4.396388866685794e-06, "loss": 0.2197, "step": 31815 }, { "epoch": 0.6894459731761748, "grad_norm": 1.0247548818588257, "learning_rate": 4.3935702855729266e-06, "loss": 0.1693, "step": 31820 }, { "epoch": 0.6895543084956558, "grad_norm": 1.1807467937469482, "learning_rate": 4.3907523538809505e-06, "loss": 0.1466, "step": 31825 }, { "epoch": 0.6896626438151366, "grad_norm": 1.6717149019241333, "learning_rate": 4.387935071936295e-06, "loss": 0.1757, "step": 31830 }, { "epoch": 0.6897709791346175, "grad_norm": 1.8586204051971436, "learning_rate": 4.3851184400652916e-06, "loss": 0.1467, "step": 31835 }, { "epoch": 0.6898793144540983, "grad_norm": 1.1955646276474, "learning_rate": 4.382302458594203e-06, "loss": 0.1471, "step": 31840 }, { "epoch": 0.6899876497735792, "grad_norm": 1.965441346168518, "learning_rate": 4.379487127849225e-06, "loss": 0.1425, "step": 31845 }, { "epoch": 0.69009598509306, "grad_norm": 1.7634892463684082, "learning_rate": 4.376672448156465e-06, "loss": 0.194, "step": 31850 }, { "epoch": 0.6902043204125409, "grad_norm": 1.82509446144104, "learning_rate": 4.373858419841967e-06, "loss": 0.2487, "step": 31855 }, { "epoch": 0.6903126557320217, "grad_norm": 1.530027985572815, "learning_rate": 4.371045043231688e-06, "loss": 0.1911, "step": 31860 }, { "epoch": 0.6904209910515026, "grad_norm": 1.7556915283203125, "learning_rate": 4.368232318651523e-06, "loss": 0.2155, "step": 31865 }, { "epoch": 0.6905293263709835, "grad_norm": 0.9130792021751404, "learning_rate": 4.365420246427276e-06, "loss": 0.1366, "step": 31870 }, { "epoch": 0.6906376616904644, "grad_norm": 1.421959638595581, "learning_rate": 4.362608826884688e-06, "loss": 0.1558, "step": 31875 }, { "epoch": 0.6907459970099452, "grad_norm": 0.5702622532844543, "learning_rate": 4.359798060349423e-06, "loss": 0.1245, "step": 31880 }, { "epoch": 0.6908543323294261, "grad_norm": 1.4506235122680664, "learning_rate": 4.356987947147059e-06, "loss": 0.1617, "step": 31885 }, { "epoch": 0.6909626676489069, "grad_norm": 1.473156213760376, "learning_rate": 4.354178487603111e-06, "loss": 0.2946, "step": 31890 }, { "epoch": 0.6910710029683877, "grad_norm": 0.9649934768676758, "learning_rate": 4.351369682043009e-06, "loss": 0.2023, "step": 31895 }, { "epoch": 0.6911793382878686, "grad_norm": 1.7576711177825928, "learning_rate": 4.348561530792116e-06, "loss": 0.1542, "step": 31900 }, { "epoch": 0.6912876736073494, "grad_norm": 1.3657249212265015, "learning_rate": 4.3457540341757075e-06, "loss": 0.1788, "step": 31905 }, { "epoch": 0.6913960089268303, "grad_norm": 1.9018319845199585, "learning_rate": 4.342947192518997e-06, "loss": 0.1768, "step": 31910 }, { "epoch": 0.6915043442463111, "grad_norm": 3.120879650115967, "learning_rate": 4.3401410061471085e-06, "loss": 0.093, "step": 31915 }, { "epoch": 0.6916126795657921, "grad_norm": 1.0144447088241577, "learning_rate": 4.3373354753850985e-06, "loss": 0.1768, "step": 31920 }, { "epoch": 0.6917210148852729, "grad_norm": 1.404564619064331, "learning_rate": 4.334530600557951e-06, "loss": 0.1943, "step": 31925 }, { "epoch": 0.6918293502047538, "grad_norm": 1.6230955123901367, "learning_rate": 4.331726381990562e-06, "loss": 0.1209, "step": 31930 }, { "epoch": 0.6919376855242346, "grad_norm": 1.7849318981170654, "learning_rate": 4.3289228200077634e-06, "loss": 0.2031, "step": 31935 }, { "epoch": 0.6920460208437155, "grad_norm": 1.7494903802871704, "learning_rate": 4.3261199149343e-06, "loss": 0.2239, "step": 31940 }, { "epoch": 0.6921543561631963, "grad_norm": 2.0186574459075928, "learning_rate": 4.323317667094854e-06, "loss": 0.2385, "step": 31945 }, { "epoch": 0.6922626914826772, "grad_norm": 1.6869252920150757, "learning_rate": 4.320516076814016e-06, "loss": 0.2294, "step": 31950 }, { "epoch": 0.692371026802158, "grad_norm": 1.9890265464782715, "learning_rate": 4.317715144416314e-06, "loss": 0.1743, "step": 31955 }, { "epoch": 0.6924793621216389, "grad_norm": 1.8636956214904785, "learning_rate": 4.31491487022619e-06, "loss": 0.2306, "step": 31960 }, { "epoch": 0.6925876974411197, "grad_norm": 1.1930440664291382, "learning_rate": 4.312115254568019e-06, "loss": 0.1581, "step": 31965 }, { "epoch": 0.6926960327606007, "grad_norm": 1.7267612218856812, "learning_rate": 4.309316297766088e-06, "loss": 0.226, "step": 31970 }, { "epoch": 0.6928043680800815, "grad_norm": 1.204607605934143, "learning_rate": 4.306518000144616e-06, "loss": 0.1993, "step": 31975 }, { "epoch": 0.6929127033995623, "grad_norm": 1.761127233505249, "learning_rate": 4.3037203620277504e-06, "loss": 0.1873, "step": 31980 }, { "epoch": 0.6930210387190432, "grad_norm": 1.0057765245437622, "learning_rate": 4.300923383739546e-06, "loss": 0.1583, "step": 31985 }, { "epoch": 0.693129374038524, "grad_norm": 1.5211783647537231, "learning_rate": 4.298127065603999e-06, "loss": 0.1202, "step": 31990 }, { "epoch": 0.6932377093580049, "grad_norm": 1.4644174575805664, "learning_rate": 4.295331407945014e-06, "loss": 0.1542, "step": 31995 }, { "epoch": 0.6933460446774857, "grad_norm": 1.4447308778762817, "learning_rate": 4.292536411086433e-06, "loss": 0.1806, "step": 32000 }, { "epoch": 0.6934543799969666, "grad_norm": 1.9322067499160767, "learning_rate": 4.289742075352012e-06, "loss": 0.2083, "step": 32005 }, { "epoch": 0.6935627153164474, "grad_norm": 1.539610743522644, "learning_rate": 4.2869484010654275e-06, "loss": 0.2007, "step": 32010 }, { "epoch": 0.6936710506359284, "grad_norm": 1.1972541809082031, "learning_rate": 4.2841553885502885e-06, "loss": 0.1338, "step": 32015 }, { "epoch": 0.6937793859554092, "grad_norm": 0.7966684699058533, "learning_rate": 4.281363038130126e-06, "loss": 0.1899, "step": 32020 }, { "epoch": 0.6938877212748901, "grad_norm": 0.9386897087097168, "learning_rate": 4.278571350128392e-06, "loss": 0.1894, "step": 32025 }, { "epoch": 0.6939960565943709, "grad_norm": 1.3746466636657715, "learning_rate": 4.275780324868458e-06, "loss": 0.2236, "step": 32030 }, { "epoch": 0.6941043919138518, "grad_norm": 1.0512347221374512, "learning_rate": 4.272989962673627e-06, "loss": 0.2308, "step": 32035 }, { "epoch": 0.6942127272333326, "grad_norm": 1.1245869398117065, "learning_rate": 4.2702002638671195e-06, "loss": 0.1523, "step": 32040 }, { "epoch": 0.6943210625528135, "grad_norm": 1.6534463167190552, "learning_rate": 4.267411228772074e-06, "loss": 0.2655, "step": 32045 }, { "epoch": 0.6944293978722943, "grad_norm": 1.0164227485656738, "learning_rate": 4.264622857711569e-06, "loss": 0.1609, "step": 32050 }, { "epoch": 0.6945377331917751, "grad_norm": 1.7957524061203003, "learning_rate": 4.261835151008585e-06, "loss": 0.2466, "step": 32055 }, { "epoch": 0.694646068511256, "grad_norm": 1.049125075340271, "learning_rate": 4.2590481089860444e-06, "loss": 0.1285, "step": 32060 }, { "epoch": 0.6947544038307369, "grad_norm": 1.740118145942688, "learning_rate": 4.256261731966775e-06, "loss": 0.1732, "step": 32065 }, { "epoch": 0.6948627391502178, "grad_norm": 2.252241611480713, "learning_rate": 4.253476020273549e-06, "loss": 0.26, "step": 32070 }, { "epoch": 0.6949710744696986, "grad_norm": 1.6274300813674927, "learning_rate": 4.250690974229044e-06, "loss": 0.1537, "step": 32075 }, { "epoch": 0.6950794097891795, "grad_norm": 1.0246831178665161, "learning_rate": 4.2479065941558604e-06, "loss": 0.181, "step": 32080 }, { "epoch": 0.6951877451086603, "grad_norm": 0.870398759841919, "learning_rate": 4.245122880376535e-06, "loss": 0.189, "step": 32085 }, { "epoch": 0.6952960804281412, "grad_norm": 1.1137088537216187, "learning_rate": 4.242339833213513e-06, "loss": 0.1493, "step": 32090 }, { "epoch": 0.695404415747622, "grad_norm": 1.3945046663284302, "learning_rate": 4.239557452989175e-06, "loss": 0.1804, "step": 32095 }, { "epoch": 0.6955127510671029, "grad_norm": 0.8689361214637756, "learning_rate": 4.236775740025811e-06, "loss": 0.138, "step": 32100 }, { "epoch": 0.6956210863865837, "grad_norm": 1.1836280822753906, "learning_rate": 4.2339946946456475e-06, "loss": 0.145, "step": 32105 }, { "epoch": 0.6957294217060647, "grad_norm": 2.0091235637664795, "learning_rate": 4.231214317170821e-06, "loss": 0.1344, "step": 32110 }, { "epoch": 0.6958377570255455, "grad_norm": 1.7762279510498047, "learning_rate": 4.2284346079234e-06, "loss": 0.2378, "step": 32115 }, { "epoch": 0.6959460923450264, "grad_norm": 1.3298864364624023, "learning_rate": 4.225655567225374e-06, "loss": 0.1615, "step": 32120 }, { "epoch": 0.6960544276645072, "grad_norm": 1.6172592639923096, "learning_rate": 4.222877195398648e-06, "loss": 0.1827, "step": 32125 }, { "epoch": 0.696162762983988, "grad_norm": 1.2765270471572876, "learning_rate": 4.22009949276506e-06, "loss": 0.1001, "step": 32130 }, { "epoch": 0.6962710983034689, "grad_norm": 1.8232845067977905, "learning_rate": 4.217322459646361e-06, "loss": 0.1359, "step": 32135 }, { "epoch": 0.6963794336229497, "grad_norm": 2.179821252822876, "learning_rate": 4.214546096364234e-06, "loss": 0.2326, "step": 32140 }, { "epoch": 0.6964877689424306, "grad_norm": 3.0307698249816895, "learning_rate": 4.211770403240271e-06, "loss": 0.1699, "step": 32145 }, { "epoch": 0.6965961042619114, "grad_norm": 1.5550448894500732, "learning_rate": 4.2089953805960025e-06, "loss": 0.1611, "step": 32150 }, { "epoch": 0.6967044395813923, "grad_norm": 0.38137152791023254, "learning_rate": 4.206221028752867e-06, "loss": 0.1851, "step": 32155 }, { "epoch": 0.6968127749008732, "grad_norm": 1.4701292514801025, "learning_rate": 4.203447348032234e-06, "loss": 0.1825, "step": 32160 }, { "epoch": 0.6969211102203541, "grad_norm": 1.8239693641662598, "learning_rate": 4.2006743387553985e-06, "loss": 0.1671, "step": 32165 }, { "epoch": 0.6970294455398349, "grad_norm": 1.1469539403915405, "learning_rate": 4.197902001243561e-06, "loss": 0.1849, "step": 32170 }, { "epoch": 0.6971377808593158, "grad_norm": 1.612527847290039, "learning_rate": 4.1951303358178665e-06, "loss": 0.1163, "step": 32175 }, { "epoch": 0.6972461161787966, "grad_norm": 1.8412649631500244, "learning_rate": 4.192359342799361e-06, "loss": 0.2555, "step": 32180 }, { "epoch": 0.6973544514982775, "grad_norm": 1.556705117225647, "learning_rate": 4.189589022509032e-06, "loss": 0.2158, "step": 32185 }, { "epoch": 0.6974627868177583, "grad_norm": 1.3700306415557861, "learning_rate": 4.186819375267771e-06, "loss": 0.1497, "step": 32190 }, { "epoch": 0.6975711221372392, "grad_norm": 1.9225122928619385, "learning_rate": 4.184050401396407e-06, "loss": 0.2404, "step": 32195 }, { "epoch": 0.69767945745672, "grad_norm": 0.9704068899154663, "learning_rate": 4.181282101215678e-06, "loss": 0.1056, "step": 32200 }, { "epoch": 0.6977877927762008, "grad_norm": 1.2008475065231323, "learning_rate": 4.178514475046256e-06, "loss": 0.1514, "step": 32205 }, { "epoch": 0.6978961280956818, "grad_norm": 1.2930909395217896, "learning_rate": 4.175747523208723e-06, "loss": 0.2377, "step": 32210 }, { "epoch": 0.6980044634151626, "grad_norm": 0.978428840637207, "learning_rate": 4.172981246023592e-06, "loss": 0.1432, "step": 32215 }, { "epoch": 0.6981127987346435, "grad_norm": 1.4101523160934448, "learning_rate": 4.170215643811299e-06, "loss": 0.1747, "step": 32220 }, { "epoch": 0.6982211340541243, "grad_norm": 0.11814384907484055, "learning_rate": 4.16745071689219e-06, "loss": 0.1921, "step": 32225 }, { "epoch": 0.6983294693736052, "grad_norm": 1.3355265855789185, "learning_rate": 4.164686465586546e-06, "loss": 0.2014, "step": 32230 }, { "epoch": 0.698437804693086, "grad_norm": 1.7794384956359863, "learning_rate": 4.16192289021456e-06, "loss": 0.1707, "step": 32235 }, { "epoch": 0.6985461400125669, "grad_norm": 1.8469700813293457, "learning_rate": 4.159159991096355e-06, "loss": 0.2033, "step": 32240 }, { "epoch": 0.6986544753320477, "grad_norm": 1.3041424751281738, "learning_rate": 4.156397768551971e-06, "loss": 0.1046, "step": 32245 }, { "epoch": 0.6987628106515286, "grad_norm": 1.13532555103302, "learning_rate": 4.153636222901364e-06, "loss": 0.151, "step": 32250 }, { "epoch": 0.6988711459710095, "grad_norm": 1.3004199266433716, "learning_rate": 4.150875354464421e-06, "loss": 0.1171, "step": 32255 }, { "epoch": 0.6989794812904904, "grad_norm": 1.4813250303268433, "learning_rate": 4.1481151635609495e-06, "loss": 0.1634, "step": 32260 }, { "epoch": 0.6990878166099712, "grad_norm": 2.3252177238464355, "learning_rate": 4.145355650510679e-06, "loss": 0.1336, "step": 32265 }, { "epoch": 0.6991961519294521, "grad_norm": 1.2444297075271606, "learning_rate": 4.1425968156332485e-06, "loss": 0.108, "step": 32270 }, { "epoch": 0.6993044872489329, "grad_norm": 0.9276859760284424, "learning_rate": 4.1398386592482386e-06, "loss": 0.1787, "step": 32275 }, { "epoch": 0.6994128225684138, "grad_norm": 1.465333342552185, "learning_rate": 4.1370811816751335e-06, "loss": 0.2455, "step": 32280 }, { "epoch": 0.6995211578878946, "grad_norm": 1.2159264087677002, "learning_rate": 4.134324383233344e-06, "loss": 0.1758, "step": 32285 }, { "epoch": 0.6996294932073754, "grad_norm": 2.0410709381103516, "learning_rate": 4.1315682642422095e-06, "loss": 0.1782, "step": 32290 }, { "epoch": 0.6997378285268563, "grad_norm": 1.4831738471984863, "learning_rate": 4.128812825020981e-06, "loss": 0.171, "step": 32295 }, { "epoch": 0.6998461638463371, "grad_norm": 2.067596435546875, "learning_rate": 4.126058065888837e-06, "loss": 0.2502, "step": 32300 }, { "epoch": 0.6999544991658181, "grad_norm": 2.002699136734009, "learning_rate": 4.123303987164869e-06, "loss": 0.1561, "step": 32305 }, { "epoch": 0.7000628344852989, "grad_norm": 1.4832921028137207, "learning_rate": 4.120550589168108e-06, "loss": 0.188, "step": 32310 }, { "epoch": 0.7001711698047798, "grad_norm": 1.465903401374817, "learning_rate": 4.117797872217488e-06, "loss": 0.2197, "step": 32315 }, { "epoch": 0.7002795051242606, "grad_norm": 1.136555790901184, "learning_rate": 4.115045836631865e-06, "loss": 0.114, "step": 32320 }, { "epoch": 0.7003878404437415, "grad_norm": 1.7552059888839722, "learning_rate": 4.1122944827300286e-06, "loss": 0.1131, "step": 32325 }, { "epoch": 0.7004961757632223, "grad_norm": 1.6255253553390503, "learning_rate": 4.109543810830675e-06, "loss": 0.2453, "step": 32330 }, { "epoch": 0.7006045110827032, "grad_norm": 2.242905855178833, "learning_rate": 4.106793821252435e-06, "loss": 0.2185, "step": 32335 }, { "epoch": 0.700712846402184, "grad_norm": 1.7932144403457642, "learning_rate": 4.104044514313847e-06, "loss": 0.1995, "step": 32340 }, { "epoch": 0.7008211817216649, "grad_norm": 1.893103003501892, "learning_rate": 4.1012958903333855e-06, "loss": 0.1512, "step": 32345 }, { "epoch": 0.7009295170411457, "grad_norm": 1.0074228048324585, "learning_rate": 4.098547949629428e-06, "loss": 0.1328, "step": 32350 }, { "epoch": 0.7010378523606267, "grad_norm": 1.7946186065673828, "learning_rate": 4.095800692520287e-06, "loss": 0.1901, "step": 32355 }, { "epoch": 0.7011461876801075, "grad_norm": 2.456899404525757, "learning_rate": 4.093054119324195e-06, "loss": 0.2179, "step": 32360 }, { "epoch": 0.7012545229995883, "grad_norm": 1.7478829622268677, "learning_rate": 4.090308230359292e-06, "loss": 0.1081, "step": 32365 }, { "epoch": 0.7013628583190692, "grad_norm": 0.76261967420578, "learning_rate": 4.087563025943658e-06, "loss": 0.1603, "step": 32370 }, { "epoch": 0.70147119363855, "grad_norm": 1.5606834888458252, "learning_rate": 4.084818506395276e-06, "loss": 0.1481, "step": 32375 }, { "epoch": 0.7015795289580309, "grad_norm": 1.516750454902649, "learning_rate": 4.0820746720320635e-06, "loss": 0.1619, "step": 32380 }, { "epoch": 0.7016878642775117, "grad_norm": 2.293147563934326, "learning_rate": 4.079331523171845e-06, "loss": 0.2001, "step": 32385 }, { "epoch": 0.7017961995969926, "grad_norm": 1.101440668106079, "learning_rate": 4.076589060132384e-06, "loss": 0.2342, "step": 32390 }, { "epoch": 0.7019045349164734, "grad_norm": 1.7936453819274902, "learning_rate": 4.073847283231343e-06, "loss": 0.146, "step": 32395 }, { "epoch": 0.7020128702359544, "grad_norm": 1.5110336542129517, "learning_rate": 4.0711061927863205e-06, "loss": 0.1633, "step": 32400 }, { "epoch": 0.7021212055554352, "grad_norm": 1.2529813051223755, "learning_rate": 4.068365789114834e-06, "loss": 0.1571, "step": 32405 }, { "epoch": 0.7022295408749161, "grad_norm": 2.0263283252716064, "learning_rate": 4.065626072534311e-06, "loss": 0.1818, "step": 32410 }, { "epoch": 0.7023378761943969, "grad_norm": 1.5387929677963257, "learning_rate": 4.062887043362116e-06, "loss": 0.2201, "step": 32415 }, { "epoch": 0.7024462115138778, "grad_norm": 0.5625081062316895, "learning_rate": 4.060148701915514e-06, "loss": 0.1227, "step": 32420 }, { "epoch": 0.7025545468333586, "grad_norm": 1.3546719551086426, "learning_rate": 4.057411048511709e-06, "loss": 0.1052, "step": 32425 }, { "epoch": 0.7026628821528395, "grad_norm": 1.5546399354934692, "learning_rate": 4.0546740834678125e-06, "loss": 0.0819, "step": 32430 }, { "epoch": 0.7027712174723203, "grad_norm": 1.0243158340454102, "learning_rate": 4.051937807100864e-06, "loss": 0.2093, "step": 32435 }, { "epoch": 0.7028795527918011, "grad_norm": 1.6240657567977905, "learning_rate": 4.049202219727815e-06, "loss": 0.0897, "step": 32440 }, { "epoch": 0.702987888111282, "grad_norm": 0.7391762733459473, "learning_rate": 4.0464673216655516e-06, "loss": 0.193, "step": 32445 }, { "epoch": 0.703096223430763, "grad_norm": 2.0923895835876465, "learning_rate": 4.04373311323086e-06, "loss": 0.2195, "step": 32450 }, { "epoch": 0.7032045587502438, "grad_norm": 1.5781034231185913, "learning_rate": 4.040999594740463e-06, "loss": 0.1809, "step": 32455 }, { "epoch": 0.7033128940697246, "grad_norm": 1.946305513381958, "learning_rate": 4.038266766511e-06, "loss": 0.1576, "step": 32460 }, { "epoch": 0.7034212293892055, "grad_norm": 0.5569218397140503, "learning_rate": 4.035534628859021e-06, "loss": 0.1083, "step": 32465 }, { "epoch": 0.7035295647086863, "grad_norm": 1.73417067527771, "learning_rate": 4.0328031821010126e-06, "loss": 0.201, "step": 32470 }, { "epoch": 0.7036379000281672, "grad_norm": 2.2998504638671875, "learning_rate": 4.030072426553363e-06, "loss": 0.1727, "step": 32475 }, { "epoch": 0.703746235347648, "grad_norm": 1.4882495403289795, "learning_rate": 4.027342362532396e-06, "loss": 0.147, "step": 32480 }, { "epoch": 0.7038545706671289, "grad_norm": 0.8415104150772095, "learning_rate": 4.024612990354347e-06, "loss": 0.2247, "step": 32485 }, { "epoch": 0.7039629059866097, "grad_norm": 1.3304685354232788, "learning_rate": 4.021884310335368e-06, "loss": 0.1367, "step": 32490 }, { "epoch": 0.7040712413060907, "grad_norm": 1.3409501314163208, "learning_rate": 4.01915632279154e-06, "loss": 0.1367, "step": 32495 }, { "epoch": 0.7041795766255715, "grad_norm": 1.6499502658843994, "learning_rate": 4.016429028038858e-06, "loss": 0.1839, "step": 32500 }, { "epoch": 0.7042879119450524, "grad_norm": 1.580083966255188, "learning_rate": 4.013702426393242e-06, "loss": 0.1205, "step": 32505 }, { "epoch": 0.7043962472645332, "grad_norm": 1.6143391132354736, "learning_rate": 4.010976518170523e-06, "loss": 0.1329, "step": 32510 }, { "epoch": 0.704504582584014, "grad_norm": 1.70407235622406, "learning_rate": 4.008251303686462e-06, "loss": 0.1563, "step": 32515 }, { "epoch": 0.7046129179034949, "grad_norm": 1.8650779724121094, "learning_rate": 4.005526783256731e-06, "loss": 0.127, "step": 32520 }, { "epoch": 0.7047212532229757, "grad_norm": 1.7330409288406372, "learning_rate": 4.002802957196922e-06, "loss": 0.2131, "step": 32525 }, { "epoch": 0.7048295885424566, "grad_norm": 1.5156593322753906, "learning_rate": 4.000079825822556e-06, "loss": 0.1544, "step": 32530 }, { "epoch": 0.7049379238619374, "grad_norm": 0.776354193687439, "learning_rate": 3.997357389449059e-06, "loss": 0.17, "step": 32535 }, { "epoch": 0.7050462591814183, "grad_norm": 0.9815037250518799, "learning_rate": 3.994635648391792e-06, "loss": 0.2456, "step": 32540 }, { "epoch": 0.7051545945008992, "grad_norm": 1.6104769706726074, "learning_rate": 3.991914602966019e-06, "loss": 0.2306, "step": 32545 }, { "epoch": 0.7052629298203801, "grad_norm": 1.2232205867767334, "learning_rate": 3.989194253486944e-06, "loss": 0.2179, "step": 32550 }, { "epoch": 0.7053712651398609, "grad_norm": 2.1247966289520264, "learning_rate": 3.986474600269674e-06, "loss": 0.2539, "step": 32555 }, { "epoch": 0.7054796004593418, "grad_norm": 1.799935221672058, "learning_rate": 3.9837556436292345e-06, "loss": 0.2359, "step": 32560 }, { "epoch": 0.7055879357788226, "grad_norm": 1.446844458580017, "learning_rate": 3.981037383880585e-06, "loss": 0.165, "step": 32565 }, { "epoch": 0.7056962710983035, "grad_norm": 1.0475512742996216, "learning_rate": 3.978319821338586e-06, "loss": 0.1634, "step": 32570 }, { "epoch": 0.7058046064177843, "grad_norm": 1.4605021476745605, "learning_rate": 3.9756029563180344e-06, "loss": 0.1394, "step": 32575 }, { "epoch": 0.7059129417372652, "grad_norm": 2.2069175243377686, "learning_rate": 3.972886789133632e-06, "loss": 0.1396, "step": 32580 }, { "epoch": 0.706021277056746, "grad_norm": 1.6767983436584473, "learning_rate": 3.970171320100012e-06, "loss": 0.1044, "step": 32585 }, { "epoch": 0.7061296123762268, "grad_norm": 2.36769437789917, "learning_rate": 3.967456549531714e-06, "loss": 0.1445, "step": 32590 }, { "epoch": 0.7062379476957078, "grad_norm": 1.5510510206222534, "learning_rate": 3.964742477743207e-06, "loss": 0.1138, "step": 32595 }, { "epoch": 0.7063462830151886, "grad_norm": 0.7688974142074585, "learning_rate": 3.962029105048881e-06, "loss": 0.1298, "step": 32600 }, { "epoch": 0.7064546183346695, "grad_norm": 1.0755664110183716, "learning_rate": 3.95931643176303e-06, "loss": 0.1987, "step": 32605 }, { "epoch": 0.7065629536541503, "grad_norm": 1.3690595626831055, "learning_rate": 3.956604458199884e-06, "loss": 0.1157, "step": 32610 }, { "epoch": 0.7066712889736312, "grad_norm": 1.9704742431640625, "learning_rate": 3.953893184673579e-06, "loss": 0.2031, "step": 32615 }, { "epoch": 0.706779624293112, "grad_norm": 1.987392544746399, "learning_rate": 3.95118261149818e-06, "loss": 0.1547, "step": 32620 }, { "epoch": 0.7068879596125929, "grad_norm": 1.1486815214157104, "learning_rate": 3.948472738987661e-06, "loss": 0.0699, "step": 32625 }, { "epoch": 0.7069962949320737, "grad_norm": 2.6389784812927246, "learning_rate": 3.9457635674559266e-06, "loss": 0.1737, "step": 32630 }, { "epoch": 0.7071046302515546, "grad_norm": 1.6843847036361694, "learning_rate": 3.943055097216788e-06, "loss": 0.1487, "step": 32635 }, { "epoch": 0.7072129655710355, "grad_norm": 0.7578485012054443, "learning_rate": 3.9403473285839826e-06, "loss": 0.1531, "step": 32640 }, { "epoch": 0.7073213008905164, "grad_norm": 1.2390422821044922, "learning_rate": 3.93764026187117e-06, "loss": 0.0937, "step": 32645 }, { "epoch": 0.7074296362099972, "grad_norm": 2.172132730484009, "learning_rate": 3.9349338973919135e-06, "loss": 0.2421, "step": 32650 }, { "epoch": 0.7075379715294781, "grad_norm": 2.244351387023926, "learning_rate": 3.9322282354597155e-06, "loss": 0.2005, "step": 32655 }, { "epoch": 0.7076463068489589, "grad_norm": 1.3773869276046753, "learning_rate": 3.929523276387976e-06, "loss": 0.1736, "step": 32660 }, { "epoch": 0.7077546421684398, "grad_norm": 1.625499963760376, "learning_rate": 3.926819020490035e-06, "loss": 0.1729, "step": 32665 }, { "epoch": 0.7078629774879206, "grad_norm": 1.226602554321289, "learning_rate": 3.924115468079129e-06, "loss": 0.23, "step": 32670 }, { "epoch": 0.7079713128074014, "grad_norm": 1.9249975681304932, "learning_rate": 3.921412619468434e-06, "loss": 0.1245, "step": 32675 }, { "epoch": 0.7080796481268823, "grad_norm": 1.4307749271392822, "learning_rate": 3.918710474971026e-06, "loss": 0.2116, "step": 32680 }, { "epoch": 0.7081879834463631, "grad_norm": 1.5358151197433472, "learning_rate": 3.916009034899915e-06, "loss": 0.1724, "step": 32685 }, { "epoch": 0.7082963187658441, "grad_norm": 1.7549854516983032, "learning_rate": 3.913308299568015e-06, "loss": 0.1788, "step": 32690 }, { "epoch": 0.7084046540853249, "grad_norm": 1.1810343265533447, "learning_rate": 3.9106082692881705e-06, "loss": 0.1383, "step": 32695 }, { "epoch": 0.7085129894048058, "grad_norm": 1.4147934913635254, "learning_rate": 3.907908944373142e-06, "loss": 0.0845, "step": 32700 }, { "epoch": 0.7086213247242866, "grad_norm": 1.610969066619873, "learning_rate": 3.9052103251356e-06, "loss": 0.1703, "step": 32705 }, { "epoch": 0.7087296600437675, "grad_norm": 1.1933339834213257, "learning_rate": 3.902512411888145e-06, "loss": 0.1534, "step": 32710 }, { "epoch": 0.7088379953632483, "grad_norm": 1.282333493232727, "learning_rate": 3.899815204943284e-06, "loss": 0.119, "step": 32715 }, { "epoch": 0.7089463306827292, "grad_norm": 1.768905758857727, "learning_rate": 3.897118704613453e-06, "loss": 0.2076, "step": 32720 }, { "epoch": 0.70905466600221, "grad_norm": 1.642155647277832, "learning_rate": 3.894422911210999e-06, "loss": 0.1597, "step": 32725 }, { "epoch": 0.7091630013216909, "grad_norm": 1.4658949375152588, "learning_rate": 3.891727825048186e-06, "loss": 0.1994, "step": 32730 }, { "epoch": 0.7092713366411717, "grad_norm": 0.8692777752876282, "learning_rate": 3.889033446437206e-06, "loss": 0.2434, "step": 32735 }, { "epoch": 0.7093796719606527, "grad_norm": 1.4750455617904663, "learning_rate": 3.886339775690152e-06, "loss": 0.225, "step": 32740 }, { "epoch": 0.7094880072801335, "grad_norm": 1.9054828882217407, "learning_rate": 3.88364681311906e-06, "loss": 0.1372, "step": 32745 }, { "epoch": 0.7095963425996143, "grad_norm": 1.3341606855392456, "learning_rate": 3.880954559035858e-06, "loss": 0.0953, "step": 32750 }, { "epoch": 0.7097046779190952, "grad_norm": 1.3957864046096802, "learning_rate": 3.87826301375241e-06, "loss": 0.1057, "step": 32755 }, { "epoch": 0.709813013238576, "grad_norm": 1.16420316696167, "learning_rate": 3.875572177580489e-06, "loss": 0.1423, "step": 32760 }, { "epoch": 0.7099213485580569, "grad_norm": 1.3519386053085327, "learning_rate": 3.872882050831782e-06, "loss": 0.177, "step": 32765 }, { "epoch": 0.7100296838775377, "grad_norm": 2.0506036281585693, "learning_rate": 3.870192633817911e-06, "loss": 0.2165, "step": 32770 }, { "epoch": 0.7101380191970186, "grad_norm": 1.5465353727340698, "learning_rate": 3.867503926850395e-06, "loss": 0.2268, "step": 32775 }, { "epoch": 0.7102463545164994, "grad_norm": 1.1822516918182373, "learning_rate": 3.864815930240686e-06, "loss": 0.1503, "step": 32780 }, { "epoch": 0.7103546898359804, "grad_norm": 0.8125865459442139, "learning_rate": 3.86212864430014e-06, "loss": 0.1592, "step": 32785 }, { "epoch": 0.7104630251554612, "grad_norm": 1.1257133483886719, "learning_rate": 3.859442069340054e-06, "loss": 0.1265, "step": 32790 }, { "epoch": 0.7105713604749421, "grad_norm": 1.607536792755127, "learning_rate": 3.856756205671617e-06, "loss": 0.1159, "step": 32795 }, { "epoch": 0.7106796957944229, "grad_norm": 1.8908593654632568, "learning_rate": 3.8540710536059445e-06, "loss": 0.1304, "step": 32800 }, { "epoch": 0.7107880311139038, "grad_norm": 1.2588986158370972, "learning_rate": 3.851386613454079e-06, "loss": 0.1691, "step": 32805 }, { "epoch": 0.7108963664333846, "grad_norm": 1.3314684629440308, "learning_rate": 3.848702885526964e-06, "loss": 0.2119, "step": 32810 }, { "epoch": 0.7110047017528655, "grad_norm": 1.8702428340911865, "learning_rate": 3.8460198701354765e-06, "loss": 0.2381, "step": 32815 }, { "epoch": 0.7111130370723463, "grad_norm": 1.536009669303894, "learning_rate": 3.843337567590397e-06, "loss": 0.1552, "step": 32820 }, { "epoch": 0.7112213723918271, "grad_norm": 0.5917944312095642, "learning_rate": 3.840655978202436e-06, "loss": 0.1372, "step": 32825 }, { "epoch": 0.711329707711308, "grad_norm": 1.3272931575775146, "learning_rate": 3.837975102282211e-06, "loss": 0.1796, "step": 32830 }, { "epoch": 0.711438043030789, "grad_norm": 1.5945444107055664, "learning_rate": 3.835294940140263e-06, "loss": 0.1845, "step": 32835 }, { "epoch": 0.7115463783502698, "grad_norm": 0.9624264240264893, "learning_rate": 3.832615492087053e-06, "loss": 0.1346, "step": 32840 }, { "epoch": 0.7116547136697506, "grad_norm": 1.22862708568573, "learning_rate": 3.829936758432946e-06, "loss": 0.1668, "step": 32845 }, { "epoch": 0.7117630489892315, "grad_norm": 1.202701210975647, "learning_rate": 3.827258739488242e-06, "loss": 0.1113, "step": 32850 }, { "epoch": 0.7118713843087123, "grad_norm": 1.129679799079895, "learning_rate": 3.824581435563142e-06, "loss": 0.1345, "step": 32855 }, { "epoch": 0.7119797196281932, "grad_norm": 1.2080214023590088, "learning_rate": 3.821904846967778e-06, "loss": 0.1281, "step": 32860 }, { "epoch": 0.712088054947674, "grad_norm": 1.9414342641830444, "learning_rate": 3.819228974012187e-06, "loss": 0.1819, "step": 32865 }, { "epoch": 0.7121963902671549, "grad_norm": 0.7279384136199951, "learning_rate": 3.816553817006335e-06, "loss": 0.1785, "step": 32870 }, { "epoch": 0.7123047255866357, "grad_norm": 1.4195430278778076, "learning_rate": 3.81387937626009e-06, "loss": 0.1243, "step": 32875 }, { "epoch": 0.7124130609061166, "grad_norm": 1.0584218502044678, "learning_rate": 3.8112056520832563e-06, "loss": 0.2281, "step": 32880 }, { "epoch": 0.7125213962255975, "grad_norm": 1.8841391801834106, "learning_rate": 3.8085326447855353e-06, "loss": 0.1807, "step": 32885 }, { "epoch": 0.7126297315450784, "grad_norm": 1.6954066753387451, "learning_rate": 3.8058603546765593e-06, "loss": 0.1051, "step": 32890 }, { "epoch": 0.7127380668645592, "grad_norm": 2.0314905643463135, "learning_rate": 3.8031887820658776e-06, "loss": 0.1922, "step": 32895 }, { "epoch": 0.71284640218404, "grad_norm": 1.7661113739013672, "learning_rate": 3.800517927262942e-06, "loss": 0.1727, "step": 32900 }, { "epoch": 0.7129547375035209, "grad_norm": 1.3805140256881714, "learning_rate": 3.797847790577142e-06, "loss": 0.1545, "step": 32905 }, { "epoch": 0.7130630728230017, "grad_norm": 1.9206470251083374, "learning_rate": 3.7951783723177614e-06, "loss": 0.1823, "step": 32910 }, { "epoch": 0.7131714081424826, "grad_norm": 2.0224480628967285, "learning_rate": 3.7925096727940236e-06, "loss": 0.1437, "step": 32915 }, { "epoch": 0.7132797434619634, "grad_norm": 1.572511076927185, "learning_rate": 3.7898416923150473e-06, "loss": 0.1129, "step": 32920 }, { "epoch": 0.7133880787814443, "grad_norm": 2.155390977859497, "learning_rate": 3.7871744311898875e-06, "loss": 0.2424, "step": 32925 }, { "epoch": 0.7134964141009252, "grad_norm": 1.4341773986816406, "learning_rate": 3.7845078897274968e-06, "loss": 0.1454, "step": 32930 }, { "epoch": 0.7136047494204061, "grad_norm": 2.3545992374420166, "learning_rate": 3.7818420682367598e-06, "loss": 0.1649, "step": 32935 }, { "epoch": 0.7137130847398869, "grad_norm": 1.2857997417449951, "learning_rate": 3.7791769670264746e-06, "loss": 0.1299, "step": 32940 }, { "epoch": 0.7138214200593678, "grad_norm": 1.8196748495101929, "learning_rate": 3.7765125864053454e-06, "loss": 0.2191, "step": 32945 }, { "epoch": 0.7139297553788486, "grad_norm": 1.4906694889068604, "learning_rate": 3.77384892668201e-06, "loss": 0.2135, "step": 32950 }, { "epoch": 0.7140380906983295, "grad_norm": 1.1555856466293335, "learning_rate": 3.771185988165005e-06, "loss": 0.205, "step": 32955 }, { "epoch": 0.7141464260178103, "grad_norm": 1.6816246509552002, "learning_rate": 3.768523771162799e-06, "loss": 0.1586, "step": 32960 }, { "epoch": 0.7142547613372912, "grad_norm": 1.6743990182876587, "learning_rate": 3.7658622759837626e-06, "loss": 0.1916, "step": 32965 }, { "epoch": 0.714363096656772, "grad_norm": 1.5638009309768677, "learning_rate": 3.763201502936198e-06, "loss": 0.1864, "step": 32970 }, { "epoch": 0.7144714319762528, "grad_norm": 0.7358003854751587, "learning_rate": 3.7605414523283124e-06, "loss": 0.1851, "step": 32975 }, { "epoch": 0.7145797672957338, "grad_norm": 1.1707258224487305, "learning_rate": 3.757882124468225e-06, "loss": 0.2093, "step": 32980 }, { "epoch": 0.7146881026152146, "grad_norm": 1.344482421875, "learning_rate": 3.755223519663994e-06, "loss": 0.1495, "step": 32985 }, { "epoch": 0.7147964379346955, "grad_norm": 1.6856772899627686, "learning_rate": 3.7525656382235675e-06, "loss": 0.2363, "step": 32990 }, { "epoch": 0.7149047732541763, "grad_norm": 1.8752943277359009, "learning_rate": 3.7499084804548293e-06, "loss": 0.1577, "step": 32995 }, { "epoch": 0.7150131085736572, "grad_norm": 1.250411868095398, "learning_rate": 3.7472520466655625e-06, "loss": 0.2346, "step": 33000 }, { "epoch": 0.715121443893138, "grad_norm": 1.1120073795318604, "learning_rate": 3.7445963371634853e-06, "loss": 0.1811, "step": 33005 }, { "epoch": 0.7152297792126189, "grad_norm": 1.1487387418746948, "learning_rate": 3.7419413522562154e-06, "loss": 0.1338, "step": 33010 }, { "epoch": 0.7153381145320997, "grad_norm": 1.4177812337875366, "learning_rate": 3.7392870922512902e-06, "loss": 0.2037, "step": 33015 }, { "epoch": 0.7154464498515806, "grad_norm": 1.5541200637817383, "learning_rate": 3.7366335574561743e-06, "loss": 0.1415, "step": 33020 }, { "epoch": 0.7155547851710615, "grad_norm": 1.8785772323608398, "learning_rate": 3.7339807481782286e-06, "loss": 0.1195, "step": 33025 }, { "epoch": 0.7156631204905424, "grad_norm": 1.4085701704025269, "learning_rate": 3.731328664724755e-06, "loss": 0.2091, "step": 33030 }, { "epoch": 0.7157714558100232, "grad_norm": 2.4932103157043457, "learning_rate": 3.728677307402947e-06, "loss": 0.2111, "step": 33035 }, { "epoch": 0.7158797911295041, "grad_norm": 1.6054482460021973, "learning_rate": 3.7260266765199327e-06, "loss": 0.1389, "step": 33040 }, { "epoch": 0.7159881264489849, "grad_norm": 1.7432574033737183, "learning_rate": 3.723376772382743e-06, "loss": 0.1404, "step": 33045 }, { "epoch": 0.7160964617684658, "grad_norm": 2.56838059425354, "learning_rate": 3.720727595298328e-06, "loss": 0.2239, "step": 33050 }, { "epoch": 0.7162047970879466, "grad_norm": 1.1574759483337402, "learning_rate": 3.71807914557356e-06, "loss": 0.1426, "step": 33055 }, { "epoch": 0.7163131324074274, "grad_norm": 1.8220205307006836, "learning_rate": 3.715431423515217e-06, "loss": 0.1835, "step": 33060 }, { "epoch": 0.7164214677269083, "grad_norm": 1.4691152572631836, "learning_rate": 3.7127844294300043e-06, "loss": 0.2079, "step": 33065 }, { "epoch": 0.7165298030463891, "grad_norm": 1.7595804929733276, "learning_rate": 3.7101381636245283e-06, "loss": 0.1697, "step": 33070 }, { "epoch": 0.7166381383658701, "grad_norm": 1.0814130306243896, "learning_rate": 3.707492626405326e-06, "loss": 0.0945, "step": 33075 }, { "epoch": 0.7167464736853509, "grad_norm": 0.9537820816040039, "learning_rate": 3.704847818078843e-06, "loss": 0.1704, "step": 33080 }, { "epoch": 0.7168548090048318, "grad_norm": 1.3973112106323242, "learning_rate": 3.7022037389514354e-06, "loss": 0.2569, "step": 33085 }, { "epoch": 0.7169631443243126, "grad_norm": 1.2446368932724, "learning_rate": 3.6995603893293873e-06, "loss": 0.2245, "step": 33090 }, { "epoch": 0.7170714796437935, "grad_norm": 2.480003833770752, "learning_rate": 3.6969177695188853e-06, "loss": 0.1748, "step": 33095 }, { "epoch": 0.7171798149632743, "grad_norm": 1.9220833778381348, "learning_rate": 3.6942758798260416e-06, "loss": 0.1818, "step": 33100 }, { "epoch": 0.7172881502827552, "grad_norm": 1.418965458869934, "learning_rate": 3.6916347205568747e-06, "loss": 0.1122, "step": 33105 }, { "epoch": 0.717396485602236, "grad_norm": 1.839890480041504, "learning_rate": 3.6889942920173292e-06, "loss": 0.1686, "step": 33110 }, { "epoch": 0.7175048209217169, "grad_norm": 1.2310506105422974, "learning_rate": 3.6863545945132526e-06, "loss": 0.1508, "step": 33115 }, { "epoch": 0.7176131562411977, "grad_norm": 0.8080288171768188, "learning_rate": 3.6837156283504217e-06, "loss": 0.1642, "step": 33120 }, { "epoch": 0.7177214915606787, "grad_norm": 1.330837607383728, "learning_rate": 3.6810773938345136e-06, "loss": 0.1904, "step": 33125 }, { "epoch": 0.7178298268801595, "grad_norm": 0.8050349950790405, "learning_rate": 3.678439891271133e-06, "loss": 0.1322, "step": 33130 }, { "epoch": 0.7179381621996404, "grad_norm": 1.371816873550415, "learning_rate": 3.675803120965796e-06, "loss": 0.2331, "step": 33135 }, { "epoch": 0.7180464975191212, "grad_norm": 1.6987857818603516, "learning_rate": 3.6731670832239275e-06, "loss": 0.1976, "step": 33140 }, { "epoch": 0.718154832838602, "grad_norm": 1.7504751682281494, "learning_rate": 3.670531778350881e-06, "loss": 0.2093, "step": 33145 }, { "epoch": 0.7182631681580829, "grad_norm": 0.7906005382537842, "learning_rate": 3.667897206651908e-06, "loss": 0.1427, "step": 33150 }, { "epoch": 0.7183715034775637, "grad_norm": 1.416334629058838, "learning_rate": 3.6652633684321926e-06, "loss": 0.17, "step": 33155 }, { "epoch": 0.7184798387970446, "grad_norm": 1.1079305410385132, "learning_rate": 3.6626302639968194e-06, "loss": 0.2051, "step": 33160 }, { "epoch": 0.7185881741165254, "grad_norm": 2.1711442470550537, "learning_rate": 3.6599978936507987e-06, "loss": 0.2513, "step": 33165 }, { "epoch": 0.7186965094360064, "grad_norm": 2.3251993656158447, "learning_rate": 3.6573662576990442e-06, "loss": 0.1067, "step": 33170 }, { "epoch": 0.7188048447554872, "grad_norm": 1.435014009475708, "learning_rate": 3.6547353564463972e-06, "loss": 0.1101, "step": 33175 }, { "epoch": 0.7189131800749681, "grad_norm": 1.5513006448745728, "learning_rate": 3.6521051901976112e-06, "loss": 0.1638, "step": 33180 }, { "epoch": 0.7190215153944489, "grad_norm": 1.2787647247314453, "learning_rate": 3.649475759257343e-06, "loss": 0.1181, "step": 33185 }, { "epoch": 0.7191298507139298, "grad_norm": 2.4119040966033936, "learning_rate": 3.646847063930181e-06, "loss": 0.165, "step": 33190 }, { "epoch": 0.7192381860334106, "grad_norm": 1.7186720371246338, "learning_rate": 3.644219104520613e-06, "loss": 0.2222, "step": 33195 }, { "epoch": 0.7193465213528915, "grad_norm": 1.4079846143722534, "learning_rate": 3.6415918813330564e-06, "loss": 0.159, "step": 33200 }, { "epoch": 0.7194548566723723, "grad_norm": 1.4319283962249756, "learning_rate": 3.638965394671826e-06, "loss": 0.2134, "step": 33205 }, { "epoch": 0.7195631919918531, "grad_norm": 2.6430211067199707, "learning_rate": 3.6363396448411715e-06, "loss": 0.1912, "step": 33210 }, { "epoch": 0.719671527311334, "grad_norm": 1.584823489189148, "learning_rate": 3.633714632145241e-06, "loss": 0.2138, "step": 33215 }, { "epoch": 0.719779862630815, "grad_norm": 2.064023017883301, "learning_rate": 3.6310903568880963e-06, "loss": 0.2081, "step": 33220 }, { "epoch": 0.7198881979502958, "grad_norm": 2.1757283210754395, "learning_rate": 3.6284668193737347e-06, "loss": 0.1539, "step": 33225 }, { "epoch": 0.7199965332697766, "grad_norm": 2.549633264541626, "learning_rate": 3.625844019906043e-06, "loss": 0.2385, "step": 33230 }, { "epoch": 0.7201048685892575, "grad_norm": 1.2757723331451416, "learning_rate": 3.6232219587888406e-06, "loss": 0.1672, "step": 33235 }, { "epoch": 0.7202132039087383, "grad_norm": 1.7375638484954834, "learning_rate": 3.6206006363258463e-06, "loss": 0.1174, "step": 33240 }, { "epoch": 0.7203215392282192, "grad_norm": 0.9712104201316833, "learning_rate": 3.6179800528207076e-06, "loss": 0.2135, "step": 33245 }, { "epoch": 0.7204298745477, "grad_norm": 2.483917236328125, "learning_rate": 3.615360208576978e-06, "loss": 0.1382, "step": 33250 }, { "epoch": 0.7205382098671809, "grad_norm": 1.687260627746582, "learning_rate": 3.612741103898123e-06, "loss": 0.1414, "step": 33255 }, { "epoch": 0.7206465451866617, "grad_norm": 1.3627017736434937, "learning_rate": 3.6101227390875328e-06, "loss": 0.0833, "step": 33260 }, { "epoch": 0.7207548805061426, "grad_norm": 1.5897610187530518, "learning_rate": 3.6075051144484996e-06, "loss": 0.2372, "step": 33265 }, { "epoch": 0.7208632158256235, "grad_norm": 1.4364689588546753, "learning_rate": 3.6048882302842404e-06, "loss": 0.1304, "step": 33270 }, { "epoch": 0.7209715511451044, "grad_norm": 2.26482892036438, "learning_rate": 3.60227208689788e-06, "loss": 0.1602, "step": 33275 }, { "epoch": 0.7210798864645852, "grad_norm": 1.8697690963745117, "learning_rate": 3.599656684592463e-06, "loss": 0.1639, "step": 33280 }, { "epoch": 0.721188221784066, "grad_norm": 1.3849653005599976, "learning_rate": 3.5970420236709434e-06, "loss": 0.1614, "step": 33285 }, { "epoch": 0.7212965571035469, "grad_norm": 1.3054709434509277, "learning_rate": 3.5944281044361853e-06, "loss": 0.1841, "step": 33290 }, { "epoch": 0.7214048924230277, "grad_norm": 1.5800349712371826, "learning_rate": 3.5918149271909785e-06, "loss": 0.1937, "step": 33295 }, { "epoch": 0.7215132277425086, "grad_norm": 0.7709336280822754, "learning_rate": 3.5892024922380154e-06, "loss": 0.1747, "step": 33300 }, { "epoch": 0.7216215630619894, "grad_norm": 2.025192975997925, "learning_rate": 3.586590799879912e-06, "loss": 0.1876, "step": 33305 }, { "epoch": 0.7217298983814703, "grad_norm": 1.2395919561386108, "learning_rate": 3.5839798504191893e-06, "loss": 0.1772, "step": 33310 }, { "epoch": 0.7218382337009512, "grad_norm": 1.3560441732406616, "learning_rate": 3.581369644158289e-06, "loss": 0.1235, "step": 33315 }, { "epoch": 0.7219465690204321, "grad_norm": 1.4865601062774658, "learning_rate": 3.5787601813995664e-06, "loss": 0.1464, "step": 33320 }, { "epoch": 0.7220549043399129, "grad_norm": 2.456284999847412, "learning_rate": 3.576151462445284e-06, "loss": 0.1778, "step": 33325 }, { "epoch": 0.7221632396593938, "grad_norm": 1.5029284954071045, "learning_rate": 3.5735434875976292e-06, "loss": 0.1578, "step": 33330 }, { "epoch": 0.7222715749788746, "grad_norm": 1.3341845273971558, "learning_rate": 3.570936257158689e-06, "loss": 0.1494, "step": 33335 }, { "epoch": 0.7223799102983555, "grad_norm": 1.9402252435684204, "learning_rate": 3.568329771430481e-06, "loss": 0.1607, "step": 33340 }, { "epoch": 0.7224882456178363, "grad_norm": 0.5946969389915466, "learning_rate": 3.5657240307149176e-06, "loss": 0.1451, "step": 33345 }, { "epoch": 0.7225965809373172, "grad_norm": 1.024730920791626, "learning_rate": 3.5631190353138434e-06, "loss": 0.1377, "step": 33350 }, { "epoch": 0.722704916256798, "grad_norm": 1.9147692918777466, "learning_rate": 3.560514785529001e-06, "loss": 0.1708, "step": 33355 }, { "epoch": 0.7228132515762788, "grad_norm": 1.025789499282837, "learning_rate": 3.557911281662061e-06, "loss": 0.1732, "step": 33360 }, { "epoch": 0.7229215868957598, "grad_norm": 1.1505857706069946, "learning_rate": 3.5553085240145936e-06, "loss": 0.1471, "step": 33365 }, { "epoch": 0.7230299222152407, "grad_norm": 0.9391269087791443, "learning_rate": 3.552706512888091e-06, "loss": 0.1433, "step": 33370 }, { "epoch": 0.7231382575347215, "grad_norm": 1.5496841669082642, "learning_rate": 3.550105248583963e-06, "loss": 0.1193, "step": 33375 }, { "epoch": 0.7232465928542023, "grad_norm": 1.0789457559585571, "learning_rate": 3.5475047314035183e-06, "loss": 0.1334, "step": 33380 }, { "epoch": 0.7233549281736832, "grad_norm": 1.3324910402297974, "learning_rate": 3.5449049616479957e-06, "loss": 0.2199, "step": 33385 }, { "epoch": 0.723463263493164, "grad_norm": 1.0106091499328613, "learning_rate": 3.542305939618533e-06, "loss": 0.1883, "step": 33390 }, { "epoch": 0.7235715988126449, "grad_norm": 1.7089060544967651, "learning_rate": 3.5397076656161944e-06, "loss": 0.2077, "step": 33395 }, { "epoch": 0.7236799341321257, "grad_norm": 2.107974052429199, "learning_rate": 3.537110139941944e-06, "loss": 0.1905, "step": 33400 }, { "epoch": 0.7237882694516066, "grad_norm": 1.1093331575393677, "learning_rate": 3.5345133628966722e-06, "loss": 0.1734, "step": 33405 }, { "epoch": 0.7238966047710874, "grad_norm": 1.035513162612915, "learning_rate": 3.531917334781172e-06, "loss": 0.1966, "step": 33410 }, { "epoch": 0.7240049400905684, "grad_norm": 0.6741146445274353, "learning_rate": 3.529322055896156e-06, "loss": 0.1633, "step": 33415 }, { "epoch": 0.7241132754100492, "grad_norm": 1.143947958946228, "learning_rate": 3.526727526542253e-06, "loss": 0.1832, "step": 33420 }, { "epoch": 0.7242216107295301, "grad_norm": 1.852770209312439, "learning_rate": 3.5241337470199933e-06, "loss": 0.2563, "step": 33425 }, { "epoch": 0.7243299460490109, "grad_norm": 1.3683799505233765, "learning_rate": 3.5215407176298332e-06, "loss": 0.1285, "step": 33430 }, { "epoch": 0.7244382813684918, "grad_norm": 1.5108956098556519, "learning_rate": 3.518948438672131e-06, "loss": 0.1592, "step": 33435 }, { "epoch": 0.7245466166879726, "grad_norm": 2.304471969604492, "learning_rate": 3.5163569104471695e-06, "loss": 0.1647, "step": 33440 }, { "epoch": 0.7246549520074534, "grad_norm": 1.225704312324524, "learning_rate": 3.513766133255131e-06, "loss": 0.1371, "step": 33445 }, { "epoch": 0.7247632873269343, "grad_norm": 1.4719945192337036, "learning_rate": 3.511176107396125e-06, "loss": 0.1551, "step": 33450 }, { "epoch": 0.7248716226464151, "grad_norm": 2.441720962524414, "learning_rate": 3.5085868331701657e-06, "loss": 0.1813, "step": 33455 }, { "epoch": 0.7249799579658961, "grad_norm": 1.9723032712936401, "learning_rate": 3.505998310877172e-06, "loss": 0.1902, "step": 33460 }, { "epoch": 0.7250882932853769, "grad_norm": 2.071282386779785, "learning_rate": 3.5034105408170026e-06, "loss": 0.1931, "step": 33465 }, { "epoch": 0.7251966286048578, "grad_norm": 1.4102665185928345, "learning_rate": 3.5008235232893984e-06, "loss": 0.1462, "step": 33470 }, { "epoch": 0.7253049639243386, "grad_norm": 1.0882058143615723, "learning_rate": 3.498237258594035e-06, "loss": 0.148, "step": 33475 }, { "epoch": 0.7254132992438195, "grad_norm": 2.5492334365844727, "learning_rate": 3.4956517470304862e-06, "loss": 0.195, "step": 33480 }, { "epoch": 0.7255216345633003, "grad_norm": 2.4216268062591553, "learning_rate": 3.4930669888982494e-06, "loss": 0.177, "step": 33485 }, { "epoch": 0.7256299698827812, "grad_norm": 0.9748948216438293, "learning_rate": 3.4904829844967293e-06, "loss": 0.133, "step": 33490 }, { "epoch": 0.725738305202262, "grad_norm": 1.857223391532898, "learning_rate": 3.4878997341252387e-06, "loss": 0.1268, "step": 33495 }, { "epoch": 0.7258466405217429, "grad_norm": 2.2101755142211914, "learning_rate": 3.485317238083017e-06, "loss": 0.2044, "step": 33500 }, { "epoch": 0.7259549758412237, "grad_norm": 1.0088045597076416, "learning_rate": 3.4827354966691985e-06, "loss": 0.1506, "step": 33505 }, { "epoch": 0.7260633111607047, "grad_norm": 1.3145153522491455, "learning_rate": 3.480154510182845e-06, "loss": 0.1196, "step": 33510 }, { "epoch": 0.7261716464801855, "grad_norm": 1.0796270370483398, "learning_rate": 3.4775742789229237e-06, "loss": 0.1948, "step": 33515 }, { "epoch": 0.7262799817996664, "grad_norm": 1.9927817583084106, "learning_rate": 3.474994803188321e-06, "loss": 0.1352, "step": 33520 }, { "epoch": 0.7263883171191472, "grad_norm": 1.7788459062576294, "learning_rate": 3.4724160832778243e-06, "loss": 0.1918, "step": 33525 }, { "epoch": 0.726496652438628, "grad_norm": 1.210524082183838, "learning_rate": 3.4698381194901374e-06, "loss": 0.1238, "step": 33530 }, { "epoch": 0.7266049877581089, "grad_norm": 0.8254932761192322, "learning_rate": 3.467260912123888e-06, "loss": 0.1995, "step": 33535 }, { "epoch": 0.7267133230775897, "grad_norm": 1.484217643737793, "learning_rate": 3.4646844614775965e-06, "loss": 0.1509, "step": 33540 }, { "epoch": 0.7268216583970706, "grad_norm": 1.0135087966918945, "learning_rate": 3.4621087678497147e-06, "loss": 0.2142, "step": 33545 }, { "epoch": 0.7269299937165514, "grad_norm": 1.8084194660186768, "learning_rate": 3.459533831538592e-06, "loss": 0.1782, "step": 33550 }, { "epoch": 0.7270383290360324, "grad_norm": 0.7917832136154175, "learning_rate": 3.4569596528424974e-06, "loss": 0.0924, "step": 33555 }, { "epoch": 0.7271466643555132, "grad_norm": 1.0667228698730469, "learning_rate": 3.4543862320596167e-06, "loss": 0.1453, "step": 33560 }, { "epoch": 0.7272549996749941, "grad_norm": 0.790784478187561, "learning_rate": 3.451813569488034e-06, "loss": 0.1618, "step": 33565 }, { "epoch": 0.7273633349944749, "grad_norm": 0.26518407464027405, "learning_rate": 3.449241665425761e-06, "loss": 0.1467, "step": 33570 }, { "epoch": 0.7274716703139558, "grad_norm": 2.0998687744140625, "learning_rate": 3.4466705201707074e-06, "loss": 0.1664, "step": 33575 }, { "epoch": 0.7275800056334366, "grad_norm": 1.3825074434280396, "learning_rate": 3.444100134020708e-06, "loss": 0.209, "step": 33580 }, { "epoch": 0.7276883409529175, "grad_norm": 1.315619945526123, "learning_rate": 3.4415305072734974e-06, "loss": 0.1033, "step": 33585 }, { "epoch": 0.7277966762723983, "grad_norm": 1.2062625885009766, "learning_rate": 3.4389616402267357e-06, "loss": 0.1865, "step": 33590 }, { "epoch": 0.7279050115918791, "grad_norm": 0.8411208987236023, "learning_rate": 3.43639353317798e-06, "loss": 0.1531, "step": 33595 }, { "epoch": 0.72801334691136, "grad_norm": 1.366334319114685, "learning_rate": 3.4338261864247137e-06, "loss": 0.2093, "step": 33600 }, { "epoch": 0.728121682230841, "grad_norm": 2.6117939949035645, "learning_rate": 3.4312596002643184e-06, "loss": 0.1275, "step": 33605 }, { "epoch": 0.7282300175503218, "grad_norm": 1.131063461303711, "learning_rate": 3.4286937749940994e-06, "loss": 0.1578, "step": 33610 }, { "epoch": 0.7283383528698026, "grad_norm": 1.7450110912322998, "learning_rate": 3.426128710911273e-06, "loss": 0.1171, "step": 33615 }, { "epoch": 0.7284466881892835, "grad_norm": 1.1118593215942383, "learning_rate": 3.423564408312954e-06, "loss": 0.1962, "step": 33620 }, { "epoch": 0.7285550235087643, "grad_norm": 1.3118687868118286, "learning_rate": 3.421000867496187e-06, "loss": 0.1937, "step": 33625 }, { "epoch": 0.7286633588282452, "grad_norm": 2.960038661956787, "learning_rate": 3.4184380887579126e-06, "loss": 0.3689, "step": 33630 }, { "epoch": 0.728771694147726, "grad_norm": 1.5926880836486816, "learning_rate": 3.4158760723949967e-06, "loss": 0.133, "step": 33635 }, { "epoch": 0.7288800294672069, "grad_norm": 1.6494492292404175, "learning_rate": 3.413314818704205e-06, "loss": 0.1761, "step": 33640 }, { "epoch": 0.7289883647866877, "grad_norm": 1.2753640413284302, "learning_rate": 3.4107543279822262e-06, "loss": 0.1572, "step": 33645 }, { "epoch": 0.7290967001061686, "grad_norm": 2.261644124984741, "learning_rate": 3.4081946005256493e-06, "loss": 0.1285, "step": 33650 }, { "epoch": 0.7292050354256495, "grad_norm": 0.41827693581581116, "learning_rate": 3.4056356366309817e-06, "loss": 0.1743, "step": 33655 }, { "epoch": 0.7293133707451304, "grad_norm": 1.458528757095337, "learning_rate": 3.403077436594645e-06, "loss": 0.1754, "step": 33660 }, { "epoch": 0.7294217060646112, "grad_norm": 1.8951386213302612, "learning_rate": 3.4005200007129625e-06, "loss": 0.1427, "step": 33665 }, { "epoch": 0.729530041384092, "grad_norm": 1.9256428480148315, "learning_rate": 3.397963329282181e-06, "loss": 0.2214, "step": 33670 }, { "epoch": 0.7296383767035729, "grad_norm": 1.2529553174972534, "learning_rate": 3.395407422598446e-06, "loss": 0.141, "step": 33675 }, { "epoch": 0.7297467120230537, "grad_norm": 0.9549828171730042, "learning_rate": 3.392852280957828e-06, "loss": 0.182, "step": 33680 }, { "epoch": 0.7298550473425346, "grad_norm": 1.5673850774765015, "learning_rate": 3.3902979046562947e-06, "loss": 0.145, "step": 33685 }, { "epoch": 0.7299633826620154, "grad_norm": 1.205217957496643, "learning_rate": 3.3877442939897388e-06, "loss": 0.1203, "step": 33690 }, { "epoch": 0.7300717179814963, "grad_norm": 1.9348841905593872, "learning_rate": 3.385191449253955e-06, "loss": 0.1383, "step": 33695 }, { "epoch": 0.7301800533009772, "grad_norm": 1.1524955034255981, "learning_rate": 3.3826393707446448e-06, "loss": 0.123, "step": 33700 }, { "epoch": 0.7302883886204581, "grad_norm": 1.244889259338379, "learning_rate": 3.3800880587574424e-06, "loss": 0.136, "step": 33705 }, { "epoch": 0.7303967239399389, "grad_norm": 1.1293795108795166, "learning_rate": 3.3775375135878695e-06, "loss": 0.142, "step": 33710 }, { "epoch": 0.7305050592594198, "grad_norm": 1.8368628025054932, "learning_rate": 3.3749877355313742e-06, "loss": 0.1843, "step": 33715 }, { "epoch": 0.7306133945789006, "grad_norm": 0.7102980613708496, "learning_rate": 3.3724387248833033e-06, "loss": 0.1294, "step": 33720 }, { "epoch": 0.7307217298983815, "grad_norm": 1.5341088771820068, "learning_rate": 3.369890481938929e-06, "loss": 0.152, "step": 33725 }, { "epoch": 0.7308300652178623, "grad_norm": 1.3970807790756226, "learning_rate": 3.3673430069934234e-06, "loss": 0.1567, "step": 33730 }, { "epoch": 0.7309384005373432, "grad_norm": 1.551023006439209, "learning_rate": 3.3647963003418694e-06, "loss": 0.138, "step": 33735 }, { "epoch": 0.731046735856824, "grad_norm": 1.0108182430267334, "learning_rate": 3.362250362279271e-06, "loss": 0.1206, "step": 33740 }, { "epoch": 0.7311550711763048, "grad_norm": 1.3670415878295898, "learning_rate": 3.359705193100533e-06, "loss": 0.1359, "step": 33745 }, { "epoch": 0.7312634064957858, "grad_norm": 1.8426337242126465, "learning_rate": 3.357160793100477e-06, "loss": 0.1305, "step": 33750 }, { "epoch": 0.7313717418152667, "grad_norm": 1.0328954458236694, "learning_rate": 3.354617162573832e-06, "loss": 0.1066, "step": 33755 }, { "epoch": 0.7314800771347475, "grad_norm": 1.53471040725708, "learning_rate": 3.352074301815246e-06, "loss": 0.1639, "step": 33760 }, { "epoch": 0.7315884124542283, "grad_norm": 0.6875874400138855, "learning_rate": 3.3495322111192643e-06, "loss": 0.1518, "step": 33765 }, { "epoch": 0.7316967477737092, "grad_norm": 1.883326768875122, "learning_rate": 3.3469908907803495e-06, "loss": 0.3178, "step": 33770 }, { "epoch": 0.73180508309319, "grad_norm": 1.3801213502883911, "learning_rate": 3.3444503410928806e-06, "loss": 0.1985, "step": 33775 }, { "epoch": 0.7319134184126709, "grad_norm": 0.9648265242576599, "learning_rate": 3.341910562351137e-06, "loss": 0.2092, "step": 33780 }, { "epoch": 0.7320217537321517, "grad_norm": 1.7954587936401367, "learning_rate": 3.33937155484932e-06, "loss": 0.1224, "step": 33785 }, { "epoch": 0.7321300890516326, "grad_norm": 1.1633025407791138, "learning_rate": 3.3368333188815295e-06, "loss": 0.1447, "step": 33790 }, { "epoch": 0.7322384243711134, "grad_norm": 1.765462875366211, "learning_rate": 3.334295854741787e-06, "loss": 0.1549, "step": 33795 }, { "epoch": 0.7323467596905944, "grad_norm": 1.7270042896270752, "learning_rate": 3.3317591627240144e-06, "loss": 0.1968, "step": 33800 }, { "epoch": 0.7324550950100752, "grad_norm": 2.2082018852233887, "learning_rate": 3.329223243122052e-06, "loss": 0.1292, "step": 33805 }, { "epoch": 0.7325634303295561, "grad_norm": 1.9089704751968384, "learning_rate": 3.326688096229652e-06, "loss": 0.2209, "step": 33810 }, { "epoch": 0.7326717656490369, "grad_norm": 0.8745293021202087, "learning_rate": 3.3241537223404674e-06, "loss": 0.168, "step": 33815 }, { "epoch": 0.7327801009685178, "grad_norm": 1.0219221115112305, "learning_rate": 3.3216201217480725e-06, "loss": 0.168, "step": 33820 }, { "epoch": 0.7328884362879986, "grad_norm": 1.2979072332382202, "learning_rate": 3.3190872947459417e-06, "loss": 0.185, "step": 33825 }, { "epoch": 0.7329967716074794, "grad_norm": 1.1436902284622192, "learning_rate": 3.3165552416274705e-06, "loss": 0.1601, "step": 33830 }, { "epoch": 0.7331051069269603, "grad_norm": 1.6417512893676758, "learning_rate": 3.314023962685954e-06, "loss": 0.1041, "step": 33835 }, { "epoch": 0.7332134422464411, "grad_norm": 1.4307106733322144, "learning_rate": 3.3114934582146087e-06, "loss": 0.2008, "step": 33840 }, { "epoch": 0.7333217775659221, "grad_norm": 1.8113930225372314, "learning_rate": 3.308963728506548e-06, "loss": 0.2115, "step": 33845 }, { "epoch": 0.7334301128854029, "grad_norm": 1.5733705759048462, "learning_rate": 3.3064347738548088e-06, "loss": 0.2466, "step": 33850 }, { "epoch": 0.7335384482048838, "grad_norm": 1.0393425226211548, "learning_rate": 3.303906594552334e-06, "loss": 0.1152, "step": 33855 }, { "epoch": 0.7336467835243646, "grad_norm": 1.4824556112289429, "learning_rate": 3.30137919089197e-06, "loss": 0.1164, "step": 33860 }, { "epoch": 0.7337551188438455, "grad_norm": 1.5475304126739502, "learning_rate": 3.2988525631664846e-06, "loss": 0.1593, "step": 33865 }, { "epoch": 0.7338634541633263, "grad_norm": 1.873902440071106, "learning_rate": 3.2963267116685425e-06, "loss": 0.1892, "step": 33870 }, { "epoch": 0.7339717894828072, "grad_norm": 2.1306426525115967, "learning_rate": 3.2938016366907343e-06, "loss": 0.1533, "step": 33875 }, { "epoch": 0.734080124802288, "grad_norm": 2.313594102859497, "learning_rate": 3.2912773385255436e-06, "loss": 0.1455, "step": 33880 }, { "epoch": 0.7341884601217689, "grad_norm": 1.0071758031845093, "learning_rate": 3.28875381746538e-06, "loss": 0.2351, "step": 33885 }, { "epoch": 0.7342967954412497, "grad_norm": 0.7646321654319763, "learning_rate": 3.2862310738025493e-06, "loss": 0.1671, "step": 33890 }, { "epoch": 0.7344051307607307, "grad_norm": 2.1972384452819824, "learning_rate": 3.283709107829276e-06, "loss": 0.2678, "step": 33895 }, { "epoch": 0.7345134660802115, "grad_norm": 1.235127329826355, "learning_rate": 3.281187919837696e-06, "loss": 0.1954, "step": 33900 }, { "epoch": 0.7346218013996924, "grad_norm": 1.3949294090270996, "learning_rate": 3.278667510119844e-06, "loss": 0.1542, "step": 33905 }, { "epoch": 0.7347301367191732, "grad_norm": 1.3794862031936646, "learning_rate": 3.2761478789676793e-06, "loss": 0.2715, "step": 33910 }, { "epoch": 0.734838472038654, "grad_norm": 1.009720802307129, "learning_rate": 3.2736290266730565e-06, "loss": 0.1338, "step": 33915 }, { "epoch": 0.7349468073581349, "grad_norm": 1.9155203104019165, "learning_rate": 3.271110953527752e-06, "loss": 0.1403, "step": 33920 }, { "epoch": 0.7350551426776157, "grad_norm": 1.2928545475006104, "learning_rate": 3.2685936598234426e-06, "loss": 0.2155, "step": 33925 }, { "epoch": 0.7351634779970966, "grad_norm": 1.032085657119751, "learning_rate": 3.2660771458517238e-06, "loss": 0.1167, "step": 33930 }, { "epoch": 0.7352718133165774, "grad_norm": 1.6882836818695068, "learning_rate": 3.2635614119040936e-06, "loss": 0.2017, "step": 33935 }, { "epoch": 0.7353801486360583, "grad_norm": 1.3542708158493042, "learning_rate": 3.2610464582719594e-06, "loss": 0.1912, "step": 33940 }, { "epoch": 0.7354884839555392, "grad_norm": 2.3731746673583984, "learning_rate": 3.2585322852466428e-06, "loss": 0.1544, "step": 33945 }, { "epoch": 0.7355968192750201, "grad_norm": 0.6981651782989502, "learning_rate": 3.2560188931193737e-06, "loss": 0.1406, "step": 33950 }, { "epoch": 0.7357051545945009, "grad_norm": 1.8714418411254883, "learning_rate": 3.2535062821812947e-06, "loss": 0.1718, "step": 33955 }, { "epoch": 0.7358134899139818, "grad_norm": 0.7615852952003479, "learning_rate": 3.250994452723447e-06, "loss": 0.1816, "step": 33960 }, { "epoch": 0.7359218252334626, "grad_norm": 1.3423370122909546, "learning_rate": 3.2484834050367953e-06, "loss": 0.189, "step": 33965 }, { "epoch": 0.7360301605529435, "grad_norm": 0.9210069179534912, "learning_rate": 3.2459731394121997e-06, "loss": 0.1839, "step": 33970 }, { "epoch": 0.7361384958724243, "grad_norm": 2.2820637226104736, "learning_rate": 3.2434636561404442e-06, "loss": 0.1966, "step": 33975 }, { "epoch": 0.7362468311919051, "grad_norm": 0.8111798763275146, "learning_rate": 3.240954955512211e-06, "loss": 0.1229, "step": 33980 }, { "epoch": 0.736355166511386, "grad_norm": 0.9637171030044556, "learning_rate": 3.2384470378180933e-06, "loss": 0.1016, "step": 33985 }, { "epoch": 0.736463501830867, "grad_norm": 1.935928463935852, "learning_rate": 3.235939903348597e-06, "loss": 0.3112, "step": 33990 }, { "epoch": 0.7365718371503478, "grad_norm": 1.1977373361587524, "learning_rate": 3.2334335523941384e-06, "loss": 0.2335, "step": 33995 }, { "epoch": 0.7366801724698286, "grad_norm": 1.7786378860473633, "learning_rate": 3.2309279852450416e-06, "loss": 0.1205, "step": 34000 }, { "epoch": 0.7367885077893095, "grad_norm": 2.8416049480438232, "learning_rate": 3.2284232021915353e-06, "loss": 0.2212, "step": 34005 }, { "epoch": 0.7368968431087903, "grad_norm": 1.5934382677078247, "learning_rate": 3.225919203523765e-06, "loss": 0.1503, "step": 34010 }, { "epoch": 0.7370051784282712, "grad_norm": 1.3554022312164307, "learning_rate": 3.2234159895317798e-06, "loss": 0.2032, "step": 34015 }, { "epoch": 0.737113513747752, "grad_norm": 1.2785661220550537, "learning_rate": 3.2209135605055343e-06, "loss": 0.1172, "step": 34020 }, { "epoch": 0.7372218490672329, "grad_norm": 2.1969258785247803, "learning_rate": 3.218411916734907e-06, "loss": 0.1834, "step": 34025 }, { "epoch": 0.7373301843867137, "grad_norm": 1.81545090675354, "learning_rate": 3.2159110585096666e-06, "loss": 0.1985, "step": 34030 }, { "epoch": 0.7374385197061946, "grad_norm": 1.370194673538208, "learning_rate": 3.2134109861195086e-06, "loss": 0.1468, "step": 34035 }, { "epoch": 0.7375468550256755, "grad_norm": 1.1124255657196045, "learning_rate": 3.210911699854018e-06, "loss": 0.1888, "step": 34040 }, { "epoch": 0.7376551903451564, "grad_norm": 1.6443407535552979, "learning_rate": 3.2084132000027123e-06, "loss": 0.1391, "step": 34045 }, { "epoch": 0.7377635256646372, "grad_norm": 1.6170610189437866, "learning_rate": 3.2059154868550003e-06, "loss": 0.165, "step": 34050 }, { "epoch": 0.737871860984118, "grad_norm": 1.4616549015045166, "learning_rate": 3.2034185607002e-06, "loss": 0.1541, "step": 34055 }, { "epoch": 0.7379801963035989, "grad_norm": 1.7933454513549805, "learning_rate": 3.2009224218275504e-06, "loss": 0.1993, "step": 34060 }, { "epoch": 0.7380885316230797, "grad_norm": 1.8324849605560303, "learning_rate": 3.1984270705261844e-06, "loss": 0.2351, "step": 34065 }, { "epoch": 0.7381968669425606, "grad_norm": 0.4292617738246918, "learning_rate": 3.1959325070851578e-06, "loss": 0.1299, "step": 34070 }, { "epoch": 0.7383052022620414, "grad_norm": 1.4306285381317139, "learning_rate": 3.1934387317934223e-06, "loss": 0.1863, "step": 34075 }, { "epoch": 0.7384135375815223, "grad_norm": 1.567579984664917, "learning_rate": 3.1909457449398505e-06, "loss": 0.1966, "step": 34080 }, { "epoch": 0.7385218729010032, "grad_norm": 1.8906490802764893, "learning_rate": 3.1884535468132117e-06, "loss": 0.1797, "step": 34085 }, { "epoch": 0.7386302082204841, "grad_norm": 1.5149465799331665, "learning_rate": 3.1859621377021923e-06, "loss": 0.1739, "step": 34090 }, { "epoch": 0.7387385435399649, "grad_norm": 1.1507058143615723, "learning_rate": 3.183471517895389e-06, "loss": 0.1303, "step": 34095 }, { "epoch": 0.7388468788594458, "grad_norm": 1.399577021598816, "learning_rate": 3.1809816876812947e-06, "loss": 0.1523, "step": 34100 }, { "epoch": 0.7389552141789266, "grad_norm": 0.8029621243476868, "learning_rate": 3.1784926473483256e-06, "loss": 0.1625, "step": 34105 }, { "epoch": 0.7390635494984075, "grad_norm": 2.3465685844421387, "learning_rate": 3.1760043971847954e-06, "loss": 0.1952, "step": 34110 }, { "epoch": 0.7391718848178883, "grad_norm": 0.9793636798858643, "learning_rate": 3.173516937478934e-06, "loss": 0.105, "step": 34115 }, { "epoch": 0.7392802201373692, "grad_norm": 2.1627376079559326, "learning_rate": 3.171030268518872e-06, "loss": 0.2126, "step": 34120 }, { "epoch": 0.73938855545685, "grad_norm": 1.9092528820037842, "learning_rate": 3.1685443905926593e-06, "loss": 0.2608, "step": 34125 }, { "epoch": 0.7394968907763309, "grad_norm": 2.2039976119995117, "learning_rate": 3.1660593039882405e-06, "loss": 0.2203, "step": 34130 }, { "epoch": 0.7396052260958118, "grad_norm": 1.698759913444519, "learning_rate": 3.1635750089934782e-06, "loss": 0.1766, "step": 34135 }, { "epoch": 0.7397135614152927, "grad_norm": 1.3046199083328247, "learning_rate": 3.1610915058961457e-06, "loss": 0.2195, "step": 34140 }, { "epoch": 0.7398218967347735, "grad_norm": 1.6737267971038818, "learning_rate": 3.1586087949839106e-06, "loss": 0.1763, "step": 34145 }, { "epoch": 0.7399302320542543, "grad_norm": 1.4467191696166992, "learning_rate": 3.1561268765443663e-06, "loss": 0.1582, "step": 34150 }, { "epoch": 0.7400385673737352, "grad_norm": 2.4006924629211426, "learning_rate": 3.1536457508649997e-06, "loss": 0.2049, "step": 34155 }, { "epoch": 0.740146902693216, "grad_norm": 0.692672848701477, "learning_rate": 3.1511654182332175e-06, "loss": 0.138, "step": 34160 }, { "epoch": 0.7402552380126969, "grad_norm": 1.8372420072555542, "learning_rate": 3.1486858789363228e-06, "loss": 0.1282, "step": 34165 }, { "epoch": 0.7403635733321777, "grad_norm": 0.9301207065582275, "learning_rate": 3.1462071332615396e-06, "loss": 0.1403, "step": 34170 }, { "epoch": 0.7404719086516586, "grad_norm": 0.8738407492637634, "learning_rate": 3.143729181495986e-06, "loss": 0.2224, "step": 34175 }, { "epoch": 0.7405802439711394, "grad_norm": 1.2398935556411743, "learning_rate": 3.141252023926704e-06, "loss": 0.1733, "step": 34180 }, { "epoch": 0.7406885792906204, "grad_norm": 1.6371192932128906, "learning_rate": 3.1387756608406274e-06, "loss": 0.1538, "step": 34185 }, { "epoch": 0.7407969146101012, "grad_norm": 1.6060577630996704, "learning_rate": 3.13630009252461e-06, "loss": 0.1293, "step": 34190 }, { "epoch": 0.7409052499295821, "grad_norm": 1.799720287322998, "learning_rate": 3.133825319265411e-06, "loss": 0.2067, "step": 34195 }, { "epoch": 0.7410135852490629, "grad_norm": 0.8583829402923584, "learning_rate": 3.131351341349691e-06, "loss": 0.1249, "step": 34200 }, { "epoch": 0.7411219205685438, "grad_norm": 1.9559662342071533, "learning_rate": 3.1288781590640284e-06, "loss": 0.1868, "step": 34205 }, { "epoch": 0.7412302558880246, "grad_norm": 1.0668108463287354, "learning_rate": 3.1264057726948995e-06, "loss": 0.256, "step": 34210 }, { "epoch": 0.7413385912075054, "grad_norm": 0.866854727268219, "learning_rate": 3.123934182528697e-06, "loss": 0.1227, "step": 34215 }, { "epoch": 0.7414469265269863, "grad_norm": 2.0077812671661377, "learning_rate": 3.1214633888517165e-06, "loss": 0.2236, "step": 34220 }, { "epoch": 0.7415552618464671, "grad_norm": 1.546221375465393, "learning_rate": 3.118993391950159e-06, "loss": 0.1349, "step": 34225 }, { "epoch": 0.7416635971659481, "grad_norm": 2.044137716293335, "learning_rate": 3.1165241921101395e-06, "loss": 0.2264, "step": 34230 }, { "epoch": 0.7417719324854289, "grad_norm": 1.5545474290847778, "learning_rate": 3.114055789617678e-06, "loss": 0.2039, "step": 34235 }, { "epoch": 0.7418802678049098, "grad_norm": 1.0228720903396606, "learning_rate": 3.111588184758706e-06, "loss": 0.1384, "step": 34240 }, { "epoch": 0.7419886031243906, "grad_norm": 0.9903321266174316, "learning_rate": 3.10912137781905e-06, "loss": 0.1108, "step": 34245 }, { "epoch": 0.7420969384438715, "grad_norm": 0.6972874999046326, "learning_rate": 3.1066553690844602e-06, "loss": 0.1734, "step": 34250 }, { "epoch": 0.7422052737633523, "grad_norm": 1.5579311847686768, "learning_rate": 3.104190158840583e-06, "loss": 0.1868, "step": 34255 }, { "epoch": 0.7423136090828332, "grad_norm": 1.6680108308792114, "learning_rate": 3.1017257473729747e-06, "loss": 0.1889, "step": 34260 }, { "epoch": 0.742421944402314, "grad_norm": 1.6813597679138184, "learning_rate": 3.099262134967106e-06, "loss": 0.2359, "step": 34265 }, { "epoch": 0.7425302797217949, "grad_norm": 1.684859275817871, "learning_rate": 3.0967993219083413e-06, "loss": 0.2119, "step": 34270 }, { "epoch": 0.7426386150412757, "grad_norm": 0.7948161959648132, "learning_rate": 3.0943373084819694e-06, "loss": 0.1657, "step": 34275 }, { "epoch": 0.7427469503607567, "grad_norm": 1.964316487312317, "learning_rate": 3.091876094973166e-06, "loss": 0.1546, "step": 34280 }, { "epoch": 0.7428552856802375, "grad_norm": 1.2752838134765625, "learning_rate": 3.0894156816670406e-06, "loss": 0.1409, "step": 34285 }, { "epoch": 0.7429636209997184, "grad_norm": 2.158376932144165, "learning_rate": 3.086956068848588e-06, "loss": 0.221, "step": 34290 }, { "epoch": 0.7430719563191992, "grad_norm": 0.8480796217918396, "learning_rate": 3.084497256802714e-06, "loss": 0.1521, "step": 34295 }, { "epoch": 0.74318029163868, "grad_norm": 2.239450216293335, "learning_rate": 3.0820392458142424e-06, "loss": 0.1568, "step": 34300 }, { "epoch": 0.7432886269581609, "grad_norm": 2.7676613330841064, "learning_rate": 3.0795820361678885e-06, "loss": 0.1853, "step": 34305 }, { "epoch": 0.7433969622776417, "grad_norm": 0.4445478320121765, "learning_rate": 3.077125628148292e-06, "loss": 0.1291, "step": 34310 }, { "epoch": 0.7435052975971226, "grad_norm": 1.3501466512680054, "learning_rate": 3.074670022039984e-06, "loss": 0.1463, "step": 34315 }, { "epoch": 0.7436136329166034, "grad_norm": 1.394749402999878, "learning_rate": 3.0722152181274144e-06, "loss": 0.1565, "step": 34320 }, { "epoch": 0.7437219682360843, "grad_norm": 1.4202064275741577, "learning_rate": 3.069761216694932e-06, "loss": 0.2228, "step": 34325 }, { "epoch": 0.7438303035555652, "grad_norm": 1.1474429368972778, "learning_rate": 3.0673080180267966e-06, "loss": 0.2156, "step": 34330 }, { "epoch": 0.7439386388750461, "grad_norm": 1.608985424041748, "learning_rate": 3.064855622407179e-06, "loss": 0.1931, "step": 34335 }, { "epoch": 0.7440469741945269, "grad_norm": 0.926254391670227, "learning_rate": 3.0624040301201462e-06, "loss": 0.2118, "step": 34340 }, { "epoch": 0.7441553095140078, "grad_norm": 1.0444103479385376, "learning_rate": 3.0599532414496835e-06, "loss": 0.0814, "step": 34345 }, { "epoch": 0.7442636448334886, "grad_norm": 1.2310489416122437, "learning_rate": 3.0575032566796735e-06, "loss": 0.1526, "step": 34350 }, { "epoch": 0.7443719801529695, "grad_norm": 1.7726154327392578, "learning_rate": 3.055054076093916e-06, "loss": 0.1659, "step": 34355 }, { "epoch": 0.7444803154724503, "grad_norm": 1.732712745666504, "learning_rate": 3.0526056999761058e-06, "loss": 0.1609, "step": 34360 }, { "epoch": 0.7445886507919312, "grad_norm": 1.418480396270752, "learning_rate": 3.0501581286098546e-06, "loss": 0.1561, "step": 34365 }, { "epoch": 0.744696986111412, "grad_norm": 1.2850974798202515, "learning_rate": 3.0477113622786734e-06, "loss": 0.0992, "step": 34370 }, { "epoch": 0.744805321430893, "grad_norm": 0.7700263261795044, "learning_rate": 3.0452654012659866e-06, "loss": 0.104, "step": 34375 }, { "epoch": 0.7449136567503738, "grad_norm": 0.7766558527946472, "learning_rate": 3.0428202458551238e-06, "loss": 0.2108, "step": 34380 }, { "epoch": 0.7450219920698546, "grad_norm": 1.2083172798156738, "learning_rate": 3.040375896329313e-06, "loss": 0.1572, "step": 34385 }, { "epoch": 0.7451303273893355, "grad_norm": 1.9810007810592651, "learning_rate": 3.0379323529717033e-06, "loss": 0.1628, "step": 34390 }, { "epoch": 0.7452386627088163, "grad_norm": 1.5255464315414429, "learning_rate": 3.0354896160653346e-06, "loss": 0.1186, "step": 34395 }, { "epoch": 0.7453469980282972, "grad_norm": 1.5144968032836914, "learning_rate": 3.03304768589317e-06, "loss": 0.1085, "step": 34400 }, { "epoch": 0.745455333347778, "grad_norm": 1.2709665298461914, "learning_rate": 3.0306065627380623e-06, "loss": 0.1466, "step": 34405 }, { "epoch": 0.7455636686672589, "grad_norm": 1.2303904294967651, "learning_rate": 3.0281662468827856e-06, "loss": 0.1897, "step": 34410 }, { "epoch": 0.7456720039867397, "grad_norm": 0.6073073744773865, "learning_rate": 3.0257267386100085e-06, "loss": 0.1595, "step": 34415 }, { "epoch": 0.7457803393062206, "grad_norm": 1.5098079442977905, "learning_rate": 3.0232880382023176e-06, "loss": 0.1309, "step": 34420 }, { "epoch": 0.7458886746257015, "grad_norm": 1.1259052753448486, "learning_rate": 3.0208501459421925e-06, "loss": 0.1176, "step": 34425 }, { "epoch": 0.7459970099451824, "grad_norm": 1.32931387424469, "learning_rate": 3.018413062112031e-06, "loss": 0.1442, "step": 34430 }, { "epoch": 0.7461053452646632, "grad_norm": 1.909805178642273, "learning_rate": 3.015976786994135e-06, "loss": 0.1894, "step": 34435 }, { "epoch": 0.7462136805841441, "grad_norm": 1.6718738079071045, "learning_rate": 3.0135413208707033e-06, "loss": 0.1169, "step": 34440 }, { "epoch": 0.7463220159036249, "grad_norm": 2.300060272216797, "learning_rate": 3.0111066640238574e-06, "loss": 0.1532, "step": 34445 }, { "epoch": 0.7464303512231057, "grad_norm": 1.0634580850601196, "learning_rate": 3.008672816735606e-06, "loss": 0.1631, "step": 34450 }, { "epoch": 0.7465386865425866, "grad_norm": 2.049659013748169, "learning_rate": 3.006239779287883e-06, "loss": 0.1988, "step": 34455 }, { "epoch": 0.7466470218620674, "grad_norm": 1.03834867477417, "learning_rate": 3.0038075519625144e-06, "loss": 0.2, "step": 34460 }, { "epoch": 0.7467553571815483, "grad_norm": 1.8631551265716553, "learning_rate": 3.001376135041235e-06, "loss": 0.1166, "step": 34465 }, { "epoch": 0.7468636925010291, "grad_norm": 1.326847791671753, "learning_rate": 2.9989455288056945e-06, "loss": 0.1947, "step": 34470 }, { "epoch": 0.7469720278205101, "grad_norm": 2.047283172607422, "learning_rate": 2.9965157335374316e-06, "loss": 0.1909, "step": 34475 }, { "epoch": 0.7470803631399909, "grad_norm": 2.059624671936035, "learning_rate": 2.994086749517916e-06, "loss": 0.1956, "step": 34480 }, { "epoch": 0.7471886984594718, "grad_norm": 1.112836241722107, "learning_rate": 2.991658577028499e-06, "loss": 0.1603, "step": 34485 }, { "epoch": 0.7472970337789526, "grad_norm": 1.7433055639266968, "learning_rate": 2.9892312163504534e-06, "loss": 0.1424, "step": 34490 }, { "epoch": 0.7474053690984335, "grad_norm": 2.385660171508789, "learning_rate": 2.98680466776495e-06, "loss": 0.1811, "step": 34495 }, { "epoch": 0.7475137044179143, "grad_norm": 0.9335393309593201, "learning_rate": 2.9843789315530647e-06, "loss": 0.1777, "step": 34500 }, { "epoch": 0.7476220397373952, "grad_norm": 2.5420784950256348, "learning_rate": 2.98195400799579e-06, "loss": 0.1432, "step": 34505 }, { "epoch": 0.747730375056876, "grad_norm": 1.8340277671813965, "learning_rate": 2.9795298973740095e-06, "loss": 0.1446, "step": 34510 }, { "epoch": 0.7478387103763569, "grad_norm": 1.2569602727890015, "learning_rate": 2.9771065999685277e-06, "loss": 0.2246, "step": 34515 }, { "epoch": 0.7479470456958378, "grad_norm": 1.339595913887024, "learning_rate": 2.974684116060036e-06, "loss": 0.1835, "step": 34520 }, { "epoch": 0.7480553810153187, "grad_norm": 1.2781693935394287, "learning_rate": 2.972262445929157e-06, "loss": 0.2036, "step": 34525 }, { "epoch": 0.7481637163347995, "grad_norm": 1.3791069984436035, "learning_rate": 2.969841589856398e-06, "loss": 0.1853, "step": 34530 }, { "epoch": 0.7482720516542803, "grad_norm": 2.158222198486328, "learning_rate": 2.967421548122177e-06, "loss": 0.124, "step": 34535 }, { "epoch": 0.7483803869737612, "grad_norm": 1.7568262815475464, "learning_rate": 2.9650023210068235e-06, "loss": 0.1248, "step": 34540 }, { "epoch": 0.748488722293242, "grad_norm": 1.694748044013977, "learning_rate": 2.962583908790564e-06, "loss": 0.241, "step": 34545 }, { "epoch": 0.7485970576127229, "grad_norm": 1.0623488426208496, "learning_rate": 2.9601663117535416e-06, "loss": 0.1586, "step": 34550 }, { "epoch": 0.7487053929322037, "grad_norm": 1.5672235488891602, "learning_rate": 2.957749530175792e-06, "loss": 0.2411, "step": 34555 }, { "epoch": 0.7488137282516846, "grad_norm": 1.9633750915527344, "learning_rate": 2.9553335643372696e-06, "loss": 0.2133, "step": 34560 }, { "epoch": 0.7489220635711654, "grad_norm": 1.6421016454696655, "learning_rate": 2.9529184145178215e-06, "loss": 0.1504, "step": 34565 }, { "epoch": 0.7490303988906464, "grad_norm": 1.2189915180206299, "learning_rate": 2.9505040809972097e-06, "loss": 0.1293, "step": 34570 }, { "epoch": 0.7491387342101272, "grad_norm": 1.3588144779205322, "learning_rate": 2.9480905640551015e-06, "loss": 0.1506, "step": 34575 }, { "epoch": 0.7492470695296081, "grad_norm": 1.2836443185806274, "learning_rate": 2.9456778639710605e-06, "loss": 0.1619, "step": 34580 }, { "epoch": 0.7493554048490889, "grad_norm": 1.1636481285095215, "learning_rate": 2.943265981024569e-06, "loss": 0.1872, "step": 34585 }, { "epoch": 0.7494637401685698, "grad_norm": 1.1398236751556396, "learning_rate": 2.9408549154950007e-06, "loss": 0.1617, "step": 34590 }, { "epoch": 0.7495720754880506, "grad_norm": 1.6913199424743652, "learning_rate": 2.9384446676616475e-06, "loss": 0.1717, "step": 34595 }, { "epoch": 0.7496804108075314, "grad_norm": 1.396467924118042, "learning_rate": 2.936035237803694e-06, "loss": 0.1705, "step": 34600 }, { "epoch": 0.7497887461270123, "grad_norm": 1.8497157096862793, "learning_rate": 2.9336266262002432e-06, "loss": 0.1784, "step": 34605 }, { "epoch": 0.7498970814464931, "grad_norm": 1.5102554559707642, "learning_rate": 2.9312188331302906e-06, "loss": 0.1453, "step": 34610 }, { "epoch": 0.7500054167659741, "grad_norm": 1.8885992765426636, "learning_rate": 2.9288118588727466e-06, "loss": 0.2219, "step": 34615 }, { "epoch": 0.7501137520854549, "grad_norm": 1.624415397644043, "learning_rate": 2.9264057037064243e-06, "loss": 0.2099, "step": 34620 }, { "epoch": 0.7502220874049358, "grad_norm": 2.3195230960845947, "learning_rate": 2.924000367910036e-06, "loss": 0.2196, "step": 34625 }, { "epoch": 0.7503304227244166, "grad_norm": 1.97918701171875, "learning_rate": 2.9215958517622102e-06, "loss": 0.099, "step": 34630 }, { "epoch": 0.7504387580438975, "grad_norm": 1.1420598030090332, "learning_rate": 2.9191921555414658e-06, "loss": 0.0698, "step": 34635 }, { "epoch": 0.7505470933633783, "grad_norm": 0.8626310229301453, "learning_rate": 2.916789279526244e-06, "loss": 0.0978, "step": 34640 }, { "epoch": 0.7506554286828592, "grad_norm": 1.3238434791564941, "learning_rate": 2.9143872239948744e-06, "loss": 0.1313, "step": 34645 }, { "epoch": 0.75076376400234, "grad_norm": 1.5787171125411987, "learning_rate": 2.9119859892256065e-06, "loss": 0.1805, "step": 34650 }, { "epoch": 0.7508720993218209, "grad_norm": 2.2824838161468506, "learning_rate": 2.9095855754965785e-06, "loss": 0.1443, "step": 34655 }, { "epoch": 0.7509804346413017, "grad_norm": 1.1177657842636108, "learning_rate": 2.907185983085852e-06, "loss": 0.1669, "step": 34660 }, { "epoch": 0.7510887699607827, "grad_norm": 2.20835018157959, "learning_rate": 2.904787212271375e-06, "loss": 0.1653, "step": 34665 }, { "epoch": 0.7511971052802635, "grad_norm": 1.3802093267440796, "learning_rate": 2.9023892633310125e-06, "loss": 0.1784, "step": 34670 }, { "epoch": 0.7513054405997444, "grad_norm": 1.5660357475280762, "learning_rate": 2.8999921365425352e-06, "loss": 0.2338, "step": 34675 }, { "epoch": 0.7514137759192252, "grad_norm": 1.5652309656143188, "learning_rate": 2.8975958321836085e-06, "loss": 0.2218, "step": 34680 }, { "epoch": 0.751522111238706, "grad_norm": 0.4214749038219452, "learning_rate": 2.8952003505318126e-06, "loss": 0.1801, "step": 34685 }, { "epoch": 0.7516304465581869, "grad_norm": 1.9147861003875732, "learning_rate": 2.892805691864624e-06, "loss": 0.2091, "step": 34690 }, { "epoch": 0.7517387818776677, "grad_norm": 1.3345918655395508, "learning_rate": 2.890411856459433e-06, "loss": 0.1697, "step": 34695 }, { "epoch": 0.7518471171971486, "grad_norm": 2.059178113937378, "learning_rate": 2.8880188445935265e-06, "loss": 0.1218, "step": 34700 }, { "epoch": 0.7519554525166294, "grad_norm": 1.6139706373214722, "learning_rate": 2.885626656544097e-06, "loss": 0.1411, "step": 34705 }, { "epoch": 0.7520637878361103, "grad_norm": 0.9090677499771118, "learning_rate": 2.883235292588249e-06, "loss": 0.1594, "step": 34710 }, { "epoch": 0.7521721231555912, "grad_norm": 1.468686580657959, "learning_rate": 2.880844753002976e-06, "loss": 0.1228, "step": 34715 }, { "epoch": 0.7522804584750721, "grad_norm": 1.0519946813583374, "learning_rate": 2.878455038065201e-06, "loss": 0.1436, "step": 34720 }, { "epoch": 0.7523887937945529, "grad_norm": 1.7461434602737427, "learning_rate": 2.876066148051725e-06, "loss": 0.1958, "step": 34725 }, { "epoch": 0.7524971291140338, "grad_norm": 1.0540564060211182, "learning_rate": 2.873678083239273e-06, "loss": 0.1344, "step": 34730 }, { "epoch": 0.7526054644335146, "grad_norm": 1.54998779296875, "learning_rate": 2.8712908439044616e-06, "loss": 0.1204, "step": 34735 }, { "epoch": 0.7527137997529955, "grad_norm": 1.901009202003479, "learning_rate": 2.868904430323817e-06, "loss": 0.109, "step": 34740 }, { "epoch": 0.7528221350724763, "grad_norm": 1.2149425745010376, "learning_rate": 2.8665188427737713e-06, "loss": 0.1068, "step": 34745 }, { "epoch": 0.7529304703919572, "grad_norm": 1.3890396356582642, "learning_rate": 2.864134081530656e-06, "loss": 0.1797, "step": 34750 }, { "epoch": 0.753038805711438, "grad_norm": 1.2912659645080566, "learning_rate": 2.8617501468707144e-06, "loss": 0.1402, "step": 34755 }, { "epoch": 0.753147141030919, "grad_norm": 1.1867806911468506, "learning_rate": 2.8593670390700823e-06, "loss": 0.156, "step": 34760 }, { "epoch": 0.7532554763503998, "grad_norm": 1.6507731676101685, "learning_rate": 2.8569847584048173e-06, "loss": 0.174, "step": 34765 }, { "epoch": 0.7533638116698806, "grad_norm": 1.6673752069473267, "learning_rate": 2.854603305150866e-06, "loss": 0.1675, "step": 34770 }, { "epoch": 0.7534721469893615, "grad_norm": 1.0744917392730713, "learning_rate": 2.8522226795840802e-06, "loss": 0.1398, "step": 34775 }, { "epoch": 0.7535804823088423, "grad_norm": 1.58540940284729, "learning_rate": 2.849842881980227e-06, "loss": 0.1474, "step": 34780 }, { "epoch": 0.7536888176283232, "grad_norm": 2.143758535385132, "learning_rate": 2.847463912614964e-06, "loss": 0.2023, "step": 34785 }, { "epoch": 0.753797152947804, "grad_norm": 1.7071237564086914, "learning_rate": 2.8450857717638635e-06, "loss": 0.1678, "step": 34790 }, { "epoch": 0.7539054882672849, "grad_norm": 1.1880544424057007, "learning_rate": 2.8427084597023934e-06, "loss": 0.1664, "step": 34795 }, { "epoch": 0.7540138235867657, "grad_norm": 1.5462318658828735, "learning_rate": 2.8403319767059356e-06, "loss": 0.2311, "step": 34800 }, { "epoch": 0.7541221589062466, "grad_norm": 0.6445701718330383, "learning_rate": 2.837956323049762e-06, "loss": 0.1131, "step": 34805 }, { "epoch": 0.7542304942257275, "grad_norm": 1.5515598058700562, "learning_rate": 2.8355814990090623e-06, "loss": 0.1703, "step": 34810 }, { "epoch": 0.7543388295452084, "grad_norm": 1.2939929962158203, "learning_rate": 2.8332075048589257e-06, "loss": 0.1345, "step": 34815 }, { "epoch": 0.7544471648646892, "grad_norm": 1.6773099899291992, "learning_rate": 2.8308343408743376e-06, "loss": 0.2216, "step": 34820 }, { "epoch": 0.7545555001841701, "grad_norm": 0.8203269839286804, "learning_rate": 2.8284620073302006e-06, "loss": 0.1418, "step": 34825 }, { "epoch": 0.7546638355036509, "grad_norm": 0.7542096376419067, "learning_rate": 2.8260905045013065e-06, "loss": 0.0781, "step": 34830 }, { "epoch": 0.7547721708231317, "grad_norm": 1.6244348287582397, "learning_rate": 2.823719832662366e-06, "loss": 0.1617, "step": 34835 }, { "epoch": 0.7548805061426126, "grad_norm": 0.9184170961380005, "learning_rate": 2.8213499920879793e-06, "loss": 0.1434, "step": 34840 }, { "epoch": 0.7549888414620934, "grad_norm": 2.1214025020599365, "learning_rate": 2.8189809830526628e-06, "loss": 0.114, "step": 34845 }, { "epoch": 0.7550971767815743, "grad_norm": 1.026854157447815, "learning_rate": 2.816612805830824e-06, "loss": 0.1389, "step": 34850 }, { "epoch": 0.7552055121010551, "grad_norm": 1.6271485090255737, "learning_rate": 2.814245460696788e-06, "loss": 0.13, "step": 34855 }, { "epoch": 0.7553138474205361, "grad_norm": 1.0480661392211914, "learning_rate": 2.81187894792477e-06, "loss": 0.1527, "step": 34860 }, { "epoch": 0.7554221827400169, "grad_norm": 1.1758226156234741, "learning_rate": 2.809513267788898e-06, "loss": 0.1257, "step": 34865 }, { "epoch": 0.7555305180594978, "grad_norm": 1.2719815969467163, "learning_rate": 2.8071484205632037e-06, "loss": 0.156, "step": 34870 }, { "epoch": 0.7556388533789786, "grad_norm": 0.9041404724121094, "learning_rate": 2.8047844065216124e-06, "loss": 0.1881, "step": 34875 }, { "epoch": 0.7557471886984595, "grad_norm": 1.3804455995559692, "learning_rate": 2.8024212259379656e-06, "loss": 0.2093, "step": 34880 }, { "epoch": 0.7558555240179403, "grad_norm": 1.197435975074768, "learning_rate": 2.8000588790859985e-06, "loss": 0.1456, "step": 34885 }, { "epoch": 0.7559638593374212, "grad_norm": 1.3901985883712769, "learning_rate": 2.797697366239357e-06, "loss": 0.2387, "step": 34890 }, { "epoch": 0.756072194656902, "grad_norm": 2.676701784133911, "learning_rate": 2.7953366876715827e-06, "loss": 0.2084, "step": 34895 }, { "epoch": 0.7561805299763829, "grad_norm": 1.3251652717590332, "learning_rate": 2.79297684365613e-06, "loss": 0.1685, "step": 34900 }, { "epoch": 0.7562888652958638, "grad_norm": 1.9471596479415894, "learning_rate": 2.790617834466346e-06, "loss": 0.1183, "step": 34905 }, { "epoch": 0.7563972006153447, "grad_norm": 1.7943089008331299, "learning_rate": 2.7882596603754895e-06, "loss": 0.2048, "step": 34910 }, { "epoch": 0.7565055359348255, "grad_norm": 1.8952980041503906, "learning_rate": 2.7859023216567217e-06, "loss": 0.1571, "step": 34915 }, { "epoch": 0.7566138712543063, "grad_norm": 1.0320568084716797, "learning_rate": 2.783545818583101e-06, "loss": 0.2452, "step": 34920 }, { "epoch": 0.7567222065737872, "grad_norm": 1.9548252820968628, "learning_rate": 2.7811901514275963e-06, "loss": 0.175, "step": 34925 }, { "epoch": 0.756830541893268, "grad_norm": 1.8309378623962402, "learning_rate": 2.7788353204630723e-06, "loss": 0.1206, "step": 34930 }, { "epoch": 0.7569388772127489, "grad_norm": 1.0861732959747314, "learning_rate": 2.776481325962307e-06, "loss": 0.1211, "step": 34935 }, { "epoch": 0.7570472125322297, "grad_norm": 1.6085067987442017, "learning_rate": 2.7741281681979715e-06, "loss": 0.1193, "step": 34940 }, { "epoch": 0.7571555478517106, "grad_norm": 0.834119439125061, "learning_rate": 2.7717758474426417e-06, "loss": 0.119, "step": 34945 }, { "epoch": 0.7572638831711914, "grad_norm": 1.6980724334716797, "learning_rate": 2.7694243639688033e-06, "loss": 0.1776, "step": 34950 }, { "epoch": 0.7573722184906724, "grad_norm": 2.490294933319092, "learning_rate": 2.7670737180488326e-06, "loss": 0.1672, "step": 34955 }, { "epoch": 0.7574805538101532, "grad_norm": 0.7419748902320862, "learning_rate": 2.764723909955028e-06, "loss": 0.1733, "step": 34960 }, { "epoch": 0.7575888891296341, "grad_norm": 0.6474830508232117, "learning_rate": 2.7623749399595713e-06, "loss": 0.1184, "step": 34965 }, { "epoch": 0.7576972244491149, "grad_norm": 1.2713338136672974, "learning_rate": 2.7600268083345616e-06, "loss": 0.1207, "step": 34970 }, { "epoch": 0.7578055597685958, "grad_norm": 1.857161045074463, "learning_rate": 2.7576795153519907e-06, "loss": 0.1781, "step": 34975 }, { "epoch": 0.7579138950880766, "grad_norm": 1.0587984323501587, "learning_rate": 2.7553330612837557e-06, "loss": 0.2009, "step": 34980 }, { "epoch": 0.7580222304075575, "grad_norm": 1.5789103507995605, "learning_rate": 2.7529874464016627e-06, "loss": 0.1137, "step": 34985 }, { "epoch": 0.7581305657270383, "grad_norm": 0.9122623205184937, "learning_rate": 2.7506426709774116e-06, "loss": 0.1061, "step": 34990 }, { "epoch": 0.7582389010465191, "grad_norm": 1.007308006286621, "learning_rate": 2.748298735282614e-06, "loss": 0.1124, "step": 34995 }, { "epoch": 0.758347236366, "grad_norm": 1.1437458992004395, "learning_rate": 2.7459556395887753e-06, "loss": 0.1513, "step": 35000 }, { "epoch": 0.758455571685481, "grad_norm": 1.1394973993301392, "learning_rate": 2.7436133841673095e-06, "loss": 0.1114, "step": 35005 }, { "epoch": 0.7585639070049618, "grad_norm": 2.677698850631714, "learning_rate": 2.7412719692895317e-06, "loss": 0.162, "step": 35010 }, { "epoch": 0.7586722423244426, "grad_norm": 1.6129282712936401, "learning_rate": 2.738931395226665e-06, "loss": 0.1758, "step": 35015 }, { "epoch": 0.7587805776439235, "grad_norm": 1.6387885808944702, "learning_rate": 2.7365916622498245e-06, "loss": 0.1266, "step": 35020 }, { "epoch": 0.7588889129634043, "grad_norm": 1.7386865615844727, "learning_rate": 2.7342527706300314e-06, "loss": 0.1521, "step": 35025 }, { "epoch": 0.7589972482828852, "grad_norm": 1.0290471315383911, "learning_rate": 2.731914720638217e-06, "loss": 0.144, "step": 35030 }, { "epoch": 0.759105583602366, "grad_norm": 1.2725741863250732, "learning_rate": 2.7295775125452028e-06, "loss": 0.1239, "step": 35035 }, { "epoch": 0.7592139189218469, "grad_norm": 1.338225245475769, "learning_rate": 2.7272411466217263e-06, "loss": 0.1378, "step": 35040 }, { "epoch": 0.7593222542413277, "grad_norm": 1.1643376350402832, "learning_rate": 2.724905623138414e-06, "loss": 0.1191, "step": 35045 }, { "epoch": 0.7594305895608087, "grad_norm": 0.9648059606552124, "learning_rate": 2.722570942365804e-06, "loss": 0.1488, "step": 35050 }, { "epoch": 0.7595389248802895, "grad_norm": 1.8303767442703247, "learning_rate": 2.720237104574338e-06, "loss": 0.1327, "step": 35055 }, { "epoch": 0.7596472601997704, "grad_norm": 1.3469536304473877, "learning_rate": 2.7179041100343494e-06, "loss": 0.2054, "step": 35060 }, { "epoch": 0.7597555955192512, "grad_norm": 1.1895500421524048, "learning_rate": 2.7155719590160868e-06, "loss": 0.1583, "step": 35065 }, { "epoch": 0.759863930838732, "grad_norm": 1.5241868495941162, "learning_rate": 2.713240651789689e-06, "loss": 0.1366, "step": 35070 }, { "epoch": 0.7599722661582129, "grad_norm": 2.0360147953033447, "learning_rate": 2.7109101886252097e-06, "loss": 0.1392, "step": 35075 }, { "epoch": 0.7600806014776937, "grad_norm": 1.6259673833847046, "learning_rate": 2.7085805697925902e-06, "loss": 0.1547, "step": 35080 }, { "epoch": 0.7601889367971746, "grad_norm": 1.8531813621520996, "learning_rate": 2.706251795561691e-06, "loss": 0.1471, "step": 35085 }, { "epoch": 0.7602972721166554, "grad_norm": 1.5045809745788574, "learning_rate": 2.703923866202256e-06, "loss": 0.1794, "step": 35090 }, { "epoch": 0.7604056074361363, "grad_norm": 1.2390397787094116, "learning_rate": 2.7015967819839497e-06, "loss": 0.1752, "step": 35095 }, { "epoch": 0.7605139427556172, "grad_norm": 1.559119701385498, "learning_rate": 2.699270543176323e-06, "loss": 0.1961, "step": 35100 }, { "epoch": 0.7606222780750981, "grad_norm": 0.8938836455345154, "learning_rate": 2.6969451500488396e-06, "loss": 0.0712, "step": 35105 }, { "epoch": 0.7607306133945789, "grad_norm": 1.8710989952087402, "learning_rate": 2.6946206028708634e-06, "loss": 0.1435, "step": 35110 }, { "epoch": 0.7608389487140598, "grad_norm": 2.175147533416748, "learning_rate": 2.692296901911653e-06, "loss": 0.1751, "step": 35115 }, { "epoch": 0.7609472840335406, "grad_norm": 1.7370105981826782, "learning_rate": 2.689974047440379e-06, "loss": 0.1837, "step": 35120 }, { "epoch": 0.7610556193530215, "grad_norm": 1.9090455770492554, "learning_rate": 2.6876520397261053e-06, "loss": 0.1606, "step": 35125 }, { "epoch": 0.7611639546725023, "grad_norm": 0.9458649754524231, "learning_rate": 2.6853308790378076e-06, "loss": 0.1352, "step": 35130 }, { "epoch": 0.7612722899919832, "grad_norm": 1.2284551858901978, "learning_rate": 2.6830105656443495e-06, "loss": 0.1553, "step": 35135 }, { "epoch": 0.761380625311464, "grad_norm": 2.2451987266540527, "learning_rate": 2.680691099814513e-06, "loss": 0.1876, "step": 35140 }, { "epoch": 0.761488960630945, "grad_norm": 2.346745014190674, "learning_rate": 2.6783724818169655e-06, "loss": 0.2141, "step": 35145 }, { "epoch": 0.7615972959504258, "grad_norm": 2.189136266708374, "learning_rate": 2.6760547119202884e-06, "loss": 0.1729, "step": 35150 }, { "epoch": 0.7617056312699066, "grad_norm": 1.2866942882537842, "learning_rate": 2.6737377903929627e-06, "loss": 0.1915, "step": 35155 }, { "epoch": 0.7618139665893875, "grad_norm": 1.9520251750946045, "learning_rate": 2.671421717503364e-06, "loss": 0.1385, "step": 35160 }, { "epoch": 0.7619223019088683, "grad_norm": 1.2192507982254028, "learning_rate": 2.6691064935197806e-06, "loss": 0.2805, "step": 35165 }, { "epoch": 0.7620306372283492, "grad_norm": 2.248347043991089, "learning_rate": 2.6667921187103896e-06, "loss": 0.2069, "step": 35170 }, { "epoch": 0.76213897254783, "grad_norm": 1.5159140825271606, "learning_rate": 2.6644785933432828e-06, "loss": 0.1419, "step": 35175 }, { "epoch": 0.7622473078673109, "grad_norm": 1.1485464572906494, "learning_rate": 2.6621659176864423e-06, "loss": 0.1865, "step": 35180 }, { "epoch": 0.7623556431867917, "grad_norm": 0.6331849098205566, "learning_rate": 2.659854092007763e-06, "loss": 0.1331, "step": 35185 }, { "epoch": 0.7624639785062726, "grad_norm": 1.1423861980438232, "learning_rate": 2.657543116575031e-06, "loss": 0.1374, "step": 35190 }, { "epoch": 0.7625723138257535, "grad_norm": 0.791572093963623, "learning_rate": 2.6552329916559338e-06, "loss": 0.0995, "step": 35195 }, { "epoch": 0.7626806491452344, "grad_norm": 1.1046392917633057, "learning_rate": 2.6529237175180754e-06, "loss": 0.1311, "step": 35200 }, { "epoch": 0.7627889844647152, "grad_norm": 1.5173770189285278, "learning_rate": 2.650615294428942e-06, "loss": 0.1624, "step": 35205 }, { "epoch": 0.7628973197841961, "grad_norm": 0.8715978860855103, "learning_rate": 2.6483077226559364e-06, "loss": 0.0929, "step": 35210 }, { "epoch": 0.7630056551036769, "grad_norm": 1.6915614604949951, "learning_rate": 2.6460010024663494e-06, "loss": 0.2047, "step": 35215 }, { "epoch": 0.7631139904231578, "grad_norm": 1.685198426246643, "learning_rate": 2.6436951341273863e-06, "loss": 0.1564, "step": 35220 }, { "epoch": 0.7632223257426386, "grad_norm": 1.5931836366653442, "learning_rate": 2.641390117906145e-06, "loss": 0.1377, "step": 35225 }, { "epoch": 0.7633306610621194, "grad_norm": 1.6205801963806152, "learning_rate": 2.6390859540696222e-06, "loss": 0.2068, "step": 35230 }, { "epoch": 0.7634389963816003, "grad_norm": 1.3465840816497803, "learning_rate": 2.63678264288473e-06, "loss": 0.2162, "step": 35235 }, { "epoch": 0.7635473317010811, "grad_norm": 2.0572781562805176, "learning_rate": 2.6344801846182634e-06, "loss": 0.1102, "step": 35240 }, { "epoch": 0.7636556670205621, "grad_norm": 1.5211588144302368, "learning_rate": 2.6321785795369324e-06, "loss": 0.1511, "step": 35245 }, { "epoch": 0.7637640023400429, "grad_norm": 1.9673622846603394, "learning_rate": 2.6298778279073435e-06, "loss": 0.2186, "step": 35250 }, { "epoch": 0.7638723376595238, "grad_norm": 1.596836805343628, "learning_rate": 2.6275779299960056e-06, "loss": 0.1245, "step": 35255 }, { "epoch": 0.7639806729790046, "grad_norm": 1.7800889015197754, "learning_rate": 2.6252788860693266e-06, "loss": 0.183, "step": 35260 }, { "epoch": 0.7640890082984855, "grad_norm": 0.7335589528083801, "learning_rate": 2.6229806963936124e-06, "loss": 0.1529, "step": 35265 }, { "epoch": 0.7641973436179663, "grad_norm": 1.2636340856552124, "learning_rate": 2.620683361235079e-06, "loss": 0.1847, "step": 35270 }, { "epoch": 0.7643056789374472, "grad_norm": 1.5610733032226562, "learning_rate": 2.6183868808598334e-06, "loss": 0.1869, "step": 35275 }, { "epoch": 0.764414014256928, "grad_norm": 2.553449869155884, "learning_rate": 2.6160912555338937e-06, "loss": 0.1433, "step": 35280 }, { "epoch": 0.7645223495764089, "grad_norm": 0.33775341510772705, "learning_rate": 2.613796485523169e-06, "loss": 0.085, "step": 35285 }, { "epoch": 0.7646306848958898, "grad_norm": 1.4565128087997437, "learning_rate": 2.6115025710934748e-06, "loss": 0.1537, "step": 35290 }, { "epoch": 0.7647390202153707, "grad_norm": 1.3163284063339233, "learning_rate": 2.6092095125105323e-06, "loss": 0.2063, "step": 35295 }, { "epoch": 0.7648473555348515, "grad_norm": 1.4264148473739624, "learning_rate": 2.60691731003995e-06, "loss": 0.1581, "step": 35300 }, { "epoch": 0.7649556908543323, "grad_norm": 1.6637247800827026, "learning_rate": 2.6046259639472525e-06, "loss": 0.1781, "step": 35305 }, { "epoch": 0.7650640261738132, "grad_norm": 2.0971639156341553, "learning_rate": 2.6023354744978514e-06, "loss": 0.1324, "step": 35310 }, { "epoch": 0.765172361493294, "grad_norm": 1.1950231790542603, "learning_rate": 2.6000458419570717e-06, "loss": 0.1045, "step": 35315 }, { "epoch": 0.7652806968127749, "grad_norm": 1.6274770498275757, "learning_rate": 2.5977570665901264e-06, "loss": 0.2142, "step": 35320 }, { "epoch": 0.7653890321322557, "grad_norm": 1.6400524377822876, "learning_rate": 2.595469148662142e-06, "loss": 0.1738, "step": 35325 }, { "epoch": 0.7654973674517366, "grad_norm": 1.7526285648345947, "learning_rate": 2.5931820884381344e-06, "loss": 0.1881, "step": 35330 }, { "epoch": 0.7656057027712174, "grad_norm": 0.9244678020477295, "learning_rate": 2.5908958861830313e-06, "loss": 0.1769, "step": 35335 }, { "epoch": 0.7657140380906984, "grad_norm": 2.2971534729003906, "learning_rate": 2.588610542161647e-06, "loss": 0.185, "step": 35340 }, { "epoch": 0.7658223734101792, "grad_norm": 1.7151799201965332, "learning_rate": 2.5863260566387105e-06, "loss": 0.1929, "step": 35345 }, { "epoch": 0.7659307087296601, "grad_norm": 1.014827013015747, "learning_rate": 2.5840424298788448e-06, "loss": 0.2436, "step": 35350 }, { "epoch": 0.7660390440491409, "grad_norm": 2.106600284576416, "learning_rate": 2.58175966214657e-06, "loss": 0.181, "step": 35355 }, { "epoch": 0.7661473793686218, "grad_norm": 1.483106017112732, "learning_rate": 2.5794777537063176e-06, "loss": 0.246, "step": 35360 }, { "epoch": 0.7662557146881026, "grad_norm": 1.0199816226959229, "learning_rate": 2.5771967048224033e-06, "loss": 0.1876, "step": 35365 }, { "epoch": 0.7663640500075835, "grad_norm": 1.0857467651367188, "learning_rate": 2.5749165157590605e-06, "loss": 0.1598, "step": 35370 }, { "epoch": 0.7664723853270643, "grad_norm": 0.991389274597168, "learning_rate": 2.572637186780409e-06, "loss": 0.1182, "step": 35375 }, { "epoch": 0.7665807206465451, "grad_norm": 1.9640824794769287, "learning_rate": 2.570358718150481e-06, "loss": 0.188, "step": 35380 }, { "epoch": 0.766689055966026, "grad_norm": 0.8611831665039062, "learning_rate": 2.568081110133195e-06, "loss": 0.1663, "step": 35385 }, { "epoch": 0.766797391285507, "grad_norm": 1.1647469997406006, "learning_rate": 2.5658043629923834e-06, "loss": 0.1712, "step": 35390 }, { "epoch": 0.7669057266049878, "grad_norm": 1.257062315940857, "learning_rate": 2.5635284769917744e-06, "loss": 0.1011, "step": 35395 }, { "epoch": 0.7670140619244686, "grad_norm": 1.3330812454223633, "learning_rate": 2.5612534523949906e-06, "loss": 0.0954, "step": 35400 }, { "epoch": 0.7671223972439495, "grad_norm": 1.3440473079681396, "learning_rate": 2.558979289465565e-06, "loss": 0.2159, "step": 35405 }, { "epoch": 0.7672307325634303, "grad_norm": 1.609562873840332, "learning_rate": 2.5567059884669186e-06, "loss": 0.1797, "step": 35410 }, { "epoch": 0.7673390678829112, "grad_norm": 2.1057345867156982, "learning_rate": 2.5544335496623862e-06, "loss": 0.1762, "step": 35415 }, { "epoch": 0.767447403202392, "grad_norm": 1.3360044956207275, "learning_rate": 2.55216197331519e-06, "loss": 0.1668, "step": 35420 }, { "epoch": 0.7675557385218729, "grad_norm": 1.3467410802841187, "learning_rate": 2.549891259688463e-06, "loss": 0.1466, "step": 35425 }, { "epoch": 0.7676640738413537, "grad_norm": 1.1296306848526, "learning_rate": 2.547621409045231e-06, "loss": 0.138, "step": 35430 }, { "epoch": 0.7677724091608347, "grad_norm": 2.4440553188323975, "learning_rate": 2.545352421648416e-06, "loss": 0.1192, "step": 35435 }, { "epoch": 0.7678807444803155, "grad_norm": 0.8011283874511719, "learning_rate": 2.5430842977608596e-06, "loss": 0.1474, "step": 35440 }, { "epoch": 0.7679890797997964, "grad_norm": 1.4543839693069458, "learning_rate": 2.540817037645278e-06, "loss": 0.155, "step": 35445 }, { "epoch": 0.7680974151192772, "grad_norm": 1.5304386615753174, "learning_rate": 2.538550641564308e-06, "loss": 0.1191, "step": 35450 }, { "epoch": 0.768205750438758, "grad_norm": 1.5631120204925537, "learning_rate": 2.5362851097804696e-06, "loss": 0.1606, "step": 35455 }, { "epoch": 0.7683140857582389, "grad_norm": 1.8504785299301147, "learning_rate": 2.5340204425561987e-06, "loss": 0.218, "step": 35460 }, { "epoch": 0.7684224210777197, "grad_norm": 1.3200228214263916, "learning_rate": 2.53175664015382e-06, "loss": 0.1921, "step": 35465 }, { "epoch": 0.7685307563972006, "grad_norm": 2.092820167541504, "learning_rate": 2.529493702835556e-06, "loss": 0.1359, "step": 35470 }, { "epoch": 0.7686390917166814, "grad_norm": 1.520151972770691, "learning_rate": 2.5272316308635415e-06, "loss": 0.1762, "step": 35475 }, { "epoch": 0.7687474270361623, "grad_norm": 1.6061996221542358, "learning_rate": 2.5249704244997975e-06, "loss": 0.171, "step": 35480 }, { "epoch": 0.7688557623556432, "grad_norm": 0.9417845010757446, "learning_rate": 2.5227100840062534e-06, "loss": 0.1357, "step": 35485 }, { "epoch": 0.7689640976751241, "grad_norm": 0.7037985920906067, "learning_rate": 2.520450609644738e-06, "loss": 0.1536, "step": 35490 }, { "epoch": 0.7690724329946049, "grad_norm": 1.4562716484069824, "learning_rate": 2.5181920016769767e-06, "loss": 0.115, "step": 35495 }, { "epoch": 0.7691807683140858, "grad_norm": 1.9555743932724, "learning_rate": 2.5159342603645965e-06, "loss": 0.1567, "step": 35500 }, { "epoch": 0.7692891036335666, "grad_norm": 1.1284027099609375, "learning_rate": 2.5136773859691164e-06, "loss": 0.1828, "step": 35505 }, { "epoch": 0.7693974389530475, "grad_norm": 0.887008011341095, "learning_rate": 2.5114213787519692e-06, "loss": 0.1516, "step": 35510 }, { "epoch": 0.7695057742725283, "grad_norm": 1.0297131538391113, "learning_rate": 2.5091662389744743e-06, "loss": 0.1472, "step": 35515 }, { "epoch": 0.7696141095920092, "grad_norm": 0.9958484768867493, "learning_rate": 2.5069119668978605e-06, "loss": 0.1211, "step": 35520 }, { "epoch": 0.76972244491149, "grad_norm": 0.975896418094635, "learning_rate": 2.504658562783245e-06, "loss": 0.1406, "step": 35525 }, { "epoch": 0.7698307802309708, "grad_norm": 1.0193564891815186, "learning_rate": 2.5024060268916593e-06, "loss": 0.1396, "step": 35530 }, { "epoch": 0.7699391155504518, "grad_norm": 1.4825705289840698, "learning_rate": 2.5001543594840183e-06, "loss": 0.1899, "step": 35535 }, { "epoch": 0.7700474508699326, "grad_norm": 1.3645309209823608, "learning_rate": 2.4979035608211464e-06, "loss": 0.1375, "step": 35540 }, { "epoch": 0.7701557861894135, "grad_norm": 1.9813169240951538, "learning_rate": 2.495653631163768e-06, "loss": 0.1259, "step": 35545 }, { "epoch": 0.7702641215088943, "grad_norm": 0.957911491394043, "learning_rate": 2.4934045707724995e-06, "loss": 0.2344, "step": 35550 }, { "epoch": 0.7703724568283752, "grad_norm": 1.3440754413604736, "learning_rate": 2.4911563799078654e-06, "loss": 0.1739, "step": 35555 }, { "epoch": 0.770480792147856, "grad_norm": 0.8070446252822876, "learning_rate": 2.488909058830279e-06, "loss": 0.1559, "step": 35560 }, { "epoch": 0.7705891274673369, "grad_norm": 0.6171295046806335, "learning_rate": 2.4866626078000644e-06, "loss": 0.1309, "step": 35565 }, { "epoch": 0.7706974627868177, "grad_norm": 1.0014232397079468, "learning_rate": 2.484417027077435e-06, "loss": 0.1534, "step": 35570 }, { "epoch": 0.7708057981062986, "grad_norm": 1.9908287525177002, "learning_rate": 2.482172316922512e-06, "loss": 0.1839, "step": 35575 }, { "epoch": 0.7709141334257795, "grad_norm": 1.46133553981781, "learning_rate": 2.479928477595306e-06, "loss": 0.1876, "step": 35580 }, { "epoch": 0.7710224687452604, "grad_norm": 2.0854578018188477, "learning_rate": 2.477685509355735e-06, "loss": 0.204, "step": 35585 }, { "epoch": 0.7711308040647412, "grad_norm": 1.2033497095108032, "learning_rate": 2.475443412463617e-06, "loss": 0.2035, "step": 35590 }, { "epoch": 0.7712391393842221, "grad_norm": 1.4462077617645264, "learning_rate": 2.4732021871786595e-06, "loss": 0.2503, "step": 35595 }, { "epoch": 0.7713474747037029, "grad_norm": 1.4555692672729492, "learning_rate": 2.470961833760479e-06, "loss": 0.1899, "step": 35600 }, { "epoch": 0.7714558100231838, "grad_norm": 0.8385633230209351, "learning_rate": 2.468722352468582e-06, "loss": 0.1029, "step": 35605 }, { "epoch": 0.7715641453426646, "grad_norm": 1.0147974491119385, "learning_rate": 2.4664837435623854e-06, "loss": 0.1741, "step": 35610 }, { "epoch": 0.7716724806621454, "grad_norm": 1.1282484531402588, "learning_rate": 2.464246007301192e-06, "loss": 0.1584, "step": 35615 }, { "epoch": 0.7717808159816263, "grad_norm": 0.940040647983551, "learning_rate": 2.462009143944216e-06, "loss": 0.2446, "step": 35620 }, { "epoch": 0.7718891513011071, "grad_norm": 1.1346319913864136, "learning_rate": 2.4597731537505585e-06, "loss": 0.1696, "step": 35625 }, { "epoch": 0.7719974866205881, "grad_norm": 0.503745973110199, "learning_rate": 2.457538036979229e-06, "loss": 0.0802, "step": 35630 }, { "epoch": 0.7721058219400689, "grad_norm": 1.3569254875183105, "learning_rate": 2.4553037938891344e-06, "loss": 0.1954, "step": 35635 }, { "epoch": 0.7722141572595498, "grad_norm": 1.8620922565460205, "learning_rate": 2.4530704247390724e-06, "loss": 0.1879, "step": 35640 }, { "epoch": 0.7723224925790306, "grad_norm": 2.171250581741333, "learning_rate": 2.4508379297877527e-06, "loss": 0.19, "step": 35645 }, { "epoch": 0.7724308278985115, "grad_norm": 1.0835939645767212, "learning_rate": 2.4486063092937685e-06, "loss": 0.1679, "step": 35650 }, { "epoch": 0.7725391632179923, "grad_norm": 0.8395276069641113, "learning_rate": 2.446375563515627e-06, "loss": 0.1431, "step": 35655 }, { "epoch": 0.7726474985374732, "grad_norm": 1.7583941221237183, "learning_rate": 2.44414569271172e-06, "loss": 0.3408, "step": 35660 }, { "epoch": 0.772755833856954, "grad_norm": 1.7665282487869263, "learning_rate": 2.4419166971403506e-06, "loss": 0.1788, "step": 35665 }, { "epoch": 0.7728641691764349, "grad_norm": 1.555632472038269, "learning_rate": 2.439688577059712e-06, "loss": 0.2204, "step": 35670 }, { "epoch": 0.7729725044959158, "grad_norm": 1.465988278388977, "learning_rate": 2.437461332727893e-06, "loss": 0.1131, "step": 35675 }, { "epoch": 0.7730808398153967, "grad_norm": 1.4030410051345825, "learning_rate": 2.435234964402896e-06, "loss": 0.1311, "step": 35680 }, { "epoch": 0.7731891751348775, "grad_norm": 1.5253868103027344, "learning_rate": 2.433009472342607e-06, "loss": 0.2479, "step": 35685 }, { "epoch": 0.7732975104543583, "grad_norm": 1.3417582511901855, "learning_rate": 2.4307848568048187e-06, "loss": 0.2195, "step": 35690 }, { "epoch": 0.7734058457738392, "grad_norm": 1.2212697267532349, "learning_rate": 2.428561118047216e-06, "loss": 0.1658, "step": 35695 }, { "epoch": 0.77351418109332, "grad_norm": 1.8929078578948975, "learning_rate": 2.4263382563273908e-06, "loss": 0.1438, "step": 35700 }, { "epoch": 0.7736225164128009, "grad_norm": 1.7570719718933105, "learning_rate": 2.4241162719028245e-06, "loss": 0.1637, "step": 35705 }, { "epoch": 0.7737308517322817, "grad_norm": 1.7147173881530762, "learning_rate": 2.4218951650308974e-06, "loss": 0.1178, "step": 35710 }, { "epoch": 0.7738391870517626, "grad_norm": 1.4910824298858643, "learning_rate": 2.4196749359689e-06, "loss": 0.1833, "step": 35715 }, { "epoch": 0.7739475223712434, "grad_norm": 1.617845892906189, "learning_rate": 2.4174555849740044e-06, "loss": 0.1917, "step": 35720 }, { "epoch": 0.7740558576907244, "grad_norm": 1.1218281984329224, "learning_rate": 2.4152371123032926e-06, "loss": 0.1437, "step": 35725 }, { "epoch": 0.7741641930102052, "grad_norm": 2.139275550842285, "learning_rate": 2.413019518213742e-06, "loss": 0.1954, "step": 35730 }, { "epoch": 0.7742725283296861, "grad_norm": 1.2835289239883423, "learning_rate": 2.41080280296223e-06, "loss": 0.1947, "step": 35735 }, { "epoch": 0.7743808636491669, "grad_norm": 1.4250239133834839, "learning_rate": 2.408586966805527e-06, "loss": 0.0736, "step": 35740 }, { "epoch": 0.7744891989686478, "grad_norm": 0.7079359889030457, "learning_rate": 2.406372010000302e-06, "loss": 0.1598, "step": 35745 }, { "epoch": 0.7745975342881286, "grad_norm": 1.28795325756073, "learning_rate": 2.4041579328031296e-06, "loss": 0.1785, "step": 35750 }, { "epoch": 0.7747058696076095, "grad_norm": 1.7836247682571411, "learning_rate": 2.4019447354704726e-06, "loss": 0.186, "step": 35755 }, { "epoch": 0.7748142049270903, "grad_norm": 1.9752179384231567, "learning_rate": 2.3997324182587014e-06, "loss": 0.196, "step": 35760 }, { "epoch": 0.7749225402465711, "grad_norm": 1.6126917600631714, "learning_rate": 2.397520981424075e-06, "loss": 0.191, "step": 35765 }, { "epoch": 0.775030875566052, "grad_norm": 1.4548324346542358, "learning_rate": 2.395310425222761e-06, "loss": 0.1947, "step": 35770 }, { "epoch": 0.775139210885533, "grad_norm": 1.1317158937454224, "learning_rate": 2.393100749910813e-06, "loss": 0.1196, "step": 35775 }, { "epoch": 0.7752475462050138, "grad_norm": 1.7059262990951538, "learning_rate": 2.3908919557441913e-06, "loss": 0.1728, "step": 35780 }, { "epoch": 0.7753558815244946, "grad_norm": 2.1919548511505127, "learning_rate": 2.388684042978755e-06, "loss": 0.155, "step": 35785 }, { "epoch": 0.7754642168439755, "grad_norm": 1.4385331869125366, "learning_rate": 2.386477011870252e-06, "loss": 0.1589, "step": 35790 }, { "epoch": 0.7755725521634563, "grad_norm": 1.3855724334716797, "learning_rate": 2.384270862674339e-06, "loss": 0.153, "step": 35795 }, { "epoch": 0.7756808874829372, "grad_norm": 1.8009636402130127, "learning_rate": 2.382065595646561e-06, "loss": 0.1844, "step": 35800 }, { "epoch": 0.775789222802418, "grad_norm": 1.323678731918335, "learning_rate": 2.37986121104237e-06, "loss": 0.2446, "step": 35805 }, { "epoch": 0.7758975581218989, "grad_norm": 1.2042083740234375, "learning_rate": 2.3776577091171048e-06, "loss": 0.1884, "step": 35810 }, { "epoch": 0.7760058934413797, "grad_norm": 1.3295642137527466, "learning_rate": 2.3754550901260143e-06, "loss": 0.218, "step": 35815 }, { "epoch": 0.7761142287608607, "grad_norm": 1.5526463985443115, "learning_rate": 2.373253354324232e-06, "loss": 0.2191, "step": 35820 }, { "epoch": 0.7762225640803415, "grad_norm": 1.6580570936203003, "learning_rate": 2.3710525019668017e-06, "loss": 0.1241, "step": 35825 }, { "epoch": 0.7763308993998224, "grad_norm": 1.349256992340088, "learning_rate": 2.3688525333086588e-06, "loss": 0.2412, "step": 35830 }, { "epoch": 0.7764392347193032, "grad_norm": 1.243455171585083, "learning_rate": 2.366653448604633e-06, "loss": 0.2074, "step": 35835 }, { "epoch": 0.776547570038784, "grad_norm": 1.189090371131897, "learning_rate": 2.3644552481094606e-06, "loss": 0.1205, "step": 35840 }, { "epoch": 0.7766559053582649, "grad_norm": 1.7082865238189697, "learning_rate": 2.362257932077765e-06, "loss": 0.2465, "step": 35845 }, { "epoch": 0.7767642406777457, "grad_norm": 1.8205162286758423, "learning_rate": 2.360061500764077e-06, "loss": 0.0997, "step": 35850 }, { "epoch": 0.7768725759972266, "grad_norm": 0.835120677947998, "learning_rate": 2.3578659544228146e-06, "loss": 0.1919, "step": 35855 }, { "epoch": 0.7769809113167074, "grad_norm": 1.1118308305740356, "learning_rate": 2.3556712933083057e-06, "loss": 0.094, "step": 35860 }, { "epoch": 0.7770892466361883, "grad_norm": 2.308461904525757, "learning_rate": 2.3534775176747626e-06, "loss": 0.1311, "step": 35865 }, { "epoch": 0.7771975819556692, "grad_norm": 0.7064722776412964, "learning_rate": 2.3512846277763037e-06, "loss": 0.2369, "step": 35870 }, { "epoch": 0.7773059172751501, "grad_norm": 0.952960729598999, "learning_rate": 2.3490926238669455e-06, "loss": 0.1971, "step": 35875 }, { "epoch": 0.7774142525946309, "grad_norm": 0.8810786604881287, "learning_rate": 2.346901506200594e-06, "loss": 0.1128, "step": 35880 }, { "epoch": 0.7775225879141118, "grad_norm": 1.204935908317566, "learning_rate": 2.344711275031062e-06, "loss": 0.1362, "step": 35885 }, { "epoch": 0.7776309232335926, "grad_norm": 1.1870062351226807, "learning_rate": 2.3425219306120505e-06, "loss": 0.1911, "step": 35890 }, { "epoch": 0.7777392585530735, "grad_norm": 0.25624704360961914, "learning_rate": 2.340333473197166e-06, "loss": 0.1473, "step": 35895 }, { "epoch": 0.7778475938725543, "grad_norm": 1.704437494277954, "learning_rate": 2.3381459030399044e-06, "loss": 0.1972, "step": 35900 }, { "epoch": 0.7779559291920352, "grad_norm": 1.6253057718276978, "learning_rate": 2.3359592203936688e-06, "loss": 0.2129, "step": 35905 }, { "epoch": 0.778064264511516, "grad_norm": 1.8639671802520752, "learning_rate": 2.3337734255117494e-06, "loss": 0.1456, "step": 35910 }, { "epoch": 0.7781725998309968, "grad_norm": 1.2812765836715698, "learning_rate": 2.3315885186473364e-06, "loss": 0.1092, "step": 35915 }, { "epoch": 0.7782809351504778, "grad_norm": 0.9748331904411316, "learning_rate": 2.3294045000535205e-06, "loss": 0.1693, "step": 35920 }, { "epoch": 0.7783892704699586, "grad_norm": 0.7862744331359863, "learning_rate": 2.3272213699832878e-06, "loss": 0.1396, "step": 35925 }, { "epoch": 0.7784976057894395, "grad_norm": 1.215026617050171, "learning_rate": 2.3250391286895245e-06, "loss": 0.1542, "step": 35930 }, { "epoch": 0.7786059411089203, "grad_norm": 1.652754306793213, "learning_rate": 2.322857776425004e-06, "loss": 0.1735, "step": 35935 }, { "epoch": 0.7787142764284012, "grad_norm": 1.5393725633621216, "learning_rate": 2.320677313442409e-06, "loss": 0.1851, "step": 35940 }, { "epoch": 0.778822611747882, "grad_norm": 1.1896384954452515, "learning_rate": 2.318497739994311e-06, "loss": 0.115, "step": 35945 }, { "epoch": 0.7789309470673629, "grad_norm": 1.0706994533538818, "learning_rate": 2.316319056333178e-06, "loss": 0.1243, "step": 35950 }, { "epoch": 0.7790392823868437, "grad_norm": 1.2802793979644775, "learning_rate": 2.314141262711385e-06, "loss": 0.1132, "step": 35955 }, { "epoch": 0.7791476177063246, "grad_norm": 2.0448482036590576, "learning_rate": 2.311964359381188e-06, "loss": 0.1471, "step": 35960 }, { "epoch": 0.7792559530258055, "grad_norm": 0.8090513944625854, "learning_rate": 2.3097883465947537e-06, "loss": 0.2138, "step": 35965 }, { "epoch": 0.7793642883452864, "grad_norm": 2.4412002563476562, "learning_rate": 2.3076132246041406e-06, "loss": 0.1607, "step": 35970 }, { "epoch": 0.7794726236647672, "grad_norm": 1.1698212623596191, "learning_rate": 2.305438993661305e-06, "loss": 0.2278, "step": 35975 }, { "epoch": 0.7795809589842481, "grad_norm": 1.761031985282898, "learning_rate": 2.3032656540180987e-06, "loss": 0.1229, "step": 35980 }, { "epoch": 0.7796892943037289, "grad_norm": 1.6177737712860107, "learning_rate": 2.301093205926267e-06, "loss": 0.1884, "step": 35985 }, { "epoch": 0.7797976296232098, "grad_norm": 1.4990899562835693, "learning_rate": 2.298921649637459e-06, "loss": 0.1908, "step": 35990 }, { "epoch": 0.7799059649426906, "grad_norm": 1.1657238006591797, "learning_rate": 2.2967509854032145e-06, "loss": 0.1913, "step": 35995 }, { "epoch": 0.7800143002621714, "grad_norm": 1.9460642337799072, "learning_rate": 2.294581213474976e-06, "loss": 0.0941, "step": 36000 }, { "epoch": 0.7801226355816523, "grad_norm": 1.7095519304275513, "learning_rate": 2.2924123341040727e-06, "loss": 0.2017, "step": 36005 }, { "epoch": 0.7802309709011331, "grad_norm": 1.6244783401489258, "learning_rate": 2.2902443475417446e-06, "loss": 0.2243, "step": 36010 }, { "epoch": 0.7803393062206141, "grad_norm": 0.8127338290214539, "learning_rate": 2.2880772540391118e-06, "loss": 0.1426, "step": 36015 }, { "epoch": 0.7804476415400949, "grad_norm": 1.145253300666809, "learning_rate": 2.2859110538472085e-06, "loss": 0.1791, "step": 36020 }, { "epoch": 0.7805559768595758, "grad_norm": 1.5872746706008911, "learning_rate": 2.2837457472169534e-06, "loss": 0.2261, "step": 36025 }, { "epoch": 0.7806643121790566, "grad_norm": 0.7753366827964783, "learning_rate": 2.2815813343991623e-06, "loss": 0.1678, "step": 36030 }, { "epoch": 0.7807726474985375, "grad_norm": 1.7629188299179077, "learning_rate": 2.2794178156445523e-06, "loss": 0.1573, "step": 36035 }, { "epoch": 0.7808809828180183, "grad_norm": 2.7284297943115234, "learning_rate": 2.2772551912037334e-06, "loss": 0.1338, "step": 36040 }, { "epoch": 0.7809893181374992, "grad_norm": 0.5610459446907043, "learning_rate": 2.275093461327216e-06, "loss": 0.1685, "step": 36045 }, { "epoch": 0.78109765345698, "grad_norm": 1.8891234397888184, "learning_rate": 2.272932626265398e-06, "loss": 0.1398, "step": 36050 }, { "epoch": 0.7812059887764609, "grad_norm": 1.1288896799087524, "learning_rate": 2.2707726862685875e-06, "loss": 0.2414, "step": 36055 }, { "epoch": 0.7813143240959417, "grad_norm": 1.5139849185943604, "learning_rate": 2.268613641586975e-06, "loss": 0.1403, "step": 36060 }, { "epoch": 0.7814226594154227, "grad_norm": 1.6121516227722168, "learning_rate": 2.266455492470656e-06, "loss": 0.2002, "step": 36065 }, { "epoch": 0.7815309947349035, "grad_norm": 1.238714337348938, "learning_rate": 2.2642982391696223e-06, "loss": 0.1412, "step": 36070 }, { "epoch": 0.7816393300543844, "grad_norm": 1.6593902111053467, "learning_rate": 2.2621418819337536e-06, "loss": 0.14, "step": 36075 }, { "epoch": 0.7817476653738652, "grad_norm": 1.5566743612289429, "learning_rate": 2.2599864210128374e-06, "loss": 0.1565, "step": 36080 }, { "epoch": 0.781856000693346, "grad_norm": 1.90118408203125, "learning_rate": 2.2578318566565473e-06, "loss": 0.1174, "step": 36085 }, { "epoch": 0.7819643360128269, "grad_norm": 2.095367670059204, "learning_rate": 2.2556781891144607e-06, "loss": 0.1663, "step": 36090 }, { "epoch": 0.7820726713323077, "grad_norm": 1.3517853021621704, "learning_rate": 2.253525418636043e-06, "loss": 0.1427, "step": 36095 }, { "epoch": 0.7821810066517886, "grad_norm": 0.8394643664360046, "learning_rate": 2.2513735454706664e-06, "loss": 0.1638, "step": 36100 }, { "epoch": 0.7822893419712694, "grad_norm": 1.5948785543441772, "learning_rate": 2.2492225698675875e-06, "loss": 0.1268, "step": 36105 }, { "epoch": 0.7823976772907504, "grad_norm": 1.1245237588882446, "learning_rate": 2.247072492075968e-06, "loss": 0.1137, "step": 36110 }, { "epoch": 0.7825060126102312, "grad_norm": 1.411722183227539, "learning_rate": 2.2449233123448633e-06, "loss": 0.1614, "step": 36115 }, { "epoch": 0.7826143479297121, "grad_norm": 1.9742685556411743, "learning_rate": 2.2427750309232187e-06, "loss": 0.2593, "step": 36120 }, { "epoch": 0.7827226832491929, "grad_norm": 1.6665515899658203, "learning_rate": 2.2406276480598875e-06, "loss": 0.1908, "step": 36125 }, { "epoch": 0.7828310185686738, "grad_norm": 1.730000615119934, "learning_rate": 2.2384811640036042e-06, "loss": 0.1547, "step": 36130 }, { "epoch": 0.7829393538881546, "grad_norm": 1.7555160522460938, "learning_rate": 2.236335579003014e-06, "loss": 0.1689, "step": 36135 }, { "epoch": 0.7830476892076355, "grad_norm": 1.0814974308013916, "learning_rate": 2.2341908933066437e-06, "loss": 0.1752, "step": 36140 }, { "epoch": 0.7831560245271163, "grad_norm": 2.0835893154144287, "learning_rate": 2.23204710716293e-06, "loss": 0.2124, "step": 36145 }, { "epoch": 0.7832643598465971, "grad_norm": 1.5276178121566772, "learning_rate": 2.2299042208201914e-06, "loss": 0.144, "step": 36150 }, { "epoch": 0.783372695166078, "grad_norm": 1.085402011871338, "learning_rate": 2.227762234526657e-06, "loss": 0.1001, "step": 36155 }, { "epoch": 0.783481030485559, "grad_norm": 1.6614186763763428, "learning_rate": 2.2256211485304357e-06, "loss": 0.17, "step": 36160 }, { "epoch": 0.7835893658050398, "grad_norm": 0.8347421288490295, "learning_rate": 2.223480963079544e-06, "loss": 0.0975, "step": 36165 }, { "epoch": 0.7836977011245206, "grad_norm": 1.6756036281585693, "learning_rate": 2.2213416784218944e-06, "loss": 0.1519, "step": 36170 }, { "epoch": 0.7838060364440015, "grad_norm": 1.23740816116333, "learning_rate": 2.2192032948052833e-06, "loss": 0.125, "step": 36175 }, { "epoch": 0.7839143717634823, "grad_norm": 1.532158374786377, "learning_rate": 2.217065812477417e-06, "loss": 0.2031, "step": 36180 }, { "epoch": 0.7840227070829632, "grad_norm": 0.5625511407852173, "learning_rate": 2.214929231685886e-06, "loss": 0.1524, "step": 36185 }, { "epoch": 0.784131042402444, "grad_norm": 1.139190435409546, "learning_rate": 2.212793552678185e-06, "loss": 0.1685, "step": 36190 }, { "epoch": 0.7842393777219249, "grad_norm": 1.1698566675186157, "learning_rate": 2.2106587757016994e-06, "loss": 0.1754, "step": 36195 }, { "epoch": 0.7843477130414057, "grad_norm": 0.6340176463127136, "learning_rate": 2.2085249010037067e-06, "loss": 0.1507, "step": 36200 }, { "epoch": 0.7844560483608867, "grad_norm": 1.412535309791565, "learning_rate": 2.2063919288313884e-06, "loss": 0.1294, "step": 36205 }, { "epoch": 0.7845643836803675, "grad_norm": 0.3479641377925873, "learning_rate": 2.2042598594318165e-06, "loss": 0.1893, "step": 36210 }, { "epoch": 0.7846727189998484, "grad_norm": 1.759448528289795, "learning_rate": 2.2021286930519636e-06, "loss": 0.1786, "step": 36215 }, { "epoch": 0.7847810543193292, "grad_norm": 1.924487829208374, "learning_rate": 2.1999984299386855e-06, "loss": 0.1516, "step": 36220 }, { "epoch": 0.78488938963881, "grad_norm": 0.7561995983123779, "learning_rate": 2.1978690703387484e-06, "loss": 0.1134, "step": 36225 }, { "epoch": 0.7849977249582909, "grad_norm": 1.5158346891403198, "learning_rate": 2.1957406144988035e-06, "loss": 0.1893, "step": 36230 }, { "epoch": 0.7851060602777717, "grad_norm": 0.9352960586547852, "learning_rate": 2.1936130626653984e-06, "loss": 0.1573, "step": 36235 }, { "epoch": 0.7852143955972526, "grad_norm": 1.3910367488861084, "learning_rate": 2.1914864150849825e-06, "loss": 0.1643, "step": 36240 }, { "epoch": 0.7853227309167334, "grad_norm": 0.8717742562294006, "learning_rate": 2.189360672003892e-06, "loss": 0.1713, "step": 36245 }, { "epoch": 0.7854310662362143, "grad_norm": 0.8897132277488708, "learning_rate": 2.1872358336683664e-06, "loss": 0.1372, "step": 36250 }, { "epoch": 0.7855394015556952, "grad_norm": 0.8130455017089844, "learning_rate": 2.185111900324528e-06, "loss": 0.1252, "step": 36255 }, { "epoch": 0.7856477368751761, "grad_norm": 0.973473846912384, "learning_rate": 2.1829888722184155e-06, "loss": 0.1958, "step": 36260 }, { "epoch": 0.7857560721946569, "grad_norm": 1.1517804861068726, "learning_rate": 2.1808667495959425e-06, "loss": 0.1399, "step": 36265 }, { "epoch": 0.7858644075141378, "grad_norm": 1.1376087665557861, "learning_rate": 2.178745532702924e-06, "loss": 0.2154, "step": 36270 }, { "epoch": 0.7859727428336186, "grad_norm": 1.405861496925354, "learning_rate": 2.176625221785076e-06, "loss": 0.2113, "step": 36275 }, { "epoch": 0.7860810781530995, "grad_norm": 1.4347708225250244, "learning_rate": 2.1745058170879986e-06, "loss": 0.1731, "step": 36280 }, { "epoch": 0.7861894134725803, "grad_norm": 1.3872305154800415, "learning_rate": 2.1723873188571987e-06, "loss": 0.2244, "step": 36285 }, { "epoch": 0.7862977487920612, "grad_norm": 1.7130402326583862, "learning_rate": 2.170269727338068e-06, "loss": 0.2148, "step": 36290 }, { "epoch": 0.786406084111542, "grad_norm": 1.3847525119781494, "learning_rate": 2.1681530427759033e-06, "loss": 0.1187, "step": 36295 }, { "epoch": 0.7865144194310228, "grad_norm": 1.7950263023376465, "learning_rate": 2.1660372654158844e-06, "loss": 0.1805, "step": 36300 }, { "epoch": 0.7866227547505038, "grad_norm": 1.4292422533035278, "learning_rate": 2.1639223955030954e-06, "loss": 0.1355, "step": 36305 }, { "epoch": 0.7867310900699847, "grad_norm": 1.6160130500793457, "learning_rate": 2.161808433282515e-06, "loss": 0.1122, "step": 36310 }, { "epoch": 0.7868394253894655, "grad_norm": 1.8293023109436035, "learning_rate": 2.1596953789990095e-06, "loss": 0.1836, "step": 36315 }, { "epoch": 0.7869477607089463, "grad_norm": 2.4397170543670654, "learning_rate": 2.157583232897349e-06, "loss": 0.1938, "step": 36320 }, { "epoch": 0.7870560960284272, "grad_norm": 2.071775197982788, "learning_rate": 2.155471995222189e-06, "loss": 0.1952, "step": 36325 }, { "epoch": 0.787164431347908, "grad_norm": 1.842242956161499, "learning_rate": 2.15336166621809e-06, "loss": 0.1051, "step": 36330 }, { "epoch": 0.7872727666673889, "grad_norm": 1.513384461402893, "learning_rate": 2.151252246129496e-06, "loss": 0.1416, "step": 36335 }, { "epoch": 0.7873811019868697, "grad_norm": 0.7263227701187134, "learning_rate": 2.149143735200758e-06, "loss": 0.1033, "step": 36340 }, { "epoch": 0.7874894373063506, "grad_norm": 1.3541027307510376, "learning_rate": 2.14703613367611e-06, "loss": 0.1718, "step": 36345 }, { "epoch": 0.7875977726258315, "grad_norm": 2.0456702709198, "learning_rate": 2.1449294417996893e-06, "loss": 0.1728, "step": 36350 }, { "epoch": 0.7877061079453124, "grad_norm": 1.3868522644042969, "learning_rate": 2.142823659815525e-06, "loss": 0.1434, "step": 36355 }, { "epoch": 0.7878144432647932, "grad_norm": 1.9932284355163574, "learning_rate": 2.140718787967537e-06, "loss": 0.2039, "step": 36360 }, { "epoch": 0.7879227785842741, "grad_norm": 0.7052660584449768, "learning_rate": 2.1386148264995486e-06, "loss": 0.1425, "step": 36365 }, { "epoch": 0.7880311139037549, "grad_norm": 1.5120253562927246, "learning_rate": 2.1365117756552654e-06, "loss": 0.121, "step": 36370 }, { "epoch": 0.7881394492232358, "grad_norm": 1.9473870992660522, "learning_rate": 2.1344096356783016e-06, "loss": 0.2103, "step": 36375 }, { "epoch": 0.7882477845427166, "grad_norm": 1.0742138624191284, "learning_rate": 2.1323084068121527e-06, "loss": 0.1467, "step": 36380 }, { "epoch": 0.7883561198621974, "grad_norm": 1.5403069257736206, "learning_rate": 2.1302080893002185e-06, "loss": 0.1802, "step": 36385 }, { "epoch": 0.7884644551816783, "grad_norm": 0.515861988067627, "learning_rate": 2.1281086833857844e-06, "loss": 0.1696, "step": 36390 }, { "epoch": 0.7885727905011591, "grad_norm": 1.2863104343414307, "learning_rate": 2.1260101893120423e-06, "loss": 0.1587, "step": 36395 }, { "epoch": 0.7886811258206401, "grad_norm": 1.4292035102844238, "learning_rate": 2.1239126073220637e-06, "loss": 0.1791, "step": 36400 }, { "epoch": 0.7887894611401209, "grad_norm": 1.5621705055236816, "learning_rate": 2.1218159376588276e-06, "loss": 0.2417, "step": 36405 }, { "epoch": 0.7888977964596018, "grad_norm": 1.01705002784729, "learning_rate": 2.119720180565201e-06, "loss": 0.1984, "step": 36410 }, { "epoch": 0.7890061317790826, "grad_norm": 1.6303980350494385, "learning_rate": 2.1176253362839427e-06, "loss": 0.1639, "step": 36415 }, { "epoch": 0.7891144670985635, "grad_norm": 1.1756136417388916, "learning_rate": 2.1155314050577137e-06, "loss": 0.1883, "step": 36420 }, { "epoch": 0.7892228024180443, "grad_norm": 1.4558910131454468, "learning_rate": 2.11343838712906e-06, "loss": 0.1816, "step": 36425 }, { "epoch": 0.7893311377375252, "grad_norm": 1.2051039934158325, "learning_rate": 2.111346282740431e-06, "loss": 0.1365, "step": 36430 }, { "epoch": 0.789439473057006, "grad_norm": 1.3823453187942505, "learning_rate": 2.1092550921341624e-06, "loss": 0.135, "step": 36435 }, { "epoch": 0.7895478083764869, "grad_norm": 2.1122639179229736, "learning_rate": 2.107164815552486e-06, "loss": 0.1636, "step": 36440 }, { "epoch": 0.7896561436959677, "grad_norm": 0.9178901314735413, "learning_rate": 2.1050754532375328e-06, "loss": 0.1687, "step": 36445 }, { "epoch": 0.7897644790154487, "grad_norm": 1.2811781167984009, "learning_rate": 2.1029870054313183e-06, "loss": 0.1256, "step": 36450 }, { "epoch": 0.7898728143349295, "grad_norm": 2.0285747051239014, "learning_rate": 2.100899472375767e-06, "loss": 0.2035, "step": 36455 }, { "epoch": 0.7899811496544104, "grad_norm": 1.6241662502288818, "learning_rate": 2.0988128543126796e-06, "loss": 0.1927, "step": 36460 }, { "epoch": 0.7900894849738912, "grad_norm": 0.9041608572006226, "learning_rate": 2.0967271514837673e-06, "loss": 0.1354, "step": 36465 }, { "epoch": 0.790197820293372, "grad_norm": 2.0329737663269043, "learning_rate": 2.0946423641306224e-06, "loss": 0.208, "step": 36470 }, { "epoch": 0.7903061556128529, "grad_norm": 1.2366565465927124, "learning_rate": 2.092558492494736e-06, "loss": 0.1758, "step": 36475 }, { "epoch": 0.7904144909323337, "grad_norm": 2.041954278945923, "learning_rate": 2.090475536817497e-06, "loss": 0.2254, "step": 36480 }, { "epoch": 0.7905228262518146, "grad_norm": 2.367601156234741, "learning_rate": 2.088393497340179e-06, "loss": 0.2184, "step": 36485 }, { "epoch": 0.7906311615712954, "grad_norm": 0.9007954597473145, "learning_rate": 2.0863123743039616e-06, "loss": 0.1075, "step": 36490 }, { "epoch": 0.7907394968907764, "grad_norm": 1.4240074157714844, "learning_rate": 2.0842321679499023e-06, "loss": 0.1861, "step": 36495 }, { "epoch": 0.7908478322102572, "grad_norm": 2.5349254608154297, "learning_rate": 2.0821528785189747e-06, "loss": 0.1605, "step": 36500 }, { "epoch": 0.7909561675297381, "grad_norm": 1.9017006158828735, "learning_rate": 2.080074506252027e-06, "loss": 0.1761, "step": 36505 }, { "epoch": 0.7910645028492189, "grad_norm": 1.428061604499817, "learning_rate": 2.077997051389804e-06, "loss": 0.1401, "step": 36510 }, { "epoch": 0.7911728381686998, "grad_norm": 1.7254493236541748, "learning_rate": 2.075920514172953e-06, "loss": 0.0712, "step": 36515 }, { "epoch": 0.7912811734881806, "grad_norm": 1.549617052078247, "learning_rate": 2.0738448948420066e-06, "loss": 0.1834, "step": 36520 }, { "epoch": 0.7913895088076615, "grad_norm": 2.2351906299591064, "learning_rate": 2.071770193637397e-06, "loss": 0.1321, "step": 36525 }, { "epoch": 0.7914978441271423, "grad_norm": 2.04292631149292, "learning_rate": 2.0696964107994434e-06, "loss": 0.2162, "step": 36530 }, { "epoch": 0.7916061794466231, "grad_norm": 1.0828078985214233, "learning_rate": 2.067623546568367e-06, "loss": 0.0876, "step": 36535 }, { "epoch": 0.791714514766104, "grad_norm": 1.5970499515533447, "learning_rate": 2.0655516011842723e-06, "loss": 0.2005, "step": 36540 }, { "epoch": 0.791822850085585, "grad_norm": 1.2904326915740967, "learning_rate": 2.0634805748871678e-06, "loss": 0.2067, "step": 36545 }, { "epoch": 0.7919311854050658, "grad_norm": 1.8797836303710938, "learning_rate": 2.0614104679169524e-06, "loss": 0.2136, "step": 36550 }, { "epoch": 0.7920395207245466, "grad_norm": 0.43368232250213623, "learning_rate": 2.0593412805134105e-06, "loss": 0.144, "step": 36555 }, { "epoch": 0.7921478560440275, "grad_norm": 2.1734678745269775, "learning_rate": 2.057273012916233e-06, "loss": 0.1863, "step": 36560 }, { "epoch": 0.7922561913635083, "grad_norm": 1.4931890964508057, "learning_rate": 2.055205665364993e-06, "loss": 0.1695, "step": 36565 }, { "epoch": 0.7923645266829892, "grad_norm": 2.7570712566375732, "learning_rate": 2.0531392380991665e-06, "loss": 0.1574, "step": 36570 }, { "epoch": 0.79247286200247, "grad_norm": 1.5297651290893555, "learning_rate": 2.051073731358112e-06, "loss": 0.1342, "step": 36575 }, { "epoch": 0.7925811973219509, "grad_norm": 1.413155436515808, "learning_rate": 2.0490091453810935e-06, "loss": 0.1448, "step": 36580 }, { "epoch": 0.7926895326414317, "grad_norm": 1.9172292947769165, "learning_rate": 2.0469454804072573e-06, "loss": 0.226, "step": 36585 }, { "epoch": 0.7927978679609127, "grad_norm": 1.7081855535507202, "learning_rate": 2.0448827366756495e-06, "loss": 0.1995, "step": 36590 }, { "epoch": 0.7929062032803935, "grad_norm": 1.1018290519714355, "learning_rate": 2.042820914425213e-06, "loss": 0.1464, "step": 36595 }, { "epoch": 0.7930145385998744, "grad_norm": 2.686539649963379, "learning_rate": 2.0407600138947714e-06, "loss": 0.1187, "step": 36600 }, { "epoch": 0.7931228739193552, "grad_norm": 1.2112464904785156, "learning_rate": 2.0387000353230557e-06, "loss": 0.1905, "step": 36605 }, { "epoch": 0.793231209238836, "grad_norm": 2.1033151149749756, "learning_rate": 2.0366409789486785e-06, "loss": 0.1313, "step": 36610 }, { "epoch": 0.7933395445583169, "grad_norm": 0.7280643582344055, "learning_rate": 2.0345828450101545e-06, "loss": 0.1351, "step": 36615 }, { "epoch": 0.7934478798777977, "grad_norm": 0.7511759400367737, "learning_rate": 2.0325256337458842e-06, "loss": 0.1061, "step": 36620 }, { "epoch": 0.7935562151972786, "grad_norm": 1.6081606149673462, "learning_rate": 2.0304693453941693e-06, "loss": 0.1565, "step": 36625 }, { "epoch": 0.7936645505167594, "grad_norm": 1.7679557800292969, "learning_rate": 2.0284139801931936e-06, "loss": 0.1251, "step": 36630 }, { "epoch": 0.7937728858362403, "grad_norm": 2.141127586364746, "learning_rate": 2.0263595383810477e-06, "loss": 0.1387, "step": 36635 }, { "epoch": 0.7938812211557212, "grad_norm": 1.4541354179382324, "learning_rate": 2.0243060201957e-06, "loss": 0.1232, "step": 36640 }, { "epoch": 0.7939895564752021, "grad_norm": 0.9809059500694275, "learning_rate": 2.022253425875025e-06, "loss": 0.1393, "step": 36645 }, { "epoch": 0.7940978917946829, "grad_norm": 1.5949640274047852, "learning_rate": 2.0202017556567867e-06, "loss": 0.1191, "step": 36650 }, { "epoch": 0.7942062271141638, "grad_norm": 1.6577672958374023, "learning_rate": 2.018151009778635e-06, "loss": 0.1807, "step": 36655 }, { "epoch": 0.7943145624336446, "grad_norm": 1.0330109596252441, "learning_rate": 2.0161011884781223e-06, "loss": 0.1994, "step": 36660 }, { "epoch": 0.7944228977531255, "grad_norm": 0.7894076704978943, "learning_rate": 2.0140522919926864e-06, "loss": 0.1597, "step": 36665 }, { "epoch": 0.7945312330726063, "grad_norm": 1.896391749382019, "learning_rate": 2.012004320559665e-06, "loss": 0.163, "step": 36670 }, { "epoch": 0.7946395683920872, "grad_norm": 1.0208475589752197, "learning_rate": 2.009957274416284e-06, "loss": 0.204, "step": 36675 }, { "epoch": 0.794747903711568, "grad_norm": 0.683738648891449, "learning_rate": 2.007911153799659e-06, "loss": 0.1717, "step": 36680 }, { "epoch": 0.7948562390310489, "grad_norm": 2.2859158515930176, "learning_rate": 2.0058659589468076e-06, "loss": 0.1231, "step": 36685 }, { "epoch": 0.7949645743505298, "grad_norm": 1.1929739713668823, "learning_rate": 2.0038216900946274e-06, "loss": 0.2169, "step": 36690 }, { "epoch": 0.7950729096700107, "grad_norm": 1.7244000434875488, "learning_rate": 2.0017783474799267e-06, "loss": 0.2054, "step": 36695 }, { "epoch": 0.7951812449894915, "grad_norm": 1.5668920278549194, "learning_rate": 1.9997359313393884e-06, "loss": 0.1538, "step": 36700 }, { "epoch": 0.7952895803089723, "grad_norm": 1.1671643257141113, "learning_rate": 1.9976944419096024e-06, "loss": 0.1813, "step": 36705 }, { "epoch": 0.7953979156284532, "grad_norm": 1.8718178272247314, "learning_rate": 1.9956538794270396e-06, "loss": 0.21, "step": 36710 }, { "epoch": 0.795506250947934, "grad_norm": 1.6439316272735596, "learning_rate": 1.993614244128067e-06, "loss": 0.1646, "step": 36715 }, { "epoch": 0.7956145862674149, "grad_norm": 1.198371410369873, "learning_rate": 1.9915755362489518e-06, "loss": 0.1987, "step": 36720 }, { "epoch": 0.7957229215868957, "grad_norm": 1.1884398460388184, "learning_rate": 1.989537756025842e-06, "loss": 0.1618, "step": 36725 }, { "epoch": 0.7958312569063766, "grad_norm": 1.351913571357727, "learning_rate": 1.987500903694789e-06, "loss": 0.1371, "step": 36730 }, { "epoch": 0.7959395922258575, "grad_norm": 1.6460647583007812, "learning_rate": 1.9854649794917234e-06, "loss": 0.1516, "step": 36735 }, { "epoch": 0.7960479275453384, "grad_norm": 1.2782273292541504, "learning_rate": 1.9834299836524872e-06, "loss": 0.1342, "step": 36740 }, { "epoch": 0.7961562628648192, "grad_norm": 0.8524689078330994, "learning_rate": 1.9813959164128005e-06, "loss": 0.1544, "step": 36745 }, { "epoch": 0.7962645981843001, "grad_norm": 1.5434906482696533, "learning_rate": 1.9793627780082746e-06, "loss": 0.1268, "step": 36750 }, { "epoch": 0.7963729335037809, "grad_norm": 1.6397461891174316, "learning_rate": 1.9773305686744238e-06, "loss": 0.1888, "step": 36755 }, { "epoch": 0.7964812688232618, "grad_norm": 1.5418850183486938, "learning_rate": 1.975299288646646e-06, "loss": 0.1212, "step": 36760 }, { "epoch": 0.7965896041427426, "grad_norm": 1.689117193222046, "learning_rate": 1.9732689381602366e-06, "loss": 0.2344, "step": 36765 }, { "epoch": 0.7966979394622234, "grad_norm": 1.014786958694458, "learning_rate": 1.9712395174503784e-06, "loss": 0.1052, "step": 36770 }, { "epoch": 0.7968062747817043, "grad_norm": 1.0834320783615112, "learning_rate": 1.9692110267521548e-06, "loss": 0.0937, "step": 36775 }, { "epoch": 0.7969146101011851, "grad_norm": 1.2405918836593628, "learning_rate": 1.9671834663005285e-06, "loss": 0.1216, "step": 36780 }, { "epoch": 0.7970229454206661, "grad_norm": 0.8500007390975952, "learning_rate": 1.9651568363303665e-06, "loss": 0.1746, "step": 36785 }, { "epoch": 0.7971312807401469, "grad_norm": 1.7519090175628662, "learning_rate": 1.963131137076426e-06, "loss": 0.1775, "step": 36790 }, { "epoch": 0.7972396160596278, "grad_norm": 1.1049654483795166, "learning_rate": 1.961106368773348e-06, "loss": 0.1213, "step": 36795 }, { "epoch": 0.7973479513791086, "grad_norm": 1.157145380973816, "learning_rate": 1.959082531655677e-06, "loss": 0.1357, "step": 36800 }, { "epoch": 0.7974562866985895, "grad_norm": 2.0844650268554688, "learning_rate": 1.95705962595784e-06, "loss": 0.198, "step": 36805 }, { "epoch": 0.7975646220180703, "grad_norm": 1.9354578256607056, "learning_rate": 1.9550376519141635e-06, "loss": 0.18, "step": 36810 }, { "epoch": 0.7976729573375512, "grad_norm": 0.8744456171989441, "learning_rate": 1.95301660975886e-06, "loss": 0.1532, "step": 36815 }, { "epoch": 0.797781292657032, "grad_norm": 1.335434913635254, "learning_rate": 1.95099649972604e-06, "loss": 0.1862, "step": 36820 }, { "epoch": 0.7978896279765129, "grad_norm": 1.1572571992874146, "learning_rate": 1.9489773220497e-06, "loss": 0.2164, "step": 36825 }, { "epoch": 0.7979979632959937, "grad_norm": 1.429362177848816, "learning_rate": 1.946959076963735e-06, "loss": 0.1368, "step": 36830 }, { "epoch": 0.7981062986154747, "grad_norm": 1.4362587928771973, "learning_rate": 1.944941764701924e-06, "loss": 0.1786, "step": 36835 }, { "epoch": 0.7982146339349555, "grad_norm": 0.9717754125595093, "learning_rate": 1.9429253854979446e-06, "loss": 0.1684, "step": 36840 }, { "epoch": 0.7983229692544364, "grad_norm": 0.6230279803276062, "learning_rate": 1.940909939585367e-06, "loss": 0.1297, "step": 36845 }, { "epoch": 0.7984313045739172, "grad_norm": 1.4404237270355225, "learning_rate": 1.9388954271976448e-06, "loss": 0.1764, "step": 36850 }, { "epoch": 0.798539639893398, "grad_norm": 1.1829310655593872, "learning_rate": 1.936881848568135e-06, "loss": 0.1869, "step": 36855 }, { "epoch": 0.7986479752128789, "grad_norm": 1.5992039442062378, "learning_rate": 1.9348692039300745e-06, "loss": 0.1714, "step": 36860 }, { "epoch": 0.7987563105323597, "grad_norm": 0.916317880153656, "learning_rate": 1.9328574935166033e-06, "loss": 0.1337, "step": 36865 }, { "epoch": 0.7988646458518406, "grad_norm": 1.4458802938461304, "learning_rate": 1.930846717560745e-06, "loss": 0.1898, "step": 36870 }, { "epoch": 0.7989729811713214, "grad_norm": 1.717733383178711, "learning_rate": 1.928836876295419e-06, "loss": 0.2063, "step": 36875 }, { "epoch": 0.7990813164908024, "grad_norm": 1.7082511186599731, "learning_rate": 1.926827969953433e-06, "loss": 0.1429, "step": 36880 }, { "epoch": 0.7991896518102832, "grad_norm": 1.2136150598526, "learning_rate": 1.9248199987674907e-06, "loss": 0.2162, "step": 36885 }, { "epoch": 0.7992979871297641, "grad_norm": 1.4694924354553223, "learning_rate": 1.9228129629701884e-06, "loss": 0.2054, "step": 36890 }, { "epoch": 0.7994063224492449, "grad_norm": 1.5844742059707642, "learning_rate": 1.9208068627940045e-06, "loss": 0.2166, "step": 36895 }, { "epoch": 0.7995146577687258, "grad_norm": 1.7253894805908203, "learning_rate": 1.918801698471323e-06, "loss": 0.1622, "step": 36900 }, { "epoch": 0.7996229930882066, "grad_norm": 1.6889804601669312, "learning_rate": 1.916797470234405e-06, "loss": 0.1872, "step": 36905 }, { "epoch": 0.7997313284076875, "grad_norm": 0.4299590289592743, "learning_rate": 1.9147941783154166e-06, "loss": 0.2081, "step": 36910 }, { "epoch": 0.7998396637271683, "grad_norm": 1.1792017221450806, "learning_rate": 1.912791822946406e-06, "loss": 0.253, "step": 36915 }, { "epoch": 0.7999479990466491, "grad_norm": 0.8660869598388672, "learning_rate": 1.910790404359314e-06, "loss": 0.1353, "step": 36920 }, { "epoch": 0.80005633436613, "grad_norm": 1.9205527305603027, "learning_rate": 1.9087899227859796e-06, "loss": 0.2534, "step": 36925 }, { "epoch": 0.800164669685611, "grad_norm": 0.9157060384750366, "learning_rate": 1.9067903784581221e-06, "loss": 0.1417, "step": 36930 }, { "epoch": 0.8002730050050918, "grad_norm": 1.7588545083999634, "learning_rate": 1.9047917716073672e-06, "loss": 0.1493, "step": 36935 }, { "epoch": 0.8003813403245726, "grad_norm": 1.0968903303146362, "learning_rate": 1.9027941024652174e-06, "loss": 0.1364, "step": 36940 }, { "epoch": 0.8004896756440535, "grad_norm": 0.9237842559814453, "learning_rate": 1.9007973712630768e-06, "loss": 0.1437, "step": 36945 }, { "epoch": 0.8005980109635343, "grad_norm": 1.0354719161987305, "learning_rate": 1.8988015782322343e-06, "loss": 0.1502, "step": 36950 }, { "epoch": 0.8007063462830152, "grad_norm": 1.117396593093872, "learning_rate": 1.896806723603871e-06, "loss": 0.1792, "step": 36955 }, { "epoch": 0.800814681602496, "grad_norm": 1.3122014999389648, "learning_rate": 1.8948128076090654e-06, "loss": 0.1346, "step": 36960 }, { "epoch": 0.8009230169219769, "grad_norm": 0.2658691108226776, "learning_rate": 1.8928198304787782e-06, "loss": 0.1382, "step": 36965 }, { "epoch": 0.8010313522414577, "grad_norm": 1.3447734117507935, "learning_rate": 1.89082779244387e-06, "loss": 0.1249, "step": 36970 }, { "epoch": 0.8011396875609386, "grad_norm": 2.25620174407959, "learning_rate": 1.8888366937350845e-06, "loss": 0.1233, "step": 36975 }, { "epoch": 0.8012480228804195, "grad_norm": 0.8348904252052307, "learning_rate": 1.8868465345830622e-06, "loss": 0.0965, "step": 36980 }, { "epoch": 0.8013563581999004, "grad_norm": 1.5181094408035278, "learning_rate": 1.884857315218337e-06, "loss": 0.1686, "step": 36985 }, { "epoch": 0.8014646935193812, "grad_norm": 2.3221595287323, "learning_rate": 1.8828690358713242e-06, "loss": 0.1454, "step": 36990 }, { "epoch": 0.8015730288388621, "grad_norm": 1.3944190740585327, "learning_rate": 1.880881696772342e-06, "loss": 0.2273, "step": 36995 }, { "epoch": 0.8016813641583429, "grad_norm": 1.508795142173767, "learning_rate": 1.8788952981515874e-06, "loss": 0.0985, "step": 37000 }, { "epoch": 0.8017896994778237, "grad_norm": 1.8271796703338623, "learning_rate": 1.8769098402391618e-06, "loss": 0.1896, "step": 37005 }, { "epoch": 0.8018980347973046, "grad_norm": 0.959009051322937, "learning_rate": 1.8749253232650443e-06, "loss": 0.1581, "step": 37010 }, { "epoch": 0.8020063701167854, "grad_norm": 1.868902325630188, "learning_rate": 1.872941747459117e-06, "loss": 0.1116, "step": 37015 }, { "epoch": 0.8021147054362663, "grad_norm": 1.7847450971603394, "learning_rate": 1.8709591130511428e-06, "loss": 0.1601, "step": 37020 }, { "epoch": 0.8022230407557472, "grad_norm": 1.8028638362884521, "learning_rate": 1.8689774202707823e-06, "loss": 0.2089, "step": 37025 }, { "epoch": 0.8023313760752281, "grad_norm": 1.0954900979995728, "learning_rate": 1.8669966693475871e-06, "loss": 0.0939, "step": 37030 }, { "epoch": 0.8024397113947089, "grad_norm": 0.9617019295692444, "learning_rate": 1.865016860510993e-06, "loss": 0.1139, "step": 37035 }, { "epoch": 0.8025480467141898, "grad_norm": 2.376434803009033, "learning_rate": 1.8630379939903353e-06, "loss": 0.1116, "step": 37040 }, { "epoch": 0.8026563820336706, "grad_norm": 2.22811222076416, "learning_rate": 1.8610600700148329e-06, "loss": 0.171, "step": 37045 }, { "epoch": 0.8027647173531515, "grad_norm": 1.5672736167907715, "learning_rate": 1.8590830888136024e-06, "loss": 0.2016, "step": 37050 }, { "epoch": 0.8028730526726323, "grad_norm": 0.9676152467727661, "learning_rate": 1.8571070506156419e-06, "loss": 0.0906, "step": 37055 }, { "epoch": 0.8029813879921132, "grad_norm": 0.9192622303962708, "learning_rate": 1.8551319556498526e-06, "loss": 0.1674, "step": 37060 }, { "epoch": 0.803089723311594, "grad_norm": 1.2170199155807495, "learning_rate": 1.8531578041450126e-06, "loss": 0.1165, "step": 37065 }, { "epoch": 0.8031980586310749, "grad_norm": 1.0941414833068848, "learning_rate": 1.8511845963298048e-06, "loss": 0.205, "step": 37070 }, { "epoch": 0.8033063939505558, "grad_norm": 1.1203441619873047, "learning_rate": 1.8492123324327892e-06, "loss": 0.1748, "step": 37075 }, { "epoch": 0.8034147292700367, "grad_norm": 1.47663152217865, "learning_rate": 1.847241012682427e-06, "loss": 0.1585, "step": 37080 }, { "epoch": 0.8035230645895175, "grad_norm": 1.401909589767456, "learning_rate": 1.8452706373070662e-06, "loss": 0.1185, "step": 37085 }, { "epoch": 0.8036313999089983, "grad_norm": 0.8626863956451416, "learning_rate": 1.843301206534942e-06, "loss": 0.1935, "step": 37090 }, { "epoch": 0.8037397352284792, "grad_norm": 1.8031748533248901, "learning_rate": 1.841332720594189e-06, "loss": 0.1966, "step": 37095 }, { "epoch": 0.80384807054796, "grad_norm": 2.0307817459106445, "learning_rate": 1.839365179712821e-06, "loss": 0.2298, "step": 37100 }, { "epoch": 0.8039564058674409, "grad_norm": 1.6387840509414673, "learning_rate": 1.8373985841187525e-06, "loss": 0.155, "step": 37105 }, { "epoch": 0.8040647411869217, "grad_norm": 1.1446927785873413, "learning_rate": 1.8354329340397803e-06, "loss": 0.229, "step": 37110 }, { "epoch": 0.8041730765064026, "grad_norm": 2.116562843322754, "learning_rate": 1.8334682297036e-06, "loss": 0.1717, "step": 37115 }, { "epoch": 0.8042814118258835, "grad_norm": 0.8395913243293762, "learning_rate": 1.8315044713377872e-06, "loss": 0.143, "step": 37120 }, { "epoch": 0.8043897471453644, "grad_norm": 0.807597279548645, "learning_rate": 1.8295416591698179e-06, "loss": 0.1706, "step": 37125 }, { "epoch": 0.8044980824648452, "grad_norm": 2.0331435203552246, "learning_rate": 1.8275797934270568e-06, "loss": 0.3231, "step": 37130 }, { "epoch": 0.8046064177843261, "grad_norm": 1.0909897089004517, "learning_rate": 1.8256188743367509e-06, "loss": 0.1236, "step": 37135 }, { "epoch": 0.8047147531038069, "grad_norm": 1.3677297830581665, "learning_rate": 1.8236589021260487e-06, "loss": 0.1269, "step": 37140 }, { "epoch": 0.8048230884232878, "grad_norm": 2.9318978786468506, "learning_rate": 1.8216998770219785e-06, "loss": 0.2071, "step": 37145 }, { "epoch": 0.8049314237427686, "grad_norm": 2.602924108505249, "learning_rate": 1.81974179925147e-06, "loss": 0.083, "step": 37150 }, { "epoch": 0.8050397590622494, "grad_norm": 0.9607676863670349, "learning_rate": 1.8177846690413303e-06, "loss": 0.1279, "step": 37155 }, { "epoch": 0.8051480943817303, "grad_norm": 2.1131584644317627, "learning_rate": 1.8158284866182695e-06, "loss": 0.2271, "step": 37160 }, { "epoch": 0.8052564297012111, "grad_norm": 1.1255277395248413, "learning_rate": 1.8138732522088799e-06, "loss": 0.1665, "step": 37165 }, { "epoch": 0.8053647650206921, "grad_norm": 1.608707308769226, "learning_rate": 1.8119189660396409e-06, "loss": 0.2923, "step": 37170 }, { "epoch": 0.8054731003401729, "grad_norm": 3.3747875690460205, "learning_rate": 1.809965628336937e-06, "loss": 0.1677, "step": 37175 }, { "epoch": 0.8055814356596538, "grad_norm": 1.43407142162323, "learning_rate": 1.8080132393270267e-06, "loss": 0.162, "step": 37180 }, { "epoch": 0.8056897709791346, "grad_norm": 1.7286218404769897, "learning_rate": 1.8060617992360685e-06, "loss": 0.1913, "step": 37185 }, { "epoch": 0.8057981062986155, "grad_norm": 1.7599042654037476, "learning_rate": 1.804111308290104e-06, "loss": 0.1286, "step": 37190 }, { "epoch": 0.8059064416180963, "grad_norm": 0.5649123787879944, "learning_rate": 1.8021617667150714e-06, "loss": 0.1547, "step": 37195 }, { "epoch": 0.8060147769375772, "grad_norm": 1.0348081588745117, "learning_rate": 1.8002131747367947e-06, "loss": 0.0887, "step": 37200 }, { "epoch": 0.806123112257058, "grad_norm": 1.4970028400421143, "learning_rate": 1.7982655325809872e-06, "loss": 0.1547, "step": 37205 }, { "epoch": 0.8062314475765389, "grad_norm": 1.4637750387191772, "learning_rate": 1.7963188404732579e-06, "loss": 0.1209, "step": 37210 }, { "epoch": 0.8063397828960197, "grad_norm": 0.5840369462966919, "learning_rate": 1.7943730986390973e-06, "loss": 0.1538, "step": 37215 }, { "epoch": 0.8064481182155007, "grad_norm": 1.1928032636642456, "learning_rate": 1.7924283073038928e-06, "loss": 0.1874, "step": 37220 }, { "epoch": 0.8065564535349815, "grad_norm": 1.1712952852249146, "learning_rate": 1.790484466692919e-06, "loss": 0.1264, "step": 37225 }, { "epoch": 0.8066647888544624, "grad_norm": 1.6689385175704956, "learning_rate": 1.788541577031344e-06, "loss": 0.0915, "step": 37230 }, { "epoch": 0.8067731241739432, "grad_norm": 1.7131327390670776, "learning_rate": 1.7865996385442197e-06, "loss": 0.1628, "step": 37235 }, { "epoch": 0.806881459493424, "grad_norm": 0.4774944484233856, "learning_rate": 1.784658651456489e-06, "loss": 0.0756, "step": 37240 }, { "epoch": 0.8069897948129049, "grad_norm": 0.5475968718528748, "learning_rate": 1.7827186159929888e-06, "loss": 0.2084, "step": 37245 }, { "epoch": 0.8070981301323857, "grad_norm": 1.3863478899002075, "learning_rate": 1.7807795323784404e-06, "loss": 0.0605, "step": 37250 }, { "epoch": 0.8072064654518666, "grad_norm": 1.0778465270996094, "learning_rate": 1.7788414008374611e-06, "loss": 0.1498, "step": 37255 }, { "epoch": 0.8073148007713474, "grad_norm": 0.9824923276901245, "learning_rate": 1.776904221594551e-06, "loss": 0.1413, "step": 37260 }, { "epoch": 0.8074231360908284, "grad_norm": 1.4575393199920654, "learning_rate": 1.7749679948741038e-06, "loss": 0.2011, "step": 37265 }, { "epoch": 0.8075314714103092, "grad_norm": 1.6388057470321655, "learning_rate": 1.7730327209004071e-06, "loss": 0.1937, "step": 37270 }, { "epoch": 0.8076398067297901, "grad_norm": 1.9235199689865112, "learning_rate": 1.771098399897626e-06, "loss": 0.0977, "step": 37275 }, { "epoch": 0.8077481420492709, "grad_norm": 1.7648866176605225, "learning_rate": 1.76916503208983e-06, "loss": 0.2796, "step": 37280 }, { "epoch": 0.8078564773687518, "grad_norm": 1.8076519966125488, "learning_rate": 1.7672326177009636e-06, "loss": 0.118, "step": 37285 }, { "epoch": 0.8079648126882326, "grad_norm": 0.624265730381012, "learning_rate": 1.765301156954875e-06, "loss": 0.1785, "step": 37290 }, { "epoch": 0.8080731480077135, "grad_norm": 1.259608268737793, "learning_rate": 1.763370650075289e-06, "loss": 0.1753, "step": 37295 }, { "epoch": 0.8081814833271943, "grad_norm": 1.4786783456802368, "learning_rate": 1.7614410972858299e-06, "loss": 0.1054, "step": 37300 }, { "epoch": 0.8082898186466752, "grad_norm": 1.4970433712005615, "learning_rate": 1.7595124988100043e-06, "loss": 0.1952, "step": 37305 }, { "epoch": 0.808398153966156, "grad_norm": 0.5453710556030273, "learning_rate": 1.757584854871216e-06, "loss": 0.1732, "step": 37310 }, { "epoch": 0.808506489285637, "grad_norm": 1.472898006439209, "learning_rate": 1.7556581656927486e-06, "loss": 0.1673, "step": 37315 }, { "epoch": 0.8086148246051178, "grad_norm": 1.212679147720337, "learning_rate": 1.753732431497781e-06, "loss": 0.0847, "step": 37320 }, { "epoch": 0.8087231599245986, "grad_norm": 1.3646767139434814, "learning_rate": 1.7518076525093863e-06, "loss": 0.151, "step": 37325 }, { "epoch": 0.8088314952440795, "grad_norm": 1.4132503271102905, "learning_rate": 1.7498838289505139e-06, "loss": 0.1558, "step": 37330 }, { "epoch": 0.8089398305635603, "grad_norm": 1.5766303539276123, "learning_rate": 1.7479609610440152e-06, "loss": 0.202, "step": 37335 }, { "epoch": 0.8090481658830412, "grad_norm": 2.1764025688171387, "learning_rate": 1.7460390490126222e-06, "loss": 0.2291, "step": 37340 }, { "epoch": 0.809156501202522, "grad_norm": 1.8244515657424927, "learning_rate": 1.7441180930789626e-06, "loss": 0.147, "step": 37345 }, { "epoch": 0.8092648365220029, "grad_norm": 2.3848159313201904, "learning_rate": 1.7421980934655469e-06, "loss": 0.1423, "step": 37350 }, { "epoch": 0.8093731718414837, "grad_norm": 1.151278018951416, "learning_rate": 1.7402790503947831e-06, "loss": 0.2233, "step": 37355 }, { "epoch": 0.8094815071609646, "grad_norm": 2.971935987472534, "learning_rate": 1.7383609640889575e-06, "loss": 0.1846, "step": 37360 }, { "epoch": 0.8095898424804455, "grad_norm": 2.465785264968872, "learning_rate": 1.7364438347702551e-06, "loss": 0.1853, "step": 37365 }, { "epoch": 0.8096981777999264, "grad_norm": 1.8043110370635986, "learning_rate": 1.7345276626607489e-06, "loss": 0.143, "step": 37370 }, { "epoch": 0.8098065131194072, "grad_norm": 0.9150254130363464, "learning_rate": 1.7326124479823936e-06, "loss": 0.1906, "step": 37375 }, { "epoch": 0.8099148484388881, "grad_norm": 2.1788296699523926, "learning_rate": 1.730698190957043e-06, "loss": 0.1374, "step": 37380 }, { "epoch": 0.8100231837583689, "grad_norm": 2.1510307788848877, "learning_rate": 1.7287848918064298e-06, "loss": 0.1348, "step": 37385 }, { "epoch": 0.8101315190778497, "grad_norm": 1.539947509765625, "learning_rate": 1.7268725507521878e-06, "loss": 0.2074, "step": 37390 }, { "epoch": 0.8102398543973306, "grad_norm": 1.0123870372772217, "learning_rate": 1.7249611680158263e-06, "loss": 0.1086, "step": 37395 }, { "epoch": 0.8103481897168114, "grad_norm": 1.7668441534042358, "learning_rate": 1.723050743818756e-06, "loss": 0.1552, "step": 37400 }, { "epoch": 0.8104565250362923, "grad_norm": 0.5761354565620422, "learning_rate": 1.7211412783822689e-06, "loss": 0.1385, "step": 37405 }, { "epoch": 0.8105648603557732, "grad_norm": 0.8005703687667847, "learning_rate": 1.7192327719275415e-06, "loss": 0.1253, "step": 37410 }, { "epoch": 0.8106731956752541, "grad_norm": 2.222959518432617, "learning_rate": 1.7173252246756567e-06, "loss": 0.2455, "step": 37415 }, { "epoch": 0.8107815309947349, "grad_norm": 1.2463980913162231, "learning_rate": 1.7154186368475678e-06, "loss": 0.1061, "step": 37420 }, { "epoch": 0.8108898663142158, "grad_norm": 1.107912302017212, "learning_rate": 1.7135130086641295e-06, "loss": 0.2103, "step": 37425 }, { "epoch": 0.8109982016336966, "grad_norm": 1.9474848508834839, "learning_rate": 1.7116083403460759e-06, "loss": 0.1492, "step": 37430 }, { "epoch": 0.8111065369531775, "grad_norm": 1.474104642868042, "learning_rate": 1.7097046321140375e-06, "loss": 0.2089, "step": 37435 }, { "epoch": 0.8112148722726583, "grad_norm": 1.6183483600616455, "learning_rate": 1.707801884188528e-06, "loss": 0.2173, "step": 37440 }, { "epoch": 0.8113232075921392, "grad_norm": 1.1013797521591187, "learning_rate": 1.705900096789952e-06, "loss": 0.1403, "step": 37445 }, { "epoch": 0.81143154291162, "grad_norm": 1.2525508403778076, "learning_rate": 1.7039992701386066e-06, "loss": 0.1612, "step": 37450 }, { "epoch": 0.8115398782311009, "grad_norm": 2.148289918899536, "learning_rate": 1.702099404454668e-06, "loss": 0.2123, "step": 37455 }, { "epoch": 0.8116482135505818, "grad_norm": 2.109968662261963, "learning_rate": 1.7002004999582122e-06, "loss": 0.1271, "step": 37460 }, { "epoch": 0.8117565488700627, "grad_norm": 1.849241018295288, "learning_rate": 1.698302556869197e-06, "loss": 0.2512, "step": 37465 }, { "epoch": 0.8118648841895435, "grad_norm": 1.5852487087249756, "learning_rate": 1.6964055754074739e-06, "loss": 0.1825, "step": 37470 }, { "epoch": 0.8119732195090243, "grad_norm": 1.541987657546997, "learning_rate": 1.694509555792776e-06, "loss": 0.1735, "step": 37475 }, { "epoch": 0.8120815548285052, "grad_norm": 1.2495895624160767, "learning_rate": 1.692614498244728e-06, "loss": 0.1367, "step": 37480 }, { "epoch": 0.812189890147986, "grad_norm": 1.8066396713256836, "learning_rate": 1.6907204029828484e-06, "loss": 0.1359, "step": 37485 }, { "epoch": 0.8122982254674669, "grad_norm": 1.1918832063674927, "learning_rate": 1.6888272702265352e-06, "loss": 0.147, "step": 37490 }, { "epoch": 0.8124065607869477, "grad_norm": 0.8619315028190613, "learning_rate": 1.6869351001950829e-06, "loss": 0.1075, "step": 37495 }, { "epoch": 0.8125148961064286, "grad_norm": 1.9717764854431152, "learning_rate": 1.6850438931076675e-06, "loss": 0.129, "step": 37500 }, { "epoch": 0.8126232314259094, "grad_norm": 2.2555601596832275, "learning_rate": 1.6831536491833612e-06, "loss": 0.2076, "step": 37505 }, { "epoch": 0.8127315667453904, "grad_norm": 1.6295206546783447, "learning_rate": 1.6812643686411157e-06, "loss": 0.1834, "step": 37510 }, { "epoch": 0.8128399020648712, "grad_norm": 1.3203473091125488, "learning_rate": 1.6793760516997792e-06, "loss": 0.1086, "step": 37515 }, { "epoch": 0.8129482373843521, "grad_norm": 1.5023016929626465, "learning_rate": 1.677488698578086e-06, "loss": 0.1428, "step": 37520 }, { "epoch": 0.8130565727038329, "grad_norm": 1.0855573415756226, "learning_rate": 1.6756023094946538e-06, "loss": 0.1375, "step": 37525 }, { "epoch": 0.8131649080233138, "grad_norm": 1.3158189058303833, "learning_rate": 1.6737168846679962e-06, "loss": 0.1813, "step": 37530 }, { "epoch": 0.8132732433427946, "grad_norm": 2.402651309967041, "learning_rate": 1.6718324243165086e-06, "loss": 0.1882, "step": 37535 }, { "epoch": 0.8133815786622755, "grad_norm": 0.7158005833625793, "learning_rate": 1.6699489286584802e-06, "loss": 0.152, "step": 37540 }, { "epoch": 0.8134899139817563, "grad_norm": 2.1853833198547363, "learning_rate": 1.6680663979120814e-06, "loss": 0.2194, "step": 37545 }, { "epoch": 0.8135982493012371, "grad_norm": 1.209177851676941, "learning_rate": 1.6661848322953823e-06, "loss": 0.1492, "step": 37550 }, { "epoch": 0.8137065846207181, "grad_norm": 1.8111733198165894, "learning_rate": 1.6643042320263258e-06, "loss": 0.1769, "step": 37555 }, { "epoch": 0.8138149199401989, "grad_norm": 0.9892306327819824, "learning_rate": 1.662424597322756e-06, "loss": 0.1102, "step": 37560 }, { "epoch": 0.8139232552596798, "grad_norm": 1.2108644247055054, "learning_rate": 1.6605459284024016e-06, "loss": 0.2035, "step": 37565 }, { "epoch": 0.8140315905791606, "grad_norm": 0.5602030158042908, "learning_rate": 1.6586682254828746e-06, "loss": 0.1763, "step": 37570 }, { "epoch": 0.8141399258986415, "grad_norm": 1.452134609222412, "learning_rate": 1.6567914887816827e-06, "loss": 0.1257, "step": 37575 }, { "epoch": 0.8142482612181223, "grad_norm": 1.2974754571914673, "learning_rate": 1.6549157185162125e-06, "loss": 0.1709, "step": 37580 }, { "epoch": 0.8143565965376032, "grad_norm": 2.0761725902557373, "learning_rate": 1.6530409149037508e-06, "loss": 0.1453, "step": 37585 }, { "epoch": 0.814464931857084, "grad_norm": 1.797297477722168, "learning_rate": 1.6511670781614576e-06, "loss": 0.235, "step": 37590 }, { "epoch": 0.8145732671765649, "grad_norm": 1.9860343933105469, "learning_rate": 1.6492942085063967e-06, "loss": 0.0848, "step": 37595 }, { "epoch": 0.8146816024960457, "grad_norm": 0.6975988149642944, "learning_rate": 1.647422306155505e-06, "loss": 0.0874, "step": 37600 }, { "epoch": 0.8147899378155267, "grad_norm": 0.3227006196975708, "learning_rate": 1.6455513713256177e-06, "loss": 0.1702, "step": 37605 }, { "epoch": 0.8148982731350075, "grad_norm": 0.6878400444984436, "learning_rate": 1.643681404233457e-06, "loss": 0.1115, "step": 37610 }, { "epoch": 0.8150066084544884, "grad_norm": 0.8789529800415039, "learning_rate": 1.6418124050956253e-06, "loss": 0.1222, "step": 37615 }, { "epoch": 0.8151149437739692, "grad_norm": 1.7088466882705688, "learning_rate": 1.6399443741286236e-06, "loss": 0.1004, "step": 37620 }, { "epoch": 0.81522327909345, "grad_norm": 1.1351732015609741, "learning_rate": 1.6380773115488301e-06, "loss": 0.1872, "step": 37625 }, { "epoch": 0.8153316144129309, "grad_norm": 1.3266395330429077, "learning_rate": 1.6362112175725198e-06, "loss": 0.1205, "step": 37630 }, { "epoch": 0.8154399497324117, "grad_norm": 1.7590017318725586, "learning_rate": 1.6343460924158494e-06, "loss": 0.1789, "step": 37635 }, { "epoch": 0.8155482850518926, "grad_norm": 1.6364521980285645, "learning_rate": 1.6324819362948684e-06, "loss": 0.0996, "step": 37640 }, { "epoch": 0.8156566203713734, "grad_norm": 1.9125096797943115, "learning_rate": 1.63061874942551e-06, "loss": 0.1727, "step": 37645 }, { "epoch": 0.8157649556908544, "grad_norm": 1.5812432765960693, "learning_rate": 1.628756532023591e-06, "loss": 0.1646, "step": 37650 }, { "epoch": 0.8158732910103352, "grad_norm": 2.314194440841675, "learning_rate": 1.6268952843048314e-06, "loss": 0.1573, "step": 37655 }, { "epoch": 0.8159816263298161, "grad_norm": 1.000690221786499, "learning_rate": 1.625035006484822e-06, "loss": 0.0834, "step": 37660 }, { "epoch": 0.8160899616492969, "grad_norm": 1.220039963722229, "learning_rate": 1.6231756987790525e-06, "loss": 0.1301, "step": 37665 }, { "epoch": 0.8161982969687778, "grad_norm": 1.531977653503418, "learning_rate": 1.6213173614028898e-06, "loss": 0.1436, "step": 37670 }, { "epoch": 0.8163066322882586, "grad_norm": 1.2948483228683472, "learning_rate": 1.6194599945716016e-06, "loss": 0.1934, "step": 37675 }, { "epoch": 0.8164149676077395, "grad_norm": 0.994713306427002, "learning_rate": 1.6176035985003336e-06, "loss": 0.1435, "step": 37680 }, { "epoch": 0.8165233029272203, "grad_norm": 1.1543008089065552, "learning_rate": 1.6157481734041169e-06, "loss": 0.1327, "step": 37685 }, { "epoch": 0.8166316382467012, "grad_norm": 1.3845411539077759, "learning_rate": 1.61389371949788e-06, "loss": 0.1318, "step": 37690 }, { "epoch": 0.816739973566182, "grad_norm": 2.0433382987976074, "learning_rate": 1.6120402369964305e-06, "loss": 0.1607, "step": 37695 }, { "epoch": 0.816848308885663, "grad_norm": 1.35537850856781, "learning_rate": 1.6101877261144672e-06, "loss": 0.1713, "step": 37700 }, { "epoch": 0.8169566442051438, "grad_norm": 1.269930124282837, "learning_rate": 1.608336187066577e-06, "loss": 0.1844, "step": 37705 }, { "epoch": 0.8170649795246246, "grad_norm": 1.7706342935562134, "learning_rate": 1.606485620067234e-06, "loss": 0.0957, "step": 37710 }, { "epoch": 0.8171733148441055, "grad_norm": 1.1536957025527954, "learning_rate": 1.604636025330798e-06, "loss": 0.1476, "step": 37715 }, { "epoch": 0.8172816501635863, "grad_norm": 1.202115535736084, "learning_rate": 1.602787403071513e-06, "loss": 0.1578, "step": 37720 }, { "epoch": 0.8173899854830672, "grad_norm": 1.0185933113098145, "learning_rate": 1.6009397535035199e-06, "loss": 0.1701, "step": 37725 }, { "epoch": 0.817498320802548, "grad_norm": 1.1952295303344727, "learning_rate": 1.5990930768408354e-06, "loss": 0.1967, "step": 37730 }, { "epoch": 0.8176066561220289, "grad_norm": 0.9034190773963928, "learning_rate": 1.5972473732973758e-06, "loss": 0.108, "step": 37735 }, { "epoch": 0.8177149914415097, "grad_norm": 1.3284363746643066, "learning_rate": 1.5954026430869318e-06, "loss": 0.1535, "step": 37740 }, { "epoch": 0.8178233267609906, "grad_norm": 1.5738036632537842, "learning_rate": 1.5935588864231931e-06, "loss": 0.1633, "step": 37745 }, { "epoch": 0.8179316620804715, "grad_norm": 1.9728047847747803, "learning_rate": 1.5917161035197259e-06, "loss": 0.1045, "step": 37750 }, { "epoch": 0.8180399973999524, "grad_norm": 0.8842402696609497, "learning_rate": 1.5898742945899925e-06, "loss": 0.1824, "step": 37755 }, { "epoch": 0.8181483327194332, "grad_norm": 0.8967958688735962, "learning_rate": 1.5880334598473413e-06, "loss": 0.1686, "step": 37760 }, { "epoch": 0.8182566680389141, "grad_norm": 1.0680217742919922, "learning_rate": 1.5861935995049993e-06, "loss": 0.1597, "step": 37765 }, { "epoch": 0.8183650033583949, "grad_norm": 1.3180615901947021, "learning_rate": 1.5843547137760928e-06, "loss": 0.1635, "step": 37770 }, { "epoch": 0.8184733386778758, "grad_norm": 0.9916046261787415, "learning_rate": 1.5825168028736248e-06, "loss": 0.1647, "step": 37775 }, { "epoch": 0.8185816739973566, "grad_norm": 2.20670747756958, "learning_rate": 1.5806798670104927e-06, "loss": 0.176, "step": 37780 }, { "epoch": 0.8186900093168374, "grad_norm": 2.0495529174804688, "learning_rate": 1.5788439063994743e-06, "loss": 0.0861, "step": 37785 }, { "epoch": 0.8187983446363183, "grad_norm": 2.128969430923462, "learning_rate": 1.5770089212532435e-06, "loss": 0.167, "step": 37790 }, { "epoch": 0.8189066799557992, "grad_norm": 2.12601637840271, "learning_rate": 1.5751749117843495e-06, "loss": 0.1343, "step": 37795 }, { "epoch": 0.8190150152752801, "grad_norm": 1.9834474325180054, "learning_rate": 1.573341878205238e-06, "loss": 0.2236, "step": 37800 }, { "epoch": 0.8191233505947609, "grad_norm": 1.1412605047225952, "learning_rate": 1.571509820728242e-06, "loss": 0.1839, "step": 37805 }, { "epoch": 0.8192316859142418, "grad_norm": 2.013625144958496, "learning_rate": 1.5696787395655711e-06, "loss": 0.2732, "step": 37810 }, { "epoch": 0.8193400212337226, "grad_norm": 2.002314567565918, "learning_rate": 1.5678486349293354e-06, "loss": 0.1016, "step": 37815 }, { "epoch": 0.8194483565532035, "grad_norm": 1.1141514778137207, "learning_rate": 1.5660195070315175e-06, "loss": 0.1427, "step": 37820 }, { "epoch": 0.8195566918726843, "grad_norm": 1.9196124076843262, "learning_rate": 1.5641913560840028e-06, "loss": 0.1885, "step": 37825 }, { "epoch": 0.8196650271921652, "grad_norm": 1.8462910652160645, "learning_rate": 1.562364182298548e-06, "loss": 0.2103, "step": 37830 }, { "epoch": 0.819773362511646, "grad_norm": 1.2294692993164062, "learning_rate": 1.5605379858868098e-06, "loss": 0.1775, "step": 37835 }, { "epoch": 0.8198816978311269, "grad_norm": 0.8262034058570862, "learning_rate": 1.5587127670603198e-06, "loss": 0.1323, "step": 37840 }, { "epoch": 0.8199900331506078, "grad_norm": 0.7893002033233643, "learning_rate": 1.5568885260305056e-06, "loss": 0.1093, "step": 37845 }, { "epoch": 0.8200983684700887, "grad_norm": 1.7616980075836182, "learning_rate": 1.5550652630086804e-06, "loss": 0.1999, "step": 37850 }, { "epoch": 0.8202067037895695, "grad_norm": 2.1917052268981934, "learning_rate": 1.5532429782060366e-06, "loss": 0.1372, "step": 37855 }, { "epoch": 0.8203150391090503, "grad_norm": 1.5489249229431152, "learning_rate": 1.5514216718336638e-06, "loss": 0.1216, "step": 37860 }, { "epoch": 0.8204233744285312, "grad_norm": 1.0584660768508911, "learning_rate": 1.5496013441025293e-06, "loss": 0.1048, "step": 37865 }, { "epoch": 0.820531709748012, "grad_norm": 1.607223391532898, "learning_rate": 1.5477819952234929e-06, "loss": 0.1586, "step": 37870 }, { "epoch": 0.8206400450674929, "grad_norm": 1.1551131010055542, "learning_rate": 1.5459636254072962e-06, "loss": 0.2167, "step": 37875 }, { "epoch": 0.8207483803869737, "grad_norm": 0.4353386461734772, "learning_rate": 1.5441462348645752e-06, "loss": 0.2057, "step": 37880 }, { "epoch": 0.8208567157064546, "grad_norm": 1.2620686292648315, "learning_rate": 1.5423298238058438e-06, "loss": 0.1117, "step": 37885 }, { "epoch": 0.8209650510259354, "grad_norm": 1.6184293031692505, "learning_rate": 1.5405143924415034e-06, "loss": 0.1331, "step": 37890 }, { "epoch": 0.8210733863454164, "grad_norm": 1.1557958126068115, "learning_rate": 1.5386999409818482e-06, "loss": 0.1485, "step": 37895 }, { "epoch": 0.8211817216648972, "grad_norm": 1.4422253370285034, "learning_rate": 1.5368864696370545e-06, "loss": 0.0976, "step": 37900 }, { "epoch": 0.8212900569843781, "grad_norm": 2.159611940383911, "learning_rate": 1.5350739786171886e-06, "loss": 0.1386, "step": 37905 }, { "epoch": 0.8213983923038589, "grad_norm": 0.8160333037376404, "learning_rate": 1.533262468132195e-06, "loss": 0.1121, "step": 37910 }, { "epoch": 0.8215067276233398, "grad_norm": 0.5835907459259033, "learning_rate": 1.5314519383919147e-06, "loss": 0.1843, "step": 37915 }, { "epoch": 0.8216150629428206, "grad_norm": 1.8647164106369019, "learning_rate": 1.5296423896060687e-06, "loss": 0.1159, "step": 37920 }, { "epoch": 0.8217233982623015, "grad_norm": 0.43775060772895813, "learning_rate": 1.5278338219842637e-06, "loss": 0.1954, "step": 37925 }, { "epoch": 0.8218317335817823, "grad_norm": 1.342162847518921, "learning_rate": 1.5260262357360001e-06, "loss": 0.2024, "step": 37930 }, { "epoch": 0.8219400689012631, "grad_norm": 1.3147988319396973, "learning_rate": 1.5242196310706537e-06, "loss": 0.1782, "step": 37935 }, { "epoch": 0.8220484042207441, "grad_norm": 1.1813613176345825, "learning_rate": 1.522414008197497e-06, "loss": 0.2343, "step": 37940 }, { "epoch": 0.822156739540225, "grad_norm": 1.4040814638137817, "learning_rate": 1.5206093673256817e-06, "loss": 0.1368, "step": 37945 }, { "epoch": 0.8222650748597058, "grad_norm": 1.2190406322479248, "learning_rate": 1.5188057086642537e-06, "loss": 0.19, "step": 37950 }, { "epoch": 0.8223734101791866, "grad_norm": 1.5660971403121948, "learning_rate": 1.5170030324221352e-06, "loss": 0.1571, "step": 37955 }, { "epoch": 0.8224817454986675, "grad_norm": 0.878080427646637, "learning_rate": 1.5152013388081388e-06, "loss": 0.2229, "step": 37960 }, { "epoch": 0.8225900808181483, "grad_norm": 0.9447469711303711, "learning_rate": 1.5134006280309666e-06, "loss": 0.1765, "step": 37965 }, { "epoch": 0.8226984161376292, "grad_norm": 1.5033378601074219, "learning_rate": 1.5116009002991993e-06, "loss": 0.2123, "step": 37970 }, { "epoch": 0.82280675145711, "grad_norm": 2.570868730545044, "learning_rate": 1.509802155821315e-06, "loss": 0.1981, "step": 37975 }, { "epoch": 0.8229150867765909, "grad_norm": 1.0209544897079468, "learning_rate": 1.5080043948056656e-06, "loss": 0.2332, "step": 37980 }, { "epoch": 0.8230234220960717, "grad_norm": 1.5571297407150269, "learning_rate": 1.5062076174604978e-06, "loss": 0.1793, "step": 37985 }, { "epoch": 0.8231317574155527, "grad_norm": 1.2363057136535645, "learning_rate": 1.5044118239939398e-06, "loss": 0.1966, "step": 37990 }, { "epoch": 0.8232400927350335, "grad_norm": 1.9116629362106323, "learning_rate": 1.5026170146140073e-06, "loss": 0.1103, "step": 37995 }, { "epoch": 0.8233484280545144, "grad_norm": 1.3522988557815552, "learning_rate": 1.5008231895286051e-06, "loss": 0.1698, "step": 38000 }, { "epoch": 0.8234567633739952, "grad_norm": 0.5116066932678223, "learning_rate": 1.4990303489455172e-06, "loss": 0.1961, "step": 38005 }, { "epoch": 0.823565098693476, "grad_norm": 1.5193943977355957, "learning_rate": 1.4972384930724205e-06, "loss": 0.1448, "step": 38010 }, { "epoch": 0.8236734340129569, "grad_norm": 1.783752202987671, "learning_rate": 1.4954476221168711e-06, "loss": 0.1651, "step": 38015 }, { "epoch": 0.8237817693324377, "grad_norm": 0.9624485969543457, "learning_rate": 1.493657736286318e-06, "loss": 0.1346, "step": 38020 }, { "epoch": 0.8238901046519186, "grad_norm": 1.543779969215393, "learning_rate": 1.4918688357880894e-06, "loss": 0.1062, "step": 38025 }, { "epoch": 0.8239984399713994, "grad_norm": 1.5664184093475342, "learning_rate": 1.4900809208294066e-06, "loss": 0.1659, "step": 38030 }, { "epoch": 0.8241067752908803, "grad_norm": 1.0611450672149658, "learning_rate": 1.4882939916173688e-06, "loss": 0.1643, "step": 38035 }, { "epoch": 0.8242151106103612, "grad_norm": 1.0617620944976807, "learning_rate": 1.4865080483589667e-06, "loss": 0.1161, "step": 38040 }, { "epoch": 0.8243234459298421, "grad_norm": 1.3577542304992676, "learning_rate": 1.4847230912610767e-06, "loss": 0.1696, "step": 38045 }, { "epoch": 0.8244317812493229, "grad_norm": 0.9739269614219666, "learning_rate": 1.4829391205304555e-06, "loss": 0.2178, "step": 38050 }, { "epoch": 0.8245401165688038, "grad_norm": 1.2050946950912476, "learning_rate": 1.4811561363737537e-06, "loss": 0.1908, "step": 38055 }, { "epoch": 0.8246484518882846, "grad_norm": 1.3033312559127808, "learning_rate": 1.4793741389974992e-06, "loss": 0.1818, "step": 38060 }, { "epoch": 0.8247567872077655, "grad_norm": 0.9571746587753296, "learning_rate": 1.4775931286081147e-06, "loss": 0.1751, "step": 38065 }, { "epoch": 0.8248651225272463, "grad_norm": 0.7340001463890076, "learning_rate": 1.4758131054118974e-06, "loss": 0.1675, "step": 38070 }, { "epoch": 0.8249734578467272, "grad_norm": 1.1615173816680908, "learning_rate": 1.4740340696150423e-06, "loss": 0.1973, "step": 38075 }, { "epoch": 0.825081793166208, "grad_norm": 1.7716764211654663, "learning_rate": 1.4722560214236193e-06, "loss": 0.1856, "step": 38080 }, { "epoch": 0.825190128485689, "grad_norm": 1.5503712892532349, "learning_rate": 1.47047896104359e-06, "loss": 0.0886, "step": 38085 }, { "epoch": 0.8252984638051698, "grad_norm": 1.5515211820602417, "learning_rate": 1.468702888680803e-06, "loss": 0.0818, "step": 38090 }, { "epoch": 0.8254067991246506, "grad_norm": 3.201591730117798, "learning_rate": 1.4669278045409863e-06, "loss": 0.1781, "step": 38095 }, { "epoch": 0.8255151344441315, "grad_norm": 0.7621470093727112, "learning_rate": 1.4651537088297596e-06, "loss": 0.1768, "step": 38100 }, { "epoch": 0.8256234697636123, "grad_norm": 2.1836514472961426, "learning_rate": 1.4633806017526208e-06, "loss": 0.1743, "step": 38105 }, { "epoch": 0.8257318050830932, "grad_norm": 1.0922600030899048, "learning_rate": 1.4616084835149635e-06, "loss": 0.1123, "step": 38110 }, { "epoch": 0.825840140402574, "grad_norm": 0.9443358778953552, "learning_rate": 1.4598373543220567e-06, "loss": 0.1509, "step": 38115 }, { "epoch": 0.8259484757220549, "grad_norm": 1.548020362854004, "learning_rate": 1.458067214379062e-06, "loss": 0.1246, "step": 38120 }, { "epoch": 0.8260568110415357, "grad_norm": 1.5557900667190552, "learning_rate": 1.456298063891023e-06, "loss": 0.1518, "step": 38125 }, { "epoch": 0.8261651463610166, "grad_norm": 0.969343900680542, "learning_rate": 1.4545299030628667e-06, "loss": 0.0767, "step": 38130 }, { "epoch": 0.8262734816804975, "grad_norm": 2.1439170837402344, "learning_rate": 1.4527627320994097e-06, "loss": 0.206, "step": 38135 }, { "epoch": 0.8263818169999784, "grad_norm": 0.9057874083518982, "learning_rate": 1.4509965512053526e-06, "loss": 0.1689, "step": 38140 }, { "epoch": 0.8264901523194592, "grad_norm": 1.5075740814208984, "learning_rate": 1.4492313605852825e-06, "loss": 0.1996, "step": 38145 }, { "epoch": 0.8265984876389401, "grad_norm": 3.1611902713775635, "learning_rate": 1.4474671604436674e-06, "loss": 0.2544, "step": 38150 }, { "epoch": 0.8267068229584209, "grad_norm": 1.4168745279312134, "learning_rate": 1.445703950984867e-06, "loss": 0.1637, "step": 38155 }, { "epoch": 0.8268151582779018, "grad_norm": 1.0909801721572876, "learning_rate": 1.4439417324131177e-06, "loss": 0.1945, "step": 38160 }, { "epoch": 0.8269234935973826, "grad_norm": 1.9376953840255737, "learning_rate": 1.442180504932551e-06, "loss": 0.132, "step": 38165 }, { "epoch": 0.8270318289168634, "grad_norm": 0.48819518089294434, "learning_rate": 1.440420268747178e-06, "loss": 0.1331, "step": 38170 }, { "epoch": 0.8271401642363443, "grad_norm": 0.9154869318008423, "learning_rate": 1.4386610240608912e-06, "loss": 0.1377, "step": 38175 }, { "epoch": 0.8272484995558252, "grad_norm": 0.8452828526496887, "learning_rate": 1.4369027710774764e-06, "loss": 0.1497, "step": 38180 }, { "epoch": 0.8273568348753061, "grad_norm": 0.3250289559364319, "learning_rate": 1.4351455100005994e-06, "loss": 0.1135, "step": 38185 }, { "epoch": 0.8274651701947869, "grad_norm": 0.9223267436027527, "learning_rate": 1.4333892410338169e-06, "loss": 0.1572, "step": 38190 }, { "epoch": 0.8275735055142678, "grad_norm": 0.616644561290741, "learning_rate": 1.43163396438056e-06, "loss": 0.1102, "step": 38195 }, { "epoch": 0.8276818408337486, "grad_norm": 1.809208631515503, "learning_rate": 1.4298796802441573e-06, "loss": 0.2032, "step": 38200 }, { "epoch": 0.8277901761532295, "grad_norm": 1.4881441593170166, "learning_rate": 1.4281263888278118e-06, "loss": 0.1655, "step": 38205 }, { "epoch": 0.8278985114727103, "grad_norm": 0.46610477566719055, "learning_rate": 1.4263740903346168e-06, "loss": 0.1045, "step": 38210 }, { "epoch": 0.8280068467921912, "grad_norm": 0.6661702394485474, "learning_rate": 1.4246227849675521e-06, "loss": 0.1234, "step": 38215 }, { "epoch": 0.828115182111672, "grad_norm": 1.3949235677719116, "learning_rate": 1.4228724729294762e-06, "loss": 0.1954, "step": 38220 }, { "epoch": 0.8282235174311529, "grad_norm": 1.2798768281936646, "learning_rate": 1.4211231544231418e-06, "loss": 0.2063, "step": 38225 }, { "epoch": 0.8283318527506338, "grad_norm": 0.9307990670204163, "learning_rate": 1.4193748296511733e-06, "loss": 0.1597, "step": 38230 }, { "epoch": 0.8284401880701147, "grad_norm": 0.7396515607833862, "learning_rate": 1.4176274988160976e-06, "loss": 0.1943, "step": 38235 }, { "epoch": 0.8285485233895955, "grad_norm": 0.6490251421928406, "learning_rate": 1.4158811621203127e-06, "loss": 0.1142, "step": 38240 }, { "epoch": 0.8286568587090763, "grad_norm": 1.6259384155273438, "learning_rate": 1.4141358197661025e-06, "loss": 0.2138, "step": 38245 }, { "epoch": 0.8287651940285572, "grad_norm": 1.71076500415802, "learning_rate": 1.412391471955643e-06, "loss": 0.1634, "step": 38250 }, { "epoch": 0.828873529348038, "grad_norm": 1.8194330930709839, "learning_rate": 1.4106481188909882e-06, "loss": 0.1426, "step": 38255 }, { "epoch": 0.8289818646675189, "grad_norm": 1.3997726440429688, "learning_rate": 1.4089057607740809e-06, "loss": 0.1174, "step": 38260 }, { "epoch": 0.8290901999869997, "grad_norm": 0.8790441155433655, "learning_rate": 1.4071643978067461e-06, "loss": 0.1454, "step": 38265 }, { "epoch": 0.8291985353064806, "grad_norm": 0.9887353777885437, "learning_rate": 1.4054240301906962e-06, "loss": 0.0925, "step": 38270 }, { "epoch": 0.8293068706259614, "grad_norm": 0.6939262747764587, "learning_rate": 1.4036846581275242e-06, "loss": 0.1186, "step": 38275 }, { "epoch": 0.8294152059454424, "grad_norm": 1.9198684692382812, "learning_rate": 1.4019462818187113e-06, "loss": 0.2054, "step": 38280 }, { "epoch": 0.8295235412649232, "grad_norm": 0.637712299823761, "learning_rate": 1.400208901465625e-06, "loss": 0.2293, "step": 38285 }, { "epoch": 0.8296318765844041, "grad_norm": 1.1941756010055542, "learning_rate": 1.3984725172695102e-06, "loss": 0.1408, "step": 38290 }, { "epoch": 0.8297402119038849, "grad_norm": 0.6016050577163696, "learning_rate": 1.3967371294315057e-06, "loss": 0.1876, "step": 38295 }, { "epoch": 0.8298485472233658, "grad_norm": 1.618203043937683, "learning_rate": 1.3950027381526266e-06, "loss": 0.165, "step": 38300 }, { "epoch": 0.8299568825428466, "grad_norm": 1.2394918203353882, "learning_rate": 1.3932693436337786e-06, "loss": 0.1025, "step": 38305 }, { "epoch": 0.8300652178623275, "grad_norm": 1.9662584066390991, "learning_rate": 1.3915369460757467e-06, "loss": 0.1809, "step": 38310 }, { "epoch": 0.8301735531818083, "grad_norm": 1.8784714937210083, "learning_rate": 1.389805545679207e-06, "loss": 0.1769, "step": 38315 }, { "epoch": 0.8302818885012891, "grad_norm": 1.0834439992904663, "learning_rate": 1.3880751426447114e-06, "loss": 0.2376, "step": 38320 }, { "epoch": 0.8303902238207701, "grad_norm": 1.0430309772491455, "learning_rate": 1.386345737172704e-06, "loss": 0.1184, "step": 38325 }, { "epoch": 0.830498559140251, "grad_norm": 1.5013394355773926, "learning_rate": 1.3846173294635135e-06, "loss": 0.1318, "step": 38330 }, { "epoch": 0.8306068944597318, "grad_norm": 1.4981608390808105, "learning_rate": 1.382889919717344e-06, "loss": 0.2301, "step": 38335 }, { "epoch": 0.8307152297792126, "grad_norm": 1.5695573091506958, "learning_rate": 1.3811635081342945e-06, "loss": 0.2025, "step": 38340 }, { "epoch": 0.8308235650986935, "grad_norm": 1.3245759010314941, "learning_rate": 1.3794380949143416e-06, "loss": 0.1562, "step": 38345 }, { "epoch": 0.8309319004181743, "grad_norm": 1.5999300479888916, "learning_rate": 1.3777136802573509e-06, "loss": 0.1402, "step": 38350 }, { "epoch": 0.8310402357376552, "grad_norm": 1.1498948335647583, "learning_rate": 1.3759902643630664e-06, "loss": 0.1429, "step": 38355 }, { "epoch": 0.831148571057136, "grad_norm": 0.9424191117286682, "learning_rate": 1.3742678474311244e-06, "loss": 0.13, "step": 38360 }, { "epoch": 0.8312569063766169, "grad_norm": 1.0409986972808838, "learning_rate": 1.3725464296610357e-06, "loss": 0.2343, "step": 38365 }, { "epoch": 0.8313652416960977, "grad_norm": 0.7985055446624756, "learning_rate": 1.3708260112522075e-06, "loss": 0.1183, "step": 38370 }, { "epoch": 0.8314735770155787, "grad_norm": 1.1160005331039429, "learning_rate": 1.3691065924039182e-06, "loss": 0.1812, "step": 38375 }, { "epoch": 0.8315819123350595, "grad_norm": 1.3260324001312256, "learning_rate": 1.3673881733153394e-06, "loss": 0.2327, "step": 38380 }, { "epoch": 0.8316902476545404, "grad_norm": 1.5710183382034302, "learning_rate": 1.3656707541855264e-06, "loss": 0.1656, "step": 38385 }, { "epoch": 0.8317985829740212, "grad_norm": 1.4545061588287354, "learning_rate": 1.363954335213411e-06, "loss": 0.1606, "step": 38390 }, { "epoch": 0.831906918293502, "grad_norm": 1.1061710119247437, "learning_rate": 1.3622389165978212e-06, "loss": 0.1875, "step": 38395 }, { "epoch": 0.8320152536129829, "grad_norm": 0.46886172890663147, "learning_rate": 1.3605244985374577e-06, "loss": 0.136, "step": 38400 }, { "epoch": 0.8321235889324637, "grad_norm": 1.9394354820251465, "learning_rate": 1.358811081230913e-06, "loss": 0.1827, "step": 38405 }, { "epoch": 0.8322319242519446, "grad_norm": 1.9506992101669312, "learning_rate": 1.3570986648766593e-06, "loss": 0.1409, "step": 38410 }, { "epoch": 0.8323402595714254, "grad_norm": 0.9100056290626526, "learning_rate": 1.3553872496730536e-06, "loss": 0.1396, "step": 38415 }, { "epoch": 0.8324485948909063, "grad_norm": 1.1019976139068604, "learning_rate": 1.3536768358183395e-06, "loss": 0.1406, "step": 38420 }, { "epoch": 0.8325569302103872, "grad_norm": 1.0762672424316406, "learning_rate": 1.3519674235106383e-06, "loss": 0.1742, "step": 38425 }, { "epoch": 0.8326652655298681, "grad_norm": 1.7525808811187744, "learning_rate": 1.3502590129479675e-06, "loss": 0.1514, "step": 38430 }, { "epoch": 0.8327736008493489, "grad_norm": 0.9054889678955078, "learning_rate": 1.3485516043282154e-06, "loss": 0.1191, "step": 38435 }, { "epoch": 0.8328819361688298, "grad_norm": 1.4860602617263794, "learning_rate": 1.3468451978491638e-06, "loss": 0.1343, "step": 38440 }, { "epoch": 0.8329902714883106, "grad_norm": 0.8741964101791382, "learning_rate": 1.345139793708471e-06, "loss": 0.1332, "step": 38445 }, { "epoch": 0.8330986068077915, "grad_norm": 0.8511791229248047, "learning_rate": 1.3434353921036813e-06, "loss": 0.1202, "step": 38450 }, { "epoch": 0.8332069421272723, "grad_norm": 1.3704537153244019, "learning_rate": 1.3417319932322282e-06, "loss": 0.1896, "step": 38455 }, { "epoch": 0.8333152774467532, "grad_norm": 2.0006284713745117, "learning_rate": 1.3400295972914212e-06, "loss": 0.1226, "step": 38460 }, { "epoch": 0.833423612766234, "grad_norm": 1.7628165483474731, "learning_rate": 1.3383282044784607e-06, "loss": 0.1987, "step": 38465 }, { "epoch": 0.833531948085715, "grad_norm": 0.9461841583251953, "learning_rate": 1.336627814990421e-06, "loss": 0.1833, "step": 38470 }, { "epoch": 0.8336402834051958, "grad_norm": 1.7950938940048218, "learning_rate": 1.334928429024277e-06, "loss": 0.2585, "step": 38475 }, { "epoch": 0.8337486187246766, "grad_norm": 1.0592536926269531, "learning_rate": 1.333230046776871e-06, "loss": 0.2313, "step": 38480 }, { "epoch": 0.8338569540441575, "grad_norm": 1.550123691558838, "learning_rate": 1.3315326684449348e-06, "loss": 0.1205, "step": 38485 }, { "epoch": 0.8339652893636383, "grad_norm": 0.7813596129417419, "learning_rate": 1.3298362942250864e-06, "loss": 0.1108, "step": 38490 }, { "epoch": 0.8340736246831192, "grad_norm": 1.8877053260803223, "learning_rate": 1.3281409243138222e-06, "loss": 0.0943, "step": 38495 }, { "epoch": 0.8341819600026, "grad_norm": 1.360791802406311, "learning_rate": 1.3264465589075293e-06, "loss": 0.1578, "step": 38500 }, { "epoch": 0.8342902953220809, "grad_norm": 0.7253754734992981, "learning_rate": 1.3247531982024719e-06, "loss": 0.0928, "step": 38505 }, { "epoch": 0.8343986306415617, "grad_norm": 0.09237063676118851, "learning_rate": 1.3230608423948022e-06, "loss": 0.1252, "step": 38510 }, { "epoch": 0.8345069659610426, "grad_norm": 1.6878447532653809, "learning_rate": 1.321369491680552e-06, "loss": 0.1587, "step": 38515 }, { "epoch": 0.8346153012805235, "grad_norm": 1.9823968410491943, "learning_rate": 1.3196791462556403e-06, "loss": 0.1815, "step": 38520 }, { "epoch": 0.8347236366000044, "grad_norm": 1.7439043521881104, "learning_rate": 1.3179898063158692e-06, "loss": 0.2021, "step": 38525 }, { "epoch": 0.8348319719194852, "grad_norm": 1.342702031135559, "learning_rate": 1.3163014720569224e-06, "loss": 0.166, "step": 38530 }, { "epoch": 0.8349403072389661, "grad_norm": 0.8539639115333557, "learning_rate": 1.3146141436743687e-06, "loss": 0.1136, "step": 38535 }, { "epoch": 0.8350486425584469, "grad_norm": 2.0694241523742676, "learning_rate": 1.3129278213636577e-06, "loss": 0.1331, "step": 38540 }, { "epoch": 0.8351569778779278, "grad_norm": 2.8476791381835938, "learning_rate": 1.3112425053201282e-06, "loss": 0.1652, "step": 38545 }, { "epoch": 0.8352653131974086, "grad_norm": 2.359309196472168, "learning_rate": 1.3095581957389958e-06, "loss": 0.1665, "step": 38550 }, { "epoch": 0.8353736485168894, "grad_norm": 1.5596777200698853, "learning_rate": 1.3078748928153651e-06, "loss": 0.1644, "step": 38555 }, { "epoch": 0.8354819838363703, "grad_norm": 0.9255752563476562, "learning_rate": 1.3061925967442179e-06, "loss": 0.106, "step": 38560 }, { "epoch": 0.8355903191558511, "grad_norm": 0.6873523592948914, "learning_rate": 1.3045113077204274e-06, "loss": 0.1375, "step": 38565 }, { "epoch": 0.8356986544753321, "grad_norm": 1.4863377809524536, "learning_rate": 1.302831025938741e-06, "loss": 0.1881, "step": 38570 }, { "epoch": 0.8358069897948129, "grad_norm": 1.416284203529358, "learning_rate": 1.3011517515937965e-06, "loss": 0.2197, "step": 38575 }, { "epoch": 0.8359153251142938, "grad_norm": 1.5367668867111206, "learning_rate": 1.2994734848801161e-06, "loss": 0.2116, "step": 38580 }, { "epoch": 0.8360236604337746, "grad_norm": 0.9714785814285278, "learning_rate": 1.297796225992095e-06, "loss": 0.1181, "step": 38585 }, { "epoch": 0.8361319957532555, "grad_norm": 0.8433043956756592, "learning_rate": 1.296119975124025e-06, "loss": 0.155, "step": 38590 }, { "epoch": 0.8362403310727363, "grad_norm": 1.546978235244751, "learning_rate": 1.2944447324700693e-06, "loss": 0.1394, "step": 38595 }, { "epoch": 0.8363486663922172, "grad_norm": 1.4389432668685913, "learning_rate": 1.292770498224285e-06, "loss": 0.1431, "step": 38600 }, { "epoch": 0.836457001711698, "grad_norm": 1.246811032295227, "learning_rate": 1.2910972725806016e-06, "loss": 0.1648, "step": 38605 }, { "epoch": 0.8365653370311789, "grad_norm": 1.7272791862487793, "learning_rate": 1.2894250557328426e-06, "loss": 0.1941, "step": 38610 }, { "epoch": 0.8366736723506598, "grad_norm": 2.0971360206604004, "learning_rate": 1.2877538478747031e-06, "loss": 0.0926, "step": 38615 }, { "epoch": 0.8367820076701407, "grad_norm": 1.097223162651062, "learning_rate": 1.2860836491997718e-06, "loss": 0.1287, "step": 38620 }, { "epoch": 0.8368903429896215, "grad_norm": 1.0191314220428467, "learning_rate": 1.2844144599015175e-06, "loss": 0.108, "step": 38625 }, { "epoch": 0.8369986783091024, "grad_norm": 0.7525142431259155, "learning_rate": 1.282746280173285e-06, "loss": 0.2404, "step": 38630 }, { "epoch": 0.8371070136285832, "grad_norm": 1.1262255907058716, "learning_rate": 1.2810791102083154e-06, "loss": 0.2284, "step": 38635 }, { "epoch": 0.837215348948064, "grad_norm": 1.1426866054534912, "learning_rate": 1.2794129501997176e-06, "loss": 0.1848, "step": 38640 }, { "epoch": 0.8373236842675449, "grad_norm": 1.235605239868164, "learning_rate": 1.2777478003404986e-06, "loss": 0.1217, "step": 38645 }, { "epoch": 0.8374320195870257, "grad_norm": 1.4733010530471802, "learning_rate": 1.2760836608235373e-06, "loss": 0.1416, "step": 38650 }, { "epoch": 0.8375403549065066, "grad_norm": 1.225629448890686, "learning_rate": 1.274420531841598e-06, "loss": 0.1923, "step": 38655 }, { "epoch": 0.8376486902259874, "grad_norm": 1.3974411487579346, "learning_rate": 1.2727584135873317e-06, "loss": 0.167, "step": 38660 }, { "epoch": 0.8377570255454684, "grad_norm": 1.0535008907318115, "learning_rate": 1.2710973062532662e-06, "loss": 0.1616, "step": 38665 }, { "epoch": 0.8378653608649492, "grad_norm": 1.0868786573410034, "learning_rate": 1.269437210031822e-06, "loss": 0.1841, "step": 38670 }, { "epoch": 0.8379736961844301, "grad_norm": 0.5105567574501038, "learning_rate": 1.267778125115292e-06, "loss": 0.1779, "step": 38675 }, { "epoch": 0.8380820315039109, "grad_norm": 1.2579489946365356, "learning_rate": 1.2661200516958594e-06, "loss": 0.1001, "step": 38680 }, { "epoch": 0.8381903668233918, "grad_norm": 1.780890941619873, "learning_rate": 1.2644629899655848e-06, "loss": 0.174, "step": 38685 }, { "epoch": 0.8382987021428726, "grad_norm": 1.1664752960205078, "learning_rate": 1.2628069401164134e-06, "loss": 0.0876, "step": 38690 }, { "epoch": 0.8384070374623535, "grad_norm": 1.0860366821289062, "learning_rate": 1.2611519023401764e-06, "loss": 0.2348, "step": 38695 }, { "epoch": 0.8385153727818343, "grad_norm": 1.024547815322876, "learning_rate": 1.2594978768285804e-06, "loss": 0.1312, "step": 38700 }, { "epoch": 0.8386237081013151, "grad_norm": 0.7234358787536621, "learning_rate": 1.2578448637732266e-06, "loss": 0.11, "step": 38705 }, { "epoch": 0.8387320434207961, "grad_norm": 1.6528407335281372, "learning_rate": 1.2561928633655817e-06, "loss": 0.1421, "step": 38710 }, { "epoch": 0.838840378740277, "grad_norm": 1.136871576309204, "learning_rate": 1.2545418757970173e-06, "loss": 0.1175, "step": 38715 }, { "epoch": 0.8389487140597578, "grad_norm": 1.6078516244888306, "learning_rate": 1.2528919012587693e-06, "loss": 0.1149, "step": 38720 }, { "epoch": 0.8390570493792386, "grad_norm": 1.0070430040359497, "learning_rate": 1.2512429399419601e-06, "loss": 0.0542, "step": 38725 }, { "epoch": 0.8391653846987195, "grad_norm": 0.7321898341178894, "learning_rate": 1.2495949920376016e-06, "loss": 0.1308, "step": 38730 }, { "epoch": 0.8392737200182003, "grad_norm": 1.1490331888198853, "learning_rate": 1.247948057736581e-06, "loss": 0.1245, "step": 38735 }, { "epoch": 0.8393820553376812, "grad_norm": 0.38442760705947876, "learning_rate": 1.2463021372296747e-06, "loss": 0.1475, "step": 38740 }, { "epoch": 0.839490390657162, "grad_norm": 1.7775861024856567, "learning_rate": 1.2446572307075321e-06, "loss": 0.1428, "step": 38745 }, { "epoch": 0.8395987259766429, "grad_norm": 0.5183170437812805, "learning_rate": 1.2430133383606979e-06, "loss": 0.1305, "step": 38750 }, { "epoch": 0.8397070612961237, "grad_norm": 1.1656484603881836, "learning_rate": 1.241370460379585e-06, "loss": 0.1769, "step": 38755 }, { "epoch": 0.8398153966156047, "grad_norm": 1.4679832458496094, "learning_rate": 1.2397285969545015e-06, "loss": 0.1677, "step": 38760 }, { "epoch": 0.8399237319350855, "grad_norm": 1.459030032157898, "learning_rate": 1.2380877482756326e-06, "loss": 0.1038, "step": 38765 }, { "epoch": 0.8400320672545664, "grad_norm": 1.1678906679153442, "learning_rate": 1.2364479145330422e-06, "loss": 0.1487, "step": 38770 }, { "epoch": 0.8401404025740472, "grad_norm": 2.0643928050994873, "learning_rate": 1.2348090959166858e-06, "loss": 0.1676, "step": 38775 }, { "epoch": 0.840248737893528, "grad_norm": 0.7269030213356018, "learning_rate": 1.2331712926163896e-06, "loss": 0.1469, "step": 38780 }, { "epoch": 0.8403570732130089, "grad_norm": 1.1912719011306763, "learning_rate": 1.2315345048218763e-06, "loss": 0.1714, "step": 38785 }, { "epoch": 0.8404654085324897, "grad_norm": 1.9070640802383423, "learning_rate": 1.2298987327227352e-06, "loss": 0.1634, "step": 38790 }, { "epoch": 0.8405737438519706, "grad_norm": 1.2125680446624756, "learning_rate": 1.2282639765084524e-06, "loss": 0.1356, "step": 38795 }, { "epoch": 0.8406820791714514, "grad_norm": 1.583865761756897, "learning_rate": 1.2266302363683857e-06, "loss": 0.1705, "step": 38800 }, { "epoch": 0.8407904144909323, "grad_norm": 0.35827749967575073, "learning_rate": 1.2249975124917822e-06, "loss": 0.1493, "step": 38805 }, { "epoch": 0.8408987498104132, "grad_norm": 2.0124118328094482, "learning_rate": 1.2233658050677656e-06, "loss": 0.0904, "step": 38810 }, { "epoch": 0.8410070851298941, "grad_norm": 0.8909143209457397, "learning_rate": 1.2217351142853474e-06, "loss": 0.2035, "step": 38815 }, { "epoch": 0.8411154204493749, "grad_norm": 1.3830336332321167, "learning_rate": 1.2201054403334189e-06, "loss": 0.1689, "step": 38820 }, { "epoch": 0.8412237557688558, "grad_norm": 1.740132212638855, "learning_rate": 1.2184767834007506e-06, "loss": 0.1094, "step": 38825 }, { "epoch": 0.8413320910883366, "grad_norm": 1.8406285047531128, "learning_rate": 1.2168491436760011e-06, "loss": 0.2492, "step": 38830 }, { "epoch": 0.8414404264078175, "grad_norm": 1.3630059957504272, "learning_rate": 1.2152225213477054e-06, "loss": 0.0933, "step": 38835 }, { "epoch": 0.8415487617272983, "grad_norm": 1.0799980163574219, "learning_rate": 1.2135969166042872e-06, "loss": 0.155, "step": 38840 }, { "epoch": 0.8416570970467792, "grad_norm": 1.27134370803833, "learning_rate": 1.2119723296340424e-06, "loss": 0.1795, "step": 38845 }, { "epoch": 0.84176543236626, "grad_norm": 1.5853471755981445, "learning_rate": 1.210348760625162e-06, "loss": 0.2103, "step": 38850 }, { "epoch": 0.841873767685741, "grad_norm": 0.4363458752632141, "learning_rate": 1.208726209765706e-06, "loss": 0.1282, "step": 38855 }, { "epoch": 0.8419821030052218, "grad_norm": 1.2900062799453735, "learning_rate": 1.2071046772436246e-06, "loss": 0.2778, "step": 38860 }, { "epoch": 0.8420904383247027, "grad_norm": 0.920364260673523, "learning_rate": 1.2054841632467517e-06, "loss": 0.1597, "step": 38865 }, { "epoch": 0.8421987736441835, "grad_norm": 1.2194358110427856, "learning_rate": 1.2038646679627953e-06, "loss": 0.1576, "step": 38870 }, { "epoch": 0.8423071089636643, "grad_norm": 1.235669493675232, "learning_rate": 1.2022461915793515e-06, "loss": 0.1377, "step": 38875 }, { "epoch": 0.8424154442831452, "grad_norm": 1.1337639093399048, "learning_rate": 1.2006287342838952e-06, "loss": 0.1399, "step": 38880 }, { "epoch": 0.842523779602626, "grad_norm": 1.1959493160247803, "learning_rate": 1.1990122962637872e-06, "loss": 0.1583, "step": 38885 }, { "epoch": 0.8426321149221069, "grad_norm": 2.005531072616577, "learning_rate": 1.1973968777062662e-06, "loss": 0.1297, "step": 38890 }, { "epoch": 0.8427404502415877, "grad_norm": 1.1327893733978271, "learning_rate": 1.1957824787984508e-06, "loss": 0.1871, "step": 38895 }, { "epoch": 0.8428487855610686, "grad_norm": 1.46401846408844, "learning_rate": 1.1941690997273514e-06, "loss": 0.2045, "step": 38900 }, { "epoch": 0.8429571208805495, "grad_norm": 1.657429814338684, "learning_rate": 1.1925567406798456e-06, "loss": 0.1994, "step": 38905 }, { "epoch": 0.8430654562000304, "grad_norm": 1.9996658563613892, "learning_rate": 1.1909454018427103e-06, "loss": 0.2169, "step": 38910 }, { "epoch": 0.8431737915195112, "grad_norm": 1.9837418794631958, "learning_rate": 1.1893350834025885e-06, "loss": 0.1752, "step": 38915 }, { "epoch": 0.8432821268389921, "grad_norm": 1.6719532012939453, "learning_rate": 1.1877257855460156e-06, "loss": 0.072, "step": 38920 }, { "epoch": 0.8433904621584729, "grad_norm": 1.9780857563018799, "learning_rate": 1.1861175084594022e-06, "loss": 0.1188, "step": 38925 }, { "epoch": 0.8434987974779538, "grad_norm": 1.3741636276245117, "learning_rate": 1.184510252329042e-06, "loss": 0.1263, "step": 38930 }, { "epoch": 0.8436071327974346, "grad_norm": 0.7113772630691528, "learning_rate": 1.1829040173411144e-06, "loss": 0.1097, "step": 38935 }, { "epoch": 0.8437154681169154, "grad_norm": 0.7108696699142456, "learning_rate": 1.1812988036816741e-06, "loss": 0.1737, "step": 38940 }, { "epoch": 0.8438238034363963, "grad_norm": 1.1880041360855103, "learning_rate": 1.1796946115366658e-06, "loss": 0.1764, "step": 38945 }, { "epoch": 0.8439321387558771, "grad_norm": 1.0081318616867065, "learning_rate": 1.1780914410919075e-06, "loss": 0.1786, "step": 38950 }, { "epoch": 0.8440404740753581, "grad_norm": 1.0028342008590698, "learning_rate": 1.1764892925331018e-06, "loss": 0.1777, "step": 38955 }, { "epoch": 0.8441488093948389, "grad_norm": 1.6158723831176758, "learning_rate": 1.174888166045839e-06, "loss": 0.1516, "step": 38960 }, { "epoch": 0.8442571447143198, "grad_norm": 1.0349000692367554, "learning_rate": 1.1732880618155784e-06, "loss": 0.1887, "step": 38965 }, { "epoch": 0.8443654800338006, "grad_norm": 1.5509575605392456, "learning_rate": 1.1716889800276753e-06, "loss": 0.1701, "step": 38970 }, { "epoch": 0.8444738153532815, "grad_norm": 0.6745392084121704, "learning_rate": 1.1700909208673528e-06, "loss": 0.1218, "step": 38975 }, { "epoch": 0.8445821506727623, "grad_norm": 0.9672269225120544, "learning_rate": 1.1684938845197269e-06, "loss": 0.0963, "step": 38980 }, { "epoch": 0.8446904859922432, "grad_norm": 1.856703281402588, "learning_rate": 1.1668978711697875e-06, "loss": 0.1222, "step": 38985 }, { "epoch": 0.844798821311724, "grad_norm": 0.5079423189163208, "learning_rate": 1.1653028810024114e-06, "loss": 0.1404, "step": 38990 }, { "epoch": 0.8449071566312049, "grad_norm": 1.7230503559112549, "learning_rate": 1.1637089142023506e-06, "loss": 0.1484, "step": 38995 }, { "epoch": 0.8450154919506858, "grad_norm": 0.8660928010940552, "learning_rate": 1.162115970954244e-06, "loss": 0.167, "step": 39000 }, { "epoch": 0.8451238272701667, "grad_norm": 1.5382344722747803, "learning_rate": 1.160524051442613e-06, "loss": 0.1916, "step": 39005 }, { "epoch": 0.8452321625896475, "grad_norm": 1.5004998445510864, "learning_rate": 1.1589331558518535e-06, "loss": 0.1484, "step": 39010 }, { "epoch": 0.8453404979091284, "grad_norm": 1.3926318883895874, "learning_rate": 1.1573432843662513e-06, "loss": 0.1444, "step": 39015 }, { "epoch": 0.8454488332286092, "grad_norm": 1.7518997192382812, "learning_rate": 1.1557544371699635e-06, "loss": 0.3059, "step": 39020 }, { "epoch": 0.84555716854809, "grad_norm": 1.7785190343856812, "learning_rate": 1.1541666144470398e-06, "loss": 0.2517, "step": 39025 }, { "epoch": 0.8456655038675709, "grad_norm": 1.239648461341858, "learning_rate": 1.1525798163814016e-06, "loss": 0.2011, "step": 39030 }, { "epoch": 0.8457738391870517, "grad_norm": 0.7844147086143494, "learning_rate": 1.1509940431568588e-06, "loss": 0.1517, "step": 39035 }, { "epoch": 0.8458821745065326, "grad_norm": 2.232342481613159, "learning_rate": 1.1494092949570968e-06, "loss": 0.1924, "step": 39040 }, { "epoch": 0.8459905098260134, "grad_norm": 1.4131678342819214, "learning_rate": 1.1478255719656872e-06, "loss": 0.168, "step": 39045 }, { "epoch": 0.8460988451454944, "grad_norm": 1.5196956396102905, "learning_rate": 1.1462428743660781e-06, "loss": 0.0866, "step": 39050 }, { "epoch": 0.8462071804649752, "grad_norm": 2.0476841926574707, "learning_rate": 1.1446612023416026e-06, "loss": 0.1442, "step": 39055 }, { "epoch": 0.8463155157844561, "grad_norm": 1.4051650762557983, "learning_rate": 1.1430805560754765e-06, "loss": 0.201, "step": 39060 }, { "epoch": 0.8464238511039369, "grad_norm": 1.100538969039917, "learning_rate": 1.1415009357507879e-06, "loss": 0.1403, "step": 39065 }, { "epoch": 0.8465321864234178, "grad_norm": 1.2193244695663452, "learning_rate": 1.139922341550519e-06, "loss": 0.1774, "step": 39070 }, { "epoch": 0.8466405217428986, "grad_norm": 1.8792227506637573, "learning_rate": 1.1383447736575193e-06, "loss": 0.1502, "step": 39075 }, { "epoch": 0.8467488570623795, "grad_norm": 2.2587926387786865, "learning_rate": 1.1367682322545336e-06, "loss": 0.1301, "step": 39080 }, { "epoch": 0.8468571923818603, "grad_norm": 0.7586103081703186, "learning_rate": 1.135192717524174e-06, "loss": 0.1769, "step": 39085 }, { "epoch": 0.8469655277013411, "grad_norm": 0.7740786671638489, "learning_rate": 1.1336182296489452e-06, "loss": 0.1486, "step": 39090 }, { "epoch": 0.847073863020822, "grad_norm": 1.396209716796875, "learning_rate": 1.1320447688112269e-06, "loss": 0.2619, "step": 39095 }, { "epoch": 0.847182198340303, "grad_norm": 0.6721225380897522, "learning_rate": 1.1304723351932757e-06, "loss": 0.1061, "step": 39100 }, { "epoch": 0.8472905336597838, "grad_norm": 1.5471813678741455, "learning_rate": 1.1289009289772434e-06, "loss": 0.1159, "step": 39105 }, { "epoch": 0.8473988689792646, "grad_norm": 1.1608487367630005, "learning_rate": 1.1273305503451471e-06, "loss": 0.1248, "step": 39110 }, { "epoch": 0.8475072042987455, "grad_norm": 1.951636552810669, "learning_rate": 1.1257611994788953e-06, "loss": 0.2401, "step": 39115 }, { "epoch": 0.8476155396182263, "grad_norm": 0.801329493522644, "learning_rate": 1.1241928765602705e-06, "loss": 0.1769, "step": 39120 }, { "epoch": 0.8477238749377072, "grad_norm": 1.3887391090393066, "learning_rate": 1.1226255817709442e-06, "loss": 0.124, "step": 39125 }, { "epoch": 0.847832210257188, "grad_norm": 2.5047366619110107, "learning_rate": 1.1210593152924608e-06, "loss": 0.2049, "step": 39130 }, { "epoch": 0.8479405455766689, "grad_norm": 1.1766287088394165, "learning_rate": 1.119494077306248e-06, "loss": 0.1031, "step": 39135 }, { "epoch": 0.8480488808961497, "grad_norm": 1.1012307405471802, "learning_rate": 1.1179298679936168e-06, "loss": 0.1178, "step": 39140 }, { "epoch": 0.8481572162156307, "grad_norm": 1.6632883548736572, "learning_rate": 1.1163666875357538e-06, "loss": 0.2065, "step": 39145 }, { "epoch": 0.8482655515351115, "grad_norm": 1.475389003753662, "learning_rate": 1.114804536113737e-06, "loss": 0.194, "step": 39150 }, { "epoch": 0.8483738868545924, "grad_norm": 1.9258309602737427, "learning_rate": 1.1132434139085136e-06, "loss": 0.0792, "step": 39155 }, { "epoch": 0.8484822221740732, "grad_norm": 0.9313329458236694, "learning_rate": 1.111683321100918e-06, "loss": 0.1868, "step": 39160 }, { "epoch": 0.848590557493554, "grad_norm": 1.7089868783950806, "learning_rate": 1.1101242578716608e-06, "loss": 0.1578, "step": 39165 }, { "epoch": 0.8486988928130349, "grad_norm": 0.8817110657691956, "learning_rate": 1.1085662244013407e-06, "loss": 0.125, "step": 39170 }, { "epoch": 0.8488072281325157, "grad_norm": 0.7784671783447266, "learning_rate": 1.1070092208704286e-06, "loss": 0.1676, "step": 39175 }, { "epoch": 0.8489155634519966, "grad_norm": 0.7986711263656616, "learning_rate": 1.1054532474592805e-06, "loss": 0.1704, "step": 39180 }, { "epoch": 0.8490238987714774, "grad_norm": 1.065043568611145, "learning_rate": 1.1038983043481345e-06, "loss": 0.1466, "step": 39185 }, { "epoch": 0.8491322340909583, "grad_norm": 1.644760012626648, "learning_rate": 1.102344391717104e-06, "loss": 0.1276, "step": 39190 }, { "epoch": 0.8492405694104392, "grad_norm": 1.066691517829895, "learning_rate": 1.1007915097461896e-06, "loss": 0.2183, "step": 39195 }, { "epoch": 0.8493489047299201, "grad_norm": 1.070061206817627, "learning_rate": 1.0992396586152687e-06, "loss": 0.0723, "step": 39200 }, { "epoch": 0.8494572400494009, "grad_norm": 1.1368228197097778, "learning_rate": 1.0976888385041018e-06, "loss": 0.1369, "step": 39205 }, { "epoch": 0.8495655753688818, "grad_norm": 0.9279648661613464, "learning_rate": 1.0961390495923264e-06, "loss": 0.1952, "step": 39210 }, { "epoch": 0.8496739106883626, "grad_norm": 1.5546684265136719, "learning_rate": 1.0945902920594598e-06, "loss": 0.1813, "step": 39215 }, { "epoch": 0.8497822460078435, "grad_norm": 1.705427885055542, "learning_rate": 1.0930425660849076e-06, "loss": 0.1136, "step": 39220 }, { "epoch": 0.8498905813273243, "grad_norm": 1.9889975786209106, "learning_rate": 1.0914958718479452e-06, "loss": 0.1832, "step": 39225 }, { "epoch": 0.8499989166468052, "grad_norm": 1.2919358015060425, "learning_rate": 1.0899502095277393e-06, "loss": 0.1881, "step": 39230 }, { "epoch": 0.850107251966286, "grad_norm": 1.0815339088439941, "learning_rate": 1.0884055793033266e-06, "loss": 0.1079, "step": 39235 }, { "epoch": 0.850215587285767, "grad_norm": 1.629321575164795, "learning_rate": 1.086861981353633e-06, "loss": 0.1178, "step": 39240 }, { "epoch": 0.8503239226052478, "grad_norm": 1.2376841306686401, "learning_rate": 1.085319415857461e-06, "loss": 0.2027, "step": 39245 }, { "epoch": 0.8504322579247287, "grad_norm": 0.9439263343811035, "learning_rate": 1.0837778829934908e-06, "loss": 0.1251, "step": 39250 }, { "epoch": 0.8505405932442095, "grad_norm": 1.1825413703918457, "learning_rate": 1.0822373829402899e-06, "loss": 0.1306, "step": 39255 }, { "epoch": 0.8506489285636903, "grad_norm": 1.235314130783081, "learning_rate": 1.0806979158762976e-06, "loss": 0.1475, "step": 39260 }, { "epoch": 0.8507572638831712, "grad_norm": 0.5720102787017822, "learning_rate": 1.0791594819798435e-06, "loss": 0.1068, "step": 39265 }, { "epoch": 0.850865599202652, "grad_norm": 1.6432092189788818, "learning_rate": 1.0776220814291272e-06, "loss": 0.1519, "step": 39270 }, { "epoch": 0.8509739345221329, "grad_norm": 1.5048844814300537, "learning_rate": 1.0760857144022373e-06, "loss": 0.1681, "step": 39275 }, { "epoch": 0.8510822698416137, "grad_norm": 1.4326589107513428, "learning_rate": 1.0745503810771352e-06, "loss": 0.1943, "step": 39280 }, { "epoch": 0.8511906051610946, "grad_norm": 1.452077865600586, "learning_rate": 1.0730160816316692e-06, "loss": 0.1665, "step": 39285 }, { "epoch": 0.8512989404805755, "grad_norm": 1.3833283185958862, "learning_rate": 1.071482816243563e-06, "loss": 0.197, "step": 39290 }, { "epoch": 0.8514072758000564, "grad_norm": 2.031679153442383, "learning_rate": 1.0699505850904234e-06, "loss": 0.1242, "step": 39295 }, { "epoch": 0.8515156111195372, "grad_norm": 1.76611328125, "learning_rate": 1.0684193883497385e-06, "loss": 0.1246, "step": 39300 }, { "epoch": 0.8516239464390181, "grad_norm": 1.129706859588623, "learning_rate": 1.0668892261988706e-06, "loss": 0.1744, "step": 39305 }, { "epoch": 0.8517322817584989, "grad_norm": 1.7617467641830444, "learning_rate": 1.0653600988150692e-06, "loss": 0.1379, "step": 39310 }, { "epoch": 0.8518406170779798, "grad_norm": 1.1650053262710571, "learning_rate": 1.063832006375457e-06, "loss": 0.1254, "step": 39315 }, { "epoch": 0.8519489523974606, "grad_norm": 1.9245473146438599, "learning_rate": 1.0623049490570458e-06, "loss": 0.2136, "step": 39320 }, { "epoch": 0.8520572877169414, "grad_norm": 1.67019784450531, "learning_rate": 1.0607789270367176e-06, "loss": 0.1946, "step": 39325 }, { "epoch": 0.8521656230364223, "grad_norm": 1.2618342638015747, "learning_rate": 1.0592539404912426e-06, "loss": 0.1406, "step": 39330 }, { "epoch": 0.8522739583559031, "grad_norm": 1.2345194816589355, "learning_rate": 1.0577299895972648e-06, "loss": 0.1444, "step": 39335 }, { "epoch": 0.8523822936753841, "grad_norm": 0.38306093215942383, "learning_rate": 1.0562070745313124e-06, "loss": 0.0839, "step": 39340 }, { "epoch": 0.8524906289948649, "grad_norm": 1.8184125423431396, "learning_rate": 1.0546851954697946e-06, "loss": 0.1037, "step": 39345 }, { "epoch": 0.8525989643143458, "grad_norm": 1.0187684297561646, "learning_rate": 1.0531643525889945e-06, "loss": 0.1256, "step": 39350 }, { "epoch": 0.8527072996338266, "grad_norm": 1.3626267910003662, "learning_rate": 1.0516445460650814e-06, "loss": 0.0983, "step": 39355 }, { "epoch": 0.8528156349533075, "grad_norm": 0.9987910389900208, "learning_rate": 1.0501257760741002e-06, "loss": 0.2117, "step": 39360 }, { "epoch": 0.8529239702727883, "grad_norm": 1.1073609590530396, "learning_rate": 1.0486080427919798e-06, "loss": 0.2148, "step": 39365 }, { "epoch": 0.8530323055922692, "grad_norm": 1.5280102491378784, "learning_rate": 1.0470913463945243e-06, "loss": 0.1605, "step": 39370 }, { "epoch": 0.85314064091175, "grad_norm": 1.4497357606887817, "learning_rate": 1.0455756870574242e-06, "loss": 0.1781, "step": 39375 }, { "epoch": 0.8532489762312309, "grad_norm": 1.5523865222930908, "learning_rate": 1.0440610649562433e-06, "loss": 0.1534, "step": 39380 }, { "epoch": 0.8533573115507118, "grad_norm": 1.521357774734497, "learning_rate": 1.0425474802664237e-06, "loss": 0.1034, "step": 39385 }, { "epoch": 0.8534656468701927, "grad_norm": 1.1199759244918823, "learning_rate": 1.0410349331633008e-06, "loss": 0.2019, "step": 39390 }, { "epoch": 0.8535739821896735, "grad_norm": 0.8734356760978699, "learning_rate": 1.0395234238220718e-06, "loss": 0.0941, "step": 39395 }, { "epoch": 0.8536823175091544, "grad_norm": 1.475605845451355, "learning_rate": 1.0380129524178295e-06, "loss": 0.1784, "step": 39400 }, { "epoch": 0.8537906528286352, "grad_norm": 1.5897133350372314, "learning_rate": 1.0365035191255346e-06, "loss": 0.2314, "step": 39405 }, { "epoch": 0.853898988148116, "grad_norm": 0.8545833826065063, "learning_rate": 1.034995124120035e-06, "loss": 0.1254, "step": 39410 }, { "epoch": 0.8540073234675969, "grad_norm": 0.8432884216308594, "learning_rate": 1.0334877675760545e-06, "loss": 0.1631, "step": 39415 }, { "epoch": 0.8541156587870777, "grad_norm": 1.722882866859436, "learning_rate": 1.0319814496681957e-06, "loss": 0.1251, "step": 39420 }, { "epoch": 0.8542239941065586, "grad_norm": 1.615465760231018, "learning_rate": 1.0304761705709477e-06, "loss": 0.166, "step": 39425 }, { "epoch": 0.8543323294260394, "grad_norm": 0.9895012974739075, "learning_rate": 1.0289719304586688e-06, "loss": 0.1493, "step": 39430 }, { "epoch": 0.8544406647455204, "grad_norm": 0.4903734028339386, "learning_rate": 1.0274687295056063e-06, "loss": 0.1499, "step": 39435 }, { "epoch": 0.8545490000650012, "grad_norm": 1.1652835607528687, "learning_rate": 1.0259665678858831e-06, "loss": 0.1538, "step": 39440 }, { "epoch": 0.8546573353844821, "grad_norm": 1.2034703493118286, "learning_rate": 1.0244654457735048e-06, "loss": 0.128, "step": 39445 }, { "epoch": 0.8547656707039629, "grad_norm": 0.6963739991188049, "learning_rate": 1.0229653633423498e-06, "loss": 0.1642, "step": 39450 }, { "epoch": 0.8548740060234438, "grad_norm": 1.104596734046936, "learning_rate": 1.0214663207661802e-06, "loss": 0.0833, "step": 39455 }, { "epoch": 0.8549823413429246, "grad_norm": 0.8282214403152466, "learning_rate": 1.0199683182186404e-06, "loss": 0.1164, "step": 39460 }, { "epoch": 0.8550906766624055, "grad_norm": 1.149441123008728, "learning_rate": 1.0184713558732484e-06, "loss": 0.2727, "step": 39465 }, { "epoch": 0.8551990119818863, "grad_norm": 1.1876018047332764, "learning_rate": 1.0169754339034088e-06, "loss": 0.1704, "step": 39470 }, { "epoch": 0.8553073473013671, "grad_norm": 1.510985016822815, "learning_rate": 1.0154805524823974e-06, "loss": 0.1316, "step": 39475 }, { "epoch": 0.855415682620848, "grad_norm": 1.3153140544891357, "learning_rate": 1.0139867117833769e-06, "loss": 0.1548, "step": 39480 }, { "epoch": 0.855524017940329, "grad_norm": 0.8893576860427856, "learning_rate": 1.0124939119793843e-06, "loss": 0.1369, "step": 39485 }, { "epoch": 0.8556323532598098, "grad_norm": 1.438761591911316, "learning_rate": 1.011002153243339e-06, "loss": 0.1507, "step": 39490 }, { "epoch": 0.8557406885792906, "grad_norm": 1.0717920064926147, "learning_rate": 1.0095114357480418e-06, "loss": 0.1734, "step": 39495 }, { "epoch": 0.8558490238987715, "grad_norm": 1.215254545211792, "learning_rate": 1.0080217596661645e-06, "loss": 0.1318, "step": 39500 }, { "epoch": 0.8559573592182523, "grad_norm": 0.9123037457466125, "learning_rate": 1.006533125170268e-06, "loss": 0.1375, "step": 39505 }, { "epoch": 0.8560656945377332, "grad_norm": 0.9482547044754028, "learning_rate": 1.0050455324327857e-06, "loss": 0.1492, "step": 39510 }, { "epoch": 0.856174029857214, "grad_norm": 1.6183772087097168, "learning_rate": 1.003558981626036e-06, "loss": 0.2016, "step": 39515 }, { "epoch": 0.8562823651766949, "grad_norm": 1.2207099199295044, "learning_rate": 1.0020734729222093e-06, "loss": 0.125, "step": 39520 }, { "epoch": 0.8563907004961757, "grad_norm": 0.9753361940383911, "learning_rate": 1.0005890064933833e-06, "loss": 0.1607, "step": 39525 }, { "epoch": 0.8564990358156567, "grad_norm": 1.3874574899673462, "learning_rate": 9.991055825115082e-07, "loss": 0.1698, "step": 39530 }, { "epoch": 0.8566073711351375, "grad_norm": 1.09773588180542, "learning_rate": 9.976232011484188e-07, "loss": 0.1882, "step": 39535 }, { "epoch": 0.8567157064546184, "grad_norm": 0.9982890486717224, "learning_rate": 9.961418625758269e-07, "loss": 0.1303, "step": 39540 }, { "epoch": 0.8568240417740992, "grad_norm": 2.1878223419189453, "learning_rate": 9.946615669653204e-07, "loss": 0.197, "step": 39545 }, { "epoch": 0.85693237709358, "grad_norm": 1.0985561609268188, "learning_rate": 9.931823144883745e-07, "loss": 0.0916, "step": 39550 }, { "epoch": 0.8570407124130609, "grad_norm": 1.5116504430770874, "learning_rate": 9.917041053163322e-07, "loss": 0.1791, "step": 39555 }, { "epoch": 0.8571490477325417, "grad_norm": 0.7082956433296204, "learning_rate": 9.902269396204278e-07, "loss": 0.1201, "step": 39560 }, { "epoch": 0.8572573830520226, "grad_norm": 1.6021596193313599, "learning_rate": 9.88750817571763e-07, "loss": 0.105, "step": 39565 }, { "epoch": 0.8573657183715034, "grad_norm": 0.8571248054504395, "learning_rate": 9.872757393413302e-07, "loss": 0.1485, "step": 39570 }, { "epoch": 0.8574740536909843, "grad_norm": 0.8402720093727112, "learning_rate": 9.858017050999902e-07, "loss": 0.156, "step": 39575 }, { "epoch": 0.8575823890104652, "grad_norm": 0.47217193245887756, "learning_rate": 9.84328715018491e-07, "loss": 0.0978, "step": 39580 }, { "epoch": 0.8576907243299461, "grad_norm": 1.308687448501587, "learning_rate": 9.828567692674563e-07, "loss": 0.1363, "step": 39585 }, { "epoch": 0.8577990596494269, "grad_norm": 1.0534011125564575, "learning_rate": 9.81385868017386e-07, "loss": 0.1619, "step": 39590 }, { "epoch": 0.8579073949689078, "grad_norm": 1.1049190759658813, "learning_rate": 9.799160114386664e-07, "loss": 0.1847, "step": 39595 }, { "epoch": 0.8580157302883886, "grad_norm": 0.9614110589027405, "learning_rate": 9.784471997015542e-07, "loss": 0.0773, "step": 39600 }, { "epoch": 0.8581240656078695, "grad_norm": 1.1848851442337036, "learning_rate": 9.769794329761928e-07, "loss": 0.1352, "step": 39605 }, { "epoch": 0.8582324009273503, "grad_norm": 1.204484462738037, "learning_rate": 9.75512711432598e-07, "loss": 0.2101, "step": 39610 }, { "epoch": 0.8583407362468312, "grad_norm": 1.6266118288040161, "learning_rate": 9.740470352406695e-07, "loss": 0.1693, "step": 39615 }, { "epoch": 0.858449071566312, "grad_norm": 0.9682551026344299, "learning_rate": 9.725824045701838e-07, "loss": 0.1361, "step": 39620 }, { "epoch": 0.8585574068857929, "grad_norm": 0.9609023332595825, "learning_rate": 9.71118819590794e-07, "loss": 0.1421, "step": 39625 }, { "epoch": 0.8586657422052738, "grad_norm": 1.762377142906189, "learning_rate": 9.696562804720367e-07, "loss": 0.141, "step": 39630 }, { "epoch": 0.8587740775247547, "grad_norm": 0.8001314997673035, "learning_rate": 9.681947873833243e-07, "loss": 0.1604, "step": 39635 }, { "epoch": 0.8588824128442355, "grad_norm": 2.475306510925293, "learning_rate": 9.667343404939511e-07, "loss": 0.1694, "step": 39640 }, { "epoch": 0.8589907481637163, "grad_norm": 2.015974283218384, "learning_rate": 9.652749399730843e-07, "loss": 0.1408, "step": 39645 }, { "epoch": 0.8590990834831972, "grad_norm": 0.6922505497932434, "learning_rate": 9.638165859897774e-07, "loss": 0.1335, "step": 39650 }, { "epoch": 0.859207418802678, "grad_norm": 2.196361780166626, "learning_rate": 9.623592787129576e-07, "loss": 0.1698, "step": 39655 }, { "epoch": 0.8593157541221589, "grad_norm": 1.6395312547683716, "learning_rate": 9.609030183114287e-07, "loss": 0.2066, "step": 39660 }, { "epoch": 0.8594240894416397, "grad_norm": 0.9878327250480652, "learning_rate": 9.594478049538824e-07, "loss": 0.1601, "step": 39665 }, { "epoch": 0.8595324247611206, "grad_norm": 1.6236543655395508, "learning_rate": 9.579936388088773e-07, "loss": 0.1182, "step": 39670 }, { "epoch": 0.8596407600806015, "grad_norm": 1.932865023612976, "learning_rate": 9.565405200448607e-07, "loss": 0.2252, "step": 39675 }, { "epoch": 0.8597490954000824, "grad_norm": 1.1332064867019653, "learning_rate": 9.550884488301537e-07, "loss": 0.1812, "step": 39680 }, { "epoch": 0.8598574307195632, "grad_norm": 1.2315653562545776, "learning_rate": 9.536374253329594e-07, "loss": 0.1872, "step": 39685 }, { "epoch": 0.8599657660390441, "grad_norm": 1.5899763107299805, "learning_rate": 9.521874497213546e-07, "loss": 0.2207, "step": 39690 }, { "epoch": 0.8600741013585249, "grad_norm": 0.9848626852035522, "learning_rate": 9.507385221632959e-07, "loss": 0.1856, "step": 39695 }, { "epoch": 0.8601824366780058, "grad_norm": 1.4033540487289429, "learning_rate": 9.492906428266224e-07, "loss": 0.1811, "step": 39700 }, { "epoch": 0.8602907719974866, "grad_norm": 0.6226968765258789, "learning_rate": 9.478438118790467e-07, "loss": 0.1504, "step": 39705 }, { "epoch": 0.8603991073169674, "grad_norm": 1.3489283323287964, "learning_rate": 9.463980294881669e-07, "loss": 0.241, "step": 39710 }, { "epoch": 0.8605074426364483, "grad_norm": 1.8120800256729126, "learning_rate": 9.449532958214503e-07, "loss": 0.2669, "step": 39715 }, { "epoch": 0.8606157779559291, "grad_norm": 2.895766258239746, "learning_rate": 9.435096110462516e-07, "loss": 0.2241, "step": 39720 }, { "epoch": 0.8607241132754101, "grad_norm": 1.330260992050171, "learning_rate": 9.42066975329795e-07, "loss": 0.1634, "step": 39725 }, { "epoch": 0.8608324485948909, "grad_norm": 1.0837476253509521, "learning_rate": 9.406253888391937e-07, "loss": 0.1146, "step": 39730 }, { "epoch": 0.8609407839143718, "grad_norm": 0.35410353541374207, "learning_rate": 9.391848517414315e-07, "loss": 0.1355, "step": 39735 }, { "epoch": 0.8610491192338526, "grad_norm": 1.783860445022583, "learning_rate": 9.377453642033729e-07, "loss": 0.1223, "step": 39740 }, { "epoch": 0.8611574545533335, "grad_norm": 1.3693389892578125, "learning_rate": 9.363069263917623e-07, "loss": 0.1275, "step": 39745 }, { "epoch": 0.8612657898728143, "grad_norm": 1.4386005401611328, "learning_rate": 9.348695384732188e-07, "loss": 0.1868, "step": 39750 }, { "epoch": 0.8613741251922952, "grad_norm": 0.5456451773643494, "learning_rate": 9.334332006142455e-07, "loss": 0.1697, "step": 39755 }, { "epoch": 0.861482460511776, "grad_norm": 1.4282581806182861, "learning_rate": 9.319979129812162e-07, "loss": 0.1515, "step": 39760 }, { "epoch": 0.8615907958312569, "grad_norm": 0.981681227684021, "learning_rate": 9.305636757403924e-07, "loss": 0.1102, "step": 39765 }, { "epoch": 0.8616991311507378, "grad_norm": 1.0581480264663696, "learning_rate": 9.291304890579045e-07, "loss": 0.1999, "step": 39770 }, { "epoch": 0.8618074664702187, "grad_norm": 1.999383807182312, "learning_rate": 9.276983530997685e-07, "loss": 0.1651, "step": 39775 }, { "epoch": 0.8619158017896995, "grad_norm": 2.3896729946136475, "learning_rate": 9.262672680318763e-07, "loss": 0.1965, "step": 39780 }, { "epoch": 0.8620241371091804, "grad_norm": 1.1820570230484009, "learning_rate": 9.248372340199952e-07, "loss": 0.1435, "step": 39785 }, { "epoch": 0.8621324724286612, "grad_norm": 1.4020800590515137, "learning_rate": 9.23408251229776e-07, "loss": 0.1315, "step": 39790 }, { "epoch": 0.862240807748142, "grad_norm": 1.320985198020935, "learning_rate": 9.219803198267418e-07, "loss": 0.1555, "step": 39795 }, { "epoch": 0.8623491430676229, "grad_norm": 1.2761375904083252, "learning_rate": 9.205534399763016e-07, "loss": 0.1564, "step": 39800 }, { "epoch": 0.8624574783871037, "grad_norm": 0.587694525718689, "learning_rate": 9.19127611843732e-07, "loss": 0.1203, "step": 39805 }, { "epoch": 0.8625658137065846, "grad_norm": 2.8087196350097656, "learning_rate": 9.177028355941997e-07, "loss": 0.1835, "step": 39810 }, { "epoch": 0.8626741490260654, "grad_norm": 1.1249752044677734, "learning_rate": 9.162791113927394e-07, "loss": 0.159, "step": 39815 }, { "epoch": 0.8627824843455464, "grad_norm": 1.5901237726211548, "learning_rate": 9.148564394042703e-07, "loss": 0.1777, "step": 39820 }, { "epoch": 0.8628908196650272, "grad_norm": 1.746673583984375, "learning_rate": 9.134348197935882e-07, "loss": 0.1569, "step": 39825 }, { "epoch": 0.8629991549845081, "grad_norm": 1.7257455587387085, "learning_rate": 9.120142527253639e-07, "loss": 0.14, "step": 39830 }, { "epoch": 0.8631074903039889, "grad_norm": 0.8886482119560242, "learning_rate": 9.105947383641523e-07, "loss": 0.1686, "step": 39835 }, { "epoch": 0.8632158256234698, "grad_norm": 1.0584704875946045, "learning_rate": 9.091762768743794e-07, "loss": 0.1768, "step": 39840 }, { "epoch": 0.8633241609429506, "grad_norm": 1.439193606376648, "learning_rate": 9.077588684203553e-07, "loss": 0.1967, "step": 39845 }, { "epoch": 0.8634324962624315, "grad_norm": 1.581250548362732, "learning_rate": 9.06342513166264e-07, "loss": 0.1924, "step": 39850 }, { "epoch": 0.8635408315819123, "grad_norm": 1.659947156906128, "learning_rate": 9.049272112761697e-07, "loss": 0.1094, "step": 39855 }, { "epoch": 0.8636491669013932, "grad_norm": 2.9010870456695557, "learning_rate": 9.03512962914015e-07, "loss": 0.1437, "step": 39860 }, { "epoch": 0.863757502220874, "grad_norm": 1.254956841468811, "learning_rate": 9.020997682436161e-07, "loss": 0.0976, "step": 39865 }, { "epoch": 0.863865837540355, "grad_norm": 1.5693795680999756, "learning_rate": 9.006876274286725e-07, "loss": 0.1813, "step": 39870 }, { "epoch": 0.8639741728598358, "grad_norm": 1.951953649520874, "learning_rate": 8.992765406327597e-07, "loss": 0.1696, "step": 39875 }, { "epoch": 0.8640825081793166, "grad_norm": 0.8778464198112488, "learning_rate": 8.978665080193328e-07, "loss": 0.1139, "step": 39880 }, { "epoch": 0.8641908434987975, "grad_norm": 1.3210721015930176, "learning_rate": 8.964575297517187e-07, "loss": 0.1469, "step": 39885 }, { "epoch": 0.8642991788182783, "grad_norm": 1.2880321741104126, "learning_rate": 8.950496059931313e-07, "loss": 0.1841, "step": 39890 }, { "epoch": 0.8644075141377592, "grad_norm": 1.47305166721344, "learning_rate": 8.936427369066536e-07, "loss": 0.1352, "step": 39895 }, { "epoch": 0.86451584945724, "grad_norm": 0.979810893535614, "learning_rate": 8.922369226552507e-07, "loss": 0.1007, "step": 39900 }, { "epoch": 0.8646241847767209, "grad_norm": 1.4960309267044067, "learning_rate": 8.908321634017681e-07, "loss": 0.1231, "step": 39905 }, { "epoch": 0.8647325200962017, "grad_norm": 0.2786593437194824, "learning_rate": 8.894284593089219e-07, "loss": 0.1648, "step": 39910 }, { "epoch": 0.8648408554156827, "grad_norm": 0.535730242729187, "learning_rate": 8.880258105393125e-07, "loss": 0.1087, "step": 39915 }, { "epoch": 0.8649491907351635, "grad_norm": 1.8573955297470093, "learning_rate": 8.866242172554151e-07, "loss": 0.1763, "step": 39920 }, { "epoch": 0.8650575260546444, "grad_norm": 1.8860830068588257, "learning_rate": 8.852236796195857e-07, "loss": 0.1207, "step": 39925 }, { "epoch": 0.8651658613741252, "grad_norm": 1.3966970443725586, "learning_rate": 8.838241977940542e-07, "loss": 0.1633, "step": 39930 }, { "epoch": 0.8652741966936061, "grad_norm": 1.5444930791854858, "learning_rate": 8.824257719409269e-07, "loss": 0.2008, "step": 39935 }, { "epoch": 0.8653825320130869, "grad_norm": 1.2476081848144531, "learning_rate": 8.810284022221938e-07, "loss": 0.1355, "step": 39940 }, { "epoch": 0.8654908673325677, "grad_norm": 1.3404048681259155, "learning_rate": 8.796320887997167e-07, "loss": 0.1545, "step": 39945 }, { "epoch": 0.8655992026520486, "grad_norm": 1.32175612449646, "learning_rate": 8.782368318352419e-07, "loss": 0.1179, "step": 39950 }, { "epoch": 0.8657075379715294, "grad_norm": 1.8957459926605225, "learning_rate": 8.768426314903832e-07, "loss": 0.2016, "step": 39955 }, { "epoch": 0.8658158732910103, "grad_norm": 0.7620193362236023, "learning_rate": 8.754494879266429e-07, "loss": 0.0876, "step": 39960 }, { "epoch": 0.8659242086104912, "grad_norm": 1.6727104187011719, "learning_rate": 8.740574013053916e-07, "loss": 0.2703, "step": 39965 }, { "epoch": 0.8660325439299721, "grad_norm": 1.5762248039245605, "learning_rate": 8.726663717878848e-07, "loss": 0.1724, "step": 39970 }, { "epoch": 0.8661408792494529, "grad_norm": 0.913321852684021, "learning_rate": 8.712763995352513e-07, "loss": 0.1609, "step": 39975 }, { "epoch": 0.8662492145689338, "grad_norm": 0.9881748557090759, "learning_rate": 8.698874847084981e-07, "loss": 0.1236, "step": 39980 }, { "epoch": 0.8663575498884146, "grad_norm": 1.2814186811447144, "learning_rate": 8.68499627468512e-07, "loss": 0.1808, "step": 39985 }, { "epoch": 0.8664658852078955, "grad_norm": 0.5791594982147217, "learning_rate": 8.671128279760532e-07, "loss": 0.0939, "step": 39990 }, { "epoch": 0.8665742205273763, "grad_norm": 1.5316886901855469, "learning_rate": 8.657270863917644e-07, "loss": 0.1795, "step": 39995 }, { "epoch": 0.8666825558468572, "grad_norm": 1.5251590013504028, "learning_rate": 8.643424028761582e-07, "loss": 0.2032, "step": 40000 }, { "epoch": 0.866790891166338, "grad_norm": 1.3702623844146729, "learning_rate": 8.629587775896353e-07, "loss": 0.1523, "step": 40005 }, { "epoch": 0.8668992264858189, "grad_norm": 1.3881224393844604, "learning_rate": 8.61576210692463e-07, "loss": 0.1694, "step": 40010 }, { "epoch": 0.8670075618052998, "grad_norm": 1.7494163513183594, "learning_rate": 8.601947023447932e-07, "loss": 0.1119, "step": 40015 }, { "epoch": 0.8671158971247807, "grad_norm": 1.0490692853927612, "learning_rate": 8.588142527066546e-07, "loss": 0.1129, "step": 40020 }, { "epoch": 0.8672242324442615, "grad_norm": 1.2471665143966675, "learning_rate": 8.57434861937948e-07, "loss": 0.1714, "step": 40025 }, { "epoch": 0.8673325677637423, "grad_norm": 0.2640661299228668, "learning_rate": 8.560565301984591e-07, "loss": 0.068, "step": 40030 }, { "epoch": 0.8674409030832232, "grad_norm": 1.332364797592163, "learning_rate": 8.546792576478435e-07, "loss": 0.2384, "step": 40035 }, { "epoch": 0.867549238402704, "grad_norm": 0.6578027009963989, "learning_rate": 8.533030444456403e-07, "loss": 0.2414, "step": 40040 }, { "epoch": 0.8676575737221849, "grad_norm": 0.5374659299850464, "learning_rate": 8.519278907512596e-07, "loss": 0.193, "step": 40045 }, { "epoch": 0.8677659090416657, "grad_norm": 1.4752302169799805, "learning_rate": 8.505537967239974e-07, "loss": 0.1369, "step": 40050 }, { "epoch": 0.8678742443611466, "grad_norm": 1.7700272798538208, "learning_rate": 8.491807625230164e-07, "loss": 0.1343, "step": 40055 }, { "epoch": 0.8679825796806275, "grad_norm": 1.8078371286392212, "learning_rate": 8.47808788307366e-07, "loss": 0.1573, "step": 40060 }, { "epoch": 0.8680909150001084, "grad_norm": 3.000532865524292, "learning_rate": 8.46437874235968e-07, "loss": 0.2057, "step": 40065 }, { "epoch": 0.8681992503195892, "grad_norm": 0.8680190443992615, "learning_rate": 8.45068020467621e-07, "loss": 0.1241, "step": 40070 }, { "epoch": 0.8683075856390701, "grad_norm": 1.8478569984436035, "learning_rate": 8.436992271610045e-07, "loss": 0.2189, "step": 40075 }, { "epoch": 0.8684159209585509, "grad_norm": 1.5719071626663208, "learning_rate": 8.423314944746697e-07, "loss": 0.1388, "step": 40080 }, { "epoch": 0.8685242562780318, "grad_norm": 0.7611844539642334, "learning_rate": 8.409648225670508e-07, "loss": 0.1864, "step": 40085 }, { "epoch": 0.8686325915975126, "grad_norm": 1.4060815572738647, "learning_rate": 8.395992115964536e-07, "loss": 0.2196, "step": 40090 }, { "epoch": 0.8687409269169935, "grad_norm": 1.7266708612442017, "learning_rate": 8.382346617210668e-07, "loss": 0.1483, "step": 40095 }, { "epoch": 0.8688492622364743, "grad_norm": 1.2156891822814941, "learning_rate": 8.368711730989499e-07, "loss": 0.2472, "step": 40100 }, { "epoch": 0.8689575975559551, "grad_norm": 1.3179398775100708, "learning_rate": 8.35508745888044e-07, "loss": 0.1604, "step": 40105 }, { "epoch": 0.8690659328754361, "grad_norm": 1.21015465259552, "learning_rate": 8.341473802461642e-07, "loss": 0.1681, "step": 40110 }, { "epoch": 0.8691742681949169, "grad_norm": 1.5682421922683716, "learning_rate": 8.327870763310064e-07, "loss": 0.2257, "step": 40115 }, { "epoch": 0.8692826035143978, "grad_norm": 0.5467965602874756, "learning_rate": 8.314278343001436e-07, "loss": 0.1075, "step": 40120 }, { "epoch": 0.8693909388338786, "grad_norm": 2.0288383960723877, "learning_rate": 8.300696543110176e-07, "loss": 0.1357, "step": 40125 }, { "epoch": 0.8694992741533595, "grad_norm": 1.9965990781784058, "learning_rate": 8.287125365209603e-07, "loss": 0.1994, "step": 40130 }, { "epoch": 0.8696076094728403, "grad_norm": 0.9095658659934998, "learning_rate": 8.273564810871682e-07, "loss": 0.1281, "step": 40135 }, { "epoch": 0.8697159447923212, "grad_norm": 1.3593355417251587, "learning_rate": 8.2600148816672e-07, "loss": 0.1801, "step": 40140 }, { "epoch": 0.869824280111802, "grad_norm": 1.0581271648406982, "learning_rate": 8.246475579165758e-07, "loss": 0.2427, "step": 40145 }, { "epoch": 0.8699326154312829, "grad_norm": 1.6953624486923218, "learning_rate": 8.232946904935623e-07, "loss": 0.1158, "step": 40150 }, { "epoch": 0.8700409507507637, "grad_norm": 1.5387991666793823, "learning_rate": 8.219428860543943e-07, "loss": 0.1547, "step": 40155 }, { "epoch": 0.8701492860702447, "grad_norm": 2.3431379795074463, "learning_rate": 8.20592144755652e-07, "loss": 0.163, "step": 40160 }, { "epoch": 0.8702576213897255, "grad_norm": 0.9174114465713501, "learning_rate": 8.192424667538057e-07, "loss": 0.1253, "step": 40165 }, { "epoch": 0.8703659567092064, "grad_norm": 1.421643614768982, "learning_rate": 8.178938522051904e-07, "loss": 0.1881, "step": 40170 }, { "epoch": 0.8704742920286872, "grad_norm": 1.8378872871398926, "learning_rate": 8.165463012660257e-07, "loss": 0.1821, "step": 40175 }, { "epoch": 0.870582627348168, "grad_norm": 0.8472855091094971, "learning_rate": 8.151998140924034e-07, "loss": 0.1907, "step": 40180 }, { "epoch": 0.8706909626676489, "grad_norm": 0.9865318536758423, "learning_rate": 8.13854390840293e-07, "loss": 0.1882, "step": 40185 }, { "epoch": 0.8707992979871297, "grad_norm": 1.0741093158721924, "learning_rate": 8.125100316655455e-07, "loss": 0.1949, "step": 40190 }, { "epoch": 0.8709076333066106, "grad_norm": 1.182349443435669, "learning_rate": 8.111667367238795e-07, "loss": 0.1453, "step": 40195 }, { "epoch": 0.8710159686260914, "grad_norm": 1.5316660404205322, "learning_rate": 8.098245061709009e-07, "loss": 0.0907, "step": 40200 }, { "epoch": 0.8711243039455724, "grad_norm": 1.5558487176895142, "learning_rate": 8.084833401620806e-07, "loss": 0.1452, "step": 40205 }, { "epoch": 0.8712326392650532, "grad_norm": 1.8510173559188843, "learning_rate": 8.071432388527789e-07, "loss": 0.2073, "step": 40210 }, { "epoch": 0.8713409745845341, "grad_norm": 1.3193377256393433, "learning_rate": 8.05804202398226e-07, "loss": 0.1478, "step": 40215 }, { "epoch": 0.8714493099040149, "grad_norm": 2.1739940643310547, "learning_rate": 8.044662309535234e-07, "loss": 0.2117, "step": 40220 }, { "epoch": 0.8715576452234958, "grad_norm": 0.9611420631408691, "learning_rate": 8.031293246736616e-07, "loss": 0.2315, "step": 40225 }, { "epoch": 0.8716659805429766, "grad_norm": 1.6367193460464478, "learning_rate": 8.017934837134967e-07, "loss": 0.1924, "step": 40230 }, { "epoch": 0.8717743158624575, "grad_norm": 0.93551105260849, "learning_rate": 8.004587082277693e-07, "loss": 0.1857, "step": 40235 }, { "epoch": 0.8718826511819383, "grad_norm": 0.7791262865066528, "learning_rate": 7.991249983710903e-07, "loss": 0.0838, "step": 40240 }, { "epoch": 0.8719909865014192, "grad_norm": 1.2703466415405273, "learning_rate": 7.977923542979516e-07, "loss": 0.1743, "step": 40245 }, { "epoch": 0.8720993218209, "grad_norm": 1.799752116203308, "learning_rate": 7.964607761627186e-07, "loss": 0.1728, "step": 40250 }, { "epoch": 0.872207657140381, "grad_norm": 1.042517900466919, "learning_rate": 7.951302641196357e-07, "loss": 0.2035, "step": 40255 }, { "epoch": 0.8723159924598618, "grad_norm": 1.1924153566360474, "learning_rate": 7.938008183228241e-07, "loss": 0.1117, "step": 40260 }, { "epoch": 0.8724243277793426, "grad_norm": 1.8736004829406738, "learning_rate": 7.924724389262784e-07, "loss": 0.2156, "step": 40265 }, { "epoch": 0.8725326630988235, "grad_norm": 2.0858986377716064, "learning_rate": 7.911451260838721e-07, "loss": 0.1603, "step": 40270 }, { "epoch": 0.8726409984183043, "grad_norm": 1.109744668006897, "learning_rate": 7.898188799493534e-07, "loss": 0.1298, "step": 40275 }, { "epoch": 0.8727493337377852, "grad_norm": 2.8974344730377197, "learning_rate": 7.884937006763505e-07, "loss": 0.1199, "step": 40280 }, { "epoch": 0.872857669057266, "grad_norm": 2.247370481491089, "learning_rate": 7.871695884183617e-07, "loss": 0.1547, "step": 40285 }, { "epoch": 0.8729660043767469, "grad_norm": 1.3037227392196655, "learning_rate": 7.858465433287698e-07, "loss": 0.1676, "step": 40290 }, { "epoch": 0.8730743396962277, "grad_norm": 0.6820080876350403, "learning_rate": 7.845245655608269e-07, "loss": 0.1448, "step": 40295 }, { "epoch": 0.8731826750157087, "grad_norm": 2.074336528778076, "learning_rate": 7.832036552676647e-07, "loss": 0.136, "step": 40300 }, { "epoch": 0.8732910103351895, "grad_norm": 0.956534743309021, "learning_rate": 7.818838126022932e-07, "loss": 0.1538, "step": 40305 }, { "epoch": 0.8733993456546704, "grad_norm": 0.9499374032020569, "learning_rate": 7.805650377175933e-07, "loss": 0.1167, "step": 40310 }, { "epoch": 0.8735076809741512, "grad_norm": 1.7745232582092285, "learning_rate": 7.792473307663273e-07, "loss": 0.1923, "step": 40315 }, { "epoch": 0.8736160162936321, "grad_norm": 1.7428464889526367, "learning_rate": 7.779306919011309e-07, "loss": 0.1566, "step": 40320 }, { "epoch": 0.8737243516131129, "grad_norm": 2.2233777046203613, "learning_rate": 7.766151212745177e-07, "loss": 0.1466, "step": 40325 }, { "epoch": 0.8738326869325937, "grad_norm": 1.7253450155258179, "learning_rate": 7.753006190388757e-07, "loss": 0.1153, "step": 40330 }, { "epoch": 0.8739410222520746, "grad_norm": 1.4619026184082031, "learning_rate": 7.73987185346472e-07, "loss": 0.1519, "step": 40335 }, { "epoch": 0.8740493575715554, "grad_norm": 2.9134809970855713, "learning_rate": 7.72674820349446e-07, "loss": 0.1383, "step": 40340 }, { "epoch": 0.8741576928910363, "grad_norm": 1.0837996006011963, "learning_rate": 7.713635241998174e-07, "loss": 0.1423, "step": 40345 }, { "epoch": 0.8742660282105172, "grad_norm": 1.0209976434707642, "learning_rate": 7.700532970494789e-07, "loss": 0.1151, "step": 40350 }, { "epoch": 0.8743743635299981, "grad_norm": 1.8031684160232544, "learning_rate": 7.687441390502015e-07, "loss": 0.1325, "step": 40355 }, { "epoch": 0.8744826988494789, "grad_norm": 1.5433677434921265, "learning_rate": 7.674360503536326e-07, "loss": 0.1904, "step": 40360 }, { "epoch": 0.8745910341689598, "grad_norm": 1.156130075454712, "learning_rate": 7.661290311112913e-07, "loss": 0.1883, "step": 40365 }, { "epoch": 0.8746993694884406, "grad_norm": 1.7418792247772217, "learning_rate": 7.648230814745805e-07, "loss": 0.1249, "step": 40370 }, { "epoch": 0.8748077048079215, "grad_norm": 1.8236806392669678, "learning_rate": 7.635182015947717e-07, "loss": 0.1789, "step": 40375 }, { "epoch": 0.8749160401274023, "grad_norm": 0.6771182417869568, "learning_rate": 7.622143916230184e-07, "loss": 0.1089, "step": 40380 }, { "epoch": 0.8750243754468832, "grad_norm": 2.2115533351898193, "learning_rate": 7.609116517103454e-07, "loss": 0.2665, "step": 40385 }, { "epoch": 0.875132710766364, "grad_norm": 1.2915611267089844, "learning_rate": 7.596099820076541e-07, "loss": 0.1128, "step": 40390 }, { "epoch": 0.8752410460858449, "grad_norm": 1.4710713624954224, "learning_rate": 7.583093826657273e-07, "loss": 0.1881, "step": 40395 }, { "epoch": 0.8753493814053258, "grad_norm": 1.5532933473587036, "learning_rate": 7.570098538352144e-07, "loss": 0.2699, "step": 40400 }, { "epoch": 0.8754577167248067, "grad_norm": 1.6478819847106934, "learning_rate": 7.557113956666529e-07, "loss": 0.1409, "step": 40405 }, { "epoch": 0.8755660520442875, "grad_norm": 1.6186002492904663, "learning_rate": 7.544140083104456e-07, "loss": 0.1621, "step": 40410 }, { "epoch": 0.8756743873637683, "grad_norm": 1.993727684020996, "learning_rate": 7.531176919168781e-07, "loss": 0.1976, "step": 40415 }, { "epoch": 0.8757827226832492, "grad_norm": 1.4584347009658813, "learning_rate": 7.518224466361079e-07, "loss": 0.121, "step": 40420 }, { "epoch": 0.87589105800273, "grad_norm": 1.44636869430542, "learning_rate": 7.505282726181684e-07, "loss": 0.1795, "step": 40425 }, { "epoch": 0.8759993933222109, "grad_norm": 1.9681382179260254, "learning_rate": 7.49235170012973e-07, "loss": 0.159, "step": 40430 }, { "epoch": 0.8761077286416917, "grad_norm": 1.3223376274108887, "learning_rate": 7.47943138970304e-07, "loss": 0.1202, "step": 40435 }, { "epoch": 0.8762160639611726, "grad_norm": 0.9247742295265198, "learning_rate": 7.466521796398285e-07, "loss": 0.1331, "step": 40440 }, { "epoch": 0.8763243992806535, "grad_norm": 1.5180140733718872, "learning_rate": 7.453622921710801e-07, "loss": 0.1115, "step": 40445 }, { "epoch": 0.8764327346001344, "grad_norm": 1.7929608821868896, "learning_rate": 7.440734767134794e-07, "loss": 0.1689, "step": 40450 }, { "epoch": 0.8765410699196152, "grad_norm": 1.77816641330719, "learning_rate": 7.427857334163113e-07, "loss": 0.1761, "step": 40455 }, { "epoch": 0.8766494052390961, "grad_norm": 1.4479821920394897, "learning_rate": 7.414990624287421e-07, "loss": 0.175, "step": 40460 }, { "epoch": 0.8767577405585769, "grad_norm": 1.2463836669921875, "learning_rate": 7.40213463899816e-07, "loss": 0.179, "step": 40465 }, { "epoch": 0.8768660758780578, "grad_norm": 1.5184288024902344, "learning_rate": 7.38928937978447e-07, "loss": 0.175, "step": 40470 }, { "epoch": 0.8769744111975386, "grad_norm": 0.7547604441642761, "learning_rate": 7.376454848134307e-07, "loss": 0.1539, "step": 40475 }, { "epoch": 0.8770827465170195, "grad_norm": 1.4789141416549683, "learning_rate": 7.363631045534336e-07, "loss": 0.1291, "step": 40480 }, { "epoch": 0.8771910818365003, "grad_norm": 1.695638656616211, "learning_rate": 7.350817973470026e-07, "loss": 0.2412, "step": 40485 }, { "epoch": 0.8772994171559811, "grad_norm": 1.1834934949874878, "learning_rate": 7.338015633425566e-07, "loss": 0.2459, "step": 40490 }, { "epoch": 0.8774077524754621, "grad_norm": 0.6998067498207092, "learning_rate": 7.325224026883904e-07, "loss": 0.1661, "step": 40495 }, { "epoch": 0.877516087794943, "grad_norm": 1.8372050523757935, "learning_rate": 7.312443155326799e-07, "loss": 0.2171, "step": 40500 }, { "epoch": 0.8776244231144238, "grad_norm": 1.1250544786453247, "learning_rate": 7.299673020234666e-07, "loss": 0.1624, "step": 40505 }, { "epoch": 0.8777327584339046, "grad_norm": 0.777484655380249, "learning_rate": 7.286913623086788e-07, "loss": 0.2468, "step": 40510 }, { "epoch": 0.8778410937533855, "grad_norm": 1.077837347984314, "learning_rate": 7.274164965361108e-07, "loss": 0.14, "step": 40515 }, { "epoch": 0.8779494290728663, "grad_norm": 1.4022969007492065, "learning_rate": 7.261427048534397e-07, "loss": 0.1919, "step": 40520 }, { "epoch": 0.8780577643923472, "grad_norm": 1.1096575260162354, "learning_rate": 7.248699874082121e-07, "loss": 0.1475, "step": 40525 }, { "epoch": 0.878166099711828, "grad_norm": 1.545649766921997, "learning_rate": 7.235983443478578e-07, "loss": 0.2282, "step": 40530 }, { "epoch": 0.8782744350313089, "grad_norm": 1.6492723226547241, "learning_rate": 7.223277758196723e-07, "loss": 0.2085, "step": 40535 }, { "epoch": 0.8783827703507897, "grad_norm": 0.589683473110199, "learning_rate": 7.210582819708356e-07, "loss": 0.183, "step": 40540 }, { "epoch": 0.8784911056702707, "grad_norm": 1.1935466527938843, "learning_rate": 7.197898629483968e-07, "loss": 0.1496, "step": 40545 }, { "epoch": 0.8785994409897515, "grad_norm": 1.2763583660125732, "learning_rate": 7.185225188992861e-07, "loss": 0.0899, "step": 40550 }, { "epoch": 0.8787077763092324, "grad_norm": 1.3586024045944214, "learning_rate": 7.17256249970305e-07, "loss": 0.1575, "step": 40555 }, { "epoch": 0.8788161116287132, "grad_norm": 0.9734241962432861, "learning_rate": 7.159910563081318e-07, "loss": 0.1834, "step": 40560 }, { "epoch": 0.878924446948194, "grad_norm": 0.9898757934570312, "learning_rate": 7.147269380593213e-07, "loss": 0.1319, "step": 40565 }, { "epoch": 0.8790327822676749, "grad_norm": 0.7823055982589722, "learning_rate": 7.134638953703e-07, "loss": 0.1104, "step": 40570 }, { "epoch": 0.8791411175871557, "grad_norm": 1.6785000562667847, "learning_rate": 7.122019283873761e-07, "loss": 0.1527, "step": 40575 }, { "epoch": 0.8792494529066366, "grad_norm": 1.1017659902572632, "learning_rate": 7.109410372567249e-07, "loss": 0.1627, "step": 40580 }, { "epoch": 0.8793577882261174, "grad_norm": 0.7120999693870544, "learning_rate": 7.096812221244065e-07, "loss": 0.1211, "step": 40585 }, { "epoch": 0.8794661235455984, "grad_norm": 0.8812230229377747, "learning_rate": 7.084224831363485e-07, "loss": 0.1643, "step": 40590 }, { "epoch": 0.8795744588650792, "grad_norm": 1.1971255540847778, "learning_rate": 7.071648204383574e-07, "loss": 0.1137, "step": 40595 }, { "epoch": 0.8796827941845601, "grad_norm": 1.6002769470214844, "learning_rate": 7.05908234176117e-07, "loss": 0.1641, "step": 40600 }, { "epoch": 0.8797911295040409, "grad_norm": 1.5792968273162842, "learning_rate": 7.046527244951806e-07, "loss": 0.1595, "step": 40605 }, { "epoch": 0.8798994648235218, "grad_norm": 1.0664350986480713, "learning_rate": 7.033982915409842e-07, "loss": 0.1889, "step": 40610 }, { "epoch": 0.8800078001430026, "grad_norm": 1.3679763078689575, "learning_rate": 7.021449354588295e-07, "loss": 0.1533, "step": 40615 }, { "epoch": 0.8801161354624835, "grad_norm": 1.4944424629211426, "learning_rate": 7.008926563939045e-07, "loss": 0.1449, "step": 40620 }, { "epoch": 0.8802244707819643, "grad_norm": 1.2437211275100708, "learning_rate": 6.996414544912655e-07, "loss": 0.1215, "step": 40625 }, { "epoch": 0.8803328061014452, "grad_norm": 1.6882820129394531, "learning_rate": 6.983913298958412e-07, "loss": 0.1724, "step": 40630 }, { "epoch": 0.880441141420926, "grad_norm": 0.8198120594024658, "learning_rate": 6.971422827524466e-07, "loss": 0.0973, "step": 40635 }, { "epoch": 0.880549476740407, "grad_norm": 1.4720327854156494, "learning_rate": 6.958943132057572e-07, "loss": 0.2072, "step": 40640 }, { "epoch": 0.8806578120598878, "grad_norm": 0.6201657056808472, "learning_rate": 6.946474214003407e-07, "loss": 0.1312, "step": 40645 }, { "epoch": 0.8807661473793686, "grad_norm": 2.593008041381836, "learning_rate": 6.934016074806238e-07, "loss": 0.1766, "step": 40650 }, { "epoch": 0.8808744826988495, "grad_norm": 0.5109556317329407, "learning_rate": 6.921568715909188e-07, "loss": 0.1359, "step": 40655 }, { "epoch": 0.8809828180183303, "grad_norm": 1.2514455318450928, "learning_rate": 6.909132138754093e-07, "loss": 0.1546, "step": 40660 }, { "epoch": 0.8810911533378112, "grad_norm": 1.4141695499420166, "learning_rate": 6.896706344781524e-07, "loss": 0.2104, "step": 40665 }, { "epoch": 0.881199488657292, "grad_norm": 1.7277871370315552, "learning_rate": 6.884291335430859e-07, "loss": 0.2897, "step": 40670 }, { "epoch": 0.8813078239767729, "grad_norm": 0.4676864743232727, "learning_rate": 6.871887112140152e-07, "loss": 0.1934, "step": 40675 }, { "epoch": 0.8814161592962537, "grad_norm": 1.2367535829544067, "learning_rate": 6.859493676346274e-07, "loss": 0.1429, "step": 40680 }, { "epoch": 0.8815244946157347, "grad_norm": 1.1303749084472656, "learning_rate": 6.847111029484777e-07, "loss": 0.1579, "step": 40685 }, { "epoch": 0.8816328299352155, "grad_norm": 1.3408153057098389, "learning_rate": 6.834739172990068e-07, "loss": 0.1216, "step": 40690 }, { "epoch": 0.8817411652546964, "grad_norm": 1.452410340309143, "learning_rate": 6.822378108295213e-07, "loss": 0.1556, "step": 40695 }, { "epoch": 0.8818495005741772, "grad_norm": 1.3741672039031982, "learning_rate": 6.810027836832044e-07, "loss": 0.1632, "step": 40700 }, { "epoch": 0.8819578358936581, "grad_norm": 0.5313239097595215, "learning_rate": 6.797688360031174e-07, "loss": 0.1327, "step": 40705 }, { "epoch": 0.8820661712131389, "grad_norm": 1.9493279457092285, "learning_rate": 6.785359679321923e-07, "loss": 0.1202, "step": 40710 }, { "epoch": 0.8821745065326198, "grad_norm": 2.25111985206604, "learning_rate": 6.773041796132407e-07, "loss": 0.1091, "step": 40715 }, { "epoch": 0.8822828418521006, "grad_norm": 1.9252816438674927, "learning_rate": 6.76073471188945e-07, "loss": 0.1919, "step": 40720 }, { "epoch": 0.8823911771715814, "grad_norm": 2.191984176635742, "learning_rate": 6.748438428018667e-07, "loss": 0.1762, "step": 40725 }, { "epoch": 0.8824995124910623, "grad_norm": 2.2229831218719482, "learning_rate": 6.736152945944363e-07, "loss": 0.176, "step": 40730 }, { "epoch": 0.8826078478105432, "grad_norm": 1.3250494003295898, "learning_rate": 6.723878267089656e-07, "loss": 0.1355, "step": 40735 }, { "epoch": 0.8827161831300241, "grad_norm": 0.7389346361160278, "learning_rate": 6.711614392876386e-07, "loss": 0.1416, "step": 40740 }, { "epoch": 0.8828245184495049, "grad_norm": 1.3234140872955322, "learning_rate": 6.699361324725117e-07, "loss": 0.1205, "step": 40745 }, { "epoch": 0.8829328537689858, "grad_norm": 2.4981584548950195, "learning_rate": 6.687119064055214e-07, "loss": 0.1671, "step": 40750 }, { "epoch": 0.8830411890884666, "grad_norm": 1.03770911693573, "learning_rate": 6.67488761228472e-07, "loss": 0.1656, "step": 40755 }, { "epoch": 0.8831495244079475, "grad_norm": 0.7914669513702393, "learning_rate": 6.662666970830501e-07, "loss": 0.1553, "step": 40760 }, { "epoch": 0.8832578597274283, "grad_norm": 1.2457998991012573, "learning_rate": 6.650457141108102e-07, "loss": 0.1084, "step": 40765 }, { "epoch": 0.8833661950469092, "grad_norm": 1.9146645069122314, "learning_rate": 6.638258124531882e-07, "loss": 0.108, "step": 40770 }, { "epoch": 0.88347453036639, "grad_norm": 0.9621110558509827, "learning_rate": 6.626069922514888e-07, "loss": 0.1619, "step": 40775 }, { "epoch": 0.8835828656858709, "grad_norm": 1.4082367420196533, "learning_rate": 6.613892536468969e-07, "loss": 0.1469, "step": 40780 }, { "epoch": 0.8836912010053518, "grad_norm": 0.788552463054657, "learning_rate": 6.601725967804662e-07, "loss": 0.077, "step": 40785 }, { "epoch": 0.8837995363248327, "grad_norm": 1.6580469608306885, "learning_rate": 6.589570217931285e-07, "loss": 0.1573, "step": 40790 }, { "epoch": 0.8839078716443135, "grad_norm": 1.214508295059204, "learning_rate": 6.577425288256933e-07, "loss": 0.1483, "step": 40795 }, { "epoch": 0.8840162069637943, "grad_norm": 1.0611369609832764, "learning_rate": 6.565291180188383e-07, "loss": 0.1036, "step": 40800 }, { "epoch": 0.8841245422832752, "grad_norm": 0.9121061563491821, "learning_rate": 6.553167895131207e-07, "loss": 0.1457, "step": 40805 }, { "epoch": 0.884232877602756, "grad_norm": 1.197194218635559, "learning_rate": 6.541055434489674e-07, "loss": 0.1543, "step": 40810 }, { "epoch": 0.8843412129222369, "grad_norm": 1.7943601608276367, "learning_rate": 6.528953799666882e-07, "loss": 0.1228, "step": 40815 }, { "epoch": 0.8844495482417177, "grad_norm": 0.5761184096336365, "learning_rate": 6.516862992064566e-07, "loss": 0.1218, "step": 40820 }, { "epoch": 0.8845578835611986, "grad_norm": 0.6692762970924377, "learning_rate": 6.504783013083327e-07, "loss": 0.1726, "step": 40825 }, { "epoch": 0.8846662188806795, "grad_norm": 1.0468953847885132, "learning_rate": 6.492713864122391e-07, "loss": 0.1458, "step": 40830 }, { "epoch": 0.8847745542001604, "grad_norm": 2.4033758640289307, "learning_rate": 6.480655546579817e-07, "loss": 0.1413, "step": 40835 }, { "epoch": 0.8848828895196412, "grad_norm": 1.4565576314926147, "learning_rate": 6.468608061852388e-07, "loss": 0.182, "step": 40840 }, { "epoch": 0.8849912248391221, "grad_norm": 1.084742784500122, "learning_rate": 6.456571411335611e-07, "loss": 0.1179, "step": 40845 }, { "epoch": 0.8850995601586029, "grad_norm": 1.5781055688858032, "learning_rate": 6.444545596423768e-07, "loss": 0.1743, "step": 40850 }, { "epoch": 0.8852078954780838, "grad_norm": 1.725986123085022, "learning_rate": 6.432530618509835e-07, "loss": 0.1563, "step": 40855 }, { "epoch": 0.8853162307975646, "grad_norm": 0.6755660176277161, "learning_rate": 6.420526478985612e-07, "loss": 0.1651, "step": 40860 }, { "epoch": 0.8854245661170455, "grad_norm": 1.7713512182235718, "learning_rate": 6.408533179241571e-07, "loss": 0.1088, "step": 40865 }, { "epoch": 0.8855329014365263, "grad_norm": 0.9188803434371948, "learning_rate": 6.39655072066695e-07, "loss": 0.1762, "step": 40870 }, { "epoch": 0.8856412367560071, "grad_norm": 1.982348084449768, "learning_rate": 6.384579104649758e-07, "loss": 0.2163, "step": 40875 }, { "epoch": 0.8857495720754881, "grad_norm": 3.192385673522949, "learning_rate": 6.372618332576697e-07, "loss": 0.2067, "step": 40880 }, { "epoch": 0.885857907394969, "grad_norm": 1.445610761642456, "learning_rate": 6.360668405833293e-07, "loss": 0.1561, "step": 40885 }, { "epoch": 0.8859662427144498, "grad_norm": 1.171568512916565, "learning_rate": 6.348729325803715e-07, "loss": 0.21, "step": 40890 }, { "epoch": 0.8860745780339306, "grad_norm": 1.1009247303009033, "learning_rate": 6.336801093870959e-07, "loss": 0.1883, "step": 40895 }, { "epoch": 0.8861829133534115, "grad_norm": 1.2578967809677124, "learning_rate": 6.32488371141673e-07, "loss": 0.1739, "step": 40900 }, { "epoch": 0.8862912486728923, "grad_norm": 2.4145727157592773, "learning_rate": 6.312977179821456e-07, "loss": 0.1837, "step": 40905 }, { "epoch": 0.8863995839923732, "grad_norm": 1.3478204011917114, "learning_rate": 6.301081500464357e-07, "loss": 0.2266, "step": 40910 }, { "epoch": 0.886507919311854, "grad_norm": 1.2957831621170044, "learning_rate": 6.289196674723342e-07, "loss": 0.1838, "step": 40915 }, { "epoch": 0.8866162546313349, "grad_norm": 0.417833536863327, "learning_rate": 6.277322703975119e-07, "loss": 0.1058, "step": 40920 }, { "epoch": 0.8867245899508157, "grad_norm": 1.5459232330322266, "learning_rate": 6.26545958959508e-07, "loss": 0.1815, "step": 40925 }, { "epoch": 0.8868329252702967, "grad_norm": 1.5106701850891113, "learning_rate": 6.253607332957401e-07, "loss": 0.1319, "step": 40930 }, { "epoch": 0.8869412605897775, "grad_norm": 2.1252663135528564, "learning_rate": 6.241765935435018e-07, "loss": 0.1205, "step": 40935 }, { "epoch": 0.8870495959092584, "grad_norm": 2.626370906829834, "learning_rate": 6.229935398399523e-07, "loss": 0.1168, "step": 40940 }, { "epoch": 0.8871579312287392, "grad_norm": 2.154270648956299, "learning_rate": 6.218115723221363e-07, "loss": 0.1791, "step": 40945 }, { "epoch": 0.88726626654822, "grad_norm": 1.7003438472747803, "learning_rate": 6.206306911269622e-07, "loss": 0.2006, "step": 40950 }, { "epoch": 0.8873746018677009, "grad_norm": 0.9206592440605164, "learning_rate": 6.194508963912216e-07, "loss": 0.1427, "step": 40955 }, { "epoch": 0.8874829371871817, "grad_norm": 1.2993755340576172, "learning_rate": 6.182721882515718e-07, "loss": 0.1557, "step": 40960 }, { "epoch": 0.8875912725066626, "grad_norm": 1.7513935565948486, "learning_rate": 6.170945668445527e-07, "loss": 0.1651, "step": 40965 }, { "epoch": 0.8876996078261434, "grad_norm": 0.9322627186775208, "learning_rate": 6.159180323065705e-07, "loss": 0.1835, "step": 40970 }, { "epoch": 0.8878079431456244, "grad_norm": 1.8038318157196045, "learning_rate": 6.147425847739108e-07, "loss": 0.1458, "step": 40975 }, { "epoch": 0.8879162784651052, "grad_norm": 1.0369101762771606, "learning_rate": 6.135682243827334e-07, "loss": 0.2202, "step": 40980 }, { "epoch": 0.8880246137845861, "grad_norm": 0.6394705772399902, "learning_rate": 6.123949512690663e-07, "loss": 0.1201, "step": 40985 }, { "epoch": 0.8881329491040669, "grad_norm": 1.8522316217422485, "learning_rate": 6.112227655688196e-07, "loss": 0.207, "step": 40990 }, { "epoch": 0.8882412844235478, "grad_norm": 1.5493991374969482, "learning_rate": 6.100516674177703e-07, "loss": 0.2135, "step": 40995 }, { "epoch": 0.8883496197430286, "grad_norm": 1.314551591873169, "learning_rate": 6.088816569515754e-07, "loss": 0.1675, "step": 41000 }, { "epoch": 0.8884579550625095, "grad_norm": 0.42987093329429626, "learning_rate": 6.077127343057598e-07, "loss": 0.1762, "step": 41005 }, { "epoch": 0.8885662903819903, "grad_norm": 1.4292205572128296, "learning_rate": 6.065448996157286e-07, "loss": 0.1778, "step": 41010 }, { "epoch": 0.8886746257014712, "grad_norm": 1.3720272779464722, "learning_rate": 6.053781530167557e-07, "loss": 0.147, "step": 41015 }, { "epoch": 0.888782961020952, "grad_norm": 1.112979531288147, "learning_rate": 6.042124946439943e-07, "loss": 0.1764, "step": 41020 }, { "epoch": 0.888891296340433, "grad_norm": 1.4929821491241455, "learning_rate": 6.03047924632465e-07, "loss": 0.2011, "step": 41025 }, { "epoch": 0.8889996316599138, "grad_norm": 0.7757676839828491, "learning_rate": 6.018844431170667e-07, "loss": 0.1619, "step": 41030 }, { "epoch": 0.8891079669793946, "grad_norm": 1.6356693506240845, "learning_rate": 6.007220502325739e-07, "loss": 0.1571, "step": 41035 }, { "epoch": 0.8892163022988755, "grad_norm": 1.321074366569519, "learning_rate": 5.995607461136288e-07, "loss": 0.1893, "step": 41040 }, { "epoch": 0.8893246376183563, "grad_norm": 1.958577275276184, "learning_rate": 5.984005308947538e-07, "loss": 0.1859, "step": 41045 }, { "epoch": 0.8894329729378372, "grad_norm": 1.9157541990280151, "learning_rate": 5.972414047103403e-07, "loss": 0.1704, "step": 41050 }, { "epoch": 0.889541308257318, "grad_norm": 2.38413143157959, "learning_rate": 5.960833676946576e-07, "loss": 0.1843, "step": 41055 }, { "epoch": 0.8896496435767989, "grad_norm": 1.833888053894043, "learning_rate": 5.949264199818449e-07, "loss": 0.1221, "step": 41060 }, { "epoch": 0.8897579788962797, "grad_norm": 0.9965684413909912, "learning_rate": 5.937705617059208e-07, "loss": 0.1828, "step": 41065 }, { "epoch": 0.8898663142157606, "grad_norm": 1.1196463108062744, "learning_rate": 5.926157930007714e-07, "loss": 0.1878, "step": 41070 }, { "epoch": 0.8899746495352415, "grad_norm": 1.139780879020691, "learning_rate": 5.914621140001565e-07, "loss": 0.178, "step": 41075 }, { "epoch": 0.8900829848547224, "grad_norm": 1.440401315689087, "learning_rate": 5.903095248377189e-07, "loss": 0.1269, "step": 41080 }, { "epoch": 0.8901913201742032, "grad_norm": 0.6029985547065735, "learning_rate": 5.891580256469631e-07, "loss": 0.1607, "step": 41085 }, { "epoch": 0.8902996554936841, "grad_norm": 1.440505862236023, "learning_rate": 5.880076165612791e-07, "loss": 0.1735, "step": 41090 }, { "epoch": 0.8904079908131649, "grad_norm": 0.9797359704971313, "learning_rate": 5.868582977139181e-07, "loss": 0.1338, "step": 41095 }, { "epoch": 0.8905163261326458, "grad_norm": 2.144493579864502, "learning_rate": 5.857100692380168e-07, "loss": 0.1646, "step": 41100 }, { "epoch": 0.8906246614521266, "grad_norm": 1.6710100173950195, "learning_rate": 5.845629312665768e-07, "loss": 0.1863, "step": 41105 }, { "epoch": 0.8907329967716074, "grad_norm": 1.6533509492874146, "learning_rate": 5.834168839324783e-07, "loss": 0.1776, "step": 41110 }, { "epoch": 0.8908413320910883, "grad_norm": 0.8745838403701782, "learning_rate": 5.822719273684729e-07, "loss": 0.1783, "step": 41115 }, { "epoch": 0.8909496674105692, "grad_norm": 1.7356507778167725, "learning_rate": 5.811280617071858e-07, "loss": 0.1925, "step": 41120 }, { "epoch": 0.8910580027300501, "grad_norm": 1.546789288520813, "learning_rate": 5.799852870811207e-07, "loss": 0.1334, "step": 41125 }, { "epoch": 0.8911663380495309, "grad_norm": 1.652392029762268, "learning_rate": 5.788436036226464e-07, "loss": 0.2177, "step": 41130 }, { "epoch": 0.8912746733690118, "grad_norm": 0.9531424641609192, "learning_rate": 5.777030114640125e-07, "loss": 0.1462, "step": 41135 }, { "epoch": 0.8913830086884926, "grad_norm": 1.7435895204544067, "learning_rate": 5.765635107373402e-07, "loss": 0.1223, "step": 41140 }, { "epoch": 0.8914913440079735, "grad_norm": 1.839377999305725, "learning_rate": 5.754251015746192e-07, "loss": 0.1922, "step": 41145 }, { "epoch": 0.8915996793274543, "grad_norm": 1.6851155757904053, "learning_rate": 5.742877841077222e-07, "loss": 0.2041, "step": 41150 }, { "epoch": 0.8917080146469352, "grad_norm": 1.7698900699615479, "learning_rate": 5.731515584683867e-07, "loss": 0.1407, "step": 41155 }, { "epoch": 0.891816349966416, "grad_norm": 1.6905672550201416, "learning_rate": 5.720164247882309e-07, "loss": 0.2146, "step": 41160 }, { "epoch": 0.8919246852858969, "grad_norm": 0.6560724973678589, "learning_rate": 5.708823831987398e-07, "loss": 0.1708, "step": 41165 }, { "epoch": 0.8920330206053778, "grad_norm": 0.7283318638801575, "learning_rate": 5.697494338312759e-07, "loss": 0.1981, "step": 41170 }, { "epoch": 0.8921413559248587, "grad_norm": 1.9567196369171143, "learning_rate": 5.686175768170743e-07, "loss": 0.1888, "step": 41175 }, { "epoch": 0.8922496912443395, "grad_norm": 1.509975552558899, "learning_rate": 5.674868122872468e-07, "loss": 0.0816, "step": 41180 }, { "epoch": 0.8923580265638204, "grad_norm": 1.3756623268127441, "learning_rate": 5.66357140372773e-07, "loss": 0.1543, "step": 41185 }, { "epoch": 0.8924663618833012, "grad_norm": 1.4594587087631226, "learning_rate": 5.652285612045061e-07, "loss": 0.1772, "step": 41190 }, { "epoch": 0.892574697202782, "grad_norm": 0.8211742639541626, "learning_rate": 5.641010749131803e-07, "loss": 0.1469, "step": 41195 }, { "epoch": 0.8926830325222629, "grad_norm": 1.4631662368774414, "learning_rate": 5.629746816293924e-07, "loss": 0.2094, "step": 41200 }, { "epoch": 0.8927913678417437, "grad_norm": 1.2332273721694946, "learning_rate": 5.618493814836235e-07, "loss": 0.1304, "step": 41205 }, { "epoch": 0.8928997031612246, "grad_norm": 1.497419834136963, "learning_rate": 5.607251746062181e-07, "loss": 0.2184, "step": 41210 }, { "epoch": 0.8930080384807055, "grad_norm": 1.4426754713058472, "learning_rate": 5.596020611274011e-07, "loss": 0.2056, "step": 41215 }, { "epoch": 0.8931163738001864, "grad_norm": 1.025368332862854, "learning_rate": 5.584800411772706e-07, "loss": 0.1866, "step": 41220 }, { "epoch": 0.8932247091196672, "grad_norm": 0.9278343915939331, "learning_rate": 5.573591148857904e-07, "loss": 0.1068, "step": 41225 }, { "epoch": 0.8933330444391481, "grad_norm": 0.7190464735031128, "learning_rate": 5.562392823828089e-07, "loss": 0.1611, "step": 41230 }, { "epoch": 0.8934413797586289, "grad_norm": 2.3642632961273193, "learning_rate": 5.551205437980367e-07, "loss": 0.1539, "step": 41235 }, { "epoch": 0.8935497150781098, "grad_norm": 1.321510672569275, "learning_rate": 5.540028992610668e-07, "loss": 0.1579, "step": 41240 }, { "epoch": 0.8936580503975906, "grad_norm": 1.6321892738342285, "learning_rate": 5.528863489013581e-07, "loss": 0.2159, "step": 41245 }, { "epoch": 0.8937663857170715, "grad_norm": 1.857482671737671, "learning_rate": 5.517708928482502e-07, "loss": 0.1908, "step": 41250 }, { "epoch": 0.8938747210365523, "grad_norm": 1.1288634538650513, "learning_rate": 5.506565312309487e-07, "loss": 0.2288, "step": 41255 }, { "epoch": 0.8939830563560331, "grad_norm": 1.3292040824890137, "learning_rate": 5.49543264178537e-07, "loss": 0.2728, "step": 41260 }, { "epoch": 0.8940913916755141, "grad_norm": 2.19132399559021, "learning_rate": 5.484310918199698e-07, "loss": 0.2416, "step": 41265 }, { "epoch": 0.894199726994995, "grad_norm": 1.3402754068374634, "learning_rate": 5.473200142840762e-07, "loss": 0.1664, "step": 41270 }, { "epoch": 0.8943080623144758, "grad_norm": 1.3473060131072998, "learning_rate": 5.4621003169956e-07, "loss": 0.1226, "step": 41275 }, { "epoch": 0.8944163976339566, "grad_norm": 1.6063677072525024, "learning_rate": 5.451011441949905e-07, "loss": 0.1967, "step": 41280 }, { "epoch": 0.8945247329534375, "grad_norm": 0.9844839572906494, "learning_rate": 5.439933518988216e-07, "loss": 0.0735, "step": 41285 }, { "epoch": 0.8946330682729183, "grad_norm": 1.106793999671936, "learning_rate": 5.428866549393708e-07, "loss": 0.1234, "step": 41290 }, { "epoch": 0.8947414035923992, "grad_norm": 1.124383568763733, "learning_rate": 5.417810534448342e-07, "loss": 0.1379, "step": 41295 }, { "epoch": 0.89484973891188, "grad_norm": 2.1531848907470703, "learning_rate": 5.406765475432773e-07, "loss": 0.115, "step": 41300 }, { "epoch": 0.8949580742313609, "grad_norm": 1.4189780950546265, "learning_rate": 5.395731373626433e-07, "loss": 0.1517, "step": 41305 }, { "epoch": 0.8950664095508417, "grad_norm": 1.4830734729766846, "learning_rate": 5.384708230307445e-07, "loss": 0.175, "step": 41310 }, { "epoch": 0.8951747448703227, "grad_norm": 0.9892273545265198, "learning_rate": 5.373696046752641e-07, "loss": 0.1483, "step": 41315 }, { "epoch": 0.8952830801898035, "grad_norm": 1.136523723602295, "learning_rate": 5.362694824237669e-07, "loss": 0.1555, "step": 41320 }, { "epoch": 0.8953914155092844, "grad_norm": 0.7319695353507996, "learning_rate": 5.35170456403683e-07, "loss": 0.2838, "step": 41325 }, { "epoch": 0.8954997508287652, "grad_norm": 1.1472638845443726, "learning_rate": 5.340725267423197e-07, "loss": 0.1619, "step": 41330 }, { "epoch": 0.895608086148246, "grad_norm": 1.663150668144226, "learning_rate": 5.329756935668528e-07, "loss": 0.2352, "step": 41335 }, { "epoch": 0.8957164214677269, "grad_norm": 1.369673728942871, "learning_rate": 5.318799570043376e-07, "loss": 0.1902, "step": 41340 }, { "epoch": 0.8958247567872077, "grad_norm": 1.7784186601638794, "learning_rate": 5.307853171816957e-07, "loss": 0.2192, "step": 41345 }, { "epoch": 0.8959330921066886, "grad_norm": 0.8422970175743103, "learning_rate": 5.296917742257268e-07, "loss": 0.1399, "step": 41350 }, { "epoch": 0.8960414274261694, "grad_norm": 1.0309780836105347, "learning_rate": 5.285993282631008e-07, "loss": 0.1442, "step": 41355 }, { "epoch": 0.8961497627456504, "grad_norm": 2.2547690868377686, "learning_rate": 5.275079794203586e-07, "loss": 0.1458, "step": 41360 }, { "epoch": 0.8962580980651312, "grad_norm": 1.3490723371505737, "learning_rate": 5.264177278239213e-07, "loss": 0.1443, "step": 41365 }, { "epoch": 0.8963664333846121, "grad_norm": 1.2309627532958984, "learning_rate": 5.253285736000746e-07, "loss": 0.1351, "step": 41370 }, { "epoch": 0.8964747687040929, "grad_norm": 1.5470435619354248, "learning_rate": 5.242405168749842e-07, "loss": 0.1682, "step": 41375 }, { "epoch": 0.8965831040235738, "grad_norm": 1.7200464010238647, "learning_rate": 5.231535577746816e-07, "loss": 0.1071, "step": 41380 }, { "epoch": 0.8966914393430546, "grad_norm": 1.046433925628662, "learning_rate": 5.22067696425077e-07, "loss": 0.2129, "step": 41385 }, { "epoch": 0.8967997746625355, "grad_norm": 0.8211389183998108, "learning_rate": 5.2098293295195e-07, "loss": 0.1242, "step": 41390 }, { "epoch": 0.8969081099820163, "grad_norm": 1.0365406274795532, "learning_rate": 5.198992674809544e-07, "loss": 0.1211, "step": 41395 }, { "epoch": 0.8970164453014972, "grad_norm": 1.5318101644515991, "learning_rate": 5.188167001376177e-07, "loss": 0.2404, "step": 41400 }, { "epoch": 0.897124780620978, "grad_norm": 2.1788008213043213, "learning_rate": 5.177352310473361e-07, "loss": 0.2702, "step": 41405 }, { "epoch": 0.897233115940459, "grad_norm": 1.712720274925232, "learning_rate": 5.166548603353838e-07, "loss": 0.1301, "step": 41410 }, { "epoch": 0.8973414512599398, "grad_norm": 0.43481141328811646, "learning_rate": 5.155755881269064e-07, "loss": 0.1301, "step": 41415 }, { "epoch": 0.8974497865794206, "grad_norm": 2.014195442199707, "learning_rate": 5.144974145469205e-07, "loss": 0.1083, "step": 41420 }, { "epoch": 0.8975581218989015, "grad_norm": 0.8215240240097046, "learning_rate": 5.134203397203175e-07, "loss": 0.1324, "step": 41425 }, { "epoch": 0.8976664572183823, "grad_norm": 1.4936394691467285, "learning_rate": 5.123443637718572e-07, "loss": 0.208, "step": 41430 }, { "epoch": 0.8977747925378632, "grad_norm": 1.545599341392517, "learning_rate": 5.112694868261791e-07, "loss": 0.1509, "step": 41435 }, { "epoch": 0.897883127857344, "grad_norm": 1.7406103610992432, "learning_rate": 5.10195709007788e-07, "loss": 0.16, "step": 41440 }, { "epoch": 0.8979914631768249, "grad_norm": 1.2911429405212402, "learning_rate": 5.091230304410677e-07, "loss": 0.1664, "step": 41445 }, { "epoch": 0.8980997984963057, "grad_norm": 1.234763741493225, "learning_rate": 5.080514512502699e-07, "loss": 0.1125, "step": 41450 }, { "epoch": 0.8982081338157866, "grad_norm": 0.8109881281852722, "learning_rate": 5.069809715595242e-07, "loss": 0.0485, "step": 41455 }, { "epoch": 0.8983164691352675, "grad_norm": 0.9912799596786499, "learning_rate": 5.059115914928259e-07, "loss": 0.1725, "step": 41460 }, { "epoch": 0.8984248044547484, "grad_norm": 1.7225691080093384, "learning_rate": 5.04843311174048e-07, "loss": 0.1203, "step": 41465 }, { "epoch": 0.8985331397742292, "grad_norm": 1.9875216484069824, "learning_rate": 5.037761307269362e-07, "loss": 0.1874, "step": 41470 }, { "epoch": 0.8986414750937101, "grad_norm": 2.778886556625366, "learning_rate": 5.027100502751048e-07, "loss": 0.1426, "step": 41475 }, { "epoch": 0.8987498104131909, "grad_norm": 1.0672616958618164, "learning_rate": 5.016450699420461e-07, "loss": 0.1078, "step": 41480 }, { "epoch": 0.8988581457326718, "grad_norm": 2.1769447326660156, "learning_rate": 5.005811898511204e-07, "loss": 0.2285, "step": 41485 }, { "epoch": 0.8989664810521526, "grad_norm": 0.770918607711792, "learning_rate": 4.995184101255624e-07, "loss": 0.1157, "step": 41490 }, { "epoch": 0.8990748163716334, "grad_norm": 1.5171277523040771, "learning_rate": 4.984567308884791e-07, "loss": 0.2111, "step": 41495 }, { "epoch": 0.8991831516911143, "grad_norm": 1.9409676790237427, "learning_rate": 4.973961522628512e-07, "loss": 0.1951, "step": 41500 }, { "epoch": 0.8992914870105952, "grad_norm": 1.1718189716339111, "learning_rate": 4.963366743715293e-07, "loss": 0.1684, "step": 41505 }, { "epoch": 0.8993998223300761, "grad_norm": 1.6914650201797485, "learning_rate": 4.952782973372383e-07, "loss": 0.1299, "step": 41510 }, { "epoch": 0.8995081576495569, "grad_norm": 0.9826122522354126, "learning_rate": 4.942210212825771e-07, "loss": 0.1821, "step": 41515 }, { "epoch": 0.8996164929690378, "grad_norm": 1.7587348222732544, "learning_rate": 4.931648463300132e-07, "loss": 0.1748, "step": 41520 }, { "epoch": 0.8997248282885186, "grad_norm": 0.9583898782730103, "learning_rate": 4.921097726018908e-07, "loss": 0.2396, "step": 41525 }, { "epoch": 0.8998331636079995, "grad_norm": 0.916861355304718, "learning_rate": 4.910558002204213e-07, "loss": 0.1462, "step": 41530 }, { "epoch": 0.8999414989274803, "grad_norm": 1.468428611755371, "learning_rate": 4.900029293076947e-07, "loss": 0.0779, "step": 41535 }, { "epoch": 0.9000498342469612, "grad_norm": 1.481660008430481, "learning_rate": 4.889511599856677e-07, "loss": 0.1283, "step": 41540 }, { "epoch": 0.900158169566442, "grad_norm": 0.5900688767433167, "learning_rate": 4.879004923761743e-07, "loss": 0.1017, "step": 41545 }, { "epoch": 0.9002665048859229, "grad_norm": 1.7710355520248413, "learning_rate": 4.86850926600917e-07, "loss": 0.1869, "step": 41550 }, { "epoch": 0.9003748402054038, "grad_norm": 1.1954631805419922, "learning_rate": 4.858024627814728e-07, "loss": 0.1245, "step": 41555 }, { "epoch": 0.9004831755248847, "grad_norm": 1.7025703191757202, "learning_rate": 4.847551010392915e-07, "loss": 0.2287, "step": 41560 }, { "epoch": 0.9005915108443655, "grad_norm": 1.4533427953720093, "learning_rate": 4.837088414956925e-07, "loss": 0.1879, "step": 41565 }, { "epoch": 0.9006998461638464, "grad_norm": 1.0181260108947754, "learning_rate": 4.826636842718712e-07, "loss": 0.1767, "step": 41570 }, { "epoch": 0.9008081814833272, "grad_norm": 0.9796648621559143, "learning_rate": 4.81619629488892e-07, "loss": 0.135, "step": 41575 }, { "epoch": 0.900916516802808, "grad_norm": 2.0602898597717285, "learning_rate": 4.805766772676934e-07, "loss": 0.1286, "step": 41580 }, { "epoch": 0.9010248521222889, "grad_norm": 2.222522258758545, "learning_rate": 4.795348277290845e-07, "loss": 0.1656, "step": 41585 }, { "epoch": 0.9011331874417697, "grad_norm": 1.0267409086227417, "learning_rate": 4.784940809937511e-07, "loss": 0.1531, "step": 41590 }, { "epoch": 0.9012415227612506, "grad_norm": 2.0330426692962646, "learning_rate": 4.774544371822465e-07, "loss": 0.149, "step": 41595 }, { "epoch": 0.9013498580807314, "grad_norm": 1.1407482624053955, "learning_rate": 4.7641589641499566e-07, "loss": 0.1404, "step": 41600 }, { "epoch": 0.9014581934002124, "grad_norm": 0.876645028591156, "learning_rate": 4.7537845881230113e-07, "loss": 0.1576, "step": 41605 }, { "epoch": 0.9015665287196932, "grad_norm": 1.724778413772583, "learning_rate": 4.743421244943325e-07, "loss": 0.2582, "step": 41610 }, { "epoch": 0.9016748640391741, "grad_norm": 1.4222602844238281, "learning_rate": 4.733068935811358e-07, "loss": 0.1444, "step": 41615 }, { "epoch": 0.9017831993586549, "grad_norm": 1.4100532531738281, "learning_rate": 4.722727661926241e-07, "loss": 0.1751, "step": 41620 }, { "epoch": 0.9018915346781358, "grad_norm": 0.9993183016777039, "learning_rate": 4.7123974244858817e-07, "loss": 0.0726, "step": 41625 }, { "epoch": 0.9019998699976166, "grad_norm": 0.8026193380355835, "learning_rate": 4.702078224686879e-07, "loss": 0.1676, "step": 41630 }, { "epoch": 0.9021082053170975, "grad_norm": 1.7836828231811523, "learning_rate": 4.691770063724532e-07, "loss": 0.1984, "step": 41635 }, { "epoch": 0.9022165406365783, "grad_norm": 1.4231430292129517, "learning_rate": 4.6814729427929196e-07, "loss": 0.1862, "step": 41640 }, { "epoch": 0.9023248759560591, "grad_norm": 1.8507202863693237, "learning_rate": 4.6711868630847867e-07, "loss": 0.1687, "step": 41645 }, { "epoch": 0.9024332112755401, "grad_norm": 1.2723090648651123, "learning_rate": 4.6609118257916255e-07, "loss": 0.1781, "step": 41650 }, { "epoch": 0.902541546595021, "grad_norm": 3.0105607509613037, "learning_rate": 4.650647832103661e-07, "loss": 0.1228, "step": 41655 }, { "epoch": 0.9026498819145018, "grad_norm": 1.7471836805343628, "learning_rate": 4.6403948832098087e-07, "loss": 0.1839, "step": 41660 }, { "epoch": 0.9027582172339826, "grad_norm": 1.3040273189544678, "learning_rate": 4.6301529802977307e-07, "loss": 0.1398, "step": 41665 }, { "epoch": 0.9028665525534635, "grad_norm": 0.931145191192627, "learning_rate": 4.619922124553777e-07, "loss": 0.1327, "step": 41670 }, { "epoch": 0.9029748878729443, "grad_norm": 1.2674437761306763, "learning_rate": 4.609702317163067e-07, "loss": 0.1522, "step": 41675 }, { "epoch": 0.9030832231924252, "grad_norm": 1.444716453552246, "learning_rate": 4.599493559309387e-07, "loss": 0.1603, "step": 41680 }, { "epoch": 0.903191558511906, "grad_norm": 0.992302417755127, "learning_rate": 4.5892958521752793e-07, "loss": 0.1564, "step": 41685 }, { "epoch": 0.9032998938313869, "grad_norm": 1.4869788885116577, "learning_rate": 4.579109196941989e-07, "loss": 0.2431, "step": 41690 }, { "epoch": 0.9034082291508677, "grad_norm": 1.2787256240844727, "learning_rate": 4.568933594789504e-07, "loss": 0.1458, "step": 41695 }, { "epoch": 0.9035165644703487, "grad_norm": 2.174919843673706, "learning_rate": 4.558769046896494e-07, "loss": 0.14, "step": 41700 }, { "epoch": 0.9036248997898295, "grad_norm": 2.128952741622925, "learning_rate": 4.548615554440383e-07, "loss": 0.16, "step": 41705 }, { "epoch": 0.9037332351093104, "grad_norm": 1.3391586542129517, "learning_rate": 4.5384731185973086e-07, "loss": 0.2196, "step": 41710 }, { "epoch": 0.9038415704287912, "grad_norm": 0.8941188454627991, "learning_rate": 4.528341740542097e-07, "loss": 0.1186, "step": 41715 }, { "epoch": 0.903949905748272, "grad_norm": 1.6908398866653442, "learning_rate": 4.5182214214483434e-07, "loss": 0.2016, "step": 41720 }, { "epoch": 0.9040582410677529, "grad_norm": 2.031047821044922, "learning_rate": 4.5081121624882987e-07, "loss": 0.1793, "step": 41725 }, { "epoch": 0.9041665763872337, "grad_norm": 1.689761996269226, "learning_rate": 4.4980139648330143e-07, "loss": 0.0933, "step": 41730 }, { "epoch": 0.9042749117067146, "grad_norm": 1.648824691772461, "learning_rate": 4.4879268296521784e-07, "loss": 0.1757, "step": 41735 }, { "epoch": 0.9043832470261954, "grad_norm": 1.4295605421066284, "learning_rate": 4.477850758114266e-07, "loss": 0.1217, "step": 41740 }, { "epoch": 0.9044915823456764, "grad_norm": 0.9748402833938599, "learning_rate": 4.4677857513863997e-07, "loss": 0.1874, "step": 41745 }, { "epoch": 0.9045999176651572, "grad_norm": 0.9772599339485168, "learning_rate": 4.45773181063448e-07, "loss": 0.1659, "step": 41750 }, { "epoch": 0.9047082529846381, "grad_norm": 1.510326862335205, "learning_rate": 4.447688937023131e-07, "loss": 0.1112, "step": 41755 }, { "epoch": 0.9048165883041189, "grad_norm": 1.0909130573272705, "learning_rate": 4.4376571317156226e-07, "loss": 0.1403, "step": 41760 }, { "epoch": 0.9049249236235998, "grad_norm": 0.7232761383056641, "learning_rate": 4.427636395874024e-07, "loss": 0.161, "step": 41765 }, { "epoch": 0.9050332589430806, "grad_norm": 1.2877105474472046, "learning_rate": 4.417626730659075e-07, "loss": 0.1806, "step": 41770 }, { "epoch": 0.9051415942625615, "grad_norm": 1.712547779083252, "learning_rate": 4.4076281372302465e-07, "loss": 0.2037, "step": 41775 }, { "epoch": 0.9052499295820423, "grad_norm": 2.168865919113159, "learning_rate": 4.397640616745713e-07, "loss": 0.0858, "step": 41780 }, { "epoch": 0.9053582649015232, "grad_norm": 1.4209181070327759, "learning_rate": 4.3876641703624044e-07, "loss": 0.1094, "step": 41785 }, { "epoch": 0.905466600221004, "grad_norm": 2.0907297134399414, "learning_rate": 4.377698799235919e-07, "loss": 0.1886, "step": 41790 }, { "epoch": 0.905574935540485, "grad_norm": 1.4710655212402344, "learning_rate": 4.36774450452061e-07, "loss": 0.1907, "step": 41795 }, { "epoch": 0.9056832708599658, "grad_norm": 0.6006914377212524, "learning_rate": 4.3578012873695344e-07, "loss": 0.117, "step": 41800 }, { "epoch": 0.9057916061794467, "grad_norm": 1.4869532585144043, "learning_rate": 4.347869148934447e-07, "loss": 0.2687, "step": 41805 }, { "epoch": 0.9058999414989275, "grad_norm": 1.2195868492126465, "learning_rate": 4.337948090365862e-07, "loss": 0.136, "step": 41810 }, { "epoch": 0.9060082768184083, "grad_norm": 1.261500597000122, "learning_rate": 4.32803811281296e-07, "loss": 0.1785, "step": 41815 }, { "epoch": 0.9061166121378892, "grad_norm": 1.470658779144287, "learning_rate": 4.3181392174236893e-07, "loss": 0.1437, "step": 41820 }, { "epoch": 0.90622494745737, "grad_norm": 1.7416387796401978, "learning_rate": 4.3082514053446657e-07, "loss": 0.0637, "step": 41825 }, { "epoch": 0.9063332827768509, "grad_norm": 1.6947911977767944, "learning_rate": 4.298374677721273e-07, "loss": 0.2268, "step": 41830 }, { "epoch": 0.9064416180963317, "grad_norm": 1.5286827087402344, "learning_rate": 4.288509035697563e-07, "loss": 0.1772, "step": 41835 }, { "epoch": 0.9065499534158126, "grad_norm": 1.3825491666793823, "learning_rate": 4.2786544804163e-07, "loss": 0.1337, "step": 41840 }, { "epoch": 0.9066582887352935, "grad_norm": 1.3268001079559326, "learning_rate": 4.2688110130190143e-07, "loss": 0.0887, "step": 41845 }, { "epoch": 0.9067666240547744, "grad_norm": 1.4640260934829712, "learning_rate": 4.258978634645927e-07, "loss": 0.1491, "step": 41850 }, { "epoch": 0.9068749593742552, "grad_norm": 1.316942811012268, "learning_rate": 4.2491573464359613e-07, "loss": 0.1895, "step": 41855 }, { "epoch": 0.9069832946937361, "grad_norm": 2.254072427749634, "learning_rate": 4.2393471495267734e-07, "loss": 0.1718, "step": 41860 }, { "epoch": 0.9070916300132169, "grad_norm": 1.0467272996902466, "learning_rate": 4.2295480450547323e-07, "loss": 0.1062, "step": 41865 }, { "epoch": 0.9071999653326978, "grad_norm": 1.1954842805862427, "learning_rate": 4.219760034154896e-07, "loss": 0.1929, "step": 41870 }, { "epoch": 0.9073083006521786, "grad_norm": 0.7351048588752747, "learning_rate": 4.2099831179610693e-07, "loss": 0.1493, "step": 41875 }, { "epoch": 0.9074166359716594, "grad_norm": 1.0017856359481812, "learning_rate": 4.20021729760578e-07, "loss": 0.2335, "step": 41880 }, { "epoch": 0.9075249712911403, "grad_norm": 1.3102216720581055, "learning_rate": 4.190462574220222e-07, "loss": 0.172, "step": 41885 }, { "epoch": 0.9076333066106212, "grad_norm": 1.100266456604004, "learning_rate": 4.180718948934348e-07, "loss": 0.1909, "step": 41890 }, { "epoch": 0.9077416419301021, "grad_norm": 0.9331762790679932, "learning_rate": 4.17098642287681e-07, "loss": 0.1787, "step": 41895 }, { "epoch": 0.9078499772495829, "grad_norm": 1.481768250465393, "learning_rate": 4.1612649971749854e-07, "loss": 0.2146, "step": 41900 }, { "epoch": 0.9079583125690638, "grad_norm": 1.272023320198059, "learning_rate": 4.1515546729549495e-07, "loss": 0.1853, "step": 41905 }, { "epoch": 0.9080666478885446, "grad_norm": 1.9614546298980713, "learning_rate": 4.141855451341481e-07, "loss": 0.1808, "step": 41910 }, { "epoch": 0.9081749832080255, "grad_norm": 1.520743489265442, "learning_rate": 4.1321673334581147e-07, "loss": 0.2039, "step": 41915 }, { "epoch": 0.9082833185275063, "grad_norm": 1.275791883468628, "learning_rate": 4.1224903204270527e-07, "loss": 0.2353, "step": 41920 }, { "epoch": 0.9083916538469872, "grad_norm": 1.6834588050842285, "learning_rate": 4.1128244133692544e-07, "loss": 0.2014, "step": 41925 }, { "epoch": 0.908499989166468, "grad_norm": 0.5585950613021851, "learning_rate": 4.1031696134043453e-07, "loss": 0.0634, "step": 41930 }, { "epoch": 0.9086083244859489, "grad_norm": 2.050058126449585, "learning_rate": 4.093525921650721e-07, "loss": 0.2119, "step": 41935 }, { "epoch": 0.9087166598054298, "grad_norm": 0.8463895320892334, "learning_rate": 4.08389333922542e-07, "loss": 0.1402, "step": 41940 }, { "epoch": 0.9088249951249107, "grad_norm": 0.6237738728523254, "learning_rate": 4.0742718672442503e-07, "loss": 0.1308, "step": 41945 }, { "epoch": 0.9089333304443915, "grad_norm": 1.410187840461731, "learning_rate": 4.064661506821732e-07, "loss": 0.2146, "step": 41950 }, { "epoch": 0.9090416657638724, "grad_norm": 0.6036958694458008, "learning_rate": 4.055062259071063e-07, "loss": 0.1703, "step": 41955 }, { "epoch": 0.9091500010833532, "grad_norm": 1.3160984516143799, "learning_rate": 4.0454741251041765e-07, "loss": 0.138, "step": 41960 }, { "epoch": 0.909258336402834, "grad_norm": 1.0875605344772339, "learning_rate": 4.0358971060317056e-07, "loss": 0.1285, "step": 41965 }, { "epoch": 0.9093666717223149, "grad_norm": 2.0437111854553223, "learning_rate": 4.0263312029630297e-07, "loss": 0.181, "step": 41970 }, { "epoch": 0.9094750070417957, "grad_norm": 0.5498220324516296, "learning_rate": 4.0167764170061854e-07, "loss": 0.1719, "step": 41975 }, { "epoch": 0.9095833423612766, "grad_norm": 1.7787823677062988, "learning_rate": 4.0072327492679753e-07, "loss": 0.1162, "step": 41980 }, { "epoch": 0.9096916776807574, "grad_norm": 0.9957221150398254, "learning_rate": 3.99770020085386e-07, "loss": 0.1243, "step": 41985 }, { "epoch": 0.9098000130002384, "grad_norm": 1.0877360105514526, "learning_rate": 3.988178772868068e-07, "loss": 0.1451, "step": 41990 }, { "epoch": 0.9099083483197192, "grad_norm": 1.2792526483535767, "learning_rate": 3.9786684664135264e-07, "loss": 0.1416, "step": 41995 }, { "epoch": 0.9100166836392001, "grad_norm": 1.3693351745605469, "learning_rate": 3.9691692825918225e-07, "loss": 0.174, "step": 42000 }, { "epoch": 0.9101250189586809, "grad_norm": 0.957791268825531, "learning_rate": 3.959681222503331e-07, "loss": 0.1748, "step": 42005 }, { "epoch": 0.9102333542781618, "grad_norm": 0.8403183221817017, "learning_rate": 3.9502042872470727e-07, "loss": 0.2034, "step": 42010 }, { "epoch": 0.9103416895976426, "grad_norm": 1.4135823249816895, "learning_rate": 3.9407384779208355e-07, "loss": 0.2455, "step": 42015 }, { "epoch": 0.9104500249171235, "grad_norm": 0.7068964838981628, "learning_rate": 3.931283795621066e-07, "loss": 0.1177, "step": 42020 }, { "epoch": 0.9105583602366043, "grad_norm": 0.957467794418335, "learning_rate": 3.9218402414429645e-07, "loss": 0.1212, "step": 42025 }, { "epoch": 0.9106666955560851, "grad_norm": 1.6915918588638306, "learning_rate": 3.9124078164804233e-07, "loss": 0.1388, "step": 42030 }, { "epoch": 0.9107750308755661, "grad_norm": 1.564972162246704, "learning_rate": 3.9029865218260355e-07, "loss": 0.1609, "step": 42035 }, { "epoch": 0.910883366195047, "grad_norm": 1.4028784036636353, "learning_rate": 3.8935763585711384e-07, "loss": 0.2199, "step": 42040 }, { "epoch": 0.9109917015145278, "grad_norm": 1.742741346359253, "learning_rate": 3.884177327805727e-07, "loss": 0.1894, "step": 42045 }, { "epoch": 0.9111000368340086, "grad_norm": 2.1922552585601807, "learning_rate": 3.874789430618575e-07, "loss": 0.2664, "step": 42050 }, { "epoch": 0.9112083721534895, "grad_norm": 1.6519399881362915, "learning_rate": 3.8654126680971014e-07, "loss": 0.1408, "step": 42055 }, { "epoch": 0.9113167074729703, "grad_norm": 1.3587530851364136, "learning_rate": 3.856047041327493e-07, "loss": 0.1916, "step": 42060 }, { "epoch": 0.9114250427924512, "grad_norm": 0.7995801568031311, "learning_rate": 3.8466925513945705e-07, "loss": 0.1595, "step": 42065 }, { "epoch": 0.911533378111932, "grad_norm": 1.0548352003097534, "learning_rate": 3.837349199381968e-07, "loss": 0.186, "step": 42070 }, { "epoch": 0.9116417134314129, "grad_norm": 0.7114394307136536, "learning_rate": 3.8280169863719295e-07, "loss": 0.2034, "step": 42075 }, { "epoch": 0.9117500487508937, "grad_norm": 1.3680131435394287, "learning_rate": 3.818695913445469e-07, "loss": 0.1471, "step": 42080 }, { "epoch": 0.9118583840703747, "grad_norm": 0.9113610982894897, "learning_rate": 3.8093859816822786e-07, "loss": 0.1463, "step": 42085 }, { "epoch": 0.9119667193898555, "grad_norm": 1.540239691734314, "learning_rate": 3.800087192160795e-07, "loss": 0.1434, "step": 42090 }, { "epoch": 0.9120750547093364, "grad_norm": 0.3650689721107483, "learning_rate": 3.790799545958146e-07, "loss": 0.1849, "step": 42095 }, { "epoch": 0.9121833900288172, "grad_norm": 1.8587968349456787, "learning_rate": 3.781523044150137e-07, "loss": 0.1302, "step": 42100 }, { "epoch": 0.912291725348298, "grad_norm": 0.6248246431350708, "learning_rate": 3.7722576878113537e-07, "loss": 0.1849, "step": 42105 }, { "epoch": 0.9124000606677789, "grad_norm": 1.8312835693359375, "learning_rate": 3.763003478015015e-07, "loss": 0.1867, "step": 42110 }, { "epoch": 0.9125083959872597, "grad_norm": 2.0268714427948, "learning_rate": 3.7537604158330855e-07, "loss": 0.14, "step": 42115 }, { "epoch": 0.9126167313067406, "grad_norm": 1.6780436038970947, "learning_rate": 3.7445285023362644e-07, "loss": 0.193, "step": 42120 }, { "epoch": 0.9127250666262214, "grad_norm": 1.757536768913269, "learning_rate": 3.735307738593885e-07, "loss": 0.1556, "step": 42125 }, { "epoch": 0.9128334019457023, "grad_norm": 1.8672783374786377, "learning_rate": 3.7260981256740827e-07, "loss": 0.1783, "step": 42130 }, { "epoch": 0.9129417372651832, "grad_norm": 0.7226545214653015, "learning_rate": 3.716899664643592e-07, "loss": 0.2002, "step": 42135 }, { "epoch": 0.9130500725846641, "grad_norm": 0.7021084427833557, "learning_rate": 3.707712356567983e-07, "loss": 0.1552, "step": 42140 }, { "epoch": 0.9131584079041449, "grad_norm": 0.7661086916923523, "learning_rate": 3.698536202511449e-07, "loss": 0.0986, "step": 42145 }, { "epoch": 0.9132667432236258, "grad_norm": 1.3452633619308472, "learning_rate": 3.6893712035368734e-07, "loss": 0.1715, "step": 42150 }, { "epoch": 0.9133750785431066, "grad_norm": 1.1598141193389893, "learning_rate": 3.6802173607059287e-07, "loss": 0.1243, "step": 42155 }, { "epoch": 0.9134834138625875, "grad_norm": 1.9141290187835693, "learning_rate": 3.6710746750789226e-07, "loss": 0.1656, "step": 42160 }, { "epoch": 0.9135917491820683, "grad_norm": 0.882853627204895, "learning_rate": 3.661943147714919e-07, "loss": 0.1863, "step": 42165 }, { "epoch": 0.9137000845015492, "grad_norm": 1.565795660018921, "learning_rate": 3.65282277967165e-07, "loss": 0.1601, "step": 42170 }, { "epoch": 0.91380841982103, "grad_norm": 1.2879592180252075, "learning_rate": 3.6437135720055805e-07, "loss": 0.1706, "step": 42175 }, { "epoch": 0.913916755140511, "grad_norm": 1.660800814628601, "learning_rate": 3.6346155257718786e-07, "loss": 0.2002, "step": 42180 }, { "epoch": 0.9140250904599918, "grad_norm": 1.2564860582351685, "learning_rate": 3.625528642024412e-07, "loss": 0.1489, "step": 42185 }, { "epoch": 0.9141334257794727, "grad_norm": 0.8758149147033691, "learning_rate": 3.6164529218157716e-07, "loss": 0.1647, "step": 42190 }, { "epoch": 0.9142417610989535, "grad_norm": 1.5434224605560303, "learning_rate": 3.6073883661972176e-07, "loss": 0.1687, "step": 42195 }, { "epoch": 0.9143500964184343, "grad_norm": 1.3808143138885498, "learning_rate": 3.598334976218776e-07, "loss": 0.1438, "step": 42200 }, { "epoch": 0.9144584317379152, "grad_norm": 1.2325384616851807, "learning_rate": 3.589292752929119e-07, "loss": 0.116, "step": 42205 }, { "epoch": 0.914566767057396, "grad_norm": 1.073542833328247, "learning_rate": 3.5802616973756867e-07, "loss": 0.1472, "step": 42210 }, { "epoch": 0.9146751023768769, "grad_norm": 1.6513314247131348, "learning_rate": 3.571241810604542e-07, "loss": 0.1341, "step": 42215 }, { "epoch": 0.9147834376963577, "grad_norm": 1.4095417261123657, "learning_rate": 3.5622330936605496e-07, "loss": 0.1496, "step": 42220 }, { "epoch": 0.9148917730158386, "grad_norm": 1.2230405807495117, "learning_rate": 3.553235547587197e-07, "loss": 0.1832, "step": 42225 }, { "epoch": 0.9150001083353195, "grad_norm": 1.7567858695983887, "learning_rate": 3.5442491734267503e-07, "loss": 0.1423, "step": 42230 }, { "epoch": 0.9151084436548004, "grad_norm": 1.4500492811203003, "learning_rate": 3.5352739722201215e-07, "loss": 0.0991, "step": 42235 }, { "epoch": 0.9152167789742812, "grad_norm": 1.1287304162979126, "learning_rate": 3.526309945006967e-07, "loss": 0.1565, "step": 42240 }, { "epoch": 0.9153251142937621, "grad_norm": 1.1874619722366333, "learning_rate": 3.517357092825635e-07, "loss": 0.1007, "step": 42245 }, { "epoch": 0.9154334496132429, "grad_norm": 2.170670747756958, "learning_rate": 3.5084154167131513e-07, "loss": 0.0987, "step": 42250 }, { "epoch": 0.9155417849327238, "grad_norm": 1.7274478673934937, "learning_rate": 3.49948491770532e-07, "loss": 0.1614, "step": 42255 }, { "epoch": 0.9156501202522046, "grad_norm": 2.0277020931243896, "learning_rate": 3.4905655968365706e-07, "loss": 0.0903, "step": 42260 }, { "epoch": 0.9157584555716854, "grad_norm": 0.8125719428062439, "learning_rate": 3.481657455140097e-07, "loss": 0.1392, "step": 42265 }, { "epoch": 0.9158667908911663, "grad_norm": 0.9643428325653076, "learning_rate": 3.472760493647753e-07, "loss": 0.1068, "step": 42270 }, { "epoch": 0.9159751262106473, "grad_norm": 0.8878528475761414, "learning_rate": 3.463874713390125e-07, "loss": 0.1865, "step": 42275 }, { "epoch": 0.9160834615301281, "grad_norm": 1.6743921041488647, "learning_rate": 3.455000115396512e-07, "loss": 0.1729, "step": 42280 }, { "epoch": 0.9161917968496089, "grad_norm": 2.3202412128448486, "learning_rate": 3.44613670069488e-07, "loss": 0.2035, "step": 42285 }, { "epoch": 0.9163001321690898, "grad_norm": 2.4878275394439697, "learning_rate": 3.437284470311952e-07, "loss": 0.113, "step": 42290 }, { "epoch": 0.9164084674885706, "grad_norm": 1.6830867528915405, "learning_rate": 3.428443425273087e-07, "loss": 0.1252, "step": 42295 }, { "epoch": 0.9165168028080515, "grad_norm": 1.4622442722320557, "learning_rate": 3.419613566602431e-07, "loss": 0.1754, "step": 42300 }, { "epoch": 0.9166251381275323, "grad_norm": 1.802571177482605, "learning_rate": 3.410794895322744e-07, "loss": 0.1001, "step": 42305 }, { "epoch": 0.9167334734470132, "grad_norm": 1.97153902053833, "learning_rate": 3.4019874124555874e-07, "loss": 0.1306, "step": 42310 }, { "epoch": 0.916841808766494, "grad_norm": 1.5628056526184082, "learning_rate": 3.3931911190211444e-07, "loss": 0.192, "step": 42315 }, { "epoch": 0.9169501440859749, "grad_norm": 1.7201167345046997, "learning_rate": 3.384406016038333e-07, "loss": 0.159, "step": 42320 }, { "epoch": 0.9170584794054558, "grad_norm": 1.0514702796936035, "learning_rate": 3.375632104524784e-07, "loss": 0.2292, "step": 42325 }, { "epoch": 0.9171668147249367, "grad_norm": 1.755172610282898, "learning_rate": 3.366869385496818e-07, "loss": 0.1522, "step": 42330 }, { "epoch": 0.9172751500444175, "grad_norm": 2.1446945667266846, "learning_rate": 3.358117859969479e-07, "loss": 0.2234, "step": 42335 }, { "epoch": 0.9173834853638984, "grad_norm": 1.451278567314148, "learning_rate": 3.3493775289564767e-07, "loss": 0.1273, "step": 42340 }, { "epoch": 0.9174918206833792, "grad_norm": 1.1849831342697144, "learning_rate": 3.3406483934702807e-07, "loss": 0.0848, "step": 42345 }, { "epoch": 0.91760015600286, "grad_norm": 0.7465011477470398, "learning_rate": 3.331930454522003e-07, "loss": 0.1332, "step": 42350 }, { "epoch": 0.9177084913223409, "grad_norm": 2.6682145595550537, "learning_rate": 3.323223713121493e-07, "loss": 0.2131, "step": 42355 }, { "epoch": 0.9178168266418217, "grad_norm": 1.1818554401397705, "learning_rate": 3.3145281702773093e-07, "loss": 0.2146, "step": 42360 }, { "epoch": 0.9179251619613026, "grad_norm": 1.7889155149459839, "learning_rate": 3.3058438269966796e-07, "loss": 0.1906, "step": 42365 }, { "epoch": 0.9180334972807834, "grad_norm": 1.2661420106887817, "learning_rate": 3.2971706842855666e-07, "loss": 0.1195, "step": 42370 }, { "epoch": 0.9181418326002644, "grad_norm": 1.2047840356826782, "learning_rate": 3.2885087431485996e-07, "loss": 0.1165, "step": 42375 }, { "epoch": 0.9182501679197452, "grad_norm": 1.3354148864746094, "learning_rate": 3.279858004589176e-07, "loss": 0.1637, "step": 42380 }, { "epoch": 0.9183585032392261, "grad_norm": 1.06977117061615, "learning_rate": 3.2712184696093384e-07, "loss": 0.1824, "step": 42385 }, { "epoch": 0.9184668385587069, "grad_norm": 0.792693018913269, "learning_rate": 3.2625901392098425e-07, "loss": 0.1266, "step": 42390 }, { "epoch": 0.9185751738781878, "grad_norm": 1.3415800333023071, "learning_rate": 3.253973014390155e-07, "loss": 0.1693, "step": 42395 }, { "epoch": 0.9186835091976686, "grad_norm": 0.9668383598327637, "learning_rate": 3.245367096148433e-07, "loss": 0.1477, "step": 42400 }, { "epoch": 0.9187918445171495, "grad_norm": 1.6831066608428955, "learning_rate": 3.236772385481546e-07, "loss": 0.1352, "step": 42405 }, { "epoch": 0.9189001798366303, "grad_norm": 0.8953995704650879, "learning_rate": 3.228188883385064e-07, "loss": 0.1182, "step": 42410 }, { "epoch": 0.9190085151561111, "grad_norm": 2.403541326522827, "learning_rate": 3.2196165908532696e-07, "loss": 0.1917, "step": 42415 }, { "epoch": 0.9191168504755921, "grad_norm": 0.941527783870697, "learning_rate": 3.211055508879102e-07, "loss": 0.2403, "step": 42420 }, { "epoch": 0.919225185795073, "grad_norm": 1.6472573280334473, "learning_rate": 3.202505638454279e-07, "loss": 0.2209, "step": 42425 }, { "epoch": 0.9193335211145538, "grad_norm": 1.7998703718185425, "learning_rate": 3.193966980569141e-07, "loss": 0.1847, "step": 42430 }, { "epoch": 0.9194418564340346, "grad_norm": 1.0657339096069336, "learning_rate": 3.1854395362127753e-07, "loss": 0.1945, "step": 42435 }, { "epoch": 0.9195501917535155, "grad_norm": 1.4282609224319458, "learning_rate": 3.1769233063729585e-07, "loss": 0.1757, "step": 42440 }, { "epoch": 0.9196585270729963, "grad_norm": 1.0839155912399292, "learning_rate": 3.1684182920361575e-07, "loss": 0.1561, "step": 42445 }, { "epoch": 0.9197668623924772, "grad_norm": 1.7453498840332031, "learning_rate": 3.1599244941875716e-07, "loss": 0.1825, "step": 42450 }, { "epoch": 0.919875197711958, "grad_norm": 1.1960517168045044, "learning_rate": 3.151441913811071e-07, "loss": 0.142, "step": 42455 }, { "epoch": 0.9199835330314389, "grad_norm": 1.34526526927948, "learning_rate": 3.142970551889224e-07, "loss": 0.1278, "step": 42460 }, { "epoch": 0.9200918683509197, "grad_norm": 1.3707895278930664, "learning_rate": 3.134510409403324e-07, "loss": 0.1762, "step": 42465 }, { "epoch": 0.9202002036704007, "grad_norm": 1.1725541353225708, "learning_rate": 3.126061487333343e-07, "loss": 0.1656, "step": 42470 }, { "epoch": 0.9203085389898815, "grad_norm": 0.8659223914146423, "learning_rate": 3.117623786657975e-07, "loss": 0.1787, "step": 42475 }, { "epoch": 0.9204168743093624, "grad_norm": 1.903184413909912, "learning_rate": 3.1091973083545836e-07, "loss": 0.224, "step": 42480 }, { "epoch": 0.9205252096288432, "grad_norm": 2.8109450340270996, "learning_rate": 3.100782053399276e-07, "loss": 0.1405, "step": 42485 }, { "epoch": 0.9206335449483241, "grad_norm": 0.9513352513313293, "learning_rate": 3.0923780227668067e-07, "loss": 0.139, "step": 42490 }, { "epoch": 0.9207418802678049, "grad_norm": 1.2262433767318726, "learning_rate": 3.083985217430674e-07, "loss": 0.1554, "step": 42495 }, { "epoch": 0.9208502155872857, "grad_norm": 1.2470744848251343, "learning_rate": 3.075603638363045e-07, "loss": 0.1264, "step": 42500 }, { "epoch": 0.9209585509067666, "grad_norm": 1.3717892169952393, "learning_rate": 3.06723328653481e-07, "loss": 0.1571, "step": 42505 }, { "epoch": 0.9210668862262474, "grad_norm": 1.3715670108795166, "learning_rate": 3.058874162915537e-07, "loss": 0.1607, "step": 42510 }, { "epoch": 0.9211752215457283, "grad_norm": 1.9465066194534302, "learning_rate": 3.0505262684735306e-07, "loss": 0.1622, "step": 42515 }, { "epoch": 0.9212835568652092, "grad_norm": 0.9938545227050781, "learning_rate": 3.0421896041757384e-07, "loss": 0.1243, "step": 42520 }, { "epoch": 0.9213918921846901, "grad_norm": 2.09126877784729, "learning_rate": 3.033864170987855e-07, "loss": 0.1925, "step": 42525 }, { "epoch": 0.9215002275041709, "grad_norm": 1.9295192956924438, "learning_rate": 3.025549969874264e-07, "loss": 0.1623, "step": 42530 }, { "epoch": 0.9216085628236518, "grad_norm": 1.6366214752197266, "learning_rate": 3.0172470017980295e-07, "loss": 0.1495, "step": 42535 }, { "epoch": 0.9217168981431326, "grad_norm": 2.1611127853393555, "learning_rate": 3.008955267720925e-07, "loss": 0.1425, "step": 42540 }, { "epoch": 0.9218252334626135, "grad_norm": 0.9300190806388855, "learning_rate": 3.0006747686034176e-07, "loss": 0.1324, "step": 42545 }, { "epoch": 0.9219335687820943, "grad_norm": 1.382105827331543, "learning_rate": 2.992405505404705e-07, "loss": 0.1658, "step": 42550 }, { "epoch": 0.9220419041015752, "grad_norm": 1.300193428993225, "learning_rate": 2.984147479082633e-07, "loss": 0.135, "step": 42555 }, { "epoch": 0.922150239421056, "grad_norm": 0.9370086789131165, "learning_rate": 2.9759006905937806e-07, "loss": 0.1651, "step": 42560 }, { "epoch": 0.922258574740537, "grad_norm": 1.7388207912445068, "learning_rate": 2.9676651408934163e-07, "loss": 0.1474, "step": 42565 }, { "epoch": 0.9223669100600178, "grad_norm": 1.3467735052108765, "learning_rate": 2.9594408309354894e-07, "loss": 0.1889, "step": 42570 }, { "epoch": 0.9224752453794987, "grad_norm": 1.3201206922531128, "learning_rate": 2.951227761672681e-07, "loss": 0.1836, "step": 42575 }, { "epoch": 0.9225835806989795, "grad_norm": 1.2536916732788086, "learning_rate": 2.9430259340563425e-07, "loss": 0.1422, "step": 42580 }, { "epoch": 0.9226919160184603, "grad_norm": 1.604270339012146, "learning_rate": 2.934835349036547e-07, "loss": 0.147, "step": 42585 }, { "epoch": 0.9228002513379412, "grad_norm": 1.5905097723007202, "learning_rate": 2.9266560075620234e-07, "loss": 0.1982, "step": 42590 }, { "epoch": 0.922908586657422, "grad_norm": 1.3168532848358154, "learning_rate": 2.918487910580259e-07, "loss": 0.1202, "step": 42595 }, { "epoch": 0.9230169219769029, "grad_norm": 1.5583455562591553, "learning_rate": 2.9103310590373965e-07, "loss": 0.1072, "step": 42600 }, { "epoch": 0.9231252572963837, "grad_norm": 0.8587419986724854, "learning_rate": 2.902185453878259e-07, "loss": 0.2131, "step": 42605 }, { "epoch": 0.9232335926158646, "grad_norm": 1.158625602722168, "learning_rate": 2.894051096046413e-07, "loss": 0.1391, "step": 42610 }, { "epoch": 0.9233419279353455, "grad_norm": 0.9203721880912781, "learning_rate": 2.885927986484094e-07, "loss": 0.162, "step": 42615 }, { "epoch": 0.9234502632548264, "grad_norm": 1.180519461631775, "learning_rate": 2.87781612613226e-07, "loss": 0.1896, "step": 42620 }, { "epoch": 0.9235585985743072, "grad_norm": 1.87057363986969, "learning_rate": 2.8697155159305377e-07, "loss": 0.1321, "step": 42625 }, { "epoch": 0.9236669338937881, "grad_norm": 1.605905294418335, "learning_rate": 2.8616261568172655e-07, "loss": 0.1896, "step": 42630 }, { "epoch": 0.9237752692132689, "grad_norm": 0.9665895700454712, "learning_rate": 2.853548049729471e-07, "loss": 0.1444, "step": 42635 }, { "epoch": 0.9238836045327498, "grad_norm": 2.3301095962524414, "learning_rate": 2.8454811956028614e-07, "loss": 0.1838, "step": 42640 }, { "epoch": 0.9239919398522306, "grad_norm": 1.5727531909942627, "learning_rate": 2.837425595371901e-07, "loss": 0.1815, "step": 42645 }, { "epoch": 0.9241002751717114, "grad_norm": 0.6607615351676941, "learning_rate": 2.829381249969676e-07, "loss": 0.0822, "step": 42650 }, { "epoch": 0.9242086104911923, "grad_norm": 0.7853750586509705, "learning_rate": 2.82134816032803e-07, "loss": 0.095, "step": 42655 }, { "epoch": 0.9243169458106731, "grad_norm": 1.2373857498168945, "learning_rate": 2.813326327377464e-07, "loss": 0.219, "step": 42660 }, { "epoch": 0.9244252811301541, "grad_norm": 0.9767295122146606, "learning_rate": 2.805315752047166e-07, "loss": 0.139, "step": 42665 }, { "epoch": 0.9245336164496349, "grad_norm": 1.3536001443862915, "learning_rate": 2.797316435265085e-07, "loss": 0.1782, "step": 42670 }, { "epoch": 0.9246419517691158, "grad_norm": 1.1839759349822998, "learning_rate": 2.78932837795779e-07, "loss": 0.1081, "step": 42675 }, { "epoch": 0.9247502870885966, "grad_norm": 1.4667226076126099, "learning_rate": 2.7813515810505955e-07, "loss": 0.1868, "step": 42680 }, { "epoch": 0.9248586224080775, "grad_norm": 1.2435344457626343, "learning_rate": 2.773386045467463e-07, "loss": 0.1118, "step": 42685 }, { "epoch": 0.9249669577275583, "grad_norm": 1.2578752040863037, "learning_rate": 2.765431772131122e-07, "loss": 0.1122, "step": 42690 }, { "epoch": 0.9250752930470392, "grad_norm": 0.582115113735199, "learning_rate": 2.7574887619629344e-07, "loss": 0.2358, "step": 42695 }, { "epoch": 0.92518362836652, "grad_norm": 1.842262625694275, "learning_rate": 2.7495570158829757e-07, "loss": 0.206, "step": 42700 }, { "epoch": 0.9252919636860009, "grad_norm": 2.0305867195129395, "learning_rate": 2.7416365348100214e-07, "loss": 0.1821, "step": 42705 }, { "epoch": 0.9254002990054818, "grad_norm": 1.1024534702301025, "learning_rate": 2.73372731966155e-07, "loss": 0.1393, "step": 42710 }, { "epoch": 0.9255086343249627, "grad_norm": 1.6153745651245117, "learning_rate": 2.7258293713537274e-07, "loss": 0.1814, "step": 42715 }, { "epoch": 0.9256169696444435, "grad_norm": 0.9054747223854065, "learning_rate": 2.7179426908013895e-07, "loss": 0.165, "step": 42720 }, { "epoch": 0.9257253049639244, "grad_norm": 2.089038610458374, "learning_rate": 2.710067278918116e-07, "loss": 0.247, "step": 42725 }, { "epoch": 0.9258336402834052, "grad_norm": 0.9653466939926147, "learning_rate": 2.702203136616144e-07, "loss": 0.152, "step": 42730 }, { "epoch": 0.925941975602886, "grad_norm": 0.8990182280540466, "learning_rate": 2.6943502648064225e-07, "loss": 0.1567, "step": 42735 }, { "epoch": 0.9260503109223669, "grad_norm": 2.683424472808838, "learning_rate": 2.6865086643985683e-07, "loss": 0.1371, "step": 42740 }, { "epoch": 0.9261586462418477, "grad_norm": 1.3657644987106323, "learning_rate": 2.6786783363009437e-07, "loss": 0.2343, "step": 42745 }, { "epoch": 0.9262669815613286, "grad_norm": 1.0589944124221802, "learning_rate": 2.670859281420557e-07, "loss": 0.126, "step": 42750 }, { "epoch": 0.9263753168808094, "grad_norm": 1.0266882181167603, "learning_rate": 2.663051500663139e-07, "loss": 0.1159, "step": 42755 }, { "epoch": 0.9264836522002904, "grad_norm": 1.403913974761963, "learning_rate": 2.6552549949330765e-07, "loss": 0.1411, "step": 42760 }, { "epoch": 0.9265919875197712, "grad_norm": 0.4317358732223511, "learning_rate": 2.6474697651335144e-07, "loss": 0.1655, "step": 42765 }, { "epoch": 0.9267003228392521, "grad_norm": 1.199630856513977, "learning_rate": 2.639695812166232e-07, "loss": 0.2343, "step": 42770 }, { "epoch": 0.9268086581587329, "grad_norm": 1.2044717073440552, "learning_rate": 2.6319331369317414e-07, "loss": 0.187, "step": 42775 }, { "epoch": 0.9269169934782138, "grad_norm": 1.479932188987732, "learning_rate": 2.6241817403292235e-07, "loss": 0.119, "step": 42780 }, { "epoch": 0.9270253287976946, "grad_norm": 1.38163161277771, "learning_rate": 2.616441623256549e-07, "loss": 0.1945, "step": 42785 }, { "epoch": 0.9271336641171755, "grad_norm": 0.695760190486908, "learning_rate": 2.6087127866103236e-07, "loss": 0.1184, "step": 42790 }, { "epoch": 0.9272419994366563, "grad_norm": 1.9453903436660767, "learning_rate": 2.6009952312857857e-07, "loss": 0.1786, "step": 42795 }, { "epoch": 0.9273503347561372, "grad_norm": 1.1997734308242798, "learning_rate": 2.5932889581769204e-07, "loss": 0.1249, "step": 42800 }, { "epoch": 0.9274586700756181, "grad_norm": 1.5813517570495605, "learning_rate": 2.5855939681763697e-07, "loss": 0.1671, "step": 42805 }, { "epoch": 0.927567005395099, "grad_norm": 0.7920128107070923, "learning_rate": 2.577910262175498e-07, "loss": 0.1249, "step": 42810 }, { "epoch": 0.9276753407145798, "grad_norm": 1.721107006072998, "learning_rate": 2.570237841064338e-07, "loss": 0.1353, "step": 42815 }, { "epoch": 0.9277836760340606, "grad_norm": 1.2557100057601929, "learning_rate": 2.562576705731623e-07, "loss": 0.1199, "step": 42820 }, { "epoch": 0.9278920113535415, "grad_norm": 0.4110565185546875, "learning_rate": 2.554926857064788e-07, "loss": 0.2602, "step": 42825 }, { "epoch": 0.9280003466730223, "grad_norm": 1.1437028646469116, "learning_rate": 2.547288295949946e-07, "loss": 0.1562, "step": 42830 }, { "epoch": 0.9281086819925032, "grad_norm": 0.5549347400665283, "learning_rate": 2.539661023271922e-07, "loss": 0.0904, "step": 42835 }, { "epoch": 0.928217017311984, "grad_norm": 0.8010765910148621, "learning_rate": 2.5320450399142104e-07, "loss": 0.1533, "step": 42840 }, { "epoch": 0.9283253526314649, "grad_norm": 1.0378738641738892, "learning_rate": 2.524440346759005e-07, "loss": 0.2175, "step": 42845 }, { "epoch": 0.9284336879509457, "grad_norm": 0.8931065797805786, "learning_rate": 2.516846944687212e-07, "loss": 0.1053, "step": 42850 }, { "epoch": 0.9285420232704267, "grad_norm": 1.0863088369369507, "learning_rate": 2.5092648345783733e-07, "loss": 0.159, "step": 42855 }, { "epoch": 0.9286503585899075, "grad_norm": 2.2261176109313965, "learning_rate": 2.5016940173108183e-07, "loss": 0.1903, "step": 42860 }, { "epoch": 0.9287586939093884, "grad_norm": 1.6381349563598633, "learning_rate": 2.49413449376148e-07, "loss": 0.1759, "step": 42865 }, { "epoch": 0.9288670292288692, "grad_norm": 1.6383405923843384, "learning_rate": 2.4865862648060347e-07, "loss": 0.1661, "step": 42870 }, { "epoch": 0.9289753645483501, "grad_norm": 1.6365203857421875, "learning_rate": 2.479049331318817e-07, "loss": 0.1571, "step": 42875 }, { "epoch": 0.9290836998678309, "grad_norm": 1.8293168544769287, "learning_rate": 2.4715236941728507e-07, "loss": 0.1462, "step": 42880 }, { "epoch": 0.9291920351873117, "grad_norm": 1.027355432510376, "learning_rate": 2.464009354239916e-07, "loss": 0.0957, "step": 42885 }, { "epoch": 0.9293003705067926, "grad_norm": 1.7252272367477417, "learning_rate": 2.456506312390383e-07, "loss": 0.1571, "step": 42890 }, { "epoch": 0.9294087058262734, "grad_norm": 0.37608063220977783, "learning_rate": 2.449014569493413e-07, "loss": 0.12, "step": 42895 }, { "epoch": 0.9295170411457543, "grad_norm": 1.5767627954483032, "learning_rate": 2.441534126416767e-07, "loss": 0.1663, "step": 42900 }, { "epoch": 0.9296253764652352, "grad_norm": 1.7582833766937256, "learning_rate": 2.4340649840269845e-07, "loss": 0.1045, "step": 42905 }, { "epoch": 0.9297337117847161, "grad_norm": 0.4693700671195984, "learning_rate": 2.42660714318923e-07, "loss": 0.1189, "step": 42910 }, { "epoch": 0.9298420471041969, "grad_norm": 1.159684419631958, "learning_rate": 2.41916060476739e-07, "loss": 0.1848, "step": 42915 }, { "epoch": 0.9299503824236778, "grad_norm": 2.275641441345215, "learning_rate": 2.4117253696240405e-07, "loss": 0.197, "step": 42920 }, { "epoch": 0.9300587177431586, "grad_norm": 0.360228568315506, "learning_rate": 2.4043014386204267e-07, "loss": 0.161, "step": 42925 }, { "epoch": 0.9301670530626395, "grad_norm": 0.23881039023399353, "learning_rate": 2.3968888126165156e-07, "loss": 0.1528, "step": 42930 }, { "epoch": 0.9302753883821203, "grad_norm": 1.378036618232727, "learning_rate": 2.389487492470932e-07, "loss": 0.1394, "step": 42935 }, { "epoch": 0.9303837237016012, "grad_norm": 0.9114748239517212, "learning_rate": 2.3820974790410234e-07, "loss": 0.2018, "step": 42940 }, { "epoch": 0.930492059021082, "grad_norm": 1.3250313997268677, "learning_rate": 2.3747187731827935e-07, "loss": 0.1422, "step": 42945 }, { "epoch": 0.930600394340563, "grad_norm": 0.8844397068023682, "learning_rate": 2.3673513757509702e-07, "loss": 0.1033, "step": 42950 }, { "epoch": 0.9307087296600438, "grad_norm": 0.6843792200088501, "learning_rate": 2.3599952875989706e-07, "loss": 0.0878, "step": 42955 }, { "epoch": 0.9308170649795247, "grad_norm": 0.9288965463638306, "learning_rate": 2.3526505095788466e-07, "loss": 0.1458, "step": 42960 }, { "epoch": 0.9309254002990055, "grad_norm": 2.3826794624328613, "learning_rate": 2.3453170425414174e-07, "loss": 0.1037, "step": 42965 }, { "epoch": 0.9310337356184863, "grad_norm": 0.926001787185669, "learning_rate": 2.3379948873361369e-07, "loss": 0.1325, "step": 42970 }, { "epoch": 0.9311420709379672, "grad_norm": 1.4347975254058838, "learning_rate": 2.3306840448111822e-07, "loss": 0.2044, "step": 42975 }, { "epoch": 0.931250406257448, "grad_norm": 1.0137280225753784, "learning_rate": 2.3233845158133872e-07, "loss": 0.1071, "step": 42980 }, { "epoch": 0.9313587415769289, "grad_norm": 0.9588260054588318, "learning_rate": 2.3160963011882975e-07, "loss": 0.1423, "step": 42985 }, { "epoch": 0.9314670768964097, "grad_norm": 1.2978038787841797, "learning_rate": 2.3088194017801492e-07, "loss": 0.1262, "step": 42990 }, { "epoch": 0.9315754122158906, "grad_norm": 1.2152503728866577, "learning_rate": 2.3015538184318675e-07, "loss": 0.1772, "step": 42995 }, { "epoch": 0.9316837475353715, "grad_norm": 1.1861553192138672, "learning_rate": 2.2942995519850353e-07, "loss": 0.2256, "step": 43000 }, { "epoch": 0.9317920828548524, "grad_norm": 1.2731608152389526, "learning_rate": 2.2870566032799802e-07, "loss": 0.1489, "step": 43005 }, { "epoch": 0.9319004181743332, "grad_norm": 1.4501885175704956, "learning_rate": 2.2798249731556754e-07, "loss": 0.1627, "step": 43010 }, { "epoch": 0.9320087534938141, "grad_norm": 1.1528295278549194, "learning_rate": 2.2726046624497954e-07, "loss": 0.1724, "step": 43015 }, { "epoch": 0.9321170888132949, "grad_norm": 1.179998517036438, "learning_rate": 2.265395671998716e-07, "loss": 0.1323, "step": 43020 }, { "epoch": 0.9322254241327758, "grad_norm": 1.723850131034851, "learning_rate": 2.2581980026374794e-07, "loss": 0.2397, "step": 43025 }, { "epoch": 0.9323337594522566, "grad_norm": 1.2637490034103394, "learning_rate": 2.2510116551998417e-07, "loss": 0.2065, "step": 43030 }, { "epoch": 0.9324420947717375, "grad_norm": 0.7875922322273254, "learning_rate": 2.2438366305182148e-07, "loss": 0.2092, "step": 43035 }, { "epoch": 0.9325504300912183, "grad_norm": 1.1045191287994385, "learning_rate": 2.2366729294237332e-07, "loss": 0.1, "step": 43040 }, { "epoch": 0.9326587654106991, "grad_norm": 1.4294092655181885, "learning_rate": 2.2295205527462006e-07, "loss": 0.1508, "step": 43045 }, { "epoch": 0.9327671007301801, "grad_norm": 2.1301214694976807, "learning_rate": 2.222379501314087e-07, "loss": 0.2701, "step": 43050 }, { "epoch": 0.9328754360496609, "grad_norm": 1.3606488704681396, "learning_rate": 2.2152497759546198e-07, "loss": 0.1437, "step": 43055 }, { "epoch": 0.9329837713691418, "grad_norm": 1.463819146156311, "learning_rate": 2.2081313774936386e-07, "loss": 0.2521, "step": 43060 }, { "epoch": 0.9330921066886226, "grad_norm": 1.7690308094024658, "learning_rate": 2.201024306755728e-07, "loss": 0.1453, "step": 43065 }, { "epoch": 0.9332004420081035, "grad_norm": 0.6924794912338257, "learning_rate": 2.1939285645641184e-07, "loss": 0.1303, "step": 43070 }, { "epoch": 0.9333087773275843, "grad_norm": 1.0465450286865234, "learning_rate": 2.1868441517407525e-07, "loss": 0.1332, "step": 43075 }, { "epoch": 0.9334171126470652, "grad_norm": 1.6679869890213013, "learning_rate": 2.1797710691062512e-07, "loss": 0.1777, "step": 43080 }, { "epoch": 0.933525447966546, "grad_norm": 1.7250455617904663, "learning_rate": 2.1727093174799153e-07, "loss": 0.1783, "step": 43085 }, { "epoch": 0.9336337832860269, "grad_norm": 1.6424816846847534, "learning_rate": 2.1656588976797677e-07, "loss": 0.1934, "step": 43090 }, { "epoch": 0.9337421186055078, "grad_norm": 2.0926084518432617, "learning_rate": 2.1586198105224554e-07, "loss": 0.1651, "step": 43095 }, { "epoch": 0.9338504539249887, "grad_norm": 2.713897705078125, "learning_rate": 2.1515920568233928e-07, "loss": 0.2975, "step": 43100 }, { "epoch": 0.9339587892444695, "grad_norm": 1.348828673362732, "learning_rate": 2.1445756373966065e-07, "loss": 0.1545, "step": 43105 }, { "epoch": 0.9340671245639504, "grad_norm": 1.0023372173309326, "learning_rate": 2.1375705530548795e-07, "loss": 0.1362, "step": 43110 }, { "epoch": 0.9341754598834312, "grad_norm": 1.399563193321228, "learning_rate": 2.1305768046096187e-07, "loss": 0.139, "step": 43115 }, { "epoch": 0.934283795202912, "grad_norm": 1.499942660331726, "learning_rate": 2.1235943928709312e-07, "loss": 0.1351, "step": 43120 }, { "epoch": 0.9343921305223929, "grad_norm": 1.2219246625900269, "learning_rate": 2.1166233186476703e-07, "loss": 0.165, "step": 43125 }, { "epoch": 0.9345004658418737, "grad_norm": 0.6429606676101685, "learning_rate": 2.10966358274729e-07, "loss": 0.1473, "step": 43130 }, { "epoch": 0.9346088011613546, "grad_norm": 1.3159234523773193, "learning_rate": 2.1027151859759897e-07, "loss": 0.1009, "step": 43135 }, { "epoch": 0.9347171364808354, "grad_norm": 1.8553005456924438, "learning_rate": 2.095778129138637e-07, "loss": 0.124, "step": 43140 }, { "epoch": 0.9348254718003164, "grad_norm": 0.9092363715171814, "learning_rate": 2.0888524130387776e-07, "loss": 0.102, "step": 43145 }, { "epoch": 0.9349338071197972, "grad_norm": 1.6994273662567139, "learning_rate": 2.0819380384786592e-07, "loss": 0.0802, "step": 43150 }, { "epoch": 0.9350421424392781, "grad_norm": 1.1798815727233887, "learning_rate": 2.0750350062592073e-07, "loss": 0.1297, "step": 43155 }, { "epoch": 0.9351504777587589, "grad_norm": 1.7940008640289307, "learning_rate": 2.068143317180038e-07, "loss": 0.1108, "step": 43160 }, { "epoch": 0.9352588130782398, "grad_norm": 2.263396739959717, "learning_rate": 2.061262972039435e-07, "loss": 0.1495, "step": 43165 }, { "epoch": 0.9353671483977206, "grad_norm": 1.5346053838729858, "learning_rate": 2.0543939716344163e-07, "loss": 0.1205, "step": 43170 }, { "epoch": 0.9354754837172015, "grad_norm": 0.8965458273887634, "learning_rate": 2.0475363167606122e-07, "loss": 0.0871, "step": 43175 }, { "epoch": 0.9355838190366823, "grad_norm": 0.9717088341712952, "learning_rate": 2.0406900082123981e-07, "loss": 0.1782, "step": 43180 }, { "epoch": 0.9356921543561632, "grad_norm": 1.0368119478225708, "learning_rate": 2.0338550467828177e-07, "loss": 0.101, "step": 43185 }, { "epoch": 0.935800489675644, "grad_norm": 0.9908826947212219, "learning_rate": 2.0270314332636042e-07, "loss": 0.165, "step": 43190 }, { "epoch": 0.935908824995125, "grad_norm": 1.6049998998641968, "learning_rate": 2.0202191684451588e-07, "loss": 0.1198, "step": 43195 }, { "epoch": 0.9360171603146058, "grad_norm": 1.6934062242507935, "learning_rate": 2.0134182531165837e-07, "loss": 0.1701, "step": 43200 }, { "epoch": 0.9361254956340866, "grad_norm": 1.068413496017456, "learning_rate": 2.0066286880656706e-07, "loss": 0.1711, "step": 43205 }, { "epoch": 0.9362338309535675, "grad_norm": 1.2927159070968628, "learning_rate": 1.9998504740788793e-07, "loss": 0.0871, "step": 43210 }, { "epoch": 0.9363421662730483, "grad_norm": 2.376262903213501, "learning_rate": 1.9930836119413822e-07, "loss": 0.1214, "step": 43215 }, { "epoch": 0.9364505015925292, "grad_norm": 0.41412273049354553, "learning_rate": 1.9863281024369853e-07, "loss": 0.0885, "step": 43220 }, { "epoch": 0.93655883691201, "grad_norm": 0.9565329551696777, "learning_rate": 1.9795839463482513e-07, "loss": 0.1102, "step": 43225 }, { "epoch": 0.9366671722314909, "grad_norm": 1.0148073434829712, "learning_rate": 1.9728511444563557e-07, "loss": 0.2054, "step": 43230 }, { "epoch": 0.9367755075509717, "grad_norm": 2.4291930198669434, "learning_rate": 1.966129697541219e-07, "loss": 0.1618, "step": 43235 }, { "epoch": 0.9368838428704527, "grad_norm": 0.9671140909194946, "learning_rate": 1.9594196063813965e-07, "loss": 0.2309, "step": 43240 }, { "epoch": 0.9369921781899335, "grad_norm": 0.7411275506019592, "learning_rate": 1.9527208717541658e-07, "loss": 0.1066, "step": 43245 }, { "epoch": 0.9371005135094144, "grad_norm": 1.2966126203536987, "learning_rate": 1.9460334944354841e-07, "loss": 0.1897, "step": 43250 }, { "epoch": 0.9372088488288952, "grad_norm": 1.4115673303604126, "learning_rate": 1.9393574751999766e-07, "loss": 0.1976, "step": 43255 }, { "epoch": 0.9373171841483761, "grad_norm": 0.6442558169364929, "learning_rate": 1.9326928148209467e-07, "loss": 0.1517, "step": 43260 }, { "epoch": 0.9374255194678569, "grad_norm": 1.354689359664917, "learning_rate": 1.9260395140704102e-07, "loss": 0.1928, "step": 43265 }, { "epoch": 0.9375338547873378, "grad_norm": 0.8850402235984802, "learning_rate": 1.9193975737190506e-07, "loss": 0.1374, "step": 43270 }, { "epoch": 0.9376421901068186, "grad_norm": 1.1151936054229736, "learning_rate": 1.9127669945362303e-07, "loss": 0.1444, "step": 43275 }, { "epoch": 0.9377505254262994, "grad_norm": 1.4957178831100464, "learning_rate": 1.906147777290013e-07, "loss": 0.1582, "step": 43280 }, { "epoch": 0.9378588607457803, "grad_norm": 1.1061277389526367, "learning_rate": 1.899539922747129e-07, "loss": 0.2529, "step": 43285 }, { "epoch": 0.9379671960652612, "grad_norm": 1.8179630041122437, "learning_rate": 1.8929434316729888e-07, "loss": 0.1568, "step": 43290 }, { "epoch": 0.9380755313847421, "grad_norm": 1.5334978103637695, "learning_rate": 1.8863583048317257e-07, "loss": 0.1159, "step": 43295 }, { "epoch": 0.9381838667042229, "grad_norm": 1.010009527206421, "learning_rate": 1.879784542986096e-07, "loss": 0.1121, "step": 43300 }, { "epoch": 0.9382922020237038, "grad_norm": 1.4335870742797852, "learning_rate": 1.873222146897602e-07, "loss": 0.1967, "step": 43305 }, { "epoch": 0.9384005373431846, "grad_norm": 1.3573626279830933, "learning_rate": 1.8666711173263685e-07, "loss": 0.1923, "step": 43310 }, { "epoch": 0.9385088726626655, "grad_norm": 1.0810356140136719, "learning_rate": 1.8601314550312554e-07, "loss": 0.1854, "step": 43315 }, { "epoch": 0.9386172079821463, "grad_norm": 1.2852609157562256, "learning_rate": 1.853603160769779e-07, "loss": 0.1458, "step": 43320 }, { "epoch": 0.9387255433016272, "grad_norm": 0.8297284245491028, "learning_rate": 1.8470862352981344e-07, "loss": 0.1116, "step": 43325 }, { "epoch": 0.938833878621108, "grad_norm": 2.2672910690307617, "learning_rate": 1.84058067937124e-07, "loss": 0.0656, "step": 43330 }, { "epoch": 0.938942213940589, "grad_norm": 1.0289174318313599, "learning_rate": 1.834086493742615e-07, "loss": 0.0695, "step": 43335 }, { "epoch": 0.9390505492600698, "grad_norm": 0.7910485863685608, "learning_rate": 1.8276036791645692e-07, "loss": 0.1544, "step": 43340 }, { "epoch": 0.9391588845795507, "grad_norm": 1.3651840686798096, "learning_rate": 1.8211322363880014e-07, "loss": 0.1361, "step": 43345 }, { "epoch": 0.9392672198990315, "grad_norm": 1.0253944396972656, "learning_rate": 1.8146721661625454e-07, "loss": 0.1188, "step": 43350 }, { "epoch": 0.9393755552185123, "grad_norm": 1.0938581228256226, "learning_rate": 1.808223469236514e-07, "loss": 0.19, "step": 43355 }, { "epoch": 0.9394838905379932, "grad_norm": 1.2672971487045288, "learning_rate": 1.8017861463568763e-07, "loss": 0.1436, "step": 43360 }, { "epoch": 0.939592225857474, "grad_norm": 1.4967312812805176, "learning_rate": 1.7953601982693025e-07, "loss": 0.1457, "step": 43365 }, { "epoch": 0.9397005611769549, "grad_norm": 1.379992961883545, "learning_rate": 1.7889456257181414e-07, "loss": 0.1957, "step": 43370 }, { "epoch": 0.9398088964964357, "grad_norm": 1.35054612159729, "learning_rate": 1.7825424294464323e-07, "loss": 0.1776, "step": 43375 }, { "epoch": 0.9399172318159166, "grad_norm": 2.171433210372925, "learning_rate": 1.7761506101958814e-07, "loss": 0.1964, "step": 43380 }, { "epoch": 0.9400255671353975, "grad_norm": 0.9446073770523071, "learning_rate": 1.7697701687068858e-07, "loss": 0.2144, "step": 43385 }, { "epoch": 0.9401339024548784, "grad_norm": 0.6239278316497803, "learning_rate": 1.7634011057185318e-07, "loss": 0.1604, "step": 43390 }, { "epoch": 0.9402422377743592, "grad_norm": 1.5463517904281616, "learning_rate": 1.757043421968585e-07, "loss": 0.2348, "step": 43395 }, { "epoch": 0.9403505730938401, "grad_norm": 1.565512776374817, "learning_rate": 1.7506971181934672e-07, "loss": 0.1767, "step": 43400 }, { "epoch": 0.9404589084133209, "grad_norm": 1.5282220840454102, "learning_rate": 1.744362195128313e-07, "loss": 0.1778, "step": 43405 }, { "epoch": 0.9405672437328018, "grad_norm": 3.081237554550171, "learning_rate": 1.738038653506946e-07, "loss": 0.1198, "step": 43410 }, { "epoch": 0.9406755790522826, "grad_norm": 1.2612919807434082, "learning_rate": 1.731726494061814e-07, "loss": 0.0918, "step": 43415 }, { "epoch": 0.9407839143717635, "grad_norm": 1.2053850889205933, "learning_rate": 1.725425717524132e-07, "loss": 0.1278, "step": 43420 }, { "epoch": 0.9408922496912443, "grad_norm": 1.427828073501587, "learning_rate": 1.7191363246237046e-07, "loss": 0.1912, "step": 43425 }, { "epoch": 0.9410005850107251, "grad_norm": 0.805196225643158, "learning_rate": 1.7128583160891056e-07, "loss": 0.1721, "step": 43430 }, { "epoch": 0.9411089203302061, "grad_norm": 0.6314041614532471, "learning_rate": 1.7065916926475079e-07, "loss": 0.209, "step": 43435 }, { "epoch": 0.941217255649687, "grad_norm": 1.102439284324646, "learning_rate": 1.7003364550248425e-07, "loss": 0.0974, "step": 43440 }, { "epoch": 0.9413255909691678, "grad_norm": 1.619410753250122, "learning_rate": 1.694092603945674e-07, "loss": 0.1692, "step": 43445 }, { "epoch": 0.9414339262886486, "grad_norm": 1.4991073608398438, "learning_rate": 1.687860140133246e-07, "loss": 0.1246, "step": 43450 }, { "epoch": 0.9415422616081295, "grad_norm": 1.2031856775283813, "learning_rate": 1.6816390643095038e-07, "loss": 0.1306, "step": 43455 }, { "epoch": 0.9416505969276103, "grad_norm": 1.053157091140747, "learning_rate": 1.6754293771950702e-07, "loss": 0.0796, "step": 43460 }, { "epoch": 0.9417589322470912, "grad_norm": 1.4875987768173218, "learning_rate": 1.669231079509248e-07, "loss": 0.1832, "step": 43465 }, { "epoch": 0.941867267566572, "grad_norm": 1.2996619939804077, "learning_rate": 1.6630441719699962e-07, "loss": 0.1825, "step": 43470 }, { "epoch": 0.9419756028860529, "grad_norm": 0.5925490856170654, "learning_rate": 1.6568686552940084e-07, "loss": 0.1034, "step": 43475 }, { "epoch": 0.9420839382055338, "grad_norm": 1.2036340236663818, "learning_rate": 1.6507045301965896e-07, "loss": 0.1917, "step": 43480 }, { "epoch": 0.9421922735250147, "grad_norm": 0.7232049703598022, "learning_rate": 1.64455179739178e-07, "loss": 0.1895, "step": 43485 }, { "epoch": 0.9423006088444955, "grad_norm": 0.9785255193710327, "learning_rate": 1.6384104575922877e-07, "loss": 0.1556, "step": 43490 }, { "epoch": 0.9424089441639764, "grad_norm": 1.4061996936798096, "learning_rate": 1.6322805115094763e-07, "loss": 0.2048, "step": 43495 }, { "epoch": 0.9425172794834572, "grad_norm": 0.6087446808815002, "learning_rate": 1.626161959853434e-07, "loss": 0.1248, "step": 43500 }, { "epoch": 0.942625614802938, "grad_norm": 1.777397632598877, "learning_rate": 1.6200548033328822e-07, "loss": 0.1692, "step": 43505 }, { "epoch": 0.9427339501224189, "grad_norm": 1.0287508964538574, "learning_rate": 1.6139590426552553e-07, "loss": 0.1733, "step": 43510 }, { "epoch": 0.9428422854418997, "grad_norm": 1.2114415168762207, "learning_rate": 1.6078746785266442e-07, "loss": 0.1271, "step": 43515 }, { "epoch": 0.9429506207613806, "grad_norm": 0.9015019536018372, "learning_rate": 1.6018017116518403e-07, "loss": 0.1184, "step": 43520 }, { "epoch": 0.9430589560808614, "grad_norm": 1.4876259565353394, "learning_rate": 1.5957401427342922e-07, "loss": 0.1747, "step": 43525 }, { "epoch": 0.9431672914003424, "grad_norm": 1.8740317821502686, "learning_rate": 1.5896899724761604e-07, "loss": 0.2381, "step": 43530 }, { "epoch": 0.9432756267198232, "grad_norm": 1.7399126291275024, "learning_rate": 1.5836512015782623e-07, "loss": 0.1225, "step": 43535 }, { "epoch": 0.9433839620393041, "grad_norm": 1.1385432481765747, "learning_rate": 1.5776238307400936e-07, "loss": 0.1083, "step": 43540 }, { "epoch": 0.9434922973587849, "grad_norm": 1.7178900241851807, "learning_rate": 1.5716078606598406e-07, "loss": 0.1536, "step": 43545 }, { "epoch": 0.9436006326782658, "grad_norm": 1.0381231307983398, "learning_rate": 1.5656032920343566e-07, "loss": 0.1623, "step": 43550 }, { "epoch": 0.9437089679977466, "grad_norm": 1.818457841873169, "learning_rate": 1.5596101255591857e-07, "loss": 0.2439, "step": 43555 }, { "epoch": 0.9438173033172275, "grad_norm": 1.7167208194732666, "learning_rate": 1.5536283619285386e-07, "loss": 0.1085, "step": 43560 }, { "epoch": 0.9439256386367083, "grad_norm": 2.0738751888275146, "learning_rate": 1.547658001835328e-07, "loss": 0.1898, "step": 43565 }, { "epoch": 0.9440339739561892, "grad_norm": 1.2091599702835083, "learning_rate": 1.5416990459711234e-07, "loss": 0.1226, "step": 43570 }, { "epoch": 0.94414230927567, "grad_norm": 1.3116352558135986, "learning_rate": 1.535751495026172e-07, "loss": 0.2091, "step": 43575 }, { "epoch": 0.944250644595151, "grad_norm": 1.1942554712295532, "learning_rate": 1.529815349689412e-07, "loss": 0.2776, "step": 43580 }, { "epoch": 0.9443589799146318, "grad_norm": 1.5891605615615845, "learning_rate": 1.5238906106484597e-07, "loss": 0.2049, "step": 43585 }, { "epoch": 0.9444673152341126, "grad_norm": 1.1275745630264282, "learning_rate": 1.517977278589622e-07, "loss": 0.1399, "step": 43590 }, { "epoch": 0.9445756505535935, "grad_norm": 1.647711157798767, "learning_rate": 1.5120753541978394e-07, "loss": 0.2204, "step": 43595 }, { "epoch": 0.9446839858730743, "grad_norm": 1.0296061038970947, "learning_rate": 1.506184838156799e-07, "loss": 0.1536, "step": 43600 }, { "epoch": 0.9447923211925552, "grad_norm": 1.7430601119995117, "learning_rate": 1.500305731148799e-07, "loss": 0.2209, "step": 43605 }, { "epoch": 0.944900656512036, "grad_norm": 1.7002081871032715, "learning_rate": 1.4944380338548504e-07, "loss": 0.2019, "step": 43610 }, { "epoch": 0.9450089918315169, "grad_norm": 1.689377784729004, "learning_rate": 1.4885817469546425e-07, "loss": 0.1821, "step": 43615 }, { "epoch": 0.9451173271509977, "grad_norm": 1.3019444942474365, "learning_rate": 1.4827368711265334e-07, "loss": 0.285, "step": 43620 }, { "epoch": 0.9452256624704787, "grad_norm": 1.3906779289245605, "learning_rate": 1.4769034070475697e-07, "loss": 0.1651, "step": 43625 }, { "epoch": 0.9453339977899595, "grad_norm": 1.6250213384628296, "learning_rate": 1.4710813553934776e-07, "loss": 0.1641, "step": 43630 }, { "epoch": 0.9454423331094404, "grad_norm": 0.6528511643409729, "learning_rate": 1.4652707168386403e-07, "loss": 0.2291, "step": 43635 }, { "epoch": 0.9455506684289212, "grad_norm": 1.1302562952041626, "learning_rate": 1.4594714920561525e-07, "loss": 0.1603, "step": 43640 }, { "epoch": 0.9456590037484021, "grad_norm": 1.8466696739196777, "learning_rate": 1.4536836817177436e-07, "loss": 0.1425, "step": 43645 }, { "epoch": 0.9457673390678829, "grad_norm": 1.8357480764389038, "learning_rate": 1.4479072864938658e-07, "loss": 0.1874, "step": 43650 }, { "epoch": 0.9458756743873638, "grad_norm": 3.2392492294311523, "learning_rate": 1.4421423070536066e-07, "loss": 0.1367, "step": 43655 }, { "epoch": 0.9459840097068446, "grad_norm": 0.9306288361549377, "learning_rate": 1.436388744064776e-07, "loss": 0.1976, "step": 43660 }, { "epoch": 0.9460923450263254, "grad_norm": 1.453568696975708, "learning_rate": 1.430646598193819e-07, "loss": 0.1387, "step": 43665 }, { "epoch": 0.9462006803458063, "grad_norm": 1.397448182106018, "learning_rate": 1.424915870105892e-07, "loss": 0.2212, "step": 43670 }, { "epoch": 0.9463090156652872, "grad_norm": 1.4770926237106323, "learning_rate": 1.4191965604648084e-07, "loss": 0.1647, "step": 43675 }, { "epoch": 0.9464173509847681, "grad_norm": 1.6948059797286987, "learning_rate": 1.4134886699330497e-07, "loss": 0.1702, "step": 43680 }, { "epoch": 0.9465256863042489, "grad_norm": 0.8835970759391785, "learning_rate": 1.407792199171809e-07, "loss": 0.1429, "step": 43685 }, { "epoch": 0.9466340216237298, "grad_norm": 1.0082647800445557, "learning_rate": 1.4021071488409366e-07, "loss": 0.1764, "step": 43690 }, { "epoch": 0.9467423569432106, "grad_norm": 1.47392737865448, "learning_rate": 1.3964335195989498e-07, "loss": 0.1709, "step": 43695 }, { "epoch": 0.9468506922626915, "grad_norm": 0.5870417952537537, "learning_rate": 1.3907713121030565e-07, "loss": 0.2098, "step": 43700 }, { "epoch": 0.9469590275821723, "grad_norm": 1.5532948970794678, "learning_rate": 1.3851205270091428e-07, "loss": 0.1527, "step": 43705 }, { "epoch": 0.9470673629016532, "grad_norm": 1.1041285991668701, "learning_rate": 1.3794811649717632e-07, "loss": 0.2102, "step": 43710 }, { "epoch": 0.947175698221134, "grad_norm": 2.019461154937744, "learning_rate": 1.3738532266441618e-07, "loss": 0.2051, "step": 43715 }, { "epoch": 0.9472840335406149, "grad_norm": 1.4465487003326416, "learning_rate": 1.368236712678239e-07, "loss": 0.1093, "step": 43720 }, { "epoch": 0.9473923688600958, "grad_norm": 1.5746835470199585, "learning_rate": 1.362631623724586e-07, "loss": 0.1751, "step": 43725 }, { "epoch": 0.9475007041795767, "grad_norm": 0.5331364870071411, "learning_rate": 1.3570379604324835e-07, "loss": 0.195, "step": 43730 }, { "epoch": 0.9476090394990575, "grad_norm": 0.8956770300865173, "learning_rate": 1.351455723449846e-07, "loss": 0.1385, "step": 43735 }, { "epoch": 0.9477173748185383, "grad_norm": 1.518133282661438, "learning_rate": 1.345884913423323e-07, "loss": 0.161, "step": 43740 }, { "epoch": 0.9478257101380192, "grad_norm": 1.2879704236984253, "learning_rate": 1.3403255309981877e-07, "loss": 0.1259, "step": 43745 }, { "epoch": 0.9479340454575, "grad_norm": 1.1000136137008667, "learning_rate": 1.3347775768184134e-07, "loss": 0.2205, "step": 43750 }, { "epoch": 0.9480423807769809, "grad_norm": 1.3093584775924683, "learning_rate": 1.329241051526653e-07, "loss": 0.1817, "step": 43755 }, { "epoch": 0.9481507160964617, "grad_norm": 1.3191790580749512, "learning_rate": 1.3237159557642376e-07, "loss": 0.1954, "step": 43760 }, { "epoch": 0.9482590514159426, "grad_norm": 1.3689589500427246, "learning_rate": 1.3182022901711444e-07, "loss": 0.1354, "step": 43765 }, { "epoch": 0.9483673867354235, "grad_norm": 0.8121963739395142, "learning_rate": 1.3127000553860737e-07, "loss": 0.1668, "step": 43770 }, { "epoch": 0.9484757220549044, "grad_norm": 0.5587966442108154, "learning_rate": 1.307209252046371e-07, "loss": 0.1186, "step": 43775 }, { "epoch": 0.9485840573743852, "grad_norm": 0.9927020072937012, "learning_rate": 1.3017298807880386e-07, "loss": 0.1216, "step": 43780 }, { "epoch": 0.9486923926938661, "grad_norm": 1.2898329496383667, "learning_rate": 1.2962619422458132e-07, "loss": 0.1565, "step": 43785 }, { "epoch": 0.9488007280133469, "grad_norm": 0.4602801203727722, "learning_rate": 1.2908054370530554e-07, "loss": 0.185, "step": 43790 }, { "epoch": 0.9489090633328278, "grad_norm": 0.8188362121582031, "learning_rate": 1.2853603658418367e-07, "loss": 0.1752, "step": 43795 }, { "epoch": 0.9490173986523086, "grad_norm": 1.8040552139282227, "learning_rate": 1.279926729242864e-07, "loss": 0.1787, "step": 43800 }, { "epoch": 0.9491257339717895, "grad_norm": 0.7359220385551453, "learning_rate": 1.274504527885556e-07, "loss": 0.1319, "step": 43805 }, { "epoch": 0.9492340692912703, "grad_norm": 1.0933090448379517, "learning_rate": 1.2690937623979882e-07, "loss": 0.1672, "step": 43810 }, { "epoch": 0.9493424046107511, "grad_norm": 1.5993503332138062, "learning_rate": 1.2636944334069145e-07, "loss": 0.122, "step": 43815 }, { "epoch": 0.9494507399302321, "grad_norm": 1.1206462383270264, "learning_rate": 1.2583065415377682e-07, "loss": 0.154, "step": 43820 }, { "epoch": 0.949559075249713, "grad_norm": 1.7961262464523315, "learning_rate": 1.2529300874146722e-07, "loss": 0.1365, "step": 43825 }, { "epoch": 0.9496674105691938, "grad_norm": 1.4116157293319702, "learning_rate": 1.2475650716603838e-07, "loss": 0.1853, "step": 43830 }, { "epoch": 0.9497757458886746, "grad_norm": 1.1677327156066895, "learning_rate": 1.2422114948963726e-07, "loss": 0.1742, "step": 43835 }, { "epoch": 0.9498840812081555, "grad_norm": 1.3834865093231201, "learning_rate": 1.236869357742776e-07, "loss": 0.2639, "step": 43840 }, { "epoch": 0.9499924165276363, "grad_norm": 1.45106840133667, "learning_rate": 1.2315386608183877e-07, "loss": 0.1892, "step": 43845 }, { "epoch": 0.9501007518471172, "grad_norm": 1.1327441930770874, "learning_rate": 1.2262194047406917e-07, "loss": 0.1391, "step": 43850 }, { "epoch": 0.950209087166598, "grad_norm": 1.6331905126571655, "learning_rate": 1.2209115901258506e-07, "loss": 0.1886, "step": 43855 }, { "epoch": 0.9503174224860789, "grad_norm": 1.531497836112976, "learning_rate": 1.215615217588695e-07, "loss": 0.1167, "step": 43860 }, { "epoch": 0.9504257578055598, "grad_norm": 1.6677300930023193, "learning_rate": 1.2103302877427114e-07, "loss": 0.1347, "step": 43865 }, { "epoch": 0.9505340931250407, "grad_norm": 0.8136223554611206, "learning_rate": 1.2050568012000997e-07, "loss": 0.1821, "step": 43870 }, { "epoch": 0.9506424284445215, "grad_norm": 1.990570306777954, "learning_rate": 1.199794758571715e-07, "loss": 0.1649, "step": 43875 }, { "epoch": 0.9507507637640024, "grad_norm": 0.3641653060913086, "learning_rate": 1.1945441604670816e-07, "loss": 0.1343, "step": 43880 }, { "epoch": 0.9508590990834832, "grad_norm": 1.4996415376663208, "learning_rate": 1.1893050074944012e-07, "loss": 0.1329, "step": 43885 }, { "epoch": 0.950967434402964, "grad_norm": 0.9364339113235474, "learning_rate": 1.1840773002605555e-07, "loss": 0.1187, "step": 43890 }, { "epoch": 0.9510757697224449, "grad_norm": 1.3680561780929565, "learning_rate": 1.178861039371082e-07, "loss": 0.1733, "step": 43895 }, { "epoch": 0.9511841050419257, "grad_norm": 1.4509711265563965, "learning_rate": 1.1736562254302197e-07, "loss": 0.1582, "step": 43900 }, { "epoch": 0.9512924403614066, "grad_norm": 0.45151716470718384, "learning_rate": 1.1684628590408641e-07, "loss": 0.1911, "step": 43905 }, { "epoch": 0.9514007756808874, "grad_norm": 2.2165541648864746, "learning_rate": 1.1632809408046009e-07, "loss": 0.1736, "step": 43910 }, { "epoch": 0.9515091110003684, "grad_norm": 1.6115399599075317, "learning_rate": 1.1581104713216496e-07, "loss": 0.1583, "step": 43915 }, { "epoch": 0.9516174463198492, "grad_norm": 0.648404598236084, "learning_rate": 1.1529514511909534e-07, "loss": 0.166, "step": 43920 }, { "epoch": 0.9517257816393301, "grad_norm": 1.4221516847610474, "learning_rate": 1.1478038810101122e-07, "loss": 0.1486, "step": 43925 }, { "epoch": 0.9518341169588109, "grad_norm": 1.2214124202728271, "learning_rate": 1.1426677613753712e-07, "loss": 0.0827, "step": 43930 }, { "epoch": 0.9519424522782918, "grad_norm": 1.8648993968963623, "learning_rate": 1.1375430928816988e-07, "loss": 0.2263, "step": 43935 }, { "epoch": 0.9520507875977726, "grad_norm": 1.1009334325790405, "learning_rate": 1.1324298761226982e-07, "loss": 0.1819, "step": 43940 }, { "epoch": 0.9521591229172535, "grad_norm": 1.1905714273452759, "learning_rate": 1.127328111690662e-07, "loss": 0.1288, "step": 43945 }, { "epoch": 0.9522674582367343, "grad_norm": 1.855762004852295, "learning_rate": 1.1222378001765399e-07, "loss": 0.1667, "step": 43950 }, { "epoch": 0.9523757935562152, "grad_norm": 1.4782732725143433, "learning_rate": 1.1171589421699935e-07, "loss": 0.1355, "step": 43955 }, { "epoch": 0.952484128875696, "grad_norm": 1.8997938632965088, "learning_rate": 1.1120915382593079e-07, "loss": 0.1187, "step": 43960 }, { "epoch": 0.952592464195177, "grad_norm": 1.5996876955032349, "learning_rate": 1.1070355890314799e-07, "loss": 0.1714, "step": 43965 }, { "epoch": 0.9527007995146578, "grad_norm": 0.6584671139717102, "learning_rate": 1.1019910950721746e-07, "loss": 0.1391, "step": 43970 }, { "epoch": 0.9528091348341386, "grad_norm": 0.6298546195030212, "learning_rate": 1.0969580569656912e-07, "loss": 0.1246, "step": 43975 }, { "epoch": 0.9529174701536195, "grad_norm": 1.199831485748291, "learning_rate": 1.0919364752950634e-07, "loss": 0.1635, "step": 43980 }, { "epoch": 0.9530258054731003, "grad_norm": 0.7752609848976135, "learning_rate": 1.086926350641948e-07, "loss": 0.1687, "step": 43985 }, { "epoch": 0.9531341407925812, "grad_norm": 0.9284036755561829, "learning_rate": 1.081927683586692e-07, "loss": 0.171, "step": 43990 }, { "epoch": 0.953242476112062, "grad_norm": 1.1842970848083496, "learning_rate": 1.0769404747083211e-07, "loss": 0.2263, "step": 43995 }, { "epoch": 0.9533508114315429, "grad_norm": 2.1651127338409424, "learning_rate": 1.07196472458454e-07, "loss": 0.153, "step": 44000 }, { "epoch": 0.9534591467510237, "grad_norm": 0.9702625870704651, "learning_rate": 1.0670004337916873e-07, "loss": 0.1524, "step": 44005 }, { "epoch": 0.9535674820705047, "grad_norm": 1.5687720775604248, "learning_rate": 1.0620476029048254e-07, "loss": 0.0813, "step": 44010 }, { "epoch": 0.9536758173899855, "grad_norm": 0.4022093117237091, "learning_rate": 1.0571062324976621e-07, "loss": 0.0883, "step": 44015 }, { "epoch": 0.9537841527094664, "grad_norm": 1.8727179765701294, "learning_rate": 1.052176323142573e-07, "loss": 0.1136, "step": 44020 }, { "epoch": 0.9538924880289472, "grad_norm": 1.2210032939910889, "learning_rate": 1.0472578754106234e-07, "loss": 0.2026, "step": 44025 }, { "epoch": 0.9540008233484281, "grad_norm": 1.8727905750274658, "learning_rate": 1.0423508898715351e-07, "loss": 0.1737, "step": 44030 }, { "epoch": 0.9541091586679089, "grad_norm": 0.7250543832778931, "learning_rate": 1.0374553670937093e-07, "loss": 0.1442, "step": 44035 }, { "epoch": 0.9542174939873898, "grad_norm": 1.6700884103775024, "learning_rate": 1.0325713076442257e-07, "loss": 0.1601, "step": 44040 }, { "epoch": 0.9543258293068706, "grad_norm": 1.7003123760223389, "learning_rate": 1.0276987120888204e-07, "loss": 0.1793, "step": 44045 }, { "epoch": 0.9544341646263514, "grad_norm": 1.8753544092178345, "learning_rate": 1.022837580991909e-07, "loss": 0.1974, "step": 44050 }, { "epoch": 0.9545424999458323, "grad_norm": 0.40526095032691956, "learning_rate": 1.017987914916596e-07, "loss": 0.1807, "step": 44055 }, { "epoch": 0.9546508352653132, "grad_norm": 1.0804520845413208, "learning_rate": 1.0131497144246216e-07, "loss": 0.1402, "step": 44060 }, { "epoch": 0.9547591705847941, "grad_norm": 1.4568793773651123, "learning_rate": 1.008322980076426e-07, "loss": 0.151, "step": 44065 }, { "epoch": 0.9548675059042749, "grad_norm": 1.2836936712265015, "learning_rate": 1.0035077124311288e-07, "loss": 0.1327, "step": 44070 }, { "epoch": 0.9549758412237558, "grad_norm": 1.5240265130996704, "learning_rate": 9.987039120464947e-08, "loss": 0.2295, "step": 44075 }, { "epoch": 0.9550841765432366, "grad_norm": 1.1435283422470093, "learning_rate": 9.939115794789789e-08, "loss": 0.2032, "step": 44080 }, { "epoch": 0.9551925118627175, "grad_norm": 1.7948873043060303, "learning_rate": 9.891307152836927e-08, "loss": 0.1479, "step": 44085 }, { "epoch": 0.9553008471821983, "grad_norm": 0.8747650980949402, "learning_rate": 9.843613200144153e-08, "loss": 0.1267, "step": 44090 }, { "epoch": 0.9554091825016792, "grad_norm": 1.634716510772705, "learning_rate": 9.796033942236493e-08, "loss": 0.2089, "step": 44095 }, { "epoch": 0.95551751782116, "grad_norm": 1.901173710823059, "learning_rate": 9.748569384624873e-08, "loss": 0.1596, "step": 44100 }, { "epoch": 0.9556258531406409, "grad_norm": 1.5329508781433105, "learning_rate": 9.70121953280756e-08, "loss": 0.1381, "step": 44105 }, { "epoch": 0.9557341884601218, "grad_norm": 2.0179662704467773, "learning_rate": 9.653984392269167e-08, "loss": 0.1269, "step": 44110 }, { "epoch": 0.9558425237796027, "grad_norm": 1.0797120332717896, "learning_rate": 9.606863968481539e-08, "loss": 0.0833, "step": 44115 }, { "epoch": 0.9559508590990835, "grad_norm": 1.0978820323944092, "learning_rate": 9.559858266902533e-08, "loss": 0.2156, "step": 44120 }, { "epoch": 0.9560591944185644, "grad_norm": 1.7006994485855103, "learning_rate": 9.512967292977126e-08, "loss": 0.1482, "step": 44125 }, { "epoch": 0.9561675297380452, "grad_norm": 1.5693354606628418, "learning_rate": 9.466191052136975e-08, "loss": 0.1549, "step": 44130 }, { "epoch": 0.956275865057526, "grad_norm": 1.587990641593933, "learning_rate": 9.41952954980041e-08, "loss": 0.2179, "step": 44135 }, { "epoch": 0.9563842003770069, "grad_norm": 1.1846349239349365, "learning_rate": 9.372982791372443e-08, "loss": 0.1805, "step": 44140 }, { "epoch": 0.9564925356964877, "grad_norm": 0.8903842568397522, "learning_rate": 9.326550782244759e-08, "loss": 0.1487, "step": 44145 }, { "epoch": 0.9566008710159686, "grad_norm": 1.0930947065353394, "learning_rate": 9.280233527795946e-08, "loss": 0.1907, "step": 44150 }, { "epoch": 0.9567092063354495, "grad_norm": 1.9386721849441528, "learning_rate": 9.234031033391155e-08, "loss": 0.1934, "step": 44155 }, { "epoch": 0.9568175416549304, "grad_norm": 1.2834382057189941, "learning_rate": 9.187943304382107e-08, "loss": 0.1735, "step": 44160 }, { "epoch": 0.9569258769744112, "grad_norm": 1.429553747177124, "learning_rate": 9.141970346107532e-08, "loss": 0.1133, "step": 44165 }, { "epoch": 0.9570342122938921, "grad_norm": 0.9360944628715515, "learning_rate": 9.0961121638925e-08, "loss": 0.0909, "step": 44170 }, { "epoch": 0.9571425476133729, "grad_norm": 1.3856279850006104, "learning_rate": 9.050368763049323e-08, "loss": 0.1736, "step": 44175 }, { "epoch": 0.9572508829328538, "grad_norm": 1.3173335790634155, "learning_rate": 9.004740148876311e-08, "loss": 0.2032, "step": 44180 }, { "epoch": 0.9573592182523346, "grad_norm": 1.439157247543335, "learning_rate": 8.959226326659242e-08, "loss": 0.1833, "step": 44185 }, { "epoch": 0.9574675535718155, "grad_norm": 2.2129664421081543, "learning_rate": 8.913827301669897e-08, "loss": 0.1503, "step": 44190 }, { "epoch": 0.9575758888912963, "grad_norm": 0.7840569019317627, "learning_rate": 8.86854307916718e-08, "loss": 0.1356, "step": 44195 }, { "epoch": 0.9576842242107771, "grad_norm": 1.437760591506958, "learning_rate": 8.823373664396673e-08, "loss": 0.164, "step": 44200 }, { "epoch": 0.9577925595302581, "grad_norm": 1.7245128154754639, "learning_rate": 8.778319062590413e-08, "loss": 0.1597, "step": 44205 }, { "epoch": 0.957900894849739, "grad_norm": 1.9122616052627563, "learning_rate": 8.733379278967446e-08, "loss": 0.1762, "step": 44210 }, { "epoch": 0.9580092301692198, "grad_norm": 1.5124447345733643, "learning_rate": 8.688554318733278e-08, "loss": 0.1814, "step": 44215 }, { "epoch": 0.9581175654887006, "grad_norm": 1.3359229564666748, "learning_rate": 8.643844187080308e-08, "loss": 0.2028, "step": 44220 }, { "epoch": 0.9582259008081815, "grad_norm": 1.2386949062347412, "learning_rate": 8.599248889187395e-08, "loss": 0.1646, "step": 44225 }, { "epoch": 0.9583342361276623, "grad_norm": 1.9303134679794312, "learning_rate": 8.554768430220406e-08, "loss": 0.1843, "step": 44230 }, { "epoch": 0.9584425714471432, "grad_norm": 1.2141337394714355, "learning_rate": 8.510402815331553e-08, "loss": 0.1936, "step": 44235 }, { "epoch": 0.958550906766624, "grad_norm": 0.4251849055290222, "learning_rate": 8.46615204966017e-08, "loss": 0.1093, "step": 44240 }, { "epoch": 0.9586592420861049, "grad_norm": 1.126591682434082, "learning_rate": 8.422016138331712e-08, "loss": 0.1322, "step": 44245 }, { "epoch": 0.9587675774055857, "grad_norm": 1.3725786209106445, "learning_rate": 8.377995086458979e-08, "loss": 0.1474, "step": 44250 }, { "epoch": 0.9588759127250667, "grad_norm": 1.1335574388504028, "learning_rate": 8.334088899141001e-08, "loss": 0.1865, "step": 44255 }, { "epoch": 0.9589842480445475, "grad_norm": 1.012367606163025, "learning_rate": 8.290297581463603e-08, "loss": 0.1124, "step": 44260 }, { "epoch": 0.9590925833640284, "grad_norm": 1.56771981716156, "learning_rate": 8.246621138499499e-08, "loss": 0.1929, "step": 44265 }, { "epoch": 0.9592009186835092, "grad_norm": 1.5401760339736938, "learning_rate": 8.203059575307759e-08, "loss": 0.1772, "step": 44270 }, { "epoch": 0.95930925400299, "grad_norm": 1.4186915159225464, "learning_rate": 8.159612896934566e-08, "loss": 0.1631, "step": 44275 }, { "epoch": 0.9594175893224709, "grad_norm": 0.6618066430091858, "learning_rate": 8.116281108412338e-08, "loss": 0.1838, "step": 44280 }, { "epoch": 0.9595259246419517, "grad_norm": 2.0849108695983887, "learning_rate": 8.073064214760618e-08, "loss": 0.1368, "step": 44285 }, { "epoch": 0.9596342599614326, "grad_norm": 0.7249450087547302, "learning_rate": 8.029962220985399e-08, "loss": 0.1459, "step": 44290 }, { "epoch": 0.9597425952809134, "grad_norm": 1.7088871002197266, "learning_rate": 7.986975132079244e-08, "loss": 0.1264, "step": 44295 }, { "epoch": 0.9598509306003944, "grad_norm": 1.1213241815567017, "learning_rate": 7.944102953021616e-08, "loss": 0.2037, "step": 44300 }, { "epoch": 0.9599592659198752, "grad_norm": 0.47478657960891724, "learning_rate": 7.901345688778761e-08, "loss": 0.1377, "step": 44305 }, { "epoch": 0.9600676012393561, "grad_norm": 1.0738047361373901, "learning_rate": 7.858703344303386e-08, "loss": 0.1969, "step": 44310 }, { "epoch": 0.9601759365588369, "grad_norm": 1.559961199760437, "learning_rate": 7.816175924534874e-08, "loss": 0.1449, "step": 44315 }, { "epoch": 0.9602842718783178, "grad_norm": 1.6760510206222534, "learning_rate": 7.773763434399506e-08, "loss": 0.2202, "step": 44320 }, { "epoch": 0.9603926071977986, "grad_norm": 1.776748538017273, "learning_rate": 7.73146587881013e-08, "loss": 0.1577, "step": 44325 }, { "epoch": 0.9605009425172795, "grad_norm": 1.703843116760254, "learning_rate": 7.689283262666159e-08, "loss": 0.0978, "step": 44330 }, { "epoch": 0.9606092778367603, "grad_norm": 2.048757553100586, "learning_rate": 7.647215590854018e-08, "loss": 0.2222, "step": 44335 }, { "epoch": 0.9607176131562412, "grad_norm": 2.769435405731201, "learning_rate": 7.605262868246477e-08, "loss": 0.2329, "step": 44340 }, { "epoch": 0.960825948475722, "grad_norm": 1.1656633615493774, "learning_rate": 7.563425099703092e-08, "loss": 0.095, "step": 44345 }, { "epoch": 0.960934283795203, "grad_norm": 2.705379009246826, "learning_rate": 7.52170229007021e-08, "loss": 0.2033, "step": 44350 }, { "epoch": 0.9610426191146838, "grad_norm": 1.6004393100738525, "learning_rate": 7.480094444180853e-08, "loss": 0.2463, "step": 44355 }, { "epoch": 0.9611509544341647, "grad_norm": 1.668735384941101, "learning_rate": 7.438601566854609e-08, "loss": 0.2035, "step": 44360 }, { "epoch": 0.9612592897536455, "grad_norm": 1.4020702838897705, "learning_rate": 7.397223662897856e-08, "loss": 0.1333, "step": 44365 }, { "epoch": 0.9613676250731263, "grad_norm": 1.2765718698501587, "learning_rate": 7.35596073710343e-08, "loss": 0.1146, "step": 44370 }, { "epoch": 0.9614759603926072, "grad_norm": 1.140994668006897, "learning_rate": 7.31481279425128e-08, "loss": 0.163, "step": 44375 }, { "epoch": 0.961584295712088, "grad_norm": 1.6012240648269653, "learning_rate": 7.273779839107598e-08, "loss": 0.212, "step": 44380 }, { "epoch": 0.9616926310315689, "grad_norm": 0.7992435693740845, "learning_rate": 7.232861876425357e-08, "loss": 0.1152, "step": 44385 }, { "epoch": 0.9618009663510497, "grad_norm": 1.1774859428405762, "learning_rate": 7.192058910944544e-08, "loss": 0.0934, "step": 44390 }, { "epoch": 0.9619093016705307, "grad_norm": 1.7124314308166504, "learning_rate": 7.151370947391379e-08, "loss": 0.1243, "step": 44395 }, { "epoch": 0.9620176369900115, "grad_norm": 1.2966729402542114, "learning_rate": 7.11079799047909e-08, "loss": 0.1486, "step": 44400 }, { "epoch": 0.9621259723094924, "grad_norm": 1.2605341672897339, "learning_rate": 7.070340044907364e-08, "loss": 0.1363, "step": 44405 }, { "epoch": 0.9622343076289732, "grad_norm": 1.5068501234054565, "learning_rate": 7.029997115362564e-08, "loss": 0.1151, "step": 44410 }, { "epoch": 0.9623426429484541, "grad_norm": 1.4005094766616821, "learning_rate": 6.98976920651806e-08, "loss": 0.1547, "step": 44415 }, { "epoch": 0.9624509782679349, "grad_norm": 1.1533464193344116, "learning_rate": 6.949656323033349e-08, "loss": 0.1453, "step": 44420 }, { "epoch": 0.9625593135874158, "grad_norm": 1.538063645362854, "learning_rate": 6.909658469555159e-08, "loss": 0.1669, "step": 44425 }, { "epoch": 0.9626676489068966, "grad_norm": 1.398741364479065, "learning_rate": 6.869775650716448e-08, "loss": 0.1581, "step": 44430 }, { "epoch": 0.9627759842263774, "grad_norm": 0.6132838129997253, "learning_rate": 6.830007871137301e-08, "loss": 0.1179, "step": 44435 }, { "epoch": 0.9628843195458583, "grad_norm": 0.6404792666435242, "learning_rate": 6.790355135423921e-08, "loss": 0.1152, "step": 44440 }, { "epoch": 0.9629926548653392, "grad_norm": 1.6700263023376465, "learning_rate": 6.750817448169633e-08, "loss": 0.2582, "step": 44445 }, { "epoch": 0.9631009901848201, "grad_norm": 2.4319441318511963, "learning_rate": 6.71139481395433e-08, "loss": 0.1702, "step": 44450 }, { "epoch": 0.9632093255043009, "grad_norm": 1.233353853225708, "learning_rate": 6.67208723734436e-08, "loss": 0.1238, "step": 44455 }, { "epoch": 0.9633176608237818, "grad_norm": 1.5029178857803345, "learning_rate": 6.63289472289319e-08, "loss": 0.1831, "step": 44460 }, { "epoch": 0.9634259961432626, "grad_norm": 0.9435096383094788, "learning_rate": 6.593817275140413e-08, "loss": 0.1372, "step": 44465 }, { "epoch": 0.9635343314627435, "grad_norm": 1.074836015701294, "learning_rate": 6.554854898612739e-08, "loss": 0.1506, "step": 44470 }, { "epoch": 0.9636426667822243, "grad_norm": 1.438956618309021, "learning_rate": 6.516007597823338e-08, "loss": 0.1147, "step": 44475 }, { "epoch": 0.9637510021017052, "grad_norm": 1.5833901166915894, "learning_rate": 6.477275377272052e-08, "loss": 0.2352, "step": 44480 }, { "epoch": 0.963859337421186, "grad_norm": 0.46255412697792053, "learning_rate": 6.438658241445405e-08, "loss": 0.1476, "step": 44485 }, { "epoch": 0.9639676727406669, "grad_norm": 2.23573637008667, "learning_rate": 6.400156194816598e-08, "loss": 0.1805, "step": 44490 }, { "epoch": 0.9640760080601478, "grad_norm": 0.9403710961341858, "learning_rate": 6.361769241845617e-08, "loss": 0.1859, "step": 44495 }, { "epoch": 0.9641843433796287, "grad_norm": 1.5623010396957397, "learning_rate": 6.323497386979016e-08, "loss": 0.102, "step": 44500 }, { "epoch": 0.9642926786991095, "grad_norm": 0.7677386403083801, "learning_rate": 6.285340634650028e-08, "loss": 0.1372, "step": 44505 }, { "epoch": 0.9644010140185904, "grad_norm": 1.8945914506912231, "learning_rate": 6.247298989278339e-08, "loss": 0.1456, "step": 44510 }, { "epoch": 0.9645093493380712, "grad_norm": 1.4737621545791626, "learning_rate": 6.209372455270757e-08, "loss": 0.1753, "step": 44515 }, { "epoch": 0.964617684657552, "grad_norm": 0.6341220140457153, "learning_rate": 6.171561037020324e-08, "loss": 0.1192, "step": 44520 }, { "epoch": 0.9647260199770329, "grad_norm": 1.5347706079483032, "learning_rate": 6.133864738906981e-08, "loss": 0.2199, "step": 44525 }, { "epoch": 0.9648343552965137, "grad_norm": 1.7794137001037598, "learning_rate": 6.096283565297345e-08, "loss": 0.1465, "step": 44530 }, { "epoch": 0.9649426906159946, "grad_norm": 1.5510269403457642, "learning_rate": 6.058817520544601e-08, "loss": 0.1368, "step": 44535 }, { "epoch": 0.9650510259354755, "grad_norm": 0.5853563547134399, "learning_rate": 6.021466608988503e-08, "loss": 0.106, "step": 44540 }, { "epoch": 0.9651593612549564, "grad_norm": 0.8276887536048889, "learning_rate": 5.984230834955806e-08, "loss": 0.1019, "step": 44545 }, { "epoch": 0.9652676965744372, "grad_norm": 0.4145447909832001, "learning_rate": 5.9471102027596204e-08, "loss": 0.1373, "step": 44550 }, { "epoch": 0.9653760318939181, "grad_norm": 2.040125608444214, "learning_rate": 5.910104716699727e-08, "loss": 0.2374, "step": 44555 }, { "epoch": 0.9654843672133989, "grad_norm": 1.4157971143722534, "learning_rate": 5.873214381062808e-08, "loss": 0.209, "step": 44560 }, { "epoch": 0.9655927025328798, "grad_norm": 1.4604663848876953, "learning_rate": 5.8364392001218905e-08, "loss": 0.1499, "step": 44565 }, { "epoch": 0.9657010378523606, "grad_norm": 2.039085865020752, "learning_rate": 5.7997791781370103e-08, "loss": 0.085, "step": 44570 }, { "epoch": 0.9658093731718415, "grad_norm": 0.8187650442123413, "learning_rate": 5.763234319354549e-08, "loss": 0.1624, "step": 44575 }, { "epoch": 0.9659177084913223, "grad_norm": 0.755515456199646, "learning_rate": 5.726804628007787e-08, "loss": 0.1995, "step": 44580 }, { "epoch": 0.9660260438108031, "grad_norm": 1.5172739028930664, "learning_rate": 5.69049010831646e-08, "loss": 0.1852, "step": 44585 }, { "epoch": 0.9661343791302841, "grad_norm": 1.211484432220459, "learning_rate": 5.6542907644869806e-08, "loss": 0.1325, "step": 44590 }, { "epoch": 0.966242714449765, "grad_norm": 1.4397777318954468, "learning_rate": 5.6182066007127724e-08, "loss": 0.2297, "step": 44595 }, { "epoch": 0.9663510497692458, "grad_norm": 1.470511555671692, "learning_rate": 5.582237621173492e-08, "loss": 0.2028, "step": 44600 }, { "epoch": 0.9664593850887266, "grad_norm": 0.7565996050834656, "learning_rate": 5.5463838300355846e-08, "loss": 0.1469, "step": 44605 }, { "epoch": 0.9665677204082075, "grad_norm": 1.5955318212509155, "learning_rate": 5.510645231452172e-08, "loss": 0.1719, "step": 44610 }, { "epoch": 0.9666760557276883, "grad_norm": 1.9524255990982056, "learning_rate": 5.475021829563054e-08, "loss": 0.1617, "step": 44615 }, { "epoch": 0.9667843910471692, "grad_norm": 1.151757836341858, "learning_rate": 5.439513628494708e-08, "loss": 0.2111, "step": 44620 }, { "epoch": 0.96689272636665, "grad_norm": 0.7319628000259399, "learning_rate": 5.404120632360177e-08, "loss": 0.0902, "step": 44625 }, { "epoch": 0.9670010616861309, "grad_norm": 2.2343170642852783, "learning_rate": 5.3688428452591814e-08, "loss": 0.2656, "step": 44630 }, { "epoch": 0.9671093970056117, "grad_norm": 1.0610679388046265, "learning_rate": 5.33368027127823e-08, "loss": 0.1586, "step": 44635 }, { "epoch": 0.9672177323250927, "grad_norm": 1.713315486907959, "learning_rate": 5.298632914490176e-08, "loss": 0.1714, "step": 44640 }, { "epoch": 0.9673260676445735, "grad_norm": 3.0401082038879395, "learning_rate": 5.263700778955105e-08, "loss": 0.2089, "step": 44645 }, { "epoch": 0.9674344029640544, "grad_norm": 1.3819994926452637, "learning_rate": 5.2288838687190035e-08, "loss": 0.2264, "step": 44650 }, { "epoch": 0.9675427382835352, "grad_norm": 1.8363611698150635, "learning_rate": 5.194182187815089e-08, "loss": 0.1186, "step": 44655 }, { "epoch": 0.967651073603016, "grad_norm": 0.9278543591499329, "learning_rate": 5.159595740262924e-08, "loss": 0.1408, "step": 44660 }, { "epoch": 0.9677594089224969, "grad_norm": 1.467279076576233, "learning_rate": 5.12512453006897e-08, "loss": 0.1563, "step": 44665 }, { "epoch": 0.9678677442419777, "grad_norm": 1.1801611185073853, "learning_rate": 5.090768561226034e-08, "loss": 0.2004, "step": 44670 }, { "epoch": 0.9679760795614586, "grad_norm": 1.4921512603759766, "learning_rate": 5.05652783771382e-08, "loss": 0.1242, "step": 44675 }, { "epoch": 0.9680844148809394, "grad_norm": 0.8658315539360046, "learning_rate": 5.022402363498602e-08, "loss": 0.0939, "step": 44680 }, { "epoch": 0.9681927502004204, "grad_norm": 0.8814326524734497, "learning_rate": 4.988392142533327e-08, "loss": 0.1469, "step": 44685 }, { "epoch": 0.9683010855199012, "grad_norm": 1.4151893854141235, "learning_rate": 4.954497178757622e-08, "loss": 0.2057, "step": 44690 }, { "epoch": 0.9684094208393821, "grad_norm": 1.2584545612335205, "learning_rate": 4.9207174760974586e-08, "loss": 0.1756, "step": 44695 }, { "epoch": 0.9685177561588629, "grad_norm": 1.7272560596466064, "learning_rate": 4.887053038466039e-08, "loss": 0.225, "step": 44700 }, { "epoch": 0.9686260914783438, "grad_norm": 1.0098336935043335, "learning_rate": 4.853503869762688e-08, "loss": 0.183, "step": 44705 }, { "epoch": 0.9687344267978246, "grad_norm": 1.6153161525726318, "learning_rate": 4.8200699738736313e-08, "loss": 0.1474, "step": 44710 }, { "epoch": 0.9688427621173055, "grad_norm": 1.2964653968811035, "learning_rate": 4.786751354671659e-08, "loss": 0.1816, "step": 44715 }, { "epoch": 0.9689510974367863, "grad_norm": 1.3763097524642944, "learning_rate": 4.753548016016352e-08, "loss": 0.1138, "step": 44720 }, { "epoch": 0.9690594327562672, "grad_norm": 1.3997366428375244, "learning_rate": 4.7204599617535206e-08, "loss": 0.174, "step": 44725 }, { "epoch": 0.969167768075748, "grad_norm": 1.263257622718811, "learning_rate": 4.687487195716323e-08, "loss": 0.1203, "step": 44730 }, { "epoch": 0.969276103395229, "grad_norm": 0.6375719308853149, "learning_rate": 4.6546297217238134e-08, "loss": 0.1249, "step": 44735 }, { "epoch": 0.9693844387147098, "grad_norm": 1.4268581867218018, "learning_rate": 4.621887543582171e-08, "loss": 0.1796, "step": 44740 }, { "epoch": 0.9694927740341907, "grad_norm": 1.3004496097564697, "learning_rate": 4.589260665084139e-08, "loss": 0.1252, "step": 44745 }, { "epoch": 0.9696011093536715, "grad_norm": 1.782731533050537, "learning_rate": 4.5567490900090275e-08, "loss": 0.1351, "step": 44750 }, { "epoch": 0.9697094446731523, "grad_norm": 1.1212961673736572, "learning_rate": 4.524352822122824e-08, "loss": 0.1893, "step": 44755 }, { "epoch": 0.9698177799926332, "grad_norm": 1.2384923696517944, "learning_rate": 4.4920718651779715e-08, "loss": 0.2255, "step": 44760 }, { "epoch": 0.969926115312114, "grad_norm": 0.994519054889679, "learning_rate": 4.4599062229140347e-08, "loss": 0.1182, "step": 44765 }, { "epoch": 0.9700344506315949, "grad_norm": 1.1614195108413696, "learning_rate": 4.427855899056699e-08, "loss": 0.2423, "step": 44770 }, { "epoch": 0.9701427859510757, "grad_norm": 1.178641676902771, "learning_rate": 4.395920897318662e-08, "loss": 0.1821, "step": 44775 }, { "epoch": 0.9702511212705567, "grad_norm": 1.2552247047424316, "learning_rate": 4.364101221398964e-08, "loss": 0.1171, "step": 44780 }, { "epoch": 0.9703594565900375, "grad_norm": 0.8147911429405212, "learning_rate": 4.332396874983547e-08, "loss": 0.1705, "step": 44785 }, { "epoch": 0.9704677919095184, "grad_norm": 0.8457643389701843, "learning_rate": 4.3008078617448044e-08, "loss": 0.1053, "step": 44790 }, { "epoch": 0.9705761272289992, "grad_norm": 1.6356498003005981, "learning_rate": 4.269334185341922e-08, "loss": 0.1204, "step": 44795 }, { "epoch": 0.9706844625484801, "grad_norm": 0.9345842003822327, "learning_rate": 4.2379758494207615e-08, "loss": 0.1864, "step": 44800 }, { "epoch": 0.9707927978679609, "grad_norm": 1.9930980205535889, "learning_rate": 4.2067328576134156e-08, "loss": 0.1659, "step": 44805 }, { "epoch": 0.9709011331874418, "grad_norm": 0.8595021963119507, "learning_rate": 4.175605213539102e-08, "loss": 0.1886, "step": 44810 }, { "epoch": 0.9710094685069226, "grad_norm": 1.1661272048950195, "learning_rate": 4.144592920803603e-08, "loss": 0.1192, "step": 44815 }, { "epoch": 0.9711178038264034, "grad_norm": 1.5468356609344482, "learning_rate": 4.113695982998933e-08, "loss": 0.1684, "step": 44820 }, { "epoch": 0.9712261391458843, "grad_norm": 1.681489109992981, "learning_rate": 4.082914403704341e-08, "loss": 0.2443, "step": 44825 }, { "epoch": 0.9713344744653652, "grad_norm": 1.0485265254974365, "learning_rate": 4.052248186485197e-08, "loss": 0.1026, "step": 44830 }, { "epoch": 0.9714428097848461, "grad_norm": 1.4180508852005005, "learning_rate": 4.021697334893881e-08, "loss": 0.1147, "step": 44835 }, { "epoch": 0.9715511451043269, "grad_norm": 0.8200636506080627, "learning_rate": 3.9912618524691196e-08, "loss": 0.1019, "step": 44840 }, { "epoch": 0.9716594804238078, "grad_norm": 0.8762860894203186, "learning_rate": 3.960941742736424e-08, "loss": 0.1656, "step": 44845 }, { "epoch": 0.9717678157432886, "grad_norm": 1.7127768993377686, "learning_rate": 3.9307370092080966e-08, "loss": 0.2083, "step": 44850 }, { "epoch": 0.9718761510627695, "grad_norm": 1.5600709915161133, "learning_rate": 3.900647655382561e-08, "loss": 0.1548, "step": 44855 }, { "epoch": 0.9719844863822503, "grad_norm": 1.922255277633667, "learning_rate": 3.870673684745585e-08, "loss": 0.1967, "step": 44860 }, { "epoch": 0.9720928217017312, "grad_norm": 1.5438276529312134, "learning_rate": 3.840815100769058e-08, "loss": 0.1507, "step": 44865 }, { "epoch": 0.972201157021212, "grad_norm": 0.6553032994270325, "learning_rate": 3.8110719069115456e-08, "loss": 0.1158, "step": 44870 }, { "epoch": 0.9723094923406929, "grad_norm": 0.6204732060432434, "learning_rate": 3.7814441066185145e-08, "loss": 0.162, "step": 44875 }, { "epoch": 0.9724178276601738, "grad_norm": 1.1457489728927612, "learning_rate": 3.751931703321776e-08, "loss": 0.179, "step": 44880 }, { "epoch": 0.9725261629796547, "grad_norm": 1.5562013387680054, "learning_rate": 3.7225347004400395e-08, "loss": 0.1383, "step": 44885 }, { "epoch": 0.9726344982991355, "grad_norm": 1.0748251676559448, "learning_rate": 3.693253101378358e-08, "loss": 0.1303, "step": 44890 }, { "epoch": 0.9727428336186164, "grad_norm": 0.8403367400169373, "learning_rate": 3.6640869095285745e-08, "loss": 0.1347, "step": 44895 }, { "epoch": 0.9728511689380972, "grad_norm": 1.551202416419983, "learning_rate": 3.63503612826932e-08, "loss": 0.175, "step": 44900 }, { "epoch": 0.972959504257578, "grad_norm": 1.6101913452148438, "learning_rate": 3.6061007609655696e-08, "loss": 0.1827, "step": 44905 }, { "epoch": 0.9730678395770589, "grad_norm": 0.2804703414440155, "learning_rate": 3.577280810968975e-08, "loss": 0.1097, "step": 44910 }, { "epoch": 0.9731761748965397, "grad_norm": 2.3674957752227783, "learning_rate": 3.548576281618088e-08, "loss": 0.282, "step": 44915 }, { "epoch": 0.9732845102160206, "grad_norm": 1.2170796394348145, "learning_rate": 3.5199871762376934e-08, "loss": 0.1694, "step": 44920 }, { "epoch": 0.9733928455355015, "grad_norm": 1.1027132272720337, "learning_rate": 3.491513498139587e-08, "loss": 0.1254, "step": 44925 }, { "epoch": 0.9735011808549824, "grad_norm": 1.5620410442352295, "learning_rate": 3.463155250621908e-08, "loss": 0.0949, "step": 44930 }, { "epoch": 0.9736095161744632, "grad_norm": 1.1044520139694214, "learning_rate": 3.434912436969584e-08, "loss": 0.1835, "step": 44935 }, { "epoch": 0.9737178514939441, "grad_norm": 1.2022240161895752, "learning_rate": 3.406785060454221e-08, "loss": 0.0803, "step": 44940 }, { "epoch": 0.9738261868134249, "grad_norm": 1.4658846855163574, "learning_rate": 3.3787731243336566e-08, "loss": 0.1335, "step": 44945 }, { "epoch": 0.9739345221329058, "grad_norm": 1.9425572156906128, "learning_rate": 3.3508766318529616e-08, "loss": 0.1108, "step": 44950 }, { "epoch": 0.9740428574523866, "grad_norm": 1.3200472593307495, "learning_rate": 3.323095586243441e-08, "loss": 0.1737, "step": 44955 }, { "epoch": 0.9741511927718675, "grad_norm": 1.0133479833602905, "learning_rate": 3.2954299907229644e-08, "loss": 0.1949, "step": 44960 }, { "epoch": 0.9742595280913483, "grad_norm": 1.4962102174758911, "learning_rate": 3.267879848496303e-08, "loss": 0.0894, "step": 44965 }, { "epoch": 0.9743678634108291, "grad_norm": 1.5658788681030273, "learning_rate": 3.240445162754791e-08, "loss": 0.1044, "step": 44970 }, { "epoch": 0.9744761987303101, "grad_norm": 1.2826144695281982, "learning_rate": 3.21312593667622e-08, "loss": 0.1971, "step": 44975 }, { "epoch": 0.974584534049791, "grad_norm": 1.7343103885650635, "learning_rate": 3.18592217342506e-08, "loss": 0.1256, "step": 44980 }, { "epoch": 0.9746928693692718, "grad_norm": 1.150887370109558, "learning_rate": 3.1588338761526784e-08, "loss": 0.1422, "step": 44985 }, { "epoch": 0.9748012046887526, "grad_norm": 0.4358462691307068, "learning_rate": 3.131861047996676e-08, "loss": 0.1457, "step": 44990 }, { "epoch": 0.9749095400082335, "grad_norm": 1.4202196598052979, "learning_rate": 3.105003692081443e-08, "loss": 0.1558, "step": 44995 }, { "epoch": 0.9750178753277143, "grad_norm": 1.3659156560897827, "learning_rate": 3.078261811518046e-08, "loss": 0.1127, "step": 45000 }, { "epoch": 0.9751262106471952, "grad_norm": 2.862999200820923, "learning_rate": 3.051635409404119e-08, "loss": 0.1764, "step": 45005 }, { "epoch": 0.975234545966676, "grad_norm": 1.615261435508728, "learning_rate": 3.025124488823972e-08, "loss": 0.1779, "step": 45010 }, { "epoch": 0.9753428812861569, "grad_norm": 0.8041927814483643, "learning_rate": 2.9987290528484815e-08, "loss": 0.1111, "step": 45015 }, { "epoch": 0.9754512166056377, "grad_norm": 0.996174156665802, "learning_rate": 2.9724491045352023e-08, "loss": 0.118, "step": 45020 }, { "epoch": 0.9755595519251187, "grad_norm": 0.888830840587616, "learning_rate": 2.9462846469281437e-08, "loss": 0.1183, "step": 45025 }, { "epoch": 0.9756678872445995, "grad_norm": 1.7208775281906128, "learning_rate": 2.9202356830582145e-08, "loss": 0.1062, "step": 45030 }, { "epoch": 0.9757762225640804, "grad_norm": 1.7440086603164673, "learning_rate": 2.894302215942557e-08, "loss": 0.1772, "step": 45035 }, { "epoch": 0.9758845578835612, "grad_norm": 1.165907621383667, "learning_rate": 2.8684842485855456e-08, "loss": 0.1354, "step": 45040 }, { "epoch": 0.9759928932030421, "grad_norm": 1.9191433191299438, "learning_rate": 2.8427817839774553e-08, "loss": 0.1646, "step": 45045 }, { "epoch": 0.9761012285225229, "grad_norm": 0.8620597720146179, "learning_rate": 2.8171948250957925e-08, "loss": 0.1434, "step": 45050 }, { "epoch": 0.9762095638420037, "grad_norm": 1.3007993698120117, "learning_rate": 2.7917233749042984e-08, "loss": 0.1634, "step": 45055 }, { "epoch": 0.9763178991614846, "grad_norm": 1.1785829067230225, "learning_rate": 2.76636743635339e-08, "loss": 0.1375, "step": 45060 }, { "epoch": 0.9764262344809654, "grad_norm": 1.196387767791748, "learning_rate": 2.7411270123803847e-08, "loss": 0.2011, "step": 45065 }, { "epoch": 0.9765345698004464, "grad_norm": 1.0068098306655884, "learning_rate": 2.7160021059087215e-08, "loss": 0.1863, "step": 45070 }, { "epoch": 0.9766429051199272, "grad_norm": 1.3375113010406494, "learning_rate": 2.6909927198490727e-08, "loss": 0.1897, "step": 45075 }, { "epoch": 0.9767512404394081, "grad_norm": 0.5541808605194092, "learning_rate": 2.6660988570981206e-08, "loss": 0.1522, "step": 45080 }, { "epoch": 0.9768595757588889, "grad_norm": 1.6737240552902222, "learning_rate": 2.6413205205395586e-08, "loss": 0.1652, "step": 45085 }, { "epoch": 0.9769679110783698, "grad_norm": 1.8599927425384521, "learning_rate": 2.6166577130436466e-08, "loss": 0.2714, "step": 45090 }, { "epoch": 0.9770762463978506, "grad_norm": 1.466587781906128, "learning_rate": 2.592110437466988e-08, "loss": 0.2867, "step": 45095 }, { "epoch": 0.9771845817173315, "grad_norm": 1.5311287641525269, "learning_rate": 2.5676786966533084e-08, "loss": 0.1396, "step": 45100 }, { "epoch": 0.9772929170368123, "grad_norm": 1.1954478025436401, "learning_rate": 2.5433624934324554e-08, "loss": 0.112, "step": 45105 }, { "epoch": 0.9774012523562932, "grad_norm": 0.8136472105979919, "learning_rate": 2.519161830621175e-08, "loss": 0.2014, "step": 45110 }, { "epoch": 0.977509587675774, "grad_norm": 1.9189857244491577, "learning_rate": 2.4950767110227813e-08, "loss": 0.2403, "step": 45115 }, { "epoch": 0.977617922995255, "grad_norm": 0.38264718651771545, "learning_rate": 2.4711071374270423e-08, "loss": 0.1237, "step": 45120 }, { "epoch": 0.9777262583147358, "grad_norm": 1.7834233045578003, "learning_rate": 2.4472531126106258e-08, "loss": 0.2002, "step": 45125 }, { "epoch": 0.9778345936342167, "grad_norm": 1.5074965953826904, "learning_rate": 2.4235146393365438e-08, "loss": 0.1458, "step": 45130 }, { "epoch": 0.9779429289536975, "grad_norm": 1.463473916053772, "learning_rate": 2.399891720354597e-08, "loss": 0.2119, "step": 45135 }, { "epoch": 0.9780512642731783, "grad_norm": 0.7220052480697632, "learning_rate": 2.3763843584011515e-08, "loss": 0.0757, "step": 45140 }, { "epoch": 0.9781595995926592, "grad_norm": 1.5860580205917358, "learning_rate": 2.3529925561992516e-08, "loss": 0.1415, "step": 45145 }, { "epoch": 0.97826793491214, "grad_norm": 1.4106684923171997, "learning_rate": 2.329716316458397e-08, "loss": 0.2178, "step": 45150 }, { "epoch": 0.9783762702316209, "grad_norm": 1.325585126876831, "learning_rate": 2.3065556418747635e-08, "loss": 0.216, "step": 45155 }, { "epoch": 0.9784846055511017, "grad_norm": 1.137455701828003, "learning_rate": 2.283510535131206e-08, "loss": 0.1712, "step": 45160 }, { "epoch": 0.9785929408705826, "grad_norm": 1.6090707778930664, "learning_rate": 2.260580998897144e-08, "loss": 0.1569, "step": 45165 }, { "epoch": 0.9787012761900635, "grad_norm": 1.192445993423462, "learning_rate": 2.2377670358286752e-08, "loss": 0.1, "step": 45170 }, { "epoch": 0.9788096115095444, "grad_norm": 0.3883790075778961, "learning_rate": 2.2150686485683527e-08, "loss": 0.1425, "step": 45175 }, { "epoch": 0.9789179468290252, "grad_norm": 1.3893773555755615, "learning_rate": 2.1924858397456285e-08, "loss": 0.1383, "step": 45180 }, { "epoch": 0.9790262821485061, "grad_norm": 1.614141821861267, "learning_rate": 2.1700186119761878e-08, "loss": 0.1622, "step": 45185 }, { "epoch": 0.9791346174679869, "grad_norm": 1.252606987953186, "learning_rate": 2.1476669678626162e-08, "loss": 0.1996, "step": 45190 }, { "epoch": 0.9792429527874678, "grad_norm": 1.6727283000946045, "learning_rate": 2.125430909993953e-08, "loss": 0.2138, "step": 45195 }, { "epoch": 0.9793512881069486, "grad_norm": 2.1095850467681885, "learning_rate": 2.103310440946027e-08, "loss": 0.1981, "step": 45200 }, { "epoch": 0.9794596234264294, "grad_norm": 1.4946584701538086, "learning_rate": 2.0813055632811218e-08, "loss": 0.1336, "step": 45205 }, { "epoch": 0.9795679587459103, "grad_norm": 1.5729619264602661, "learning_rate": 2.0594162795480875e-08, "loss": 0.118, "step": 45210 }, { "epoch": 0.9796762940653913, "grad_norm": 1.471799373626709, "learning_rate": 2.0376425922824515e-08, "loss": 0.1766, "step": 45215 }, { "epoch": 0.9797846293848721, "grad_norm": 0.6605020761489868, "learning_rate": 2.0159845040065297e-08, "loss": 0.1252, "step": 45220 }, { "epoch": 0.9798929647043529, "grad_norm": 1.4958605766296387, "learning_rate": 1.9944420172290923e-08, "loss": 0.1429, "step": 45225 }, { "epoch": 0.9800013000238338, "grad_norm": 1.5707745552062988, "learning_rate": 1.9730151344452553e-08, "loss": 0.1425, "step": 45230 }, { "epoch": 0.9801096353433146, "grad_norm": 1.383885383605957, "learning_rate": 1.951703858137255e-08, "loss": 0.2056, "step": 45235 }, { "epoch": 0.9802179706627955, "grad_norm": 1.0110468864440918, "learning_rate": 1.9305081907735612e-08, "loss": 0.1125, "step": 45240 }, { "epoch": 0.9803263059822763, "grad_norm": 1.6114082336425781, "learning_rate": 1.9094281348095435e-08, "loss": 0.1737, "step": 45245 }, { "epoch": 0.9804346413017572, "grad_norm": 2.1126039028167725, "learning_rate": 1.8884636926866927e-08, "loss": 0.1763, "step": 45250 }, { "epoch": 0.980542976621238, "grad_norm": 1.7783652544021606, "learning_rate": 1.8676148668337333e-08, "loss": 0.1361, "step": 45255 }, { "epoch": 0.9806513119407189, "grad_norm": 0.839091956615448, "learning_rate": 1.8468816596656224e-08, "loss": 0.1352, "step": 45260 }, { "epoch": 0.9807596472601998, "grad_norm": 0.753541111946106, "learning_rate": 1.826264073583772e-08, "loss": 0.1691, "step": 45265 }, { "epoch": 0.9808679825796807, "grad_norm": 1.0293774604797363, "learning_rate": 1.8057621109767165e-08, "loss": 0.1952, "step": 45270 }, { "epoch": 0.9809763178991615, "grad_norm": 1.098581314086914, "learning_rate": 1.7853757742191114e-08, "loss": 0.1439, "step": 45275 }, { "epoch": 0.9810846532186424, "grad_norm": 1.7904820442199707, "learning_rate": 1.765105065672512e-08, "loss": 0.1673, "step": 45280 }, { "epoch": 0.9811929885381232, "grad_norm": 1.0288444757461548, "learning_rate": 1.7449499876848186e-08, "loss": 0.2068, "step": 45285 }, { "epoch": 0.981301323857604, "grad_norm": 1.1583811044692993, "learning_rate": 1.7249105425909406e-08, "loss": 0.1678, "step": 45290 }, { "epoch": 0.9814096591770849, "grad_norm": 1.8737761974334717, "learning_rate": 1.7049867327120218e-08, "loss": 0.0961, "step": 45295 }, { "epoch": 0.9815179944965657, "grad_norm": 1.7842999696731567, "learning_rate": 1.6851785603558824e-08, "loss": 0.1705, "step": 45300 }, { "epoch": 0.9816263298160466, "grad_norm": 1.1951695680618286, "learning_rate": 1.6654860278170203e-08, "loss": 0.1431, "step": 45305 }, { "epoch": 0.9817346651355275, "grad_norm": 2.035656452178955, "learning_rate": 1.645909137376611e-08, "loss": 0.2129, "step": 45310 }, { "epoch": 0.9818430004550084, "grad_norm": 1.2665586471557617, "learning_rate": 1.6264478913021743e-08, "loss": 0.1537, "step": 45315 }, { "epoch": 0.9819513357744892, "grad_norm": 2.1745657920837402, "learning_rate": 1.6071022918482392e-08, "loss": 0.2229, "step": 45320 }, { "epoch": 0.9820596710939701, "grad_norm": 0.7295961380004883, "learning_rate": 1.5878723412555696e-08, "loss": 0.1326, "step": 45325 }, { "epoch": 0.9821680064134509, "grad_norm": 1.225587248802185, "learning_rate": 1.568758041751606e-08, "loss": 0.2547, "step": 45330 }, { "epoch": 0.9822763417329318, "grad_norm": 1.6597501039505005, "learning_rate": 1.5497593955505763e-08, "loss": 0.1575, "step": 45335 }, { "epoch": 0.9823846770524126, "grad_norm": 1.8996543884277344, "learning_rate": 1.5308764048531655e-08, "loss": 0.1482, "step": 45340 }, { "epoch": 0.9824930123718935, "grad_norm": 1.9101839065551758, "learning_rate": 1.5121090718466236e-08, "loss": 0.1347, "step": 45345 }, { "epoch": 0.9826013476913743, "grad_norm": 1.6307333707809448, "learning_rate": 1.4934573987048784e-08, "loss": 0.2539, "step": 45350 }, { "epoch": 0.9827096830108552, "grad_norm": 1.1713749170303345, "learning_rate": 1.4749213875884238e-08, "loss": 0.0833, "step": 45355 }, { "epoch": 0.9828180183303361, "grad_norm": 1.1846323013305664, "learning_rate": 1.4565010406444313e-08, "loss": 0.1169, "step": 45360 }, { "epoch": 0.982926353649817, "grad_norm": 0.6343989372253418, "learning_rate": 1.4381963600066384e-08, "loss": 0.1652, "step": 45365 }, { "epoch": 0.9830346889692978, "grad_norm": 0.7187505960464478, "learning_rate": 1.4200073477954601e-08, "loss": 0.066, "step": 45370 }, { "epoch": 0.9831430242887786, "grad_norm": 1.04214346408844, "learning_rate": 1.4019340061175446e-08, "loss": 0.1889, "step": 45375 }, { "epoch": 0.9832513596082595, "grad_norm": 0.9772824048995972, "learning_rate": 1.3839763370666615e-08, "loss": 0.1442, "step": 45380 }, { "epoch": 0.9833596949277403, "grad_norm": 1.868100643157959, "learning_rate": 1.3661343427228135e-08, "loss": 0.1537, "step": 45385 }, { "epoch": 0.9834680302472212, "grad_norm": 1.230435848236084, "learning_rate": 1.348408025152792e-08, "loss": 0.0976, "step": 45390 }, { "epoch": 0.983576365566702, "grad_norm": 1.4669313430786133, "learning_rate": 1.3307973864099543e-08, "loss": 0.1334, "step": 45395 }, { "epoch": 0.9836847008861829, "grad_norm": 1.7714735269546509, "learning_rate": 1.3133024285340024e-08, "loss": 0.1459, "step": 45400 }, { "epoch": 0.9837930362056637, "grad_norm": 1.557904601097107, "learning_rate": 1.2959231535517592e-08, "loss": 0.1284, "step": 45405 }, { "epoch": 0.9839013715251447, "grad_norm": 1.4339320659637451, "learning_rate": 1.278659563476281e-08, "loss": 0.1764, "step": 45410 }, { "epoch": 0.9840097068446255, "grad_norm": 1.5222294330596924, "learning_rate": 1.2615116603071909e-08, "loss": 0.1708, "step": 45415 }, { "epoch": 0.9841180421641064, "grad_norm": 1.1579731702804565, "learning_rate": 1.2444794460307884e-08, "loss": 0.1612, "step": 45420 }, { "epoch": 0.9842263774835872, "grad_norm": 1.161596417427063, "learning_rate": 1.2275629226201624e-08, "loss": 0.1467, "step": 45425 }, { "epoch": 0.9843347128030681, "grad_norm": 2.0702364444732666, "learning_rate": 1.2107620920348562e-08, "loss": 0.1796, "step": 45430 }, { "epoch": 0.9844430481225489, "grad_norm": 1.0506951808929443, "learning_rate": 1.1940769562207577e-08, "loss": 0.1414, "step": 45435 }, { "epoch": 0.9845513834420297, "grad_norm": 1.5705459117889404, "learning_rate": 1.1775075171107652e-08, "loss": 0.1802, "step": 45440 }, { "epoch": 0.9846597187615106, "grad_norm": 1.107155442237854, "learning_rate": 1.1610537766242325e-08, "loss": 0.1807, "step": 45445 }, { "epoch": 0.9847680540809914, "grad_norm": 1.0295350551605225, "learning_rate": 1.144715736666968e-08, "loss": 0.0889, "step": 45450 }, { "epoch": 0.9848763894004724, "grad_norm": 0.8164606094360352, "learning_rate": 1.1284933991314584e-08, "loss": 0.1974, "step": 45455 }, { "epoch": 0.9849847247199532, "grad_norm": 1.0661406517028809, "learning_rate": 1.1123867658969779e-08, "loss": 0.1803, "step": 45460 }, { "epoch": 0.9850930600394341, "grad_norm": 1.3610312938690186, "learning_rate": 1.0963958388291452e-08, "loss": 0.1474, "step": 45465 }, { "epoch": 0.9852013953589149, "grad_norm": 0.5469425320625305, "learning_rate": 1.0805206197802564e-08, "loss": 0.1612, "step": 45470 }, { "epoch": 0.9853097306783958, "grad_norm": 0.5487455129623413, "learning_rate": 1.0647611105892852e-08, "loss": 0.116, "step": 45475 }, { "epoch": 0.9854180659978766, "grad_norm": 2.025123357772827, "learning_rate": 1.04911731308166e-08, "loss": 0.1395, "step": 45480 }, { "epoch": 0.9855264013173575, "grad_norm": 1.1254518032073975, "learning_rate": 1.0335892290695981e-08, "loss": 0.2003, "step": 45485 }, { "epoch": 0.9856347366368383, "grad_norm": 1.105901837348938, "learning_rate": 1.0181768603515497e-08, "loss": 0.0759, "step": 45490 }, { "epoch": 0.9857430719563192, "grad_norm": 1.1824926137924194, "learning_rate": 1.0028802087130863e-08, "loss": 0.1569, "step": 45495 }, { "epoch": 0.9858514072758, "grad_norm": 1.6673305034637451, "learning_rate": 9.876992759259018e-09, "loss": 0.2401, "step": 45500 }, { "epoch": 0.985959742595281, "grad_norm": 0.7940477728843689, "learning_rate": 9.726340637485898e-09, "loss": 0.1147, "step": 45505 }, { "epoch": 0.9860680779147618, "grad_norm": 1.3997868299484253, "learning_rate": 9.576845739261986e-09, "loss": 0.1548, "step": 45510 }, { "epoch": 0.9861764132342427, "grad_norm": 1.360838532447815, "learning_rate": 9.42850808190343e-09, "loss": 0.1485, "step": 45515 }, { "epoch": 0.9862847485537235, "grad_norm": 1.0878328084945679, "learning_rate": 9.281327682594266e-09, "loss": 0.1497, "step": 45520 }, { "epoch": 0.9863930838732043, "grad_norm": 1.3639099597930908, "learning_rate": 9.135304558381962e-09, "loss": 0.2387, "step": 45525 }, { "epoch": 0.9865014191926852, "grad_norm": 0.5849078297615051, "learning_rate": 8.990438726181883e-09, "loss": 0.1453, "step": 45530 }, { "epoch": 0.986609754512166, "grad_norm": 1.173069715499878, "learning_rate": 8.846730202772823e-09, "loss": 0.1502, "step": 45535 }, { "epoch": 0.9867180898316469, "grad_norm": 1.1067684888839722, "learning_rate": 8.704179004803691e-09, "loss": 0.1178, "step": 45540 }, { "epoch": 0.9868264251511277, "grad_norm": 0.46983739733695984, "learning_rate": 8.562785148785724e-09, "loss": 0.2267, "step": 45545 }, { "epoch": 0.9869347604706086, "grad_norm": 1.5030744075775146, "learning_rate": 8.422548651098039e-09, "loss": 0.1481, "step": 45550 }, { "epoch": 0.9870430957900895, "grad_norm": 1.07999849319458, "learning_rate": 8.2834695279832e-09, "loss": 0.0827, "step": 45555 }, { "epoch": 0.9871514311095704, "grad_norm": 0.7317792773246765, "learning_rate": 8.145547795552766e-09, "loss": 0.1772, "step": 45560 }, { "epoch": 0.9872597664290512, "grad_norm": 0.5550919771194458, "learning_rate": 8.008783469782844e-09, "loss": 0.1202, "step": 45565 }, { "epoch": 0.9873681017485321, "grad_norm": 1.6506279706954956, "learning_rate": 7.873176566515206e-09, "loss": 0.2141, "step": 45570 }, { "epoch": 0.9874764370680129, "grad_norm": 1.833932638168335, "learning_rate": 7.738727101457288e-09, "loss": 0.1372, "step": 45575 }, { "epoch": 0.9875847723874938, "grad_norm": 1.8792188167572021, "learning_rate": 7.60543509018552e-09, "loss": 0.1571, "step": 45580 }, { "epoch": 0.9876931077069746, "grad_norm": 1.3965574502944946, "learning_rate": 7.473300548136442e-09, "loss": 0.175, "step": 45585 }, { "epoch": 0.9878014430264555, "grad_norm": 0.4884676933288574, "learning_rate": 7.342323490617808e-09, "loss": 0.1535, "step": 45590 }, { "epoch": 0.9879097783459363, "grad_norm": 0.657014012336731, "learning_rate": 7.212503932801929e-09, "loss": 0.2158, "step": 45595 }, { "epoch": 0.9880181136654173, "grad_norm": 1.3386898040771484, "learning_rate": 7.083841889724552e-09, "loss": 0.1492, "step": 45600 }, { "epoch": 0.9881264489848981, "grad_norm": 2.4395370483398438, "learning_rate": 6.956337376290423e-09, "loss": 0.2197, "step": 45605 }, { "epoch": 0.9882347843043789, "grad_norm": 0.8967231512069702, "learning_rate": 6.829990407268838e-09, "loss": 0.1192, "step": 45610 }, { "epoch": 0.9883431196238598, "grad_norm": 1.20222008228302, "learning_rate": 6.7048009972947585e-09, "loss": 0.1139, "step": 45615 }, { "epoch": 0.9884514549433406, "grad_norm": 1.3815776109695435, "learning_rate": 6.5807691608710255e-09, "loss": 0.1216, "step": 45620 }, { "epoch": 0.9885597902628215, "grad_norm": 1.4769692420959473, "learning_rate": 6.457894912362817e-09, "loss": 0.1172, "step": 45625 }, { "epoch": 0.9886681255823023, "grad_norm": 1.7181819677352905, "learning_rate": 6.336178266004301e-09, "loss": 0.1741, "step": 45630 }, { "epoch": 0.9887764609017832, "grad_norm": 1.4064782857894897, "learning_rate": 6.215619235895309e-09, "loss": 0.1419, "step": 45635 }, { "epoch": 0.988884796221264, "grad_norm": 2.056548833847046, "learning_rate": 6.096217835999119e-09, "loss": 0.1514, "step": 45640 }, { "epoch": 0.9889931315407449, "grad_norm": 1.0717693567276, "learning_rate": 5.977974080147997e-09, "loss": 0.1901, "step": 45645 }, { "epoch": 0.9891014668602258, "grad_norm": 1.39899742603302, "learning_rate": 5.860887982037655e-09, "loss": 0.1529, "step": 45650 }, { "epoch": 0.9892098021797067, "grad_norm": 0.6672426462173462, "learning_rate": 5.744959555231688e-09, "loss": 0.1303, "step": 45655 }, { "epoch": 0.9893181374991875, "grad_norm": 1.7786076068878174, "learning_rate": 5.6301888131571336e-09, "loss": 0.1562, "step": 45660 }, { "epoch": 0.9894264728186684, "grad_norm": 1.8788200616836548, "learning_rate": 5.516575769111132e-09, "loss": 0.1882, "step": 45665 }, { "epoch": 0.9895348081381492, "grad_norm": 1.217623233795166, "learning_rate": 5.404120436250937e-09, "loss": 0.2155, "step": 45670 }, { "epoch": 0.98964314345763, "grad_norm": 1.0289270877838135, "learning_rate": 5.292822827605015e-09, "loss": 0.1401, "step": 45675 }, { "epoch": 0.9897514787771109, "grad_norm": 1.3917957544326782, "learning_rate": 5.182682956065277e-09, "loss": 0.1978, "step": 45680 }, { "epoch": 0.9898598140965917, "grad_norm": 0.9127823114395142, "learning_rate": 5.073700834389295e-09, "loss": 0.1225, "step": 45685 }, { "epoch": 0.9899681494160726, "grad_norm": 1.9480942487716675, "learning_rate": 4.965876475201415e-09, "loss": 0.1262, "step": 45690 }, { "epoch": 0.9900764847355534, "grad_norm": 2.2543742656707764, "learning_rate": 4.859209890990535e-09, "loss": 0.1694, "step": 45695 }, { "epoch": 0.9901848200550344, "grad_norm": 1.442266583442688, "learning_rate": 4.753701094112328e-09, "loss": 0.1709, "step": 45700 }, { "epoch": 0.9902931553745152, "grad_norm": 1.556787371635437, "learning_rate": 4.6493500967903505e-09, "loss": 0.1949, "step": 45705 }, { "epoch": 0.9904014906939961, "grad_norm": 0.9205502867698669, "learning_rate": 4.546156911109378e-09, "loss": 0.1486, "step": 45710 }, { "epoch": 0.9905098260134769, "grad_norm": 1.2558612823486328, "learning_rate": 4.444121549025404e-09, "loss": 0.1515, "step": 45715 }, { "epoch": 0.9906181613329578, "grad_norm": 1.7928963899612427, "learning_rate": 4.343244022356752e-09, "loss": 0.2658, "step": 45720 }, { "epoch": 0.9907264966524386, "grad_norm": 1.2115906476974487, "learning_rate": 4.243524342787408e-09, "loss": 0.1373, "step": 45725 }, { "epoch": 0.9908348319719195, "grad_norm": 0.6351116299629211, "learning_rate": 4.144962521869245e-09, "loss": 0.1791, "step": 45730 }, { "epoch": 0.9909431672914003, "grad_norm": 2.611323118209839, "learning_rate": 4.047558571020904e-09, "loss": 0.1525, "step": 45735 }, { "epoch": 0.9910515026108812, "grad_norm": 1.408082365989685, "learning_rate": 3.9513125015222535e-09, "loss": 0.1347, "step": 45740 }, { "epoch": 0.9911598379303621, "grad_norm": 1.1049988269805908, "learning_rate": 3.856224324523261e-09, "loss": 0.0688, "step": 45745 }, { "epoch": 0.991268173249843, "grad_norm": 1.754779577255249, "learning_rate": 3.762294051038451e-09, "loss": 0.1467, "step": 45750 }, { "epoch": 0.9913765085693238, "grad_norm": 1.2180848121643066, "learning_rate": 3.669521691948008e-09, "loss": 0.0981, "step": 45755 }, { "epoch": 0.9914848438888046, "grad_norm": 0.3487095534801483, "learning_rate": 3.5779072579988916e-09, "loss": 0.2177, "step": 45760 }, { "epoch": 0.9915931792082855, "grad_norm": 1.1085307598114014, "learning_rate": 3.487450759802613e-09, "loss": 0.1165, "step": 45765 }, { "epoch": 0.9917015145277663, "grad_norm": 1.0162073373794556, "learning_rate": 3.3981522078385676e-09, "loss": 0.1482, "step": 45770 }, { "epoch": 0.9918098498472472, "grad_norm": 1.59839928150177, "learning_rate": 3.3100116124484825e-09, "loss": 0.1516, "step": 45775 }, { "epoch": 0.991918185166728, "grad_norm": 1.788330316543579, "learning_rate": 3.2230289838430797e-09, "loss": 0.2363, "step": 45780 }, { "epoch": 0.9920265204862089, "grad_norm": 1.033976674079895, "learning_rate": 3.137204332097632e-09, "loss": 0.1643, "step": 45785 }, { "epoch": 0.9921348558056897, "grad_norm": 1.8330512046813965, "learning_rate": 3.0525376671552974e-09, "loss": 0.156, "step": 45790 }, { "epoch": 0.9922431911251707, "grad_norm": 1.4295756816864014, "learning_rate": 2.969028998821566e-09, "loss": 0.2042, "step": 45795 }, { "epoch": 0.9923515264446515, "grad_norm": 1.5459262132644653, "learning_rate": 2.8866783367698103e-09, "loss": 0.1118, "step": 45800 }, { "epoch": 0.9924598617641324, "grad_norm": 1.3403446674346924, "learning_rate": 2.805485690540177e-09, "loss": 0.1868, "step": 45805 }, { "epoch": 0.9925681970836132, "grad_norm": 1.6390129327774048, "learning_rate": 2.7254510695362556e-09, "loss": 0.165, "step": 45810 }, { "epoch": 0.9926765324030941, "grad_norm": 1.6757864952087402, "learning_rate": 2.6465744830306285e-09, "loss": 0.1363, "step": 45815 }, { "epoch": 0.9927848677225749, "grad_norm": 0.8005425930023193, "learning_rate": 2.5688559401593206e-09, "loss": 0.0961, "step": 45820 }, { "epoch": 0.9928932030420557, "grad_norm": 2.07175874710083, "learning_rate": 2.4922954499240205e-09, "loss": 0.174, "step": 45825 }, { "epoch": 0.9930015383615366, "grad_norm": 1.7685507535934448, "learning_rate": 2.4168930211931896e-09, "loss": 0.1374, "step": 45830 }, { "epoch": 0.9931098736810174, "grad_norm": 1.836203932762146, "learning_rate": 2.342648662702063e-09, "loss": 0.1371, "step": 45835 }, { "epoch": 0.9932182090004984, "grad_norm": 1.6197534799575806, "learning_rate": 2.2695623830504275e-09, "loss": 0.1452, "step": 45840 }, { "epoch": 0.9933265443199792, "grad_norm": 0.9539565443992615, "learning_rate": 2.1976341907026244e-09, "loss": 0.1438, "step": 45845 }, { "epoch": 0.9934348796394601, "grad_norm": 0.6930379867553711, "learning_rate": 2.1268640939930974e-09, "loss": 0.1603, "step": 45850 }, { "epoch": 0.9935432149589409, "grad_norm": 1.3180385828018188, "learning_rate": 2.057252101118623e-09, "loss": 0.1722, "step": 45855 }, { "epoch": 0.9936515502784218, "grad_norm": 1.1571801900863647, "learning_rate": 1.988798220141641e-09, "loss": 0.2336, "step": 45860 }, { "epoch": 0.9937598855979026, "grad_norm": 1.1901040077209473, "learning_rate": 1.9215024589913643e-09, "loss": 0.0791, "step": 45865 }, { "epoch": 0.9938682209173835, "grad_norm": 2.139244556427002, "learning_rate": 1.8553648254648893e-09, "loss": 0.1281, "step": 45870 }, { "epoch": 0.9939765562368643, "grad_norm": 0.6717481017112732, "learning_rate": 1.7903853272216442e-09, "loss": 0.1989, "step": 45875 }, { "epoch": 0.9940848915563452, "grad_norm": 1.5844449996948242, "learning_rate": 1.7265639717900517e-09, "loss": 0.1609, "step": 45880 }, { "epoch": 0.994193226875826, "grad_norm": 1.8666136264801025, "learning_rate": 1.6639007665608665e-09, "loss": 0.1656, "step": 45885 }, { "epoch": 0.994301562195307, "grad_norm": 1.9627190828323364, "learning_rate": 1.6023957187938366e-09, "loss": 0.1438, "step": 45890 }, { "epoch": 0.9944098975147878, "grad_norm": 0.143265500664711, "learning_rate": 1.5420488356143737e-09, "loss": 0.1209, "step": 45895 }, { "epoch": 0.9945182328342687, "grad_norm": 2.8199336528778076, "learning_rate": 1.4828601240102213e-09, "loss": 0.1777, "step": 45900 }, { "epoch": 0.9946265681537495, "grad_norm": 1.0557754039764404, "learning_rate": 1.4248295908392274e-09, "loss": 0.178, "step": 45905 }, { "epoch": 0.9947349034732303, "grad_norm": 1.15532648563385, "learning_rate": 1.367957242823792e-09, "loss": 0.1758, "step": 45910 }, { "epoch": 0.9948432387927112, "grad_norm": 1.821589708328247, "learning_rate": 1.3122430865508683e-09, "loss": 0.1677, "step": 45915 }, { "epoch": 0.994951574112192, "grad_norm": 1.4026004076004028, "learning_rate": 1.2576871284741831e-09, "loss": 0.2373, "step": 45920 }, { "epoch": 0.9950599094316729, "grad_norm": 1.1160777807235718, "learning_rate": 1.2042893749131258e-09, "loss": 0.1555, "step": 45925 }, { "epoch": 0.9951682447511537, "grad_norm": 1.2864902019500732, "learning_rate": 1.1520498320527484e-09, "loss": 0.1618, "step": 45930 }, { "epoch": 0.9952765800706346, "grad_norm": 0.9827545881271362, "learning_rate": 1.100968505944877e-09, "loss": 0.0602, "step": 45935 }, { "epoch": 0.9953849153901155, "grad_norm": 1.282821536064148, "learning_rate": 1.0510454025070006e-09, "loss": 0.1141, "step": 45940 }, { "epoch": 0.9954932507095964, "grad_norm": 1.9312447309494019, "learning_rate": 1.00228052752116e-09, "loss": 0.2308, "step": 45945 }, { "epoch": 0.9956015860290772, "grad_norm": 1.631813645362854, "learning_rate": 9.546738866350602e-10, "loss": 0.231, "step": 45950 }, { "epoch": 0.9957099213485581, "grad_norm": 1.4623867273330688, "learning_rate": 9.082254853653993e-10, "loss": 0.1583, "step": 45955 }, { "epoch": 0.9958182566680389, "grad_norm": 1.1067571640014648, "learning_rate": 8.629353290912079e-10, "loss": 0.1684, "step": 45960 }, { "epoch": 0.9959265919875198, "grad_norm": 1.7019338607788086, "learning_rate": 8.188034230582897e-10, "loss": 0.1426, "step": 45965 }, { "epoch": 0.9960349273070006, "grad_norm": 1.7686690092086792, "learning_rate": 7.758297723803321e-10, "loss": 0.1731, "step": 45970 }, { "epoch": 0.9961432626264815, "grad_norm": 1.4483987092971802, "learning_rate": 7.340143820333545e-10, "loss": 0.1101, "step": 45975 }, { "epoch": 0.9962515979459623, "grad_norm": 1.7357850074768066, "learning_rate": 6.933572568612601e-10, "loss": 0.2015, "step": 45980 }, { "epoch": 0.9963599332654433, "grad_norm": 2.3246819972991943, "learning_rate": 6.538584015747251e-10, "loss": 0.2153, "step": 45985 }, { "epoch": 0.9964682685849241, "grad_norm": 1.4407099485397339, "learning_rate": 6.155178207489787e-10, "loss": 0.1802, "step": 45990 }, { "epoch": 0.996576603904405, "grad_norm": 0.5227840542793274, "learning_rate": 5.783355188249129e-10, "loss": 0.1613, "step": 45995 }, { "epoch": 0.9966849392238858, "grad_norm": 1.4031078815460205, "learning_rate": 5.423115001079726e-10, "loss": 0.2256, "step": 46000 }, { "epoch": 0.9967932745433666, "grad_norm": 0.5056806206703186, "learning_rate": 5.074457687737066e-10, "loss": 0.0618, "step": 46005 }, { "epoch": 0.9969016098628475, "grad_norm": 0.8546344637870789, "learning_rate": 4.737383288588859e-10, "loss": 0.0873, "step": 46010 }, { "epoch": 0.9970099451823283, "grad_norm": 0.4426642060279846, "learning_rate": 4.411891842681648e-10, "loss": 0.1649, "step": 46015 }, { "epoch": 0.9971182805018092, "grad_norm": 2.0553901195526123, "learning_rate": 4.0979833877297095e-10, "loss": 0.2668, "step": 46020 }, { "epoch": 0.99722661582129, "grad_norm": 1.2741724252700806, "learning_rate": 3.795657960092847e-10, "loss": 0.1573, "step": 46025 }, { "epoch": 0.9973349511407709, "grad_norm": 1.7793080806732178, "learning_rate": 3.5049155947763924e-10, "loss": 0.1774, "step": 46030 }, { "epoch": 0.9974432864602518, "grad_norm": 1.6637650728225708, "learning_rate": 3.225756325464513e-10, "loss": 0.1519, "step": 46035 }, { "epoch": 0.9975516217797327, "grad_norm": 0.8855960965156555, "learning_rate": 2.9581801845091073e-10, "loss": 0.1035, "step": 46040 }, { "epoch": 0.9976599570992135, "grad_norm": 1.6356725692749023, "learning_rate": 2.702187202885398e-10, "loss": 0.2093, "step": 46045 }, { "epoch": 0.9977682924186944, "grad_norm": 1.461717963218689, "learning_rate": 2.4577774102585437e-10, "loss": 0.2102, "step": 46050 }, { "epoch": 0.9978766277381752, "grad_norm": 1.7252360582351685, "learning_rate": 2.22495083492813e-10, "loss": 0.1906, "step": 46055 }, { "epoch": 0.997984963057656, "grad_norm": 1.0947827100753784, "learning_rate": 2.0037075038725761e-10, "loss": 0.1997, "step": 46060 }, { "epoch": 0.9980932983771369, "grad_norm": 0.5982900857925415, "learning_rate": 1.7940474427269316e-10, "loss": 0.1405, "step": 46065 }, { "epoch": 0.9982016336966177, "grad_norm": 1.1445115804672241, "learning_rate": 1.5959706757606718e-10, "loss": 0.1584, "step": 46070 }, { "epoch": 0.9983099690160986, "grad_norm": 1.2519699335098267, "learning_rate": 1.4094772259221068e-10, "loss": 0.2398, "step": 46075 }, { "epoch": 0.9984183043355794, "grad_norm": 1.0610405206680298, "learning_rate": 1.2345671148161764e-10, "loss": 0.1547, "step": 46080 }, { "epoch": 0.9985266396550604, "grad_norm": 1.5232285261154175, "learning_rate": 1.0712403627155532e-10, "loss": 0.1507, "step": 46085 }, { "epoch": 0.9986349749745412, "grad_norm": 0.9748086333274841, "learning_rate": 9.19496988516233e-11, "loss": 0.1575, "step": 46090 }, { "epoch": 0.9987433102940221, "grad_norm": 0.5381373763084412, "learning_rate": 7.793370098152508e-11, "loss": 0.1126, "step": 46095 }, { "epoch": 0.9988516456135029, "grad_norm": 1.466058373451233, "learning_rate": 6.50760442832965e-11, "loss": 0.0999, "step": 46100 }, { "epoch": 0.9989599809329838, "grad_norm": 1.3630963563919067, "learning_rate": 5.3376730246856854e-11, "loss": 0.1627, "step": 46105 }, { "epoch": 0.9990683162524646, "grad_norm": 0.7751365900039673, "learning_rate": 4.283576022778846e-11, "loss": 0.1905, "step": 46110 }, { "epoch": 0.9991766515719455, "grad_norm": 1.6390751600265503, "learning_rate": 3.345313544733664e-11, "loss": 0.1526, "step": 46115 }, { "epoch": 0.9992849868914263, "grad_norm": 0.9549440741539001, "learning_rate": 2.522885699129951e-11, "loss": 0.1858, "step": 46120 }, { "epoch": 0.9993933222109072, "grad_norm": 2.3777363300323486, "learning_rate": 1.8162925813358658e-11, "loss": 0.1435, "step": 46125 }, { "epoch": 0.9995016575303881, "grad_norm": 1.2852659225463867, "learning_rate": 1.225534273063822e-11, "loss": 0.1546, "step": 46130 }, { "epoch": 0.999609992849869, "grad_norm": 2.279456615447998, "learning_rate": 7.506108429256032e-12, "loss": 0.2089, "step": 46135 }, { "epoch": 0.9997183281693498, "grad_norm": 1.6034420728683472, "learning_rate": 3.915223458772488e-12, "loss": 0.1335, "step": 46140 }, { "epoch": 0.9998266634888306, "grad_norm": 2.1457302570343018, "learning_rate": 1.4826882333007774e-12, "loss": 0.1827, "step": 46145 }, { "epoch": 0.9999349988083115, "grad_norm": 0.8025592565536499, "learning_rate": 2.0850303705799435e-13, "loss": 0.1991, "step": 46150 }, { "epoch": 1.0, "step": 46153, "total_flos": 1.1002273748480242e+20, "train_loss": 0.1970659160609881, "train_runtime": 294776.7627, "train_samples_per_second": 3.758, "train_steps_per_second": 0.157 } ], "logging_steps": 5, "max_steps": 46153, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 15000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.1002273748480242e+20, "train_batch_size": 2, "trial_name": null, "trial_params": null }