{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7699711260827719, "eval_steps": 500, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00019249278152069297, "grad_norm": 1.9932657480239868, "learning_rate": 0.00016, "loss": 2.962, "step": 5 }, { "epoch": 0.00038498556304138594, "grad_norm": 1.2827370166778564, "learning_rate": 0.000199999997073812, "loss": 2.2983, "step": 10 }, { "epoch": 0.0005774783445620789, "grad_norm": 1.3585917949676514, "learning_rate": 0.0001999999851861734, "loss": 1.9389, "step": 15 }, { "epoch": 0.0007699711260827719, "grad_norm": 2.117544412612915, "learning_rate": 0.00019999996415419864, "loss": 1.6659, "step": 20 }, { "epoch": 0.0009624639076034649, "grad_norm": 0.8802940249443054, "learning_rate": 0.0001999999339778896, "loss": 1.6015, "step": 25 }, { "epoch": 0.0011549566891241579, "grad_norm": 1.256873369216919, "learning_rate": 0.000199999894657249, "loss": 1.7428, "step": 30 }, { "epoch": 0.001347449470644851, "grad_norm": 1.9709804058074951, "learning_rate": 0.0001999998461922805, "loss": 1.4316, "step": 35 }, { "epoch": 0.0015399422521655437, "grad_norm": 1.2085392475128174, "learning_rate": 0.00019999978858298848, "loss": 1.8392, "step": 40 }, { "epoch": 0.0017324350336862368, "grad_norm": 0.9966161847114563, "learning_rate": 0.00019999972182937827, "loss": 1.6381, "step": 45 }, { "epoch": 0.0019249278152069298, "grad_norm": 1.5572378635406494, "learning_rate": 0.0001999996459314559, "loss": 1.6214, "step": 50 }, { "epoch": 0.0021174205967276227, "grad_norm": 0.9813450574874878, "learning_rate": 0.00019999956088922837, "loss": 1.5337, "step": 55 }, { "epoch": 0.0023099133782483157, "grad_norm": 1.140754222869873, "learning_rate": 0.00019999946670270341, "loss": 1.5865, "step": 60 }, { "epoch": 0.0025024061597690088, "grad_norm": 1.7033613920211792, "learning_rate": 0.0001999993633718897, "loss": 1.5483, "step": 65 
}, { "epoch": 0.002694898941289702, "grad_norm": 0.8782416582107544, "learning_rate": 0.00019999925089679658, "loss": 1.7574, "step": 70 }, { "epoch": 0.0028873917228103944, "grad_norm": 0.94110506772995, "learning_rate": 0.00019999912927743445, "loss": 1.747, "step": 75 }, { "epoch": 0.0030798845043310875, "grad_norm": 2.9130144119262695, "learning_rate": 0.00019999899851381436, "loss": 1.5482, "step": 80 }, { "epoch": 0.0032723772858517805, "grad_norm": 1.444981336593628, "learning_rate": 0.00019999885860594828, "loss": 1.7935, "step": 85 }, { "epoch": 0.0034648700673724736, "grad_norm": 0.8361923098564148, "learning_rate": 0.00019999870955384906, "loss": 1.5566, "step": 90 }, { "epoch": 0.0036573628488931666, "grad_norm": 1.0198391675949097, "learning_rate": 0.00019999855135753025, "loss": 1.6608, "step": 95 }, { "epoch": 0.0038498556304138597, "grad_norm": 0.9720978736877441, "learning_rate": 0.00019999838401700632, "loss": 1.4217, "step": 100 }, { "epoch": 0.004042348411934553, "grad_norm": 0.7735599279403687, "learning_rate": 0.00019999820753229263, "loss": 1.4195, "step": 105 }, { "epoch": 0.004234841193455245, "grad_norm": 1.1776920557022095, "learning_rate": 0.0001999980219034053, "loss": 1.7147, "step": 110 }, { "epoch": 0.004427333974975939, "grad_norm": 1.156069278717041, "learning_rate": 0.0001999978271303613, "loss": 1.7, "step": 115 }, { "epoch": 0.0046198267564966315, "grad_norm": 1.2335503101348877, "learning_rate": 0.0001999976232131784, "loss": 1.3309, "step": 120 }, { "epoch": 0.004812319538017324, "grad_norm": 1.0332967042922974, "learning_rate": 0.0001999974101518753, "loss": 1.7515, "step": 125 }, { "epoch": 0.0050048123195380175, "grad_norm": 1.561087727546692, "learning_rate": 0.00019999718794647145, "loss": 1.5517, "step": 130 }, { "epoch": 0.00519730510105871, "grad_norm": 1.3611408472061157, "learning_rate": 0.00019999695659698717, "loss": 1.5771, "step": 135 }, { "epoch": 0.005389797882579404, "grad_norm": 1.5531154870986938, 
"learning_rate": 0.0001999967161034437, "loss": 1.4217, "step": 140 }, { "epoch": 0.005582290664100096, "grad_norm": 1.5827676057815552, "learning_rate": 0.00019999646646586287, "loss": 1.611, "step": 145 }, { "epoch": 0.005774783445620789, "grad_norm": 1.1693483591079712, "learning_rate": 0.00019999620768426763, "loss": 1.3961, "step": 150 }, { "epoch": 0.005967276227141482, "grad_norm": 1.4277936220169067, "learning_rate": 0.00019999593975868164, "loss": 1.638, "step": 155 }, { "epoch": 0.006159769008662175, "grad_norm": 1.2951083183288574, "learning_rate": 0.00019999566268912933, "loss": 1.6187, "step": 160 }, { "epoch": 0.0063522617901828685, "grad_norm": 2.4885995388031006, "learning_rate": 0.0001999953764756361, "loss": 1.5669, "step": 165 }, { "epoch": 0.006544754571703561, "grad_norm": 1.3352105617523193, "learning_rate": 0.00019999508111822811, "loss": 1.3157, "step": 170 }, { "epoch": 0.006737247353224254, "grad_norm": 1.2560889720916748, "learning_rate": 0.00019999477661693233, "loss": 1.7011, "step": 175 }, { "epoch": 0.006929740134744947, "grad_norm": 2.4167582988739014, "learning_rate": 0.00019999446297177666, "loss": 1.4827, "step": 180 }, { "epoch": 0.00712223291626564, "grad_norm": 1.0598788261413574, "learning_rate": 0.00019999414018278974, "loss": 1.5718, "step": 185 }, { "epoch": 0.007314725697786333, "grad_norm": 1.5576567649841309, "learning_rate": 0.00019999380825000111, "loss": 1.7717, "step": 190 }, { "epoch": 0.007507218479307026, "grad_norm": 1.005711317062378, "learning_rate": 0.0001999934671734411, "loss": 1.5085, "step": 195 }, { "epoch": 0.007699711260827719, "grad_norm": 1.7211413383483887, "learning_rate": 0.00019999311695314095, "loss": 1.623, "step": 200 }, { "epoch": 0.007892204042348411, "grad_norm": 1.5765767097473145, "learning_rate": 0.00019999275758913261, "loss": 1.5982, "step": 205 }, { "epoch": 0.008084696823869105, "grad_norm": 1.0989298820495605, "learning_rate": 0.00019999238908144896, "loss": 1.3306, "step": 210 }, { 
"epoch": 0.008277189605389798, "grad_norm": 1.0234464406967163, "learning_rate": 0.0001999920114301238, "loss": 1.5856, "step": 215 }, { "epoch": 0.00846968238691049, "grad_norm": 1.6681355237960815, "learning_rate": 0.0001999916246351915, "loss": 1.4777, "step": 220 }, { "epoch": 0.008662175168431183, "grad_norm": 0.9723508358001709, "learning_rate": 0.00019999122869668754, "loss": 1.5357, "step": 225 }, { "epoch": 0.008854667949951878, "grad_norm": 0.8840959072113037, "learning_rate": 0.0001999908236146481, "loss": 1.5296, "step": 230 }, { "epoch": 0.00904716073147257, "grad_norm": 0.9913238883018494, "learning_rate": 0.0001999904093891102, "loss": 1.5846, "step": 235 }, { "epoch": 0.009239653512993263, "grad_norm": 1.129952073097229, "learning_rate": 0.00019998998602011178, "loss": 1.4455, "step": 240 }, { "epoch": 0.009432146294513956, "grad_norm": 1.0377521514892578, "learning_rate": 0.00019998955350769148, "loss": 1.4212, "step": 245 }, { "epoch": 0.009624639076034648, "grad_norm": 2.2103137969970703, "learning_rate": 0.00019998911185188886, "loss": 1.5812, "step": 250 }, { "epoch": 0.009817131857555342, "grad_norm": 0.8716953992843628, "learning_rate": 0.00019998866105274437, "loss": 1.5326, "step": 255 }, { "epoch": 0.010009624639076035, "grad_norm": 1.1956042051315308, "learning_rate": 0.00019998820111029916, "loss": 1.7183, "step": 260 }, { "epoch": 0.010202117420596728, "grad_norm": 2.747600555419922, "learning_rate": 0.00019998773202459534, "loss": 1.7952, "step": 265 }, { "epoch": 0.01039461020211742, "grad_norm": 1.3412338495254517, "learning_rate": 0.00019998725379567577, "loss": 1.3538, "step": 270 }, { "epoch": 0.010587102983638113, "grad_norm": 1.651822805404663, "learning_rate": 0.00019998676642358422, "loss": 1.5458, "step": 275 }, { "epoch": 0.010779595765158807, "grad_norm": 1.3036198616027832, "learning_rate": 0.00019998626990836522, "loss": 1.7305, "step": 280 }, { "epoch": 0.0109720885466795, "grad_norm": 0.8263657093048096, 
"learning_rate": 0.00019998576425006416, "loss": 1.3767, "step": 285 }, { "epoch": 0.011164581328200193, "grad_norm": 2.022136926651001, "learning_rate": 0.00019998524944872737, "loss": 1.5823, "step": 290 }, { "epoch": 0.011357074109720885, "grad_norm": 1.1224019527435303, "learning_rate": 0.00019998472550440178, "loss": 1.5723, "step": 295 }, { "epoch": 0.011549566891241578, "grad_norm": 1.375664234161377, "learning_rate": 0.00019998419241713542, "loss": 1.5224, "step": 300 }, { "epoch": 0.011742059672762272, "grad_norm": 1.2721813917160034, "learning_rate": 0.000199983650186977, "loss": 1.7217, "step": 305 }, { "epoch": 0.011934552454282965, "grad_norm": 1.4723321199417114, "learning_rate": 0.0001999830988139761, "loss": 1.4666, "step": 310 }, { "epoch": 0.012127045235803657, "grad_norm": 0.695198118686676, "learning_rate": 0.00019998253829818315, "loss": 1.2672, "step": 315 }, { "epoch": 0.01231953801732435, "grad_norm": 1.716638207435608, "learning_rate": 0.00019998196863964937, "loss": 1.3461, "step": 320 }, { "epoch": 0.012512030798845043, "grad_norm": 1.1060154438018799, "learning_rate": 0.0001999813898384269, "loss": 1.3816, "step": 325 }, { "epoch": 0.012704523580365737, "grad_norm": 1.6124354600906372, "learning_rate": 0.00019998080189456862, "loss": 1.5232, "step": 330 }, { "epoch": 0.01289701636188643, "grad_norm": 1.5060306787490845, "learning_rate": 0.00019998020480812832, "loss": 1.5767, "step": 335 }, { "epoch": 0.013089509143407122, "grad_norm": 1.1920175552368164, "learning_rate": 0.00019997959857916063, "loss": 1.6112, "step": 340 }, { "epoch": 0.013282001924927815, "grad_norm": 1.1669896841049194, "learning_rate": 0.00019997898320772096, "loss": 1.3679, "step": 345 }, { "epoch": 0.013474494706448507, "grad_norm": 1.1692086458206177, "learning_rate": 0.00019997835869386553, "loss": 1.4147, "step": 350 }, { "epoch": 0.013666987487969202, "grad_norm": 2.0466034412384033, "learning_rate": 0.00019997772503765153, "loss": 1.5261, "step": 355 }, { 
"epoch": 0.013859480269489894, "grad_norm": 1.1581529378890991, "learning_rate": 0.00019997708223913686, "loss": 1.5441, "step": 360 }, { "epoch": 0.014051973051010587, "grad_norm": 1.4370143413543701, "learning_rate": 0.0001999764302983803, "loss": 1.651, "step": 365 }, { "epoch": 0.01424446583253128, "grad_norm": 0.998635470867157, "learning_rate": 0.00019997576921544147, "loss": 1.4311, "step": 370 }, { "epoch": 0.014436958614051972, "grad_norm": 1.2625153064727783, "learning_rate": 0.00019997509899038086, "loss": 1.4634, "step": 375 }, { "epoch": 0.014629451395572667, "grad_norm": 1.171949863433838, "learning_rate": 0.00019997441962325968, "loss": 1.2474, "step": 380 }, { "epoch": 0.01482194417709336, "grad_norm": 1.4312052726745605, "learning_rate": 0.00019997373111414009, "loss": 1.4814, "step": 385 }, { "epoch": 0.015014436958614052, "grad_norm": 1.1508846282958984, "learning_rate": 0.00019997303346308508, "loss": 1.6291, "step": 390 }, { "epoch": 0.015206929740134744, "grad_norm": 1.2096014022827148, "learning_rate": 0.0001999723266701584, "loss": 1.5507, "step": 395 }, { "epoch": 0.015399422521655439, "grad_norm": 0.996391773223877, "learning_rate": 0.00019997161073542473, "loss": 1.6402, "step": 400 }, { "epoch": 0.015591915303176131, "grad_norm": 1.6977828741073608, "learning_rate": 0.00019997088565894947, "loss": 1.5706, "step": 405 }, { "epoch": 0.015784408084696822, "grad_norm": 1.4707343578338623, "learning_rate": 0.000199970151440799, "loss": 1.6348, "step": 410 }, { "epoch": 0.015976900866217517, "grad_norm": 1.5461647510528564, "learning_rate": 0.0001999694080810404, "loss": 1.4836, "step": 415 }, { "epoch": 0.01616939364773821, "grad_norm": 1.6253695487976074, "learning_rate": 0.00019996865557974166, "loss": 1.5834, "step": 420 }, { "epoch": 0.016361886429258902, "grad_norm": 1.671321988105774, "learning_rate": 0.00019996789393697165, "loss": 1.3816, "step": 425 }, { "epoch": 0.016554379210779596, "grad_norm": 0.9412807822227478, "learning_rate": 
0.00019996712315279992, "loss": 1.443, "step": 430 }, { "epoch": 0.016746871992300287, "grad_norm": 0.8705793023109436, "learning_rate": 0.000199966343227297, "loss": 1.4938, "step": 435 }, { "epoch": 0.01693936477382098, "grad_norm": 1.6019854545593262, "learning_rate": 0.00019996555416053422, "loss": 1.3622, "step": 440 }, { "epoch": 0.017131857555341676, "grad_norm": 1.0340136289596558, "learning_rate": 0.00019996475595258372, "loss": 1.5803, "step": 445 }, { "epoch": 0.017324350336862367, "grad_norm": 1.4469108581542969, "learning_rate": 0.0001999639486035185, "loss": 1.6074, "step": 450 }, { "epoch": 0.01751684311838306, "grad_norm": 1.3311457633972168, "learning_rate": 0.00019996313211341238, "loss": 1.5337, "step": 455 }, { "epoch": 0.017709335899903755, "grad_norm": 0.9691542387008667, "learning_rate": 0.00019996230648234003, "loss": 1.3835, "step": 460 }, { "epoch": 0.017901828681424446, "grad_norm": 1.0229564905166626, "learning_rate": 0.00019996147171037691, "loss": 1.4925, "step": 465 }, { "epoch": 0.01809432146294514, "grad_norm": 1.0120052099227905, "learning_rate": 0.00019996062779759942, "loss": 1.4781, "step": 470 }, { "epoch": 0.01828681424446583, "grad_norm": 0.8471246361732483, "learning_rate": 0.00019995977474408468, "loss": 1.4961, "step": 475 }, { "epoch": 0.018479307025986526, "grad_norm": 2.020277261734009, "learning_rate": 0.00019995891254991072, "loss": 1.5299, "step": 480 }, { "epoch": 0.01867179980750722, "grad_norm": 1.2169212102890015, "learning_rate": 0.00019995804121515637, "loss": 1.4626, "step": 485 }, { "epoch": 0.01886429258902791, "grad_norm": 2.31048321723938, "learning_rate": 0.00019995716073990133, "loss": 1.3653, "step": 490 }, { "epoch": 0.019056785370548605, "grad_norm": 1.8170429468154907, "learning_rate": 0.0001999562711242261, "loss": 1.3537, "step": 495 }, { "epoch": 0.019249278152069296, "grad_norm": 1.1187188625335693, "learning_rate": 0.00019995537236821198, "loss": 1.6358, "step": 500 }, { "epoch": 
0.01944177093358999, "grad_norm": 1.2112963199615479, "learning_rate": 0.0001999544644719412, "loss": 1.4565, "step": 505 }, { "epoch": 0.019634263715110685, "grad_norm": 1.3345009088516235, "learning_rate": 0.0001999535474354968, "loss": 1.647, "step": 510 }, { "epoch": 0.019826756496631376, "grad_norm": 1.3109021186828613, "learning_rate": 0.00019995262125896266, "loss": 1.5462, "step": 515 }, { "epoch": 0.02001924927815207, "grad_norm": 1.1681957244873047, "learning_rate": 0.00019995168594242338, "loss": 1.5292, "step": 520 }, { "epoch": 0.02021174205967276, "grad_norm": 0.9509350657463074, "learning_rate": 0.00019995074148596457, "loss": 1.5566, "step": 525 }, { "epoch": 0.020404234841193455, "grad_norm": 0.6594029664993286, "learning_rate": 0.00019994978788967255, "loss": 1.3693, "step": 530 }, { "epoch": 0.02059672762271415, "grad_norm": 0.8029458522796631, "learning_rate": 0.00019994882515363452, "loss": 1.4664, "step": 535 }, { "epoch": 0.02078922040423484, "grad_norm": 1.1551908254623413, "learning_rate": 0.00019994785327793856, "loss": 1.5342, "step": 540 }, { "epoch": 0.020981713185755535, "grad_norm": 1.3600980043411255, "learning_rate": 0.0001999468722626735, "loss": 1.5262, "step": 545 }, { "epoch": 0.021174205967276226, "grad_norm": 1.0333319902420044, "learning_rate": 0.00019994588210792906, "loss": 1.5079, "step": 550 }, { "epoch": 0.02136669874879692, "grad_norm": 1.2757694721221924, "learning_rate": 0.00019994488281379578, "loss": 1.7721, "step": 555 }, { "epoch": 0.021559191530317615, "grad_norm": 1.1292661428451538, "learning_rate": 0.00019994387438036505, "loss": 1.5077, "step": 560 }, { "epoch": 0.021751684311838305, "grad_norm": 1.105522871017456, "learning_rate": 0.00019994285680772906, "loss": 1.6468, "step": 565 }, { "epoch": 0.021944177093359, "grad_norm": 1.6378583908081055, "learning_rate": 0.00019994183009598086, "loss": 1.5432, "step": 570 }, { "epoch": 0.02213666987487969, "grad_norm": 0.931384801864624, "learning_rate": 
0.0001999407942452144, "loss": 1.3818, "step": 575 }, { "epoch": 0.022329162656400385, "grad_norm": 1.0986119508743286, "learning_rate": 0.0001999397492555243, "loss": 1.552, "step": 580 }, { "epoch": 0.02252165543792108, "grad_norm": 1.121957540512085, "learning_rate": 0.00019993869512700623, "loss": 1.5241, "step": 585 }, { "epoch": 0.02271414821944177, "grad_norm": 1.2508270740509033, "learning_rate": 0.00019993763185975646, "loss": 1.6431, "step": 590 }, { "epoch": 0.022906641000962465, "grad_norm": 1.293603777885437, "learning_rate": 0.00019993655945387234, "loss": 1.3788, "step": 595 }, { "epoch": 0.023099133782483156, "grad_norm": 1.3218696117401123, "learning_rate": 0.00019993547790945183, "loss": 1.398, "step": 600 }, { "epoch": 0.02329162656400385, "grad_norm": 0.8816308975219727, "learning_rate": 0.0001999343872265939, "loss": 1.4239, "step": 605 }, { "epoch": 0.023484119345524544, "grad_norm": 1.9127452373504639, "learning_rate": 0.00019993328740539824, "loss": 1.549, "step": 610 }, { "epoch": 0.023676612127045235, "grad_norm": 2.071992874145508, "learning_rate": 0.0001999321784459655, "loss": 1.6769, "step": 615 }, { "epoch": 0.02386910490856593, "grad_norm": 1.335153579711914, "learning_rate": 0.000199931060348397, "loss": 1.6157, "step": 620 }, { "epoch": 0.02406159769008662, "grad_norm": 1.1237496137619019, "learning_rate": 0.000199929933112795, "loss": 1.4733, "step": 625 }, { "epoch": 0.024254090471607315, "grad_norm": 1.2557927370071411, "learning_rate": 0.00019992879673926258, "loss": 1.3888, "step": 630 }, { "epoch": 0.02444658325312801, "grad_norm": 1.0877735614776611, "learning_rate": 0.00019992765122790371, "loss": 1.4241, "step": 635 }, { "epoch": 0.0246390760346487, "grad_norm": 1.0029325485229492, "learning_rate": 0.00019992649657882307, "loss": 1.6504, "step": 640 }, { "epoch": 0.024831568816169394, "grad_norm": 1.5832372903823853, "learning_rate": 0.00019992533279212626, "loss": 1.4662, "step": 645 }, { "epoch": 0.025024061597690085, 
"grad_norm": 1.1658433675765991, "learning_rate": 0.00019992415986791974, "loss": 1.3723, "step": 650 }, { "epoch": 0.02521655437921078, "grad_norm": 1.8895657062530518, "learning_rate": 0.00019992297780631072, "loss": 1.457, "step": 655 }, { "epoch": 0.025409047160731474, "grad_norm": 1.193961501121521, "learning_rate": 0.00019992178660740732, "loss": 1.623, "step": 660 }, { "epoch": 0.025601539942252165, "grad_norm": 0.9851275086402893, "learning_rate": 0.00019992058627131844, "loss": 1.6884, "step": 665 }, { "epoch": 0.02579403272377286, "grad_norm": 1.5353829860687256, "learning_rate": 0.00019991937679815386, "loss": 1.3246, "step": 670 }, { "epoch": 0.02598652550529355, "grad_norm": 1.2476325035095215, "learning_rate": 0.0001999181581880242, "loss": 1.596, "step": 675 }, { "epoch": 0.026179018286814244, "grad_norm": 1.1163430213928223, "learning_rate": 0.00019991693044104083, "loss": 1.5077, "step": 680 }, { "epoch": 0.02637151106833494, "grad_norm": 1.1388076543807983, "learning_rate": 0.0001999156935573161, "loss": 1.4827, "step": 685 }, { "epoch": 0.02656400384985563, "grad_norm": 0.9100907444953918, "learning_rate": 0.00019991444753696304, "loss": 1.3429, "step": 690 }, { "epoch": 0.026756496631376324, "grad_norm": 2.032510995864868, "learning_rate": 0.00019991319238009565, "loss": 1.5473, "step": 695 }, { "epoch": 0.026948989412897015, "grad_norm": 1.0866800546646118, "learning_rate": 0.00019991192808682868, "loss": 1.5552, "step": 700 }, { "epoch": 0.02714148219441771, "grad_norm": 1.3941971063613892, "learning_rate": 0.00019991065465727774, "loss": 1.4103, "step": 705 }, { "epoch": 0.027333974975938403, "grad_norm": 1.721247911453247, "learning_rate": 0.0001999093720915593, "loss": 1.4965, "step": 710 }, { "epoch": 0.027526467757459094, "grad_norm": 1.4090749025344849, "learning_rate": 0.00019990808038979058, "loss": 1.3159, "step": 715 }, { "epoch": 0.02771896053897979, "grad_norm": 1.731886625289917, "learning_rate": 0.00019990677955208973, "loss": 
1.4392, "step": 720 }, { "epoch": 0.02791145332050048, "grad_norm": 1.9695488214492798, "learning_rate": 0.00019990546957857576, "loss": 1.6206, "step": 725 }, { "epoch": 0.028103946102021174, "grad_norm": 0.7977893352508545, "learning_rate": 0.0001999041504693684, "loss": 1.5764, "step": 730 }, { "epoch": 0.02829643888354187, "grad_norm": 0.9448668360710144, "learning_rate": 0.00019990282222458826, "loss": 1.3149, "step": 735 }, { "epoch": 0.02848893166506256, "grad_norm": 1.0612679719924927, "learning_rate": 0.00019990148484435682, "loss": 1.4942, "step": 740 }, { "epoch": 0.028681424446583254, "grad_norm": 1.4038052558898926, "learning_rate": 0.0001999001383287964, "loss": 1.5184, "step": 745 }, { "epoch": 0.028873917228103944, "grad_norm": 1.0545177459716797, "learning_rate": 0.0001998987826780301, "loss": 1.5617, "step": 750 }, { "epoch": 0.02906641000962464, "grad_norm": 2.392878532409668, "learning_rate": 0.0001998974178921819, "loss": 1.3638, "step": 755 }, { "epoch": 0.029258902791145333, "grad_norm": 1.1004624366760254, "learning_rate": 0.0001998960439713766, "loss": 1.5162, "step": 760 }, { "epoch": 0.029451395572666024, "grad_norm": 1.2530279159545898, "learning_rate": 0.0001998946609157398, "loss": 1.5422, "step": 765 }, { "epoch": 0.02964388835418672, "grad_norm": 0.8240470290184021, "learning_rate": 0.00019989326872539803, "loss": 1.3828, "step": 770 }, { "epoch": 0.029836381135707413, "grad_norm": 0.9734111428260803, "learning_rate": 0.00019989186740047857, "loss": 1.7041, "step": 775 }, { "epoch": 0.030028873917228104, "grad_norm": 0.9785217642784119, "learning_rate": 0.00019989045694110953, "loss": 1.6267, "step": 780 }, { "epoch": 0.030221366698748798, "grad_norm": 1.3278164863586426, "learning_rate": 0.00019988903734741994, "loss": 1.5041, "step": 785 }, { "epoch": 0.03041385948026949, "grad_norm": 1.9143437147140503, "learning_rate": 0.00019988760861953958, "loss": 1.4728, "step": 790 }, { "epoch": 0.030606352261790183, "grad_norm": 
1.5717315673828125, "learning_rate": 0.0001998861707575991, "loss": 1.3824, "step": 795 }, { "epoch": 0.030798845043310877, "grad_norm": 1.0486010313034058, "learning_rate": 0.00019988472376173, "loss": 1.6186, "step": 800 }, { "epoch": 0.03099133782483157, "grad_norm": 1.1566083431243896, "learning_rate": 0.00019988326763206458, "loss": 1.3773, "step": 805 }, { "epoch": 0.031183830606352263, "grad_norm": 1.6336543560028076, "learning_rate": 0.00019988180236873602, "loss": 1.2998, "step": 810 }, { "epoch": 0.031376323387872954, "grad_norm": 1.4655206203460693, "learning_rate": 0.00019988032797187824, "loss": 1.3966, "step": 815 }, { "epoch": 0.031568816169393644, "grad_norm": 2.0325050354003906, "learning_rate": 0.00019987884444162618, "loss": 1.3464, "step": 820 }, { "epoch": 0.03176130895091434, "grad_norm": 1.254342794418335, "learning_rate": 0.0001998773517781154, "loss": 1.5236, "step": 825 }, { "epoch": 0.03195380173243503, "grad_norm": 0.8909908533096313, "learning_rate": 0.00019987584998148244, "loss": 1.4838, "step": 830 }, { "epoch": 0.032146294513955724, "grad_norm": 1.1440258026123047, "learning_rate": 0.00019987433905186458, "loss": 1.3952, "step": 835 }, { "epoch": 0.03233878729547642, "grad_norm": 1.2138668298721313, "learning_rate": 0.00019987281898940003, "loss": 1.5982, "step": 840 }, { "epoch": 0.03253128007699711, "grad_norm": 1.1847470998764038, "learning_rate": 0.00019987128979422782, "loss": 1.4313, "step": 845 }, { "epoch": 0.032723772858517804, "grad_norm": 1.4961762428283691, "learning_rate": 0.0001998697514664877, "loss": 1.5187, "step": 850 }, { "epoch": 0.0329162656400385, "grad_norm": 1.4735344648361206, "learning_rate": 0.00019986820400632043, "loss": 1.5443, "step": 855 }, { "epoch": 0.03310875842155919, "grad_norm": 1.1350771188735962, "learning_rate": 0.00019986664741386743, "loss": 1.5219, "step": 860 }, { "epoch": 0.03330125120307988, "grad_norm": 1.098781943321228, "learning_rate": 0.0001998650816892711, "loss": 1.6074, "step": 
865 }, { "epoch": 0.033493743984600574, "grad_norm": 1.9639078378677368, "learning_rate": 0.0001998635068326746, "loss": 1.342, "step": 870 }, { "epoch": 0.03368623676612127, "grad_norm": 1.1193336248397827, "learning_rate": 0.00019986192284422193, "loss": 1.5647, "step": 875 }, { "epoch": 0.03387872954764196, "grad_norm": 1.0558106899261475, "learning_rate": 0.00019986032972405793, "loss": 1.2448, "step": 880 }, { "epoch": 0.034071222329162654, "grad_norm": 1.1178051233291626, "learning_rate": 0.0001998587274723283, "loss": 1.3455, "step": 885 }, { "epoch": 0.03426371511068335, "grad_norm": 1.728400468826294, "learning_rate": 0.0001998571160891795, "loss": 1.44, "step": 890 }, { "epoch": 0.03445620789220404, "grad_norm": 1.158931016921997, "learning_rate": 0.000199855495574759, "loss": 1.4247, "step": 895 }, { "epoch": 0.03464870067372473, "grad_norm": 1.8745627403259277, "learning_rate": 0.0001998538659292149, "loss": 1.4036, "step": 900 }, { "epoch": 0.03484119345524543, "grad_norm": 1.4273000955581665, "learning_rate": 0.0001998522271526962, "loss": 1.4857, "step": 905 }, { "epoch": 0.03503368623676612, "grad_norm": 1.1671931743621826, "learning_rate": 0.0001998505792453528, "loss": 1.7199, "step": 910 }, { "epoch": 0.03522617901828681, "grad_norm": 1.1703475713729858, "learning_rate": 0.00019984892220733537, "loss": 1.5659, "step": 915 }, { "epoch": 0.03541867179980751, "grad_norm": 0.8550274968147278, "learning_rate": 0.00019984725603879546, "loss": 1.3608, "step": 920 }, { "epoch": 0.0356111645813282, "grad_norm": 1.676072359085083, "learning_rate": 0.0001998455807398854, "loss": 1.4841, "step": 925 }, { "epoch": 0.03580365736284889, "grad_norm": 1.362423062324524, "learning_rate": 0.00019984389631075842, "loss": 1.5501, "step": 930 }, { "epoch": 0.03599615014436958, "grad_norm": 1.1643259525299072, "learning_rate": 0.0001998422027515685, "loss": 1.4954, "step": 935 }, { "epoch": 0.03618864292589028, "grad_norm": 1.4984415769577026, "learning_rate": 
0.00019984050006247053, "loss": 1.337, "step": 940 }, { "epoch": 0.03638113570741097, "grad_norm": 1.399708867073059, "learning_rate": 0.00019983878824362023, "loss": 1.5546, "step": 945 }, { "epoch": 0.03657362848893166, "grad_norm": 1.8458516597747803, "learning_rate": 0.00019983706729517412, "loss": 1.5268, "step": 950 }, { "epoch": 0.03676612127045236, "grad_norm": 1.1428085565567017, "learning_rate": 0.00019983533721728956, "loss": 1.4454, "step": 955 }, { "epoch": 0.03695861405197305, "grad_norm": 1.2200374603271484, "learning_rate": 0.00019983359801012475, "loss": 1.5586, "step": 960 }, { "epoch": 0.03715110683349374, "grad_norm": 1.3679723739624023, "learning_rate": 0.00019983184967383875, "loss": 1.3948, "step": 965 }, { "epoch": 0.03734359961501444, "grad_norm": 1.489397644996643, "learning_rate": 0.00019983009220859142, "loss": 1.5154, "step": 970 }, { "epoch": 0.03753609239653513, "grad_norm": 1.0442456007003784, "learning_rate": 0.00019982832561454345, "loss": 1.5704, "step": 975 }, { "epoch": 0.03772858517805582, "grad_norm": 1.7480882406234741, "learning_rate": 0.00019982654989185642, "loss": 1.5235, "step": 980 }, { "epoch": 0.03792107795957651, "grad_norm": 1.0078760385513306, "learning_rate": 0.00019982476504069272, "loss": 1.3936, "step": 985 }, { "epoch": 0.03811357074109721, "grad_norm": 1.0461446046829224, "learning_rate": 0.0001998229710612155, "loss": 1.6994, "step": 990 }, { "epoch": 0.0383060635226179, "grad_norm": 2.1919922828674316, "learning_rate": 0.00019982116795358885, "loss": 1.5739, "step": 995 }, { "epoch": 0.03849855630413859, "grad_norm": 1.7092692852020264, "learning_rate": 0.00019981935571797768, "loss": 1.2746, "step": 1000 }, { "epoch": 0.03869104908565929, "grad_norm": 1.3044835329055786, "learning_rate": 0.00019981753435454764, "loss": 1.5254, "step": 1005 }, { "epoch": 0.03888354186717998, "grad_norm": 1.1550064086914062, "learning_rate": 0.0001998157038634653, "loss": 1.6154, "step": 1010 }, { "epoch": 
0.03907603464870067, "grad_norm": 2.0250370502471924, "learning_rate": 0.00019981386424489808, "loss": 1.4807, "step": 1015 }, { "epoch": 0.03926852743022137, "grad_norm": 1.036095380783081, "learning_rate": 0.00019981201549901419, "loss": 1.4124, "step": 1020 }, { "epoch": 0.03946102021174206, "grad_norm": 1.126434564590454, "learning_rate": 0.0001998101576259827, "loss": 1.4959, "step": 1025 }, { "epoch": 0.03965351299326275, "grad_norm": 1.2912375926971436, "learning_rate": 0.00019980829062597342, "loss": 1.5006, "step": 1030 }, { "epoch": 0.03984600577478344, "grad_norm": 1.5378974676132202, "learning_rate": 0.00019980641449915713, "loss": 1.3073, "step": 1035 }, { "epoch": 0.04003849855630414, "grad_norm": 1.52741277217865, "learning_rate": 0.0001998045292457054, "loss": 1.3709, "step": 1040 }, { "epoch": 0.04023099133782483, "grad_norm": 1.6989667415618896, "learning_rate": 0.00019980263486579064, "loss": 1.4784, "step": 1045 }, { "epoch": 0.04042348411934552, "grad_norm": 1.0623974800109863, "learning_rate": 0.00019980073135958607, "loss": 1.5163, "step": 1050 }, { "epoch": 0.04061597690086622, "grad_norm": 1.323283314704895, "learning_rate": 0.0001997988187272657, "loss": 1.4793, "step": 1055 }, { "epoch": 0.04080846968238691, "grad_norm": 1.4508922100067139, "learning_rate": 0.00019979689696900447, "loss": 1.4746, "step": 1060 }, { "epoch": 0.0410009624639076, "grad_norm": 1.159579873085022, "learning_rate": 0.0001997949660849781, "loss": 1.2928, "step": 1065 }, { "epoch": 0.0411934552454283, "grad_norm": 1.5187591314315796, "learning_rate": 0.0001997930260753632, "loss": 1.5116, "step": 1070 }, { "epoch": 0.04138594802694899, "grad_norm": 1.7137175798416138, "learning_rate": 0.0001997910769403371, "loss": 1.6406, "step": 1075 }, { "epoch": 0.04157844080846968, "grad_norm": 1.221326470375061, "learning_rate": 0.00019978911868007807, "loss": 1.418, "step": 1080 }, { "epoch": 0.04177093358999037, "grad_norm": 1.0666981935501099, "learning_rate": 
0.0001997871512947652, "loss": 1.3768, "step": 1085 }, { "epoch": 0.04196342637151107, "grad_norm": 0.9577809572219849, "learning_rate": 0.00019978517478457834, "loss": 1.4915, "step": 1090 }, { "epoch": 0.04215591915303176, "grad_norm": 2.3966264724731445, "learning_rate": 0.00019978318914969827, "loss": 1.7057, "step": 1095 }, { "epoch": 0.04234841193455245, "grad_norm": 1.0523775815963745, "learning_rate": 0.0001997811943903066, "loss": 1.3887, "step": 1100 }, { "epoch": 0.04254090471607315, "grad_norm": 1.3975977897644043, "learning_rate": 0.00019977919050658566, "loss": 1.5335, "step": 1105 }, { "epoch": 0.04273339749759384, "grad_norm": 1.5198701620101929, "learning_rate": 0.0001997771774987187, "loss": 1.3939, "step": 1110 }, { "epoch": 0.04292589027911453, "grad_norm": 0.7943345308303833, "learning_rate": 0.00019977515536688984, "loss": 1.5908, "step": 1115 }, { "epoch": 0.04311838306063523, "grad_norm": 0.9602519869804382, "learning_rate": 0.00019977312411128398, "loss": 1.3225, "step": 1120 }, { "epoch": 0.04331087584215592, "grad_norm": 1.0204732418060303, "learning_rate": 0.00019977108373208687, "loss": 1.518, "step": 1125 }, { "epoch": 0.04350336862367661, "grad_norm": 1.2130141258239746, "learning_rate": 0.00019976903422948503, "loss": 1.3693, "step": 1130 }, { "epoch": 0.0436958614051973, "grad_norm": 0.854958176612854, "learning_rate": 0.00019976697560366598, "loss": 1.4907, "step": 1135 }, { "epoch": 0.043888354186718, "grad_norm": 1.3699367046356201, "learning_rate": 0.00019976490785481789, "loss": 1.4448, "step": 1140 }, { "epoch": 0.04408084696823869, "grad_norm": 1.1766821146011353, "learning_rate": 0.00019976283098312983, "loss": 1.5171, "step": 1145 }, { "epoch": 0.04427333974975938, "grad_norm": 1.6543035507202148, "learning_rate": 0.00019976074498879174, "loss": 1.2751, "step": 1150 }, { "epoch": 0.04446583253128008, "grad_norm": 1.2228333950042725, "learning_rate": 0.0001997586498719944, "loss": 1.4522, "step": 1155 }, { "epoch": 
0.04465832531280077, "grad_norm": 1.2733262777328491, "learning_rate": 0.00019975654563292937, "loss": 1.6292, "step": 1160 }, { "epoch": 0.04485081809432146, "grad_norm": 1.3934366703033447, "learning_rate": 0.00019975443227178904, "loss": 1.433, "step": 1165 }, { "epoch": 0.04504331087584216, "grad_norm": 1.5495753288269043, "learning_rate": 0.00019975230978876672, "loss": 1.5803, "step": 1170 }, { "epoch": 0.04523580365736285, "grad_norm": 1.0099114179611206, "learning_rate": 0.00019975017818405646, "loss": 1.3434, "step": 1175 }, { "epoch": 0.04542829643888354, "grad_norm": 0.9009067416191101, "learning_rate": 0.0001997480374578532, "loss": 1.2312, "step": 1180 }, { "epoch": 0.04562078922040423, "grad_norm": 1.8678425550460815, "learning_rate": 0.00019974588761035266, "loss": 1.6331, "step": 1185 }, { "epoch": 0.04581328200192493, "grad_norm": 0.8258862495422363, "learning_rate": 0.00019974372864175148, "loss": 1.4584, "step": 1190 }, { "epoch": 0.04600577478344562, "grad_norm": 1.44557523727417, "learning_rate": 0.00019974156055224706, "loss": 1.4866, "step": 1195 }, { "epoch": 0.04619826756496631, "grad_norm": 1.7249491214752197, "learning_rate": 0.00019973938334203763, "loss": 1.3704, "step": 1200 }, { "epoch": 0.04639076034648701, "grad_norm": 1.005623698234558, "learning_rate": 0.0001997371970113223, "loss": 1.1993, "step": 1205 }, { "epoch": 0.0465832531280077, "grad_norm": 1.4596670866012573, "learning_rate": 0.00019973500156030105, "loss": 1.4996, "step": 1210 }, { "epoch": 0.04677574590952839, "grad_norm": 1.3085503578186035, "learning_rate": 0.00019973279698917454, "loss": 1.441, "step": 1215 }, { "epoch": 0.04696823869104909, "grad_norm": 0.9477142691612244, "learning_rate": 0.00019973058329814445, "loss": 1.5278, "step": 1220 }, { "epoch": 0.04716073147256978, "grad_norm": 0.9040088653564453, "learning_rate": 0.00019972836048741318, "loss": 1.5374, "step": 1225 }, { "epoch": 0.04735322425409047, "grad_norm": 1.7435801029205322, "learning_rate": 
0.00019972612855718395, "loss": 1.3884, "step": 1230 }, { "epoch": 0.04754571703561117, "grad_norm": 1.180665135383606, "learning_rate": 0.00019972388750766088, "loss": 1.2097, "step": 1235 }, { "epoch": 0.04773820981713186, "grad_norm": 1.066064715385437, "learning_rate": 0.00019972163733904895, "loss": 1.4299, "step": 1240 }, { "epoch": 0.04793070259865255, "grad_norm": 1.1051660776138306, "learning_rate": 0.00019971937805155382, "loss": 1.5055, "step": 1245 }, { "epoch": 0.04812319538017324, "grad_norm": 1.2021822929382324, "learning_rate": 0.0001997171096453822, "loss": 1.5842, "step": 1250 }, { "epoch": 0.04831568816169394, "grad_norm": 2.1715807914733887, "learning_rate": 0.00019971483212074146, "loss": 1.4096, "step": 1255 }, { "epoch": 0.04850818094321463, "grad_norm": 1.1615819931030273, "learning_rate": 0.00019971254547783987, "loss": 1.2554, "step": 1260 }, { "epoch": 0.04870067372473532, "grad_norm": 1.5363492965698242, "learning_rate": 0.00019971024971688652, "loss": 1.5773, "step": 1265 }, { "epoch": 0.04889316650625602, "grad_norm": 1.3774447441101074, "learning_rate": 0.00019970794483809137, "loss": 1.3441, "step": 1270 }, { "epoch": 0.04908565928777671, "grad_norm": 2.065901041030884, "learning_rate": 0.00019970563084166515, "loss": 1.6342, "step": 1275 }, { "epoch": 0.0492781520692974, "grad_norm": 1.3221025466918945, "learning_rate": 0.0001997033077278195, "loss": 1.4967, "step": 1280 }, { "epoch": 0.0494706448508181, "grad_norm": 1.6636276245117188, "learning_rate": 0.00019970097549676684, "loss": 1.4936, "step": 1285 }, { "epoch": 0.04966313763233879, "grad_norm": 1.4630615711212158, "learning_rate": 0.0001996986341487204, "loss": 1.4096, "step": 1290 }, { "epoch": 0.04985563041385948, "grad_norm": 1.9586588144302368, "learning_rate": 0.00019969628368389432, "loss": 1.5956, "step": 1295 }, { "epoch": 0.05004812319538017, "grad_norm": 1.0234311819076538, "learning_rate": 0.00019969392410250353, "loss": 1.247, "step": 1300 }, { "epoch": 
0.05024061597690087, "grad_norm": 1.7005319595336914, "learning_rate": 0.0001996915554047638, "loss": 1.4179, "step": 1305 }, { "epoch": 0.05043310875842156, "grad_norm": 1.3052936792373657, "learning_rate": 0.0001996891775908917, "loss": 1.4002, "step": 1310 }, { "epoch": 0.05062560153994225, "grad_norm": 1.0146903991699219, "learning_rate": 0.00019968679066110473, "loss": 1.5062, "step": 1315 }, { "epoch": 0.05081809432146295, "grad_norm": 0.9611810445785522, "learning_rate": 0.00019968439461562104, "loss": 1.5303, "step": 1320 }, { "epoch": 0.05101058710298364, "grad_norm": 0.8518236875534058, "learning_rate": 0.0001996819894546599, "loss": 1.3589, "step": 1325 }, { "epoch": 0.05120307988450433, "grad_norm": 1.6918632984161377, "learning_rate": 0.00019967957517844111, "loss": 1.4589, "step": 1330 }, { "epoch": 0.05139557266602503, "grad_norm": 1.4838560819625854, "learning_rate": 0.00019967715178718551, "loss": 1.2714, "step": 1335 }, { "epoch": 0.05158806544754572, "grad_norm": 1.291231632232666, "learning_rate": 0.00019967471928111465, "loss": 1.6378, "step": 1340 }, { "epoch": 0.05178055822906641, "grad_norm": 1.2091941833496094, "learning_rate": 0.00019967227766045102, "loss": 1.3985, "step": 1345 }, { "epoch": 0.0519730510105871, "grad_norm": 1.2294058799743652, "learning_rate": 0.00019966982692541785, "loss": 1.498, "step": 1350 }, { "epoch": 0.0521655437921078, "grad_norm": 1.1644397974014282, "learning_rate": 0.00019966736707623928, "loss": 1.4185, "step": 1355 }, { "epoch": 0.05235803657362849, "grad_norm": 1.7669397592544556, "learning_rate": 0.0001996648981131402, "loss": 1.3564, "step": 1360 }, { "epoch": 0.05255052935514918, "grad_norm": 0.7178487777709961, "learning_rate": 0.00019966242003634644, "loss": 1.2015, "step": 1365 }, { "epoch": 0.05274302213666988, "grad_norm": 0.8149698376655579, "learning_rate": 0.00019965993284608457, "loss": 1.4046, "step": 1370 }, { "epoch": 0.05293551491819057, "grad_norm": 1.3934742212295532, "learning_rate": 
0.00019965743654258198, "loss": 1.5289, "step": 1375 }, { "epoch": 0.05312800769971126, "grad_norm": 1.060002326965332, "learning_rate": 0.00019965493112606702, "loss": 1.391, "step": 1380 }, { "epoch": 0.05332050048123196, "grad_norm": 1.1154258251190186, "learning_rate": 0.00019965241659676875, "loss": 1.3004, "step": 1385 }, { "epoch": 0.05351299326275265, "grad_norm": 1.8101186752319336, "learning_rate": 0.00019964989295491713, "loss": 1.4968, "step": 1390 }, { "epoch": 0.05370548604427334, "grad_norm": 1.075211524963379, "learning_rate": 0.00019964736020074294, "loss": 1.5198, "step": 1395 }, { "epoch": 0.05389797882579403, "grad_norm": 2.0130980014801025, "learning_rate": 0.00019964481833447775, "loss": 1.5495, "step": 1400 }, { "epoch": 0.05409047160731473, "grad_norm": 1.214570164680481, "learning_rate": 0.000199642267356354, "loss": 1.5886, "step": 1405 }, { "epoch": 0.05428296438883542, "grad_norm": 1.6430037021636963, "learning_rate": 0.00019963970726660497, "loss": 1.5293, "step": 1410 }, { "epoch": 0.05447545717035611, "grad_norm": 0.94575035572052, "learning_rate": 0.00019963713806546478, "loss": 1.276, "step": 1415 }, { "epoch": 0.05466794995187681, "grad_norm": 1.1988322734832764, "learning_rate": 0.00019963455975316832, "loss": 1.3151, "step": 1420 }, { "epoch": 0.0548604427333975, "grad_norm": 1.2768787145614624, "learning_rate": 0.00019963197232995142, "loss": 1.5559, "step": 1425 }, { "epoch": 0.05505293551491819, "grad_norm": 1.5184259414672852, "learning_rate": 0.0001996293757960506, "loss": 1.2998, "step": 1430 }, { "epoch": 0.055245428296438887, "grad_norm": 6.240184783935547, "learning_rate": 0.0001996267701517034, "loss": 1.4497, "step": 1435 }, { "epoch": 0.05543792107795958, "grad_norm": 1.4356882572174072, "learning_rate": 0.00019962415539714803, "loss": 1.6364, "step": 1440 }, { "epoch": 0.05563041385948027, "grad_norm": 0.9310120940208435, "learning_rate": 0.00019962153153262358, "loss": 1.417, "step": 1445 }, { "epoch": 
0.05582290664100096, "grad_norm": 1.2131333351135254, "learning_rate": 0.00019961889855837, "loss": 1.4059, "step": 1450 }, { "epoch": 0.05601539942252166, "grad_norm": 1.2134804725646973, "learning_rate": 0.00019961625647462808, "loss": 1.458, "step": 1455 }, { "epoch": 0.05620789220404235, "grad_norm": 1.5725634098052979, "learning_rate": 0.0001996136052816394, "loss": 1.352, "step": 1460 }, { "epoch": 0.05640038498556304, "grad_norm": 0.9882212281227112, "learning_rate": 0.00019961094497964642, "loss": 1.1665, "step": 1465 }, { "epoch": 0.05659287776708374, "grad_norm": 1.055966854095459, "learning_rate": 0.00019960827556889235, "loss": 1.388, "step": 1470 }, { "epoch": 0.05678537054860443, "grad_norm": 1.0809309482574463, "learning_rate": 0.00019960559704962133, "loss": 1.4287, "step": 1475 }, { "epoch": 0.05697786333012512, "grad_norm": 1.0014935731887817, "learning_rate": 0.00019960290942207828, "loss": 1.5539, "step": 1480 }, { "epoch": 0.057170356111645816, "grad_norm": 1.1717151403427124, "learning_rate": 0.000199600212686509, "loss": 1.3619, "step": 1485 }, { "epoch": 0.05736284889316651, "grad_norm": 1.3981553316116333, "learning_rate": 0.00019959750684316, "loss": 1.3303, "step": 1490 }, { "epoch": 0.0575553416746872, "grad_norm": 0.7471413016319275, "learning_rate": 0.00019959479189227884, "loss": 1.4048, "step": 1495 }, { "epoch": 0.05774783445620789, "grad_norm": 1.1570223569869995, "learning_rate": 0.00019959206783411372, "loss": 1.6713, "step": 1500 }, { "epoch": 0.05794032723772859, "grad_norm": 1.4656585454940796, "learning_rate": 0.00019958933466891366, "loss": 1.3911, "step": 1505 }, { "epoch": 0.05813282001924928, "grad_norm": 1.5338329076766968, "learning_rate": 0.0001995865923969287, "loss": 1.578, "step": 1510 }, { "epoch": 0.05832531280076997, "grad_norm": 0.9481655955314636, "learning_rate": 0.0001995838410184096, "loss": 1.2903, "step": 1515 }, { "epoch": 0.058517805582290666, "grad_norm": 1.4928970336914062, "learning_rate": 
0.00019958108053360788, "loss": 1.4139, "step": 1520 }, { "epoch": 0.05871029836381136, "grad_norm": 1.015381932258606, "learning_rate": 0.00019957831094277604, "loss": 1.5427, "step": 1525 }, { "epoch": 0.05890279114533205, "grad_norm": 1.3471331596374512, "learning_rate": 0.0001995755322461673, "loss": 1.3763, "step": 1530 }, { "epoch": 0.059095283926852746, "grad_norm": 2.0942165851593018, "learning_rate": 0.00019957274444403576, "loss": 1.4669, "step": 1535 }, { "epoch": 0.05928777670837344, "grad_norm": 1.4853599071502686, "learning_rate": 0.00019956994753663634, "loss": 1.4259, "step": 1540 }, { "epoch": 0.05948026948989413, "grad_norm": 1.3337596654891968, "learning_rate": 0.0001995671415242248, "loss": 1.4169, "step": 1545 }, { "epoch": 0.059672762271414825, "grad_norm": 1.3816536664962769, "learning_rate": 0.00019956432640705777, "loss": 1.3679, "step": 1550 }, { "epoch": 0.059865255052935516, "grad_norm": 1.1726235151290894, "learning_rate": 0.00019956150218539262, "loss": 1.4076, "step": 1555 }, { "epoch": 0.06005774783445621, "grad_norm": 1.419520378112793, "learning_rate": 0.00019955866885948764, "loss": 1.3621, "step": 1560 }, { "epoch": 0.0602502406159769, "grad_norm": 1.4154486656188965, "learning_rate": 0.0001995558264296019, "loss": 1.4221, "step": 1565 }, { "epoch": 0.060442733397497596, "grad_norm": 1.4721988439559937, "learning_rate": 0.00019955297489599537, "loss": 1.3641, "step": 1570 }, { "epoch": 0.06063522617901829, "grad_norm": 1.1087952852249146, "learning_rate": 0.0001995501142589287, "loss": 1.3734, "step": 1575 }, { "epoch": 0.06082771896053898, "grad_norm": 1.4815518856048584, "learning_rate": 0.00019954724451866357, "loss": 1.4042, "step": 1580 }, { "epoch": 0.061020211742059675, "grad_norm": 1.835754632949829, "learning_rate": 0.00019954436567546236, "loss": 1.2457, "step": 1585 }, { "epoch": 0.061212704523580366, "grad_norm": 1.3139601945877075, "learning_rate": 0.00019954147772958836, "loss": 1.4457, "step": 1590 }, { "epoch": 
0.06140519730510106, "grad_norm": 1.155369758605957, "learning_rate": 0.0001995385806813056, "loss": 1.3483, "step": 1595 }, { "epoch": 0.061597690086621755, "grad_norm": 1.1897907257080078, "learning_rate": 0.00019953567453087902, "loss": 1.467, "step": 1600 }, { "epoch": 0.061790182868142446, "grad_norm": 1.0794181823730469, "learning_rate": 0.00019953275927857438, "loss": 1.5171, "step": 1605 }, { "epoch": 0.06198267564966314, "grad_norm": 0.9538444876670837, "learning_rate": 0.00019952983492465824, "loss": 1.2643, "step": 1610 }, { "epoch": 0.06217516843118383, "grad_norm": 1.1179461479187012, "learning_rate": 0.00019952690146939804, "loss": 1.408, "step": 1615 }, { "epoch": 0.062367661212704525, "grad_norm": 1.8034144639968872, "learning_rate": 0.00019952395891306197, "loss": 1.3685, "step": 1620 }, { "epoch": 0.06256015399422522, "grad_norm": 1.04547119140625, "learning_rate": 0.00019952100725591912, "loss": 1.4271, "step": 1625 }, { "epoch": 0.06275264677574591, "grad_norm": 1.3097724914550781, "learning_rate": 0.00019951804649823949, "loss": 1.3303, "step": 1630 }, { "epoch": 0.0629451395572666, "grad_norm": 1.8794469833374023, "learning_rate": 0.00019951507664029374, "loss": 1.5223, "step": 1635 }, { "epoch": 0.06313763233878729, "grad_norm": 1.4077703952789307, "learning_rate": 0.00019951209768235344, "loss": 1.5582, "step": 1640 }, { "epoch": 0.06333012512030799, "grad_norm": 1.2244471311569214, "learning_rate": 0.000199509109624691, "loss": 1.3437, "step": 1645 }, { "epoch": 0.06352261790182868, "grad_norm": 1.4610791206359863, "learning_rate": 0.00019950611246757972, "loss": 1.6944, "step": 1650 }, { "epoch": 0.06371511068334937, "grad_norm": 1.544989824295044, "learning_rate": 0.00019950310621129358, "loss": 1.3288, "step": 1655 }, { "epoch": 0.06390760346487007, "grad_norm": 1.4837945699691772, "learning_rate": 0.00019950009085610755, "loss": 1.1296, "step": 1660 }, { "epoch": 0.06410009624639076, "grad_norm": 2.2527410984039307, "learning_rate": 
0.0001994970664022973, "loss": 1.3105, "step": 1665 }, { "epoch": 0.06429258902791145, "grad_norm": 1.3723945617675781, "learning_rate": 0.00019949403285013948, "loss": 1.3976, "step": 1670 }, { "epoch": 0.06448508180943215, "grad_norm": 1.571265459060669, "learning_rate": 0.0001994909901999114, "loss": 1.4603, "step": 1675 }, { "epoch": 0.06467757459095284, "grad_norm": 1.2445194721221924, "learning_rate": 0.00019948793845189137, "loss": 1.3072, "step": 1680 }, { "epoch": 0.06487006737247353, "grad_norm": 2.068112373352051, "learning_rate": 0.00019948487760635842, "loss": 1.4638, "step": 1685 }, { "epoch": 0.06506256015399423, "grad_norm": 1.0896637439727783, "learning_rate": 0.00019948180766359244, "loss": 1.3184, "step": 1690 }, { "epoch": 0.06525505293551492, "grad_norm": 2.0666351318359375, "learning_rate": 0.00019947872862387413, "loss": 1.3944, "step": 1695 }, { "epoch": 0.06544754571703561, "grad_norm": 1.5204085111618042, "learning_rate": 0.00019947564048748508, "loss": 1.3795, "step": 1700 }, { "epoch": 0.0656400384985563, "grad_norm": 0.9768043160438538, "learning_rate": 0.00019947254325470768, "loss": 1.3329, "step": 1705 }, { "epoch": 0.065832531280077, "grad_norm": 1.3453469276428223, "learning_rate": 0.00019946943692582516, "loss": 1.304, "step": 1710 }, { "epoch": 0.06602502406159769, "grad_norm": 1.0725489854812622, "learning_rate": 0.00019946632150112152, "loss": 1.5547, "step": 1715 }, { "epoch": 0.06621751684311838, "grad_norm": 1.5973418951034546, "learning_rate": 0.0001994631969808817, "loss": 1.3263, "step": 1720 }, { "epoch": 0.06641000962463908, "grad_norm": 1.2451751232147217, "learning_rate": 0.0001994600633653914, "loss": 1.4935, "step": 1725 }, { "epoch": 0.06660250240615977, "grad_norm": 1.3474830389022827, "learning_rate": 0.00019945692065493717, "loss": 1.6282, "step": 1730 }, { "epoch": 0.06679499518768046, "grad_norm": 1.7913939952850342, "learning_rate": 0.00019945376884980643, "loss": 1.2935, "step": 1735 }, { "epoch": 
0.06698748796920115, "grad_norm": 1.0764446258544922, "learning_rate": 0.00019945060795028728, "loss": 1.6034, "step": 1740 }, { "epoch": 0.06717998075072185, "grad_norm": 1.0572975873947144, "learning_rate": 0.00019944743795666887, "loss": 1.3997, "step": 1745 }, { "epoch": 0.06737247353224254, "grad_norm": 1.3195079565048218, "learning_rate": 0.00019944425886924102, "loss": 1.4838, "step": 1750 }, { "epoch": 0.06756496631376323, "grad_norm": 1.0044989585876465, "learning_rate": 0.00019944107068829448, "loss": 1.388, "step": 1755 }, { "epoch": 0.06775745909528393, "grad_norm": 1.8276032209396362, "learning_rate": 0.0001994378734141207, "loss": 1.447, "step": 1760 }, { "epoch": 0.06794995187680462, "grad_norm": 1.5056366920471191, "learning_rate": 0.00019943466704701218, "loss": 1.5153, "step": 1765 }, { "epoch": 0.06814244465832531, "grad_norm": 1.6947304010391235, "learning_rate": 0.00019943145158726205, "loss": 1.5551, "step": 1770 }, { "epoch": 0.068334937439846, "grad_norm": 0.9702686667442322, "learning_rate": 0.00019942822703516433, "loss": 1.3168, "step": 1775 }, { "epoch": 0.0685274302213667, "grad_norm": 1.6755216121673584, "learning_rate": 0.0001994249933910139, "loss": 1.6223, "step": 1780 }, { "epoch": 0.06871992300288739, "grad_norm": 1.3666303157806396, "learning_rate": 0.00019942175065510643, "loss": 1.5748, "step": 1785 }, { "epoch": 0.06891241578440808, "grad_norm": 1.3785196542739868, "learning_rate": 0.0001994184988277385, "loss": 1.4033, "step": 1790 }, { "epoch": 0.06910490856592878, "grad_norm": 1.081828236579895, "learning_rate": 0.00019941523790920743, "loss": 1.4, "step": 1795 }, { "epoch": 0.06929740134744947, "grad_norm": 1.1024401187896729, "learning_rate": 0.0001994119678998114, "loss": 1.4751, "step": 1800 }, { "epoch": 0.06948989412897016, "grad_norm": 3.584055185317993, "learning_rate": 0.0001994086887998495, "loss": 1.3449, "step": 1805 }, { "epoch": 0.06968238691049086, "grad_norm": 0.9418397545814514, "learning_rate": 
0.0001994054006096215, "loss": 1.3217, "step": 1810 }, { "epoch": 0.06987487969201155, "grad_norm": 1.6071193218231201, "learning_rate": 0.00019940210332942813, "loss": 1.3636, "step": 1815 }, { "epoch": 0.07006737247353224, "grad_norm": 2.0080580711364746, "learning_rate": 0.00019939879695957084, "loss": 1.4779, "step": 1820 }, { "epoch": 0.07025986525505294, "grad_norm": 1.169058918952942, "learning_rate": 0.00019939548150035207, "loss": 1.4031, "step": 1825 }, { "epoch": 0.07045235803657363, "grad_norm": 0.9863006472587585, "learning_rate": 0.00019939215695207496, "loss": 1.3832, "step": 1830 }, { "epoch": 0.07064485081809432, "grad_norm": 1.2257460355758667, "learning_rate": 0.00019938882331504347, "loss": 1.4967, "step": 1835 }, { "epoch": 0.07083734359961502, "grad_norm": 1.0062893629074097, "learning_rate": 0.00019938548058956253, "loss": 1.2637, "step": 1840 }, { "epoch": 0.0710298363811357, "grad_norm": 1.4179530143737793, "learning_rate": 0.0001993821287759377, "loss": 1.2961, "step": 1845 }, { "epoch": 0.0712223291626564, "grad_norm": 1.2181779146194458, "learning_rate": 0.00019937876787447557, "loss": 1.4104, "step": 1850 }, { "epoch": 0.07141482194417709, "grad_norm": 1.6110061407089233, "learning_rate": 0.00019937539788548344, "loss": 1.4045, "step": 1855 }, { "epoch": 0.07160731472569778, "grad_norm": 1.2814903259277344, "learning_rate": 0.0001993720188092695, "loss": 1.4194, "step": 1860 }, { "epoch": 0.07179980750721848, "grad_norm": 1.382265329360962, "learning_rate": 0.00019936863064614268, "loss": 1.5848, "step": 1865 }, { "epoch": 0.07199230028873917, "grad_norm": 1.4708553552627563, "learning_rate": 0.00019936523339641286, "loss": 1.6196, "step": 1870 }, { "epoch": 0.07218479307025986, "grad_norm": 1.0691862106323242, "learning_rate": 0.0001993618270603907, "loss": 1.4939, "step": 1875 }, { "epoch": 0.07237728585178056, "grad_norm": 0.9476374387741089, "learning_rate": 0.0001993584116383876, "loss": 1.5043, "step": 1880 }, { "epoch": 
0.07256977863330125, "grad_norm": 1.37090003490448, "learning_rate": 0.000199354987130716, "loss": 1.4371, "step": 1885 }, { "epoch": 0.07276227141482194, "grad_norm": 1.2001820802688599, "learning_rate": 0.000199351553537689, "loss": 1.3048, "step": 1890 }, { "epoch": 0.07295476419634264, "grad_norm": 1.1123398542404175, "learning_rate": 0.00019934811085962055, "loss": 1.4398, "step": 1895 }, { "epoch": 0.07314725697786333, "grad_norm": 1.638574242591858, "learning_rate": 0.0001993446590968255, "loss": 1.3563, "step": 1900 }, { "epoch": 0.07333974975938402, "grad_norm": 1.9532630443572998, "learning_rate": 0.00019934119824961948, "loss": 1.3723, "step": 1905 }, { "epoch": 0.07353224254090472, "grad_norm": 1.3247241973876953, "learning_rate": 0.0001993377283183189, "loss": 1.4474, "step": 1910 }, { "epoch": 0.0737247353224254, "grad_norm": 1.203049659729004, "learning_rate": 0.00019933424930324118, "loss": 1.3347, "step": 1915 }, { "epoch": 0.0739172281039461, "grad_norm": 1.8858312368392944, "learning_rate": 0.00019933076120470436, "loss": 1.4754, "step": 1920 }, { "epoch": 0.0741097208854668, "grad_norm": 1.117814540863037, "learning_rate": 0.00019932726402302744, "loss": 1.4828, "step": 1925 }, { "epoch": 0.07430221366698748, "grad_norm": 1.0317554473876953, "learning_rate": 0.00019932375775853021, "loss": 1.5034, "step": 1930 }, { "epoch": 0.07449470644850818, "grad_norm": 2.315903902053833, "learning_rate": 0.00019932024241153332, "loss": 1.4311, "step": 1935 }, { "epoch": 0.07468719923002888, "grad_norm": 1.5780115127563477, "learning_rate": 0.00019931671798235817, "loss": 1.3917, "step": 1940 }, { "epoch": 0.07487969201154956, "grad_norm": 1.3360038995742798, "learning_rate": 0.00019931318447132706, "loss": 1.3634, "step": 1945 }, { "epoch": 0.07507218479307026, "grad_norm": 2.275620937347412, "learning_rate": 0.00019930964187876314, "loss": 1.414, "step": 1950 }, { "epoch": 0.07526467757459095, "grad_norm": 1.7956300973892212, "learning_rate": 
0.00019930609020499032, "loss": 1.5117, "step": 1955 }, { "epoch": 0.07545717035611164, "grad_norm": 1.6429657936096191, "learning_rate": 0.0001993025294503334, "loss": 1.4436, "step": 1960 }, { "epoch": 0.07564966313763234, "grad_norm": 1.432246446609497, "learning_rate": 0.000199298959615118, "loss": 1.3952, "step": 1965 }, { "epoch": 0.07584215591915303, "grad_norm": 1.0579869747161865, "learning_rate": 0.00019929538069967051, "loss": 1.4369, "step": 1970 }, { "epoch": 0.07603464870067372, "grad_norm": 1.766543984413147, "learning_rate": 0.00019929179270431824, "loss": 1.5033, "step": 1975 }, { "epoch": 0.07622714148219442, "grad_norm": 1.0774848461151123, "learning_rate": 0.00019928819562938928, "loss": 1.3399, "step": 1980 }, { "epoch": 0.0764196342637151, "grad_norm": 1.0951963663101196, "learning_rate": 0.00019928458947521252, "loss": 1.3656, "step": 1985 }, { "epoch": 0.0766121270452358, "grad_norm": 1.278283953666687, "learning_rate": 0.0001992809742421178, "loss": 1.3467, "step": 1990 }, { "epoch": 0.0768046198267565, "grad_norm": 1.139508605003357, "learning_rate": 0.00019927734993043566, "loss": 1.4316, "step": 1995 }, { "epoch": 0.07699711260827719, "grad_norm": 1.39482581615448, "learning_rate": 0.00019927371654049748, "loss": 1.2032, "step": 2000 }, { "epoch": 0.07718960538979788, "grad_norm": 0.9154567718505859, "learning_rate": 0.0001992700740726356, "loss": 1.5053, "step": 2005 }, { "epoch": 0.07738209817131858, "grad_norm": 1.5105671882629395, "learning_rate": 0.00019926642252718303, "loss": 1.5059, "step": 2010 }, { "epoch": 0.07757459095283926, "grad_norm": 1.4019540548324585, "learning_rate": 0.00019926276190447367, "loss": 1.4051, "step": 2015 }, { "epoch": 0.07776708373435996, "grad_norm": 1.619841456413269, "learning_rate": 0.00019925909220484234, "loss": 1.1784, "step": 2020 }, { "epoch": 0.07795957651588066, "grad_norm": 1.6128195524215698, "learning_rate": 0.0001992554134286245, "loss": 1.4623, "step": 2025 }, { "epoch": 
0.07815206929740134, "grad_norm": 1.2766104936599731, "learning_rate": 0.00019925172557615665, "loss": 1.3162, "step": 2030 }, { "epoch": 0.07834456207892204, "grad_norm": 1.2187426090240479, "learning_rate": 0.00019924802864777598, "loss": 1.2874, "step": 2035 }, { "epoch": 0.07853705486044274, "grad_norm": 1.1050268411636353, "learning_rate": 0.00019924432264382055, "loss": 1.433, "step": 2040 }, { "epoch": 0.07872954764196342, "grad_norm": 1.6128287315368652, "learning_rate": 0.00019924060756462925, "loss": 1.4698, "step": 2045 }, { "epoch": 0.07892204042348412, "grad_norm": 1.6588749885559082, "learning_rate": 0.00019923688341054176, "loss": 1.4972, "step": 2050 }, { "epoch": 0.0791145332050048, "grad_norm": 1.135289192199707, "learning_rate": 0.0001992331501818987, "loss": 1.3991, "step": 2055 }, { "epoch": 0.0793070259865255, "grad_norm": 1.757759928703308, "learning_rate": 0.00019922940787904137, "loss": 1.3736, "step": 2060 }, { "epoch": 0.0794995187680462, "grad_norm": 0.9943239092826843, "learning_rate": 0.00019922565650231207, "loss": 1.4476, "step": 2065 }, { "epoch": 0.07969201154956689, "grad_norm": 0.9459586143493652, "learning_rate": 0.00019922189605205379, "loss": 1.3913, "step": 2070 }, { "epoch": 0.07988450433108758, "grad_norm": 1.2325133085250854, "learning_rate": 0.00019921812652861037, "loss": 1.4658, "step": 2075 }, { "epoch": 0.08007699711260828, "grad_norm": 1.2397321462631226, "learning_rate": 0.00019921434793232658, "loss": 1.2552, "step": 2080 }, { "epoch": 0.08026948989412896, "grad_norm": 0.9636020660400391, "learning_rate": 0.0001992105602635479, "loss": 1.3296, "step": 2085 }, { "epoch": 0.08046198267564966, "grad_norm": 0.900841474533081, "learning_rate": 0.00019920676352262067, "loss": 1.2329, "step": 2090 }, { "epoch": 0.08065447545717036, "grad_norm": 1.0425807237625122, "learning_rate": 0.00019920295770989213, "loss": 1.1604, "step": 2095 }, { "epoch": 0.08084696823869104, "grad_norm": 1.1449722051620483, "learning_rate": 
0.00019919914282571024, "loss": 1.3233, "step": 2100 }, { "epoch": 0.08103946102021174, "grad_norm": 1.2076728343963623, "learning_rate": 0.00019919531887042387, "loss": 1.3449, "step": 2105 }, { "epoch": 0.08123195380173244, "grad_norm": 0.968323826789856, "learning_rate": 0.00019919148584438272, "loss": 1.4273, "step": 2110 }, { "epoch": 0.08142444658325312, "grad_norm": 1.7322039604187012, "learning_rate": 0.00019918764374793726, "loss": 1.4994, "step": 2115 }, { "epoch": 0.08161693936477382, "grad_norm": 1.4216794967651367, "learning_rate": 0.00019918379258143884, "loss": 1.4071, "step": 2120 }, { "epoch": 0.08180943214629452, "grad_norm": 1.2262970209121704, "learning_rate": 0.00019917993234523963, "loss": 1.3528, "step": 2125 }, { "epoch": 0.0820019249278152, "grad_norm": 1.3137859106063843, "learning_rate": 0.0001991760630396926, "loss": 1.4367, "step": 2130 }, { "epoch": 0.0821944177093359, "grad_norm": 1.364478588104248, "learning_rate": 0.00019917218466515156, "loss": 1.6896, "step": 2135 }, { "epoch": 0.0823869104908566, "grad_norm": 1.2037614583969116, "learning_rate": 0.00019916829722197124, "loss": 1.5371, "step": 2140 }, { "epoch": 0.08257940327237728, "grad_norm": 1.7590453624725342, "learning_rate": 0.00019916440071050706, "loss": 1.6331, "step": 2145 }, { "epoch": 0.08277189605389798, "grad_norm": 1.6112565994262695, "learning_rate": 0.00019916049513111532, "loss": 1.5066, "step": 2150 }, { "epoch": 0.08296438883541868, "grad_norm": 0.937174916267395, "learning_rate": 0.00019915658048415318, "loss": 1.4698, "step": 2155 }, { "epoch": 0.08315688161693936, "grad_norm": 1.8568309545516968, "learning_rate": 0.00019915265676997862, "loss": 1.3197, "step": 2160 }, { "epoch": 0.08334937439846006, "grad_norm": 1.9865350723266602, "learning_rate": 0.00019914872398895043, "loss": 1.4883, "step": 2165 }, { "epoch": 0.08354186717998074, "grad_norm": 1.0227729082107544, "learning_rate": 0.0001991447821414282, "loss": 1.3967, "step": 2170 }, { "epoch": 
0.08373435996150144, "grad_norm": 1.3028923273086548, "learning_rate": 0.00019914083122777245, "loss": 1.4296, "step": 2175 }, { "epoch": 0.08392685274302214, "grad_norm": 1.6131690740585327, "learning_rate": 0.00019913687124834442, "loss": 1.2983, "step": 2180 }, { "epoch": 0.08411934552454282, "grad_norm": 1.1791858673095703, "learning_rate": 0.00019913290220350622, "loss": 1.4632, "step": 2185 }, { "epoch": 0.08431183830606352, "grad_norm": 1.8457857370376587, "learning_rate": 0.00019912892409362085, "loss": 1.3623, "step": 2190 }, { "epoch": 0.08450433108758422, "grad_norm": 1.525680422782898, "learning_rate": 0.00019912493691905198, "loss": 1.2729, "step": 2195 }, { "epoch": 0.0846968238691049, "grad_norm": 1.3267451524734497, "learning_rate": 0.0001991209406801643, "loss": 1.3808, "step": 2200 }, { "epoch": 0.0848893166506256, "grad_norm": 1.37312912940979, "learning_rate": 0.00019911693537732323, "loss": 1.6072, "step": 2205 }, { "epoch": 0.0850818094321463, "grad_norm": 1.3433706760406494, "learning_rate": 0.000199112921010895, "loss": 1.4956, "step": 2210 }, { "epoch": 0.08527430221366698, "grad_norm": 1.220732569694519, "learning_rate": 0.00019910889758124672, "loss": 1.4875, "step": 2215 }, { "epoch": 0.08546679499518768, "grad_norm": 0.9385544657707214, "learning_rate": 0.00019910486508874627, "loss": 1.4202, "step": 2220 }, { "epoch": 0.08565928777670838, "grad_norm": 0.8727134466171265, "learning_rate": 0.0001991008235337624, "loss": 1.2268, "step": 2225 }, { "epoch": 0.08585178055822906, "grad_norm": 2.276063919067383, "learning_rate": 0.00019909677291666473, "loss": 1.3911, "step": 2230 }, { "epoch": 0.08604427333974976, "grad_norm": 1.2023353576660156, "learning_rate": 0.00019909271323782364, "loss": 1.4754, "step": 2235 }, { "epoch": 0.08623676612127046, "grad_norm": 0.9018556475639343, "learning_rate": 0.00019908864449761033, "loss": 1.4073, "step": 2240 }, { "epoch": 0.08642925890279114, "grad_norm": 1.2011221647262573, "learning_rate": 
0.00019908456669639687, "loss": 1.3213, "step": 2245 }, { "epoch": 0.08662175168431184, "grad_norm": 1.9858746528625488, "learning_rate": 0.0001990804798345562, "loss": 1.3403, "step": 2250 }, { "epoch": 0.08681424446583254, "grad_norm": 1.0072557926177979, "learning_rate": 0.000199076383912462, "loss": 1.3387, "step": 2255 }, { "epoch": 0.08700673724735322, "grad_norm": 1.4516913890838623, "learning_rate": 0.00019907227893048877, "loss": 1.3755, "step": 2260 }, { "epoch": 0.08719923002887392, "grad_norm": 1.0636364221572876, "learning_rate": 0.00019906816488901195, "loss": 1.2495, "step": 2265 }, { "epoch": 0.0873917228103946, "grad_norm": 1.8495078086853027, "learning_rate": 0.0001990640417884077, "loss": 1.4166, "step": 2270 }, { "epoch": 0.0875842155919153, "grad_norm": 2.327951431274414, "learning_rate": 0.00019905990962905312, "loss": 1.3934, "step": 2275 }, { "epoch": 0.087776708373436, "grad_norm": 1.5719425678253174, "learning_rate": 0.00019905576841132595, "loss": 1.3932, "step": 2280 }, { "epoch": 0.08796920115495668, "grad_norm": 1.5799787044525146, "learning_rate": 0.000199051618135605, "loss": 1.5148, "step": 2285 }, { "epoch": 0.08816169393647738, "grad_norm": 0.7972100377082825, "learning_rate": 0.00019904745880226966, "loss": 1.2456, "step": 2290 }, { "epoch": 0.08835418671799808, "grad_norm": 1.4252464771270752, "learning_rate": 0.00019904329041170042, "loss": 1.4287, "step": 2295 }, { "epoch": 0.08854667949951876, "grad_norm": 1.5532910823822021, "learning_rate": 0.00019903911296427834, "loss": 1.3685, "step": 2300 }, { "epoch": 0.08873917228103946, "grad_norm": 1.3019160032272339, "learning_rate": 0.00019903492646038544, "loss": 1.3928, "step": 2305 }, { "epoch": 0.08893166506256016, "grad_norm": 1.7292853593826294, "learning_rate": 0.00019903073090040457, "loss": 1.369, "step": 2310 }, { "epoch": 0.08912415784408084, "grad_norm": 1.1780908107757568, "learning_rate": 0.00019902652628471938, "loss": 1.2541, "step": 2315 }, { "epoch": 
0.08931665062560154, "grad_norm": 1.353721261024475, "learning_rate": 0.00019902231261371433, "loss": 1.2658, "step": 2320 }, { "epoch": 0.08950914340712224, "grad_norm": 1.0020657777786255, "learning_rate": 0.0001990180898877748, "loss": 1.3319, "step": 2325 }, { "epoch": 0.08970163618864292, "grad_norm": 1.1655325889587402, "learning_rate": 0.00019901385810728686, "loss": 1.3783, "step": 2330 }, { "epoch": 0.08989412897016362, "grad_norm": 1.2237039804458618, "learning_rate": 0.00019900961727263748, "loss": 1.2919, "step": 2335 }, { "epoch": 0.09008662175168432, "grad_norm": 1.6417179107666016, "learning_rate": 0.0001990053673842145, "loss": 1.471, "step": 2340 }, { "epoch": 0.090279114533205, "grad_norm": 1.2170498371124268, "learning_rate": 0.00019900110844240653, "loss": 1.3889, "step": 2345 }, { "epoch": 0.0904716073147257, "grad_norm": 1.1462334394454956, "learning_rate": 0.00019899684044760304, "loss": 1.4191, "step": 2350 }, { "epoch": 0.0906641000962464, "grad_norm": 0.961063802242279, "learning_rate": 0.00019899256340019425, "loss": 1.5019, "step": 2355 }, { "epoch": 0.09085659287776708, "grad_norm": 0.9323278069496155, "learning_rate": 0.0001989882773005713, "loss": 1.3988, "step": 2360 }, { "epoch": 0.09104908565928778, "grad_norm": 1.8326833248138428, "learning_rate": 0.00019898398214912612, "loss": 1.4211, "step": 2365 }, { "epoch": 0.09124157844080846, "grad_norm": 1.2725722789764404, "learning_rate": 0.00019897967794625153, "loss": 1.3274, "step": 2370 }, { "epoch": 0.09143407122232916, "grad_norm": 0.9105005860328674, "learning_rate": 0.00019897536469234102, "loss": 1.3309, "step": 2375 }, { "epoch": 0.09162656400384986, "grad_norm": 1.3157737255096436, "learning_rate": 0.00019897104238778907, "loss": 1.4086, "step": 2380 }, { "epoch": 0.09181905678537054, "grad_norm": 1.9295995235443115, "learning_rate": 0.00019896671103299094, "loss": 1.3849, "step": 2385 }, { "epoch": 0.09201154956689124, "grad_norm": 1.0183601379394531, "learning_rate": 
0.00019896237062834267, "loss": 1.4397, "step": 2390 }, { "epoch": 0.09220404234841194, "grad_norm": 1.118998646736145, "learning_rate": 0.00019895802117424118, "loss": 1.568, "step": 2395 }, { "epoch": 0.09239653512993262, "grad_norm": 1.6463871002197266, "learning_rate": 0.00019895366267108416, "loss": 1.2755, "step": 2400 }, { "epoch": 0.09258902791145332, "grad_norm": 1.3326902389526367, "learning_rate": 0.00019894929511927022, "loss": 1.4369, "step": 2405 }, { "epoch": 0.09278152069297402, "grad_norm": 1.4168566465377808, "learning_rate": 0.00019894491851919871, "loss": 1.4323, "step": 2410 }, { "epoch": 0.0929740134744947, "grad_norm": 1.3266388177871704, "learning_rate": 0.00019894053287126986, "loss": 1.17, "step": 2415 }, { "epoch": 0.0931665062560154, "grad_norm": 1.7362377643585205, "learning_rate": 0.0001989361381758847, "loss": 1.5996, "step": 2420 }, { "epoch": 0.0933589990375361, "grad_norm": 1.1684424877166748, "learning_rate": 0.00019893173443344511, "loss": 1.3486, "step": 2425 }, { "epoch": 0.09355149181905678, "grad_norm": 1.3784310817718506, "learning_rate": 0.00019892732164435376, "loss": 1.2775, "step": 2430 }, { "epoch": 0.09374398460057748, "grad_norm": 1.1288561820983887, "learning_rate": 0.00019892289980901414, "loss": 1.2044, "step": 2435 }, { "epoch": 0.09393647738209818, "grad_norm": 1.1601535081863403, "learning_rate": 0.00019891846892783067, "loss": 1.4937, "step": 2440 }, { "epoch": 0.09412897016361886, "grad_norm": 1.3866316080093384, "learning_rate": 0.0001989140290012085, "loss": 1.913, "step": 2445 }, { "epoch": 0.09432146294513956, "grad_norm": 1.4638808965682983, "learning_rate": 0.00019890958002955362, "loss": 1.4114, "step": 2450 }, { "epoch": 0.09451395572666026, "grad_norm": 1.4660701751708984, "learning_rate": 0.00019890512201327284, "loss": 1.3607, "step": 2455 }, { "epoch": 0.09470644850818094, "grad_norm": 0.9787619113922119, "learning_rate": 0.00019890065495277388, "loss": 1.3729, "step": 2460 }, { "epoch": 
0.09489894128970164, "grad_norm": 1.4845494031906128, "learning_rate": 0.00019889617884846517, "loss": 1.3326, "step": 2465 }, { "epoch": 0.09509143407122234, "grad_norm": 1.2955145835876465, "learning_rate": 0.000198891693700756, "loss": 1.3738, "step": 2470 }, { "epoch": 0.09528392685274302, "grad_norm": 1.7431209087371826, "learning_rate": 0.00019888719951005656, "loss": 1.3676, "step": 2475 }, { "epoch": 0.09547641963426372, "grad_norm": 0.923613965511322, "learning_rate": 0.00019888269627677777, "loss": 1.4142, "step": 2480 }, { "epoch": 0.0956689124157844, "grad_norm": 1.0258625745773315, "learning_rate": 0.0001988781840013315, "loss": 1.3868, "step": 2485 }, { "epoch": 0.0958614051973051, "grad_norm": 1.1365761756896973, "learning_rate": 0.00019887366268413025, "loss": 1.2871, "step": 2490 }, { "epoch": 0.0960538979788258, "grad_norm": 2.3250112533569336, "learning_rate": 0.00019886913232558754, "loss": 1.4345, "step": 2495 }, { "epoch": 0.09624639076034648, "grad_norm": 1.1625771522521973, "learning_rate": 0.00019886459292611767, "loss": 1.5796, "step": 2500 }, { "epoch": 0.09643888354186718, "grad_norm": 1.7454233169555664, "learning_rate": 0.00019886004448613562, "loss": 1.6151, "step": 2505 }, { "epoch": 0.09663137632338788, "grad_norm": 1.3514907360076904, "learning_rate": 0.00019885548700605745, "loss": 1.4529, "step": 2510 }, { "epoch": 0.09682386910490856, "grad_norm": 1.9735958576202393, "learning_rate": 0.00019885092048629982, "loss": 1.4945, "step": 2515 }, { "epoch": 0.09701636188642926, "grad_norm": 1.190207600593567, "learning_rate": 0.00019884634492728037, "loss": 1.473, "step": 2520 }, { "epoch": 0.09720885466794996, "grad_norm": 1.1596134901046753, "learning_rate": 0.00019884176032941743, "loss": 1.3745, "step": 2525 }, { "epoch": 0.09740134744947064, "grad_norm": 1.0496324300765991, "learning_rate": 0.0001988371666931303, "loss": 1.3853, "step": 2530 }, { "epoch": 0.09759384023099134, "grad_norm": 1.2820552587509155, "learning_rate": 
0.000198832564018839, "loss": 1.4205, "step": 2535 }, { "epoch": 0.09778633301251204, "grad_norm": 0.9559310674667358, "learning_rate": 0.00019882795230696446, "loss": 1.2517, "step": 2540 }, { "epoch": 0.09797882579403272, "grad_norm": 1.026782751083374, "learning_rate": 0.00019882333155792835, "loss": 1.335, "step": 2545 }, { "epoch": 0.09817131857555342, "grad_norm": 1.3378793001174927, "learning_rate": 0.00019881870177215319, "loss": 1.3419, "step": 2550 }, { "epoch": 0.09836381135707412, "grad_norm": 1.0646761655807495, "learning_rate": 0.00019881406295006238, "loss": 1.3793, "step": 2555 }, { "epoch": 0.0985563041385948, "grad_norm": 1.3302899599075317, "learning_rate": 0.00019880941509208005, "loss": 1.3056, "step": 2560 }, { "epoch": 0.0987487969201155, "grad_norm": 1.3029305934906006, "learning_rate": 0.00019880475819863134, "loss": 1.3028, "step": 2565 }, { "epoch": 0.0989412897016362, "grad_norm": 1.6653764247894287, "learning_rate": 0.00019880009227014197, "loss": 1.4698, "step": 2570 }, { "epoch": 0.09913378248315688, "grad_norm": 1.5575610399246216, "learning_rate": 0.00019879541730703865, "loss": 1.2843, "step": 2575 }, { "epoch": 0.09932627526467758, "grad_norm": 1.1219451427459717, "learning_rate": 0.0001987907333097489, "loss": 1.2824, "step": 2580 }, { "epoch": 0.09951876804619826, "grad_norm": 1.680050253868103, "learning_rate": 0.000198786040278701, "loss": 1.431, "step": 2585 }, { "epoch": 0.09971126082771896, "grad_norm": 2.5341451168060303, "learning_rate": 0.00019878133821432412, "loss": 1.3925, "step": 2590 }, { "epoch": 0.09990375360923966, "grad_norm": 1.132542610168457, "learning_rate": 0.00019877662711704824, "loss": 1.4082, "step": 2595 }, { "epoch": 0.10009624639076034, "grad_norm": 1.0605584383010864, "learning_rate": 0.0001987719069873041, "loss": 1.2904, "step": 2600 }, { "epoch": 0.10028873917228104, "grad_norm": 1.161116361618042, "learning_rate": 0.0001987671778255234, "loss": 1.2922, "step": 2605 }, { "epoch": 
0.10048123195380174, "grad_norm": 2.2763168811798096, "learning_rate": 0.0001987624396321386, "loss": 1.4692, "step": 2610 }, { "epoch": 0.10067372473532242, "grad_norm": 1.547316312789917, "learning_rate": 0.00019875769240758286, "loss": 1.458, "step": 2615 }, { "epoch": 0.10086621751684312, "grad_norm": 1.0679529905319214, "learning_rate": 0.0001987529361522904, "loss": 1.3075, "step": 2620 }, { "epoch": 0.10105871029836382, "grad_norm": 1.9426227807998657, "learning_rate": 0.0001987481708666961, "loss": 1.4985, "step": 2625 }, { "epoch": 0.1012512030798845, "grad_norm": 1.1619765758514404, "learning_rate": 0.00019874339655123575, "loss": 1.329, "step": 2630 }, { "epoch": 0.1014436958614052, "grad_norm": 0.8115332722663879, "learning_rate": 0.00019873861320634587, "loss": 1.218, "step": 2635 }, { "epoch": 0.1016361886429259, "grad_norm": 1.2575538158416748, "learning_rate": 0.0001987338208324639, "loss": 1.3133, "step": 2640 }, { "epoch": 0.10182868142444658, "grad_norm": 0.9605635404586792, "learning_rate": 0.00019872901943002806, "loss": 1.4462, "step": 2645 }, { "epoch": 0.10202117420596728, "grad_norm": 1.7909116744995117, "learning_rate": 0.00019872420899947742, "loss": 1.257, "step": 2650 }, { "epoch": 0.10221366698748797, "grad_norm": 1.5501129627227783, "learning_rate": 0.00019871938954125185, "loss": 1.2825, "step": 2655 }, { "epoch": 0.10240615976900866, "grad_norm": 1.4636069536209106, "learning_rate": 0.00019871456105579208, "loss": 1.3909, "step": 2660 }, { "epoch": 0.10259865255052936, "grad_norm": 1.4283297061920166, "learning_rate": 0.0001987097235435396, "loss": 1.2148, "step": 2665 }, { "epoch": 0.10279114533205005, "grad_norm": 1.316149115562439, "learning_rate": 0.00019870487700493684, "loss": 1.393, "step": 2670 }, { "epoch": 0.10298363811357074, "grad_norm": 0.8449459671974182, "learning_rate": 0.00019870002144042689, "loss": 1.4969, "step": 2675 }, { "epoch": 0.10317613089509144, "grad_norm": 1.3309835195541382, "learning_rate": 
0.00019869515685045383, "loss": 1.4927, "step": 2680 }, { "epoch": 0.10336862367661212, "grad_norm": 0.9159907102584839, "learning_rate": 0.00019869028323546246, "loss": 1.3526, "step": 2685 }, { "epoch": 0.10356111645813282, "grad_norm": 2.2842464447021484, "learning_rate": 0.00019868540059589845, "loss": 1.3646, "step": 2690 }, { "epoch": 0.10375360923965352, "grad_norm": 0.9444146156311035, "learning_rate": 0.00019868050893220832, "loss": 1.349, "step": 2695 }, { "epoch": 0.1039461020211742, "grad_norm": 1.8546898365020752, "learning_rate": 0.0001986756082448393, "loss": 1.3195, "step": 2700 }, { "epoch": 0.1041385948026949, "grad_norm": 1.310783863067627, "learning_rate": 0.00019867069853423961, "loss": 1.6065, "step": 2705 }, { "epoch": 0.1043310875842156, "grad_norm": 1.248542308807373, "learning_rate": 0.00019866577980085813, "loss": 1.1987, "step": 2710 }, { "epoch": 0.10452358036573628, "grad_norm": 1.421844482421875, "learning_rate": 0.00019866085204514472, "loss": 1.3576, "step": 2715 }, { "epoch": 0.10471607314725698, "grad_norm": 1.1641993522644043, "learning_rate": 0.00019865591526754996, "loss": 1.436, "step": 2720 }, { "epoch": 0.10490856592877768, "grad_norm": 1.1122993230819702, "learning_rate": 0.0001986509694685253, "loss": 1.4218, "step": 2725 }, { "epoch": 0.10510105871029836, "grad_norm": 1.222016453742981, "learning_rate": 0.00019864601464852295, "loss": 1.2965, "step": 2730 }, { "epoch": 0.10529355149181906, "grad_norm": 1.6765378713607788, "learning_rate": 0.00019864105080799602, "loss": 1.3908, "step": 2735 }, { "epoch": 0.10548604427333975, "grad_norm": 1.8405592441558838, "learning_rate": 0.00019863607794739845, "loss": 1.2583, "step": 2740 }, { "epoch": 0.10567853705486044, "grad_norm": 1.3908604383468628, "learning_rate": 0.00019863109606718497, "loss": 1.2726, "step": 2745 }, { "epoch": 0.10587102983638114, "grad_norm": 1.3825894594192505, "learning_rate": 0.0001986261051678111, "loss": 1.3234, "step": 2750 }, { "epoch": 
0.10606352261790183, "grad_norm": 1.5409029722213745, "learning_rate": 0.00019862110524973328, "loss": 1.4151, "step": 2755 }, { "epoch": 0.10625601539942252, "grad_norm": 2.1902191638946533, "learning_rate": 0.00019861609631340868, "loss": 1.3865, "step": 2760 }, { "epoch": 0.10644850818094322, "grad_norm": 0.9851712584495544, "learning_rate": 0.00019861107835929533, "loss": 1.4799, "step": 2765 }, { "epoch": 0.10664100096246391, "grad_norm": 1.2206732034683228, "learning_rate": 0.0001986060513878521, "loss": 1.3456, "step": 2770 }, { "epoch": 0.1068334937439846, "grad_norm": 1.3443645238876343, "learning_rate": 0.0001986010153995387, "loss": 1.2586, "step": 2775 }, { "epoch": 0.1070259865255053, "grad_norm": 1.1602864265441895, "learning_rate": 0.00019859597039481561, "loss": 1.1789, "step": 2780 }, { "epoch": 0.107218479307026, "grad_norm": 0.8068190813064575, "learning_rate": 0.00019859091637414414, "loss": 1.4228, "step": 2785 }, { "epoch": 0.10741097208854668, "grad_norm": 1.4439321756362915, "learning_rate": 0.0001985858533379865, "loss": 1.4365, "step": 2790 }, { "epoch": 0.10760346487006738, "grad_norm": 1.0814299583435059, "learning_rate": 0.00019858078128680564, "loss": 1.2755, "step": 2795 }, { "epoch": 0.10779595765158806, "grad_norm": 1.7848068475723267, "learning_rate": 0.00019857570022106536, "loss": 1.4061, "step": 2800 }, { "epoch": 0.10798845043310876, "grad_norm": 1.3163549900054932, "learning_rate": 0.0001985706101412303, "loss": 1.3599, "step": 2805 }, { "epoch": 0.10818094321462945, "grad_norm": 1.439104437828064, "learning_rate": 0.0001985655110477659, "loss": 1.3054, "step": 2810 }, { "epoch": 0.10837343599615014, "grad_norm": 0.892706036567688, "learning_rate": 0.0001985604029411385, "loss": 1.3504, "step": 2815 }, { "epoch": 0.10856592877767084, "grad_norm": 1.102704405784607, "learning_rate": 0.0001985552858218151, "loss": 1.3902, "step": 2820 }, { "epoch": 0.10875842155919153, "grad_norm": 1.21804678440094, "learning_rate": 
0.0001985501596902637, "loss": 1.36, "step": 2825 }, { "epoch": 0.10895091434071222, "grad_norm": 1.6015477180480957, "learning_rate": 0.00019854502454695302, "loss": 1.6163, "step": 2830 }, { "epoch": 0.10914340712223292, "grad_norm": 1.3947224617004395, "learning_rate": 0.00019853988039235265, "loss": 1.2207, "step": 2835 }, { "epoch": 0.10933589990375361, "grad_norm": 1.616458535194397, "learning_rate": 0.00019853472722693302, "loss": 1.2081, "step": 2840 }, { "epoch": 0.1095283926852743, "grad_norm": 2.1588330268859863, "learning_rate": 0.00019852956505116528, "loss": 1.4428, "step": 2845 }, { "epoch": 0.109720885466795, "grad_norm": 1.2287509441375732, "learning_rate": 0.00019852439386552152, "loss": 1.4548, "step": 2850 }, { "epoch": 0.1099133782483157, "grad_norm": 1.7198657989501953, "learning_rate": 0.00019851921367047463, "loss": 1.2034, "step": 2855 }, { "epoch": 0.11010587102983638, "grad_norm": 1.4924067258834839, "learning_rate": 0.00019851402446649825, "loss": 1.3635, "step": 2860 }, { "epoch": 0.11029836381135708, "grad_norm": 1.3675332069396973, "learning_rate": 0.00019850882625406695, "loss": 1.29, "step": 2865 }, { "epoch": 0.11049085659287777, "grad_norm": 1.2170599699020386, "learning_rate": 0.00019850361903365603, "loss": 1.3495, "step": 2870 }, { "epoch": 0.11068334937439846, "grad_norm": 1.6067026853561401, "learning_rate": 0.00019849840280574167, "loss": 1.4679, "step": 2875 }, { "epoch": 0.11087584215591915, "grad_norm": 1.0457261800765991, "learning_rate": 0.00019849317757080092, "loss": 1.3289, "step": 2880 }, { "epoch": 0.11106833493743985, "grad_norm": 0.6958736181259155, "learning_rate": 0.00019848794332931146, "loss": 0.9412, "step": 2885 }, { "epoch": 0.11126082771896054, "grad_norm": 0.9687005281448364, "learning_rate": 0.00019848270008175205, "loss": 1.2777, "step": 2890 }, { "epoch": 0.11145332050048123, "grad_norm": 0.8073298931121826, "learning_rate": 0.00019847744782860213, "loss": 1.4295, "step": 2895 }, { "epoch": 
0.11164581328200192, "grad_norm": 0.8794350624084473, "learning_rate": 0.00019847218657034193, "loss": 1.2199, "step": 2900 }, { "epoch": 0.11183830606352262, "grad_norm": 1.644554853439331, "learning_rate": 0.00019846691630745258, "loss": 1.3076, "step": 2905 }, { "epoch": 0.11203079884504331, "grad_norm": 1.0819231271743774, "learning_rate": 0.00019846163704041603, "loss": 1.385, "step": 2910 }, { "epoch": 0.112223291626564, "grad_norm": 1.4424269199371338, "learning_rate": 0.000198456348769715, "loss": 1.4287, "step": 2915 }, { "epoch": 0.1124157844080847, "grad_norm": 1.289413332939148, "learning_rate": 0.00019845105149583308, "loss": 1.25, "step": 2920 }, { "epoch": 0.1126082771896054, "grad_norm": 1.4669229984283447, "learning_rate": 0.00019844574521925474, "loss": 1.5371, "step": 2925 }, { "epoch": 0.11280076997112608, "grad_norm": 2.102736473083496, "learning_rate": 0.0001984404299404651, "loss": 1.5017, "step": 2930 }, { "epoch": 0.11299326275264678, "grad_norm": 1.1487330198287964, "learning_rate": 0.00019843510565995025, "loss": 1.3164, "step": 2935 }, { "epoch": 0.11318575553416747, "grad_norm": 1.259538173675537, "learning_rate": 0.00019842977237819707, "loss": 1.2946, "step": 2940 }, { "epoch": 0.11337824831568816, "grad_norm": 2.3158466815948486, "learning_rate": 0.00019842443009569324, "loss": 1.4614, "step": 2945 }, { "epoch": 0.11357074109720885, "grad_norm": 1.5077046155929565, "learning_rate": 0.0001984190788129273, "loss": 1.3478, "step": 2950 }, { "epoch": 0.11376323387872955, "grad_norm": 1.2548809051513672, "learning_rate": 0.00019841371853038852, "loss": 1.3351, "step": 2955 }, { "epoch": 0.11395572666025024, "grad_norm": 1.4622430801391602, "learning_rate": 0.00019840834924856715, "loss": 1.2788, "step": 2960 }, { "epoch": 0.11414821944177093, "grad_norm": 0.9759154319763184, "learning_rate": 0.00019840297096795415, "loss": 1.2793, "step": 2965 }, { "epoch": 0.11434071222329163, "grad_norm": 1.2217987775802612, "learning_rate": 
0.00019839758368904128, "loss": 1.284, "step": 2970 }, { "epoch": 0.11453320500481232, "grad_norm": 2.180697441101074, "learning_rate": 0.00019839326738746614, "loss": 1.4163, "step": 2975 }, { "epoch": 0.11472569778633301, "grad_norm": 1.156293511390686, "learning_rate": 0.00019838786391285554, "loss": 1.3045, "step": 2980 }, { "epoch": 0.11491819056785371, "grad_norm": 1.1444417238235474, "learning_rate": 0.00019838245144132658, "loss": 1.4522, "step": 2985 }, { "epoch": 0.1151106833493744, "grad_norm": 1.3959949016571045, "learning_rate": 0.00019837702997337414, "loss": 1.3959, "step": 2990 }, { "epoch": 0.1153031761308951, "grad_norm": 1.2789435386657715, "learning_rate": 0.00019837159950949402, "loss": 1.2951, "step": 2995 }, { "epoch": 0.11549566891241578, "grad_norm": 1.0902299880981445, "learning_rate": 0.00019836616005018275, "loss": 1.4573, "step": 3000 }, { "epoch": 0.11568816169393648, "grad_norm": 1.452920913696289, "learning_rate": 0.0001983607115959378, "loss": 1.4688, "step": 3005 }, { "epoch": 0.11588065447545717, "grad_norm": 2.192514419555664, "learning_rate": 0.0001983552541472573, "loss": 1.4282, "step": 3010 }, { "epoch": 0.11607314725697786, "grad_norm": 1.938883900642395, "learning_rate": 0.0001983497877046404, "loss": 1.6123, "step": 3015 }, { "epoch": 0.11626564003849855, "grad_norm": 2.4365732669830322, "learning_rate": 0.0001983443122685869, "loss": 1.4987, "step": 3020 }, { "epoch": 0.11645813282001925, "grad_norm": 1.827972173690796, "learning_rate": 0.0001983388278395975, "loss": 1.2196, "step": 3025 }, { "epoch": 0.11665062560153994, "grad_norm": 1.6184618473052979, "learning_rate": 0.00019833333441817374, "loss": 1.5257, "step": 3030 }, { "epoch": 0.11684311838306063, "grad_norm": 1.0191036462783813, "learning_rate": 0.00019832783200481797, "loss": 1.4799, "step": 3035 }, { "epoch": 0.11703561116458133, "grad_norm": 1.1552925109863281, "learning_rate": 0.0001983223206000333, "loss": 1.2014, "step": 3040 }, { "epoch": 
0.11722810394610202, "grad_norm": 0.9793531894683838, "learning_rate": 0.00019831680020432376, "loss": 1.2092, "step": 3045 }, { "epoch": 0.11742059672762271, "grad_norm": 1.480634331703186, "learning_rate": 0.0001983112708181941, "loss": 1.3238, "step": 3050 }, { "epoch": 0.11761308950914341, "grad_norm": 1.5112073421478271, "learning_rate": 0.00019830573244215, "loss": 1.5513, "step": 3055 }, { "epoch": 0.1178055822906641, "grad_norm": 1.4130852222442627, "learning_rate": 0.00019830018507669786, "loss": 1.4368, "step": 3060 }, { "epoch": 0.1179980750721848, "grad_norm": 1.401934027671814, "learning_rate": 0.000198294628722345, "loss": 1.243, "step": 3065 }, { "epoch": 0.11819056785370549, "grad_norm": 1.8309379816055298, "learning_rate": 0.00019828906337959946, "loss": 1.1656, "step": 3070 }, { "epoch": 0.11838306063522618, "grad_norm": 0.8511875867843628, "learning_rate": 0.0001982834890489702, "loss": 1.406, "step": 3075 }, { "epoch": 0.11857555341674687, "grad_norm": 1.4291598796844482, "learning_rate": 0.00019827790573096694, "loss": 1.3963, "step": 3080 }, { "epoch": 0.11876804619826757, "grad_norm": 0.6835631132125854, "learning_rate": 0.0001982723134261002, "loss": 1.1238, "step": 3085 }, { "epoch": 0.11896053897978826, "grad_norm": 1.6569236516952515, "learning_rate": 0.00019826671213488145, "loss": 1.3335, "step": 3090 }, { "epoch": 0.11915303176130895, "grad_norm": 1.0488132238388062, "learning_rate": 0.00019826110185782277, "loss": 1.3009, "step": 3095 }, { "epoch": 0.11934552454282965, "grad_norm": 1.3253639936447144, "learning_rate": 0.00019825548259543726, "loss": 1.3863, "step": 3100 }, { "epoch": 0.11953801732435033, "grad_norm": 0.9408076405525208, "learning_rate": 0.00019824985434823878, "loss": 1.3184, "step": 3105 }, { "epoch": 0.11973051010587103, "grad_norm": 0.9649772644042969, "learning_rate": 0.00019824421711674194, "loss": 1.2427, "step": 3110 }, { "epoch": 0.11992300288739172, "grad_norm": 1.7673052549362183, "learning_rate": 
0.00019823857090146225, "loss": 1.2804, "step": 3115 }, { "epoch": 0.12011549566891241, "grad_norm": 1.230724811553955, "learning_rate": 0.00019823291570291604, "loss": 1.3527, "step": 3120 }, { "epoch": 0.12030798845043311, "grad_norm": 2.382617473602295, "learning_rate": 0.0001982272515216204, "loss": 1.4123, "step": 3125 }, { "epoch": 0.1205004812319538, "grad_norm": 1.2811720371246338, "learning_rate": 0.00019822157835809332, "loss": 1.3935, "step": 3130 }, { "epoch": 0.1206929740134745, "grad_norm": 1.9592630863189697, "learning_rate": 0.00019821589621285356, "loss": 1.2387, "step": 3135 }, { "epoch": 0.12088546679499519, "grad_norm": 1.659197449684143, "learning_rate": 0.0001982102050864207, "loss": 1.4228, "step": 3140 }, { "epoch": 0.12107795957651588, "grad_norm": 1.2591451406478882, "learning_rate": 0.00019820450497931517, "loss": 1.3192, "step": 3145 }, { "epoch": 0.12127045235803657, "grad_norm": 1.1670453548431396, "learning_rate": 0.00019819879589205822, "loss": 1.2593, "step": 3150 }, { "epoch": 0.12146294513955727, "grad_norm": 1.680776834487915, "learning_rate": 0.0001981930778251719, "loss": 1.5809, "step": 3155 }, { "epoch": 0.12165543792107796, "grad_norm": 1.388492226600647, "learning_rate": 0.00019818735077917904, "loss": 1.5646, "step": 3160 }, { "epoch": 0.12184793070259865, "grad_norm": 1.3851470947265625, "learning_rate": 0.00019818161475460342, "loss": 1.3282, "step": 3165 }, { "epoch": 0.12204042348411935, "grad_norm": 1.252103567123413, "learning_rate": 0.0001981758697519695, "loss": 1.3326, "step": 3170 }, { "epoch": 0.12223291626564003, "grad_norm": 2.6637227535247803, "learning_rate": 0.0001981701157718027, "loss": 1.4247, "step": 3175 }, { "epoch": 0.12242540904716073, "grad_norm": 1.4228829145431519, "learning_rate": 0.00019816435281462907, "loss": 1.3287, "step": 3180 }, { "epoch": 0.12261790182868143, "grad_norm": 1.0654631853103638, "learning_rate": 0.00019815858088097565, "loss": 1.3651, "step": 3185 }, { "epoch": 
0.12281039461020211, "grad_norm": 1.1779879331588745, "learning_rate": 0.00019815279997137028, "loss": 1.2699, "step": 3190 }, { "epoch": 0.12300288739172281, "grad_norm": 0.966482937335968, "learning_rate": 0.0001981470100863416, "loss": 1.3029, "step": 3195 }, { "epoch": 0.12319538017324351, "grad_norm": 1.13119375705719, "learning_rate": 0.00019814121122641894, "loss": 1.3431, "step": 3200 }, { "epoch": 0.1233878729547642, "grad_norm": 1.0690468549728394, "learning_rate": 0.00019813540339213263, "loss": 1.237, "step": 3205 }, { "epoch": 0.12358036573628489, "grad_norm": 1.169592022895813, "learning_rate": 0.00019812958658401382, "loss": 1.3341, "step": 3210 }, { "epoch": 0.12377285851780558, "grad_norm": 0.9310591816902161, "learning_rate": 0.00019812376080259435, "loss": 1.3168, "step": 3215 }, { "epoch": 0.12396535129932627, "grad_norm": 1.1262513399124146, "learning_rate": 0.00019811792604840694, "loss": 1.322, "step": 3220 }, { "epoch": 0.12415784408084697, "grad_norm": 1.0723376274108887, "learning_rate": 0.00019811208232198518, "loss": 1.2814, "step": 3225 }, { "epoch": 0.12435033686236766, "grad_norm": 1.5084266662597656, "learning_rate": 0.00019810622962386344, "loss": 1.3136, "step": 3230 }, { "epoch": 0.12454282964388835, "grad_norm": 1.5219266414642334, "learning_rate": 0.0001981003679545769, "loss": 1.2971, "step": 3235 }, { "epoch": 0.12473532242540905, "grad_norm": 1.8135708570480347, "learning_rate": 0.00019809449731466154, "loss": 1.3987, "step": 3240 }, { "epoch": 0.12492781520692973, "grad_norm": 1.9838290214538574, "learning_rate": 0.00019808861770465424, "loss": 1.4063, "step": 3245 }, { "epoch": 0.12512030798845045, "grad_norm": 0.9821895956993103, "learning_rate": 0.00019808272912509258, "loss": 1.4336, "step": 3250 }, { "epoch": 0.12531280076997112, "grad_norm": 1.0371532440185547, "learning_rate": 0.00019807683157651513, "loss": 1.4659, "step": 3255 }, { "epoch": 0.12550529355149181, "grad_norm": 1.2441003322601318, "learning_rate": 
0.0001980709250594611, "loss": 1.3807, "step": 3260 }, { "epoch": 0.1256977863330125, "grad_norm": 1.6097456216812134, "learning_rate": 0.00019806500957447067, "loss": 1.4115, "step": 3265 }, { "epoch": 0.1258902791145332, "grad_norm": 1.4005634784698486, "learning_rate": 0.0001980590851220847, "loss": 1.6008, "step": 3270 }, { "epoch": 0.1260827718960539, "grad_norm": 1.1883544921875, "learning_rate": 0.00019805315170284498, "loss": 1.3768, "step": 3275 }, { "epoch": 0.12627526467757458, "grad_norm": 1.2404242753982544, "learning_rate": 0.00019804720931729413, "loss": 1.463, "step": 3280 }, { "epoch": 0.12646775745909528, "grad_norm": 0.625027596950531, "learning_rate": 0.00019804125796597544, "loss": 1.3286, "step": 3285 }, { "epoch": 0.12666025024061597, "grad_norm": 1.5616633892059326, "learning_rate": 0.0001980352976494332, "loss": 1.4161, "step": 3290 }, { "epoch": 0.12685274302213667, "grad_norm": 0.8003360629081726, "learning_rate": 0.0001980293283682124, "loss": 1.4117, "step": 3295 }, { "epoch": 0.12704523580365737, "grad_norm": 1.0671011209487915, "learning_rate": 0.0001980233501228589, "loss": 1.4192, "step": 3300 }, { "epoch": 0.12723772858517807, "grad_norm": 1.4135669469833374, "learning_rate": 0.0001980173629139194, "loss": 1.3046, "step": 3305 }, { "epoch": 0.12743022136669874, "grad_norm": 1.0450470447540283, "learning_rate": 0.00019801136674194134, "loss": 1.4156, "step": 3310 }, { "epoch": 0.12762271414821943, "grad_norm": 1.1435261964797974, "learning_rate": 0.00019800536160747306, "loss": 1.2311, "step": 3315 }, { "epoch": 0.12781520692974013, "grad_norm": 1.5508229732513428, "learning_rate": 0.0001979993475110637, "loss": 1.4224, "step": 3320 }, { "epoch": 0.12800769971126083, "grad_norm": 0.9542085528373718, "learning_rate": 0.0001979933244532632, "loss": 1.2423, "step": 3325 }, { "epoch": 0.12820019249278153, "grad_norm": 1.5797593593597412, "learning_rate": 0.0001979872924346223, "loss": 1.3357, "step": 3330 }, { "epoch": 
0.12839268527430223, "grad_norm": 1.0982688665390015, "learning_rate": 0.00019798125145569263, "loss": 1.2404, "step": 3335 }, { "epoch": 0.1285851780558229, "grad_norm": 1.5471248626708984, "learning_rate": 0.0001979752015170266, "loss": 1.3556, "step": 3340 }, { "epoch": 0.1287776708373436, "grad_norm": 1.64442777633667, "learning_rate": 0.0001979691426191774, "loss": 1.3407, "step": 3345 }, { "epoch": 0.1289701636188643, "grad_norm": 1.494186520576477, "learning_rate": 0.0001979630747626991, "loss": 1.4509, "step": 3350 }, { "epoch": 0.129162656400385, "grad_norm": 0.9598186612129211, "learning_rate": 0.00019795699794814654, "loss": 1.3221, "step": 3355 }, { "epoch": 0.1293551491819057, "grad_norm": 1.1328315734863281, "learning_rate": 0.00019795091217607544, "loss": 1.5129, "step": 3360 }, { "epoch": 0.12954764196342639, "grad_norm": 1.0476043224334717, "learning_rate": 0.00019794481744704227, "loss": 1.3448, "step": 3365 }, { "epoch": 0.12974013474494706, "grad_norm": 1.2570463418960571, "learning_rate": 0.0001979387137616044, "loss": 1.2726, "step": 3370 }, { "epoch": 0.12993262752646775, "grad_norm": 1.395627498626709, "learning_rate": 0.00019793260112031992, "loss": 1.1469, "step": 3375 }, { "epoch": 0.13012512030798845, "grad_norm": 2.2382960319519043, "learning_rate": 0.00019792647952374782, "loss": 1.3375, "step": 3380 }, { "epoch": 0.13031761308950915, "grad_norm": 1.4930087327957153, "learning_rate": 0.00019792034897244784, "loss": 1.3684, "step": 3385 }, { "epoch": 0.13051010587102985, "grad_norm": 0.9732452034950256, "learning_rate": 0.00019791420946698064, "loss": 1.0792, "step": 3390 }, { "epoch": 0.13070259865255052, "grad_norm": 1.9484987258911133, "learning_rate": 0.0001979080610079076, "loss": 1.4284, "step": 3395 }, { "epoch": 0.13089509143407121, "grad_norm": 1.3746837377548218, "learning_rate": 0.00019790190359579097, "loss": 1.4393, "step": 3400 }, { "epoch": 0.1310875842155919, "grad_norm": 1.2191319465637207, "learning_rate": 
0.0001978957372311938, "loss": 1.2184, "step": 3405 }, { "epoch": 0.1312800769971126, "grad_norm": 1.0825196504592896, "learning_rate": 0.00019788956191467994, "loss": 1.3891, "step": 3410 }, { "epoch": 0.1314725697786333, "grad_norm": 1.9972898960113525, "learning_rate": 0.00019788337764681412, "loss": 1.3207, "step": 3415 }, { "epoch": 0.131665062560154, "grad_norm": 1.3864003419876099, "learning_rate": 0.00019787718442816182, "loss": 1.3791, "step": 3420 }, { "epoch": 0.13185755534167468, "grad_norm": 1.3315006494522095, "learning_rate": 0.0001978709822592894, "loss": 1.4253, "step": 3425 }, { "epoch": 0.13205004812319537, "grad_norm": 1.0171843767166138, "learning_rate": 0.00019786477114076397, "loss": 1.2974, "step": 3430 }, { "epoch": 0.13224254090471607, "grad_norm": 1.293380618095398, "learning_rate": 0.00019785855107315353, "loss": 1.3616, "step": 3435 }, { "epoch": 0.13243503368623677, "grad_norm": 2.0498528480529785, "learning_rate": 0.00019785232205702681, "loss": 1.3431, "step": 3440 }, { "epoch": 0.13262752646775747, "grad_norm": 0.8635803461074829, "learning_rate": 0.0001978460840929535, "loss": 1.3672, "step": 3445 }, { "epoch": 0.13282001924927817, "grad_norm": 0.9983857274055481, "learning_rate": 0.00019783983718150392, "loss": 1.4856, "step": 3450 }, { "epoch": 0.13301251203079884, "grad_norm": 4.542407989501953, "learning_rate": 0.00019783358132324937, "loss": 1.4599, "step": 3455 }, { "epoch": 0.13320500481231953, "grad_norm": 1.5495860576629639, "learning_rate": 0.00019782731651876194, "loss": 1.3641, "step": 3460 }, { "epoch": 0.13339749759384023, "grad_norm": 1.2070780992507935, "learning_rate": 0.00019782104276861443, "loss": 1.3596, "step": 3465 }, { "epoch": 0.13358999037536093, "grad_norm": 1.1749752759933472, "learning_rate": 0.00019781476007338058, "loss": 1.2387, "step": 3470 }, { "epoch": 0.13378248315688163, "grad_norm": 1.8580079078674316, "learning_rate": 0.00019780846843363485, "loss": 1.3966, "step": 3475 }, { "epoch": 
0.1339749759384023, "grad_norm": 1.9713795185089111, "learning_rate": 0.00019780216784995265, "loss": 1.2541, "step": 3480 }, { "epoch": 0.134167468719923, "grad_norm": 1.4017597436904907, "learning_rate": 0.00019779585832291002, "loss": 1.4827, "step": 3485 }, { "epoch": 0.1343599615014437, "grad_norm": 1.188761591911316, "learning_rate": 0.00019778953985308406, "loss": 1.3972, "step": 3490 }, { "epoch": 0.1345524542829644, "grad_norm": 1.0930372476577759, "learning_rate": 0.00019778321244105242, "loss": 1.4706, "step": 3495 }, { "epoch": 0.1347449470644851, "grad_norm": 1.3041532039642334, "learning_rate": 0.0001977768760873938, "loss": 1.1929, "step": 3500 }, { "epoch": 0.13493743984600579, "grad_norm": 2.6741833686828613, "learning_rate": 0.00019777053079268753, "loss": 1.268, "step": 3505 }, { "epoch": 0.13512993262752646, "grad_norm": 1.091823935508728, "learning_rate": 0.0001977641765575139, "loss": 1.2776, "step": 3510 }, { "epoch": 0.13532242540904715, "grad_norm": 0.9205764532089233, "learning_rate": 0.00019775781338245398, "loss": 1.3007, "step": 3515 }, { "epoch": 0.13551491819056785, "grad_norm": 1.6321576833724976, "learning_rate": 0.00019775144126808958, "loss": 1.4214, "step": 3520 }, { "epoch": 0.13570741097208855, "grad_norm": 1.7947146892547607, "learning_rate": 0.00019774506021500343, "loss": 1.3895, "step": 3525 }, { "epoch": 0.13589990375360925, "grad_norm": 1.6696717739105225, "learning_rate": 0.00019773867022377902, "loss": 1.3968, "step": 3530 }, { "epoch": 0.13609239653512994, "grad_norm": 1.1003444194793701, "learning_rate": 0.0001977322712950007, "loss": 1.4084, "step": 3535 }, { "epoch": 0.13628488931665061, "grad_norm": 1.0268352031707764, "learning_rate": 0.00019772586342925357, "loss": 1.254, "step": 3540 }, { "epoch": 0.1364773820981713, "grad_norm": 1.3906810283660889, "learning_rate": 0.0001977194466271236, "loss": 1.3266, "step": 3545 }, { "epoch": 0.136669874879692, "grad_norm": 1.1786664724349976, "learning_rate": 
0.00019771302088919757, "loss": 1.3114, "step": 3550 }, { "epoch": 0.1368623676612127, "grad_norm": 1.0252714157104492, "learning_rate": 0.00019770658621606307, "loss": 1.2089, "step": 3555 }, { "epoch": 0.1370548604427334, "grad_norm": 0.8099033236503601, "learning_rate": 0.00019770014260830853, "loss": 1.2607, "step": 3560 }, { "epoch": 0.1372473532242541, "grad_norm": 1.3679542541503906, "learning_rate": 0.0001976936900665231, "loss": 1.376, "step": 3565 }, { "epoch": 0.13743984600577477, "grad_norm": 1.7685283422470093, "learning_rate": 0.00019768722859129693, "loss": 1.4522, "step": 3570 }, { "epoch": 0.13763233878729547, "grad_norm": 1.0158277750015259, "learning_rate": 0.00019768075818322081, "loss": 1.2714, "step": 3575 }, { "epoch": 0.13782483156881617, "grad_norm": 1.7043020725250244, "learning_rate": 0.00019767427884288642, "loss": 1.5669, "step": 3580 }, { "epoch": 0.13801732435033687, "grad_norm": 1.8171344995498657, "learning_rate": 0.00019766779057088627, "loss": 1.4186, "step": 3585 }, { "epoch": 0.13820981713185757, "grad_norm": 1.0524088144302368, "learning_rate": 0.00019766129336781365, "loss": 1.167, "step": 3590 }, { "epoch": 0.13840230991337824, "grad_norm": 1.558383584022522, "learning_rate": 0.0001976547872342627, "loss": 1.5015, "step": 3595 }, { "epoch": 0.13859480269489893, "grad_norm": 1.9925919771194458, "learning_rate": 0.00019764827217082838, "loss": 1.3661, "step": 3600 }, { "epoch": 0.13878729547641963, "grad_norm": 1.5693559646606445, "learning_rate": 0.0001976417481781064, "loss": 1.3389, "step": 3605 }, { "epoch": 0.13897978825794033, "grad_norm": 1.2609871625900269, "learning_rate": 0.00019763521525669343, "loss": 1.2883, "step": 3610 }, { "epoch": 0.13917228103946103, "grad_norm": 1.4910306930541992, "learning_rate": 0.00019762867340718674, "loss": 1.4237, "step": 3615 }, { "epoch": 0.13936477382098172, "grad_norm": 0.9409481287002563, "learning_rate": 0.0001976221226301846, "loss": 1.4289, "step": 3620 }, { "epoch": 
0.1395572666025024, "grad_norm": 0.9263445138931274, "learning_rate": 0.00019761556292628604, "loss": 1.2987, "step": 3625 }, { "epoch": 0.1397497593840231, "grad_norm": 0.9329832792282104, "learning_rate": 0.0001976089942960909, "loss": 1.3709, "step": 3630 }, { "epoch": 0.1399422521655438, "grad_norm": 1.7852829694747925, "learning_rate": 0.00019760241674019984, "loss": 1.2282, "step": 3635 }, { "epoch": 0.1401347449470645, "grad_norm": 1.0068609714508057, "learning_rate": 0.0001975958302592143, "loss": 1.3143, "step": 3640 }, { "epoch": 0.14032723772858519, "grad_norm": 2.1680188179016113, "learning_rate": 0.0001975892348537366, "loss": 1.4447, "step": 3645 }, { "epoch": 0.14051973051010588, "grad_norm": 1.633169412612915, "learning_rate": 0.00019758263052436988, "loss": 1.2633, "step": 3650 }, { "epoch": 0.14071222329162655, "grad_norm": 1.3609623908996582, "learning_rate": 0.000197576017271718, "loss": 1.3352, "step": 3655 }, { "epoch": 0.14090471607314725, "grad_norm": 1.50294828414917, "learning_rate": 0.00019756939509638573, "loss": 1.3557, "step": 3660 }, { "epoch": 0.14109720885466795, "grad_norm": 0.9931232333183289, "learning_rate": 0.0001975627639989786, "loss": 1.4719, "step": 3665 }, { "epoch": 0.14128970163618865, "grad_norm": 1.3870011568069458, "learning_rate": 0.000197556123980103, "loss": 1.5173, "step": 3670 }, { "epoch": 0.14148219441770934, "grad_norm": 1.274064540863037, "learning_rate": 0.00019754947504036608, "loss": 1.3951, "step": 3675 }, { "epoch": 0.14167468719923004, "grad_norm": 1.6096014976501465, "learning_rate": 0.00019754281718037593, "loss": 1.4478, "step": 3680 }, { "epoch": 0.1418671799807507, "grad_norm": 1.155772089958191, "learning_rate": 0.00019753615040074131, "loss": 1.229, "step": 3685 }, { "epoch": 0.1420596727622714, "grad_norm": 1.123856544494629, "learning_rate": 0.0001975294747020718, "loss": 1.5036, "step": 3690 }, { "epoch": 0.1422521655437921, "grad_norm": 1.541308879852295, "learning_rate": 
0.00019752279008497796, "loss": 1.1174, "step": 3695 }, { "epoch": 0.1424446583253128, "grad_norm": 1.8912441730499268, "learning_rate": 0.00019751609655007098, "loss": 1.3753, "step": 3700 }, { "epoch": 0.1426371511068335, "grad_norm": 1.7746648788452148, "learning_rate": 0.00019750939409796293, "loss": 1.3115, "step": 3705 }, { "epoch": 0.14282964388835417, "grad_norm": 1.2228045463562012, "learning_rate": 0.00019750268272926676, "loss": 1.3477, "step": 3710 }, { "epoch": 0.14302213666987487, "grad_norm": 1.5031695365905762, "learning_rate": 0.00019749596244459614, "loss": 1.1905, "step": 3715 }, { "epoch": 0.14321462945139557, "grad_norm": 2.871879816055298, "learning_rate": 0.0001974892332445656, "loss": 1.3334, "step": 3720 }, { "epoch": 0.14340712223291627, "grad_norm": 1.1911511421203613, "learning_rate": 0.00019748249512979048, "loss": 1.2528, "step": 3725 }, { "epoch": 0.14359961501443697, "grad_norm": 1.2722115516662598, "learning_rate": 0.00019747574810088697, "loss": 1.3314, "step": 3730 }, { "epoch": 0.14379210779595766, "grad_norm": 1.0464539527893066, "learning_rate": 0.00019746899215847198, "loss": 1.1621, "step": 3735 }, { "epoch": 0.14398460057747833, "grad_norm": 1.8877158164978027, "learning_rate": 0.00019746222730316338, "loss": 1.2534, "step": 3740 }, { "epoch": 0.14417709335899903, "grad_norm": 1.5137780904769897, "learning_rate": 0.00019745545353557967, "loss": 1.1738, "step": 3745 }, { "epoch": 0.14436958614051973, "grad_norm": 1.7104227542877197, "learning_rate": 0.00019744867085634034, "loss": 1.2868, "step": 3750 }, { "epoch": 0.14456207892204043, "grad_norm": 1.2920212745666504, "learning_rate": 0.00019744187926606558, "loss": 1.3054, "step": 3755 }, { "epoch": 0.14475457170356112, "grad_norm": 2.3661959171295166, "learning_rate": 0.00019743507876537647, "loss": 1.3187, "step": 3760 }, { "epoch": 0.14494706448508182, "grad_norm": 1.4622807502746582, "learning_rate": 0.00019742826935489487, "loss": 1.1548, "step": 3765 }, { "epoch": 
0.1451395572666025, "grad_norm": 1.7818437814712524, "learning_rate": 0.00019742145103524342, "loss": 1.4081, "step": 3770 }, { "epoch": 0.1453320500481232, "grad_norm": 1.023716926574707, "learning_rate": 0.00019741462380704566, "loss": 1.3367, "step": 3775 }, { "epoch": 0.1455245428296439, "grad_norm": 1.4382961988449097, "learning_rate": 0.00019740778767092585, "loss": 1.3498, "step": 3780 }, { "epoch": 0.14571703561116459, "grad_norm": 1.5282870531082153, "learning_rate": 0.0001974009426275091, "loss": 1.2685, "step": 3785 }, { "epoch": 0.14590952839268528, "grad_norm": 1.2222365140914917, "learning_rate": 0.0001973940886774214, "loss": 1.2273, "step": 3790 }, { "epoch": 0.14610202117420595, "grad_norm": 1.3231360912322998, "learning_rate": 0.00019738722582128944, "loss": 1.5449, "step": 3795 }, { "epoch": 0.14629451395572665, "grad_norm": 1.2198995351791382, "learning_rate": 0.00019738035405974085, "loss": 1.4927, "step": 3800 }, { "epoch": 0.14648700673724735, "grad_norm": 1.1108288764953613, "learning_rate": 0.00019737347339340394, "loss": 1.3894, "step": 3805 }, { "epoch": 0.14667949951876805, "grad_norm": 1.1478091478347778, "learning_rate": 0.0001973665838229079, "loss": 1.342, "step": 3810 }, { "epoch": 0.14687199230028875, "grad_norm": 1.555680751800537, "learning_rate": 0.0001973596853488828, "loss": 1.269, "step": 3815 }, { "epoch": 0.14706448508180944, "grad_norm": 1.2819339036941528, "learning_rate": 0.0001973527779719594, "loss": 1.3462, "step": 3820 }, { "epoch": 0.1472569778633301, "grad_norm": 1.6733057498931885, "learning_rate": 0.00019734586169276939, "loss": 1.3179, "step": 3825 }, { "epoch": 0.1474494706448508, "grad_norm": 1.8622225522994995, "learning_rate": 0.00019733893651194517, "loss": 1.452, "step": 3830 }, { "epoch": 0.1476419634263715, "grad_norm": 1.2225052118301392, "learning_rate": 0.00019733200243012006, "loss": 1.2925, "step": 3835 }, { "epoch": 0.1478344562078922, "grad_norm": 0.7980884313583374, "learning_rate": 
0.00019732505944792804, "loss": 1.1505, "step": 3840 }, { "epoch": 0.1480269489894129, "grad_norm": 1.3874131441116333, "learning_rate": 0.00019731810756600405, "loss": 1.2989, "step": 3845 }, { "epoch": 0.1482194417709336, "grad_norm": 1.4387590885162354, "learning_rate": 0.00019731114678498378, "loss": 1.3295, "step": 3850 }, { "epoch": 0.14841193455245427, "grad_norm": 1.8189646005630493, "learning_rate": 0.00019730417710550383, "loss": 1.2926, "step": 3855 }, { "epoch": 0.14860442733397497, "grad_norm": 0.9577664732933044, "learning_rate": 0.0001972971985282014, "loss": 1.2375, "step": 3860 }, { "epoch": 0.14879692011549567, "grad_norm": 1.7154825925827026, "learning_rate": 0.00019729021105371474, "loss": 1.2853, "step": 3865 }, { "epoch": 0.14898941289701637, "grad_norm": 2.1061089038848877, "learning_rate": 0.00019728321468268277, "loss": 1.3391, "step": 3870 }, { "epoch": 0.14918190567853706, "grad_norm": 1.0177017450332642, "learning_rate": 0.00019727620941574524, "loss": 1.2801, "step": 3875 }, { "epoch": 0.14937439846005776, "grad_norm": 1.0773547887802124, "learning_rate": 0.00019726919525354277, "loss": 1.3063, "step": 3880 }, { "epoch": 0.14956689124157843, "grad_norm": 0.9082854986190796, "learning_rate": 0.00019726217219671673, "loss": 1.3601, "step": 3885 }, { "epoch": 0.14975938402309913, "grad_norm": 1.341280221939087, "learning_rate": 0.00019725514024590934, "loss": 1.4052, "step": 3890 }, { "epoch": 0.14995187680461983, "grad_norm": 2.240399122238159, "learning_rate": 0.00019724809940176364, "loss": 1.1955, "step": 3895 }, { "epoch": 0.15014436958614052, "grad_norm": 1.549137830734253, "learning_rate": 0.00019724104966492348, "loss": 1.3089, "step": 3900 }, { "epoch": 0.15033686236766122, "grad_norm": 1.6887294054031372, "learning_rate": 0.00019723399103603346, "loss": 1.4147, "step": 3905 }, { "epoch": 0.1505293551491819, "grad_norm": 1.793087363243103, "learning_rate": 0.0001972269235157391, "loss": 1.2674, "step": 3910 }, { "epoch": 
0.1507218479307026, "grad_norm": 1.718336820602417, "learning_rate": 0.00019721984710468663, "loss": 1.2716, "step": 3915 }, { "epoch": 0.1509143407122233, "grad_norm": 2.2342288494110107, "learning_rate": 0.0001972127618035232, "loss": 0.965, "step": 3920 }, { "epoch": 0.15110683349374399, "grad_norm": 1.5450822114944458, "learning_rate": 0.00019720566761289665, "loss": 1.3461, "step": 3925 }, { "epoch": 0.15129932627526468, "grad_norm": 1.4395346641540527, "learning_rate": 0.0001971985645334557, "loss": 1.3462, "step": 3930 }, { "epoch": 0.15149181905678538, "grad_norm": 1.1160500049591064, "learning_rate": 0.00019719145256584994, "loss": 1.3334, "step": 3935 }, { "epoch": 0.15168431183830605, "grad_norm": 1.0270999670028687, "learning_rate": 0.00019718433171072967, "loss": 1.2737, "step": 3940 }, { "epoch": 0.15187680461982675, "grad_norm": 1.4266023635864258, "learning_rate": 0.00019717720196874608, "loss": 1.3639, "step": 3945 }, { "epoch": 0.15206929740134745, "grad_norm": 1.552283525466919, "learning_rate": 0.00019717006334055108, "loss": 1.301, "step": 3950 }, { "epoch": 0.15226179018286815, "grad_norm": 1.5459437370300293, "learning_rate": 0.0001971629158267975, "loss": 1.265, "step": 3955 }, { "epoch": 0.15245428296438884, "grad_norm": 1.4866915941238403, "learning_rate": 0.00019715575942813888, "loss": 1.5694, "step": 3960 }, { "epoch": 0.15264677574590954, "grad_norm": 1.1116254329681396, "learning_rate": 0.00019714859414522967, "loss": 1.4858, "step": 3965 }, { "epoch": 0.1528392685274302, "grad_norm": 1.1708245277404785, "learning_rate": 0.0001971414199787251, "loss": 1.3582, "step": 3970 }, { "epoch": 0.1530317613089509, "grad_norm": 1.1672711372375488, "learning_rate": 0.00019713423692928114, "loss": 1.3393, "step": 3975 }, { "epoch": 0.1532242540904716, "grad_norm": 1.4800153970718384, "learning_rate": 0.0001971270449975547, "loss": 1.22, "step": 3980 }, { "epoch": 0.1534167468719923, "grad_norm": 1.92826509475708, "learning_rate": 
0.00019711984418420338, "loss": 1.3902, "step": 3985 }, { "epoch": 0.153609239653513, "grad_norm": 1.2292252779006958, "learning_rate": 0.00019711263448988567, "loss": 1.2327, "step": 3990 }, { "epoch": 0.1538017324350337, "grad_norm": 1.1007169485092163, "learning_rate": 0.00019710541591526085, "loss": 1.4284, "step": 3995 }, { "epoch": 0.15399422521655437, "grad_norm": 0.9456301927566528, "learning_rate": 0.00019709818846098905, "loss": 1.1589, "step": 4000 }, { "epoch": 0.15418671799807507, "grad_norm": 1.518704891204834, "learning_rate": 0.0001970909521277311, "loss": 1.3976, "step": 4005 }, { "epoch": 0.15437921077959577, "grad_norm": 1.3318589925765991, "learning_rate": 0.00019708370691614872, "loss": 1.3635, "step": 4010 }, { "epoch": 0.15457170356111646, "grad_norm": 1.752626657485962, "learning_rate": 0.0001970764528269045, "loss": 1.3175, "step": 4015 }, { "epoch": 0.15476419634263716, "grad_norm": 2.055469512939453, "learning_rate": 0.00019706918986066172, "loss": 1.2873, "step": 4020 }, { "epoch": 0.15495668912415783, "grad_norm": 2.1063289642333984, "learning_rate": 0.00019706191801808457, "loss": 1.3208, "step": 4025 }, { "epoch": 0.15514918190567853, "grad_norm": 1.2449209690093994, "learning_rate": 0.00019705463729983798, "loss": 1.2863, "step": 4030 }, { "epoch": 0.15534167468719923, "grad_norm": 1.4950852394104004, "learning_rate": 0.00019704734770658778, "loss": 1.2338, "step": 4035 }, { "epoch": 0.15553416746871992, "grad_norm": 0.9372254014015198, "learning_rate": 0.00019704004923900046, "loss": 1.2105, "step": 4040 }, { "epoch": 0.15572666025024062, "grad_norm": 1.2273038625717163, "learning_rate": 0.00019703274189774347, "loss": 1.3584, "step": 4045 }, { "epoch": 0.15591915303176132, "grad_norm": 1.1560612916946411, "learning_rate": 0.00019702542568348502, "loss": 1.432, "step": 4050 }, { "epoch": 0.156111645813282, "grad_norm": 1.2214939594268799, "learning_rate": 0.00019701810059689415, "loss": 1.3237, "step": 4055 }, { "epoch": 
0.1563041385948027, "grad_norm": 1.255182147026062, "learning_rate": 0.00019701076663864066, "loss": 1.5111, "step": 4060 }, { "epoch": 0.1564966313763234, "grad_norm": 1.2496423721313477, "learning_rate": 0.0001970034238093952, "loss": 1.3917, "step": 4065 }, { "epoch": 0.15668912415784408, "grad_norm": 2.773935556411743, "learning_rate": 0.00019699607210982918, "loss": 1.3072, "step": 4070 }, { "epoch": 0.15688161693936478, "grad_norm": 2.5853006839752197, "learning_rate": 0.00019698871154061497, "loss": 1.2737, "step": 4075 }, { "epoch": 0.15707410972088548, "grad_norm": 0.9573465585708618, "learning_rate": 0.00019698134210242553, "loss": 1.411, "step": 4080 }, { "epoch": 0.15726660250240615, "grad_norm": 2.204242467880249, "learning_rate": 0.00019697396379593482, "loss": 1.2493, "step": 4085 }, { "epoch": 0.15745909528392685, "grad_norm": 1.4688855409622192, "learning_rate": 0.0001969665766218175, "loss": 1.273, "step": 4090 }, { "epoch": 0.15765158806544755, "grad_norm": 2.1439919471740723, "learning_rate": 0.0001969591805807491, "loss": 1.4691, "step": 4095 }, { "epoch": 0.15784408084696824, "grad_norm": 1.4877434968948364, "learning_rate": 0.00019695177567340594, "loss": 1.4427, "step": 4100 }, { "epoch": 0.15803657362848894, "grad_norm": 1.3709458112716675, "learning_rate": 0.00019694436190046514, "loss": 1.2713, "step": 4105 }, { "epoch": 0.1582290664100096, "grad_norm": 2.1676931381225586, "learning_rate": 0.00019693693926260464, "loss": 1.1888, "step": 4110 }, { "epoch": 0.1584215591915303, "grad_norm": 1.1726205348968506, "learning_rate": 0.0001969295077605032, "loss": 1.3544, "step": 4115 }, { "epoch": 0.158614051973051, "grad_norm": 1.2441811561584473, "learning_rate": 0.00019692206739484037, "loss": 1.4796, "step": 4120 }, { "epoch": 0.1588065447545717, "grad_norm": 1.4889960289001465, "learning_rate": 0.00019691461816629652, "loss": 1.418, "step": 4125 }, { "epoch": 0.1589990375360924, "grad_norm": 1.3810794353485107, "learning_rate": 
0.00019690716007555282, "loss": 1.6398, "step": 4130 }, { "epoch": 0.1591915303176131, "grad_norm": 1.589390754699707, "learning_rate": 0.00019689969312329132, "loss": 1.3203, "step": 4135 }, { "epoch": 0.15938402309913377, "grad_norm": 0.8731974959373474, "learning_rate": 0.00019689221731019477, "loss": 1.2408, "step": 4140 }, { "epoch": 0.15957651588065447, "grad_norm": 1.046852707862854, "learning_rate": 0.00019688473263694678, "loss": 1.1249, "step": 4145 }, { "epoch": 0.15976900866217517, "grad_norm": 0.8767102360725403, "learning_rate": 0.0001968772391042318, "loss": 1.2611, "step": 4150 }, { "epoch": 0.15996150144369586, "grad_norm": 1.1452685594558716, "learning_rate": 0.0001968697367127351, "loss": 1.2992, "step": 4155 }, { "epoch": 0.16015399422521656, "grad_norm": 0.9254185557365417, "learning_rate": 0.00019686222546314266, "loss": 1.3894, "step": 4160 }, { "epoch": 0.16034648700673726, "grad_norm": 0.9607768654823303, "learning_rate": 0.00019685470535614133, "loss": 1.3076, "step": 4165 }, { "epoch": 0.16053897978825793, "grad_norm": 1.2880384922027588, "learning_rate": 0.0001968471763924188, "loss": 1.3868, "step": 4170 }, { "epoch": 0.16073147256977863, "grad_norm": 1.1116464138031006, "learning_rate": 0.00019683963857266356, "loss": 1.2489, "step": 4175 }, { "epoch": 0.16092396535129933, "grad_norm": 0.9132522940635681, "learning_rate": 0.0001968320918975649, "loss": 1.3788, "step": 4180 }, { "epoch": 0.16111645813282002, "grad_norm": 1.1793001890182495, "learning_rate": 0.00019682453636781283, "loss": 1.4742, "step": 4185 }, { "epoch": 0.16130895091434072, "grad_norm": 1.1624877452850342, "learning_rate": 0.00019681697198409835, "loss": 1.3547, "step": 4190 }, { "epoch": 0.16150144369586142, "grad_norm": 1.1367181539535522, "learning_rate": 0.00019680939874711312, "loss": 1.3692, "step": 4195 }, { "epoch": 0.1616939364773821, "grad_norm": 1.0168886184692383, "learning_rate": 0.00019680181665754972, "loss": 1.4148, "step": 4200 }, { "epoch": 
0.1618864292589028, "grad_norm": 1.3179705142974854, "learning_rate": 0.0001967942257161014, "loss": 1.2674, "step": 4205 }, { "epoch": 0.16207892204042348, "grad_norm": 0.8679062724113464, "learning_rate": 0.00019678662592346235, "loss": 1.4001, "step": 4210 }, { "epoch": 0.16227141482194418, "grad_norm": 0.8477693200111389, "learning_rate": 0.00019677901728032754, "loss": 1.3527, "step": 4215 }, { "epoch": 0.16246390760346488, "grad_norm": 1.280357003211975, "learning_rate": 0.00019677139978739266, "loss": 1.2576, "step": 4220 }, { "epoch": 0.16265640038498555, "grad_norm": 3.5572381019592285, "learning_rate": 0.00019676377344535434, "loss": 1.3059, "step": 4225 }, { "epoch": 0.16284889316650625, "grad_norm": 0.9162838459014893, "learning_rate": 0.0001967561382549099, "loss": 1.3655, "step": 4230 }, { "epoch": 0.16304138594802695, "grad_norm": 1.0635076761245728, "learning_rate": 0.00019674849421675764, "loss": 1.2356, "step": 4235 }, { "epoch": 0.16323387872954764, "grad_norm": 2.3638720512390137, "learning_rate": 0.00019674084133159642, "loss": 1.3598, "step": 4240 }, { "epoch": 0.16342637151106834, "grad_norm": 1.013108730316162, "learning_rate": 0.00019673317960012615, "loss": 1.6119, "step": 4245 }, { "epoch": 0.16361886429258904, "grad_norm": 1.391450047492981, "learning_rate": 0.00019672550902304737, "loss": 1.2481, "step": 4250 }, { "epoch": 0.1638113570741097, "grad_norm": 1.5574865341186523, "learning_rate": 0.00019671782960106157, "loss": 1.345, "step": 4255 }, { "epoch": 0.1640038498556304, "grad_norm": 1.8456825017929077, "learning_rate": 0.00019671014133487095, "loss": 1.3582, "step": 4260 }, { "epoch": 0.1641963426371511, "grad_norm": 1.4087297916412354, "learning_rate": 0.00019670244422517855, "loss": 1.3162, "step": 4265 }, { "epoch": 0.1643888354186718, "grad_norm": 1.167403221130371, "learning_rate": 0.0001966947382726882, "loss": 1.3841, "step": 4270 }, { "epoch": 0.1645813282001925, "grad_norm": 1.3395906686782837, "learning_rate": 
0.0001966870234781046, "loss": 1.1306, "step": 4275 }, { "epoch": 0.1647738209817132, "grad_norm": 0.8549813628196716, "learning_rate": 0.00019667929984213317, "loss": 1.3017, "step": 4280 }, { "epoch": 0.16496631376323387, "grad_norm": 0.8681890368461609, "learning_rate": 0.00019667156736548021, "loss": 1.2152, "step": 4285 }, { "epoch": 0.16515880654475457, "grad_norm": 1.8476097583770752, "learning_rate": 0.00019666382604885283, "loss": 1.2571, "step": 4290 }, { "epoch": 0.16535129932627526, "grad_norm": 1.6583194732666016, "learning_rate": 0.00019665607589295888, "loss": 1.3866, "step": 4295 }, { "epoch": 0.16554379210779596, "grad_norm": 1.6784121990203857, "learning_rate": 0.00019664831689850712, "loss": 1.2966, "step": 4300 }, { "epoch": 0.16573628488931666, "grad_norm": 1.5268521308898926, "learning_rate": 0.00019664054906620696, "loss": 1.3086, "step": 4305 }, { "epoch": 0.16592877767083736, "grad_norm": 2.0114951133728027, "learning_rate": 0.00019663277239676877, "loss": 1.2137, "step": 4310 }, { "epoch": 0.16612127045235803, "grad_norm": 1.4572757482528687, "learning_rate": 0.00019662498689090372, "loss": 1.2505, "step": 4315 }, { "epoch": 0.16631376323387873, "grad_norm": 1.4267566204071045, "learning_rate": 0.00019661719254932369, "loss": 1.1485, "step": 4320 }, { "epoch": 0.16650625601539942, "grad_norm": 0.9921162128448486, "learning_rate": 0.00019660938937274142, "loss": 1.304, "step": 4325 }, { "epoch": 0.16669874879692012, "grad_norm": 1.3901869058609009, "learning_rate": 0.00019660157736187047, "loss": 1.4347, "step": 4330 }, { "epoch": 0.16689124157844082, "grad_norm": 1.5446443557739258, "learning_rate": 0.0001965937565174252, "loss": 1.3157, "step": 4335 }, { "epoch": 0.1670837343599615, "grad_norm": 1.2553350925445557, "learning_rate": 0.0001965859268401208, "loss": 1.1882, "step": 4340 }, { "epoch": 0.1672762271414822, "grad_norm": 1.9385195970535278, "learning_rate": 0.0001965780883306732, "loss": 1.4522, "step": 4345 }, { "epoch": 
0.16746871992300288, "grad_norm": 1.426032543182373, "learning_rate": 0.00019657024098979916, "loss": 1.1029, "step": 4350 }, { "epoch": 0.16766121270452358, "grad_norm": 1.5562461614608765, "learning_rate": 0.0001965623848182163, "loss": 1.4837, "step": 4355 }, { "epoch": 0.16785370548604428, "grad_norm": 1.0057613849639893, "learning_rate": 0.00019655451981664306, "loss": 1.3095, "step": 4360 }, { "epoch": 0.16804619826756498, "grad_norm": 1.447845697402954, "learning_rate": 0.00019654664598579857, "loss": 1.4002, "step": 4365 }, { "epoch": 0.16823869104908565, "grad_norm": 0.9452415108680725, "learning_rate": 0.00019653876332640288, "loss": 1.3324, "step": 4370 }, { "epoch": 0.16843118383060635, "grad_norm": 1.7831186056137085, "learning_rate": 0.00019653087183917677, "loss": 1.3004, "step": 4375 }, { "epoch": 0.16862367661212704, "grad_norm": 1.0656229257583618, "learning_rate": 0.0001965229715248419, "loss": 1.5165, "step": 4380 }, { "epoch": 0.16881616939364774, "grad_norm": 1.0360915660858154, "learning_rate": 0.0001965150623841207, "loss": 1.2842, "step": 4385 }, { "epoch": 0.16900866217516844, "grad_norm": 1.286447525024414, "learning_rate": 0.00019650714441773643, "loss": 1.2902, "step": 4390 }, { "epoch": 0.16920115495668914, "grad_norm": 1.2435790300369263, "learning_rate": 0.00019649921762641306, "loss": 1.3049, "step": 4395 }, { "epoch": 0.1693936477382098, "grad_norm": 1.9299678802490234, "learning_rate": 0.0001964912820108755, "loss": 1.3057, "step": 4400 }, { "epoch": 0.1695861405197305, "grad_norm": 1.7493208646774292, "learning_rate": 0.0001964833375718494, "loss": 1.3225, "step": 4405 }, { "epoch": 0.1697786333012512, "grad_norm": 1.3697878122329712, "learning_rate": 0.0001964753843100612, "loss": 1.3518, "step": 4410 }, { "epoch": 0.1699711260827719, "grad_norm": 1.343985676765442, "learning_rate": 0.0001964674222262382, "loss": 1.3195, "step": 4415 }, { "epoch": 0.1701636188642926, "grad_norm": 1.0094975233078003, "learning_rate": 
0.00019645945132110853, "loss": 1.3184, "step": 4420 }, { "epoch": 0.17035611164581327, "grad_norm": 1.6048771142959595, "learning_rate": 0.00019645147159540096, "loss": 1.3307, "step": 4425 }, { "epoch": 0.17054860442733397, "grad_norm": 2.14099383354187, "learning_rate": 0.00019644348304984524, "loss": 1.3221, "step": 4430 }, { "epoch": 0.17074109720885466, "grad_norm": 2.5571303367614746, "learning_rate": 0.00019643548568517192, "loss": 1.3092, "step": 4435 }, { "epoch": 0.17093358999037536, "grad_norm": 1.1076972484588623, "learning_rate": 0.00019642747950211225, "loss": 1.1981, "step": 4440 }, { "epoch": 0.17112608277189606, "grad_norm": 1.1315946578979492, "learning_rate": 0.00019641946450139831, "loss": 1.335, "step": 4445 }, { "epoch": 0.17131857555341676, "grad_norm": 1.33171808719635, "learning_rate": 0.00019641144068376312, "loss": 1.4677, "step": 4450 }, { "epoch": 0.17151106833493743, "grad_norm": 0.87531977891922, "learning_rate": 0.0001964034080499403, "loss": 1.1795, "step": 4455 }, { "epoch": 0.17170356111645813, "grad_norm": 1.6923136711120605, "learning_rate": 0.00019639536660066446, "loss": 1.2491, "step": 4460 }, { "epoch": 0.17189605389797882, "grad_norm": 1.481703519821167, "learning_rate": 0.0001963873163366709, "loss": 1.2894, "step": 4465 }, { "epoch": 0.17208854667949952, "grad_norm": 3.3689515590667725, "learning_rate": 0.00019637925725869576, "loss": 1.3785, "step": 4470 }, { "epoch": 0.17228103946102022, "grad_norm": 2.498059034347534, "learning_rate": 0.000196371189367476, "loss": 1.2854, "step": 4475 }, { "epoch": 0.17247353224254092, "grad_norm": 1.2852959632873535, "learning_rate": 0.00019636311266374939, "loss": 1.2272, "step": 4480 }, { "epoch": 0.1726660250240616, "grad_norm": 0.9257192015647888, "learning_rate": 0.00019635502714825446, "loss": 1.1707, "step": 4485 }, { "epoch": 0.17285851780558228, "grad_norm": 0.989142656326294, "learning_rate": 0.00019634693282173058, "loss": 1.3174, "step": 4490 }, { "epoch": 
0.17305101058710298, "grad_norm": 1.4923882484436035, "learning_rate": 0.00019633882968491794, "loss": 1.2334, "step": 4495 }, { "epoch": 0.17324350336862368, "grad_norm": 1.2684218883514404, "learning_rate": 0.0001963307177385575, "loss": 1.2468, "step": 4500 }, { "epoch": 0.17343599615014438, "grad_norm": 0.9474775791168213, "learning_rate": 0.0001963225969833911, "loss": 1.2767, "step": 4505 }, { "epoch": 0.17362848893166508, "grad_norm": 2.477541446685791, "learning_rate": 0.00019631446742016126, "loss": 1.4144, "step": 4510 }, { "epoch": 0.17382098171318575, "grad_norm": 1.040477991104126, "learning_rate": 0.00019630632904961138, "loss": 1.5665, "step": 4515 }, { "epoch": 0.17401347449470644, "grad_norm": 1.3127304315567017, "learning_rate": 0.0001962981818724857, "loss": 1.3511, "step": 4520 }, { "epoch": 0.17420596727622714, "grad_norm": 1.6968106031417847, "learning_rate": 0.0001962900258895292, "loss": 1.3202, "step": 4525 }, { "epoch": 0.17439846005774784, "grad_norm": 2.2431318759918213, "learning_rate": 0.0001962818611014877, "loss": 1.351, "step": 4530 }, { "epoch": 0.17459095283926854, "grad_norm": 1.2938642501831055, "learning_rate": 0.00019627368750910779, "loss": 1.276, "step": 4535 }, { "epoch": 0.1747834456207892, "grad_norm": 1.1331931352615356, "learning_rate": 0.00019626550511313694, "loss": 1.4734, "step": 4540 }, { "epoch": 0.1749759384023099, "grad_norm": 1.4755507707595825, "learning_rate": 0.00019625731391432333, "loss": 1.24, "step": 4545 }, { "epoch": 0.1751684311838306, "grad_norm": 1.5442554950714111, "learning_rate": 0.00019624911391341604, "loss": 1.0894, "step": 4550 }, { "epoch": 0.1753609239653513, "grad_norm": 1.2970473766326904, "learning_rate": 0.00019624090511116481, "loss": 1.3262, "step": 4555 }, { "epoch": 0.175553416746872, "grad_norm": 2.1946523189544678, "learning_rate": 0.0001962326875083204, "loss": 1.4652, "step": 4560 }, { "epoch": 0.1757459095283927, "grad_norm": 1.1216411590576172, "learning_rate": 
0.00019622446110563417, "loss": 1.1608, "step": 4565 }, { "epoch": 0.17593840230991337, "grad_norm": 1.996535301208496, "learning_rate": 0.00019621622590385842, "loss": 1.2568, "step": 4570 }, { "epoch": 0.17613089509143406, "grad_norm": 1.9742660522460938, "learning_rate": 0.0001962079819037462, "loss": 1.3335, "step": 4575 }, { "epoch": 0.17632338787295476, "grad_norm": 1.985192060470581, "learning_rate": 0.00019619972910605134, "loss": 1.3529, "step": 4580 }, { "epoch": 0.17651588065447546, "grad_norm": 0.8765020966529846, "learning_rate": 0.00019619146751152848, "loss": 1.3956, "step": 4585 }, { "epoch": 0.17670837343599616, "grad_norm": 1.483407974243164, "learning_rate": 0.00019618319712093319, "loss": 1.4396, "step": 4590 }, { "epoch": 0.17690086621751686, "grad_norm": 1.5663124322891235, "learning_rate": 0.00019617491793502164, "loss": 1.3896, "step": 4595 }, { "epoch": 0.17709335899903753, "grad_norm": 1.3831099271774292, "learning_rate": 0.00019616662995455096, "loss": 1.2669, "step": 4600 }, { "epoch": 0.17728585178055822, "grad_norm": 0.8688403964042664, "learning_rate": 0.00019615833318027898, "loss": 1.2098, "step": 4605 }, { "epoch": 0.17747834456207892, "grad_norm": 1.9218660593032837, "learning_rate": 0.00019615002761296446, "loss": 1.1568, "step": 4610 }, { "epoch": 0.17767083734359962, "grad_norm": 1.5095698833465576, "learning_rate": 0.00019614171325336684, "loss": 1.0516, "step": 4615 }, { "epoch": 0.17786333012512032, "grad_norm": 0.9288404583930969, "learning_rate": 0.00019613339010224646, "loss": 1.075, "step": 4620 }, { "epoch": 0.17805582290664101, "grad_norm": 1.414787769317627, "learning_rate": 0.00019612505816036434, "loss": 1.2158, "step": 4625 }, { "epoch": 0.17824831568816168, "grad_norm": 1.3182802200317383, "learning_rate": 0.0001961167174284824, "loss": 1.3719, "step": 4630 }, { "epoch": 0.17844080846968238, "grad_norm": 1.1671231985092163, "learning_rate": 0.0001961083679073634, "loss": 1.3067, "step": 4635 }, { "epoch": 
0.17863330125120308, "grad_norm": 1.11225163936615, "learning_rate": 0.0001961000095977708, "loss": 1.1593, "step": 4640 }, { "epoch": 0.17882579403272378, "grad_norm": 1.235335111618042, "learning_rate": 0.00019609164250046894, "loss": 1.2232, "step": 4645 }, { "epoch": 0.17901828681424448, "grad_norm": 1.0023348331451416, "learning_rate": 0.00019608326661622291, "loss": 1.2926, "step": 4650 }, { "epoch": 0.17921077959576515, "grad_norm": 1.7143383026123047, "learning_rate": 0.00019607488194579867, "loss": 1.3149, "step": 4655 }, { "epoch": 0.17940327237728584, "grad_norm": 1.135324478149414, "learning_rate": 0.00019606648848996287, "loss": 1.4155, "step": 4660 }, { "epoch": 0.17959576515880654, "grad_norm": 0.7830592393875122, "learning_rate": 0.0001960580862494831, "loss": 1.2632, "step": 4665 }, { "epoch": 0.17978825794032724, "grad_norm": 1.546481966972351, "learning_rate": 0.0001960496752251277, "loss": 1.4674, "step": 4670 }, { "epoch": 0.17998075072184794, "grad_norm": 1.5377360582351685, "learning_rate": 0.00019604125541766574, "loss": 1.0782, "step": 4675 }, { "epoch": 0.18017324350336864, "grad_norm": 2.1382510662078857, "learning_rate": 0.0001960328268278672, "loss": 1.3008, "step": 4680 }, { "epoch": 0.1803657362848893, "grad_norm": 1.4963937997817993, "learning_rate": 0.00019602438945650277, "loss": 1.2601, "step": 4685 }, { "epoch": 0.18055822906641, "grad_norm": 1.4736862182617188, "learning_rate": 0.00019601594330434405, "loss": 1.163, "step": 4690 }, { "epoch": 0.1807507218479307, "grad_norm": 0.9905889630317688, "learning_rate": 0.00019600748837216337, "loss": 1.3675, "step": 4695 }, { "epoch": 0.1809432146294514, "grad_norm": 1.1800122261047363, "learning_rate": 0.00019599902466073385, "loss": 1.3252, "step": 4700 }, { "epoch": 0.1811357074109721, "grad_norm": 1.1933966875076294, "learning_rate": 0.00019599055217082949, "loss": 1.2163, "step": 4705 }, { "epoch": 0.1813282001924928, "grad_norm": 1.3980772495269775, "learning_rate": 
0.000195982070903225, "loss": 1.2807, "step": 4710 }, { "epoch": 0.18152069297401346, "grad_norm": 2.541808605194092, "learning_rate": 0.00019597358085869594, "loss": 1.1333, "step": 4715 }, { "epoch": 0.18171318575553416, "grad_norm": 1.616479516029358, "learning_rate": 0.0001959650820380187, "loss": 1.2991, "step": 4720 }, { "epoch": 0.18190567853705486, "grad_norm": 0.9473749399185181, "learning_rate": 0.00019595657444197037, "loss": 1.2273, "step": 4725 }, { "epoch": 0.18209817131857556, "grad_norm": 1.3119609355926514, "learning_rate": 0.000195948058071329, "loss": 1.2754, "step": 4730 }, { "epoch": 0.18229066410009626, "grad_norm": 1.0062682628631592, "learning_rate": 0.00019593953292687332, "loss": 1.2494, "step": 4735 }, { "epoch": 0.18248315688161693, "grad_norm": 1.2124086618423462, "learning_rate": 0.0001959309990093829, "loss": 1.3725, "step": 4740 }, { "epoch": 0.18267564966313762, "grad_norm": 1.2050824165344238, "learning_rate": 0.0001959224563196381, "loss": 1.5103, "step": 4745 }, { "epoch": 0.18286814244465832, "grad_norm": 0.9262427091598511, "learning_rate": 0.00019591390485842008, "loss": 1.4155, "step": 4750 }, { "epoch": 0.18306063522617902, "grad_norm": 1.5612881183624268, "learning_rate": 0.00019590534462651086, "loss": 1.2289, "step": 4755 }, { "epoch": 0.18325312800769972, "grad_norm": 1.5384646654129028, "learning_rate": 0.00019589677562469312, "loss": 1.2474, "step": 4760 }, { "epoch": 0.18344562078922041, "grad_norm": 1.397716999053955, "learning_rate": 0.00019588819785375057, "loss": 1.4273, "step": 4765 }, { "epoch": 0.18363811357074108, "grad_norm": 1.169207215309143, "learning_rate": 0.00019587961131446754, "loss": 1.3963, "step": 4770 }, { "epoch": 0.18383060635226178, "grad_norm": 1.5064833164215088, "learning_rate": 0.00019587101600762916, "loss": 1.5192, "step": 4775 }, { "epoch": 0.18402309913378248, "grad_norm": 0.9700071811676025, "learning_rate": 0.00019586241193402147, "loss": 1.2697, "step": 4780 }, { "epoch": 
0.18421559191530318, "grad_norm": 1.2304507493972778, "learning_rate": 0.00019585379909443123, "loss": 1.3025, "step": 4785 }, { "epoch": 0.18440808469682388, "grad_norm": 1.3768020868301392, "learning_rate": 0.00019584517748964605, "loss": 1.3785, "step": 4790 }, { "epoch": 0.18460057747834457, "grad_norm": 1.062251091003418, "learning_rate": 0.0001958365471204543, "loss": 1.5416, "step": 4795 }, { "epoch": 0.18479307025986524, "grad_norm": 0.9126803874969482, "learning_rate": 0.00019582790798764518, "loss": 1.1479, "step": 4800 }, { "epoch": 0.18498556304138594, "grad_norm": 1.579830288887024, "learning_rate": 0.00019581926009200866, "loss": 1.3315, "step": 4805 }, { "epoch": 0.18517805582290664, "grad_norm": 2.351717710494995, "learning_rate": 0.00019581060343433555, "loss": 1.2503, "step": 4810 }, { "epoch": 0.18537054860442734, "grad_norm": 1.1480222940444946, "learning_rate": 0.00019580193801541746, "loss": 1.2048, "step": 4815 }, { "epoch": 0.18556304138594804, "grad_norm": 1.606439471244812, "learning_rate": 0.00019579326383604675, "loss": 1.5204, "step": 4820 }, { "epoch": 0.18575553416746873, "grad_norm": 1.520969271659851, "learning_rate": 0.00019578458089701664, "loss": 1.2584, "step": 4825 }, { "epoch": 0.1859480269489894, "grad_norm": 1.9096931219100952, "learning_rate": 0.00019577588919912113, "loss": 1.5508, "step": 4830 }, { "epoch": 0.1861405197305101, "grad_norm": 1.004654884338379, "learning_rate": 0.00019576718874315501, "loss": 1.2249, "step": 4835 }, { "epoch": 0.1863330125120308, "grad_norm": 1.0160667896270752, "learning_rate": 0.00019575847952991388, "loss": 1.0782, "step": 4840 }, { "epoch": 0.1865255052935515, "grad_norm": 1.4719328880310059, "learning_rate": 0.0001957497615601941, "loss": 1.4679, "step": 4845 }, { "epoch": 0.1867179980750722, "grad_norm": 1.229625940322876, "learning_rate": 0.00019574103483479296, "loss": 1.347, "step": 4850 }, { "epoch": 0.18691049085659286, "grad_norm": 3.0996217727661133, "learning_rate": 
0.00019573229935450842, "loss": 1.3325, "step": 4855 }, { "epoch": 0.18710298363811356, "grad_norm": 1.59645676612854, "learning_rate": 0.00019572355512013922, "loss": 1.2983, "step": 4860 }, { "epoch": 0.18729547641963426, "grad_norm": 1.373542070388794, "learning_rate": 0.00019571480213248504, "loss": 1.3285, "step": 4865 }, { "epoch": 0.18748796920115496, "grad_norm": 0.9625198245048523, "learning_rate": 0.00019570604039234626, "loss": 1.2823, "step": 4870 }, { "epoch": 0.18768046198267566, "grad_norm": 1.1096363067626953, "learning_rate": 0.00019569726990052407, "loss": 1.2508, "step": 4875 }, { "epoch": 0.18787295476419635, "grad_norm": 1.2040042877197266, "learning_rate": 0.0001956884906578205, "loss": 1.3767, "step": 4880 }, { "epoch": 0.18806544754571702, "grad_norm": 1.103530764579773, "learning_rate": 0.00019567970266503833, "loss": 1.4559, "step": 4885 }, { "epoch": 0.18825794032723772, "grad_norm": 1.1266409158706665, "learning_rate": 0.0001956709059229812, "loss": 1.0687, "step": 4890 }, { "epoch": 0.18845043310875842, "grad_norm": 1.2266972064971924, "learning_rate": 0.00019566210043245344, "loss": 1.1801, "step": 4895 }, { "epoch": 0.18864292589027912, "grad_norm": 1.416676640510559, "learning_rate": 0.0001956532861942603, "loss": 1.346, "step": 4900 }, { "epoch": 0.18883541867179982, "grad_norm": 1.5538910627365112, "learning_rate": 0.0001956444632092078, "loss": 1.3498, "step": 4905 }, { "epoch": 0.1890279114533205, "grad_norm": 1.1525146961212158, "learning_rate": 0.00019563563147810274, "loss": 1.39, "step": 4910 }, { "epoch": 0.18922040423484118, "grad_norm": 1.6796061992645264, "learning_rate": 0.00019562679100175266, "loss": 1.3377, "step": 4915 }, { "epoch": 0.18941289701636188, "grad_norm": 1.6094450950622559, "learning_rate": 0.00019561794178096607, "loss": 1.3057, "step": 4920 }, { "epoch": 0.18960538979788258, "grad_norm": 1.8123548030853271, "learning_rate": 0.00019560908381655208, "loss": 1.1257, "step": 4925 }, { "epoch": 
0.18979788257940328, "grad_norm": 1.5495673418045044, "learning_rate": 0.00019560021710932074, "loss": 1.303, "step": 4930 }, { "epoch": 0.18999037536092397, "grad_norm": 1.623429298400879, "learning_rate": 0.00019559134166008283, "loss": 1.1491, "step": 4935 }, { "epoch": 0.19018286814244467, "grad_norm": 1.2682925462722778, "learning_rate": 0.00019558245746964997, "loss": 1.3774, "step": 4940 }, { "epoch": 0.19037536092396534, "grad_norm": 0.9362719058990479, "learning_rate": 0.00019557356453883456, "loss": 1.2936, "step": 4945 }, { "epoch": 0.19056785370548604, "grad_norm": 1.4271594285964966, "learning_rate": 0.00019556466286844976, "loss": 1.3865, "step": 4950 }, { "epoch": 0.19076034648700674, "grad_norm": 1.4094691276550293, "learning_rate": 0.00019555575245930963, "loss": 1.2941, "step": 4955 }, { "epoch": 0.19095283926852744, "grad_norm": 0.9695935249328613, "learning_rate": 0.00019554683331222893, "loss": 1.1724, "step": 4960 }, { "epoch": 0.19114533205004813, "grad_norm": 1.110616683959961, "learning_rate": 0.00019553790542802327, "loss": 1.3999, "step": 4965 }, { "epoch": 0.1913378248315688, "grad_norm": 1.5389796495437622, "learning_rate": 0.000195528968807509, "loss": 1.2693, "step": 4970 }, { "epoch": 0.1915303176130895, "grad_norm": 1.921168565750122, "learning_rate": 0.00019552002345150338, "loss": 1.2392, "step": 4975 }, { "epoch": 0.1917228103946102, "grad_norm": 1.3342314958572388, "learning_rate": 0.00019551106936082437, "loss": 1.2477, "step": 4980 }, { "epoch": 0.1919153031761309, "grad_norm": 1.745754361152649, "learning_rate": 0.0001955021065362908, "loss": 1.7169, "step": 4985 }, { "epoch": 0.1921077959576516, "grad_norm": 1.090145468711853, "learning_rate": 0.0001954931349787222, "loss": 1.1156, "step": 4990 }, { "epoch": 0.1923002887391723, "grad_norm": 1.5357612371444702, "learning_rate": 0.00019548415468893899, "loss": 1.5436, "step": 4995 }, { "epoch": 0.19249278152069296, "grad_norm": 1.0309633016586304, "learning_rate": 
0.00019547516566776238, "loss": 1.3212, "step": 5000 }, { "epoch": 0.19268527430221366, "grad_norm": 1.000688076019287, "learning_rate": 0.0001954661679160143, "loss": 1.2821, "step": 5005 }, { "epoch": 0.19287776708373436, "grad_norm": 1.268754243850708, "learning_rate": 0.0001954571614345176, "loss": 1.2168, "step": 5010 }, { "epoch": 0.19307025986525506, "grad_norm": 1.3859111070632935, "learning_rate": 0.00019544814622409582, "loss": 1.0701, "step": 5015 }, { "epoch": 0.19326275264677575, "grad_norm": 2.248309850692749, "learning_rate": 0.00019543912228557337, "loss": 1.3548, "step": 5020 }, { "epoch": 0.19345524542829645, "grad_norm": 1.0269944667816162, "learning_rate": 0.00019543008961977538, "loss": 1.213, "step": 5025 }, { "epoch": 0.19364773820981712, "grad_norm": 1.0082924365997314, "learning_rate": 0.00019542104822752789, "loss": 1.2395, "step": 5030 }, { "epoch": 0.19384023099133782, "grad_norm": 2.1287014484405518, "learning_rate": 0.00019541199810965766, "loss": 1.3794, "step": 5035 }, { "epoch": 0.19403272377285852, "grad_norm": 1.230859637260437, "learning_rate": 0.0001954029392669922, "loss": 1.3985, "step": 5040 }, { "epoch": 0.19422521655437922, "grad_norm": 1.0987460613250732, "learning_rate": 0.00019539387170035996, "loss": 1.2637, "step": 5045 }, { "epoch": 0.1944177093358999, "grad_norm": 1.2570157051086426, "learning_rate": 0.00019538479541059007, "loss": 1.2752, "step": 5050 }, { "epoch": 0.19461020211742058, "grad_norm": 0.5122241377830505, "learning_rate": 0.00019537571039851252, "loss": 1.1927, "step": 5055 }, { "epoch": 0.19480269489894128, "grad_norm": 1.7925124168395996, "learning_rate": 0.00019536661666495807, "loss": 1.1414, "step": 5060 }, { "epoch": 0.19499518768046198, "grad_norm": 0.8517950773239136, "learning_rate": 0.00019535751421075826, "loss": 1.2359, "step": 5065 }, { "epoch": 0.19518768046198268, "grad_norm": 0.582260012626648, "learning_rate": 0.00019534840303674544, "loss": 1.3528, "step": 5070 }, { "epoch": 
0.19538017324350337, "grad_norm": 1.3547414541244507, "learning_rate": 0.0001953392831437528, "loss": 1.296, "step": 5075 }, { "epoch": 0.19557266602502407, "grad_norm": null, "learning_rate": 0.0001953319809522536, "loss": 1.4074, "step": 5080 }, { "epoch": 0.19576515880654474, "grad_norm": 2.2984917163848877, "learning_rate": 0.00019532284536719936, "loss": 1.2002, "step": 5085 }, { "epoch": 0.19595765158806544, "grad_norm": 1.4113095998764038, "learning_rate": 0.0001953137010655024, "loss": 1.2755, "step": 5090 }, { "epoch": 0.19615014436958614, "grad_norm": 1.921242594718933, "learning_rate": 0.00019530454804799881, "loss": 1.2431, "step": 5095 }, { "epoch": 0.19634263715110684, "grad_norm": 1.3097113370895386, "learning_rate": 0.0001952953863155257, "loss": 1.415, "step": 5100 }, { "epoch": 0.19653512993262753, "grad_norm": 2.1493217945098877, "learning_rate": 0.00019528621586892072, "loss": 1.4282, "step": 5105 }, { "epoch": 0.19672762271414823, "grad_norm": 1.2487257719039917, "learning_rate": 0.0001952770367090226, "loss": 1.3512, "step": 5110 }, { "epoch": 0.1969201154956689, "grad_norm": 0.9984391331672668, "learning_rate": 0.00019526784883667055, "loss": 1.5437, "step": 5115 }, { "epoch": 0.1971126082771896, "grad_norm": 1.241417646408081, "learning_rate": 0.00019525865225270486, "loss": 1.2399, "step": 5120 }, { "epoch": 0.1973051010587103, "grad_norm": 1.5192227363586426, "learning_rate": 0.00019524944695796642, "loss": 1.3236, "step": 5125 }, { "epoch": 0.197497593840231, "grad_norm": 1.7465555667877197, "learning_rate": 0.00019524023295329704, "loss": 1.4247, "step": 5130 }, { "epoch": 0.1976900866217517, "grad_norm": 1.455175757408142, "learning_rate": 0.00019523101023953925, "loss": 1.5053, "step": 5135 }, { "epoch": 0.1978825794032724, "grad_norm": 2.164982318878174, "learning_rate": 0.00019522177881753643, "loss": 1.2796, "step": 5140 }, { "epoch": 0.19807507218479306, "grad_norm": 1.58863365650177, "learning_rate": 0.00019521253868813273, 
"loss": 1.349, "step": 5145 }, { "epoch": 0.19826756496631376, "grad_norm": 1.5380641222000122, "learning_rate": 0.0001952032898521731, "loss": 1.3107, "step": 5150 }, { "epoch": 0.19846005774783446, "grad_norm": 1.1790603399276733, "learning_rate": 0.00019519403231050327, "loss": 1.2178, "step": 5155 }, { "epoch": 0.19865255052935515, "grad_norm": 1.7905482053756714, "learning_rate": 0.0001951847660639698, "loss": 1.3579, "step": 5160 }, { "epoch": 0.19884504331087585, "grad_norm": 1.1262041330337524, "learning_rate": 0.00019517549111342, "loss": 1.2988, "step": 5165 }, { "epoch": 0.19903753609239652, "grad_norm": 1.6370010375976562, "learning_rate": 0.00019516620745970199, "loss": 1.2326, "step": 5170 }, { "epoch": 0.19923002887391722, "grad_norm": 1.1789335012435913, "learning_rate": 0.00019515691510366476, "loss": 1.1357, "step": 5175 }, { "epoch": 0.19942252165543792, "grad_norm": 1.167226791381836, "learning_rate": 0.000195147614046158, "loss": 1.4007, "step": 5180 }, { "epoch": 0.19961501443695862, "grad_norm": 1.3708933591842651, "learning_rate": 0.00019513830428803225, "loss": 1.3029, "step": 5185 }, { "epoch": 0.1998075072184793, "grad_norm": 1.6595165729522705, "learning_rate": 0.00019512898583013875, "loss": 1.3159, "step": 5190 }, { "epoch": 0.2, "grad_norm": 1.1252923011779785, "learning_rate": 0.00019511965867332972, "loss": 1.1894, "step": 5195 }, { "epoch": 0.20019249278152068, "grad_norm": 0.8440331816673279, "learning_rate": 0.00019511032281845797, "loss": 1.2108, "step": 5200 }, { "epoch": 0.20038498556304138, "grad_norm": 1.427147626876831, "learning_rate": 0.0001951009782663773, "loss": 1.197, "step": 5205 }, { "epoch": 0.20057747834456208, "grad_norm": 1.3509503602981567, "learning_rate": 0.00019509162501794213, "loss": 1.3348, "step": 5210 }, { "epoch": 0.20076997112608277, "grad_norm": 1.533103108406067, "learning_rate": 0.00019508226307400777, "loss": 1.1919, "step": 5215 }, { "epoch": 0.20096246390760347, "grad_norm": 1.1347332000732422, 
"learning_rate": 0.0001950728924354303, "loss": 1.2954, "step": 5220 }, { "epoch": 0.20115495668912417, "grad_norm": 1.65277099609375, "learning_rate": 0.00019506351310306664, "loss": 1.2686, "step": 5225 }, { "epoch": 0.20134744947064484, "grad_norm": 1.0601050853729248, "learning_rate": 0.00019505412507777442, "loss": 1.4066, "step": 5230 }, { "epoch": 0.20153994225216554, "grad_norm": 0.9429787397384644, "learning_rate": 0.00019504472836041217, "loss": 1.208, "step": 5235 }, { "epoch": 0.20173243503368624, "grad_norm": 0.9101033806800842, "learning_rate": 0.00019503532295183908, "loss": 1.3172, "step": 5240 }, { "epoch": 0.20192492781520693, "grad_norm": 1.1404805183410645, "learning_rate": 0.0001950259088529153, "loss": 1.1539, "step": 5245 }, { "epoch": 0.20211742059672763, "grad_norm": 1.1555522680282593, "learning_rate": 0.00019501648606450161, "loss": 1.3754, "step": 5250 }, { "epoch": 0.20230991337824833, "grad_norm": 1.5473912954330444, "learning_rate": 0.00019500705458745974, "loss": 1.1878, "step": 5255 }, { "epoch": 0.202502406159769, "grad_norm": 1.8766716718673706, "learning_rate": 0.00019499761442265208, "loss": 1.2445, "step": 5260 }, { "epoch": 0.2026948989412897, "grad_norm": 1.7951183319091797, "learning_rate": 0.00019498816557094188, "loss": 1.3496, "step": 5265 }, { "epoch": 0.2028873917228104, "grad_norm": 1.6615973711013794, "learning_rate": 0.00019497870803319317, "loss": 1.2919, "step": 5270 }, { "epoch": 0.2030798845043311, "grad_norm": 1.2885236740112305, "learning_rate": 0.00019496924181027078, "loss": 1.1807, "step": 5275 }, { "epoch": 0.2032723772858518, "grad_norm": 0.9546861052513123, "learning_rate": 0.00019495976690304034, "loss": 1.309, "step": 5280 }, { "epoch": 0.20346487006737246, "grad_norm": 1.6904189586639404, "learning_rate": 0.0001949502833123683, "loss": 1.2244, "step": 5285 }, { "epoch": 0.20365736284889316, "grad_norm": 1.394254446029663, "learning_rate": 0.0001949407910391218, "loss": 1.2877, "step": 5290 }, { 
"epoch": 0.20384985563041386, "grad_norm": 0.8937919735908508, "learning_rate": 0.0001949312900841689, "loss": 1.2389, "step": 5295 }, { "epoch": 0.20404234841193455, "grad_norm": 1.1096867322921753, "learning_rate": 0.00019492178044837837, "loss": 1.3766, "step": 5300 }, { "epoch": 0.20423484119345525, "grad_norm": 1.009758472442627, "learning_rate": 0.00019491226213261983, "loss": 1.2281, "step": 5305 }, { "epoch": 0.20442733397497595, "grad_norm": 1.4888296127319336, "learning_rate": 0.00019490273513776365, "loss": 1.0624, "step": 5310 }, { "epoch": 0.20461982675649662, "grad_norm": 1.4901612997055054, "learning_rate": 0.00019489319946468104, "loss": 1.1554, "step": 5315 }, { "epoch": 0.20481231953801732, "grad_norm": 1.2920863628387451, "learning_rate": 0.0001948836551142439, "loss": 1.2103, "step": 5320 }, { "epoch": 0.20500481231953802, "grad_norm": 1.3616580963134766, "learning_rate": 0.00019487410208732508, "loss": 1.3246, "step": 5325 }, { "epoch": 0.2051973051010587, "grad_norm": 1.0202921628952026, "learning_rate": 0.0001948645403847981, "loss": 1.3046, "step": 5330 }, { "epoch": 0.2053897978825794, "grad_norm": 1.0083186626434326, "learning_rate": 0.00019485497000753735, "loss": 1.2541, "step": 5335 }, { "epoch": 0.2055822906641001, "grad_norm": 1.137617588043213, "learning_rate": 0.0001948453909564179, "loss": 1.3143, "step": 5340 }, { "epoch": 0.20577478344562078, "grad_norm": 1.6331067085266113, "learning_rate": 0.00019483580323231578, "loss": 1.1129, "step": 5345 }, { "epoch": 0.20596727622714148, "grad_norm": 1.4032361507415771, "learning_rate": 0.00019482620683610767, "loss": 1.3412, "step": 5350 }, { "epoch": 0.20615976900866217, "grad_norm": 1.3207452297210693, "learning_rate": 0.00019481660176867108, "loss": 1.4614, "step": 5355 }, { "epoch": 0.20635226179018287, "grad_norm": 0.9236577749252319, "learning_rate": 0.0001948069880308844, "loss": 1.3131, "step": 5360 }, { "epoch": 0.20654475457170357, "grad_norm": 2.2021703720092773, 
"learning_rate": 0.0001947973656236267, "loss": 1.2434, "step": 5365 }, { "epoch": 0.20673724735322424, "grad_norm": 1.5074305534362793, "learning_rate": 0.00019478773454777789, "loss": 1.4204, "step": 5370 }, { "epoch": 0.20692974013474494, "grad_norm": 1.5073877573013306, "learning_rate": 0.00019477809480421865, "loss": 1.4193, "step": 5375 }, { "epoch": 0.20712223291626564, "grad_norm": 1.0522600412368774, "learning_rate": 0.00019476844639383049, "loss": 1.228, "step": 5380 }, { "epoch": 0.20731472569778633, "grad_norm": 1.1478843688964844, "learning_rate": 0.0001947587893174957, "loss": 1.2315, "step": 5385 }, { "epoch": 0.20750721847930703, "grad_norm": 0.922837495803833, "learning_rate": 0.00019474912357609733, "loss": 1.2567, "step": 5390 }, { "epoch": 0.20769971126082773, "grad_norm": 1.156615972518921, "learning_rate": 0.0001947394491705193, "loss": 1.443, "step": 5395 }, { "epoch": 0.2078922040423484, "grad_norm": 1.909555435180664, "learning_rate": 0.0001947297661016462, "loss": 1.1625, "step": 5400 }, { "epoch": 0.2080846968238691, "grad_norm": 1.8379411697387695, "learning_rate": 0.00019472007437036352, "loss": 1.3015, "step": 5405 }, { "epoch": 0.2082771896053898, "grad_norm": 1.188402771949768, "learning_rate": 0.00019471037397755754, "loss": 1.3294, "step": 5410 }, { "epoch": 0.2084696823869105, "grad_norm": 1.597538948059082, "learning_rate": 0.00019470066492411521, "loss": 1.3824, "step": 5415 }, { "epoch": 0.2086621751684312, "grad_norm": 1.0081026554107666, "learning_rate": 0.00019469094721092444, "loss": 1.2914, "step": 5420 }, { "epoch": 0.2088546679499519, "grad_norm": 1.3790476322174072, "learning_rate": 0.0001946812208388738, "loss": 1.2817, "step": 5425 }, { "epoch": 0.20904716073147256, "grad_norm": 1.777570128440857, "learning_rate": 0.00019467148580885272, "loss": 1.2253, "step": 5430 }, { "epoch": 0.20923965351299326, "grad_norm": 1.1196024417877197, "learning_rate": 0.00019466174212175142, "loss": 1.2956, "step": 5435 }, { "epoch": 
0.20943214629451395, "grad_norm": 2.940906524658203, "learning_rate": 0.00019465198977846086, "loss": 1.3912, "step": 5440 }, { "epoch": 0.20962463907603465, "grad_norm": 1.9075424671173096, "learning_rate": 0.00019464222877987286, "loss": 1.2518, "step": 5445 }, { "epoch": 0.20981713185755535, "grad_norm": 1.0282469987869263, "learning_rate": 0.00019463245912687996, "loss": 1.2569, "step": 5450 }, { "epoch": 0.21000962463907605, "grad_norm": 1.1651009321212769, "learning_rate": 0.0001946226808203756, "loss": 1.4676, "step": 5455 }, { "epoch": 0.21020211742059672, "grad_norm": 1.1911680698394775, "learning_rate": 0.00019461289386125388, "loss": 1.3822, "step": 5460 }, { "epoch": 0.21039461020211742, "grad_norm": 0.7187578082084656, "learning_rate": 0.00019460309825040974, "loss": 1.1462, "step": 5465 }, { "epoch": 0.2105871029836381, "grad_norm": 2.401764154434204, "learning_rate": 0.000194593293988739, "loss": 1.3187, "step": 5470 }, { "epoch": 0.2107795957651588, "grad_norm": 1.783333659172058, "learning_rate": 0.0001945834810771381, "loss": 1.3539, "step": 5475 }, { "epoch": 0.2109720885466795, "grad_norm": 0.9923986196517944, "learning_rate": 0.00019457365951650445, "loss": 1.4837, "step": 5480 }, { "epoch": 0.21116458132820018, "grad_norm": 1.0704642534255981, "learning_rate": 0.00019456382930773612, "loss": 1.2345, "step": 5485 }, { "epoch": 0.21135707410972088, "grad_norm": 1.5242959260940552, "learning_rate": 0.000194553990451732, "loss": 1.2113, "step": 5490 }, { "epoch": 0.21154956689124157, "grad_norm": 1.3185608386993408, "learning_rate": 0.00019454414294939185, "loss": 1.4083, "step": 5495 }, { "epoch": 0.21174205967276227, "grad_norm": 1.1448662281036377, "learning_rate": 0.00019453428680161615, "loss": 1.4091, "step": 5500 }, { "epoch": 0.21193455245428297, "grad_norm": 1.172396183013916, "learning_rate": 0.0001945244220093061, "loss": 1.1414, "step": 5505 }, { "epoch": 0.21212704523580367, "grad_norm": 2.988346576690674, "learning_rate": 
0.00019451454857336383, "loss": 1.3968, "step": 5510 }, { "epoch": 0.21231953801732434, "grad_norm": 0.8824801445007324, "learning_rate": 0.00019450466649469222, "loss": 1.2229, "step": 5515 }, { "epoch": 0.21251203079884504, "grad_norm": 1.7703745365142822, "learning_rate": 0.00019449477577419488, "loss": 1.3073, "step": 5520 }, { "epoch": 0.21270452358036573, "grad_norm": 1.3374749422073364, "learning_rate": 0.00019448487641277629, "loss": 1.3908, "step": 5525 }, { "epoch": 0.21289701636188643, "grad_norm": 1.2366503477096558, "learning_rate": 0.00019447496841134163, "loss": 1.2764, "step": 5530 }, { "epoch": 0.21308950914340713, "grad_norm": 1.242353081703186, "learning_rate": 0.00019446505177079696, "loss": 1.3136, "step": 5535 }, { "epoch": 0.21328200192492783, "grad_norm": 1.046583652496338, "learning_rate": 0.00019445512649204907, "loss": 1.1483, "step": 5540 }, { "epoch": 0.2134744947064485, "grad_norm": 1.6280517578125, "learning_rate": 0.00019444519257600558, "loss": 1.4076, "step": 5545 }, { "epoch": 0.2136669874879692, "grad_norm": 1.7472679615020752, "learning_rate": 0.00019443525002357486, "loss": 1.2842, "step": 5550 }, { "epoch": 0.2138594802694899, "grad_norm": 1.101185917854309, "learning_rate": 0.00019442529883566612, "loss": 1.3037, "step": 5555 }, { "epoch": 0.2140519730510106, "grad_norm": 1.8548834323883057, "learning_rate": 0.0001944153390131893, "loss": 1.4081, "step": 5560 }, { "epoch": 0.2142444658325313, "grad_norm": 1.4205219745635986, "learning_rate": 0.00019440537055705515, "loss": 1.3419, "step": 5565 }, { "epoch": 0.214436958614052, "grad_norm": 1.135933756828308, "learning_rate": 0.0001943953934681753, "loss": 0.9906, "step": 5570 }, { "epoch": 0.21462945139557266, "grad_norm": 1.7350742816925049, "learning_rate": 0.00019438540774746198, "loss": 1.1193, "step": 5575 }, { "epoch": 0.21482194417709335, "grad_norm": 1.891998291015625, "learning_rate": 0.00019437541339582836, "loss": 1.2271, "step": 5580 }, { "epoch": 
0.21501443695861405, "grad_norm": 1.2564722299575806, "learning_rate": 0.0001943654104141884, "loss": 1.5134, "step": 5585 }, { "epoch": 0.21520692974013475, "grad_norm": 1.3632197380065918, "learning_rate": 0.00019435539880345673, "loss": 1.1772, "step": 5590 }, { "epoch": 0.21539942252165545, "grad_norm": 1.8670414686203003, "learning_rate": 0.00019434537856454894, "loss": 1.2685, "step": 5595 }, { "epoch": 0.21559191530317612, "grad_norm": 2.5948314666748047, "learning_rate": 0.00019433534969838122, "loss": 1.487, "step": 5600 }, { "epoch": 0.21578440808469682, "grad_norm": 1.2312328815460205, "learning_rate": 0.00019432531220587071, "loss": 1.3394, "step": 5605 }, { "epoch": 0.2159769008662175, "grad_norm": 0.9402896165847778, "learning_rate": 0.0001943152660879352, "loss": 1.1471, "step": 5610 }, { "epoch": 0.2161693936477382, "grad_norm": 0.3871050477027893, "learning_rate": 0.00019430521134549346, "loss": 0.9597, "step": 5615 }, { "epoch": 0.2163618864292589, "grad_norm": 0.9395222067832947, "learning_rate": 0.0001942951479794648, "loss": 1.3055, "step": 5620 }, { "epoch": 0.2165543792107796, "grad_norm": 0.8928638696670532, "learning_rate": 0.00019428507599076955, "loss": 1.4099, "step": 5625 }, { "epoch": 0.21674687199230028, "grad_norm": 1.8891551494598389, "learning_rate": 0.00019427499538032865, "loss": 1.5009, "step": 5630 }, { "epoch": 0.21693936477382098, "grad_norm": 0.6684243679046631, "learning_rate": 0.00019426490614906394, "loss": 1.2251, "step": 5635 }, { "epoch": 0.21713185755534167, "grad_norm": 1.5765355825424194, "learning_rate": 0.00019425480829789803, "loss": 1.1114, "step": 5640 }, { "epoch": 0.21732435033686237, "grad_norm": 0.9966096878051758, "learning_rate": 0.00019424470182775427, "loss": 1.2907, "step": 5645 }, { "epoch": 0.21751684311838307, "grad_norm": 1.263469934463501, "learning_rate": 0.00019423458673955684, "loss": 1.1443, "step": 5650 }, { "epoch": 0.21770933589990377, "grad_norm": 1.5138813257217407, "learning_rate": 
0.0001942244630342307, "loss": 1.2699, "step": 5655 }, { "epoch": 0.21790182868142444, "grad_norm": 1.0215526819229126, "learning_rate": 0.00019421433071270156, "loss": 1.4265, "step": 5660 }, { "epoch": 0.21809432146294513, "grad_norm": 0.7587301731109619, "learning_rate": 0.00019420418977589605, "loss": 1.1706, "step": 5665 }, { "epoch": 0.21828681424446583, "grad_norm": 0.9531148672103882, "learning_rate": 0.0001941940402247414, "loss": 1.4041, "step": 5670 }, { "epoch": 0.21847930702598653, "grad_norm": 1.098739743232727, "learning_rate": 0.00019418388206016575, "loss": 1.3476, "step": 5675 }, { "epoch": 0.21867179980750723, "grad_norm": 1.0307271480560303, "learning_rate": 0.000194173715283098, "loss": 1.2333, "step": 5680 }, { "epoch": 0.2188642925890279, "grad_norm": 1.538256049156189, "learning_rate": 0.00019416353989446785, "loss": 1.4489, "step": 5685 }, { "epoch": 0.2190567853705486, "grad_norm": 1.5411714315414429, "learning_rate": 0.00019415335589520574, "loss": 1.2597, "step": 5690 }, { "epoch": 0.2192492781520693, "grad_norm": 1.3543205261230469, "learning_rate": 0.00019414316328624293, "loss": 1.265, "step": 5695 }, { "epoch": 0.21944177093359, "grad_norm": 0.7644770741462708, "learning_rate": 0.0001941329620685115, "loss": 1.1888, "step": 5700 }, { "epoch": 0.2196342637151107, "grad_norm": 2.1122093200683594, "learning_rate": 0.00019412275224294423, "loss": 1.1301, "step": 5705 }, { "epoch": 0.2198267564966314, "grad_norm": 1.4159448146820068, "learning_rate": 0.00019411253381047477, "loss": 1.209, "step": 5710 }, { "epoch": 0.22001924927815206, "grad_norm": 1.4212615489959717, "learning_rate": 0.00019410230677203755, "loss": 1.3268, "step": 5715 }, { "epoch": 0.22021174205967275, "grad_norm": 1.2042075395584106, "learning_rate": 0.00019409207112856778, "loss": 1.1976, "step": 5720 }, { "epoch": 0.22040423484119345, "grad_norm": 1.5765044689178467, "learning_rate": 0.00019408182688100136, "loss": 1.3631, "step": 5725 }, { "epoch": 
0.22059672762271415, "grad_norm": 2.197000026702881, "learning_rate": 0.00019407157403027514, "loss": 1.2964, "step": 5730 }, { "epoch": 0.22078922040423485, "grad_norm": 1.3434042930603027, "learning_rate": 0.00019406131257732664, "loss": 1.244, "step": 5735 }, { "epoch": 0.22098171318575555, "grad_norm": 1.2889900207519531, "learning_rate": 0.0001940510425230942, "loss": 1.1333, "step": 5740 }, { "epoch": 0.22117420596727622, "grad_norm": 0.8795220851898193, "learning_rate": 0.00019404076386851692, "loss": 1.2635, "step": 5745 }, { "epoch": 0.22136669874879691, "grad_norm": 1.0312747955322266, "learning_rate": 0.00019403047661453477, "loss": 1.3195, "step": 5750 }, { "epoch": 0.2215591915303176, "grad_norm": 1.5083264112472534, "learning_rate": 0.00019402018076208845, "loss": 1.3417, "step": 5755 }, { "epoch": 0.2217516843118383, "grad_norm": 1.1538232564926147, "learning_rate": 0.00019400987631211936, "loss": 1.2956, "step": 5760 }, { "epoch": 0.221944177093359, "grad_norm": 1.975381851196289, "learning_rate": 0.0001939995632655699, "loss": 1.4641, "step": 5765 }, { "epoch": 0.2221366698748797, "grad_norm": 1.3251721858978271, "learning_rate": 0.00019398924162338305, "loss": 1.3429, "step": 5770 }, { "epoch": 0.22232916265640038, "grad_norm": 1.1281229257583618, "learning_rate": 0.0001939789113865027, "loss": 1.2155, "step": 5775 }, { "epoch": 0.22252165543792107, "grad_norm": 2.6070075035095215, "learning_rate": 0.00019396857255587344, "loss": 1.2634, "step": 5780 }, { "epoch": 0.22271414821944177, "grad_norm": 1.0815184116363525, "learning_rate": 0.00019395822513244067, "loss": 1.1176, "step": 5785 }, { "epoch": 0.22290664100096247, "grad_norm": 2.819180965423584, "learning_rate": 0.0001939478691171507, "loss": 1.2624, "step": 5790 }, { "epoch": 0.22309913378248317, "grad_norm": 1.180055022239685, "learning_rate": 0.0001939375045109504, "loss": 1.3433, "step": 5795 }, { "epoch": 0.22329162656400384, "grad_norm": 1.1582396030426025, "learning_rate": 
0.0001939271313147876, "loss": 1.2815, "step": 5800 }, { "epoch": 0.22348411934552453, "grad_norm": 2.32379412651062, "learning_rate": 0.00019391674952961085, "loss": 1.4095, "step": 5805 }, { "epoch": 0.22367661212704523, "grad_norm": 1.5146657228469849, "learning_rate": 0.0001939063591563695, "loss": 1.2434, "step": 5810 }, { "epoch": 0.22386910490856593, "grad_norm": 1.6434500217437744, "learning_rate": 0.00019389596019601365, "loss": 1.1739, "step": 5815 }, { "epoch": 0.22406159769008663, "grad_norm": 1.7917993068695068, "learning_rate": 0.0001938855526494943, "loss": 1.5106, "step": 5820 }, { "epoch": 0.22425409047160733, "grad_norm": 1.10679030418396, "learning_rate": 0.00019387513651776303, "loss": 1.284, "step": 5825 }, { "epoch": 0.224446583253128, "grad_norm": 1.521506905555725, "learning_rate": 0.00019386471180177247, "loss": 1.4129, "step": 5830 }, { "epoch": 0.2246390760346487, "grad_norm": 1.4055581092834473, "learning_rate": 0.00019385427850247572, "loss": 1.2476, "step": 5835 }, { "epoch": 0.2248315688161694, "grad_norm": 0.9506363868713379, "learning_rate": 0.00019384383662082703, "loss": 1.3105, "step": 5840 }, { "epoch": 0.2250240615976901, "grad_norm": 1.354658842086792, "learning_rate": 0.00019383338615778107, "loss": 1.29, "step": 5845 }, { "epoch": 0.2252165543792108, "grad_norm": 0.8972203135490417, "learning_rate": 0.00019382292711429353, "loss": 1.3407, "step": 5850 }, { "epoch": 0.22540904716073148, "grad_norm": 0.9989115595817566, "learning_rate": 0.00019381245949132085, "loss": 1.1662, "step": 5855 }, { "epoch": 0.22560153994225216, "grad_norm": 1.1133052110671997, "learning_rate": 0.0001938019832898202, "loss": 1.2674, "step": 5860 }, { "epoch": 0.22579403272377285, "grad_norm": 1.3640556335449219, "learning_rate": 0.00019379149851074957, "loss": 1.1989, "step": 5865 }, { "epoch": 0.22598652550529355, "grad_norm": 1.2812589406967163, "learning_rate": 0.0001937810051550677, "loss": 1.4749, "step": 5870 }, { "epoch": 0.22617901828681425, 
"grad_norm": 1.223944902420044, "learning_rate": 0.00019377050322373412, "loss": 1.305, "step": 5875 }, { "epoch": 0.22637151106833495, "grad_norm": 1.3493690490722656, "learning_rate": 0.00019375999271770925, "loss": 1.458, "step": 5880 }, { "epoch": 0.22656400384985564, "grad_norm": 1.4042202234268188, "learning_rate": 0.0001937494736379541, "loss": 1.1714, "step": 5885 }, { "epoch": 0.22675649663137631, "grad_norm": 1.6239880323410034, "learning_rate": 0.00019373894598543066, "loss": 1.3224, "step": 5890 }, { "epoch": 0.226948989412897, "grad_norm": 1.096960425376892, "learning_rate": 0.00019372840976110154, "loss": 1.128, "step": 5895 }, { "epoch": 0.2271414821944177, "grad_norm": 1.6740233898162842, "learning_rate": 0.00019371786496593028, "loss": 1.195, "step": 5900 }, { "epoch": 0.2273339749759384, "grad_norm": 1.454030156135559, "learning_rate": 0.00019370731160088105, "loss": 1.2641, "step": 5905 }, { "epoch": 0.2275264677574591, "grad_norm": 1.4465221166610718, "learning_rate": 0.00019369674966691897, "loss": 1.331, "step": 5910 }, { "epoch": 0.22771896053897978, "grad_norm": 1.6115851402282715, "learning_rate": 0.00019368617916500978, "loss": 1.4061, "step": 5915 }, { "epoch": 0.22791145332050047, "grad_norm": 1.0165706872940063, "learning_rate": 0.00019367560009612013, "loss": 1.177, "step": 5920 }, { "epoch": 0.22810394610202117, "grad_norm": 1.5200728178024292, "learning_rate": 0.00019366501246121737, "loss": 1.1323, "step": 5925 }, { "epoch": 0.22829643888354187, "grad_norm": 1.4613386392593384, "learning_rate": 0.00019365441626126976, "loss": 1.4626, "step": 5930 }, { "epoch": 0.22848893166506257, "grad_norm": 1.2502466440200806, "learning_rate": 0.00019364381149724613, "loss": 1.2797, "step": 5935 }, { "epoch": 0.22868142444658326, "grad_norm": 1.2946960926055908, "learning_rate": 0.0001936331981701163, "loss": 1.3844, "step": 5940 }, { "epoch": 0.22887391722810393, "grad_norm": 1.2478231191635132, "learning_rate": 0.00019362257628085074, "loss": 
1.2855, "step": 5945 }, { "epoch": 0.22906641000962463, "grad_norm": 1.0097830295562744, "learning_rate": 0.0001936119458304208, "loss": 1.1223, "step": 5950 }, { "epoch": 0.22925890279114533, "grad_norm": 1.3235141038894653, "learning_rate": 0.00019360130681979852, "loss": 1.284, "step": 5955 }, { "epoch": 0.22945139557266603, "grad_norm": 1.6869986057281494, "learning_rate": 0.00019359065924995678, "loss": 1.517, "step": 5960 }, { "epoch": 0.22964388835418673, "grad_norm": 0.9644334316253662, "learning_rate": 0.00019358000312186925, "loss": 1.0607, "step": 5965 }, { "epoch": 0.22983638113570742, "grad_norm": 1.063192367553711, "learning_rate": 0.0001935693384365103, "loss": 0.9187, "step": 5970 }, { "epoch": 0.2300288739172281, "grad_norm": 1.0339081287384033, "learning_rate": 0.00019355866519485523, "loss": 1.2946, "step": 5975 }, { "epoch": 0.2302213666987488, "grad_norm": 1.3194791078567505, "learning_rate": 0.00019354798339788, "loss": 1.4293, "step": 5980 }, { "epoch": 0.2304138594802695, "grad_norm": 1.8870794773101807, "learning_rate": 0.00019353729304656136, "loss": 1.4124, "step": 5985 }, { "epoch": 0.2306063522617902, "grad_norm": 1.132385015487671, "learning_rate": 0.00019352659414187694, "loss": 1.1949, "step": 5990 }, { "epoch": 0.23079884504331089, "grad_norm": 2.763613700866699, "learning_rate": 0.000193515886684805, "loss": 1.2341, "step": 5995 }, { "epoch": 0.23099133782483156, "grad_norm": 1.6793404817581177, "learning_rate": 0.00019350517067632473, "loss": 1.3597, "step": 6000 }, { "epoch": 0.23118383060635225, "grad_norm": 1.1538963317871094, "learning_rate": 0.000193494446117416, "loss": 1.1981, "step": 6005 }, { "epoch": 0.23137632338787295, "grad_norm": 1.0233584642410278, "learning_rate": 0.00019348371300905955, "loss": 1.2821, "step": 6010 }, { "epoch": 0.23156881616939365, "grad_norm": 1.3905096054077148, "learning_rate": 0.0001934729713522368, "loss": 1.3471, "step": 6015 }, { "epoch": 0.23176130895091435, "grad_norm": 
1.345563292503357, "learning_rate": 0.00019346222114793, "loss": 1.0454, "step": 6020 }, { "epoch": 0.23195380173243504, "grad_norm": 0.739811897277832, "learning_rate": 0.00019345146239712225, "loss": 1.3125, "step": 6025 }, { "epoch": 0.23214629451395571, "grad_norm": 1.977918028831482, "learning_rate": 0.0001934406951007973, "loss": 1.3328, "step": 6030 }, { "epoch": 0.2323387872954764, "grad_norm": 0.9505223035812378, "learning_rate": 0.00019342991925993977, "loss": 1.1388, "step": 6035 }, { "epoch": 0.2325312800769971, "grad_norm": 1.257755160331726, "learning_rate": 0.00019341913487553502, "loss": 1.3064, "step": 6040 }, { "epoch": 0.2327237728585178, "grad_norm": 1.2003203630447388, "learning_rate": 0.00019340834194856926, "loss": 1.4369, "step": 6045 }, { "epoch": 0.2329162656400385, "grad_norm": 1.2289738655090332, "learning_rate": 0.0001933975404800294, "loss": 1.1462, "step": 6050 }, { "epoch": 0.2331087584215592, "grad_norm": 1.227171540260315, "learning_rate": 0.00019338673047090317, "loss": 1.1829, "step": 6055 }, { "epoch": 0.23330125120307987, "grad_norm": 1.2766560316085815, "learning_rate": 0.00019337591192217904, "loss": 1.2572, "step": 6060 }, { "epoch": 0.23349374398460057, "grad_norm": 2.6716904640197754, "learning_rate": 0.00019336508483484634, "loss": 1.0195, "step": 6065 }, { "epoch": 0.23368623676612127, "grad_norm": 1.1586931943893433, "learning_rate": 0.00019335424920989512, "loss": 1.4932, "step": 6070 }, { "epoch": 0.23387872954764197, "grad_norm": 1.0196670293807983, "learning_rate": 0.00019334340504831624, "loss": 1.3497, "step": 6075 }, { "epoch": 0.23407122232916266, "grad_norm": 1.6527109146118164, "learning_rate": 0.00019333255235110127, "loss": 1.1239, "step": 6080 }, { "epoch": 0.23426371511068336, "grad_norm": 0.9913870096206665, "learning_rate": 0.00019332169111924271, "loss": 1.2757, "step": 6085 }, { "epoch": 0.23445620789220403, "grad_norm": 1.1027697324752808, "learning_rate": 0.00019331082135373367, "loss": 1.2512, 
"step": 6090 }, { "epoch": 0.23464870067372473, "grad_norm": 1.9269218444824219, "learning_rate": 0.00019329994305556815, "loss": 1.4698, "step": 6095 }, { "epoch": 0.23484119345524543, "grad_norm": 1.1504942178726196, "learning_rate": 0.00019328905622574086, "loss": 1.4844, "step": 6100 }, { "epoch": 0.23503368623676613, "grad_norm": 1.1164321899414062, "learning_rate": 0.0001932781608652474, "loss": 1.2972, "step": 6105 }, { "epoch": 0.23522617901828682, "grad_norm": 1.283000111579895, "learning_rate": 0.00019326725697508407, "loss": 1.3117, "step": 6110 }, { "epoch": 0.2354186717998075, "grad_norm": 1.3553595542907715, "learning_rate": 0.00019325634455624787, "loss": 1.027, "step": 6115 }, { "epoch": 0.2356111645813282, "grad_norm": 2.1605517864227295, "learning_rate": 0.00019324542360973674, "loss": 1.2211, "step": 6120 }, { "epoch": 0.2358036573628489, "grad_norm": 1.1028283834457397, "learning_rate": 0.00019323449413654933, "loss": 1.3034, "step": 6125 }, { "epoch": 0.2359961501443696, "grad_norm": 1.1728841066360474, "learning_rate": 0.00019322355613768505, "loss": 1.3135, "step": 6130 }, { "epoch": 0.23618864292589029, "grad_norm": 1.7304178476333618, "learning_rate": 0.0001932126096141441, "loss": 1.3516, "step": 6135 }, { "epoch": 0.23638113570741098, "grad_norm": 1.3326451778411865, "learning_rate": 0.00019320165456692748, "loss": 1.3371, "step": 6140 }, { "epoch": 0.23657362848893165, "grad_norm": 1.6894330978393555, "learning_rate": 0.00019319069099703697, "loss": 1.2126, "step": 6145 }, { "epoch": 0.23676612127045235, "grad_norm": 1.7248213291168213, "learning_rate": 0.0001931797189054751, "loss": 1.193, "step": 6150 }, { "epoch": 0.23695861405197305, "grad_norm": 1.1517174243927002, "learning_rate": 0.0001931687382932452, "loss": 1.1472, "step": 6155 }, { "epoch": 0.23715110683349375, "grad_norm": 2.4606590270996094, "learning_rate": 0.00019315774916135134, "loss": 1.524, "step": 6160 }, { "epoch": 0.23734359961501444, "grad_norm": 
1.6130386590957642, "learning_rate": 0.00019314675151079844, "loss": 1.052, "step": 6165 }, { "epoch": 0.23753609239653514, "grad_norm": 1.3845412731170654, "learning_rate": 0.00019313574534259216, "loss": 1.2557, "step": 6170 }, { "epoch": 0.2377285851780558, "grad_norm": 1.3509567975997925, "learning_rate": 0.00019312473065773893, "loss": 1.3083, "step": 6175 }, { "epoch": 0.2379210779595765, "grad_norm": 1.358113408088684, "learning_rate": 0.000193113707457246, "loss": 1.2226, "step": 6180 }, { "epoch": 0.2381135707410972, "grad_norm": 0.9598337411880493, "learning_rate": 0.00019310267574212134, "loss": 1.1861, "step": 6185 }, { "epoch": 0.2383060635226179, "grad_norm": 1.347159743309021, "learning_rate": 0.0001930916355133737, "loss": 1.2782, "step": 6190 }, { "epoch": 0.2384985563041386, "grad_norm": 1.0227164030075073, "learning_rate": 0.0001930805867720127, "loss": 1.2909, "step": 6195 }, { "epoch": 0.2386910490856593, "grad_norm": 1.8373135328292847, "learning_rate": 0.00019306952951904865, "loss": 1.3371, "step": 6200 }, { "epoch": 0.23888354186717997, "grad_norm": 2.130218267440796, "learning_rate": 0.00019305846375549263, "loss": 1.3275, "step": 6205 }, { "epoch": 0.23907603464870067, "grad_norm": 1.3699109554290771, "learning_rate": 0.00019304738948235656, "loss": 1.172, "step": 6210 }, { "epoch": 0.23926852743022137, "grad_norm": 1.8254964351654053, "learning_rate": 0.0001930363067006531, "loss": 1.166, "step": 6215 }, { "epoch": 0.23946102021174206, "grad_norm": 2.6475026607513428, "learning_rate": 0.00019302521541139571, "loss": 1.3168, "step": 6220 }, { "epoch": 0.23965351299326276, "grad_norm": 1.4869440793991089, "learning_rate": 0.0001930141156155986, "loss": 1.1112, "step": 6225 }, { "epoch": 0.23984600577478343, "grad_norm": 1.0316526889801025, "learning_rate": 0.00019300300731427678, "loss": 1.3845, "step": 6230 }, { "epoch": 0.24003849855630413, "grad_norm": 1.1549556255340576, "learning_rate": 0.00019299189050844603, "loss": 1.378, "step": 
6235 }, { "epoch": 0.24023099133782483, "grad_norm": 1.9833987951278687, "learning_rate": 0.00019298076519912294, "loss": 1.2631, "step": 6240 }, { "epoch": 0.24042348411934553, "grad_norm": 1.1354988813400269, "learning_rate": 0.00019296963138732478, "loss": 1.6525, "step": 6245 }, { "epoch": 0.24061597690086622, "grad_norm": 1.6483670473098755, "learning_rate": 0.0001929584890740697, "loss": 0.9828, "step": 6250 }, { "epoch": 0.24080846968238692, "grad_norm": 1.537610650062561, "learning_rate": 0.00019294733826037659, "loss": 1.3566, "step": 6255 }, { "epoch": 0.2410009624639076, "grad_norm": 1.207406759262085, "learning_rate": 0.0001929361789472651, "loss": 1.3306, "step": 6260 }, { "epoch": 0.2411934552454283, "grad_norm": 1.4772666692733765, "learning_rate": 0.00019292501113575572, "loss": 1.3117, "step": 6265 }, { "epoch": 0.241385948026949, "grad_norm": 1.8285613059997559, "learning_rate": 0.00019291383482686962, "loss": 1.3711, "step": 6270 }, { "epoch": 0.24157844080846969, "grad_norm": 0.9223503470420837, "learning_rate": 0.00019290265002162884, "loss": 1.1712, "step": 6275 }, { "epoch": 0.24177093358999038, "grad_norm": 2.1818087100982666, "learning_rate": 0.00019289145672105612, "loss": 1.1596, "step": 6280 }, { "epoch": 0.24196342637151108, "grad_norm": 0.8749092817306519, "learning_rate": 0.00019288025492617504, "loss": 1.0726, "step": 6285 }, { "epoch": 0.24215591915303175, "grad_norm": 1.1598855257034302, "learning_rate": 0.00019286904463800995, "loss": 1.2931, "step": 6290 }, { "epoch": 0.24234841193455245, "grad_norm": 1.4357101917266846, "learning_rate": 0.0001928578258575859, "loss": 1.2612, "step": 6295 }, { "epoch": 0.24254090471607315, "grad_norm": 0.9731203317642212, "learning_rate": 0.0001928465985859288, "loss": 1.178, "step": 6300 }, { "epoch": 0.24273339749759384, "grad_norm": 1.1217381954193115, "learning_rate": 0.00019283536282406534, "loss": 1.285, "step": 6305 }, { "epoch": 0.24292589027911454, "grad_norm": 1.415860891342163, 
"learning_rate": 0.0001928241185730229, "loss": 1.399, "step": 6310 }, { "epoch": 0.2431183830606352, "grad_norm": 0.9067175388336182, "learning_rate": 0.00019281286583382973, "loss": 1.2336, "step": 6315 }, { "epoch": 0.2433108758421559, "grad_norm": 1.6320233345031738, "learning_rate": 0.0001928016046075148, "loss": 1.4348, "step": 6320 }, { "epoch": 0.2435033686236766, "grad_norm": 1.3945854902267456, "learning_rate": 0.0001927903348951079, "loss": 1.1614, "step": 6325 }, { "epoch": 0.2436958614051973, "grad_norm": 1.37948477268219, "learning_rate": 0.00019277905669763952, "loss": 1.2058, "step": 6330 }, { "epoch": 0.243888354186718, "grad_norm": 1.3325083255767822, "learning_rate": 0.00019276777001614104, "loss": 1.2737, "step": 6335 }, { "epoch": 0.2440808469682387, "grad_norm": 1.5902581214904785, "learning_rate": 0.00019275647485164453, "loss": 1.3706, "step": 6340 }, { "epoch": 0.24427333974975937, "grad_norm": 1.1309142112731934, "learning_rate": 0.00019274517120518284, "loss": 1.2408, "step": 6345 }, { "epoch": 0.24446583253128007, "grad_norm": 1.9998489618301392, "learning_rate": 0.0001927338590777896, "loss": 1.3079, "step": 6350 }, { "epoch": 0.24465832531280077, "grad_norm": 1.569667100906372, "learning_rate": 0.00019272253847049927, "loss": 1.2365, "step": 6355 }, { "epoch": 0.24485081809432147, "grad_norm": 1.2294694185256958, "learning_rate": 0.00019271120938434702, "loss": 1.3544, "step": 6360 }, { "epoch": 0.24504331087584216, "grad_norm": 1.9876806735992432, "learning_rate": 0.00019269987182036883, "loss": 1.3675, "step": 6365 }, { "epoch": 0.24523580365736286, "grad_norm": 1.3317819833755493, "learning_rate": 0.0001926885257796015, "loss": 1.0949, "step": 6370 }, { "epoch": 0.24542829643888353, "grad_norm": 1.7602546215057373, "learning_rate": 0.00019267717126308242, "loss": 1.3168, "step": 6375 }, { "epoch": 0.24562078922040423, "grad_norm": 1.5651274919509888, "learning_rate": 0.00019266580827184996, "loss": 1.2802, "step": 6380 }, { "epoch": 
0.24581328200192493, "grad_norm": 0.9537544846534729, "learning_rate": 0.0001926544368069432, "loss": 1.1876, "step": 6385 }, { "epoch": 0.24600577478344562, "grad_norm": 0.9649773240089417, "learning_rate": 0.000192643056869402, "loss": 1.1378, "step": 6390 }, { "epoch": 0.24619826756496632, "grad_norm": 1.6363686323165894, "learning_rate": 0.00019263166846026692, "loss": 1.3284, "step": 6395 }, { "epoch": 0.24639076034648702, "grad_norm": 1.748897910118103, "learning_rate": 0.00019262027158057943, "loss": 1.4314, "step": 6400 }, { "epoch": 0.2465832531280077, "grad_norm": 2.138967990875244, "learning_rate": 0.00019260886623138164, "loss": 1.2244, "step": 6405 }, { "epoch": 0.2467757459095284, "grad_norm": 2.517312526702881, "learning_rate": 0.0001925974524137165, "loss": 1.3394, "step": 6410 }, { "epoch": 0.24696823869104909, "grad_norm": 1.7510714530944824, "learning_rate": 0.00019258603012862772, "loss": 1.3369, "step": 6415 }, { "epoch": 0.24716073147256978, "grad_norm": 1.1651504039764404, "learning_rate": 0.00019257459937715985, "loss": 1.2953, "step": 6420 }, { "epoch": 0.24735322425409048, "grad_norm": 1.325554609298706, "learning_rate": 0.0001925631601603581, "loss": 1.3062, "step": 6425 }, { "epoch": 0.24754571703561115, "grad_norm": 1.0340043306350708, "learning_rate": 0.00019255171247926852, "loss": 1.337, "step": 6430 }, { "epoch": 0.24773820981713185, "grad_norm": 1.677131175994873, "learning_rate": 0.00019254025633493792, "loss": 1.3179, "step": 6435 }, { "epoch": 0.24793070259865255, "grad_norm": 2.475339651107788, "learning_rate": 0.00019252879172841395, "loss": 1.4765, "step": 6440 }, { "epoch": 0.24812319538017324, "grad_norm": 1.1302917003631592, "learning_rate": 0.00019251731866074486, "loss": 1.3029, "step": 6445 }, { "epoch": 0.24831568816169394, "grad_norm": 1.3425379991531372, "learning_rate": 0.0001925058371329799, "loss": 1.1263, "step": 6450 }, { "epoch": 0.24850818094321464, "grad_norm": 1.0058633089065552, "learning_rate": 
0.0001924943471461689, "loss": 1.1059, "step": 6455 }, { "epoch": 0.2487006737247353, "grad_norm": 1.9793190956115723, "learning_rate": 0.0001924828487013626, "loss": 1.5268, "step": 6460 }, { "epoch": 0.248893166506256, "grad_norm": 1.0673744678497314, "learning_rate": 0.00019247134179961242, "loss": 1.2199, "step": 6465 }, { "epoch": 0.2490856592877767, "grad_norm": 1.1182838678359985, "learning_rate": 0.00019245982644197057, "loss": 1.5456, "step": 6470 }, { "epoch": 0.2492781520692974, "grad_norm": 0.9264312982559204, "learning_rate": 0.00019244830262949014, "loss": 1.2367, "step": 6475 }, { "epoch": 0.2494706448508181, "grad_norm": 1.2094528675079346, "learning_rate": 0.00019243677036322478, "loss": 1.2026, "step": 6480 }, { "epoch": 0.2496631376323388, "grad_norm": 1.275902509689331, "learning_rate": 0.00019242522964422917, "loss": 1.206, "step": 6485 }, { "epoch": 0.24985563041385947, "grad_norm": 1.515559434890747, "learning_rate": 0.00019241368047355853, "loss": 1.2222, "step": 6490 }, { "epoch": 0.25004812319538017, "grad_norm": 0.9974495768547058, "learning_rate": 0.000192402122852269, "loss": 1.5274, "step": 6495 }, { "epoch": 0.2502406159769009, "grad_norm": 1.8940407037734985, "learning_rate": 0.00019239055678141746, "loss": 1.3639, "step": 6500 }, { "epoch": 0.25043310875842156, "grad_norm": 1.7484371662139893, "learning_rate": 0.00019237898226206153, "loss": 1.3517, "step": 6505 }, { "epoch": 0.25062560153994223, "grad_norm": 1.004660725593567, "learning_rate": 0.00019236739929525963, "loss": 1.0603, "step": 6510 }, { "epoch": 0.25081809432146296, "grad_norm": 0.9729489684104919, "learning_rate": 0.00019235580788207093, "loss": 1.3252, "step": 6515 }, { "epoch": 0.25101058710298363, "grad_norm": 0.4645654857158661, "learning_rate": 0.00019234420802355539, "loss": 1.1804, "step": 6520 }, { "epoch": 0.25120307988450435, "grad_norm": 1.0810743570327759, "learning_rate": 0.00019233259972077378, "loss": 1.3045, "step": 6525 }, { "epoch": 
0.251395572666025, "grad_norm": 1.1666224002838135, "learning_rate": 0.00019232098297478756, "loss": 1.324, "step": 6530 }, { "epoch": 0.2515880654475457, "grad_norm": 1.06947660446167, "learning_rate": 0.000192309357786659, "loss": 1.3131, "step": 6535 }, { "epoch": 0.2517805582290664, "grad_norm": 1.1774028539657593, "learning_rate": 0.0001922977241574512, "loss": 1.301, "step": 6540 }, { "epoch": 0.2519730510105871, "grad_norm": 1.528041958808899, "learning_rate": 0.0001922860820882279, "loss": 1.2542, "step": 6545 }, { "epoch": 0.2521655437921078, "grad_norm": 1.1932915449142456, "learning_rate": 0.00019227443158005377, "loss": 1.125, "step": 6550 }, { "epoch": 0.2523580365736285, "grad_norm": 1.3258370161056519, "learning_rate": 0.0001922627726339941, "loss": 1.3776, "step": 6555 }, { "epoch": 0.25255052935514916, "grad_norm": 0.994076132774353, "learning_rate": 0.0001922511052511151, "loss": 1.0908, "step": 6560 }, { "epoch": 0.2527430221366699, "grad_norm": 1.0820032358169556, "learning_rate": 0.00019223942943248358, "loss": 1.215, "step": 6565 }, { "epoch": 0.25293551491819055, "grad_norm": 0.9792138338088989, "learning_rate": 0.00019222774517916734, "loss": 1.2413, "step": 6570 }, { "epoch": 0.2531280076997113, "grad_norm": 1.1704801321029663, "learning_rate": 0.0001922160524922347, "loss": 1.5203, "step": 6575 }, { "epoch": 0.25332050048123195, "grad_norm": 1.6249198913574219, "learning_rate": 0.00019220435137275494, "loss": 1.2771, "step": 6580 }, { "epoch": 0.2535129932627527, "grad_norm": 1.3218034505844116, "learning_rate": 0.00019219264182179804, "loss": 1.4433, "step": 6585 }, { "epoch": 0.25370548604427334, "grad_norm": 1.7230724096298218, "learning_rate": 0.0001921809238404348, "loss": 1.1069, "step": 6590 }, { "epoch": 0.253897978825794, "grad_norm": 1.3148738145828247, "learning_rate": 0.00019216919742973669, "loss": 1.2386, "step": 6595 }, { "epoch": 0.25409047160731474, "grad_norm": 1.257513403892517, "learning_rate": 0.00019215746259077605, 
"loss": 1.3476, "step": 6600 }, { "epoch": 0.2542829643888354, "grad_norm": 0.965403139591217, "learning_rate": 0.00019214571932462592, "loss": 1.1045, "step": 6605 }, { "epoch": 0.25447545717035613, "grad_norm": 0.8903887867927551, "learning_rate": 0.0001921339676323602, "loss": 1.1481, "step": 6610 }, { "epoch": 0.2546679499518768, "grad_norm": 1.284529209136963, "learning_rate": 0.00019212220751505345, "loss": 1.3179, "step": 6615 }, { "epoch": 0.2548604427333975, "grad_norm": 2.3491082191467285, "learning_rate": 0.0001921104389737811, "loss": 1.3042, "step": 6620 }, { "epoch": 0.2550529355149182, "grad_norm": 1.4170057773590088, "learning_rate": 0.00019209866200961927, "loss": 1.3775, "step": 6625 }, { "epoch": 0.25524542829643887, "grad_norm": 1.4182847738265991, "learning_rate": 0.00019208687662364488, "loss": 1.3895, "step": 6630 }, { "epoch": 0.2554379210779596, "grad_norm": 1.2162110805511475, "learning_rate": 0.00019207508281693568, "loss": 1.0754, "step": 6635 }, { "epoch": 0.25563041385948027, "grad_norm": 1.473873257637024, "learning_rate": 0.00019206328059057006, "loss": 1.3323, "step": 6640 }, { "epoch": 0.25582290664100094, "grad_norm": 1.2990386486053467, "learning_rate": 0.0001920514699456273, "loss": 1.2304, "step": 6645 }, { "epoch": 0.25601539942252166, "grad_norm": 1.2828303575515747, "learning_rate": 0.00019203965088318743, "loss": 1.2566, "step": 6650 }, { "epoch": 0.25620789220404233, "grad_norm": 0.9165570735931396, "learning_rate": 0.00019202782340433115, "loss": 1.2186, "step": 6655 }, { "epoch": 0.25640038498556306, "grad_norm": 2.0381886959075928, "learning_rate": 0.00019201598751014006, "loss": 1.114, "step": 6660 }, { "epoch": 0.2565928777670837, "grad_norm": 1.252790093421936, "learning_rate": 0.00019200414320169647, "loss": 1.2354, "step": 6665 }, { "epoch": 0.25678537054860445, "grad_norm": 1.1557594537734985, "learning_rate": 0.00019199229048008347, "loss": 1.3652, "step": 6670 }, { "epoch": 0.2569778633301251, "grad_norm": 
1.356181025505066, "learning_rate": 0.0001919804293463849, "loss": 1.1026, "step": 6675 }, { "epoch": 0.2571703561116458, "grad_norm": 1.2493314743041992, "learning_rate": 0.00019196855980168536, "loss": 1.2225, "step": 6680 }, { "epoch": 0.2573628488931665, "grad_norm": 1.7480677366256714, "learning_rate": 0.00019195668184707025, "loss": 1.2898, "step": 6685 }, { "epoch": 0.2575553416746872, "grad_norm": 1.0522620677947998, "learning_rate": 0.00019194479548362577, "loss": 1.1404, "step": 6690 }, { "epoch": 0.2577478344562079, "grad_norm": 1.4085676670074463, "learning_rate": 0.00019193290071243882, "loss": 1.5024, "step": 6695 }, { "epoch": 0.2579403272377286, "grad_norm": 1.393096923828125, "learning_rate": 0.0001919209975345971, "loss": 1.2555, "step": 6700 }, { "epoch": 0.25813282001924925, "grad_norm": 1.5740808248519897, "learning_rate": 0.00019190908595118907, "loss": 1.2362, "step": 6705 }, { "epoch": 0.25832531280077, "grad_norm": 1.3243273496627808, "learning_rate": 0.00019189716596330395, "loss": 1.2517, "step": 6710 }, { "epoch": 0.25851780558229065, "grad_norm": 2.5867626667022705, "learning_rate": 0.00019188523757203177, "loss": 1.3509, "step": 6715 }, { "epoch": 0.2587102983638114, "grad_norm": 1.450181484222412, "learning_rate": 0.00019187330077846334, "loss": 1.3451, "step": 6720 }, { "epoch": 0.25890279114533205, "grad_norm": 1.4387754201889038, "learning_rate": 0.0001918613555836901, "loss": 1.2518, "step": 6725 }, { "epoch": 0.25909528392685277, "grad_norm": 1.427882432937622, "learning_rate": 0.00019184940198880448, "loss": 1.235, "step": 6730 }, { "epoch": 0.25928777670837344, "grad_norm": 1.060436487197876, "learning_rate": 0.00019183743999489947, "loss": 1.4583, "step": 6735 }, { "epoch": 0.2594802694898941, "grad_norm": 1.0780494213104248, "learning_rate": 0.00019182546960306893, "loss": 1.1134, "step": 6740 }, { "epoch": 0.25967276227141484, "grad_norm": 1.3795710802078247, "learning_rate": 0.0001918134908144075, "loss": 1.2979, "step": 
6745 }, { "epoch": 0.2598652550529355, "grad_norm": 2.0972957611083984, "learning_rate": 0.00019180150363001051, "loss": 1.6512, "step": 6750 }, { "epoch": 0.26005774783445623, "grad_norm": 1.129204273223877, "learning_rate": 0.00019178950805097416, "loss": 1.2263, "step": 6755 }, { "epoch": 0.2602502406159769, "grad_norm": 0.8816843628883362, "learning_rate": 0.00019177750407839536, "loss": 1.2265, "step": 6760 }, { "epoch": 0.26044273339749757, "grad_norm": 1.5167860984802246, "learning_rate": 0.00019176549171337178, "loss": 1.226, "step": 6765 }, { "epoch": 0.2606352261790183, "grad_norm": 1.329172968864441, "learning_rate": 0.00019175347095700188, "loss": 1.3375, "step": 6770 }, { "epoch": 0.26082771896053897, "grad_norm": 1.8215051889419556, "learning_rate": 0.00019174144181038485, "loss": 1.2453, "step": 6775 }, { "epoch": 0.2610202117420597, "grad_norm": 1.147878646850586, "learning_rate": 0.00019172940427462072, "loss": 1.3137, "step": 6780 }, { "epoch": 0.26121270452358036, "grad_norm": 1.5783206224441528, "learning_rate": 0.0001917173583508102, "loss": 1.1803, "step": 6785 }, { "epoch": 0.26140519730510103, "grad_norm": 1.7433182001113892, "learning_rate": 0.00019170530404005485, "loss": 1.171, "step": 6790 }, { "epoch": 0.26159769008662176, "grad_norm": 1.5278960466384888, "learning_rate": 0.0001916932413434569, "loss": 1.2274, "step": 6795 }, { "epoch": 0.26179018286814243, "grad_norm": 1.375710368156433, "learning_rate": 0.00019168117026211948, "loss": 1.241, "step": 6800 }, { "epoch": 0.26198267564966315, "grad_norm": 2.146165370941162, "learning_rate": 0.00019166909079714636, "loss": 1.2778, "step": 6805 }, { "epoch": 0.2621751684311838, "grad_norm": 1.7670506238937378, "learning_rate": 0.00019165700294964216, "loss": 1.3293, "step": 6810 }, { "epoch": 0.26236766121270455, "grad_norm": 1.5492186546325684, "learning_rate": 0.00019164490672071217, "loss": 1.2808, "step": 6815 }, { "epoch": 0.2625601539942252, "grad_norm": 1.4138727188110352, 
"learning_rate": 0.00019163280211146257, "loss": 1.2352, "step": 6820 }, { "epoch": 0.2627526467757459, "grad_norm": 1.185674786567688, "learning_rate": 0.00019162068912300024, "loss": 1.1883, "step": 6825 }, { "epoch": 0.2629451395572666, "grad_norm": 1.717349886894226, "learning_rate": 0.0001916085677564328, "loss": 1.1329, "step": 6830 }, { "epoch": 0.2631376323387873, "grad_norm": 1.1391080617904663, "learning_rate": 0.00019159643801286872, "loss": 1.4104, "step": 6835 }, { "epoch": 0.263330125120308, "grad_norm": 1.0915690660476685, "learning_rate": 0.00019158429989341716, "loss": 1.2813, "step": 6840 }, { "epoch": 0.2635226179018287, "grad_norm": 1.120492696762085, "learning_rate": 0.000191572153399188, "loss": 1.2669, "step": 6845 }, { "epoch": 0.26371511068334935, "grad_norm": 1.0648150444030762, "learning_rate": 0.0001915599985312921, "loss": 1.2581, "step": 6850 }, { "epoch": 0.2639076034648701, "grad_norm": 1.7173513174057007, "learning_rate": 0.0001915478352908408, "loss": 1.2081, "step": 6855 }, { "epoch": 0.26410009624639075, "grad_norm": 1.3801002502441406, "learning_rate": 0.00019153566367894644, "loss": 1.4625, "step": 6860 }, { "epoch": 0.2642925890279115, "grad_norm": 2.5863940715789795, "learning_rate": 0.00019152348369672203, "loss": 1.4777, "step": 6865 }, { "epoch": 0.26448508180943214, "grad_norm": 1.5995707511901855, "learning_rate": 0.0001915112953452813, "loss": 1.2089, "step": 6870 }, { "epoch": 0.2646775745909528, "grad_norm": 1.2661023139953613, "learning_rate": 0.0001914990986257388, "loss": 1.1937, "step": 6875 }, { "epoch": 0.26487006737247354, "grad_norm": 1.4782702922821045, "learning_rate": 0.00019148689353920987, "loss": 1.2462, "step": 6880 }, { "epoch": 0.2650625601539942, "grad_norm": 1.8557063341140747, "learning_rate": 0.0001914746800868106, "loss": 1.425, "step": 6885 }, { "epoch": 0.26525505293551493, "grad_norm": 2.825359582901001, "learning_rate": 0.00019146245826965775, "loss": 1.3628, "step": 6890 }, { "epoch": 
0.2654475457170356, "grad_norm": 1.7262654304504395, "learning_rate": 0.00019145022808886902, "loss": 1.2902, "step": 6895 }, { "epoch": 0.26564003849855633, "grad_norm": 0.9676236510276794, "learning_rate": 0.00019143798954556268, "loss": 1.3342, "step": 6900 }, { "epoch": 0.265832531280077, "grad_norm": 1.4607850313186646, "learning_rate": 0.00019142574264085797, "loss": 1.3084, "step": 6905 }, { "epoch": 0.26602502406159767, "grad_norm": 2.181511878967285, "learning_rate": 0.0001914134873758747, "loss": 1.1746, "step": 6910 }, { "epoch": 0.2662175168431184, "grad_norm": 1.4534579515457153, "learning_rate": 0.00019140122375173362, "loss": 1.3071, "step": 6915 }, { "epoch": 0.26641000962463907, "grad_norm": 1.607039213180542, "learning_rate": 0.00019138895176955604, "loss": 1.2883, "step": 6920 }, { "epoch": 0.2666025024061598, "grad_norm": 0.9929762482643127, "learning_rate": 0.00019137667143046425, "loss": 1.1122, "step": 6925 }, { "epoch": 0.26679499518768046, "grad_norm": 1.6732393503189087, "learning_rate": 0.0001913643827355812, "loss": 1.149, "step": 6930 }, { "epoch": 0.26698748796920113, "grad_norm": 1.3785120248794556, "learning_rate": 0.0001913520856860305, "loss": 1.3759, "step": 6935 }, { "epoch": 0.26717998075072186, "grad_norm": 1.8252770900726318, "learning_rate": 0.0001913397802829368, "loss": 1.2633, "step": 6940 }, { "epoch": 0.2673724735322425, "grad_norm": 1.6789536476135254, "learning_rate": 0.0001913274665274252, "loss": 1.2741, "step": 6945 }, { "epoch": 0.26756496631376325, "grad_norm": 2.0153861045837402, "learning_rate": 0.00019131514442062184, "loss": 1.196, "step": 6950 }, { "epoch": 0.2677574590952839, "grad_norm": 1.0000704526901245, "learning_rate": 0.0001913028139636534, "loss": 1.1872, "step": 6955 }, { "epoch": 0.2679499518768046, "grad_norm": 1.2803142070770264, "learning_rate": 0.00019129047515764743, "loss": 1.2655, "step": 6960 }, { "epoch": 0.2681424446583253, "grad_norm": 0.9827659130096436, "learning_rate": 
0.00019127812800373225, "loss": 1.3503, "step": 6965 }, { "epoch": 0.268334937439846, "grad_norm": 1.3766348361968994, "learning_rate": 0.00019126577250303697, "loss": 1.2851, "step": 6970 }, { "epoch": 0.2685274302213667, "grad_norm": 2.285708427429199, "learning_rate": 0.00019125340865669134, "loss": 1.3247, "step": 6975 }, { "epoch": 0.2687199230028874, "grad_norm": 1.79937744140625, "learning_rate": 0.000191241036465826, "loss": 1.0306, "step": 6980 }, { "epoch": 0.2689124157844081, "grad_norm": 1.6062885522842407, "learning_rate": 0.0001912286559315723, "loss": 1.2068, "step": 6985 }, { "epoch": 0.2691049085659288, "grad_norm": 1.9590744972229004, "learning_rate": 0.00019121626705506233, "loss": 1.2195, "step": 6990 }, { "epoch": 0.26929740134744945, "grad_norm": 1.366186261177063, "learning_rate": 0.000191203869837429, "loss": 1.1627, "step": 6995 }, { "epoch": 0.2694898941289702, "grad_norm": 0.9655261635780334, "learning_rate": 0.00019119146427980593, "loss": 1.053, "step": 7000 }, { "epoch": 0.26968238691049085, "grad_norm": 1.4636151790618896, "learning_rate": 0.00019117905038332756, "loss": 1.0954, "step": 7005 }, { "epoch": 0.26987487969201157, "grad_norm": 1.4435783624649048, "learning_rate": 0.00019116662814912903, "loss": 1.2102, "step": 7010 }, { "epoch": 0.27006737247353224, "grad_norm": 0.9880768060684204, "learning_rate": 0.00019115419757834628, "loss": 1.0698, "step": 7015 }, { "epoch": 0.2702598652550529, "grad_norm": 1.516515851020813, "learning_rate": 0.000191141758672116, "loss": 1.3894, "step": 7020 }, { "epoch": 0.27045235803657364, "grad_norm": 2.1763806343078613, "learning_rate": 0.00019112931143157563, "loss": 1.3794, "step": 7025 }, { "epoch": 0.2706448508180943, "grad_norm": 1.2275705337524414, "learning_rate": 0.00019111685585786344, "loss": 1.2897, "step": 7030 }, { "epoch": 0.27083734359961503, "grad_norm": 0.966526985168457, "learning_rate": 0.00019110439195211835, "loss": 1.2112, "step": 7035 }, { "epoch": 0.2710298363811357, 
"grad_norm": 1.251911997795105, "learning_rate": 0.00019109191971548016, "loss": 1.2481, "step": 7040 }, { "epoch": 0.27122232916265643, "grad_norm": 2.3555140495300293, "learning_rate": 0.0001910794391490893, "loss": 1.3372, "step": 7045 }, { "epoch": 0.2714148219441771, "grad_norm": 1.229268193244934, "learning_rate": 0.0001910669502540871, "loss": 1.4362, "step": 7050 }, { "epoch": 0.27160731472569777, "grad_norm": 1.2356593608856201, "learning_rate": 0.00019105445303161555, "loss": 1.379, "step": 7055 }, { "epoch": 0.2717998075072185, "grad_norm": 1.910232424736023, "learning_rate": 0.00019104194748281747, "loss": 1.2902, "step": 7060 }, { "epoch": 0.27199230028873916, "grad_norm": 1.9058904647827148, "learning_rate": 0.0001910294336088364, "loss": 1.3313, "step": 7065 }, { "epoch": 0.2721847930702599, "grad_norm": 0.8631892800331116, "learning_rate": 0.0001910169114108166, "loss": 1.2843, "step": 7070 }, { "epoch": 0.27237728585178056, "grad_norm": 1.2212119102478027, "learning_rate": 0.0001910043808899032, "loss": 1.2588, "step": 7075 }, { "epoch": 0.27256977863330123, "grad_norm": 2.3140738010406494, "learning_rate": 0.00019099184204724202, "loss": 1.1781, "step": 7080 }, { "epoch": 0.27276227141482196, "grad_norm": 1.0162906646728516, "learning_rate": 0.00019097929488397965, "loss": 1.3433, "step": 7085 }, { "epoch": 0.2729547641963426, "grad_norm": 1.719766616821289, "learning_rate": 0.00019096673940126343, "loss": 1.1469, "step": 7090 }, { "epoch": 0.27314725697786335, "grad_norm": 1.5173147916793823, "learning_rate": 0.00019095417560024153, "loss": 1.1663, "step": 7095 }, { "epoch": 0.273339749759384, "grad_norm": 2.1228654384613037, "learning_rate": 0.00019094160348206277, "loss": 1.3433, "step": 7100 }, { "epoch": 0.2735322425409047, "grad_norm": 1.3896198272705078, "learning_rate": 0.00019092902304787679, "loss": 1.1782, "step": 7105 }, { "epoch": 0.2737247353224254, "grad_norm": 1.6935322284698486, "learning_rate": 0.00019091643429883402, "loss": 
1.1867, "step": 7110 }, { "epoch": 0.2739172281039461, "grad_norm": 1.5454139709472656, "learning_rate": 0.00019090383723608558, "loss": 1.3938, "step": 7115 }, { "epoch": 0.2741097208854668, "grad_norm": 1.1493245363235474, "learning_rate": 0.00019089123186078342, "loss": 1.2127, "step": 7120 }, { "epoch": 0.2743022136669875, "grad_norm": 1.7321335077285767, "learning_rate": 0.00019087861817408021, "loss": 1.3068, "step": 7125 }, { "epoch": 0.2744947064485082, "grad_norm": 1.7654987573623657, "learning_rate": 0.00019086599617712936, "loss": 1.3236, "step": 7130 }, { "epoch": 0.2746871992300289, "grad_norm": 1.0047959089279175, "learning_rate": 0.0001908533658710851, "loss": 1.404, "step": 7135 }, { "epoch": 0.27487969201154955, "grad_norm": 1.9708582162857056, "learning_rate": 0.0001908407272571024, "loss": 1.2387, "step": 7140 }, { "epoch": 0.2750721847930703, "grad_norm": 2.097369432449341, "learning_rate": 0.00019082808033633696, "loss": 1.189, "step": 7145 }, { "epoch": 0.27526467757459094, "grad_norm": 1.1789932250976562, "learning_rate": 0.00019081542510994523, "loss": 1.4815, "step": 7150 }, { "epoch": 0.27545717035611167, "grad_norm": 1.7205069065093994, "learning_rate": 0.00019080276157908447, "loss": 1.2906, "step": 7155 }, { "epoch": 0.27564966313763234, "grad_norm": 1.7320606708526611, "learning_rate": 0.0001907900897449127, "loss": 1.339, "step": 7160 }, { "epoch": 0.275842155919153, "grad_norm": 2.100649356842041, "learning_rate": 0.00019077740960858863, "loss": 1.3145, "step": 7165 }, { "epoch": 0.27603464870067373, "grad_norm": 1.9302312135696411, "learning_rate": 0.00019076472117127182, "loss": 1.3082, "step": 7170 }, { "epoch": 0.2762271414821944, "grad_norm": 0.5863549113273621, "learning_rate": 0.0001907520244341225, "loss": 1.0183, "step": 7175 }, { "epoch": 0.27641963426371513, "grad_norm": 1.0428977012634277, "learning_rate": 0.00019073931939830174, "loss": 1.2488, "step": 7180 }, { "epoch": 0.2766121270452358, "grad_norm": 
1.1643081903457642, "learning_rate": 0.0001907266060649713, "loss": 1.476, "step": 7185 }, { "epoch": 0.27680461982675647, "grad_norm": 1.0771207809448242, "learning_rate": 0.00019071388443529376, "loss": 1.3134, "step": 7190 }, { "epoch": 0.2769971126082772, "grad_norm": 1.9787309169769287, "learning_rate": 0.00019070115451043238, "loss": 1.3884, "step": 7195 }, { "epoch": 0.27718960538979787, "grad_norm": 2.095546245574951, "learning_rate": 0.0001906884162915513, "loss": 1.1221, "step": 7200 }, { "epoch": 0.2773820981713186, "grad_norm": 2.0389225482940674, "learning_rate": 0.00019067566977981528, "loss": 1.0463, "step": 7205 }, { "epoch": 0.27757459095283926, "grad_norm": 0.9991855621337891, "learning_rate": 0.00019066291497638993, "loss": 1.341, "step": 7210 }, { "epoch": 0.27776708373436, "grad_norm": 1.411401391029358, "learning_rate": 0.0001906501518824416, "loss": 1.434, "step": 7215 }, { "epoch": 0.27795957651588066, "grad_norm": 1.61775803565979, "learning_rate": 0.0001906373804991374, "loss": 1.1553, "step": 7220 }, { "epoch": 0.2781520692974013, "grad_norm": 2.546022653579712, "learning_rate": 0.00019062460082764515, "loss": 1.2496, "step": 7225 }, { "epoch": 0.27834456207892205, "grad_norm": 1.2731270790100098, "learning_rate": 0.00019061181286913348, "loss": 1.3236, "step": 7230 }, { "epoch": 0.2785370548604427, "grad_norm": 1.0163904428482056, "learning_rate": 0.00019059901662477177, "loss": 1.2854, "step": 7235 }, { "epoch": 0.27872954764196345, "grad_norm": 1.0653849840164185, "learning_rate": 0.0001905862120957302, "loss": 1.6351, "step": 7240 }, { "epoch": 0.2789220404234841, "grad_norm": 1.081264853477478, "learning_rate": 0.00019057339928317958, "loss": 1.2466, "step": 7245 }, { "epoch": 0.2791145332050048, "grad_norm": 1.3285462856292725, "learning_rate": 0.00019056057818829156, "loss": 1.2087, "step": 7250 }, { "epoch": 0.2793070259865255, "grad_norm": 1.067254900932312, "learning_rate": 0.0001905477488122386, "loss": 1.3877, "step": 7255 }, 
{ "epoch": 0.2794995187680462, "grad_norm": 0.9383085370063782, "learning_rate": 0.0001905349111561938, "loss": 1.0643, "step": 7260 }, { "epoch": 0.2796920115495669, "grad_norm": 2.7797493934631348, "learning_rate": 0.00019052206522133117, "loss": 1.3828, "step": 7265 }, { "epoch": 0.2798845043310876, "grad_norm": 1.410261631011963, "learning_rate": 0.0001905092110088253, "loss": 1.3019, "step": 7270 }, { "epoch": 0.28007699711260825, "grad_norm": 2.313541889190674, "learning_rate": 0.0001904963485198517, "loss": 1.2058, "step": 7275 }, { "epoch": 0.280269489894129, "grad_norm": 1.4474842548370361, "learning_rate": 0.00019048347775558645, "loss": 1.2187, "step": 7280 }, { "epoch": 0.28046198267564965, "grad_norm": 1.5846171379089355, "learning_rate": 0.00019047059871720657, "loss": 1.0326, "step": 7285 }, { "epoch": 0.28065447545717037, "grad_norm": 1.1118413209915161, "learning_rate": 0.00019045771140588976, "loss": 1.2881, "step": 7290 }, { "epoch": 0.28084696823869104, "grad_norm": 2.5894134044647217, "learning_rate": 0.00019044481582281448, "loss": 1.3885, "step": 7295 }, { "epoch": 0.28103946102021177, "grad_norm": 1.6019679307937622, "learning_rate": 0.00019043191196915993, "loss": 1.3247, "step": 7300 }, { "epoch": 0.28123195380173244, "grad_norm": 1.3384417295455933, "learning_rate": 0.00019041899984610606, "loss": 1.346, "step": 7305 }, { "epoch": 0.2814244465832531, "grad_norm": 1.3584142923355103, "learning_rate": 0.00019040607945483367, "loss": 1.3418, "step": 7310 }, { "epoch": 0.28161693936477383, "grad_norm": 1.379162073135376, "learning_rate": 0.00019039315079652416, "loss": 1.293, "step": 7315 }, { "epoch": 0.2818094321462945, "grad_norm": 1.499841570854187, "learning_rate": 0.00019038021387235982, "loss": 1.2131, "step": 7320 }, { "epoch": 0.28200192492781523, "grad_norm": 1.9813991785049438, "learning_rate": 0.00019036726868352366, "loss": 1.3282, "step": 7325 }, { "epoch": 0.2821944177093359, "grad_norm": 1.404096245765686, "learning_rate": 
0.00019035431523119938, "loss": 1.2238, "step": 7330 }, { "epoch": 0.28238691049085657, "grad_norm": 1.1089609861373901, "learning_rate": 0.00019034135351657152, "loss": 1.1705, "step": 7335 }, { "epoch": 0.2825794032723773, "grad_norm": 1.0567266941070557, "learning_rate": 0.00019032838354082535, "loss": 1.1228, "step": 7340 }, { "epoch": 0.28277189605389796, "grad_norm": 1.2407151460647583, "learning_rate": 0.00019031540530514685, "loss": 1.1154, "step": 7345 }, { "epoch": 0.2829643888354187, "grad_norm": 1.3094842433929443, "learning_rate": 0.00019030241881072283, "loss": 1.2251, "step": 7350 }, { "epoch": 0.28315688161693936, "grad_norm": 0.9434831142425537, "learning_rate": 0.00019028942405874082, "loss": 1.0644, "step": 7355 }, { "epoch": 0.2833493743984601, "grad_norm": 1.107958197593689, "learning_rate": 0.0001902764210503891, "loss": 1.295, "step": 7360 }, { "epoch": 0.28354186717998076, "grad_norm": 1.4402803182601929, "learning_rate": 0.00019026340978685666, "loss": 1.3339, "step": 7365 }, { "epoch": 0.2837343599615014, "grad_norm": 1.1564158201217651, "learning_rate": 0.0001902503902693334, "loss": 1.252, "step": 7370 }, { "epoch": 0.28392685274302215, "grad_norm": 1.8258494138717651, "learning_rate": 0.00019023736249900973, "loss": 1.3495, "step": 7375 }, { "epoch": 0.2841193455245428, "grad_norm": 1.1436362266540527, "learning_rate": 0.00019022432647707708, "loss": 1.4295, "step": 7380 }, { "epoch": 0.28431183830606355, "grad_norm": 1.1649361848831177, "learning_rate": 0.00019021128220472747, "loss": 1.3438, "step": 7385 }, { "epoch": 0.2845043310875842, "grad_norm": 1.7044711112976074, "learning_rate": 0.00019019822968315364, "loss": 1.2735, "step": 7390 }, { "epoch": 0.2846968238691049, "grad_norm": 0.8998376727104187, "learning_rate": 0.00019018516891354924, "loss": 1.1817, "step": 7395 }, { "epoch": 0.2848893166506256, "grad_norm": 1.8617538213729858, "learning_rate": 0.00019017209989710855, "loss": 1.3235, "step": 7400 }, { "epoch": 
0.2850818094321463, "grad_norm": 0.9981639981269836, "learning_rate": 0.00019015902263502669, "loss": 1.1171, "step": 7405 }, { "epoch": 0.285274302213667, "grad_norm": 0.935457170009613, "learning_rate": 0.00019014593712849944, "loss": 1.1926, "step": 7410 }, { "epoch": 0.2854667949951877, "grad_norm": 1.3465532064437866, "learning_rate": 0.00019013284337872341, "loss": 1.5102, "step": 7415 }, { "epoch": 0.28565928777670835, "grad_norm": 1.3213337659835815, "learning_rate": 0.00019011974138689595, "loss": 1.2597, "step": 7420 }, { "epoch": 0.2858517805582291, "grad_norm": 1.655229091644287, "learning_rate": 0.0001901066311542151, "loss": 1.0345, "step": 7425 }, { "epoch": 0.28604427333974974, "grad_norm": 1.0165207386016846, "learning_rate": 0.00019009351268187974, "loss": 1.2854, "step": 7430 }, { "epoch": 0.28623676612127047, "grad_norm": 1.3425116539001465, "learning_rate": 0.00019008038597108945, "loss": 1.381, "step": 7435 }, { "epoch": 0.28642925890279114, "grad_norm": 1.2017732858657837, "learning_rate": 0.0001900672510230446, "loss": 1.2171, "step": 7440 }, { "epoch": 0.28662175168431187, "grad_norm": 1.4958349466323853, "learning_rate": 0.00019005410783894626, "loss": 1.3524, "step": 7445 }, { "epoch": 0.28681424446583254, "grad_norm": 1.1109000444412231, "learning_rate": 0.00019004095641999636, "loss": 1.2046, "step": 7450 }, { "epoch": 0.2870067372473532, "grad_norm": 1.5347834825515747, "learning_rate": 0.00019002779676739745, "loss": 1.2295, "step": 7455 }, { "epoch": 0.28719923002887393, "grad_norm": 1.5204600095748901, "learning_rate": 0.00019001462888235286, "loss": 1.0319, "step": 7460 }, { "epoch": 0.2873917228103946, "grad_norm": 2.0644850730895996, "learning_rate": 0.00019000145276606677, "loss": 1.2371, "step": 7465 }, { "epoch": 0.2875842155919153, "grad_norm": 1.5903024673461914, "learning_rate": 0.00018998826841974407, "loss": 1.3781, "step": 7470 }, { "epoch": 0.287776708373436, "grad_norm": 1.045086145401001, "learning_rate": 
0.00018997507584459032, "loss": 1.0918, "step": 7475 }, { "epoch": 0.28796920115495667, "grad_norm": 1.499211311340332, "learning_rate": 0.0001899618750418119, "loss": 1.2377, "step": 7480 }, { "epoch": 0.2881616939364774, "grad_norm": 1.2885223627090454, "learning_rate": 0.00018994866601261597, "loss": 1.2936, "step": 7485 }, { "epoch": 0.28835418671799806, "grad_norm": 1.9687073230743408, "learning_rate": 0.00018993544875821035, "loss": 1.2043, "step": 7490 }, { "epoch": 0.2885466794995188, "grad_norm": 0.9758608937263489, "learning_rate": 0.00018992222327980375, "loss": 1.0775, "step": 7495 }, { "epoch": 0.28873917228103946, "grad_norm": 1.4256442785263062, "learning_rate": 0.00018990898957860547, "loss": 1.2608, "step": 7500 }, { "epoch": 0.28893166506256013, "grad_norm": 1.267991304397583, "learning_rate": 0.00018989574765582572, "loss": 1.3826, "step": 7505 }, { "epoch": 0.28912415784408085, "grad_norm": 1.4104158878326416, "learning_rate": 0.00018988249751267534, "loss": 1.1589, "step": 7510 }, { "epoch": 0.2893166506256015, "grad_norm": 0.9540778994560242, "learning_rate": 0.000189869239150366, "loss": 1.196, "step": 7515 }, { "epoch": 0.28950914340712225, "grad_norm": 4.175881385803223, "learning_rate": 0.00018985597257011006, "loss": 1.3408, "step": 7520 }, { "epoch": 0.2897016361886429, "grad_norm": 1.79558527469635, "learning_rate": 0.00018984269777312066, "loss": 1.0596, "step": 7525 }, { "epoch": 0.28989412897016364, "grad_norm": 1.5449460744857788, "learning_rate": 0.0001898294147606117, "loss": 1.2628, "step": 7530 }, { "epoch": 0.2900866217516843, "grad_norm": 1.5056041479110718, "learning_rate": 0.00018981612353379784, "loss": 1.132, "step": 7535 }, { "epoch": 0.290279114533205, "grad_norm": 1.7045507431030273, "learning_rate": 0.00018980282409389445, "loss": 1.1663, "step": 7540 }, { "epoch": 0.2904716073147257, "grad_norm": 1.203892469406128, "learning_rate": 0.00018978951644211766, "loss": 1.1168, "step": 7545 }, { "epoch": 0.2906641000962464, 
"grad_norm": 0.9239038228988647, "learning_rate": 0.0001897762005796844, "loss": 1.3328, "step": 7550 }, { "epoch": 0.2908565928777671, "grad_norm": 1.3521167039871216, "learning_rate": 0.00018976287650781238, "loss": 1.2766, "step": 7555 }, { "epoch": 0.2910490856592878, "grad_norm": 1.3824992179870605, "learning_rate": 0.00018974954422771987, "loss": 1.0153, "step": 7560 }, { "epoch": 0.29124157844080845, "grad_norm": 0.9183006286621094, "learning_rate": 0.00018973620374062607, "loss": 1.0558, "step": 7565 }, { "epoch": 0.29143407122232917, "grad_norm": 1.7128045558929443, "learning_rate": 0.0001897228550477509, "loss": 1.316, "step": 7570 }, { "epoch": 0.29162656400384984, "grad_norm": 1.3998011350631714, "learning_rate": 0.000189709498150315, "loss": 1.2359, "step": 7575 }, { "epoch": 0.29181905678537057, "grad_norm": 1.2251836061477661, "learning_rate": 0.00018969613304953975, "loss": 1.2464, "step": 7580 }, { "epoch": 0.29201154956689124, "grad_norm": 1.3014954328536987, "learning_rate": 0.00018968275974664734, "loss": 1.0624, "step": 7585 }, { "epoch": 0.2922040423484119, "grad_norm": 1.8785862922668457, "learning_rate": 0.00018966937824286062, "loss": 1.3491, "step": 7590 }, { "epoch": 0.29239653512993263, "grad_norm": 1.0634154081344604, "learning_rate": 0.00018965598853940327, "loss": 1.1012, "step": 7595 }, { "epoch": 0.2925890279114533, "grad_norm": 0.9114715456962585, "learning_rate": 0.00018964259063749967, "loss": 1.3738, "step": 7600 }, { "epoch": 0.29278152069297403, "grad_norm": 1.9063506126403809, "learning_rate": 0.00018962918453837503, "loss": 1.1161, "step": 7605 }, { "epoch": 0.2929740134744947, "grad_norm": 1.12264084815979, "learning_rate": 0.00018961577024325516, "loss": 1.4191, "step": 7610 }, { "epoch": 0.2931665062560154, "grad_norm": 1.4751306772232056, "learning_rate": 0.00018960234775336677, "loss": 1.2153, "step": 7615 }, { "epoch": 0.2933589990375361, "grad_norm": 1.4374860525131226, "learning_rate": 0.00018958891706993724, "loss": 
1.1999, "step": 7620 }, { "epoch": 0.29355149181905676, "grad_norm": 1.5792250633239746, "learning_rate": 0.0001895754781941947, "loss": 1.266, "step": 7625 }, { "epoch": 0.2937439846005775, "grad_norm": 1.3390734195709229, "learning_rate": 0.00018956203112736807, "loss": 1.2703, "step": 7630 }, { "epoch": 0.29393647738209816, "grad_norm": 1.2470978498458862, "learning_rate": 0.00018954857587068701, "loss": 1.0415, "step": 7635 }, { "epoch": 0.2941289701636189, "grad_norm": 1.6102235317230225, "learning_rate": 0.00018953511242538186, "loss": 1.2707, "step": 7640 }, { "epoch": 0.29432146294513956, "grad_norm": 1.334554672241211, "learning_rate": 0.0001895216407926838, "loss": 1.2672, "step": 7645 }, { "epoch": 0.2945139557266602, "grad_norm": 1.2881218194961548, "learning_rate": 0.00018950816097382475, "loss": 1.1641, "step": 7650 }, { "epoch": 0.29470644850818095, "grad_norm": 1.2150179147720337, "learning_rate": 0.00018949467297003732, "loss": 1.2636, "step": 7655 }, { "epoch": 0.2948989412897016, "grad_norm": 1.1388130187988281, "learning_rate": 0.00018948117678255485, "loss": 1.2354, "step": 7660 }, { "epoch": 0.29509143407122235, "grad_norm": 0.785776674747467, "learning_rate": 0.0001894676724126115, "loss": 1.2621, "step": 7665 }, { "epoch": 0.295283926852743, "grad_norm": 1.005819320678711, "learning_rate": 0.00018945415986144223, "loss": 1.1175, "step": 7670 }, { "epoch": 0.29547641963426374, "grad_norm": 2.2892065048217773, "learning_rate": 0.00018944063913028264, "loss": 1.148, "step": 7675 }, { "epoch": 0.2956689124157844, "grad_norm": 2.0920302867889404, "learning_rate": 0.00018942711022036903, "loss": 1.178, "step": 7680 }, { "epoch": 0.2958614051973051, "grad_norm": 1.228538155555725, "learning_rate": 0.00018941357313293863, "loss": 1.2499, "step": 7685 }, { "epoch": 0.2960538979788258, "grad_norm": 1.8671079874038696, "learning_rate": 0.00018940002786922925, "loss": 1.2361, "step": 7690 }, { "epoch": 0.2962463907603465, "grad_norm": 
1.7283247709274292, "learning_rate": 0.00018938647443047957, "loss": 1.2695, "step": 7695 }, { "epoch": 0.2964388835418672, "grad_norm": 1.9629713296890259, "learning_rate": 0.0001893729128179289, "loss": 1.5226, "step": 7700 }, { "epoch": 0.2966313763233879, "grad_norm": 1.2868784666061401, "learning_rate": 0.00018935934303281743, "loss": 1.3237, "step": 7705 }, { "epoch": 0.29682386910490854, "grad_norm": 1.3925827741622925, "learning_rate": 0.000189345765076386, "loss": 1.4075, "step": 7710 }, { "epoch": 0.29701636188642927, "grad_norm": 1.1560002565383911, "learning_rate": 0.0001893321789498762, "loss": 1.3212, "step": 7715 }, { "epoch": 0.29720885466794994, "grad_norm": 1.207263708114624, "learning_rate": 0.0001893185846545304, "loss": 1.3106, "step": 7720 }, { "epoch": 0.29740134744947067, "grad_norm": null, "learning_rate": 0.00018930770333752716, "loss": 1.5499, "step": 7725 }, { "epoch": 0.29759384023099134, "grad_norm": 1.2437909841537476, "learning_rate": 0.0001892940943414097, "loss": 1.2797, "step": 7730 }, { "epoch": 0.297786333012512, "grad_norm": 0.8919286131858826, "learning_rate": 0.00018928047717993885, "loss": 1.1074, "step": 7735 }, { "epoch": 0.29797882579403273, "grad_norm": 1.219995379447937, "learning_rate": 0.00018926685185435978, "loss": 1.0856, "step": 7740 }, { "epoch": 0.2981713185755534, "grad_norm": 0.8819857835769653, "learning_rate": 0.00018925321836591846, "loss": 1.3518, "step": 7745 }, { "epoch": 0.2983638113570741, "grad_norm": 1.2268033027648926, "learning_rate": 0.00018923957671586154, "loss": 1.3786, "step": 7750 }, { "epoch": 0.2985563041385948, "grad_norm": 0.9456066489219666, "learning_rate": 0.0001892259269054365, "loss": 1.3424, "step": 7755 }, { "epoch": 0.2987487969201155, "grad_norm": 1.5397047996520996, "learning_rate": 0.0001892122689358915, "loss": 1.3618, "step": 7760 }, { "epoch": 0.2989412897016362, "grad_norm": 1.3874872922897339, "learning_rate": 0.0001891986028084755, "loss": 1.2717, "step": 7765 }, { 
"epoch": 0.29913378248315686, "grad_norm": 1.1725342273712158, "learning_rate": 0.00018918492852443817, "loss": 1.4347, "step": 7770 }, { "epoch": 0.2993262752646776, "grad_norm": 1.2135777473449707, "learning_rate": 0.0001891712460850299, "loss": 1.1892, "step": 7775 }, { "epoch": 0.29951876804619826, "grad_norm": 1.549715280532837, "learning_rate": 0.00018915755549150188, "loss": 1.2041, "step": 7780 }, { "epoch": 0.299711260827719, "grad_norm": 0.9927541613578796, "learning_rate": 0.00018914385674510605, "loss": 1.2198, "step": 7785 }, { "epoch": 0.29990375360923965, "grad_norm": 1.3314557075500488, "learning_rate": 0.00018913014984709502, "loss": 1.1805, "step": 7790 }, { "epoch": 0.3000962463907603, "grad_norm": 1.4021222591400146, "learning_rate": 0.00018911643479872225, "loss": 1.3375, "step": 7795 }, { "epoch": 0.30028873917228105, "grad_norm": 1.0226534605026245, "learning_rate": 0.00018910271160124182, "loss": 1.329, "step": 7800 }, { "epoch": 0.3004812319538017, "grad_norm": 0.8493847846984863, "learning_rate": 0.0001890889802559087, "loss": 1.4581, "step": 7805 }, { "epoch": 0.30067372473532245, "grad_norm": 1.0437967777252197, "learning_rate": 0.00018907524076397847, "loss": 1.409, "step": 7810 }, { "epoch": 0.3008662175168431, "grad_norm": 2.574695110321045, "learning_rate": 0.00018906149312670754, "loss": 1.3962, "step": 7815 }, { "epoch": 0.3010587102983638, "grad_norm": 1.3757768869400024, "learning_rate": 0.00018904773734535306, "loss": 1.4098, "step": 7820 }, { "epoch": 0.3012512030798845, "grad_norm": 1.2249635457992554, "learning_rate": 0.0001890339734211729, "loss": 1.1643, "step": 7825 }, { "epoch": 0.3014436958614052, "grad_norm": 1.6329936981201172, "learning_rate": 0.00018902020135542564, "loss": 1.1914, "step": 7830 }, { "epoch": 0.3016361886429259, "grad_norm": 1.0217385292053223, "learning_rate": 0.0001890064211493707, "loss": 1.043, "step": 7835 }, { "epoch": 0.3018286814244466, "grad_norm": 1.448754072189331, "learning_rate": 
0.0001889926328042681, "loss": 1.0953, "step": 7840 }, { "epoch": 0.3020211742059673, "grad_norm": 0.9284221529960632, "learning_rate": 0.00018897883632137881, "loss": 1.321, "step": 7845 }, { "epoch": 0.30221366698748797, "grad_norm": 1.4679608345031738, "learning_rate": 0.00018896503170196435, "loss": 1.2266, "step": 7850 }, { "epoch": 0.30240615976900864, "grad_norm": 1.1148631572723389, "learning_rate": 0.00018895121894728709, "loss": 1.1666, "step": 7855 }, { "epoch": 0.30259865255052937, "grad_norm": 1.0431932210922241, "learning_rate": 0.00018893739805861008, "loss": 1.2986, "step": 7860 }, { "epoch": 0.30279114533205004, "grad_norm": 1.5691524744033813, "learning_rate": 0.00018892356903719718, "loss": 1.3928, "step": 7865 }, { "epoch": 0.30298363811357076, "grad_norm": 1.6849128007888794, "learning_rate": 0.000188909731884313, "loss": 1.3569, "step": 7870 }, { "epoch": 0.30317613089509143, "grad_norm": 1.1832456588745117, "learning_rate": 0.00018889588660122276, "loss": 1.2984, "step": 7875 }, { "epoch": 0.3033686236766121, "grad_norm": 1.3270272016525269, "learning_rate": 0.0001888820331891926, "loss": 1.1498, "step": 7880 }, { "epoch": 0.30356111645813283, "grad_norm": 1.6383373737335205, "learning_rate": 0.0001888681716494893, "loss": 1.4725, "step": 7885 }, { "epoch": 0.3037536092396535, "grad_norm": 1.1068469285964966, "learning_rate": 0.00018885430198338038, "loss": 1.3326, "step": 7890 }, { "epoch": 0.3039461020211742, "grad_norm": 1.8454192876815796, "learning_rate": 0.00018884042419213412, "loss": 1.2307, "step": 7895 }, { "epoch": 0.3041385948026949, "grad_norm": 1.160762906074524, "learning_rate": 0.00018882653827701965, "loss": 1.6025, "step": 7900 }, { "epoch": 0.30433108758421556, "grad_norm": 1.9325065612792969, "learning_rate": 0.00018881264423930663, "loss": 1.3071, "step": 7905 }, { "epoch": 0.3045235803657363, "grad_norm": 0.9047966003417969, "learning_rate": 0.00018879874208026562, "loss": 1.3166, "step": 7910 }, { "epoch": 
0.30471607314725696, "grad_norm": 0.9753623008728027, "learning_rate": 0.00018878483180116793, "loss": 1.3702, "step": 7915 }, { "epoch": 0.3049085659287777, "grad_norm": 1.210321307182312, "learning_rate": 0.00018877091340328549, "loss": 1.3775, "step": 7920 }, { "epoch": 0.30510105871029836, "grad_norm": 1.287484049797058, "learning_rate": 0.00018875698688789106, "loss": 1.3534, "step": 7925 }, { "epoch": 0.3052935514918191, "grad_norm": 1.1604797840118408, "learning_rate": 0.00018874305225625814, "loss": 1.2154, "step": 7930 }, { "epoch": 0.30548604427333975, "grad_norm": 1.4771429300308228, "learning_rate": 0.00018872910950966097, "loss": 1.2438, "step": 7935 }, { "epoch": 0.3056785370548604, "grad_norm": 1.1472980976104736, "learning_rate": 0.00018871515864937453, "loss": 1.0805, "step": 7940 }, { "epoch": 0.30587102983638115, "grad_norm": 1.1015262603759766, "learning_rate": 0.0001887011996766745, "loss": 1.0594, "step": 7945 }, { "epoch": 0.3060635226179018, "grad_norm": 1.5410771369934082, "learning_rate": 0.00018868723259283737, "loss": 1.2624, "step": 7950 }, { "epoch": 0.30625601539942254, "grad_norm": 1.2014496326446533, "learning_rate": 0.0001886732573991403, "loss": 1.2259, "step": 7955 }, { "epoch": 0.3064485081809432, "grad_norm": 2.0007143020629883, "learning_rate": 0.0001886592740968612, "loss": 1.3877, "step": 7960 }, { "epoch": 0.3066410009624639, "grad_norm": 1.2455111742019653, "learning_rate": 0.00018864528268727887, "loss": 1.3254, "step": 7965 }, { "epoch": 0.3068334937439846, "grad_norm": 1.2766424417495728, "learning_rate": 0.00018863128317167264, "loss": 1.2663, "step": 7970 }, { "epoch": 0.3070259865255053, "grad_norm": 1.2151165008544922, "learning_rate": 0.0001886172755513227, "loss": 1.3597, "step": 7975 }, { "epoch": 0.307218479307026, "grad_norm": 1.1774568557739258, "learning_rate": 0.0001886032598275099, "loss": 1.1311, "step": 7980 }, { "epoch": 0.3074109720885467, "grad_norm": 1.43276846408844, "learning_rate": 
0.00018858923600151596, "loss": 1.1123, "step": 7985 }, { "epoch": 0.3076034648700674, "grad_norm": 1.691684603691101, "learning_rate": 0.00018857520407462326, "loss": 1.4089, "step": 7990 }, { "epoch": 0.30779595765158807, "grad_norm": 1.7944872379302979, "learning_rate": 0.00018856116404811487, "loss": 1.3098, "step": 7995 }, { "epoch": 0.30798845043310874, "grad_norm": 1.2894377708435059, "learning_rate": 0.00018854711592327473, "loss": 1.2128, "step": 8000 }, { "epoch": 0.30818094321462947, "grad_norm": 2.52504301071167, "learning_rate": 0.00018853305970138737, "loss": 1.4214, "step": 8005 }, { "epoch": 0.30837343599615014, "grad_norm": 1.0757540464401245, "learning_rate": 0.0001885189953837382, "loss": 1.1836, "step": 8010 }, { "epoch": 0.30856592877767086, "grad_norm": 0.9253488183021545, "learning_rate": 0.0001885049229716133, "loss": 1.0756, "step": 8015 }, { "epoch": 0.30875842155919153, "grad_norm": 2.042194366455078, "learning_rate": 0.00018849084246629945, "loss": 1.4017, "step": 8020 }, { "epoch": 0.3089509143407122, "grad_norm": 1.750023603439331, "learning_rate": 0.00018847675386908427, "loss": 1.2352, "step": 8025 }, { "epoch": 0.3091434071222329, "grad_norm": 1.5334408283233643, "learning_rate": 0.00018846265718125605, "loss": 1.3053, "step": 8030 }, { "epoch": 0.3093358999037536, "grad_norm": 1.262428641319275, "learning_rate": 0.00018844855240410387, "loss": 1.28, "step": 8035 }, { "epoch": 0.3095283926852743, "grad_norm": 1.1430000066757202, "learning_rate": 0.0001884344395389175, "loss": 1.2133, "step": 8040 }, { "epoch": 0.309720885466795, "grad_norm": 1.792740821838379, "learning_rate": 0.0001884203185869874, "loss": 1.3004, "step": 8045 }, { "epoch": 0.30991337824831566, "grad_norm": 1.7067112922668457, "learning_rate": 0.00018840618954960495, "loss": 1.4131, "step": 8050 }, { "epoch": 0.3101058710298364, "grad_norm": 1.5428810119628906, "learning_rate": 0.00018839205242806206, "loss": 1.2361, "step": 8055 }, { "epoch": 0.31029836381135706, 
"grad_norm": 1.078902244567871, "learning_rate": 0.00018837790722365152, "loss": 1.2126, "step": 8060 }, { "epoch": 0.3104908565928778, "grad_norm": 1.5348985195159912, "learning_rate": 0.00018836375393766684, "loss": 1.2591, "step": 8065 }, { "epoch": 0.31068334937439845, "grad_norm": 1.2026286125183105, "learning_rate": 0.00018834959257140222, "loss": 1.3059, "step": 8070 }, { "epoch": 0.3108758421559192, "grad_norm": 1.3559043407440186, "learning_rate": 0.0001883354231261526, "loss": 1.2006, "step": 8075 }, { "epoch": 0.31106833493743985, "grad_norm": 1.2358171939849854, "learning_rate": 0.00018832124560321374, "loss": 1.2656, "step": 8080 }, { "epoch": 0.3112608277189605, "grad_norm": 1.720358967781067, "learning_rate": 0.00018830706000388202, "loss": 1.3493, "step": 8085 }, { "epoch": 0.31145332050048125, "grad_norm": 1.4281798601150513, "learning_rate": 0.00018829286632945463, "loss": 1.1485, "step": 8090 }, { "epoch": 0.3116458132820019, "grad_norm": 1.6174485683441162, "learning_rate": 0.00018827866458122951, "loss": 1.4384, "step": 8095 }, { "epoch": 0.31183830606352264, "grad_norm": 1.0020065307617188, "learning_rate": 0.00018826445476050532, "loss": 1.0489, "step": 8100 }, { "epoch": 0.3120307988450433, "grad_norm": 1.8663140535354614, "learning_rate": 0.0001882502368685814, "loss": 1.3252, "step": 8105 }, { "epoch": 0.312223291626564, "grad_norm": 1.4404470920562744, "learning_rate": 0.00018823601090675796, "loss": 1.1452, "step": 8110 }, { "epoch": 0.3124157844080847, "grad_norm": 1.3358442783355713, "learning_rate": 0.00018822177687633583, "loss": 1.1581, "step": 8115 }, { "epoch": 0.3126082771896054, "grad_norm": 1.6938860416412354, "learning_rate": 0.00018820753477861662, "loss": 1.5378, "step": 8120 }, { "epoch": 0.3128007699711261, "grad_norm": 1.1914762258529663, "learning_rate": 0.00018819328461490268, "loss": 1.172, "step": 8125 }, { "epoch": 0.3129932627526468, "grad_norm": 2.0504634380340576, "learning_rate": 0.0001881790263864971, "loss": 
1.2462, "step": 8130 }, { "epoch": 0.31318575553416744, "grad_norm": 1.548021912574768, "learning_rate": 0.00018816476009470367, "loss": 1.271, "step": 8135 }, { "epoch": 0.31337824831568817, "grad_norm": 1.2875434160232544, "learning_rate": 0.00018815048574082698, "loss": 1.2484, "step": 8140 }, { "epoch": 0.31357074109720884, "grad_norm": 0.936850905418396, "learning_rate": 0.00018813620332617227, "loss": 1.2765, "step": 8145 }, { "epoch": 0.31376323387872956, "grad_norm": 1.2823413610458374, "learning_rate": 0.00018812191285204566, "loss": 1.1859, "step": 8150 }, { "epoch": 0.31395572666025023, "grad_norm": 2.052490472793579, "learning_rate": 0.00018810761431975386, "loss": 1.2033, "step": 8155 }, { "epoch": 0.31414821944177096, "grad_norm": 2.4439830780029297, "learning_rate": 0.00018809330773060442, "loss": 1.3678, "step": 8160 }, { "epoch": 0.31434071222329163, "grad_norm": 1.9978455305099487, "learning_rate": 0.0001880789930859055, "loss": 1.25, "step": 8165 }, { "epoch": 0.3145332050048123, "grad_norm": 1.2606321573257446, "learning_rate": 0.00018806467038696615, "loss": 1.4966, "step": 8170 }, { "epoch": 0.314725697786333, "grad_norm": 1.4588353633880615, "learning_rate": 0.00018805033963509605, "loss": 1.1843, "step": 8175 }, { "epoch": 0.3149181905678537, "grad_norm": 2.8686156272888184, "learning_rate": 0.00018803600083160574, "loss": 1.3017, "step": 8180 }, { "epoch": 0.3151106833493744, "grad_norm": 1.812328815460205, "learning_rate": 0.00018802165397780626, "loss": 1.4141, "step": 8185 }, { "epoch": 0.3153031761308951, "grad_norm": 1.4686119556427002, "learning_rate": 0.00018800729907500968, "loss": 1.4522, "step": 8190 }, { "epoch": 0.31549566891241576, "grad_norm": 1.766160249710083, "learning_rate": 0.00018799293612452856, "loss": 1.1501, "step": 8195 }, { "epoch": 0.3156881616939365, "grad_norm": 1.5843030214309692, "learning_rate": 0.00018797856512767634, "loss": 1.2997, "step": 8200 }, { "epoch": 0.31588065447545716, "grad_norm": 
1.2028679847717285, "learning_rate": 0.00018796418608576712, "loss": 1.108, "step": 8205 }, { "epoch": 0.3160731472569779, "grad_norm": 1.4626559019088745, "learning_rate": 0.0001879497990001158, "loss": 1.116, "step": 8210 }, { "epoch": 0.31626564003849855, "grad_norm": 1.956745982170105, "learning_rate": 0.000187935403872038, "loss": 1.2741, "step": 8215 }, { "epoch": 0.3164581328200192, "grad_norm": 1.1932622194290161, "learning_rate": 0.00018792100070285002, "loss": 1.1966, "step": 8220 }, { "epoch": 0.31665062560153995, "grad_norm": 2.212184429168701, "learning_rate": 0.00018790658949386892, "loss": 1.1485, "step": 8225 }, { "epoch": 0.3168431183830606, "grad_norm": 0.867708146572113, "learning_rate": 0.00018789217024641256, "loss": 1.2457, "step": 8230 }, { "epoch": 0.31703561116458134, "grad_norm": 2.4929304122924805, "learning_rate": 0.0001878777429617995, "loss": 1.1819, "step": 8235 }, { "epoch": 0.317228103946102, "grad_norm": 1.4232670068740845, "learning_rate": 0.00018786330764134897, "loss": 1.2189, "step": 8240 }, { "epoch": 0.31742059672762274, "grad_norm": 1.8306447267532349, "learning_rate": 0.00018784886428638094, "loss": 1.2939, "step": 8245 }, { "epoch": 0.3176130895091434, "grad_norm": 0.9103988409042358, "learning_rate": 0.00018783441289821627, "loss": 1.2982, "step": 8250 }, { "epoch": 0.3178055822906641, "grad_norm": 1.08035409450531, "learning_rate": 0.0001878199534781764, "loss": 1.2777, "step": 8255 }, { "epoch": 0.3179980750721848, "grad_norm": 1.1342133283615112, "learning_rate": 0.0001878054860275835, "loss": 1.1476, "step": 8260 }, { "epoch": 0.3181905678537055, "grad_norm": 1.7727190256118774, "learning_rate": 0.0001877910105477606, "loss": 1.1887, "step": 8265 }, { "epoch": 0.3183830606352262, "grad_norm": 2.5168001651763916, "learning_rate": 0.0001877765270400313, "loss": 1.0494, "step": 8270 }, { "epoch": 0.31857555341674687, "grad_norm": 1.2397305965423584, "learning_rate": 0.0001877620355057201, "loss": 1.321, "step": 8275 }, { 
"epoch": 0.31876804619826754, "grad_norm": 1.3002814054489136, "learning_rate": 0.0001877475359461521, "loss": 1.1543, "step": 8280 }, { "epoch": 0.31896053897978827, "grad_norm": 1.5683960914611816, "learning_rate": 0.00018773302836265322, "loss": 1.1987, "step": 8285 }, { "epoch": 0.31915303176130894, "grad_norm": 1.6934245824813843, "learning_rate": 0.00018771851275655008, "loss": 1.2946, "step": 8290 }, { "epoch": 0.31934552454282966, "grad_norm": 1.4387637376785278, "learning_rate": 0.00018770398912917004, "loss": 1.2151, "step": 8295 }, { "epoch": 0.31953801732435033, "grad_norm": 1.3155730962753296, "learning_rate": 0.00018768945748184117, "loss": 1.1692, "step": 8300 }, { "epoch": 0.31973051010587106, "grad_norm": 1.039670467376709, "learning_rate": 0.0001876749178158923, "loss": 1.2783, "step": 8305 }, { "epoch": 0.3199230028873917, "grad_norm": 1.1988794803619385, "learning_rate": 0.00018766037013265302, "loss": 1.1775, "step": 8310 }, { "epoch": 0.3201154956689124, "grad_norm": 1.39814031124115, "learning_rate": 0.00018764581443345355, "loss": 1.2256, "step": 8315 }, { "epoch": 0.3203079884504331, "grad_norm": 1.7934690713882446, "learning_rate": 0.00018763125071962495, "loss": 1.3505, "step": 8320 }, { "epoch": 0.3205004812319538, "grad_norm": 1.5974578857421875, "learning_rate": 0.00018761667899249899, "loss": 1.1725, "step": 8325 }, { "epoch": 0.3206929740134745, "grad_norm": 0.9480400085449219, "learning_rate": 0.00018760209925340818, "loss": 1.2059, "step": 8330 }, { "epoch": 0.3208854667949952, "grad_norm": 1.9734187126159668, "learning_rate": 0.00018758751150368564, "loss": 1.2116, "step": 8335 }, { "epoch": 0.32107795957651586, "grad_norm": 0.9984979033470154, "learning_rate": 0.00018757291574466543, "loss": 1.1347, "step": 8340 }, { "epoch": 0.3212704523580366, "grad_norm": 0.96681147813797, "learning_rate": 0.00018755831197768215, "loss": 1.2824, "step": 8345 }, { "epoch": 0.32146294513955725, "grad_norm": 1.5365724563598633, "learning_rate": 
0.00018754370020407127, "loss": 1.3718, "step": 8350 }, { "epoch": 0.321655437921078, "grad_norm": 1.6202696561813354, "learning_rate": 0.00018752908042516897, "loss": 1.3233, "step": 8355 }, { "epoch": 0.32184793070259865, "grad_norm": 2.0272514820098877, "learning_rate": 0.00018751445264231207, "loss": 1.3406, "step": 8360 }, { "epoch": 0.3220404234841193, "grad_norm": 1.1724604368209839, "learning_rate": 0.0001874998168568382, "loss": 1.2649, "step": 8365 }, { "epoch": 0.32223291626564005, "grad_norm": 1.0908805131912231, "learning_rate": 0.00018748517307008573, "loss": 1.2924, "step": 8370 }, { "epoch": 0.3224254090471607, "grad_norm": 1.0658169984817505, "learning_rate": 0.0001874705212833937, "loss": 1.1266, "step": 8375 }, { "epoch": 0.32261790182868144, "grad_norm": 1.2267755270004272, "learning_rate": 0.00018745586149810194, "loss": 1.172, "step": 8380 }, { "epoch": 0.3228103946102021, "grad_norm": 0.9808927178382874, "learning_rate": 0.000187441193715551, "loss": 1.1241, "step": 8385 }, { "epoch": 0.32300288739172284, "grad_norm": 1.2251529693603516, "learning_rate": 0.00018742651793708212, "loss": 1.1649, "step": 8390 }, { "epoch": 0.3231953801732435, "grad_norm": 1.7396290302276611, "learning_rate": 0.00018741183416403734, "loss": 1.173, "step": 8395 }, { "epoch": 0.3233878729547642, "grad_norm": 1.1498087644577026, "learning_rate": 0.00018739714239775936, "loss": 1.266, "step": 8400 }, { "epoch": 0.3235803657362849, "grad_norm": 0.9458256959915161, "learning_rate": 0.0001873824426395917, "loss": 1.1651, "step": 8405 }, { "epoch": 0.3237728585178056, "grad_norm": 1.701441764831543, "learning_rate": 0.00018736773489087845, "loss": 1.4314, "step": 8410 }, { "epoch": 0.3239653512993263, "grad_norm": 1.3168058395385742, "learning_rate": 0.00018735301915296466, "loss": 1.3837, "step": 8415 }, { "epoch": 0.32415784408084697, "grad_norm": 1.2277673482894897, "learning_rate": 0.0001873382954271959, "loss": 1.2433, "step": 8420 }, { "epoch": 0.32435033686236764, 
"grad_norm": 1.3443776369094849, "learning_rate": 0.00018732356371491858, "loss": 1.1514, "step": 8425 }, { "epoch": 0.32454282964388836, "grad_norm": 1.3421462774276733, "learning_rate": 0.00018730882401747984, "loss": 1.2908, "step": 8430 }, { "epoch": 0.32473532242540903, "grad_norm": 2.7043700218200684, "learning_rate": 0.0001872940763362275, "loss": 1.426, "step": 8435 }, { "epoch": 0.32492781520692976, "grad_norm": 1.2363086938858032, "learning_rate": 0.00018727932067251016, "loss": 1.2172, "step": 8440 }, { "epoch": 0.32512030798845043, "grad_norm": 1.7551484107971191, "learning_rate": 0.00018726455702767713, "loss": 1.2379, "step": 8445 }, { "epoch": 0.3253128007699711, "grad_norm": 1.2935433387756348, "learning_rate": 0.00018724978540307844, "loss": 1.2109, "step": 8450 }, { "epoch": 0.3255052935514918, "grad_norm": 1.723219871520996, "learning_rate": 0.00018723500580006483, "loss": 1.3996, "step": 8455 }, { "epoch": 0.3256977863330125, "grad_norm": 1.1455639600753784, "learning_rate": 0.0001872202182199878, "loss": 1.1223, "step": 8460 }, { "epoch": 0.3258902791145332, "grad_norm": 1.194926381111145, "learning_rate": 0.0001872054226641996, "loss": 1.3301, "step": 8465 }, { "epoch": 0.3260827718960539, "grad_norm": 1.9672341346740723, "learning_rate": 0.00018719061913405322, "loss": 1.3884, "step": 8470 }, { "epoch": 0.3262752646775746, "grad_norm": 1.5594457387924194, "learning_rate": 0.0001871758076309023, "loss": 1.1862, "step": 8475 }, { "epoch": 0.3264677574590953, "grad_norm": 1.141787052154541, "learning_rate": 0.0001871609881561012, "loss": 1.2375, "step": 8480 }, { "epoch": 0.32666025024061596, "grad_norm": 1.1914411783218384, "learning_rate": 0.0001871461607110052, "loss": 1.397, "step": 8485 }, { "epoch": 0.3268527430221367, "grad_norm": 1.2841687202453613, "learning_rate": 0.00018713132529697007, "loss": 1.3052, "step": 8490 }, { "epoch": 0.32704523580365735, "grad_norm": 2.2977144718170166, "learning_rate": 0.0001871164819153524, "loss": 
1.2819, "step": 8495 }, { "epoch": 0.3272377285851781, "grad_norm": 1.62446928024292, "learning_rate": 0.00018710163056750957, "loss": 1.1739, "step": 8500 }, { "epoch": 0.32743022136669875, "grad_norm": 1.471348524093628, "learning_rate": 0.00018708677125479963, "loss": 1.0684, "step": 8505 }, { "epoch": 0.3276227141482194, "grad_norm": 1.0703455209732056, "learning_rate": 0.00018707190397858133, "loss": 1.0832, "step": 8510 }, { "epoch": 0.32781520692974014, "grad_norm": 1.3942466974258423, "learning_rate": 0.00018705702874021425, "loss": 1.1855, "step": 8515 }, { "epoch": 0.3280076997112608, "grad_norm": 1.1790398359298706, "learning_rate": 0.00018704214554105856, "loss": 1.1459, "step": 8520 }, { "epoch": 0.32820019249278154, "grad_norm": 1.2982394695281982, "learning_rate": 0.00018702725438247527, "loss": 1.2642, "step": 8525 }, { "epoch": 0.3283926852743022, "grad_norm": 1.4757968187332153, "learning_rate": 0.00018701235526582608, "loss": 1.291, "step": 8530 }, { "epoch": 0.3285851780558229, "grad_norm": 1.6837409734725952, "learning_rate": 0.0001870004302436148, "loss": 1.3796, "step": 8535 }, { "epoch": 0.3287776708373436, "grad_norm": 1.1914480924606323, "learning_rate": 0.00018698551680588075, "loss": 1.2608, "step": 8540 }, { "epoch": 0.3289701636188643, "grad_norm": 1.2581427097320557, "learning_rate": 0.00018697059541389742, "loss": 1.3011, "step": 8545 }, { "epoch": 0.329162656400385, "grad_norm": 1.5642743110656738, "learning_rate": 0.0001869556660690293, "loss": 1.2273, "step": 8550 }, { "epoch": 0.32935514918190567, "grad_norm": 1.621721863746643, "learning_rate": 0.0001869407287726415, "loss": 1.1648, "step": 8555 }, { "epoch": 0.3295476419634264, "grad_norm": 0.9840386509895325, "learning_rate": 0.00018692578352610002, "loss": 1.2741, "step": 8560 }, { "epoch": 0.32974013474494707, "grad_norm": 1.5852268934249878, "learning_rate": 0.00018691083033077144, "loss": 1.2913, "step": 8565 }, { "epoch": 0.32993262752646774, "grad_norm": 
1.280247688293457, "learning_rate": 0.00018689586918802314, "loss": 1.172, "step": 8570 }, { "epoch": 0.33012512030798846, "grad_norm": 1.3940321207046509, "learning_rate": 0.0001868809000992233, "loss": 1.175, "step": 8575 }, { "epoch": 0.33031761308950913, "grad_norm": 1.0753341913223267, "learning_rate": 0.00018686592306574063, "loss": 1.3922, "step": 8580 }, { "epoch": 0.33051010587102986, "grad_norm": 1.5959515571594238, "learning_rate": 0.00018685093808894476, "loss": 1.2741, "step": 8585 }, { "epoch": 0.33070259865255053, "grad_norm": 1.1567896604537964, "learning_rate": 0.00018683594517020593, "loss": 1.1325, "step": 8590 }, { "epoch": 0.3308950914340712, "grad_norm": 1.202486276626587, "learning_rate": 0.0001868209443108951, "loss": 1.1915, "step": 8595 }, { "epoch": 0.3310875842155919, "grad_norm": 1.6866669654846191, "learning_rate": 0.00018680593551238412, "loss": 1.2806, "step": 8600 }, { "epoch": 0.3312800769971126, "grad_norm": 1.1932209730148315, "learning_rate": 0.00018679091877604536, "loss": 1.2254, "step": 8605 }, { "epoch": 0.3314725697786333, "grad_norm": 1.5348761081695557, "learning_rate": 0.000186775894103252, "loss": 1.1519, "step": 8610 }, { "epoch": 0.331665062560154, "grad_norm": 1.908500075340271, "learning_rate": 0.00018676086149537792, "loss": 1.3105, "step": 8615 }, { "epoch": 0.3318575553416747, "grad_norm": 2.0427961349487305, "learning_rate": 0.00018674582095379788, "loss": 1.1415, "step": 8620 }, { "epoch": 0.3320500481231954, "grad_norm": 1.0964915752410889, "learning_rate": 0.00018673077247988707, "loss": 1.2041, "step": 8625 }, { "epoch": 0.33224254090471605, "grad_norm": 1.2229498624801636, "learning_rate": 0.00018671571607502168, "loss": 1.2975, "step": 8630 }, { "epoch": 0.3324350336862368, "grad_norm": 1.3551470041275024, "learning_rate": 0.00018670065174057854, "loss": 1.1592, "step": 8635 }, { "epoch": 0.33262752646775745, "grad_norm": 0.8810299634933472, "learning_rate": 0.0001866855794779351, "loss": 1.1414, "step": 
8640 }, { "epoch": 0.3328200192492782, "grad_norm": 1.5907199382781982, "learning_rate": 0.00018667049928846967, "loss": 1.2191, "step": 8645 }, { "epoch": 0.33301251203079885, "grad_norm": 2.042478561401367, "learning_rate": 0.0001866554111735612, "loss": 1.1619, "step": 8650 }, { "epoch": 0.3332050048123195, "grad_norm": 1.6686564683914185, "learning_rate": 0.00018664031513458942, "loss": 1.2534, "step": 8655 }, { "epoch": 0.33339749759384024, "grad_norm": 1.7643070220947266, "learning_rate": 0.0001866252111729348, "loss": 1.2631, "step": 8660 }, { "epoch": 0.3335899903753609, "grad_norm": 1.4883722066879272, "learning_rate": 0.0001866100992899784, "loss": 1.1786, "step": 8665 }, { "epoch": 0.33378248315688164, "grad_norm": 0.9850770235061646, "learning_rate": 0.00018659497948710218, "loss": 1.4181, "step": 8670 }, { "epoch": 0.3339749759384023, "grad_norm": 0.9056932926177979, "learning_rate": 0.00018657985176568875, "loss": 1.0365, "step": 8675 }, { "epoch": 0.334167468719923, "grad_norm": 1.9456449747085571, "learning_rate": 0.00018656471612712137, "loss": 1.227, "step": 8680 }, { "epoch": 0.3343599615014437, "grad_norm": 1.289870262145996, "learning_rate": 0.00018654957257278415, "loss": 1.32, "step": 8685 }, { "epoch": 0.3345524542829644, "grad_norm": 1.048143744468689, "learning_rate": 0.00018653442110406189, "loss": 1.2123, "step": 8690 }, { "epoch": 0.3347449470644851, "grad_norm": 1.1696733236312866, "learning_rate": 0.00018651926172234004, "loss": 1.0226, "step": 8695 }, { "epoch": 0.33493743984600577, "grad_norm": 1.4806257486343384, "learning_rate": 0.00018650409442900486, "loss": 1.1715, "step": 8700 }, { "epoch": 0.3351299326275265, "grad_norm": 1.525719404220581, "learning_rate": 0.00018648891922544325, "loss": 1.2037, "step": 8705 }, { "epoch": 0.33532242540904716, "grad_norm": 1.3378442525863647, "learning_rate": 0.00018647373611304293, "loss": 1.2188, "step": 8710 }, { "epoch": 0.33551491819056783, "grad_norm": 0.870988130569458, 
"learning_rate": 0.00018645854509319226, "loss": 1.2153, "step": 8715 }, { "epoch": 0.33570741097208856, "grad_norm": 1.5496007204055786, "learning_rate": 0.00018644334616728042, "loss": 1.1974, "step": 8720 }, { "epoch": 0.33589990375360923, "grad_norm": 1.0248416662216187, "learning_rate": 0.00018642813933669717, "loss": 1.2845, "step": 8725 }, { "epoch": 0.33609239653512996, "grad_norm": 1.9984816312789917, "learning_rate": 0.00018641292460283313, "loss": 1.3144, "step": 8730 }, { "epoch": 0.3362848893166506, "grad_norm": 1.3114112615585327, "learning_rate": 0.00018639770196707955, "loss": 1.209, "step": 8735 }, { "epoch": 0.3364773820981713, "grad_norm": 1.1683485507965088, "learning_rate": 0.00018638247143082848, "loss": 1.2688, "step": 8740 }, { "epoch": 0.336669874879692, "grad_norm": 1.507900595664978, "learning_rate": 0.0001863672329954726, "loss": 1.1325, "step": 8745 }, { "epoch": 0.3368623676612127, "grad_norm": 1.3393852710723877, "learning_rate": 0.00018635198666240542, "loss": 1.1573, "step": 8750 }, { "epoch": 0.3370548604427334, "grad_norm": 1.0203709602355957, "learning_rate": 0.00018633673243302108, "loss": 1.2922, "step": 8755 }, { "epoch": 0.3372473532242541, "grad_norm": 0.8483877778053284, "learning_rate": 0.00018632147030871448, "loss": 1.2252, "step": 8760 }, { "epoch": 0.33743984600577476, "grad_norm": 0.983748197555542, "learning_rate": 0.00018630620029088125, "loss": 1.2027, "step": 8765 }, { "epoch": 0.3376323387872955, "grad_norm": 1.2489101886749268, "learning_rate": 0.00018629092238091775, "loss": 1.1962, "step": 8770 }, { "epoch": 0.33782483156881615, "grad_norm": 1.4553676843643188, "learning_rate": 0.000186275636580221, "loss": 1.3698, "step": 8775 }, { "epoch": 0.3380173243503369, "grad_norm": 0.9494854807853699, "learning_rate": 0.0001862603428901888, "loss": 1.25, "step": 8780 }, { "epoch": 0.33820981713185755, "grad_norm": 0.8667522072792053, "learning_rate": 0.00018624504131221968, "loss": 1.222, "step": 8785 }, { "epoch": 
0.3384023099133783, "grad_norm": 1.4215630292892456, "learning_rate": 0.00018622973184771285, "loss": 1.2592, "step": 8790 }, { "epoch": 0.33859480269489894, "grad_norm": 0.9913888573646545, "learning_rate": 0.00018621441449806828, "loss": 1.2904, "step": 8795 }, { "epoch": 0.3387872954764196, "grad_norm": 0.9612273573875427, "learning_rate": 0.00018619908926468664, "loss": 1.24, "step": 8800 }, { "epoch": 0.33897978825794034, "grad_norm": 1.656568169593811, "learning_rate": 0.00018618375614896926, "loss": 1.1763, "step": 8805 }, { "epoch": 0.339172281039461, "grad_norm": 1.4496088027954102, "learning_rate": 0.0001861684151523183, "loss": 1.2045, "step": 8810 }, { "epoch": 0.33936477382098174, "grad_norm": 1.3886058330535889, "learning_rate": 0.0001861530662761366, "loss": 1.3111, "step": 8815 }, { "epoch": 0.3395572666025024, "grad_norm": 1.644887089729309, "learning_rate": 0.0001861377095218277, "loss": 1.3172, "step": 8820 }, { "epoch": 0.3397497593840231, "grad_norm": 1.1925910711288452, "learning_rate": 0.00018612234489079587, "loss": 1.3268, "step": 8825 }, { "epoch": 0.3399422521655438, "grad_norm": 1.1367309093475342, "learning_rate": 0.0001861069723844461, "loss": 1.1209, "step": 8830 }, { "epoch": 0.34013474494706447, "grad_norm": 1.0649480819702148, "learning_rate": 0.00018609159200418414, "loss": 1.1514, "step": 8835 }, { "epoch": 0.3403272377285852, "grad_norm": 1.1887884140014648, "learning_rate": 0.00018607620375141637, "loss": 1.1026, "step": 8840 }, { "epoch": 0.34051973051010587, "grad_norm": 1.9125694036483765, "learning_rate": 0.00018606080762754995, "loss": 1.4718, "step": 8845 }, { "epoch": 0.34071222329162654, "grad_norm": 1.1742594242095947, "learning_rate": 0.00018604540363399282, "loss": 1.3206, "step": 8850 }, { "epoch": 0.34090471607314726, "grad_norm": 1.504146695137024, "learning_rate": 0.0001860299917721535, "loss": 1.1639, "step": 8855 }, { "epoch": 0.34109720885466793, "grad_norm": 0.8869237899780273, "learning_rate": 
0.00018601457204344131, "loss": 1.2674, "step": 8860 }, { "epoch": 0.34128970163618866, "grad_norm": 0.8492304682731628, "learning_rate": 0.00018599914444926636, "loss": 1.2732, "step": 8865 }, { "epoch": 0.34148219441770933, "grad_norm": 1.1681571006774902, "learning_rate": 0.00018598370899103932, "loss": 1.2995, "step": 8870 }, { "epoch": 0.34167468719923005, "grad_norm": 1.6912837028503418, "learning_rate": 0.00018596826567017166, "loss": 1.3217, "step": 8875 }, { "epoch": 0.3418671799807507, "grad_norm": 1.0427602529525757, "learning_rate": 0.0001859528144880756, "loss": 1.05, "step": 8880 }, { "epoch": 0.3420596727622714, "grad_norm": 1.9644991159439087, "learning_rate": 0.00018593735544616404, "loss": 1.1087, "step": 8885 }, { "epoch": 0.3422521655437921, "grad_norm": 1.966264247894287, "learning_rate": 0.0001859218885458506, "loss": 1.2221, "step": 8890 }, { "epoch": 0.3424446583253128, "grad_norm": 1.9770557880401611, "learning_rate": 0.00018590641378854965, "loss": 1.2489, "step": 8895 }, { "epoch": 0.3426371511068335, "grad_norm": 1.4175180196762085, "learning_rate": 0.00018589093117567625, "loss": 1.1292, "step": 8900 }, { "epoch": 0.3428296438883542, "grad_norm": 1.066177487373352, "learning_rate": 0.00018587544070864612, "loss": 1.1182, "step": 8905 }, { "epoch": 0.34302213666987486, "grad_norm": 2.6207172870635986, "learning_rate": 0.00018585994238887586, "loss": 1.1, "step": 8910 }, { "epoch": 0.3432146294513956, "grad_norm": 1.6905888319015503, "learning_rate": 0.0001858444362177826, "loss": 1.3135, "step": 8915 }, { "epoch": 0.34340712223291625, "grad_norm": 1.117883324623108, "learning_rate": 0.00018582892219678435, "loss": 1.3394, "step": 8920 }, { "epoch": 0.343599615014437, "grad_norm": 1.549805760383606, "learning_rate": 0.00018581340032729972, "loss": 1.1957, "step": 8925 }, { "epoch": 0.34379210779595765, "grad_norm": 1.165260672569275, "learning_rate": 0.00018579787061074807, "loss": 1.2406, "step": 8930 }, { "epoch": 0.34398460057747837, 
"grad_norm": 1.1872533559799194, "learning_rate": 0.00018578233304854952, "loss": 1.1831, "step": 8935 }, { "epoch": 0.34417709335899904, "grad_norm": 0.8727648854255676, "learning_rate": 0.00018576678764212489, "loss": 1.2645, "step": 8940 }, { "epoch": 0.3443695861405197, "grad_norm": 1.1179304122924805, "learning_rate": 0.00018575123439289567, "loss": 1.297, "step": 8945 }, { "epoch": 0.34456207892204044, "grad_norm": 1.9064927101135254, "learning_rate": 0.0001857356733022841, "loss": 1.3917, "step": 8950 }, { "epoch": 0.3447545717035611, "grad_norm": 2.100154399871826, "learning_rate": 0.00018572010437171315, "loss": 1.1723, "step": 8955 }, { "epoch": 0.34494706448508183, "grad_norm": 1.0105838775634766, "learning_rate": 0.00018570452760260654, "loss": 1.0851, "step": 8960 }, { "epoch": 0.3451395572666025, "grad_norm": 1.760038137435913, "learning_rate": 0.0001856889429963886, "loss": 1.0612, "step": 8965 }, { "epoch": 0.3453320500481232, "grad_norm": 1.5740501880645752, "learning_rate": 0.00018567335055448444, "loss": 1.117, "step": 8970 }, { "epoch": 0.3455245428296439, "grad_norm": 1.4148597717285156, "learning_rate": 0.00018565775027831993, "loss": 1.2003, "step": 8975 }, { "epoch": 0.34571703561116457, "grad_norm": 1.2243534326553345, "learning_rate": 0.00018564214216932159, "loss": 1.2106, "step": 8980 }, { "epoch": 0.3459095283926853, "grad_norm": 1.3532603979110718, "learning_rate": 0.00018562652622891666, "loss": 1.1703, "step": 8985 }, { "epoch": 0.34610202117420596, "grad_norm": 1.6701220273971558, "learning_rate": 0.00018561090245853315, "loss": 1.2409, "step": 8990 }, { "epoch": 0.34629451395572663, "grad_norm": 1.6342322826385498, "learning_rate": 0.00018559527085959968, "loss": 1.2981, "step": 8995 }, { "epoch": 0.34648700673724736, "grad_norm": 2.4354701042175293, "learning_rate": 0.00018557963143354576, "loss": 1.1021, "step": 9000 }, { "epoch": 0.34667949951876803, "grad_norm": 1.5688186883926392, "learning_rate": 0.00018556398418180146, 
"loss": 1.2649, "step": 9005 }, { "epoch": 0.34687199230028876, "grad_norm": 2.2158894538879395, "learning_rate": 0.0001855483291057976, "loss": 1.2335, "step": 9010 }, { "epoch": 0.3470644850818094, "grad_norm": 1.7294437885284424, "learning_rate": 0.00018553266620696573, "loss": 1.3235, "step": 9015 }, { "epoch": 0.34725697786333015, "grad_norm": 1.1023756265640259, "learning_rate": 0.00018551699548673814, "loss": 1.3515, "step": 9020 }, { "epoch": 0.3474494706448508, "grad_norm": 1.4505863189697266, "learning_rate": 0.00018550131694654784, "loss": 1.3773, "step": 9025 }, { "epoch": 0.3476419634263715, "grad_norm": 2.221957206726074, "learning_rate": 0.00018548563058782847, "loss": 1.0896, "step": 9030 }, { "epoch": 0.3478344562078922, "grad_norm": 0.917010486125946, "learning_rate": 0.0001854699364120145, "loss": 1.1569, "step": 9035 }, { "epoch": 0.3480269489894129, "grad_norm": 1.4631186723709106, "learning_rate": 0.00018545423442054105, "loss": 1.2169, "step": 9040 }, { "epoch": 0.3482194417709336, "grad_norm": 1.0917268991470337, "learning_rate": 0.0001854385246148439, "loss": 1.2425, "step": 9045 }, { "epoch": 0.3484119345524543, "grad_norm": 1.5985426902770996, "learning_rate": 0.00018542280699635968, "loss": 1.0944, "step": 9050 }, { "epoch": 0.34860442733397495, "grad_norm": 1.5402495861053467, "learning_rate": 0.0001854070815665256, "loss": 1.1497, "step": 9055 }, { "epoch": 0.3487969201154957, "grad_norm": 1.211295485496521, "learning_rate": 0.00018539134832677972, "loss": 1.0403, "step": 9060 }, { "epoch": 0.34898941289701635, "grad_norm": 1.0569374561309814, "learning_rate": 0.00018537560727856068, "loss": 1.2886, "step": 9065 }, { "epoch": 0.3491819056785371, "grad_norm": 1.550212025642395, "learning_rate": 0.00018535985842330793, "loss": 1.2654, "step": 9070 }, { "epoch": 0.34937439846005774, "grad_norm": 1.7941083908081055, "learning_rate": 0.00018534410176246154, "loss": 1.2757, "step": 9075 }, { "epoch": 0.3495668912415784, "grad_norm": 
0.9004856944084167, "learning_rate": 0.00018532833729746243, "loss": 1.2045, "step": 9080 }, { "epoch": 0.34975938402309914, "grad_norm": 0.9916037321090698, "learning_rate": 0.00018531256502975216, "loss": 1.1788, "step": 9085 }, { "epoch": 0.3499518768046198, "grad_norm": 1.0524908304214478, "learning_rate": 0.00018529678496077292, "loss": 1.3298, "step": 9090 }, { "epoch": 0.35014436958614054, "grad_norm": 2.7244019508361816, "learning_rate": 0.00018528099709196774, "loss": 1.3274, "step": 9095 }, { "epoch": 0.3503368623676612, "grad_norm": 1.4286680221557617, "learning_rate": 0.0001852652014247803, "loss": 1.193, "step": 9100 }, { "epoch": 0.35052935514918193, "grad_norm": 1.0943810939788818, "learning_rate": 0.00018524939796065503, "loss": 1.2953, "step": 9105 }, { "epoch": 0.3507218479307026, "grad_norm": 1.1513092517852783, "learning_rate": 0.00018523358670103704, "loss": 1.3436, "step": 9110 }, { "epoch": 0.35091434071222327, "grad_norm": 2.142829656600952, "learning_rate": 0.00018521776764737218, "loss": 1.2998, "step": 9115 }, { "epoch": 0.351106833493744, "grad_norm": 0.9734616875648499, "learning_rate": 0.00018520194080110699, "loss": 1.2794, "step": 9120 }, { "epoch": 0.35129932627526467, "grad_norm": 1.0793628692626953, "learning_rate": 0.00018518610616368868, "loss": 1.2574, "step": 9125 }, { "epoch": 0.3514918190567854, "grad_norm": 2.409484386444092, "learning_rate": 0.00018517026373656532, "loss": 1.1601, "step": 9130 }, { "epoch": 0.35168431183830606, "grad_norm": 1.1166318655014038, "learning_rate": 0.0001851544135211855, "loss": 1.2705, "step": 9135 }, { "epoch": 0.35187680461982673, "grad_norm": 1.183131217956543, "learning_rate": 0.0001851385555189987, "loss": 1.132, "step": 9140 }, { "epoch": 0.35206929740134746, "grad_norm": 1.3792176246643066, "learning_rate": 0.00018512268973145497, "loss": 1.1271, "step": 9145 }, { "epoch": 0.35226179018286813, "grad_norm": 1.3978809118270874, "learning_rate": 0.00018510681616000513, "loss": 1.3828, 
"step": 9150 }, { "epoch": 0.35245428296438885, "grad_norm": 1.0242118835449219, "learning_rate": 0.00018509093480610078, "loss": 1.1982, "step": 9155 }, { "epoch": 0.3526467757459095, "grad_norm": 1.326621174812317, "learning_rate": 0.00018507504567119408, "loss": 1.0175, "step": 9160 }, { "epoch": 0.3528392685274302, "grad_norm": 1.1905460357666016, "learning_rate": 0.00018505914875673805, "loss": 1.3367, "step": 9165 }, { "epoch": 0.3530317613089509, "grad_norm": 1.5423171520233154, "learning_rate": 0.0001850432440641863, "loss": 1.1721, "step": 9170 }, { "epoch": 0.3532242540904716, "grad_norm": 1.0577900409698486, "learning_rate": 0.00018502733159499326, "loss": 1.2173, "step": 9175 }, { "epoch": 0.3534167468719923, "grad_norm": 0.8053417205810547, "learning_rate": 0.000185011411350614, "loss": 1.1492, "step": 9180 }, { "epoch": 0.353609239653513, "grad_norm": 1.076053261756897, "learning_rate": 0.0001849954833325043, "loss": 1.2117, "step": 9185 }, { "epoch": 0.3538017324350337, "grad_norm": 1.206359624862671, "learning_rate": 0.0001849795475421207, "loss": 1.1659, "step": 9190 }, { "epoch": 0.3539942252165544, "grad_norm": 1.4652369022369385, "learning_rate": 0.00018496360398092046, "loss": 1.2605, "step": 9195 }, { "epoch": 0.35418671799807505, "grad_norm": 1.158055067062378, "learning_rate": 0.00018494765265036144, "loss": 1.414, "step": 9200 }, { "epoch": 0.3543792107795958, "grad_norm": 2.4634461402893066, "learning_rate": 0.0001849316935519023, "loss": 1.1982, "step": 9205 }, { "epoch": 0.35457170356111645, "grad_norm": 1.875139594078064, "learning_rate": 0.00018491572668700242, "loss": 1.4133, "step": 9210 }, { "epoch": 0.3547641963426372, "grad_norm": 1.0054875612258911, "learning_rate": 0.00018489975205712185, "loss": 1.2294, "step": 9215 }, { "epoch": 0.35495668912415784, "grad_norm": 2.2620842456817627, "learning_rate": 0.00018488376966372134, "loss": 1.2672, "step": 9220 }, { "epoch": 0.3551491819056785, "grad_norm": 1.584251880645752, 
"learning_rate": 0.00018486777950826243, "loss": 1.4366, "step": 9225 }, { "epoch": 0.35534167468719924, "grad_norm": 1.6498923301696777, "learning_rate": 0.00018485178159220725, "loss": 1.3502, "step": 9230 }, { "epoch": 0.3555341674687199, "grad_norm": 1.6700108051300049, "learning_rate": 0.00018483577591701876, "loss": 1.2462, "step": 9235 }, { "epoch": 0.35572666025024063, "grad_norm": 1.6976680755615234, "learning_rate": 0.00018481976248416052, "loss": 1.4637, "step": 9240 }, { "epoch": 0.3559191530317613, "grad_norm": 0.9686551094055176, "learning_rate": 0.0001848037412950969, "loss": 1.1902, "step": 9245 }, { "epoch": 0.35611164581328203, "grad_norm": 1.2102336883544922, "learning_rate": 0.00018478771235129292, "loss": 1.586, "step": 9250 }, { "epoch": 0.3563041385948027, "grad_norm": 1.7220674753189087, "learning_rate": 0.0001847716756542143, "loss": 1.2324, "step": 9255 }, { "epoch": 0.35649663137632337, "grad_norm": 1.7433216571807861, "learning_rate": 0.0001847556312053275, "loss": 1.4454, "step": 9260 }, { "epoch": 0.3566891241578441, "grad_norm": 0.9930455088615417, "learning_rate": 0.0001847395790060997, "loss": 1.1601, "step": 9265 }, { "epoch": 0.35688161693936477, "grad_norm": 1.1169023513793945, "learning_rate": 0.00018472351905799873, "loss": 1.2534, "step": 9270 }, { "epoch": 0.3570741097208855, "grad_norm": 1.238748550415039, "learning_rate": 0.00018470745136249316, "loss": 1.2174, "step": 9275 }, { "epoch": 0.35726660250240616, "grad_norm": 2.130223035812378, "learning_rate": 0.00018469137592105235, "loss": 1.3975, "step": 9280 }, { "epoch": 0.35745909528392683, "grad_norm": 1.4341787099838257, "learning_rate": 0.0001846752927351462, "loss": 1.1725, "step": 9285 }, { "epoch": 0.35765158806544756, "grad_norm": 1.948145866394043, "learning_rate": 0.00018465920180624548, "loss": 1.2741, "step": 9290 }, { "epoch": 0.3578440808469682, "grad_norm": 1.0314382314682007, "learning_rate": 0.00018464310313582157, "loss": 1.0998, "step": 9295 }, { 
"epoch": 0.35803657362848895, "grad_norm": 1.0461472272872925, "learning_rate": 0.0001846269967253466, "loss": 1.1953, "step": 9300 }, { "epoch": 0.3582290664100096, "grad_norm": 1.781084656715393, "learning_rate": 0.00018461088257629334, "loss": 1.3629, "step": 9305 }, { "epoch": 0.3584215591915303, "grad_norm": 1.9082306623458862, "learning_rate": 0.00018459476069013537, "loss": 1.2675, "step": 9310 }, { "epoch": 0.358614051973051, "grad_norm": 1.803348422050476, "learning_rate": 0.00018457863106834693, "loss": 1.2303, "step": 9315 }, { "epoch": 0.3588065447545717, "grad_norm": 1.5346139669418335, "learning_rate": 0.000184562493712403, "loss": 1.3354, "step": 9320 }, { "epoch": 0.3589990375360924, "grad_norm": 1.3731290102005005, "learning_rate": 0.00018454634862377916, "loss": 1.4874, "step": 9325 }, { "epoch": 0.3591915303176131, "grad_norm": 1.186759352684021, "learning_rate": 0.0001845301958039518, "loss": 1.29, "step": 9330 }, { "epoch": 0.3593840230991338, "grad_norm": 3.729174852371216, "learning_rate": 0.00018451403525439802, "loss": 1.2589, "step": 9335 }, { "epoch": 0.3595765158806545, "grad_norm": 2.46051025390625, "learning_rate": 0.00018449786697659554, "loss": 1.1818, "step": 9340 }, { "epoch": 0.35976900866217515, "grad_norm": 1.6652323007583618, "learning_rate": 0.00018448169097202288, "loss": 1.2719, "step": 9345 }, { "epoch": 0.3599615014436959, "grad_norm": 1.375410556793213, "learning_rate": 0.00018446550724215922, "loss": 1.2687, "step": 9350 }, { "epoch": 0.36015399422521654, "grad_norm": 1.9113675355911255, "learning_rate": 0.00018444931578848447, "loss": 1.2475, "step": 9355 }, { "epoch": 0.36034648700673727, "grad_norm": 1.8949065208435059, "learning_rate": 0.0001844331166124792, "loss": 1.3439, "step": 9360 }, { "epoch": 0.36053897978825794, "grad_norm": 1.0940630435943604, "learning_rate": 0.00018441690971562476, "loss": 1.203, "step": 9365 }, { "epoch": 0.3607314725697786, "grad_norm": 1.2999101877212524, "learning_rate": 
0.00018440069509940315, "loss": 1.2729, "step": 9370 }, { "epoch": 0.36092396535129934, "grad_norm": 1.3675721883773804, "learning_rate": 0.00018438447276529702, "loss": 1.2024, "step": 9375 }, { "epoch": 0.36111645813282, "grad_norm": 1.6651533842086792, "learning_rate": 0.00018436824271478988, "loss": 1.2235, "step": 9380 }, { "epoch": 0.36130895091434073, "grad_norm": 2.16670823097229, "learning_rate": 0.00018435200494936585, "loss": 1.4486, "step": 9385 }, { "epoch": 0.3615014436958614, "grad_norm": 1.3305730819702148, "learning_rate": 0.00018433575947050972, "loss": 1.2003, "step": 9390 }, { "epoch": 0.36169393647738207, "grad_norm": 1.5913615226745605, "learning_rate": 0.00018431950627970708, "loss": 1.2722, "step": 9395 }, { "epoch": 0.3618864292589028, "grad_norm": 0.9965779781341553, "learning_rate": 0.00018430324537844415, "loss": 1.0604, "step": 9400 }, { "epoch": 0.36207892204042347, "grad_norm": 1.7614198923110962, "learning_rate": 0.00018428697676820788, "loss": 1.2734, "step": 9405 }, { "epoch": 0.3622714148219442, "grad_norm": 1.190706491470337, "learning_rate": 0.00018427070045048594, "loss": 1.2309, "step": 9410 }, { "epoch": 0.36246390760346486, "grad_norm": 1.1487165689468384, "learning_rate": 0.00018425441642676667, "loss": 1.2049, "step": 9415 }, { "epoch": 0.3626564003849856, "grad_norm": 1.0437067747116089, "learning_rate": 0.00018423812469853918, "loss": 1.3632, "step": 9420 }, { "epoch": 0.36284889316650626, "grad_norm": 1.7774686813354492, "learning_rate": 0.00018422182526729318, "loss": 1.1797, "step": 9425 }, { "epoch": 0.36304138594802693, "grad_norm": 1.3748910427093506, "learning_rate": 0.0001842055181345192, "loss": 1.4438, "step": 9430 }, { "epoch": 0.36323387872954765, "grad_norm": 0.891248881816864, "learning_rate": 0.00018418920330170842, "loss": 1.3017, "step": 9435 }, { "epoch": 0.3634263715110683, "grad_norm": 1.5410393476486206, "learning_rate": 0.00018417288077035267, "loss": 1.2239, "step": 9440 }, { "epoch": 
0.36361886429258905, "grad_norm": 1.3638213872909546, "learning_rate": 0.00018415655054194457, "loss": 1.2245, "step": 9445 }, { "epoch": 0.3638113570741097, "grad_norm": 1.84505033493042, "learning_rate": 0.00018414021261797743, "loss": 1.1362, "step": 9450 }, { "epoch": 0.3640038498556304, "grad_norm": 1.5999794006347656, "learning_rate": 0.00018412386699994518, "loss": 1.1647, "step": 9455 }, { "epoch": 0.3641963426371511, "grad_norm": 1.55308997631073, "learning_rate": 0.0001841075136893426, "loss": 1.2612, "step": 9460 }, { "epoch": 0.3643888354186718, "grad_norm": 1.3549528121948242, "learning_rate": 0.00018409115268766505, "loss": 1.2095, "step": 9465 }, { "epoch": 0.3645813282001925, "grad_norm": 1.123184323310852, "learning_rate": 0.00018407478399640862, "loss": 1.3047, "step": 9470 }, { "epoch": 0.3647738209817132, "grad_norm": 1.3776748180389404, "learning_rate": 0.00018405840761707016, "loss": 1.1064, "step": 9475 }, { "epoch": 0.36496631376323385, "grad_norm": 1.3778200149536133, "learning_rate": 0.00018404202355114718, "loss": 1.0956, "step": 9480 }, { "epoch": 0.3651588065447546, "grad_norm": 0.9069898128509521, "learning_rate": 0.00018402563180013783, "loss": 1.141, "step": 9485 }, { "epoch": 0.36535129932627525, "grad_norm": 1.3908804655075073, "learning_rate": 0.0001840092323655411, "loss": 1.2679, "step": 9490 }, { "epoch": 0.365543792107796, "grad_norm": 1.3785732984542847, "learning_rate": 0.00018399282524885654, "loss": 1.22, "step": 9495 }, { "epoch": 0.36573628488931664, "grad_norm": 1.1326193809509277, "learning_rate": 0.00018397641045158453, "loss": 1.2289, "step": 9500 }, { "epoch": 0.36592877767083737, "grad_norm": 1.2267814874649048, "learning_rate": 0.0001839599879752261, "loss": 1.1337, "step": 9505 }, { "epoch": 0.36612127045235804, "grad_norm": 0.8690314888954163, "learning_rate": 0.00018394355782128295, "loss": 1.2535, "step": 9510 }, { "epoch": 0.3663137632338787, "grad_norm": 1.448415994644165, "learning_rate": 
0.00018392711999125748, "loss": 1.1405, "step": 9515 }, { "epoch": 0.36650625601539943, "grad_norm": 1.8989317417144775, "learning_rate": 0.00018391067448665288, "loss": 1.091, "step": 9520 }, { "epoch": 0.3666987487969201, "grad_norm": 1.2263299226760864, "learning_rate": 0.00018389422130897295, "loss": 1.1925, "step": 9525 }, { "epoch": 0.36689124157844083, "grad_norm": 0.8818153142929077, "learning_rate": 0.00018387776045972225, "loss": 1.2961, "step": 9530 }, { "epoch": 0.3670837343599615, "grad_norm": 1.0975017547607422, "learning_rate": 0.00018386129194040597, "loss": 1.414, "step": 9535 }, { "epoch": 0.36727622714148217, "grad_norm": 2.2097692489624023, "learning_rate": 0.00018384481575253004, "loss": 1.1941, "step": 9540 }, { "epoch": 0.3674687199230029, "grad_norm": 1.2249376773834229, "learning_rate": 0.0001838283318976012, "loss": 1.4472, "step": 9545 }, { "epoch": 0.36766121270452357, "grad_norm": 1.0000889301300049, "learning_rate": 0.0001838118403771267, "loss": 1.2399, "step": 9550 }, { "epoch": 0.3678537054860443, "grad_norm": 1.0249544382095337, "learning_rate": 0.00018379534119261458, "loss": 1.3182, "step": 9555 }, { "epoch": 0.36804619826756496, "grad_norm": 1.2347283363342285, "learning_rate": 0.00018377883434557362, "loss": 1.1313, "step": 9560 }, { "epoch": 0.3682386910490857, "grad_norm": 1.1021714210510254, "learning_rate": 0.0001837623198375132, "loss": 1.2381, "step": 9565 }, { "epoch": 0.36843118383060636, "grad_norm": 1.0923985242843628, "learning_rate": 0.00018374579766994355, "loss": 1.3386, "step": 9570 }, { "epoch": 0.368623676612127, "grad_norm": 1.7709978818893433, "learning_rate": 0.00018372926784437547, "loss": 1.2405, "step": 9575 }, { "epoch": 0.36881616939364775, "grad_norm": 1.316901683807373, "learning_rate": 0.00018371273036232047, "loss": 1.1244, "step": 9580 }, { "epoch": 0.3690086621751684, "grad_norm": 1.7281345129013062, "learning_rate": 0.00018369618522529085, "loss": 1.2979, "step": 9585 }, { "epoch": 
0.36920115495668915, "grad_norm": 1.6363762617111206, "learning_rate": 0.00018367963243479953, "loss": 1.1528, "step": 9590 }, { "epoch": 0.3693936477382098, "grad_norm": 1.7078179121017456, "learning_rate": 0.00018366307199236013, "loss": 1.2833, "step": 9595 }, { "epoch": 0.3695861405197305, "grad_norm": 1.9110232591629028, "learning_rate": 0.000183646503899487, "loss": 1.4191, "step": 9600 }, { "epoch": 0.3697786333012512, "grad_norm": 0.952301025390625, "learning_rate": 0.00018362992815769525, "loss": 1.1504, "step": 9605 }, { "epoch": 0.3699711260827719, "grad_norm": 0.9142165780067444, "learning_rate": 0.0001836133447685005, "loss": 1.2617, "step": 9610 }, { "epoch": 0.3701636188642926, "grad_norm": 1.5571134090423584, "learning_rate": 0.0001835967537334193, "loss": 1.3054, "step": 9615 }, { "epoch": 0.3703561116458133, "grad_norm": 1.799795389175415, "learning_rate": 0.00018358015505396877, "loss": 1.0603, "step": 9620 }, { "epoch": 0.37054860442733395, "grad_norm": 1.6660315990447998, "learning_rate": 0.0001835635487316667, "loss": 1.1757, "step": 9625 }, { "epoch": 0.3707410972088547, "grad_norm": 0.9840423464775085, "learning_rate": 0.00018354693476803168, "loss": 0.9815, "step": 9630 }, { "epoch": 0.37093358999037535, "grad_norm": 2.0538954734802246, "learning_rate": 0.00018353031316458286, "loss": 1.2396, "step": 9635 }, { "epoch": 0.37112608277189607, "grad_norm": 1.2079198360443115, "learning_rate": 0.0001835136839228403, "loss": 1.2731, "step": 9640 }, { "epoch": 0.37131857555341674, "grad_norm": 1.7076921463012695, "learning_rate": 0.00018349704704432457, "loss": 1.1388, "step": 9645 }, { "epoch": 0.37151106833493747, "grad_norm": 1.0324435234069824, "learning_rate": 0.00018348040253055698, "loss": 0.9949, "step": 9650 }, { "epoch": 0.37170356111645814, "grad_norm": 1.3635584115982056, "learning_rate": 0.0001834637503830596, "loss": 1.307, "step": 9655 }, { "epoch": 0.3718960538979788, "grad_norm": 1.6683429479599, "learning_rate": 
0.00018344709060335513, "loss": 1.1687, "step": 9660 }, { "epoch": 0.37208854667949953, "grad_norm": 2.3687121868133545, "learning_rate": 0.00018343042319296702, "loss": 1.4163, "step": 9665 }, { "epoch": 0.3722810394610202, "grad_norm": 1.9078242778778076, "learning_rate": 0.00018341374815341937, "loss": 1.2986, "step": 9670 }, { "epoch": 0.37247353224254093, "grad_norm": 1.6381220817565918, "learning_rate": 0.00018339706548623706, "loss": 1.5092, "step": 9675 }, { "epoch": 0.3726660250240616, "grad_norm": 1.3529161214828491, "learning_rate": 0.00018338037519294553, "loss": 1.2296, "step": 9680 }, { "epoch": 0.37285851780558227, "grad_norm": 1.1034053564071655, "learning_rate": 0.00018336367727507104, "loss": 1.2774, "step": 9685 }, { "epoch": 0.373051010587103, "grad_norm": 2.0935397148132324, "learning_rate": 0.0001833469717341405, "loss": 1.2247, "step": 9690 }, { "epoch": 0.37324350336862366, "grad_norm": 1.6294866800308228, "learning_rate": 0.0001833302585716815, "loss": 1.3766, "step": 9695 }, { "epoch": 0.3734359961501444, "grad_norm": 1.6927978992462158, "learning_rate": 0.0001833135377892224, "loss": 1.3069, "step": 9700 }, { "epoch": 0.37362848893166506, "grad_norm": 0.8497247695922852, "learning_rate": 0.00018329680938829212, "loss": 1.0906, "step": 9705 }, { "epoch": 0.37382098171318573, "grad_norm": 1.9347554445266724, "learning_rate": 0.00018328007337042046, "loss": 1.277, "step": 9710 }, { "epoch": 0.37401347449470645, "grad_norm": 1.023130178451538, "learning_rate": 0.00018326332973713776, "loss": 1.254, "step": 9715 }, { "epoch": 0.3742059672762271, "grad_norm": 1.7206385135650635, "learning_rate": 0.0001832465784899751, "loss": 1.2141, "step": 9720 }, { "epoch": 0.37439846005774785, "grad_norm": 1.2445294857025146, "learning_rate": 0.00018322981963046433, "loss": 1.3817, "step": 9725 }, { "epoch": 0.3745909528392685, "grad_norm": 1.832334280014038, "learning_rate": 0.00018321305316013788, "loss": 1.3584, "step": 9730 }, { "epoch": 
0.37478344562078925, "grad_norm": 1.2087010145187378, "learning_rate": 0.00018319627908052898, "loss": 1.116, "step": 9735 }, { "epoch": 0.3749759384023099, "grad_norm": 1.286687970161438, "learning_rate": 0.00018317949739317147, "loss": 1.1913, "step": 9740 }, { "epoch": 0.3751684311838306, "grad_norm": 1.44833242893219, "learning_rate": 0.00018316270809959993, "loss": 1.2713, "step": 9745 }, { "epoch": 0.3753609239653513, "grad_norm": 1.1395667791366577, "learning_rate": 0.00018314591120134963, "loss": 1.2912, "step": 9750 }, { "epoch": 0.375553416746872, "grad_norm": 1.1399837732315063, "learning_rate": 0.00018312910669995654, "loss": 1.2804, "step": 9755 }, { "epoch": 0.3757459095283927, "grad_norm": 1.814249038696289, "learning_rate": 0.00018311229459695735, "loss": 1.1062, "step": 9760 }, { "epoch": 0.3759384023099134, "grad_norm": 1.4851144552230835, "learning_rate": 0.00018309547489388933, "loss": 1.2826, "step": 9765 }, { "epoch": 0.37613089509143405, "grad_norm": 0.9308827519416809, "learning_rate": 0.00018307864759229065, "loss": 1.3706, "step": 9770 }, { "epoch": 0.3763233878729548, "grad_norm": 3.707566261291504, "learning_rate": 0.00018306181269369998, "loss": 1.2292, "step": 9775 }, { "epoch": 0.37651588065447544, "grad_norm": 2.6666324138641357, "learning_rate": 0.00018304497019965677, "loss": 1.4645, "step": 9780 }, { "epoch": 0.37670837343599617, "grad_norm": 1.5997512340545654, "learning_rate": 0.00018302812011170114, "loss": 1.2812, "step": 9785 }, { "epoch": 0.37690086621751684, "grad_norm": 0.8998873233795166, "learning_rate": 0.00018301126243137395, "loss": 1.195, "step": 9790 }, { "epoch": 0.3770933589990375, "grad_norm": 1.407524585723877, "learning_rate": 0.0001829943971602167, "loss": 1.1793, "step": 9795 }, { "epoch": 0.37728585178055823, "grad_norm": 1.1469497680664062, "learning_rate": 0.00018297752429977164, "loss": 1.3624, "step": 9800 }, { "epoch": 0.3774783445620789, "grad_norm": 1.4583423137664795, "learning_rate": 
0.00018296064385158164, "loss": 1.2033, "step": 9805 }, { "epoch": 0.37767083734359963, "grad_norm": 1.0782575607299805, "learning_rate": 0.00018294375581719036, "loss": 1.1823, "step": 9810 }, { "epoch": 0.3778633301251203, "grad_norm": 1.1890922784805298, "learning_rate": 0.00018292686019814202, "loss": 1.2711, "step": 9815 }, { "epoch": 0.378055822906641, "grad_norm": 0.854491651058197, "learning_rate": 0.00018290995699598165, "loss": 1.1953, "step": 9820 }, { "epoch": 0.3782483156881617, "grad_norm": 1.2184374332427979, "learning_rate": 0.00018289304621225497, "loss": 1.2052, "step": 9825 }, { "epoch": 0.37844080846968237, "grad_norm": 1.1952948570251465, "learning_rate": 0.0001828761278485083, "loss": 1.2516, "step": 9830 }, { "epoch": 0.3786333012512031, "grad_norm": 2.1117265224456787, "learning_rate": 0.00018285920190628879, "loss": 1.2834, "step": 9835 }, { "epoch": 0.37882579403272376, "grad_norm": 1.1815403699874878, "learning_rate": 0.00018284226838714412, "loss": 1.0574, "step": 9840 }, { "epoch": 0.3790182868142445, "grad_norm": 1.3763145208358765, "learning_rate": 0.00018282532729262278, "loss": 1.2813, "step": 9845 }, { "epoch": 0.37921077959576516, "grad_norm": 1.5308822393417358, "learning_rate": 0.00018280837862427393, "loss": 1.2118, "step": 9850 }, { "epoch": 0.3794032723772858, "grad_norm": 1.1991111040115356, "learning_rate": 0.00018279142238364745, "loss": 1.0999, "step": 9855 }, { "epoch": 0.37959576515880655, "grad_norm": 1.7062435150146484, "learning_rate": 0.0001827744585722938, "loss": 1.2103, "step": 9860 }, { "epoch": 0.3797882579403272, "grad_norm": 1.5572453737258911, "learning_rate": 0.00018275748719176425, "loss": 1.112, "step": 9865 }, { "epoch": 0.37998075072184795, "grad_norm": 0.9328321218490601, "learning_rate": 0.00018274050824361072, "loss": 1.2688, "step": 9870 }, { "epoch": 0.3801732435033686, "grad_norm": 1.290634036064148, "learning_rate": 0.0001827235217293858, "loss": 1.1486, "step": 9875 }, { "epoch": 
0.38036573628488934, "grad_norm": 1.7471963167190552, "learning_rate": 0.00018270652765064283, "loss": 1.2584, "step": 9880 }, { "epoch": 0.38055822906641, "grad_norm": 1.4827409982681274, "learning_rate": 0.00018268952600893577, "loss": 1.3655, "step": 9885 }, { "epoch": 0.3807507218479307, "grad_norm": 1.0229063034057617, "learning_rate": 0.00018267251680581935, "loss": 1.1955, "step": 9890 }, { "epoch": 0.3809432146294514, "grad_norm": 1.3075898885726929, "learning_rate": 0.0001826555000428489, "loss": 0.9779, "step": 9895 }, { "epoch": 0.3811357074109721, "grad_norm": 1.5942119359970093, "learning_rate": 0.00018263847572158053, "loss": 1.2556, "step": 9900 }, { "epoch": 0.3813282001924928, "grad_norm": 0.9223330616950989, "learning_rate": 0.00018262144384357097, "loss": 1.1109, "step": 9905 }, { "epoch": 0.3815206929740135, "grad_norm": 1.7757457494735718, "learning_rate": 0.00018260440441037766, "loss": 1.2219, "step": 9910 }, { "epoch": 0.38171318575553415, "grad_norm": 1.4870551824569702, "learning_rate": 0.00018258735742355883, "loss": 1.3312, "step": 9915 }, { "epoch": 0.38190567853705487, "grad_norm": 1.2982031106948853, "learning_rate": 0.00018257030288467322, "loss": 1.2421, "step": 9920 }, { "epoch": 0.38209817131857554, "grad_norm": 1.016822338104248, "learning_rate": 0.0001825532407952804, "loss": 1.3542, "step": 9925 }, { "epoch": 0.38229066410009627, "grad_norm": 1.0763219594955444, "learning_rate": 0.00018253617115694058, "loss": 1.2579, "step": 9930 }, { "epoch": 0.38248315688161694, "grad_norm": 1.7673341035842896, "learning_rate": 0.00018251909397121464, "loss": 1.1875, "step": 9935 }, { "epoch": 0.3826756496631376, "grad_norm": 1.3719041347503662, "learning_rate": 0.00018250200923966423, "loss": 1.1493, "step": 9940 }, { "epoch": 0.38286814244465833, "grad_norm": 1.8589760065078735, "learning_rate": 0.00018248491696385157, "loss": 1.2751, "step": 9945 }, { "epoch": 0.383060635226179, "grad_norm": 1.6069539785385132, "learning_rate": 
0.0001824678171453397, "loss": 1.415, "step": 9950 }, { "epoch": 0.38325312800769973, "grad_norm": 1.7131226062774658, "learning_rate": 0.0001824507097856922, "loss": 1.1773, "step": 9955 }, { "epoch": 0.3834456207892204, "grad_norm": 0.7622759342193604, "learning_rate": 0.0001824335948864735, "loss": 1.1588, "step": 9960 }, { "epoch": 0.3836381135707411, "grad_norm": 1.6202800273895264, "learning_rate": 0.0001824164724492486, "loss": 1.3064, "step": 9965 }, { "epoch": 0.3838306063522618, "grad_norm": 1.5452194213867188, "learning_rate": 0.0001823993424755833, "loss": 1.2993, "step": 9970 }, { "epoch": 0.38402309913378246, "grad_norm": 1.013929009437561, "learning_rate": 0.00018238220496704396, "loss": 1.3123, "step": 9975 }, { "epoch": 0.3842155919153032, "grad_norm": 0.9624648094177246, "learning_rate": 0.0001823650599251977, "loss": 1.0517, "step": 9980 }, { "epoch": 0.38440808469682386, "grad_norm": 1.2065962553024292, "learning_rate": 0.00018234790735161232, "loss": 1.1954, "step": 9985 }, { "epoch": 0.3846005774783446, "grad_norm": 1.425376057624817, "learning_rate": 0.00018233074724785634, "loss": 1.069, "step": 9990 }, { "epoch": 0.38479307025986526, "grad_norm": 1.0355112552642822, "learning_rate": 0.00018231357961549888, "loss": 1.0839, "step": 9995 }, { "epoch": 0.3849855630413859, "grad_norm": 1.7273633480072021, "learning_rate": 0.00018229640445610988, "loss": 1.1324, "step": 10000 }, { "epoch": 0.38517805582290665, "grad_norm": 1.413021445274353, "learning_rate": 0.00018227922177125984, "loss": 1.0402, "step": 10005 }, { "epoch": 0.3853705486044273, "grad_norm": 1.125299334526062, "learning_rate": 0.00018226203156252005, "loss": 1.271, "step": 10010 }, { "epoch": 0.38556304138594805, "grad_norm": 1.2611075639724731, "learning_rate": 0.00018224483383146237, "loss": 1.2228, "step": 10015 }, { "epoch": 0.3857555341674687, "grad_norm": 1.0332306623458862, "learning_rate": 0.00018222762857965944, "loss": 1.2059, "step": 10020 }, { "epoch": 
0.3859480269489894, "grad_norm": 1.965288758277893, "learning_rate": 0.00018221041580868464, "loss": 1.217, "step": 10025 }, { "epoch": 0.3861405197305101, "grad_norm": 0.8059799075126648, "learning_rate": 0.00018219319552011186, "loss": 1.2039, "step": 10030 }, { "epoch": 0.3863330125120308, "grad_norm": 1.4955195188522339, "learning_rate": 0.00018217596771551584, "loss": 1.2206, "step": 10035 }, { "epoch": 0.3865255052935515, "grad_norm": 0.987479567527771, "learning_rate": 0.00018215873239647197, "loss": 1.3134, "step": 10040 }, { "epoch": 0.3867179980750722, "grad_norm": 1.7247464656829834, "learning_rate": 0.00018214148956455627, "loss": 1.1786, "step": 10045 }, { "epoch": 0.3869104908565929, "grad_norm": 0.9822973608970642, "learning_rate": 0.00018212423922134546, "loss": 1.0866, "step": 10050 }, { "epoch": 0.3871029836381136, "grad_norm": 1.1217613220214844, "learning_rate": 0.000182106981368417, "loss": 1.3292, "step": 10055 }, { "epoch": 0.38729547641963424, "grad_norm": 1.2722941637039185, "learning_rate": 0.000182089716007349, "loss": 1.0294, "step": 10060 }, { "epoch": 0.38748796920115497, "grad_norm": 1.6616365909576416, "learning_rate": 0.00018207244313972026, "loss": 1.3691, "step": 10065 }, { "epoch": 0.38768046198267564, "grad_norm": 4.093936443328857, "learning_rate": 0.0001820551627671103, "loss": 1.1916, "step": 10070 }, { "epoch": 0.38787295476419636, "grad_norm": 1.9061866998672485, "learning_rate": 0.00018203787489109926, "loss": 1.3733, "step": 10075 }, { "epoch": 0.38806544754571703, "grad_norm": 1.6439005136489868, "learning_rate": 0.00018202057951326804, "loss": 1.3533, "step": 10080 }, { "epoch": 0.3882579403272377, "grad_norm": 1.535980224609375, "learning_rate": 0.0001820032766351981, "loss": 1.3916, "step": 10085 }, { "epoch": 0.38845043310875843, "grad_norm": 1.6342761516571045, "learning_rate": 0.00018198596625847177, "loss": 1.335, "step": 10090 }, { "epoch": 0.3886429258902791, "grad_norm": 2.2760815620422363, "learning_rate": 
0.00018196864838467192, "loss": 1.0399, "step": 10095 }, { "epoch": 0.3888354186717998, "grad_norm": 1.173302412033081, "learning_rate": 0.0001819513230153822, "loss": 1.3414, "step": 10100 }, { "epoch": 0.3890279114533205, "grad_norm": 1.7409497499465942, "learning_rate": 0.00018193399015218684, "loss": 1.3377, "step": 10105 }, { "epoch": 0.38922040423484117, "grad_norm": 1.3547555208206177, "learning_rate": 0.00018191664979667085, "loss": 1.2576, "step": 10110 }, { "epoch": 0.3894128970163619, "grad_norm": 2.2421867847442627, "learning_rate": 0.0001818993019504199, "loss": 1.1624, "step": 10115 }, { "epoch": 0.38960538979788256, "grad_norm": 1.5812993049621582, "learning_rate": 0.00018188194661502029, "loss": 1.2319, "step": 10120 }, { "epoch": 0.3897978825794033, "grad_norm": 1.8024287223815918, "learning_rate": 0.00018186458379205908, "loss": 1.4016, "step": 10125 }, { "epoch": 0.38999037536092396, "grad_norm": 0.9069392681121826, "learning_rate": 0.000181847213483124, "loss": 1.1683, "step": 10130 }, { "epoch": 0.3901828681424447, "grad_norm": 1.6808935403823853, "learning_rate": 0.00018182983568980346, "loss": 1.3519, "step": 10135 }, { "epoch": 0.39037536092396535, "grad_norm": 2.584958553314209, "learning_rate": 0.0001818124504136865, "loss": 1.3804, "step": 10140 }, { "epoch": 0.390567853705486, "grad_norm": 1.4569361209869385, "learning_rate": 0.00018179505765636287, "loss": 1.2862, "step": 10145 }, { "epoch": 0.39076034648700675, "grad_norm": 2.0809457302093506, "learning_rate": 0.0001817776574194231, "loss": 1.1108, "step": 10150 }, { "epoch": 0.3909528392685274, "grad_norm": 1.7902493476867676, "learning_rate": 0.00018176024970445828, "loss": 1.0611, "step": 10155 }, { "epoch": 0.39114533205004814, "grad_norm": 0.9953207969665527, "learning_rate": 0.00018174283451306025, "loss": 1.1883, "step": 10160 }, { "epoch": 0.3913378248315688, "grad_norm": 1.0629642009735107, "learning_rate": 0.00018172541184682147, "loss": 1.3, "step": 10165 }, { "epoch": 
0.3915303176130895, "grad_norm": 1.546132206916809, "learning_rate": 0.0001817079817073352, "loss": 1.2446, "step": 10170 }, { "epoch": 0.3917228103946102, "grad_norm": 1.379883050918579, "learning_rate": 0.0001816905440961952, "loss": 1.2964, "step": 10175 }, { "epoch": 0.3919153031761309, "grad_norm": 1.132592797279358, "learning_rate": 0.00018167309901499613, "loss": 1.3951, "step": 10180 }, { "epoch": 0.3921077959576516, "grad_norm": 1.4765934944152832, "learning_rate": 0.00018165564646533322, "loss": 1.2278, "step": 10185 }, { "epoch": 0.3923002887391723, "grad_norm": 1.5826079845428467, "learning_rate": 0.00018163818644880233, "loss": 1.2615, "step": 10190 }, { "epoch": 0.392492781520693, "grad_norm": 1.5647984743118286, "learning_rate": 0.00018162071896700007, "loss": 1.4696, "step": 10195 }, { "epoch": 0.39268527430221367, "grad_norm": 1.0377607345581055, "learning_rate": 0.0001816032440215238, "loss": 1.1309, "step": 10200 }, { "epoch": 0.39287776708373434, "grad_norm": 1.1878221035003662, "learning_rate": 0.0001815857616139714, "loss": 1.1442, "step": 10205 }, { "epoch": 0.39307025986525507, "grad_norm": 1.5119047164916992, "learning_rate": 0.00018156827174594157, "loss": 1.2436, "step": 10210 }, { "epoch": 0.39326275264677574, "grad_norm": 1.6624690294265747, "learning_rate": 0.00018155077441903364, "loss": 1.1726, "step": 10215 }, { "epoch": 0.39345524542829646, "grad_norm": 1.2995012998580933, "learning_rate": 0.0001815332696348476, "loss": 1.3053, "step": 10220 }, { "epoch": 0.39364773820981713, "grad_norm": 1.3727355003356934, "learning_rate": 0.00018151575739498417, "loss": 1.4224, "step": 10225 }, { "epoch": 0.3938402309913378, "grad_norm": 1.1980619430541992, "learning_rate": 0.0001814982377010447, "loss": 1.0973, "step": 10230 }, { "epoch": 0.39403272377285853, "grad_norm": 1.4235668182373047, "learning_rate": 0.00018148071055463128, "loss": 1.1659, "step": 10235 }, { "epoch": 0.3942252165543792, "grad_norm": 1.1501004695892334, "learning_rate": 
0.00018146317595734663, "loss": 1.2738, "step": 10240 }, { "epoch": 0.3944177093358999, "grad_norm": 1.1686300039291382, "learning_rate": 0.00018144563391079419, "loss": 1.1691, "step": 10245 }, { "epoch": 0.3946102021174206, "grad_norm": 1.3350188732147217, "learning_rate": 0.00018142808441657806, "loss": 1.2344, "step": 10250 }, { "epoch": 0.39480269489894126, "grad_norm": 1.0583946704864502, "learning_rate": 0.00018141052747630302, "loss": 1.1358, "step": 10255 }, { "epoch": 0.394995187680462, "grad_norm": 1.0637165307998657, "learning_rate": 0.00018139296309157454, "loss": 1.2589, "step": 10260 }, { "epoch": 0.39518768046198266, "grad_norm": 1.971304178237915, "learning_rate": 0.00018137539126399874, "loss": 1.1413, "step": 10265 }, { "epoch": 0.3953801732435034, "grad_norm": 1.1685267686843872, "learning_rate": 0.0001813578119951825, "loss": 1.1702, "step": 10270 }, { "epoch": 0.39557266602502406, "grad_norm": 1.620936393737793, "learning_rate": 0.0001813402252867333, "loss": 1.2636, "step": 10275 }, { "epoch": 0.3957651588065448, "grad_norm": 1.553240180015564, "learning_rate": 0.00018132263114025934, "loss": 1.4167, "step": 10280 }, { "epoch": 0.39595765158806545, "grad_norm": 1.260498285293579, "learning_rate": 0.00018130502955736942, "loss": 1.2984, "step": 10285 }, { "epoch": 0.3961501443695861, "grad_norm": 1.7073127031326294, "learning_rate": 0.0001812874205396732, "loss": 1.29, "step": 10290 }, { "epoch": 0.39634263715110685, "grad_norm": 0.900610625743866, "learning_rate": 0.00018126980408878082, "loss": 1.0423, "step": 10295 }, { "epoch": 0.3965351299326275, "grad_norm": 1.359563946723938, "learning_rate": 0.00018125218020630324, "loss": 1.1576, "step": 10300 }, { "epoch": 0.39672762271414824, "grad_norm": 0.9399506449699402, "learning_rate": 0.000181234548893852, "loss": 1.1481, "step": 10305 }, { "epoch": 0.3969201154956689, "grad_norm": 1.4632538557052612, "learning_rate": 0.00018121691015303944, "loss": 1.1404, "step": 10310 }, { "epoch": 
0.3971126082771896, "grad_norm": 1.644718050956726, "learning_rate": 0.00018119926398547839, "loss": 1.1783, "step": 10315 }, { "epoch": 0.3973051010587103, "grad_norm": 1.299018144607544, "learning_rate": 0.00018118161039278258, "loss": 1.2076, "step": 10320 }, { "epoch": 0.397497593840231, "grad_norm": 1.5833697319030762, "learning_rate": 0.00018116394937656632, "loss": 1.0825, "step": 10325 }, { "epoch": 0.3976900866217517, "grad_norm": 1.4813597202301025, "learning_rate": 0.0001811462809384445, "loss": 1.263, "step": 10330 }, { "epoch": 0.3978825794032724, "grad_norm": 1.8714033365249634, "learning_rate": 0.00018112860508003284, "loss": 1.2425, "step": 10335 }, { "epoch": 0.39807507218479304, "grad_norm": 1.5847947597503662, "learning_rate": 0.0001811109218029477, "loss": 1.0863, "step": 10340 }, { "epoch": 0.39826756496631377, "grad_norm": 1.339046597480774, "learning_rate": 0.00018109323110880604, "loss": 1.3871, "step": 10345 }, { "epoch": 0.39846005774783444, "grad_norm": 2.370396375656128, "learning_rate": 0.0001810755329992256, "loss": 1.2629, "step": 10350 }, { "epoch": 0.39865255052935517, "grad_norm": 1.2930303812026978, "learning_rate": 0.00018105782747582474, "loss": 1.1281, "step": 10355 }, { "epoch": 0.39884504331087584, "grad_norm": 1.2590947151184082, "learning_rate": 0.0001810401145402225, "loss": 1.2229, "step": 10360 }, { "epoch": 0.39903753609239656, "grad_norm": 0.8280492424964905, "learning_rate": 0.00018102239419403866, "loss": 1.2601, "step": 10365 }, { "epoch": 0.39923002887391723, "grad_norm": 1.6567853689193726, "learning_rate": 0.0001810046664388936, "loss": 1.1296, "step": 10370 }, { "epoch": 0.3994225216554379, "grad_norm": 1.2103195190429688, "learning_rate": 0.00018098693127640834, "loss": 1.1524, "step": 10375 }, { "epoch": 0.3996150144369586, "grad_norm": 1.4716650247573853, "learning_rate": 0.00018096918870820475, "loss": 1.1805, "step": 10380 }, { "epoch": 0.3998075072184793, "grad_norm": 1.291873574256897, "learning_rate": 
0.00018095143873590524, "loss": 1.2877, "step": 10385 }, { "epoch": 0.4, "grad_norm": 0.8508723974227905, "learning_rate": 0.0001809336813611329, "loss": 0.9215, "step": 10390 }, { "epoch": 0.4001924927815207, "grad_norm": 1.1256935596466064, "learning_rate": 0.00018091591658551154, "loss": 1.3286, "step": 10395 }, { "epoch": 0.40038498556304136, "grad_norm": 1.1910960674285889, "learning_rate": 0.0001808981444106656, "loss": 1.1078, "step": 10400 }, { "epoch": 0.4005774783445621, "grad_norm": 2.188884735107422, "learning_rate": 0.00018088036483822028, "loss": 1.2762, "step": 10405 }, { "epoch": 0.40076997112608276, "grad_norm": 0.9240724444389343, "learning_rate": 0.00018086257786980136, "loss": 1.1288, "step": 10410 }, { "epoch": 0.4009624639076035, "grad_norm": 1.961204171180725, "learning_rate": 0.00018084478350703537, "loss": 1.1863, "step": 10415 }, { "epoch": 0.40115495668912415, "grad_norm": 1.5713763236999512, "learning_rate": 0.00018082698175154947, "loss": 1.2157, "step": 10420 }, { "epoch": 0.4013474494706448, "grad_norm": 2.006776809692383, "learning_rate": 0.00018080917260497153, "loss": 1.1671, "step": 10425 }, { "epoch": 0.40153994225216555, "grad_norm": 1.511513352394104, "learning_rate": 0.00018079135606893006, "loss": 1.2428, "step": 10430 }, { "epoch": 0.4017324350336862, "grad_norm": 1.5270637273788452, "learning_rate": 0.00018077353214505427, "loss": 1.2887, "step": 10435 }, { "epoch": 0.40192492781520694, "grad_norm": 1.470389723777771, "learning_rate": 0.00018075570083497407, "loss": 1.2739, "step": 10440 }, { "epoch": 0.4021174205967276, "grad_norm": 1.224330186843872, "learning_rate": 0.00018073786214031992, "loss": 1.0882, "step": 10445 }, { "epoch": 0.40230991337824834, "grad_norm": 2.0693979263305664, "learning_rate": 0.00018072001606272316, "loss": 1.5091, "step": 10450 }, { "epoch": 0.402502406159769, "grad_norm": 1.418346643447876, "learning_rate": 0.00018070216260381567, "loss": 1.2886, "step": 10455 }, { "epoch": 
0.4026948989412897, "grad_norm": 1.8632601499557495, "learning_rate": 0.00018068430176522998, "loss": 1.1809, "step": 10460 }, { "epoch": 0.4028873917228104, "grad_norm": 1.6064730882644653, "learning_rate": 0.00018066643354859937, "loss": 1.2394, "step": 10465 }, { "epoch": 0.4030798845043311, "grad_norm": 1.2319833040237427, "learning_rate": 0.0001806485579555578, "loss": 1.1979, "step": 10470 }, { "epoch": 0.4032723772858518, "grad_norm": 1.5506865978240967, "learning_rate": 0.00018063067498773987, "loss": 1.1899, "step": 10475 }, { "epoch": 0.40346487006737247, "grad_norm": 1.360120415687561, "learning_rate": 0.00018061278464678082, "loss": 1.0995, "step": 10480 }, { "epoch": 0.40365736284889314, "grad_norm": 1.133346438407898, "learning_rate": 0.00018059488693431664, "loss": 1.1972, "step": 10485 }, { "epoch": 0.40384985563041387, "grad_norm": 1.6961482763290405, "learning_rate": 0.00018057698185198394, "loss": 1.0823, "step": 10490 }, { "epoch": 0.40404234841193454, "grad_norm": 1.0126832723617554, "learning_rate": 0.00018055906940142, "loss": 1.3294, "step": 10495 }, { "epoch": 0.40423484119345526, "grad_norm": 1.378825068473816, "learning_rate": 0.00018054114958426283, "loss": 1.3188, "step": 10500 }, { "epoch": 0.40442733397497593, "grad_norm": 1.1392402648925781, "learning_rate": 0.00018052322240215104, "loss": 1.2428, "step": 10505 }, { "epoch": 0.40461982675649666, "grad_norm": 1.0441240072250366, "learning_rate": 0.00018050528785672402, "loss": 1.2997, "step": 10510 }, { "epoch": 0.40481231953801733, "grad_norm": 1.3564190864562988, "learning_rate": 0.00018048734594962171, "loss": 1.3018, "step": 10515 }, { "epoch": 0.405004812319538, "grad_norm": 1.3429349660873413, "learning_rate": 0.0001804693966824848, "loss": 1.0567, "step": 10520 }, { "epoch": 0.4051973051010587, "grad_norm": 0.920313835144043, "learning_rate": 0.00018045144005695462, "loss": 1.1386, "step": 10525 }, { "epoch": 0.4053897978825794, "grad_norm": 2.402700662612915, "learning_rate": 
0.00018043347607467317, "loss": 1.2837, "step": 10530 }, { "epoch": 0.4055822906641001, "grad_norm": 1.7154083251953125, "learning_rate": 0.00018041550473728318, "loss": 1.3188, "step": 10535 }, { "epoch": 0.4057747834456208, "grad_norm": 0.8770251274108887, "learning_rate": 0.000180397526046428, "loss": 1.1641, "step": 10540 }, { "epoch": 0.40596727622714146, "grad_norm": 0.9887571334838867, "learning_rate": 0.0001803795400037516, "loss": 1.0042, "step": 10545 }, { "epoch": 0.4061597690086622, "grad_norm": 2.665354013442993, "learning_rate": 0.00018036154661089877, "loss": 1.2579, "step": 10550 }, { "epoch": 0.40635226179018286, "grad_norm": 2.6088809967041016, "learning_rate": 0.00018034354586951486, "loss": 1.1098, "step": 10555 }, { "epoch": 0.4065447545717036, "grad_norm": 1.4641830921173096, "learning_rate": 0.00018032553778124586, "loss": 1.1108, "step": 10560 }, { "epoch": 0.40673724735322425, "grad_norm": 1.0744770765304565, "learning_rate": 0.00018030752234773854, "loss": 1.1234, "step": 10565 }, { "epoch": 0.4069297401347449, "grad_norm": 1.2617886066436768, "learning_rate": 0.00018028949957064034, "loss": 1.1753, "step": 10570 }, { "epoch": 0.40712223291626565, "grad_norm": 1.4641857147216797, "learning_rate": 0.00018027146945159923, "loss": 1.2671, "step": 10575 }, { "epoch": 0.4073147256977863, "grad_norm": 1.4347914457321167, "learning_rate": 0.00018025343199226402, "loss": 1.1348, "step": 10580 }, { "epoch": 0.40750721847930704, "grad_norm": 1.434019923210144, "learning_rate": 0.00018023538719428407, "loss": 1.2439, "step": 10585 }, { "epoch": 0.4076997112608277, "grad_norm": 1.1034338474273682, "learning_rate": 0.00018021733505930944, "loss": 1.0502, "step": 10590 }, { "epoch": 0.40789220404234844, "grad_norm": 1.591850996017456, "learning_rate": 0.00018019927558899097, "loss": 1.178, "step": 10595 }, { "epoch": 0.4080846968238691, "grad_norm": 1.672735333442688, "learning_rate": 0.00018018120878498, "loss": 1.2363, "step": 10600 }, { "epoch": 
0.4082771896053898, "grad_norm": 1.8779442310333252, "learning_rate": 0.00018016313464892862, "loss": 1.2537, "step": 10605 }, { "epoch": 0.4084696823869105, "grad_norm": 1.075453281402588, "learning_rate": 0.00018014505318248963, "loss": 1.081, "step": 10610 }, { "epoch": 0.4086621751684312, "grad_norm": 1.350914478302002, "learning_rate": 0.0001801269643873164, "loss": 1.3958, "step": 10615 }, { "epoch": 0.4088546679499519, "grad_norm": 1.6566729545593262, "learning_rate": 0.0001801088682650631, "loss": 1.3208, "step": 10620 }, { "epoch": 0.40904716073147257, "grad_norm": 1.243171215057373, "learning_rate": 0.00018009076481738446, "loss": 1.17, "step": 10625 }, { "epoch": 0.40923965351299324, "grad_norm": 1.110456109046936, "learning_rate": 0.00018007265404593593, "loss": 1.1311, "step": 10630 }, { "epoch": 0.40943214629451397, "grad_norm": 2.485719919204712, "learning_rate": 0.00018005453595237362, "loss": 1.3703, "step": 10635 }, { "epoch": 0.40962463907603464, "grad_norm": 1.3115043640136719, "learning_rate": 0.00018003641053835435, "loss": 1.2551, "step": 10640 }, { "epoch": 0.40981713185755536, "grad_norm": 1.530535340309143, "learning_rate": 0.0001800182778055355, "loss": 1.3577, "step": 10645 }, { "epoch": 0.41000962463907603, "grad_norm": 1.2339287996292114, "learning_rate": 0.00018000013775557521, "loss": 1.2539, "step": 10650 }, { "epoch": 0.4102021174205967, "grad_norm": 1.579942226409912, "learning_rate": 0.00017998199039013225, "loss": 1.3568, "step": 10655 }, { "epoch": 0.4103946102021174, "grad_norm": 1.831764817237854, "learning_rate": 0.00017996383571086612, "loss": 1.3662, "step": 10660 }, { "epoch": 0.4105871029836381, "grad_norm": 2.1747963428497314, "learning_rate": 0.00017994567371943697, "loss": 1.1333, "step": 10665 }, { "epoch": 0.4107795957651588, "grad_norm": 1.5603039264678955, "learning_rate": 0.00017992750441750549, "loss": 1.2327, "step": 10670 }, { "epoch": 0.4109720885466795, "grad_norm": 1.7836112976074219, "learning_rate": 
0.00017990932780673324, "loss": 1.0281, "step": 10675 }, { "epoch": 0.4111645813282002, "grad_norm": 1.5049426555633545, "learning_rate": 0.0001798911438887823, "loss": 1.3338, "step": 10680 }, { "epoch": 0.4113570741097209, "grad_norm": 1.6236990690231323, "learning_rate": 0.00017987295266531548, "loss": 1.3937, "step": 10685 }, { "epoch": 0.41154956689124156, "grad_norm": 1.2450697422027588, "learning_rate": 0.00017985475413799623, "loss": 1.3456, "step": 10690 }, { "epoch": 0.4117420596727623, "grad_norm": 1.031137228012085, "learning_rate": 0.00017983654830848873, "loss": 1.0254, "step": 10695 }, { "epoch": 0.41193455245428295, "grad_norm": 1.588884949684143, "learning_rate": 0.00017981833517845773, "loss": 1.0554, "step": 10700 }, { "epoch": 0.4121270452358037, "grad_norm": 1.2405824661254883, "learning_rate": 0.00017980011474956874, "loss": 1.4561, "step": 10705 }, { "epoch": 0.41231953801732435, "grad_norm": 2.03009295463562, "learning_rate": 0.00017978188702348792, "loss": 1.2479, "step": 10710 }, { "epoch": 0.412512030798845, "grad_norm": 0.9755954146385193, "learning_rate": 0.00017976365200188198, "loss": 1.1632, "step": 10715 }, { "epoch": 0.41270452358036575, "grad_norm": 1.3121798038482666, "learning_rate": 0.00017974540968641848, "loss": 1.2069, "step": 10720 }, { "epoch": 0.4128970163618864, "grad_norm": 1.880199909210205, "learning_rate": 0.00017972716007876556, "loss": 1.32, "step": 10725 }, { "epoch": 0.41308950914340714, "grad_norm": 2.1090636253356934, "learning_rate": 0.00017970890318059194, "loss": 1.2943, "step": 10730 }, { "epoch": 0.4132820019249278, "grad_norm": 1.2155611515045166, "learning_rate": 0.00017969063899356716, "loss": 1.3022, "step": 10735 }, { "epoch": 0.4134744947064485, "grad_norm": 1.191871166229248, "learning_rate": 0.00017967236751936135, "loss": 1.2699, "step": 10740 }, { "epoch": 0.4136669874879692, "grad_norm": 1.4702094793319702, "learning_rate": 0.00017965408875964534, "loss": 1.3936, "step": 10745 }, { "epoch": 
0.4138594802694899, "grad_norm": 1.7658724784851074, "learning_rate": 0.00017963580271609052, "loss": 1.1633, "step": 10750 }, { "epoch": 0.4140519730510106, "grad_norm": 1.5030126571655273, "learning_rate": 0.00017961750939036913, "loss": 1.4213, "step": 10755 }, { "epoch": 0.4142444658325313, "grad_norm": 1.5616711378097534, "learning_rate": 0.0001795992087841539, "loss": 1.3342, "step": 10760 }, { "epoch": 0.414436958614052, "grad_norm": 1.2506111860275269, "learning_rate": 0.0001795809008991183, "loss": 1.0034, "step": 10765 }, { "epoch": 0.41462945139557267, "grad_norm": 1.1011154651641846, "learning_rate": 0.00017956258573693657, "loss": 1.0936, "step": 10770 }, { "epoch": 0.41482194417709334, "grad_norm": 1.2040156126022339, "learning_rate": 0.00017954426329928335, "loss": 1.1974, "step": 10775 }, { "epoch": 0.41501443695861406, "grad_norm": 1.5271620750427246, "learning_rate": 0.0001795259335878342, "loss": 1.2563, "step": 10780 }, { "epoch": 0.41520692974013473, "grad_norm": 1.342129111289978, "learning_rate": 0.00017950759660426523, "loss": 1.2319, "step": 10785 }, { "epoch": 0.41539942252165546, "grad_norm": 0.9986871480941772, "learning_rate": 0.00017948925235025326, "loss": 1.0781, "step": 10790 }, { "epoch": 0.41559191530317613, "grad_norm": 1.107088327407837, "learning_rate": 0.00017947090082747573, "loss": 1.1499, "step": 10795 }, { "epoch": 0.4157844080846968, "grad_norm": 1.5566056966781616, "learning_rate": 0.00017945254203761076, "loss": 1.0997, "step": 10800 }, { "epoch": 0.4159769008662175, "grad_norm": 1.4681777954101562, "learning_rate": 0.00017943417598233715, "loss": 1.3307, "step": 10805 }, { "epoch": 0.4161693936477382, "grad_norm": 1.4198453426361084, "learning_rate": 0.00017941580266333433, "loss": 0.9664, "step": 10810 }, { "epoch": 0.4163618864292589, "grad_norm": 1.1474230289459229, "learning_rate": 0.00017939742208228246, "loss": 1.2454, "step": 10815 }, { "epoch": 0.4165543792107796, "grad_norm": 1.186672568321228, 
"learning_rate": 0.00017937903424086228, "loss": 1.3311, "step": 10820 }, { "epoch": 0.4167468719923003, "grad_norm": 1.4548507928848267, "learning_rate": 0.00017936063914075526, "loss": 1.2508, "step": 10825 }, { "epoch": 0.416939364773821, "grad_norm": 1.0224876403808594, "learning_rate": 0.00017934223678364353, "loss": 0.9364, "step": 10830 }, { "epoch": 0.41713185755534166, "grad_norm": 1.5561485290527344, "learning_rate": 0.00017932382717120984, "loss": 1.1686, "step": 10835 }, { "epoch": 0.4173243503368624, "grad_norm": 1.9549082517623901, "learning_rate": 0.00017930541030513762, "loss": 1.2678, "step": 10840 }, { "epoch": 0.41751684311838305, "grad_norm": 1.2266019582748413, "learning_rate": 0.00017928698618711094, "loss": 1.2963, "step": 10845 }, { "epoch": 0.4177093358999038, "grad_norm": 0.6992445588111877, "learning_rate": 0.00017926855481881465, "loss": 1.1042, "step": 10850 }, { "epoch": 0.41790182868142445, "grad_norm": 1.515512466430664, "learning_rate": 0.00017925011620193408, "loss": 1.0718, "step": 10855 }, { "epoch": 0.4180943214629451, "grad_norm": 1.5123271942138672, "learning_rate": 0.0001792316703381554, "loss": 1.1307, "step": 10860 }, { "epoch": 0.41828681424446584, "grad_norm": 1.3709865808486938, "learning_rate": 0.00017921321722916535, "loss": 1.3652, "step": 10865 }, { "epoch": 0.4184793070259865, "grad_norm": 1.3327142000198364, "learning_rate": 0.0001791947568766513, "loss": 1.2644, "step": 10870 }, { "epoch": 0.41867179980750724, "grad_norm": 1.460595726966858, "learning_rate": 0.00017917628928230134, "loss": 1.2783, "step": 10875 }, { "epoch": 0.4188642925890279, "grad_norm": 1.1008737087249756, "learning_rate": 0.00017915781444780425, "loss": 1.2889, "step": 10880 }, { "epoch": 0.4190567853705486, "grad_norm": 1.8467929363250732, "learning_rate": 0.00017913933237484936, "loss": 1.1897, "step": 10885 }, { "epoch": 0.4192492781520693, "grad_norm": 1.286544680595398, "learning_rate": 0.00017912084306512683, "loss": 1.1239, "step": 
10890 }, { "epoch": 0.41944177093359, "grad_norm": 1.8240995407104492, "learning_rate": 0.00017910234652032726, "loss": 1.3085, "step": 10895 }, { "epoch": 0.4196342637151107, "grad_norm": 1.1262156963348389, "learning_rate": 0.00017908384274214215, "loss": 1.3779, "step": 10900 }, { "epoch": 0.41982675649663137, "grad_norm": 1.2274012565612793, "learning_rate": 0.0001790653317322635, "loss": 1.3361, "step": 10905 }, { "epoch": 0.4200192492781521, "grad_norm": 2.0522284507751465, "learning_rate": 0.000179046813492384, "loss": 1.2329, "step": 10910 }, { "epoch": 0.42021174205967277, "grad_norm": 1.927666187286377, "learning_rate": 0.0001790282880241971, "loss": 1.2217, "step": 10915 }, { "epoch": 0.42040423484119344, "grad_norm": 2.254720687866211, "learning_rate": 0.0001790097553293967, "loss": 1.2867, "step": 10920 }, { "epoch": 0.42059672762271416, "grad_norm": 1.9560370445251465, "learning_rate": 0.0001789912154096776, "loss": 1.2959, "step": 10925 }, { "epoch": 0.42078922040423483, "grad_norm": 1.109393835067749, "learning_rate": 0.00017897266826673517, "loss": 1.2397, "step": 10930 }, { "epoch": 0.42098171318575556, "grad_norm": 1.1880956888198853, "learning_rate": 0.00017895411390226527, "loss": 1.192, "step": 10935 }, { "epoch": 0.4211742059672762, "grad_norm": 1.851517677307129, "learning_rate": 0.00017893555231796477, "loss": 1.1866, "step": 10940 }, { "epoch": 0.4213666987487969, "grad_norm": 1.1871724128723145, "learning_rate": 0.0001789169835155309, "loss": 1.1627, "step": 10945 }, { "epoch": 0.4215591915303176, "grad_norm": 0.9478880167007446, "learning_rate": 0.0001788984074966616, "loss": 1.198, "step": 10950 }, { "epoch": 0.4217516843118383, "grad_norm": 1.753989577293396, "learning_rate": 0.00017887982426305566, "loss": 1.2923, "step": 10955 }, { "epoch": 0.421944177093359, "grad_norm": 2.161820650100708, "learning_rate": 0.00017886123381641227, "loss": 1.2651, "step": 10960 }, { "epoch": 0.4221366698748797, "grad_norm": 1.203307867050171, 
"learning_rate": 0.00017884263615843145, "loss": 1.1854, "step": 10965 }, { "epoch": 0.42232916265640036, "grad_norm": 1.6671913862228394, "learning_rate": 0.0001788240312908139, "loss": 1.2466, "step": 10970 }, { "epoch": 0.4225216554379211, "grad_norm": 1.643796443939209, "learning_rate": 0.0001788054192152608, "loss": 1.202, "step": 10975 }, { "epoch": 0.42271414821944175, "grad_norm": 1.024296522140503, "learning_rate": 0.00017878679993347415, "loss": 1.2392, "step": 10980 }, { "epoch": 0.4229066410009625, "grad_norm": 1.363425612449646, "learning_rate": 0.0001787681734471566, "loss": 1.3577, "step": 10985 }, { "epoch": 0.42309913378248315, "grad_norm": 1.7815190553665161, "learning_rate": 0.00017874953975801134, "loss": 0.9826, "step": 10990 }, { "epoch": 0.4232916265640039, "grad_norm": 1.6736468076705933, "learning_rate": 0.00017873089886774236, "loss": 1.168, "step": 10995 }, { "epoch": 0.42348411934552455, "grad_norm": 1.3047553300857544, "learning_rate": 0.0001787122507780542, "loss": 1.1839, "step": 11000 }, { "epoch": 0.4236766121270452, "grad_norm": 1.5737935304641724, "learning_rate": 0.00017869359549065216, "loss": 1.0693, "step": 11005 }, { "epoch": 0.42386910490856594, "grad_norm": 0.9130328893661499, "learning_rate": 0.00017867493300724208, "loss": 1.1609, "step": 11010 }, { "epoch": 0.4240615976900866, "grad_norm": 2.444490432739258, "learning_rate": 0.00017865626332953056, "loss": 1.2422, "step": 11015 }, { "epoch": 0.42425409047160734, "grad_norm": 1.4214091300964355, "learning_rate": 0.00017863758645922481, "loss": 1.2028, "step": 11020 }, { "epoch": 0.424446583253128, "grad_norm": 1.3986276388168335, "learning_rate": 0.0001786189023980327, "loss": 0.9271, "step": 11025 }, { "epoch": 0.4246390760346487, "grad_norm": 1.6309832334518433, "learning_rate": 0.00017860021114766275, "loss": 1.1242, "step": 11030 }, { "epoch": 0.4248315688161694, "grad_norm": 1.0703374147415161, "learning_rate": 0.00017858151270982423, "loss": 1.1688, "step": 11035 }, 
{ "epoch": 0.4250240615976901, "grad_norm": 0.9345492720603943, "learning_rate": 0.00017856280708622687, "loss": 1.0759, "step": 11040 }, { "epoch": 0.4252165543792108, "grad_norm": 1.1012792587280273, "learning_rate": 0.00017854409427858124, "loss": 1.3299, "step": 11045 }, { "epoch": 0.42540904716073147, "grad_norm": 1.087344765663147, "learning_rate": 0.00017852537428859853, "loss": 1.1188, "step": 11050 }, { "epoch": 0.42560153994225214, "grad_norm": 1.0374698638916016, "learning_rate": 0.0001785066471179905, "loss": 1.2403, "step": 11055 }, { "epoch": 0.42579403272377286, "grad_norm": 1.2250018119812012, "learning_rate": 0.00017848791276846963, "loss": 1.1217, "step": 11060 }, { "epoch": 0.42598652550529353, "grad_norm": 1.9863545894622803, "learning_rate": 0.0001784691712417491, "loss": 1.0159, "step": 11065 }, { "epoch": 0.42617901828681426, "grad_norm": 1.3587582111358643, "learning_rate": 0.0001784504225395427, "loss": 1.1266, "step": 11070 }, { "epoch": 0.42637151106833493, "grad_norm": 1.3274664878845215, "learning_rate": 0.0001784316666635648, "loss": 1.2295, "step": 11075 }, { "epoch": 0.42656400384985566, "grad_norm": 1.594498872756958, "learning_rate": 0.00017841290361553057, "loss": 1.2942, "step": 11080 }, { "epoch": 0.4267564966313763, "grad_norm": 2.5940325260162354, "learning_rate": 0.00017839413339715572, "loss": 1.3333, "step": 11085 }, { "epoch": 0.426948989412897, "grad_norm": 1.5368024110794067, "learning_rate": 0.0001783753560101567, "loss": 1.2738, "step": 11090 }, { "epoch": 0.4271414821944177, "grad_norm": 1.8095320463180542, "learning_rate": 0.00017835657145625055, "loss": 1.3245, "step": 11095 }, { "epoch": 0.4273339749759384, "grad_norm": 1.4597771167755127, "learning_rate": 0.000178337779737155, "loss": 1.3837, "step": 11100 }, { "epoch": 0.4275264677574591, "grad_norm": 1.052746057510376, "learning_rate": 0.00017831898085458842, "loss": 1.1603, "step": 11105 }, { "epoch": 0.4277189605389798, "grad_norm": 1.547523856163025, 
"learning_rate": 0.0001783001748102699, "loss": 1.2277, "step": 11110 }, { "epoch": 0.42791145332050046, "grad_norm": 2.109560012817383, "learning_rate": 0.00017828136160591906, "loss": 1.1299, "step": 11115 }, { "epoch": 0.4281039461020212, "grad_norm": 0.9221099019050598, "learning_rate": 0.00017826254124325626, "loss": 1.1447, "step": 11120 }, { "epoch": 0.42829643888354185, "grad_norm": 1.1257829666137695, "learning_rate": 0.00017824371372400255, "loss": 1.0844, "step": 11125 }, { "epoch": 0.4284889316650626, "grad_norm": 1.9643393754959106, "learning_rate": 0.00017822487904987948, "loss": 1.1511, "step": 11130 }, { "epoch": 0.42868142444658325, "grad_norm": 1.2279611825942993, "learning_rate": 0.00017820603722260944, "loss": 1.3039, "step": 11135 }, { "epoch": 0.428873917228104, "grad_norm": 1.8037766218185425, "learning_rate": 0.00017818718824391536, "loss": 1.2338, "step": 11140 }, { "epoch": 0.42906641000962464, "grad_norm": 2.1256327629089355, "learning_rate": 0.00017816833211552085, "loss": 1.2502, "step": 11145 }, { "epoch": 0.4292589027911453, "grad_norm": 1.1520932912826538, "learning_rate": 0.0001781494688391502, "loss": 1.121, "step": 11150 }, { "epoch": 0.42945139557266604, "grad_norm": 1.1287842988967896, "learning_rate": 0.00017813059841652833, "loss": 1.2012, "step": 11155 }, { "epoch": 0.4296438883541867, "grad_norm": 1.2584294080734253, "learning_rate": 0.00017811172084938076, "loss": 1.3221, "step": 11160 }, { "epoch": 0.42983638113570743, "grad_norm": 1.901994228363037, "learning_rate": 0.0001780928361394338, "loss": 1.1184, "step": 11165 }, { "epoch": 0.4300288739172281, "grad_norm": 1.564501166343689, "learning_rate": 0.00017807394428841428, "loss": 1.12, "step": 11170 }, { "epoch": 0.4302213666987488, "grad_norm": 2.138155221939087, "learning_rate": 0.00017805504529804975, "loss": 1.1928, "step": 11175 }, { "epoch": 0.4304138594802695, "grad_norm": 1.3132466077804565, "learning_rate": 0.00017803613917006841, "loss": 1.2674, "step": 11180 
}, { "epoch": 0.43060635226179017, "grad_norm": 1.1847275495529175, "learning_rate": 0.00017801722590619903, "loss": 1.1457, "step": 11185 }, { "epoch": 0.4307988450433109, "grad_norm": 1.6100077629089355, "learning_rate": 0.00017799830550817124, "loss": 1.3779, "step": 11190 }, { "epoch": 0.43099133782483157, "grad_norm": 2.1193013191223145, "learning_rate": 0.00017797937797771503, "loss": 1.0515, "step": 11195 }, { "epoch": 0.43118383060635224, "grad_norm": 1.6185005903244019, "learning_rate": 0.0001779604433165613, "loss": 1.078, "step": 11200 }, { "epoch": 0.43137632338787296, "grad_norm": 1.275046467781067, "learning_rate": 0.00017794150152644148, "loss": 2.2652, "step": 11205 }, { "epoch": 0.43156881616939363, "grad_norm": 1.4507300853729248, "learning_rate": 0.00017792255260908765, "loss": 1.3556, "step": 11210 }, { "epoch": 0.43176130895091436, "grad_norm": 1.5722453594207764, "learning_rate": 0.00017790359656623256, "loss": 1.1115, "step": 11215 }, { "epoch": 0.431953801732435, "grad_norm": 1.802585244178772, "learning_rate": 0.00017788463339960962, "loss": 1.1885, "step": 11220 }, { "epoch": 0.43214629451395575, "grad_norm": 1.0945521593093872, "learning_rate": 0.00017786566311095295, "loss": 1.2419, "step": 11225 }, { "epoch": 0.4323387872954764, "grad_norm": 1.6798467636108398, "learning_rate": 0.00017784668570199714, "loss": 1.0404, "step": 11230 }, { "epoch": 0.4325312800769971, "grad_norm": 1.9263988733291626, "learning_rate": 0.00017782770117447764, "loss": 1.2925, "step": 11235 }, { "epoch": 0.4327237728585178, "grad_norm": 1.3327709436416626, "learning_rate": 0.0001778087095301304, "loss": 1.2621, "step": 11240 }, { "epoch": 0.4329162656400385, "grad_norm": 1.540216088294983, "learning_rate": 0.00017778971077069214, "loss": 1.2733, "step": 11245 }, { "epoch": 0.4331087584215592, "grad_norm": 0.8980332612991333, "learning_rate": 0.00017777070489790014, "loss": 1.1849, "step": 11250 }, { "epoch": 0.4333012512030799, "grad_norm": 1.1286743879318237, 
"learning_rate": 0.00017775169191349238, "loss": 1.0491, "step": 11255 }, { "epoch": 0.43349374398460055, "grad_norm": 1.5880367755889893, "learning_rate": 0.0001777326718192074, "loss": 1.1371, "step": 11260 }, { "epoch": 0.4336862367661213, "grad_norm": 1.8634532690048218, "learning_rate": 0.00017771364461678454, "loss": 1.3491, "step": 11265 }, { "epoch": 0.43387872954764195, "grad_norm": 1.13876473903656, "learning_rate": 0.0001776946103079637, "loss": 1.1284, "step": 11270 }, { "epoch": 0.4340712223291627, "grad_norm": 1.1511520147323608, "learning_rate": 0.0001776755688944854, "loss": 0.9705, "step": 11275 }, { "epoch": 0.43426371511068335, "grad_norm": 2.0832314491271973, "learning_rate": 0.00017765652037809087, "loss": 1.1134, "step": 11280 }, { "epoch": 0.434456207892204, "grad_norm": 1.3219777345657349, "learning_rate": 0.000177637464760522, "loss": 1.1519, "step": 11285 }, { "epoch": 0.43464870067372474, "grad_norm": 1.2205532789230347, "learning_rate": 0.0001776184020435213, "loss": 1.1526, "step": 11290 }, { "epoch": 0.4348411934552454, "grad_norm": 1.1612414121627808, "learning_rate": 0.00017759933222883187, "loss": 1.2236, "step": 11295 }, { "epoch": 0.43503368623676614, "grad_norm": 2.214245319366455, "learning_rate": 0.00017758025531819756, "loss": 1.1962, "step": 11300 }, { "epoch": 0.4352261790182868, "grad_norm": 1.1582585573196411, "learning_rate": 0.00017756117131336284, "loss": 1.1488, "step": 11305 }, { "epoch": 0.43541867179980753, "grad_norm": 1.6610682010650635, "learning_rate": 0.0001775420802160728, "loss": 1.2349, "step": 11310 }, { "epoch": 0.4356111645813282, "grad_norm": 1.2163527011871338, "learning_rate": 0.00017752298202807317, "loss": 1.0914, "step": 11315 }, { "epoch": 0.4358036573628489, "grad_norm": 1.3684804439544678, "learning_rate": 0.00017750387675111043, "loss": 1.1035, "step": 11320 }, { "epoch": 0.4359961501443696, "grad_norm": 2.0042598247528076, "learning_rate": 0.00017748476438693151, "loss": 1.1783, "step": 11325 
}, { "epoch": 0.43618864292589027, "grad_norm": 1.4552195072174072, "learning_rate": 0.00017746564493728424, "loss": 1.1373, "step": 11330 }, { "epoch": 0.436381135707411, "grad_norm": 1.1513317823410034, "learning_rate": 0.00017744651840391685, "loss": 1.122, "step": 11335 }, { "epoch": 0.43657362848893166, "grad_norm": 1.1842467784881592, "learning_rate": 0.0001774273847885784, "loss": 1.085, "step": 11340 }, { "epoch": 0.43676612127045233, "grad_norm": 1.5492455959320068, "learning_rate": 0.00017740824409301852, "loss": 1.1355, "step": 11345 }, { "epoch": 0.43695861405197306, "grad_norm": 1.6276592016220093, "learning_rate": 0.00017738909631898753, "loss": 1.3922, "step": 11350 }, { "epoch": 0.43715110683349373, "grad_norm": 1.5947320461273193, "learning_rate": 0.0001773699414682363, "loss": 1.1952, "step": 11355 }, { "epoch": 0.43734359961501446, "grad_norm": 1.0628368854522705, "learning_rate": 0.00017735077954251648, "loss": 1.3908, "step": 11360 }, { "epoch": 0.4375360923965351, "grad_norm": 1.6347852945327759, "learning_rate": 0.00017733161054358027, "loss": 1.3614, "step": 11365 }, { "epoch": 0.4377285851780558, "grad_norm": 0.98406583070755, "learning_rate": 0.00017731243447318055, "loss": 1.0818, "step": 11370 }, { "epoch": 0.4379210779595765, "grad_norm": 2.522155284881592, "learning_rate": 0.0001772932513330708, "loss": 1.1043, "step": 11375 }, { "epoch": 0.4381135707410972, "grad_norm": 1.3053642511367798, "learning_rate": 0.0001772740611250053, "loss": 1.1731, "step": 11380 }, { "epoch": 0.4383060635226179, "grad_norm": 1.540334701538086, "learning_rate": 0.0001772548638507388, "loss": 1.0897, "step": 11385 }, { "epoch": 0.4384985563041386, "grad_norm": 1.756795048713684, "learning_rate": 0.00017723565951202673, "loss": 1.26, "step": 11390 }, { "epoch": 0.4386910490856593, "grad_norm": 2.263253688812256, "learning_rate": 0.00017721644811062524, "loss": 1.2498, "step": 11395 }, { "epoch": 0.43888354186718, "grad_norm": 1.2686541080474854, 
"learning_rate": 0.0001771972296482911, "loss": 1.2002, "step": 11400 }, { "epoch": 0.43907603464870065, "grad_norm": 1.7692358493804932, "learning_rate": 0.00017717800412678168, "loss": 1.2989, "step": 11405 }, { "epoch": 0.4392685274302214, "grad_norm": 0.9414786100387573, "learning_rate": 0.00017715877154785505, "loss": 1.0743, "step": 11410 }, { "epoch": 0.43946102021174205, "grad_norm": 1.6488560438156128, "learning_rate": 0.0001771395319132699, "loss": 1.3217, "step": 11415 }, { "epoch": 0.4396535129932628, "grad_norm": 0.9546147584915161, "learning_rate": 0.00017712028522478556, "loss": 1.1849, "step": 11420 }, { "epoch": 0.43984600577478344, "grad_norm": 1.9460307359695435, "learning_rate": 0.000177101031484162, "loss": 1.3702, "step": 11425 }, { "epoch": 0.4400384985563041, "grad_norm": 0.8990427255630493, "learning_rate": 0.00017708177069315987, "loss": 1.2009, "step": 11430 }, { "epoch": 0.44023099133782484, "grad_norm": 1.3581219911575317, "learning_rate": 0.0001770625028535404, "loss": 1.1846, "step": 11435 }, { "epoch": 0.4404234841193455, "grad_norm": 1.259728193283081, "learning_rate": 0.00017704322796706557, "loss": 1.2683, "step": 11440 }, { "epoch": 0.44061597690086624, "grad_norm": 1.1262446641921997, "learning_rate": 0.00017702394603549788, "loss": 1.0015, "step": 11445 }, { "epoch": 0.4408084696823869, "grad_norm": 2.5833356380462646, "learning_rate": 0.0001770046570606006, "loss": 1.1348, "step": 11450 }, { "epoch": 0.44100096246390763, "grad_norm": 0.9725410342216492, "learning_rate": 0.00017698536104413749, "loss": 1.2214, "step": 11455 }, { "epoch": 0.4411934552454283, "grad_norm": 1.0890756845474243, "learning_rate": 0.00017696605798787313, "loss": 1.1178, "step": 11460 }, { "epoch": 0.44138594802694897, "grad_norm": 1.3130367994308472, "learning_rate": 0.0001769467478935726, "loss": 1.1582, "step": 11465 }, { "epoch": 0.4415784408084697, "grad_norm": 2.401630163192749, "learning_rate": 0.00017692743076300172, "loss": 1.3043, "step": 
11470 }, { "epoch": 0.44177093358999037, "grad_norm": 1.2928016185760498, "learning_rate": 0.00017690810659792686, "loss": 1.1354, "step": 11475 }, { "epoch": 0.4419634263715111, "grad_norm": 1.6433988809585571, "learning_rate": 0.00017688877540011517, "loss": 1.2385, "step": 11480 }, { "epoch": 0.44215591915303176, "grad_norm": 1.5450482368469238, "learning_rate": 0.00017686943717133428, "loss": 1.3096, "step": 11485 }, { "epoch": 0.44234841193455243, "grad_norm": 2.0231974124908447, "learning_rate": 0.00017685009191335257, "loss": 1.2129, "step": 11490 }, { "epoch": 0.44254090471607316, "grad_norm": 1.2831270694732666, "learning_rate": 0.00017683073962793908, "loss": 1.2213, "step": 11495 }, { "epoch": 0.44273339749759383, "grad_norm": 1.035520076751709, "learning_rate": 0.00017681138031686337, "loss": 1.248, "step": 11500 }, { "epoch": 0.44292589027911455, "grad_norm": 1.113934874534607, "learning_rate": 0.00017679201398189577, "loss": 1.0017, "step": 11505 }, { "epoch": 0.4431183830606352, "grad_norm": 1.202412724494934, "learning_rate": 0.0001767726406248072, "loss": 1.0873, "step": 11510 }, { "epoch": 0.4433108758421559, "grad_norm": 1.2946287393569946, "learning_rate": 0.0001767532602473692, "loss": 1.3873, "step": 11515 }, { "epoch": 0.4435033686236766, "grad_norm": 1.2840358018875122, "learning_rate": 0.00017673387285135398, "loss": 1.2559, "step": 11520 }, { "epoch": 0.4436958614051973, "grad_norm": 0.9422056078910828, "learning_rate": 0.00017671447843853444, "loss": 1.1179, "step": 11525 }, { "epoch": 0.443888354186718, "grad_norm": 1.9112647771835327, "learning_rate": 0.000176695077010684, "loss": 1.1519, "step": 11530 }, { "epoch": 0.4440808469682387, "grad_norm": 0.9463594555854797, "learning_rate": 0.00017667566856957687, "loss": 1.1175, "step": 11535 }, { "epoch": 0.4442733397497594, "grad_norm": 2.1585206985473633, "learning_rate": 0.00017665625311698776, "loss": 1.1535, "step": 11540 }, { "epoch": 0.4444658325312801, "grad_norm": 
1.038095474243164, "learning_rate": 0.0001766368306546921, "loss": 1.1633, "step": 11545 }, { "epoch": 0.44465832531280075, "grad_norm": 1.4679070711135864, "learning_rate": 0.00017661740118446594, "loss": 1.3792, "step": 11550 }, { "epoch": 0.4448508180943215, "grad_norm": 1.3058511018753052, "learning_rate": 0.00017659796470808597, "loss": 1.2802, "step": 11555 }, { "epoch": 0.44504331087584215, "grad_norm": 1.0330942869186401, "learning_rate": 0.0001765785212273296, "loss": 1.1621, "step": 11560 }, { "epoch": 0.44523580365736287, "grad_norm": 1.6481776237487793, "learning_rate": 0.0001765590707439747, "loss": 1.1098, "step": 11565 }, { "epoch": 0.44542829643888354, "grad_norm": 1.3850781917572021, "learning_rate": 0.00017653961325979998, "loss": 1.3687, "step": 11570 }, { "epoch": 0.4456207892204042, "grad_norm": 1.6551322937011719, "learning_rate": 0.0001765201487765846, "loss": 1.3436, "step": 11575 }, { "epoch": 0.44581328200192494, "grad_norm": 1.0752167701721191, "learning_rate": 0.00017650067729610856, "loss": 1.1667, "step": 11580 }, { "epoch": 0.4460057747834456, "grad_norm": 1.4762775897979736, "learning_rate": 0.00017648119882015232, "loss": 1.0119, "step": 11585 }, { "epoch": 0.44619826756496633, "grad_norm": 0.7833762764930725, "learning_rate": 0.0001764617133504971, "loss": 1.1047, "step": 11590 }, { "epoch": 0.446390760346487, "grad_norm": 1.1666022539138794, "learning_rate": 0.00017644222088892473, "loss": 1.2339, "step": 11595 }, { "epoch": 0.4465832531280077, "grad_norm": 1.7897813320159912, "learning_rate": 0.0001764227214372176, "loss": 1.205, "step": 11600 }, { "epoch": 0.4467757459095284, "grad_norm": 1.2021222114562988, "learning_rate": 0.00017640321499715888, "loss": 1.2518, "step": 11605 }, { "epoch": 0.44696823869104907, "grad_norm": 2.9843320846557617, "learning_rate": 0.00017638370157053228, "loss": 0.9705, "step": 11610 }, { "epoch": 0.4471607314725698, "grad_norm": 1.2910903692245483, "learning_rate": 0.00017636418115912213, "loss": 
1.4018, "step": 11615 }, { "epoch": 0.44735322425409046, "grad_norm": 1.0188699960708618, "learning_rate": 0.00017634855980214943, "loss": 1.6206, "step": 11620 }, { "epoch": 0.4475457170356112, "grad_norm": 1.0419138669967651, "learning_rate": 0.00017632902682262764, "loss": 1.2483, "step": 11625 }, { "epoch": 0.44773820981713186, "grad_norm": 1.665586233139038, "learning_rate": 0.000176309486863322, "loss": 1.1838, "step": 11630 }, { "epoch": 0.44793070259865253, "grad_norm": 2.3444008827209473, "learning_rate": 0.00017628993992601925, "loss": 1.291, "step": 11635 }, { "epoch": 0.44812319538017326, "grad_norm": 1.960339069366455, "learning_rate": 0.00017627038601250686, "loss": 1.2312, "step": 11640 }, { "epoch": 0.4483156881616939, "grad_norm": 1.5672719478607178, "learning_rate": 0.00017625082512457297, "loss": 1.2281, "step": 11645 }, { "epoch": 0.44850818094321465, "grad_norm": 1.5053352117538452, "learning_rate": 0.00017623125726400621, "loss": 1.1688, "step": 11650 }, { "epoch": 0.4487006737247353, "grad_norm": 1.841610312461853, "learning_rate": 0.00017621168243259596, "loss": 1.1607, "step": 11655 }, { "epoch": 0.448893166506256, "grad_norm": 1.1526665687561035, "learning_rate": 0.0001761921006321322, "loss": 1.1788, "step": 11660 }, { "epoch": 0.4490856592877767, "grad_norm": 1.4064139127731323, "learning_rate": 0.00017617251186440556, "loss": 1.0825, "step": 11665 }, { "epoch": 0.4492781520692974, "grad_norm": 1.1119096279144287, "learning_rate": 0.00017615291613120736, "loss": 1.2768, "step": 11670 }, { "epoch": 0.4494706448508181, "grad_norm": 1.2367806434631348, "learning_rate": 0.00017613331343432938, "loss": 1.2612, "step": 11675 }, { "epoch": 0.4496631376323388, "grad_norm": 1.093410611152649, "learning_rate": 0.00017611370377556423, "loss": 1.3075, "step": 11680 }, { "epoch": 0.44985563041385945, "grad_norm": 1.0085220336914062, "learning_rate": 0.00017609408715670512, "loss": 1.2391, "step": 11685 }, { "epoch": 0.4500481231953802, "grad_norm": 
1.4346550703048706, "learning_rate": 0.0001760744635795458, "loss": 1.2241, "step": 11690 }, { "epoch": 0.45024061597690085, "grad_norm": 1.483905553817749, "learning_rate": 0.0001760548330458807, "loss": 1.2696, "step": 11695 }, { "epoch": 0.4504331087584216, "grad_norm": 1.6455215215682983, "learning_rate": 0.00017603519555750498, "loss": 1.2113, "step": 11700 }, { "epoch": 0.45062560153994224, "grad_norm": 1.7613027095794678, "learning_rate": 0.00017601555111621428, "loss": 1.1581, "step": 11705 }, { "epoch": 0.45081809432146297, "grad_norm": 1.5872759819030762, "learning_rate": 0.000175995899723805, "loss": 1.0977, "step": 11710 }, { "epoch": 0.45101058710298364, "grad_norm": 1.5521520376205444, "learning_rate": 0.00017597624138207413, "loss": 1.3003, "step": 11715 }, { "epoch": 0.4512030798845043, "grad_norm": 2.1746668815612793, "learning_rate": 0.0001759565760928193, "loss": 1.1861, "step": 11720 }, { "epoch": 0.45139557266602504, "grad_norm": 1.73439359664917, "learning_rate": 0.00017593690385783866, "loss": 1.242, "step": 11725 }, { "epoch": 0.4515880654475457, "grad_norm": 1.6027134656906128, "learning_rate": 0.0001759172246789313, "loss": 1.2936, "step": 11730 }, { "epoch": 0.45178055822906643, "grad_norm": 1.62489652633667, "learning_rate": 0.0001758975385578966, "loss": 1.3521, "step": 11735 }, { "epoch": 0.4519730510105871, "grad_norm": 1.3407773971557617, "learning_rate": 0.00017587784549653477, "loss": 1.1653, "step": 11740 }, { "epoch": 0.45216554379210777, "grad_norm": 2.064875364303589, "learning_rate": 0.00017585814549664664, "loss": 1.2321, "step": 11745 }, { "epoch": 0.4523580365736285, "grad_norm": 1.115850806236267, "learning_rate": 0.0001758384385600336, "loss": 1.0289, "step": 11750 }, { "epoch": 0.45255052935514917, "grad_norm": 1.3943949937820435, "learning_rate": 0.00017581872468849777, "loss": 1.2846, "step": 11755 }, { "epoch": 0.4527430221366699, "grad_norm": 1.0405654907226562, "learning_rate": 0.0001757990038838418, "loss": 1.1209, 
"step": 11760 }, { "epoch": 0.45293551491819056, "grad_norm": 1.0115854740142822, "learning_rate": 0.00017577927614786902, "loss": 1.0178, "step": 11765 }, { "epoch": 0.4531280076997113, "grad_norm": 2.48100209236145, "learning_rate": 0.00017575954148238345, "loss": 1.2485, "step": 11770 }, { "epoch": 0.45332050048123196, "grad_norm": 1.5187568664550781, "learning_rate": 0.00017573979988918967, "loss": 1.3345, "step": 11775 }, { "epoch": 0.45351299326275263, "grad_norm": 1.2286217212677002, "learning_rate": 0.00017572005137009292, "loss": 1.1079, "step": 11780 }, { "epoch": 0.45370548604427335, "grad_norm": 1.5858092308044434, "learning_rate": 0.00017570029592689908, "loss": 1.4054, "step": 11785 }, { "epoch": 0.453897978825794, "grad_norm": 2.0436697006225586, "learning_rate": 0.00017568053356141464, "loss": 1.3221, "step": 11790 }, { "epoch": 0.45409047160731475, "grad_norm": 1.6980565786361694, "learning_rate": 0.00017566076427544673, "loss": 1.2384, "step": 11795 }, { "epoch": 0.4542829643888354, "grad_norm": 1.3811545372009277, "learning_rate": 0.00017564098807080315, "loss": 1.171, "step": 11800 }, { "epoch": 0.4544754571703561, "grad_norm": 1.2215286493301392, "learning_rate": 0.00017562120494929228, "loss": 1.1781, "step": 11805 }, { "epoch": 0.4546679499518768, "grad_norm": 1.1313782930374146, "learning_rate": 0.00017560141491272319, "loss": 1.2166, "step": 11810 }, { "epoch": 0.4548604427333975, "grad_norm": 1.2630988359451294, "learning_rate": 0.0001755816179629055, "loss": 1.2652, "step": 11815 }, { "epoch": 0.4550529355149182, "grad_norm": 1.0977842807769775, "learning_rate": 0.0001755618141016495, "loss": 1.3057, "step": 11820 }, { "epoch": 0.4552454282964389, "grad_norm": 0.8517459034919739, "learning_rate": 0.0001755420033307662, "loss": 1.1769, "step": 11825 }, { "epoch": 0.45543792107795955, "grad_norm": 0.7195164561271667, "learning_rate": 0.00017552218565206707, "loss": 0.9777, "step": 11830 }, { "epoch": 0.4556304138594803, "grad_norm": 
1.125056266784668, "learning_rate": 0.00017550236106736436, "loss": 1.1008, "step": 11835 }, { "epoch": 0.45582290664100095, "grad_norm": 0.8211593627929688, "learning_rate": 0.00017548252957847092, "loss": 1.1539, "step": 11840 }, { "epoch": 0.4560153994225217, "grad_norm": 1.8936784267425537, "learning_rate": 0.00017546269118720015, "loss": 1.143, "step": 11845 }, { "epoch": 0.45620789220404234, "grad_norm": 1.5479308366775513, "learning_rate": 0.00017544284589536617, "loss": 1.1481, "step": 11850 }, { "epoch": 0.45640038498556307, "grad_norm": 1.4597593545913696, "learning_rate": 0.00017542299370478372, "loss": 1.2907, "step": 11855 }, { "epoch": 0.45659287776708374, "grad_norm": 1.4036239385604858, "learning_rate": 0.0001754031346172681, "loss": 1.2927, "step": 11860 }, { "epoch": 0.4567853705486044, "grad_norm": 0.9842814207077026, "learning_rate": 0.00017538326863463533, "loss": 1.0571, "step": 11865 }, { "epoch": 0.45697786333012513, "grad_norm": 2.478254556655884, "learning_rate": 0.000175363395758702, "loss": 1.3115, "step": 11870 }, { "epoch": 0.4571703561116458, "grad_norm": 1.000182032585144, "learning_rate": 0.00017534351599128538, "loss": 1.3071, "step": 11875 }, { "epoch": 0.45736284889316653, "grad_norm": 1.8669004440307617, "learning_rate": 0.0001753236293342033, "loss": 1.2386, "step": 11880 }, { "epoch": 0.4575553416746872, "grad_norm": 1.6287200450897217, "learning_rate": 0.00017530373578927432, "loss": 1.2196, "step": 11885 }, { "epoch": 0.45774783445620787, "grad_norm": 2.1733322143554688, "learning_rate": 0.00017528383535831755, "loss": 1.6165, "step": 11890 }, { "epoch": 0.4579403272377286, "grad_norm": 1.0370094776153564, "learning_rate": 0.00017526392804315273, "loss": 1.1799, "step": 11895 }, { "epoch": 0.45813282001924927, "grad_norm": 1.3969937562942505, "learning_rate": 0.00017524401384560025, "loss": 1.2224, "step": 11900 }, { "epoch": 0.45832531280077, "grad_norm": 1.3850924968719482, "learning_rate": 0.00017522409276748117, "loss": 
1.4161, "step": 11905 }, { "epoch": 0.45851780558229066, "grad_norm": 1.4318947792053223, "learning_rate": 0.00017520416481061712, "loss": 1.4166, "step": 11910 }, { "epoch": 0.45871029836381133, "grad_norm": 1.525709629058838, "learning_rate": 0.00017518422997683038, "loss": 1.255, "step": 11915 }, { "epoch": 0.45890279114533206, "grad_norm": 0.9193233847618103, "learning_rate": 0.00017516428826794384, "loss": 1.2299, "step": 11920 }, { "epoch": 0.4590952839268527, "grad_norm": 1.8636525869369507, "learning_rate": 0.00017514433968578107, "loss": 1.1992, "step": 11925 }, { "epoch": 0.45928777670837345, "grad_norm": 1.3876943588256836, "learning_rate": 0.00017512438423216624, "loss": 1.2022, "step": 11930 }, { "epoch": 0.4594802694898941, "grad_norm": 1.5370129346847534, "learning_rate": 0.00017510442190892412, "loss": 1.319, "step": 11935 }, { "epoch": 0.45967276227141485, "grad_norm": 1.8562203645706177, "learning_rate": 0.00017508445271788013, "loss": 1.0784, "step": 11940 }, { "epoch": 0.4598652550529355, "grad_norm": 1.1265978813171387, "learning_rate": 0.0001750644766608603, "loss": 1.1591, "step": 11945 }, { "epoch": 0.4600577478344562, "grad_norm": 1.3049321174621582, "learning_rate": 0.00017504449373969137, "loss": 1.2567, "step": 11950 }, { "epoch": 0.4602502406159769, "grad_norm": 1.4252487421035767, "learning_rate": 0.0001750245039562006, "loss": 1.0848, "step": 11955 }, { "epoch": 0.4604427333974976, "grad_norm": 1.888185977935791, "learning_rate": 0.00017500450731221592, "loss": 1.1976, "step": 11960 }, { "epoch": 0.4606352261790183, "grad_norm": 1.4043982028961182, "learning_rate": 0.00017498450380956594, "loss": 1.2038, "step": 11965 }, { "epoch": 0.460827718960539, "grad_norm": 1.182576060295105, "learning_rate": 0.00017496449345007982, "loss": 1.3408, "step": 11970 }, { "epoch": 0.46102021174205965, "grad_norm": 2.084197521209717, "learning_rate": 0.00017494447623558733, "loss": 1.269, "step": 11975 }, { "epoch": 0.4612127045235804, "grad_norm": 
1.709518551826477, "learning_rate": 0.00017492445216791896, "loss": 1.3736, "step": 11980 }, { "epoch": 0.46140519730510104, "grad_norm": 1.1446977853775024, "learning_rate": 0.00017490442124890577, "loss": 1.2449, "step": 11985 }, { "epoch": 0.46159769008662177, "grad_norm": 1.9139240980148315, "learning_rate": 0.00017488438348037946, "loss": 1.0845, "step": 11990 }, { "epoch": 0.46179018286814244, "grad_norm": 1.6536133289337158, "learning_rate": 0.00017486433886417234, "loss": 1.4398, "step": 11995 }, { "epoch": 0.4619826756496631, "grad_norm": 1.0629438161849976, "learning_rate": 0.00017484428740211736, "loss": 1.2631, "step": 12000 }, { "epoch": 0.46217516843118384, "grad_norm": 1.1966623067855835, "learning_rate": 0.00017482422909604809, "loss": 1.088, "step": 12005 }, { "epoch": 0.4623676612127045, "grad_norm": 1.1087130308151245, "learning_rate": 0.00017480416394779878, "loss": 1.2133, "step": 12010 }, { "epoch": 0.46256015399422523, "grad_norm": 1.9408375024795532, "learning_rate": 0.00017478409195920413, "loss": 1.1677, "step": 12015 }, { "epoch": 0.4627526467757459, "grad_norm": 1.2703943252563477, "learning_rate": 0.00017476401313209973, "loss": 1.232, "step": 12020 }, { "epoch": 0.4629451395572666, "grad_norm": 1.7841099500656128, "learning_rate": 0.0001747439274683216, "loss": 1.1688, "step": 12025 }, { "epoch": 0.4631376323387873, "grad_norm": 1.9395395517349243, "learning_rate": 0.0001747238349697064, "loss": 1.2336, "step": 12030 }, { "epoch": 0.46333012512030797, "grad_norm": 1.5011239051818848, "learning_rate": 0.0001747037356380915, "loss": 1.1849, "step": 12035 }, { "epoch": 0.4635226179018287, "grad_norm": 1.6130584478378296, "learning_rate": 0.00017468362947531486, "loss": 1.3113, "step": 12040 }, { "epoch": 0.46371511068334936, "grad_norm": 1.5666422843933105, "learning_rate": 0.000174663516483215, "loss": 1.2178, "step": 12045 }, { "epoch": 0.4639076034648701, "grad_norm": 1.490662932395935, "learning_rate": 0.0001746433966636312, "loss": 
1.3034, "step": 12050 }, { "epoch": 0.46410009624639076, "grad_norm": 1.1972042322158813, "learning_rate": 0.00017462327001840322, "loss": 1.1732, "step": 12055 }, { "epoch": 0.46429258902791143, "grad_norm": 1.5201470851898193, "learning_rate": 0.00017460313654937154, "loss": 1.1545, "step": 12060 }, { "epoch": 0.46448508180943215, "grad_norm": 0.8927121758460999, "learning_rate": 0.00017458299625837723, "loss": 1.1516, "step": 12065 }, { "epoch": 0.4646775745909528, "grad_norm": 1.394187092781067, "learning_rate": 0.00017456284914726196, "loss": 1.2791, "step": 12070 }, { "epoch": 0.46487006737247355, "grad_norm": 1.8900322914123535, "learning_rate": 0.00017454269521786808, "loss": 1.244, "step": 12075 }, { "epoch": 0.4650625601539942, "grad_norm": 2.20624041557312, "learning_rate": 0.00017452253447203852, "loss": 1.2526, "step": 12080 }, { "epoch": 0.46525505293551495, "grad_norm": 1.404261827468872, "learning_rate": 0.00017450236691161686, "loss": 1.1711, "step": 12085 }, { "epoch": 0.4654475457170356, "grad_norm": 1.6828880310058594, "learning_rate": 0.00017448219253844726, "loss": 1.3007, "step": 12090 }, { "epoch": 0.4656400384985563, "grad_norm": 1.0239325761795044, "learning_rate": 0.00017446201135437456, "loss": 1.1359, "step": 12095 }, { "epoch": 0.465832531280077, "grad_norm": 0.9242125749588013, "learning_rate": 0.0001744418233612442, "loss": 1.1848, "step": 12100 }, { "epoch": 0.4660250240615977, "grad_norm": 2.9907031059265137, "learning_rate": 0.0001744216285609022, "loss": 1.155, "step": 12105 }, { "epoch": 0.4662175168431184, "grad_norm": 0.9708018898963928, "learning_rate": 0.0001744014269551953, "loss": 1.4752, "step": 12110 }, { "epoch": 0.4664100096246391, "grad_norm": 1.1917387247085571, "learning_rate": 0.00017438121854597075, "loss": 1.1197, "step": 12115 }, { "epoch": 0.46660250240615975, "grad_norm": 1.5464357137680054, "learning_rate": 0.00017436100333507648, "loss": 1.1908, "step": 12120 }, { "epoch": 0.4667949951876805, "grad_norm": 
1.9502155780792236, "learning_rate": 0.00017434078132436107, "loss": 1.1888, "step": 12125 }, { "epoch": 0.46698748796920114, "grad_norm": 2.054029941558838, "learning_rate": 0.00017432055251567365, "loss": 1.2771, "step": 12130 }, { "epoch": 0.46717998075072187, "grad_norm": 1.276356816291809, "learning_rate": 0.00017430031691086407, "loss": 1.2392, "step": 12135 }, { "epoch": 0.46737247353224254, "grad_norm": 1.4474079608917236, "learning_rate": 0.00017428007451178267, "loss": 1.2111, "step": 12140 }, { "epoch": 0.4675649663137632, "grad_norm": 1.389797568321228, "learning_rate": 0.00017425982532028053, "loss": 1.2094, "step": 12145 }, { "epoch": 0.46775745909528393, "grad_norm": 1.2491530179977417, "learning_rate": 0.00017423956933820928, "loss": 1.2374, "step": 12150 }, { "epoch": 0.4679499518768046, "grad_norm": 1.0517950057983398, "learning_rate": 0.00017421930656742122, "loss": 1.1003, "step": 12155 }, { "epoch": 0.46814244465832533, "grad_norm": 1.410630226135254, "learning_rate": 0.00017419903700976924, "loss": 1.2722, "step": 12160 }, { "epoch": 0.468334937439846, "grad_norm": 1.5544359683990479, "learning_rate": 0.00017417876066710682, "loss": 1.1961, "step": 12165 }, { "epoch": 0.4685274302213667, "grad_norm": 1.3200881481170654, "learning_rate": 0.00017415847754128817, "loss": 1.0058, "step": 12170 }, { "epoch": 0.4687199230028874, "grad_norm": 1.998949646949768, "learning_rate": 0.00017413818763416795, "loss": 1.1513, "step": 12175 }, { "epoch": 0.46891241578440807, "grad_norm": 1.4105117321014404, "learning_rate": 0.0001741178909476016, "loss": 1.2993, "step": 12180 }, { "epoch": 0.4691049085659288, "grad_norm": 1.4521151781082153, "learning_rate": 0.00017409758748344515, "loss": 1.1659, "step": 12185 }, { "epoch": 0.46929740134744946, "grad_norm": 1.3822886943817139, "learning_rate": 0.00017407727724355515, "loss": 1.3419, "step": 12190 }, { "epoch": 0.4694898941289702, "grad_norm": 1.5602283477783203, "learning_rate": 0.00017405696022978885, 
"loss": 1.1506, "step": 12195 }, { "epoch": 0.46968238691049086, "grad_norm": 1.2674669027328491, "learning_rate": 0.00017403663644400413, "loss": 1.2992, "step": 12200 }, { "epoch": 0.4698748796920115, "grad_norm": 1.6091759204864502, "learning_rate": 0.00017401630588805947, "loss": 1.1105, "step": 12205 }, { "epoch": 0.47006737247353225, "grad_norm": 1.591635823249817, "learning_rate": 0.00017399596856381395, "loss": 1.2884, "step": 12210 }, { "epoch": 0.4702598652550529, "grad_norm": 1.5781102180480957, "learning_rate": 0.00017397562447312725, "loss": 1.1476, "step": 12215 }, { "epoch": 0.47045235803657365, "grad_norm": 1.4029310941696167, "learning_rate": 0.00017395527361785976, "loss": 1.4271, "step": 12220 }, { "epoch": 0.4706448508180943, "grad_norm": 1.8287990093231201, "learning_rate": 0.0001739349159998724, "loss": 1.2079, "step": 12225 }, { "epoch": 0.470837343599615, "grad_norm": 0.9693268537521362, "learning_rate": 0.00017391455162102677, "loss": 1.2341, "step": 12230 }, { "epoch": 0.4710298363811357, "grad_norm": 1.4181095361709595, "learning_rate": 0.00017389418048318502, "loss": 1.2796, "step": 12235 }, { "epoch": 0.4712223291626564, "grad_norm": 1.9247058629989624, "learning_rate": 0.00017387380258820993, "loss": 1.1858, "step": 12240 }, { "epoch": 0.4714148219441771, "grad_norm": 1.0236104726791382, "learning_rate": 0.00017385341793796502, "loss": 1.1713, "step": 12245 }, { "epoch": 0.4716073147256978, "grad_norm": 1.0250846147537231, "learning_rate": 0.00017383302653431427, "loss": 1.3036, "step": 12250 }, { "epoch": 0.4717998075072185, "grad_norm": 1.1760774850845337, "learning_rate": 0.00017381262837912228, "loss": 1.1779, "step": 12255 }, { "epoch": 0.4719923002887392, "grad_norm": 1.6482713222503662, "learning_rate": 0.00017379222347425446, "loss": 1.151, "step": 12260 }, { "epoch": 0.47218479307025985, "grad_norm": 1.3430352210998535, "learning_rate": 0.00017377181182157657, "loss": 1.3512, "step": 12265 }, { "epoch": 0.47237728585178057, 
"grad_norm": 2.4042775630950928, "learning_rate": 0.00017375139342295522, "loss": 1.3002, "step": 12270 }, { "epoch": 0.47256977863330124, "grad_norm": 0.967472493648529, "learning_rate": 0.00017373096828025752, "loss": 1.0813, "step": 12275 }, { "epoch": 0.47276227141482197, "grad_norm": 1.9774664640426636, "learning_rate": 0.00017371053639535117, "loss": 1.2232, "step": 12280 }, { "epoch": 0.47295476419634264, "grad_norm": 1.2525962591171265, "learning_rate": 0.00017369009777010454, "loss": 1.3974, "step": 12285 }, { "epoch": 0.4731472569778633, "grad_norm": 2.268892765045166, "learning_rate": 0.00017366965240638664, "loss": 1.2812, "step": 12290 }, { "epoch": 0.47333974975938403, "grad_norm": 1.143028974533081, "learning_rate": 0.000173649200306067, "loss": 1.2017, "step": 12295 }, { "epoch": 0.4735322425409047, "grad_norm": 0.9833802580833435, "learning_rate": 0.00017362874147101596, "loss": 1.1669, "step": 12300 }, { "epoch": 0.4737247353224254, "grad_norm": 1.5986253023147583, "learning_rate": 0.0001736082759031042, "loss": 1.2275, "step": 12305 }, { "epoch": 0.4739172281039461, "grad_norm": 1.8394620418548584, "learning_rate": 0.0001735878036042032, "loss": 1.1077, "step": 12310 }, { "epoch": 0.47410972088546677, "grad_norm": 2.2321078777313232, "learning_rate": 0.00017356732457618506, "loss": 1.1385, "step": 12315 }, { "epoch": 0.4743022136669875, "grad_norm": 1.2479119300842285, "learning_rate": 0.00017354683882092245, "loss": 1.2189, "step": 12320 }, { "epoch": 0.47449470644850816, "grad_norm": 1.6812646389007568, "learning_rate": 0.0001735263463402886, "loss": 1.3836, "step": 12325 }, { "epoch": 0.4746871992300289, "grad_norm": 1.4916552305221558, "learning_rate": 0.00017350584713615746, "loss": 1.1306, "step": 12330 }, { "epoch": 0.47487969201154956, "grad_norm": 1.7067712545394897, "learning_rate": 0.00017348534121040354, "loss": 1.3352, "step": 12335 }, { "epoch": 0.4750721847930703, "grad_norm": 1.1849184036254883, "learning_rate": 
0.00017346482856490196, "loss": 0.9746, "step": 12340 }, { "epoch": 0.47526467757459095, "grad_norm": 1.700038194656372, "learning_rate": 0.00017344430920152845, "loss": 1.3462, "step": 12345 }, { "epoch": 0.4754571703561116, "grad_norm": 1.4579262733459473, "learning_rate": 0.0001734237831221594, "loss": 1.2296, "step": 12350 }, { "epoch": 0.47564966313763235, "grad_norm": 1.230469822883606, "learning_rate": 0.00017340325032867178, "loss": 1.1615, "step": 12355 }, { "epoch": 0.475842155919153, "grad_norm": 1.4839364290237427, "learning_rate": 0.00017338271082294315, "loss": 1.2143, "step": 12360 }, { "epoch": 0.47603464870067375, "grad_norm": 0.8386423587799072, "learning_rate": 0.00017336216460685173, "loss": 1.1173, "step": 12365 }, { "epoch": 0.4762271414821944, "grad_norm": 1.9203957319259644, "learning_rate": 0.00017334161168227634, "loss": 1.2371, "step": 12370 }, { "epoch": 0.4764196342637151, "grad_norm": 1.752314567565918, "learning_rate": 0.00017332105205109641, "loss": 1.1022, "step": 12375 }, { "epoch": 0.4766121270452358, "grad_norm": 1.2998472452163696, "learning_rate": 0.00017330048571519198, "loss": 1.3008, "step": 12380 }, { "epoch": 0.4768046198267565, "grad_norm": 1.8506637811660767, "learning_rate": 0.0001732799126764437, "loss": 1.0814, "step": 12385 }, { "epoch": 0.4769971126082772, "grad_norm": 1.4652866125106812, "learning_rate": 0.00017325933293673283, "loss": 1.3528, "step": 12390 }, { "epoch": 0.4771896053897979, "grad_norm": 1.0838465690612793, "learning_rate": 0.00017323874649794127, "loss": 1.1435, "step": 12395 }, { "epoch": 0.4773820981713186, "grad_norm": 1.1437288522720337, "learning_rate": 0.0001732181533619515, "loss": 1.2403, "step": 12400 }, { "epoch": 0.4775745909528393, "grad_norm": 1.5026469230651855, "learning_rate": 0.00017319755353064665, "loss": 1.3211, "step": 12405 }, { "epoch": 0.47776708373435994, "grad_norm": 1.477759838104248, "learning_rate": 0.00017317694700591041, "loss": 1.062, "step": 12410 }, { "epoch": 
0.47795957651588067, "grad_norm": 1.6099724769592285, "learning_rate": 0.00017315633378962712, "loss": 1.322, "step": 12415 }, { "epoch": 0.47815206929740134, "grad_norm": 1.2413129806518555, "learning_rate": 0.00017313571388368173, "loss": 1.3106, "step": 12420 }, { "epoch": 0.47834456207892206, "grad_norm": 1.2218198776245117, "learning_rate": 0.00017311508728995976, "loss": 1.3899, "step": 12425 }, { "epoch": 0.47853705486044273, "grad_norm": 1.337332844734192, "learning_rate": 0.0001730944540103474, "loss": 1.2143, "step": 12430 }, { "epoch": 0.4787295476419634, "grad_norm": 1.132523775100708, "learning_rate": 0.00017307381404673143, "loss": 1.2243, "step": 12435 }, { "epoch": 0.47892204042348413, "grad_norm": 1.481467366218567, "learning_rate": 0.00017305316740099928, "loss": 1.1563, "step": 12440 }, { "epoch": 0.4791145332050048, "grad_norm": 1.3262776136398315, "learning_rate": 0.00017303251407503885, "loss": 1.1995, "step": 12445 }, { "epoch": 0.4793070259865255, "grad_norm": 1.8554911613464355, "learning_rate": 0.00017301185407073884, "loss": 1.2679, "step": 12450 }, { "epoch": 0.4794995187680462, "grad_norm": 1.5468156337738037, "learning_rate": 0.0001729911873899884, "loss": 1.1839, "step": 12455 }, { "epoch": 0.47969201154956687, "grad_norm": 1.1333458423614502, "learning_rate": 0.00017297051403467745, "loss": 1.2927, "step": 12460 }, { "epoch": 0.4798845043310876, "grad_norm": 1.5019558668136597, "learning_rate": 0.00017294983400669632, "loss": 1.1634, "step": 12465 }, { "epoch": 0.48007699711260826, "grad_norm": 0.9122928977012634, "learning_rate": 0.00017292914730793614, "loss": 1.0842, "step": 12470 }, { "epoch": 0.480269489894129, "grad_norm": 3.623866558074951, "learning_rate": 0.00017290845394028853, "loss": 1.2303, "step": 12475 }, { "epoch": 0.48046198267564966, "grad_norm": 1.1163458824157715, "learning_rate": 0.0001728877539056458, "loss": 1.1877, "step": 12480 }, { "epoch": 0.4806544754571704, "grad_norm": 1.1737778186798096, 
"learning_rate": 0.00017286704720590083, "loss": 1.2183, "step": 12485 }, { "epoch": 0.48084696823869105, "grad_norm": 1.0381931066513062, "learning_rate": 0.0001728463338429471, "loss": 1.103, "step": 12490 }, { "epoch": 0.4810394610202117, "grad_norm": 1.4400640726089478, "learning_rate": 0.00017282561381867865, "loss": 1.0941, "step": 12495 }, { "epoch": 0.48123195380173245, "grad_norm": 1.774886131286621, "learning_rate": 0.00017280488713499029, "loss": 1.2161, "step": 12500 }, { "epoch": 0.4814244465832531, "grad_norm": 1.680755376815796, "learning_rate": 0.00017278415379377724, "loss": 1.2248, "step": 12505 }, { "epoch": 0.48161693936477384, "grad_norm": 1.0955753326416016, "learning_rate": 0.00017276341379693553, "loss": 1.1558, "step": 12510 }, { "epoch": 0.4818094321462945, "grad_norm": 1.9817503690719604, "learning_rate": 0.00017274266714636163, "loss": 0.9682, "step": 12515 }, { "epoch": 0.4820019249278152, "grad_norm": 1.2484976053237915, "learning_rate": 0.00017272191384395266, "loss": 1.1304, "step": 12520 }, { "epoch": 0.4821944177093359, "grad_norm": 1.263295292854309, "learning_rate": 0.00017270115389160645, "loss": 1.1851, "step": 12525 }, { "epoch": 0.4823869104908566, "grad_norm": 1.749971628189087, "learning_rate": 0.00017268038729122126, "loss": 1.2665, "step": 12530 }, { "epoch": 0.4825794032723773, "grad_norm": 2.1695139408111572, "learning_rate": 0.0001726596140446962, "loss": 1.2351, "step": 12535 }, { "epoch": 0.482771896053898, "grad_norm": 1.8199032545089722, "learning_rate": 0.00017263883415393069, "loss": 1.0922, "step": 12540 }, { "epoch": 0.48296438883541865, "grad_norm": 1.5746350288391113, "learning_rate": 0.00017261804762082501, "loss": 1.2759, "step": 12545 }, { "epoch": 0.48315688161693937, "grad_norm": 1.1286424398422241, "learning_rate": 0.00017259725444727995, "loss": 1.2286, "step": 12550 }, { "epoch": 0.48334937439846004, "grad_norm": 1.1256860494613647, "learning_rate": 0.00017257645463519686, "loss": 1.1408, "step": 
12555 }, { "epoch": 0.48354186717998077, "grad_norm": 0.907913863658905, "learning_rate": 0.00017255564818647776, "loss": 1.1889, "step": 12560 }, { "epoch": 0.48373435996150144, "grad_norm": 1.0480241775512695, "learning_rate": 0.0001725348351030253, "loss": 1.0954, "step": 12565 }, { "epoch": 0.48392685274302216, "grad_norm": 1.4278559684753418, "learning_rate": 0.0001725140153867426, "loss": 1.1367, "step": 12570 }, { "epoch": 0.48411934552454283, "grad_norm": 0.9501696825027466, "learning_rate": 0.00017249318903953364, "loss": 1.2135, "step": 12575 }, { "epoch": 0.4843118383060635, "grad_norm": 1.8626338243484497, "learning_rate": 0.00017247235606330271, "loss": 1.2106, "step": 12580 }, { "epoch": 0.48450433108758423, "grad_norm": 1.3876720666885376, "learning_rate": 0.00017245151645995494, "loss": 1.3711, "step": 12585 }, { "epoch": 0.4846968238691049, "grad_norm": 1.6943193674087524, "learning_rate": 0.0001724306702313959, "loss": 1.117, "step": 12590 }, { "epoch": 0.4848893166506256, "grad_norm": 0.9052426218986511, "learning_rate": 0.00017240981737953192, "loss": 1.2578, "step": 12595 }, { "epoch": 0.4850818094321463, "grad_norm": 0.8325613141059875, "learning_rate": 0.00017238895790626976, "loss": 1.1599, "step": 12600 }, { "epoch": 0.48527430221366696, "grad_norm": 1.2736178636550903, "learning_rate": 0.00017236809181351697, "loss": 1.266, "step": 12605 }, { "epoch": 0.4854667949951877, "grad_norm": 1.8093243837356567, "learning_rate": 0.00017234721910318158, "loss": 1.2076, "step": 12610 }, { "epoch": 0.48565928777670836, "grad_norm": 1.7740625143051147, "learning_rate": 0.00017232633977717226, "loss": 1.2431, "step": 12615 }, { "epoch": 0.4858517805582291, "grad_norm": 0.83774334192276, "learning_rate": 0.00017230545383739826, "loss": 1.1987, "step": 12620 }, { "epoch": 0.48604427333974976, "grad_norm": 0.987789511680603, "learning_rate": 0.0001722845612857695, "loss": 1.0, "step": 12625 }, { "epoch": 0.4862367661212704, "grad_norm": 1.1741127967834473, 
"learning_rate": 0.00017226366212419645, "loss": 1.1961, "step": 12630 }, { "epoch": 0.48642925890279115, "grad_norm": 1.9914991855621338, "learning_rate": 0.00017224275635459023, "loss": 1.3657, "step": 12635 }, { "epoch": 0.4866217516843118, "grad_norm": 1.187045693397522, "learning_rate": 0.00017222184397886245, "loss": 1.1048, "step": 12640 }, { "epoch": 0.48681424446583255, "grad_norm": 1.1656368970870972, "learning_rate": 0.0001722009249989255, "loss": 1.1226, "step": 12645 }, { "epoch": 0.4870067372473532, "grad_norm": 1.050398349761963, "learning_rate": 0.00017217999941669226, "loss": 1.2018, "step": 12650 }, { "epoch": 0.48719923002887394, "grad_norm": 1.4217538833618164, "learning_rate": 0.00017215906723407618, "loss": 1.0039, "step": 12655 }, { "epoch": 0.4873917228103946, "grad_norm": 1.1657346487045288, "learning_rate": 0.00017213812845299145, "loss": 1.3153, "step": 12660 }, { "epoch": 0.4875842155919153, "grad_norm": 1.0622743368148804, "learning_rate": 0.0001721171830753527, "loss": 1.1889, "step": 12665 }, { "epoch": 0.487776708373436, "grad_norm": 1.3200461864471436, "learning_rate": 0.00017209623110307534, "loss": 1.2643, "step": 12670 }, { "epoch": 0.4879692011549567, "grad_norm": 1.3201463222503662, "learning_rate": 0.0001720752725380752, "loss": 1.1021, "step": 12675 }, { "epoch": 0.4881616939364774, "grad_norm": 1.3110108375549316, "learning_rate": 0.00017205430738226885, "loss": 1.297, "step": 12680 }, { "epoch": 0.4883541867179981, "grad_norm": 1.9574589729309082, "learning_rate": 0.00017203333563757344, "loss": 1.0589, "step": 12685 }, { "epoch": 0.48854667949951874, "grad_norm": 2.1387152671813965, "learning_rate": 0.00017201235730590663, "loss": 1.1929, "step": 12690 }, { "epoch": 0.48873917228103947, "grad_norm": 1.007409691810608, "learning_rate": 0.0001719913723891868, "loss": 1.1213, "step": 12695 }, { "epoch": 0.48893166506256014, "grad_norm": 0.9808946847915649, "learning_rate": 0.00017197038088933285, "loss": 1.2553, "step": 12700 
}, { "epoch": 0.48912415784408086, "grad_norm": 1.276231050491333, "learning_rate": 0.00017194938280826433, "loss": 1.2783, "step": 12705 }, { "epoch": 0.48931665062560153, "grad_norm": 2.288778305053711, "learning_rate": 0.0001719283781479014, "loss": 1.2736, "step": 12710 }, { "epoch": 0.48950914340712226, "grad_norm": 1.3838545083999634, "learning_rate": 0.00017190736691016475, "loss": 1.1101, "step": 12715 }, { "epoch": 0.48970163618864293, "grad_norm": 1.5680103302001953, "learning_rate": 0.00017188634909697572, "loss": 1.3685, "step": 12720 }, { "epoch": 0.4898941289701636, "grad_norm": 0.9690240621566772, "learning_rate": 0.00017186532471025626, "loss": 1.1677, "step": 12725 }, { "epoch": 0.4900866217516843, "grad_norm": 1.617100477218628, "learning_rate": 0.00017184429375192894, "loss": 1.3773, "step": 12730 }, { "epoch": 0.490279114533205, "grad_norm": 1.751895785331726, "learning_rate": 0.0001718232562239169, "loss": 1.1795, "step": 12735 }, { "epoch": 0.4904716073147257, "grad_norm": 1.1766438484191895, "learning_rate": 0.00017180221212814386, "loss": 1.0915, "step": 12740 }, { "epoch": 0.4906641000962464, "grad_norm": 1.384392499923706, "learning_rate": 0.00017178116146653415, "loss": 1.2823, "step": 12745 }, { "epoch": 0.49085659287776706, "grad_norm": 1.2379616498947144, "learning_rate": 0.00017176010424101274, "loss": 1.1182, "step": 12750 }, { "epoch": 0.4910490856592878, "grad_norm": 1.0620834827423096, "learning_rate": 0.00017173904045350515, "loss": 1.0414, "step": 12755 }, { "epoch": 0.49124157844080846, "grad_norm": 1.2448549270629883, "learning_rate": 0.00017171797010593755, "loss": 1.2487, "step": 12760 }, { "epoch": 0.4914340712223292, "grad_norm": 1.1862956285476685, "learning_rate": 0.00017169689320023666, "loss": 1.2117, "step": 12765 }, { "epoch": 0.49162656400384985, "grad_norm": 0.8380292057991028, "learning_rate": 0.00017167580973832984, "loss": 1.1396, "step": 12770 }, { "epoch": 0.4918190567853705, "grad_norm": 1.807305097579956, 
"learning_rate": 0.00017165471972214506, "loss": 1.2316, "step": 12775 }, { "epoch": 0.49201154956689125, "grad_norm": 1.5265247821807861, "learning_rate": 0.00017163362315361077, "loss": 1.1191, "step": 12780 }, { "epoch": 0.4922040423484119, "grad_norm": 1.4569288492202759, "learning_rate": 0.00017161252003465626, "loss": 1.3273, "step": 12785 }, { "epoch": 0.49239653512993264, "grad_norm": 1.0343568325042725, "learning_rate": 0.00017159141036721112, "loss": 1.3457, "step": 12790 }, { "epoch": 0.4925890279114533, "grad_norm": 1.4867749214172363, "learning_rate": 0.00017157029415320577, "loss": 1.1783, "step": 12795 }, { "epoch": 0.49278152069297404, "grad_norm": 1.0775165557861328, "learning_rate": 0.0001715491713945711, "loss": 1.2355, "step": 12800 }, { "epoch": 0.4929740134744947, "grad_norm": 1.1424553394317627, "learning_rate": 0.0001715280420932387, "loss": 1.1305, "step": 12805 }, { "epoch": 0.4931665062560154, "grad_norm": 2.403656482696533, "learning_rate": 0.00017150690625114065, "loss": 1.2118, "step": 12810 }, { "epoch": 0.4933589990375361, "grad_norm": 1.2673543691635132, "learning_rate": 0.00017148576387020976, "loss": 1.163, "step": 12815 }, { "epoch": 0.4935514918190568, "grad_norm": 1.4545459747314453, "learning_rate": 0.0001714646149523793, "loss": 1.1661, "step": 12820 }, { "epoch": 0.4937439846005775, "grad_norm": 1.8716140985488892, "learning_rate": 0.0001714434594995832, "loss": 1.0518, "step": 12825 }, { "epoch": 0.49393647738209817, "grad_norm": 1.4617652893066406, "learning_rate": 0.000171422297513756, "loss": 0.9978, "step": 12830 }, { "epoch": 0.49412897016361884, "grad_norm": 1.8650813102722168, "learning_rate": 0.00017140112899683284, "loss": 1.0077, "step": 12835 }, { "epoch": 0.49432146294513957, "grad_norm": 1.4080079793930054, "learning_rate": 0.00017137995395074938, "loss": 1.2906, "step": 12840 }, { "epoch": 0.49451395572666024, "grad_norm": 1.2144896984100342, "learning_rate": 0.000171358772377442, "loss": 1.1323, "step": 12845 
}, { "epoch": 0.49470644850818096, "grad_norm": 1.3294404745101929, "learning_rate": 0.0001713375842788476, "loss": 1.1987, "step": 12850 }, { "epoch": 0.49489894128970163, "grad_norm": 1.3663264513015747, "learning_rate": 0.00017131638965690372, "loss": 1.224, "step": 12855 }, { "epoch": 0.4950914340712223, "grad_norm": 2.325491428375244, "learning_rate": 0.0001712951885135484, "loss": 1.2449, "step": 12860 }, { "epoch": 0.49528392685274303, "grad_norm": 1.3226628303527832, "learning_rate": 0.00017127398085072039, "loss": 1.2321, "step": 12865 }, { "epoch": 0.4954764196342637, "grad_norm": 1.309049367904663, "learning_rate": 0.00017125276667035895, "loss": 1.1242, "step": 12870 }, { "epoch": 0.4956689124157844, "grad_norm": 2.189549446105957, "learning_rate": 0.00017123154597440402, "loss": 1.1479, "step": 12875 }, { "epoch": 0.4958614051973051, "grad_norm": 1.258787989616394, "learning_rate": 0.00017121031876479606, "loss": 1.3375, "step": 12880 }, { "epoch": 0.4960538979788258, "grad_norm": 1.4555957317352295, "learning_rate": 0.00017118908504347623, "loss": 1.29, "step": 12885 }, { "epoch": 0.4962463907603465, "grad_norm": 1.4220309257507324, "learning_rate": 0.00017116784481238613, "loss": 1.2438, "step": 12890 }, { "epoch": 0.49643888354186716, "grad_norm": 1.1141269207000732, "learning_rate": 0.00017114659807346803, "loss": 1.1286, "step": 12895 }, { "epoch": 0.4966313763233879, "grad_norm": 2.7541897296905518, "learning_rate": 0.00017112534482866486, "loss": 1.2506, "step": 12900 }, { "epoch": 0.49682386910490856, "grad_norm": 1.8478270769119263, "learning_rate": 0.00017110408507992007, "loss": 1.2337, "step": 12905 }, { "epoch": 0.4970163618864293, "grad_norm": 1.3013496398925781, "learning_rate": 0.00017108281882917775, "loss": 1.109, "step": 12910 }, { "epoch": 0.49720885466794995, "grad_norm": 1.6363266706466675, "learning_rate": 0.00017106154607838249, "loss": 1.2546, "step": 12915 }, { "epoch": 0.4974013474494706, "grad_norm": 1.4996516704559326, 
"learning_rate": 0.0001710402668294796, "loss": 1.3066, "step": 12920 }, { "epoch": 0.49759384023099135, "grad_norm": 1.2411632537841797, "learning_rate": 0.0001710189810844149, "loss": 1.2678, "step": 12925 }, { "epoch": 0.497786333012512, "grad_norm": 1.197771430015564, "learning_rate": 0.00017099768884513484, "loss": 1.1069, "step": 12930 }, { "epoch": 0.49797882579403274, "grad_norm": 1.1568015813827515, "learning_rate": 0.00017097639011358644, "loss": 1.1863, "step": 12935 }, { "epoch": 0.4981713185755534, "grad_norm": 1.8848886489868164, "learning_rate": 0.00017095508489171736, "loss": 1.3294, "step": 12940 }, { "epoch": 0.4983638113570741, "grad_norm": 1.4993650913238525, "learning_rate": 0.00017093377318147578, "loss": 1.1768, "step": 12945 }, { "epoch": 0.4985563041385948, "grad_norm": 1.1212975978851318, "learning_rate": 0.00017091245498481055, "loss": 1.2018, "step": 12950 }, { "epoch": 0.4987487969201155, "grad_norm": 1.086147665977478, "learning_rate": 0.00017089113030367107, "loss": 1.3022, "step": 12955 }, { "epoch": 0.4989412897016362, "grad_norm": 2.185974359512329, "learning_rate": 0.00017086979914000732, "loss": 1.239, "step": 12960 }, { "epoch": 0.4991337824831569, "grad_norm": 1.0493237972259521, "learning_rate": 0.00017084846149576993, "loss": 1.1614, "step": 12965 }, { "epoch": 0.4993262752646776, "grad_norm": 2.034449815750122, "learning_rate": 0.00017082711737291005, "loss": 1.2636, "step": 12970 }, { "epoch": 0.49951876804619827, "grad_norm": 1.6736334562301636, "learning_rate": 0.00017080576677337944, "loss": 1.1721, "step": 12975 }, { "epoch": 0.49971126082771894, "grad_norm": 1.8189449310302734, "learning_rate": 0.00017078440969913055, "loss": 1.3739, "step": 12980 }, { "epoch": 0.49990375360923966, "grad_norm": 1.500243067741394, "learning_rate": 0.00017076304615211627, "loss": 1.0446, "step": 12985 }, { "epoch": 0.5000962463907603, "grad_norm": 1.2671639919281006, "learning_rate": 0.0001707416761342902, "loss": 1.1841, "step": 12990 
}, { "epoch": 0.500288739172281, "grad_norm": 1.3602843284606934, "learning_rate": 0.00017072029964760644, "loss": 1.3311, "step": 12995 }, { "epoch": 0.5004812319538018, "grad_norm": 1.106224775314331, "learning_rate": 0.00017069891669401982, "loss": 1.1083, "step": 13000 }, { "epoch": 0.5006737247353225, "grad_norm": 1.3517072200775146, "learning_rate": 0.00017067752727548555, "loss": 1.2598, "step": 13005 }, { "epoch": 0.5008662175168431, "grad_norm": 1.1175580024719238, "learning_rate": 0.00017065613139395958, "loss": 1.0876, "step": 13010 }, { "epoch": 0.5010587102983638, "grad_norm": 1.8314218521118164, "learning_rate": 0.00017063472905139854, "loss": 1.3194, "step": 13015 }, { "epoch": 0.5012512030798845, "grad_norm": 0.7892528176307678, "learning_rate": 0.00017061332024975937, "loss": 1.0172, "step": 13020 }, { "epoch": 0.5014436958614052, "grad_norm": 0.8713880777359009, "learning_rate": 0.00017059190499099986, "loss": 1.0775, "step": 13025 }, { "epoch": 0.5016361886429259, "grad_norm": 2.4726779460906982, "learning_rate": 0.00017057048327707826, "loss": 1.3351, "step": 13030 }, { "epoch": 0.5018286814244466, "grad_norm": 1.1800824403762817, "learning_rate": 0.00017054905510995342, "loss": 1.3102, "step": 13035 }, { "epoch": 0.5020211742059673, "grad_norm": 1.583617091178894, "learning_rate": 0.0001705276204915849, "loss": 1.3633, "step": 13040 }, { "epoch": 0.5022136669874879, "grad_norm": 2.0497727394104004, "learning_rate": 0.00017050617942393264, "loss": 1.2055, "step": 13045 }, { "epoch": 0.5024061597690087, "grad_norm": 1.775793433189392, "learning_rate": 0.00017048473190895734, "loss": 1.1747, "step": 13050 }, { "epoch": 0.5025986525505294, "grad_norm": 1.8494744300842285, "learning_rate": 0.00017046327794862024, "loss": 1.2159, "step": 13055 }, { "epoch": 0.50279114533205, "grad_norm": 1.6188912391662598, "learning_rate": 0.00017044181754488315, "loss": 1.156, "step": 13060 }, { "epoch": 0.5029836381135707, "grad_norm": 1.575838565826416, 
"learning_rate": 0.00017042035069970846, "loss": 1.2103, "step": 13065 }, { "epoch": 0.5031761308950914, "grad_norm": 1.7594157457351685, "learning_rate": 0.0001703988774150592, "loss": 1.4271, "step": 13070 }, { "epoch": 0.5033686236766122, "grad_norm": 2.066418170928955, "learning_rate": 0.00017037739769289894, "loss": 1.215, "step": 13075 }, { "epoch": 0.5035611164581328, "grad_norm": 1.8343390226364136, "learning_rate": 0.0001703559115351919, "loss": 1.3318, "step": 13080 }, { "epoch": 0.5037536092396535, "grad_norm": 1.177186369895935, "learning_rate": 0.00017033441894390278, "loss": 1.2845, "step": 13085 }, { "epoch": 0.5039461020211742, "grad_norm": 1.7650407552719116, "learning_rate": 0.000170312919920997, "loss": 1.2133, "step": 13090 }, { "epoch": 0.5041385948026949, "grad_norm": 1.0483806133270264, "learning_rate": 0.00017029141446844043, "loss": 1.2309, "step": 13095 }, { "epoch": 0.5043310875842156, "grad_norm": 1.1729573011398315, "learning_rate": 0.00017026990258819968, "loss": 1.2975, "step": 13100 }, { "epoch": 0.5045235803657363, "grad_norm": 1.8557977676391602, "learning_rate": 0.00017024838428224184, "loss": 1.1332, "step": 13105 }, { "epoch": 0.504716073147257, "grad_norm": 2.2607064247131348, "learning_rate": 0.00017022685955253458, "loss": 1.186, "step": 13110 }, { "epoch": 0.5049085659287776, "grad_norm": 1.0992141962051392, "learning_rate": 0.00017020532840104625, "loss": 1.0708, "step": 13115 }, { "epoch": 0.5051010587102983, "grad_norm": 0.9550696611404419, "learning_rate": 0.0001701837908297457, "loss": 1.2069, "step": 13120 }, { "epoch": 0.5052935514918191, "grad_norm": 1.9301509857177734, "learning_rate": 0.00017016224684060242, "loss": 1.1152, "step": 13125 }, { "epoch": 0.5054860442733398, "grad_norm": 1.2657769918441772, "learning_rate": 0.0001701406964355864, "loss": 1.1028, "step": 13130 }, { "epoch": 0.5056785370548604, "grad_norm": 1.549902081489563, "learning_rate": 0.00017011913961666837, "loss": 1.1677, "step": 13135 }, { 
"epoch": 0.5058710298363811, "grad_norm": 1.5015727281570435, "learning_rate": 0.00017009757638581952, "loss": 1.1799, "step": 13140 }, { "epoch": 0.5060635226179019, "grad_norm": 1.206145167350769, "learning_rate": 0.00017007600674501166, "loss": 1.131, "step": 13145 }, { "epoch": 0.5062560153994226, "grad_norm": 1.1034317016601562, "learning_rate": 0.00017005443069621716, "loss": 1.0504, "step": 13150 }, { "epoch": 0.5064485081809432, "grad_norm": 1.0830001831054688, "learning_rate": 0.00017003284824140908, "loss": 1.1653, "step": 13155 }, { "epoch": 0.5066410009624639, "grad_norm": 1.8119686841964722, "learning_rate": 0.00017001125938256094, "loss": 1.1646, "step": 13160 }, { "epoch": 0.5068334937439846, "grad_norm": 1.1078890562057495, "learning_rate": 0.00016998966412164692, "loss": 1.3281, "step": 13165 }, { "epoch": 0.5070259865255053, "grad_norm": 2.1965198516845703, "learning_rate": 0.00016996806246064174, "loss": 1.1042, "step": 13170 }, { "epoch": 0.507218479307026, "grad_norm": 1.3997282981872559, "learning_rate": 0.00016994645440152075, "loss": 1.1662, "step": 13175 }, { "epoch": 0.5074109720885467, "grad_norm": 1.2493458986282349, "learning_rate": 0.00016992483994625985, "loss": 1.1594, "step": 13180 }, { "epoch": 0.5076034648700674, "grad_norm": 0.8307852745056152, "learning_rate": 0.00016990321909683557, "loss": 1.1701, "step": 13185 }, { "epoch": 0.507795957651588, "grad_norm": 1.1798492670059204, "learning_rate": 0.00016988159185522497, "loss": 1.2217, "step": 13190 }, { "epoch": 0.5079884504331088, "grad_norm": 2.131786823272705, "learning_rate": 0.00016985995822340567, "loss": 1.277, "step": 13195 }, { "epoch": 0.5081809432146295, "grad_norm": 1.0258443355560303, "learning_rate": 0.00016983831820335603, "loss": 1.0954, "step": 13200 }, { "epoch": 0.5083734359961501, "grad_norm": 2.4005777835845947, "learning_rate": 0.0001698166717970548, "loss": 1.3484, "step": 13205 }, { "epoch": 0.5085659287776708, "grad_norm": 1.3329745531082153, 
"learning_rate": 0.00016979501900648143, "loss": 1.2548, "step": 13210 }, { "epoch": 0.5087584215591915, "grad_norm": 1.2792582511901855, "learning_rate": 0.00016977335983361594, "loss": 1.1056, "step": 13215 }, { "epoch": 0.5089509143407123, "grad_norm": 1.1146180629730225, "learning_rate": 0.00016975169428043888, "loss": 1.1728, "step": 13220 }, { "epoch": 0.5091434071222329, "grad_norm": 1.155003309249878, "learning_rate": 0.0001697300223489314, "loss": 1.2469, "step": 13225 }, { "epoch": 0.5093358999037536, "grad_norm": 1.8456053733825684, "learning_rate": 0.00016970834404107535, "loss": 1.1515, "step": 13230 }, { "epoch": 0.5095283926852743, "grad_norm": 1.3863856792449951, "learning_rate": 0.000169686659358853, "loss": 1.2561, "step": 13235 }, { "epoch": 0.509720885466795, "grad_norm": 2.099985361099243, "learning_rate": 0.00016966496830424728, "loss": 1.2639, "step": 13240 }, { "epoch": 0.5099133782483157, "grad_norm": 1.4132083654403687, "learning_rate": 0.0001696432708792417, "loss": 0.9859, "step": 13245 }, { "epoch": 0.5101058710298364, "grad_norm": 1.0421473979949951, "learning_rate": 0.00016962156708582037, "loss": 1.1239, "step": 13250 }, { "epoch": 0.5102983638113571, "grad_norm": 1.4971591234207153, "learning_rate": 0.0001695998569259679, "loss": 1.183, "step": 13255 }, { "epoch": 0.5104908565928777, "grad_norm": 1.7850632667541504, "learning_rate": 0.00016957814040166955, "loss": 1.2342, "step": 13260 }, { "epoch": 0.5106833493743984, "grad_norm": 2.817624092102051, "learning_rate": 0.0001695564175149112, "loss": 1.1795, "step": 13265 }, { "epoch": 0.5108758421559192, "grad_norm": 1.4107112884521484, "learning_rate": 0.00016953468826767925, "loss": 1.2791, "step": 13270 }, { "epoch": 0.5110683349374399, "grad_norm": 1.4817914962768555, "learning_rate": 0.00016951295266196063, "loss": 1.0816, "step": 13275 }, { "epoch": 0.5112608277189605, "grad_norm": 1.054870367050171, "learning_rate": 0.00016949121069974302, "loss": 1.2114, "step": 13280 }, { 
"epoch": 0.5114533205004812, "grad_norm": 1.2629690170288086, "learning_rate": 0.00016946946238301453, "loss": 1.1014, "step": 13285 }, { "epoch": 0.5116458132820019, "grad_norm": 0.9189853668212891, "learning_rate": 0.00016944770771376387, "loss": 1.319, "step": 13290 }, { "epoch": 0.5118383060635227, "grad_norm": 1.4326847791671753, "learning_rate": 0.0001694259466939804, "loss": 1.299, "step": 13295 }, { "epoch": 0.5120307988450433, "grad_norm": 1.1833186149597168, "learning_rate": 0.00016940417932565402, "loss": 1.2863, "step": 13300 }, { "epoch": 0.512223291626564, "grad_norm": 1.1329289674758911, "learning_rate": 0.0001693824056107752, "loss": 1.2228, "step": 13305 }, { "epoch": 0.5124157844080847, "grad_norm": 1.2103817462921143, "learning_rate": 0.000169360625551335, "loss": 1.042, "step": 13310 }, { "epoch": 0.5126082771896054, "grad_norm": 1.2664172649383545, "learning_rate": 0.00016933883914932506, "loss": 1.299, "step": 13315 }, { "epoch": 0.5128007699711261, "grad_norm": 1.8509985208511353, "learning_rate": 0.0001693170464067376, "loss": 1.135, "step": 13320 }, { "epoch": 0.5129932627526468, "grad_norm": 2.1004250049591064, "learning_rate": 0.00016929524732556546, "loss": 1.1321, "step": 13325 }, { "epoch": 0.5131857555341675, "grad_norm": 1.1648815870285034, "learning_rate": 0.00016927344190780197, "loss": 1.094, "step": 13330 }, { "epoch": 0.5133782483156881, "grad_norm": 0.9492617249488831, "learning_rate": 0.0001692516301554411, "loss": 1.2055, "step": 13335 }, { "epoch": 0.5135707410972089, "grad_norm": 1.7911789417266846, "learning_rate": 0.00016922981207047742, "loss": 1.1726, "step": 13340 }, { "epoch": 0.5137632338787296, "grad_norm": 1.2055487632751465, "learning_rate": 0.00016920798765490601, "loss": 1.158, "step": 13345 }, { "epoch": 0.5139557266602502, "grad_norm": 1.1120411157608032, "learning_rate": 0.0001691861569107226, "loss": 1.2508, "step": 13350 }, { "epoch": 0.5141482194417709, "grad_norm": 1.1816275119781494, "learning_rate": 
0.0001691643198399235, "loss": 1.0294, "step": 13355 }, { "epoch": 0.5143407122232916, "grad_norm": 1.1714962720870972, "learning_rate": 0.00016914247644450546, "loss": 1.2843, "step": 13360 }, { "epoch": 0.5145332050048124, "grad_norm": 2.0812292098999023, "learning_rate": 0.000169120626726466, "loss": 1.284, "step": 13365 }, { "epoch": 0.514725697786333, "grad_norm": 1.7628620862960815, "learning_rate": 0.00016909877068780314, "loss": 1.1104, "step": 13370 }, { "epoch": 0.5149181905678537, "grad_norm": 1.7429643869400024, "learning_rate": 0.0001690769083305154, "loss": 1.1387, "step": 13375 }, { "epoch": 0.5151106833493744, "grad_norm": 2.087916612625122, "learning_rate": 0.00016905503965660196, "loss": 1.2737, "step": 13380 }, { "epoch": 0.515303176130895, "grad_norm": 1.5689221620559692, "learning_rate": 0.00016903316466806265, "loss": 1.145, "step": 13385 }, { "epoch": 0.5154956689124158, "grad_norm": 1.0740375518798828, "learning_rate": 0.0001690112833668977, "loss": 0.9748, "step": 13390 }, { "epoch": 0.5156881616939365, "grad_norm": 1.4595876932144165, "learning_rate": 0.00016898939575510805, "loss": 1.1378, "step": 13395 }, { "epoch": 0.5158806544754572, "grad_norm": 1.9210182428359985, "learning_rate": 0.00016896750183469517, "loss": 1.2581, "step": 13400 }, { "epoch": 0.5160731472569778, "grad_norm": 1.0922927856445312, "learning_rate": 0.00016894560160766117, "loss": 1.1601, "step": 13405 }, { "epoch": 0.5162656400384985, "grad_norm": 2.037611246109009, "learning_rate": 0.00016892369507600855, "loss": 1.2394, "step": 13410 }, { "epoch": 0.5164581328200193, "grad_norm": 1.1577821969985962, "learning_rate": 0.0001689017822417406, "loss": 1.2428, "step": 13415 }, { "epoch": 0.51665062560154, "grad_norm": 1.1762430667877197, "learning_rate": 0.00016887986310686114, "loss": 1.1648, "step": 13420 }, { "epoch": 0.5168431183830606, "grad_norm": 1.8631316423416138, "learning_rate": 0.00016885793767337445, "loss": 1.2288, "step": 13425 }, { "epoch": 
0.5170356111645813, "grad_norm": 1.191747784614563, "learning_rate": 0.0001688360059432855, "loss": 1.1287, "step": 13430 }, { "epoch": 0.517228103946102, "grad_norm": 1.092367172241211, "learning_rate": 0.00016881406791859985, "loss": 1.2073, "step": 13435 }, { "epoch": 0.5174205967276228, "grad_norm": 0.9805938601493835, "learning_rate": 0.00016879212360132345, "loss": 1.3199, "step": 13440 }, { "epoch": 0.5176130895091434, "grad_norm": 1.0042074918746948, "learning_rate": 0.00016877017299346314, "loss": 1.1389, "step": 13445 }, { "epoch": 0.5178055822906641, "grad_norm": 1.3087821006774902, "learning_rate": 0.00016874821609702605, "loss": 1.2112, "step": 13450 }, { "epoch": 0.5179980750721848, "grad_norm": 1.4208637475967407, "learning_rate": 0.00016872625291401998, "loss": 1.119, "step": 13455 }, { "epoch": 0.5181905678537055, "grad_norm": 0.9211226105690002, "learning_rate": 0.0001687042834464534, "loss": 1.1458, "step": 13460 }, { "epoch": 0.5183830606352262, "grad_norm": 1.1774996519088745, "learning_rate": 0.00016868230769633518, "loss": 1.195, "step": 13465 }, { "epoch": 0.5185755534167469, "grad_norm": 1.395883321762085, "learning_rate": 0.0001686603256656749, "loss": 1.1633, "step": 13470 }, { "epoch": 0.5187680461982676, "grad_norm": 2.2554938793182373, "learning_rate": 0.00016863833735648268, "loss": 1.1345, "step": 13475 }, { "epoch": 0.5189605389797882, "grad_norm": 1.2396293878555298, "learning_rate": 0.00016861634277076922, "loss": 1.1109, "step": 13480 }, { "epoch": 0.519153031761309, "grad_norm": 1.2292909622192383, "learning_rate": 0.00016859434191054574, "loss": 1.1029, "step": 13485 }, { "epoch": 0.5193455245428297, "grad_norm": 1.145571231842041, "learning_rate": 0.00016857233477782409, "loss": 1.1734, "step": 13490 }, { "epoch": 0.5195380173243503, "grad_norm": 1.7307795286178589, "learning_rate": 0.00016855032137461667, "loss": 1.1476, "step": 13495 }, { "epoch": 0.519730510105871, "grad_norm": 1.611140489578247, "learning_rate": 
0.0001685283017029365, "loss": 1.1304, "step": 13500 }, { "epoch": 0.5199230028873917, "grad_norm": 1.3966014385223389, "learning_rate": 0.00016850627576479705, "loss": 1.1231, "step": 13505 }, { "epoch": 0.5201154956689125, "grad_norm": 1.505765676498413, "learning_rate": 0.0001684842435622125, "loss": 1.0741, "step": 13510 }, { "epoch": 0.5203079884504331, "grad_norm": 1.791595220565796, "learning_rate": 0.00016846220509719755, "loss": 1.1928, "step": 13515 }, { "epoch": 0.5205004812319538, "grad_norm": 1.2992479801177979, "learning_rate": 0.00016844016037176744, "loss": 1.1523, "step": 13520 }, { "epoch": 0.5206929740134745, "grad_norm": 1.8747221231460571, "learning_rate": 0.00016841810938793807, "loss": 1.0704, "step": 13525 }, { "epoch": 0.5208854667949951, "grad_norm": 1.3441274166107178, "learning_rate": 0.00016839605214772583, "loss": 1.1979, "step": 13530 }, { "epoch": 0.5210779595765159, "grad_norm": 0.8640159964561462, "learning_rate": 0.0001683739886531477, "loss": 1.1577, "step": 13535 }, { "epoch": 0.5212704523580366, "grad_norm": 1.7198442220687866, "learning_rate": 0.00016835191890622123, "loss": 1.2623, "step": 13540 }, { "epoch": 0.5214629451395573, "grad_norm": 1.2651041746139526, "learning_rate": 0.0001683298429089646, "loss": 1.2428, "step": 13545 }, { "epoch": 0.5216554379210779, "grad_norm": 1.9191710948944092, "learning_rate": 0.00016830776066339642, "loss": 1.2872, "step": 13550 }, { "epoch": 0.5218479307025986, "grad_norm": 1.8098481893539429, "learning_rate": 0.00016828567217153605, "loss": 1.2838, "step": 13555 }, { "epoch": 0.5220404234841194, "grad_norm": 1.732160210609436, "learning_rate": 0.00016826357743540332, "loss": 1.0766, "step": 13560 }, { "epoch": 0.5222329162656401, "grad_norm": 1.4580518007278442, "learning_rate": 0.00016824147645701863, "loss": 1.2825, "step": 13565 }, { "epoch": 0.5224254090471607, "grad_norm": 1.5836480855941772, "learning_rate": 0.000168219369238403, "loss": 1.1772, "step": 13570 }, { "epoch": 
0.5226179018286814, "grad_norm": 1.5529143810272217, "learning_rate": 0.00016819725578157794, "loss": 1.2795, "step": 13575 }, { "epoch": 0.5228103946102021, "grad_norm": 1.1405484676361084, "learning_rate": 0.0001681751360885656, "loss": 1.2133, "step": 13580 }, { "epoch": 0.5230028873917228, "grad_norm": 1.0912057161331177, "learning_rate": 0.00016815301016138873, "loss": 1.0493, "step": 13585 }, { "epoch": 0.5231953801732435, "grad_norm": 0.9384201169013977, "learning_rate": 0.0001681308780020705, "loss": 1.1638, "step": 13590 }, { "epoch": 0.5233878729547642, "grad_norm": 1.3467286825180054, "learning_rate": 0.0001681087396126348, "loss": 1.1927, "step": 13595 }, { "epoch": 0.5235803657362849, "grad_norm": 0.9008259773254395, "learning_rate": 0.00016808659499510607, "loss": 1.2158, "step": 13600 }, { "epoch": 0.5237728585178055, "grad_norm": 1.1013727188110352, "learning_rate": 0.00016806444415150927, "loss": 1.1575, "step": 13605 }, { "epoch": 0.5239653512993263, "grad_norm": 1.160654902458191, "learning_rate": 0.00016804228708386992, "loss": 1.1662, "step": 13610 }, { "epoch": 0.524157844080847, "grad_norm": 1.5752032995224, "learning_rate": 0.00016802012379421414, "loss": 1.1596, "step": 13615 }, { "epoch": 0.5243503368623676, "grad_norm": 1.1819881200790405, "learning_rate": 0.00016799795428456865, "loss": 1.1686, "step": 13620 }, { "epoch": 0.5245428296438883, "grad_norm": 0.9841921329498291, "learning_rate": 0.00016797577855696069, "loss": 1.0872, "step": 13625 }, { "epoch": 0.5247353224254091, "grad_norm": 1.2292228937149048, "learning_rate": 0.00016795359661341808, "loss": 1.3943, "step": 13630 }, { "epoch": 0.5249278152069298, "grad_norm": 1.2674068212509155, "learning_rate": 0.0001679314084559692, "loss": 1.232, "step": 13635 }, { "epoch": 0.5251203079884504, "grad_norm": 1.1942312717437744, "learning_rate": 0.00016790921408664302, "loss": 1.2223, "step": 13640 }, { "epoch": 0.5253128007699711, "grad_norm": 1.5753337144851685, "learning_rate": 
0.00016788701350746907, "loss": 1.2936, "step": 13645 }, { "epoch": 0.5255052935514918, "grad_norm": 1.1031461954116821, "learning_rate": 0.00016786480672047744, "loss": 1.2651, "step": 13650 }, { "epoch": 0.5256977863330126, "grad_norm": 3.8325674533843994, "learning_rate": 0.00016784259372769884, "loss": 1.1693, "step": 13655 }, { "epoch": 0.5258902791145332, "grad_norm": 1.6535909175872803, "learning_rate": 0.0001678203745311644, "loss": 1.1606, "step": 13660 }, { "epoch": 0.5260827718960539, "grad_norm": 1.6406097412109375, "learning_rate": 0.000167798149132906, "loss": 1.3232, "step": 13665 }, { "epoch": 0.5262752646775746, "grad_norm": 1.6994904279708862, "learning_rate": 0.000167775917534956, "loss": 1.0998, "step": 13670 }, { "epoch": 0.5264677574590952, "grad_norm": 1.6446374654769897, "learning_rate": 0.0001677536797393473, "loss": 1.2997, "step": 13675 }, { "epoch": 0.526660250240616, "grad_norm": 1.6050851345062256, "learning_rate": 0.0001677314357481134, "loss": 1.19, "step": 13680 }, { "epoch": 0.5268527430221367, "grad_norm": 1.473940134048462, "learning_rate": 0.00016770918556328844, "loss": 1.2007, "step": 13685 }, { "epoch": 0.5270452358036574, "grad_norm": 1.1209567785263062, "learning_rate": 0.00016768692918690695, "loss": 1.2956, "step": 13690 }, { "epoch": 0.527237728585178, "grad_norm": 1.4143558740615845, "learning_rate": 0.00016766466662100415, "loss": 1.1734, "step": 13695 }, { "epoch": 0.5274302213666987, "grad_norm": 1.138107180595398, "learning_rate": 0.00016764239786761585, "loss": 1.1318, "step": 13700 }, { "epoch": 0.5276227141482195, "grad_norm": 1.5194774866104126, "learning_rate": 0.00016762012292877835, "loss": 1.1525, "step": 13705 }, { "epoch": 0.5278152069297402, "grad_norm": 1.136946439743042, "learning_rate": 0.00016759784180652858, "loss": 1.1289, "step": 13710 }, { "epoch": 0.5280076997112608, "grad_norm": 1.5263949632644653, "learning_rate": 0.00016757555450290396, "loss": 1.2811, "step": 13715 }, { "epoch": 
0.5282001924927815, "grad_norm": 2.306833505630493, "learning_rate": 0.00016755326101994248, "loss": 1.0326, "step": 13720 }, { "epoch": 0.5283926852743022, "grad_norm": 1.4330452680587769, "learning_rate": 0.0001675309613596828, "loss": 1.0126, "step": 13725 }, { "epoch": 0.528585178055823, "grad_norm": 0.8746087551116943, "learning_rate": 0.00016750865552416408, "loss": 1.224, "step": 13730 }, { "epoch": 0.5287776708373436, "grad_norm": 2.576612949371338, "learning_rate": 0.000167486343515426, "loss": 1.178, "step": 13735 }, { "epoch": 0.5289701636188643, "grad_norm": 1.3074976205825806, "learning_rate": 0.00016746402533550887, "loss": 1.2453, "step": 13740 }, { "epoch": 0.529162656400385, "grad_norm": 1.0941317081451416, "learning_rate": 0.00016744170098645353, "loss": 0.9341, "step": 13745 }, { "epoch": 0.5293551491819056, "grad_norm": 1.6738418340682983, "learning_rate": 0.00016741937047030139, "loss": 1.1423, "step": 13750 }, { "epoch": 0.5295476419634264, "grad_norm": 1.9735844135284424, "learning_rate": 0.00016739703378909444, "loss": 1.0691, "step": 13755 }, { "epoch": 0.5297401347449471, "grad_norm": 1.0063233375549316, "learning_rate": 0.00016737469094487518, "loss": 1.0096, "step": 13760 }, { "epoch": 0.5299326275264677, "grad_norm": 1.2500115633010864, "learning_rate": 0.00016735234193968678, "loss": 1.1627, "step": 13765 }, { "epoch": 0.5301251203079884, "grad_norm": 1.0908536911010742, "learning_rate": 0.00016732998677557287, "loss": 1.0477, "step": 13770 }, { "epoch": 0.5303176130895092, "grad_norm": 1.609208106994629, "learning_rate": 0.0001673076254545777, "loss": 0.964, "step": 13775 }, { "epoch": 0.5305101058710299, "grad_norm": 1.0210634469985962, "learning_rate": 0.00016728525797874607, "loss": 1.2982, "step": 13780 }, { "epoch": 0.5307025986525505, "grad_norm": 2.0595545768737793, "learning_rate": 0.0001672628843501233, "loss": 1.1969, "step": 13785 }, { "epoch": 0.5308950914340712, "grad_norm": 1.7514983415603638, "learning_rate": 
0.00016724050457075533, "loss": 1.2918, "step": 13790 }, { "epoch": 0.5310875842155919, "grad_norm": 1.483798861503601, "learning_rate": 0.00016721811864268865, "loss": 1.0163, "step": 13795 }, { "epoch": 0.5312800769971127, "grad_norm": 1.4174484014511108, "learning_rate": 0.0001671957265679703, "loss": 1.1936, "step": 13800 }, { "epoch": 0.5314725697786333, "grad_norm": 1.4664232730865479, "learning_rate": 0.00016717332834864787, "loss": 1.2553, "step": 13805 }, { "epoch": 0.531665062560154, "grad_norm": 0.6863868832588196, "learning_rate": 0.00016715092398676958, "loss": 0.8998, "step": 13810 }, { "epoch": 0.5318575553416747, "grad_norm": 2.3511574268341064, "learning_rate": 0.00016712851348438408, "loss": 1.4484, "step": 13815 }, { "epoch": 0.5320500481231953, "grad_norm": 1.418361783027649, "learning_rate": 0.00016710609684354074, "loss": 1.1139, "step": 13820 }, { "epoch": 0.5322425409047161, "grad_norm": 1.5918070077896118, "learning_rate": 0.00016708367406628938, "loss": 1.1045, "step": 13825 }, { "epoch": 0.5324350336862368, "grad_norm": 1.1937044858932495, "learning_rate": 0.00016706124515468042, "loss": 1.2665, "step": 13830 }, { "epoch": 0.5326275264677575, "grad_norm": 1.60366952419281, "learning_rate": 0.00016703881011076482, "loss": 1.3277, "step": 13835 }, { "epoch": 0.5328200192492781, "grad_norm": 1.2769535779953003, "learning_rate": 0.00016701636893659414, "loss": 1.2517, "step": 13840 }, { "epoch": 0.5330125120307988, "grad_norm": 1.3906430006027222, "learning_rate": 0.00016699392163422043, "loss": 1.3485, "step": 13845 }, { "epoch": 0.5332050048123196, "grad_norm": 1.461391568183899, "learning_rate": 0.0001669714682056964, "loss": 1.1297, "step": 13850 }, { "epoch": 0.5333974975938403, "grad_norm": 1.3566093444824219, "learning_rate": 0.00016694900865307525, "loss": 1.2833, "step": 13855 }, { "epoch": 0.5335899903753609, "grad_norm": 1.4480105638504028, "learning_rate": 0.00016692654297841076, "loss": 1.0877, "step": 13860 }, { "epoch": 
0.5337824831568816, "grad_norm": 1.0896391868591309, "learning_rate": 0.00016690407118375724, "loss": 1.1286, "step": 13865 }, { "epoch": 0.5339749759384023, "grad_norm": 1.101636290550232, "learning_rate": 0.00016688159327116962, "loss": 1.0802, "step": 13870 }, { "epoch": 0.534167468719923, "grad_norm": 1.1488208770751953, "learning_rate": 0.00016685910924270337, "loss": 1.144, "step": 13875 }, { "epoch": 0.5343599615014437, "grad_norm": 1.4691115617752075, "learning_rate": 0.00016683661910041445, "loss": 1.2133, "step": 13880 }, { "epoch": 0.5345524542829644, "grad_norm": 0.9920752048492432, "learning_rate": 0.0001668141228463595, "loss": 1.1326, "step": 13885 }, { "epoch": 0.534744947064485, "grad_norm": 1.2828654050827026, "learning_rate": 0.00016679162048259557, "loss": 1.2162, "step": 13890 }, { "epoch": 0.5349374398460057, "grad_norm": 1.3294516801834106, "learning_rate": 0.00016676911201118043, "loss": 1.1797, "step": 13895 }, { "epoch": 0.5351299326275265, "grad_norm": 1.5326685905456543, "learning_rate": 0.00016674659743417232, "loss": 1.1147, "step": 13900 }, { "epoch": 0.5353224254090472, "grad_norm": 1.9222960472106934, "learning_rate": 0.00016672407675363, "loss": 1.1615, "step": 13905 }, { "epoch": 0.5355149181905678, "grad_norm": 1.412458062171936, "learning_rate": 0.00016670154997161288, "loss": 1.1556, "step": 13910 }, { "epoch": 0.5357074109720885, "grad_norm": 1.230669617652893, "learning_rate": 0.00016667901709018087, "loss": 1.062, "step": 13915 }, { "epoch": 0.5358999037536092, "grad_norm": 1.431746006011963, "learning_rate": 0.00016665647811139444, "loss": 1.0561, "step": 13920 }, { "epoch": 0.53609239653513, "grad_norm": 1.6623647212982178, "learning_rate": 0.00016663393303731466, "loss": 1.1495, "step": 13925 }, { "epoch": 0.5362848893166506, "grad_norm": 1.5261880159378052, "learning_rate": 0.00016661138187000312, "loss": 1.3093, "step": 13930 }, { "epoch": 0.5364773820981713, "grad_norm": 1.5623407363891602, "learning_rate": 
0.00016658882461152195, "loss": 1.0859, "step": 13935 }, { "epoch": 0.536669874879692, "grad_norm": 1.2155213356018066, "learning_rate": 0.0001665662612639339, "loss": 1.2502, "step": 13940 }, { "epoch": 0.5368623676612128, "grad_norm": 0.7948794364929199, "learning_rate": 0.0001665436918293022, "loss": 1.1741, "step": 13945 }, { "epoch": 0.5370548604427334, "grad_norm": 1.370322585105896, "learning_rate": 0.0001665211163096907, "loss": 1.2727, "step": 13950 }, { "epoch": 0.5372473532242541, "grad_norm": 1.146519660949707, "learning_rate": 0.00016649853470716378, "loss": 1.2603, "step": 13955 }, { "epoch": 0.5374398460057748, "grad_norm": 1.1492048501968384, "learning_rate": 0.00016647594702378637, "loss": 1.1772, "step": 13960 }, { "epoch": 0.5376323387872954, "grad_norm": 2.4730112552642822, "learning_rate": 0.00016645335326162397, "loss": 1.4024, "step": 13965 }, { "epoch": 0.5378248315688162, "grad_norm": 1.411889910697937, "learning_rate": 0.00016643075342274264, "loss": 1.1121, "step": 13970 }, { "epoch": 0.5380173243503369, "grad_norm": 1.0818617343902588, "learning_rate": 0.00016640814750920895, "loss": 1.2139, "step": 13975 }, { "epoch": 0.5382098171318576, "grad_norm": 1.1196002960205078, "learning_rate": 0.0001663855355230901, "loss": 1.0877, "step": 13980 }, { "epoch": 0.5384023099133782, "grad_norm": 1.5476993322372437, "learning_rate": 0.00016636291746645378, "loss": 1.1055, "step": 13985 }, { "epoch": 0.5385948026948989, "grad_norm": 0.924186646938324, "learning_rate": 0.00016634029334136827, "loss": 1.0307, "step": 13990 }, { "epoch": 0.5387872954764197, "grad_norm": 1.157355546951294, "learning_rate": 0.0001663176631499024, "loss": 1.0783, "step": 13995 }, { "epoch": 0.5389797882579404, "grad_norm": 1.1704423427581787, "learning_rate": 0.00016629502689412555, "loss": 1.3452, "step": 14000 }, { "epoch": 0.539172281039461, "grad_norm": 2.0251457691192627, "learning_rate": 0.00016627238457610766, "loss": 1.3611, "step": 14005 }, { "epoch": 
0.5393647738209817, "grad_norm": 1.018612265586853, "learning_rate": 0.0001662497361979192, "loss": 1.115, "step": 14010 }, { "epoch": 0.5395572666025024, "grad_norm": 1.2389349937438965, "learning_rate": 0.00016622708176163126, "loss": 1.2055, "step": 14015 }, { "epoch": 0.5397497593840231, "grad_norm": 2.2555086612701416, "learning_rate": 0.0001662044212693154, "loss": 1.0512, "step": 14020 }, { "epoch": 0.5399422521655438, "grad_norm": 1.059856653213501, "learning_rate": 0.00016618175472304375, "loss": 1.2114, "step": 14025 }, { "epoch": 0.5401347449470645, "grad_norm": 1.484417200088501, "learning_rate": 0.00016615908212488906, "loss": 1.1872, "step": 14030 }, { "epoch": 0.5403272377285852, "grad_norm": 1.4816780090332031, "learning_rate": 0.00016613640347692458, "loss": 1.1261, "step": 14035 }, { "epoch": 0.5405197305101058, "grad_norm": 1.6735597848892212, "learning_rate": 0.00016611371878122412, "loss": 1.2311, "step": 14040 }, { "epoch": 0.5407122232916266, "grad_norm": 1.8882919549942017, "learning_rate": 0.00016609102803986204, "loss": 1.3099, "step": 14045 }, { "epoch": 0.5409047160731473, "grad_norm": 1.4272384643554688, "learning_rate": 0.00016606833125491327, "loss": 1.2343, "step": 14050 }, { "epoch": 0.5410972088546679, "grad_norm": 1.2361105680465698, "learning_rate": 0.0001660456284284532, "loss": 1.155, "step": 14055 }, { "epoch": 0.5412897016361886, "grad_norm": 1.294826626777649, "learning_rate": 0.000166022919562558, "loss": 1.0691, "step": 14060 }, { "epoch": 0.5414821944177093, "grad_norm": 2.163748264312744, "learning_rate": 0.00016600020465930415, "loss": 1.4603, "step": 14065 }, { "epoch": 0.5416746871992301, "grad_norm": 2.8181777000427246, "learning_rate": 0.00016597748372076878, "loss": 1.1513, "step": 14070 }, { "epoch": 0.5418671799807507, "grad_norm": 1.558497667312622, "learning_rate": 0.00016595475674902957, "loss": 1.1758, "step": 14075 }, { "epoch": 0.5420596727622714, "grad_norm": 1.5868738889694214, "learning_rate": 
0.0001659320237461648, "loss": 1.1867, "step": 14080 }, { "epoch": 0.5422521655437921, "grad_norm": 0.850387692451477, "learning_rate": 0.0001659092847142532, "loss": 0.8849, "step": 14085 }, { "epoch": 0.5424446583253129, "grad_norm": 1.334726095199585, "learning_rate": 0.00016588653965537412, "loss": 1.252, "step": 14090 }, { "epoch": 0.5426371511068335, "grad_norm": 1.1548973321914673, "learning_rate": 0.00016586378857160743, "loss": 1.2255, "step": 14095 }, { "epoch": 0.5428296438883542, "grad_norm": 1.3282769918441772, "learning_rate": 0.00016584103146503364, "loss": 1.0991, "step": 14100 }, { "epoch": 0.5430221366698749, "grad_norm": 1.635657548904419, "learning_rate": 0.00016581826833773363, "loss": 1.1963, "step": 14105 }, { "epoch": 0.5432146294513955, "grad_norm": 1.7892380952835083, "learning_rate": 0.00016579549919178903, "loss": 1.0593, "step": 14110 }, { "epoch": 0.5434071222329163, "grad_norm": 2.381394147872925, "learning_rate": 0.00016577272402928183, "loss": 1.2743, "step": 14115 }, { "epoch": 0.543599615014437, "grad_norm": 1.1770328283309937, "learning_rate": 0.00016574994285229478, "loss": 1.1433, "step": 14120 }, { "epoch": 0.5437921077959577, "grad_norm": 1.9077178239822388, "learning_rate": 0.00016572715566291098, "loss": 1.2422, "step": 14125 }, { "epoch": 0.5439846005774783, "grad_norm": 1.2600334882736206, "learning_rate": 0.00016570436246321417, "loss": 1.1479, "step": 14130 }, { "epoch": 0.544177093358999, "grad_norm": 1.0997780561447144, "learning_rate": 0.0001656815632552887, "loss": 1.2516, "step": 14135 }, { "epoch": 0.5443695861405198, "grad_norm": 1.1767383813858032, "learning_rate": 0.00016565875804121935, "loss": 1.1713, "step": 14140 }, { "epoch": 0.5445620789220404, "grad_norm": 1.62860906124115, "learning_rate": 0.00016563594682309152, "loss": 1.2017, "step": 14145 }, { "epoch": 0.5447545717035611, "grad_norm": 1.6149252653121948, "learning_rate": 0.0001656131296029912, "loss": 1.2104, "step": 14150 }, { "epoch": 
0.5449470644850818, "grad_norm": 1.0693351030349731, "learning_rate": 0.0001655903063830048, "loss": 1.2656, "step": 14155 }, { "epoch": 0.5451395572666025, "grad_norm": 1.4624438285827637, "learning_rate": 0.00016556747716521937, "loss": 1.2323, "step": 14160 }, { "epoch": 0.5453320500481232, "grad_norm": 1.8848096132278442, "learning_rate": 0.0001655446419517225, "loss": 1.2104, "step": 14165 }, { "epoch": 0.5455245428296439, "grad_norm": 1.076907753944397, "learning_rate": 0.00016552180074460231, "loss": 1.2503, "step": 14170 }, { "epoch": 0.5457170356111646, "grad_norm": 2.496718645095825, "learning_rate": 0.00016549895354594748, "loss": 1.146, "step": 14175 }, { "epoch": 0.5459095283926853, "grad_norm": 1.8133556842803955, "learning_rate": 0.00016547610035784724, "loss": 1.274, "step": 14180 }, { "epoch": 0.5461020211742059, "grad_norm": 1.1353720426559448, "learning_rate": 0.0001654532411823914, "loss": 1.3842, "step": 14185 }, { "epoch": 0.5462945139557267, "grad_norm": 2.368894577026367, "learning_rate": 0.00016543037602167017, "loss": 1.3566, "step": 14190 }, { "epoch": 0.5464870067372474, "grad_norm": 1.884104609489441, "learning_rate": 0.00016540750487777455, "loss": 1.15, "step": 14195 }, { "epoch": 0.546679499518768, "grad_norm": 1.1348326206207275, "learning_rate": 0.00016538462775279587, "loss": 1.1782, "step": 14200 }, { "epoch": 0.5468719923002887, "grad_norm": 1.2342017889022827, "learning_rate": 0.00016536174464882613, "loss": 1.1361, "step": 14205 }, { "epoch": 0.5470644850818094, "grad_norm": 1.0037345886230469, "learning_rate": 0.0001653388555679578, "loss": 1.1282, "step": 14210 }, { "epoch": 0.5472569778633302, "grad_norm": 2.8669965267181396, "learning_rate": 0.000165315960512284, "loss": 1.184, "step": 14215 }, { "epoch": 0.5474494706448508, "grad_norm": 1.0212280750274658, "learning_rate": 0.00016529305948389825, "loss": 1.1422, "step": 14220 }, { "epoch": 0.5476419634263715, "grad_norm": 1.1197772026062012, "learning_rate": 
0.00016527015248489474, "loss": 1.077, "step": 14225 }, { "epoch": 0.5478344562078922, "grad_norm": 1.4821882247924805, "learning_rate": 0.0001652472395173682, "loss": 1.3187, "step": 14230 }, { "epoch": 0.5480269489894128, "grad_norm": 1.1993844509124756, "learning_rate": 0.00016522432058341377, "loss": 1.1834, "step": 14235 }, { "epoch": 0.5482194417709336, "grad_norm": 1.9386481046676636, "learning_rate": 0.00016520139568512734, "loss": 1.1461, "step": 14240 }, { "epoch": 0.5484119345524543, "grad_norm": 0.8914703130722046, "learning_rate": 0.00016517846482460517, "loss": 1.4175, "step": 14245 }, { "epoch": 0.548604427333975, "grad_norm": 1.8703666925430298, "learning_rate": 0.00016515552800394417, "loss": 1.2483, "step": 14250 }, { "epoch": 0.5487969201154956, "grad_norm": 1.1656851768493652, "learning_rate": 0.00016513258522524177, "loss": 1.2293, "step": 14255 }, { "epoch": 0.5489894128970164, "grad_norm": 1.402370810508728, "learning_rate": 0.0001651096364905959, "loss": 1.3326, "step": 14260 }, { "epoch": 0.5491819056785371, "grad_norm": 1.8804208040237427, "learning_rate": 0.00016508668180210506, "loss": 1.1033, "step": 14265 }, { "epoch": 0.5493743984600578, "grad_norm": 1.0970590114593506, "learning_rate": 0.00016506372116186836, "loss": 1.1887, "step": 14270 }, { "epoch": 0.5495668912415784, "grad_norm": 1.3364982604980469, "learning_rate": 0.00016504075457198533, "loss": 1.1183, "step": 14275 }, { "epoch": 0.5497593840230991, "grad_norm": 1.4718800783157349, "learning_rate": 0.0001650177820345562, "loss": 1.1305, "step": 14280 }, { "epoch": 0.5499518768046199, "grad_norm": 1.3023836612701416, "learning_rate": 0.00016499480355168156, "loss": 1.2267, "step": 14285 }, { "epoch": 0.5501443695861405, "grad_norm": 1.809346079826355, "learning_rate": 0.0001649718191254627, "loss": 1.081, "step": 14290 }, { "epoch": 0.5503368623676612, "grad_norm": 1.3828262090682983, "learning_rate": 0.0001649488287580014, "loss": 1.209, "step": 14295 }, { "epoch": 
0.5505293551491819, "grad_norm": 1.4741365909576416, "learning_rate": 0.00016492583245139995, "loss": 1.0607, "step": 14300 }, { "epoch": 0.5507218479307026, "grad_norm": 1.057210922241211, "learning_rate": 0.0001649028302077612, "loss": 1.281, "step": 14305 }, { "epoch": 0.5509143407122233, "grad_norm": 2.588911294937134, "learning_rate": 0.00016487982202918858, "loss": 1.382, "step": 14310 }, { "epoch": 0.551106833493744, "grad_norm": 2.2811248302459717, "learning_rate": 0.00016485680791778604, "loss": 1.3173, "step": 14315 }, { "epoch": 0.5512993262752647, "grad_norm": 1.675776481628418, "learning_rate": 0.00016483378787565802, "loss": 1.1948, "step": 14320 }, { "epoch": 0.5514918190567853, "grad_norm": 1.1149309873580933, "learning_rate": 0.0001648107619049096, "loss": 1.1406, "step": 14325 }, { "epoch": 0.551684311838306, "grad_norm": 1.0165066719055176, "learning_rate": 0.00016478773000764635, "loss": 1.1491, "step": 14330 }, { "epoch": 0.5518768046198268, "grad_norm": 1.8692020177841187, "learning_rate": 0.00016476469218597433, "loss": 1.0848, "step": 14335 }, { "epoch": 0.5520692974013475, "grad_norm": 0.9627811908721924, "learning_rate": 0.0001647416484420003, "loss": 1.2159, "step": 14340 }, { "epoch": 0.5522617901828681, "grad_norm": 2.1085097789764404, "learning_rate": 0.00016471859877783133, "loss": 1.1551, "step": 14345 }, { "epoch": 0.5524542829643888, "grad_norm": 2.2478790283203125, "learning_rate": 0.00016469554319557527, "loss": 1.3081, "step": 14350 }, { "epoch": 0.5526467757459095, "grad_norm": 1.0580302476882935, "learning_rate": 0.00016467248169734037, "loss": 1.3293, "step": 14355 }, { "epoch": 0.5528392685274303, "grad_norm": 1.3953101634979248, "learning_rate": 0.00016464941428523538, "loss": 1.1256, "step": 14360 }, { "epoch": 0.5530317613089509, "grad_norm": 0.9302542209625244, "learning_rate": 0.0001646263409613697, "loss": 0.9645, "step": 14365 }, { "epoch": 0.5532242540904716, "grad_norm": 1.9415937662124634, "learning_rate": 
0.00016460326172785332, "loss": 1.3428, "step": 14370 }, { "epoch": 0.5534167468719923, "grad_norm": 0.9449756145477295, "learning_rate": 0.00016458017658679656, "loss": 1.3183, "step": 14375 }, { "epoch": 0.5536092396535129, "grad_norm": 1.2944326400756836, "learning_rate": 0.00016455708554031047, "loss": 1.1277, "step": 14380 }, { "epoch": 0.5538017324350337, "grad_norm": 1.3632171154022217, "learning_rate": 0.00016453398859050657, "loss": 1.3262, "step": 14385 }, { "epoch": 0.5539942252165544, "grad_norm": 1.119086503982544, "learning_rate": 0.00016451088573949692, "loss": 1.2639, "step": 14390 }, { "epoch": 0.5541867179980751, "grad_norm": 1.3261640071868896, "learning_rate": 0.00016448777698939407, "loss": 1.0911, "step": 14395 }, { "epoch": 0.5543792107795957, "grad_norm": 1.6098653078079224, "learning_rate": 0.00016446466234231125, "loss": 1.2942, "step": 14400 }, { "epoch": 0.5545717035611165, "grad_norm": 2.1425249576568604, "learning_rate": 0.0001644415418003621, "loss": 1.1566, "step": 14405 }, { "epoch": 0.5547641963426372, "grad_norm": 1.0087484121322632, "learning_rate": 0.0001644184153656608, "loss": 1.1515, "step": 14410 }, { "epoch": 0.5549566891241579, "grad_norm": 1.3792825937271118, "learning_rate": 0.00016439528304032218, "loss": 1.3815, "step": 14415 }, { "epoch": 0.5551491819056785, "grad_norm": 1.0076264142990112, "learning_rate": 0.0001643721448264615, "loss": 1.1996, "step": 14420 }, { "epoch": 0.5553416746871992, "grad_norm": 1.6108455657958984, "learning_rate": 0.0001643490007261946, "loss": 1.2801, "step": 14425 }, { "epoch": 0.55553416746872, "grad_norm": 1.4850428104400635, "learning_rate": 0.00016432585074163783, "loss": 1.1272, "step": 14430 }, { "epoch": 0.5557266602502406, "grad_norm": 1.482926607131958, "learning_rate": 0.0001643026948749082, "loss": 0.9271, "step": 14435 }, { "epoch": 0.5559191530317613, "grad_norm": 1.404266119003296, "learning_rate": 0.000164279533128123, "loss": 1.2476, "step": 14440 }, { "epoch": 
0.556111645813282, "grad_norm": 1.5951578617095947, "learning_rate": 0.00016425636550340035, "loss": 1.036, "step": 14445 }, { "epoch": 0.5563041385948027, "grad_norm": 1.11802339553833, "learning_rate": 0.00016423319200285877, "loss": 1.2595, "step": 14450 }, { "epoch": 0.5564966313763234, "grad_norm": 0.9702684283256531, "learning_rate": 0.00016421001262861723, "loss": 1.1478, "step": 14455 }, { "epoch": 0.5566891241578441, "grad_norm": 1.4077606201171875, "learning_rate": 0.00016418682738279542, "loss": 1.2807, "step": 14460 }, { "epoch": 0.5568816169393648, "grad_norm": 1.5000783205032349, "learning_rate": 0.00016416363626751344, "loss": 1.3231, "step": 14465 }, { "epoch": 0.5570741097208854, "grad_norm": 1.0804152488708496, "learning_rate": 0.00016414043928489195, "loss": 1.2609, "step": 14470 }, { "epoch": 0.5572666025024061, "grad_norm": 2.0902814865112305, "learning_rate": 0.0001641172364370522, "loss": 1.1007, "step": 14475 }, { "epoch": 0.5574590952839269, "grad_norm": 0.9129114151000977, "learning_rate": 0.0001640940277261159, "loss": 1.2385, "step": 14480 }, { "epoch": 0.5576515880654476, "grad_norm": 1.5251227617263794, "learning_rate": 0.0001640708131542054, "loss": 1.3068, "step": 14485 }, { "epoch": 0.5578440808469682, "grad_norm": 1.7822771072387695, "learning_rate": 0.00016404759272344342, "loss": 1.2942, "step": 14490 }, { "epoch": 0.5580365736284889, "grad_norm": 1.7675615549087524, "learning_rate": 0.00016402436643595336, "loss": 0.9753, "step": 14495 }, { "epoch": 0.5582290664100096, "grad_norm": 1.4113742113113403, "learning_rate": 0.0001640011342938591, "loss": 1.2727, "step": 14500 }, { "epoch": 0.5584215591915304, "grad_norm": 2.213724136352539, "learning_rate": 0.0001639778962992851, "loss": 1.1275, "step": 14505 }, { "epoch": 0.558614051973051, "grad_norm": 0.777229368686676, "learning_rate": 0.0001639546524543563, "loss": 1.0434, "step": 14510 }, { "epoch": 0.5588065447545717, "grad_norm": 1.0420740842819214, "learning_rate": 
0.00016393140276119817, "loss": 1.1202, "step": 14515 }, { "epoch": 0.5589990375360924, "grad_norm": 1.4241138696670532, "learning_rate": 0.00016390814722193678, "loss": 1.0245, "step": 14520 }, { "epoch": 0.559191530317613, "grad_norm": 1.1826037168502808, "learning_rate": 0.00016388488583869872, "loss": 1.1894, "step": 14525 }, { "epoch": 0.5593840230991338, "grad_norm": 1.136072039604187, "learning_rate": 0.000163861618613611, "loss": 1.2093, "step": 14530 }, { "epoch": 0.5595765158806545, "grad_norm": 1.0932581424713135, "learning_rate": 0.0001638383455488013, "loss": 1.26, "step": 14535 }, { "epoch": 0.5597690086621752, "grad_norm": 1.4892606735229492, "learning_rate": 0.00016381506664639784, "loss": 1.0244, "step": 14540 }, { "epoch": 0.5599615014436958, "grad_norm": 1.4259272813796997, "learning_rate": 0.0001637917819085292, "loss": 0.9896, "step": 14545 }, { "epoch": 0.5601539942252165, "grad_norm": 1.0615971088409424, "learning_rate": 0.00016376849133732473, "loss": 1.1619, "step": 14550 }, { "epoch": 0.5603464870067373, "grad_norm": 0.8815811276435852, "learning_rate": 0.00016374519493491413, "loss": 1.1123, "step": 14555 }, { "epoch": 0.560538979788258, "grad_norm": 1.2956461906433105, "learning_rate": 0.00016372189270342778, "loss": 1.1978, "step": 14560 }, { "epoch": 0.5607314725697786, "grad_norm": 1.8797427415847778, "learning_rate": 0.00016369858464499641, "loss": 1.4186, "step": 14565 }, { "epoch": 0.5609239653512993, "grad_norm": 1.6631108522415161, "learning_rate": 0.00016367527076175143, "loss": 0.9839, "step": 14570 }, { "epoch": 0.5611164581328201, "grad_norm": 1.8200160264968872, "learning_rate": 0.0001636519510558248, "loss": 1.1272, "step": 14575 }, { "epoch": 0.5613089509143407, "grad_norm": 1.884712815284729, "learning_rate": 0.00016362862552934886, "loss": 1.15, "step": 14580 }, { "epoch": 0.5615014436958614, "grad_norm": 0.7094476222991943, "learning_rate": 0.00016360529418445662, "loss": 0.9581, "step": 14585 }, { "epoch": 
0.5616939364773821, "grad_norm": 0.9652591347694397, "learning_rate": 0.00016358195702328158, "loss": 1.0858, "step": 14590 }, { "epoch": 0.5618864292589028, "grad_norm": 1.3010308742523193, "learning_rate": 0.00016355861404795778, "loss": 1.2491, "step": 14595 }, { "epoch": 0.5620789220404235, "grad_norm": 1.459953784942627, "learning_rate": 0.00016353526526061973, "loss": 1.1194, "step": 14600 }, { "epoch": 0.5622714148219442, "grad_norm": 1.0818215608596802, "learning_rate": 0.0001635119106634026, "loss": 1.202, "step": 14605 }, { "epoch": 0.5624639076034649, "grad_norm": 1.0625619888305664, "learning_rate": 0.0001634885502584419, "loss": 1.3284, "step": 14610 }, { "epoch": 0.5626564003849855, "grad_norm": 1.5708478689193726, "learning_rate": 0.0001634651840478739, "loss": 1.036, "step": 14615 }, { "epoch": 0.5628488931665062, "grad_norm": 1.2847293615341187, "learning_rate": 0.00016344181203383523, "loss": 1.0858, "step": 14620 }, { "epoch": 0.563041385948027, "grad_norm": 1.082689881324768, "learning_rate": 0.00016341843421846313, "loss": 1.3457, "step": 14625 }, { "epoch": 0.5632338787295477, "grad_norm": 1.9000965356826782, "learning_rate": 0.0001633950506038953, "loss": 1.3901, "step": 14630 }, { "epoch": 0.5634263715110683, "grad_norm": 1.4664018154144287, "learning_rate": 0.0001633716611922701, "loss": 1.0836, "step": 14635 }, { "epoch": 0.563618864292589, "grad_norm": 1.6126337051391602, "learning_rate": 0.0001633482659857262, "loss": 1.0794, "step": 14640 }, { "epoch": 0.5638113570741097, "grad_norm": 1.865504503250122, "learning_rate": 0.00016332486498640307, "loss": 0.9427, "step": 14645 }, { "epoch": 0.5640038498556305, "grad_norm": 1.4346791505813599, "learning_rate": 0.0001633014581964405, "loss": 1.1952, "step": 14650 }, { "epoch": 0.5641963426371511, "grad_norm": 1.3558484315872192, "learning_rate": 0.00016327804561797895, "loss": 1.1679, "step": 14655 }, { "epoch": 0.5643888354186718, "grad_norm": 1.3297834396362305, "learning_rate": 
0.00016325462725315926, "loss": 1.2225, "step": 14660 }, { "epoch": 0.5645813282001925, "grad_norm": 2.106694221496582, "learning_rate": 0.00016323120310412297, "loss": 1.072, "step": 14665 }, { "epoch": 0.5647738209817131, "grad_norm": 1.284629225730896, "learning_rate": 0.00016320777317301198, "loss": 1.0004, "step": 14670 }, { "epoch": 0.5649663137632339, "grad_norm": 1.4289201498031616, "learning_rate": 0.0001631843374619689, "loss": 1.1239, "step": 14675 }, { "epoch": 0.5651588065447546, "grad_norm": 1.9027820825576782, "learning_rate": 0.0001631608959731367, "loss": 1.2137, "step": 14680 }, { "epoch": 0.5653512993262753, "grad_norm": 1.878009557723999, "learning_rate": 0.00016313744870865895, "loss": 1.247, "step": 14685 }, { "epoch": 0.5655437921077959, "grad_norm": 1.4919451475143433, "learning_rate": 0.00016311399567067974, "loss": 1.3506, "step": 14690 }, { "epoch": 0.5657362848893166, "grad_norm": 2.0583205223083496, "learning_rate": 0.00016309053686134378, "loss": 1.1191, "step": 14695 }, { "epoch": 0.5659287776708374, "grad_norm": 1.1545616388320923, "learning_rate": 0.00016306707228279615, "loss": 1.2105, "step": 14700 }, { "epoch": 0.566121270452358, "grad_norm": 0.8714199662208557, "learning_rate": 0.0001630436019371825, "loss": 1.0834, "step": 14705 }, { "epoch": 0.5663137632338787, "grad_norm": 2.1866228580474854, "learning_rate": 0.0001630201258266491, "loss": 1.3334, "step": 14710 }, { "epoch": 0.5665062560153994, "grad_norm": 1.4117622375488281, "learning_rate": 0.00016299664395334266, "loss": 1.1353, "step": 14715 }, { "epoch": 0.5666987487969202, "grad_norm": 1.5454515218734741, "learning_rate": 0.00016297315631941045, "loss": 1.096, "step": 14720 }, { "epoch": 0.5668912415784408, "grad_norm": 1.1799986362457275, "learning_rate": 0.00016294966292700026, "loss": 1.214, "step": 14725 }, { "epoch": 0.5670837343599615, "grad_norm": 1.2906007766723633, "learning_rate": 0.00016292616377826038, "loss": 1.2613, "step": 14730 }, { "epoch": 
0.5672762271414822, "grad_norm": 2.8731329441070557, "learning_rate": 0.00016290265887533968, "loss": 1.3257, "step": 14735 }, { "epoch": 0.5674687199230029, "grad_norm": 1.0078117847442627, "learning_rate": 0.0001628791482203875, "loss": 1.2053, "step": 14740 }, { "epoch": 0.5676612127045236, "grad_norm": 1.05767023563385, "learning_rate": 0.0001628556318155538, "loss": 0.9775, "step": 14745 }, { "epoch": 0.5678537054860443, "grad_norm": 2.118110418319702, "learning_rate": 0.0001628321096629889, "loss": 1.1801, "step": 14750 }, { "epoch": 0.568046198267565, "grad_norm": 1.1577699184417725, "learning_rate": 0.00016280858176484384, "loss": 1.2156, "step": 14755 }, { "epoch": 0.5682386910490856, "grad_norm": 1.5565030574798584, "learning_rate": 0.00016278504812327002, "loss": 1.0586, "step": 14760 }, { "epoch": 0.5684311838306063, "grad_norm": 1.5205986499786377, "learning_rate": 0.00016276150874041946, "loss": 1.3679, "step": 14765 }, { "epoch": 0.5686236766121271, "grad_norm": 0.9402291774749756, "learning_rate": 0.00016273796361844468, "loss": 1.0996, "step": 14770 }, { "epoch": 0.5688161693936478, "grad_norm": 1.3806294202804565, "learning_rate": 0.00016271441275949875, "loss": 1.1815, "step": 14775 }, { "epoch": 0.5690086621751684, "grad_norm": 2.0714609622955322, "learning_rate": 0.0001626908561657352, "loss": 1.1945, "step": 14780 }, { "epoch": 0.5692011549566891, "grad_norm": 0.9732249975204468, "learning_rate": 0.00016266729383930816, "loss": 1.0233, "step": 14785 }, { "epoch": 0.5693936477382098, "grad_norm": 1.3748955726623535, "learning_rate": 0.0001626437257823722, "loss": 1.25, "step": 14790 }, { "epoch": 0.5695861405197306, "grad_norm": 1.9781707525253296, "learning_rate": 0.00016262015199708252, "loss": 1.1745, "step": 14795 }, { "epoch": 0.5697786333012512, "grad_norm": 1.5062282085418701, "learning_rate": 0.00016259657248559475, "loss": 1.098, "step": 14800 }, { "epoch": 0.5699711260827719, "grad_norm": 1.7073885202407837, "learning_rate": 
0.0001625729872500651, "loss": 1.1191, "step": 14805 }, { "epoch": 0.5701636188642926, "grad_norm": 2.0891575813293457, "learning_rate": 0.00016254939629265026, "loss": 1.2533, "step": 14810 }, { "epoch": 0.5703561116458132, "grad_norm": 1.6380434036254883, "learning_rate": 0.0001625257996155075, "loss": 1.2756, "step": 14815 }, { "epoch": 0.570548604427334, "grad_norm": 1.1182715892791748, "learning_rate": 0.00016250219722079452, "loss": 1.3084, "step": 14820 }, { "epoch": 0.5707410972088547, "grad_norm": 1.2113651037216187, "learning_rate": 0.0001624785891106697, "loss": 1.287, "step": 14825 }, { "epoch": 0.5709335899903754, "grad_norm": 1.1726208925247192, "learning_rate": 0.00016245497528729174, "loss": 1.1491, "step": 14830 }, { "epoch": 0.571126082771896, "grad_norm": 1.0203557014465332, "learning_rate": 0.00016243135575282004, "loss": 1.0809, "step": 14835 }, { "epoch": 0.5713185755534167, "grad_norm": 1.2878923416137695, "learning_rate": 0.00016240773050941443, "loss": 1.1848, "step": 14840 }, { "epoch": 0.5715110683349375, "grad_norm": 1.5805665254592896, "learning_rate": 0.00016238409955923527, "loss": 1.1191, "step": 14845 }, { "epoch": 0.5717035611164581, "grad_norm": 1.089296579360962, "learning_rate": 0.00016236046290444347, "loss": 1.066, "step": 14850 }, { "epoch": 0.5718960538979788, "grad_norm": 1.1492708921432495, "learning_rate": 0.0001623368205472004, "loss": 1.174, "step": 14855 }, { "epoch": 0.5720885466794995, "grad_norm": 1.9744573831558228, "learning_rate": 0.00016231317248966809, "loss": 1.248, "step": 14860 }, { "epoch": 0.5722810394610202, "grad_norm": 2.2061898708343506, "learning_rate": 0.0001622895187340089, "loss": 1.2028, "step": 14865 }, { "epoch": 0.5724735322425409, "grad_norm": 1.0993640422821045, "learning_rate": 0.0001622658592823859, "loss": 1.059, "step": 14870 }, { "epoch": 0.5726660250240616, "grad_norm": 1.5680936574935913, "learning_rate": 0.00016224219413696252, "loss": 1.4181, "step": 14875 }, { "epoch": 
0.5728585178055823, "grad_norm": 1.3295773267745972, "learning_rate": 0.00016221852329990276, "loss": 1.214, "step": 14880 }, { "epoch": 0.573051010587103, "grad_norm": 1.2004729509353638, "learning_rate": 0.00016219484677337126, "loss": 1.1474, "step": 14885 }, { "epoch": 0.5732435033686237, "grad_norm": 1.3868520259857178, "learning_rate": 0.000162171164559533, "loss": 1.2373, "step": 14890 }, { "epoch": 0.5734359961501444, "grad_norm": 1.2218377590179443, "learning_rate": 0.00016214747666055358, "loss": 1.1009, "step": 14895 }, { "epoch": 0.5736284889316651, "grad_norm": 1.1113415956497192, "learning_rate": 0.00016212378307859914, "loss": 1.2191, "step": 14900 }, { "epoch": 0.5738209817131857, "grad_norm": 1.099223256111145, "learning_rate": 0.00016210008381583623, "loss": 1.2024, "step": 14905 }, { "epoch": 0.5740134744947064, "grad_norm": 1.3597705364227295, "learning_rate": 0.00016207637887443208, "loss": 1.1785, "step": 14910 }, { "epoch": 0.5742059672762272, "grad_norm": 1.675276279449463, "learning_rate": 0.00016205266825655427, "loss": 1.1492, "step": 14915 }, { "epoch": 0.5743984600577479, "grad_norm": 1.5977553129196167, "learning_rate": 0.000162028951964371, "loss": 1.1355, "step": 14920 }, { "epoch": 0.5745909528392685, "grad_norm": 2.0862395763397217, "learning_rate": 0.000162005230000051, "loss": 1.1747, "step": 14925 }, { "epoch": 0.5747834456207892, "grad_norm": 0.8812354803085327, "learning_rate": 0.00016198150236576347, "loss": 1.2876, "step": 14930 }, { "epoch": 0.5749759384023099, "grad_norm": 1.3878661394119263, "learning_rate": 0.0001619577690636781, "loss": 1.2269, "step": 14935 }, { "epoch": 0.5751684311838307, "grad_norm": 1.0739976167678833, "learning_rate": 0.0001619340300959652, "loss": 1.2467, "step": 14940 }, { "epoch": 0.5753609239653513, "grad_norm": 0.766392707824707, "learning_rate": 0.0001619102854647955, "loss": 1.0829, "step": 14945 }, { "epoch": 0.575553416746872, "grad_norm": 1.2837680578231812, "learning_rate": 
0.00016188653517234036, "loss": 1.2027, "step": 14950 }, { "epoch": 0.5757459095283927, "grad_norm": 2.2257256507873535, "learning_rate": 0.00016186277922077152, "loss": 1.1181, "step": 14955 }, { "epoch": 0.5759384023099133, "grad_norm": 1.257380723953247, "learning_rate": 0.00016183901761226133, "loss": 1.1899, "step": 14960 }, { "epoch": 0.5761308950914341, "grad_norm": 1.1324365139007568, "learning_rate": 0.00016181525034898261, "loss": 1.0823, "step": 14965 }, { "epoch": 0.5763233878729548, "grad_norm": 0.9696788787841797, "learning_rate": 0.00016179147743310872, "loss": 1.176, "step": 14970 }, { "epoch": 0.5765158806544755, "grad_norm": 1.8557454347610474, "learning_rate": 0.00016176769886681357, "loss": 1.1396, "step": 14975 }, { "epoch": 0.5767083734359961, "grad_norm": 1.2395600080490112, "learning_rate": 0.00016174391465227154, "loss": 1.0799, "step": 14980 }, { "epoch": 0.5769008662175168, "grad_norm": 1.8957431316375732, "learning_rate": 0.00016172012479165752, "loss": 1.146, "step": 14985 }, { "epoch": 0.5770933589990376, "grad_norm": 1.191486120223999, "learning_rate": 0.00016169632928714697, "loss": 1.0166, "step": 14990 }, { "epoch": 0.5772858517805582, "grad_norm": 1.7964496612548828, "learning_rate": 0.0001616725281409158, "loss": 1.2131, "step": 14995 }, { "epoch": 0.5774783445620789, "grad_norm": 1.4722768068313599, "learning_rate": 0.00016164872135514044, "loss": 1.0148, "step": 15000 }, { "epoch": 0.5776708373435996, "grad_norm": 1.265663981437683, "learning_rate": 0.00016162490893199791, "loss": 1.1166, "step": 15005 }, { "epoch": 0.5778633301251203, "grad_norm": 1.2796491384506226, "learning_rate": 0.0001616010908736657, "loss": 1.2911, "step": 15010 }, { "epoch": 0.578055822906641, "grad_norm": 1.025158166885376, "learning_rate": 0.00016157726718232177, "loss": 1.0723, "step": 15015 }, { "epoch": 0.5782483156881617, "grad_norm": 1.5206444263458252, "learning_rate": 0.00016155343786014472, "loss": 1.0406, "step": 15020 }, { "epoch": 
0.5784408084696824, "grad_norm": 1.5212637186050415, "learning_rate": 0.0001615296029093135, "loss": 1.0445, "step": 15025 }, { "epoch": 0.578633301251203, "grad_norm": 1.3746932744979858, "learning_rate": 0.0001615057623320077, "loss": 1.1385, "step": 15030 }, { "epoch": 0.5788257940327238, "grad_norm": 1.8660439252853394, "learning_rate": 0.00016148191613040734, "loss": 1.0786, "step": 15035 }, { "epoch": 0.5790182868142445, "grad_norm": 2.343719720840454, "learning_rate": 0.0001614580643066931, "loss": 1.276, "step": 15040 }, { "epoch": 0.5792107795957652, "grad_norm": 1.1358321905136108, "learning_rate": 0.00016143420686304594, "loss": 1.3055, "step": 15045 }, { "epoch": 0.5794032723772858, "grad_norm": 1.6678638458251953, "learning_rate": 0.00016141034380164754, "loss": 1.0694, "step": 15050 }, { "epoch": 0.5795957651588065, "grad_norm": 1.6096512079238892, "learning_rate": 0.00016138647512468004, "loss": 1.4079, "step": 15055 }, { "epoch": 0.5797882579403273, "grad_norm": 2.3922042846679688, "learning_rate": 0.000161362600834326, "loss": 1.2739, "step": 15060 }, { "epoch": 0.579980750721848, "grad_norm": 1.167476773262024, "learning_rate": 0.0001613387209327686, "loss": 1.2336, "step": 15065 }, { "epoch": 0.5801732435033686, "grad_norm": 0.9550272226333618, "learning_rate": 0.00016131483542219152, "loss": 1.1557, "step": 15070 }, { "epoch": 0.5803657362848893, "grad_norm": 1.1105631589889526, "learning_rate": 0.00016129094430477893, "loss": 1.0289, "step": 15075 }, { "epoch": 0.58055822906641, "grad_norm": 1.3411318063735962, "learning_rate": 0.00016126704758271548, "loss": 1.2454, "step": 15080 }, { "epoch": 0.5807507218479308, "grad_norm": 1.2867335081100464, "learning_rate": 0.00016124314525818635, "loss": 1.2983, "step": 15085 }, { "epoch": 0.5809432146294514, "grad_norm": 1.6035441160202026, "learning_rate": 0.00016121923733337736, "loss": 1.2227, "step": 15090 }, { "epoch": 0.5811357074109721, "grad_norm": 1.6657713651657104, "learning_rate": 
0.0001611953238104746, "loss": 1.1144, "step": 15095 }, { "epoch": 0.5813282001924928, "grad_norm": 1.8781518936157227, "learning_rate": 0.00016117140469166486, "loss": 1.1393, "step": 15100 }, { "epoch": 0.5815206929740134, "grad_norm": 1.542438268661499, "learning_rate": 0.00016114747997913542, "loss": 1.188, "step": 15105 }, { "epoch": 0.5817131857555342, "grad_norm": 2.148175001144409, "learning_rate": 0.00016112354967507398, "loss": 1.1323, "step": 15110 }, { "epoch": 0.5819056785370549, "grad_norm": 1.3092713356018066, "learning_rate": 0.0001610996137816688, "loss": 1.1799, "step": 15115 }, { "epoch": 0.5820981713185756, "grad_norm": 1.4203580617904663, "learning_rate": 0.00016107567230110874, "loss": 1.0916, "step": 15120 }, { "epoch": 0.5822906641000962, "grad_norm": 1.2932054996490479, "learning_rate": 0.00016105172523558301, "loss": 1.13, "step": 15125 }, { "epoch": 0.5824831568816169, "grad_norm": 2.218705654144287, "learning_rate": 0.00016102777258728142, "loss": 1.153, "step": 15130 }, { "epoch": 0.5826756496631377, "grad_norm": 1.180166244506836, "learning_rate": 0.00016100381435839433, "loss": 1.2611, "step": 15135 }, { "epoch": 0.5828681424446583, "grad_norm": 2.007887125015259, "learning_rate": 0.00016097985055111256, "loss": 1.2046, "step": 15140 }, { "epoch": 0.583060635226179, "grad_norm": 1.20327889919281, "learning_rate": 0.00016095588116762734, "loss": 1.3217, "step": 15145 }, { "epoch": 0.5832531280076997, "grad_norm": 1.2758903503417969, "learning_rate": 0.00016093190621013063, "loss": 1.1277, "step": 15150 }, { "epoch": 0.5834456207892204, "grad_norm": 1.851881980895996, "learning_rate": 0.00016090792568081473, "loss": 1.1701, "step": 15155 }, { "epoch": 0.5836381135707411, "grad_norm": 1.6895406246185303, "learning_rate": 0.00016088393958187247, "loss": 1.3331, "step": 15160 }, { "epoch": 0.5838306063522618, "grad_norm": 1.4138762950897217, "learning_rate": 0.0001608599479154973, "loss": 1.3016, "step": 15165 }, { "epoch": 
0.5840230991337825, "grad_norm": 1.3571628332138062, "learning_rate": 0.00016083595068388303, "loss": 1.1407, "step": 15170 }, { "epoch": 0.5842155919153031, "grad_norm": 1.3217098712921143, "learning_rate": 0.00016081194788922405, "loss": 1.0032, "step": 15175 }, { "epoch": 0.5844080846968238, "grad_norm": 0.9765079617500305, "learning_rate": 0.00016078793953371533, "loss": 1.1543, "step": 15180 }, { "epoch": 0.5846005774783446, "grad_norm": 1.0757596492767334, "learning_rate": 0.0001607639256195522, "loss": 1.0828, "step": 15185 }, { "epoch": 0.5847930702598653, "grad_norm": 1.2296372652053833, "learning_rate": 0.00016073990614893057, "loss": 1.2089, "step": 15190 }, { "epoch": 0.5849855630413859, "grad_norm": 1.8743308782577515, "learning_rate": 0.00016071588112404693, "loss": 1.2195, "step": 15195 }, { "epoch": 0.5851780558229066, "grad_norm": 1.347332239151001, "learning_rate": 0.00016069185054709814, "loss": 1.1664, "step": 15200 }, { "epoch": 0.5853705486044274, "grad_norm": 1.629981279373169, "learning_rate": 0.00016066781442028165, "loss": 1.1888, "step": 15205 }, { "epoch": 0.5855630413859481, "grad_norm": 1.353702425956726, "learning_rate": 0.00016064377274579544, "loss": 1.265, "step": 15210 }, { "epoch": 0.5857555341674687, "grad_norm": 0.9861169457435608, "learning_rate": 0.00016061972552583795, "loss": 1.0908, "step": 15215 }, { "epoch": 0.5859480269489894, "grad_norm": 1.1305365562438965, "learning_rate": 0.00016059567276260813, "loss": 1.0076, "step": 15220 }, { "epoch": 0.5861405197305101, "grad_norm": 1.4098013639450073, "learning_rate": 0.00016057161445830542, "loss": 1.1882, "step": 15225 }, { "epoch": 0.5863330125120308, "grad_norm": 1.1900111436843872, "learning_rate": 0.00016054755061512986, "loss": 1.1961, "step": 15230 }, { "epoch": 0.5865255052935515, "grad_norm": 1.0856738090515137, "learning_rate": 0.00016052348123528183, "loss": 1.2169, "step": 15235 }, { "epoch": 0.5867179980750722, "grad_norm": 1.109937071800232, "learning_rate": 
0.0001604994063209624, "loss": 1.0818, "step": 15240 }, { "epoch": 0.5869104908565929, "grad_norm": 1.9059746265411377, "learning_rate": 0.00016047532587437304, "loss": 1.3035, "step": 15245 }, { "epoch": 0.5871029836381135, "grad_norm": 1.089796781539917, "learning_rate": 0.00016045123989771575, "loss": 1.0872, "step": 15250 }, { "epoch": 0.5872954764196343, "grad_norm": 1.3014196157455444, "learning_rate": 0.00016042714839319298, "loss": 1.1809, "step": 15255 }, { "epoch": 0.587487969201155, "grad_norm": 1.5097154378890991, "learning_rate": 0.00016040305136300783, "loss": 1.1026, "step": 15260 }, { "epoch": 0.5876804619826757, "grad_norm": 1.9508148431777954, "learning_rate": 0.00016037894880936376, "loss": 1.0489, "step": 15265 }, { "epoch": 0.5878729547641963, "grad_norm": 1.2007025480270386, "learning_rate": 0.0001603548407344648, "loss": 1.2376, "step": 15270 }, { "epoch": 0.588065447545717, "grad_norm": 4.035842418670654, "learning_rate": 0.00016033072714051545, "loss": 1.2894, "step": 15275 }, { "epoch": 0.5882579403272378, "grad_norm": 1.2279680967330933, "learning_rate": 0.00016030660802972074, "loss": 1.1945, "step": 15280 }, { "epoch": 0.5884504331087584, "grad_norm": 1.0882714986801147, "learning_rate": 0.00016028248340428625, "loss": 1.0842, "step": 15285 }, { "epoch": 0.5886429258902791, "grad_norm": 1.3169769048690796, "learning_rate": 0.00016025835326641797, "loss": 1.0085, "step": 15290 }, { "epoch": 0.5888354186717998, "grad_norm": 1.3032643795013428, "learning_rate": 0.00016023421761832246, "loss": 1.1994, "step": 15295 }, { "epoch": 0.5890279114533205, "grad_norm": 1.053415060043335, "learning_rate": 0.00016021007646220678, "loss": 1.0983, "step": 15300 }, { "epoch": 0.5892204042348412, "grad_norm": 1.483736515045166, "learning_rate": 0.00016018592980027846, "loss": 1.1709, "step": 15305 }, { "epoch": 0.5894128970163619, "grad_norm": 1.4688469171524048, "learning_rate": 0.00016016177763474555, "loss": 1.0505, "step": 15310 }, { "epoch": 
0.5896053897978826, "grad_norm": 1.7809165716171265, "learning_rate": 0.00016013761996781661, "loss": 1.1585, "step": 15315 }, { "epoch": 0.5897978825794032, "grad_norm": 1.5344901084899902, "learning_rate": 0.00016011345680170072, "loss": 1.1269, "step": 15320 }, { "epoch": 0.5899903753609239, "grad_norm": 1.298094630241394, "learning_rate": 0.0001600892881386074, "loss": 1.1804, "step": 15325 }, { "epoch": 0.5901828681424447, "grad_norm": 1.8283668756484985, "learning_rate": 0.0001600651139807467, "loss": 1.2059, "step": 15330 }, { "epoch": 0.5903753609239654, "grad_norm": 1.3290801048278809, "learning_rate": 0.00016004093433032924, "loss": 1.2334, "step": 15335 }, { "epoch": 0.590567853705486, "grad_norm": 1.461422324180603, "learning_rate": 0.00016001674918956612, "loss": 1.2987, "step": 15340 }, { "epoch": 0.5907603464870067, "grad_norm": 1.6681803464889526, "learning_rate": 0.00015999255856066885, "loss": 1.0221, "step": 15345 }, { "epoch": 0.5909528392685275, "grad_norm": 1.1714918613433838, "learning_rate": 0.00015996836244584948, "loss": 1.0144, "step": 15350 }, { "epoch": 0.5911453320500482, "grad_norm": 0.9316911697387695, "learning_rate": 0.00015994416084732062, "loss": 1.3241, "step": 15355 }, { "epoch": 0.5913378248315688, "grad_norm": 2.429568290710449, "learning_rate": 0.00015991995376729535, "loss": 1.3155, "step": 15360 }, { "epoch": 0.5915303176130895, "grad_norm": 1.3793234825134277, "learning_rate": 0.00015989574120798725, "loss": 1.2822, "step": 15365 }, { "epoch": 0.5917228103946102, "grad_norm": 1.1756724119186401, "learning_rate": 0.0001598715231716104, "loss": 1.0682, "step": 15370 }, { "epoch": 0.591915303176131, "grad_norm": 1.9872701168060303, "learning_rate": 0.00015984729966037934, "loss": 1.2034, "step": 15375 }, { "epoch": 0.5921077959576516, "grad_norm": 1.5333032608032227, "learning_rate": 0.00015982307067650918, "loss": 1.3922, "step": 15380 }, { "epoch": 0.5923002887391723, "grad_norm": 1.1813582181930542, "learning_rate": 
0.00015979883622221555, "loss": 1.1811, "step": 15385 }, { "epoch": 0.592492781520693, "grad_norm": 1.632565975189209, "learning_rate": 0.00015977459629971442, "loss": 1.0877, "step": 15390 }, { "epoch": 0.5926852743022136, "grad_norm": 1.0945332050323486, "learning_rate": 0.00015975035091122245, "loss": 1.0836, "step": 15395 }, { "epoch": 0.5928777670837344, "grad_norm": 0.8069517016410828, "learning_rate": 0.0001597261000589567, "loss": 1.1574, "step": 15400 }, { "epoch": 0.5930702598652551, "grad_norm": 1.8364413976669312, "learning_rate": 0.00015970184374513476, "loss": 1.1935, "step": 15405 }, { "epoch": 0.5932627526467757, "grad_norm": 1.5146484375, "learning_rate": 0.00015967758197197468, "loss": 1.06, "step": 15410 }, { "epoch": 0.5934552454282964, "grad_norm": 1.5792328119277954, "learning_rate": 0.00015965331474169508, "loss": 1.1464, "step": 15415 }, { "epoch": 0.5936477382098171, "grad_norm": 1.887292742729187, "learning_rate": 0.00015962904205651495, "loss": 1.2039, "step": 15420 }, { "epoch": 0.5938402309913379, "grad_norm": 1.8241037130355835, "learning_rate": 0.000159604763918654, "loss": 1.2011, "step": 15425 }, { "epoch": 0.5940327237728585, "grad_norm": 1.2130569219589233, "learning_rate": 0.0001595804803303322, "loss": 1.2401, "step": 15430 }, { "epoch": 0.5942252165543792, "grad_norm": 1.1083897352218628, "learning_rate": 0.00015955619129377017, "loss": 1.2919, "step": 15435 }, { "epoch": 0.5944177093358999, "grad_norm": 1.8266736268997192, "learning_rate": 0.00015953189681118895, "loss": 1.1609, "step": 15440 }, { "epoch": 0.5946102021174205, "grad_norm": 1.5710999965667725, "learning_rate": 0.0001595075968848102, "loss": 1.2178, "step": 15445 }, { "epoch": 0.5948026948989413, "grad_norm": 2.023061752319336, "learning_rate": 0.00015948329151685583, "loss": 1.2577, "step": 15450 }, { "epoch": 0.594995187680462, "grad_norm": 1.3245149850845337, "learning_rate": 0.00015945898070954853, "loss": 0.9832, "step": 15455 }, { "epoch": 
0.5951876804619827, "grad_norm": 1.7696577310562134, "learning_rate": 0.00015943466446511132, "loss": 1.1991, "step": 15460 }, { "epoch": 0.5953801732435033, "grad_norm": 1.0893733501434326, "learning_rate": 0.00015941034278576775, "loss": 1.3321, "step": 15465 }, { "epoch": 0.595572666025024, "grad_norm": 1.294731616973877, "learning_rate": 0.0001593860156737419, "loss": 1.0485, "step": 15470 }, { "epoch": 0.5957651588065448, "grad_norm": 1.1282588243484497, "learning_rate": 0.00015936168313125833, "loss": 1.0585, "step": 15475 }, { "epoch": 0.5959576515880655, "grad_norm": 0.9207860231399536, "learning_rate": 0.00015933734516054203, "loss": 1.1343, "step": 15480 }, { "epoch": 0.5961501443695861, "grad_norm": 2.2860140800476074, "learning_rate": 0.00015931300176381865, "loss": 1.3317, "step": 15485 }, { "epoch": 0.5963426371511068, "grad_norm": 1.2698768377304077, "learning_rate": 0.00015928865294331413, "loss": 1.175, "step": 15490 }, { "epoch": 0.5965351299326275, "grad_norm": 1.0986465215682983, "learning_rate": 0.00015926429870125505, "loss": 1.1309, "step": 15495 }, { "epoch": 0.5967276227141483, "grad_norm": 1.5664902925491333, "learning_rate": 0.00015923993903986844, "loss": 1.0117, "step": 15500 }, { "epoch": 0.5969201154956689, "grad_norm": 1.3162322044372559, "learning_rate": 0.00015921557396138188, "loss": 1.1964, "step": 15505 }, { "epoch": 0.5971126082771896, "grad_norm": 0.8635309934616089, "learning_rate": 0.0001591912034680233, "loss": 1.1119, "step": 15510 }, { "epoch": 0.5973051010587103, "grad_norm": 1.3118690252304077, "learning_rate": 0.00015916682756202127, "loss": 1.0618, "step": 15515 }, { "epoch": 0.597497593840231, "grad_norm": 1.0313913822174072, "learning_rate": 0.00015914244624560481, "loss": 1.0686, "step": 15520 }, { "epoch": 0.5976900866217517, "grad_norm": 1.3414394855499268, "learning_rate": 0.00015911805952100347, "loss": 1.2013, "step": 15525 }, { "epoch": 0.5978825794032724, "grad_norm": 1.2710504531860352, "learning_rate": 
0.00015909366739044715, "loss": 1.3748, "step": 15530 }, { "epoch": 0.598075072184793, "grad_norm": 1.6694974899291992, "learning_rate": 0.0001590692698561664, "loss": 0.9833, "step": 15535 }, { "epoch": 0.5982675649663137, "grad_norm": 1.5924476385116577, "learning_rate": 0.00015904486692039227, "loss": 1.2046, "step": 15540 }, { "epoch": 0.5984600577478345, "grad_norm": 2.3105616569519043, "learning_rate": 0.00015902045858535616, "loss": 1.233, "step": 15545 }, { "epoch": 0.5986525505293552, "grad_norm": 1.3003478050231934, "learning_rate": 0.00015899604485329012, "loss": 1.1891, "step": 15550 }, { "epoch": 0.5988450433108758, "grad_norm": 1.2988343238830566, "learning_rate": 0.00015897162572642656, "loss": 1.0767, "step": 15555 }, { "epoch": 0.5990375360923965, "grad_norm": 1.0845260620117188, "learning_rate": 0.00015894720120699849, "loss": 1.2702, "step": 15560 }, { "epoch": 0.5992300288739172, "grad_norm": 1.0050013065338135, "learning_rate": 0.00015892277129723935, "loss": 1.2267, "step": 15565 }, { "epoch": 0.599422521655438, "grad_norm": 1.3145102262496948, "learning_rate": 0.0001588983359993831, "loss": 1.1086, "step": 15570 }, { "epoch": 0.5996150144369586, "grad_norm": 1.817396640777588, "learning_rate": 0.00015887389531566424, "loss": 1.0999, "step": 15575 }, { "epoch": 0.5998075072184793, "grad_norm": 1.4001067876815796, "learning_rate": 0.0001588494492483176, "loss": 1.2802, "step": 15580 }, { "epoch": 0.6, "grad_norm": 2.1305971145629883, "learning_rate": 0.00015882499779957868, "loss": 1.2481, "step": 15585 }, { "epoch": 0.6001924927815206, "grad_norm": 1.5675426721572876, "learning_rate": 0.00015880054097168337, "loss": 1.2555, "step": 15590 }, { "epoch": 0.6003849855630414, "grad_norm": 1.3107160329818726, "learning_rate": 0.00015877607876686815, "loss": 1.273, "step": 15595 }, { "epoch": 0.6005774783445621, "grad_norm": 0.5348256230354309, "learning_rate": 0.00015875161118736986, "loss": 0.9708, "step": 15600 }, { "epoch": 0.6007699711260828, 
"grad_norm": 1.0877107381820679, "learning_rate": 0.00015872713823542593, "loss": 1.1419, "step": 15605 }, { "epoch": 0.6009624639076034, "grad_norm": 1.0563950538635254, "learning_rate": 0.00015870265991327424, "loss": 1.0216, "step": 15610 }, { "epoch": 0.6011549566891241, "grad_norm": 1.0346797704696655, "learning_rate": 0.00015867817622315316, "loss": 1.205, "step": 15615 }, { "epoch": 0.6013474494706449, "grad_norm": 1.67006254196167, "learning_rate": 0.00015865368716730158, "loss": 1.2875, "step": 15620 }, { "epoch": 0.6015399422521656, "grad_norm": 1.8183788061141968, "learning_rate": 0.00015862919274795884, "loss": 1.1703, "step": 15625 }, { "epoch": 0.6017324350336862, "grad_norm": 1.1460903882980347, "learning_rate": 0.00015860469296736482, "loss": 1.1998, "step": 15630 }, { "epoch": 0.6019249278152069, "grad_norm": 1.5365129709243774, "learning_rate": 0.00015858018782775985, "loss": 1.054, "step": 15635 }, { "epoch": 0.6021174205967276, "grad_norm": 1.4886486530303955, "learning_rate": 0.00015855567733138478, "loss": 1.2914, "step": 15640 }, { "epoch": 0.6023099133782484, "grad_norm": 1.519114375114441, "learning_rate": 0.00015853116148048087, "loss": 1.0586, "step": 15645 }, { "epoch": 0.602502406159769, "grad_norm": 1.2735627889633179, "learning_rate": 0.00015850664027729, "loss": 1.1287, "step": 15650 }, { "epoch": 0.6026948989412897, "grad_norm": 2.464672327041626, "learning_rate": 0.00015848211372405444, "loss": 1.0616, "step": 15655 }, { "epoch": 0.6028873917228104, "grad_norm": 0.9507278800010681, "learning_rate": 0.000158457581823017, "loss": 1.0118, "step": 15660 }, { "epoch": 0.6030798845043311, "grad_norm": 1.155150294303894, "learning_rate": 0.00015843304457642093, "loss": 1.0563, "step": 15665 }, { "epoch": 0.6032723772858518, "grad_norm": 2.669029474258423, "learning_rate": 0.00015840850198651002, "loss": 1.1918, "step": 15670 }, { "epoch": 0.6034648700673725, "grad_norm": 1.4008570909500122, "learning_rate": 0.00015838395405552854, "loss": 
1.2122, "step": 15675 }, { "epoch": 0.6036573628488932, "grad_norm": 1.4199731349945068, "learning_rate": 0.0001583594007857212, "loss": 1.2546, "step": 15680 }, { "epoch": 0.6038498556304138, "grad_norm": 2.2346031665802, "learning_rate": 0.0001583348421793333, "loss": 1.1184, "step": 15685 }, { "epoch": 0.6040423484119346, "grad_norm": 1.1559759378433228, "learning_rate": 0.00015831027823861048, "loss": 1.157, "step": 15690 }, { "epoch": 0.6042348411934553, "grad_norm": 1.9930438995361328, "learning_rate": 0.00015828570896579897, "loss": 1.1095, "step": 15695 }, { "epoch": 0.6044273339749759, "grad_norm": 1.040358304977417, "learning_rate": 0.00015826113436314548, "loss": 1.062, "step": 15700 }, { "epoch": 0.6046198267564966, "grad_norm": 0.8409137725830078, "learning_rate": 0.00015823655443289724, "loss": 1.0204, "step": 15705 }, { "epoch": 0.6048123195380173, "grad_norm": 1.477950930595398, "learning_rate": 0.00015821196917730184, "loss": 1.2479, "step": 15710 }, { "epoch": 0.6050048123195381, "grad_norm": 1.5752694606781006, "learning_rate": 0.00015818737859860752, "loss": 1.343, "step": 15715 }, { "epoch": 0.6051973051010587, "grad_norm": 1.505356788635254, "learning_rate": 0.00015816278269906284, "loss": 1.0742, "step": 15720 }, { "epoch": 0.6053897978825794, "grad_norm": 1.165273904800415, "learning_rate": 0.000158138181480917, "loss": 1.1023, "step": 15725 }, { "epoch": 0.6055822906641001, "grad_norm": 1.7088487148284912, "learning_rate": 0.00015811357494641958, "loss": 1.2899, "step": 15730 }, { "epoch": 0.6057747834456207, "grad_norm": 1.6200921535491943, "learning_rate": 0.0001580889630978207, "loss": 1.0353, "step": 15735 }, { "epoch": 0.6059672762271415, "grad_norm": 1.1059575080871582, "learning_rate": 0.00015806434593737095, "loss": 0.8117, "step": 15740 }, { "epoch": 0.6061597690086622, "grad_norm": 1.3026262521743774, "learning_rate": 0.00015803972346732143, "loss": 1.1648, "step": 15745 }, { "epoch": 0.6063522617901829, "grad_norm": 
1.316931128501892, "learning_rate": 0.00015801509568992366, "loss": 1.0999, "step": 15750 }, { "epoch": 0.6065447545717035, "grad_norm": 0.9396672248840332, "learning_rate": 0.00015799046260742968, "loss": 1.0374, "step": 15755 }, { "epoch": 0.6067372473532242, "grad_norm": 1.1851413249969482, "learning_rate": 0.00015796582422209206, "loss": 1.1861, "step": 15760 }, { "epoch": 0.606929740134745, "grad_norm": 2.0202128887176514, "learning_rate": 0.00015794118053616383, "loss": 1.139, "step": 15765 }, { "epoch": 0.6071222329162657, "grad_norm": 1.642561912536621, "learning_rate": 0.00015791653155189841, "loss": 1.1811, "step": 15770 }, { "epoch": 0.6073147256977863, "grad_norm": 1.4148608446121216, "learning_rate": 0.0001578918772715499, "loss": 1.3261, "step": 15775 }, { "epoch": 0.607507218479307, "grad_norm": 1.160662293434143, "learning_rate": 0.0001578672176973727, "loss": 1.2117, "step": 15780 }, { "epoch": 0.6076997112608277, "grad_norm": 1.4699779748916626, "learning_rate": 0.00015784255283162176, "loss": 1.1937, "step": 15785 }, { "epoch": 0.6078922040423484, "grad_norm": 1.579142451286316, "learning_rate": 0.00015781788267655252, "loss": 1.2722, "step": 15790 }, { "epoch": 0.6080846968238691, "grad_norm": 1.3598978519439697, "learning_rate": 0.00015779320723442096, "loss": 1.0829, "step": 15795 }, { "epoch": 0.6082771896053898, "grad_norm": 1.1840283870697021, "learning_rate": 0.0001577734630755471, "loss": 1.3599, "step": 15800 }, { "epoch": 0.6084696823869105, "grad_norm": 1.1004847288131714, "learning_rate": 0.00015774877812238972, "loss": 1.3756, "step": 15805 }, { "epoch": 0.6086621751684311, "grad_norm": 1.6455458402633667, "learning_rate": 0.00015772408788848914, "loss": 1.1067, "step": 15810 }, { "epoch": 0.6088546679499519, "grad_norm": 1.6274205446243286, "learning_rate": 0.00015769939237610312, "loss": 1.3339, "step": 15815 }, { "epoch": 0.6090471607314726, "grad_norm": 1.2150076627731323, "learning_rate": 0.00015767469158748987, "loss": 0.9821, 
"step": 15820 }, { "epoch": 0.6092396535129933, "grad_norm": 1.2452518939971924, "learning_rate": 0.00015764998552490815, "loss": 0.9994, "step": 15825 }, { "epoch": 0.6094321462945139, "grad_norm": 1.4766079187393188, "learning_rate": 0.00015762527419061715, "loss": 1.0281, "step": 15830 }, { "epoch": 0.6096246390760347, "grad_norm": 1.1288725137710571, "learning_rate": 0.00015760055758687655, "loss": 1.082, "step": 15835 }, { "epoch": 0.6098171318575554, "grad_norm": 1.181159496307373, "learning_rate": 0.00015757583571594653, "loss": 1.1515, "step": 15840 }, { "epoch": 0.610009624639076, "grad_norm": 1.2939519882202148, "learning_rate": 0.00015755110858008773, "loss": 0.9892, "step": 15845 }, { "epoch": 0.6102021174205967, "grad_norm": 2.3088269233703613, "learning_rate": 0.0001575263761815613, "loss": 0.9798, "step": 15850 }, { "epoch": 0.6103946102021174, "grad_norm": 1.4175939559936523, "learning_rate": 0.00015750163852262886, "loss": 1.1408, "step": 15855 }, { "epoch": 0.6105871029836382, "grad_norm": 1.0206336975097656, "learning_rate": 0.00015747689560555248, "loss": 1.2078, "step": 15860 }, { "epoch": 0.6107795957651588, "grad_norm": 0.9995696544647217, "learning_rate": 0.0001574521474325948, "loss": 1.1922, "step": 15865 }, { "epoch": 0.6109720885466795, "grad_norm": 1.6652652025222778, "learning_rate": 0.00015742739400601872, "loss": 1.1039, "step": 15870 }, { "epoch": 0.6111645813282002, "grad_norm": 1.3411548137664795, "learning_rate": 0.00015740263532808792, "loss": 1.1592, "step": 15875 }, { "epoch": 0.6113570741097208, "grad_norm": 0.9215561151504517, "learning_rate": 0.0001573778714010664, "loss": 1.1379, "step": 15880 }, { "epoch": 0.6115495668912416, "grad_norm": 1.269482970237732, "learning_rate": 0.00015735310222721863, "loss": 1.2042, "step": 15885 }, { "epoch": 0.6117420596727623, "grad_norm": 1.316909909248352, "learning_rate": 0.00015732832780880957, "loss": 1.2702, "step": 15890 }, { "epoch": 0.611934552454283, "grad_norm": 
1.2689425945281982, "learning_rate": 0.0001573035481481047, "loss": 1.0755, "step": 15895 }, { "epoch": 0.6121270452358036, "grad_norm": 1.0369685888290405, "learning_rate": 0.00015727876324736996, "loss": 1.0574, "step": 15900 }, { "epoch": 0.6123195380173243, "grad_norm": 1.0056127309799194, "learning_rate": 0.00015725397310887174, "loss": 1.2219, "step": 15905 }, { "epoch": 0.6125120307988451, "grad_norm": 1.3123587369918823, "learning_rate": 0.00015722917773487702, "loss": 1.2203, "step": 15910 }, { "epoch": 0.6127045235803658, "grad_norm": 1.0959875583648682, "learning_rate": 0.00015720437712765306, "loss": 1.2516, "step": 15915 }, { "epoch": 0.6128970163618864, "grad_norm": 2.0152196884155273, "learning_rate": 0.00015717957128946774, "loss": 1.2099, "step": 15920 }, { "epoch": 0.6130895091434071, "grad_norm": 2.816568374633789, "learning_rate": 0.00015715476022258942, "loss": 1.1093, "step": 15925 }, { "epoch": 0.6132820019249278, "grad_norm": 1.8223321437835693, "learning_rate": 0.00015712994392928689, "loss": 1.0474, "step": 15930 }, { "epoch": 0.6134744947064485, "grad_norm": 1.2718263864517212, "learning_rate": 0.00015710512241182945, "loss": 1.1405, "step": 15935 }, { "epoch": 0.6136669874879692, "grad_norm": 1.2518097162246704, "learning_rate": 0.00015708029567248683, "loss": 1.13, "step": 15940 }, { "epoch": 0.6138594802694899, "grad_norm": 0.8542113900184631, "learning_rate": 0.0001570554637135293, "loss": 1.0871, "step": 15945 }, { "epoch": 0.6140519730510106, "grad_norm": 1.0798470973968506, "learning_rate": 0.00015703062653722757, "loss": 1.1563, "step": 15950 }, { "epoch": 0.6142444658325312, "grad_norm": 1.123974084854126, "learning_rate": 0.00015700578414585284, "loss": 1.1253, "step": 15955 }, { "epoch": 0.614436958614052, "grad_norm": 1.2129628658294678, "learning_rate": 0.0001569809365416768, "loss": 1.1123, "step": 15960 }, { "epoch": 0.6146294513955727, "grad_norm": 1.4137890338897705, "learning_rate": 0.00015695608372697154, "loss": 1.115, 
"step": 15965 }, { "epoch": 0.6148219441770933, "grad_norm": 1.2815289497375488, "learning_rate": 0.00015693122570400975, "loss": 1.0876, "step": 15970 }, { "epoch": 0.615014436958614, "grad_norm": 0.9300668835639954, "learning_rate": 0.00015690636247506448, "loss": 1.1442, "step": 15975 }, { "epoch": 0.6152069297401348, "grad_norm": 0.9866906404495239, "learning_rate": 0.00015688149404240938, "loss": 1.0664, "step": 15980 }, { "epoch": 0.6153994225216555, "grad_norm": 1.1951825618743896, "learning_rate": 0.0001568566204083184, "loss": 1.0933, "step": 15985 }, { "epoch": 0.6155919153031761, "grad_norm": 1.4439541101455688, "learning_rate": 0.00015683174157506616, "loss": 1.1618, "step": 15990 }, { "epoch": 0.6157844080846968, "grad_norm": 1.242619276046753, "learning_rate": 0.00015680685754492762, "loss": 1.0794, "step": 15995 }, { "epoch": 0.6159769008662175, "grad_norm": 1.9631248712539673, "learning_rate": 0.00015678196832017823, "loss": 1.1082, "step": 16000 }, { "epoch": 0.6161693936477383, "grad_norm": 1.056715488433838, "learning_rate": 0.00015675707390309403, "loss": 1.0893, "step": 16005 }, { "epoch": 0.6163618864292589, "grad_norm": 2.3864753246307373, "learning_rate": 0.00015673217429595143, "loss": 1.3378, "step": 16010 }, { "epoch": 0.6165543792107796, "grad_norm": 1.3226178884506226, "learning_rate": 0.00015670726950102725, "loss": 1.1959, "step": 16015 }, { "epoch": 0.6167468719923003, "grad_norm": 2.254422426223755, "learning_rate": 0.00015668235952059892, "loss": 1.1495, "step": 16020 }, { "epoch": 0.6169393647738209, "grad_norm": 1.6376910209655762, "learning_rate": 0.00015665744435694435, "loss": 1.0027, "step": 16025 }, { "epoch": 0.6171318575553417, "grad_norm": 1.190169334411621, "learning_rate": 0.00015663252401234177, "loss": 1.0419, "step": 16030 }, { "epoch": 0.6173243503368624, "grad_norm": 1.6388911008834839, "learning_rate": 0.00015660759848907008, "loss": 1.3868, "step": 16035 }, { "epoch": 0.6175168431183831, "grad_norm": 
0.9445647597312927, "learning_rate": 0.00015658266778940843, "loss": 1.1382, "step": 16040 }, { "epoch": 0.6177093358999037, "grad_norm": 0.9717797636985779, "learning_rate": 0.00015655773191563664, "loss": 1.3385, "step": 16045 }, { "epoch": 0.6179018286814244, "grad_norm": 1.7297828197479248, "learning_rate": 0.000156532790870035, "loss": 1.2607, "step": 16050 }, { "epoch": 0.6180943214629452, "grad_norm": 1.3885836601257324, "learning_rate": 0.00015650784465488405, "loss": 1.2271, "step": 16055 }, { "epoch": 0.6182868142444659, "grad_norm": 1.3968501091003418, "learning_rate": 0.00015648289327246508, "loss": 1.255, "step": 16060 }, { "epoch": 0.6184793070259865, "grad_norm": 1.7532678842544556, "learning_rate": 0.00015645793672505967, "loss": 1.2088, "step": 16065 }, { "epoch": 0.6186717998075072, "grad_norm": 1.4146851301193237, "learning_rate": 0.00015643297501494999, "loss": 0.9797, "step": 16070 }, { "epoch": 0.6188642925890279, "grad_norm": 1.4249024391174316, "learning_rate": 0.00015640800814441851, "loss": 1.1446, "step": 16075 }, { "epoch": 0.6190567853705486, "grad_norm": 1.3387399911880493, "learning_rate": 0.0001563830361157484, "loss": 1.2204, "step": 16080 }, { "epoch": 0.6192492781520693, "grad_norm": 1.137149691581726, "learning_rate": 0.00015635805893122312, "loss": 1.1626, "step": 16085 }, { "epoch": 0.61944177093359, "grad_norm": 1.8353437185287476, "learning_rate": 0.0001563330765931267, "loss": 1.2476, "step": 16090 }, { "epoch": 0.6196342637151107, "grad_norm": 0.969289243221283, "learning_rate": 0.00015630808910374358, "loss": 1.026, "step": 16095 }, { "epoch": 0.6198267564966313, "grad_norm": 1.0529965162277222, "learning_rate": 0.0001562830964653587, "loss": 1.15, "step": 16100 }, { "epoch": 0.6200192492781521, "grad_norm": 1.2508490085601807, "learning_rate": 0.00015625809868025756, "loss": 1.08, "step": 16105 }, { "epoch": 0.6202117420596728, "grad_norm": 1.1188933849334717, "learning_rate": 0.0001562330957507259, "loss": 1.2273, 
"step": 16110 }, { "epoch": 0.6204042348411934, "grad_norm": 1.9137325286865234, "learning_rate": 0.00015620808767905018, "loss": 1.1073, "step": 16115 }, { "epoch": 0.6205967276227141, "grad_norm": 1.146921157836914, "learning_rate": 0.0001561830744675172, "loss": 1.0292, "step": 16120 }, { "epoch": 0.6207892204042348, "grad_norm": 1.6574608087539673, "learning_rate": 0.00015615805611841424, "loss": 1.2067, "step": 16125 }, { "epoch": 0.6209817131857556, "grad_norm": 1.599156379699707, "learning_rate": 0.00015613303263402903, "loss": 1.4416, "step": 16130 }, { "epoch": 0.6211742059672762, "grad_norm": 1.9472912549972534, "learning_rate": 0.00015610800401664988, "loss": 0.9591, "step": 16135 }, { "epoch": 0.6213666987487969, "grad_norm": 1.2037914991378784, "learning_rate": 0.00015608297026856538, "loss": 1.0899, "step": 16140 }, { "epoch": 0.6215591915303176, "grad_norm": 1.0116618871688843, "learning_rate": 0.0001560579313920648, "loss": 1.2294, "step": 16145 }, { "epoch": 0.6217516843118384, "grad_norm": 1.6344687938690186, "learning_rate": 0.00015603288738943774, "loss": 1.1918, "step": 16150 }, { "epoch": 0.621944177093359, "grad_norm": 1.3862853050231934, "learning_rate": 0.0001560078382629743, "loss": 1.157, "step": 16155 }, { "epoch": 0.6221366698748797, "grad_norm": 0.9576367139816284, "learning_rate": 0.00015598278401496508, "loss": 1.0759, "step": 16160 }, { "epoch": 0.6223291626564004, "grad_norm": 1.2092609405517578, "learning_rate": 0.0001559577246477011, "loss": 1.1928, "step": 16165 }, { "epoch": 0.622521655437921, "grad_norm": 1.594510793685913, "learning_rate": 0.0001559326601634739, "loss": 1.2336, "step": 16170 }, { "epoch": 0.6227141482194418, "grad_norm": 0.851620078086853, "learning_rate": 0.00015590759056457546, "loss": 1.1646, "step": 16175 }, { "epoch": 0.6229066410009625, "grad_norm": 1.1468600034713745, "learning_rate": 0.0001558825158532982, "loss": 1.1879, "step": 16180 }, { "epoch": 0.6230991337824832, "grad_norm": 1.934251308441162, 
"learning_rate": 0.00015585743603193505, "loss": 1.1207, "step": 16185 }, { "epoch": 0.6232916265640038, "grad_norm": 0.9963223934173584, "learning_rate": 0.00015583235110277943, "loss": 1.068, "step": 16190 }, { "epoch": 0.6234841193455245, "grad_norm": 0.8857359290122986, "learning_rate": 0.00015580726106812512, "loss": 1.1148, "step": 16195 }, { "epoch": 0.6236766121270453, "grad_norm": 1.2589722871780396, "learning_rate": 0.00015578216593026647, "loss": 1.0485, "step": 16200 }, { "epoch": 0.623869104908566, "grad_norm": 1.0346484184265137, "learning_rate": 0.0001557570656914983, "loss": 1.1276, "step": 16205 }, { "epoch": 0.6240615976900866, "grad_norm": 0.8794786334037781, "learning_rate": 0.0001557319603541158, "loss": 1.2591, "step": 16210 }, { "epoch": 0.6242540904716073, "grad_norm": 1.0909137725830078, "learning_rate": 0.00015570684992041473, "loss": 1.1197, "step": 16215 }, { "epoch": 0.624446583253128, "grad_norm": 1.3499592542648315, "learning_rate": 0.0001556817343926913, "loss": 1.0165, "step": 16220 }, { "epoch": 0.6246390760346487, "grad_norm": 1.5356526374816895, "learning_rate": 0.00015565661377324203, "loss": 1.0144, "step": 16225 }, { "epoch": 0.6248315688161694, "grad_norm": 1.849442958831787, "learning_rate": 0.0001556314880643642, "loss": 1.2191, "step": 16230 }, { "epoch": 0.6250240615976901, "grad_norm": 1.1928755044937134, "learning_rate": 0.00015560635726835525, "loss": 1.2685, "step": 16235 }, { "epoch": 0.6252165543792108, "grad_norm": 1.1445300579071045, "learning_rate": 0.00015558122138751332, "loss": 1.445, "step": 16240 }, { "epoch": 0.6254090471607314, "grad_norm": 1.7465559244155884, "learning_rate": 0.00015555608042413689, "loss": 1.2479, "step": 16245 }, { "epoch": 0.6256015399422522, "grad_norm": 1.1695505380630493, "learning_rate": 0.0001555309343805249, "loss": 1.2347, "step": 16250 }, { "epoch": 0.6257940327237729, "grad_norm": 1.2655342817306519, "learning_rate": 0.00015550578325897687, "loss": 1.2343, "step": 16255 }, { 
"epoch": 0.6259865255052935, "grad_norm": 1.569800853729248, "learning_rate": 0.0001554806270617926, "loss": 1.0798, "step": 16260 }, { "epoch": 0.6261790182868142, "grad_norm": 2.0027542114257812, "learning_rate": 0.00015545546579127256, "loss": 1.0084, "step": 16265 }, { "epoch": 0.6263715110683349, "grad_norm": 2.259096145629883, "learning_rate": 0.0001554302994497175, "loss": 1.1921, "step": 16270 }, { "epoch": 0.6265640038498557, "grad_norm": 1.092046856880188, "learning_rate": 0.00015540512803942878, "loss": 1.112, "step": 16275 }, { "epoch": 0.6267564966313763, "grad_norm": 1.67642343044281, "learning_rate": 0.00015537995156270808, "loss": 1.3709, "step": 16280 }, { "epoch": 0.626948989412897, "grad_norm": 1.7039928436279297, "learning_rate": 0.0001553547700218577, "loss": 1.2211, "step": 16285 }, { "epoch": 0.6271414821944177, "grad_norm": 2.0744543075561523, "learning_rate": 0.00015532958341918027, "loss": 1.2324, "step": 16290 }, { "epoch": 0.6273339749759385, "grad_norm": 1.2610362768173218, "learning_rate": 0.00015530439175697898, "loss": 1.1924, "step": 16295 }, { "epoch": 0.6275264677574591, "grad_norm": 1.8385295867919922, "learning_rate": 0.00015527919503755742, "loss": 1.2602, "step": 16300 }, { "epoch": 0.6277189605389798, "grad_norm": 1.62607741355896, "learning_rate": 0.00015525399326321966, "loss": 1.2135, "step": 16305 }, { "epoch": 0.6279114533205005, "grad_norm": 1.164507508277893, "learning_rate": 0.00015522878643627023, "loss": 1.07, "step": 16310 }, { "epoch": 0.6281039461020211, "grad_norm": 0.9871059060096741, "learning_rate": 0.0001552035745590142, "loss": 1.1749, "step": 16315 }, { "epoch": 0.6282964388835419, "grad_norm": 1.1414002180099487, "learning_rate": 0.00015517835763375688, "loss": 1.233, "step": 16320 }, { "epoch": 0.6284889316650626, "grad_norm": 1.1266084909439087, "learning_rate": 0.00015515313566280428, "loss": 1.1642, "step": 16325 }, { "epoch": 0.6286814244465833, "grad_norm": 1.8156638145446777, "learning_rate": 
0.00015512790864846286, "loss": 1.0328, "step": 16330 }, { "epoch": 0.6288739172281039, "grad_norm": 1.9357597827911377, "learning_rate": 0.00015510267659303933, "loss": 1.3325, "step": 16335 }, { "epoch": 0.6290664100096246, "grad_norm": 1.767910122871399, "learning_rate": 0.00015507743949884104, "loss": 1.2381, "step": 16340 }, { "epoch": 0.6292589027911454, "grad_norm": 2.1196887493133545, "learning_rate": 0.0001550521973681758, "loss": 1.2286, "step": 16345 }, { "epoch": 0.629451395572666, "grad_norm": 1.7220022678375244, "learning_rate": 0.00015502695020335177, "loss": 1.1699, "step": 16350 }, { "epoch": 0.6296438883541867, "grad_norm": 1.9612696170806885, "learning_rate": 0.00015500169800667765, "loss": 1.0786, "step": 16355 }, { "epoch": 0.6298363811357074, "grad_norm": 1.666223406791687, "learning_rate": 0.00015497644078046261, "loss": 1.2211, "step": 16360 }, { "epoch": 0.6300288739172281, "grad_norm": 1.7156059741973877, "learning_rate": 0.00015495117852701626, "loss": 1.0621, "step": 16365 }, { "epoch": 0.6302213666987488, "grad_norm": 1.5840719938278198, "learning_rate": 0.00015492591124864865, "loss": 1.2364, "step": 16370 }, { "epoch": 0.6304138594802695, "grad_norm": 1.1821776628494263, "learning_rate": 0.0001549006389476703, "loss": 1.1479, "step": 16375 }, { "epoch": 0.6306063522617902, "grad_norm": 1.2549364566802979, "learning_rate": 0.00015487536162639223, "loss": 1.0564, "step": 16380 }, { "epoch": 0.6307988450433109, "grad_norm": 1.5308479070663452, "learning_rate": 0.0001548500792871258, "loss": 1.1825, "step": 16385 }, { "epoch": 0.6309913378248315, "grad_norm": 1.6546053886413574, "learning_rate": 0.000154824791932183, "loss": 1.1673, "step": 16390 }, { "epoch": 0.6311838306063523, "grad_norm": 1.1561111211776733, "learning_rate": 0.00015479949956387617, "loss": 1.1014, "step": 16395 }, { "epoch": 0.631376323387873, "grad_norm": 1.6901589632034302, "learning_rate": 0.0001547742021845181, "loss": 1.2377, "step": 16400 }, { "epoch": 
0.6315688161693936, "grad_norm": 1.2808809280395508, "learning_rate": 0.0001547488997964221, "loss": 1.2976, "step": 16405 }, { "epoch": 0.6317613089509143, "grad_norm": 0.9793625473976135, "learning_rate": 0.0001547235924019019, "loss": 1.0328, "step": 16410 }, { "epoch": 0.631953801732435, "grad_norm": 1.6001505851745605, "learning_rate": 0.00015469828000327164, "loss": 1.0232, "step": 16415 }, { "epoch": 0.6321462945139558, "grad_norm": 1.3900479078292847, "learning_rate": 0.00015467296260284605, "loss": 1.2412, "step": 16420 }, { "epoch": 0.6323387872954764, "grad_norm": 1.6030535697937012, "learning_rate": 0.0001546476402029402, "loss": 1.178, "step": 16425 }, { "epoch": 0.6325312800769971, "grad_norm": 1.5602627992630005, "learning_rate": 0.00015462231280586965, "loss": 1.2834, "step": 16430 }, { "epoch": 0.6327237728585178, "grad_norm": 1.3648455142974854, "learning_rate": 0.00015459698041395045, "loss": 1.1425, "step": 16435 }, { "epoch": 0.6329162656400384, "grad_norm": 1.4346479177474976, "learning_rate": 0.00015457164302949908, "loss": 1.0076, "step": 16440 }, { "epoch": 0.6331087584215592, "grad_norm": 0.9692068696022034, "learning_rate": 0.00015454630065483242, "loss": 1.0133, "step": 16445 }, { "epoch": 0.6333012512030799, "grad_norm": 1.479915976524353, "learning_rate": 0.0001545209532922679, "loss": 1.131, "step": 16450 }, { "epoch": 0.6334937439846006, "grad_norm": 1.0446960926055908, "learning_rate": 0.00015449560094412342, "loss": 1.2545, "step": 16455 }, { "epoch": 0.6336862367661212, "grad_norm": 1.458414077758789, "learning_rate": 0.00015447024361271721, "loss": 1.325, "step": 16460 }, { "epoch": 0.633878729547642, "grad_norm": 1.2071151733398438, "learning_rate": 0.00015444488130036802, "loss": 1.2303, "step": 16465 }, { "epoch": 0.6340712223291627, "grad_norm": 1.9108256101608276, "learning_rate": 0.00015441951400939515, "loss": 1.2031, "step": 16470 }, { "epoch": 0.6342637151106834, "grad_norm": 1.1393382549285889, "learning_rate": 
0.0001543941417421182, "loss": 1.016, "step": 16475 }, { "epoch": 0.634456207892204, "grad_norm": 2.0735628604888916, "learning_rate": 0.00015436876450085728, "loss": 1.1619, "step": 16480 }, { "epoch": 0.6346487006737247, "grad_norm": 1.6895620822906494, "learning_rate": 0.00015434338228793306, "loss": 1.1621, "step": 16485 }, { "epoch": 0.6348411934552455, "grad_norm": 1.9663159847259521, "learning_rate": 0.0001543179951056665, "loss": 1.2465, "step": 16490 }, { "epoch": 0.6350336862367661, "grad_norm": 1.1372085809707642, "learning_rate": 0.0001542926029563791, "loss": 1.1643, "step": 16495 }, { "epoch": 0.6352261790182868, "grad_norm": 0.5948193669319153, "learning_rate": 0.00015426720584239283, "loss": 0.9659, "step": 16500 }, { "epoch": 0.6354186717998075, "grad_norm": 1.829047441482544, "learning_rate": 0.00015424180376603008, "loss": 1.1334, "step": 16505 }, { "epoch": 0.6356111645813282, "grad_norm": 1.4863371849060059, "learning_rate": 0.00015421639672961367, "loss": 1.1206, "step": 16510 }, { "epoch": 0.6358036573628489, "grad_norm": 1.2481038570404053, "learning_rate": 0.00015419098473546696, "loss": 1.1101, "step": 16515 }, { "epoch": 0.6359961501443696, "grad_norm": 1.8721559047698975, "learning_rate": 0.00015416556778591363, "loss": 1.1293, "step": 16520 }, { "epoch": 0.6361886429258903, "grad_norm": 1.5730985403060913, "learning_rate": 0.000154140145883278, "loss": 1.277, "step": 16525 }, { "epoch": 0.636381135707411, "grad_norm": 1.4351321458816528, "learning_rate": 0.00015411471902988463, "loss": 1.2475, "step": 16530 }, { "epoch": 0.6365736284889316, "grad_norm": 0.8733989596366882, "learning_rate": 0.00015408928722805874, "loss": 1.0728, "step": 16535 }, { "epoch": 0.6367661212704524, "grad_norm": 1.454068899154663, "learning_rate": 0.00015406385048012577, "loss": 1.0163, "step": 16540 }, { "epoch": 0.6369586140519731, "grad_norm": 0.9600105285644531, "learning_rate": 0.00015403840878841182, "loss": 1.097, "step": 16545 }, { "epoch": 
0.6371511068334937, "grad_norm": 2.419609546661377, "learning_rate": 0.00015401296215524345, "loss": 1.2003, "step": 16550 }, { "epoch": 0.6373435996150144, "grad_norm": 1.313755989074707, "learning_rate": 0.0001539875105829474, "loss": 1.1276, "step": 16555 }, { "epoch": 0.6375360923965351, "grad_norm": 1.6932001113891602, "learning_rate": 0.00015396205407385116, "loss": 1.1689, "step": 16560 }, { "epoch": 0.6377285851780559, "grad_norm": 1.076905608177185, "learning_rate": 0.00015393659263028257, "loss": 1.189, "step": 16565 }, { "epoch": 0.6379210779595765, "grad_norm": 1.2433785200119019, "learning_rate": 0.00015391112625456983, "loss": 0.9797, "step": 16570 }, { "epoch": 0.6381135707410972, "grad_norm": 1.1299281120300293, "learning_rate": 0.00015388565494904176, "loss": 1.1399, "step": 16575 }, { "epoch": 0.6383060635226179, "grad_norm": 1.0440160036087036, "learning_rate": 0.0001538601787160275, "loss": 1.2491, "step": 16580 }, { "epoch": 0.6384985563041385, "grad_norm": 1.1874500513076782, "learning_rate": 0.00015383469755785668, "loss": 1.2762, "step": 16585 }, { "epoch": 0.6386910490856593, "grad_norm": 1.2737995386123657, "learning_rate": 0.0001538092114768594, "loss": 1.2102, "step": 16590 }, { "epoch": 0.63888354186718, "grad_norm": 1.8649038076400757, "learning_rate": 0.0001537837204753662, "loss": 1.0711, "step": 16595 }, { "epoch": 0.6390760346487007, "grad_norm": 1.0375845432281494, "learning_rate": 0.000153758224555708, "loss": 1.0349, "step": 16600 }, { "epoch": 0.6392685274302213, "grad_norm": 1.4500465393066406, "learning_rate": 0.0001537327237202163, "loss": 1.1501, "step": 16605 }, { "epoch": 0.6394610202117421, "grad_norm": 1.5905102491378784, "learning_rate": 0.000153707217971223, "loss": 1.1946, "step": 16610 }, { "epoch": 0.6396535129932628, "grad_norm": 1.224752426147461, "learning_rate": 0.00015368170731106036, "loss": 1.1101, "step": 16615 }, { "epoch": 0.6398460057747835, "grad_norm": 2.605717182159424, "learning_rate": 
0.00015365619174206117, "loss": 1.0483, "step": 16620 }, { "epoch": 0.6400384985563041, "grad_norm": 1.2829294204711914, "learning_rate": 0.00015363067126655873, "loss": 1.2265, "step": 16625 }, { "epoch": 0.6402309913378248, "grad_norm": 1.1748125553131104, "learning_rate": 0.00015360514588688665, "loss": 1.0909, "step": 16630 }, { "epoch": 0.6404234841193456, "grad_norm": 1.0052121877670288, "learning_rate": 0.00015357961560537908, "loss": 1.3145, "step": 16635 }, { "epoch": 0.6406159769008662, "grad_norm": 1.1692798137664795, "learning_rate": 0.00015355408042437061, "loss": 1.3134, "step": 16640 }, { "epoch": 0.6408084696823869, "grad_norm": 1.1379728317260742, "learning_rate": 0.00015352854034619622, "loss": 1.0519, "step": 16645 }, { "epoch": 0.6410009624639076, "grad_norm": 1.067920207977295, "learning_rate": 0.00015350299537319147, "loss": 1.17, "step": 16650 }, { "epoch": 0.6411934552454283, "grad_norm": 1.9951469898223877, "learning_rate": 0.00015347744550769216, "loss": 1.0478, "step": 16655 }, { "epoch": 0.641385948026949, "grad_norm": 1.036605715751648, "learning_rate": 0.00015345189075203477, "loss": 1.1288, "step": 16660 }, { "epoch": 0.6415784408084697, "grad_norm": 0.5938658714294434, "learning_rate": 0.000153426331108556, "loss": 1.0589, "step": 16665 }, { "epoch": 0.6417709335899904, "grad_norm": 1.212049961090088, "learning_rate": 0.00015340076657959317, "loss": 1.1104, "step": 16670 }, { "epoch": 0.641963426371511, "grad_norm": 1.3548222780227661, "learning_rate": 0.00015337519716748403, "loss": 1.2639, "step": 16675 }, { "epoch": 0.6421559191530317, "grad_norm": 1.210879921913147, "learning_rate": 0.00015334962287456665, "loss": 1.0576, "step": 16680 }, { "epoch": 0.6423484119345525, "grad_norm": 2.2316668033599854, "learning_rate": 0.00015332404370317965, "loss": 1.2075, "step": 16685 }, { "epoch": 0.6425409047160732, "grad_norm": 1.0065557956695557, "learning_rate": 0.00015329845965566215, "loss": 1.0872, "step": 16690 }, { "epoch": 
0.6427333974975938, "grad_norm": 1.36894953250885, "learning_rate": 0.00015327287073435355, "loss": 0.9866, "step": 16695 }, { "epoch": 0.6429258902791145, "grad_norm": 0.9726212620735168, "learning_rate": 0.0001532472769415938, "loss": 1.0069, "step": 16700 }, { "epoch": 0.6431183830606352, "grad_norm": 0.9447348117828369, "learning_rate": 0.00015322167827972334, "loss": 1.3184, "step": 16705 }, { "epoch": 0.643310875842156, "grad_norm": 1.7236000299453735, "learning_rate": 0.00015319607475108296, "loss": 1.2547, "step": 16710 }, { "epoch": 0.6435033686236766, "grad_norm": 2.3541550636291504, "learning_rate": 0.00015317046635801392, "loss": 1.2886, "step": 16715 }, { "epoch": 0.6436958614051973, "grad_norm": 1.8849072456359863, "learning_rate": 0.00015314485310285796, "loss": 1.1295, "step": 16720 }, { "epoch": 0.643888354186718, "grad_norm": 4.183611869812012, "learning_rate": 0.00015311923498795724, "loss": 1.1109, "step": 16725 }, { "epoch": 0.6440808469682386, "grad_norm": 1.4037699699401855, "learning_rate": 0.00015309361201565436, "loss": 1.1097, "step": 16730 }, { "epoch": 0.6442733397497594, "grad_norm": 1.626489520072937, "learning_rate": 0.00015306798418829236, "loss": 1.2515, "step": 16735 }, { "epoch": 0.6444658325312801, "grad_norm": 2.0744874477386475, "learning_rate": 0.00015304235150821475, "loss": 1.2196, "step": 16740 }, { "epoch": 0.6446583253128008, "grad_norm": 1.2196972370147705, "learning_rate": 0.0001530167139777655, "loss": 1.0935, "step": 16745 }, { "epoch": 0.6448508180943214, "grad_norm": 2.1287968158721924, "learning_rate": 0.00015299107159928897, "loss": 0.9476, "step": 16750 }, { "epoch": 0.6450433108758421, "grad_norm": 1.6050670146942139, "learning_rate": 0.00015296542437512995, "loss": 1.2276, "step": 16755 }, { "epoch": 0.6452358036573629, "grad_norm": 1.316373348236084, "learning_rate": 0.0001529397723076337, "loss": 1.121, "step": 16760 }, { "epoch": 0.6454282964388836, "grad_norm": 1.4219224452972412, "learning_rate": 
0.00015291411539914603, "loss": 1.3219, "step": 16765 }, { "epoch": 0.6456207892204042, "grad_norm": 1.3470525741577148, "learning_rate": 0.00015288845365201299, "loss": 1.0538, "step": 16770 }, { "epoch": 0.6458132820019249, "grad_norm": 1.6893870830535889, "learning_rate": 0.0001528627870685812, "loss": 1.1907, "step": 16775 }, { "epoch": 0.6460057747834457, "grad_norm": 1.7264561653137207, "learning_rate": 0.00015283711565119775, "loss": 1.167, "step": 16780 }, { "epoch": 0.6461982675649663, "grad_norm": 1.1093302965164185, "learning_rate": 0.0001528114394022101, "loss": 1.1477, "step": 16785 }, { "epoch": 0.646390760346487, "grad_norm": 1.1114470958709717, "learning_rate": 0.00015278575832396613, "loss": 1.1224, "step": 16790 }, { "epoch": 0.6465832531280077, "grad_norm": 2.0239744186401367, "learning_rate": 0.00015276007241881424, "loss": 1.1655, "step": 16795 }, { "epoch": 0.6467757459095284, "grad_norm": 1.0726968050003052, "learning_rate": 0.00015273438168910322, "loss": 0.9021, "step": 16800 }, { "epoch": 0.6469682386910491, "grad_norm": 1.2715688943862915, "learning_rate": 0.00015270868613718238, "loss": 1.1776, "step": 16805 }, { "epoch": 0.6471607314725698, "grad_norm": 1.4808478355407715, "learning_rate": 0.00015268298576540129, "loss": 1.1023, "step": 16810 }, { "epoch": 0.6473532242540905, "grad_norm": 1.63973069190979, "learning_rate": 0.0001526572805761102, "loss": 1.0025, "step": 16815 }, { "epoch": 0.6475457170356111, "grad_norm": 1.0935505628585815, "learning_rate": 0.0001526315705716596, "loss": 1.1039, "step": 16820 }, { "epoch": 0.6477382098171318, "grad_norm": 1.0586233139038086, "learning_rate": 0.00015260585575440052, "loss": 1.0884, "step": 16825 }, { "epoch": 0.6479307025986526, "grad_norm": 1.0608752965927124, "learning_rate": 0.0001525801361266844, "loss": 1.2997, "step": 16830 }, { "epoch": 0.6481231953801733, "grad_norm": 1.0017322301864624, "learning_rate": 0.00015255441169086318, "loss": 1.3023, "step": 16835 }, { "epoch": 
0.6483156881616939, "grad_norm": 0.9409940242767334, "learning_rate": 0.00015252868244928914, "loss": 1.2462, "step": 16840 }, { "epoch": 0.6485081809432146, "grad_norm": 1.646735429763794, "learning_rate": 0.00015250294840431504, "loss": 1.1759, "step": 16845 }, { "epoch": 0.6487006737247353, "grad_norm": 2.878627300262451, "learning_rate": 0.00015247720955829412, "loss": 1.2458, "step": 16850 }, { "epoch": 0.6488931665062561, "grad_norm": 1.6578867435455322, "learning_rate": 0.00015245146591358002, "loss": 1.297, "step": 16855 }, { "epoch": 0.6490856592877767, "grad_norm": 1.9454634189605713, "learning_rate": 0.00015242571747252682, "loss": 1.2366, "step": 16860 }, { "epoch": 0.6492781520692974, "grad_norm": 1.8211311101913452, "learning_rate": 0.00015239996423748906, "loss": 1.1163, "step": 16865 }, { "epoch": 0.6494706448508181, "grad_norm": 1.5382091999053955, "learning_rate": 0.00015237420621082163, "loss": 1.0103, "step": 16870 }, { "epoch": 0.6496631376323387, "grad_norm": 1.7348453998565674, "learning_rate": 0.00015234844339488004, "loss": 1.1667, "step": 16875 }, { "epoch": 0.6498556304138595, "grad_norm": 1.0255297422409058, "learning_rate": 0.0001523226757920201, "loss": 1.1472, "step": 16880 }, { "epoch": 0.6500481231953802, "grad_norm": 1.730460524559021, "learning_rate": 0.00015229690340459802, "loss": 1.2442, "step": 16885 }, { "epoch": 0.6502406159769009, "grad_norm": 1.6826850175857544, "learning_rate": 0.00015227112623497058, "loss": 1.2426, "step": 16890 }, { "epoch": 0.6504331087584215, "grad_norm": 1.6523195505142212, "learning_rate": 0.00015224534428549488, "loss": 1.1543, "step": 16895 }, { "epoch": 0.6506256015399422, "grad_norm": 2.3335843086242676, "learning_rate": 0.00015221955755852858, "loss": 1.115, "step": 16900 }, { "epoch": 0.650818094321463, "grad_norm": 1.0122956037521362, "learning_rate": 0.00015219376605642962, "loss": 1.2913, "step": 16905 }, { "epoch": 0.6510105871029837, "grad_norm": 1.5100213289260864, "learning_rate": 
0.00015216796978155655, "loss": 1.0309, "step": 16910 }, { "epoch": 0.6512030798845043, "grad_norm": 1.1331759691238403, "learning_rate": 0.0001521421687362682, "loss": 1.0732, "step": 16915 }, { "epoch": 0.651395572666025, "grad_norm": 0.9450187087059021, "learning_rate": 0.00015211636292292394, "loss": 1.2011, "step": 16920 }, { "epoch": 0.6515880654475458, "grad_norm": 1.1546697616577148, "learning_rate": 0.00015209055234388354, "loss": 1.1368, "step": 16925 }, { "epoch": 0.6517805582290664, "grad_norm": 1.5972734689712524, "learning_rate": 0.00015206473700150717, "loss": 1.0546, "step": 16930 }, { "epoch": 0.6519730510105871, "grad_norm": 1.1828382015228271, "learning_rate": 0.0001520389168981555, "loss": 1.0311, "step": 16935 }, { "epoch": 0.6521655437921078, "grad_norm": 1.0515602827072144, "learning_rate": 0.00015201309203618962, "loss": 1.3763, "step": 16940 }, { "epoch": 0.6523580365736285, "grad_norm": 1.0648945569992065, "learning_rate": 0.00015198726241797103, "loss": 1.136, "step": 16945 }, { "epoch": 0.6525505293551492, "grad_norm": 1.3983291387557983, "learning_rate": 0.00015196142804586166, "loss": 1.121, "step": 16950 }, { "epoch": 0.6527430221366699, "grad_norm": 1.1980384588241577, "learning_rate": 0.00015193558892222394, "loss": 1.1442, "step": 16955 }, { "epoch": 0.6529355149181906, "grad_norm": 0.92877596616745, "learning_rate": 0.00015190974504942064, "loss": 1.1025, "step": 16960 }, { "epoch": 0.6531280076997112, "grad_norm": 1.3868606090545654, "learning_rate": 0.00015188389642981502, "loss": 1.0714, "step": 16965 }, { "epoch": 0.6533205004812319, "grad_norm": 2.058389663696289, "learning_rate": 0.00015185804306577075, "loss": 1.3543, "step": 16970 }, { "epoch": 0.6535129932627527, "grad_norm": 0.5963343381881714, "learning_rate": 0.00015183218495965202, "loss": 0.9247, "step": 16975 }, { "epoch": 0.6537054860442734, "grad_norm": 1.6353943347930908, "learning_rate": 0.0001518063221138233, "loss": 1.1284, "step": 16980 }, { "epoch": 
0.653897978825794, "grad_norm": 2.303635597229004, "learning_rate": 0.00015178045453064962, "loss": 1.3496, "step": 16985 }, { "epoch": 0.6540904716073147, "grad_norm": 0.9238683581352234, "learning_rate": 0.00015175458221249638, "loss": 1.1348, "step": 16990 }, { "epoch": 0.6542829643888354, "grad_norm": 1.4203814268112183, "learning_rate": 0.00015172870516172942, "loss": 1.1032, "step": 16995 }, { "epoch": 0.6544754571703562, "grad_norm": 1.018648386001587, "learning_rate": 0.0001517028233807151, "loss": 1.237, "step": 17000 }, { "epoch": 0.6546679499518768, "grad_norm": 1.4779586791992188, "learning_rate": 0.00015167693687182, "loss": 1.173, "step": 17005 }, { "epoch": 0.6548604427333975, "grad_norm": 1.7097437381744385, "learning_rate": 0.0001516510456374114, "loss": 1.1935, "step": 17010 }, { "epoch": 0.6550529355149182, "grad_norm": 1.4055527448654175, "learning_rate": 0.00015162514967985682, "loss": 1.0832, "step": 17015 }, { "epoch": 0.6552454282964388, "grad_norm": 1.5012494325637817, "learning_rate": 0.00015159924900152432, "loss": 1.3221, "step": 17020 }, { "epoch": 0.6554379210779596, "grad_norm": 1.13307785987854, "learning_rate": 0.00015157334360478228, "loss": 1.2599, "step": 17025 }, { "epoch": 0.6556304138594803, "grad_norm": 2.10911226272583, "learning_rate": 0.0001515474334919996, "loss": 1.1446, "step": 17030 }, { "epoch": 0.655822906641001, "grad_norm": 1.4689563512802124, "learning_rate": 0.00015152151866554563, "loss": 1.3851, "step": 17035 }, { "epoch": 0.6560153994225216, "grad_norm": 1.3363420963287354, "learning_rate": 0.00015149559912779005, "loss": 1.1939, "step": 17040 }, { "epoch": 0.6562078922040423, "grad_norm": 1.665319561958313, "learning_rate": 0.00015146967488110307, "loss": 1.3353, "step": 17045 }, { "epoch": 0.6564003849855631, "grad_norm": 1.03946852684021, "learning_rate": 0.00015144374592785528, "loss": 1.0736, "step": 17050 }, { "epoch": 0.6565928777670837, "grad_norm": 1.941311240196228, "learning_rate": 
0.0001514178122704177, "loss": 1.1745, "step": 17055 }, { "epoch": 0.6567853705486044, "grad_norm": 2.091871738433838, "learning_rate": 0.00015139187391116182, "loss": 0.9826, "step": 17060 }, { "epoch": 0.6569778633301251, "grad_norm": 1.3722056150436401, "learning_rate": 0.0001513659308524595, "loss": 1.0969, "step": 17065 }, { "epoch": 0.6571703561116458, "grad_norm": 1.9604045152664185, "learning_rate": 0.00015133998309668306, "loss": 1.0726, "step": 17070 }, { "epoch": 0.6573628488931665, "grad_norm": 1.1731983423233032, "learning_rate": 0.00015131403064620527, "loss": 1.0909, "step": 17075 }, { "epoch": 0.6575553416746872, "grad_norm": 1.3418563604354858, "learning_rate": 0.0001512880735033993, "loss": 1.2574, "step": 17080 }, { "epoch": 0.6577478344562079, "grad_norm": 2.054722785949707, "learning_rate": 0.00015126211167063876, "loss": 1.1705, "step": 17085 }, { "epoch": 0.6579403272377286, "grad_norm": 1.1431398391723633, "learning_rate": 0.00015123614515029772, "loss": 1.2606, "step": 17090 }, { "epoch": 0.6581328200192493, "grad_norm": 1.4750339984893799, "learning_rate": 0.0001512101739447506, "loss": 1.1471, "step": 17095 }, { "epoch": 0.65832531280077, "grad_norm": 1.6877497434616089, "learning_rate": 0.00015118419805637228, "loss": 0.986, "step": 17100 }, { "epoch": 0.6585178055822907, "grad_norm": 0.6538336873054504, "learning_rate": 0.0001511582174875381, "loss": 0.9426, "step": 17105 }, { "epoch": 0.6587102983638113, "grad_norm": 1.1754498481750488, "learning_rate": 0.00015113223224062384, "loss": 1.0994, "step": 17110 }, { "epoch": 0.658902791145332, "grad_norm": 2.219837188720703, "learning_rate": 0.00015110624231800567, "loss": 1.1205, "step": 17115 }, { "epoch": 0.6590952839268528, "grad_norm": 1.826324701309204, "learning_rate": 0.0001510802477220602, "loss": 1.2335, "step": 17120 }, { "epoch": 0.6592877767083735, "grad_norm": 1.8668159246444702, "learning_rate": 0.00015105424845516445, "loss": 1.2609, "step": 17125 }, { "epoch": 
0.6594802694898941, "grad_norm": 0.9887051582336426, "learning_rate": 0.00015102824451969585, "loss": 1.0539, "step": 17130 }, { "epoch": 0.6596727622714148, "grad_norm": 1.2473443746566772, "learning_rate": 0.00015100223591803236, "loss": 1.2355, "step": 17135 }, { "epoch": 0.6598652550529355, "grad_norm": 1.2736021280288696, "learning_rate": 0.00015097622265255222, "loss": 1.3073, "step": 17140 }, { "epoch": 0.6600577478344563, "grad_norm": 1.0870583057403564, "learning_rate": 0.00015095020472563424, "loss": 0.8381, "step": 17145 }, { "epoch": 0.6602502406159769, "grad_norm": 1.6099382638931274, "learning_rate": 0.0001509241821396575, "loss": 1.2738, "step": 17150 }, { "epoch": 0.6604427333974976, "grad_norm": 1.3321658372879028, "learning_rate": 0.0001508981548970017, "loss": 1.2924, "step": 17155 }, { "epoch": 0.6606352261790183, "grad_norm": 1.0399209260940552, "learning_rate": 0.00015087212300004678, "loss": 0.9254, "step": 17160 }, { "epoch": 0.6608277189605389, "grad_norm": 0.9332255721092224, "learning_rate": 0.0001508460864511732, "loss": 1.2693, "step": 17165 }, { "epoch": 0.6610202117420597, "grad_norm": 1.408109188079834, "learning_rate": 0.00015082004525276185, "loss": 1.0394, "step": 17170 }, { "epoch": 0.6612127045235804, "grad_norm": 1.3958436250686646, "learning_rate": 0.00015079399940719402, "loss": 1.1119, "step": 17175 }, { "epoch": 0.6614051973051011, "grad_norm": 1.3326903581619263, "learning_rate": 0.00015076794891685143, "loss": 1.0996, "step": 17180 }, { "epoch": 0.6615976900866217, "grad_norm": 1.1485531330108643, "learning_rate": 0.00015074189378411622, "loss": 1.1617, "step": 17185 }, { "epoch": 0.6617901828681424, "grad_norm": 1.9735444784164429, "learning_rate": 0.00015071583401137092, "loss": 1.1168, "step": 17190 }, { "epoch": 0.6619826756496632, "grad_norm": 1.6123241186141968, "learning_rate": 0.00015068976960099862, "loss": 1.1232, "step": 17195 }, { "epoch": 0.6621751684311838, "grad_norm": 1.3553659915924072, "learning_rate": 
0.0001506637005553826, "loss": 0.9969, "step": 17200 }, { "epoch": 0.6623676612127045, "grad_norm": 1.3059508800506592, "learning_rate": 0.00015063762687690684, "loss": 1.0852, "step": 17205 }, { "epoch": 0.6625601539942252, "grad_norm": 0.9797844290733337, "learning_rate": 0.00015061154856795553, "loss": 0.8927, "step": 17210 }, { "epoch": 0.6627526467757459, "grad_norm": 1.2405691146850586, "learning_rate": 0.00015058546563091337, "loss": 1.1381, "step": 17215 }, { "epoch": 0.6629451395572666, "grad_norm": 0.7226620316505432, "learning_rate": 0.00015055937806816548, "loss": 0.9773, "step": 17220 }, { "epoch": 0.6631376323387873, "grad_norm": 1.302935004234314, "learning_rate": 0.0001505332858820974, "loss": 1.2386, "step": 17225 }, { "epoch": 0.663330125120308, "grad_norm": 0.8981648683547974, "learning_rate": 0.00015050718907509505, "loss": 1.1499, "step": 17230 }, { "epoch": 0.6635226179018286, "grad_norm": 1.6177557706832886, "learning_rate": 0.00015048108764954487, "loss": 1.0118, "step": 17235 }, { "epoch": 0.6637151106833494, "grad_norm": 1.4030743837356567, "learning_rate": 0.00015045498160783362, "loss": 1.2892, "step": 17240 }, { "epoch": 0.6639076034648701, "grad_norm": 1.3468968868255615, "learning_rate": 0.00015042887095234852, "loss": 1.2397, "step": 17245 }, { "epoch": 0.6641000962463908, "grad_norm": 0.9706347584724426, "learning_rate": 0.00015040275568547728, "loss": 1.0251, "step": 17250 }, { "epoch": 0.6642925890279114, "grad_norm": 1.623147964477539, "learning_rate": 0.00015037663580960787, "loss": 1.1651, "step": 17255 }, { "epoch": 0.6644850818094321, "grad_norm": 0.9518052935600281, "learning_rate": 0.00015035051132712883, "loss": 1.1605, "step": 17260 }, { "epoch": 0.6646775745909529, "grad_norm": 1.36576509475708, "learning_rate": 0.00015032438224042908, "loss": 1.1485, "step": 17265 }, { "epoch": 0.6648700673724736, "grad_norm": 1.4218300580978394, "learning_rate": 0.00015029824855189797, "loss": 1.0527, "step": 17270 }, { "epoch": 
0.6650625601539942, "grad_norm": 1.573996663093567, "learning_rate": 0.0001502721102639252, "loss": 1.1692, "step": 17275 }, { "epoch": 0.6652550529355149, "grad_norm": 1.1809152364730835, "learning_rate": 0.00015024596737890097, "loss": 1.0801, "step": 17280 }, { "epoch": 0.6654475457170356, "grad_norm": 1.043346881866455, "learning_rate": 0.00015021981989921587, "loss": 1.181, "step": 17285 }, { "epoch": 0.6656400384985564, "grad_norm": 0.9252155423164368, "learning_rate": 0.00015019366782726093, "loss": 1.0204, "step": 17290 }, { "epoch": 0.665832531280077, "grad_norm": 1.4319888353347778, "learning_rate": 0.00015016751116542757, "loss": 1.2009, "step": 17295 }, { "epoch": 0.6660250240615977, "grad_norm": 0.6749492287635803, "learning_rate": 0.00015014134991610766, "loss": 1.1157, "step": 17300 }, { "epoch": 0.6662175168431184, "grad_norm": 0.9866890907287598, "learning_rate": 0.0001501151840816934, "loss": 1.1943, "step": 17305 }, { "epoch": 0.666410009624639, "grad_norm": 1.4207334518432617, "learning_rate": 0.00015008901366457756, "loss": 1.1103, "step": 17310 }, { "epoch": 0.6666025024061598, "grad_norm": 1.0321522951126099, "learning_rate": 0.00015006283866715326, "loss": 1.079, "step": 17315 }, { "epoch": 0.6667949951876805, "grad_norm": 1.6033141613006592, "learning_rate": 0.000150036659091814, "loss": 0.982, "step": 17320 }, { "epoch": 0.6669874879692012, "grad_norm": 1.503190279006958, "learning_rate": 0.00015001047494095368, "loss": 1.1371, "step": 17325 }, { "epoch": 0.6671799807507218, "grad_norm": 1.2487331628799438, "learning_rate": 0.00014998428621696677, "loss": 1.1328, "step": 17330 }, { "epoch": 0.6673724735322425, "grad_norm": 1.2876261472702026, "learning_rate": 0.00014995809292224797, "loss": 1.2034, "step": 17335 }, { "epoch": 0.6675649663137633, "grad_norm": 1.0377410650253296, "learning_rate": 0.0001499318950591925, "loss": 1.2794, "step": 17340 }, { "epoch": 0.667757459095284, "grad_norm": 2.4566397666931152, "learning_rate": 
0.00014990569263019602, "loss": 1.1211, "step": 17345 }, { "epoch": 0.6679499518768046, "grad_norm": 1.3069671392440796, "learning_rate": 0.00014987948563765455, "loss": 1.1101, "step": 17350 }, { "epoch": 0.6681424446583253, "grad_norm": 1.0914125442504883, "learning_rate": 0.0001498532740839645, "loss": 1.0383, "step": 17355 }, { "epoch": 0.668334937439846, "grad_norm": 1.1379315853118896, "learning_rate": 0.00014982705797152285, "loss": 1.0903, "step": 17360 }, { "epoch": 0.6685274302213667, "grad_norm": 0.9188007712364197, "learning_rate": 0.00014980083730272675, "loss": 1.0696, "step": 17365 }, { "epoch": 0.6687199230028874, "grad_norm": 1.2434134483337402, "learning_rate": 0.00014977461207997403, "loss": 1.2438, "step": 17370 }, { "epoch": 0.6689124157844081, "grad_norm": 1.1543229818344116, "learning_rate": 0.00014974838230566274, "loss": 1.12, "step": 17375 }, { "epoch": 0.6691049085659287, "grad_norm": 1.4789245128631592, "learning_rate": 0.00014972214798219144, "loss": 1.0437, "step": 17380 }, { "epoch": 0.6692974013474494, "grad_norm": 1.4191787242889404, "learning_rate": 0.0001496959091119591, "loss": 1.1827, "step": 17385 }, { "epoch": 0.6694898941289702, "grad_norm": 1.749631404876709, "learning_rate": 0.00014966966569736508, "loss": 1.0353, "step": 17390 }, { "epoch": 0.6696823869104909, "grad_norm": 1.4120956659317017, "learning_rate": 0.00014964341774080912, "loss": 1.2257, "step": 17395 }, { "epoch": 0.6698748796920115, "grad_norm": 1.6030794382095337, "learning_rate": 0.00014961716524469152, "loss": 1.0767, "step": 17400 }, { "epoch": 0.6700673724735322, "grad_norm": 1.4263496398925781, "learning_rate": 0.00014959090821141282, "loss": 1.1188, "step": 17405 }, { "epoch": 0.670259865255053, "grad_norm": 1.1514267921447754, "learning_rate": 0.00014956464664337408, "loss": 1.0731, "step": 17410 }, { "epoch": 0.6704523580365737, "grad_norm": 1.5985325574874878, "learning_rate": 0.00014953838054297672, "loss": 1.1342, "step": 17415 }, { "epoch": 
0.6706448508180943, "grad_norm": 2.1868584156036377, "learning_rate": 0.00014951210991262262, "loss": 1.1169, "step": 17420 }, { "epoch": 0.670837343599615, "grad_norm": 1.1203131675720215, "learning_rate": 0.0001494858347547141, "loss": 1.051, "step": 17425 }, { "epoch": 0.6710298363811357, "grad_norm": 1.3077278137207031, "learning_rate": 0.00014945955507165377, "loss": 1.19, "step": 17430 }, { "epoch": 0.6712223291626565, "grad_norm": 1.1149485111236572, "learning_rate": 0.00014943327086584476, "loss": 1.3471, "step": 17435 }, { "epoch": 0.6714148219441771, "grad_norm": 1.7210713624954224, "learning_rate": 0.00014940698213969063, "loss": 1.0918, "step": 17440 }, { "epoch": 0.6716073147256978, "grad_norm": 1.265023946762085, "learning_rate": 0.00014938068889559526, "loss": 1.0716, "step": 17445 }, { "epoch": 0.6717998075072185, "grad_norm": 1.37469482421875, "learning_rate": 0.00014935439113596298, "loss": 1.1524, "step": 17450 }, { "epoch": 0.6719923002887391, "grad_norm": 1.189141035079956, "learning_rate": 0.0001493280888631986, "loss": 1.1097, "step": 17455 }, { "epoch": 0.6721847930702599, "grad_norm": 1.5825908184051514, "learning_rate": 0.00014930178207970727, "loss": 1.2842, "step": 17460 }, { "epoch": 0.6723772858517806, "grad_norm": 1.1093425750732422, "learning_rate": 0.00014927547078789452, "loss": 1.0679, "step": 17465 }, { "epoch": 0.6725697786333013, "grad_norm": 1.3306807279586792, "learning_rate": 0.00014924915499016646, "loss": 1.2877, "step": 17470 }, { "epoch": 0.6727622714148219, "grad_norm": 1.9391852617263794, "learning_rate": 0.00014922283468892935, "loss": 1.1743, "step": 17475 }, { "epoch": 0.6729547641963426, "grad_norm": 1.5213755369186401, "learning_rate": 0.0001491965098865901, "loss": 1.1793, "step": 17480 }, { "epoch": 0.6731472569778634, "grad_norm": 1.6637414693832397, "learning_rate": 0.00014917018058555593, "loss": 1.1441, "step": 17485 }, { "epoch": 0.673339749759384, "grad_norm": 1.7859970331192017, "learning_rate": 
0.00014914384678823447, "loss": 1.1376, "step": 17490 }, { "epoch": 0.6735322425409047, "grad_norm": 0.9251899719238281, "learning_rate": 0.00014911750849703378, "loss": 1.0523, "step": 17495 }, { "epoch": 0.6737247353224254, "grad_norm": 2.6382827758789062, "learning_rate": 0.00014909116571436228, "loss": 1.311, "step": 17500 }, { "epoch": 0.673917228103946, "grad_norm": 2.1472413539886475, "learning_rate": 0.00014906481844262888, "loss": 1.3515, "step": 17505 }, { "epoch": 0.6741097208854668, "grad_norm": 1.6070085763931274, "learning_rate": 0.0001490384666842429, "loss": 1.121, "step": 17510 }, { "epoch": 0.6743022136669875, "grad_norm": 1.637009620666504, "learning_rate": 0.00014901211044161393, "loss": 1.1249, "step": 17515 }, { "epoch": 0.6744947064485082, "grad_norm": 1.4050389528274536, "learning_rate": 0.00014898574971715218, "loss": 1.1719, "step": 17520 }, { "epoch": 0.6746871992300288, "grad_norm": 1.7863889932632446, "learning_rate": 0.0001489593845132681, "loss": 1.2576, "step": 17525 }, { "epoch": 0.6748796920115495, "grad_norm": 1.149431586265564, "learning_rate": 0.00014893301483237263, "loss": 1.0863, "step": 17530 }, { "epoch": 0.6750721847930703, "grad_norm": 1.4066704511642456, "learning_rate": 0.0001489066406768771, "loss": 1.1338, "step": 17535 }, { "epoch": 0.675264677574591, "grad_norm": 1.2270228862762451, "learning_rate": 0.00014888026204919327, "loss": 1.1118, "step": 17540 }, { "epoch": 0.6754571703561116, "grad_norm": 1.6182643175125122, "learning_rate": 0.0001488538789517333, "loss": 1.3269, "step": 17545 }, { "epoch": 0.6756496631376323, "grad_norm": 2.3642048835754395, "learning_rate": 0.0001488274913869097, "loss": 1.439, "step": 17550 }, { "epoch": 0.6758421559191531, "grad_norm": 1.8097171783447266, "learning_rate": 0.00014880109935713548, "loss": 1.093, "step": 17555 }, { "epoch": 0.6760346487006738, "grad_norm": 0.8650147914886475, "learning_rate": 0.00014877470286482397, "loss": 1.0413, "step": 17560 }, { "epoch": 
0.6762271414821944, "grad_norm": 1.2217522859573364, "learning_rate": 0.00014874830191238903, "loss": 1.1818, "step": 17565 }, { "epoch": 0.6764196342637151, "grad_norm": 1.1500258445739746, "learning_rate": 0.00014872189650224477, "loss": 1.0607, "step": 17570 }, { "epoch": 0.6766121270452358, "grad_norm": 1.1867146492004395, "learning_rate": 0.00014869548663680584, "loss": 0.9716, "step": 17575 }, { "epoch": 0.6768046198267565, "grad_norm": 1.0046483278274536, "learning_rate": 0.00014866907231848723, "loss": 1.1875, "step": 17580 }, { "epoch": 0.6769971126082772, "grad_norm": 2.1072323322296143, "learning_rate": 0.00014864265354970436, "loss": 1.194, "step": 17585 }, { "epoch": 0.6771896053897979, "grad_norm": 1.4290494918823242, "learning_rate": 0.00014861623033287307, "loss": 1.2389, "step": 17590 }, { "epoch": 0.6773820981713186, "grad_norm": 0.8890597820281982, "learning_rate": 0.00014858980267040957, "loss": 0.9362, "step": 17595 }, { "epoch": 0.6775745909528392, "grad_norm": 0.9515128135681152, "learning_rate": 0.00014856337056473045, "loss": 1.039, "step": 17600 }, { "epoch": 0.67776708373436, "grad_norm": 1.540008544921875, "learning_rate": 0.00014853693401825283, "loss": 1.1778, "step": 17605 }, { "epoch": 0.6779595765158807, "grad_norm": 1.0766023397445679, "learning_rate": 0.00014851049303339414, "loss": 0.9362, "step": 17610 }, { "epoch": 0.6781520692974013, "grad_norm": 1.854201078414917, "learning_rate": 0.00014848404761257217, "loss": 0.9427, "step": 17615 }, { "epoch": 0.678344562078922, "grad_norm": 2.292722463607788, "learning_rate": 0.00014845759775820527, "loss": 1.0835, "step": 17620 }, { "epoch": 0.6785370548604427, "grad_norm": 1.768997311592102, "learning_rate": 0.00014843114347271204, "loss": 1.0976, "step": 17625 }, { "epoch": 0.6787295476419635, "grad_norm": 2.223881721496582, "learning_rate": 0.00014840468475851154, "loss": 1.1417, "step": 17630 }, { "epoch": 0.6789220404234841, "grad_norm": 1.1589646339416504, "learning_rate": 
0.0001483782216180233, "loss": 1.2536, "step": 17635 }, { "epoch": 0.6791145332050048, "grad_norm": 1.6478285789489746, "learning_rate": 0.00014835175405366718, "loss": 1.1534, "step": 17640 }, { "epoch": 0.6793070259865255, "grad_norm": 1.6837091445922852, "learning_rate": 0.00014832528206786344, "loss": 1.3415, "step": 17645 }, { "epoch": 0.6794995187680462, "grad_norm": 1.6697105169296265, "learning_rate": 0.00014829880566303273, "loss": 1.0241, "step": 17650 }, { "epoch": 0.6796920115495669, "grad_norm": 1.08551025390625, "learning_rate": 0.00014827232484159624, "loss": 1.2322, "step": 17655 }, { "epoch": 0.6798845043310876, "grad_norm": 1.9399616718292236, "learning_rate": 0.00014824583960597543, "loss": 1.393, "step": 17660 }, { "epoch": 0.6800769971126083, "grad_norm": 1.0628485679626465, "learning_rate": 0.00014821934995859216, "loss": 1.2078, "step": 17665 }, { "epoch": 0.6802694898941289, "grad_norm": 0.9613144397735596, "learning_rate": 0.00014819285590186875, "loss": 1.1234, "step": 17670 }, { "epoch": 0.6804619826756496, "grad_norm": 0.9686816930770874, "learning_rate": 0.00014816635743822795, "loss": 1.1959, "step": 17675 }, { "epoch": 0.6806544754571704, "grad_norm": 1.4415709972381592, "learning_rate": 0.00014813985457009282, "loss": 1.0775, "step": 17680 }, { "epoch": 0.6808469682386911, "grad_norm": 1.5800002813339233, "learning_rate": 0.00014811334729988688, "loss": 1.0802, "step": 17685 }, { "epoch": 0.6810394610202117, "grad_norm": 1.1061028242111206, "learning_rate": 0.0001480868356300341, "loss": 1.0415, "step": 17690 }, { "epoch": 0.6812319538017324, "grad_norm": 2.3262946605682373, "learning_rate": 0.00014806031956295868, "loss": 1.2431, "step": 17695 }, { "epoch": 0.6814244465832531, "grad_norm": 1.6517562866210938, "learning_rate": 0.00014803379910108543, "loss": 1.1792, "step": 17700 }, { "epoch": 0.6816169393647739, "grad_norm": 1.3823506832122803, "learning_rate": 0.00014800727424683948, "loss": 1.1293, "step": 17705 }, { "epoch": 
0.6818094321462945, "grad_norm": 1.5448585748672485, "learning_rate": 0.00014798074500264627, "loss": 1.2126, "step": 17710 }, { "epoch": 0.6820019249278152, "grad_norm": 1.2395973205566406, "learning_rate": 0.0001479542113709318, "loss": 1.3002, "step": 17715 }, { "epoch": 0.6821944177093359, "grad_norm": 1.8366637229919434, "learning_rate": 0.00014792767335412233, "loss": 1.1798, "step": 17720 }, { "epoch": 0.6823869104908566, "grad_norm": 1.3830804824829102, "learning_rate": 0.00014790113095464465, "loss": 1.3001, "step": 17725 }, { "epoch": 0.6825794032723773, "grad_norm": 10.001764297485352, "learning_rate": 0.0001478745841749259, "loss": 1.1643, "step": 17730 }, { "epoch": 0.682771896053898, "grad_norm": 1.0113561153411865, "learning_rate": 0.00014784803301739352, "loss": 1.1725, "step": 17735 }, { "epoch": 0.6829643888354187, "grad_norm": 2.7240827083587646, "learning_rate": 0.00014782147748447554, "loss": 1.2348, "step": 17740 }, { "epoch": 0.6831568816169393, "grad_norm": 1.0802150964736938, "learning_rate": 0.00014779491757860015, "loss": 1.3556, "step": 17745 }, { "epoch": 0.6833493743984601, "grad_norm": 1.6339032649993896, "learning_rate": 0.00014776835330219623, "loss": 0.9967, "step": 17750 }, { "epoch": 0.6835418671799808, "grad_norm": 1.6983892917633057, "learning_rate": 0.0001477417846576928, "loss": 1.041, "step": 17755 }, { "epoch": 0.6837343599615014, "grad_norm": 1.6230486631393433, "learning_rate": 0.00014771521164751942, "loss": 1.2298, "step": 17760 }, { "epoch": 0.6839268527430221, "grad_norm": 1.1079175472259521, "learning_rate": 0.00014768863427410604, "loss": 1.214, "step": 17765 }, { "epoch": 0.6841193455245428, "grad_norm": 1.1601203680038452, "learning_rate": 0.00014766205253988294, "loss": 1.2399, "step": 17770 }, { "epoch": 0.6843118383060636, "grad_norm": 2.2776849269866943, "learning_rate": 0.00014763546644728088, "loss": 1.0071, "step": 17775 }, { "epoch": 0.6845043310875842, "grad_norm": 1.362021565437317, "learning_rate": 
0.00014760887599873094, "loss": 1.1233, "step": 17780 }, { "epoch": 0.6846968238691049, "grad_norm": 1.933518409729004, "learning_rate": 0.00014758228119666472, "loss": 1.0854, "step": 17785 }, { "epoch": 0.6848893166506256, "grad_norm": 1.148533582687378, "learning_rate": 0.00014755568204351407, "loss": 1.0694, "step": 17790 }, { "epoch": 0.6850818094321462, "grad_norm": 1.2880831956863403, "learning_rate": 0.0001475290785417113, "loss": 1.0814, "step": 17795 }, { "epoch": 0.685274302213667, "grad_norm": 1.5790437459945679, "learning_rate": 0.0001475024706936892, "loss": 1.0467, "step": 17800 }, { "epoch": 0.6854667949951877, "grad_norm": 1.636828899383545, "learning_rate": 0.0001474758585018808, "loss": 1.3419, "step": 17805 }, { "epoch": 0.6856592877767084, "grad_norm": 1.0403766632080078, "learning_rate": 0.00014744924196871963, "loss": 1.1468, "step": 17810 }, { "epoch": 0.685851780558229, "grad_norm": 1.1266472339630127, "learning_rate": 0.0001474226210966396, "loss": 1.2723, "step": 17815 }, { "epoch": 0.6860442733397497, "grad_norm": 1.352543830871582, "learning_rate": 0.00014739599588807506, "loss": 1.1345, "step": 17820 }, { "epoch": 0.6862367661212705, "grad_norm": 1.674023151397705, "learning_rate": 0.00014736936634546062, "loss": 1.2522, "step": 17825 }, { "epoch": 0.6864292589027912, "grad_norm": 1.3684656620025635, "learning_rate": 0.00014734273247123144, "loss": 1.1169, "step": 17830 }, { "epoch": 0.6866217516843118, "grad_norm": 1.917075514793396, "learning_rate": 0.00014731609426782297, "loss": 1.2523, "step": 17835 }, { "epoch": 0.6868142444658325, "grad_norm": 1.5463966131210327, "learning_rate": 0.00014728945173767116, "loss": 0.9929, "step": 17840 }, { "epoch": 0.6870067372473532, "grad_norm": 1.7427698373794556, "learning_rate": 0.00014726280488321222, "loss": 1.22, "step": 17845 }, { "epoch": 0.687199230028874, "grad_norm": 1.8021422624588013, "learning_rate": 0.0001472361537068829, "loss": 1.0429, "step": 17850 }, { "epoch": 
0.6873917228103946, "grad_norm": 1.571053147315979, "learning_rate": 0.0001472094982111202, "loss": 1.17, "step": 17855 }, { "epoch": 0.6875842155919153, "grad_norm": 1.3607596158981323, "learning_rate": 0.00014718283839836166, "loss": 1.0644, "step": 17860 }, { "epoch": 0.687776708373436, "grad_norm": 0.9396845102310181, "learning_rate": 0.00014715617427104504, "loss": 1.0807, "step": 17865 }, { "epoch": 0.6879692011549567, "grad_norm": 1.605432152748108, "learning_rate": 0.00014712950583160872, "loss": 1.0641, "step": 17870 }, { "epoch": 0.6881616939364774, "grad_norm": 1.4847965240478516, "learning_rate": 0.0001471081679769722, "loss": 1.1625, "step": 17875 }, { "epoch": 0.6883541867179981, "grad_norm": 1.930336594581604, "learning_rate": 0.00014708149178186593, "loss": 1.3346, "step": 17880 }, { "epoch": 0.6885466794995188, "grad_norm": 1.7398570775985718, "learning_rate": 0.00014705481128146917, "loss": 1.2316, "step": 17885 }, { "epoch": 0.6887391722810394, "grad_norm": 1.5817015171051025, "learning_rate": 0.00014702812647822162, "loss": 1.0292, "step": 17890 }, { "epoch": 0.6889316650625602, "grad_norm": 3.2520430088043213, "learning_rate": 0.00014700143737456342, "loss": 1.088, "step": 17895 }, { "epoch": 0.6891241578440809, "grad_norm": 2.165456533432007, "learning_rate": 0.00014697474397293517, "loss": 0.9452, "step": 17900 }, { "epoch": 0.6893166506256015, "grad_norm": 0.9637191295623779, "learning_rate": 0.00014694804627577771, "loss": 1.266, "step": 17905 }, { "epoch": 0.6895091434071222, "grad_norm": 1.9606934785842896, "learning_rate": 0.00014692134428553248, "loss": 1.0773, "step": 17910 }, { "epoch": 0.6897016361886429, "grad_norm": 1.1911338567733765, "learning_rate": 0.0001468946380046411, "loss": 1.1359, "step": 17915 }, { "epoch": 0.6898941289701637, "grad_norm": 1.3913235664367676, "learning_rate": 0.00014686792743554575, "loss": 1.3053, "step": 17920 }, { "epoch": 0.6900866217516843, "grad_norm": 1.2314075231552124, "learning_rate": 
0.00014684121258068888, "loss": 1.0624, "step": 17925 }, { "epoch": 0.690279114533205, "grad_norm": 2.1499176025390625, "learning_rate": 0.00014681449344251338, "loss": 1.2147, "step": 17930 }, { "epoch": 0.6904716073147257, "grad_norm": 1.6417664289474487, "learning_rate": 0.00014678777002346264, "loss": 1.1139, "step": 17935 }, { "epoch": 0.6906641000962463, "grad_norm": 1.181154727935791, "learning_rate": 0.00014676104232598026, "loss": 1.0503, "step": 17940 }, { "epoch": 0.6908565928777671, "grad_norm": 1.7786331176757812, "learning_rate": 0.00014673431035251027, "loss": 1.05, "step": 17945 }, { "epoch": 0.6910490856592878, "grad_norm": 0.948625922203064, "learning_rate": 0.00014670757410549724, "loss": 1.0888, "step": 17950 }, { "epoch": 0.6912415784408085, "grad_norm": 1.9812164306640625, "learning_rate": 0.00014668083358738597, "loss": 1.1467, "step": 17955 }, { "epoch": 0.6914340712223291, "grad_norm": 0.9091313481330872, "learning_rate": 0.0001466540888006217, "loss": 1.1226, "step": 17960 }, { "epoch": 0.6916265640038498, "grad_norm": 2.100114583969116, "learning_rate": 0.00014662733974765005, "loss": 1.1233, "step": 17965 }, { "epoch": 0.6918190567853706, "grad_norm": 2.0999033451080322, "learning_rate": 0.00014660058643091702, "loss": 1.086, "step": 17970 }, { "epoch": 0.6920115495668913, "grad_norm": 1.543411374092102, "learning_rate": 0.0001465738288528691, "loss": 1.218, "step": 17975 }, { "epoch": 0.6922040423484119, "grad_norm": 2.6429097652435303, "learning_rate": 0.00014654706701595305, "loss": 1.1425, "step": 17980 }, { "epoch": 0.6923965351299326, "grad_norm": 1.258535385131836, "learning_rate": 0.00014652030092261606, "loss": 1.124, "step": 17985 }, { "epoch": 0.6925890279114533, "grad_norm": 0.9203128814697266, "learning_rate": 0.00014649353057530573, "loss": 1.0035, "step": 17990 }, { "epoch": 0.692781520692974, "grad_norm": 1.7482789754867554, "learning_rate": 0.00014646675597647003, "loss": 1.2393, "step": 17995 }, { "epoch": 
0.6929740134744947, "grad_norm": 1.3026279211044312, "learning_rate": 0.0001464399771285573, "loss": 1.294, "step": 18000 }, { "epoch": 0.6931665062560154, "grad_norm": 1.5518649816513062, "learning_rate": 0.00014641319403401628, "loss": 1.2397, "step": 18005 }, { "epoch": 0.6933589990375361, "grad_norm": 1.3904852867126465, "learning_rate": 0.00014638640669529615, "loss": 1.079, "step": 18010 }, { "epoch": 0.6935514918190567, "grad_norm": 0.7677931189537048, "learning_rate": 0.0001463596151148464, "loss": 1.1485, "step": 18015 }, { "epoch": 0.6937439846005775, "grad_norm": 1.1935845613479614, "learning_rate": 0.00014633281929511696, "loss": 1.167, "step": 18020 }, { "epoch": 0.6939364773820982, "grad_norm": 1.8612521886825562, "learning_rate": 0.00014630601923855814, "loss": 1.2335, "step": 18025 }, { "epoch": 0.6941289701636189, "grad_norm": 1.9979881048202515, "learning_rate": 0.00014627921494762055, "loss": 1.0421, "step": 18030 }, { "epoch": 0.6943214629451395, "grad_norm": 1.9426400661468506, "learning_rate": 0.00014625240642475538, "loss": 1.0918, "step": 18035 }, { "epoch": 0.6945139557266603, "grad_norm": 0.9990954399108887, "learning_rate": 0.000146225593672414, "loss": 1.3456, "step": 18040 }, { "epoch": 0.694706448508181, "grad_norm": 2.187206745147705, "learning_rate": 0.00014619877669304834, "loss": 1.0926, "step": 18045 }, { "epoch": 0.6948989412897016, "grad_norm": 1.5417639017105103, "learning_rate": 0.00014617195548911053, "loss": 1.2796, "step": 18050 }, { "epoch": 0.6950914340712223, "grad_norm": 1.476150631904602, "learning_rate": 0.0001461451300630533, "loss": 1.1623, "step": 18055 }, { "epoch": 0.695283926852743, "grad_norm": 1.6524615287780762, "learning_rate": 0.0001461183004173296, "loss": 1.0976, "step": 18060 }, { "epoch": 0.6954764196342638, "grad_norm": 1.4800169467926025, "learning_rate": 0.0001460914665543928, "loss": 1.2641, "step": 18065 }, { "epoch": 0.6956689124157844, "grad_norm": 1.2046303749084473, "learning_rate": 
0.00014606462847669674, "loss": 1.0037, "step": 18070 }, { "epoch": 0.6958614051973051, "grad_norm": 1.3457711935043335, "learning_rate": 0.00014603778618669556, "loss": 1.1599, "step": 18075 }, { "epoch": 0.6960538979788258, "grad_norm": 1.8690896034240723, "learning_rate": 0.0001460109396868438, "loss": 1.3016, "step": 18080 }, { "epoch": 0.6962463907603464, "grad_norm": 0.8788353204727173, "learning_rate": 0.00014598408897959639, "loss": 1.0261, "step": 18085 }, { "epoch": 0.6964388835418672, "grad_norm": 1.064239501953125, "learning_rate": 0.00014595723406740868, "loss": 1.1159, "step": 18090 }, { "epoch": 0.6966313763233879, "grad_norm": 0.9102209210395813, "learning_rate": 0.00014593037495273635, "loss": 1.1263, "step": 18095 }, { "epoch": 0.6968238691049086, "grad_norm": 1.4841855764389038, "learning_rate": 0.00014590351163803545, "loss": 1.0526, "step": 18100 }, { "epoch": 0.6970163618864292, "grad_norm": 2.282543182373047, "learning_rate": 0.00014587664412576254, "loss": 1.0876, "step": 18105 }, { "epoch": 0.6972088546679499, "grad_norm": 1.149782657623291, "learning_rate": 0.0001458497724183744, "loss": 1.2092, "step": 18110 }, { "epoch": 0.6974013474494707, "grad_norm": 1.6531153917312622, "learning_rate": 0.0001458228965183283, "loss": 1.2421, "step": 18115 }, { "epoch": 0.6975938402309914, "grad_norm": 2.376281976699829, "learning_rate": 0.00014579601642808192, "loss": 1.2179, "step": 18120 }, { "epoch": 0.697786333012512, "grad_norm": 1.9077723026275635, "learning_rate": 0.0001457691321500932, "loss": 1.1962, "step": 18125 }, { "epoch": 0.6979788257940327, "grad_norm": 1.3130842447280884, "learning_rate": 0.00014574224368682048, "loss": 1.3169, "step": 18130 }, { "epoch": 0.6981713185755534, "grad_norm": 1.0211979150772095, "learning_rate": 0.00014571535104072262, "loss": 1.0256, "step": 18135 }, { "epoch": 0.6983638113570741, "grad_norm": 1.7479397058486938, "learning_rate": 0.00014568845421425875, "loss": 1.0906, "step": 18140 }, { "epoch": 
0.6985563041385948, "grad_norm": 1.3305407762527466, "learning_rate": 0.00014566155320988838, "loss": 1.206, "step": 18145 }, { "epoch": 0.6987487969201155, "grad_norm": 1.2185992002487183, "learning_rate": 0.00014563464803007145, "loss": 1.2765, "step": 18150 }, { "epoch": 0.6989412897016362, "grad_norm": 1.3256112337112427, "learning_rate": 0.00014560773867726827, "loss": 1.0899, "step": 18155 }, { "epoch": 0.6991337824831568, "grad_norm": 1.9090956449508667, "learning_rate": 0.0001455808251539395, "loss": 1.1944, "step": 18160 }, { "epoch": 0.6993262752646776, "grad_norm": 1.078116774559021, "learning_rate": 0.00014555390746254622, "loss": 1.1393, "step": 18165 }, { "epoch": 0.6995187680461983, "grad_norm": 1.21144437789917, "learning_rate": 0.00014552698560554988, "loss": 1.0835, "step": 18170 }, { "epoch": 0.699711260827719, "grad_norm": 1.4013081789016724, "learning_rate": 0.00014550005958541227, "loss": 1.0785, "step": 18175 }, { "epoch": 0.6999037536092396, "grad_norm": 1.102122187614441, "learning_rate": 0.00014547312940459562, "loss": 1.0839, "step": 18180 }, { "epoch": 0.7000962463907604, "grad_norm": 1.602994680404663, "learning_rate": 0.00014544619506556256, "loss": 1.2608, "step": 18185 }, { "epoch": 0.7002887391722811, "grad_norm": 2.8694801330566406, "learning_rate": 0.000145419256570776, "loss": 1.3161, "step": 18190 }, { "epoch": 0.7004812319538017, "grad_norm": 1.5687551498413086, "learning_rate": 0.00014539231392269927, "loss": 1.0668, "step": 18195 }, { "epoch": 0.7006737247353224, "grad_norm": 1.1013094186782837, "learning_rate": 0.00014536536712379618, "loss": 1.0829, "step": 18200 }, { "epoch": 0.7008662175168431, "grad_norm": 1.4294344186782837, "learning_rate": 0.00014533841617653075, "loss": 1.0003, "step": 18205 }, { "epoch": 0.7010587102983639, "grad_norm": 1.168997049331665, "learning_rate": 0.0001453114610833675, "loss": 1.2252, "step": 18210 }, { "epoch": 0.7012512030798845, "grad_norm": 1.21929132938385, "learning_rate": 
0.0001452845018467713, "loss": 1.102, "step": 18215 }, { "epoch": 0.7014436958614052, "grad_norm": 1.0682016611099243, "learning_rate": 0.00014525753846920738, "loss": 1.0219, "step": 18220 }, { "epoch": 0.7016361886429259, "grad_norm": 1.210161566734314, "learning_rate": 0.00014523057095314142, "loss": 1.0666, "step": 18225 }, { "epoch": 0.7018286814244465, "grad_norm": 0.9966996312141418, "learning_rate": 0.0001452035993010393, "loss": 1.1343, "step": 18230 }, { "epoch": 0.7020211742059673, "grad_norm": 1.2477959394454956, "learning_rate": 0.00014517662351536752, "loss": 1.2147, "step": 18235 }, { "epoch": 0.702213666987488, "grad_norm": 1.8020172119140625, "learning_rate": 0.00014514964359859276, "loss": 1.1945, "step": 18240 }, { "epoch": 0.7024061597690087, "grad_norm": 1.0535303354263306, "learning_rate": 0.0001451226595531822, "loss": 1.0792, "step": 18245 }, { "epoch": 0.7025986525505293, "grad_norm": 1.913590431213379, "learning_rate": 0.0001450956713816033, "loss": 1.1344, "step": 18250 }, { "epoch": 0.70279114533205, "grad_norm": 0.998621940612793, "learning_rate": 0.00014506867908632403, "loss": 1.1139, "step": 18255 }, { "epoch": 0.7029836381135708, "grad_norm": 1.8913546800613403, "learning_rate": 0.0001450416826698126, "loss": 1.0621, "step": 18260 }, { "epoch": 0.7031761308950915, "grad_norm": 1.0329716205596924, "learning_rate": 0.00014501468213453763, "loss": 1.2732, "step": 18265 }, { "epoch": 0.7033686236766121, "grad_norm": 0.9243387579917908, "learning_rate": 0.0001449876774829682, "loss": 1.2272, "step": 18270 }, { "epoch": 0.7035611164581328, "grad_norm": 1.6289262771606445, "learning_rate": 0.0001449606687175737, "loss": 1.0912, "step": 18275 }, { "epoch": 0.7037536092396535, "grad_norm": 2.005293607711792, "learning_rate": 0.00014493365584082384, "loss": 1.018, "step": 18280 }, { "epoch": 0.7039461020211742, "grad_norm": 1.2743504047393799, "learning_rate": 0.00014490663885518881, "loss": 1.0026, "step": 18285 }, { "epoch": 
0.7041385948026949, "grad_norm": 1.4915635585784912, "learning_rate": 0.00014487961776313922, "loss": 1.0489, "step": 18290 }, { "epoch": 0.7043310875842156, "grad_norm": 0.9605044722557068, "learning_rate": 0.00014485259256714577, "loss": 1.1053, "step": 18295 }, { "epoch": 0.7045235803657363, "grad_norm": 1.8121784925460815, "learning_rate": 0.0001448255632696799, "loss": 1.1862, "step": 18300 }, { "epoch": 0.7047160731472569, "grad_norm": 1.2540571689605713, "learning_rate": 0.00014479852987321322, "loss": 1.1361, "step": 18305 }, { "epoch": 0.7049085659287777, "grad_norm": 1.4160270690917969, "learning_rate": 0.00014477149238021776, "loss": 1.0917, "step": 18310 }, { "epoch": 0.7051010587102984, "grad_norm": 1.4298075437545776, "learning_rate": 0.0001447444507931659, "loss": 1.1407, "step": 18315 }, { "epoch": 0.705293551491819, "grad_norm": 1.0214334726333618, "learning_rate": 0.00014471740511453037, "loss": 1.0714, "step": 18320 }, { "epoch": 0.7054860442733397, "grad_norm": 1.6246428489685059, "learning_rate": 0.00014469035534678444, "loss": 1.258, "step": 18325 }, { "epoch": 0.7056785370548604, "grad_norm": 1.5467473268508911, "learning_rate": 0.0001446633014924015, "loss": 1.1811, "step": 18330 }, { "epoch": 0.7058710298363812, "grad_norm": 2.038041114807129, "learning_rate": 0.00014463624355385557, "loss": 1.1339, "step": 18335 }, { "epoch": 0.7060635226179018, "grad_norm": 1.5328725576400757, "learning_rate": 0.0001446091815336208, "loss": 1.2261, "step": 18340 }, { "epoch": 0.7062560153994225, "grad_norm": 0.9550712704658508, "learning_rate": 0.0001445821154341719, "loss": 1.0973, "step": 18345 }, { "epoch": 0.7064485081809432, "grad_norm": 1.4610974788665771, "learning_rate": 0.0001445550452579839, "loss": 1.2341, "step": 18350 }, { "epoch": 0.706641000962464, "grad_norm": 1.9539941549301147, "learning_rate": 0.00014452797100753212, "loss": 1.1115, "step": 18355 }, { "epoch": 0.7068334937439846, "grad_norm": 1.136670708656311, "learning_rate": 
0.0001445008926852924, "loss": 1.1883, "step": 18360 }, { "epoch": 0.7070259865255053, "grad_norm": 1.2136088609695435, "learning_rate": 0.00014447381029374082, "loss": 1.1384, "step": 18365 }, { "epoch": 0.707218479307026, "grad_norm": 1.3836339712142944, "learning_rate": 0.00014444672383535388, "loss": 1.2371, "step": 18370 }, { "epoch": 0.7074109720885466, "grad_norm": 1.6226662397384644, "learning_rate": 0.00014441963331260848, "loss": 1.3057, "step": 18375 }, { "epoch": 0.7076034648700674, "grad_norm": 1.249576449394226, "learning_rate": 0.0001443925387279819, "loss": 1.0849, "step": 18380 }, { "epoch": 0.7077959576515881, "grad_norm": 1.9330114126205444, "learning_rate": 0.0001443654400839517, "loss": 0.9933, "step": 18385 }, { "epoch": 0.7079884504331088, "grad_norm": 1.4878582954406738, "learning_rate": 0.0001443383373829959, "loss": 0.8842, "step": 18390 }, { "epoch": 0.7081809432146294, "grad_norm": 2.3553292751312256, "learning_rate": 0.00014431123062759286, "loss": 1.1733, "step": 18395 }, { "epoch": 0.7083734359961501, "grad_norm": 0.8834003210067749, "learning_rate": 0.00014428411982022135, "loss": 1.1275, "step": 18400 }, { "epoch": 0.7085659287776709, "grad_norm": 1.331040620803833, "learning_rate": 0.00014425700496336038, "loss": 1.0753, "step": 18405 }, { "epoch": 0.7087584215591916, "grad_norm": 1.0972214937210083, "learning_rate": 0.0001442298860594895, "loss": 1.2045, "step": 18410 }, { "epoch": 0.7089509143407122, "grad_norm": 1.5350794792175293, "learning_rate": 0.00014420276311108857, "loss": 1.0097, "step": 18415 }, { "epoch": 0.7091434071222329, "grad_norm": 1.8360435962677002, "learning_rate": 0.00014417563612063777, "loss": 1.177, "step": 18420 }, { "epoch": 0.7093358999037536, "grad_norm": 1.0898863077163696, "learning_rate": 0.00014414850509061764, "loss": 1.0374, "step": 18425 }, { "epoch": 0.7095283926852743, "grad_norm": 1.2654744386672974, "learning_rate": 0.00014412137002350919, "loss": 1.1494, "step": 18430 }, { "epoch": 
0.709720885466795, "grad_norm": 1.8603087663650513, "learning_rate": 0.00014409423092179375, "loss": 1.2723, "step": 18435 }, { "epoch": 0.7099133782483157, "grad_norm": 0.9974476099014282, "learning_rate": 0.00014406708778795296, "loss": 1.1139, "step": 18440 }, { "epoch": 0.7101058710298364, "grad_norm": 0.998330295085907, "learning_rate": 0.00014403994062446893, "loss": 1.2881, "step": 18445 }, { "epoch": 0.710298363811357, "grad_norm": 2.04758882522583, "learning_rate": 0.00014401278943382406, "loss": 1.0089, "step": 18450 }, { "epoch": 0.7104908565928778, "grad_norm": 1.301059603691101, "learning_rate": 0.0001439856342185012, "loss": 1.1405, "step": 18455 }, { "epoch": 0.7106833493743985, "grad_norm": 1.684041142463684, "learning_rate": 0.00014395847498098338, "loss": 1.1387, "step": 18460 }, { "epoch": 0.7108758421559191, "grad_norm": 1.95292067527771, "learning_rate": 0.0001439313117237543, "loss": 1.1659, "step": 18465 }, { "epoch": 0.7110683349374398, "grad_norm": 1.1917790174484253, "learning_rate": 0.00014390414444929775, "loss": 1.0497, "step": 18470 }, { "epoch": 0.7112608277189605, "grad_norm": 1.1583658456802368, "learning_rate": 0.000143876973160098, "loss": 1.2276, "step": 18475 }, { "epoch": 0.7114533205004813, "grad_norm": 1.116721749305725, "learning_rate": 0.00014384979785863976, "loss": 1.3688, "step": 18480 }, { "epoch": 0.7116458132820019, "grad_norm": 1.1651076078414917, "learning_rate": 0.00014382261854740795, "loss": 1.3093, "step": 18485 }, { "epoch": 0.7118383060635226, "grad_norm": 1.2162317037582397, "learning_rate": 0.00014379543522888798, "loss": 1.1324, "step": 18490 }, { "epoch": 0.7120307988450433, "grad_norm": 1.5792020559310913, "learning_rate": 0.0001437682479055656, "loss": 1.123, "step": 18495 }, { "epoch": 0.7122232916265641, "grad_norm": 0.9636641144752502, "learning_rate": 0.00014374105657992688, "loss": 1.0547, "step": 18500 }, { "epoch": 0.7124157844080847, "grad_norm": 1.1409319639205933, "learning_rate": 
0.00014371386125445828, "loss": 1.1277, "step": 18505 }, { "epoch": 0.7126082771896054, "grad_norm": 1.074267029762268, "learning_rate": 0.00014368666193164664, "loss": 1.1041, "step": 18510 }, { "epoch": 0.7128007699711261, "grad_norm": 1.2324203252792358, "learning_rate": 0.00014365945861397918, "loss": 1.1274, "step": 18515 }, { "epoch": 0.7129932627526467, "grad_norm": 1.2441449165344238, "learning_rate": 0.00014363225130394343, "loss": 1.0739, "step": 18520 }, { "epoch": 0.7131857555341675, "grad_norm": 1.0249239206314087, "learning_rate": 0.00014360504000402737, "loss": 1.1945, "step": 18525 }, { "epoch": 0.7133782483156882, "grad_norm": 1.0297977924346924, "learning_rate": 0.00014357782471671922, "loss": 1.1694, "step": 18530 }, { "epoch": 0.7135707410972089, "grad_norm": 1.6610252857208252, "learning_rate": 0.00014355060544450767, "loss": 1.2034, "step": 18535 }, { "epoch": 0.7137632338787295, "grad_norm": 1.290869951248169, "learning_rate": 0.0001435233821898818, "loss": 1.1195, "step": 18540 }, { "epoch": 0.7139557266602502, "grad_norm": 1.4730745553970337, "learning_rate": 0.0001434961549553309, "loss": 1.1237, "step": 18545 }, { "epoch": 0.714148219441771, "grad_norm": 1.0857551097869873, "learning_rate": 0.00014346892374334479, "loss": 1.013, "step": 18550 }, { "epoch": 0.7143407122232917, "grad_norm": 1.0761737823486328, "learning_rate": 0.00014344168855641356, "loss": 0.9948, "step": 18555 }, { "epoch": 0.7145332050048123, "grad_norm": 2.012099027633667, "learning_rate": 0.00014341444939702767, "loss": 1.1598, "step": 18560 }, { "epoch": 0.714725697786333, "grad_norm": 1.837538242340088, "learning_rate": 0.000143387206267678, "loss": 1.1389, "step": 18565 }, { "epoch": 0.7149181905678537, "grad_norm": 1.1099295616149902, "learning_rate": 0.0001433599591708557, "loss": 1.0835, "step": 18570 }, { "epoch": 0.7151106833493744, "grad_norm": 0.9746969938278198, "learning_rate": 0.00014333270810905238, "loss": 0.973, "step": 18575 }, { "epoch": 
0.7153031761308951, "grad_norm": 1.9786537885665894, "learning_rate": 0.00014330545308475996, "loss": 1.1564, "step": 18580 }, { "epoch": 0.7154956689124158, "grad_norm": 1.020973801612854, "learning_rate": 0.0001432781941004707, "loss": 1.0202, "step": 18585 }, { "epoch": 0.7156881616939365, "grad_norm": 1.2314329147338867, "learning_rate": 0.0001432509311586773, "loss": 1.2654, "step": 18590 }, { "epoch": 0.7158806544754571, "grad_norm": 1.1897294521331787, "learning_rate": 0.00014322366426187277, "loss": 1.3241, "step": 18595 }, { "epoch": 0.7160731472569779, "grad_norm": 1.2122468948364258, "learning_rate": 0.00014319639341255048, "loss": 1.0044, "step": 18600 }, { "epoch": 0.7162656400384986, "grad_norm": 1.5471996068954468, "learning_rate": 0.00014316911861320415, "loss": 1.2251, "step": 18605 }, { "epoch": 0.7164581328200192, "grad_norm": 1.4441865682601929, "learning_rate": 0.00014314183986632788, "loss": 1.1717, "step": 18610 }, { "epoch": 0.7166506256015399, "grad_norm": 1.092637300491333, "learning_rate": 0.00014311455717441616, "loss": 1.0724, "step": 18615 }, { "epoch": 0.7168431183830606, "grad_norm": 1.0974675416946411, "learning_rate": 0.00014308727053996377, "loss": 1.0623, "step": 18620 }, { "epoch": 0.7170356111645814, "grad_norm": 1.513769507408142, "learning_rate": 0.00014305997996546594, "loss": 1.1027, "step": 18625 }, { "epoch": 0.717228103946102, "grad_norm": 1.0637279748916626, "learning_rate": 0.00014303268545341817, "loss": 1.0313, "step": 18630 }, { "epoch": 0.7174205967276227, "grad_norm": 1.3569130897521973, "learning_rate": 0.00014300538700631643, "loss": 1.0324, "step": 18635 }, { "epoch": 0.7176130895091434, "grad_norm": 1.0008260011672974, "learning_rate": 0.00014297808462665688, "loss": 1.0383, "step": 18640 }, { "epoch": 0.717805582290664, "grad_norm": 1.291493535041809, "learning_rate": 0.0001429507783169362, "loss": 1.2128, "step": 18645 }, { "epoch": 0.7179980750721848, "grad_norm": 2.5597760677337646, "learning_rate": 
0.0001429234680796514, "loss": 1.146, "step": 18650 }, { "epoch": 0.7181905678537055, "grad_norm": 2.4308478832244873, "learning_rate": 0.00014289615391729974, "loss": 1.3797, "step": 18655 }, { "epoch": 0.7183830606352262, "grad_norm": 1.1110010147094727, "learning_rate": 0.00014286883583237896, "loss": 1.2471, "step": 18660 }, { "epoch": 0.7185755534167468, "grad_norm": 1.075013279914856, "learning_rate": 0.00014284151382738718, "loss": 1.0836, "step": 18665 }, { "epoch": 0.7187680461982676, "grad_norm": 1.9422922134399414, "learning_rate": 0.00014281418790482273, "loss": 1.3271, "step": 18670 }, { "epoch": 0.7189605389797883, "grad_norm": 1.58540678024292, "learning_rate": 0.00014278685806718442, "loss": 1.0762, "step": 18675 }, { "epoch": 0.719153031761309, "grad_norm": 1.1696521043777466, "learning_rate": 0.00014275952431697138, "loss": 1.1783, "step": 18680 }, { "epoch": 0.7193455245428296, "grad_norm": 1.6518898010253906, "learning_rate": 0.0001427321866566831, "loss": 0.9509, "step": 18685 }, { "epoch": 0.7195380173243503, "grad_norm": 1.2448405027389526, "learning_rate": 0.0001427048450888194, "loss": 1.1316, "step": 18690 }, { "epoch": 0.7197305101058711, "grad_norm": 0.9715486168861389, "learning_rate": 0.00014267749961588053, "loss": 1.1547, "step": 18695 }, { "epoch": 0.7199230028873917, "grad_norm": 1.176511287689209, "learning_rate": 0.00014265015024036702, "loss": 1.0325, "step": 18700 }, { "epoch": 0.7201154956689124, "grad_norm": 1.096604824066162, "learning_rate": 0.0001426227969647798, "loss": 0.9389, "step": 18705 }, { "epoch": 0.7203079884504331, "grad_norm": 1.2895269393920898, "learning_rate": 0.00014259543979162017, "loss": 1.1157, "step": 18710 }, { "epoch": 0.7205004812319538, "grad_norm": 1.1590831279754639, "learning_rate": 0.00014256807872338974, "loss": 1.0154, "step": 18715 }, { "epoch": 0.7206929740134745, "grad_norm": 1.1659713983535767, "learning_rate": 0.00014254071376259046, "loss": 1.0744, "step": 18720 }, { "epoch": 
0.7208854667949952, "grad_norm": 1.3548671007156372, "learning_rate": 0.00014251334491172473, "loss": 0.9823, "step": 18725 }, { "epoch": 0.7210779595765159, "grad_norm": 1.5639405250549316, "learning_rate": 0.00014248597217329526, "loss": 1.1793, "step": 18730 }, { "epoch": 0.7212704523580366, "grad_norm": 1.9836759567260742, "learning_rate": 0.00014245859554980504, "loss": 1.1827, "step": 18735 }, { "epoch": 0.7214629451395572, "grad_norm": 1.2241086959838867, "learning_rate": 0.00014243121504375753, "loss": 1.2403, "step": 18740 }, { "epoch": 0.721655437921078, "grad_norm": 1.1298317909240723, "learning_rate": 0.0001424038306576565, "loss": 1.1577, "step": 18745 }, { "epoch": 0.7218479307025987, "grad_norm": 1.325210452079773, "learning_rate": 0.00014237644239400605, "loss": 1.1232, "step": 18750 }, { "epoch": 0.7220404234841193, "grad_norm": 1.613929033279419, "learning_rate": 0.00014234905025531066, "loss": 0.9627, "step": 18755 }, { "epoch": 0.72223291626564, "grad_norm": 1.6307876110076904, "learning_rate": 0.00014232165424407517, "loss": 1.0229, "step": 18760 }, { "epoch": 0.7224254090471607, "grad_norm": 0.8971173167228699, "learning_rate": 0.00014229425436280475, "loss": 1.1371, "step": 18765 }, { "epoch": 0.7226179018286815, "grad_norm": 1.3740814924240112, "learning_rate": 0.00014226685061400496, "loss": 1.1431, "step": 18770 }, { "epoch": 0.7228103946102021, "grad_norm": 1.7433820962905884, "learning_rate": 0.00014223944300018163, "loss": 1.1876, "step": 18775 }, { "epoch": 0.7230028873917228, "grad_norm": 1.1470065116882324, "learning_rate": 0.0001422120315238411, "loss": 1.1971, "step": 18780 }, { "epoch": 0.7231953801732435, "grad_norm": 2.0566489696502686, "learning_rate": 0.00014218461618748987, "loss": 1.0274, "step": 18785 }, { "epoch": 0.7233878729547641, "grad_norm": 1.589087724685669, "learning_rate": 0.00014215719699363496, "loss": 1.067, "step": 18790 }, { "epoch": 0.7235803657362849, "grad_norm": 2.473461866378784, "learning_rate": 
0.00014212977394478365, "loss": 1.2185, "step": 18795 }, { "epoch": 0.7237728585178056, "grad_norm": 1.3214609622955322, "learning_rate": 0.00014210234704344359, "loss": 1.2501, "step": 18800 }, { "epoch": 0.7239653512993263, "grad_norm": 2.281226873397827, "learning_rate": 0.0001420749162921228, "loss": 1.2262, "step": 18805 }, { "epoch": 0.7241578440808469, "grad_norm": 1.188148021697998, "learning_rate": 0.0001420474816933296, "loss": 1.3338, "step": 18810 }, { "epoch": 0.7243503368623677, "grad_norm": 2.0242867469787598, "learning_rate": 0.00014202004324957279, "loss": 1.1157, "step": 18815 }, { "epoch": 0.7245428296438884, "grad_norm": 1.2399152517318726, "learning_rate": 0.00014199260096336134, "loss": 1.119, "step": 18820 }, { "epoch": 0.7247353224254091, "grad_norm": 1.7323557138442993, "learning_rate": 0.00014196515483720477, "loss": 1.1672, "step": 18825 }, { "epoch": 0.7249278152069297, "grad_norm": 1.4888850450515747, "learning_rate": 0.00014193770487361273, "loss": 0.9814, "step": 18830 }, { "epoch": 0.7251203079884504, "grad_norm": 1.5241479873657227, "learning_rate": 0.0001419102510750954, "loss": 1.0614, "step": 18835 }, { "epoch": 0.7253128007699712, "grad_norm": 1.2932441234588623, "learning_rate": 0.00014188279344416323, "loss": 1.1905, "step": 18840 }, { "epoch": 0.7255052935514918, "grad_norm": 1.4357131719589233, "learning_rate": 0.0001418553319833271, "loss": 1.1303, "step": 18845 }, { "epoch": 0.7256977863330125, "grad_norm": 2.1818439960479736, "learning_rate": 0.00014182786669509806, "loss": 1.2141, "step": 18850 }, { "epoch": 0.7258902791145332, "grad_norm": 2.111520528793335, "learning_rate": 0.00014180039758198774, "loss": 1.165, "step": 18855 }, { "epoch": 0.7260827718960539, "grad_norm": 1.3923039436340332, "learning_rate": 0.00014177292464650796, "loss": 1.3364, "step": 18860 }, { "epoch": 0.7262752646775746, "grad_norm": 1.905661702156067, "learning_rate": 0.0001417454478911709, "loss": 1.1535, "step": 18865 }, { "epoch": 
0.7264677574590953, "grad_norm": 1.1814746856689453, "learning_rate": 0.0001417179673184892, "loss": 1.2141, "step": 18870 }, { "epoch": 0.726660250240616, "grad_norm": 1.4515434503555298, "learning_rate": 0.00014169048293097576, "loss": 1.0955, "step": 18875 }, { "epoch": 0.7268527430221366, "grad_norm": 1.2174112796783447, "learning_rate": 0.0001416629947311438, "loss": 1.0399, "step": 18880 }, { "epoch": 0.7270452358036573, "grad_norm": 1.3769662380218506, "learning_rate": 0.00014163550272150698, "loss": 1.2164, "step": 18885 }, { "epoch": 0.7272377285851781, "grad_norm": 1.3401464223861694, "learning_rate": 0.00014160800690457927, "loss": 1.1039, "step": 18890 }, { "epoch": 0.7274302213666988, "grad_norm": 1.1210380792617798, "learning_rate": 0.0001415805072828749, "loss": 1.0771, "step": 18895 }, { "epoch": 0.7276227141482194, "grad_norm": 1.3425636291503906, "learning_rate": 0.00014155300385890863, "loss": 1.1506, "step": 18900 }, { "epoch": 0.7278152069297401, "grad_norm": 1.155220866203308, "learning_rate": 0.0001415254966351954, "loss": 1.1321, "step": 18905 }, { "epoch": 0.7280076997112608, "grad_norm": 1.440024733543396, "learning_rate": 0.0001414979856142506, "loss": 1.2324, "step": 18910 }, { "epoch": 0.7282001924927816, "grad_norm": 1.6521823406219482, "learning_rate": 0.0001414704707985899, "loss": 1.196, "step": 18915 }, { "epoch": 0.7283926852743022, "grad_norm": 3.4958372116088867, "learning_rate": 0.00014144295219072937, "loss": 0.9906, "step": 18920 }, { "epoch": 0.7285851780558229, "grad_norm": 0.9254593849182129, "learning_rate": 0.00014141542979318538, "loss": 1.2552, "step": 18925 }, { "epoch": 0.7287776708373436, "grad_norm": 1.519364833831787, "learning_rate": 0.00014138790360847473, "loss": 1.0491, "step": 18930 }, { "epoch": 0.7289701636188642, "grad_norm": 1.199167013168335, "learning_rate": 0.0001413603736391144, "loss": 0.9939, "step": 18935 }, { "epoch": 0.729162656400385, "grad_norm": 1.0213391780853271, "learning_rate": 
0.00014133283988762192, "loss": 1.222, "step": 18940 }, { "epoch": 0.7293551491819057, "grad_norm": 1.27894127368927, "learning_rate": 0.00014130530235651506, "loss": 1.2881, "step": 18945 }, { "epoch": 0.7295476419634264, "grad_norm": 1.1660070419311523, "learning_rate": 0.0001412777610483119, "loss": 1.1839, "step": 18950 }, { "epoch": 0.729740134744947, "grad_norm": 0.9614414572715759, "learning_rate": 0.00014125021596553093, "loss": 1.0397, "step": 18955 }, { "epoch": 0.7299326275264677, "grad_norm": 1.5278538465499878, "learning_rate": 0.00014122266711069095, "loss": 1.2835, "step": 18960 }, { "epoch": 0.7301251203079885, "grad_norm": 1.2992238998413086, "learning_rate": 0.00014119511448631118, "loss": 1.2873, "step": 18965 }, { "epoch": 0.7303176130895092, "grad_norm": 1.0794028043746948, "learning_rate": 0.00014116755809491104, "loss": 1.1677, "step": 18970 }, { "epoch": 0.7305101058710298, "grad_norm": 1.672555685043335, "learning_rate": 0.00014113999793901046, "loss": 0.9295, "step": 18975 }, { "epoch": 0.7307025986525505, "grad_norm": 1.630053997039795, "learning_rate": 0.00014111243402112957, "loss": 1.1635, "step": 18980 }, { "epoch": 0.7308950914340713, "grad_norm": 1.3171367645263672, "learning_rate": 0.00014108486634378895, "loss": 1.065, "step": 18985 }, { "epoch": 0.731087584215592, "grad_norm": 1.1997402906417847, "learning_rate": 0.00014105729490950948, "loss": 1.0747, "step": 18990 }, { "epoch": 0.7312800769971126, "grad_norm": 1.6320029497146606, "learning_rate": 0.00014102971972081233, "loss": 1.2414, "step": 18995 }, { "epoch": 0.7314725697786333, "grad_norm": 1.3852897882461548, "learning_rate": 0.00014100214078021915, "loss": 1.0307, "step": 19000 }, { "epoch": 0.731665062560154, "grad_norm": 1.29547119140625, "learning_rate": 0.00014097455809025178, "loss": 1.1411, "step": 19005 }, { "epoch": 0.7318575553416747, "grad_norm": 1.0764034986495972, "learning_rate": 0.00014094697165343252, "loss": 1.1789, "step": 19010 }, { "epoch": 
0.7320500481231954, "grad_norm": 1.7445317506790161, "learning_rate": 0.00014091938147228395, "loss": 1.2379, "step": 19015 }, { "epoch": 0.7322425409047161, "grad_norm": 1.844789743423462, "learning_rate": 0.00014089178754932898, "loss": 1.0126, "step": 19020 }, { "epoch": 0.7324350336862367, "grad_norm": 1.370970368385315, "learning_rate": 0.00014086418988709095, "loss": 1.3182, "step": 19025 }, { "epoch": 0.7326275264677574, "grad_norm": 1.2565025091171265, "learning_rate": 0.00014083658848809347, "loss": 1.1753, "step": 19030 }, { "epoch": 0.7328200192492782, "grad_norm": 1.7159111499786377, "learning_rate": 0.00014080898335486046, "loss": 1.0572, "step": 19035 }, { "epoch": 0.7330125120307989, "grad_norm": 1.8323345184326172, "learning_rate": 0.0001407813744899163, "loss": 1.0822, "step": 19040 }, { "epoch": 0.7332050048123195, "grad_norm": 1.6878646612167358, "learning_rate": 0.00014075376189578553, "loss": 1.1133, "step": 19045 }, { "epoch": 0.7333974975938402, "grad_norm": 1.7448841333389282, "learning_rate": 0.00014072614557499323, "loss": 1.0922, "step": 19050 }, { "epoch": 0.7335899903753609, "grad_norm": 0.9125509262084961, "learning_rate": 0.00014069852553006472, "loss": 1.1788, "step": 19055 }, { "epoch": 0.7337824831568817, "grad_norm": 1.8741627931594849, "learning_rate": 0.00014067090176352563, "loss": 1.0538, "step": 19060 }, { "epoch": 0.7339749759384023, "grad_norm": 3.1138720512390137, "learning_rate": 0.00014064327427790201, "loss": 1.256, "step": 19065 }, { "epoch": 0.734167468719923, "grad_norm": 1.3083161115646362, "learning_rate": 0.00014061564307572022, "loss": 0.976, "step": 19070 }, { "epoch": 0.7343599615014437, "grad_norm": 1.176721215248108, "learning_rate": 0.00014058800815950687, "loss": 1.1733, "step": 19075 }, { "epoch": 0.7345524542829643, "grad_norm": 0.9016759395599365, "learning_rate": 0.00014056036953178906, "loss": 1.0671, "step": 19080 }, { "epoch": 0.7347449470644851, "grad_norm": 1.4011337757110596, "learning_rate": 
0.00014053272719509417, "loss": 1.1453, "step": 19085 }, { "epoch": 0.7349374398460058, "grad_norm": 1.2671010494232178, "learning_rate": 0.00014050508115194988, "loss": 1.1453, "step": 19090 }, { "epoch": 0.7351299326275265, "grad_norm": 1.3316471576690674, "learning_rate": 0.00014047743140488422, "loss": 0.9451, "step": 19095 }, { "epoch": 0.7353224254090471, "grad_norm": 1.963815689086914, "learning_rate": 0.0001404497779564256, "loss": 0.993, "step": 19100 }, { "epoch": 0.7355149181905678, "grad_norm": 1.4354350566864014, "learning_rate": 0.00014042212080910276, "loss": 1.1263, "step": 19105 }, { "epoch": 0.7357074109720886, "grad_norm": 1.6670982837677002, "learning_rate": 0.00014039445996544473, "loss": 1.0964, "step": 19110 }, { "epoch": 0.7358999037536093, "grad_norm": 0.9805311560630798, "learning_rate": 0.00014036679542798092, "loss": 1.056, "step": 19115 }, { "epoch": 0.7360923965351299, "grad_norm": 1.4659690856933594, "learning_rate": 0.0001403391271992411, "loss": 1.0984, "step": 19120 }, { "epoch": 0.7362848893166506, "grad_norm": 0.5292593240737915, "learning_rate": 0.00014031145528175525, "loss": 1.0774, "step": 19125 }, { "epoch": 0.7364773820981714, "grad_norm": 1.9471726417541504, "learning_rate": 0.00014028377967805392, "loss": 1.1648, "step": 19130 }, { "epoch": 0.736669874879692, "grad_norm": 1.2082020044326782, "learning_rate": 0.0001402561003906678, "loss": 1.0764, "step": 19135 }, { "epoch": 0.7368623676612127, "grad_norm": 1.558237075805664, "learning_rate": 0.00014022841742212792, "loss": 1.3944, "step": 19140 }, { "epoch": 0.7370548604427334, "grad_norm": 1.7463306188583374, "learning_rate": 0.0001402007307749658, "loss": 1.0599, "step": 19145 }, { "epoch": 0.737247353224254, "grad_norm": 1.2820191383361816, "learning_rate": 0.00014017304045171316, "loss": 1.042, "step": 19150 }, { "epoch": 0.7374398460057748, "grad_norm": 1.617754340171814, "learning_rate": 0.00014014534645490206, "loss": 1.1031, "step": 19155 }, { "epoch": 
0.7376323387872955, "grad_norm": 1.0561091899871826, "learning_rate": 0.00014011764878706497, "loss": 1.1711, "step": 19160 }, { "epoch": 0.7378248315688162, "grad_norm": 1.0614964962005615, "learning_rate": 0.00014008994745073468, "loss": 1.0783, "step": 19165 }, { "epoch": 0.7380173243503368, "grad_norm": 1.5456453561782837, "learning_rate": 0.0001400622424484442, "loss": 1.0303, "step": 19170 }, { "epoch": 0.7382098171318575, "grad_norm": 1.4854921102523804, "learning_rate": 0.00014003453378272712, "loss": 1.0719, "step": 19175 }, { "epoch": 0.7384023099133783, "grad_norm": 1.4764469861984253, "learning_rate": 0.00014000682145611708, "loss": 1.2755, "step": 19180 }, { "epoch": 0.738594802694899, "grad_norm": 1.6524717807769775, "learning_rate": 0.00013997910547114826, "loss": 1.1086, "step": 19185 }, { "epoch": 0.7387872954764196, "grad_norm": 1.264930248260498, "learning_rate": 0.00013995138583035508, "loss": 1.1087, "step": 19190 }, { "epoch": 0.7389797882579403, "grad_norm": 1.8001179695129395, "learning_rate": 0.0001399236625362723, "loss": 1.1736, "step": 19195 }, { "epoch": 0.739172281039461, "grad_norm": 1.0975139141082764, "learning_rate": 0.00013989593559143507, "loss": 1.1669, "step": 19200 }, { "epoch": 0.7393647738209818, "grad_norm": 1.078940987586975, "learning_rate": 0.0001398682049983788, "loss": 1.1259, "step": 19205 }, { "epoch": 0.7395572666025024, "grad_norm": 1.0370323657989502, "learning_rate": 0.0001398404707596393, "loss": 1.2454, "step": 19210 }, { "epoch": 0.7397497593840231, "grad_norm": 1.8001567125320435, "learning_rate": 0.00013981273287775266, "loss": 1.2803, "step": 19215 }, { "epoch": 0.7399422521655438, "grad_norm": 1.00836181640625, "learning_rate": 0.00013978499135525535, "loss": 1.2406, "step": 19220 }, { "epoch": 0.7401347449470644, "grad_norm": 1.169600009918213, "learning_rate": 0.00013975724619468414, "loss": 1.2738, "step": 19225 }, { "epoch": 0.7403272377285852, "grad_norm": 1.6229758262634277, "learning_rate": 
0.00013972949739857613, "loss": 1.1428, "step": 19230 }, { "epoch": 0.7405197305101059, "grad_norm": 1.573930263519287, "learning_rate": 0.00013970174496946873, "loss": 1.1467, "step": 19235 }, { "epoch": 0.7407122232916266, "grad_norm": 1.4224984645843506, "learning_rate": 0.00013967398890989979, "loss": 1.1335, "step": 19240 }, { "epoch": 0.7409047160731472, "grad_norm": 1.5381492376327515, "learning_rate": 0.00013964622922240736, "loss": 1.1332, "step": 19245 }, { "epoch": 0.7410972088546679, "grad_norm": 1.7980502843856812, "learning_rate": 0.0001396184659095299, "loss": 1.2107, "step": 19250 }, { "epoch": 0.7412897016361887, "grad_norm": 0.8735668063163757, "learning_rate": 0.00013959069897380617, "loss": 1.0948, "step": 19255 }, { "epoch": 0.7414821944177094, "grad_norm": 1.1920636892318726, "learning_rate": 0.0001395629284177753, "loss": 1.1663, "step": 19260 }, { "epoch": 0.74167468719923, "grad_norm": 1.3055362701416016, "learning_rate": 0.0001395351542439767, "loss": 1.1108, "step": 19265 }, { "epoch": 0.7418671799807507, "grad_norm": 1.8382583856582642, "learning_rate": 0.00013950737645495014, "loss": 1.0279, "step": 19270 }, { "epoch": 0.7420596727622714, "grad_norm": 0.865042507648468, "learning_rate": 0.00013947959505323577, "loss": 1.1005, "step": 19275 }, { "epoch": 0.7422521655437921, "grad_norm": 1.182671070098877, "learning_rate": 0.0001394518100413739, "loss": 1.199, "step": 19280 }, { "epoch": 0.7424446583253128, "grad_norm": 1.5641695261001587, "learning_rate": 0.00013942402142190532, "loss": 1.098, "step": 19285 }, { "epoch": 0.7426371511068335, "grad_norm": 0.924503743648529, "learning_rate": 0.0001393962291973712, "loss": 1.2765, "step": 19290 }, { "epoch": 0.7428296438883542, "grad_norm": 1.1100239753723145, "learning_rate": 0.00013936843337031287, "loss": 1.2573, "step": 19295 }, { "epoch": 0.7430221366698749, "grad_norm": 1.2185837030410767, "learning_rate": 0.0001393406339432721, "loss": 1.2064, "step": 19300 }, { "epoch": 
0.7432146294513956, "grad_norm": 1.5463718175888062, "learning_rate": 0.000139312830918791, "loss": 1.1559, "step": 19305 }, { "epoch": 0.7434071222329163, "grad_norm": 1.8315119743347168, "learning_rate": 0.00013928502429941188, "loss": 1.2292, "step": 19310 }, { "epoch": 0.7435996150144369, "grad_norm": 1.303144097328186, "learning_rate": 0.00013925721408767757, "loss": 1.1463, "step": 19315 }, { "epoch": 0.7437921077959576, "grad_norm": 1.2040412425994873, "learning_rate": 0.00013922940028613106, "loss": 0.9717, "step": 19320 }, { "epoch": 0.7439846005774784, "grad_norm": 1.0547009706497192, "learning_rate": 0.0001392015828973158, "loss": 1.0389, "step": 19325 }, { "epoch": 0.7441770933589991, "grad_norm": 1.597541093826294, "learning_rate": 0.00013917376192377543, "loss": 1.0937, "step": 19330 }, { "epoch": 0.7443695861405197, "grad_norm": 0.9714812636375427, "learning_rate": 0.00013914593736805402, "loss": 1.1641, "step": 19335 }, { "epoch": 0.7445620789220404, "grad_norm": 1.2114696502685547, "learning_rate": 0.00013911810923269603, "loss": 1.2167, "step": 19340 }, { "epoch": 0.7447545717035611, "grad_norm": 1.330718755722046, "learning_rate": 0.000139090277520246, "loss": 1.0583, "step": 19345 }, { "epoch": 0.7449470644850819, "grad_norm": 2.2889277935028076, "learning_rate": 0.0001390624422332491, "loss": 0.9747, "step": 19350 }, { "epoch": 0.7451395572666025, "grad_norm": 1.2337167263031006, "learning_rate": 0.0001390346033742506, "loss": 1.0294, "step": 19355 }, { "epoch": 0.7453320500481232, "grad_norm": 1.0727423429489136, "learning_rate": 0.0001390067609457962, "loss": 1.0961, "step": 19360 }, { "epoch": 0.7455245428296439, "grad_norm": 1.7654608488082886, "learning_rate": 0.0001389789149504319, "loss": 1.0775, "step": 19365 }, { "epoch": 0.7457170356111645, "grad_norm": 1.2609182596206665, "learning_rate": 0.0001389510653907041, "loss": 1.0706, "step": 19370 }, { "epoch": 0.7459095283926853, "grad_norm": 1.906533122062683, "learning_rate": 
0.00013892321226915933, "loss": 1.1035, "step": 19375 }, { "epoch": 0.746102021174206, "grad_norm": 1.1768391132354736, "learning_rate": 0.00013889535558834462, "loss": 1.097, "step": 19380 }, { "epoch": 0.7462945139557267, "grad_norm": 1.7764432430267334, "learning_rate": 0.00013886749535080737, "loss": 1.2136, "step": 19385 }, { "epoch": 0.7464870067372473, "grad_norm": 2.2302021980285645, "learning_rate": 0.0001388396315590951, "loss": 1.1236, "step": 19390 }, { "epoch": 0.746679499518768, "grad_norm": 1.0161263942718506, "learning_rate": 0.00013881176421575583, "loss": 1.3818, "step": 19395 }, { "epoch": 0.7468719923002888, "grad_norm": 1.002767562866211, "learning_rate": 0.00013878389332333784, "loss": 1.1468, "step": 19400 }, { "epoch": 0.7470644850818094, "grad_norm": 1.637412667274475, "learning_rate": 0.00013875601888438968, "loss": 1.2074, "step": 19405 }, { "epoch": 0.7472569778633301, "grad_norm": 1.440507173538208, "learning_rate": 0.00013872814090146036, "loss": 0.9903, "step": 19410 }, { "epoch": 0.7474494706448508, "grad_norm": 1.8063361644744873, "learning_rate": 0.00013870025937709913, "loss": 1.1508, "step": 19415 }, { "epoch": 0.7476419634263715, "grad_norm": 1.0809664726257324, "learning_rate": 0.0001386723743138555, "loss": 1.1509, "step": 19420 }, { "epoch": 0.7478344562078922, "grad_norm": 0.7736053466796875, "learning_rate": 0.00013864448571427945, "loss": 1.048, "step": 19425 }, { "epoch": 0.7480269489894129, "grad_norm": 1.588610291481018, "learning_rate": 0.00013861659358092117, "loss": 1.1118, "step": 19430 }, { "epoch": 0.7482194417709336, "grad_norm": 1.5141923427581787, "learning_rate": 0.00013858869791633124, "loss": 1.1847, "step": 19435 }, { "epoch": 0.7484119345524542, "grad_norm": 1.6033471822738647, "learning_rate": 0.00013856079872306046, "loss": 1.1109, "step": 19440 }, { "epoch": 0.748604427333975, "grad_norm": 1.1898064613342285, "learning_rate": 0.0001385328960036601, "loss": 1.1758, "step": 19445 }, { "epoch": 
0.7487969201154957, "grad_norm": 1.8286123275756836, "learning_rate": 0.00013850498976068166, "loss": 1.1827, "step": 19450 }, { "epoch": 0.7489894128970164, "grad_norm": 1.6806395053863525, "learning_rate": 0.00013847707999667698, "loss": 0.9877, "step": 19455 }, { "epoch": 0.749181905678537, "grad_norm": 2.1087961196899414, "learning_rate": 0.00013844916671419823, "loss": 1.2052, "step": 19460 }, { "epoch": 0.7493743984600577, "grad_norm": 1.4643951654434204, "learning_rate": 0.0001384212499157979, "loss": 0.9652, "step": 19465 }, { "epoch": 0.7495668912415785, "grad_norm": 1.4248661994934082, "learning_rate": 0.00013839332960402872, "loss": 1.1903, "step": 19470 }, { "epoch": 0.7497593840230992, "grad_norm": 1.8101911544799805, "learning_rate": 0.0001383709908267036, "loss": 1.0335, "step": 19475 }, { "epoch": 0.7499518768046198, "grad_norm": 1.9175690412521362, "learning_rate": 0.00013834306419730473, "loss": 1.2055, "step": 19480 }, { "epoch": 0.7501443695861405, "grad_norm": 0.930147647857666, "learning_rate": 0.00013831513406168663, "loss": 1.2383, "step": 19485 }, { "epoch": 0.7503368623676612, "grad_norm": 2.496994733810425, "learning_rate": 0.00013828720042240338, "loss": 1.2861, "step": 19490 }, { "epoch": 0.750529355149182, "grad_norm": 1.9224547147750854, "learning_rate": 0.00013825926328200926, "loss": 1.247, "step": 19495 }, { "epoch": 0.7507218479307026, "grad_norm": 1.7266567945480347, "learning_rate": 0.00013823132264305894, "loss": 1.1127, "step": 19500 }, { "epoch": 0.7509143407122233, "grad_norm": 1.2304484844207764, "learning_rate": 0.00013820337850810744, "loss": 1.1432, "step": 19505 }, { "epoch": 0.751106833493744, "grad_norm": 2.311600685119629, "learning_rate": 0.00013817543087971004, "loss": 1.2405, "step": 19510 }, { "epoch": 0.7512993262752646, "grad_norm": 2.358445644378662, "learning_rate": 0.0001381474797604224, "loss": 1.2407, "step": 19515 }, { "epoch": 0.7514918190567854, "grad_norm": 1.2297358512878418, "learning_rate": 
0.00013811952515280042, "loss": 0.9701, "step": 19520 }, { "epoch": 0.7516843118383061, "grad_norm": 1.260389804840088, "learning_rate": 0.00013809156705940037, "loss": 1.1711, "step": 19525 }, { "epoch": 0.7518768046198268, "grad_norm": 0.9936877489089966, "learning_rate": 0.00013806360548277886, "loss": 0.9045, "step": 19530 }, { "epoch": 0.7520692974013474, "grad_norm": 1.951540470123291, "learning_rate": 0.0001380356404254928, "loss": 1.0988, "step": 19535 }, { "epoch": 0.7522617901828681, "grad_norm": 1.0897135734558105, "learning_rate": 0.00013800767189009935, "loss": 1.0216, "step": 19540 }, { "epoch": 0.7524542829643889, "grad_norm": 1.3618708848953247, "learning_rate": 0.00013797969987915608, "loss": 1.0604, "step": 19545 }, { "epoch": 0.7526467757459095, "grad_norm": 1.413282871246338, "learning_rate": 0.00013795172439522087, "loss": 1.2045, "step": 19550 }, { "epoch": 0.7528392685274302, "grad_norm": 1.4086360931396484, "learning_rate": 0.00013792374544085187, "loss": 1.0724, "step": 19555 }, { "epoch": 0.7530317613089509, "grad_norm": 1.2165982723236084, "learning_rate": 0.00013789576301860757, "loss": 1.1886, "step": 19560 }, { "epoch": 0.7532242540904716, "grad_norm": 1.4711132049560547, "learning_rate": 0.00013786777713104678, "loss": 1.1847, "step": 19565 }, { "epoch": 0.7534167468719923, "grad_norm": 1.0978587865829468, "learning_rate": 0.00013783978778072862, "loss": 1.1521, "step": 19570 }, { "epoch": 0.753609239653513, "grad_norm": 1.1508560180664062, "learning_rate": 0.00013781179497021251, "loss": 1.089, "step": 19575 }, { "epoch": 0.7538017324350337, "grad_norm": 1.3086730241775513, "learning_rate": 0.00013778379870205829, "loss": 1.2293, "step": 19580 }, { "epoch": 0.7539942252165543, "grad_norm": 1.63782799243927, "learning_rate": 0.0001377557989788259, "loss": 1.0373, "step": 19585 }, { "epoch": 0.754186717998075, "grad_norm": 1.4707633256912231, "learning_rate": 0.00013772779580307584, "loss": 1.0481, "step": 19590 }, { "epoch": 
0.7543792107795958, "grad_norm": 1.6030997037887573, "learning_rate": 0.0001376997891773688, "loss": 1.0953, "step": 19595 }, { "epoch": 0.7545717035611165, "grad_norm": 1.0709367990493774, "learning_rate": 0.00013767177910426574, "loss": 1.1094, "step": 19600 }, { "epoch": 0.7547641963426371, "grad_norm": 1.2302757501602173, "learning_rate": 0.00013764376558632807, "loss": 0.9815, "step": 19605 }, { "epoch": 0.7549566891241578, "grad_norm": 2.4043121337890625, "learning_rate": 0.00013761574862611737, "loss": 1.1146, "step": 19610 }, { "epoch": 0.7551491819056786, "grad_norm": 1.2333440780639648, "learning_rate": 0.00013758772822619565, "loss": 1.367, "step": 19615 }, { "epoch": 0.7553416746871993, "grad_norm": 2.032453775405884, "learning_rate": 0.0001375597043891252, "loss": 1.1401, "step": 19620 }, { "epoch": 0.7555341674687199, "grad_norm": 1.1483811140060425, "learning_rate": 0.00013753167711746858, "loss": 1.0757, "step": 19625 }, { "epoch": 0.7557266602502406, "grad_norm": 2.314659833908081, "learning_rate": 0.0001375036464137887, "loss": 1.162, "step": 19630 }, { "epoch": 0.7559191530317613, "grad_norm": 1.460924744606018, "learning_rate": 0.0001374756122806488, "loss": 1.1596, "step": 19635 }, { "epoch": 0.756111645813282, "grad_norm": 1.628796935081482, "learning_rate": 0.0001374475747206124, "loss": 1.2437, "step": 19640 }, { "epoch": 0.7563041385948027, "grad_norm": 0.9428819417953491, "learning_rate": 0.0001374195337362434, "loss": 1.1804, "step": 19645 }, { "epoch": 0.7564966313763234, "grad_norm": 1.1497089862823486, "learning_rate": 0.00013739148933010587, "loss": 1.0776, "step": 19650 }, { "epoch": 0.7566891241578441, "grad_norm": 1.2695974111557007, "learning_rate": 0.00013736344150476435, "loss": 1.2446, "step": 19655 }, { "epoch": 0.7568816169393647, "grad_norm": 1.4802236557006836, "learning_rate": 0.00013733539026278364, "loss": 1.066, "step": 19660 }, { "epoch": 0.7570741097208855, "grad_norm": 1.7089695930480957, "learning_rate": 
0.0001373073356067288, "loss": 1.0265, "step": 19665 }, { "epoch": 0.7572666025024062, "grad_norm": 1.4578391313552856, "learning_rate": 0.00013727927753916523, "loss": 1.0214, "step": 19670 }, { "epoch": 0.7574590952839269, "grad_norm": 0.7848085761070251, "learning_rate": 0.00013725121606265872, "loss": 1.0254, "step": 19675 }, { "epoch": 0.7576515880654475, "grad_norm": 1.5217918157577515, "learning_rate": 0.00013722315117977525, "loss": 1.2226, "step": 19680 }, { "epoch": 0.7578440808469682, "grad_norm": 1.0195049047470093, "learning_rate": 0.00013719508289308118, "loss": 1.0981, "step": 19685 }, { "epoch": 0.758036573628489, "grad_norm": 0.8907167315483093, "learning_rate": 0.00013716701120514323, "loss": 1.0012, "step": 19690 }, { "epoch": 0.7582290664100096, "grad_norm": 1.6701477766036987, "learning_rate": 0.00013713893611852824, "loss": 1.1048, "step": 19695 }, { "epoch": 0.7584215591915303, "grad_norm": 1.4811270236968994, "learning_rate": 0.0001371108576358036, "loss": 1.2534, "step": 19700 }, { "epoch": 0.758614051973051, "grad_norm": 2.0855724811553955, "learning_rate": 0.00013708277575953686, "loss": 0.968, "step": 19705 }, { "epoch": 0.7588065447545717, "grad_norm": 1.4841949939727783, "learning_rate": 0.00013705469049229594, "loss": 1.1039, "step": 19710 }, { "epoch": 0.7589990375360924, "grad_norm": 1.2720580101013184, "learning_rate": 0.000137026601836649, "loss": 0.9966, "step": 19715 }, { "epoch": 0.7591915303176131, "grad_norm": 2.275491714477539, "learning_rate": 0.00013699850979516465, "loss": 1.1684, "step": 19720 }, { "epoch": 0.7593840230991338, "grad_norm": 1.2187795639038086, "learning_rate": 0.00013697041437041167, "loss": 0.9793, "step": 19725 }, { "epoch": 0.7595765158806544, "grad_norm": 1.1858078241348267, "learning_rate": 0.00013694231556495915, "loss": 1.1833, "step": 19730 }, { "epoch": 0.7597690086621751, "grad_norm": 1.2739187479019165, "learning_rate": 0.00013691421338137664, "loss": 1.1139, "step": 19735 }, { "epoch": 
0.7599615014436959, "grad_norm": 1.7635918855667114, "learning_rate": 0.00013688610782223382, "loss": 1.0935, "step": 19740 }, { "epoch": 0.7601539942252166, "grad_norm": 1.7312551736831665, "learning_rate": 0.00013685799889010084, "loss": 1.036, "step": 19745 }, { "epoch": 0.7603464870067372, "grad_norm": 1.222069501876831, "learning_rate": 0.00013682988658754797, "loss": 1.1653, "step": 19750 }, { "epoch": 0.7605389797882579, "grad_norm": 2.664635181427002, "learning_rate": 0.00013680177091714596, "loss": 1.281, "step": 19755 }, { "epoch": 0.7607314725697787, "grad_norm": 1.2842050790786743, "learning_rate": 0.00013677365188146577, "loss": 1.1799, "step": 19760 }, { "epoch": 0.7609239653512994, "grad_norm": 1.45145583152771, "learning_rate": 0.00013674552948307874, "loss": 1.1625, "step": 19765 }, { "epoch": 0.76111645813282, "grad_norm": 1.8897767066955566, "learning_rate": 0.00013671740372455648, "loss": 1.1714, "step": 19770 }, { "epoch": 0.7613089509143407, "grad_norm": 1.171235203742981, "learning_rate": 0.00013668927460847084, "loss": 1.2752, "step": 19775 }, { "epoch": 0.7615014436958614, "grad_norm": 0.9240397810935974, "learning_rate": 0.00013666114213739408, "loss": 1.0669, "step": 19780 }, { "epoch": 0.7616939364773821, "grad_norm": 1.654099941253662, "learning_rate": 0.0001366330063138988, "loss": 1.1941, "step": 19785 }, { "epoch": 0.7618864292589028, "grad_norm": 1.2961543798446655, "learning_rate": 0.00013660486714055768, "loss": 1.2552, "step": 19790 }, { "epoch": 0.7620789220404235, "grad_norm": 1.2810674905776978, "learning_rate": 0.00013657672461994398, "loss": 1.1035, "step": 19795 }, { "epoch": 0.7622714148219442, "grad_norm": 3.574240207672119, "learning_rate": 0.00013654857875463111, "loss": 1.1724, "step": 19800 }, { "epoch": 0.7624639076034648, "grad_norm": 1.0426640510559082, "learning_rate": 0.00013652042954719282, "loss": 1.351, "step": 19805 }, { "epoch": 0.7626564003849856, "grad_norm": 0.9059193730354309, "learning_rate": 
0.00013649227700020318, "loss": 1.1989, "step": 19810 }, { "epoch": 0.7628488931665063, "grad_norm": 2.0250661373138428, "learning_rate": 0.00013646412111623657, "loss": 1.5794, "step": 19815 }, { "epoch": 0.763041385948027, "grad_norm": 1.030274748802185, "learning_rate": 0.00013643596189786758, "loss": 0.965, "step": 19820 }, { "epoch": 0.7632338787295476, "grad_norm": 1.976044774055481, "learning_rate": 0.0001364077993476713, "loss": 1.1147, "step": 19825 }, { "epoch": 0.7634263715110683, "grad_norm": 1.6923823356628418, "learning_rate": 0.00013637963346822292, "loss": 1.1618, "step": 19830 }, { "epoch": 0.7636188642925891, "grad_norm": 1.3266521692276, "learning_rate": 0.00013635146426209805, "loss": 1.1782, "step": 19835 }, { "epoch": 0.7638113570741097, "grad_norm": 1.6700036525726318, "learning_rate": 0.00013632329173187256, "loss": 0.9154, "step": 19840 }, { "epoch": 0.7640038498556304, "grad_norm": 1.5041186809539795, "learning_rate": 0.00013629511588012273, "loss": 1.1082, "step": 19845 }, { "epoch": 0.7641963426371511, "grad_norm": 1.4730234146118164, "learning_rate": 0.0001362669367094249, "loss": 1.128, "step": 19850 }, { "epoch": 0.7643888354186718, "grad_norm": 1.415727972984314, "learning_rate": 0.00013623875422235602, "loss": 1.1844, "step": 19855 }, { "epoch": 0.7645813282001925, "grad_norm": 1.9785696268081665, "learning_rate": 0.00013621056842149306, "loss": 1.1183, "step": 19860 }, { "epoch": 0.7647738209817132, "grad_norm": 1.0615553855895996, "learning_rate": 0.00013618237930941357, "loss": 1.1257, "step": 19865 }, { "epoch": 0.7649663137632339, "grad_norm": 1.749930500984192, "learning_rate": 0.00013615418688869512, "loss": 0.9933, "step": 19870 }, { "epoch": 0.7651588065447545, "grad_norm": 1.5585590600967407, "learning_rate": 0.0001361259911619158, "loss": 1.1877, "step": 19875 }, { "epoch": 0.7653512993262752, "grad_norm": 1.4237456321716309, "learning_rate": 0.00013609779213165393, "loss": 1.0918, "step": 19880 }, { "epoch": 
0.765543792107796, "grad_norm": 1.2364110946655273, "learning_rate": 0.00013606958980048805, "loss": 1.0557, "step": 19885 }, { "epoch": 0.7657362848893167, "grad_norm": 1.0982424020767212, "learning_rate": 0.00013604138417099712, "loss": 1.1845, "step": 19890 }, { "epoch": 0.7659287776708373, "grad_norm": 0.8089034557342529, "learning_rate": 0.00013601317524576038, "loss": 1.139, "step": 19895 }, { "epoch": 0.766121270452358, "grad_norm": 1.0913941860198975, "learning_rate": 0.0001359849630273573, "loss": 1.2198, "step": 19900 }, { "epoch": 0.7663137632338787, "grad_norm": 1.3398661613464355, "learning_rate": 0.00013595674751836777, "loss": 0.9453, "step": 19905 }, { "epoch": 0.7665062560153995, "grad_norm": 2.1962482929229736, "learning_rate": 0.00013592852872137186, "loss": 1.3174, "step": 19910 }, { "epoch": 0.7666987487969201, "grad_norm": 1.4308804273605347, "learning_rate": 0.00013590030663895001, "loss": 1.1015, "step": 19915 }, { "epoch": 0.7668912415784408, "grad_norm": 0.915403425693512, "learning_rate": 0.00013587208127368292, "loss": 1.0555, "step": 19920 }, { "epoch": 0.7670837343599615, "grad_norm": 1.0108091831207275, "learning_rate": 0.00013584385262815164, "loss": 1.1591, "step": 19925 }, { "epoch": 0.7672762271414822, "grad_norm": 1.7001339197158813, "learning_rate": 0.00013581562070493747, "loss": 1.1671, "step": 19930 }, { "epoch": 0.7674687199230029, "grad_norm": 0.7533661723136902, "learning_rate": 0.00013578738550662207, "loss": 0.9644, "step": 19935 }, { "epoch": 0.7676612127045236, "grad_norm": 1.1101553440093994, "learning_rate": 0.0001357591470357873, "loss": 1.0671, "step": 19940 }, { "epoch": 0.7678537054860443, "grad_norm": 2.112529993057251, "learning_rate": 0.00013573090529501544, "loss": 1.1979, "step": 19945 }, { "epoch": 0.7680461982675649, "grad_norm": 1.2636981010437012, "learning_rate": 0.000135702660286889, "loss": 1.1494, "step": 19950 }, { "epoch": 0.7682386910490857, "grad_norm": 1.5712652206420898, "learning_rate": 
0.00013567441201399073, "loss": 1.1171, "step": 19955 }, { "epoch": 0.7684311838306064, "grad_norm": 1.0548748970031738, "learning_rate": 0.00013564616047890383, "loss": 0.9806, "step": 19960 }, { "epoch": 0.768623676612127, "grad_norm": 1.828020453453064, "learning_rate": 0.00013561790568421172, "loss": 1.2924, "step": 19965 }, { "epoch": 0.7688161693936477, "grad_norm": 1.037383794784546, "learning_rate": 0.00013558964763249804, "loss": 1.0602, "step": 19970 }, { "epoch": 0.7690086621751684, "grad_norm": 1.3003454208374023, "learning_rate": 0.00013556138632634686, "loss": 1.0049, "step": 19975 }, { "epoch": 0.7692011549566892, "grad_norm": 1.0770816802978516, "learning_rate": 0.00013553312176834247, "loss": 1.2497, "step": 19980 }, { "epoch": 0.7693936477382098, "grad_norm": 1.5512239933013916, "learning_rate": 0.00013550485396106947, "loss": 1.053, "step": 19985 }, { "epoch": 0.7695861405197305, "grad_norm": 1.0486184358596802, "learning_rate": 0.0001354765829071128, "loss": 1.2014, "step": 19990 }, { "epoch": 0.7697786333012512, "grad_norm": 1.2066504955291748, "learning_rate": 0.00013544830860905762, "loss": 1.1933, "step": 19995 }, { "epoch": 0.7699711260827719, "grad_norm": 1.090703010559082, "learning_rate": 0.00013542003106948943, "loss": 1.1504, "step": 20000 } ], "logging_steps": 5, "max_steps": 51950, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 10000, "total_flos": 6.241026539658117e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }